aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni')
-rw-r--r--native/jni/Android.mk4
-rw-r--r--native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp48
-rw-r--r--native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp2
-rw-r--r--native/jni/src/suggest/core/dictionary/multi_bigram_map.h4
-rw-r--r--native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h4
-rw-r--r--native/jni/src/suggest/core/suggest.cpp2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_policy.h7
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp126
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h65
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp298
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h37
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp149
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h178
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp24
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h18
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp44
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h10
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp175
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h203
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp26
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h8
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp182
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h50
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp43
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h13
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp125
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h51
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp215
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h (renamed from native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.h)51
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.cpp81
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp51
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h12
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp13
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h20
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp15
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h25
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp107
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h50
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp26
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h6
-rw-r--r--native/jni/src/suggest/policyimpl/typing/typing_weighting.h9
41 files changed, 1955 insertions, 622 deletions
diff --git a/native/jni/Android.mk b/native/jni/Android.mk
index d83bdadfa..0594ddff0 100644
--- a/native/jni/Android.mk
+++ b/native/jni/Android.mk
@@ -70,9 +70,10 @@ LATIN_IME_CORE_SRC_FILES := \
bigram/bigram_list_read_write_utils.cpp \
bigram/dynamic_bigram_list_policy.cpp \
header/header_policy.cpp \
- header/header_reading_utils.cpp \
+ header/header_read_write_utils.cpp \
shortcut/shortcut_list_reading_utils.cpp \
dictionary_structure_with_buffer_policy_factory.cpp \
+ dynamic_patricia_trie_gc_event_listeners.cpp \
dynamic_patricia_trie_node_reader.cpp \
dynamic_patricia_trie_policy.cpp \
dynamic_patricia_trie_reading_helper.cpp \
@@ -84,6 +85,7 @@ LATIN_IME_CORE_SRC_FILES := \
$(addprefix suggest/policyimpl/dictionary/utils/, \
buffer_with_extendable_buffer.cpp \
byte_array_utils.cpp \
+ dict_file_writing_utils.cpp \
format_utils.cpp) \
suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp \
$(addprefix suggest/policyimpl/typing/, \
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index 7f47493b2..7761ec4d5 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -26,12 +26,55 @@
#include "suggest/core/dictionary/dictionary.h"
#include "suggest/core/suggest_options.h"
#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h"
+#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
#include "utils/autocorrection_threshold_utils.h"
namespace latinime {
class ProximityInfo;
+// TODO: Move to makedict.
+static jboolean latinime_BinaryDictionary_createEmptyDictFile(JNIEnv *env, jclass clazz,
+ jstring filePath, jlong dictVersion, jobjectArray attributeKeyStringArray,
+ jobjectArray attributeValueStringArray) {
+ const jsize filePathUtf8Length = env->GetStringUTFLength(filePath);
+ char filePathChars[filePathUtf8Length + 1];
+ env->GetStringUTFRegion(filePath, 0, env->GetStringLength(filePath), filePathChars);
+ filePathChars[filePathUtf8Length] = '\0';
+
+ const int keyCount = env->GetArrayLength(attributeKeyStringArray);
+ const int valueCount = env->GetArrayLength(attributeValueStringArray);
+ if (keyCount != valueCount) {
+ return false;
+ }
+
+ HeaderReadWriteUtils::AttributeMap attributeMap;
+ for (int i = 0; i < keyCount; i++) {
+ jstring keyString = static_cast<jstring>(
+ env->GetObjectArrayElement(attributeKeyStringArray, i));
+ const jsize keyUtf8Length = env->GetStringUTFLength(keyString);
+ char keyChars[keyUtf8Length + 1];
+ env->GetStringUTFRegion(keyString, 0, env->GetStringLength(keyString), keyChars);
+ keyChars[keyUtf8Length] = '\0';
+ HeaderReadWriteUtils::AttributeMap::key_type key;
+ HeaderReadWriteUtils::insertCharactersIntoVector(keyChars, &key);
+
+ jstring valueString = static_cast<jstring>(
+ env->GetObjectArrayElement(attributeValueStringArray, i));
+ const jsize valueUtf8Length = env->GetStringUTFLength(valueString);
+ char valueChars[valueUtf8Length + 1];
+ env->GetStringUTFRegion(valueString, 0, env->GetStringLength(valueString), valueChars);
+ valueChars[valueUtf8Length] = '\0';
+ HeaderReadWriteUtils::AttributeMap::mapped_type value;
+ HeaderReadWriteUtils::insertCharactersIntoVector(valueChars, &value);
+
+ attributeMap[key] = value;
+ }
+
+ return DictFileWritingUtils::createEmptyDictFile(filePathChars, static_cast<int>(dictVersion),
+ &attributeMap);
+}
+
static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring sourceDir,
jlong dictOffset, jlong dictSize, jboolean isUpdatable) {
PROF_OPEN;
@@ -282,6 +325,11 @@ static int latinime_BinaryDictionary_calculateProbabilityNative(JNIEnv *env, jcl
static const JNINativeMethod sMethods[] = {
{
+ const_cast<char *>("createEmptyDictFileNative"),
+ const_cast<char *>("(Ljava/lang/String;J[Ljava/lang/String;[Ljava/lang/String;)Z"),
+ reinterpret_cast<void *>(latinime_BinaryDictionary_createEmptyDictFile)
+ },
+ {
const_cast<char *>("openNative"),
const_cast<char *>("(Ljava/lang/String;JJZ)J"),
reinterpret_cast<void *>(latinime_BinaryDictionary_open)
diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
index 5ba71c168..71f4ef6ea 100644
--- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
@@ -147,7 +147,7 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in
int pos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(prevWord, prevWordLength,
forceLowerCaseSearch);
if (NOT_A_DICT_POS == pos) return NOT_A_DICT_POS;
- return mDictionaryStructurePolicy->getBigramsPositionOfNode(pos);
+ return mDictionaryStructurePolicy->getBigramsPositionOfPtNode(pos);
}
int BigramDictionary::getBigramProbability(const int *word0, int length0, const int *word1,
diff --git a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h
index aecf41386..4633c07b0 100644
--- a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h
+++ b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h
@@ -68,7 +68,7 @@ class MultiBigramMap {
void init(const DictionaryStructureWithBufferPolicy *const structurePolicy,
const int nodePos) {
- const int bigramsListPos = structurePolicy->getBigramsPositionOfNode(nodePos);
+ const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos);
BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
bigramsListPos);
while (bigramsIt.hasNext()) {
@@ -112,7 +112,7 @@ class MultiBigramMap {
const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos,
const int nextWordPosition, const int unigramProbability) {
int bigramProbability = NOT_A_PROBABILITY;
- const int bigramsListPos = structurePolicy->getBigramsPositionOfNode(nodePos);
+ const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos);
BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
bigramsListPos);
while (bigramsIt.hasNext()) {
diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
index 24d1b8ba1..b95488ebd 100644
--- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
@@ -52,9 +52,9 @@ class DictionaryStructureWithBufferPolicy {
virtual int getUnigramProbabilityOfPtNode(const int nodePos) const = 0;
- virtual int getShortcutPositionOfNode(const int nodePos) const = 0;
+ virtual int getShortcutPositionOfPtNode(const int nodePos) const = 0;
- virtual int getBigramsPositionOfNode(const int nodePos) const = 0;
+ virtual int getBigramsPositionOfPtNode(const int nodePos) const = 0;
virtual const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const = 0;
diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp
index 0c925be25..b1340e12f 100644
--- a/native/jni/src/suggest/core/suggest.cpp
+++ b/native/jni/src/suggest/core/suggest.cpp
@@ -223,7 +223,7 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
BinaryDictionaryShortcutIterator shortcutIt(
traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(),
traverseSession->getDictionaryStructurePolicy()
- ->getShortcutPositionOfNode(terminalDicNode->getPos()));
+ ->getShortcutPositionOfPtNode(terminalDicNode->getPos()));
// Shortcut is not supported for multiple words suggestions.
// TODO: Check shortcuts during traversal for multiple words suggestions.
const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_policy.h
index 26015d512..6ff95cac4 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_policy.h
@@ -33,10 +33,9 @@ class BigramListPolicy : public DictionaryBigramsStructurePolicy {
void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
int *const pos) const {
- const BigramListReadWriteUtils::BigramFlags flags =
- BigramListReadWriteUtils::getFlagsAndForwardPointer(mBigramsBuf, pos);
- *outBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
- mBigramsBuf, flags, pos);
+ BigramListReadWriteUtils::BigramFlags flags;
+ BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(mBigramsBuf, &flags,
+ outBigramPos, pos);
*outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags);
*outHasNext = BigramListReadWriteUtils::hasNext(flags);
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp
index 09e832f07..1926b9831 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp
@@ -16,7 +16,9 @@
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
namespace latinime {
@@ -38,23 +40,31 @@ const BigramListReadWriteUtils::BigramFlags
BigramListReadWriteUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
-/* static */ BigramListReadWriteUtils::BigramFlags
- BigramListReadWriteUtils::getFlagsAndForwardPointer(const uint8_t *const bigramsBuf,
- int *const pos) {
- return ByteArrayUtils::readUint8AndAdvancePosition(bigramsBuf, pos);
+/* static */ void BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
+ const uint8_t *const bigramsBuf, BigramFlags *const outBigramFlags,
+ int *const outTargetPtNodePos, int *const bigramEntryPos) {
+ const BigramFlags bigramFlags = ByteArrayUtils::readUint8AndAdvancePosition(bigramsBuf,
+ bigramEntryPos);
+ if (outBigramFlags) {
+ *outBigramFlags = bigramFlags;
+ }
+ const int targetPos = getBigramAddressAndAdvancePosition(bigramsBuf, bigramFlags,
+ bigramEntryPos);
+ if (outTargetPtNodePos) {
+ *outTargetPtNodePos = targetPos;
+ }
}
/* static */ void BigramListReadWriteUtils::skipExistingBigrams(const uint8_t *const bigramsBuf,
- int *const pos) {
- BigramFlags flags = getFlagsAndForwardPointer(bigramsBuf, pos);
- while (hasNext(flags)) {
- *pos += attributeAddressSize(flags);
- flags = getFlagsAndForwardPointer(bigramsBuf, pos);
- }
- *pos += attributeAddressSize(flags);
+ int *const bigramListPos) {
+ BigramFlags flags;
+ do {
+ getBigramEntryPropertiesAndAdvancePosition(bigramsBuf, &flags, 0 /* outTargetPtNodePos */,
+ bigramListPos);
+ } while(hasNext(flags));
}
-/* static */ int BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
+/* static */ int BigramListReadWriteUtils::getBigramAddressAndAdvancePosition(
const uint8_t *const bigramsBuf, const BigramFlags flags, int *const pos) {
int offset = 0;
const int origin = *pos;
@@ -69,8 +79,10 @@ const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
offset = ByteArrayUtils::readUint24AndAdvancePosition(bigramsBuf, pos);
break;
}
- if (offset == 0) {
+ if (offset == DynamicPatriciaTrieReadingUtils::DICT_OFFSET_INVALID) {
return NOT_A_DICT_POS;
+ } else if (offset == DynamicPatriciaTrieReadingUtils::DICT_OFFSET_ZERO_OFFSET) {
+ return origin;
}
if (isOffsetNegative(flags)) {
return origin - offset;
@@ -79,4 +91,92 @@ const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
}
}
+/* static */ bool BigramListReadWriteUtils::setHasNextFlag(
+ BufferWithExtendableBuffer *const buffer, const bool hasNext, const int entryPos) {
+ const bool usesAdditionalBuffer = buffer->isInAdditionalBuffer(entryPos);
+ int readingPos = entryPos;
+ if (usesAdditionalBuffer) {
+ readingPos -= buffer->getOriginalBufferSize();
+ }
+ BigramFlags bigramFlags = ByteArrayUtils::readUint8AndAdvancePosition(
+ buffer->getBuffer(usesAdditionalBuffer), &readingPos);
+ if (hasNext) {
+ bigramFlags = bigramFlags | FLAG_ATTRIBUTE_HAS_NEXT;
+ } else {
+ bigramFlags = bigramFlags & (~FLAG_ATTRIBUTE_HAS_NEXT);
+ }
+ int writingPos = entryPos;
+ return buffer->writeUintAndAdvancePosition(bigramFlags, 1 /* size */, &writingPos);
+}
+
+/* static */ bool BigramListReadWriteUtils::createAndWriteBigramEntry(
+ BufferWithExtendableBuffer *const buffer, const int targetPos, const int probability,
+ const bool hasNext, int *const writingPos) {
+ BigramFlags flags;
+ if (!createAndGetBigramFlags(*writingPos, targetPos, probability, hasNext, &flags)) {
+ return false;
+ }
+ return writeBigramEntry(buffer, flags, targetPos, writingPos);
+}
+
+/* static */ bool BigramListReadWriteUtils::writeBigramEntry(
+ BufferWithExtendableBuffer *const bufferToWrite, const BigramFlags flags,
+ const int targetPtNodePos, int *const writingPos) {
+ const int offset = getBigramTargetOffset(targetPtNodePos, *writingPos);
+ const BigramFlags flagsToWrite = (offset < 0) ?
+ (flags | FLAG_ATTRIBUTE_OFFSET_NEGATIVE) : (flags & ~FLAG_ATTRIBUTE_OFFSET_NEGATIVE);
+ if (!bufferToWrite->writeUintAndAdvancePosition(flagsToWrite, 1 /* size */, writingPos)) {
+ return false;
+ }
+ const uint32_t absOffest = abs(offset);
+ const int bigramTargetFieldSize = attributeAddressSize(flags);
+ return bufferToWrite->writeUintAndAdvancePosition(absOffest, bigramTargetFieldSize,
+ writingPos);
+}
+
+// Returns true if the bigram entry is valid and put entry flags into out*.
+/* static */ bool BigramListReadWriteUtils::createAndGetBigramFlags(const int entryPos,
+ const int targetPtNodePos, const int probability, const bool hasNext,
+ BigramFlags *const outBigramFlags) {
+ BigramFlags flags = probability & MASK_ATTRIBUTE_PROBABILITY;
+ if (hasNext) {
+ flags |= FLAG_ATTRIBUTE_HAS_NEXT;
+ }
+ const int offset = getBigramTargetOffset(targetPtNodePos, entryPos);
+ if (offset < 0) {
+ flags |= FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
+ }
+ const uint32_t absOffest = abs(offset);
+ if ((absOffest >> 24) != 0) {
+ // Offset is too large.
+ return false;
+ } else if ((absOffest >> 16) != 0) {
+ flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
+ } else if ((absOffest >> 8) != 0) {
+ flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES;
+ } else {
+ flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE;
+ }
+ // Currently, all newly written bigram position fields are 3 bytes to simplify dictionary
+ // writing.
+ // TODO: Remove following 2 lines and optimize memory space.
+ flags = (flags & (~MASK_ATTRIBUTE_ADDRESS_TYPE)) | FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
+ *outBigramFlags = flags;
+ return true;
+}
+
+/* static */ int BigramListReadWriteUtils::getBigramTargetOffset(const int targetPtNodePos,
+ const int entryPos) {
+ if (targetPtNodePos == NOT_A_DICT_POS) {
+ return DynamicPatriciaTrieReadingUtils::DICT_OFFSET_INVALID;
+ } else {
+ const int offset = targetPtNodePos - (entryPos + 1 /* bigramFlagsField */);
+ if (offset == 0) {
+ return DynamicPatriciaTrieReadingUtils::DICT_OFFSET_ZERO_OFFSET;
+ } else {
+ return offset;
+ }
+ }
+}
+
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h
index 884bcd7a9..eabe4e099 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h
@@ -24,11 +24,15 @@
namespace latinime {
+class BufferWithExtendableBuffer;
+
class BigramListReadWriteUtils {
public:
typedef uint8_t BigramFlags;
- static BigramFlags getFlagsAndForwardPointer(const uint8_t *const bigramsBuf, int *const pos);
+ static void getBigramEntryPropertiesAndAdvancePosition(const uint8_t *const bigramsBuf,
+ BigramFlags *const outBigramFlags, int *const outTargetPtNodePos,
+ int *const bigramEntryPos);
static AK_FORCE_INLINE int getProbabilityFromFlags(const BigramFlags flags) {
return flags & MASK_ATTRIBUTE_PROBABILITY;
@@ -39,10 +43,7 @@ public:
}
// Bigrams reading methods
- static void skipExistingBigrams(const uint8_t *const bigramsBuf, int *const pos);
-
- static int getBigramAddressAndForwardPointer(const uint8_t *const bigramsBuf,
- const BigramFlags flags, int *const pos);
+ static void skipExistingBigrams(const uint8_t *const bigramsBuf, int *const bigramListPos);
// Returns the size of the bigram position field that is stored in bigram flags.
static AK_FORCE_INLINE int attributeAddressSize(const BigramFlags flags) {
@@ -58,50 +59,19 @@ public:
*/
}
- static AK_FORCE_INLINE BigramFlags setHasNextFlag(const BigramFlags flags) {
- return flags | FLAG_ATTRIBUTE_HAS_NEXT;
- }
+ static bool setHasNextFlag(BufferWithExtendableBuffer *const buffer,
+ const bool hasNext, const int entryPos);
static AK_FORCE_INLINE BigramFlags setProbabilityInFlags(const BigramFlags flags,
const int probability) {
return (flags & (~MASK_ATTRIBUTE_PROBABILITY)) | (probability & MASK_ATTRIBUTE_PROBABILITY);
}
- // Returns true if the bigram entry is valid and put entry values into out*.
- static AK_FORCE_INLINE bool createBigramEntryAndGetFlagsAndOffsetAndOffsetFieldSize(
- const int entryPos, const int targetPos, const int probability, const bool hasNext,
- BigramFlags *const outBigramFlags, uint32_t *const outOffset,
- int *const outOffsetFieldSize) {
- if (targetPos == NOT_A_DICT_POS) {
- return false;
- }
- BigramFlags flags = probability & MASK_ATTRIBUTE_PROBABILITY;
- if (hasNext) {
- flags |= FLAG_ATTRIBUTE_HAS_NEXT;
- }
- const int targetFieldPos = entryPos + 1;
- const int offset = targetPos - targetFieldPos;
- if (offset < 0) {
- flags |= FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
- }
- const uint32_t absOffest = abs(offset);
- if ((absOffest >> 24) != 0) {
- // Offset is too large.
- return false;
- } else if ((absOffest >> 16) != 0) {
- flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
- *outOffsetFieldSize = 3;
- } else if ((absOffest >> 8) != 0) {
- flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES;
- *outOffsetFieldSize = 2;
- } else {
- flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE;
- *outOffsetFieldSize = 1;
- }
- *outBigramFlags = flags;
- *outOffset = absOffest;
- return true;
- }
+ static bool createAndWriteBigramEntry(BufferWithExtendableBuffer *const buffer,
+ const int targetPos, const int probability, const bool hasNext, int *const writingPos);
+
+ static bool writeBigramEntry(BufferWithExtendableBuffer *const buffer, const BigramFlags flags,
+ const int targetOffset, int *const writingPos);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListReadWriteUtils);
@@ -115,9 +85,18 @@ private:
static const BigramFlags MASK_ATTRIBUTE_PROBABILITY;
static const int ATTRIBUTE_ADDRESS_SHIFT;
+ // Returns true if the bigram entry is valid and put entry flags into out*.
+ static bool createAndGetBigramFlags(const int entryPos, const int targetPos,
+ const int probability, const bool hasNext, BigramFlags *const outBigramFlags);
+
static AK_FORCE_INLINE bool isOffsetNegative(const BigramFlags flags) {
return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0;
}
+
+ static int getBigramAddressAndAdvancePosition(const uint8_t *const bigramsBuf,
+ const BigramFlags flags, int *const pos);
+
+ static int getBigramTargetOffset(const int targetPtNodePos, const int entryPos);
};
} // namespace latinime
#endif // LATINIME_BIGRAM_LIST_READ_WRITE_UTILS_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
index 4c44d22fd..29307b56a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
@@ -16,58 +16,73 @@
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
+#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
+#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
namespace latinime {
-const int DynamicBigramListPolicy::BIGRAM_LINK_COUNT_LIMIT = 10000;
+const int DynamicBigramListPolicy::CONTINUING_BIGRAM_LINK_COUNT_LIMIT = 10000;
+const int DynamicBigramListPolicy::BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT = 100000;
void DynamicBigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
- bool *const outHasNext, int *const pos) const {
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
+ bool *const outHasNext, int *const bigramEntryPos) const {
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramEntryPos);
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
if (usesAdditionalBuffer) {
- *pos -= mBuffer->getOriginalBufferSize();
+ *bigramEntryPos -= mBuffer->getOriginalBufferSize();
}
- const BigramListReadWriteUtils::BigramFlags flags =
- BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, pos);
- int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
- buffer, flags, pos);
+ BigramListReadWriteUtils::BigramFlags bigramFlags;
+ int originalBigramPos;
+ BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(buffer, &bigramFlags,
+ &originalBigramPos, bigramEntryPos);
if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
originalBigramPos += mBuffer->getOriginalBufferSize();
}
*outBigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
- *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags);
- *outHasNext = BigramListReadWriteUtils::hasNext(flags);
+ *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags);
+ *outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags);
if (usesAdditionalBuffer) {
- *pos += mBuffer->getOriginalBufferSize();
+ *bigramEntryPos += mBuffer->getOriginalBufferSize();
}
}
-void DynamicBigramListPolicy::skipAllBigrams(int *const pos) const {
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
+void DynamicBigramListPolicy::skipAllBigrams(int *const bigramListPos) const {
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
if (usesAdditionalBuffer) {
- *pos -= mBuffer->getOriginalBufferSize();
+ *bigramListPos -= mBuffer->getOriginalBufferSize();
}
- BigramListReadWriteUtils::skipExistingBigrams(buffer, pos);
+ BigramListReadWriteUtils::skipExistingBigrams(buffer, bigramListPos);
if (usesAdditionalBuffer) {
- *pos += mBuffer->getOriginalBufferSize();
+ *bigramListPos += mBuffer->getOriginalBufferSize();
}
}
-bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPos,
- int *outBigramsCount) {
+bool DynamicBigramListPolicy::copyAllBigrams(BufferWithExtendableBuffer *const bufferToWrite,
+ int *const fromPos, int *const toPos, int *const outBigramsCount) const {
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*fromPos);
if (usesAdditionalBuffer) {
*fromPos -= mBuffer->getOriginalBufferSize();
}
*outBigramsCount = 0;
- BigramListReadWriteUtils::BigramFlags flags;
+ BigramListReadWriteUtils::BigramFlags bigramFlags;
+ int bigramEntryCount = 0;
+ int lastWrittenEntryPos = NOT_A_DICT_POS;
do {
+ if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
+ AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
+ bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
+ ASSERT(false);
+ return false;
+ }
// The buffer address can be changed after calling buffer writing methods.
- const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
- flags = BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, fromPos);
- int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
- buffer, flags, fromPos);
+ int originalBigramPos;
+ BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
+ mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos,
+ fromPos);
if (originalBigramPos == NOT_A_DICT_POS) {
// skip invalid bigram entry.
continue;
@@ -76,132 +91,223 @@ bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPo
originalBigramPos += mBuffer->getOriginalBufferSize();
}
const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
- BigramListReadWriteUtils::BigramFlags newBigramFlags;
- uint32_t newBigramOffset;
- int newBigramOffsetFieldSize;
- if(!BigramListReadWriteUtils::createBigramEntryAndGetFlagsAndOffsetAndOffsetFieldSize(
- *toPos, bigramPos, BigramListReadWriteUtils::getProbabilityFromFlags(flags),
- BigramListReadWriteUtils::hasNext(flags), &newBigramFlags, &newBigramOffset,
- &newBigramOffsetFieldSize)) {
+ if (bigramPos == NOT_A_DICT_POS) {
+ // Target PtNode has been invalidated.
continue;
}
- // Write bigram entry. Target buffer is always the additional buffer.
- if (!mBuffer->writeUintAndAdvancePosition(newBigramFlags, 1 /* size */,toPos)) {
+ lastWrittenEntryPos = *toPos;
+ if (!BigramListReadWriteUtils::createAndWriteBigramEntry(bufferToWrite, bigramPos,
+ BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags),
+ BigramListReadWriteUtils::hasNext(bigramFlags), toPos)) {
return false;
}
- if (!mBuffer->writeUintAndAdvancePosition(newBigramOffset, newBigramOffsetFieldSize,
- toPos)) {
+ (*outBigramsCount)++;
+ } while(BigramListReadWriteUtils::hasNext(bigramFlags));
+ // Makes the last entry the terminal of the list. Updates the flags.
+ if (lastWrittenEntryPos != NOT_A_DICT_POS) {
+ if (!BigramListReadWriteUtils::setHasNextFlag(bufferToWrite, false /* hasNext */,
+ lastWrittenEntryPos)) {
return false;
}
- (*outBigramsCount)++;
- } while(BigramListReadWriteUtils::hasNext(flags));
+ }
if (usesAdditionalBuffer) {
*fromPos += mBuffer->getOriginalBufferSize();
}
return true;
}
-bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramPos,
- const int probability, int *const pos) {
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
+// Finding useless bigram entries and remove them. Bigram entry is useless when the target PtNode
+// has been deleted or is not a valid terminal.
+bool DynamicBigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(
+ int *const bigramListPos) {
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
+ if (usesAdditionalBuffer) {
+ *bigramListPos -= mBuffer->getOriginalBufferSize();
+ }
+ DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy);
+ BigramListReadWriteUtils::BigramFlags bigramFlags;
+ int bigramEntryCount = 0;
+ do {
+ if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
+ AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
+ bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
+ ASSERT(false);
+ return false;
+ }
+ int bigramEntryPos = *bigramListPos;
+ int originalBigramPos;
+ // The buffer address can be changed after calling buffer writing methods.
+ BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
+ mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos,
+ bigramListPos);
+ if (usesAdditionalBuffer) {
+ bigramEntryPos += mBuffer->getOriginalBufferSize();
+ }
+ if (originalBigramPos == NOT_A_DICT_POS) {
+ // This entry has already been removed.
+ continue;
+ }
+ if (usesAdditionalBuffer) {
+ originalBigramPos += mBuffer->getOriginalBufferSize();
+ }
+ const int bigramTargetNodePos =
+ followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
+ nodeReader.fetchNodeInfoInBufferFromPtNodePos(bigramTargetNodePos);
+ // TODO: Update probability for supporting probability decaying.
+ if (nodeReader.isDeleted() || !nodeReader.isTerminal()
+ || bigramTargetNodePos == NOT_A_DICT_POS) {
+ // The target is no longer valid terminal. Invalidate the current bigram entry.
+ if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
+ NOT_A_DICT_POS /* targetOffset */, &bigramEntryPos)) {
+ return false;
+ }
+ }
+ } while(BigramListReadWriteUtils::hasNext(bigramFlags));
+ return true;
+}
+
+// Updates bigram target PtNode positions in the list after the placing step in GC.
+bool DynamicBigramListPolicy::updateAllBigramTargetPtNodePositions(int *const bigramListPos,
+ const DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap *const
+ ptNodePositionRelocationMap) {
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
if (usesAdditionalBuffer) {
- *pos -= mBuffer->getOriginalBufferSize();
+ *bigramListPos -= mBuffer->getOriginalBufferSize();
}
- BigramListReadWriteUtils::BigramFlags flags;
+ BigramListReadWriteUtils::BigramFlags bigramFlags;
+ int bigramEntryCount = 0;
do {
- int entryPos = *pos;
+ if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
+ AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
+ bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
+ ASSERT(false);
+ return false;
+ }
+ int bigramEntryPos = *bigramListPos;
+ if (usesAdditionalBuffer) {
+ bigramEntryPos += mBuffer->getOriginalBufferSize();
+ }
+ int bigramTargetPtNodePos;
+ // The buffer address can be changed after calling buffer writing methods.
+ BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
+ mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &bigramTargetPtNodePos,
+ bigramListPos);
+ if (bigramTargetPtNodePos == NOT_A_DICT_POS) {
+ continue;
+ }
+ if (usesAdditionalBuffer) {
+ bigramTargetPtNodePos += mBuffer->getOriginalBufferSize();
+ }
+
+ DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::const_iterator it =
+ ptNodePositionRelocationMap->find(bigramTargetPtNodePos);
+ if (it != ptNodePositionRelocationMap->end()) {
+ bigramTargetPtNodePos = it->second;
+ } else {
+ bigramTargetPtNodePos = NOT_A_DICT_POS;
+ }
+ if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
+ bigramTargetPtNodePos, &bigramEntryPos)) {
+ return false;
+ }
+ } while(BigramListReadWriteUtils::hasNext(bigramFlags));
+ return true;
+}
+
+bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTargetPos,
+ const int probability, int *const bigramListPos) {
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
+ if (usesAdditionalBuffer) {
+ *bigramListPos -= mBuffer->getOriginalBufferSize();
+ }
+ BigramListReadWriteUtils::BigramFlags bigramFlags;
+ int bigramEntryCount = 0;
+ do {
+ if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
+ AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
+ bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
+ ASSERT(false);
+ return false;
+ }
+ int entryPos = *bigramListPos;
if (usesAdditionalBuffer) {
entryPos += mBuffer->getOriginalBufferSize();
}
+ int originalBigramPos;
// The buffer address can be changed after calling buffer writing methods.
- const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
- flags = BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, pos);
- int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
- buffer, flags, pos);
+ BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
+ mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos,
+ bigramListPos);
if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
originalBigramPos += mBuffer->getOriginalBufferSize();
}
- if (followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos) == bigramPos) {
+ if (followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos) == bigramTargetPos) {
// Update this bigram entry.
const BigramListReadWriteUtils::BigramFlags updatedFlags =
- BigramListReadWriteUtils::setProbabilityInFlags(flags, probability);
- return mBuffer->writeUintAndAdvancePosition(updatedFlags, 1 /* size */, &entryPos);
+ BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags, probability);
+ return BigramListReadWriteUtils::writeBigramEntry(mBuffer, updatedFlags,
+ originalBigramPos, &entryPos);
}
- if (BigramListReadWriteUtils::hasNext(flags)) {
+ if (BigramListReadWriteUtils::hasNext(bigramFlags)) {
continue;
}
// The current last entry is found.
// First, update the flags of the last entry.
- const BigramListReadWriteUtils::BigramFlags updatedFlags =
- BigramListReadWriteUtils::setHasNextFlag(flags);
- if (!mBuffer->writeUintAndAdvancePosition(updatedFlags, 1 /* size */, &entryPos)) {
+ if (!BigramListReadWriteUtils::setHasNextFlag(mBuffer, true /* hasNext */, entryPos)) {
return false;
}
if (usesAdditionalBuffer) {
- *pos += mBuffer->getOriginalBufferSize();
+ *bigramListPos += mBuffer->getOriginalBufferSize();
}
// Then, add a new entry after the last entry.
- return writeNewBigramEntry(bigramPos, probability, pos);
- } while(BigramListReadWriteUtils::hasNext(flags));
+ return writeNewBigramEntry(bigramTargetPos, probability, bigramListPos);
+ } while(BigramListReadWriteUtils::hasNext(bigramFlags));
// We return directly from the while loop.
ASSERT(false);
return false;
}
-bool DynamicBigramListPolicy::writeNewBigramEntry(const int bigramPos, const int probability,
+bool DynamicBigramListPolicy::writeNewBigramEntry(const int bigramTargetPos, const int probability,
int *const writingPos) {
- BigramListReadWriteUtils::BigramFlags newBigramFlags;
- uint32_t newBigramOffset;
- int newBigramOffsetFieldSize;
- if(!BigramListReadWriteUtils::createBigramEntryAndGetFlagsAndOffsetAndOffsetFieldSize(
- *writingPos, bigramPos, probability, false /* hasNext */, &newBigramFlags,
- &newBigramOffset, &newBigramOffsetFieldSize)) {
- return false;
- }
- // Write bigram flags.
- if (!mBuffer->writeUintAndAdvancePosition(newBigramFlags, 1 /* size */, writingPos)) {
- return false;
- }
- // Write bigram positon offset.
- if (!mBuffer->writeUintAndAdvancePosition(newBigramOffset, newBigramOffsetFieldSize,
- writingPos)) {
- return false;
- }
- return true;
+ // hasNext is false because we are adding a new bigram entry at the end of the bigram list.
+ return BigramListReadWriteUtils::createAndWriteBigramEntry(mBuffer, bigramTargetPos,
+ probability, false /* hasNext */, writingPos);
}
-bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int targetBigramPos) {
+bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int bigramTargetPos) {
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(bigramListPos);
int pos = bigramListPos;
if (usesAdditionalBuffer) {
pos -= mBuffer->getOriginalBufferSize();
}
- BigramListReadWriteUtils::BigramFlags flags;
+ BigramListReadWriteUtils::BigramFlags bigramFlags;
+ int bigramEntryCount = 0;
do {
+ if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
+ AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
+ bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
+ ASSERT(false);
+ return false;
+ }
+ int bigramEntryPos = pos;
+ int originalBigramPos;
// The buffer address can be changed after calling buffer writing methods.
- const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
- flags = BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, &pos);
- int bigramOffsetFieldPos = pos;
+ BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
+ mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos, &pos);
if (usesAdditionalBuffer) {
- bigramOffsetFieldPos += mBuffer->getOriginalBufferSize();
+ bigramEntryPos += mBuffer->getOriginalBufferSize();
}
- int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
- buffer, flags, &pos);
if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
originalBigramPos += mBuffer->getOriginalBufferSize();
}
const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
- if (bigramPos != targetBigramPos) {
+ if (bigramPos != bigramTargetPos) {
continue;
}
- // Target entry is found. Write 0 into the bigram pos field to mark the bigram invalid.
- const int bigramOffsetFieldSize = BigramListReadWriteUtils::attributeAddressSize(flags);
- if (!mBuffer->writeUintAndAdvancePosition(0 /* data */, bigramOffsetFieldSize,
- &bigramOffsetFieldPos)) {
- return false;
- }
- return true;
- } while(BigramListReadWriteUtils::hasNext(flags));
+ // Target entry is found. Write an invalid target position to mark the bigram invalid.
+ return BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
+ NOT_A_DICT_POS /* targetOffset */, &bigramEntryPos);
+ } while(BigramListReadWriteUtils::hasNext(bigramFlags));
return false;
}
@@ -212,14 +318,14 @@ int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos(
}
int currentPos = originalBigramPos;
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy);
- nodeReader.fetchNodeInfoFromBuffer(currentPos);
+ nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos);
int bigramLinkCount = 0;
while (nodeReader.getBigramLinkedNodePos() != NOT_A_DICT_POS) {
currentPos = nodeReader.getBigramLinkedNodePos();
- nodeReader.fetchNodeInfoFromBuffer(currentPos);
+ nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos);
bigramLinkCount++;
- if (bigramLinkCount > BIGRAM_LINK_COUNT_LIMIT) {
- AKLOGI("Bigram link is invalid. start position: %d", bigramPos);
+ if (bigramLinkCount > CONTINUING_BIGRAM_LINK_COUNT_LIMIT) {
+ AKLOGE("Bigram link is invalid. start position: %d", originalBigramPos);
ASSERT(false);
return NOT_A_DICT_POS;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
index dafb62d80..8ea318a41 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
@@ -21,13 +21,13 @@
#include "defines.h"
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
-#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
-#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
-#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
namespace latinime {
+class BufferWithExtendableBuffer;
+class DictionaryShortcutsStructurePolicy;
+
/*
* This is a dynamic version of BigramListPolicy and supports an additional buffer.
*/
@@ -40,27 +40,36 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
~DynamicBigramListPolicy() {}
void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
- int *const pos) const;
+ int *const bigramEntryPos) const;
+
+ void skipAllBigrams(int *const bigramListPos) const;
+
+ // Copy bigrams from the bigram list that starts at fromPos in mBuffer to toPos in
+ // bufferToWrite and advance these positions after bigram lists. This method skips invalid
+ // bigram entries and write the valid bigram entry count to outBigramsCount.
+ bool copyAllBigrams(BufferWithExtendableBuffer *const bufferToWrite, int *const fromPos,
+ int *const toPos, int *const outBigramsCount) const;
- void skipAllBigrams(int *const pos) const;
+ bool updateAllBigramEntriesAndDeleteUselessEntries(int *const bigramListPos);
- // Copy bigrams from the bigram list that starts at fromPos to toPos and advance these
- // positions after bigram lists. This method skips invalid bigram entries and write the valid
- // bigram entry count to outBigramsCount.
- bool copyAllBigrams(int *const fromPos, int *const toPos, int *outBigramsCount);
+ bool updateAllBigramTargetPtNodePositions(int *const bigramListPos,
+ const DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap *const
+ ptNodePositionRelocationMap);
- bool addNewBigramEntryToBigramList(const int bigramPos, const int probability, int *const pos);
+ bool addNewBigramEntryToBigramList(const int bigramTargetPos, const int probability,
+ int *const bigramListPos);
- bool writeNewBigramEntry(const int bigramPos, const int probability,
+ bool writeNewBigramEntry(const int bigramTargetPos, const int probability,
int *const writingPos);
// Return if targetBigramPos is found or not.
- bool removeBigram(const int bigramListPos, const int targetBigramPos);
+ bool removeBigram(const int bigramListPos, const int bigramTargetPos);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicBigramListPolicy);
- static const int BIGRAM_LINK_COUNT_LIMIT;
+ static const int CONTINUING_BIGRAM_LINK_COUNT_LIMIT;
+ static const int BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT;
BufferWithExtendableBuffer *const mBuffer;
const DictionaryShortcutsStructurePolicy *const mShortcutPolicy;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
new file mode 100644
index 000000000..c60e45819
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
@@ -0,0 +1,149 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h"
+
+namespace latinime {
+
+bool DynamicPatriciaTrieGcEventListeners
+ ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
+ ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
+ const int *const nodeCodePoints) {
+ // PtNode is useless when the PtNode is not a terminal and doesn't have any not useless
+ // children.
+ bool isUselessPtNode = !node->isTerminal();
+ if (mChildrenValue > 0) {
+ isUselessPtNode = false;
+ } else if (node->isTerminal()) {
+ // Remove children as all children are useless.
+ int writingPos = node->getChildrenPosFieldPos();
+ if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
+ mBuffer, NOT_A_DICT_POS /* childrenPosition */, &writingPos)) {
+ return false;
+ }
+ }
+ if (isUselessPtNode) {
+ // Current PtNode is no longer needed. Mark it as deleted.
+ if (!mWritingHelper->markNodeAsDeleted(node)) {
+ return false;
+ }
+ } else {
+ valueStack.back() += 1;
+ }
+ return true;
+}
+
+// Writes dummy PtNode array size when the head of PtNode array is read.
+bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
+ ::onDescend(const int ptNodeArrayPos) {
+ mValidPtNodeCount = 0;
+ int writingPos = mBufferToWrite->getTailPosition();
+ mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.insert(
+ DynamicPatriciaTrieWritingHelper::PtNodeArrayPositionRelocationMap::value_type(
+ ptNodeArrayPos, writingPos));
+ // Writes dummy PtNode array size because arrays can have a forward link or needles PtNodes.
+ // This field will be updated later in onReadingPtNodeArrayTail() with actual PtNode count.
+ mPtNodeArraySizeFieldPos = writingPos;
+ return DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(
+ mBufferToWrite, 0 /* arraySize */, &writingPos);
+}
+
+// Write PtNode array terminal and actual PtNode array size.
+bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
+ ::onReadingPtNodeArrayTail() {
+ int writingPos = mBufferToWrite->getTailPosition();
+ // Write PtNode array terminal.
+ if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(
+ mBufferToWrite, NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
+ return false;
+ }
+ // Write actual PtNode array size.
+ if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(
+ mBufferToWrite, mValidPtNodeCount, &mPtNodeArraySizeFieldPos)) {
+ return false;
+ }
+ return true;
+}
+
+// Write valid PtNode to buffer and memorize mapping from the old position to the new position.
+bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
+ ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
+ const int *const nodeCodePoints) {
+ if (node->isDeleted()) {
+ // Current PtNode is not written in new buffer because it has been deleted.
+ mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert(
+ DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::value_type(
+ node->getHeadPos(), NOT_A_DICT_POS));
+ return true;
+ }
+ int writingPos = mBufferToWrite->getTailPosition();
+ mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert(
+ DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::value_type(
+ node->getHeadPos(), writingPos));
+ mValidPtNodeCount++;
+ // Writes current PtNode.
+ return mWritingHelper->writePtNodeToBufferByCopyingPtNodeInfo(mBufferToWrite, node,
+ node->getParentPos(), nodeCodePoints, node->getCodePointCount(),
+ node->getProbability(), &writingPos);
+}
+
+bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
+ ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
+ const int *const nodeCodePoints) {
+ // Updates parent position.
+ int parentPos = node->getParentPos();
+ if (parentPos != NOT_A_DICT_POS) {
+ DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::const_iterator it =
+ mDictPositionRelocationMap->mPtNodePositionRelocationMap.find(parentPos);
+ if (it != mDictPositionRelocationMap->mPtNodePositionRelocationMap.end()) {
+ parentPos = it->second;
+ }
+ }
+ int writingPos = node->getHeadPos() + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
+ // Write updated parent offset.
+ if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mBufferToWrite,
+ parentPos, node->getHeadPos(), &writingPos)) {
+ return false;
+ }
+
+ // Updates children position.
+ int childrenPos = node->getChildrenPos();
+ if (childrenPos != NOT_A_DICT_POS) {
+ DynamicPatriciaTrieWritingHelper::PtNodeArrayPositionRelocationMap::const_iterator it =
+ mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.find(childrenPos);
+ if (it != mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.end()) {
+ childrenPos = it->second;
+ }
+ }
+ writingPos = node->getChildrenPosFieldPos();
+ if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBufferToWrite,
+ childrenPos, &writingPos)) {
+ return false;
+ }
+
+ // Updates bigram target PtNode positions in the bigram list.
+ int bigramsPos = node->getBigramsPos();
+ if (bigramsPos != NOT_A_DICT_POS) {
+ if (!mBigramPolicy->updateAllBigramTargetPtNodePositions(&bigramsPos,
+ &mDictPositionRelocationMap->mPtNodePositionRelocationMap)) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h
new file mode 100644
index 000000000..4256f22fb
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h
@@ -0,0 +1,178 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_GC_EVENT_LISTENERS_H
+#define LATINIME_DYNAMIC_PATRICIA_TRIE_GC_EVENT_LISTENERS_H
+
+#include <vector>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "utils/hash_map_compat.h"
+
+namespace latinime {
+
+class DynamicPatriciaTrieGcEventListeners {
+ public:
+ // Updates all PtNodes that can be reached from the root. Checks if each PtNode is useless or
+ // not and marks useless PtNodes as deleted. Such deleted PtNodes will be discarded in the GC.
+ // TODO: Concatenate non-terminal PtNodes.
+ class TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
+ : public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
+ public:
+ TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
+ DynamicPatriciaTrieWritingHelper *const writingHelper,
+ BufferWithExtendableBuffer *const buffer)
+ : mWritingHelper(writingHelper), mBuffer(buffer), valueStack(),
+ mChildrenValue(0) {}
+
+ ~TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted() {};
+
+ bool onAscend() {
+ if (valueStack.empty()) {
+ return false;
+ }
+ mChildrenValue = valueStack.back();
+ valueStack.pop_back();
+ return true;
+ }
+
+ bool onDescend(const int ptNodeArrayPos) {
+ valueStack.push_back(0);
+ return true;
+ }
+
+ bool onReadingPtNodeArrayTail() { return true; }
+
+ bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
+ const int *const nodeCodePoints);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(
+ TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted);
+
+ DynamicPatriciaTrieWritingHelper *const mWritingHelper;
+ BufferWithExtendableBuffer *const mBuffer;
+ std::vector<int> valueStack;
+ int mChildrenValue;
+ };
+
+ // Updates all bigram entries that are held by valid PtNodes. This removes useless bigram
+ // entries.
+ class TraversePolicyToUpdateBigramProbability
+ : public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
+ public:
+ TraversePolicyToUpdateBigramProbability(DynamicBigramListPolicy *const bigramPolicy)
+ : mBigramPolicy(bigramPolicy) {}
+
+ bool onAscend() { return true; }
+
+ bool onDescend(const int ptNodeArrayPos) { return true; }
+
+ bool onReadingPtNodeArrayTail() { return true; }
+
+ bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
+ const int *const nodeCodePoints) {
+ if (!node->isDeleted()) {
+ int pos = node->getBigramsPos();
+ if (pos != NOT_A_DICT_POS) {
+ if (!mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(&pos)) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateBigramProbability);
+
+ DynamicBigramListPolicy *const mBigramPolicy;
+ };
+
+ class TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
+ : public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
+ public:
+ TraversePolicyToPlaceAndWriteValidPtNodesToBuffer(
+ DynamicPatriciaTrieWritingHelper *const writingHelper,
+ BufferWithExtendableBuffer *const bufferToWrite,
+ DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
+ dictPositionRelocationMap)
+ : mWritingHelper(writingHelper), mBufferToWrite(bufferToWrite),
+ mDictPositionRelocationMap(dictPositionRelocationMap), mValidPtNodeCount(0),
+ mPtNodeArraySizeFieldPos(NOT_A_DICT_POS) {};
+
+ bool onAscend() { return true; }
+
+ bool onDescend(const int ptNodeArrayPos);
+
+ bool onReadingPtNodeArrayTail();
+
+ bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
+ const int *const nodeCodePoints);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToPlaceAndWriteValidPtNodesToBuffer);
+
+ DynamicPatriciaTrieWritingHelper *const mWritingHelper;
+ BufferWithExtendableBuffer *const mBufferToWrite;
+ DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
+ mDictPositionRelocationMap;
+ int mValidPtNodeCount;
+ int mPtNodeArraySizeFieldPos;
+ };
+
+ class TraversePolicyToUpdateAllPositionFields
+ : public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
+ public:
+ TraversePolicyToUpdateAllPositionFields(
+ DynamicPatriciaTrieWritingHelper *const writingHelper,
+ DynamicBigramListPolicy *const bigramPolicy,
+ BufferWithExtendableBuffer *const bufferToWrite,
+ const DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
+ dictPositionRelocationMap)
+ : mWritingHelper(writingHelper), mBigramPolicy(bigramPolicy),
+ mBufferToWrite(bufferToWrite),
+ mDictPositionRelocationMap(dictPositionRelocationMap) {};
+
+ bool onAscend() { return true; }
+
+ bool onDescend(const int ptNodeArrayPos) { return true; }
+
+ bool onReadingPtNodeArrayTail() { return true; }
+
+ bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
+ const int *const nodeCodePoints);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateAllPositionFields);
+
+ DynamicPatriciaTrieWritingHelper *const mWritingHelper;
+ DynamicBigramListPolicy *const mBigramPolicy;
+ BufferWithExtendableBuffer *const mBufferToWrite;
+ const DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
+ mDictPositionRelocationMap;
+ };
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieGcEventListeners);
+};
+} // namespace latinime
+#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_GC_EVENT_LISTENERS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp
index 737098423..456352c17 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp
@@ -23,26 +23,27 @@
namespace latinime {
-void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(const int nodePos,
- const int maxCodePointCount, int *const outCodePoints) {
- if (nodePos < 0 || nodePos >= mBuffer->getTailPosition()) {
+void DynamicPatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode(
+ const int ptNodePos, const int maxCodePointCount, int *const outCodePoints) {
+ if (ptNodePos < 0 || ptNodePos >= mBuffer->getTailPosition()) {
AKLOGE("Fetching PtNode info form invalid dictionary position: %d, dictionary size: %d",
- nodePos, mBuffer->getTailPosition());
+ ptNodePos, mBuffer->getTailPosition());
ASSERT(false);
invalidatePtNodeInfo();
return;
}
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(nodePos);
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodePos);
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
- int pos = nodePos;
- mHeadPos = nodePos;
+ int pos = ptNodePos;
+ mHeadPos = ptNodePos;
if (usesAdditionalBuffer) {
pos -= mBuffer->getOriginalBufferSize();
}
mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
- const int parentPos =
- DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(dictBuf, &pos);
- mParentPos = (parentPos != 0) ? nodePos + parentPos : NOT_A_DICT_POS;
+ const int parentPosOffset =
+ DynamicPatriciaTrieReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(dictBuf,
+ &pos);
+ mParentPos = DynamicPatriciaTrieReadingUtils::getParentPtNodePos(parentPosOffset, mHeadPos);
if (outCodePoints != 0) {
mCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
dictBuf, mFlags, maxCodePointCount, outCodePoints, &pos);
@@ -99,7 +100,8 @@ void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(c
// Read destination node if the read node is a moved node.
if (DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) {
// The destination position is stored at the same place as the parent position.
- fetchNodeInfoFromBufferAndProcessMovedNode(mParentPos, maxCodePointCount, outCodePoints);
+ fetchPtNodeInfoFromBufferAndProcessMovedPtNode(mParentPos, maxCodePointCount,
+ outCodePoints);
}
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h
index 6ef5f5813..3b36d425f 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h
@@ -48,17 +48,17 @@ class DynamicPatriciaTrieNodeReader {
~DynamicPatriciaTrieNodeReader() {}
- // Reads node information from dictionary buffer and updates members with the information.
- AK_FORCE_INLINE void fetchNodeInfoFromBuffer(const int nodePos) {
- fetchNodeInfoFromBufferAndGetNodeCodePoints(nodePos , 0 /* maxCodePointCount */,
- 0 /* outCodePoints */);
+ // Reads PtNode information from dictionary buffer and updates members with the information.
+ AK_FORCE_INLINE void fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) {
+ fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(ptNodePos ,
+ 0 /* maxCodePointCount */, 0 /* outCodePoints */);
}
- AK_FORCE_INLINE void fetchNodeInfoFromBufferAndGetNodeCodePoints(const int nodePos,
- const int maxCodePointCount, int *const outCodePoints) {
+ AK_FORCE_INLINE void fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(
+ const int ptNodePos, const int maxCodePointCount, int *const outCodePoints) {
mSiblingPos = NOT_A_DICT_POS;
mBigramLinkedNodePos = NOT_A_DICT_POS;
- fetchNodeInfoFromBufferAndProcessMovedNode(nodePos, maxCodePointCount, outCodePoints);
+ fetchPtNodeInfoFromBufferAndProcessMovedPtNode(ptNodePos, maxCodePointCount, outCodePoints);
}
// HeadPos is different from NodePos when the current PtNode is a moved PtNode.
@@ -154,8 +154,8 @@ class DynamicPatriciaTrieNodeReader {
int mBigramPos;
int mSiblingPos;
- void fetchNodeInfoFromBufferAndProcessMovedNode(const int nodePos, const int maxCodePointCount,
- int *const outCodePoints);
+ void fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
+ const int maxCodePointCount, int *const outCodePoints);
void invalidatePtNodeInfo();
};
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
index 3cfbfd85b..42397c19e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
@@ -35,7 +35,7 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const d
}
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
- readingHelper.initWithNodeArrayPos(dicNode->getChildrenPos());
+ readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPos());
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
while (!readingHelper.isEnd()) {
childDicNodes->pushLeavingChild(dicNode, nodeReader->getHeadPos(),
@@ -48,7 +48,7 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const d
}
int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
- const int nodePos, const int maxCodePointCount, int *const outCodePoints,
+ const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const {
// This method traverses parent nodes from the terminal by following parent pointers; thus,
// node code points are stored in the buffer in the reverse order.
@@ -56,9 +56,9 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
// First, read the terminal node and get its probability.
- readingHelper.initWithNodePos(nodePos);
+ readingHelper.initWithPtNodePos(ptNodePos);
if (!readingHelper.isValidTerminalNode()) {
- // Node at the nodePos is not a valid terminal node.
+ // Node at the ptNodePos is not a valid terminal node.
*outUnigramProbability = NOT_A_PROBABILITY;
return 0;
}
@@ -67,7 +67,7 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun
// Then, following parent node link to the dictionary root and fetch node code points.
while (!readingHelper.isEnd()) {
if (readingHelper.getTotalCodePointCount() > maxCodePointCount) {
- // The nodePos is not a valid terminal node position in the dictionary.
+ // The ptNodePos is not a valid terminal node position in the dictionary.
*outUnigramProbability = NOT_A_PROBABILITY;
return 0;
}
@@ -98,7 +98,7 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in
}
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
- readingHelper.initWithNodeArrayPos(getRootPosition());
+ readingHelper.initWithPtNodeArrayPos(getRootPosition());
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
while (!readingHelper.isEnd()) {
const int matchedCodePointCount = readingHelper.getPrevTotalCodePointCount();
@@ -148,39 +148,39 @@ int DynamicPatriciaTriePolicy::getProbability(const int unigramProbability,
}
}
-int DynamicPatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int nodePos) const {
- if (nodePos == NOT_A_DICT_POS) {
+int DynamicPatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const {
+ if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_PROBABILITY;
}
DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
- nodeReader.fetchNodeInfoFromBuffer(nodePos);
+ nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) {
return NOT_A_PROBABILITY;
}
return getProbability(nodeReader.getProbability(), NOT_A_PROBABILITY);
}
-int DynamicPatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) const {
- if (nodePos == NOT_A_DICT_POS) {
+int DynamicPatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
+ if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS;
}
DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
- nodeReader.fetchNodeInfoFromBuffer(nodePos);
+ nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
if (nodeReader.isDeleted()) {
return NOT_A_DICT_POS;
}
return nodeReader.getShortcutPos();
}
-int DynamicPatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const {
- if (nodePos == NOT_A_DICT_POS) {
+int DynamicPatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
+ if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS;
}
DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
- nodeReader.fetchNodeInfoFromBuffer(nodePos);
+ nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
if (nodeReader.isDeleted()) {
return NOT_A_DICT_POS;
}
@@ -195,7 +195,7 @@ bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int
}
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
- readingHelper.initWithNodeArrayPos(getRootPosition());
+ readingHelper.initWithPtNodeArrayPos(getRootPosition());
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
&mBigramListPolicy, &mShortcutListPolicy);
return writingHelper.addUnigramWord(&readingHelper, word, length, probability);
@@ -248,7 +248,9 @@ void DynamicPatriciaTriePolicy::flush(const char *const filePath) {
AKLOGI("Warning: flush() is called for non-updatable dictionary.");
return;
}
- // TODO: Implement.
+ DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
+ &mBigramListPolicy, &mShortcutListPolicy);
+ writingHelper.writeToDictFile(filePath, &mHeaderPolicy);
}
void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) {
@@ -256,7 +258,9 @@ void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) {
AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
return;
}
- // TODO: Implement.
+ DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
+ &mBigramListPolicy, &mShortcutListPolicy);
+ writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy);
}
bool DynamicPatriciaTriePolicy::needsToRunGC() const {
@@ -264,8 +268,8 @@ bool DynamicPatriciaTriePolicy::needsToRunGC() const {
AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
return false;
}
- // TODO: Implement.
- return false;
+ // TODO: Implement more properly.
+ return mBufferWithExtendableBuffer.isNearSizeLimit();
}
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
index 2cbb0ff3b..06d8095d8 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
@@ -33,7 +33,7 @@ class DicNodeVector;
class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public:
DynamicPatriciaTriePolicy(const MmappedBuffer *const buffer)
- : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer()),
+ : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), buffer->getBufferSize()),
mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(),
mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
mShortcutListPolicy(&mBufferWithExtendableBuffer),
@@ -51,7 +51,7 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
DicNodeVector *const childDicNodes) const;
int getCodePointsAndProbabilityAndReturnCodePointCount(
- const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints,
+ const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const;
int getTerminalNodePositionOfWord(const int *const inWord,
@@ -59,11 +59,11 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int getProbability(const int unigramProbability, const int bigramProbability) const;
- int getUnigramProbabilityOfPtNode(const int nodePos) const;
+ int getUnigramProbabilityOfPtNode(const int ptNodePos) const;
- int getShortcutPositionOfNode(const int nodePos) const;
+ int getShortcutPositionOfPtNode(const int ptNodePos) const;
- int getBigramsPositionOfNode(const int nodePos) const;
+ int getBigramsPositionOfPtNode(const int ptNodePos) const;
const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
return &mHeaderPolicy;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp
index a0b5be6a4..f4a2ef389 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp
@@ -23,36 +23,167 @@ namespace latinime {
// To avoid infinite loop caused by invalid or malicious forward links.
const int DynamicPatriciaTrieReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
const int DynamicPatriciaTrieReadingHelper::MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
+const size_t DynamicPatriciaTrieReadingHelper::MAX_READING_STATE_STACK_SIZE = MAX_WORD_LENGTH;
+
+// Visits all PtNodes in post-order depth first manner.
+// For example, visits c -> b -> y -> x -> a for the following dictionary:
+// a _ b _ c
+// \ x _ y
+bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPostorderDepthFirstManner(
+ TraversingEventListener *const listener) {
+ bool alreadyVisitedChildren = false;
+ // Descend from the root to the root PtNode array.
+ if (!listener->onDescend(getPosOfLastPtNodeArrayHead())) {
+ return false;
+ }
+ while (!isEnd()) {
+ if (!alreadyVisitedChildren) {
+ if (mNodeReader.hasChildren()) {
+ // Move to the first child.
+ if (!listener->onDescend(mNodeReader.getChildrenPos())) {
+ return false;
+ }
+ pushReadingStateToStack();
+ readChildNode();
+ } else {
+ alreadyVisitedChildren = true;
+ }
+ } else {
+ if (!listener->onVisitingPtNode(&mNodeReader, mMergedNodeCodePoints)) {
+ return false;
+ }
+ readNextSiblingNode();
+ if (isEnd()) {
+ // All PtNodes in current linked PtNode arrays have been visited.
+ // Return to the parent.
+ if (!listener->onReadingPtNodeArrayTail()) {
+ return false;
+ }
+ if (mReadingStateStack.size() <= 0) {
+ break;
+ }
+ if (!listener->onAscend()) {
+ return false;
+ }
+ popReadingStateFromStack();
+ alreadyVisitedChildren = true;
+ } else {
+ // Process sibling PtNode.
+ alreadyVisitedChildren = false;
+ }
+ }
+ }
+ // Ascend from the root PtNode array to the root.
+ if (!listener->onAscend()) {
+ return false;
+ }
+ return !isError();
+}
+
+// Visits all PtNodes in PtNode array level pre-order depth first manner, which is the same order
+// that PtNodes are written in the dictionary buffer.
+// For example, visits a -> b -> x -> c -> y for the following dictionary:
+// a _ b _ c
+// \ x _ y
+bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
+ TraversingEventListener *const listener) {
+ bool alreadyVisitedAllPtNodesInArray = false;
+ bool alreadyVisitedChildren = false;
+ // Descend from the root to the root PtNode array.
+ if (!listener->onDescend(getPosOfLastPtNodeArrayHead())) {
+ return false;
+ }
+ pushReadingStateToStack();
+ while (!isEnd()) {
+ if (alreadyVisitedAllPtNodesInArray) {
+ if (alreadyVisitedChildren) {
+ // Move to next sibling PtNode's children.
+ readNextSiblingNode();
+ if (isEnd()) {
+ // Return to the parent PTNode.
+ if (!listener->onAscend()) {
+ return false;
+ }
+ if (mReadingStateStack.size() <= 0) {
+ break;
+ }
+ popReadingStateFromStack();
+ alreadyVisitedChildren = true;
+ alreadyVisitedAllPtNodesInArray = true;
+ } else {
+ alreadyVisitedChildren = false;
+ }
+ } else {
+ if (mNodeReader.hasChildren()) {
+ // Move to the first child.
+ if (!listener->onDescend(mNodeReader.getChildrenPos())) {
+ return false;
+ }
+ pushReadingStateToStack();
+ readChildNode();
+ // Push state to return the head of PtNode array.
+ pushReadingStateToStack();
+ alreadyVisitedAllPtNodesInArray = false;
+ alreadyVisitedChildren = false;
+ } else {
+ alreadyVisitedChildren = true;
+ }
+ }
+ } else {
+ if (!listener->onVisitingPtNode(&mNodeReader, mMergedNodeCodePoints)) {
+ return false;
+ }
+ readNextSiblingNode();
+ if (isEnd()) {
+ if (!listener->onReadingPtNodeArrayTail()) {
+ return false;
+ }
+ // Return to the head of current PtNode array.
+ popReadingStateFromStack();
+ alreadyVisitedAllPtNodesInArray = true;
+ }
+ }
+ }
+ popReadingStateFromStack();
+ // Ascend from the root PtNode array to the root.
+ if (!listener->onAscend()) {
+ return false;
+ }
+ return !isError();
+}
// Read node array size and process empty node arrays. Nodes and arrays are counted up in this
// method to avoid an infinite loop.
-void DynamicPatriciaTrieReadingHelper::nextNodeArray() {
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mPos);
+void DynamicPatriciaTrieReadingHelper::nextPtNodeArray() {
+ mReadingState.mPosOfLastPtNodeArrayHead = mReadingState.mPos;
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos);
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
if (usesAdditionalBuffer) {
- mPos -= mBuffer->getOriginalBufferSize();
+ mReadingState.mPos -= mBuffer->getOriginalBufferSize();
}
- mNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(dictBuf,
- &mPos);
+ mReadingState.mNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
+ dictBuf, &mReadingState.mPos);
if (usesAdditionalBuffer) {
- mPos += mBuffer->getOriginalBufferSize();
+ mReadingState.mPos += mBuffer->getOriginalBufferSize();
}
// Count up nodes and node arrays to avoid infinite loop.
- mTotalNodeCount += mNodeCount;
- mNodeArrayCount++;
- if (mNodeCount < 0 || mTotalNodeCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP
- || mNodeArrayCount > MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) {
+ mReadingState.mTotalNodeCount += mReadingState.mNodeCount;
+ mReadingState.mNodeArrayCount++;
+ if (mReadingState.mNodeCount < 0
+ || mReadingState.mTotalNodeCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP
+ || mReadingState.mNodeArrayCount > MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) {
// Invalid dictionary.
AKLOGI("Invalid dictionary. nodeCount: %d, totalNodeCount: %d, MAX_CHILD_COUNT: %d"
"nodeArrayCount: %d, MAX_NODE_ARRAY_COUNT: %d",
- mNodeCount, mTotalNodeCount, MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP,
- mNodeArrayCount, MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP);
+ mReadingState.mNodeCount, mReadingState.mTotalNodeCount,
+ MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP, mReadingState.mNodeArrayCount,
+ MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP);
ASSERT(false);
mIsError = true;
- mPos = NOT_A_DICT_POS;
+ mReadingState.mPos = NOT_A_DICT_POS;
return;
}
- if (mNodeCount == 0) {
+ if (mReadingState.mNodeCount == 0) {
// Empty node array. Try following forward link.
followForwardLink();
}
@@ -60,24 +191,24 @@ void DynamicPatriciaTrieReadingHelper::nextNodeArray() {
// Follow the forward link and read the next node array if exists.
void DynamicPatriciaTrieReadingHelper::followForwardLink() {
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mPos);
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos);
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
if (usesAdditionalBuffer) {
- mPos -= mBuffer->getOriginalBufferSize();
+ mReadingState.mPos -= mBuffer->getOriginalBufferSize();
}
const int forwardLinkPosition =
- DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(dictBuf, mPos);
+ DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(dictBuf, mReadingState.mPos);
if (usesAdditionalBuffer) {
- mPos += mBuffer->getOriginalBufferSize();
+ mReadingState.mPos += mBuffer->getOriginalBufferSize();
}
- mPosOfLastForwardLinkField = mPos;
+ mReadingState.mPosOfLastForwardLinkField = mReadingState.mPos;
if (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(forwardLinkPosition)) {
// Follow the forward link.
- mPos += forwardLinkPosition;
- nextNodeArray();
+ mReadingState.mPos += forwardLinkPosition;
+ nextPtNodeArray();
} else {
// All node arrays have been read.
- mPos = NOT_A_DICT_POS;
+ mReadingState.mPos = NOT_A_DICT_POS;
}
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h
index 120fd7699..c6d8ddcf7 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h
@@ -17,6 +17,9 @@
#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H
#define LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H
+#include <cstddef>
+#include <vector>
+
#include "defines.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
@@ -34,12 +37,35 @@ class DictionaryShortcutsStructurePolicy;
*/
class DynamicPatriciaTrieReadingHelper {
public:
+ class TraversingEventListener {
+ public:
+ virtual ~TraversingEventListener() {};
+
+ // Returns whether the event handling was succeeded or not.
+ virtual bool onAscend() = 0;
+
+ // Returns whether the event handling was succeeded or not.
+ virtual bool onDescend(const int ptNodeArrayPos) = 0;
+
+ // Returns whether the event handling was succeeded or not.
+ virtual bool onReadingPtNodeArrayTail() = 0;
+
+ // Returns whether the event handling was succeeded or not.
+ virtual bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
+ const int *const nodeCodePoints) = 0;
+
+ protected:
+ TraversingEventListener() {};
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(TraversingEventListener);
+ };
+
DynamicPatriciaTrieReadingHelper(const BufferWithExtendableBuffer *const buffer,
const DictionaryBigramsStructurePolicy *const bigramsPolicy,
const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
- : mIsError(false), mPos(NOT_A_DICT_POS), mNodeCount(0), mPrevTotalCodePointCount(0),
- mTotalNodeCount(0), mNodeArrayCount(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS),
- mBuffer(buffer), mNodeReader(mBuffer, bigramsPolicy, shortcutsPolicy) {}
+ : mIsError(false), mReadingState(), mBuffer(buffer),
+ mNodeReader(mBuffer, bigramsPolicy, shortcutsPolicy), mReadingStateStack() {}
~DynamicPatriciaTrieReadingHelper() {}
@@ -48,41 +74,43 @@ class DynamicPatriciaTrieReadingHelper {
}
AK_FORCE_INLINE bool isEnd() const {
- return mPos == NOT_A_DICT_POS;
+ return mReadingState.mPos == NOT_A_DICT_POS;
}
- // Initialize reading state with the head position of a node array.
- AK_FORCE_INLINE void initWithNodeArrayPos(const int nodeArrayPos) {
- if (nodeArrayPos == NOT_A_DICT_POS) {
- mPos = NOT_A_DICT_POS;
+ // Initialize reading state with the head position of a PtNode array.
+ AK_FORCE_INLINE void initWithPtNodeArrayPos(const int ptNodeArrayPos) {
+ if (ptNodeArrayPos == NOT_A_DICT_POS) {
+ mReadingState.mPos = NOT_A_DICT_POS;
} else {
mIsError = false;
- mPos = nodeArrayPos;
- mNodeCount = 0;
- mPrevTotalCodePointCount = 0;
- mTotalNodeCount = 0;
- mNodeArrayCount = 0;
- mPosOfLastForwardLinkField = NOT_A_DICT_POS;
- nextNodeArray();
+ mReadingState.mPos = ptNodeArrayPos;
+ mReadingState.mPrevTotalCodePointCount = 0;
+ mReadingState.mTotalNodeCount = 0;
+ mReadingState.mNodeArrayCount = 0;
+ mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
+ mReadingStateStack.clear();
+ nextPtNodeArray();
if (!isEnd()) {
- fetchNodeInfo();
+ fetchPtNodeInfo();
}
}
}
// Initialize reading state with the head position of a node.
- AK_FORCE_INLINE void initWithNodePos(const int nodePos) {
- if (nodePos == NOT_A_DICT_POS) {
- mPos = NOT_A_DICT_POS;
+ AK_FORCE_INLINE void initWithPtNodePos(const int ptNodePos) {
+ if (ptNodePos == NOT_A_DICT_POS) {
+ mReadingState.mPos = NOT_A_DICT_POS;
} else {
mIsError = false;
- mPos = nodePos;
- mNodeCount = 1;
- mPrevTotalCodePointCount = 0;
- mTotalNodeCount = 1;
- mNodeArrayCount = 1;
- mPosOfLastForwardLinkField = NOT_A_DICT_POS;
- fetchNodeInfo();
+ mReadingState.mPos = ptNodePos;
+ mReadingState.mNodeCount = 1;
+ mReadingState.mPrevTotalCodePointCount = 0;
+ mReadingState.mTotalNodeCount = 1;
+ mReadingState.mNodeArrayCount = 1;
+ mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
+ mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS;
+ mReadingStateStack.clear();
+ fetchPtNodeInfo();
}
}
@@ -100,12 +128,12 @@ class DynamicPatriciaTrieReadingHelper {
// Return code point count exclude the last read node's code points.
AK_FORCE_INLINE int getPrevTotalCodePointCount() const {
- return mPrevTotalCodePointCount;
+ return mReadingState.mPrevTotalCodePointCount;
}
// Return code point count include the last read node's code points.
AK_FORCE_INLINE int getTotalCodePointCount() const {
- return mPrevTotalCodePointCount + mNodeReader.getCodePointCount();
+ return mReadingState.mPrevTotalCodePointCount + mNodeReader.getCodePointCount();
}
AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder(
@@ -121,85 +149,136 @@ class DynamicPatriciaTrieReadingHelper {
}
AK_FORCE_INLINE void readNextSiblingNode() {
- mNodeCount -= 1;
- mPos = mNodeReader.getSiblingNodePos();
- if (mNodeCount <= 0) {
+ mReadingState.mNodeCount -= 1;
+ mReadingState.mPos = mNodeReader.getSiblingNodePos();
+ if (mReadingState.mNodeCount <= 0) {
// All nodes in the current node array have been read.
followForwardLink();
if (!isEnd()) {
- fetchNodeInfo();
+ fetchPtNodeInfo();
}
} else {
- fetchNodeInfo();
+ fetchPtNodeInfo();
}
}
// Read the first child node of the current node.
AK_FORCE_INLINE void readChildNode() {
if (mNodeReader.hasChildren()) {
- mPrevTotalCodePointCount += mNodeReader.getCodePointCount();
- mTotalNodeCount = 0;
- mNodeArrayCount = 0;
- mPos = mNodeReader.getChildrenPos();
- mPosOfLastForwardLinkField = NOT_A_DICT_POS;
+ mReadingState.mPrevTotalCodePointCount += mNodeReader.getCodePointCount();
+ mReadingState.mTotalNodeCount = 0;
+ mReadingState.mNodeArrayCount = 0;
+ mReadingState.mPos = mNodeReader.getChildrenPos();
+ mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
// Read children node array.
- nextNodeArray();
+ nextPtNodeArray();
if (!isEnd()) {
- fetchNodeInfo();
+ fetchPtNodeInfo();
}
} else {
- mPos = NOT_A_DICT_POS;
+ mReadingState.mPos = NOT_A_DICT_POS;
}
}
// Read the parent node of the current node.
AK_FORCE_INLINE void readParentNode() {
if (mNodeReader.getParentPos() != NOT_A_DICT_POS) {
- mPrevTotalCodePointCount += mNodeReader.getCodePointCount();
- mTotalNodeCount = 1;
- mNodeArrayCount = 1;
- mNodeCount = 1;
- mPos = mNodeReader.getParentPos();
- mPosOfLastForwardLinkField = NOT_A_DICT_POS;
- fetchNodeInfo();
+ mReadingState.mPrevTotalCodePointCount += mNodeReader.getCodePointCount();
+ mReadingState.mTotalNodeCount = 1;
+ mReadingState.mNodeArrayCount = 1;
+ mReadingState.mNodeCount = 1;
+ mReadingState.mPos = mNodeReader.getParentPos();
+ mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
+ mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS;
+ fetchPtNodeInfo();
} else {
- mPos = NOT_A_DICT_POS;
+ mReadingState.mPos = NOT_A_DICT_POS;
}
}
AK_FORCE_INLINE int getPosOfLastForwardLinkField() const {
- return mPosOfLastForwardLinkField;
+ return mReadingState.mPosOfLastForwardLinkField;
+ }
+
+ AK_FORCE_INLINE int getPosOfLastPtNodeArrayHead() const {
+ return mReadingState.mPosOfLastPtNodeArrayHead;
+ }
+
+ AK_FORCE_INLINE void reloadCurrentPtNodeInfo() {
+ if (!isEnd()) {
+ fetchPtNodeInfo();
+ }
}
+ bool traverseAllPtNodesInPostorderDepthFirstManner(TraversingEventListener *const listener);
+
+ bool traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
+ TraversingEventListener *const listener);
+
private:
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieReadingHelper);
+ class ReadingState {
+ public:
+ // Note that copy constructor and assignment operator are used for this class to use
+ // std::vector.
+ ReadingState() : mPos(NOT_A_DICT_POS), mNodeCount(0), mPrevTotalCodePointCount(0),
+ mTotalNodeCount(0), mNodeArrayCount(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS),
+ mPosOfLastPtNodeArrayHead(NOT_A_DICT_POS) {}
+
+ int mPos;
+ // Node count of a node array.
+ int mNodeCount;
+ int mPrevTotalCodePointCount;
+ int mTotalNodeCount;
+ int mNodeArrayCount;
+ int mPosOfLastForwardLinkField;
+ int mPosOfLastPtNodeArrayHead;
+ };
+
static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP;
static const int MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP;
+ static const size_t MAX_READING_STATE_STACK_SIZE;
bool mIsError;
- int mPos;
- // Node count of a node array.
- int mNodeCount;
- int mPrevTotalCodePointCount;
- int mTotalNodeCount;
- int mNodeArrayCount;
- int mPosOfLastForwardLinkField;
+ ReadingState mReadingState;
const BufferWithExtendableBuffer *const mBuffer;
DynamicPatriciaTrieNodeReader mNodeReader;
int mMergedNodeCodePoints[MAX_WORD_LENGTH];
+ std::vector<ReadingState> mReadingStateStack;
- void nextNodeArray();
+ void nextPtNodeArray();
void followForwardLink();
- AK_FORCE_INLINE void fetchNodeInfo() {
- mNodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(mPos, MAX_WORD_LENGTH,
- mMergedNodeCodePoints);
+ AK_FORCE_INLINE void fetchPtNodeInfo() {
+ mNodeReader.fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(mReadingState.mPos,
+ MAX_WORD_LENGTH, mMergedNodeCodePoints);
if (mNodeReader.getCodePointCount() <= 0) {
// Empty node is not allowed.
mIsError = true;
- mPos = NOT_A_DICT_POS;
+ mReadingState.mPos = NOT_A_DICT_POS;
+ }
+ }
+
+ AK_FORCE_INLINE void pushReadingStateToStack() {
+ if (mReadingStateStack.size() > MAX_READING_STATE_STACK_SIZE) {
+ AKLOGI("Reading state stack overflow. Max size: %zd", MAX_READING_STATE_STACK_SIZE);
+ ASSERT(false);
+ mIsError = true;
+ mReadingState.mPos = NOT_A_DICT_POS;
+ } else {
+ mReadingStateStack.push_back(mReadingState);
+ }
+ }
+
+ AK_FORCE_INLINE void popReadingStateFromStack() {
+ if (mReadingStateStack.empty()) {
+ mReadingState.mPos = NOT_A_DICT_POS;
+ } else {
+ mReadingState = mReadingStateStack.back();
+ mReadingStateStack.pop_back();
+ fetchPtNodeInfo();
}
}
};
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp
index 8428c0b15..d68446db6 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp
@@ -28,24 +28,42 @@ const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_NOT_MOVED = 0xC0;
const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_MOVED = 0x40;
const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_DELETED = 0x80;
+// TODO: Make DICT_OFFSET_ZERO_OFFSET = 0.
+// Currently, DICT_OFFSET_INVALID is 0 in Java side but offset can be 0 during GC. So, the maximum
+// value of offsets, which is 0x7FFFFF is used to represent 0 offset.
+const int DptReadingUtils::DICT_OFFSET_INVALID = 0;
+const int DptReadingUtils::DICT_OFFSET_ZERO_OFFSET = 0x7FFFFF;
+
/* static */ int DptReadingUtils::getForwardLinkPosition(const uint8_t *const buffer,
const int pos) {
int linkAddressPos = pos;
return ByteArrayUtils::readSint24AndAdvancePosition(buffer, &linkAddressPos);
}
-/* static */ int DptReadingUtils::getParentPosAndAdvancePosition(const uint8_t *const buffer,
- int *const pos) {
+/* static */ int DptReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(
+ const uint8_t *const buffer, int *const pos) {
return ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos);
}
+/* static */ int DptReadingUtils::getParentPtNodePos(const int parentOffset, const int ptNodePos) {
+ if (parentOffset == DICT_OFFSET_INVALID) {
+ return NOT_A_DICT_POS;
+ } else if (parentOffset == DICT_OFFSET_ZERO_OFFSET) {
+ return ptNodePos;
+ } else {
+ return parentOffset + ptNodePos;
+ }
+}
+
/* static */ int DptReadingUtils::readChildrenPositionAndAdvancePosition(
const uint8_t *const buffer, int *const pos) {
const int base = *pos;
const int offset = ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos);
- if (offset == 0) {
- // 0 offset means that the node does not have children.
+ if (offset == DICT_OFFSET_INVALID) {
+ // The PtNode does not have children.
return NOT_A_DICT_POS;
+ } else if (offset == DICT_OFFSET_ZERO_OFFSET) {
+ return base;
} else {
return base + offset;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h
index db5f9b1bd..67c3cc57e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h
@@ -27,13 +27,19 @@ class DynamicPatriciaTrieReadingUtils {
public:
typedef uint8_t NodeFlags;
+ static const int DICT_OFFSET_INVALID;
+ static const int DICT_OFFSET_ZERO_OFFSET;
+
static int getForwardLinkPosition(const uint8_t *const buffer, const int pos);
static AK_FORCE_INLINE bool isValidForwardLinkPosition(const int forwardLinkAddress) {
return forwardLinkAddress != 0;
}
- static int getParentPosAndAdvancePosition(const uint8_t *const buffer, int *const pos);
+ static int getParentPtNodePosOffsetAndAdvancePosition(const uint8_t *const buffer,
+ int *const pos);
+
+ static int getParentPtNodePos(const int parentOffset, const int ptNodePos);
static int readChildrenPositionAndAdvancePosition(const uint8_t *const buffer, int *const pos);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
index 311d31e5d..578645cd5 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
@@ -17,16 +17,22 @@
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
+#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
+#include "utils/hash_map_compat.h"
namespace latinime {
const int DynamicPatriciaTrieWritingHelper::CHILDREN_POSITION_FIELD_SIZE = 3;
+// TODO: Make MAX_DICTIONARY_SIZE 8MB.
+const size_t DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE = 2 * 1024 * 1024;
bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
DynamicPatriciaTrieReadingHelper *const readingHelper,
@@ -83,7 +89,7 @@ bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const
const int probability) {
int mMergedNodeCodePoints[MAX_WORD_LENGTH];
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
- nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(word0Pos, MAX_WORD_LENGTH,
+ nodeReader.fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(word0Pos, MAX_WORD_LENGTH,
mMergedNodeCodePoints);
// Move node to add bigram entry.
const int newNodePos = mBuffer->getTailPosition();
@@ -91,13 +97,13 @@ bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const
return false;
}
int writingPos = newNodePos;
- // Write a new PtNode using original PtNode's info to the tail of the dictionary.
- if (!writePtNodeToBufferByCopyingPtNodeInfo(&nodeReader, nodeReader.getParentPos(),
+ // Write a new PtNode using original PtNode's info to the tail of the dictionary in mBuffer.
+ if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, &nodeReader, nodeReader.getParentPos(),
mMergedNodeCodePoints, nodeReader.getCodePointCount(), nodeReader.getProbability(),
&writingPos)) {
return false;
}
- nodeReader.fetchNodeInfoFromBuffer(newNodePos);
+ nodeReader.fetchNodeInfoInBufferFromPtNodePos(newNodePos);
if (nodeReader.getBigramsPos() != NOT_A_DICT_POS) {
// Insert a new bigram entry into the existing bigram list.
int bigramListPos = nodeReader.getBigramsPos();
@@ -124,13 +130,56 @@ bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const
// Remove a bigram relation from word0Pos to word1Pos.
bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) {
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
- nodeReader.fetchNodeInfoFromBuffer(word0Pos);
+ nodeReader.fetchNodeInfoInBufferFromPtNodePos(word0Pos);
if (nodeReader.getBigramsPos() == NOT_A_DICT_POS) {
return false;
}
return mBigramPolicy->removeBigram(nodeReader.getBigramsPos(), word1Pos);
}
+void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileName,
+ const HeaderPolicy *const headerPolicy) {
+ BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
+ if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */)) {
+ return;
+ }
+ DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, mBuffer);
+}
+
+void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos,
+ const char *const fileName, const HeaderPolicy *const headerPolicy) {
+ BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
+ if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */)) {
+ return;
+ }
+ BufferWithExtendableBuffer newDictBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */,
+ MAX_DICTIONARY_SIZE);
+ if (!runGC(rootPtNodeArrayPos, &newDictBuffer)) {
+ return;
+ }
+ DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, &newDictBuffer);
+}
+
+bool DynamicPatriciaTrieWritingHelper::markNodeAsDeleted(
+ const DynamicPatriciaTrieNodeReader *const nodeToUpdate) {
+ int pos = nodeToUpdate->getHeadPos();
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
+ const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
+ if (usesAdditionalBuffer) {
+ pos -= mBuffer->getOriginalBufferSize();
+ }
+ // Read original flags
+ const PatriciaTrieReadingUtils::NodeFlags originalFlags =
+ PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
+ const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
+ DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */,
+ true /* isDeleted */);
+ int writingPos = nodeToUpdate->getHeadPos();
+ // Update flags.
+ return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
+ &writingPos);
+}
+
bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
const DynamicPatriciaTrieNodeReader *const originalNode, const int movedPos,
const int bigramLinkedNodePos) {
@@ -153,9 +202,8 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
return false;
}
// Update moved position, which is stored in the parent offset field.
- const int movedPosOffset = movedPos - originalNode->getHeadPos();
- if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition(
- mBuffer, movedPosOffset, &writingPos)) {
+ if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
+ mBuffer, movedPos, originalNode->getHeadPos(), &writingPos)) {
return false;
}
// Update bigram linked node position, which is stored in the children position field.
@@ -168,13 +216,12 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
// Update children's parent position.
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
- readingHelper.initWithNodeArrayPos(originalNode->getChildrenPos());
+ readingHelper.initWithPtNodeArrayPos(originalNode->getChildrenPos());
while (!readingHelper.isEnd()) {
- const int childPtNodeWrittenPos = nodeReader->getHeadPos();
- const int parentOffset = movedPos - childPtNodeWrittenPos;
- int parentOffsetFieldPos = childPtNodeWrittenPos + 1 /* Flags */;
- if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition(
- mBuffer, parentOffset, &parentOffsetFieldPos)) {
+ int parentOffsetFieldPos = nodeReader->getHeadPos()
+ + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
+ if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
+ mBuffer, movedPos, nodeReader->getHeadPos(), &parentOffsetFieldPos)) {
// Parent offset cannot be written because of a bug or a broken dictionary; thus,
// we give up to update dictionary.
return false;
@@ -186,7 +233,8 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
}
// Write new PtNode at writingPos.
-bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const bool isBlacklisted,
+bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(
+ BufferWithExtendableBuffer *const bufferToWrite, const bool isBlacklisted,
const bool isNotAWord, const int parentPos, const int *const codePoints,
const int codePointCount, const int probability, const int childrenPos,
const int originalBigramListPos, const int originalShortcutListPos,
@@ -194,38 +242,38 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const boo
const int nodePos = *writingPos;
// Write dummy flags. The Node flags are updated with appropriate flags at the last step of the
// PtNode writing.
- if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, 0 /* nodeFlags */,
- writingPos)) {
+ if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(bufferToWrite,
+ 0 /* nodeFlags */, writingPos)) {
return false;
}
// Calculate a parent offset and write the offset.
- const int parentOffset = (parentPos != NOT_A_DICT_POS) ? parentPos - nodePos : NOT_A_DICT_POS;
- if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition(mBuffer,
- parentOffset, writingPos)) {
+ if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(bufferToWrite,
+ parentPos, nodePos, writingPos)) {
return false;
}
// Write code points
- if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mBuffer, codePoints,
- codePointCount, writingPos)) {
+ if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(bufferToWrite,
+ codePoints, codePointCount, writingPos)) {
return false;
}
// Write probability when the probability is a valid probability, which means this node is
// terminal.
if (probability != NOT_A_PROBABILITY) {
- if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
+ if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(bufferToWrite,
probability, writingPos)) {
return false;
}
}
// Write children position
- if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
+ if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(bufferToWrite,
childrenPos, writingPos)) {
return false;
}
// Copy shortcut list when the originalShortcutListPos is valid dictionary position.
if (originalShortcutListPos != NOT_A_DICT_POS) {
int fromPos = originalShortcutListPos;
- if (!mShortcutPolicy->copyAllShortcutsAndReturnIfSucceededOrNot(&fromPos, writingPos)) {
+ if (!mShortcutPolicy->copyAllShortcutsAndReturnIfSucceededOrNot(bufferToWrite, &fromPos,
+ writingPos)) {
return false;
}
}
@@ -233,7 +281,7 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const boo
int bigramCount = 0;
if (originalBigramListPos != NOT_A_DICT_POS) {
int fromPos = originalBigramListPos;
- if (!mBigramPolicy->copyAllBigrams(&fromPos, writingPos, &bigramCount)) {
+ if (!mBigramPolicy->copyAllBigrams(bufferToWrite, &fromPos, writingPos, &bigramCount)) {
return false;
}
}
@@ -245,27 +293,29 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const boo
bigramCount > 0 /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */,
CHILDREN_POSITION_FIELD_SIZE);
int flagsFieldPos = nodePos;
- if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, nodeFlags,
+ if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(bufferToWrite, nodeFlags,
&flagsFieldPos)) {
return false;
}
return true;
}
-bool DynamicPatriciaTrieWritingHelper::writePtNodeToBuffer(const int parentPos,
+bool DynamicPatriciaTrieWritingHelper::writePtNodeToBuffer(
+ BufferWithExtendableBuffer *const bufferToWrite, const int parentPos,
const int *const codePoints, const int codePointCount, const int probability,
int *const writingPos) {
- return writePtNodeWithFullInfoToBuffer(false /* isBlacklisted */, false /* isNotAWord */,
- parentPos, codePoints, codePointCount, probability,
+ return writePtNodeWithFullInfoToBuffer(bufferToWrite, false /* isBlacklisted */,
+ false /* isNotAWord */, parentPos, codePoints, codePointCount, probability,
NOT_A_DICT_POS /* childrenPos */, NOT_A_DICT_POS /* originalBigramsPos */,
NOT_A_DICT_POS /* originalShortcutPos */, writingPos);
}
bool DynamicPatriciaTrieWritingHelper::writePtNodeToBufferByCopyingPtNodeInfo(
+ BufferWithExtendableBuffer *const bufferToWrite,
const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos,
const int *const codePoints, const int codePointCount, const int probability,
int *const writingPos) {
- return writePtNodeWithFullInfoToBuffer(originalNode->isBlacklisted(),
+ return writePtNodeWithFullInfoToBuffer(bufferToWrite, originalNode->isBlacklisted(),
originalNode->isNotAWord(), parentPos, codePoints, codePointCount, probability,
originalNode->getChildrenPos(), originalNode->getBigramsPos(),
originalNode->getShortcutPos(), writingPos);
@@ -299,8 +349,9 @@ bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability(
if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos, movedPos)) {
return false;
}
- if (!writePtNodeToBufferByCopyingPtNodeInfo(originalPtNode, originalPtNode->getParentPos(),
- codePoints, originalPtNode->getCodePointCount(), probability, &movedPos)) {
+ if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, originalPtNode,
+ originalPtNode->getParentPos(), codePoints, originalPtNode->getCodePointCount(),
+ probability, &movedPos)) {
return false;
}
}
@@ -328,8 +379,8 @@ bool DynamicPatriciaTrieWritingHelper::createNewPtNodeArrayWithAChildPtNode(
1 /* arraySize */, &writingPos)) {
return false;
}
- if (!writePtNodeToBuffer(parentPtNodePos, nodeCodePoints, nodeCodePointCount, probability,
- &writingPos)) {
+ if (!writePtNodeToBuffer(mBuffer, parentPtNodePos, nodeCodePoints, nodeCodePointCount,
+ probability, &writingPos)) {
return false;
}
if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
@@ -358,8 +409,9 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
// Write the 1st part of the reallocating node. The children position will be updated later
// with actual children position.
const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode;
- if (!writePtNodeToBuffer(reallocatingPtNode->getParentPos(), reallocatingPtNodeCodePoints,
- overlappingCodePointCount, newProbability, &writingPos)) {
+ if (!writePtNodeToBuffer(mBuffer, reallocatingPtNode->getParentPos(),
+ reallocatingPtNodeCodePoints, overlappingCodePointCount, newProbability,
+ &writingPos)) {
return false;
}
const int actualChildrenPos = writingPos;
@@ -371,14 +423,15 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
}
// Write the 2nd part of the reallocating node.
const int secondPartOfReallocatedPtNodePos = writingPos;
- if (!writePtNodeToBufferByCopyingPtNodeInfo(reallocatingPtNode, firstPartOfReallocatedPtNodePos,
+ if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, reallocatingPtNode,
+ firstPartOfReallocatedPtNodePos,
reallocatingPtNodeCodePoints + overlappingCodePointCount,
reallocatingPtNode->getCodePointCount() - overlappingCodePointCount,
reallocatingPtNode->getProbability(), &writingPos)) {
return false;
}
if (addsExtraChild) {
- if (!writePtNodeToBuffer(firstPartOfReallocatedPtNodePos,
+ if (!writePtNodeToBuffer(mBuffer, firstPartOfReallocatedPtNodePos,
newNodeCodePoints + overlappingCodePointCount,
newNodeCodePointCount - overlappingCodePointCount, probabilityOfNewPtNode,
&writingPos)) {
@@ -396,7 +449,7 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
}
// Load node info. Information of the 1st part will be fetched.
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
- nodeReader.fetchNodeInfoFromBuffer(firstPartOfReallocatedPtNodePos);
+ nodeReader.fetchNodeInfoInBufferFromPtNodePos(firstPartOfReallocatedPtNodePos);
// Update children position.
int childrenPosFieldPos = nodeReader.getChildrenPosFieldPos();
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
@@ -406,4 +459,53 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
return true;
}
+bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
+ BufferWithExtendableBuffer *const bufferToWrite) {
+ DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
+ readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+ DynamicPatriciaTrieGcEventListeners
+ ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
+ traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
+ this, mBuffer);
+ if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
+ &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
+ return false;
+ }
+
+ readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+ DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability
+ traversePolicyToUpdateBigramProbability(mBigramPolicy);
+ if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
+ &traversePolicyToUpdateBigramProbability)) {
+ return false;
+ }
+
+ // Mapping from positions in mBuffer to positions in bufferToWrite.
+ DictPositionRelocationMap dictPositionRelocationMap;
+ readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+ DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
+ traversePolicyToPlaceAndWriteValidPtNodesToBuffer(this, bufferToWrite,
+ &dictPositionRelocationMap);
+ if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
+ &traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) {
+ return false;
+ }
+
+ // Create policy instance for the GCed dictionary.
+ DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite);
+ DynamicBigramListPolicy newDictBigramPolicy(bufferToWrite, &newDictShortcutPolicy);
+ // Create reading helper for the GCed dictionary.
+ DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictBigramPolicy,
+ &newDictShortcutPolicy);
+ newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+ DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
+ traversePolicyToUpdateAllPositionFields(this, &newDictBigramPolicy, bufferToWrite,
+ &dictPositionRelocationMap);
+ if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
+ &traversePolicyToUpdateAllPositionFields)) {
+ return false;
+ }
+ return true;
+}
+
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
index 20e35abcf..fe1b2437a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
@@ -17,7 +17,10 @@
#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H
#define LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H
+#include <stdint.h>
+
#include "defines.h"
+#include "utils/hash_map_compat.h"
namespace latinime {
@@ -26,9 +29,24 @@ class DynamicBigramListPolicy;
class DynamicPatriciaTrieNodeReader;
class DynamicPatriciaTrieReadingHelper;
class DynamicShortcutListPolicy;
+class HeaderPolicy;
class DynamicPatriciaTrieWritingHelper {
public:
+ typedef hash_map_compat<int, int> PtNodeArrayPositionRelocationMap;
+ typedef hash_map_compat<int, int> PtNodePositionRelocationMap;
+ struct DictPositionRelocationMap {
+ public:
+ DictPositionRelocationMap()
+ : mPtNodeArrayPositionRelocationMap(), mPtNodePositionRelocationMap() {}
+
+ PtNodeArrayPositionRelocationMap mPtNodeArrayPositionRelocationMap;
+ PtNodePositionRelocationMap mPtNodePositionRelocationMap;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DictPositionRelocationMap);
+ };
+
DynamicPatriciaTrieWritingHelper(BufferWithExtendableBuffer *const buffer,
DynamicBigramListPolicy *const bigramPolicy,
DynamicShortcutListPolicy *const shortcutPolicy)
@@ -46,10 +64,27 @@ class DynamicPatriciaTrieWritingHelper {
// Remove a bigram relation from word0Pos to word1Pos.
bool removeBigramWords(const int word0Pos, const int word1Pos);
+ void writeToDictFile(const char *const fileName, const HeaderPolicy *const headerPolicy);
+
+ void writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const fileName,
+ const HeaderPolicy *const headerPolicy);
+
+ // CAVEAT: This method must be called only from inner classes of
+ // DynamicPatriciaTrieGcEventListeners.
+ bool markNodeAsDeleted(const DynamicPatriciaTrieNodeReader *const nodeToUpdate);
+
+ // CAVEAT: This method must be called only from this class or inner classes of
+ // DynamicPatriciaTrieGcEventListeners.
+ bool writePtNodeToBufferByCopyingPtNodeInfo(BufferWithExtendableBuffer *const bufferToWrite,
+ const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos,
+ const int *const codePoints, const int codePointCount, const int probability,
+ int *const writingPos);
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper);
static const int CHILDREN_POSITION_FIELD_SIZE;
+ static const size_t MAX_DICTIONARY_SIZE;
BufferWithExtendableBuffer *const mBuffer;
DynamicBigramListPolicy *const mBigramPolicy;
@@ -58,18 +93,15 @@ class DynamicPatriciaTrieWritingHelper {
bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate,
const int movedPos, const int bigramLinkedNodePos);
- bool writePtNodeWithFullInfoToBuffer(const bool isBlacklisted, const bool isNotAWord,
+ bool writePtNodeWithFullInfoToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
+ const bool isBlacklisted, const bool isNotAWord,
const int parentPos, const int *const codePoints, const int codePointCount,
const int probability, const int childrenPos, const int originalBigramListPos,
const int originalShortcutListPos, int *const writingPos);
- bool writePtNodeToBuffer(const int parentPos, const int *const codePoints,
- const int codePointCount, const int probability, int *const writingPos);
-
- bool writePtNodeToBufferByCopyingPtNodeInfo(
- const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos,
- const int *const codePoints, const int codePointCount, const int probability,
- int *const writingPos);
+ bool writePtNodeToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
+ const int parentPos, const int *const codePoints, const int codePointCount,
+ const int probability, int *const writingPos);
bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos);
@@ -89,6 +121,8 @@ class DynamicPatriciaTrieWritingHelper {
const int *const reallocatingPtNodeCodePoints, const int overlappingCodePointCount,
const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
const int newNodeCodePointCount);
+
+ bool runGC(const int rootPtNodeArrayPos, BufferWithExtendableBuffer *const bufferToWrite);
};
} // namespace latinime
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp
index b261e594d..30ff10cd6 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp
@@ -36,21 +36,33 @@ const int DynamicPatriciaTrieWritingUtils::DICT_OFFSET_NEGATIVE_FLAG = 0x800000;
const int DynamicPatriciaTrieWritingUtils::PROBABILITY_FIELD_SIZE = 1;
const int DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE = 1;
+/* static */ bool DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(
+ BufferWithExtendableBuffer *const buffer, const int rootPos) {
+ int writingPos = rootPos;
+ if (!writePtNodeArraySizeAndAdvancePosition(buffer, 0 /* arraySize */, &writingPos)) {
+ return false;
+ }
+ return writeForwardLinkPositionAndAdvancePosition(buffer, NOT_A_DICT_POS /* forwardLinkPos */,
+ &writingPos);
+}
+
/* static */ bool DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(
BufferWithExtendableBuffer *const buffer, const int forwardLinkPos,
int *const forwardLinkFieldPos) {
- const int offset = (forwardLinkPos != NOT_A_DICT_POS) ?
- forwardLinkPos - (*forwardLinkFieldPos) : 0;
- return writeDictOffset(buffer, offset, forwardLinkFieldPos);
+ return writeDictOffset(buffer, forwardLinkPos, (*forwardLinkFieldPos), forwardLinkFieldPos);
}
/* static */ bool DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(
BufferWithExtendableBuffer *const buffer, const size_t arraySize,
int *const arraySizeFieldPos) {
- if (arraySize <= MAX_PTNODE_ARRAY_SIZE_TO_USE_SMALL_SIZE_FIELD) {
+ // Currently, all array size field to be created has LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE to
+ // simplify updating process.
+ // TODO: Use SMALL_PTNODE_ARRAY_SIZE_FIELD_SIZE for small arrays.
+ /*if (arraySize <= MAX_PTNODE_ARRAY_SIZE_TO_USE_SMALL_SIZE_FIELD) {
return buffer->writeUintAndAdvancePosition(arraySize, SMALL_PTNODE_ARRAY_SIZE_FIELD_SIZE,
arraySizeFieldPos);
- } else if (arraySize <= MAX_PTNODE_ARRAY_SIZE) {
+ } else */
+ if (arraySize <= MAX_PTNODE_ARRAY_SIZE) {
uint32_t data = arraySize | LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE_FLAG;
return buffer->writeUintAndAdvancePosition(data, LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE,
arraySizeFieldPos);
@@ -69,11 +81,10 @@ const int DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE = 1;
}
// Note that parentOffset is offset from node's head position.
-/* static */ bool DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition(
- BufferWithExtendableBuffer *const buffer, const int parentOffset,
+/* static */ bool DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
+ BufferWithExtendableBuffer *const buffer, const int parentPos, const int basePos,
int *const parentPosFieldPos) {
- int offset = (parentOffset != NOT_A_DICT_POS) ? parentOffset : 0;
- return writeDictOffset(buffer, offset, parentPosFieldPos);
+ return writeDictOffset(buffer, parentPos, basePos, parentPosFieldPos);
}
/* static */ bool DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(
@@ -106,13 +117,19 @@ const int DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE = 1;
/* static */ bool DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
BufferWithExtendableBuffer *const buffer, const int childrenPosition,
int *const childrenPositionFieldPos) {
- int offset = (childrenPosition != NOT_A_DICT_POS) ?
- childrenPosition - (*childrenPositionFieldPos) : 0;
- return writeDictOffset(buffer, offset, childrenPositionFieldPos);
+ return writeDictOffset(buffer, childrenPosition, (*childrenPositionFieldPos),
+ childrenPositionFieldPos);
}
/* static */ bool DynamicPatriciaTrieWritingUtils::writeDictOffset(
- BufferWithExtendableBuffer *const buffer, const int offset, int *const offsetFieldPos) {
+ BufferWithExtendableBuffer *const buffer, const int targetPos, const int basePos,
+ int *const offsetFieldPos) {
+ int offset = targetPos - basePos;
+ if (targetPos == NOT_A_DICT_POS) {
+ offset = DynamicPatriciaTrieReadingUtils::DICT_OFFSET_INVALID;
+ } else if (offset == 0) {
+ offset = DynamicPatriciaTrieReadingUtils::DICT_OFFSET_ZERO_OFFSET;
+ }
if (offset > MAX_DICT_OFFSET_VALUE || offset < MIN_DICT_OFFSET_VALUE) {
AKLOGI("offset cannot be written because the offset is too large or too small: %d",
offset);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h
index 183ede444..af76bc6b5 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h
@@ -28,6 +28,10 @@ class BufferWithExtendableBuffer;
class DynamicPatriciaTrieWritingUtils {
public:
+ static const int NODE_FLAG_FIELD_SIZE;
+
+ static bool writeEmptyDictionary(BufferWithExtendableBuffer *const buffer, const int rootPos);
+
static bool writeForwardLinkPositionAndAdvancePosition(
BufferWithExtendableBuffer *const buffer, const int forwardLinkPos,
int *const forwardLinkFieldPos);
@@ -39,8 +43,8 @@ class DynamicPatriciaTrieWritingUtils {
const DynamicPatriciaTrieReadingUtils::NodeFlags nodeFlags,
int *const nodeFlagsFieldPos);
- static bool writeParentOffsetAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
- const int parentPosition, int *const parentPosFieldPos);
+ static bool writeParentPosOffsetAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
+ const int parentPosition, const int basePos, int *const parentPosFieldPos);
static bool writeCodePointsAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
const int *const codePoints, const int codePointCount, int *const codePointFieldPos);
@@ -63,11 +67,10 @@ class DynamicPatriciaTrieWritingUtils {
static const int MAX_DICT_OFFSET_VALUE;
static const int MIN_DICT_OFFSET_VALUE;
static const int DICT_OFFSET_NEGATIVE_FLAG;
- static const int NODE_FLAG_FIELD_SIZE;
static const int PROBABILITY_FIELD_SIZE;
- static bool writeDictOffset(BufferWithExtendableBuffer *const buffer, const int offset,
- int *const offsetFieldPos);
+ static bool writeDictOffset(BufferWithExtendableBuffer *const buffer, const int targetPos,
+ const int basePos, int *const offsetFieldPos);
};
} // namespace latinime
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
index 196da5c97..7bbeacaa0 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
@@ -17,13 +17,17 @@
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include <cstddef>
+#include <cstdio>
+#include <ctime>
namespace latinime {
+
+// Note that these are corresponding definitions in Java side in FormatSpec.FileHeader.
const char *const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY = "MULTIPLE_WORDS_DEMOTION_RATE";
const char *const HeaderPolicy::USES_FORGETTING_CURVE_KEY = "USES_FORGETTING_CURVE";
const char *const HeaderPolicy::LAST_UPDATED_TIME_KEY = "date";
-const float HeaderPolicy::DEFAULT_MULTIPLE_WORD_COST_MULTIPLIER = 1.0f;
+const int HeaderPolicy::DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE = 100;
const float HeaderPolicy::MULTIPLE_WORD_COST_MULTIPLIER_SCALE = 100.0f;
// Used for logging. Question mark is used to indicate that the key is not found.
@@ -35,8 +39,8 @@ void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *out
return;
}
std::vector<int> keyCodePointVector;
- insertCharactersIntoVector(key, &keyCodePointVector);
- HeaderReadingUtils::AttributeMap::const_iterator it = mAttributeMap.find(keyCodePointVector);
+ HeaderReadWriteUtils::insertCharactersIntoVector(key, &keyCodePointVector);
+ HeaderReadWriteUtils::AttributeMap::const_iterator it = mAttributeMap.find(keyCodePointVector);
if (it == mAttributeMap.end()) {
// The key was not found.
outValue[0] = '?';
@@ -51,80 +55,77 @@ void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *out
}
float HeaderPolicy::readMultipleWordCostMultiplier() const {
- int attributeValue = 0;
- if (getAttributeValueAsInt(MULTIPLE_WORDS_DEMOTION_RATE_KEY, &attributeValue)) {
- if (attributeValue <= 0) {
- return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
- }
- return MULTIPLE_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(attributeValue);
- } else {
- return DEFAULT_MULTIPLE_WORD_COST_MULTIPLIER;
+ std::vector<int> keyVector;
+ HeaderReadWriteUtils::insertCharactersIntoVector(MULTIPLE_WORDS_DEMOTION_RATE_KEY, &keyVector);
+ const int demotionRate = HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
+ &keyVector, DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE);
+ if (demotionRate <= 0) {
+ return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
}
+ return MULTIPLE_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(demotionRate);
}
bool HeaderPolicy::readUsesForgettingCurveFlag() const {
- int attributeValue = 0;
- if (getAttributeValueAsInt(USES_FORGETTING_CURVE_KEY, &attributeValue)) {
- return attributeValue != 0;
- } else {
- return false;
- }
+ std::vector<int> keyVector;
+ HeaderReadWriteUtils::insertCharactersIntoVector(USES_FORGETTING_CURVE_KEY, &keyVector);
+ return HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, &keyVector,
+ false /* defaultValue */);
}
-// Returns S_INT_MIN when the key is not found or the value is invalid.
+// Returns current time when the key is not found or the value is invalid.
int HeaderPolicy::readLastUpdatedTime() const {
- int attributeValue = 0;
- if (getAttributeValueAsInt(LAST_UPDATED_TIME_KEY, &attributeValue)) {
- return attributeValue;
- } else {
- return S_INT_MIN;
- }
+ std::vector<int> keyVector;
+ HeaderReadWriteUtils::insertCharactersIntoVector(LAST_UPDATED_TIME_KEY, &keyVector);
+ return HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, &keyVector,
+ time(0) /* defaultValue */);
}
-// Returns whether the key is found or not and stores the found value into outValue.
-bool HeaderPolicy::getAttributeValueAsInt(const char *const key, int *const outValue) const {
- std::vector<int> keyVector;
- insertCharactersIntoVector(key, &keyVector);
- HeaderReadingUtils::AttributeMap::const_iterator it = mAttributeMap.find(keyVector);
- if (it == mAttributeMap.end()) {
- // The key was not found.
+bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
+ const bool updatesLastUpdatedTime) const {
+ int writingPos = 0;
+ if (!HeaderReadWriteUtils::writeDictionaryVersion(bufferToWrite, mDictFormatVersion,
+ &writingPos)) {
return false;
}
- *outValue = parseIntAttributeValue(&(it->second));
- return true;
-}
-
-/* static */ HeaderReadingUtils::AttributeMap HeaderPolicy::createAttributeMapAndReadAllAttributes(
- const uint8_t *const dictBuf) {
- HeaderReadingUtils::AttributeMap attributeMap;
- HeaderReadingUtils::fetchAllHeaderAttributes(dictBuf, &attributeMap);
- return attributeMap;
-}
-
-/* static */ int HeaderPolicy::parseIntAttributeValue(
- const std::vector<int> *const attributeValue) {
- int value = 0;
- bool isNegative = false;
- for (size_t i = 0; i < attributeValue->size(); ++i) {
- if (i == 0 && attributeValue->at(i) == '-') {
- isNegative = true;
- } else {
- if (!isdigit(attributeValue->at(i))) {
- // If not a number, return S_INT_MIN
- return S_INT_MIN;
- }
- value *= 10;
- value += attributeValue->at(i) - '0';
+ if (!HeaderReadWriteUtils::writeDictionaryFlags(bufferToWrite, mDictionaryFlags,
+ &writingPos)) {
+ return false;
+ }
+ // Temporarily writes a dummy header size.
+ int headerSizeFieldPos = writingPos;
+ if (!HeaderReadWriteUtils::writeDictionaryHeaderSize(bufferToWrite, 0 /* size */,
+ &writingPos)) {
+ return false;
+ }
+ if (updatesLastUpdatedTime) {
+ // Set current time as a last updated time.
+ HeaderReadWriteUtils::AttributeMap attributeMapTowrite(mAttributeMap);
+ std::vector<int> updatedTimekey;
+ HeaderReadWriteUtils::insertCharactersIntoVector(LAST_UPDATED_TIME_KEY, &updatedTimekey);
+ HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, &updatedTimekey, time(0));
+ if (!HeaderReadWriteUtils::writeHeaderAttributes(bufferToWrite, &attributeMapTowrite,
+ &writingPos)) {
+ return false;
}
+ } else {
+ if (!HeaderReadWriteUtils::writeHeaderAttributes(bufferToWrite, &mAttributeMap,
+ &writingPos)) {
+ return false;
+ }
+ }
+ // Writes an actual header size.
+ if (!HeaderReadWriteUtils::writeDictionaryHeaderSize(bufferToWrite, writingPos,
+ &headerSizeFieldPos)) {
+ return false;
}
- return isNegative ? -value : value;
+ return true;
}
-/* static */ void HeaderPolicy::insertCharactersIntoVector(const char *const characters,
- std::vector<int> *const vector) {
- for (int i = 0; characters[i]; ++i) {
- vector->push_back(characters[i]);
- }
+/* static */ HeaderReadWriteUtils::AttributeMap
+ HeaderPolicy::createAttributeMapAndReadAllAttributes(const uint8_t *const dictBuf) {
+ HeaderReadWriteUtils::AttributeMap attributeMap;
+ HeaderReadWriteUtils::fetchAllHeaderAttributes(dictBuf, &attributeMap);
+ return attributeMap;
}
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
index 930b475c7..e97c08ca4 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
@@ -17,25 +17,37 @@
#ifndef LATINIME_HEADER_POLICY_H
#define LATINIME_HEADER_POLICY_H
-#include <cctype>
#include <stdint.h>
#include "defines.h"
#include "suggest/core/policy/dictionary_header_structure_policy.h"
-#include "suggest/policyimpl/dictionary/header/header_reading_utils.h"
+#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h"
+#include "suggest/policyimpl/dictionary/utils/format_utils.h"
namespace latinime {
class HeaderPolicy : public DictionaryHeaderStructurePolicy {
public:
- explicit HeaderPolicy(const uint8_t *const dictBuf)
- : mDictBuf(dictBuf), mDictionaryFlags(HeaderReadingUtils::getFlags(dictBuf)),
- mSize(HeaderReadingUtils::getHeaderSize(dictBuf)),
- mAttributeMap(createAttributeMapAndReadAllAttributes(mDictBuf)),
+ // Reads information from existing dictionary buffer.
+ HeaderPolicy(const uint8_t *const dictBuf, const int dictSize)
+ : mDictFormatVersion(FormatUtils::detectFormatVersion(dictBuf, dictSize)),
+ mDictionaryFlags(HeaderReadWriteUtils::getFlags(dictBuf)),
+ mSize(HeaderReadWriteUtils::getHeaderSize(dictBuf)),
+ mAttributeMap(createAttributeMapAndReadAllAttributes(dictBuf)),
mMultiWordCostMultiplier(readMultipleWordCostMultiplier()),
mUsesForgettingCurve(readUsesForgettingCurveFlag()),
mLastUpdatedTime(readLastUpdatedTime()) {}
+ // Constructs header information using an attribute map.
+ HeaderPolicy(const FormatUtils::FORMAT_VERSION dictFormatVersion,
+ const HeaderReadWriteUtils::AttributeMap *const attributeMap)
+ : mDictFormatVersion(dictFormatVersion),
+ mDictionaryFlags(HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap(
+ attributeMap)), mSize(0), mAttributeMap(*attributeMap),
+ mMultiWordCostMultiplier(readUsesForgettingCurveFlag()),
+ mUsesForgettingCurve(readUsesForgettingCurveFlag()),
+ mLastUpdatedTime(readLastUpdatedTime()) {}
+
~HeaderPolicy() {}
AK_FORCE_INLINE int getSize() const {
@@ -43,16 +55,15 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
}
AK_FORCE_INLINE bool supportsDynamicUpdate() const {
- return HeaderReadingUtils::supportsDynamicUpdate(mDictionaryFlags);
+ return HeaderReadWriteUtils::supportsDynamicUpdate(mDictionaryFlags);
}
AK_FORCE_INLINE bool requiresGermanUmlautProcessing() const {
- return HeaderReadingUtils::requiresGermanUmlautProcessing(mDictionaryFlags);
+ return HeaderReadWriteUtils::requiresGermanUmlautProcessing(mDictionaryFlags);
}
AK_FORCE_INLINE bool requiresFrenchLigatureProcessing() const {
- return HeaderReadingUtils::requiresFrenchLigatureProcessing(
- mDictionaryFlags);
+ return HeaderReadWriteUtils::requiresFrenchLigatureProcessing(mDictionaryFlags);
}
AK_FORCE_INLINE float getMultiWordCostMultiplier() const {
@@ -70,19 +81,22 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
void readHeaderValueOrQuestionMark(const char *const key,
int *outValue, int outValueSize) const;
+ bool writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
+ const bool updatesLastUpdatedTime) const;
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderPolicy);
static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY;
static const char *const USES_FORGETTING_CURVE_KEY;
static const char *const LAST_UPDATED_TIME_KEY;
- static const float DEFAULT_MULTIPLE_WORD_COST_MULTIPLIER;
+ static const int DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE;
static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE;
- const uint8_t *const mDictBuf;
- const HeaderReadingUtils::DictionaryFlags mDictionaryFlags;
+ const FormatUtils::FORMAT_VERSION mDictFormatVersion;
+ const HeaderReadWriteUtils::DictionaryFlags mDictionaryFlags;
const int mSize;
- HeaderReadingUtils::AttributeMap mAttributeMap;
+ HeaderReadWriteUtils::AttributeMap mAttributeMap;
const float mMultiWordCostMultiplier;
const bool mUsesForgettingCurve;
const int mLastUpdatedTime;
@@ -93,15 +107,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
int readLastUpdatedTime() const;
- bool getAttributeValueAsInt(const char *const key, int *const outValue) const;
-
- static HeaderReadingUtils::AttributeMap createAttributeMapAndReadAllAttributes(
+ static HeaderReadWriteUtils::AttributeMap createAttributeMapAndReadAllAttributes(
const uint8_t *const dictBuf);
-
- static int parseIntAttributeValue(const std::vector<int> *const attributeValue);
-
- static void insertCharactersIntoVector(
- const char *const characters, std::vector<int> *const vector);
};
} // namespace latinime
#endif /* LATINIME_HEADER_POLICY_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
new file mode 100644
index 000000000..3b1c78085
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
@@ -0,0 +1,215 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h"
+
+#include <cctype>
+#include <cstdio>
+#include <vector>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
+
+namespace latinime {
+
+const int HeaderReadWriteUtils::MAX_ATTRIBUTE_KEY_LENGTH = 256;
+const int HeaderReadWriteUtils::MAX_ATTRIBUTE_VALUE_LENGTH = 256;
+
+const int HeaderReadWriteUtils::HEADER_MAGIC_NUMBER_SIZE = 4;
+const int HeaderReadWriteUtils::HEADER_DICTIONARY_VERSION_SIZE = 2;
+const int HeaderReadWriteUtils::HEADER_FLAG_SIZE = 2;
+const int HeaderReadWriteUtils::HEADER_SIZE_FIELD_SIZE = 4;
+
+const HeaderReadWriteUtils::DictionaryFlags HeaderReadWriteUtils::NO_FLAGS = 0;
+// Flags for special processing
+// Those *must* match the flags in makedict (FormatSpec#*_PROCESSING_FLAG) or
+// something very bad (like, the apocalypse) will happen. Please update both at the same time.
+const HeaderReadWriteUtils::DictionaryFlags
+ HeaderReadWriteUtils::GERMAN_UMLAUT_PROCESSING_FLAG = 0x1;
+const HeaderReadWriteUtils::DictionaryFlags
+ HeaderReadWriteUtils::SUPPORTS_DYNAMIC_UPDATE_FLAG = 0x2;
+const HeaderReadWriteUtils::DictionaryFlags
+ HeaderReadWriteUtils::FRENCH_LIGATURE_PROCESSING_FLAG = 0x4;
+
+// Note that these are corresponding definitions in Java side in FormatSpec.FileHeader.
+const char *const HeaderReadWriteUtils::SUPPORTS_DYNAMIC_UPDATE_KEY = "SUPPORTS_DYNAMIC_UPDATE";
+const char *const HeaderReadWriteUtils::REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY =
+ "REQUIRES_GERMAN_UMLAUT_PROCESSING";
+const char *const HeaderReadWriteUtils::REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY =
+ "REQUIRES_FRENCH_LIGATURE_PROCESSING";
+
+/* static */ int HeaderReadWriteUtils::getHeaderSize(const uint8_t *const dictBuf) {
+ // See the format of the header in the comment in
+ // BinaryDictionaryFormatUtils::detectFormatVersion()
+ return ByteArrayUtils::readUint32(dictBuf, HEADER_MAGIC_NUMBER_SIZE
+ + HEADER_DICTIONARY_VERSION_SIZE + HEADER_FLAG_SIZE);
+}
+
+/* static */ HeaderReadWriteUtils::DictionaryFlags
+ HeaderReadWriteUtils::getFlags(const uint8_t *const dictBuf) {
+ return ByteArrayUtils::readUint16(dictBuf,
+ HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE);
+}
+
+/* static */ HeaderReadWriteUtils::DictionaryFlags
+ HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap(
+ const HeaderReadWriteUtils::AttributeMap *const attributeMap) {
+ AttributeMap::key_type key;
+ insertCharactersIntoVector(REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY, &key);
+ const bool requiresGermanUmlautProcessing = readBoolAttributeValue(attributeMap, &key,
+ false /* defaultValue */);
+ key.clear();
+ insertCharactersIntoVector(REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY, &key);
+ const bool requiresFrenchLigatureProcessing = readBoolAttributeValue(attributeMap, &key,
+ false /* defaultValue */);
+ key.clear();
+ insertCharactersIntoVector(SUPPORTS_DYNAMIC_UPDATE_KEY, &key);
+ const bool supportsDynamicUpdate = readBoolAttributeValue(attributeMap, &key,
+ false /* defaultValue */);
+ DictionaryFlags dictflags = NO_FLAGS;
+ dictflags |= requiresGermanUmlautProcessing ? GERMAN_UMLAUT_PROCESSING_FLAG : 0;
+ dictflags |= requiresFrenchLigatureProcessing ? FRENCH_LIGATURE_PROCESSING_FLAG : 0;
+ dictflags |= supportsDynamicUpdate ? SUPPORTS_DYNAMIC_UPDATE_FLAG : 0;
+ return dictflags;
+}
+
+/* static */ void HeaderReadWriteUtils::fetchAllHeaderAttributes(const uint8_t *const dictBuf,
+ AttributeMap *const headerAttributes) {
+ const int headerSize = getHeaderSize(dictBuf);
+ int pos = getHeaderOptionsPosition();
+ if (pos == NOT_A_DICT_POS) {
+ // The header doesn't have header options.
+ return;
+ }
+ int keyBuffer[MAX_ATTRIBUTE_KEY_LENGTH];
+ int valueBuffer[MAX_ATTRIBUTE_VALUE_LENGTH];
+ while (pos < headerSize) {
+ const int keyLength = ByteArrayUtils::readStringAndAdvancePosition(dictBuf,
+ MAX_ATTRIBUTE_KEY_LENGTH, keyBuffer, &pos);
+ std::vector<int> key;
+ key.insert(key.end(), keyBuffer, keyBuffer + keyLength);
+ const int valueLength = ByteArrayUtils::readStringAndAdvancePosition(dictBuf,
+ MAX_ATTRIBUTE_VALUE_LENGTH, valueBuffer, &pos);
+ std::vector<int> value;
+ value.insert(value.end(), valueBuffer, valueBuffer + valueLength);
+ headerAttributes->insert(AttributeMap::value_type(key, value));
+ }
+}
+
+/* static */ bool HeaderReadWriteUtils::writeDictionaryVersion(
+ BufferWithExtendableBuffer *const buffer, const FormatUtils::FORMAT_VERSION version,
+ int *const writingPos) {
+ if (!buffer->writeUintAndAdvancePosition(FormatUtils::MAGIC_NUMBER, HEADER_MAGIC_NUMBER_SIZE,
+ writingPos)) {
+ return false;
+ }
+ switch (version) {
+ case FormatUtils::VERSION_2:
+ // Version 2 dictionary writing is not supported.
+ return false;
+ case FormatUtils::VERSION_3:
+ return buffer->writeUintAndAdvancePosition(3 /* data */,
+ HEADER_DICTIONARY_VERSION_SIZE, writingPos);
+ default:
+ return false;
+ }
+}
+
+/* static */ bool HeaderReadWriteUtils::writeDictionaryFlags(
+ BufferWithExtendableBuffer *const buffer, const DictionaryFlags flags,
+ int *const writingPos) {
+ return buffer->writeUintAndAdvancePosition(flags, HEADER_FLAG_SIZE, writingPos);
+}
+
+/* static */ bool HeaderReadWriteUtils::writeDictionaryHeaderSize(
+ BufferWithExtendableBuffer *const buffer, const int size, int *const writingPos) {
+ return buffer->writeUintAndAdvancePosition(size, HEADER_SIZE_FIELD_SIZE, writingPos);
+}
+
+/* static */ bool HeaderReadWriteUtils::writeHeaderAttributes(
+ BufferWithExtendableBuffer *const buffer, const AttributeMap *const headerAttributes,
+ int *const writingPos) {
+ for (AttributeMap::const_iterator it = headerAttributes->begin();
+ it != headerAttributes->end(); ++it) {
+ // Write a key.
+ if (!buffer->writeCodePointsAndAdvancePosition(&(it->first.at(0)), it->first.size(),
+ true /* writesTerminator */, writingPos)) {
+ return false;
+ }
+ // Write a value.
+ if (!buffer->writeCodePointsAndAdvancePosition(&(it->second.at(0)), it->second.size(),
+ true /* writesTerminator */, writingPos)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+/* static */ void HeaderReadWriteUtils::setBoolAttribute(AttributeMap *const headerAttributes,
+ const AttributeMap::key_type *const key, const bool value) {
+ setIntAttribute(headerAttributes, key, value ? 1 : 0);
+}
+
+/* static */ void HeaderReadWriteUtils::setIntAttribute(AttributeMap *const headerAttributes,
+ const AttributeMap::key_type *const key, const int value) {
+ AttributeMap::mapped_type valueVector;
+ char charBuf[LARGEST_INT_DIGIT_COUNT + 1];
+ snprintf(charBuf, LARGEST_INT_DIGIT_COUNT + 1, "%d", value);
+ insertCharactersIntoVector(charBuf, &valueVector);
+ (*headerAttributes)[*key] = valueVector;
+}
+
+/* static */ bool HeaderReadWriteUtils::readBoolAttributeValue(
+ const AttributeMap *const headerAttributes, const AttributeMap::key_type *const key,
+ const bool defaultValue) {
+ const int intDefaultValue = defaultValue ? 1 : 0;
+ const int intValue = readIntAttributeValue(headerAttributes, key, intDefaultValue);
+ return intValue != 0;
+}
+
+/* static */ int HeaderReadWriteUtils::readIntAttributeValue(
+ const AttributeMap *const headerAttributes, const AttributeMap::key_type *const key,
+ const int defaultValue) {
+ AttributeMap::const_iterator it = headerAttributes->find(*key);
+ if (it != headerAttributes->end()) {
+ int value = 0;
+ bool isNegative = false;
+ for (size_t i = 0; i < it->second.size(); ++i) {
+ if (i == 0 && it->second.at(i) == '-') {
+ isNegative = true;
+ } else {
+ if (!isdigit(it->second.at(i))) {
+ // If not a number.
+ return defaultValue;
+ }
+ value *= 10;
+ value += it->second.at(i) - '0';
+ }
+ }
+ return isNegative ? -value : value;
+ }
+ return defaultValue;
+}
+
+/* static */ void HeaderReadWriteUtils::insertCharactersIntoVector(const char *const characters,
+ std::vector<int> *const vector) {
+ for (int i = 0; characters[i]; ++i) {
+ vector->push_back(characters[i]);
+ }
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h
index 5716198fb..caa5097f6 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h
@@ -14,18 +14,21 @@
* limitations under the License.
*/
-#ifndef LATINIME_HEADER_READING_UTILS_H
-#define LATINIME_HEADER_READING_UTILS_H
+#ifndef LATINIME_HEADER_READ_WRITE_UTILS_H
+#define LATINIME_HEADER_READ_WRITE_UTILS_H
#include <map>
#include <stdint.h>
#include <vector>
#include "defines.h"
+#include "suggest/policyimpl/dictionary/utils/format_utils.h"
namespace latinime {
-class HeaderReadingUtils {
+class BufferWithExtendableBuffer;
+
+class HeaderReadWriteUtils {
public:
typedef uint16_t DictionaryFlags;
typedef std::map<std::vector<int>, std::vector<int> > AttributeMap;
@@ -51,11 +54,44 @@ class HeaderReadingUtils {
+ HEADER_SIZE_FIELD_SIZE;
}
+ static DictionaryFlags createAndGetDictionaryFlagsUsingAttributeMap(
+ const HeaderReadWriteUtils::AttributeMap *const attributeMap);
+
static void fetchAllHeaderAttributes(const uint8_t *const dictBuf,
AttributeMap *const headerAttributes);
+ static bool writeDictionaryVersion(BufferWithExtendableBuffer *const buffer,
+ const FormatUtils::FORMAT_VERSION version, int *const writingPos);
+
+ static bool writeDictionaryFlags(BufferWithExtendableBuffer *const buffer,
+ const DictionaryFlags flags, int *const writingPos);
+
+ static bool writeDictionaryHeaderSize(BufferWithExtendableBuffer *const buffer,
+ const int size, int *const writingPos);
+
+ static bool writeHeaderAttributes(BufferWithExtendableBuffer *const buffer,
+ const AttributeMap *const headerAttributes, int *const writingPos);
+
+ /**
+ * Methods for header attributes.
+ */
+ static void setBoolAttribute(AttributeMap *const headerAttributes,
+ const AttributeMap::key_type *const key, const bool value);
+
+ static void setIntAttribute(AttributeMap *const headerAttributes,
+ const AttributeMap::key_type *const key, const int value);
+
+ static bool readBoolAttributeValue(const AttributeMap *const headerAttributes,
+ const AttributeMap::key_type *const key, const bool defaultValue);
+
+ static int readIntAttributeValue(const AttributeMap *const headerAttributes,
+ const AttributeMap::key_type *const key, const int defaultValue);
+
+ static void insertCharactersIntoVector(const char *const characters,
+ AttributeMap::key_type *const key);
+
private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderReadingUtils);
+ DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderReadWriteUtils);
static const int MAX_ATTRIBUTE_KEY_LENGTH;
static const int MAX_ATTRIBUTE_VALUE_LENGTH;
@@ -72,7 +108,10 @@ class HeaderReadingUtils {
static const DictionaryFlags GERMAN_UMLAUT_PROCESSING_FLAG;
static const DictionaryFlags SUPPORTS_DYNAMIC_UPDATE_FLAG;
static const DictionaryFlags FRENCH_LIGATURE_PROCESSING_FLAG;
- static const DictionaryFlags CONTAINS_BIGRAMS_FLAG;
+
+ static const char *const SUPPORTS_DYNAMIC_UPDATE_KEY;
+ static const char *const REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY;
+ static const char *const REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY;
};
}
-#endif /* LATINIME_HEADER_READING_UTILS_H */
+#endif /* LATINIME_HEADER_READ_WRITE_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.cpp
deleted file mode 100644
index 186c043c1..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.cpp
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/header/header_reading_utils.h"
-
-#include <vector>
-
-#include "defines.h"
-#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
-
-namespace latinime {
-
-const int HeaderReadingUtils::MAX_ATTRIBUTE_KEY_LENGTH = 256;
-const int HeaderReadingUtils::MAX_ATTRIBUTE_VALUE_LENGTH = 256;
-
-const int HeaderReadingUtils::HEADER_MAGIC_NUMBER_SIZE = 4;
-const int HeaderReadingUtils::HEADER_DICTIONARY_VERSION_SIZE = 2;
-const int HeaderReadingUtils::HEADER_FLAG_SIZE = 2;
-const int HeaderReadingUtils::HEADER_SIZE_FIELD_SIZE = 4;
-
-const HeaderReadingUtils::DictionaryFlags HeaderReadingUtils::NO_FLAGS = 0;
-// Flags for special processing
-// Those *must* match the flags in makedict (FormatSpec#*_PROCESSING_FLAG) or
-// something very bad (like, the apocalypse) will happen. Please update both at the same time.
-const HeaderReadingUtils::DictionaryFlags
- HeaderReadingUtils::GERMAN_UMLAUT_PROCESSING_FLAG = 0x1;
-const HeaderReadingUtils::DictionaryFlags
- HeaderReadingUtils::SUPPORTS_DYNAMIC_UPDATE_FLAG = 0x2;
-const HeaderReadingUtils::DictionaryFlags
- HeaderReadingUtils::FRENCH_LIGATURE_PROCESSING_FLAG = 0x4;
-
-/* static */ int HeaderReadingUtils::getHeaderSize(const uint8_t *const dictBuf) {
- // See the format of the header in the comment in
- // BinaryDictionaryFormatUtils::detectFormatVersion()
- return ByteArrayUtils::readUint32(dictBuf, HEADER_MAGIC_NUMBER_SIZE
- + HEADER_DICTIONARY_VERSION_SIZE + HEADER_FLAG_SIZE);
-}
-
-/* static */ HeaderReadingUtils::DictionaryFlags
- HeaderReadingUtils::getFlags(const uint8_t *const dictBuf) {
- return ByteArrayUtils::readUint16(dictBuf,
- HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE);
-}
-
-/* static */ void HeaderReadingUtils::fetchAllHeaderAttributes(const uint8_t *const dictBuf,
- AttributeMap *const headerAttributes) {
- const int headerSize = getHeaderSize(dictBuf);
- int pos = getHeaderOptionsPosition();
- if (pos == NOT_A_DICT_POS) {
- // The header doesn't have header options.
- return;
- }
- int keyBuffer[MAX_ATTRIBUTE_KEY_LENGTH];
- int valueBuffer[MAX_ATTRIBUTE_VALUE_LENGTH];
- while (pos < headerSize) {
- const int keyLength = ByteArrayUtils::readStringAndAdvancePosition(dictBuf,
- MAX_ATTRIBUTE_KEY_LENGTH, keyBuffer, &pos);
- std::vector<int> key;
- key.insert(key.end(), keyBuffer, keyBuffer + keyLength);
- const int valueLength = ByteArrayUtils::readStringAndAdvancePosition(dictBuf,
- MAX_ATTRIBUTE_VALUE_LENGTH, valueBuffer, &pos);
- std::vector<int> value;
- value.insert(value.end(), valueBuffer, valueBuffer + valueLength);
- headerAttributes->insert(AttributeMap::value_type(key, value));
- }
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp
index e6cff431b..8a84bd261 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp
@@ -31,9 +31,21 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
return;
}
int nextPos = dicNode->getChildrenPos();
+ if (nextPos < 0 || nextPos >= mDictBufferSize) {
+ AKLOGE("Children PtNode array position is invalid. pos: %d, dict size: %d",
+ nextPos, mDictBufferSize);
+ ASSERT(false);
+ return;
+ }
const int childCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
mDictRoot, &nextPos);
for (int i = 0; i < childCount; i++) {
+ if (nextPos < 0 || nextPos >= mDictBufferSize) {
+ AKLOGE("Child PtNode position is invalid. pos: %d, dict size: %d, childCount: %d / %d",
+ nextPos, mDictBufferSize, i, childCount);
+ ASSERT(false);
+ return;
+ }
nextPos = createAndGetLeavingChildNode(dicNode, nextPos, childDicNodes);
}
}
@@ -49,7 +61,7 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
// with a z, it's the last PtNode of the root array, so all children addresses will be smaller
// than the position we look for, and we have to descend the z node).
/* Parameters :
- * nodePos: the byte position of the terminal PtNode of the word we are searching for (this is
+ * ptNodePos: the byte position of the terminal PtNode of the word we are searching for (this is
* what is stored as the "bigram position" in each bigram)
* outCodePoints: an array to write the found word, with MAX_WORD_LENGTH size.
* outUnigramProbability: a pointer to an int to write the probability into.
@@ -57,7 +69,7 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
*/
// TODO: Split this function to be more readable
int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
- const int nodePos, const int maxCodePointCount, int *const outCodePoints,
+ const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const {
int pos = getRootPosition();
int wordPos = 0;
@@ -78,7 +90,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
const int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
mDictRoot, &pos);
- if (nodePos == startPos) {
+ if (ptNodePos == startPos) {
// We found the position. Copy the rest of the code points in the buffer and return
// the length.
outCodePoints[wordPos] = character;
@@ -121,7 +133,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
// Here comes the tricky part. First, read the children position.
const int childrenPos = PatriciaTrieReadingUtils
::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &currentPos);
- if (childrenPos > nodePos) {
+ if (childrenPos > ptNodePos) {
// If the children pos is greater than the position, it means the previous
// PtNode, which position is stored in lastCandidatePtNodePos, was the right
// one.
@@ -213,7 +225,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
}
}
- // If we have looked through all the PtNodes and found no match, the nodePos is
+ // If we have looked through all the PtNodes and found no match, the ptNodePos is
// not the position of a terminal in this dictionary.
return 0;
}
@@ -319,11 +331,11 @@ int PatriciaTriePolicy::getProbability(const int unigramProbability,
}
}
-int PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int nodePos) const {
- if (nodePos == NOT_A_DICT_POS) {
+int PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const {
+ if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_PROBABILITY;
}
- int pos = nodePos;
+ int pos = ptNodePos;
const PatriciaTrieReadingUtils::NodeFlags flags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
if (!PatriciaTrieReadingUtils::isTerminal(flags)) {
@@ -341,11 +353,11 @@ int PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int nodePos) const {
mDictRoot, &pos), NOT_A_PROBABILITY);
}
-int PatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) const {
- if (nodePos == NOT_A_DICT_POS) {
+int PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
+ if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS;
}
- int pos = nodePos;
+ int pos = ptNodePos;
const PatriciaTrieReadingUtils::NodeFlags flags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
if (!PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
@@ -361,11 +373,11 @@ int PatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) const {
return pos;
}
-int PatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const {
- if (nodePos == NOT_A_DICT_POS) {
+int PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
+ if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS;
}
- int pos = nodePos;
+ int pos = ptNodePos;
const PatriciaTrieReadingUtils::NodeFlags flags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
if (!PatriciaTrieReadingUtils::hasBigrams(flags)) {
@@ -385,8 +397,8 @@ int PatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const {
}
int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode,
- const int nodePos, DicNodeVector *childDicNodes) const {
- int pos = nodePos;
+ const int ptNodePos, DicNodeVector *childDicNodes) const {
+ int pos = ptNodePos;
const PatriciaTrieReadingUtils::NodeFlags flags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
int mergedNodeCodePoints[MAX_WORD_LENGTH];
@@ -404,7 +416,12 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod
if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
getBigramsStructurePolicy()->skipAllBigrams(&pos);
}
- childDicNodes->pushLeavingChild(dicNode, nodePos, childrenPos, probability,
+ if (mergedNodeCodePointCount <= 0) {
+ AKLOGE("Empty PtNode is not allowed. Code point count: %d", mergedNodeCodePointCount);
+ ASSERT(false);
+ return pos;
+ }
+ childDicNodes->pushLeavingChild(dicNode, ptNodePos, childrenPos, probability,
PatriciaTrieReadingUtils::isTerminal(flags),
PatriciaTrieReadingUtils::hasChildrenInFlags(flags),
PatriciaTrieReadingUtils::isBlacklisted(flags) ||
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
index cee3e4ab2..f1de914cb 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
@@ -34,8 +34,9 @@ class DicNodeVector;
class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public:
PatriciaTriePolicy(const MmappedBuffer *const buffer)
- : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer()),
+ : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), buffer->getBufferSize()),
mDictRoot(mBuffer->getBuffer() + mHeaderPolicy.getSize()),
+ mDictBufferSize(mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {}
~PatriciaTriePolicy() {
@@ -58,11 +59,11 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int getProbability(const int unigramProbability, const int bigramProbability) const;
- int getUnigramProbabilityOfPtNode(const int nodePos) const;
+ int getUnigramProbabilityOfPtNode(const int ptNodePos) const;
- int getShortcutPositionOfNode(const int nodePos) const;
+ int getShortcutPositionOfPtNode(const int ptNodePos) const;
- int getBigramsPositionOfNode(const int nodePos) const;
+ int getBigramsPositionOfPtNode(const int ptNodePos) const;
const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
return &mHeaderPolicy;
@@ -118,10 +119,11 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
const MmappedBuffer *const mBuffer;
const HeaderPolicy mHeaderPolicy;
const uint8_t *const mDictRoot;
+ const int mDictBufferSize;
const BigramListPolicy mBigramListPolicy;
const ShortcutListPolicy mShortcutListPolicy;
- int createAndGetLeavingChildNode(const DicNode *const dicNode, const int nodePos,
+ int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos,
DicNodeVector *const childDicNodes) const;
};
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp
index 1316b425f..7df55815f 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp
@@ -71,8 +71,17 @@ const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_BLACKLISTED = 0x01;
length = ByteArrayUtils::readStringAndAdvancePosition(buffer, maxLength, outBuffer,
pos);
} else {
- if (maxLength > 0) {
- outBuffer[0] = getCodePointAndAdvancePosition(buffer, pos);
+ const int codePoint = getCodePointAndAdvancePosition(buffer, pos);
+ if (codePoint == NOT_A_CODE_POINT) {
+ // CAVEAT: codePoint == NOT_A_CODE_POINT means the code point is
+ // CHARACTER_ARRAY_TERMINATOR. The code point must not be CHARACTER_ARRAY_TERMINATOR
+ // when the PtNode has a single code point.
+ length = 0;
+ AKLOGE("codePoint is NOT_A_CODE_POINT. pos: %d, codePoint: 0x%x, buffer[pos - 1]: 0x%x",
+ *pos - 1, codePoint, buffer[*pos - 1]);
+ ASSERT(false);
+ } else if (maxLength > 0) {
+ outBuffer[0] = codePoint;
length = 1;
}
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h
index 1803c09cb..bd3211f6a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h
@@ -31,7 +31,7 @@ namespace latinime {
*/
class DynamicShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
public:
- explicit DynamicShortcutListPolicy(BufferWithExtendableBuffer *const buffer)
+ explicit DynamicShortcutListPolicy(const BufferWithExtendableBuffer *const buffer)
: mBuffer(buffer) {}
~DynamicShortcutListPolicy() {}
@@ -82,18 +82,20 @@ class DynamicShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
}
}
- // Copy shortcuts from the shortcut list that starts at fromPos to toPos and advance these
- // positions after the shortcut lists. This returns whether the copy was succeeded or not.
- bool copyAllShortcutsAndReturnIfSucceededOrNot(int *const fromPos, int *const toPos) {
+ // Copy shortcuts from the shortcut list that starts at fromPos in mBuffer to toPos in
+ // bufferToWrite and advance these positions after the shortcut lists. This returns whether
+ // the copy was succeeded or not.
+ bool copyAllShortcutsAndReturnIfSucceededOrNot(BufferWithExtendableBuffer *const bufferToWrite,
+ int *const fromPos, int *const toPos) const {
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*fromPos);
- const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
if (usesAdditionalBuffer) {
*fromPos -= mBuffer->getOriginalBufferSize();
}
const int shortcutListSize = ShortcutListReadingUtils
- ::getShortcutListSizeAndForwardPointer(buffer, fromPos);
+ ::getShortcutListSizeAndForwardPointer(mBuffer->getBuffer(usesAdditionalBuffer),
+ fromPos);
// Copy shortcut list size.
- if (!mBuffer->writeUintAndAdvancePosition(
+ if (!bufferToWrite->writeUintAndAdvancePosition(
shortcutListSize + ShortcutListReadingUtils::getShortcutListSizeFieldSize(),
ShortcutListReadingUtils::getShortcutListSizeFieldSize(), toPos)) {
return false;
@@ -102,7 +104,7 @@ class DynamicShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
for (int i = 0; i < shortcutListSize; ++i) {
const uint8_t data = ByteArrayUtils::readUint8AndAdvancePosition(
mBuffer->getBuffer(usesAdditionalBuffer), fromPos);
- if (!mBuffer->writeUintAndAdvancePosition(data, 1 /* size */, toPos)) {
+ if (!bufferToWrite->writeUintAndAdvancePosition(data, 1 /* size */, toPos)) {
return false;
}
}
@@ -115,7 +117,7 @@ class DynamicShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicShortcutListPolicy);
- BufferWithExtendableBuffer *const mBuffer;
+ const BufferWithExtendableBuffer *const mBuffer;
};
} // namespace latinime
#endif // LATINIME_DYNAMIC_SHORTCUT_LIST_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
index 0fed275e9..f692882f2 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
@@ -18,9 +18,10 @@
namespace latinime {
-const size_t BufferWithExtendableBuffer::INITIAL_ADDITIONAL_BUFFER_SIZE = 16 * 1024;
const size_t BufferWithExtendableBuffer::MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024;
-const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 16 * 1024;
+const int BufferWithExtendableBuffer::NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE = 90;
+// TODO: Needs to allocate larger memory corresponding to the current vector size.
+const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 128 * 1024;
bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data, const int size,
int *const pos) {
@@ -64,6 +65,16 @@ bool BufferWithExtendableBuffer::writeCodePointsAndAdvancePosition(const int *co
return true;
}
+bool BufferWithExtendableBuffer::extendBuffer() {
+ const size_t sizeAfterExtending =
+ mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP;
+ if (sizeAfterExtending > mMaxAdditionalBufferSize) {
+ return false;
+ }
+ mAdditionalBuffer.resize(mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP);
+ return true;
+}
+
bool BufferWithExtendableBuffer::checkAndPrepareWriting(const int pos, const int size) {
if (isInAdditionalBuffer(pos)) {
const int tailPosition = getTailPosition();
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
index c6a484131..17d2e39c2 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
@@ -32,9 +32,11 @@ namespace latinime {
// raw pointer but provides several methods that handle boundary checking for writing data.
class BufferWithExtendableBuffer {
public:
- BufferWithExtendableBuffer(uint8_t *const originalBuffer, const int originalBufferSize)
+ BufferWithExtendableBuffer(uint8_t *const originalBuffer, const int originalBufferSize,
+ const int maxAdditionalBufferSize = MAX_ADDITIONAL_BUFFER_SIZE)
: mOriginalBuffer(originalBuffer), mOriginalBufferSize(originalBufferSize),
- mAdditionalBuffer(INITIAL_ADDITIONAL_BUFFER_SIZE), mUsedAdditionalBufferSize(0) {}
+ mAdditionalBuffer(EXTEND_ADDITIONAL_BUFFER_SIZE_STEP), mUsedAdditionalBufferSize(0),
+ mMaxAdditionalBufferSize(maxAdditionalBufferSize) {}
AK_FORCE_INLINE int getTailPosition() const {
return mOriginalBufferSize + mUsedAdditionalBufferSize;
@@ -61,6 +63,11 @@ class BufferWithExtendableBuffer {
return mOriginalBufferSize;
}
+ AK_FORCE_INLINE bool isNearSizeLimit() const {
+ return mAdditionalBuffer.size() >= ((mMaxAdditionalBufferSize
+ * NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE) / 100);
+ }
+
/**
* For writing.
*
@@ -75,28 +82,22 @@ class BufferWithExtendableBuffer {
private:
DISALLOW_COPY_AND_ASSIGN(BufferWithExtendableBuffer);
- static const size_t INITIAL_ADDITIONAL_BUFFER_SIZE;
static const size_t MAX_ADDITIONAL_BUFFER_SIZE;
+ static const int NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE;
static const size_t EXTEND_ADDITIONAL_BUFFER_SIZE_STEP;
uint8_t *const mOriginalBuffer;
const int mOriginalBufferSize;
std::vector<uint8_t> mAdditionalBuffer;
int mUsedAdditionalBufferSize;
+ const size_t mMaxAdditionalBufferSize;
// Return if the buffer is successfully extended or not.
- AK_FORCE_INLINE bool extendBuffer() {
- if (mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP
- > MAX_ADDITIONAL_BUFFER_SIZE) {
- return false;
- }
- mAdditionalBuffer.resize(mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP);
- return true;
- }
+ bool extendBuffer();
// Returns if it is possible to write size-bytes from pos. When pos is at the tail position of
// the additional buffer, try extending the buffer.
- AK_FORCE_INLINE bool checkAndPrepareWriting(const int pos, const int size);
+ bool checkAndPrepareWriting(const int pos, const int size);
};
}
#endif /* LATINIME_BUFFER_WITH_EXTENDABLE_BUFFER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
new file mode 100644
index 000000000..2e4ec2e1d
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
+
+#include <cstdio>
+#include <cstring>
+
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/format_utils.h"
+
+namespace latinime {
+
+const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = ".tmp";
+
+/* static */ bool DictFileWritingUtils::createEmptyDictFile(const char *const filePath,
+ const int dictVersion, const HeaderReadWriteUtils::AttributeMap *const attributeMap) {
+ switch (dictVersion) {
+ case 3:
+ return createEmptyV3DictFile(filePath, attributeMap);
+ default:
+ // Only version 3 dictionary is supported for now.
+ return false;
+ }
+}
+
+/* static */ bool DictFileWritingUtils::createEmptyV3DictFile(const char *const filePath,
+ const HeaderReadWriteUtils::AttributeMap *const attributeMap) {
+ BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
+ HeaderPolicy headerPolicy(FormatUtils::VERSION_3, attributeMap);
+ headerPolicy.writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */);
+ BufferWithExtendableBuffer bodyBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
+ if (!DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(&bodyBuffer, 0 /* rootPos */)) {
+ return false;
+ }
+ return flushAllHeaderAndBodyToFile(filePath, &headerBuffer, &bodyBuffer);
+}
+
+/* static */ bool DictFileWritingUtils::flushAllHeaderAndBodyToFile(const char *const filePath,
+ BufferWithExtendableBuffer *const dictHeader, BufferWithExtendableBuffer *const dictBody) {
+ const int tmpFileNameBufSize = strlen(filePath)
+ + strlen(TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE) + 1 /* terminator */;
+ // Name of a temporary file used for writing that is a connected string of original name and
+ // TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE.
+ char tmpFileName[tmpFileNameBufSize];
+ snprintf(tmpFileName, tmpFileNameBufSize, "%s%s", filePath,
+ TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE);
+ FILE *const file = fopen(tmpFileName, "wb");
+ if (!file) {
+ AKLOGE("Dictionary file %s cannnot be opened.", tmpFileName);
+ ASSERT(false);
+ return false;
+ }
+ // Write the dictionary header.
+ if (!writeBufferToFile(file, dictHeader)) {
+ remove(tmpFileName);
+ AKLOGE("Dictionary header cannnot be written. size: %d", dictHeader->getTailPosition());
+ ASSERT(false);
+ return false;
+ }
+ // Write the dictionary body.
+ if (!writeBufferToFile(file, dictBody)) {
+ remove(tmpFileName);
+ AKLOGE("Dictionary body cannnot be written. size: %d", dictBody->getTailPosition());
+ ASSERT(false);
+ return false;
+ }
+ fclose(file);
+ rename(tmpFileName, filePath);
+ return true;
+}
+
+// This closes file pointer when an error is caused and returns whether the writing was succeeded
+// or not.
+/* static */ bool DictFileWritingUtils::writeBufferToFile(FILE *const file,
+ const BufferWithExtendableBuffer *const buffer) {
+ const int originalBufSize = buffer->getOriginalBufferSize();
+ if (originalBufSize > 0 && fwrite(buffer->getBuffer(false /* usesAdditionalBuffer */),
+ originalBufSize, 1, file) < 1) {
+ fclose(file);
+ return false;
+ }
+ const int additionalBufSize = buffer->getTailPosition() - buffer->getOriginalBufferSize();
+ if (additionalBufSize > 0 && fwrite(buffer->getBuffer(true /* usesAdditionalBuffer */),
+ additionalBufSize, 1, file) < 1) {
+ fclose(file);
+ return false;
+ }
+ return true;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h
new file mode 100644
index 000000000..bd4ac66fd
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_FILE_WRITING_UTILS_H
+#define LATINIME_DICT_FILE_WRITING_UTILS_H
+
+#include <cstdio>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h"
+
+namespace latinime {
+
+class BufferWithExtendableBuffer;
+
+class DictFileWritingUtils {
+ public:
+ static bool createEmptyDictFile(const char *const filePath, const int dictVersion,
+ const HeaderReadWriteUtils::AttributeMap *const attributeMap);
+
+ static bool flushAllHeaderAndBodyToFile(const char *const filePath,
+ BufferWithExtendableBuffer *const dictHeader,
+ BufferWithExtendableBuffer *const dictBody);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DictFileWritingUtils);
+
+ static const char *const TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE;
+
+ static bool createEmptyV3DictFile(const char *const filePath,
+ const HeaderReadWriteUtils::AttributeMap *const attributeMap);
+
+ static bool writeBufferToFile(FILE *const file,
+ const BufferWithExtendableBuffer *const buffer);
+};
+} // namespace latinime
+#endif /* LATINIME_DICT_FILE_WRITING_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
index 3796c7b7b..1d77d5c27 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
@@ -20,20 +20,10 @@
namespace latinime {
-/**
- * Dictionary size
- */
-// Any file smaller than this is not a dictionary.
-const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 4;
+const uint32_t FormatUtils::MAGIC_NUMBER = 0x9BC13AFE;
-/**
- * Format versions
- */
-// 32 bit magic number is stored at the beginning of the dictionary header to reject unsupported
-// or obsolete dictionary formats.
-const uint32_t FormatUtils::HEADER_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE;
-// Magic number (4 bytes), version (2 bytes), options (2 bytes), header size (4 bytes) = 12
-const int FormatUtils::HEADER_VERSION_2_MINIMUM_SIZE = 12;
+// Magic number (4 bytes), version (2 bytes), flags (2 bytes), header size (4 bytes) = 12
+const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 12;
/* static */ FormatUtils::FORMAT_VERSION FormatUtils::detectFormatVersion(
const uint8_t *const dict, const int dictSize) {
@@ -45,16 +35,10 @@ const int FormatUtils::HEADER_VERSION_2_MINIMUM_SIZE = 12;
}
const uint32_t magicNumber = ByteArrayUtils::readUint32(dict, 0);
switch (magicNumber) {
- case HEADER_VERSION_2_MAGIC_NUMBER:
- // Version 2 header are at least 12 bytes long.
- // If this header has the version 2 magic number but is less than 12 bytes long,
- // then it's an unknown format and we need to avoid confidently reading the next bytes.
- if (dictSize < HEADER_VERSION_2_MINIMUM_SIZE) {
- return UNKNOWN_VERSION;
- }
+ case MAGIC_NUMBER:
// Version 2 header is as follows:
// Magic number (4 bytes) 0x9B 0xC1 0x3A 0xFE
- // Version number (2 bytes)
+ // Dictionary format version number (2 bytes)
// Options (2 bytes)
// Header size (4 bytes) : integer, big endian
if (ByteArrayUtils::readUint16(dict, 4) == 2) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
index f84321577..79ed0de29 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
@@ -34,14 +34,16 @@ class FormatUtils {
UNKNOWN_VERSION
};
+ // 32 bit magic number is stored at the beginning of the dictionary header to reject
+ // unsupported or obsolete dictionary formats.
+ static const uint32_t MAGIC_NUMBER;
+
static FORMAT_VERSION detectFormatVersion(const uint8_t *const dict, const int dictSize);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(FormatUtils);
static const int DICTIONARY_MINIMUM_SIZE;
- static const uint32_t HEADER_VERSION_2_MAGIC_NUMBER;
- static const int HEADER_VERSION_2_MINIMUM_SIZE;
};
} // namespace latinime
#endif /* LATINIME_FORMAT_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
index b6aa85896..9f0a331e3 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
@@ -74,7 +74,8 @@ class TypingWeighting : public Weighting {
// Note: min() required since length can be MAX_POINT_TO_KEY_LENGTH for characters not on
// the keyboard (like accented letters)
const float normalizedSquaredLength = traverseSession->getProximityInfoState(0)
- ->getPointToKeyLength(pointIndex, dicNode->getNodeCodePoint());
+ ->getPointToKeyLength(pointIndex,
+ CharUtils::toBaseLowerCase(dicNode->getNodeCodePoint()));
const float normalizedDistance = TouchPositionCorrectionUtils::getSweetSpotFactor(
traverseSession->isTouchPositionCorrectionEnabled(), normalizedSquaredLength);
const float weightedDistance = ScoringParams::DISTANCE_WEIGHT_LENGTH * normalizedDistance;
@@ -113,10 +114,10 @@ class TypingWeighting : public Weighting {
const int16_t parentPointIndex = parentDicNode->getInputIndex(0);
const int prevCodePoint = parentDicNode->getNodeCodePoint();
const float distance1 = traverseSession->getProximityInfoState(0)->getPointToKeyLength(
- parentPointIndex + 1, prevCodePoint);
+ parentPointIndex + 1, CharUtils::toBaseLowerCase(prevCodePoint));
const int codePoint = dicNode->getNodeCodePoint();
const float distance2 = traverseSession->getProximityInfoState(0)->getPointToKeyLength(
- parentPointIndex, codePoint);
+ parentPointIndex, CharUtils::toBaseLowerCase(codePoint));
const float distance = distance1 + distance2;
const float weightedLengthDistance =
distance * ScoringParams::DISTANCE_WEIGHT_LENGTH;
@@ -133,7 +134,7 @@ class TypingWeighting : public Weighting {
const bool existsAdjacentProximityChars = traverseSession->getProximityInfoState(0)
->existsAdjacentProximityChars(insertedPointIndex);
const float dist = traverseSession->getProximityInfoState(0)->getPointToKeyLength(
- insertedPointIndex + 1, dicNode->getNodeCodePoint());
+ insertedPointIndex + 1, CharUtils::toBaseLowerCase(dicNode->getNodeCodePoint()));
const float weightedDistance = dist * ScoringParams::DISTANCE_WEIGHT_LENGTH;
const bool singleChar = dicNode->getNodeCodePointCount() == 1;
float cost = (singleChar ? ScoringParams::INSERTION_COST_FIRST_CHAR : 0.0f);