aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/src/suggest/policyimpl
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni/src/suggest/policyimpl')
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_policy.h53
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp182
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h102
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp336
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h81
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp53
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h36
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp149
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h178
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp123
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h163
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp275
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h104
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp215
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h286
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp72
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h75
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp511
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h128
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp147
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h76
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp131
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h114
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp215
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h117
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp433
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h130
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp133
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h120
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h123
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h73
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp51
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h69
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp103
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h103
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.cpp25
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h261
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp107
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h50
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp56
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h49
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h102
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/probability_utils.h55
-rw-r--r--native/jni/src/suggest/policyimpl/typing/scoring_params.cpp23
-rw-r--r--native/jni/src/suggest/policyimpl/typing/scoring_params.h4
-rw-r--r--native/jni/src/suggest/policyimpl/typing/typing_scoring.h8
-rw-r--r--native/jni/src/suggest/policyimpl/typing/typing_traversal.h24
-rw-r--r--native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp1
-rw-r--r--native/jni/src/suggest/policyimpl/typing/typing_weighting.h71
-rw-r--r--native/jni/src/suggest/policyimpl/utils/damerau_levenshtein_edit_distance_policy.h14
-rw-r--r--native/jni/src/suggest/policyimpl/utils/edit_distance.h20
51 files changed, 6073 insertions, 57 deletions
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_policy.h
new file mode 100644
index 000000000..6ff95cac4
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_policy.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_BIGRAM_LIST_POLICY_H
+#define LATINIME_BIGRAM_LIST_POLICY_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
+#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
+
+namespace latinime {
+
+class BigramListPolicy : public DictionaryBigramsStructurePolicy {
+ public:
+ explicit BigramListPolicy(const uint8_t *const bigramsBuf) : mBigramsBuf(bigramsBuf) {}
+
+ ~BigramListPolicy() {}
+
+ void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
+ int *const pos) const {
+ BigramListReadWriteUtils::BigramFlags flags;
+ BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(mBigramsBuf, &flags,
+ outBigramPos, pos);
+ *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags);
+ *outHasNext = BigramListReadWriteUtils::hasNext(flags);
+ }
+
+ void skipAllBigrams(int *const pos) const {
+ BigramListReadWriteUtils::skipExistingBigrams(mBigramsBuf, pos);
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListPolicy);
+
+ const uint8_t *const mBigramsBuf;
+};
+} // namespace latinime
+#endif // LATINIME_BIGRAM_LIST_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp
new file mode 100644
index 000000000..1926b9831
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp
@@ -0,0 +1,182 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
+
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+const BigramListReadWriteUtils::BigramFlags BigramListReadWriteUtils::MASK_ATTRIBUTE_ADDRESS_TYPE =
+ 0x30;
+const BigramListReadWriteUtils::BigramFlags
+ BigramListReadWriteUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10;
+const BigramListReadWriteUtils::BigramFlags
+ BigramListReadWriteUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20;
+const BigramListReadWriteUtils::BigramFlags
+ BigramListReadWriteUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30;
+const BigramListReadWriteUtils::BigramFlags
+ BigramListReadWriteUtils::FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
+// Flag for presence of more attributes
+const BigramListReadWriteUtils::BigramFlags BigramListReadWriteUtils::FLAG_ATTRIBUTE_HAS_NEXT =
+ 0x80;
+// Mask for attribute probability, stored on 4 bits inside the flags byte.
+const BigramListReadWriteUtils::BigramFlags
+ BigramListReadWriteUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
+const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
+
+/* static */ void BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
+ const uint8_t *const bigramsBuf, BigramFlags *const outBigramFlags,
+ int *const outTargetPtNodePos, int *const bigramEntryPos) {
+ const BigramFlags bigramFlags = ByteArrayUtils::readUint8AndAdvancePosition(bigramsBuf,
+ bigramEntryPos);
+ if (outBigramFlags) {
+ *outBigramFlags = bigramFlags;
+ }
+ const int targetPos = getBigramAddressAndAdvancePosition(bigramsBuf, bigramFlags,
+ bigramEntryPos);
+ if (outTargetPtNodePos) {
+ *outTargetPtNodePos = targetPos;
+ }
+}
+
+/* static */ void BigramListReadWriteUtils::skipExistingBigrams(const uint8_t *const bigramsBuf,
+ int *const bigramListPos) {
+ BigramFlags flags;
+ do {
+ getBigramEntryPropertiesAndAdvancePosition(bigramsBuf, &flags, 0 /* outTargetPtNodePos */,
+ bigramListPos);
+ } while(hasNext(flags));
+}
+
+/* static */ int BigramListReadWriteUtils::getBigramAddressAndAdvancePosition(
+ const uint8_t *const bigramsBuf, const BigramFlags flags, int *const pos) {
+ int offset = 0;
+ const int origin = *pos;
+ switch (MASK_ATTRIBUTE_ADDRESS_TYPE & flags) {
+ case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
+ offset = ByteArrayUtils::readUint8AndAdvancePosition(bigramsBuf, pos);
+ break;
+ case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
+ offset = ByteArrayUtils::readUint16AndAdvancePosition(bigramsBuf, pos);
+ break;
+ case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
+ offset = ByteArrayUtils::readUint24AndAdvancePosition(bigramsBuf, pos);
+ break;
+ }
+ if (offset == DynamicPatriciaTrieReadingUtils::DICT_OFFSET_INVALID) {
+ return NOT_A_DICT_POS;
+ } else if (offset == DynamicPatriciaTrieReadingUtils::DICT_OFFSET_ZERO_OFFSET) {
+ return origin;
+ }
+ if (isOffsetNegative(flags)) {
+ return origin - offset;
+ } else {
+ return origin + offset;
+ }
+}
+
+/* static */ bool BigramListReadWriteUtils::setHasNextFlag(
+ BufferWithExtendableBuffer *const buffer, const bool hasNext, const int entryPos) {
+ const bool usesAdditionalBuffer = buffer->isInAdditionalBuffer(entryPos);
+ int readingPos = entryPos;
+ if (usesAdditionalBuffer) {
+ readingPos -= buffer->getOriginalBufferSize();
+ }
+ BigramFlags bigramFlags = ByteArrayUtils::readUint8AndAdvancePosition(
+ buffer->getBuffer(usesAdditionalBuffer), &readingPos);
+ if (hasNext) {
+ bigramFlags = bigramFlags | FLAG_ATTRIBUTE_HAS_NEXT;
+ } else {
+ bigramFlags = bigramFlags & (~FLAG_ATTRIBUTE_HAS_NEXT);
+ }
+ int writingPos = entryPos;
+ return buffer->writeUintAndAdvancePosition(bigramFlags, 1 /* size */, &writingPos);
+}
+
+/* static */ bool BigramListReadWriteUtils::createAndWriteBigramEntry(
+ BufferWithExtendableBuffer *const buffer, const int targetPos, const int probability,
+ const bool hasNext, int *const writingPos) {
+ BigramFlags flags;
+ if (!createAndGetBigramFlags(*writingPos, targetPos, probability, hasNext, &flags)) {
+ return false;
+ }
+ return writeBigramEntry(buffer, flags, targetPos, writingPos);
+}
+
+/* static */ bool BigramListReadWriteUtils::writeBigramEntry(
+ BufferWithExtendableBuffer *const bufferToWrite, const BigramFlags flags,
+ const int targetPtNodePos, int *const writingPos) {
+ const int offset = getBigramTargetOffset(targetPtNodePos, *writingPos);
+ const BigramFlags flagsToWrite = (offset < 0) ?
+ (flags | FLAG_ATTRIBUTE_OFFSET_NEGATIVE) : (flags & ~FLAG_ATTRIBUTE_OFFSET_NEGATIVE);
+ if (!bufferToWrite->writeUintAndAdvancePosition(flagsToWrite, 1 /* size */, writingPos)) {
+ return false;
+ }
+ const uint32_t absOffest = abs(offset);
+ const int bigramTargetFieldSize = attributeAddressSize(flags);
+ return bufferToWrite->writeUintAndAdvancePosition(absOffest, bigramTargetFieldSize,
+ writingPos);
+}
+
+// Returns true if the bigram entry is valid and put entry flags into out*.
+/* static */ bool BigramListReadWriteUtils::createAndGetBigramFlags(const int entryPos,
+ const int targetPtNodePos, const int probability, const bool hasNext,
+ BigramFlags *const outBigramFlags) {
+ BigramFlags flags = probability & MASK_ATTRIBUTE_PROBABILITY;
+ if (hasNext) {
+ flags |= FLAG_ATTRIBUTE_HAS_NEXT;
+ }
+ const int offset = getBigramTargetOffset(targetPtNodePos, entryPos);
+ if (offset < 0) {
+ flags |= FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
+ }
+ const uint32_t absOffest = abs(offset);
+ if ((absOffest >> 24) != 0) {
+ // Offset is too large.
+ return false;
+ } else if ((absOffest >> 16) != 0) {
+ flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
+ } else if ((absOffest >> 8) != 0) {
+ flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES;
+ } else {
+ flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE;
+ }
+ // Currently, all newly written bigram position fields are 3 bytes to simplify dictionary
+ // writing.
+ // TODO: Remove following 2 lines and optimize memory space.
+ flags = (flags & (~MASK_ATTRIBUTE_ADDRESS_TYPE)) | FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
+ *outBigramFlags = flags;
+ return true;
+}
+
+/* static */ int BigramListReadWriteUtils::getBigramTargetOffset(const int targetPtNodePos,
+ const int entryPos) {
+ if (targetPtNodePos == NOT_A_DICT_POS) {
+ return DynamicPatriciaTrieReadingUtils::DICT_OFFSET_INVALID;
+ } else {
+ const int offset = targetPtNodePos - (entryPos + 1 /* bigramFlagsField */);
+ if (offset == 0) {
+ return DynamicPatriciaTrieReadingUtils::DICT_OFFSET_ZERO_OFFSET;
+ } else {
+ return offset;
+ }
+ }
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h
new file mode 100644
index 000000000..eabe4e099
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_BIGRAM_LIST_READ_WRITE_UTILS_H
+#define LATINIME_BIGRAM_LIST_READ_WRITE_UTILS_H
+
+#include <cstdlib>
+#include <stdint.h>
+
+#include "defines.h"
+
+namespace latinime {
+
+class BufferWithExtendableBuffer;
+
+class BigramListReadWriteUtils {
+public:
+ typedef uint8_t BigramFlags;
+
+ static void getBigramEntryPropertiesAndAdvancePosition(const uint8_t *const bigramsBuf,
+ BigramFlags *const outBigramFlags, int *const outTargetPtNodePos,
+ int *const bigramEntryPos);
+
+ static AK_FORCE_INLINE int getProbabilityFromFlags(const BigramFlags flags) {
+ return flags & MASK_ATTRIBUTE_PROBABILITY;
+ }
+
+ static AK_FORCE_INLINE bool hasNext(const BigramFlags flags) {
+ return (flags & FLAG_ATTRIBUTE_HAS_NEXT) != 0;
+ }
+
+ // Bigrams reading methods
+ static void skipExistingBigrams(const uint8_t *const bigramsBuf, int *const bigramListPos);
+
+ // Returns the size of the bigram position field that is stored in bigram flags.
+ static AK_FORCE_INLINE int attributeAddressSize(const BigramFlags flags) {
+ return (flags & MASK_ATTRIBUTE_ADDRESS_TYPE) >> ATTRIBUTE_ADDRESS_SHIFT;
+ /* Note: this is a value-dependant optimization of what may probably be
+ more readably written this way:
+ switch (flags * BinaryFormat::MASK_ATTRIBUTE_ADDRESS_TYPE) {
+ case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: return 1;
+ case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: return 2;
+ case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTE: return 3;
+ default: return 0;
+ }
+ */
+ }
+
+ static bool setHasNextFlag(BufferWithExtendableBuffer *const buffer,
+ const bool hasNext, const int entryPos);
+
+ static AK_FORCE_INLINE BigramFlags setProbabilityInFlags(const BigramFlags flags,
+ const int probability) {
+ return (flags & (~MASK_ATTRIBUTE_PROBABILITY)) | (probability & MASK_ATTRIBUTE_PROBABILITY);
+ }
+
+ static bool createAndWriteBigramEntry(BufferWithExtendableBuffer *const buffer,
+ const int targetPos, const int probability, const bool hasNext, int *const writingPos);
+
+ static bool writeBigramEntry(BufferWithExtendableBuffer *const buffer, const BigramFlags flags,
+ const int targetOffset, int *const writingPos);
+
+private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListReadWriteUtils);
+
+ static const BigramFlags MASK_ATTRIBUTE_ADDRESS_TYPE;
+ static const BigramFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE;
+ static const BigramFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES;
+ static const BigramFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
+ static const BigramFlags FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
+ static const BigramFlags FLAG_ATTRIBUTE_HAS_NEXT;
+ static const BigramFlags MASK_ATTRIBUTE_PROBABILITY;
+ static const int ATTRIBUTE_ADDRESS_SHIFT;
+
+ // Returns true if the bigram entry is valid and put entry flags into out*.
+ static bool createAndGetBigramFlags(const int entryPos, const int targetPos,
+ const int probability, const bool hasNext, BigramFlags *const outBigramFlags);
+
+ static AK_FORCE_INLINE bool isOffsetNegative(const BigramFlags flags) {
+ return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0;
+ }
+
+ static int getBigramAddressAndAdvancePosition(const uint8_t *const bigramsBuf,
+ const BigramFlags flags, int *const pos);
+
+ static int getBigramTargetOffset(const int targetPtNodePos, const int entryPos);
+};
+} // namespace latinime
+#endif // LATINIME_BIGRAM_LIST_READ_WRITE_UTILS_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
new file mode 100644
index 000000000..29307b56a
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
@@ -0,0 +1,336 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
+
+#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
+#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+const int DynamicBigramListPolicy::CONTINUING_BIGRAM_LINK_COUNT_LIMIT = 10000;
+const int DynamicBigramListPolicy::BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT = 100000;
+
+void DynamicBigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
+ bool *const outHasNext, int *const bigramEntryPos) const {
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramEntryPos);
+ const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
+ if (usesAdditionalBuffer) {
+ *bigramEntryPos -= mBuffer->getOriginalBufferSize();
+ }
+ BigramListReadWriteUtils::BigramFlags bigramFlags;
+ int originalBigramPos;
+ BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(buffer, &bigramFlags,
+ &originalBigramPos, bigramEntryPos);
+ if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
+ originalBigramPos += mBuffer->getOriginalBufferSize();
+ }
+ *outBigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
+ *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags);
+ *outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags);
+ if (usesAdditionalBuffer) {
+ *bigramEntryPos += mBuffer->getOriginalBufferSize();
+ }
+}
+
+void DynamicBigramListPolicy::skipAllBigrams(int *const bigramListPos) const {
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
+ const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
+ if (usesAdditionalBuffer) {
+ *bigramListPos -= mBuffer->getOriginalBufferSize();
+ }
+ BigramListReadWriteUtils::skipExistingBigrams(buffer, bigramListPos);
+ if (usesAdditionalBuffer) {
+ *bigramListPos += mBuffer->getOriginalBufferSize();
+ }
+}
+
+bool DynamicBigramListPolicy::copyAllBigrams(BufferWithExtendableBuffer *const bufferToWrite,
+ int *const fromPos, int *const toPos, int *const outBigramsCount) const {
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*fromPos);
+ if (usesAdditionalBuffer) {
+ *fromPos -= mBuffer->getOriginalBufferSize();
+ }
+ *outBigramsCount = 0;
+ BigramListReadWriteUtils::BigramFlags bigramFlags;
+ int bigramEntryCount = 0;
+ int lastWrittenEntryPos = NOT_A_DICT_POS;
+ do {
+ if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
+ AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
+ bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
+ ASSERT(false);
+ return false;
+ }
+ // The buffer address can be changed after calling buffer writing methods.
+ int originalBigramPos;
+ BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
+ mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos,
+ fromPos);
+ if (originalBigramPos == NOT_A_DICT_POS) {
+ // skip invalid bigram entry.
+ continue;
+ }
+ if (usesAdditionalBuffer) {
+ originalBigramPos += mBuffer->getOriginalBufferSize();
+ }
+ const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
+ if (bigramPos == NOT_A_DICT_POS) {
+ // Target PtNode has been invalidated.
+ continue;
+ }
+ lastWrittenEntryPos = *toPos;
+ if (!BigramListReadWriteUtils::createAndWriteBigramEntry(bufferToWrite, bigramPos,
+ BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags),
+ BigramListReadWriteUtils::hasNext(bigramFlags), toPos)) {
+ return false;
+ }
+ (*outBigramsCount)++;
+ } while(BigramListReadWriteUtils::hasNext(bigramFlags));
+ // Makes the last entry the terminal of the list. Updates the flags.
+ if (lastWrittenEntryPos != NOT_A_DICT_POS) {
+ if (!BigramListReadWriteUtils::setHasNextFlag(bufferToWrite, false /* hasNext */,
+ lastWrittenEntryPos)) {
+ return false;
+ }
+ }
+ if (usesAdditionalBuffer) {
+ *fromPos += mBuffer->getOriginalBufferSize();
+ }
+ return true;
+}
+
+// Finding useless bigram entries and remove them. Bigram entry is useless when the target PtNode
+// has been deleted or is not a valid terminal.
+bool DynamicBigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(
+ int *const bigramListPos) {
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
+ if (usesAdditionalBuffer) {
+ *bigramListPos -= mBuffer->getOriginalBufferSize();
+ }
+ DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy);
+ BigramListReadWriteUtils::BigramFlags bigramFlags;
+ int bigramEntryCount = 0;
+ do {
+ if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
+ AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
+ bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
+ ASSERT(false);
+ return false;
+ }
+ int bigramEntryPos = *bigramListPos;
+ int originalBigramPos;
+ // The buffer address can be changed after calling buffer writing methods.
+ BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
+ mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos,
+ bigramListPos);
+ if (usesAdditionalBuffer) {
+ bigramEntryPos += mBuffer->getOriginalBufferSize();
+ }
+ if (originalBigramPos == NOT_A_DICT_POS) {
+ // This entry has already been removed.
+ continue;
+ }
+ if (usesAdditionalBuffer) {
+ originalBigramPos += mBuffer->getOriginalBufferSize();
+ }
+ const int bigramTargetNodePos =
+ followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
+ nodeReader.fetchNodeInfoInBufferFromPtNodePos(bigramTargetNodePos);
+ // TODO: Update probability for supporting probability decaying.
+ if (nodeReader.isDeleted() || !nodeReader.isTerminal()
+ || bigramTargetNodePos == NOT_A_DICT_POS) {
+ // The target is no longer valid terminal. Invalidate the current bigram entry.
+ if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
+ NOT_A_DICT_POS /* targetOffset */, &bigramEntryPos)) {
+ return false;
+ }
+ }
+ } while(BigramListReadWriteUtils::hasNext(bigramFlags));
+ return true;
+}
+
+// Updates bigram target PtNode positions in the list after the placing step in GC.
+bool DynamicBigramListPolicy::updateAllBigramTargetPtNodePositions(int *const bigramListPos,
+ const DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap *const
+ ptNodePositionRelocationMap) {
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
+ if (usesAdditionalBuffer) {
+ *bigramListPos -= mBuffer->getOriginalBufferSize();
+ }
+ BigramListReadWriteUtils::BigramFlags bigramFlags;
+ int bigramEntryCount = 0;
+ do {
+ if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
+ AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
+ bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
+ ASSERT(false);
+ return false;
+ }
+ int bigramEntryPos = *bigramListPos;
+ if (usesAdditionalBuffer) {
+ bigramEntryPos += mBuffer->getOriginalBufferSize();
+ }
+ int bigramTargetPtNodePos;
+ // The buffer address can be changed after calling buffer writing methods.
+ BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
+ mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &bigramTargetPtNodePos,
+ bigramListPos);
+ if (bigramTargetPtNodePos == NOT_A_DICT_POS) {
+ continue;
+ }
+ if (usesAdditionalBuffer) {
+ bigramTargetPtNodePos += mBuffer->getOriginalBufferSize();
+ }
+
+ DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::const_iterator it =
+ ptNodePositionRelocationMap->find(bigramTargetPtNodePos);
+ if (it != ptNodePositionRelocationMap->end()) {
+ bigramTargetPtNodePos = it->second;
+ } else {
+ bigramTargetPtNodePos = NOT_A_DICT_POS;
+ }
+ if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
+ bigramTargetPtNodePos, &bigramEntryPos)) {
+ return false;
+ }
+ } while(BigramListReadWriteUtils::hasNext(bigramFlags));
+ return true;
+}
+
+bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTargetPos,
+ const int probability, int *const bigramListPos) {
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
+ if (usesAdditionalBuffer) {
+ *bigramListPos -= mBuffer->getOriginalBufferSize();
+ }
+ BigramListReadWriteUtils::BigramFlags bigramFlags;
+ int bigramEntryCount = 0;
+ do {
+ if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
+ AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
+ bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
+ ASSERT(false);
+ return false;
+ }
+ int entryPos = *bigramListPos;
+ if (usesAdditionalBuffer) {
+ entryPos += mBuffer->getOriginalBufferSize();
+ }
+ int originalBigramPos;
+ // The buffer address can be changed after calling buffer writing methods.
+ BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
+ mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos,
+ bigramListPos);
+ if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
+ originalBigramPos += mBuffer->getOriginalBufferSize();
+ }
+ if (followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos) == bigramTargetPos) {
+ // Update this bigram entry.
+ const BigramListReadWriteUtils::BigramFlags updatedFlags =
+ BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags, probability);
+ return BigramListReadWriteUtils::writeBigramEntry(mBuffer, updatedFlags,
+ originalBigramPos, &entryPos);
+ }
+ if (BigramListReadWriteUtils::hasNext(bigramFlags)) {
+ continue;
+ }
+ // The current last entry is found.
+ // First, update the flags of the last entry.
+ if (!BigramListReadWriteUtils::setHasNextFlag(mBuffer, true /* hasNext */, entryPos)) {
+ return false;
+ }
+ if (usesAdditionalBuffer) {
+ *bigramListPos += mBuffer->getOriginalBufferSize();
+ }
+ // Then, add a new entry after the last entry.
+ return writeNewBigramEntry(bigramTargetPos, probability, bigramListPos);
+ } while(BigramListReadWriteUtils::hasNext(bigramFlags));
+ // We return directly from the while loop.
+ ASSERT(false);
+ return false;
+}
+
+bool DynamicBigramListPolicy::writeNewBigramEntry(const int bigramTargetPos, const int probability,
+ int *const writingPos) {
+ // hasNext is false because we are adding a new bigram entry at the end of the bigram list.
+ return BigramListReadWriteUtils::createAndWriteBigramEntry(mBuffer, bigramTargetPos,
+ probability, false /* hasNext */, writingPos);
+}
+
+bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int bigramTargetPos) {
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(bigramListPos);
+ int pos = bigramListPos;
+ if (usesAdditionalBuffer) {
+ pos -= mBuffer->getOriginalBufferSize();
+ }
+ BigramListReadWriteUtils::BigramFlags bigramFlags;
+ int bigramEntryCount = 0;
+ do {
+ if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
+ AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
+ bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
+ ASSERT(false);
+ return false;
+ }
+ int bigramEntryPos = pos;
+ int originalBigramPos;
+ // The buffer address can be changed after calling buffer writing methods.
+ BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
+ mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos, &pos);
+ if (usesAdditionalBuffer) {
+ bigramEntryPos += mBuffer->getOriginalBufferSize();
+ }
+ if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
+ originalBigramPos += mBuffer->getOriginalBufferSize();
+ }
+ const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
+ if (bigramPos != bigramTargetPos) {
+ continue;
+ }
+ // Target entry is found. Write an invalid target position to mark the bigram invalid.
+ return BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
+ NOT_A_DICT_POS /* targetOffset */, &bigramEntryPos);
+ } while(BigramListReadWriteUtils::hasNext(bigramFlags));
+ return false;
+}
+
+int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos(
+ const int originalBigramPos) const {
+ if (originalBigramPos == NOT_A_DICT_POS) {
+ return NOT_A_DICT_POS;
+ }
+ int currentPos = originalBigramPos;
+ DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy);
+ nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos);
+ int bigramLinkCount = 0;
+ while (nodeReader.getBigramLinkedNodePos() != NOT_A_DICT_POS) {
+ currentPos = nodeReader.getBigramLinkedNodePos();
+ nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos);
+ bigramLinkCount++;
+ if (bigramLinkCount > CONTINUING_BIGRAM_LINK_COUNT_LIMIT) {
+ AKLOGE("Bigram link is invalid. start position: %d", originalBigramPos);
+ ASSERT(false);
+ return NOT_A_DICT_POS;
+ }
+ }
+ return currentPos;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
new file mode 100644
index 000000000..8ea318a41
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H
+#define LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
+
+namespace latinime {
+
+class BufferWithExtendableBuffer;
+class DictionaryShortcutsStructurePolicy;
+
+/*
+ * This is a dynamic version of BigramListPolicy and supports an additional buffer.
+ */
+class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
+ public:
+ DynamicBigramListPolicy(BufferWithExtendableBuffer *const buffer,
+ const DictionaryShortcutsStructurePolicy *const shortcutPolicy)
+ : mBuffer(buffer), mShortcutPolicy(shortcutPolicy) {}
+
+ ~DynamicBigramListPolicy() {}
+
+ void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
+ int *const bigramEntryPos) const;
+
+ void skipAllBigrams(int *const bigramListPos) const;
+
+ // Copy bigrams from the bigram list that starts at fromPos in mBuffer to toPos in
+ // bufferToWrite and advance these positions after bigram lists. This method skips invalid
+ // bigram entries and write the valid bigram entry count to outBigramsCount.
+ bool copyAllBigrams(BufferWithExtendableBuffer *const bufferToWrite, int *const fromPos,
+ int *const toPos, int *const outBigramsCount) const;
+
+ bool updateAllBigramEntriesAndDeleteUselessEntries(int *const bigramListPos);
+
+ bool updateAllBigramTargetPtNodePositions(int *const bigramListPos,
+ const DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap *const
+ ptNodePositionRelocationMap);
+
+ bool addNewBigramEntryToBigramList(const int bigramTargetPos, const int probability,
+ int *const bigramListPos);
+
+ bool writeNewBigramEntry(const int bigramTargetPos, const int probability,
+ int *const writingPos);
+
+ // Return if targetBigramPos is found or not.
+ bool removeBigram(const int bigramListPos, const int bigramTargetPos);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicBigramListPolicy);
+
+ static const int CONTINUING_BIGRAM_LINK_COUNT_LIMIT;
+ static const int BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT;
+
+ BufferWithExtendableBuffer *const mBuffer;
+ const DictionaryShortcutsStructurePolicy *const mShortcutPolicy;
+
+ // Follow bigram link and return the position of bigram target PtNode that is currently valid.
+ int followBigramLinkAndGetCurrentBigramPtNodePos(const int originalBigramPos) const;
+};
+} // namespace latinime
+#endif // LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp
new file mode 100644
index 000000000..ff80dd2f6
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h"
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h"
+#include "suggest/policyimpl/dictionary/patricia_trie_policy.h"
+#include "suggest/policyimpl/dictionary/utils/format_utils.h"
+#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+
+namespace latinime {
+
+/* static */ DictionaryStructureWithBufferPolicy *DictionaryStructureWithBufferPolicyFactory
+ ::newDictionaryStructureWithBufferPolicy(const char *const path, const int bufOffset,
+ const int size, const bool isUpdatable) {
+ // Allocated buffer in MmapedBuffer::openBuffer() will be freed in the destructor of
+ // impl classes of DictionaryStructureWithBufferPolicy.
+ const MmappedBuffer *const mmapedBuffer = MmappedBuffer::openBuffer(path, bufOffset, size,
+ isUpdatable);
+ if (!mmapedBuffer) {
+ return 0;
+ }
+ switch (FormatUtils::detectFormatVersion(mmapedBuffer->getBuffer(),
+ mmapedBuffer->getBufferSize())) {
+ case FormatUtils::VERSION_2:
+ return new PatriciaTriePolicy(mmapedBuffer);
+ case FormatUtils::VERSION_3:
+ return new DynamicPatriciaTriePolicy(mmapedBuffer);
+ default:
+ AKLOGE("DICT: dictionary format is unknown, bad magic number");
+ delete mmapedBuffer;
+ ASSERT(false);
+ return 0;
+ }
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h
new file mode 100644
index 000000000..8cebc3b16
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H
+#define LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+
+namespace latinime {
+
+class DictionaryStructureWithBufferPolicyFactory {
+ public:
+ static DictionaryStructureWithBufferPolicy *newDictionaryStructureWithBufferPolicy(
+ const char *const path, const int bufOffset, const int size, const bool isUpdatable);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryStructureWithBufferPolicyFactory);
+};
+} // namespace latinime
+#endif // LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
new file mode 100644
index 000000000..c60e45819
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
@@ -0,0 +1,149 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h"
+
+namespace latinime {
+
+bool DynamicPatriciaTrieGcEventListeners
+ ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
+ ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
+ const int *const nodeCodePoints) {
+ // PtNode is useless when the PtNode is not a terminal and doesn't have any not useless
+ // children.
+ bool isUselessPtNode = !node->isTerminal();
+ if (mChildrenValue > 0) {
+ isUselessPtNode = false;
+ } else if (node->isTerminal()) {
+ // Remove children as all children are useless.
+ int writingPos = node->getChildrenPosFieldPos();
+ if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
+ mBuffer, NOT_A_DICT_POS /* childrenPosition */, &writingPos)) {
+ return false;
+ }
+ }
+ if (isUselessPtNode) {
+ // Current PtNode is no longer needed. Mark it as deleted.
+ if (!mWritingHelper->markNodeAsDeleted(node)) {
+ return false;
+ }
+ } else {
+ valueStack.back() += 1;
+ }
+ return true;
+}
+
+// Writes dummy PtNode array size when the head of PtNode array is read.
+bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
+ ::onDescend(const int ptNodeArrayPos) {
+ mValidPtNodeCount = 0;
+ int writingPos = mBufferToWrite->getTailPosition();
+ mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.insert(
+ DynamicPatriciaTrieWritingHelper::PtNodeArrayPositionRelocationMap::value_type(
+ ptNodeArrayPos, writingPos));
+ // Writes dummy PtNode array size because arrays can have a forward link or needles PtNodes.
+ // This field will be updated later in onReadingPtNodeArrayTail() with actual PtNode count.
+ mPtNodeArraySizeFieldPos = writingPos;
+ return DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(
+ mBufferToWrite, 0 /* arraySize */, &writingPos);
+}
+
+// Write PtNode array terminal and actual PtNode array size.
+bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
+ ::onReadingPtNodeArrayTail() {
+ int writingPos = mBufferToWrite->getTailPosition();
+ // Write PtNode array terminal.
+ if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(
+ mBufferToWrite, NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
+ return false;
+ }
+ // Write actual PtNode array size.
+ if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(
+ mBufferToWrite, mValidPtNodeCount, &mPtNodeArraySizeFieldPos)) {
+ return false;
+ }
+ return true;
+}
+
+// Write valid PtNode to buffer and memorize mapping from the old position to the new position.
+bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
+ ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
+ const int *const nodeCodePoints) {
+ if (node->isDeleted()) {
+ // Current PtNode is not written in new buffer because it has been deleted.
+ mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert(
+ DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::value_type(
+ node->getHeadPos(), NOT_A_DICT_POS));
+ return true;
+ }
+ int writingPos = mBufferToWrite->getTailPosition();
+ mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert(
+ DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::value_type(
+ node->getHeadPos(), writingPos));
+ mValidPtNodeCount++;
+ // Writes current PtNode.
+ return mWritingHelper->writePtNodeToBufferByCopyingPtNodeInfo(mBufferToWrite, node,
+ node->getParentPos(), nodeCodePoints, node->getCodePointCount(),
+ node->getProbability(), &writingPos);
+}
+
+bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
+ ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
+ const int *const nodeCodePoints) {
+ // Updates parent position.
+ int parentPos = node->getParentPos();
+ if (parentPos != NOT_A_DICT_POS) {
+ DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::const_iterator it =
+ mDictPositionRelocationMap->mPtNodePositionRelocationMap.find(parentPos);
+ if (it != mDictPositionRelocationMap->mPtNodePositionRelocationMap.end()) {
+ parentPos = it->second;
+ }
+ }
+ int writingPos = node->getHeadPos() + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
+ // Write updated parent offset.
+ if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mBufferToWrite,
+ parentPos, node->getHeadPos(), &writingPos)) {
+ return false;
+ }
+
+ // Updates children position.
+ int childrenPos = node->getChildrenPos();
+ if (childrenPos != NOT_A_DICT_POS) {
+ DynamicPatriciaTrieWritingHelper::PtNodeArrayPositionRelocationMap::const_iterator it =
+ mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.find(childrenPos);
+ if (it != mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.end()) {
+ childrenPos = it->second;
+ }
+ }
+ writingPos = node->getChildrenPosFieldPos();
+ if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBufferToWrite,
+ childrenPos, &writingPos)) {
+ return false;
+ }
+
+ // Updates bigram target PtNode positions in the bigram list.
+ int bigramsPos = node->getBigramsPos();
+ if (bigramsPos != NOT_A_DICT_POS) {
+ if (!mBigramPolicy->updateAllBigramTargetPtNodePositions(&bigramsPos,
+ &mDictPositionRelocationMap->mPtNodePositionRelocationMap)) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h
new file mode 100644
index 000000000..4256f22fb
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h
@@ -0,0 +1,178 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_GC_EVENT_LISTENERS_H
+#define LATINIME_DYNAMIC_PATRICIA_TRIE_GC_EVENT_LISTENERS_H
+
+#include <vector>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "utils/hash_map_compat.h"
+
+namespace latinime {
+
+class DynamicPatriciaTrieGcEventListeners {
+ public:
+ // Updates all PtNodes that can be reached from the root. Checks if each PtNode is useless or
+ // not and marks useless PtNodes as deleted. Such deleted PtNodes will be discarded in the GC.
+ // TODO: Concatenate non-terminal PtNodes.
+ class TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
+ : public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
+ public:
+ TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
+ DynamicPatriciaTrieWritingHelper *const writingHelper,
+ BufferWithExtendableBuffer *const buffer)
+ : mWritingHelper(writingHelper), mBuffer(buffer), valueStack(),
+ mChildrenValue(0) {}
+
+ ~TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted() {};
+
+ bool onAscend() {
+ if (valueStack.empty()) {
+ return false;
+ }
+ mChildrenValue = valueStack.back();
+ valueStack.pop_back();
+ return true;
+ }
+
+ bool onDescend(const int ptNodeArrayPos) {
+ valueStack.push_back(0);
+ return true;
+ }
+
+ bool onReadingPtNodeArrayTail() { return true; }
+
+ bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
+ const int *const nodeCodePoints);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(
+ TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted);
+
+ DynamicPatriciaTrieWritingHelper *const mWritingHelper;
+ BufferWithExtendableBuffer *const mBuffer;
+ std::vector<int> valueStack;
+ int mChildrenValue;
+ };
+
+ // Updates all bigram entries that are held by valid PtNodes. This removes useless bigram
+ // entries.
+ class TraversePolicyToUpdateBigramProbability
+ : public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
+ public:
+ TraversePolicyToUpdateBigramProbability(DynamicBigramListPolicy *const bigramPolicy)
+ : mBigramPolicy(bigramPolicy) {}
+
+ bool onAscend() { return true; }
+
+ bool onDescend(const int ptNodeArrayPos) { return true; }
+
+ bool onReadingPtNodeArrayTail() { return true; }
+
+ bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
+ const int *const nodeCodePoints) {
+ if (!node->isDeleted()) {
+ int pos = node->getBigramsPos();
+ if (pos != NOT_A_DICT_POS) {
+ if (!mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(&pos)) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateBigramProbability);
+
+ DynamicBigramListPolicy *const mBigramPolicy;
+ };
+
+ class TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
+ : public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
+ public:
+ TraversePolicyToPlaceAndWriteValidPtNodesToBuffer(
+ DynamicPatriciaTrieWritingHelper *const writingHelper,
+ BufferWithExtendableBuffer *const bufferToWrite,
+ DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
+ dictPositionRelocationMap)
+ : mWritingHelper(writingHelper), mBufferToWrite(bufferToWrite),
+ mDictPositionRelocationMap(dictPositionRelocationMap), mValidPtNodeCount(0),
+ mPtNodeArraySizeFieldPos(NOT_A_DICT_POS) {};
+
+ bool onAscend() { return true; }
+
+ bool onDescend(const int ptNodeArrayPos);
+
+ bool onReadingPtNodeArrayTail();
+
+ bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
+ const int *const nodeCodePoints);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToPlaceAndWriteValidPtNodesToBuffer);
+
+ DynamicPatriciaTrieWritingHelper *const mWritingHelper;
+ BufferWithExtendableBuffer *const mBufferToWrite;
+ DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
+ mDictPositionRelocationMap;
+ int mValidPtNodeCount;
+ int mPtNodeArraySizeFieldPos;
+ };
+
+ class TraversePolicyToUpdateAllPositionFields
+ : public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
+ public:
+ TraversePolicyToUpdateAllPositionFields(
+ DynamicPatriciaTrieWritingHelper *const writingHelper,
+ DynamicBigramListPolicy *const bigramPolicy,
+ BufferWithExtendableBuffer *const bufferToWrite,
+ const DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
+ dictPositionRelocationMap)
+ : mWritingHelper(writingHelper), mBigramPolicy(bigramPolicy),
+ mBufferToWrite(bufferToWrite),
+ mDictPositionRelocationMap(dictPositionRelocationMap) {};
+
+ bool onAscend() { return true; }
+
+ bool onDescend(const int ptNodeArrayPos) { return true; }
+
+ bool onReadingPtNodeArrayTail() { return true; }
+
+ bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
+ const int *const nodeCodePoints);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateAllPositionFields);
+
+ DynamicPatriciaTrieWritingHelper *const mWritingHelper;
+ DynamicBigramListPolicy *const mBigramPolicy;
+ BufferWithExtendableBuffer *const mBufferToWrite;
+ const DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
+ mDictPositionRelocationMap;
+ };
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieGcEventListeners);
+};
+} // namespace latinime
+#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_GC_EVENT_LISTENERS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp
new file mode 100644
index 000000000..456352c17
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
+
+#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
+#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+void DynamicPatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode(
+ const int ptNodePos, const int maxCodePointCount, int *const outCodePoints) {
+ if (ptNodePos < 0 || ptNodePos >= mBuffer->getTailPosition()) {
+ AKLOGE("Fetching PtNode info form invalid dictionary position: %d, dictionary size: %d",
+ ptNodePos, mBuffer->getTailPosition());
+ ASSERT(false);
+ invalidatePtNodeInfo();
+ return;
+ }
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodePos);
+ const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
+ int pos = ptNodePos;
+ mHeadPos = ptNodePos;
+ if (usesAdditionalBuffer) {
+ pos -= mBuffer->getOriginalBufferSize();
+ }
+ mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
+ const int parentPosOffset =
+ DynamicPatriciaTrieReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(dictBuf,
+ &pos);
+ mParentPos = DynamicPatriciaTrieReadingUtils::getParentPtNodePos(parentPosOffset, mHeadPos);
+ if (outCodePoints != 0) {
+ mCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
+ dictBuf, mFlags, maxCodePointCount, outCodePoints, &pos);
+ } else {
+ mCodePointCount = PatriciaTrieReadingUtils::skipCharacters(
+ dictBuf, mFlags, MAX_WORD_LENGTH, &pos);
+ }
+ if (isTerminal()) {
+ mProbabilityFieldPos = pos;
+ if (usesAdditionalBuffer) {
+ mProbabilityFieldPos += mBuffer->getOriginalBufferSize();
+ }
+ mProbability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictBuf, &pos);
+ } else {
+ mProbabilityFieldPos = NOT_A_DICT_POS;
+ mProbability = NOT_A_PROBABILITY;
+ }
+ mChildrenPosFieldPos = pos;
+ if (usesAdditionalBuffer) {
+ mChildrenPosFieldPos += mBuffer->getOriginalBufferSize();
+ }
+ mChildrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
+ dictBuf, &pos);
+ if (usesAdditionalBuffer && mChildrenPos != NOT_A_DICT_POS) {
+ mChildrenPos += mBuffer->getOriginalBufferSize();
+ }
+ if (mSiblingPos == NOT_A_DICT_POS) {
+ if (DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) {
+ mBigramLinkedNodePos = mChildrenPos;
+ } else {
+ mBigramLinkedNodePos = NOT_A_DICT_POS;
+ }
+ }
+ if (usesAdditionalBuffer) {
+ pos += mBuffer->getOriginalBufferSize();
+ }
+ if (PatriciaTrieReadingUtils::hasShortcutTargets(mFlags)) {
+ mShortcutPos = pos;
+ mShortcutsPolicy->skipAllShortcuts(&pos);
+ } else {
+ mShortcutPos = NOT_A_DICT_POS;
+ }
+ if (PatriciaTrieReadingUtils::hasBigrams(mFlags)) {
+ mBigramPos = pos;
+ mBigramsPolicy->skipAllBigrams(&pos);
+ } else {
+ mBigramPos = NOT_A_DICT_POS;
+ }
+ // Update siblingPos if needed.
+ if (mSiblingPos == NOT_A_DICT_POS) {
+ // Sibling position is the tail position of current node.
+ mSiblingPos = pos;
+ }
+ // Read destination node if the read node is a moved node.
+ if (DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) {
+ // The destination position is stored at the same place as the parent position.
+ fetchPtNodeInfoFromBufferAndProcessMovedPtNode(mParentPos, maxCodePointCount,
+ outCodePoints);
+ }
+}
+
+void DynamicPatriciaTrieNodeReader::invalidatePtNodeInfo() {
+ mHeadPos = NOT_A_DICT_POS;
+ mFlags = 0;
+ mParentPos = NOT_A_DICT_POS;
+ mCodePointCount = 0;
+ mProbabilityFieldPos = NOT_A_DICT_POS;
+ mProbability = NOT_A_PROBABILITY;
+ mChildrenPosFieldPos = NOT_A_DICT_POS;
+ mChildrenPos = NOT_A_DICT_POS;
+ mBigramLinkedNodePos = NOT_A_DICT_POS;
+ mShortcutPos = NOT_A_DICT_POS;
+ mBigramPos = NOT_A_DICT_POS;
+ mSiblingPos = NOT_A_DICT_POS;
+}
+
+}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h
new file mode 100644
index 000000000..3b36d425f
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h
@@ -0,0 +1,163 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H
+#define LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
+
+namespace latinime {
+
+class BufferWithExtendableBuffer;
+class DictionaryBigramsStructurePolicy;
+class DictionaryShortcutsStructurePolicy;
+
+/*
+ * This class is used for helping to read nodes of dynamic patricia trie. This class handles moved
+ * node and reads node attributes.
+ */
+class DynamicPatriciaTrieNodeReader {
+ public:
+ DynamicPatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer,
+ const DictionaryBigramsStructurePolicy *const bigramsPolicy,
+ const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
+ : mBuffer(buffer), mBigramsPolicy(bigramsPolicy),
+ mShortcutsPolicy(shortcutsPolicy), mHeadPos(NOT_A_DICT_POS), mFlags(0),
+ mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbabilityFieldPos(NOT_A_DICT_POS),
+ mProbability(NOT_A_PROBABILITY), mChildrenPosFieldPos(NOT_A_DICT_POS),
+ mChildrenPos(NOT_A_DICT_POS), mBigramLinkedNodePos(NOT_A_DICT_POS),
+ mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS),
+ mSiblingPos(NOT_A_DICT_POS) {}
+
+ ~DynamicPatriciaTrieNodeReader() {}
+
+ // Reads PtNode information from dictionary buffer and updates members with the information.
+ AK_FORCE_INLINE void fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) {
+ fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(ptNodePos ,
+ 0 /* maxCodePointCount */, 0 /* outCodePoints */);
+ }
+
+ AK_FORCE_INLINE void fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(
+ const int ptNodePos, const int maxCodePointCount, int *const outCodePoints) {
+ mSiblingPos = NOT_A_DICT_POS;
+ mBigramLinkedNodePos = NOT_A_DICT_POS;
+ fetchPtNodeInfoFromBufferAndProcessMovedPtNode(ptNodePos, maxCodePointCount, outCodePoints);
+ }
+
+ // HeadPos is different from NodePos when the current PtNode is a moved PtNode.
+ AK_FORCE_INLINE int getHeadPos() const {
+ return mHeadPos;
+ }
+
+ // Flags
+ AK_FORCE_INLINE bool isDeleted() const {
+ return DynamicPatriciaTrieReadingUtils::isDeleted(mFlags);
+ }
+
+ AK_FORCE_INLINE bool hasChildren() const {
+ return mChildrenPos != NOT_A_DICT_POS;
+ }
+
+ AK_FORCE_INLINE bool isTerminal() const {
+ return PatriciaTrieReadingUtils::isTerminal(mFlags);
+ }
+
+ AK_FORCE_INLINE bool isBlacklisted() const {
+ return PatriciaTrieReadingUtils::isBlacklisted(mFlags);
+ }
+
+ AK_FORCE_INLINE bool isNotAWord() const {
+ return PatriciaTrieReadingUtils::isNotAWord(mFlags);
+ }
+
+ // Parent node position
+ AK_FORCE_INLINE int getParentPos() const {
+ return mParentPos;
+ }
+
+ // Number of code points
+ AK_FORCE_INLINE uint8_t getCodePointCount() const {
+ return mCodePointCount;
+ }
+
+ // Probability
+ AK_FORCE_INLINE int getProbabilityFieldPos() const {
+ return mProbabilityFieldPos;
+ }
+
+ AK_FORCE_INLINE int getProbability() const {
+ return mProbability;
+ }
+
+ // Children PtNode array position
+ AK_FORCE_INLINE int getChildrenPosFieldPos() const {
+ return mChildrenPosFieldPos;
+ }
+
+ AK_FORCE_INLINE int getChildrenPos() const {
+ return mChildrenPos;
+ }
+
+ // Bigram linked node position.
+ AK_FORCE_INLINE int getBigramLinkedNodePos() const {
+ return mBigramLinkedNodePos;
+ }
+
+ // Shortcutlist position
+ AK_FORCE_INLINE int getShortcutPos() const {
+ return mShortcutPos;
+ }
+
+ // Bigrams position
+ AK_FORCE_INLINE int getBigramsPos() const {
+ return mBigramPos;
+ }
+
+ // Sibling node position
+ AK_FORCE_INLINE int getSiblingNodePos() const {
+ return mSiblingPos;
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieNodeReader);
+
+ const BufferWithExtendableBuffer *const mBuffer;
+ const DictionaryBigramsStructurePolicy *const mBigramsPolicy;
+ const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy;
+ int mHeadPos;
+ DynamicPatriciaTrieReadingUtils::NodeFlags mFlags;
+ int mParentPos;
+ uint8_t mCodePointCount;
+ int mProbabilityFieldPos;
+ int mProbability;
+ int mChildrenPosFieldPos;
+ int mChildrenPos;
+ int mBigramLinkedNodePos;
+ int mShortcutPos;
+ int mBigramPos;
+ int mSiblingPos;
+
+ void fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
+ const int maxCodePointCount, int *const outCodePoints);
+
+ void invalidatePtNodeInfo();
+};
+} // namespace latinime
+#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
new file mode 100644
index 000000000..42397c19e
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
@@ -0,0 +1,275 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h"
+
+#include "defines.h"
+#include "suggest/core/dicnode/dic_node.h"
+#include "suggest/core/dicnode/dic_node_vector.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
+#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
+
+namespace latinime {
+
+void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
+ DicNodeVector *const childDicNodes) const {
+ if (!dicNode->hasChildren()) {
+ return;
+ }
+ DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
+ getBigramsStructurePolicy(), getShortcutsStructurePolicy());
+ readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPos());
+ const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
+ while (!readingHelper.isEnd()) {
+ childDicNodes->pushLeavingChild(dicNode, nodeReader->getHeadPos(),
+ nodeReader->getChildrenPos(), nodeReader->getProbability(),
+ nodeReader->isTerminal() && !nodeReader->isDeleted(),
+ nodeReader->hasChildren(), nodeReader->isBlacklisted() || nodeReader->isNotAWord(),
+ nodeReader->getCodePointCount(), readingHelper.getMergedNodeCodePoints());
+ readingHelper.readNextSiblingNode();
+ }
+}
+
+int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
+ const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
+ int *const outUnigramProbability) const {
+ // This method traverses parent nodes from the terminal by following parent pointers; thus,
+ // node code points are stored in the buffer in the reverse order.
+ int reverseCodePoints[maxCodePointCount];
+ DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
+ getBigramsStructurePolicy(), getShortcutsStructurePolicy());
+ // First, read the terminal node and get its probability.
+ readingHelper.initWithPtNodePos(ptNodePos);
+ if (!readingHelper.isValidTerminalNode()) {
+ // Node at the ptNodePos is not a valid terminal node.
+ *outUnigramProbability = NOT_A_PROBABILITY;
+ return 0;
+ }
+ // Store terminal node probability.
+ *outUnigramProbability = readingHelper.getNodeReader()->getProbability();
+ // Then, following parent node link to the dictionary root and fetch node code points.
+ while (!readingHelper.isEnd()) {
+ if (readingHelper.getTotalCodePointCount() > maxCodePointCount) {
+ // The ptNodePos is not a valid terminal node position in the dictionary.
+ *outUnigramProbability = NOT_A_PROBABILITY;
+ return 0;
+ }
+ // Store node code points to buffer in the reverse order.
+ readingHelper.fetchMergedNodeCodePointsInReverseOrder(
+ readingHelper.getPrevTotalCodePointCount(), reverseCodePoints);
+ // Follow parent node toward the root node.
+ readingHelper.readParentNode();
+ }
+ if (readingHelper.isError()) {
+ // The node position or the dictionary is invalid.
+ *outUnigramProbability = NOT_A_PROBABILITY;
+ return 0;
+ }
+ // Reverse the stored code points to output them.
+ const int codePointCount = readingHelper.getTotalCodePointCount();
+ for (int i = 0; i < codePointCount; ++i) {
+ outCodePoints[i] = reverseCodePoints[codePointCount - i - 1];
+ }
+ return codePointCount;
+}
+
+int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
+ const int length, const bool forceLowerCaseSearch) const {
+ int searchCodePoints[length];
+ for (int i = 0; i < length; ++i) {
+ searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i];
+ }
+ DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
+ getBigramsStructurePolicy(), getShortcutsStructurePolicy());
+ readingHelper.initWithPtNodeArrayPos(getRootPosition());
+ const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
+ while (!readingHelper.isEnd()) {
+ const int matchedCodePointCount = readingHelper.getPrevTotalCodePointCount();
+ if (readingHelper.getTotalCodePointCount() > length
+ || !readingHelper.isMatchedCodePoint(0 /* index */,
+ searchCodePoints[matchedCodePointCount])) {
+ // Current node has too many code points or its first code point is different from
+ // target code point. Skip this node and read the next sibling node.
+ readingHelper.readNextSiblingNode();
+ continue;
+ }
+ // Check following merged node code points.
+ const int nodeCodePointCount = nodeReader->getCodePointCount();
+ for (int j = 1; j < nodeCodePointCount; ++j) {
+ if (!readingHelper.isMatchedCodePoint(
+ j, searchCodePoints[matchedCodePointCount + j])) {
+ // Different code point is found. The given word is not included in the dictionary.
+ return NOT_A_DICT_POS;
+ }
+ }
+ // All characters are matched.
+ if (length == readingHelper.getTotalCodePointCount()) {
+ // Terminal position is found.
+ return nodeReader->getHeadPos();
+ }
+ if (!nodeReader->hasChildren()) {
+ return NOT_A_DICT_POS;
+ }
+ // Advance to the children nodes.
+ readingHelper.readChildNode();
+ }
+ // If we already traversed the tree further than the word is long, there means
+ // there was no match (or we would have found it).
+ return NOT_A_DICT_POS;
+}
+
+int DynamicPatriciaTriePolicy::getProbability(const int unigramProbability,
+ const int bigramProbability) const {
+ // TODO: check mHeaderPolicy.usesForgettingCurve();
+ if (unigramProbability == NOT_A_PROBABILITY) {
+ return NOT_A_PROBABILITY;
+ } else if (bigramProbability == NOT_A_PROBABILITY) {
+ return ProbabilityUtils::backoff(unigramProbability);
+ } else {
+ return ProbabilityUtils::computeProbabilityForBigram(unigramProbability,
+ bigramProbability);
+ }
+}
+
+int DynamicPatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const {
+ if (ptNodePos == NOT_A_DICT_POS) {
+ return NOT_A_PROBABILITY;
+ }
+ DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
+ getBigramsStructurePolicy(), getShortcutsStructurePolicy());
+ nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
+ if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) {
+ return NOT_A_PROBABILITY;
+ }
+ return getProbability(nodeReader.getProbability(), NOT_A_PROBABILITY);
+}
+
+int DynamicPatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
+ if (ptNodePos == NOT_A_DICT_POS) {
+ return NOT_A_DICT_POS;
+ }
+ DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
+ getBigramsStructurePolicy(), getShortcutsStructurePolicy());
+ nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
+ if (nodeReader.isDeleted()) {
+ return NOT_A_DICT_POS;
+ }
+ return nodeReader.getShortcutPos();
+}
+
+int DynamicPatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
+ if (ptNodePos == NOT_A_DICT_POS) {
+ return NOT_A_DICT_POS;
+ }
+ DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
+ getBigramsStructurePolicy(), getShortcutsStructurePolicy());
+ nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
+ if (nodeReader.isDeleted()) {
+ return NOT_A_DICT_POS;
+ }
+ return nodeReader.getBigramsPos();
+}
+
+bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int length,
+ const int probability) {
+ if (!mBuffer->isUpdatable()) {
+ AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
+ return false;
+ }
+ DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
+ getBigramsStructurePolicy(), getShortcutsStructurePolicy());
+ readingHelper.initWithPtNodeArrayPos(getRootPosition());
+ DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
+ &mBigramListPolicy, &mShortcutListPolicy);
+ return writingHelper.addUnigramWord(&readingHelper, word, length, probability);
+}
+
+bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int length0,
+ const int *const word1, const int length1, const int probability) {
+ if (!mBuffer->isUpdatable()) {
+ AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
+ return false;
+ }
+ const int word0Pos = getTerminalNodePositionOfWord(word0, length0,
+ false /* forceLowerCaseSearch */);
+ if (word0Pos == NOT_A_DICT_POS) {
+ return false;
+ }
+ const int word1Pos = getTerminalNodePositionOfWord(word1, length1,
+ false /* forceLowerCaseSearch */);
+ if (word1Pos == NOT_A_DICT_POS) {
+ return false;
+ }
+ DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
+ &mBigramListPolicy, &mShortcutListPolicy);
+ return writingHelper.addBigramWords(word0Pos, word1Pos, probability);
+}
+
+bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0,
+ const int *const word1, const int length1) {
+ if (!mBuffer->isUpdatable()) {
+ AKLOGI("Warning: removeBigramWords() is called for non-updatable dictionary.");
+ return false;
+ }
+ const int word0Pos = getTerminalNodePositionOfWord(word0, length0,
+ false /* forceLowerCaseSearch */);
+ if (word0Pos == NOT_A_DICT_POS) {
+ return false;
+ }
+ const int word1Pos = getTerminalNodePositionOfWord(word1, length1,
+ false /* forceLowerCaseSearch */);
+ if (word1Pos == NOT_A_DICT_POS) {
+ return false;
+ }
+ DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
+ &mBigramListPolicy, &mShortcutListPolicy);
+ return writingHelper.removeBigramWords(word0Pos, word1Pos);
+}
+
+void DynamicPatriciaTriePolicy::flush(const char *const filePath) {
+ if (!mBuffer->isUpdatable()) {
+ AKLOGI("Warning: flush() is called for non-updatable dictionary.");
+ return;
+ }
+ DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
+ &mBigramListPolicy, &mShortcutListPolicy);
+ writingHelper.writeToDictFile(filePath, &mHeaderPolicy);
+}
+
+void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) {
+ if (!mBuffer->isUpdatable()) {
+ AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
+ return;
+ }
+ DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
+ &mBigramListPolicy, &mShortcutListPolicy);
+ writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy);
+}
+
+bool DynamicPatriciaTriePolicy::needsToRunGC() const {
+ if (!mBuffer->isUpdatable()) {
+ AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
+ return false;
+ }
+ // TODO: Implement more properly.
+ return mBufferWithExtendableBuffer.isNearSizeLimit();
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
new file mode 100644
index 000000000..06d8095d8
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
+#define LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
+
+#include "defines.h"
+#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
+#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+
+namespace latinime {
+
+class DicNode;
+class DicNodeVector;
+
+class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
+ public:
+ DynamicPatriciaTriePolicy(const MmappedBuffer *const buffer)
+ : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), buffer->getBufferSize()),
+ mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(),
+ mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
+ mShortcutListPolicy(&mBufferWithExtendableBuffer),
+ mBigramListPolicy(&mBufferWithExtendableBuffer, &mShortcutListPolicy) {}
+
+ ~DynamicPatriciaTriePolicy() {
+ delete mBuffer;
+ }
+
+ AK_FORCE_INLINE int getRootPosition() const {
+ return 0;
+ }
+
+ void createAndGetAllChildNodes(const DicNode *const dicNode,
+ DicNodeVector *const childDicNodes) const;
+
+ int getCodePointsAndProbabilityAndReturnCodePointCount(
+ const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints,
+ int *const outUnigramProbability) const;
+
+ int getTerminalNodePositionOfWord(const int *const inWord,
+ const int length, const bool forceLowerCaseSearch) const;
+
+ int getProbability(const int unigramProbability, const int bigramProbability) const;
+
+ int getUnigramProbabilityOfPtNode(const int ptNodePos) const;
+
+ int getShortcutPositionOfPtNode(const int ptNodePos) const;
+
+ int getBigramsPositionOfPtNode(const int ptNodePos) const;
+
+ const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
+ return &mHeaderPolicy;
+ }
+
+ const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const {
+ return &mBigramListPolicy;
+ }
+
+ const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
+ return &mShortcutListPolicy;
+ }
+
+ bool addUnigramWord(const int *const word, const int length, const int probability);
+
+ bool addBigramWords(const int *const word0, const int length0, const int *const word1,
+ const int length1, const int probability);
+
+ bool removeBigramWords(const int *const word0, const int length0, const int *const word1,
+ const int length1);
+
+ void flush(const char *const filePath);
+
+ void flushWithGC(const char *const filePath);
+
+ bool needsToRunGC() const;
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy);
+
+ const MmappedBuffer *const mBuffer;
+ const HeaderPolicy mHeaderPolicy;
+ BufferWithExtendableBuffer mBufferWithExtendableBuffer;
+ DynamicShortcutListPolicy mShortcutListPolicy;
+ DynamicBigramListPolicy mBigramListPolicy;
+};
+} // namespace latinime
+#endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp
new file mode 100644
index 000000000..f4a2ef389
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp
@@ -0,0 +1,215 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
+
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+// To avoid infinite loop caused by invalid or malicious forward links.
+const int DynamicPatriciaTrieReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
+const int DynamicPatriciaTrieReadingHelper::MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
+const size_t DynamicPatriciaTrieReadingHelper::MAX_READING_STATE_STACK_SIZE = MAX_WORD_LENGTH;
+
+// Visits all PtNodes in post-order depth first manner.
+// For example, visits c -> b -> y -> x -> a for the following dictionary:
+// a _ b _ c
+// \ x _ y
+bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPostorderDepthFirstManner(
+ TraversingEventListener *const listener) {
+ bool alreadyVisitedChildren = false;
+ // Descend from the root to the root PtNode array.
+ if (!listener->onDescend(getPosOfLastPtNodeArrayHead())) {
+ return false;
+ }
+ while (!isEnd()) {
+ if (!alreadyVisitedChildren) {
+ if (mNodeReader.hasChildren()) {
+ // Move to the first child.
+ if (!listener->onDescend(mNodeReader.getChildrenPos())) {
+ return false;
+ }
+ pushReadingStateToStack();
+ readChildNode();
+ } else {
+ alreadyVisitedChildren = true;
+ }
+ } else {
+ if (!listener->onVisitingPtNode(&mNodeReader, mMergedNodeCodePoints)) {
+ return false;
+ }
+ readNextSiblingNode();
+ if (isEnd()) {
+ // All PtNodes in current linked PtNode arrays have been visited.
+ // Return to the parent.
+ if (!listener->onReadingPtNodeArrayTail()) {
+ return false;
+ }
+ if (mReadingStateStack.size() <= 0) {
+ break;
+ }
+ if (!listener->onAscend()) {
+ return false;
+ }
+ popReadingStateFromStack();
+ alreadyVisitedChildren = true;
+ } else {
+ // Process sibling PtNode.
+ alreadyVisitedChildren = false;
+ }
+ }
+ }
+ // Ascend from the root PtNode array to the root.
+ if (!listener->onAscend()) {
+ return false;
+ }
+ return !isError();
+}
+
+// Visits all PtNodes in PtNode array level pre-order depth first manner, which is the same order
+// that PtNodes are written in the dictionary buffer.
+// For example, visits a -> b -> x -> c -> y for the following dictionary:
+// a _ b _ c
+// \ x _ y
+bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
+ TraversingEventListener *const listener) {
+ bool alreadyVisitedAllPtNodesInArray = false;
+ bool alreadyVisitedChildren = false;
+ // Descend from the root to the root PtNode array.
+ if (!listener->onDescend(getPosOfLastPtNodeArrayHead())) {
+ return false;
+ }
+ pushReadingStateToStack();
+ while (!isEnd()) {
+ if (alreadyVisitedAllPtNodesInArray) {
+ if (alreadyVisitedChildren) {
+ // Move to next sibling PtNode's children.
+ readNextSiblingNode();
+ if (isEnd()) {
+ // Return to the parent PTNode.
+ if (!listener->onAscend()) {
+ return false;
+ }
+ if (mReadingStateStack.size() <= 0) {
+ break;
+ }
+ popReadingStateFromStack();
+ alreadyVisitedChildren = true;
+ alreadyVisitedAllPtNodesInArray = true;
+ } else {
+ alreadyVisitedChildren = false;
+ }
+ } else {
+ if (mNodeReader.hasChildren()) {
+ // Move to the first child.
+ if (!listener->onDescend(mNodeReader.getChildrenPos())) {
+ return false;
+ }
+ pushReadingStateToStack();
+ readChildNode();
+ // Push state to return the head of PtNode array.
+ pushReadingStateToStack();
+ alreadyVisitedAllPtNodesInArray = false;
+ alreadyVisitedChildren = false;
+ } else {
+ alreadyVisitedChildren = true;
+ }
+ }
+ } else {
+ if (!listener->onVisitingPtNode(&mNodeReader, mMergedNodeCodePoints)) {
+ return false;
+ }
+ readNextSiblingNode();
+ if (isEnd()) {
+ if (!listener->onReadingPtNodeArrayTail()) {
+ return false;
+ }
+ // Return to the head of current PtNode array.
+ popReadingStateFromStack();
+ alreadyVisitedAllPtNodesInArray = true;
+ }
+ }
+ }
+ popReadingStateFromStack();
+ // Ascend from the root PtNode array to the root.
+ if (!listener->onAscend()) {
+ return false;
+ }
+ return !isError();
+}
+
+// Read node array size and process empty node arrays. Nodes and arrays are counted up in this
+// method to avoid an infinite loop.
+void DynamicPatriciaTrieReadingHelper::nextPtNodeArray() {
+ mReadingState.mPosOfLastPtNodeArrayHead = mReadingState.mPos;
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos);
+ const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
+ if (usesAdditionalBuffer) {
+ mReadingState.mPos -= mBuffer->getOriginalBufferSize();
+ }
+ mReadingState.mNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
+ dictBuf, &mReadingState.mPos);
+ if (usesAdditionalBuffer) {
+ mReadingState.mPos += mBuffer->getOriginalBufferSize();
+ }
+ // Count up nodes and node arrays to avoid infinite loop.
+ mReadingState.mTotalNodeCount += mReadingState.mNodeCount;
+ mReadingState.mNodeArrayCount++;
+ if (mReadingState.mNodeCount < 0
+ || mReadingState.mTotalNodeCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP
+ || mReadingState.mNodeArrayCount > MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) {
+ // Invalid dictionary.
+ AKLOGI("Invalid dictionary. nodeCount: %d, totalNodeCount: %d, MAX_CHILD_COUNT: %d"
+ "nodeArrayCount: %d, MAX_NODE_ARRAY_COUNT: %d",
+ mReadingState.mNodeCount, mReadingState.mTotalNodeCount,
+ MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP, mReadingState.mNodeArrayCount,
+ MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP);
+ ASSERT(false);
+ mIsError = true;
+ mReadingState.mPos = NOT_A_DICT_POS;
+ return;
+ }
+ if (mReadingState.mNodeCount == 0) {
+ // Empty node array. Try following forward link.
+ followForwardLink();
+ }
+}
+
+// Follow the forward link and read the next node array if exists.
+void DynamicPatriciaTrieReadingHelper::followForwardLink() {
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos);
+ const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
+ if (usesAdditionalBuffer) {
+ mReadingState.mPos -= mBuffer->getOriginalBufferSize();
+ }
+ const int forwardLinkPosition =
+ DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(dictBuf, mReadingState.mPos);
+ if (usesAdditionalBuffer) {
+ mReadingState.mPos += mBuffer->getOriginalBufferSize();
+ }
+ mReadingState.mPosOfLastForwardLinkField = mReadingState.mPos;
+ if (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(forwardLinkPosition)) {
+ // Follow the forward link.
+ mReadingState.mPos += forwardLinkPosition;
+ nextPtNodeArray();
+ } else {
+ // All node arrays have been read.
+ mReadingState.mPos = NOT_A_DICT_POS;
+ }
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h
new file mode 100644
index 000000000..c6d8ddcf7
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h
@@ -0,0 +1,286 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H
+#define LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H
+
+#include <cstddef>
+#include <vector>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
+
+namespace latinime {
+
+class BufferWithExtendableBuffer;
+class DictionaryBigramsStructurePolicy;
+class DictionaryShortcutsStructurePolicy;
+
+/*
+ * This class is used for traversing dynamic patricia trie. This class supports iterating nodes and
+ * dealing with additional buffer. This class counts nodes and node arrays to avoid infinite loop.
+ */
+class DynamicPatriciaTrieReadingHelper {
+ public:
+ class TraversingEventListener {
+ public:
+ virtual ~TraversingEventListener() {};
+
+ // Returns whether the event handling was succeeded or not.
+ virtual bool onAscend() = 0;
+
+ // Returns whether the event handling was succeeded or not.
+ virtual bool onDescend(const int ptNodeArrayPos) = 0;
+
+ // Returns whether the event handling was succeeded or not.
+ virtual bool onReadingPtNodeArrayTail() = 0;
+
+ // Returns whether the event handling was succeeded or not.
+ virtual bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
+ const int *const nodeCodePoints) = 0;
+
+ protected:
+ TraversingEventListener() {};
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(TraversingEventListener);
+ };
+
+ DynamicPatriciaTrieReadingHelper(const BufferWithExtendableBuffer *const buffer,
+ const DictionaryBigramsStructurePolicy *const bigramsPolicy,
+ const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
+ : mIsError(false), mReadingState(), mBuffer(buffer),
+ mNodeReader(mBuffer, bigramsPolicy, shortcutsPolicy), mReadingStateStack() {}
+
+ ~DynamicPatriciaTrieReadingHelper() {}
+
+ AK_FORCE_INLINE bool isError() const {
+ return mIsError;
+ }
+
+ AK_FORCE_INLINE bool isEnd() const {
+ return mReadingState.mPos == NOT_A_DICT_POS;
+ }
+
+ // Initialize reading state with the head position of a PtNode array.
+ AK_FORCE_INLINE void initWithPtNodeArrayPos(const int ptNodeArrayPos) {
+ if (ptNodeArrayPos == NOT_A_DICT_POS) {
+ mReadingState.mPos = NOT_A_DICT_POS;
+ } else {
+ mIsError = false;
+ mReadingState.mPos = ptNodeArrayPos;
+ mReadingState.mPrevTotalCodePointCount = 0;
+ mReadingState.mTotalNodeCount = 0;
+ mReadingState.mNodeArrayCount = 0;
+ mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
+ mReadingStateStack.clear();
+ nextPtNodeArray();
+ if (!isEnd()) {
+ fetchPtNodeInfo();
+ }
+ }
+ }
+
+ // Initialize reading state with the head position of a node.
+ AK_FORCE_INLINE void initWithPtNodePos(const int ptNodePos) {
+ if (ptNodePos == NOT_A_DICT_POS) {
+ mReadingState.mPos = NOT_A_DICT_POS;
+ } else {
+ mIsError = false;
+ mReadingState.mPos = ptNodePos;
+ mReadingState.mNodeCount = 1;
+ mReadingState.mPrevTotalCodePointCount = 0;
+ mReadingState.mTotalNodeCount = 1;
+ mReadingState.mNodeArrayCount = 1;
+ mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
+ mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS;
+ mReadingStateStack.clear();
+ fetchPtNodeInfo();
+ }
+ }
+
+ AK_FORCE_INLINE const DynamicPatriciaTrieNodeReader* getNodeReader() const {
+ return &mNodeReader;
+ }
+
+ AK_FORCE_INLINE bool isValidTerminalNode() const {
+ return !isEnd() && !mNodeReader.isDeleted() && mNodeReader.isTerminal();
+ }
+
+ AK_FORCE_INLINE bool isMatchedCodePoint(const int index, const int codePoint) const {
+ return mMergedNodeCodePoints[index] == codePoint;
+ }
+
+ // Return code point count exclude the last read node's code points.
+ AK_FORCE_INLINE int getPrevTotalCodePointCount() const {
+ return mReadingState.mPrevTotalCodePointCount;
+ }
+
+ // Return code point count include the last read node's code points.
+ AK_FORCE_INLINE int getTotalCodePointCount() const {
+ return mReadingState.mPrevTotalCodePointCount + mNodeReader.getCodePointCount();
+ }
+
+ AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder(
+ const int index, int *const outCodePoints) const {
+ const int nodeCodePointCount = mNodeReader.getCodePointCount();
+ for (int i = 0; i < nodeCodePointCount; ++i) {
+ outCodePoints[index + i] = mMergedNodeCodePoints[nodeCodePointCount - 1 - i];
+ }
+ }
+
+ AK_FORCE_INLINE const int *getMergedNodeCodePoints() const {
+ return mMergedNodeCodePoints;
+ }
+
+ AK_FORCE_INLINE void readNextSiblingNode() {
+ mReadingState.mNodeCount -= 1;
+ mReadingState.mPos = mNodeReader.getSiblingNodePos();
+ if (mReadingState.mNodeCount <= 0) {
+ // All nodes in the current node array have been read.
+ followForwardLink();
+ if (!isEnd()) {
+ fetchPtNodeInfo();
+ }
+ } else {
+ fetchPtNodeInfo();
+ }
+ }
+
+ // Read the first child node of the current node.
+ AK_FORCE_INLINE void readChildNode() {
+ if (mNodeReader.hasChildren()) {
+ mReadingState.mPrevTotalCodePointCount += mNodeReader.getCodePointCount();
+ mReadingState.mTotalNodeCount = 0;
+ mReadingState.mNodeArrayCount = 0;
+ mReadingState.mPos = mNodeReader.getChildrenPos();
+ mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
+ // Read children node array.
+ nextPtNodeArray();
+ if (!isEnd()) {
+ fetchPtNodeInfo();
+ }
+ } else {
+ mReadingState.mPos = NOT_A_DICT_POS;
+ }
+ }
+
+ // Read the parent node of the current node.
+ AK_FORCE_INLINE void readParentNode() {
+ if (mNodeReader.getParentPos() != NOT_A_DICT_POS) {
+ mReadingState.mPrevTotalCodePointCount += mNodeReader.getCodePointCount();
+ mReadingState.mTotalNodeCount = 1;
+ mReadingState.mNodeArrayCount = 1;
+ mReadingState.mNodeCount = 1;
+ mReadingState.mPos = mNodeReader.getParentPos();
+ mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
+ mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS;
+ fetchPtNodeInfo();
+ } else {
+ mReadingState.mPos = NOT_A_DICT_POS;
+ }
+ }
+
+ AK_FORCE_INLINE int getPosOfLastForwardLinkField() const {
+ return mReadingState.mPosOfLastForwardLinkField;
+ }
+
+ AK_FORCE_INLINE int getPosOfLastPtNodeArrayHead() const {
+ return mReadingState.mPosOfLastPtNodeArrayHead;
+ }
+
+ AK_FORCE_INLINE void reloadCurrentPtNodeInfo() {
+ if (!isEnd()) {
+ fetchPtNodeInfo();
+ }
+ }
+
+ bool traverseAllPtNodesInPostorderDepthFirstManner(TraversingEventListener *const listener);
+
+ bool traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
+ TraversingEventListener *const listener);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieReadingHelper);
+
+ class ReadingState {
+ public:
+ // Note that copy constructor and assignment operator are used for this class to use
+ // std::vector.
+ ReadingState() : mPos(NOT_A_DICT_POS), mNodeCount(0), mPrevTotalCodePointCount(0),
+ mTotalNodeCount(0), mNodeArrayCount(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS),
+ mPosOfLastPtNodeArrayHead(NOT_A_DICT_POS) {}
+
+ int mPos;
+ // Node count of a node array.
+ int mNodeCount;
+ int mPrevTotalCodePointCount;
+ int mTotalNodeCount;
+ int mNodeArrayCount;
+ int mPosOfLastForwardLinkField;
+ int mPosOfLastPtNodeArrayHead;
+ };
+
+ static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP;
+ static const int MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP;
+ static const size_t MAX_READING_STATE_STACK_SIZE;
+
+ bool mIsError;
+ ReadingState mReadingState;
+ const BufferWithExtendableBuffer *const mBuffer;
+ DynamicPatriciaTrieNodeReader mNodeReader;
+ int mMergedNodeCodePoints[MAX_WORD_LENGTH];
+ std::vector<ReadingState> mReadingStateStack;
+
+ void nextPtNodeArray();
+
+ void followForwardLink();
+
+ AK_FORCE_INLINE void fetchPtNodeInfo() {
+ mNodeReader.fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(mReadingState.mPos,
+ MAX_WORD_LENGTH, mMergedNodeCodePoints);
+ if (mNodeReader.getCodePointCount() <= 0) {
+ // Empty node is not allowed.
+ mIsError = true;
+ mReadingState.mPos = NOT_A_DICT_POS;
+ }
+ }
+
+ AK_FORCE_INLINE void pushReadingStateToStack() {
+ if (mReadingStateStack.size() > MAX_READING_STATE_STACK_SIZE) {
+ AKLOGI("Reading state stack overflow. Max size: %zd", MAX_READING_STATE_STACK_SIZE);
+ ASSERT(false);
+ mIsError = true;
+ mReadingState.mPos = NOT_A_DICT_POS;
+ } else {
+ mReadingStateStack.push_back(mReadingState);
+ }
+ }
+
+ AK_FORCE_INLINE void popReadingStateFromStack() {
+ if (mReadingStateStack.empty()) {
+ mReadingState.mPos = NOT_A_DICT_POS;
+ } else {
+ mReadingState = mReadingStateStack.back();
+ mReadingStateStack.pop_back();
+ fetchPtNodeInfo();
+ }
+ }
+};
+} // namespace latinime
+#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp
new file mode 100644
index 000000000..d68446db6
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
+
+namespace latinime {
+
+typedef DynamicPatriciaTrieReadingUtils DptReadingUtils;
+
+const DptReadingUtils::NodeFlags DptReadingUtils::MASK_MOVED = 0xC0;
+const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_NOT_MOVED = 0xC0;
+const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_MOVED = 0x40;
+const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_DELETED = 0x80;
+
+// TODO: Make DICT_OFFSET_ZERO_OFFSET = 0.
+// Currently, DICT_OFFSET_INVALID is 0 in Java side but offset can be 0 during GC. So, the maximum
+// value of offsets, which is 0x7FFFFF is used to represent 0 offset.
+const int DptReadingUtils::DICT_OFFSET_INVALID = 0;
+const int DptReadingUtils::DICT_OFFSET_ZERO_OFFSET = 0x7FFFFF;
+
+/* static */ int DptReadingUtils::getForwardLinkPosition(const uint8_t *const buffer,
+ const int pos) {
+ int linkAddressPos = pos;
+ return ByteArrayUtils::readSint24AndAdvancePosition(buffer, &linkAddressPos);
+}
+
+/* static */ int DptReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(
+ const uint8_t *const buffer, int *const pos) {
+ return ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos);
+}
+
+/* static */ int DptReadingUtils::getParentPtNodePos(const int parentOffset, const int ptNodePos) {
+ if (parentOffset == DICT_OFFSET_INVALID) {
+ return NOT_A_DICT_POS;
+ } else if (parentOffset == DICT_OFFSET_ZERO_OFFSET) {
+ return ptNodePos;
+ } else {
+ return parentOffset + ptNodePos;
+ }
+}
+
+/* static */ int DptReadingUtils::readChildrenPositionAndAdvancePosition(
+ const uint8_t *const buffer, int *const pos) {
+ const int base = *pos;
+ const int offset = ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos);
+ if (offset == DICT_OFFSET_INVALID) {
+ // The PtNode does not have children.
+ return NOT_A_DICT_POS;
+ } else if (offset == DICT_OFFSET_ZERO_OFFSET) {
+ return base;
+ } else {
+ return base + offset;
+ }
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h
new file mode 100644
index 000000000..67c3cc57e
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_READING_UTILS_H
+#define LATINIME_DYNAMIC_PATRICIA_TRIE_READING_UTILS_H
+
+#include <stdint.h>
+
+#include "defines.h"
+
+namespace latinime {
+
+class DynamicPatriciaTrieReadingUtils {
+ public:
+ typedef uint8_t NodeFlags;
+
+ static const int DICT_OFFSET_INVALID;
+ static const int DICT_OFFSET_ZERO_OFFSET;
+
+ static int getForwardLinkPosition(const uint8_t *const buffer, const int pos);
+
+ static AK_FORCE_INLINE bool isValidForwardLinkPosition(const int forwardLinkAddress) {
+ return forwardLinkAddress != 0;
+ }
+
+ static int getParentPtNodePosOffsetAndAdvancePosition(const uint8_t *const buffer,
+ int *const pos);
+
+ static int getParentPtNodePos(const int parentOffset, const int ptNodePos);
+
+ static int readChildrenPositionAndAdvancePosition(const uint8_t *const buffer, int *const pos);
+
+ /**
+ * Node Flags
+ */
+ static AK_FORCE_INLINE bool isMoved(const NodeFlags flags) {
+ return FLAG_IS_MOVED == (MASK_MOVED & flags);
+ }
+
+ static AK_FORCE_INLINE bool isDeleted(const NodeFlags flags) {
+ return FLAG_IS_DELETED == (MASK_MOVED & flags);
+ }
+
+ static AK_FORCE_INLINE NodeFlags updateAndGetFlags(const NodeFlags originalFlags,
+ const bool isMoved, const bool isDeleted) {
+ NodeFlags flags = originalFlags;
+ flags = isMoved ? ((flags & (~MASK_MOVED)) | FLAG_IS_MOVED) : flags;
+ flags = isDeleted ? ((flags & (~MASK_MOVED)) | FLAG_IS_DELETED) : flags;
+ flags = (!isMoved && !isDeleted) ? ((flags & (~MASK_MOVED)) | FLAG_IS_NOT_MOVED) : flags;
+ return flags;
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieReadingUtils);
+
+ static const NodeFlags MASK_MOVED;
+ static const NodeFlags FLAG_IS_NOT_MOVED;
+ static const NodeFlags FLAG_IS_MOVED;
+ static const NodeFlags FLAG_IS_DELETED;
+};
+} // namespace latinime
+#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_READING_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
new file mode 100644
index 000000000..578645cd5
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
@@ -0,0 +1,511 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
+
+#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
+#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
+#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
+#include "utils/hash_map_compat.h"
+
+namespace latinime {
+
+const int DynamicPatriciaTrieWritingHelper::CHILDREN_POSITION_FIELD_SIZE = 3;
+// TODO: Make MAX_DICTIONARY_SIZE 8MB.
+const size_t DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE = 2 * 1024 * 1024;
+
+bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
+ DynamicPatriciaTrieReadingHelper *const readingHelper,
+ const int *const wordCodePoints, const int codePointCount, const int probability) {
+ int parentPos = NOT_A_DICT_POS;
+ while (!readingHelper->isEnd()) {
+ const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount();
+ if (!readingHelper->isMatchedCodePoint(0 /* index */,
+ wordCodePoints[matchedCodePointCount])) {
+ // The first code point is different from target code point. Skip this node and read
+ // the next sibling node.
+ readingHelper->readNextSiblingNode();
+ continue;
+ }
+ // Check following merged node code points.
+ const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper->getNodeReader();
+ const int nodeCodePointCount = nodeReader->getCodePointCount();
+ for (int j = 1; j < nodeCodePointCount; ++j) {
+ const int nextIndex = matchedCodePointCount + j;
+ if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(j,
+ wordCodePoints[matchedCodePointCount + j])) {
+ return reallocatePtNodeAndAddNewPtNodes(nodeReader,
+ readingHelper->getMergedNodeCodePoints(), j, probability,
+ wordCodePoints + matchedCodePointCount,
+ codePointCount - matchedCodePointCount);
+ }
+ }
+ // All characters are matched.
+ if (codePointCount == readingHelper->getTotalCodePointCount()) {
+ return setPtNodeProbability(nodeReader, probability,
+ readingHelper->getMergedNodeCodePoints());
+ }
+ if (!nodeReader->hasChildren()) {
+ return createChildrenPtNodeArrayAndAChildPtNode(nodeReader, probability,
+ wordCodePoints + readingHelper->getTotalCodePointCount(),
+ codePointCount - readingHelper->getTotalCodePointCount());
+ }
+ // Advance to the children nodes.
+ parentPos = nodeReader->getHeadPos();
+ readingHelper->readChildNode();
+ }
+ if (readingHelper->isError()) {
+ // The dictionary is invalid.
+ return false;
+ }
+ int pos = readingHelper->getPosOfLastForwardLinkField();
+ return createAndInsertNodeIntoPtNodeArray(parentPos,
+ wordCodePoints + readingHelper->getPrevTotalCodePointCount(),
+ codePointCount - readingHelper->getPrevTotalCodePointCount(),
+ probability, &pos);
+}
+
+bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos,
+ const int probability) {
+ int mMergedNodeCodePoints[MAX_WORD_LENGTH];
+ DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
+ nodeReader.fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(word0Pos, MAX_WORD_LENGTH,
+ mMergedNodeCodePoints);
+ // Move node to add bigram entry.
+ const int newNodePos = mBuffer->getTailPosition();
+ if (!markNodeAsMovedAndSetPosition(&nodeReader, newNodePos, newNodePos)) {
+ return false;
+ }
+ int writingPos = newNodePos;
+ // Write a new PtNode using original PtNode's info to the tail of the dictionary in mBuffer.
+ if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, &nodeReader, nodeReader.getParentPos(),
+ mMergedNodeCodePoints, nodeReader.getCodePointCount(), nodeReader.getProbability(),
+ &writingPos)) {
+ return false;
+ }
+ nodeReader.fetchNodeInfoInBufferFromPtNodePos(newNodePos);
+ if (nodeReader.getBigramsPos() != NOT_A_DICT_POS) {
+ // Insert a new bigram entry into the existing bigram list.
+ int bigramListPos = nodeReader.getBigramsPos();
+ return mBigramPolicy->addNewBigramEntryToBigramList(word1Pos, probability, &bigramListPos);
+ } else {
+ // The PtNode doesn't have a bigram list.
+ // First, Write a bigram entry at the tail position of the PtNode.
+ if (!mBigramPolicy->writeNewBigramEntry(word1Pos, probability, &writingPos)) {
+ return false;
+ }
+ // Then, Mark as the PtNode having bigram list in the flags.
+ const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
+ PatriciaTrieReadingUtils::createAndGetFlags(nodeReader.isBlacklisted(),
+ nodeReader.isNotAWord(), nodeReader.getProbability() != NOT_A_PROBABILITY,
+ nodeReader.getShortcutPos() != NOT_A_DICT_POS, true /* hasBigrams */,
+ nodeReader.getCodePointCount() > 1, CHILDREN_POSITION_FIELD_SIZE);
+ writingPos = newNodePos;
+ // Write updated flags into the moved PtNode's flags field.
+ return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
+ &writingPos);
+ }
+}
+
+// Remove a bigram relation from word0Pos to word1Pos.
+bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) {
+ DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
+ nodeReader.fetchNodeInfoInBufferFromPtNodePos(word0Pos);
+ if (nodeReader.getBigramsPos() == NOT_A_DICT_POS) {
+ return false;
+ }
+ return mBigramPolicy->removeBigram(nodeReader.getBigramsPos(), word1Pos);
+}
+
+void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileName,
+ const HeaderPolicy *const headerPolicy) {
+ BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
+ if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */)) {
+ return;
+ }
+ DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, mBuffer);
+}
+
+void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos,
+ const char *const fileName, const HeaderPolicy *const headerPolicy) {
+ BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
+ if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */)) {
+ return;
+ }
+ BufferWithExtendableBuffer newDictBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */,
+ MAX_DICTIONARY_SIZE);
+ if (!runGC(rootPtNodeArrayPos, &newDictBuffer)) {
+ return;
+ }
+ DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, &newDictBuffer);
+}
+
+bool DynamicPatriciaTrieWritingHelper::markNodeAsDeleted(
+ const DynamicPatriciaTrieNodeReader *const nodeToUpdate) {
+ int pos = nodeToUpdate->getHeadPos();
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
+ const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
+ if (usesAdditionalBuffer) {
+ pos -= mBuffer->getOriginalBufferSize();
+ }
+ // Read original flags
+ const PatriciaTrieReadingUtils::NodeFlags originalFlags =
+ PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
+ const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
+ DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */,
+ true /* isDeleted */);
+ int writingPos = nodeToUpdate->getHeadPos();
+ // Update flags.
+ return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
+ &writingPos);
+}
+
+bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
+ const DynamicPatriciaTrieNodeReader *const originalNode, const int movedPos,
+ const int bigramLinkedNodePos) {
+ int pos = originalNode->getHeadPos();
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
+ const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
+ if (usesAdditionalBuffer) {
+ pos -= mBuffer->getOriginalBufferSize();
+ }
+ // Read original flags
+ const PatriciaTrieReadingUtils::NodeFlags originalFlags =
+ PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
+ const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
+ DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */,
+ false /* isDeleted */);
+ int writingPos = originalNode->getHeadPos();
+ // Update flags.
+ if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
+ &writingPos)) {
+ return false;
+ }
+ // Update moved position, which is stored in the parent offset field.
+ if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
+ mBuffer, movedPos, originalNode->getHeadPos(), &writingPos)) {
+ return false;
+ }
+ // Update bigram linked node position, which is stored in the children position field.
+ int childrenPosFieldPos = originalNode->getChildrenPosFieldPos();
+ if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
+ mBuffer, bigramLinkedNodePos, &childrenPosFieldPos)) {
+ return false;
+ }
+ if (originalNode->hasChildren()) {
+ // Update children's parent position.
+ DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
+ const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
+ readingHelper.initWithPtNodeArrayPos(originalNode->getChildrenPos());
+ while (!readingHelper.isEnd()) {
+ int parentOffsetFieldPos = nodeReader->getHeadPos()
+ + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
+ if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
+ mBuffer, movedPos, nodeReader->getHeadPos(), &parentOffsetFieldPos)) {
+ // Parent offset cannot be written because of a bug or a broken dictionary; thus,
+ // we give up to update dictionary.
+ return false;
+ }
+ readingHelper.readNextSiblingNode();
+ }
+ }
+ return true;
+}
+
+// Write new PtNode at writingPos.
+bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(
+ BufferWithExtendableBuffer *const bufferToWrite, const bool isBlacklisted,
+ const bool isNotAWord, const int parentPos, const int *const codePoints,
+ const int codePointCount, const int probability, const int childrenPos,
+ const int originalBigramListPos, const int originalShortcutListPos,
+ int *const writingPos) {
+ const int nodePos = *writingPos;
+ // Write dummy flags. The Node flags are updated with appropriate flags at the last step of the
+ // PtNode writing.
+ if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(bufferToWrite,
+ 0 /* nodeFlags */, writingPos)) {
+ return false;
+ }
+ // Calculate a parent offset and write the offset.
+ if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(bufferToWrite,
+ parentPos, nodePos, writingPos)) {
+ return false;
+ }
+ // Write code points
+ if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(bufferToWrite,
+ codePoints, codePointCount, writingPos)) {
+ return false;
+ }
+ // Write probability when the probability is a valid probability, which means this node is
+ // terminal.
+ if (probability != NOT_A_PROBABILITY) {
+ if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(bufferToWrite,
+ probability, writingPos)) {
+ return false;
+ }
+ }
+ // Write children position
+ if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(bufferToWrite,
+ childrenPos, writingPos)) {
+ return false;
+ }
+ // Copy shortcut list when the originalShortcutListPos is valid dictionary position.
+ if (originalShortcutListPos != NOT_A_DICT_POS) {
+ int fromPos = originalShortcutListPos;
+ if (!mShortcutPolicy->copyAllShortcutsAndReturnIfSucceededOrNot(bufferToWrite, &fromPos,
+ writingPos)) {
+ return false;
+ }
+ }
+ // Copy bigram list when the originalBigramListPos is valid dictionary position.
+ int bigramCount = 0;
+ if (originalBigramListPos != NOT_A_DICT_POS) {
+ int fromPos = originalBigramListPos;
+ if (!mBigramPolicy->copyAllBigrams(bufferToWrite, &fromPos, writingPos, &bigramCount)) {
+ return false;
+ }
+ }
+ // Create node flags and write them.
+ PatriciaTrieReadingUtils::NodeFlags nodeFlags =
+ PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord,
+ probability != NOT_A_PROBABILITY /* isTerminal */,
+ originalShortcutListPos != NOT_A_DICT_POS /* hasShortcutTargets */,
+ bigramCount > 0 /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */,
+ CHILDREN_POSITION_FIELD_SIZE);
+ int flagsFieldPos = nodePos;
+ if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(bufferToWrite, nodeFlags,
+ &flagsFieldPos)) {
+ return false;
+ }
+ return true;
+}
+
+bool DynamicPatriciaTrieWritingHelper::writePtNodeToBuffer(
+ BufferWithExtendableBuffer *const bufferToWrite, const int parentPos,
+ const int *const codePoints, const int codePointCount, const int probability,
+ int *const writingPos) {
+ return writePtNodeWithFullInfoToBuffer(bufferToWrite, false /* isBlacklisted */,
+ false /* isNotAWord */, parentPos, codePoints, codePointCount, probability,
+ NOT_A_DICT_POS /* childrenPos */, NOT_A_DICT_POS /* originalBigramsPos */,
+ NOT_A_DICT_POS /* originalShortcutPos */, writingPos);
+}
+
+bool DynamicPatriciaTrieWritingHelper::writePtNodeToBufferByCopyingPtNodeInfo(
+ BufferWithExtendableBuffer *const bufferToWrite,
+ const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos,
+ const int *const codePoints, const int codePointCount, const int probability,
+ int *const writingPos) {
+ return writePtNodeWithFullInfoToBuffer(bufferToWrite, originalNode->isBlacklisted(),
+ originalNode->isNotAWord(), parentPos, codePoints, codePointCount, probability,
+ originalNode->getChildrenPos(), originalNode->getBigramsPos(),
+ originalNode->getShortcutPos(), writingPos);
+}
+
+bool DynamicPatriciaTrieWritingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos,
+ const int *const nodeCodePoints, const int nodeCodePointCount, const int probability,
+ int *const forwardLinkFieldPos) {
+ const int newPtNodeArrayPos = mBuffer->getTailPosition();
+ if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
+ newPtNodeArrayPos, forwardLinkFieldPos)) {
+ return false;
+ }
+ return createNewPtNodeArrayWithAChildPtNode(parentPos, nodeCodePoints, nodeCodePointCount,
+ probability);
+}
+
+bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability(
+ const DynamicPatriciaTrieNodeReader *const originalPtNode, const int probability,
+ const int *const codePoints) {
+ if (originalPtNode->isTerminal()) {
+ // Overwrites the probability.
+ int probabilityFieldPos = originalPtNode->getProbabilityFieldPos();
+ if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
+ probability, &probabilityFieldPos)) {
+ return false;
+ }
+ } else {
+ // Make the node terminal and write the probability.
+ int movedPos = mBuffer->getTailPosition();
+ if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos, movedPos)) {
+ return false;
+ }
+ if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, originalPtNode,
+ originalPtNode->getParentPos(), codePoints, originalPtNode->getCodePointCount(),
+ probability, &movedPos)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool DynamicPatriciaTrieWritingHelper::createChildrenPtNodeArrayAndAChildPtNode(
+ const DynamicPatriciaTrieNodeReader *const parentNode, const int probability,
+ const int *const codePoints, const int codePointCount) {
+ const int newPtNodeArrayPos = mBuffer->getTailPosition();
+ int childrenPosFieldPos = parentNode->getChildrenPosFieldPos();
+ if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
+ newPtNodeArrayPos, &childrenPosFieldPos)) {
+ return false;
+ }
+ return createNewPtNodeArrayWithAChildPtNode(parentNode->getHeadPos(), codePoints,
+ codePointCount, probability);
+}
+
+bool DynamicPatriciaTrieWritingHelper::createNewPtNodeArrayWithAChildPtNode(
+ const int parentPtNodePos, const int *const nodeCodePoints, const int nodeCodePointCount,
+ const int probability) {
+ int writingPos = mBuffer->getTailPosition();
+ if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
+ 1 /* arraySize */, &writingPos)) {
+ return false;
+ }
+ if (!writePtNodeToBuffer(mBuffer, parentPtNodePos, nodeCodePoints, nodeCodePointCount,
+ probability, &writingPos)) {
+ return false;
+ }
+ if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
+ NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
+ return false;
+ }
+ return true;
+}
+
+// Returns whether the dictionary updating was succeeded or not.
+bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
+ const DynamicPatriciaTrieNodeReader *const reallocatingPtNode,
+ const int *const reallocatingPtNodeCodePoints, const int overlappingCodePointCount,
+ const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
+ const int newNodeCodePointCount) {
+ // When addsExtraChild is true, split the reallocating PtNode and add new child.
+ // Reallocating PtNode: abcde, newNode: abcxy.
+ // abc (1st, not terminal) __ de (2nd)
+ // \_ xy (extra child, terminal)
+ // Otherwise, this method makes 1st part terminal and write probabilityOfNewPtNode.
+ // Reallocating PtNode: abcde, newNode: abc.
+ // abc (1st, terminal) __ de (2nd)
+ const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount;
+ const int firstPartOfReallocatedPtNodePos = mBuffer->getTailPosition();
+ int writingPos = firstPartOfReallocatedPtNodePos;
+ // Write the 1st part of the reallocating node. The children position will be updated later
+ // with actual children position.
+ const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode;
+ if (!writePtNodeToBuffer(mBuffer, reallocatingPtNode->getParentPos(),
+ reallocatingPtNodeCodePoints, overlappingCodePointCount, newProbability,
+ &writingPos)) {
+ return false;
+ }
+ const int actualChildrenPos = writingPos;
+ // Create new children PtNode array.
+ const size_t newPtNodeCount = addsExtraChild ? 2 : 1;
+ if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
+ newPtNodeCount, &writingPos)) {
+ return false;
+ }
+ // Write the 2nd part of the reallocating node.
+ const int secondPartOfReallocatedPtNodePos = writingPos;
+ if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, reallocatingPtNode,
+ firstPartOfReallocatedPtNodePos,
+ reallocatingPtNodeCodePoints + overlappingCodePointCount,
+ reallocatingPtNode->getCodePointCount() - overlappingCodePointCount,
+ reallocatingPtNode->getProbability(), &writingPos)) {
+ return false;
+ }
+ if (addsExtraChild) {
+ if (!writePtNodeToBuffer(mBuffer, firstPartOfReallocatedPtNodePos,
+ newNodeCodePoints + overlappingCodePointCount,
+ newNodeCodePointCount - overlappingCodePointCount, probabilityOfNewPtNode,
+ &writingPos)) {
+ return false;
+ }
+ }
+ if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
+ NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
+ return false;
+ }
+ // Update original reallocatingPtNode as moved.
+ if (!markNodeAsMovedAndSetPosition(reallocatingPtNode, firstPartOfReallocatedPtNodePos,
+ secondPartOfReallocatedPtNodePos)) {
+ return false;
+ }
+ // Load node info. Information of the 1st part will be fetched.
+ DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
+ nodeReader.fetchNodeInfoInBufferFromPtNodePos(firstPartOfReallocatedPtNodePos);
+ // Update children position.
+ int childrenPosFieldPos = nodeReader.getChildrenPosFieldPos();
+ if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
+ actualChildrenPos, &childrenPosFieldPos)) {
+ return false;
+ }
+ return true;
+}
+
+bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
+ BufferWithExtendableBuffer *const bufferToWrite) {
+ DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
+ readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+ DynamicPatriciaTrieGcEventListeners
+ ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
+ traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
+ this, mBuffer);
+ if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
+ &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
+ return false;
+ }
+
+ readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+ DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability
+ traversePolicyToUpdateBigramProbability(mBigramPolicy);
+ if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
+ &traversePolicyToUpdateBigramProbability)) {
+ return false;
+ }
+
+ // Mapping from positions in mBuffer to positions in bufferToWrite.
+ DictPositionRelocationMap dictPositionRelocationMap;
+ readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+ DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
+ traversePolicyToPlaceAndWriteValidPtNodesToBuffer(this, bufferToWrite,
+ &dictPositionRelocationMap);
+ if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
+ &traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) {
+ return false;
+ }
+
+ // Create policy instance for the GCed dictionary.
+ DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite);
+ DynamicBigramListPolicy newDictBigramPolicy(bufferToWrite, &newDictShortcutPolicy);
+ // Create reading helper for the GCed dictionary.
+ DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictBigramPolicy,
+ &newDictShortcutPolicy);
+ newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+ DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
+ traversePolicyToUpdateAllPositionFields(this, &newDictBigramPolicy, bufferToWrite,
+ &dictPositionRelocationMap);
+ if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
+ &traversePolicyToUpdateAllPositionFields)) {
+ return false;
+ }
+ return true;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
new file mode 100644
index 000000000..fe1b2437a
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H
+#define LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "utils/hash_map_compat.h"
+
+namespace latinime {
+
+class BufferWithExtendableBuffer;
+class DynamicBigramListPolicy;
+class DynamicPatriciaTrieNodeReader;
+class DynamicPatriciaTrieReadingHelper;
+class DynamicShortcutListPolicy;
+class HeaderPolicy;
+
+class DynamicPatriciaTrieWritingHelper {
+ public:
+ typedef hash_map_compat<int, int> PtNodeArrayPositionRelocationMap;
+ typedef hash_map_compat<int, int> PtNodePositionRelocationMap;
+ struct DictPositionRelocationMap {
+ public:
+ DictPositionRelocationMap()
+ : mPtNodeArrayPositionRelocationMap(), mPtNodePositionRelocationMap() {}
+
+ PtNodeArrayPositionRelocationMap mPtNodeArrayPositionRelocationMap;
+ PtNodePositionRelocationMap mPtNodePositionRelocationMap;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DictPositionRelocationMap);
+ };
+
+ DynamicPatriciaTrieWritingHelper(BufferWithExtendableBuffer *const buffer,
+ DynamicBigramListPolicy *const bigramPolicy,
+ DynamicShortcutListPolicy *const shortcutPolicy)
+ : mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy) {}
+
+ ~DynamicPatriciaTrieWritingHelper() {}
+
+ // Add a word to the dictionary. If the word already exists, update the probability.
+ bool addUnigramWord(DynamicPatriciaTrieReadingHelper *const readingHelper,
+ const int *const wordCodePoints, const int codePointCount, const int probability);
+
+ // Add a bigram relation from word0Pos to word1Pos.
+ bool addBigramWords(const int word0Pos, const int word1Pos, const int probability);
+
+ // Remove a bigram relation from word0Pos to word1Pos.
+ bool removeBigramWords(const int word0Pos, const int word1Pos);
+
+ void writeToDictFile(const char *const fileName, const HeaderPolicy *const headerPolicy);
+
+ void writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const fileName,
+ const HeaderPolicy *const headerPolicy);
+
+ // CAVEAT: This method must be called only from inner classes of
+ // DynamicPatriciaTrieGcEventListeners.
+ bool markNodeAsDeleted(const DynamicPatriciaTrieNodeReader *const nodeToUpdate);
+
+ // CAVEAT: This method must be called only from this class or inner classes of
+ // DynamicPatriciaTrieGcEventListeners.
+ bool writePtNodeToBufferByCopyingPtNodeInfo(BufferWithExtendableBuffer *const bufferToWrite,
+ const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos,
+ const int *const codePoints, const int codePointCount, const int probability,
+ int *const writingPos);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper);
+
+ static const int CHILDREN_POSITION_FIELD_SIZE;
+ static const size_t MAX_DICTIONARY_SIZE;
+
+ BufferWithExtendableBuffer *const mBuffer;
+ DynamicBigramListPolicy *const mBigramPolicy;
+ DynamicShortcutListPolicy *const mShortcutPolicy;
+
+ bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate,
+ const int movedPos, const int bigramLinkedNodePos);
+
+ bool writePtNodeWithFullInfoToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
+ const bool isBlacklisted, const bool isNotAWord,
+ const int parentPos, const int *const codePoints, const int codePointCount,
+ const int probability, const int childrenPos, const int originalBigramListPos,
+ const int originalShortcutListPos, int *const writingPos);
+
+ bool writePtNodeToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
+ const int parentPos, const int *const codePoints, const int codePointCount,
+ const int probability, int *const writingPos);
+
+ bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
+ const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos);
+
+ bool setPtNodeProbability(const DynamicPatriciaTrieNodeReader *const originalNode,
+ const int probability, const int *const codePoints);
+
+ bool createChildrenPtNodeArrayAndAChildPtNode(
+ const DynamicPatriciaTrieNodeReader *const parentNode, const int probability,
+ const int *const codePoints, const int codePointCount);
+
+ bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints,
+ const int nodeCodePointCount, const int probability);
+
+ bool reallocatePtNodeAndAddNewPtNodes(
+ const DynamicPatriciaTrieNodeReader *const reallocatingPtNode,
+ const int *const reallocatingPtNodeCodePoints, const int overlappingCodePointCount,
+ const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
+ const int newNodeCodePointCount);
+
+ bool runGC(const int rootPtNodeArrayPos, BufferWithExtendableBuffer *const bufferToWrite);
+};
+} // namespace latinime
+#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp
new file mode 100644
index 000000000..30ff10cd6
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp
@@ -0,0 +1,147 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
+
+#include <cstddef>
+#include <cstdlib>
+#include <stdint.h>
+
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+const size_t DynamicPatriciaTrieWritingUtils::MAX_PTNODE_ARRAY_SIZE_TO_USE_SMALL_SIZE_FIELD = 0x7F;
+const size_t DynamicPatriciaTrieWritingUtils::MAX_PTNODE_ARRAY_SIZE = 0x7FFF;
+const int DynamicPatriciaTrieWritingUtils::SMALL_PTNODE_ARRAY_SIZE_FIELD_SIZE = 1;
+const int DynamicPatriciaTrieWritingUtils::LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE = 2;
+const int DynamicPatriciaTrieWritingUtils::LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE_FLAG = 0x8000;
+const int DynamicPatriciaTrieWritingUtils::DICT_OFFSET_FIELD_SIZE = 3;
+const int DynamicPatriciaTrieWritingUtils::MAX_DICT_OFFSET_VALUE = 0x7FFFFF;
+const int DynamicPatriciaTrieWritingUtils::MIN_DICT_OFFSET_VALUE = -0x7FFFFF;
+const int DynamicPatriciaTrieWritingUtils::DICT_OFFSET_NEGATIVE_FLAG = 0x800000;
+const int DynamicPatriciaTrieWritingUtils::PROBABILITY_FIELD_SIZE = 1;
+const int DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE = 1;
+
+/* static */ bool DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(
+ BufferWithExtendableBuffer *const buffer, const int rootPos) {
+ int writingPos = rootPos;
+ if (!writePtNodeArraySizeAndAdvancePosition(buffer, 0 /* arraySize */, &writingPos)) {
+ return false;
+ }
+ return writeForwardLinkPositionAndAdvancePosition(buffer, NOT_A_DICT_POS /* forwardLinkPos */,
+ &writingPos);
+}
+
+/* static */ bool DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(
+ BufferWithExtendableBuffer *const buffer, const int forwardLinkPos,
+ int *const forwardLinkFieldPos) {
+ return writeDictOffset(buffer, forwardLinkPos, (*forwardLinkFieldPos), forwardLinkFieldPos);
+}
+
+/* static */ bool DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(
+ BufferWithExtendableBuffer *const buffer, const size_t arraySize,
+ int *const arraySizeFieldPos) {
+ // Currently, all array size field to be created has LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE to
+ // simplify updating process.
+ // TODO: Use SMALL_PTNODE_ARRAY_SIZE_FIELD_SIZE for small arrays.
+ /*if (arraySize <= MAX_PTNODE_ARRAY_SIZE_TO_USE_SMALL_SIZE_FIELD) {
+ return buffer->writeUintAndAdvancePosition(arraySize, SMALL_PTNODE_ARRAY_SIZE_FIELD_SIZE,
+ arraySizeFieldPos);
+ } else */
+ if (arraySize <= MAX_PTNODE_ARRAY_SIZE) {
+ uint32_t data = arraySize | LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE_FLAG;
+ return buffer->writeUintAndAdvancePosition(data, LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE,
+ arraySizeFieldPos);
+ } else {
+ AKLOGI("PtNode array size cannot be written because arraySize is too large: %zd",
+ arraySize);
+ ASSERT(false);
+ return false;
+ }
+}
+
+/* static */ bool DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(
+ BufferWithExtendableBuffer *const buffer,
+ const DynamicPatriciaTrieReadingUtils::NodeFlags nodeFlags, int *const nodeFlagsFieldPos) {
+ return buffer->writeUintAndAdvancePosition(nodeFlags, NODE_FLAG_FIELD_SIZE, nodeFlagsFieldPos);
+}
+
+// Note that parentOffset is offset from node's head position.
+/* static */ bool DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
+ BufferWithExtendableBuffer *const buffer, const int parentPos, const int basePos,
+ int *const parentPosFieldPos) {
+ return writeDictOffset(buffer, parentPos, basePos, parentPosFieldPos);
+}
+
+/* static */ bool DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(
+ BufferWithExtendableBuffer *const buffer, const int *const codePoints,
+ const int codePointCount, int *const codePointFieldPos) {
+ if (codePointCount <= 0) {
+ AKLOGI("code points cannot be written because codePointCount is invalid: %d",
+ codePointCount);
+ ASSERT(false);
+ return false;
+ }
+ const bool hasMultipleCodePoints = codePointCount > 1;
+ return buffer->writeCodePointsAndAdvancePosition(codePoints, codePointCount,
+ hasMultipleCodePoints, codePointFieldPos);
+}
+
+/* static */ bool DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(
+ BufferWithExtendableBuffer *const buffer, const int probability,
+ int *const probabilityFieldPos) {
+ if (probability < 0 || probability > MAX_PROBABILITY) {
+ AKLOGI("probability cannot be written because the probability is invalid: %d",
+ probability);
+ ASSERT(false);
+ return false;
+ }
+ return buffer->writeUintAndAdvancePosition(probability, PROBABILITY_FIELD_SIZE,
+ probabilityFieldPos);
+}
+
+/* static */ bool DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
+ BufferWithExtendableBuffer *const buffer, const int childrenPosition,
+ int *const childrenPositionFieldPos) {
+ return writeDictOffset(buffer, childrenPosition, (*childrenPositionFieldPos),
+ childrenPositionFieldPos);
+}
+
+/* static */ bool DynamicPatriciaTrieWritingUtils::writeDictOffset(
+ BufferWithExtendableBuffer *const buffer, const int targetPos, const int basePos,
+ int *const offsetFieldPos) {
+ int offset = targetPos - basePos;
+ if (targetPos == NOT_A_DICT_POS) {
+ offset = DynamicPatriciaTrieReadingUtils::DICT_OFFSET_INVALID;
+ } else if (offset == 0) {
+ offset = DynamicPatriciaTrieReadingUtils::DICT_OFFSET_ZERO_OFFSET;
+ }
+ if (offset > MAX_DICT_OFFSET_VALUE || offset < MIN_DICT_OFFSET_VALUE) {
+ AKLOGI("offset cannot be written because the offset is too large or too small: %d",
+ offset);
+ ASSERT(false);
+ return false;
+ }
+ uint32_t data = 0;
+ if (offset >= 0) {
+ data = offset;
+ } else {
+ data = abs(offset) | DICT_OFFSET_NEGATIVE_FLAG;
+ }
+ return buffer->writeUintAndAdvancePosition(data, DICT_OFFSET_FIELD_SIZE, offsetFieldPos);
+}
+}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h
new file mode 100644
index 000000000..af76bc6b5
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_UTILS_H
+#define LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_UTILS_H
+
+#include <cstddef>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
+
+namespace latinime {
+
+class BufferWithExtendableBuffer;
+
+class DynamicPatriciaTrieWritingUtils {
+ public:
+ static const int NODE_FLAG_FIELD_SIZE;
+
+ static bool writeEmptyDictionary(BufferWithExtendableBuffer *const buffer, const int rootPos);
+
+ static bool writeForwardLinkPositionAndAdvancePosition(
+ BufferWithExtendableBuffer *const buffer, const int forwardLinkPos,
+ int *const forwardLinkFieldPos);
+
+ static bool writePtNodeArraySizeAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
+ const size_t arraySize, int *const arraySizeFieldPos);
+
+ static bool writeFlagsAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
+ const DynamicPatriciaTrieReadingUtils::NodeFlags nodeFlags,
+ int *const nodeFlagsFieldPos);
+
+ static bool writeParentPosOffsetAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
+ const int parentPosition, const int basePos, int *const parentPosFieldPos);
+
+ static bool writeCodePointsAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
+ const int *const codePoints, const int codePointCount, int *const codePointFieldPos);
+
+ static bool writeProbabilityAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
+ const int probability, int *const probabilityFieldPos);
+
+ static bool writeChildrenPositionAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
+ const int childrenPosition, int *const childrenPositionFieldPos);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingUtils);
+
+ static const size_t MAX_PTNODE_ARRAY_SIZE_TO_USE_SMALL_SIZE_FIELD;
+ static const size_t MAX_PTNODE_ARRAY_SIZE;
+ static const int SMALL_PTNODE_ARRAY_SIZE_FIELD_SIZE;
+ static const int LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE;
+ static const int LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE_FLAG;
+ static const int DICT_OFFSET_FIELD_SIZE;
+ static const int MAX_DICT_OFFSET_VALUE;
+ static const int MIN_DICT_OFFSET_VALUE;
+ static const int DICT_OFFSET_NEGATIVE_FLAG;
+ static const int PROBABILITY_FIELD_SIZE;
+
+ static bool writeDictOffset(BufferWithExtendableBuffer *const buffer, const int targetPos,
+ const int basePos, int *const offsetFieldPos);
+};
+} // namespace latinime
+#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
new file mode 100644
index 000000000..7bbeacaa0
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
+
+#include <cstddef>
+#include <cstdio>
+#include <ctime>
+
+namespace latinime {
+
+
+// Note that these are corresponding definitions in Java side in FormatSpec.FileHeader.
+const char *const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY = "MULTIPLE_WORDS_DEMOTION_RATE";
+const char *const HeaderPolicy::USES_FORGETTING_CURVE_KEY = "USES_FORGETTING_CURVE";
+const char *const HeaderPolicy::LAST_UPDATED_TIME_KEY = "date";
+const int HeaderPolicy::DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE = 100;
+const float HeaderPolicy::MULTIPLE_WORD_COST_MULTIPLIER_SCALE = 100.0f;
+
+// Used for logging. Question mark is used to indicate that the key is not found.
+void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *outValue,
+ int outValueSize) const {
+ if (outValueSize <= 0) return;
+ if (outValueSize == 1) {
+ outValue[0] = '\0';
+ return;
+ }
+ std::vector<int> keyCodePointVector;
+ HeaderReadWriteUtils::insertCharactersIntoVector(key, &keyCodePointVector);
+ HeaderReadWriteUtils::AttributeMap::const_iterator it = mAttributeMap.find(keyCodePointVector);
+ if (it == mAttributeMap.end()) {
+ // The key was not found.
+ outValue[0] = '?';
+ outValue[1] = '\0';
+ return;
+ }
+ const int terminalIndex = min(static_cast<int>(it->second.size()), outValueSize - 1);
+ for (int i = 0; i < terminalIndex; ++i) {
+ outValue[i] = it->second[i];
+ }
+ outValue[terminalIndex] = '\0';
+}
+
+float HeaderPolicy::readMultipleWordCostMultiplier() const {
+ std::vector<int> keyVector;
+ HeaderReadWriteUtils::insertCharactersIntoVector(MULTIPLE_WORDS_DEMOTION_RATE_KEY, &keyVector);
+ const int demotionRate = HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
+ &keyVector, DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE);
+ if (demotionRate <= 0) {
+ return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
+ }
+ return MULTIPLE_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(demotionRate);
+}
+
+bool HeaderPolicy::readUsesForgettingCurveFlag() const {
+ std::vector<int> keyVector;
+ HeaderReadWriteUtils::insertCharactersIntoVector(USES_FORGETTING_CURVE_KEY, &keyVector);
+ return HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, &keyVector,
+ false /* defaultValue */);
+}
+
+// Returns current time when the key is not found or the value is invalid.
+int HeaderPolicy::readLastUpdatedTime() const {
+ std::vector<int> keyVector;
+ HeaderReadWriteUtils::insertCharactersIntoVector(LAST_UPDATED_TIME_KEY, &keyVector);
+ return HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, &keyVector,
+ time(0) /* defaultValue */);
+}
+
+bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
+ const bool updatesLastUpdatedTime) const {
+ int writingPos = 0;
+ if (!HeaderReadWriteUtils::writeDictionaryVersion(bufferToWrite, mDictFormatVersion,
+ &writingPos)) {
+ return false;
+ }
+ if (!HeaderReadWriteUtils::writeDictionaryFlags(bufferToWrite, mDictionaryFlags,
+ &writingPos)) {
+ return false;
+ }
+ // Temporarily writes a dummy header size.
+ int headerSizeFieldPos = writingPos;
+ if (!HeaderReadWriteUtils::writeDictionaryHeaderSize(bufferToWrite, 0 /* size */,
+ &writingPos)) {
+ return false;
+ }
+ if (updatesLastUpdatedTime) {
+ // Set current time as a last updated time.
+ HeaderReadWriteUtils::AttributeMap attributeMapTowrite(mAttributeMap);
+ std::vector<int> updatedTimekey;
+ HeaderReadWriteUtils::insertCharactersIntoVector(LAST_UPDATED_TIME_KEY, &updatedTimekey);
+ HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, &updatedTimekey, time(0));
+ if (!HeaderReadWriteUtils::writeHeaderAttributes(bufferToWrite, &attributeMapTowrite,
+ &writingPos)) {
+ return false;
+ }
+ } else {
+ if (!HeaderReadWriteUtils::writeHeaderAttributes(bufferToWrite, &mAttributeMap,
+ &writingPos)) {
+ return false;
+ }
+ }
+ // Writes an actual header size.
+ if (!HeaderReadWriteUtils::writeDictionaryHeaderSize(bufferToWrite, writingPos,
+ &headerSizeFieldPos)) {
+ return false;
+ }
+ return true;
+}
+
+/* static */ HeaderReadWriteUtils::AttributeMap
+ HeaderPolicy::createAttributeMapAndReadAllAttributes(const uint8_t *const dictBuf) {
+ HeaderReadWriteUtils::AttributeMap attributeMap;
+ HeaderReadWriteUtils::fetchAllHeaderAttributes(dictBuf, &attributeMap);
+ return attributeMap;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
new file mode 100644
index 000000000..e97c08ca4
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_HEADER_POLICY_H
+#define LATINIME_HEADER_POLICY_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/core/policy/dictionary_header_structure_policy.h"
+#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h"
+#include "suggest/policyimpl/dictionary/utils/format_utils.h"
+
+namespace latinime {
+
+class HeaderPolicy : public DictionaryHeaderStructurePolicy {
+ public:
+ // Reads information from existing dictionary buffer.
+ HeaderPolicy(const uint8_t *const dictBuf, const int dictSize)
+ : mDictFormatVersion(FormatUtils::detectFormatVersion(dictBuf, dictSize)),
+ mDictionaryFlags(HeaderReadWriteUtils::getFlags(dictBuf)),
+ mSize(HeaderReadWriteUtils::getHeaderSize(dictBuf)),
+ mAttributeMap(createAttributeMapAndReadAllAttributes(dictBuf)),
+ mMultiWordCostMultiplier(readMultipleWordCostMultiplier()),
+ mUsesForgettingCurve(readUsesForgettingCurveFlag()),
+ mLastUpdatedTime(readLastUpdatedTime()) {}
+
+ // Constructs header information using an attribute map.
+ HeaderPolicy(const FormatUtils::FORMAT_VERSION dictFormatVersion,
+ const HeaderReadWriteUtils::AttributeMap *const attributeMap)
+ : mDictFormatVersion(dictFormatVersion),
+ mDictionaryFlags(HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap(
+ attributeMap)), mSize(0), mAttributeMap(*attributeMap),
+ mMultiWordCostMultiplier(readUsesForgettingCurveFlag()),
+ mUsesForgettingCurve(readUsesForgettingCurveFlag()),
+ mLastUpdatedTime(readLastUpdatedTime()) {}
+
+ ~HeaderPolicy() {}
+
+ AK_FORCE_INLINE int getSize() const {
+ return mSize;
+ }
+
+ AK_FORCE_INLINE bool supportsDynamicUpdate() const {
+ return HeaderReadWriteUtils::supportsDynamicUpdate(mDictionaryFlags);
+ }
+
+ AK_FORCE_INLINE bool requiresGermanUmlautProcessing() const {
+ return HeaderReadWriteUtils::requiresGermanUmlautProcessing(mDictionaryFlags);
+ }
+
+ AK_FORCE_INLINE bool requiresFrenchLigatureProcessing() const {
+ return HeaderReadWriteUtils::requiresFrenchLigatureProcessing(mDictionaryFlags);
+ }
+
+ AK_FORCE_INLINE float getMultiWordCostMultiplier() const {
+ return mMultiWordCostMultiplier;
+ }
+
+ AK_FORCE_INLINE bool usesForgettingCurve() const {
+ return mUsesForgettingCurve;
+ }
+
+ AK_FORCE_INLINE int getLastUpdatedTime() const {
+ return mLastUpdatedTime;
+ }
+
+ void readHeaderValueOrQuestionMark(const char *const key,
+ int *outValue, int outValueSize) const;
+
+ bool writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
+ const bool updatesLastUpdatedTime) const;
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderPolicy);
+
+ static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY;
+ static const char *const USES_FORGETTING_CURVE_KEY;
+ static const char *const LAST_UPDATED_TIME_KEY;
+ static const int DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE;
+ static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE;
+
+ const FormatUtils::FORMAT_VERSION mDictFormatVersion;
+ const HeaderReadWriteUtils::DictionaryFlags mDictionaryFlags;
+ const int mSize;
+ HeaderReadWriteUtils::AttributeMap mAttributeMap;
+ const float mMultiWordCostMultiplier;
+ const bool mUsesForgettingCurve;
+ const int mLastUpdatedTime;
+
+ float readMultipleWordCostMultiplier() const;
+
+ bool readUsesForgettingCurveFlag() const;
+
+ int readLastUpdatedTime() const;
+
+ static HeaderReadWriteUtils::AttributeMap createAttributeMapAndReadAllAttributes(
+ const uint8_t *const dictBuf);
+};
+} // namespace latinime
+#endif /* LATINIME_HEADER_POLICY_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
new file mode 100644
index 000000000..3b1c78085
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
@@ -0,0 +1,215 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h"
+
+#include <cctype>
+#include <cstdio>
+#include <vector>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
+
+namespace latinime {
+
+const int HeaderReadWriteUtils::MAX_ATTRIBUTE_KEY_LENGTH = 256;
+const int HeaderReadWriteUtils::MAX_ATTRIBUTE_VALUE_LENGTH = 256;
+
+const int HeaderReadWriteUtils::HEADER_MAGIC_NUMBER_SIZE = 4;
+const int HeaderReadWriteUtils::HEADER_DICTIONARY_VERSION_SIZE = 2;
+const int HeaderReadWriteUtils::HEADER_FLAG_SIZE = 2;
+const int HeaderReadWriteUtils::HEADER_SIZE_FIELD_SIZE = 4;
+
+const HeaderReadWriteUtils::DictionaryFlags HeaderReadWriteUtils::NO_FLAGS = 0;
+// Flags for special processing
+// Those *must* match the flags in makedict (FormatSpec#*_PROCESSING_FLAG) or
+// something very bad (like, the apocalypse) will happen. Please update both at the same time.
+const HeaderReadWriteUtils::DictionaryFlags
+ HeaderReadWriteUtils::GERMAN_UMLAUT_PROCESSING_FLAG = 0x1;
+const HeaderReadWriteUtils::DictionaryFlags
+ HeaderReadWriteUtils::SUPPORTS_DYNAMIC_UPDATE_FLAG = 0x2;
+const HeaderReadWriteUtils::DictionaryFlags
+ HeaderReadWriteUtils::FRENCH_LIGATURE_PROCESSING_FLAG = 0x4;
+
+// Note that these are corresponding definitions in Java side in FormatSpec.FileHeader.
+const char *const HeaderReadWriteUtils::SUPPORTS_DYNAMIC_UPDATE_KEY = "SUPPORTS_DYNAMIC_UPDATE";
+const char *const HeaderReadWriteUtils::REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY =
+ "REQUIRES_GERMAN_UMLAUT_PROCESSING";
+const char *const HeaderReadWriteUtils::REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY =
+ "REQUIRES_FRENCH_LIGATURE_PROCESSING";
+
+/* static */ int HeaderReadWriteUtils::getHeaderSize(const uint8_t *const dictBuf) {
+ // See the format of the header in the comment in
+ // BinaryDictionaryFormatUtils::detectFormatVersion()
+ return ByteArrayUtils::readUint32(dictBuf, HEADER_MAGIC_NUMBER_SIZE
+ + HEADER_DICTIONARY_VERSION_SIZE + HEADER_FLAG_SIZE);
+}
+
+/* static */ HeaderReadWriteUtils::DictionaryFlags
+ HeaderReadWriteUtils::getFlags(const uint8_t *const dictBuf) {
+ return ByteArrayUtils::readUint16(dictBuf,
+ HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE);
+}
+
+/* static */ HeaderReadWriteUtils::DictionaryFlags
+ HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap(
+ const HeaderReadWriteUtils::AttributeMap *const attributeMap) {
+ AttributeMap::key_type key;
+ insertCharactersIntoVector(REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY, &key);
+ const bool requiresGermanUmlautProcessing = readBoolAttributeValue(attributeMap, &key,
+ false /* defaultValue */);
+ key.clear();
+ insertCharactersIntoVector(REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY, &key);
+ const bool requiresFrenchLigatureProcessing = readBoolAttributeValue(attributeMap, &key,
+ false /* defaultValue */);
+ key.clear();
+ insertCharactersIntoVector(SUPPORTS_DYNAMIC_UPDATE_KEY, &key);
+ const bool supportsDynamicUpdate = readBoolAttributeValue(attributeMap, &key,
+ false /* defaultValue */);
+ DictionaryFlags dictflags = NO_FLAGS;
+ dictflags |= requiresGermanUmlautProcessing ? GERMAN_UMLAUT_PROCESSING_FLAG : 0;
+ dictflags |= requiresFrenchLigatureProcessing ? FRENCH_LIGATURE_PROCESSING_FLAG : 0;
+ dictflags |= supportsDynamicUpdate ? SUPPORTS_DYNAMIC_UPDATE_FLAG : 0;
+ return dictflags;
+}
+
+/* static */ void HeaderReadWriteUtils::fetchAllHeaderAttributes(const uint8_t *const dictBuf,
+ AttributeMap *const headerAttributes) {
+ const int headerSize = getHeaderSize(dictBuf);
+ int pos = getHeaderOptionsPosition();
+ if (pos == NOT_A_DICT_POS) {
+ // The header doesn't have header options.
+ return;
+ }
+ int keyBuffer[MAX_ATTRIBUTE_KEY_LENGTH];
+ int valueBuffer[MAX_ATTRIBUTE_VALUE_LENGTH];
+ while (pos < headerSize) {
+ const int keyLength = ByteArrayUtils::readStringAndAdvancePosition(dictBuf,
+ MAX_ATTRIBUTE_KEY_LENGTH, keyBuffer, &pos);
+ std::vector<int> key;
+ key.insert(key.end(), keyBuffer, keyBuffer + keyLength);
+ const int valueLength = ByteArrayUtils::readStringAndAdvancePosition(dictBuf,
+ MAX_ATTRIBUTE_VALUE_LENGTH, valueBuffer, &pos);
+ std::vector<int> value;
+ value.insert(value.end(), valueBuffer, valueBuffer + valueLength);
+ headerAttributes->insert(AttributeMap::value_type(key, value));
+ }
+}
+
+/* static */ bool HeaderReadWriteUtils::writeDictionaryVersion(
+ BufferWithExtendableBuffer *const buffer, const FormatUtils::FORMAT_VERSION version,
+ int *const writingPos) {
+ if (!buffer->writeUintAndAdvancePosition(FormatUtils::MAGIC_NUMBER, HEADER_MAGIC_NUMBER_SIZE,
+ writingPos)) {
+ return false;
+ }
+ switch (version) {
+ case FormatUtils::VERSION_2:
+ // Version 2 dictionary writing is not supported.
+ return false;
+ case FormatUtils::VERSION_3:
+ return buffer->writeUintAndAdvancePosition(3 /* data */,
+ HEADER_DICTIONARY_VERSION_SIZE, writingPos);
+ default:
+ return false;
+ }
+}
+
+/* static */ bool HeaderReadWriteUtils::writeDictionaryFlags(
+ BufferWithExtendableBuffer *const buffer, const DictionaryFlags flags,
+ int *const writingPos) {
+ return buffer->writeUintAndAdvancePosition(flags, HEADER_FLAG_SIZE, writingPos);
+}
+
+/* static */ bool HeaderReadWriteUtils::writeDictionaryHeaderSize(
+ BufferWithExtendableBuffer *const buffer, const int size, int *const writingPos) {
+ return buffer->writeUintAndAdvancePosition(size, HEADER_SIZE_FIELD_SIZE, writingPos);
+}
+
+/* static */ bool HeaderReadWriteUtils::writeHeaderAttributes(
+ BufferWithExtendableBuffer *const buffer, const AttributeMap *const headerAttributes,
+ int *const writingPos) {
+ for (AttributeMap::const_iterator it = headerAttributes->begin();
+ it != headerAttributes->end(); ++it) {
+ // Write a key.
+ if (!buffer->writeCodePointsAndAdvancePosition(&(it->first.at(0)), it->first.size(),
+ true /* writesTerminator */, writingPos)) {
+ return false;
+ }
+ // Write a value.
+ if (!buffer->writeCodePointsAndAdvancePosition(&(it->second.at(0)), it->second.size(),
+ true /* writesTerminator */, writingPos)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+/* static */ void HeaderReadWriteUtils::setBoolAttribute(AttributeMap *const headerAttributes,
+ const AttributeMap::key_type *const key, const bool value) {
+ setIntAttribute(headerAttributes, key, value ? 1 : 0);
+}
+
+/* static */ void HeaderReadWriteUtils::setIntAttribute(AttributeMap *const headerAttributes,
+ const AttributeMap::key_type *const key, const int value) {
+ AttributeMap::mapped_type valueVector;
+ char charBuf[LARGEST_INT_DIGIT_COUNT + 1];
+ snprintf(charBuf, LARGEST_INT_DIGIT_COUNT + 1, "%d", value);
+ insertCharactersIntoVector(charBuf, &valueVector);
+ (*headerAttributes)[*key] = valueVector;
+}
+
+/* static */ bool HeaderReadWriteUtils::readBoolAttributeValue(
+ const AttributeMap *const headerAttributes, const AttributeMap::key_type *const key,
+ const bool defaultValue) {
+ const int intDefaultValue = defaultValue ? 1 : 0;
+ const int intValue = readIntAttributeValue(headerAttributes, key, intDefaultValue);
+ return intValue != 0;
+}
+
+/* static */ int HeaderReadWriteUtils::readIntAttributeValue(
+ const AttributeMap *const headerAttributes, const AttributeMap::key_type *const key,
+ const int defaultValue) {
+ AttributeMap::const_iterator it = headerAttributes->find(*key);
+ if (it != headerAttributes->end()) {
+ int value = 0;
+ bool isNegative = false;
+ for (size_t i = 0; i < it->second.size(); ++i) {
+ if (i == 0 && it->second.at(i) == '-') {
+ isNegative = true;
+ } else {
+ if (!isdigit(it->second.at(i))) {
+ // If not a number.
+ return defaultValue;
+ }
+ value *= 10;
+ value += it->second.at(i) - '0';
+ }
+ }
+ return isNegative ? -value : value;
+ }
+ return defaultValue;
+}
+
+/* static */ void HeaderReadWriteUtils::insertCharactersIntoVector(const char *const characters,
+ std::vector<int> *const vector) {
+ for (int i = 0; characters[i]; ++i) {
+ vector->push_back(characters[i]);
+ }
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h
new file mode 100644
index 000000000..caa5097f6
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_HEADER_READ_WRITE_UTILS_H
+#define LATINIME_HEADER_READ_WRITE_UTILS_H
+
+#include <map>
+#include <stdint.h>
+#include <vector>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/utils/format_utils.h"
+
+namespace latinime {
+
+class BufferWithExtendableBuffer;
+
+class HeaderReadWriteUtils {
+ public:
+ typedef uint16_t DictionaryFlags;
+ typedef std::map<std::vector<int>, std::vector<int> > AttributeMap;
+
+ static int getHeaderSize(const uint8_t *const dictBuf);
+
+ static DictionaryFlags getFlags(const uint8_t *const dictBuf);
+
+ static AK_FORCE_INLINE bool supportsDynamicUpdate(const DictionaryFlags flags) {
+ return (flags & SUPPORTS_DYNAMIC_UPDATE_FLAG) != 0;
+ }
+
+ static AK_FORCE_INLINE bool requiresGermanUmlautProcessing(const DictionaryFlags flags) {
+ return (flags & GERMAN_UMLAUT_PROCESSING_FLAG) != 0;
+ }
+
+ static AK_FORCE_INLINE bool requiresFrenchLigatureProcessing(const DictionaryFlags flags) {
+ return (flags & FRENCH_LIGATURE_PROCESSING_FLAG) != 0;
+ }
+
+ static AK_FORCE_INLINE int getHeaderOptionsPosition() {
+ return HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE + HEADER_FLAG_SIZE
+ + HEADER_SIZE_FIELD_SIZE;
+ }
+
+ static DictionaryFlags createAndGetDictionaryFlagsUsingAttributeMap(
+ const HeaderReadWriteUtils::AttributeMap *const attributeMap);
+
+ static void fetchAllHeaderAttributes(const uint8_t *const dictBuf,
+ AttributeMap *const headerAttributes);
+
+ static bool writeDictionaryVersion(BufferWithExtendableBuffer *const buffer,
+ const FormatUtils::FORMAT_VERSION version, int *const writingPos);
+
+ static bool writeDictionaryFlags(BufferWithExtendableBuffer *const buffer,
+ const DictionaryFlags flags, int *const writingPos);
+
+ static bool writeDictionaryHeaderSize(BufferWithExtendableBuffer *const buffer,
+ const int size, int *const writingPos);
+
+ static bool writeHeaderAttributes(BufferWithExtendableBuffer *const buffer,
+ const AttributeMap *const headerAttributes, int *const writingPos);
+
+ /**
+ * Methods for header attributes.
+ */
+ static void setBoolAttribute(AttributeMap *const headerAttributes,
+ const AttributeMap::key_type *const key, const bool value);
+
+ static void setIntAttribute(AttributeMap *const headerAttributes,
+ const AttributeMap::key_type *const key, const int value);
+
+ static bool readBoolAttributeValue(const AttributeMap *const headerAttributes,
+ const AttributeMap::key_type *const key, const bool defaultValue);
+
+ static int readIntAttributeValue(const AttributeMap *const headerAttributes,
+ const AttributeMap::key_type *const key, const int defaultValue);
+
+ static void insertCharactersIntoVector(const char *const characters,
+ AttributeMap::key_type *const key);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderReadWriteUtils);
+
+ static const int MAX_ATTRIBUTE_KEY_LENGTH;
+ static const int MAX_ATTRIBUTE_VALUE_LENGTH;
+
+ static const int HEADER_MAGIC_NUMBER_SIZE;
+ static const int HEADER_DICTIONARY_VERSION_SIZE;
+ static const int HEADER_FLAG_SIZE;
+ static const int HEADER_SIZE_FIELD_SIZE;
+
+ static const DictionaryFlags NO_FLAGS;
+ // Flags for special processing
+ // Those *must* match the flags in makedict (FormatSpec#*_PROCESSING_FLAGS) or
+ // something very bad (like, the apocalypse) will happen. Please update both at the same time.
+ static const DictionaryFlags GERMAN_UMLAUT_PROCESSING_FLAG;
+ static const DictionaryFlags SUPPORTS_DYNAMIC_UPDATE_FLAG;
+ static const DictionaryFlags FRENCH_LIGATURE_PROCESSING_FLAG;
+
+ static const char *const SUPPORTS_DYNAMIC_UPDATE_KEY;
+ static const char *const REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY;
+ static const char *const REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY;
+};
+}
+#endif /* LATINIME_HEADER_READ_WRITE_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp
new file mode 100644
index 000000000..8a84bd261
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp
@@ -0,0 +1,433 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include "suggest/policyimpl/dictionary/patricia_trie_policy.h"
+
+#include "defines.h"
+#include "suggest/core/dicnode/dic_node.h"
+#include "suggest/core/dicnode/dic_node_vector.h"
+#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
+
+namespace latinime {
+
+void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
+ DicNodeVector *const childDicNodes) const {
+ if (!dicNode->hasChildren()) {
+ return;
+ }
+ int nextPos = dicNode->getChildrenPos();
+ if (nextPos < 0 || nextPos >= mDictBufferSize) {
+ AKLOGE("Children PtNode array position is invalid. pos: %d, dict size: %d",
+ nextPos, mDictBufferSize);
+ ASSERT(false);
+ return;
+ }
+ const int childCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
+ mDictRoot, &nextPos);
+ for (int i = 0; i < childCount; i++) {
+ if (nextPos < 0 || nextPos >= mDictBufferSize) {
+ AKLOGE("Child PtNode position is invalid. pos: %d, dict size: %d, childCount: %d / %d",
+ nextPos, mDictBufferSize, i, childCount);
+ ASSERT(false);
+ return;
+ }
+ nextPos = createAndGetLeavingChildNode(dicNode, nextPos, childDicNodes);
+ }
+}
+
+// This retrieves code points and the probability of the word by its terminal position.
+// Due to the fact that words are ordered in the dictionary in a strict breadth-first order,
+// it is possible to check for this with advantageous complexity. For each node, we search
+// for PtNodes with children and compare the children position with the position we look for.
+// When we shoot the position we look for, it means the word we look for is in the children
+// of the previous PtNode. The only tricky part is the fact that if we arrive at the end of a
+// PtNode array with the last PtNode's children position still less than what we are searching for,
+// we must descend the last PtNode's children (for example, if the word we are searching for starts
+// with a z, it's the last PtNode of the root array, so all children addresses will be smaller
+// than the position we look for, and we have to descend the z node).
+/* Parameters :
+ * ptNodePos: the byte position of the terminal PtNode of the word we are searching for (this is
+ * what is stored as the "bigram position" in each bigram)
+ * outCodePoints: an array to write the found word, with MAX_WORD_LENGTH size.
+ * outUnigramProbability: a pointer to an int to write the probability into.
+ * Return value : the code point count, of 0 if the word was not found.
+ */
+// TODO: Split this function to be more readable
+int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
+ const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
+ int *const outUnigramProbability) const {
+ int pos = getRootPosition();
+ int wordPos = 0;
+ // One iteration of the outer loop iterates through PtNode arrays. As stated above, we will
+ // only traverse nodes that are actually a part of the terminal we are searching, so each time
+ // we enter this loop we are one depth level further than last time.
+ // The only reason we count nodes is because we want to reduce the probability of infinite
+ // looping in case there is a bug. Since we know there is an upper bound to the depth we are
+ // supposed to traverse, it does not hurt to count iterations.
+ for (int loopCount = maxCodePointCount; loopCount > 0; --loopCount) {
+ int lastCandidatePtNodePos = 0;
+ // Let's loop through PtNodes in this PtNode array searching for either the terminal
+ // or one of its ascendants.
+ for (int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
+ mDictRoot, &pos); ptNodeCount > 0; --ptNodeCount) {
+ const int startPos = pos;
+ const PatriciaTrieReadingUtils::NodeFlags flags =
+ PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
+ const int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
+ mDictRoot, &pos);
+ if (ptNodePos == startPos) {
+ // We found the position. Copy the rest of the code points in the buffer and return
+ // the length.
+ outCodePoints[wordPos] = character;
+ if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
+ int nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
+ mDictRoot, &pos);
+ // We count code points in order to avoid infinite loops if the file is broken
+ // or if there is some other bug
+ int charCount = maxCodePointCount;
+ while (NOT_A_CODE_POINT != nextChar && --charCount > 0) {
+ outCodePoints[++wordPos] = nextChar;
+ nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
+ mDictRoot, &pos);
+ }
+ }
+ *outUnigramProbability =
+ PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot,
+ &pos);
+ return ++wordPos;
+ }
+ // We need to skip past this PtNode, so skip any remaining code points after the
+ // first and possibly the probability.
+ if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
+ PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
+ }
+ if (PatriciaTrieReadingUtils::isTerminal(flags)) {
+ PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
+ }
+ // The fact that this PtNode has children is very important. Since we already know
+ // that this PtNode does not match, if it has no children we know it is irrelevant
+ // to what we are searching for.
+ const bool hasChildren = PatriciaTrieReadingUtils::hasChildrenInFlags(flags);
+ // We will write in `found' whether we have passed the children position we are
+ // searching for. For example if we search for "beer", the children of b are less
+ // than the address we are searching for and the children of c are greater. When we
+ // come here for c, we realize this is too big, and that we should descend b.
+ bool found;
+ if (hasChildren) {
+ int currentPos = pos;
+ // Here comes the tricky part. First, read the children position.
+ const int childrenPos = PatriciaTrieReadingUtils
+ ::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &currentPos);
+ if (childrenPos > ptNodePos) {
+ // If the children pos is greater than the position, it means the previous
+ // PtNode, which position is stored in lastCandidatePtNodePos, was the right
+ // one.
+ found = true;
+ } else if (1 >= ptNodeCount) {
+ // However if we are on the LAST PtNode of this array, and we have NOT shot the
+ // position we should descend THIS node. So we trick the lastCandidatePtNodePos
+ // so that we will descend this PtNode, not the previous one.
+ lastCandidatePtNodePos = startPos;
+ found = true;
+ } else {
+ // Else, we should continue looking.
+ found = false;
+ }
+ } else {
+ // Even if we don't have children here, we could still be on the last PtNode of /
+ // this array. If this is the case, we should descend the last PtNode that had
+ // children, and their position is already in lastCandidatePtNodePos.
+ found = (1 >= ptNodeCount);
+ }
+
+ if (found) {
+ // Okay, we found the PtNode we should descend. Its position is in
+ // the lastCandidatePtNodePos variable, so we just re-read it.
+ if (0 != lastCandidatePtNodePos) {
+ const PatriciaTrieReadingUtils::NodeFlags lastFlags =
+ PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(
+ mDictRoot, &lastCandidatePtNodePos);
+ const int lastChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
+ mDictRoot, &lastCandidatePtNodePos);
+ // We copy all the characters in this PtNode to the buffer
+ outCodePoints[wordPos] = lastChar;
+ if (PatriciaTrieReadingUtils::hasMultipleChars(lastFlags)) {
+ int nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
+ mDictRoot, &lastCandidatePtNodePos);
+ int charCount = maxCodePointCount;
+ while (-1 != nextChar && --charCount > 0) {
+ outCodePoints[++wordPos] = nextChar;
+ nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
+ mDictRoot, &lastCandidatePtNodePos);
+ }
+ }
+ ++wordPos;
+ // Now we only need to branch to the children address. Skip the probability if
+ // it's there, read pos, and break to resume the search at pos.
+ if (PatriciaTrieReadingUtils::isTerminal(lastFlags)) {
+ PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot,
+ &lastCandidatePtNodePos);
+ }
+ pos = PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
+ mDictRoot, lastFlags, &lastCandidatePtNodePos);
+ break;
+ } else {
+ // Here is a little tricky part: we come here if we found out that all children
+ // addresses in this PtNode are bigger than the address we are searching for.
+ // Should we conclude the word is not in the dictionary? No! It could still be
+ // one of the remaining PtNodes in this array, so we have to keep looking in
+ // this array until we find it (or we realize it's not there either, in which
+ // case it's actually not in the dictionary). Pass the end of this PtNode,
+ // ready to start the next one.
+ if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
+ PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
+ mDictRoot, flags, &pos);
+ }
+ if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
+ mShortcutListPolicy.skipAllShortcuts(&pos);
+ }
+ if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
+ mBigramListPolicy.skipAllBigrams(&pos);
+ }
+ }
+ } else {
+ // If we did not find it, we should record the last children address for the next
+ // iteration.
+ if (hasChildren) lastCandidatePtNodePos = startPos;
+ // Now skip the end of this PtNode (children pos and the attributes if any) so that
+ // our pos is after the end of this PtNode, at the start of the next one.
+ if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
+ PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
+ mDictRoot, flags, &pos);
+ }
+ if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
+ mShortcutListPolicy.skipAllShortcuts(&pos);
+ }
+ if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
+ mBigramListPolicy.skipAllBigrams(&pos);
+ }
+ }
+
+ }
+ }
+ // If we have looked through all the PtNodes and found no match, the ptNodePos is
+ // not the position of a terminal in this dictionary.
+ return 0;
+}
+
+// This function gets the position of the terminal node of the exact matching word in the
+// dictionary. If no match is found, it returns NOT_A_DICT_POS.
+int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
+ const int length, const bool forceLowerCaseSearch) const {
+ int pos = getRootPosition();
+ int wordPos = 0;
+
+ while (true) {
+ // If we already traversed the tree further than the word is long, there means
+ // there was no match (or we would have found it).
+ if (wordPos >= length) return NOT_A_DICT_POS;
+ int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(mDictRoot,
+ &pos);
+ const int wChar = forceLowerCaseSearch
+ ? CharUtils::toLowerCase(inWord[wordPos]) : inWord[wordPos];
+ while (true) {
+ // If there are no more PtNodes in this array, it means we could not
+ // find a matching character for this depth, therefore there is no match.
+ if (0 >= ptNodeCount) return NOT_A_DICT_POS;
+ const int ptNodePos = pos;
+ const PatriciaTrieReadingUtils::NodeFlags flags =
+ PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
+ int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot,
+ &pos);
+ if (character == wChar) {
+ // This is the correct PtNode. Only one PtNode may start with the same char within
+ // a PtNode array, so either we found our match in this array, or there is
+ // no match and we can return NOT_A_DICT_POS. So we will check all the
+ // characters in this PtNode indeed does match.
+ if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
+ character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot,
+ &pos);
+ while (NOT_A_CODE_POINT != character) {
+ ++wordPos;
+ // If we shoot the length of the word we search for, or if we find a single
+ // character that does not match, as explained above, it means the word is
+ // not in the dictionary (by virtue of this PtNode being the only one to
+ // match the word on the first character, but not matching the whole word).
+ if (wordPos >= length) return NOT_A_DICT_POS;
+ if (inWord[wordPos] != character) return NOT_A_DICT_POS;
+ character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
+ mDictRoot, &pos);
+ }
+ }
+ // If we come here we know that so far, we do match. Either we are on a terminal
+ // and we match the length, in which case we found it, or we traverse children.
+ // If we don't match the length AND don't have children, then a word in the
+ // dictionary fully matches a prefix of the searched word but not the full word.
+ ++wordPos;
+ if (PatriciaTrieReadingUtils::isTerminal(flags)) {
+ if (wordPos == length) {
+ return ptNodePos;
+ }
+ PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
+ }
+ if (!PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
+ return NOT_A_DICT_POS;
+ }
+ // We have children and we are still shorter than the word we are searching for, so
+ // we need to traverse children. Put the pointer on the children position, and
+ // break
+ pos = PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot,
+ flags, &pos);
+ break;
+ } else {
+ // This PtNode does not match, so skip the remaining part and go to the next.
+ if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
+ PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH,
+ &pos);
+ }
+ if (PatriciaTrieReadingUtils::isTerminal(flags)) {
+ PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
+ }
+ if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
+ PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot,
+ flags, &pos);
+ }
+ if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
+ mShortcutListPolicy.skipAllShortcuts(&pos);
+ }
+ if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
+ mBigramListPolicy.skipAllBigrams(&pos);
+ }
+ }
+ --ptNodeCount;
+ }
+ }
+}
+
+int PatriciaTriePolicy::getProbability(const int unigramProbability,
+ const int bigramProbability) const {
+ if (unigramProbability == NOT_A_PROBABILITY) {
+ return NOT_A_PROBABILITY;
+ } else if (bigramProbability == NOT_A_PROBABILITY) {
+ return ProbabilityUtils::backoff(unigramProbability);
+ } else {
+ return ProbabilityUtils::computeProbabilityForBigram(unigramProbability,
+ bigramProbability);
+ }
+}
+
+int PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const {
+ if (ptNodePos == NOT_A_DICT_POS) {
+ return NOT_A_PROBABILITY;
+ }
+ int pos = ptNodePos;
+ const PatriciaTrieReadingUtils::NodeFlags flags =
+ PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
+ if (!PatriciaTrieReadingUtils::isTerminal(flags)) {
+ return NOT_A_PROBABILITY;
+ }
+ if (PatriciaTrieReadingUtils::isNotAWord(flags)
+ || PatriciaTrieReadingUtils::isBlacklisted(flags)) {
+ // If this is not a word, or if it's a blacklisted entry, it should behave as
+ // having no probability outside of the suggestion process (where it should be used
+ // for shortcuts).
+ return NOT_A_PROBABILITY;
+ }
+ PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
+ return getProbability(PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(
+ mDictRoot, &pos), NOT_A_PROBABILITY);
+}
+
+int PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
+ if (ptNodePos == NOT_A_DICT_POS) {
+ return NOT_A_DICT_POS;
+ }
+ int pos = ptNodePos;
+ const PatriciaTrieReadingUtils::NodeFlags flags =
+ PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
+ if (!PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
+ return NOT_A_DICT_POS;
+ }
+ PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
+ if (PatriciaTrieReadingUtils::isTerminal(flags)) {
+ PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
+ }
+ if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
+ PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &pos);
+ }
+ return pos;
+}
+
+int PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
+ if (ptNodePos == NOT_A_DICT_POS) {
+ return NOT_A_DICT_POS;
+ }
+ int pos = ptNodePos;
+ const PatriciaTrieReadingUtils::NodeFlags flags =
+ PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
+ if (!PatriciaTrieReadingUtils::hasBigrams(flags)) {
+ return NOT_A_DICT_POS;
+ }
+ PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
+ if (PatriciaTrieReadingUtils::isTerminal(flags)) {
+ PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
+ }
+ if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
+ PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &pos);
+ }
+ if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
+ mShortcutListPolicy.skipAllShortcuts(&pos);;
+ }
+ return pos;
+}
+
+int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode,
+ const int ptNodePos, DicNodeVector *childDicNodes) const {
+ int pos = ptNodePos;
+ const PatriciaTrieReadingUtils::NodeFlags flags =
+ PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
+ int mergedNodeCodePoints[MAX_WORD_LENGTH];
+ const int mergedNodeCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
+ mDictRoot, flags, MAX_WORD_LENGTH, mergedNodeCodePoints, &pos);
+ const int probability = (PatriciaTrieReadingUtils::isTerminal(flags))?
+ PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos)
+ : NOT_A_PROBABILITY;
+ const int childrenPos = PatriciaTrieReadingUtils::hasChildrenInFlags(flags) ?
+ PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
+ mDictRoot, flags, &pos) : NOT_A_DICT_POS;
+ if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
+ getShortcutsStructurePolicy()->skipAllShortcuts(&pos);
+ }
+ if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
+ getBigramsStructurePolicy()->skipAllBigrams(&pos);
+ }
+ if (mergedNodeCodePointCount <= 0) {
+ AKLOGE("Empty PtNode is not allowed. Code point count: %d", mergedNodeCodePointCount);
+ ASSERT(false);
+ return pos;
+ }
+ childDicNodes->pushLeavingChild(dicNode, ptNodePos, childrenPos, probability,
+ PatriciaTrieReadingUtils::isTerminal(flags),
+ PatriciaTrieReadingUtils::hasChildrenInFlags(flags),
+ PatriciaTrieReadingUtils::isBlacklisted(flags) ||
+ PatriciaTrieReadingUtils::isNotAWord(flags),
+ mergedNodeCodePointCount, mergedNodeCodePoints);
+ return pos;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
new file mode 100644
index 000000000..f1de914cb
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PATRICIA_TRIE_POLICY_H
+#define LATINIME_PATRICIA_TRIE_POLICY_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+#include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
+#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h"
+#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+
+namespace latinime {
+
+class DicNode;
+class DicNodeVector;
+
+class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
+ public:
+ PatriciaTriePolicy(const MmappedBuffer *const buffer)
+ : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), buffer->getBufferSize()),
+ mDictRoot(mBuffer->getBuffer() + mHeaderPolicy.getSize()),
+ mDictBufferSize(mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
+ mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {}
+
+ ~PatriciaTriePolicy() {
+ delete mBuffer;
+ }
+
+ AK_FORCE_INLINE int getRootPosition() const {
+ return 0;
+ }
+
+ void createAndGetAllChildNodes(const DicNode *const dicNode,
+ DicNodeVector *const childDicNodes) const;
+
+ int getCodePointsAndProbabilityAndReturnCodePointCount(
+ const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints,
+ int *const outUnigramProbability) const;
+
+ int getTerminalNodePositionOfWord(const int *const inWord,
+ const int length, const bool forceLowerCaseSearch) const;
+
+ int getProbability(const int unigramProbability, const int bigramProbability) const;
+
+ int getUnigramProbabilityOfPtNode(const int ptNodePos) const;
+
+ int getShortcutPositionOfPtNode(const int ptNodePos) const;
+
+ int getBigramsPositionOfPtNode(const int ptNodePos) const;
+
+ const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
+ return &mHeaderPolicy;
+ }
+
+ const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const {
+ return &mBigramListPolicy;
+ }
+
+ const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
+ return &mShortcutListPolicy;
+ }
+
+ bool addUnigramWord(const int *const word, const int length, const int probability) {
+ // This method should not be called for non-updatable dictionary.
+ AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
+ return false;
+ }
+
+ bool addBigramWords(const int *const word0, const int length0, const int *const word1,
+ const int length1, const int probability) {
+ // This method should not be called for non-updatable dictionary.
+ AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
+ return false;
+ }
+
+ bool removeBigramWords(const int *const word0, const int length0, const int *const word1,
+ const int length1) {
+ // This method should not be called for non-updatable dictionary.
+ AKLOGI("Warning: removeBigramWords() is called for non-updatable dictionary.");
+ return false;
+ }
+
+ void flush(const char *const filePath) {
+ // This method should not be called for non-updatable dictionary.
+ AKLOGI("Warning: flush() is called for non-updatable dictionary.");
+ }
+
+ void flushWithGC(const char *const filePath) {
+ // This method should not be called for non-updatable dictionary.
+ AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
+ }
+
+ bool needsToRunGC() const {
+ // This method should not be called for non-updatable dictionary.
+ AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
+ return false;
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);
+
+ const MmappedBuffer *const mBuffer;
+ const HeaderPolicy mHeaderPolicy;
+ const uint8_t *const mDictRoot;
+ const int mDictBufferSize;
+ const BigramListPolicy mBigramListPolicy;
+ const ShortcutListPolicy mShortcutListPolicy;
+
+ int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos,
+ DicNodeVector *const childDicNodes) const;
+};
+} // namespace latinime
+#endif // LATINIME_PATRICIA_TRIE_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp
new file mode 100644
index 000000000..7df55815f
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp
@@ -0,0 +1,133 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
+
+namespace latinime {
+
+typedef PatriciaTrieReadingUtils PtReadingUtils;
+
+const PtReadingUtils::NodeFlags PtReadingUtils::MASK_CHILDREN_POSITION_TYPE = 0xC0;
+const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_NOPOSITION = 0x00;
+const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_ONEBYTE = 0x40;
+const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_TWOBYTES = 0x80;
+const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_THREEBYTES = 0xC0;
+
+// Flag for single/multiple char group
+const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_MULTIPLE_CHARS = 0x20;
+// Flag for terminal PtNodes
+const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_TERMINAL = 0x10;
+// Flag for shortcut targets presence
+const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_SHORTCUT_TARGETS = 0x08;
+// Flag for bigram presence
+const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_BIGRAMS = 0x04;
+// Flag for non-words (typically, shortcut only entries)
+const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_NOT_A_WORD = 0x02;
+// Flag for blacklist
+const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_BLACKLISTED = 0x01;
+
+/* static */ int PtReadingUtils::getPtNodeArraySizeAndAdvancePosition(
+ const uint8_t *const buffer, int *const pos) {
+ const uint8_t firstByte = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos);
+ if (firstByte < 0x80) {
+ return firstByte;
+ } else {
+ return ((firstByte & 0x7F) << 8) ^ ByteArrayUtils::readUint8AndAdvancePosition(
+ buffer, pos);
+ }
+}
+
+/* static */ PtReadingUtils::NodeFlags PtReadingUtils::getFlagsAndAdvancePosition(
+ const uint8_t *const buffer, int *const pos) {
+ return ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos);
+}
+
+/* static */ int PtReadingUtils::getCodePointAndAdvancePosition(const uint8_t *const buffer,
+ int *const pos) {
+ return ByteArrayUtils::readCodePointAndAdvancePosition(buffer, pos);
+}
+
+// Returns the number of read characters.
+/* static */ int PtReadingUtils::getCharsAndAdvancePosition(const uint8_t *const buffer,
+ const NodeFlags flags, const int maxLength, int *const outBuffer, int *const pos) {
+ int length = 0;
+ if (hasMultipleChars(flags)) {
+ length = ByteArrayUtils::readStringAndAdvancePosition(buffer, maxLength, outBuffer,
+ pos);
+ } else {
+ const int codePoint = getCodePointAndAdvancePosition(buffer, pos);
+ if (codePoint == NOT_A_CODE_POINT) {
+ // CAVEAT: codePoint == NOT_A_CODE_POINT means the code point is
+ // CHARACTER_ARRAY_TERMINATOR. The code point must not be CHARACTER_ARRAY_TERMINATOR
+ // when the PtNode has a single code point.
+ length = 0;
+ AKLOGE("codePoint is NOT_A_CODE_POINT. pos: %d, codePoint: 0x%x, buffer[pos - 1]: 0x%x",
+ *pos - 1, codePoint, buffer[*pos - 1]);
+ ASSERT(false);
+ } else if (maxLength > 0) {
+ outBuffer[0] = codePoint;
+ length = 1;
+ }
+ }
+ return length;
+}
+
+// Returns the number of skipped characters.
+/* static */ int PtReadingUtils::skipCharacters(const uint8_t *const buffer, const NodeFlags flags,
+ const int maxLength, int *const pos) {
+ if (hasMultipleChars(flags)) {
+ return ByteArrayUtils::advancePositionToBehindString(buffer, maxLength, pos);
+ } else {
+ if (maxLength > 0) {
+ getCodePointAndAdvancePosition(buffer, pos);
+ return 1;
+ } else {
+ return 0;
+ }
+ }
+}
+
+/* static */ int PtReadingUtils::readProbabilityAndAdvancePosition(const uint8_t *const buffer,
+ int *const pos) {
+ return ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos);
+}
+
+/* static */ int PtReadingUtils::readChildrenPositionAndAdvancePosition(
+ const uint8_t *const buffer, const NodeFlags flags, int *const pos) {
+ const int base = *pos;
+ int offset = 0;
+ switch (MASK_CHILDREN_POSITION_TYPE & flags) {
+ case FLAG_CHILDREN_POSITION_TYPE_ONEBYTE:
+ offset = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos);
+ break;
+ case FLAG_CHILDREN_POSITION_TYPE_TWOBYTES:
+ offset = ByteArrayUtils::readUint16AndAdvancePosition(buffer, pos);
+ break;
+ case FLAG_CHILDREN_POSITION_TYPE_THREEBYTES:
+ offset = ByteArrayUtils::readUint24AndAdvancePosition(buffer, pos);
+ break;
+ default:
+ // If we come here, it means we asked for the children of a word with
+ // no children.
+ return NOT_A_DICT_POS;
+ }
+ return base + offset;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h
new file mode 100644
index 000000000..8420ee95a
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h
@@ -0,0 +1,120 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PATRICIA_TRIE_READING_UTILS_H
+#define LATINIME_PATRICIA_TRIE_READING_UTILS_H
+
+#include <stdint.h>
+
+#include "defines.h"
+
+namespace latinime {
+
+class PatriciaTrieReadingUtils {
+ public:
+ typedef uint8_t NodeFlags;
+
+ static int getPtNodeArraySizeAndAdvancePosition(const uint8_t *const buffer, int *const pos);
+
+ static NodeFlags getFlagsAndAdvancePosition(const uint8_t *const buffer, int *const pos);
+
+ static int getCodePointAndAdvancePosition(const uint8_t *const buffer, int *const pos);
+
+ // Returns the number of read characters.
+ static int getCharsAndAdvancePosition(const uint8_t *const buffer, const NodeFlags flags,
+ const int maxLength, int *const outBuffer, int *const pos);
+
+ // Returns the number of skipped characters.
+ static int skipCharacters(const uint8_t *const buffer, const NodeFlags flags,
+ const int maxLength, int *const pos);
+
+ static int readProbabilityAndAdvancePosition(const uint8_t *const buffer, int *const pos);
+
+ static int readChildrenPositionAndAdvancePosition(const uint8_t *const buffer,
+ const NodeFlags flags, int *const pos);
+
+ /**
+ * Node Flags
+ */
+ static AK_FORCE_INLINE bool isBlacklisted(const NodeFlags flags) {
+ return (flags & FLAG_IS_BLACKLISTED) != 0;
+ }
+
+ static AK_FORCE_INLINE bool isNotAWord(const NodeFlags flags) {
+ return (flags & FLAG_IS_NOT_A_WORD) != 0;
+ }
+
+ static AK_FORCE_INLINE bool isTerminal(const NodeFlags flags) {
+ return (flags & FLAG_IS_TERMINAL) != 0;
+ }
+
+ static AK_FORCE_INLINE bool hasShortcutTargets(const NodeFlags flags) {
+ return (flags & FLAG_HAS_SHORTCUT_TARGETS) != 0;
+ }
+
+ static AK_FORCE_INLINE bool hasBigrams(const NodeFlags flags) {
+ return (flags & FLAG_HAS_BIGRAMS) != 0;
+ }
+
+ static AK_FORCE_INLINE bool hasMultipleChars(const NodeFlags flags) {
+ return (flags & FLAG_HAS_MULTIPLE_CHARS) != 0;
+ }
+
+ static AK_FORCE_INLINE bool hasChildrenInFlags(const NodeFlags flags) {
+ return FLAG_CHILDREN_POSITION_TYPE_NOPOSITION != (MASK_CHILDREN_POSITION_TYPE & flags);
+ }
+
+ static AK_FORCE_INLINE NodeFlags createAndGetFlags(const bool isBlacklisted,
+ const bool isNotAWord, const bool isTerminal, const bool hasShortcutTargets,
+ const bool hasBigrams, const bool hasMultipleChars,
+ const int childrenPositionFieldSize) {
+ NodeFlags nodeFlags = 0;
+ nodeFlags = isBlacklisted ? (nodeFlags | FLAG_IS_BLACKLISTED) : nodeFlags;
+ nodeFlags = isNotAWord ? (nodeFlags | FLAG_IS_NOT_A_WORD) : nodeFlags;
+ nodeFlags = isTerminal ? (nodeFlags | FLAG_IS_TERMINAL) : nodeFlags;
+ nodeFlags = hasShortcutTargets ? (nodeFlags | FLAG_HAS_SHORTCUT_TARGETS) : nodeFlags;
+ nodeFlags = hasBigrams ? (nodeFlags | FLAG_HAS_BIGRAMS) : nodeFlags;
+ nodeFlags = hasMultipleChars ? (nodeFlags | FLAG_HAS_MULTIPLE_CHARS) : nodeFlags;
+ if (childrenPositionFieldSize == 1) {
+ nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_ONEBYTE;
+ } else if (childrenPositionFieldSize == 2) {
+ nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_TWOBYTES;
+ } else if (childrenPositionFieldSize == 3) {
+ nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_THREEBYTES;
+ } else {
+ nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_NOPOSITION;
+ }
+ return nodeFlags;
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTrieReadingUtils);
+
+ static const NodeFlags MASK_CHILDREN_POSITION_TYPE;
+ static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_NOPOSITION;
+ static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_ONEBYTE;
+ static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_TWOBYTES;
+ static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_THREEBYTES;
+
+ static const NodeFlags FLAG_HAS_MULTIPLE_CHARS;
+ static const NodeFlags FLAG_IS_TERMINAL;
+ static const NodeFlags FLAG_HAS_SHORTCUT_TARGETS;
+ static const NodeFlags FLAG_HAS_BIGRAMS;
+ static const NodeFlags FLAG_IS_NOT_A_WORD;
+ static const NodeFlags FLAG_IS_BLACKLISTED;
+};
+} // namespace latinime
+#endif /* LATINIME_PATRICIA_TRIE_NODE_READING_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h
new file mode 100644
index 000000000..bd3211f6a
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DYNAMIC_SHORTCUT_LIST_POLICY_H
+#define LATINIME_DYNAMIC_SHORTCUT_LIST_POLICY_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
+#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+/*
+ * This is a dynamic version of ShortcutListPolicy and supports an additional buffer.
+ */
+class DynamicShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
+ public:
+ explicit DynamicShortcutListPolicy(const BufferWithExtendableBuffer *const buffer)
+ : mBuffer(buffer) {}
+
+ ~DynamicShortcutListPolicy() {}
+
+ int getStartPos(const int pos) const {
+ if (pos == NOT_A_DICT_POS) {
+ return NOT_A_DICT_POS;
+ }
+ return pos + ShortcutListReadingUtils::getShortcutListSizeFieldSize();
+ }
+
+ void getNextShortcut(const int maxCodePointCount, int *const outCodePoint,
+ int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext,
+ int *const pos) const {
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
+ const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
+ if (usesAdditionalBuffer) {
+ *pos -= mBuffer->getOriginalBufferSize();
+ }
+ const ShortcutListReadingUtils::ShortcutFlags flags =
+ ShortcutListReadingUtils::getFlagsAndForwardPointer(buffer, pos);
+ if (outHasNext) {
+ *outHasNext = ShortcutListReadingUtils::hasNext(flags);
+ }
+ if (outIsWhitelist) {
+ *outIsWhitelist = ShortcutListReadingUtils::isWhitelist(flags);
+ }
+ if (outCodePoint) {
+ *outCodePointCount = ShortcutListReadingUtils::readShortcutTarget(
+ buffer, maxCodePointCount, outCodePoint, pos);
+ }
+ if (usesAdditionalBuffer) {
+ *pos += mBuffer->getOriginalBufferSize();
+ }
+ }
+
+ void skipAllShortcuts(int *const pos) const {
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
+ const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
+ if (usesAdditionalBuffer) {
+ *pos -= mBuffer->getOriginalBufferSize();
+ }
+ const int shortcutListSize = ShortcutListReadingUtils
+ ::getShortcutListSizeAndForwardPointer(buffer, pos);
+ *pos += shortcutListSize;
+ if (usesAdditionalBuffer) {
+ *pos += mBuffer->getOriginalBufferSize();
+ }
+ }
+
+ // Copy shortcuts from the shortcut list that starts at fromPos in mBuffer to toPos in
+ // bufferToWrite and advance these positions after the shortcut lists. This returns whether
+ // the copy was succeeded or not.
+ bool copyAllShortcutsAndReturnIfSucceededOrNot(BufferWithExtendableBuffer *const bufferToWrite,
+ int *const fromPos, int *const toPos) const {
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*fromPos);
+ if (usesAdditionalBuffer) {
+ *fromPos -= mBuffer->getOriginalBufferSize();
+ }
+ const int shortcutListSize = ShortcutListReadingUtils
+ ::getShortcutListSizeAndForwardPointer(mBuffer->getBuffer(usesAdditionalBuffer),
+ fromPos);
+ // Copy shortcut list size.
+ if (!bufferToWrite->writeUintAndAdvancePosition(
+ shortcutListSize + ShortcutListReadingUtils::getShortcutListSizeFieldSize(),
+ ShortcutListReadingUtils::getShortcutListSizeFieldSize(), toPos)) {
+ return false;
+ }
+ // Copy shortcut list.
+ for (int i = 0; i < shortcutListSize; ++i) {
+ const uint8_t data = ByteArrayUtils::readUint8AndAdvancePosition(
+ mBuffer->getBuffer(usesAdditionalBuffer), fromPos);
+ if (!bufferToWrite->writeUintAndAdvancePosition(data, 1 /* size */, toPos)) {
+ return false;
+ }
+ }
+ if (usesAdditionalBuffer) {
+ *fromPos += mBuffer->getOriginalBufferSize();
+ }
+ return true;
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicShortcutListPolicy);
+
+ const BufferWithExtendableBuffer *const mBuffer;
+};
+} // namespace latinime
+#endif // LATINIME_DYNAMIC_SHORTCUT_LIST_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h
new file mode 100644
index 000000000..d73f73953
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SHORTCUT_LIST_POLICY_H
+#define LATINIME_SHORTCUT_LIST_POLICY_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
+#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h"
+
+namespace latinime {
+
+class ShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
+ public:
+ explicit ShortcutListPolicy(const uint8_t *const shortcutBuf)
+ : mShortcutsBuf(shortcutBuf) {}
+
+ ~ShortcutListPolicy() {}
+
+ int getStartPos(const int pos) const {
+ if (pos == NOT_A_DICT_POS) {
+ return NOT_A_DICT_POS;
+ }
+ int listPos = pos;
+ ShortcutListReadingUtils::getShortcutListSizeAndForwardPointer(mShortcutsBuf, &listPos);
+ return listPos;
+ }
+
+ void getNextShortcut(const int maxCodePointCount, int *const outCodePoint,
+ int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext,
+ int *const pos) const {
+ const ShortcutListReadingUtils::ShortcutFlags flags =
+ ShortcutListReadingUtils::getFlagsAndForwardPointer(mShortcutsBuf, pos);
+ if (outHasNext) {
+ *outHasNext = ShortcutListReadingUtils::hasNext(flags);
+ }
+ if (outIsWhitelist) {
+ *outIsWhitelist = ShortcutListReadingUtils::isWhitelist(flags);
+ }
+ if (outCodePoint) {
+ *outCodePointCount = ShortcutListReadingUtils::readShortcutTarget(
+ mShortcutsBuf, maxCodePointCount, outCodePoint, pos);
+ }
+ }
+
+ void skipAllShortcuts(int *const pos) const {
+ const int shortcutListSize = ShortcutListReadingUtils
+ ::getShortcutListSizeAndForwardPointer(mShortcutsBuf, pos);
+ *pos += shortcutListSize;
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(ShortcutListPolicy);
+
+ const uint8_t *const mShortcutsBuf;
+};
+} // namespace latinime
+#endif // LATINIME_SHORTCUT_LIST_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp
new file mode 100644
index 000000000..847dcdee5
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h"
+
+#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
+
+namespace latinime {
+
+// Flag for presence of more attributes
+const ShortcutListReadingUtils::ShortcutFlags
+ ShortcutListReadingUtils::FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
+// Mask for attribute probability, stored on 4 bits inside the flags byte.
+const ShortcutListReadingUtils::ShortcutFlags
+ ShortcutListReadingUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
+const int ShortcutListReadingUtils::SHORTCUT_LIST_SIZE_FIELD_SIZE = 2;
+// The numeric value of the shortcut probability that means 'whitelist'.
+const int ShortcutListReadingUtils::WHITELIST_SHORTCUT_PROBABILITY = 15;
+
+/* static */ ShortcutListReadingUtils::ShortcutFlags
+ ShortcutListReadingUtils::getFlagsAndForwardPointer(const uint8_t *const dictRoot,
+ int *const pos) {
+ return ByteArrayUtils::readUint8AndAdvancePosition(dictRoot, pos);
+}
+
+/* static */ int ShortcutListReadingUtils::getShortcutListSizeAndForwardPointer(
+ const uint8_t *const dictRoot, int *const pos) {
+ // readUint16andAdvancePosition() returns an offset *including* the uint16 field itself.
+ return ByteArrayUtils::readUint16AndAdvancePosition(dictRoot, pos)
+ - SHORTCUT_LIST_SIZE_FIELD_SIZE;
+}
+
+/* static */ int ShortcutListReadingUtils::readShortcutTarget(
+ const uint8_t *const dictRoot, const int maxLength, int *const outWord, int *const pos) {
+ return ByteArrayUtils::readStringAndAdvancePosition(dictRoot, maxLength, outWord, pos);
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h
new file mode 100644
index 000000000..a83ed5a50
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SHORTCUT_LIST_READING_UTILS_H
+#define LATINIME_SHORTCUT_LIST_READING_UTILS_H
+
+#include <stdint.h>
+
+#include "defines.h"
+
+namespace latinime {
+
+class ShortcutListReadingUtils {
+ public:
+ typedef uint8_t ShortcutFlags;
+
+ static ShortcutFlags getFlagsAndForwardPointer(const uint8_t *const dictRoot, int *const pos);
+
+ static AK_FORCE_INLINE int getProbabilityFromFlags(const ShortcutFlags flags) {
+ return flags & MASK_ATTRIBUTE_PROBABILITY;
+ }
+
+ static AK_FORCE_INLINE bool hasNext(const ShortcutFlags flags) {
+ return (flags & FLAG_ATTRIBUTE_HAS_NEXT) != 0;
+ }
+
+ // This method returns the size of the shortcut list region excluding the shortcut list size
+ // field at the beginning.
+ static int getShortcutListSizeAndForwardPointer(const uint8_t *const dictRoot, int *const pos);
+
+ static AK_FORCE_INLINE int getShortcutListSizeFieldSize() {
+ return SHORTCUT_LIST_SIZE_FIELD_SIZE;
+ }
+
+ static AK_FORCE_INLINE void skipShortcuts(const uint8_t *const dictRoot, int *const pos) {
+ const int shortcutListSize = getShortcutListSizeAndForwardPointer(dictRoot, pos);
+ *pos += shortcutListSize;
+ }
+
+ static AK_FORCE_INLINE bool isWhitelist(const ShortcutFlags flags) {
+ return getProbabilityFromFlags(flags) == WHITELIST_SHORTCUT_PROBABILITY;
+ }
+
+ static int readShortcutTarget(const uint8_t *const dictRoot, const int maxLength,
+ int *const outWord, int *const pos);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(ShortcutListReadingUtils);
+
+ static const ShortcutFlags FLAG_ATTRIBUTE_HAS_NEXT;
+ static const ShortcutFlags MASK_ATTRIBUTE_PROBABILITY;
+ static const int SHORTCUT_LIST_SIZE_FIELD_SIZE;
+ static const int WHITELIST_SHORTCUT_PROBABILITY;
+};
+} // namespace latinime
+#endif // LATINIME_SHORTCUT_LIST_READING_UTILS_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
new file mode 100644
index 000000000..f692882f2
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+const size_t BufferWithExtendableBuffer::MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024;
+const int BufferWithExtendableBuffer::NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE = 90;
+// TODO: Needs to allocate larger memory corresponding to the current vector size.
+const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 128 * 1024;
+
+bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data, const int size,
+ int *const pos) {
+ if (!(size >= 1 && size <= 4)) {
+ AKLOGI("writeUintAndAdvancePosition() is called with invalid size: %d", size);
+ ASSERT(false);
+ return false;
+ }
+ if (!checkAndPrepareWriting(*pos, size)) {
+ return false;
+ }
+ const bool usesAdditionalBuffer = isInAdditionalBuffer(*pos);
+ uint8_t *const buffer = usesAdditionalBuffer ? &mAdditionalBuffer[0] : mOriginalBuffer;
+ if (usesAdditionalBuffer) {
+ *pos -= mOriginalBufferSize;
+ }
+ ByteArrayUtils::writeUintAndAdvancePosition(buffer, data, size, pos);
+ if (usesAdditionalBuffer) {
+ *pos += mOriginalBufferSize;
+ }
+ return true;
+}
+
+bool BufferWithExtendableBuffer::writeCodePointsAndAdvancePosition(const int *const codePoints,
+ const int codePointCount, const bool writesTerminator ,int *const pos) {
+ const size_t size = ByteArrayUtils::calculateRequiredByteCountToStoreCodePoints(
+ codePoints, codePointCount, writesTerminator);
+ if (!checkAndPrepareWriting(*pos, size)) {
+ return false;
+ }
+ const bool usesAdditionalBuffer = isInAdditionalBuffer(*pos);
+ uint8_t *const buffer = usesAdditionalBuffer ? &mAdditionalBuffer[0] : mOriginalBuffer;
+ if (usesAdditionalBuffer) {
+ *pos -= mOriginalBufferSize;
+ }
+ ByteArrayUtils::writeCodePointsAndAdvancePosition(buffer, codePoints, codePointCount,
+ writesTerminator, pos);
+ if (usesAdditionalBuffer) {
+ *pos += mOriginalBufferSize;
+ }
+ return true;
+}
+
+bool BufferWithExtendableBuffer::extendBuffer() {
+ const size_t sizeAfterExtending =
+ mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP;
+ if (sizeAfterExtending > mMaxAdditionalBufferSize) {
+ return false;
+ }
+ mAdditionalBuffer.resize(mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP);
+ return true;
+}
+
+bool BufferWithExtendableBuffer::checkAndPrepareWriting(const int pos, const int size) {
+ if (isInAdditionalBuffer(pos)) {
+ const int tailPosition = getTailPosition();
+ if (pos == tailPosition) {
+ // Append data to the tail.
+ if (pos + size > static_cast<int>(mAdditionalBuffer.size()) + mOriginalBufferSize) {
+ // Need to extend buffer.
+ if (!extendBuffer()) {
+ return false;
+ }
+ }
+ mUsedAdditionalBufferSize += size;
+ } else if (pos + size > tailPosition) {
+ // The access will beyond the tail of used region.
+ return false;
+ }
+ } else {
+ if (pos < 0 || mOriginalBufferSize < pos + size) {
+ // Invalid position or violate the boundary.
+ return false;
+ }
+ }
+ return true;
+}
+
+}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
new file mode 100644
index 000000000..17d2e39c2
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_BUFFER_WITH_EXTENDABLE_BUFFER_H
+#define LATINIME_BUFFER_WITH_EXTENDABLE_BUFFER_H
+
+#include <cstddef>
+#include <stdint.h>
+#include <vector>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
+
+namespace latinime {
+
+// This is used as a buffer that can be extended for updatable dictionaries.
+// To optimize performance, raw pointer is directly used for reading buffer. The position has to be
+// adjusted to access additional buffer. On the other hand, this class does not provide writable
+// raw pointer but provides several methods that handle boundary checking for writing data.
+class BufferWithExtendableBuffer {
+ public:
+ BufferWithExtendableBuffer(uint8_t *const originalBuffer, const int originalBufferSize,
+ const int maxAdditionalBufferSize = MAX_ADDITIONAL_BUFFER_SIZE)
+ : mOriginalBuffer(originalBuffer), mOriginalBufferSize(originalBufferSize),
+ mAdditionalBuffer(EXTEND_ADDITIONAL_BUFFER_SIZE_STEP), mUsedAdditionalBufferSize(0),
+ mMaxAdditionalBufferSize(maxAdditionalBufferSize) {}
+
+ AK_FORCE_INLINE int getTailPosition() const {
+ return mOriginalBufferSize + mUsedAdditionalBufferSize;
+ }
+
+ /**
+ * For reading.
+ */
+ AK_FORCE_INLINE bool isInAdditionalBuffer(const int position) const {
+ return position >= mOriginalBufferSize;
+ }
+
+ // TODO: Resolve the issue that the address can be changed when the vector is resized.
+ // CAVEAT!: Be careful about array out of bound access with buffers
+ AK_FORCE_INLINE const uint8_t *getBuffer(const bool usesAdditionalBuffer) const {
+ if (usesAdditionalBuffer) {
+ return &mAdditionalBuffer[0];
+ } else {
+ return mOriginalBuffer;
+ }
+ }
+
+ AK_FORCE_INLINE int getOriginalBufferSize() const {
+ return mOriginalBufferSize;
+ }
+
+ AK_FORCE_INLINE bool isNearSizeLimit() const {
+ return mAdditionalBuffer.size() >= ((mMaxAdditionalBufferSize
+ * NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE) / 100);
+ }
+
+ /**
+ * For writing.
+ *
+ * Writing is allowed for original buffer, already written region of additional buffer and the
+ * tail of additional buffer.
+ */
+ bool writeUintAndAdvancePosition(const uint32_t data, const int size, int *const pos);
+
+ bool writeCodePointsAndAdvancePosition(const int *const codePoints, const int codePointCount,
+ const bool writesTerminator, int *const pos);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(BufferWithExtendableBuffer);
+
+ static const size_t MAX_ADDITIONAL_BUFFER_SIZE;
+ static const int NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE;
+ static const size_t EXTEND_ADDITIONAL_BUFFER_SIZE_STEP;
+
+ uint8_t *const mOriginalBuffer;
+ const int mOriginalBufferSize;
+ std::vector<uint8_t> mAdditionalBuffer;
+ int mUsedAdditionalBufferSize;
+ const size_t mMaxAdditionalBufferSize;
+
+ // Return if the buffer is successfully extended or not.
+ bool extendBuffer();
+
+ // Returns if it is possible to write size-bytes from pos. When pos is at the tail position of
+ // the additional buffer, try extending the buffer.
+ bool checkAndPrepareWriting(const int pos, const int size);
+};
+}
+#endif /* LATINIME_BUFFER_WITH_EXTENDABLE_BUFFER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.cpp
new file mode 100644
index 000000000..1833e8832
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.cpp
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
+
+namespace latinime {
+
+const uint8_t ByteArrayUtils::MINIMUM_ONE_BYTE_CHARACTER_VALUE = 0x20;
+const uint8_t ByteArrayUtils::MAXIMUM_ONE_BYTE_CHARACTER_VALUE = 0xFF;
+const uint8_t ByteArrayUtils::CHARACTER_ARRAY_TERMINATOR = 0x1F;
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h
new file mode 100644
index 000000000..0c1576818
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h
@@ -0,0 +1,261 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_BYTE_ARRAY_UTILS_H
+#define LATINIME_BYTE_ARRAY_UTILS_H
+
+#include <stdint.h>
+
+#include "defines.h"
+
+namespace latinime {
+
+/**
+ * Utility methods for reading byte arrays.
+ */
+class ByteArrayUtils {
+ public:
+ /**
+ * Integer writing
+ *
+ * Each method write a corresponding size integer in a big endian manner.
+ */
+ static AK_FORCE_INLINE void writeUintAndAdvancePosition(uint8_t *const buffer,
+ const uint32_t data, const int size, int *const pos) {
+ // size must be in 1 to 4.
+ ASSERT(size >= 1 && size <= 4);
+ switch (size) {
+ case 1:
+ ByteArrayUtils::writeUint8AndAdvancePosition(buffer, data, pos);
+ return;
+ case 2:
+ ByteArrayUtils::writeUint16AndAdvancePosition(buffer, data, pos);
+ return;
+ case 3:
+ ByteArrayUtils::writeUint24AndAdvancePosition(buffer, data, pos);
+ return;
+ case 4:
+ ByteArrayUtils::writeUint32AndAdvancePosition(buffer, data, pos);
+ return;
+ default:
+ break;
+ }
+ }
+
+ /**
+ * Integer reading
+ *
+ * Each method read a corresponding size integer in a big endian manner.
+ */
+ static AK_FORCE_INLINE uint32_t readUint32(const uint8_t *const buffer, const int pos) {
+ return (buffer[pos] << 24) ^ (buffer[pos + 1] << 16)
+ ^ (buffer[pos + 2] << 8) ^ buffer[pos + 3];
+ }
+
+ static AK_FORCE_INLINE uint32_t readUint24(const uint8_t *const buffer, const int pos) {
+ return (buffer[pos] << 16) ^ (buffer[pos + 1] << 8) ^ buffer[pos + 2];
+ }
+
+ static AK_FORCE_INLINE uint16_t readUint16(const uint8_t *const buffer, const int pos) {
+ return (buffer[pos] << 8) ^ buffer[pos + 1];
+ }
+
+ static AK_FORCE_INLINE uint8_t readUint8(const uint8_t *const buffer, const int pos) {
+ return buffer[pos];
+ }
+
+ static AK_FORCE_INLINE uint32_t readUint32AndAdvancePosition(
+ const uint8_t *const buffer, int *const pos) {
+ const uint32_t value = readUint32(buffer, *pos);
+ *pos += 4;
+ return value;
+ }
+
+ static AK_FORCE_INLINE int readSint24AndAdvancePosition(
+ const uint8_t *const buffer, int *const pos) {
+ const uint8_t value = readUint8(buffer, *pos);
+ if (value < 0x80) {
+ return readUint24AndAdvancePosition(buffer, pos);
+ } else {
+ (*pos)++;
+ return -(((value & 0x7F) << 16) ^ readUint16AndAdvancePosition(buffer, pos));
+ }
+ }
+
+ static AK_FORCE_INLINE uint32_t readUint24AndAdvancePosition(
+ const uint8_t *const buffer, int *const pos) {
+ const uint32_t value = readUint24(buffer, *pos);
+ *pos += 3;
+ return value;
+ }
+
+ static AK_FORCE_INLINE uint16_t readUint16AndAdvancePosition(
+ const uint8_t *const buffer, int *const pos) {
+ const uint16_t value = readUint16(buffer, *pos);
+ *pos += 2;
+ return value;
+ }
+
+ static AK_FORCE_INLINE uint8_t readUint8AndAdvancePosition(
+ const uint8_t *const buffer, int *const pos) {
+ return buffer[(*pos)++];
+ }
+
+ /**
+ * Code Point Reading
+ *
+ * 1 byte = bbbbbbbb match
+ * case 000xxxxx: xxxxx << 16 + next byte << 8 + next byte
+ * else: if 00011111 (= 0x1F) : this is the terminator. This is a relevant choice because
+ * unicode code points range from 0 to 0x10FFFF, so any 3-byte value starting with
+ * 00011111 would be outside unicode.
+ * else: iso-latin-1 code
+ * This allows for the whole unicode range to be encoded, including chars outside of
+ * the BMP. Also everything in the iso-latin-1 charset is only 1 byte, except control
+ * characters which should never happen anyway (and still work, but take 3 bytes).
+ */
+ static AK_FORCE_INLINE int readCodePoint(const uint8_t *const buffer, const int pos) {
+ int p = pos;
+ return readCodePointAndAdvancePosition(buffer, &p);
+ }
+
+ static AK_FORCE_INLINE int readCodePointAndAdvancePosition(
+ const uint8_t *const buffer, int *const pos) {
+ const uint8_t firstByte = readUint8(buffer, *pos);
+ if (firstByte < MINIMUM_ONE_BYTE_CHARACTER_VALUE) {
+ if (firstByte == CHARACTER_ARRAY_TERMINATOR) {
+ *pos += 1;
+ return NOT_A_CODE_POINT;
+ } else {
+ return readUint24AndAdvancePosition(buffer, pos);
+ }
+ } else {
+ *pos += 1;
+ return firstByte;
+ }
+ }
+
+ /**
+ * String (array of code points) Reading
+ *
+ * Reads code points until the terminator is found.
+ */
+ // Returns the length of the string.
+ static int readStringAndAdvancePosition(const uint8_t *const buffer,
+ const int maxLength, int *const outBuffer, int *const pos) {
+ int length = 0;
+ int codePoint = readCodePointAndAdvancePosition(buffer, pos);
+ while (NOT_A_CODE_POINT != codePoint && length < maxLength) {
+ outBuffer[length++] = codePoint;
+ codePoint = readCodePointAndAdvancePosition(buffer, pos);
+ }
+ return length;
+ }
+
+ // Advances the position and returns the length of the string.
+ static int advancePositionToBehindString(
+ const uint8_t *const buffer, const int maxLength, int *const pos) {
+ int length = 0;
+ int codePoint = readCodePointAndAdvancePosition(buffer, pos);
+ while (NOT_A_CODE_POINT != codePoint && length < maxLength) {
+ codePoint = readCodePointAndAdvancePosition(buffer, pos);
+ length++;
+ }
+ return length;
+ }
+
+ /**
+ * String (array of code points) Writing
+ */
+ static void writeCodePointsAndAdvancePosition(uint8_t *const buffer,
+ const int *const codePoints, const int codePointCount, const bool writesTerminator,
+ int *const pos) {
+ for (int i = 0; i < codePointCount; ++i) {
+ const int codePoint = codePoints[i];
+ if (codePoint == NOT_A_CODE_POINT || codePoint == CHARACTER_ARRAY_TERMINATOR) {
+ break;
+ } else if (codePoint < MINIMUM_ONE_BYTE_CHARACTER_VALUE
+ || codePoint > MAXIMUM_ONE_BYTE_CHARACTER_VALUE) {
+ // three bytes character.
+ writeUint24AndAdvancePosition(buffer, codePoint, pos);
+ } else {
+ // one byte character.
+ writeUint8AndAdvancePosition(buffer, codePoint, pos);
+ }
+ }
+ if (writesTerminator) {
+ writeUint8AndAdvancePosition(buffer, CHARACTER_ARRAY_TERMINATOR, pos);
+ }
+ }
+
+ static int calculateRequiredByteCountToStoreCodePoints(const int *const codePoints,
+ const int codePointCount, const bool writesTerminator) {
+ int byteCount = 0;
+ for (int i = 0; i < codePointCount; ++i) {
+ const int codePoint = codePoints[i];
+ if (codePoint == NOT_A_CODE_POINT || codePoint == CHARACTER_ARRAY_TERMINATOR) {
+ break;
+ } else if (codePoint < MINIMUM_ONE_BYTE_CHARACTER_VALUE
+ || codePoint > MAXIMUM_ONE_BYTE_CHARACTER_VALUE) {
+ // three bytes character.
+ byteCount += 3;
+ } else {
+ // one byte character.
+ byteCount += 1;
+ }
+ }
+ if (writesTerminator) {
+ // The terminator is one byte.
+ byteCount += 1;
+ }
+ return byteCount;
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(ByteArrayUtils);
+
+ static const uint8_t MINIMUM_ONE_BYTE_CHARACTER_VALUE;
+ static const uint8_t MAXIMUM_ONE_BYTE_CHARACTER_VALUE;
+ static const uint8_t CHARACTER_ARRAY_TERMINATOR;
+
+ static AK_FORCE_INLINE void writeUint32AndAdvancePosition(uint8_t *const buffer,
+ const uint32_t data, int *const pos) {
+ buffer[(*pos)++] = (data >> 24) & 0xFF;
+ buffer[(*pos)++] = (data >> 16) & 0xFF;
+ buffer[(*pos)++] = (data >> 8) & 0xFF;
+ buffer[(*pos)++] = data & 0xFF;
+ }
+
+ static AK_FORCE_INLINE void writeUint24AndAdvancePosition(uint8_t *const buffer,
+ const uint32_t data, int *const pos) {
+ buffer[(*pos)++] = (data >> 16) & 0xFF;
+ buffer[(*pos)++] = (data >> 8) & 0xFF;
+ buffer[(*pos)++] = data & 0xFF;
+ }
+
+ static AK_FORCE_INLINE void writeUint16AndAdvancePosition(uint8_t *const buffer,
+ const uint16_t data, int *const pos) {
+ buffer[(*pos)++] = (data >> 8) & 0xFF;
+ buffer[(*pos)++] = data & 0xFF;
+ }
+
+ static AK_FORCE_INLINE void writeUint8AndAdvancePosition(uint8_t *const buffer,
+ const uint8_t data, int *const pos) {
+ buffer[(*pos)++] = data & 0xFF;
+ }
+};
+} // namespace latinime
+#endif /* LATINIME_BYTE_ARRAY_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
new file mode 100644
index 000000000..2e4ec2e1d
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
+
+#include <cstdio>
+#include <cstring>
+
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/format_utils.h"
+
+namespace latinime {
+
+const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = ".tmp";
+
+/* static */ bool DictFileWritingUtils::createEmptyDictFile(const char *const filePath,
+ const int dictVersion, const HeaderReadWriteUtils::AttributeMap *const attributeMap) {
+ switch (dictVersion) {
+ case 3:
+ return createEmptyV3DictFile(filePath, attributeMap);
+ default:
+ // Only version 3 dictionary is supported for now.
+ return false;
+ }
+}
+
+/* static */ bool DictFileWritingUtils::createEmptyV3DictFile(const char *const filePath,
+ const HeaderReadWriteUtils::AttributeMap *const attributeMap) {
+ BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
+ HeaderPolicy headerPolicy(FormatUtils::VERSION_3, attributeMap);
+ headerPolicy.writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */);
+ BufferWithExtendableBuffer bodyBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
+ if (!DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(&bodyBuffer, 0 /* rootPos */)) {
+ return false;
+ }
+ return flushAllHeaderAndBodyToFile(filePath, &headerBuffer, &bodyBuffer);
+}
+
+/* static */ bool DictFileWritingUtils::flushAllHeaderAndBodyToFile(const char *const filePath,
+ BufferWithExtendableBuffer *const dictHeader, BufferWithExtendableBuffer *const dictBody) {
+ const int tmpFileNameBufSize = strlen(filePath)
+ + strlen(TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE) + 1 /* terminator */;
+ // Name of a temporary file used for writing that is a connected string of original name and
+ // TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE.
+ char tmpFileName[tmpFileNameBufSize];
+ snprintf(tmpFileName, tmpFileNameBufSize, "%s%s", filePath,
+ TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE);
+ FILE *const file = fopen(tmpFileName, "wb");
+ if (!file) {
+ AKLOGE("Dictionary file %s cannnot be opened.", tmpFileName);
+ ASSERT(false);
+ return false;
+ }
+ // Write the dictionary header.
+ if (!writeBufferToFile(file, dictHeader)) {
+ remove(tmpFileName);
+ AKLOGE("Dictionary header cannnot be written. size: %d", dictHeader->getTailPosition());
+ ASSERT(false);
+ return false;
+ }
+ // Write the dictionary body.
+ if (!writeBufferToFile(file, dictBody)) {
+ remove(tmpFileName);
+ AKLOGE("Dictionary body cannnot be written. size: %d", dictBody->getTailPosition());
+ ASSERT(false);
+ return false;
+ }
+ fclose(file);
+ rename(tmpFileName, filePath);
+ return true;
+}
+
+// This closes file pointer when an error is caused and returns whether the writing was succeeded
+// or not.
+/* static */ bool DictFileWritingUtils::writeBufferToFile(FILE *const file,
+ const BufferWithExtendableBuffer *const buffer) {
+ const int originalBufSize = buffer->getOriginalBufferSize();
+ if (originalBufSize > 0 && fwrite(buffer->getBuffer(false /* usesAdditionalBuffer */),
+ originalBufSize, 1, file) < 1) {
+ fclose(file);
+ return false;
+ }
+ const int additionalBufSize = buffer->getTailPosition() - buffer->getOriginalBufferSize();
+ if (additionalBufSize > 0 && fwrite(buffer->getBuffer(true /* usesAdditionalBuffer */),
+ additionalBufSize, 1, file) < 1) {
+ fclose(file);
+ return false;
+ }
+ return true;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h
new file mode 100644
index 000000000..bd4ac66fd
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_FILE_WRITING_UTILS_H
+#define LATINIME_DICT_FILE_WRITING_UTILS_H
+
+#include <cstdio>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h"
+
+namespace latinime {
+
+class BufferWithExtendableBuffer;
+
+class DictFileWritingUtils {
+ public:
+ static bool createEmptyDictFile(const char *const filePath, const int dictVersion,
+ const HeaderReadWriteUtils::AttributeMap *const attributeMap);
+
+ static bool flushAllHeaderAndBodyToFile(const char *const filePath,
+ BufferWithExtendableBuffer *const dictHeader,
+ BufferWithExtendableBuffer *const dictBody);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DictFileWritingUtils);
+
+ static const char *const TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE;
+
+ static bool createEmptyV3DictFile(const char *const filePath,
+ const HeaderReadWriteUtils::AttributeMap *const attributeMap);
+
+ static bool writeBufferToFile(FILE *const file,
+ const BufferWithExtendableBuffer *const buffer);
+};
+} // namespace latinime
+#endif /* LATINIME_DICT_FILE_WRITING_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
new file mode 100644
index 000000000..1d77d5c27
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/utils/format_utils.h"
+
+#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
+
+namespace latinime {
+
+const uint32_t FormatUtils::MAGIC_NUMBER = 0x9BC13AFE;
+
+// Magic number (4 bytes), version (2 bytes), flags (2 bytes), header size (4 bytes) = 12
+const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 12;
+
+/* static */ FormatUtils::FORMAT_VERSION FormatUtils::detectFormatVersion(
+ const uint8_t *const dict, const int dictSize) {
+ // The magic number is stored big-endian.
+ // If the dictionary is less than 4 bytes, we can't even read the magic number, so we don't
+ // understand this format.
+ if (dictSize < DICTIONARY_MINIMUM_SIZE) {
+ return UNKNOWN_VERSION;
+ }
+ const uint32_t magicNumber = ByteArrayUtils::readUint32(dict, 0);
+ switch (magicNumber) {
+ case MAGIC_NUMBER:
+ // Version 2 header is as follows:
+ // Magic number (4 bytes) 0x9B 0xC1 0x3A 0xFE
+ // Dictionary format version number (2 bytes)
+ // Options (2 bytes)
+ // Header size (4 bytes) : integer, big endian
+ if (ByteArrayUtils::readUint16(dict, 4) == 2) {
+ return VERSION_2;
+ } else if (ByteArrayUtils::readUint16(dict, 4) == 3) {
+ return VERSION_3;
+ } else {
+ return UNKNOWN_VERSION;
+ }
+ default:
+ return UNKNOWN_VERSION;
+ }
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
new file mode 100644
index 000000000..79ed0de29
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_FORMAT_UTILS_H
+#define LATINIME_FORMAT_UTILS_H
+
+#include <stdint.h>
+
+#include "defines.h"
+
+namespace latinime {
+
+/**
+ * Methods to handle binary dictionary format version.
+ */
+class FormatUtils {
+ public:
+ enum FORMAT_VERSION {
+ VERSION_2,
+ VERSION_3,
+ UNKNOWN_VERSION
+ };
+
+ // 32 bit magic number is stored at the beginning of the dictionary header to reject
+ // unsupported or obsolete dictionary formats.
+ static const uint32_t MAGIC_NUMBER;
+
+ static FORMAT_VERSION detectFormatVersion(const uint8_t *const dict, const int dictSize);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(FormatUtils);
+
+ static const int DICTIONARY_MINIMUM_SIZE;
+};
+} // namespace latinime
+#endif /* LATINIME_FORMAT_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h
new file mode 100644
index 000000000..6b69116eb
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_MMAPPED_BUFFER_H
+#define LATINIME_MMAPPED_BUFFER_H
+
+#include <cerrno>
+#include <fcntl.h>
+#include <stdint.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "defines.h"
+
+namespace latinime {
+
+class MmappedBuffer {
+ public:
+ static MmappedBuffer* openBuffer(const char *const path, const int bufferOffset,
+ const int bufferSize, const bool isUpdatable) {
+ const int openMode = isUpdatable ? O_RDWR : O_RDONLY;
+ const int mmapFd = open(path, openMode);
+ if (mmapFd < 0) {
+ AKLOGE("DICT: Can't open the source. path=%s errno=%d", path, errno);
+ return 0;
+ }
+ const int pagesize = getpagesize();
+ const int offset = bufferOffset % pagesize;
+ int alignedOffset = bufferOffset - offset;
+ int alignedSize = bufferSize + offset;
+ const int protMode = isUpdatable ? PROT_READ | PROT_WRITE : PROT_READ;
+ void *const mmappedBuffer = mmap(0, alignedSize, protMode, MAP_PRIVATE, mmapFd,
+ alignedOffset);
+ if (mmappedBuffer == MAP_FAILED) {
+ AKLOGE("DICT: Can't mmap dictionary. errno=%d", errno);
+ close(mmapFd);
+ return 0;
+ }
+ uint8_t *const buffer = static_cast<uint8_t *>(mmappedBuffer) + offset;
+ if (!buffer) {
+ AKLOGE("DICT: buffer is null");
+ close(mmapFd);
+ return 0;
+ }
+ return new MmappedBuffer(buffer, bufferSize, mmappedBuffer, alignedSize, mmapFd,
+ isUpdatable);
+ }
+
+ ~MmappedBuffer() {
+ int ret = munmap(mMmappedBuffer, mAlignedSize);
+ if (ret != 0) {
+ AKLOGE("DICT: Failure in munmap. ret=%d errno=%d", ret, errno);
+ }
+ ret = close(mMmapFd);
+ if (ret != 0) {
+ AKLOGE("DICT: Failure in close. ret=%d errno=%d", ret, errno);
+ }
+ }
+
+ AK_FORCE_INLINE uint8_t *getBuffer() const {
+ return mBuffer;
+ }
+
+ AK_FORCE_INLINE int getBufferSize() const {
+ return mBufferSize;
+ }
+
+ AK_FORCE_INLINE bool isUpdatable() const {
+ return mIsUpdatable;
+ }
+
+ private:
+ AK_FORCE_INLINE MmappedBuffer(uint8_t *const buffer, const int bufferSize,
+ void *const mmappedBuffer, const int alignedSize, const int mmapFd,
+ const bool isUpdatable)
+ : mBuffer(buffer), mBufferSize(bufferSize), mMmappedBuffer(mmappedBuffer),
+ mAlignedSize(alignedSize), mMmapFd(mmapFd), mIsUpdatable(isUpdatable) {}
+
+ DISALLOW_IMPLICIT_CONSTRUCTORS(MmappedBuffer);
+
+ uint8_t *const mBuffer;
+ const int mBufferSize;
+ void *const mMmappedBuffer;
+ const int mAlignedSize;
+ const int mMmapFd;
+ const bool mIsUpdatable;
+};
+}
+#endif /* LATINIME_MMAPPED_BUFFER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/probability_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/probability_utils.h
new file mode 100644
index 000000000..21fe355b8
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/probability_utils.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PROBABILITY_UTILS_H
+#define LATINIME_PROBABILITY_UTILS_H
+
+#include <stdint.h>
+
+#include "defines.h"
+
+namespace latinime {
+
+class ProbabilityUtils {
+ public:
+ static AK_FORCE_INLINE int backoff(const int unigramProbability) {
+ return unigramProbability;
+ // For some reason, applying the backoff weight gives bad results in tests. To apply the
+ // backoff weight, we divide the probability by 2, which in our storing format means
+ // decreasing the score by 8.
+ // TODO: figure out what's wrong with this.
+ // return unigramProbability > 8 ?
+ // unigramProbability - 8 : (0 == unigramProbability ? 0 : 8);
+ }
+
+ static AK_FORCE_INLINE int computeProbabilityForBigram(
+ const int unigramProbability, const int bigramProbability) {
+ // We divide the range [unigramProbability..255] in 16.5 steps - in other words, we want
+ // the unigram probability to be the median value of the 17th step from the top. A value of
+ // 0 for the bigram probability represents the middle of the 16th step from the top,
+ // while a value of 15 represents the middle of the top step.
+ // See makedict.BinaryDictEncoder#makeBigramFlags for details.
+ const float stepSize = static_cast<float>(MAX_PROBABILITY - unigramProbability)
+ / (1.5f + MAX_BIGRAM_ENCODED_PROBABILITY);
+ return unigramProbability
+ + static_cast<int>(static_cast<float>(bigramProbability + 1) * stepSize);
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(ProbabilityUtils);
+};
+}
+#endif /* LATINIME_PROBABILITY_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp
index f87989286..ecceb60d3 100644
--- a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp
+++ b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp
@@ -22,31 +22,36 @@ const float ScoringParams::MAX_SPATIAL_DISTANCE = 1.0f;
const int ScoringParams::THRESHOLD_NEXT_WORD_PROBABILITY = 40;
const int ScoringParams::THRESHOLD_NEXT_WORD_PROBABILITY_FOR_CAPPED = 120;
const float ScoringParams::AUTOCORRECT_OUTPUT_THRESHOLD = 1.0f;
-const int ScoringParams::MAX_CACHE_DIC_NODE_SIZE = 125;
+// TODO: Unlimit max cache dic node size
+const int ScoringParams::MAX_CACHE_DIC_NODE_SIZE = 170;
+const int ScoringParams::MAX_CACHE_DIC_NODE_SIZE_FOR_SINGLE_POINT = 310;
const int ScoringParams::THRESHOLD_SHORT_WORD_LENGTH = 4;
const float ScoringParams::DISTANCE_WEIGHT_LENGTH = 0.132f;
-const float ScoringParams::PROXIMITY_COST = 0.086f;
-const float ScoringParams::FIRST_PROXIMITY_COST = 0.104f;
+const float ScoringParams::PROXIMITY_COST = 0.095f;
+const float ScoringParams::FIRST_CHAR_PROXIMITY_COST = 0.102f;
+const float ScoringParams::FIRST_PROXIMITY_COST = 0.019f;
const float ScoringParams::OMISSION_COST = 0.458f;
const float ScoringParams::OMISSION_COST_SAME_CHAR = 0.491f;
const float ScoringParams::OMISSION_COST_FIRST_CHAR = 0.582f;
const float ScoringParams::INSERTION_COST = 0.730f;
+const float ScoringParams::TERMINAL_INSERTION_COST = 0.93f;
const float ScoringParams::INSERTION_COST_SAME_CHAR = 0.586f;
+const float ScoringParams::INSERTION_COST_PROXIMITY_CHAR = 0.70f;
const float ScoringParams::INSERTION_COST_FIRST_CHAR = 0.623f;
-const float ScoringParams::TRANSPOSITION_COST = 0.516f;
+const float ScoringParams::TRANSPOSITION_COST = 0.526f;
const float ScoringParams::SPACE_SUBSTITUTION_COST = 0.319f;
const float ScoringParams::ADDITIONAL_PROXIMITY_COST = 0.380f;
-const float ScoringParams::SUBSTITUTION_COST = 0.403f;
+const float ScoringParams::SUBSTITUTION_COST = 0.383f;
const float ScoringParams::COST_NEW_WORD = 0.042f;
const float ScoringParams::COST_SECOND_OR_LATER_WORD_FIRST_CHAR_UPPERCASE = 0.25f;
const float ScoringParams::DISTANCE_WEIGHT_LANGUAGE = 1.123f;
const float ScoringParams::COST_FIRST_LOOKAHEAD = 0.545f;
const float ScoringParams::COST_LOOKAHEAD = 0.073f;
-const float ScoringParams::HAS_PROXIMITY_TERMINAL_COST = 0.105f;
-const float ScoringParams::HAS_EDIT_CORRECTION_TERMINAL_COST = 0.038f;
-const float ScoringParams::HAS_MULTI_WORD_TERMINAL_COST = 0.444f;
+const float ScoringParams::HAS_PROXIMITY_TERMINAL_COST = 0.093f;
+const float ScoringParams::HAS_EDIT_CORRECTION_TERMINAL_COST = 0.041f;
+const float ScoringParams::HAS_MULTI_WORD_TERMINAL_COST = 0.447f;
const float ScoringParams::TYPING_BASE_OUTPUT_SCORE = 1.0f;
const float ScoringParams::TYPING_MAX_OUTPUT_SCORE_PER_INPUT = 0.1f;
-const float ScoringParams::NORMALIZED_SPATIAL_DISTANCE_THRESHOLD_FOR_EDIT = 0.06f;
+const float ScoringParams::NORMALIZED_SPATIAL_DISTANCE_THRESHOLD_FOR_EDIT = 0.045f;
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/typing/scoring_params.h b/native/jni/src/suggest/policyimpl/typing/scoring_params.h
index 53ac999c1..7d4b5c3c7 100644
--- a/native/jni/src/suggest/policyimpl/typing/scoring_params.h
+++ b/native/jni/src/suggest/policyimpl/typing/scoring_params.h
@@ -29,6 +29,7 @@ class ScoringParams {
static const int THRESHOLD_NEXT_WORD_PROBABILITY_FOR_CAPPED;
static const float AUTOCORRECT_OUTPUT_THRESHOLD;
static const int MAX_CACHE_DIC_NODE_SIZE;
+ static const int MAX_CACHE_DIC_NODE_SIZE_FOR_SINGLE_POINT;
static const int THRESHOLD_SHORT_WORD_LENGTH;
// Numerically optimized parameters (currently for tap typing only).
@@ -36,12 +37,15 @@ class ScoringParams {
// TODO: explore optimization of gesture parameters.
static const float DISTANCE_WEIGHT_LENGTH;
static const float PROXIMITY_COST;
+ static const float FIRST_CHAR_PROXIMITY_COST;
static const float FIRST_PROXIMITY_COST;
static const float OMISSION_COST;
static const float OMISSION_COST_SAME_CHAR;
static const float OMISSION_COST_FIRST_CHAR;
static const float INSERTION_COST;
+ static const float TERMINAL_INSERTION_COST;
static const float INSERTION_COST_SAME_CHAR;
+ static const float INSERTION_COST_PROXIMITY_CHAR;
static const float INSERTION_COST_FIRST_CHAR;
static const float TRANSPOSITION_COST;
static const float SPACE_SUBSTITUTION_COST;
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_scoring.h b/native/jni/src/suggest/policyimpl/typing/typing_scoring.h
index 90e2133e7..56ffcc93e 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_scoring.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_scoring.h
@@ -55,10 +55,10 @@ class TypingScoring : public Scoring {
const int inputSize, const bool forceCommit) const {
const float maxDistance = ScoringParams::DISTANCE_WEIGHT_LANGUAGE
+ static_cast<float>(inputSize) * ScoringParams::TYPING_MAX_OUTPUT_SCORE_PER_INPUT;
- return static_cast<int>((ScoringParams::TYPING_BASE_OUTPUT_SCORE
- - (compoundDistance / maxDistance)
- + (forceCommit ? ScoringParams::AUTOCORRECT_OUTPUT_THRESHOLD : 0.0f))
- * SUGGEST_INTERFACE_OUTPUT_SCALE);
+ const float score = ScoringParams::TYPING_BASE_OUTPUT_SCORE
+ - compoundDistance / maxDistance
+ + (forceCommit ? ScoringParams::AUTOCORRECT_OUTPUT_THRESHOLD : 0.0f);
+ return static_cast<int>(score * SUGGEST_INTERFACE_OUTPUT_SCALE);
}
AK_FORCE_INLINE float getDoubleLetterDemotionDistanceCost(const int terminalIndex,
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h
index 12110d54f..89e53f441 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h
@@ -19,14 +19,15 @@
#include <stdint.h>
-#include "char_utils.h"
#include "defines.h"
-#include "proximity_info_state.h"
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_vector.h"
+#include "suggest/core/layout/proximity_info_state.h"
+#include "suggest/core/layout/proximity_info_utils.h"
#include "suggest/core/policy/traversal.h"
#include "suggest/core/session/dic_traverse_session.h"
#include "suggest/policyimpl/typing/scoring_params.h"
+#include "utils/char_utils.h"
namespace latinime {
class TypingTraversal : public Traversal {
@@ -64,9 +65,9 @@ class TypingTraversal : public Traversal {
}
const int point0Index = dicNode->getInputIndex(0);
const int currentBaseLowerCodePoint =
- toBaseLowerCase(childDicNode->getNodeCodePoint());
+ CharUtils::toBaseLowerCase(childDicNode->getNodeCodePoint());
const int typedBaseLowerCodePoint =
- toBaseLowerCase(traverseSession->getProximityInfoState(0)
+ CharUtils::toBaseLowerCase(traverseSession->getProximityInfoState(0)
->getPrimaryCodePointAt(point0Index));
return (currentBaseLowerCodePoint != typedBaseLowerCodePoint);
}
@@ -136,7 +137,7 @@ class TypingTraversal : public Traversal {
return ScoringParams::MAX_SPATIAL_DISTANCE;
}
- AK_FORCE_INLINE bool allowPartialCommit() const {
+ AK_FORCE_INLINE bool autoCorrectsToMultiWordSuggestionIfTop() const {
return true;
}
@@ -147,11 +148,12 @@ class TypingTraversal : public Traversal {
AK_FORCE_INLINE bool sameAsTyped(
const DicTraverseSession *const traverseSession, const DicNode *const dicNode) const {
return traverseSession->getProximityInfoState(0)->sameAsTyped(
- dicNode->getOutputWordBuf(), dicNode->getDepth());
+ dicNode->getOutputWordBuf(), dicNode->getNodeCodePointCount());
}
- AK_FORCE_INLINE int getMaxCacheSize() const {
- return ScoringParams::MAX_CACHE_DIC_NODE_SIZE;
+ AK_FORCE_INLINE int getMaxCacheSize(const int inputSize) const {
+ return (inputSize <= 1) ? ScoringParams::MAX_CACHE_DIC_NODE_SIZE_FOR_SINGLE_POINT
+ : ScoringParams::MAX_CACHE_DIC_NODE_SIZE;
}
AK_FORCE_INLINE bool isPossibleOmissionChildNode(
@@ -159,7 +161,7 @@ class TypingTraversal : public Traversal {
const DicNode *const dicNode) const {
const ProximityType proximityType =
getProximityType(traverseSession, parentDicNode, dicNode);
- if (!DicNodeUtils::isProximityChar(proximityType)) {
+ if (!ProximityInfoUtils::isMatchOrProximityChar(proximityType)) {
return false;
}
return true;
@@ -171,8 +173,8 @@ class TypingTraversal : public Traversal {
return false;
}
const int c = dicNode->getOutputWordBuf()[0];
- const bool shortCappedWord = dicNode->getDepth()
- < ScoringParams::THRESHOLD_SHORT_WORD_LENGTH && isAsciiUpper(c);
+ const bool shortCappedWord = dicNode->getNodeCodePointCount()
+ < ScoringParams::THRESHOLD_SHORT_WORD_LENGTH && CharUtils::isAsciiUpper(c);
return !shortCappedWord
|| probability >= ScoringParams::THRESHOLD_NEXT_WORD_PROBABILITY_FOR_CAPPED;
}
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp b/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp
index e4c69d1f6..408b12ae9 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp
+++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp
@@ -44,6 +44,7 @@ ErrorType TypingWeighting::getErrorType(const CorrectionType correctionType,
break;
case CT_SUBSTITUTION:
case CT_INSERTION:
+ case CT_TERMINAL_INSERTION:
case CT_TRANSPOSITION:
return ET_EDIT_CORRECTION;
case CT_NEW_WORD_SPACE_OMITTION:
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
index 3938c0ec5..9f0a331e3 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
@@ -18,11 +18,12 @@
#define LATINIME_TYPING_WEIGHTING_H
#include "defines.h"
-#include "suggest_utils.h"
#include "suggest/core/dicnode/dic_node_utils.h"
+#include "suggest/core/layout/touch_position_correction_utils.h"
#include "suggest/core/policy/weighting.h"
#include "suggest/core/session/dic_traverse_session.h"
#include "suggest/policyimpl/typing/scoring_params.h"
+#include "utils/char_utils.h"
namespace latinime {
@@ -54,7 +55,7 @@ class TypingWeighting : public Weighting {
const bool isZeroCostOmission = parentDicNode->isZeroCostOmission();
const bool sameCodePoint = dicNode->isSameNodeCodePoint(parentDicNode);
// If the traversal omitted the first letter then the dicNode should now be on the second.
- const bool isFirstLetterOmission = dicNode->getDepth() == 2;
+ const bool isFirstLetterOmission = dicNode->getNodeCodePointCount() == 2;
float cost = 0.0f;
if (isZeroCostOmission) {
cost = 0.0f;
@@ -73,16 +74,20 @@ class TypingWeighting : public Weighting {
// Note: min() required since length can be MAX_POINT_TO_KEY_LENGTH for characters not on
// the keyboard (like accented letters)
const float normalizedSquaredLength = traverseSession->getProximityInfoState(0)
- ->getPointToKeyLength(pointIndex, dicNode->getNodeCodePoint());
- const float normalizedDistance = SuggestUtils::getSweetSpotFactor(
+ ->getPointToKeyLength(pointIndex,
+ CharUtils::toBaseLowerCase(dicNode->getNodeCodePoint()));
+ const float normalizedDistance = TouchPositionCorrectionUtils::getSweetSpotFactor(
traverseSession->isTouchPositionCorrectionEnabled(), normalizedSquaredLength);
const float weightedDistance = ScoringParams::DISTANCE_WEIGHT_LENGTH * normalizedDistance;
const bool isFirstChar = pointIndex == 0;
const bool isProximity = isProximityDicNode(traverseSession, dicNode);
- float cost = isProximity ? (isFirstChar ? ScoringParams::FIRST_PROXIMITY_COST
+ float cost = isProximity ? (isFirstChar ? ScoringParams::FIRST_CHAR_PROXIMITY_COST
: ScoringParams::PROXIMITY_COST) : 0.0f;
- if (dicNode->getDepth() == 2) {
+ if (isProximity && dicNode->getProximityCorrectionCount() == 0) {
+ cost += ScoringParams::FIRST_PROXIMITY_COST;
+ }
+ if (dicNode->getNodeCodePointCount() == 2) {
// At the second character of the current word, we check if the first char is uppercase
// and the word is a second or later word of a multiple word suggestion. We demote it
// if so.
@@ -98,9 +103,9 @@ class TypingWeighting : public Weighting {
bool isProximityDicNode(const DicTraverseSession *const traverseSession,
const DicNode *const dicNode) const {
const int pointIndex = dicNode->getInputIndex(0);
- const int primaryCodePoint = toBaseLowerCase(
+ const int primaryCodePoint = CharUtils::toBaseLowerCase(
traverseSession->getProximityInfoState(0)->getPrimaryCodePointAt(pointIndex));
- const int dicNodeChar = toBaseLowerCase(dicNode->getNodeCodePoint());
+ const int dicNodeChar = CharUtils::toBaseLowerCase(dicNode->getNodeCodePoint());
return primaryCodePoint != dicNodeChar;
}
@@ -109,10 +114,10 @@ class TypingWeighting : public Weighting {
const int16_t parentPointIndex = parentDicNode->getInputIndex(0);
const int prevCodePoint = parentDicNode->getNodeCodePoint();
const float distance1 = traverseSession->getProximityInfoState(0)->getPointToKeyLength(
- parentPointIndex + 1, prevCodePoint);
+ parentPointIndex + 1, CharUtils::toBaseLowerCase(prevCodePoint));
const int codePoint = dicNode->getNodeCodePoint();
const float distance2 = traverseSession->getProximityInfoState(0)->getPointToKeyLength(
- parentPointIndex, codePoint);
+ parentPointIndex, CharUtils::toBaseLowerCase(codePoint));
const float distance = distance1 + distance2;
const float weightedLengthDistance =
distance * ScoringParams::DISTANCE_WEIGHT_LENGTH;
@@ -121,31 +126,38 @@ class TypingWeighting : public Weighting {
float getInsertionCost(const DicTraverseSession *const traverseSession,
const DicNode *const parentDicNode, const DicNode *const dicNode) const {
- const int16_t parentPointIndex = parentDicNode->getInputIndex(0);
- const int prevCodePoint =
- traverseSession->getProximityInfoState(0)->getPrimaryCodePointAt(parentPointIndex);
-
+ const int16_t insertedPointIndex = parentDicNode->getInputIndex(0);
+ const int prevCodePoint = traverseSession->getProximityInfoState(0)->getPrimaryCodePointAt(
+ insertedPointIndex);
const int currentCodePoint = dicNode->getNodeCodePoint();
const bool sameCodePoint = prevCodePoint == currentCodePoint;
+ const bool existsAdjacentProximityChars = traverseSession->getProximityInfoState(0)
+ ->existsAdjacentProximityChars(insertedPointIndex);
const float dist = traverseSession->getProximityInfoState(0)->getPointToKeyLength(
- parentPointIndex + 1, currentCodePoint);
+ insertedPointIndex + 1, CharUtils::toBaseLowerCase(dicNode->getNodeCodePoint()));
const float weightedDistance = dist * ScoringParams::DISTANCE_WEIGHT_LENGTH;
- const bool singleChar = dicNode->getDepth() == 1;
- const float cost = (singleChar ? ScoringParams::INSERTION_COST_FIRST_CHAR : 0.0f)
- + (sameCodePoint ? ScoringParams::INSERTION_COST_SAME_CHAR
- : ScoringParams::INSERTION_COST);
+ const bool singleChar = dicNode->getNodeCodePointCount() == 1;
+ float cost = (singleChar ? ScoringParams::INSERTION_COST_FIRST_CHAR : 0.0f);
+ if (sameCodePoint) {
+ cost += ScoringParams::INSERTION_COST_SAME_CHAR;
+ } else if (existsAdjacentProximityChars) {
+ cost += ScoringParams::INSERTION_COST_PROXIMITY_CHAR;
+ } else {
+ cost += ScoringParams::INSERTION_COST;
+ }
return cost + weightedDistance;
}
- float getNewWordCost(const DicTraverseSession *const traverseSession,
- const DicNode *const dicNode) const {
+ float getNewWordSpatialCost(const DicTraverseSession *const traverseSession,
+ const DicNode *const dicNode, DicNode_InputStateG *inputStateG) const {
return ScoringParams::COST_NEW_WORD * traverseSession->getMultiWordCostMultiplier();
}
- float getNewWordBigramCost(const DicTraverseSession *const traverseSession,
+ float getNewWordBigramLanguageCost(const DicTraverseSession *const traverseSession,
const DicNode *const dicNode,
MultiBigramMap *const multiBigramMap) const {
- return DicNodeUtils::getBigramNodeImprobability(traverseSession->getOffsetDict(),
+ return DicNodeUtils::getBigramNodeImprobability(
+ traverseSession->getDictionaryStructurePolicy(),
dicNode, multiBigramMap) * ScoringParams::DISTANCE_WEIGHT_LANGUAGE;
}
@@ -162,9 +174,16 @@ class TypingWeighting : public Weighting {
float getTerminalLanguageCost(const DicTraverseSession *const traverseSession,
const DicNode *const dicNode, const float dicNodeLanguageImprobability) const {
- const float languageImprobability = (dicNode->isExactMatch()) ?
- 0.0f : dicNodeLanguageImprobability;
- return languageImprobability * ScoringParams::DISTANCE_WEIGHT_LANGUAGE;
+ return dicNodeLanguageImprobability * ScoringParams::DISTANCE_WEIGHT_LANGUAGE;
+ }
+
+ float getTerminalInsertionCost(const DicTraverseSession *const traverseSession,
+ const DicNode *const dicNode) const {
+ const int inputIndex = dicNode->getInputIndex(0);
+ const int inputSize = traverseSession->getInputSize();
+ ASSERT(inputIndex < inputSize);
+ // TODO: Implement more efficient logic
+ return ScoringParams::TERMINAL_INSERTION_COST * (inputSize - inputIndex);
}
AK_FORCE_INLINE bool needsToNormalizeCompoundDistance() const {
diff --git a/native/jni/src/suggest/policyimpl/utils/damerau_levenshtein_edit_distance_policy.h b/native/jni/src/suggest/policyimpl/utils/damerau_levenshtein_edit_distance_policy.h
index ec1457455..81614bc9c 100644
--- a/native/jni/src/suggest/policyimpl/utils/damerau_levenshtein_edit_distance_policy.h
+++ b/native/jni/src/suggest/policyimpl/utils/damerau_levenshtein_edit_distance_policy.h
@@ -17,8 +17,8 @@
#ifndef LATINIME_DAEMARU_LEVENSHTEIN_EDIT_DISTANCE_POLICY_H
#define LATINIME_DAEMARU_LEVENSHTEIN_EDIT_DISTANCE_POLICY_H
-#include "char_utils.h"
#include "suggest/policyimpl/utils/edit_distance_policy.h"
+#include "utils/char_utils.h"
namespace latinime {
@@ -31,8 +31,8 @@ class DamerauLevenshteinEditDistancePolicy : public EditDistancePolicy {
~DamerauLevenshteinEditDistancePolicy() {}
AK_FORCE_INLINE float getSubstitutionCost(const int index0, const int index1) const {
- const int c0 = toBaseLowerCase(mString0[index0]);
- const int c1 = toBaseLowerCase(mString1[index1]);
+ const int c0 = CharUtils::toBaseLowerCase(mString0[index0]);
+ const int c1 = CharUtils::toBaseLowerCase(mString1[index1]);
return (c0 == c1) ? 0.0f : 1.0f;
}
@@ -45,10 +45,10 @@ class DamerauLevenshteinEditDistancePolicy : public EditDistancePolicy {
}
AK_FORCE_INLINE bool allowTransposition(const int index0, const int index1) const {
- const int c0 = toBaseLowerCase(mString0[index0]);
- const int c1 = toBaseLowerCase(mString1[index1]);
- if (index0 > 0 && index1 > 0 && c0 == toBaseLowerCase(mString1[index1 - 1])
- && c1 == toBaseLowerCase(mString0[index0 - 1])) {
+ const int c0 = CharUtils::toBaseLowerCase(mString0[index0]);
+ const int c1 = CharUtils::toBaseLowerCase(mString1[index1]);
+ if (index0 > 0 && index1 > 0 && c0 == CharUtils::toBaseLowerCase(mString1[index1 - 1])
+ && c1 == CharUtils::toBaseLowerCase(mString0[index0 - 1])) {
return true;
}
return false;
diff --git a/native/jni/src/suggest/policyimpl/utils/edit_distance.h b/native/jni/src/suggest/policyimpl/utils/edit_distance.h
index cbbd66894..0871c37ce 100644
--- a/native/jni/src/suggest/policyimpl/utils/edit_distance.h
+++ b/native/jni/src/suggest/policyimpl/utils/edit_distance.h
@@ -62,6 +62,26 @@ class EditDistance {
return dp[(beforeLength + 1) * (afterLength + 1) - 1];
}
+ AK_FORCE_INLINE static void dumpEditDistance10ForDebug(const float *const editDistanceTable,
+ const int editDistanceTableWidth, const int outputLength) {
+ if (DEBUG_DICT) {
+ AKLOGI("EditDistanceTable");
+ for (int i = 0; i <= 10; ++i) {
+ float c[11];
+ for (int j = 0; j <= 10; ++j) {
+ if (j < editDistanceTableWidth + 1 && i < outputLength + 1) {
+ c[j] = (editDistanceTable + i * (editDistanceTableWidth + 1))[j];
+ } else {
+ c[j] = -1.0f;
+ }
+ }
+ AKLOGI("[ %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f ]",
+ c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7], c[8], c[9], c[10]);
+ (void)c; // To suppress compiler warning
+ }
+ }
+ }
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(EditDistance);
};