aboutsummaryrefslogtreecommitdiffstats
path: root/native
diff options
context:
space:
mode:
Diffstat (limited to 'native')
-rw-r--r--native/jni/Android.mk7
-rw-r--r--native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp142
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h35
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp8
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp3
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp75
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h39
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h6
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h13
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h26
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h47
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp10
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h5
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp12
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp73
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h17
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp5
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp23
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h3
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp59
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h9
25 files changed, 527 insertions, 104 deletions
diff --git a/native/jni/Android.mk b/native/jni/Android.mk
index a5757fd58..b61a66ce6 100644
--- a/native/jni/Android.mk
+++ b/native/jni/Android.mk
@@ -68,12 +68,14 @@ LATIN_IME_CORE_SRC_FILES := \
suggest/core/policy/weighting.cpp \
suggest/core/session/dic_traverse_session.cpp \
$(addprefix suggest/policyimpl/dictionary/, \
- bigram/bigram_list_read_write_utils.cpp \
- bigram/dynamic_bigram_list_policy.cpp \
header/header_policy.cpp \
header/header_read_write_utils.cpp \
shortcut/shortcut_list_reading_utils.cpp \
structure/dictionary_structure_with_buffer_policy_factory.cpp) \
+ $(addprefix suggest/policyimpl/dictionary/bigram/, \
+ bigram_list_read_write_utils.cpp \
+ dynamic_bigram_list_policy.cpp \
+ ver4_bigram_list_policy.cpp) \
$(addprefix suggest/policyimpl/dictionary/structure/v2/, \
patricia_trie_policy.cpp \
patricia_trie_reading_utils.cpp) \
@@ -88,6 +90,7 @@ LATIN_IME_CORE_SRC_FILES := \
dynamic_patricia_trie_writing_helper.cpp \
dynamic_patricia_trie_writing_utils.cpp) \
$(addprefix suggest/policyimpl/dictionary/structure/v4/, \
+ content/bigram_dict_content.cpp \
ver4_dict_constants.cpp \
ver4_patricia_trie_node_reader.cpp \
ver4_patricia_trie_node_writer.cpp \
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index 3becc7e39..c4383d754 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -86,10 +86,10 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring s
char sourceDirChars[sourceDirUtf8Length + 1];
env->GetStringUTFRegion(sourceDir, 0, env->GetStringLength(sourceDir), sourceDirChars);
sourceDirChars[sourceDirUtf8Length] = '\0';
- DictionaryStructureWithBufferPolicy::StructurePoilcyPtr dictionaryStructureWithBufferPolicy(
+ DictionaryStructureWithBufferPolicy::StructurePoilcyPtr dictionaryStructureWithBufferPolicy =
DictionaryStructureWithBufferPolicyFactory::newDictionaryStructureWithBufferPolicy(
sourceDirChars, static_cast<int>(dictOffset), static_cast<int>(dictSize),
- isUpdatable == JNI_TRUE));
+ isUpdatable == JNI_TRUE);
if (!dictionaryStructureWithBufferPolicy.get()) {
return 0;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp
new file mode 100644
index 000000000..94d7f1061
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
+
+#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+
+namespace latinime {
+
+// Reads the bigram entry located at *bigramEntryPos through the bigram dict content.
+// outProbability, outHasNext and bigramEntryPos are forwarded unchanged to
+// BigramDictContent::getBigramEntryAndAdvancePosition(). When outBigramPos is non-null it
+// receives the PtNode position that the terminal position lookup table reports for the entry's
+// target terminal id.
+void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
+        bool *const outHasNext, int *const bigramEntryPos) const {
+    int terminalIdOfTarget = Ver4DictConstants::NOT_A_TERMINAL_ID;
+    mBigramDictContent->getBigramEntryAndAdvancePosition(outProbability, outHasNext,
+            &terminalIdOfTarget, bigramEntryPos);
+    if (!outBigramPos) {
+        // The caller is not interested in the target position.
+        return;
+    }
+    // Translate the target terminal id into the position of the target PtNode.
+    *outBigramPos = mTerminalPositionLookupTable->getTerminalPtNodePosition(terminalIdOfTarget);
+}
+
+// Stores a bigram from the PtNode identified by terminalId to newTargetTerminalId with
+// newProbability, or overwrites the probability when that bigram is already in the list.
+//
+// outAddedNewEntry may be null. Otherwise it is first reset to false and then set to true on
+// every path that stores a bigram that was not present before: a brand-new list is created, an
+// invalidated slot is reused, or the entry is prepended to a relocated list. It stays false when
+// an existing entry with the same target is overwritten. Some paths set it before the final
+// write, so callers should consult it only when this function returns true.
+//
+// Returns false as soon as any underlying list creation, write or copy fails.
+bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId,
+        const int newProbability, bool *const outAddedNewEntry) {
+    if (outAddedNewEntry) {
+        *outAddedNewEntry = false;
+    }
+    const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+    if (bigramListPos == NOT_A_DICT_POS) {
+        // Updating PtNode doesn't have a bigram list.
+        // Create new bigram list.
+        if (!mBigramDictContent->createNewBigramList(terminalId)) {
+            return false;
+        }
+        // Write an entry.
+        int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+        if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(newProbability,
+                false /* hasNext */, newTargetTerminalId, &writingPos)) {
+            return false;
+        }
+        // The first entry of a fresh list is a new bigram as well; report it the same way the
+        // other "new entry" paths below do.
+        if (outAddedNewEntry) {
+            *outAddedNewEntry = true;
+        }
+        return true;
+    }
+
+    const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos);
+    if (entryPosToUpdate != NOT_A_DICT_POS) {
+        // Overwrite existing entry. Read it first so that its hasNext flag is preserved.
+        int readingPos = entryPosToUpdate;
+        bool hasNext = false;
+        int probability = NOT_A_PROBABILITY;
+        int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
+        mBigramDictContent->getBigramEntryAndAdvancePosition(&probability, &hasNext,
+                &targetTerminalId, &readingPos);
+        if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID && outAddedNewEntry) {
+            // Reuse invalid entry.
+            *outAddedNewEntry = true;
+        }
+        int writingPos = entryPosToUpdate;
+        return mBigramDictContent->writeBigramEntryAndAdvancePosition(newProbability, hasNext,
+                newTargetTerminalId, &writingPos);
+    }
+
+    // Add new entry to the bigram list.
+    // Create new bigram list; the address table now points terminalId at the new location.
+    if (!mBigramDictContent->createNewBigramList(terminalId)) {
+        return false;
+    }
+    // Write new entry at a head position of the bigram list.
+    int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+    if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(newProbability,
+            true /* hasNext */, newTargetTerminalId, &writingPos)) {
+        return false;
+    }
+    if (outAddedNewEntry) {
+        *outAddedNewEntry = true;
+    }
+    // Append existing entries by copying them from the old list position.
+    return mBigramDictContent->copyBigramList(bigramListPos, writingPos);
+}
+
+// Invalidates the bigram from the PtNode identified by terminalId to targetTerminalId.
+// The entry is kept in place: it is rewritten with its stored probability and hasNext flag, and
+// only the target terminal id is replaced by NOT_A_TERMINAL_ID.
+// Returns false when the PtNode has no bigram list, when no entry with that target exists, or
+// when the write fails.
+bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) {
+    const int listHeadPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+    if (listHeadPos == NOT_A_DICT_POS) {
+        // Bigram list doesn't exist.
+        return false;
+    }
+    const int entryPos = getEntryPosToUpdate(targetTerminalId, listHeadPos);
+    if (entryPos == NOT_A_DICT_POS) {
+        // Neither a matching entry nor an invalidated slot was found.
+        return false;
+    }
+    int storedProbability = NOT_A_PROBABILITY;
+    bool storedHasNext = false;
+    int storedTargetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
+    int readingPos = entryPos;
+    mBigramDictContent->getBigramEntryAndAdvancePosition(&storedProbability, &storedHasNext,
+            &storedTargetTerminalId, &readingPos);
+    if (storedTargetTerminalId != targetTerminalId) {
+        // getEntryPosToUpdate() returned an invalidated slot, so the bigram doesn't exist.
+        return false;
+    }
+    // Remove the bigram by overwriting only the target terminal id of the entry.
+    int writingPos = entryPos;
+    return mBigramDictContent->writeBigramEntryAndAdvancePosition(storedProbability,
+            storedHasNext, Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */,
+            &writingPos);
+}
+
+// Walks the bigram list starting at bigramListPos and picks the entry that should be written.
+// Returns the position of the entry whose target is targetTerminalIdToFind as soon as it is
+// seen. Otherwise returns the position of the last invalidated entry (target
+// NOT_A_TERMINAL_ID) encountered in the list, or NOT_A_DICT_POS when there is none.
+int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
+        const int bigramListPos) const {
+    int reusableEntryPos = NOT_A_DICT_POS;
+    int pos = bigramListPos;
+    bool moreEntries = true;
+    // At least one entry is read unconditionally; each entry's hasNext flag decides whether the
+    // walk continues.
+    do {
+        const int currentEntryPos = pos;
+        int currentTargetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
+        mBigramDictContent->getBigramEntryAndAdvancePosition(0 /* probability */, &moreEntries,
+                &currentTargetTerminalId, &pos);
+        if (currentTargetTerminalId == targetTerminalIdToFind) {
+            // Entry with the same target is found.
+            return currentEntryPos;
+        }
+        if (currentTargetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) {
+            // Remember an invalidated entry that can be reused.
+            reusableEntryPos = currentEntryPos;
+        }
+    } while (moreEntries);
+    return reusableEntryPos;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h
index 875a0ff9b..b3fe13d7d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h
@@ -19,46 +19,37 @@
#include "defines.h"
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
-#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
namespace latinime {
+class BigramDictContent;
+class TerminalPositionLookupTable;
+
class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
public:
- Ver4BigramListPolicy(const BigramDictContent *const bigramDictContent,
+ Ver4BigramListPolicy(BigramDictContent *const bigramDictContent,
const TerminalPositionLookupTable *const terminalPositionLookupTable)
: mBigramDictContent(bigramDictContent),
mTerminalPositionLookupTable(terminalPositionLookupTable) {}
void getNextBigram(int *const outBigramPos, int *const outProbability,
- bool *const outHasNext, int *const bigramEntryPos) const {
- int bigramFlags = 0;
- int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
- mBigramDictContent->getBigramEntryAndAdvancePosition(&bigramFlags, &targetTerminalId,
- bigramEntryPos);
- if (outProbability) {
- *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags);
- }
- if (outHasNext) {
- *outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags);
- }
- if (outBigramPos) {
- // Lookup target PtNode position.
- *outBigramPos =
- mTerminalPositionLookupTable->getTerminalPtNodePosition(targetTerminalId);
- }
- }
+ bool *const outHasNext, int *const bigramEntryPos) const;
void skipAllBigrams(int *const pos) const {
// Do nothing because we don't need to skip bigram lists in ver4 dictionaries.
}
+ bool addNewEntry(const int terminalId, const int newTargetTerminalId, const int newProbability,
+ bool *const outAddedNewEntry);
+
+ bool removeEntry(const int terminalId, const int targetTerminalId);
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy);
- const BigramDictContent *const mBigramDictContent;
+ int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos) const;
+
+ BigramDictContent *const mBigramDictContent;
const TerminalPositionLookupTable *const mTerminalPositionLookupTable;
};
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
index 3ab6a8e21..063b84cbf 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
@@ -35,8 +35,8 @@ namespace latinime {
const int bufOffset, const int size, const bool isUpdatable) {
// Allocated buffer in MmapedBuffer::newBuffer() will be freed in the destructor of
// MmappedBufferWrapper if the instance has the responsibility.
- MmappedBuffer::MmappedBufferPtr mmappedBuffer(MmappedBuffer::openBuffer(path, bufOffset, size,
- isUpdatable));
+ MmappedBuffer::MmappedBufferPtr mmappedBuffer = MmappedBuffer::openBuffer(path, bufOffset, size,
+ isUpdatable);
if (!mmappedBuffer.get()) {
return DictionaryStructureWithBufferPolicy::StructurePoilcyPtr(0);
}
@@ -58,8 +58,8 @@ namespace latinime {
}
// Removing extension to get the base path.
dictDirPath.erase(pos);
- const Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers(
- Ver4DictBuffers::openVer4DictBuffers(dictDirPath.c_str(), mmappedBuffer));
+ const Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers =
+ Ver4DictBuffers::openVer4DictBuffers(dictDirPath.c_str(), mmappedBuffer);
if (!dictBuffers.get()->isValid()) {
AKLOGE("DICT: The dictionary doesn't satisfy ver4 format requirements.");
ASSERT(false);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp
index c3fe03d37..b3fdbeb78 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp
@@ -238,6 +238,9 @@ int DynamicPatriciaTrieReadingHelper::getTerminalPtNodePositionOfWord(const int
}
// All characters are matched.
if (length == getTotalCodePointCount(ptNodeParams)) {
+ if (!ptNodeParams.isTerminal()) {
+ return NOT_A_DICT_POS;
+ }
// Terminal position is found.
return ptNodeParams.getHeadPos();
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
new file mode 100644
index 000000000..999460086
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
+
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+// Decodes one bigram entry at *bigramEntryPos: a flags field followed by a target terminal id
+// field, both read with readUintAndAdvancePosition() using bigramEntryPos as the position
+// argument. Each of outProbability, outHasNext and outTargetTerminalId may be null; both fields
+// are read regardless of which outputs were requested. A stored
+// INVALID_BIGRAM_TARGET_TERMINAL_ID is reported as NOT_A_TERMINAL_ID.
+void BigramDictContent::getBigramEntryAndAdvancePosition(int *const outProbability,
+        bool *const outHasNext, int *const outTargetTerminalId, int *const bigramEntryPos) const {
+    const BufferWithExtendableBuffer *const buffer = getContentBuffer();
+    // Flags field: probability bits plus the has-next bit.
+    const int flags = buffer->readUintAndAdvancePosition(
+            Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos);
+    if (outProbability) {
+        *outProbability = flags & Ver4DictConstants::BIGRAM_PROBABILITY_MASK;
+    }
+    if (outHasNext) {
+        *outHasNext = (flags & Ver4DictConstants::BIGRAM_HAS_NEXT_MASK) != 0;
+    }
+    // Target terminal id field.
+    const int storedTargetTerminalId = buffer->readUintAndAdvancePosition(
+            Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos);
+    if (!outTargetTerminalId) {
+        return;
+    }
+    if (storedTargetTerminalId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) {
+        // Map the on-disk marker for an invalidated entry to the in-memory constant.
+        *outTargetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
+    } else {
+        *outTargetTerminalId = storedTargetTerminalId;
+    }
+}
+
+// Encodes one bigram entry at *entryWritingPos: first the flags field built from probability
+// and hasNext, then the target terminal id field. NOT_A_TERMINAL_ID is stored as
+// INVALID_BIGRAM_TARGET_TERMINAL_ID. entryWritingPos is passed as the position argument to both
+// writeUintAndAdvancePosition() calls.
+// Returns false as soon as one of the two writes fails.
+// NOTE(review): hasNext is typed int here and in the header declaration although it is used as
+// a boolean flag; consider changing both to bool together.
+bool BigramDictContent::writeBigramEntryAndAdvancePosition(const int probability, const int hasNext,
+        const int targetTerminalId, int *const entryWritingPos) {
+    BufferWithExtendableBuffer *const buffer = getWritableContentBuffer();
+    const bool flagsWritten = buffer->writeUintAndAdvancePosition(
+            createAndGetBigramFlags(probability, hasNext),
+            Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos);
+    if (!flagsWritten) {
+        return false;
+    }
+    int terminalIdField = targetTerminalId;
+    if (terminalIdField == Ver4DictConstants::NOT_A_TERMINAL_ID) {
+        // Invalidated entries are stored with a dedicated marker value.
+        terminalIdField = Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID;
+    }
+    return buffer->writeUintAndAdvancePosition(terminalIdField,
+            Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos);
+}
+
+// Copies the bigram list that starts at bigramListPos, entry by entry, to toPos.
+// Each entry is decoded and re-encoded with the same probability, hasNext flag and target
+// terminal id. Copying stops after the first entry whose hasNext flag is clear.
+// Returns false if writing any entry fails, true otherwise.
+bool BigramDictContent::copyBigramList(const int bigramListPos, const int toPos) {
+    int srcPos = bigramListPos;
+    int dstPos = toPos;
+    bool moreEntries = true;
+    // At least one entry is always copied; the flag read from each entry controls the loop.
+    do {
+        int probability = NOT_A_PROBABILITY;
+        int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
+        getBigramEntryAndAdvancePosition(&probability, &moreEntries, &targetTerminalId,
+                &srcPos);
+        const bool written = writeBigramEntryAndAdvancePosition(probability, moreEntries,
+                targetTerminalId, &dstPos);
+        if (!written) {
+            return false;
+        }
+    } while (moreEntries);
+    return true;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
index 5eed13e70..bc9e4b619 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
@@ -33,21 +33,15 @@ class BigramDictContent : public SparseTableDictContent {
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE) {}
- void getBigramEntryAndAdvancePosition(int *const outBigramFlags,
- int *const outTargetTerminalId, int *const bigramEntryPos) const {
- const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer();
- if (outBigramFlags) {
- *outBigramFlags = bigramListBuffer->readUintAndAdvancePosition(
- Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos);
- }
- if (outTargetTerminalId) {
- *outTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition(
- Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos);
- }
- }
+ BigramDictContent()
+ : SparseTableDictContent(Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
+ Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE) {}
+
+ void getBigramEntryAndAdvancePosition(int *const outProbability, bool *const outHasNext,
+ int *const outTargetTerminalId, int *const bigramEntryPos) const;
- // Returns head position of bigram list for a PtNode specified by terminalId.
- int getBigramListHeadPos(const int terminalId) const {
+ // Returns head position of bigram list for a PtNode specified by terminalId.
+ int getBigramListHeadPos(const int terminalId) const {
const SparseTable *const addressLookupTable = getAddressLookupTable();
if (!addressLookupTable->contains(terminalId)) {
return NOT_A_DICT_POS;
@@ -55,8 +49,23 @@ class BigramDictContent : public SparseTableDictContent {
return addressLookupTable->get(terminalId);
}
+ bool writeBigramEntryAndAdvancePosition(const int probability, const int hasNext,
+ const int targetTerminalId, int *const entryWritingPos);
+
+ bool createNewBigramList(const int terminalId) {
+ const int bigramListPos = getContentBuffer()->getTailPosition();
+ return getUpdatableAddressLookupTable()->set(terminalId, bigramListPos);
+ }
+
+ bool copyBigramList(const int bigramListPos, const int toPos);
+
private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(BigramDictContent);
+ DISALLOW_COPY_AND_ASSIGN(BigramDictContent);
+
+ int createAndGetBigramFlags(const int probability, const bool hasNext) const {
+ return (probability & Ver4DictConstants::BIGRAM_PROBABILITY_MASK)
+ | (hasNext ? Ver4DictConstants::BIGRAM_HAS_NEXT_MASK : 0);
+ }
};
} // namespace latinime
#endif /* LATINIME_BIGRAM_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h
index e85bbe18e..c109cbf51 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h
@@ -31,6 +31,8 @@ class ProbabilityDictContent : public SingleDictContent {
: SingleDictContent(dictDirPath, Ver4DictConstants::FREQ_FILE_EXTENSION,
isUpdatable) {}
+ ProbabilityDictContent() {}
+
int getProbability(const int terminalId) const {
if (terminalId < 0 || terminalId >= getSize()) {
return NOT_A_PROBABILITY;
@@ -61,7 +63,7 @@ class ProbabilityDictContent : public SingleDictContent {
}
private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(ProbabilityDictContent);
+ DISALLOW_COPY_AND_ASSIGN(ProbabilityDictContent);
int getSize() const {
return getBuffer()->getTailPosition() / (Ver4DictConstants::PROBABILITY_SIZE
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h
index c10fbcb2a..8463a1753 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h
@@ -33,6 +33,10 @@ class ShortcutDictContent : public SparseTableDictContent {
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
+ ShortcutDictContent()
+ : SparseTableDictContent(Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
+ Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
+
void getShortcutEntryAndAdvancePosition(const int maxCodePointCount,
int *const outCodePoint, int *const outCodePointCount, int *const outShortcutFlags,
int *const shortcutEntryPos) const {
@@ -57,7 +61,7 @@ class ShortcutDictContent : public SparseTableDictContent {
}
private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(ShortcutDictContent);
+ DISALLOW_COPY_AND_ASSIGN(ShortcutDictContent);
};
} // namespace latinime
#endif /* LATINIME_SHORTCUT_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h
index 4cb96da6a..7669c1eca 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h
@@ -19,6 +19,7 @@
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
@@ -31,12 +32,17 @@ class SingleDictContent : public DictContent {
: mMmappedBuffer(MmappedBuffer::openBuffer(dictDirPath, contentFileName, isUpdatable)),
mExpandableContentBuffer(mMmappedBuffer.get() ? mMmappedBuffer.get()->getBuffer() : 0,
mMmappedBuffer.get() ? mMmappedBuffer.get()->getBufferSize() : 0,
- BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE) {}
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
+ mIsValid(mMmappedBuffer.get() != 0) {}
+
+ SingleDictContent()
+ : mMmappedBuffer(0), mExpandableContentBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
+ mIsValid(true) {}
virtual ~SingleDictContent() {}
virtual bool isValid() const {
- return mMmappedBuffer.get() != 0;
+ return mIsValid;
}
protected:
@@ -49,10 +55,11 @@ class SingleDictContent : public DictContent {
}
private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(SingleDictContent);
+ DISALLOW_COPY_AND_ASSIGN(SingleDictContent);
const MmappedBuffer::MmappedBufferPtr mMmappedBuffer;
BufferWithExtendableBuffer mExpandableContentBuffer;
+ const bool mIsValid;
};
} // namespace latinime
#endif /* LATINIME_SINGLE_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h
index 71868e9ca..5ae5f0ff1 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h
@@ -19,6 +19,7 @@
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
#include "suggest/policyimpl/dictionary/utils/sparse_table.h"
@@ -49,20 +50,37 @@ class SparseTableDictContent : public DictContent {
mContentBuffer.get() ? mContentBuffer.get()->getBufferSize() : 0,
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer,
- sparseTableBlockSize, sparseTableDataSize) {}
+ sparseTableBlockSize, sparseTableDataSize),
+ mIsValid(mLookupTableBuffer.get() != 0 && mAddressTableBuffer.get() != 0
+ && mContentBuffer.get() != 0) {}
+
+ SparseTableDictContent(const int sparseTableBlockSize, const int sparseTableDataSize)
+ : mLookupTableBuffer(0), mAddressTableBuffer(0), mContentBuffer(0),
+ mExpandableLookupTableBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
+ mExpandableAddressTableBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
+ mExpandableContentBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
+ mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer,
+ sparseTableBlockSize, sparseTableDataSize), mIsValid(true) {}
virtual ~SparseTableDictContent() {}
virtual bool isValid() const {
- return mLookupTableBuffer.get() != 0 && mAddressTableBuffer.get() != 0
- && mContentBuffer.get() != 0;
+ return mIsValid;
}
protected:
+ SparseTable *getUpdatableAddressLookupTable() {
+ return &mAddressLookupTable;
+ }
+
const SparseTable *getAddressLookupTable() const {
return &mAddressLookupTable;
}
+ BufferWithExtendableBuffer *getWritableContentBuffer() {
+ return &mExpandableContentBuffer;
+ }
+
const BufferWithExtendableBuffer *getContentBuffer() const {
return &mExpandableContentBuffer;
}
@@ -70,7 +88,6 @@ class SparseTableDictContent : public DictContent {
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTableDictContent);
- // TODO: Have sparse table.
const MmappedBuffer::MmappedBufferPtr mLookupTableBuffer;
const MmappedBuffer::MmappedBufferPtr mAddressTableBuffer;
const MmappedBuffer::MmappedBufferPtr mContentBuffer;
@@ -78,6 +95,7 @@ class SparseTableDictContent : public DictContent {
BufferWithExtendableBuffer mExpandableAddressTableBuffer;
BufferWithExtendableBuffer mExpandableContentBuffer;
SparseTable mAddressLookupTable;
+ const bool mIsValid;
};
} // namespace latinime
#endif /* LATINIME_SPARSE_TABLE_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h
index f6ced31b4..e016a2b5f 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h
@@ -38,6 +38,8 @@ class TerminalPositionLookupTable : public SingleDictContent {
/ Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE),
mHeaderRegionSize(headerRegionSize) {}
+ TerminalPositionLookupTable() : mSize(0), mHeaderRegionSize(0) {}
+
int getTerminalPtNodePosition(const int terminalId) const {
if (terminalId < 0 || terminalId >= mSize) {
return NOT_A_DICT_POS;
@@ -66,7 +68,7 @@ class TerminalPositionLookupTable : public SingleDictContent {
}
private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalPositionLookupTable);
+ DISALLOW_COPY_AND_ASSIGN(TerminalPositionLookupTable);
int mSize;
const int mHeaderRegionSize;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
index 6476478e5..e468be591 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
@@ -33,27 +33,30 @@ class Ver4DictBuffers {
public:
typedef ExclusiveOwnershipPointer<Ver4DictBuffers> Ver4DictBuffersPtr;
- static Ver4DictBuffersPtr openVer4DictBuffers(const char *const dictDirPath,
+ static AK_FORCE_INLINE Ver4DictBuffersPtr openVer4DictBuffers(const char *const dictDirPath,
const MmappedBuffer::MmappedBufferPtr &dictBuffer) {
const bool isUpdatable = dictBuffer.get() ? dictBuffer.get()->isUpdatable() : false;
return Ver4DictBuffersPtr(new Ver4DictBuffers(dictDirPath, dictBuffer, isUpdatable));
}
+ static AK_FORCE_INLINE Ver4DictBuffersPtr createVer4DictBuffers() {
+ return Ver4DictBuffersPtr(new Ver4DictBuffers());
+ }
+
AK_FORCE_INLINE bool isValid() const {
return mDictBuffer.get() != 0 && mProbabilityDictContent.isValid()
&& mTerminalPositionLookupTable.isValid() && mBigramDictContent.isValid()
&& mShortcutDictContent.isValid();
}
- AK_FORCE_INLINE uint8_t *getRawDictBuffer() const {
- return mDictBuffer.get()->getBuffer();
+ AK_FORCE_INLINE BufferWithExtendableBuffer *getWritableHeaderBuffer() {
+ return &mExpandableHeaderBuffer;
}
- AK_FORCE_INLINE int getRawDictBufferSize() const {
- return mDictBuffer.get()->getBufferSize();
+ AK_FORCE_INLINE BufferWithExtendableBuffer *getWritableTrieBuffer() {
+ return &mExpandableTrieBuffer;
}
-
AK_FORCE_INLINE TerminalPositionLookupTable *getUpdatableTerminalPositionLookupTable() {
return &mTerminalPositionLookupTable;
}
@@ -70,6 +73,10 @@ class Ver4DictBuffers {
return &mProbabilityDictContent;
}
+ AK_FORCE_INLINE BigramDictContent *getUpdatableBigramDictContent() {
+ return &mBigramDictContent;
+ }
+
AK_FORCE_INLINE const BigramDictContent *getBigramDictContent() const {
return &mBigramDictContent;
}
@@ -82,21 +89,41 @@ class Ver4DictBuffers {
return mIsUpdatable;
}
+ bool flush(const char *const dictDirPath) {
+ // TODO: Implement.
+ return false;
+ }
+
private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictBuffers);
+ DISALLOW_COPY_AND_ASSIGN(Ver4DictBuffers);
AK_FORCE_INLINE Ver4DictBuffers(const char *const dictDirPath,
const MmappedBuffer::MmappedBufferPtr &dictBuffer, const bool isUpdatable)
: mDictBuffer(dictBuffer),
- // TODO: Quit using getHeaderSize.
- mTerminalPositionLookupTable(dictDirPath, isUpdatable,
- HeaderReadWriteUtils::getHeaderSize(mDictBuffer.get()->getBuffer())),
+ mHeaderSize(HeaderReadWriteUtils::getHeaderSize(mDictBuffer.get()->getBuffer())),
+ mExpandableHeaderBuffer(dictBuffer.get()->getBuffer(), mHeaderSize,
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
+ mExpandableTrieBuffer(dictBuffer.get()->getBuffer() + mHeaderSize,
+ dictBuffer.get()->getBufferSize() - mHeaderSize,
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
+ // TODO: Quit using header size.
+ mTerminalPositionLookupTable(dictDirPath, isUpdatable, mHeaderSize),
mProbabilityDictContent(dictDirPath, isUpdatable),
mBigramDictContent(dictDirPath, isUpdatable),
mShortcutDictContent(dictDirPath, isUpdatable),
mIsUpdatable(isUpdatable) {}
+ AK_FORCE_INLINE Ver4DictBuffers()
+ : mDictBuffer(0), mHeaderSize(0),
+ mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
+ mExpandableTrieBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
+ mTerminalPositionLookupTable(), mProbabilityDictContent(),
+ mBigramDictContent(), mShortcutDictContent(), mIsUpdatable(true) {}
+
const MmappedBuffer::MmappedBufferPtr mDictBuffer;
+ const int mHeaderSize;
+ BufferWithExtendableBuffer mExpandableHeaderBuffer;
+ BufferWithExtendableBuffer mExpandableTrieBuffer;
TerminalPositionLookupTable mTerminalPositionLookupTable;
ProbabilityDictContent mProbabilityDictContent;
BigramDictContent mBigramDictContent;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
index 941bcd594..af13a374a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
@@ -30,6 +30,10 @@ const char *const Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION = ".sh
const char *const Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION =
".shortcut_index_shortcut";
+// Version 4 dictionary size is implicitly limited to 8MB due to 3-byte offsets.
+// TODO: Make MAX_DICTIONARY_SIZE 8MB.
+const int Ver4DictConstants::MAX_DICTIONARY_SIZE = 2 * 1024 * 1024;
+
const int Ver4DictConstants::NOT_A_TERMINAL_ID = -1;
const int Ver4DictConstants::PROBABILITY_SIZE = 1;
const int Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE = 1;
@@ -42,7 +46,13 @@ const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 16;
const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4;
const int Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE = 3;
+// The maximum unsigned value that fits in BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE bytes is used to
+// represent an invalid terminal ID in bigram lists.
+const int Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID =
+ (1 << (BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE * 8)) - 1;
const int Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE = 1;
+const int Ver4DictConstants::BIGRAM_PROBABILITY_MASK = 0x0F;
+const int Ver4DictConstants::BIGRAM_HAS_NEXT_MASK = 0x80;
const int Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE = 1;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
index 7270d9e6e..cfb7740be 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
@@ -34,6 +34,8 @@ class Ver4DictConstants {
static const char *const SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION;
static const char *const SHORTCUT_CONTENT_TABLE_FILE_EXTENSION;
+ static const int MAX_DICTIONARY_SIZE;
+
static const int NOT_A_TERMINAL_ID;
static const int PROBABILITY_SIZE;
static const int FLAGS_IN_PROBABILITY_FILE_SIZE;
@@ -47,6 +49,9 @@ class Ver4DictConstants {
static const int BIGRAM_FLAGS_FIELD_SIZE;
static const int BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
+ static const int INVALID_BIGRAM_TARGET_TERMINAL_ID;
+ static const int BIGRAM_PROBABILITY_MASK;
+ static const int BIGRAM_HAS_NEXT_MASK;
static const int SHORTCUT_FLAGS_FIELD_SIZE;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
index 8b0ea823e..b572ee87f 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
@@ -16,7 +16,7 @@
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
-#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
@@ -167,8 +167,6 @@ bool Ver4PatriciaTrieNodeWriter::writePtNodeAndAdvancePosition(
ptNodeParams->getChildrenPos(), ptNodeWritingPos)) {
return false;
}
- // TODO: Implement bigram and shortcut writing.
-
// Create node flags and write them.
PatriciaTrieReadingUtils::NodeFlags nodeFlags =
PatriciaTrieReadingUtils::createAndGetFlags(ptNodeParams->isBlacklisted(),
@@ -188,14 +186,14 @@ bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry(
const PtNodeParams *const sourcePtNodeParams,
const PtNodeParams *const targetPtNodeParam, const int probability,
bool *const outAddedNewBigram) {
- // TODO: Implement.
- return false;
+ return mBigramPolicy->addNewEntry(sourcePtNodeParams->getTerminalId(),
+ targetPtNodeParam->getTerminalId(), probability, outAddedNewBigram);
}
bool Ver4PatriciaTrieNodeWriter::removeBigramEntry(
const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam) {
- // TODO: Implement.
- return false;
+ return mBigramPolicy->removeEntry(sourcePtNodeParams->getTerminalId(),
+ targetPtNodeParam->getTerminalId());
}
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index 520ffc080..698483a79 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -28,14 +28,14 @@ namespace latinime {
const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024;
const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
- DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
+ Ver4DictConstants::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const {
if (!dicNode->hasChildren()) {
return;
}
- DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader);
+ DynamicPatriciaTrieReadingHelper readingHelper(mDictBuffer, &mNodeReader);
readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos());
while (!readingHelper.isEnd()) {
const PtNodeParams ptNodeParams = readingHelper.getPtNodeParams();
@@ -63,7 +63,7 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d
int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const {
- DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader);
+ DynamicPatriciaTrieReadingHelper readingHelper(mDictBuffer, &mNodeReader);
readingHelper.initWithPtNodePos(ptNodePos);
return readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount(
maxCodePointCount, outCodePoints, outUnigramProbability);
@@ -71,7 +71,7 @@ int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const {
- DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader);
+ DynamicPatriciaTrieReadingHelper readingHelper(mDictBuffer, &mNodeReader);
readingHelper.initWithPtNodeArrayPos(getRootPosition());
return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
}
@@ -135,12 +135,12 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
return false;
}
- if (mDictBuffer.getTailPosition()
- >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
- AKLOGE("The dictionary is too large to dynamically update.");
+ if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
+ AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
+ mDictBuffer->getTailPosition());
return false;
}
- DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader);
+ DynamicPatriciaTrieReadingHelper readingHelper(mDictBuffer, &mNodeReader);
readingHelper.initWithPtNodeArrayPos(getRootPosition());
bool addedNewUnigram = false;
if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, probability,
@@ -156,14 +156,63 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len
bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int length0,
const int *const word1, const int length1, const int probability) {
- // TODO: Implement.
- return false;
+ if (!mBuffers.get()->isUpdatable()) {
+ AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
+ return false;
+ }
+ if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
+ AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
+ mDictBuffer->getTailPosition());
+ return false;
+ }
+ const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
+ false /* forceLowerCaseSearch */);
+ if (word0Pos == NOT_A_DICT_POS) {
+ return false;
+ }
+ const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1,
+ false /* forceLowerCaseSearch */);
+ if (word1Pos == NOT_A_DICT_POS) {
+ return false;
+ }
+ bool addedNewBigram = false;
+ if (mUpdatingHelper.addBigramWords(word0Pos, word1Pos, probability, &addedNewBigram)) {
+ if (addedNewBigram) {
+ mBigramCount++;
+ }
+ return true;
+ } else {
+ return false;
+ }
}
bool Ver4PatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0,
const int *const word1, const int length1) {
- // TODO: Implement.
- return false;
+ if (!mBuffers.get()->isUpdatable()) {
+ AKLOGI("Warning: removeBigramWords() is called for non-updatable dictionary.");
+ return false;
+ }
+ if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
+ AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
+ mDictBuffer->getTailPosition());
+ return false;
+ }
+ const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
+ false /* forceLowerCaseSearch */);
+ if (word0Pos == NOT_A_DICT_POS) {
+ return false;
+ }
+ const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1,
+ false /* forceLowerCaseSearch */);
+ if (word1Pos == NOT_A_DICT_POS) {
+ return false;
+ }
+ if (mUpdatingHelper.removeBigramWords(word0Pos, word1Pos)) {
+ mBigramCount--;
+ return true;
+ } else {
+ return false;
+ }
}
void Ver4PatriciaTriePolicy::flush(const char *const filePath) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
index fdb7ac69b..e8fdf5513 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
@@ -38,18 +38,17 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public:
Ver4PatriciaTriePolicy(const Ver4DictBuffers::Ver4DictBuffersPtr &buffers)
: mBuffers(buffers),
- mHeaderPolicy(mBuffers.get()->getRawDictBuffer(), FormatUtils::VERSION_4),
- mDictBuffer(mBuffers.get()->getRawDictBuffer() + mHeaderPolicy.getSize(),
- mBuffers.get()->getRawDictBufferSize() - mHeaderPolicy.getSize(),
- BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
- mBigramPolicy(mBuffers.get()->getBigramDictContent(),
+ mHeaderPolicy(mBuffers.get()->getWritableHeaderBuffer()->getBuffer(
+ false /* usesAdditionalBuffer*/), FormatUtils::VERSION_4),
+ mDictBuffer(mBuffers.get()->getWritableTrieBuffer()),
+ mBigramPolicy(mBuffers.get()->getUpdatableBigramDictContent(),
mBuffers.get()->getTerminalPositionLookupTable()),
mShortcutPolicy(mBuffers.get()->getShortcutDictContent(),
mBuffers.get()->getTerminalPositionLookupTable()),
- mNodeReader(&mDictBuffer, mBuffers.get()->getProbabilityDictContent()),
- mNodeWriter(&mDictBuffer, mBuffers.get(), &mNodeReader, &mBigramPolicy,
+ mNodeReader(mDictBuffer, mBuffers.get()->getProbabilityDictContent()),
+ mNodeWriter(mDictBuffer, mBuffers.get(), &mNodeReader, &mBigramPolicy,
&mShortcutPolicy),
- mUpdatingHelper(&mDictBuffer, &mNodeReader, &mNodeWriter,
+ mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter,
mHeaderPolicy.isDecayingDict()),
mUnigramCount(mHeaderPolicy.getUnigramCount()),
mBigramCount(mHeaderPolicy.getBigramCount()) {};
@@ -115,7 +114,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
Ver4DictBuffers::Ver4DictBuffersPtr mBuffers;
const HeaderPolicy mHeaderPolicy;
- BufferWithExtendableBuffer mDictBuffer;
+ BufferWithExtendableBuffer *const mDictBuffer;
Ver4BigramListPolicy mBigramPolicy;
Ver4ShortcutListPolicy mShortcutPolicy;
Ver4PatriciaTrieNodeReader mNodeReader;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
index f17a0d1c0..26eafcd44 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
@@ -49,6 +49,11 @@ void BufferWithExtendableBuffer::readCodePointsAndAdvancePosition(const int maxC
}
}
+bool BufferWithExtendableBuffer::writeUint(const uint32_t data, const int size, const int pos) {
+ int writingPos = pos;
+ return writeUintAndAdvancePosition(data, size, &writingPos);
+}
+
bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data, const int size,
int *const pos) {
if (!(size >= 1 && size <= 4)) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
index 13dce9b61..ee6107ad7 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
@@ -93,6 +93,8 @@ class BufferWithExtendableBuffer {
* Writing is allowed for original buffer, already written region of additional buffer and the
* tail of additional buffer.
*/
+ bool writeUint(const uint32_t data, const int size, const int pos);
+
bool writeUintAndAdvancePosition(const uint32_t data, const int size, int *const pos);
bool writeCodePointsAndAdvancePosition(const int *const codePoints, const int codePointCount,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
index b48e5b005..40f7d1f5c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
@@ -21,6 +21,7 @@
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
@@ -34,7 +35,7 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
case 3:
return createEmptyV3DictFile(filePath, attributeMap);
case 4:
- // TODO: Support version 4 dictionary format.
+ return createEmptyV4DictFile(filePath, attributeMap);
return false;
default:
// Only version 3 dictionary is supported for now.
@@ -58,6 +59,20 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
return flushAllHeaderAndBodyToFile(filePath, &headerBuffer, &bodyBuffer);
}
+/* static */ bool DictFileWritingUtils::createEmptyV4DictFile(const char *const filePath,
+ const HeaderReadWriteUtils::AttributeMap *const attributeMap) {
+ Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers = Ver4DictBuffers::createVer4DictBuffers();
+ HeaderPolicy headerPolicy(FormatUtils::VERSION_4, attributeMap);
+ headerPolicy.writeHeaderToBuffer(dictBuffers.get()->getWritableHeaderBuffer(),
+ true /* updatesLastUpdatedTime */, true /* updatesLastDecayedTime */,
+ 0 /* unigramCount */, 0 /* bigramCount */, 0 /* extendedRegionSize */);
+ if (!DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(
+ dictBuffers.get()->getWritableTrieBuffer(), 0 /* rootPos */)) {
+ return false;
+ }
+ return dictBuffers.get()->flush(filePath);
+}
+
/* static */ bool DictFileWritingUtils::flushAllHeaderAndBodyToFile(const char *const filePath,
BufferWithExtendableBuffer *const dictHeader, BufferWithExtendableBuffer *const dictBody) {
const int tmpFileNameBufSize = strlen(filePath)
@@ -69,21 +84,21 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE);
FILE *const file = fopen(tmpFileName, "wb");
if (!file) {
- AKLOGE("Dictionary file %s cannnot be opened.", tmpFileName);
+ AKLOGE("Dictionary file %s cannot be opened.", tmpFileName);
ASSERT(false);
return false;
}
// Write the dictionary header.
if (!writeBufferToFile(file, dictHeader)) {
remove(tmpFileName);
- AKLOGE("Dictionary header cannnot be written. size: %d", dictHeader->getTailPosition());
+ AKLOGE("Dictionary header cannot be written. size: %d", dictHeader->getTailPosition());
ASSERT(false);
return false;
}
// Write the dictionary body.
if (!writeBufferToFile(file, dictBody)) {
remove(tmpFileName);
- AKLOGE("Dictionary body cannnot be written. size: %d", dictBody->getTailPosition());
+ AKLOGE("Dictionary body cannot be written. size: %d", dictBody->getTailPosition());
ASSERT(false);
return false;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h
index bd4ac66fd..3291f98c7 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h
@@ -43,6 +43,9 @@ class DictFileWritingUtils {
static bool createEmptyV3DictFile(const char *const filePath,
const HeaderReadWriteUtils::AttributeMap *const attributeMap);
+ static bool createEmptyV4DictFile(const char *const filePath,
+ const HeaderReadWriteUtils::AttributeMap *const attributeMap);
+
static bool writeBufferToFile(FILE *const file,
const BufferWithExtendableBuffer *const buffer);
};
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp
index 2678b8c7b..9be35620c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp
@@ -19,23 +19,68 @@
namespace latinime {
const int SparseTable::NOT_EXIST = -1;
+const int SparseTable::INDEX_SIZE = 4;
bool SparseTable::contains(const int id) const {
- const int readingPos = id / mBlockSize * mDataSize;
+ const int readingPos = getPosInIndexTable(id);
if (id < 0 || mIndexTableBuffer->getTailPosition() <= readingPos) {
return false;
}
- const int index = mIndexTableBuffer->readUint(mDataSize, readingPos);
+ const int index = mIndexTableBuffer->readUint(INDEX_SIZE, readingPos);
return index != NOT_EXIST;
}
uint32_t SparseTable::get(const int id) const {
- const int indexTableIndex = id / mBlockSize;
- int readingPos = indexTableIndex * mDataSize;
- const int index = mIndexTableBuffer->readUint(mDataSize, readingPos);
+ const int indexTableReadingPos = getPosInIndexTable(id);
+ const int index = mIndexTableBuffer->readUint(INDEX_SIZE, indexTableReadingPos);
+ const int contentTableReadingPos = getPosInContentTable(id, index);
+ return mContentTableBuffer->readUint(mDataSize, contentTableReadingPos);
+}
+
+bool SparseTable::set(const int id, const uint32_t value) {
+ const int posInIndexTable = getPosInIndexTable(id);
+ // Pads the index table with NOT_EXIST entries until it reaches this id's slot.
+ if (mIndexTableBuffer->getTailPosition() < posInIndexTable) {
+ int tailPos = mIndexTableBuffer->getTailPosition();
+ while(tailPos < posInIndexTable) {
+ if (!mIndexTableBuffer->writeUintAndAdvancePosition(NOT_EXIST, INDEX_SIZE, &tailPos)) {
+ return false;
+ }
+ }
+ }
+ if (contains(id)) {
+ // The block for this id already exists in the content table; overwrite the entry in place.
+ const int index = mIndexTableBuffer->readUint(INDEX_SIZE, posInIndexTable);
+ return mContentTableBuffer->writeUint(value, mDataSize, getPosInContentTable(id, index));
+ }
+ // No block exists for this id yet.
+ // Allocate a new block at the tail of the content table and record its index.
+ const int index = getIndexFromContentTablePos(mContentTableBuffer->getTailPosition());
+ if (!mIndexTableBuffer->writeUint(index, INDEX_SIZE, posInIndexTable)) {
+ return false;
+ }
+ // Write the new block containing the entry to be set, filling it with NOT_A_DICT_POS.
+ int writingPos = getPosInContentTable(0 /* id */, index);
+ for (int i = 0; i < mBlockSize; ++i) {
+ if (!mContentTableBuffer->writeUintAndAdvancePosition(NOT_A_DICT_POS, mDataSize,
+ &writingPos)) {
+ return false;
+ }
+ }
+ return mContentTableBuffer->writeUint(value, mDataSize, getPosInContentTable(id, index));
+}
+
+int SparseTable::getIndexFromContentTablePos(const int contentTablePos) const {
+ return contentTablePos / mDataSize / mBlockSize;
+}
+
+int SparseTable::getPosInIndexTable(const int id) const {
+ return (id / mBlockSize) * INDEX_SIZE;
+}
+
+int SparseTable::getPosInContentTable(const int id, const int index) const {
const int offset = id % mBlockSize;
- readingPos = (index * mDataSize + offset) * mBlockSize;
- return mContentTableBuffer->readUint(mDataSize, readingPos);
+ return (index * mBlockSize + offset) * mDataSize; // Entry number in table, scaled to bytes.
}
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h
index d71756c63..21c167506 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h
@@ -38,10 +38,19 @@ class SparseTable {
uint32_t get(const int id) const;
+ bool set(const int id, const uint32_t value);
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTable);
+ int getIndexFromContentTablePos(const int contentTablePos) const;
+
+ int getPosInIndexTable(const int id) const;
+
+ int getPosInContentTable(const int id, const int index) const;
+
static const int NOT_EXIST;
+ static const int INDEX_SIZE;
BufferWithExtendableBuffer *const mIndexTableBuffer;
BufferWithExtendableBuffer *const mContentTableBuffer;