aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--native/jni/Android.mk7
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp115
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h33
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp3
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp69
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h33
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h8
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp8
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp36
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp5
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp59
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h9
-rw-r--r--tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java40
18 files changed, 381 insertions, 56 deletions
diff --git a/native/jni/Android.mk b/native/jni/Android.mk
index a5757fd58..b61a66ce6 100644
--- a/native/jni/Android.mk
+++ b/native/jni/Android.mk
@@ -68,12 +68,14 @@ LATIN_IME_CORE_SRC_FILES := \
suggest/core/policy/weighting.cpp \
suggest/core/session/dic_traverse_session.cpp \
$(addprefix suggest/policyimpl/dictionary/, \
- bigram/bigram_list_read_write_utils.cpp \
- bigram/dynamic_bigram_list_policy.cpp \
header/header_policy.cpp \
header/header_read_write_utils.cpp \
shortcut/shortcut_list_reading_utils.cpp \
structure/dictionary_structure_with_buffer_policy_factory.cpp) \
+ $(addprefix suggest/policyimpl/dictionary/bigram/, \
+ bigram_list_read_write_utils.cpp \
+ dynamic_bigram_list_policy.cpp \
+ ver4_bigram_list_policy.cpp) \
$(addprefix suggest/policyimpl/dictionary/structure/v2/, \
patricia_trie_policy.cpp \
patricia_trie_reading_utils.cpp) \
@@ -88,6 +90,7 @@ LATIN_IME_CORE_SRC_FILES := \
dynamic_patricia_trie_writing_helper.cpp \
dynamic_patricia_trie_writing_utils.cpp) \
$(addprefix suggest/policyimpl/dictionary/structure/v4/, \
+ content/bigram_dict_content.cpp \
ver4_dict_constants.cpp \
ver4_patricia_trie_node_reader.cpp \
ver4_patricia_trie_node_writer.cpp \
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp
new file mode 100644
index 000000000..028078111
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp
@@ -0,0 +1,115 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
+
+#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+
+namespace latinime {
+
+void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
+ bool *const outHasNext, int *const bigramEntryPos) const {
+ int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
+ mBigramDictContent->getBigramEntryAndAdvancePosition(outProbability, outHasNext,
+ &targetTerminalId, bigramEntryPos);
+ if (outBigramPos) {
+ // Lookup target PtNode position.
+ *outBigramPos = mTerminalPositionLookupTable->getTerminalPtNodePosition(targetTerminalId);
+ }
+}
+
+bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId,
+ const int newProbability, bool *const outAddedNewEntry) {
+ if (outAddedNewEntry) {
+ *outAddedNewEntry = false;
+ }
+ const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+ if (bigramListPos == NOT_A_DICT_POS) {
+ // Updating PtNode doesn't have a bigram list.
+ // Create new bigram list.
+ if (!mBigramDictContent->createNewBigramList(terminalId)) {
+ return false;
+ }
+ // Write an entry.
+ int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+ if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(newProbability,
+ false /* hasNext */, newTargetTerminalId, &writingPos)) {
+ return false;
+ }
+ return true;
+ }
+
+ const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos);
+ if (entryPosToUpdate != NOT_A_DICT_POS) {
+ // Overwrite existing entry.
+ int readingPos = entryPosToUpdate;
+ bool hasNext = false;
+ int probability = NOT_A_PROBABILITY;
+ int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
+ mBigramDictContent->getBigramEntryAndAdvancePosition(&probability, &hasNext,
+ &targetTerminalId, &readingPos);
+ if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID && outAddedNewEntry) {
+ // Reuse invalid entry.
+ *outAddedNewEntry = true;
+ }
+ int writingPos = entryPosToUpdate;
+ return mBigramDictContent->writeBigramEntryAndAdvancePosition(newProbability, hasNext,
+ newTargetTerminalId, &writingPos);
+ }
+
+ // Add new entry to the bigram list.
+ // Create new bigram list.
+ if (!mBigramDictContent->createNewBigramList(terminalId)) {
+ return false;
+ }
+ // Write new entry at a head position of the bigram list.
+ int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+ if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(newProbability,
+ true /* hasNext */, newTargetTerminalId, &writingPos)) {
+ return false;
+ }
+ if (outAddedNewEntry) {
+ *outAddedNewEntry = true;
+ }
+ // Append existing entries by copying.
+ return mBigramDictContent->copyBigramList(bigramListPos, writingPos);
+}
+
+int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
+ const int bigramListPos) const {
+ bool hasNext = true;
+ int invalidEntryPos = NOT_A_DICT_POS;
+ int readingPos = bigramListPos;
+ while(hasNext) {
+ const int entryPos = readingPos;
+ int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
+ mBigramDictContent->getBigramEntryAndAdvancePosition(0 /* probability */, &hasNext,
+ &targetTerminalId, &readingPos);
+ if (targetTerminalId == targetTerminalIdToFind) {
+ // Entry with same target is found.
+ return entryPos;
+ } else if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) {
+ // Invalid entry that can be reused is found.
+ invalidEntryPos = entryPos;
+ }
+ }
+ return invalidEntryPos;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h
index 875a0ff9b..e599a2c8d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h
@@ -19,46 +19,35 @@
#include "defines.h"
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
-#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
namespace latinime {
+class BigramDictContent;
+class TerminalPositionLookupTable;
+
class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
public:
- Ver4BigramListPolicy(const BigramDictContent *const bigramDictContent,
+ Ver4BigramListPolicy(BigramDictContent *const bigramDictContent,
const TerminalPositionLookupTable *const terminalPositionLookupTable)
: mBigramDictContent(bigramDictContent),
mTerminalPositionLookupTable(terminalPositionLookupTable) {}
void getNextBigram(int *const outBigramPos, int *const outProbability,
- bool *const outHasNext, int *const bigramEntryPos) const {
- int bigramFlags = 0;
- int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
- mBigramDictContent->getBigramEntryAndAdvancePosition(&bigramFlags, &targetTerminalId,
- bigramEntryPos);
- if (outProbability) {
- *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags);
- }
- if (outHasNext) {
- *outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags);
- }
- if (outBigramPos) {
- // Lookup target PtNode position.
- *outBigramPos =
- mTerminalPositionLookupTable->getTerminalPtNodePosition(targetTerminalId);
- }
- }
+ bool *const outHasNext, int *const bigramEntryPos) const;
void skipAllBigrams(int *const pos) const {
// Do nothing because we don't need to skip bigram lists in ver4 dictionaries.
}
+ bool addNewEntry(const int terminalId, const int newTargetTerminalId, const int newProbability,
+ bool *const outAddedNewEntry);
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy);
- const BigramDictContent *const mBigramDictContent;
+ int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos) const;
+
+ BigramDictContent *const mBigramDictContent;
const TerminalPositionLookupTable *const mTerminalPositionLookupTable;
};
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp
index c3fe03d37..b3fdbeb78 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp
@@ -238,6 +238,9 @@ int DynamicPatriciaTrieReadingHelper::getTerminalPtNodePositionOfWord(const int
}
// All characters are matched.
if (length == getTotalCodePointCount(ptNodeParams)) {
+ if (!ptNodeParams.isTerminal()) {
+ return NOT_A_DICT_POS;
+ }
// Terminal position is found.
return ptNodeParams.getHeadPos();
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
new file mode 100644
index 000000000..f02f14582
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
+
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+void BigramDictContent::getBigramEntryAndAdvancePosition(int *const outProbability,
+ bool *const outHasNext, int *const outTargetTerminalId, int *const bigramEntryPos) const {
+ const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer();
+ const int bigramFlags = bigramListBuffer->readUintAndAdvancePosition(
+ Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos);
+ if (outProbability) {
+ *outProbability = bigramFlags & Ver4DictConstants::BIGRAM_PROBABILITY_MASK;
+ }
+ if (outHasNext) {
+ *outHasNext = (bigramFlags & Ver4DictConstants::BIGRAM_HAS_NEXT_MASK) != 0;
+ }
+ if (outTargetTerminalId) {
+ *outTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition(
+ Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos);
+ }
+}
+
+bool BigramDictContent::writeBigramEntryAndAdvancePosition(const int probability, const int hasNext,
+ const int targetTerminalId, int *const entryWritingPos) {
+ BufferWithExtendableBuffer *const bigramListBuffer = getWritableContentBuffer();
+ const int bigramFlags = createAndGetBigramFlags(probability, hasNext);
+ if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags,
+ Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) {
+ return false;
+ }
+ return bigramListBuffer->writeUintAndAdvancePosition(targetTerminalId,
+ Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos);
+}
+
+bool BigramDictContent::copyBigramList(const int bigramListPos, const int toPos) {
+ bool hasNext = true;
+ int readingPos = bigramListPos;
+ int writingPos = toPos;
+ while(hasNext) {
+ int probability = NOT_A_PROBABILITY;
+ int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
+ getBigramEntryAndAdvancePosition(&probability, &hasNext, &targetTerminalId,
+ &readingPos);
+ if (!writeBigramEntryAndAdvancePosition(probability, hasNext, targetTerminalId,
+ &writingPos)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
index 5eed13e70..a112667e2 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
@@ -33,21 +33,11 @@ class BigramDictContent : public SparseTableDictContent {
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE) {}
- void getBigramEntryAndAdvancePosition(int *const outBigramFlags,
- int *const outTargetTerminalId, int *const bigramEntryPos) const {
- const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer();
- if (outBigramFlags) {
- *outBigramFlags = bigramListBuffer->readUintAndAdvancePosition(
- Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos);
- }
- if (outTargetTerminalId) {
- *outTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition(
- Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos);
- }
- }
+ void getBigramEntryAndAdvancePosition(int *const outProbability, bool *const outHasNext,
+ int *const outTargetTerminalId, int *const bigramEntryPos) const;
- // Returns head position of bigram list for a PtNode specified by terminalId.
- int getBigramListHeadPos(const int terminalId) const {
+ // Returns head position of bigram list for a PtNode specified by terminalId.
+ int getBigramListHeadPos(const int terminalId) const {
const SparseTable *const addressLookupTable = getAddressLookupTable();
if (!addressLookupTable->contains(terminalId)) {
return NOT_A_DICT_POS;
@@ -55,8 +45,23 @@ class BigramDictContent : public SparseTableDictContent {
return addressLookupTable->get(terminalId);
}
+ bool writeBigramEntryAndAdvancePosition(const int probability, const int hasNext,
+ const int targetTerminalId, int *const entryWritingPos);
+
+ bool createNewBigramList(const int terminalId) {
+ const int bigramListPos = getContentBuffer()->getTailPosition();
+ return getUpdatableAddressLookupTable()->set(terminalId, bigramListPos);
+ }
+
+ bool copyBigramList(const int bigramListPos, const int toPos);
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramDictContent);
+
+ int createAndGetBigramFlags(const int probability, const bool hasNext) const {
+ return (probability & Ver4DictConstants::BIGRAM_PROBABILITY_MASK)
+ | (hasNext ? Ver4DictConstants::BIGRAM_HAS_NEXT_MASK : 0);
+ }
};
} // namespace latinime
#endif /* LATINIME_BIGRAM_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h
index 71868e9ca..fe9efe874 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h
@@ -59,10 +59,18 @@ class SparseTableDictContent : public DictContent {
}
protected:
+ SparseTable *getUpdatableAddressLookupTable() {
+ return &mAddressLookupTable;
+ }
+
const SparseTable *getAddressLookupTable() const {
return &mAddressLookupTable;
}
+ BufferWithExtendableBuffer *getWritableContentBuffer() {
+ return &mExpandableContentBuffer;
+ }
+
const BufferWithExtendableBuffer *getContentBuffer() const {
return &mExpandableContentBuffer;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
index 6476478e5..b9e56a254 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
@@ -70,6 +70,10 @@ class Ver4DictBuffers {
return &mProbabilityDictContent;
}
+ AK_FORCE_INLINE BigramDictContent *getUpdatableBigramDictContent() {
+ return &mBigramDictContent;
+ }
+
AK_FORCE_INLINE const BigramDictContent *getBigramDictContent() const {
return &mBigramDictContent;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
index 941bcd594..2b113e38b 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
@@ -43,6 +43,8 @@ const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4;
const int Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE = 3;
const int Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE = 1;
+const int Ver4DictConstants::BIGRAM_PROBABILITY_MASK = 0x0F;
+const int Ver4DictConstants::BIGRAM_HAS_NEXT_MASK = 0x80;
const int Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE = 1;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
index 7270d9e6e..a1830dd0c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
@@ -47,6 +47,8 @@ class Ver4DictConstants {
static const int BIGRAM_FLAGS_FIELD_SIZE;
static const int BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
+ static const int BIGRAM_PROBABILITY_MASK;
+ static const int BIGRAM_HAS_NEXT_MASK;
static const int SHORTCUT_FLAGS_FIELD_SIZE;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
index 8b0ea823e..7c66a6691 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
@@ -16,7 +16,7 @@
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
-#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
@@ -167,8 +167,6 @@ bool Ver4PatriciaTrieNodeWriter::writePtNodeAndAdvancePosition(
ptNodeParams->getChildrenPos(), ptNodeWritingPos)) {
return false;
}
- // TODO: Implement bigram and shortcut writing.
-
// Create node flags and write them.
PatriciaTrieReadingUtils::NodeFlags nodeFlags =
PatriciaTrieReadingUtils::createAndGetFlags(ptNodeParams->isBlacklisted(),
@@ -188,8 +186,8 @@ bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry(
const PtNodeParams *const sourcePtNodeParams,
const PtNodeParams *const targetPtNodeParam, const int probability,
bool *const outAddedNewBigram) {
- // TODO: Implement.
- return false;
+ return mBigramPolicy->addNewEntry(sourcePtNodeParams->getTerminalId(),
+ targetPtNodeParam->getTerminalId(), probability, outAddedNewBigram);
}
bool Ver4PatriciaTrieNodeWriter::removeBigramEntry(
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index 520ffc080..57201e4a8 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -135,9 +135,9 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
return false;
}
- if (mDictBuffer.getTailPosition()
- >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
- AKLOGE("The dictionary is too large to dynamically update.");
+ if (mDictBuffer.getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
+ AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
+ mDictBuffer.getTailPosition());
return false;
}
DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader);
@@ -156,8 +156,34 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len
bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int length0,
const int *const word1, const int length1, const int probability) {
- // TODO: Implement.
- return false;
+ if (!mBuffers.get()->isUpdatable()) {
+ AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
+ return false;
+ }
+ if (mDictBuffer.getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
+ AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
+ mDictBuffer.getTailPosition());
+ return false;
+ }
+ const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
+ false /* forceLowerCaseSearch */);
+ if (word0Pos == NOT_A_DICT_POS) {
+ return false;
+ }
+ const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1,
+ false /* forceLowerCaseSearch */);
+ if (word1Pos == NOT_A_DICT_POS) {
+ return false;
+ }
+ bool addedNewBigram = false;
+ if (mUpdatingHelper.addBigramWords(word0Pos, word1Pos, probability, &addedNewBigram)) {
+ if (addedNewBigram) {
+ mBigramCount++;
+ }
+ return true;
+ } else {
+ return false;
+ }
}
bool Ver4PatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
index fdb7ac69b..93cc4b7a2 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
@@ -42,7 +42,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
mDictBuffer(mBuffers.get()->getRawDictBuffer() + mHeaderPolicy.getSize(),
mBuffers.get()->getRawDictBufferSize() - mHeaderPolicy.getSize(),
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
- mBigramPolicy(mBuffers.get()->getBigramDictContent(),
+ mBigramPolicy(mBuffers.get()->getUpdatableBigramDictContent(),
mBuffers.get()->getTerminalPositionLookupTable()),
mShortcutPolicy(mBuffers.get()->getShortcutDictContent(),
mBuffers.get()->getTerminalPositionLookupTable()),
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
index f17a0d1c0..26eafcd44 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
@@ -49,6 +49,11 @@ void BufferWithExtendableBuffer::readCodePointsAndAdvancePosition(const int maxC
}
}
+bool BufferWithExtendableBuffer::writeUint(const uint32_t data, const int size, const int pos) {
+ int writingPos = pos;
+ return writeUintAndAdvancePosition(data, size, &writingPos);
+}
+
bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data, const int size,
int *const pos) {
if (!(size >= 1 && size <= 4)) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
index 13dce9b61..ee6107ad7 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
@@ -93,6 +93,8 @@ class BufferWithExtendableBuffer {
* Writing is allowed for original buffer, already written region of additional buffer and the
* tail of additional buffer.
*/
+ bool writeUint(const uint32_t data, const int size, const int pos);
+
bool writeUintAndAdvancePosition(const uint32_t data, const int size, int *const pos);
bool writeCodePointsAndAdvancePosition(const int *const codePoints, const int codePointCount,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp
index 2678b8c7b..9be35620c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp
@@ -19,23 +19,68 @@
namespace latinime {
const int SparseTable::NOT_EXIST = -1;
+const int SparseTable::INDEX_SIZE = 4;
bool SparseTable::contains(const int id) const {
- const int readingPos = id / mBlockSize * mDataSize;
+ const int readingPos = getPosInIndexTable(id);
if (id < 0 || mIndexTableBuffer->getTailPosition() <= readingPos) {
return false;
}
- const int index = mIndexTableBuffer->readUint(mDataSize, readingPos);
+ const int index = mIndexTableBuffer->readUint(INDEX_SIZE, readingPos);
return index != NOT_EXIST;
}
uint32_t SparseTable::get(const int id) const {
- const int indexTableIndex = id / mBlockSize;
- int readingPos = indexTableIndex * mDataSize;
- const int index = mIndexTableBuffer->readUint(mDataSize, readingPos);
+ const int indexTableReadingPos = getPosInIndexTable(id);
+ const int index = mIndexTableBuffer->readUint(INDEX_SIZE, indexTableReadingPos);
+ const int contentTableReadingPos = getPosInContentTable(id, index);
+ return mContentTableBuffer->readUint(mDataSize, contentTableReadingPos);
+}
+
+bool SparseTable::set(const int id, const uint32_t value) {
+ const int posInIndexTable = getPosInIndexTable(id);
+ // Extends the index table if needed.
+ if (mIndexTableBuffer->getTailPosition() < posInIndexTable) {
+ int tailPos = mIndexTableBuffer->getTailPosition();
+ while(tailPos < posInIndexTable) {
+ if (!mIndexTableBuffer->writeUintAndAdvancePosition(NOT_EXIST, INDEX_SIZE, &tailPos)) {
+ return false;
+ }
+ }
+ }
+ if (contains(id)) {
+ // The entry is already in the content table.
+ const int index = mIndexTableBuffer->readUint(INDEX_SIZE, posInIndexTable);
+ return mContentTableBuffer->writeUint(value, mDataSize, getPosInContentTable(id, index));
+ }
+ // The entry is not in the content table.
+ // Create new entry in the content table.
+ const int index = getIndexFromContentTablePos(mContentTableBuffer->getTailPosition());
+ if (!mIndexTableBuffer->writeUint(index, INDEX_SIZE, posInIndexTable)) {
+ return false;
+ }
+ // Write a new block that containing the entry to be set.
+ int writingPos = getPosInContentTable(0 /* id */, index);
+ for (int i = 0; i < mBlockSize; ++i) {
+ if (!mContentTableBuffer->writeUintAndAdvancePosition(NOT_A_DICT_POS, mDataSize,
+ &writingPos)) {
+ return false;
+ }
+ }
+ return mContentTableBuffer->writeUint(value, mDataSize, getPosInContentTable(id, index));
+}
+
+int SparseTable::getIndexFromContentTablePos(const int contentTablePos) const {
+ return contentTablePos / mDataSize / mBlockSize;
+}
+
+int SparseTable::getPosInIndexTable(const int id) const {
+ return (id / mBlockSize) * INDEX_SIZE;
+}
+
+int SparseTable::getPosInContentTable(const int id, const int index) const {
const int offset = id % mBlockSize;
- readingPos = (index * mDataSize + offset) * mBlockSize;
- return mContentTableBuffer->readUint(mDataSize, readingPos);
+ return (index * mDataSize + offset) * mBlockSize;
}
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h
index d71756c63..21c167506 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h
@@ -38,10 +38,19 @@ class SparseTable {
uint32_t get(const int id) const;
+ bool set(const int id, const uint32_t value);
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTable);
+ int getIndexFromContentTablePos(const int contentTablePos) const;
+
+ int getPosInIndexTable(const int id) const;
+
+ int getPosInContentTable(const int id, const int index) const;
+
static const int NOT_EXIST;
+ static const int INDEX_SIZE;
BufferWithExtendableBuffer *const mIndexTableBuffer;
BufferWithExtendableBuffer *const mContentTableBuffer;
diff --git a/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java
index 85e6243e4..b7cef738b 100644
--- a/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java
+++ b/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java
@@ -206,4 +206,44 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase {
assertEquals(probability, binaryDictionary.getFrequency("y"));
}
+ public void testWriteBigrams() {
+ final String dictVersion = Long.toString(System.currentTimeMillis());
+ final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
+ getDictionaryOptions(TEST_LOCALE, dictVersion));
+ final DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir());
+ try {
+ encoder.writeDictionary(dict, FORMAT_OPTIONS);
+ } catch (IOException e) {
+ Log.e(TAG, "IOException while writing dictionary", e);
+ } catch (UnsupportedFormatException e) {
+ Log.e(TAG, "Unsupported format", e);
+ }
+ final File trieFile = getTrieFile(TEST_LOCALE, dictVersion);
+ final BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(),
+ 0 /* offset */, trieFile.length(), true /* useFullEditDistance */,
+ Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+ assertTrue(binaryDictionary.isValidDictionary());
+
+ final int unigramProbability = 100;
+ final int bigramProbability = 10;
+ final int updatedBigramProbability = 15;
+ binaryDictionary.addUnigramWord("aaa", unigramProbability);
+ binaryDictionary.addUnigramWord("abb", unigramProbability);
+ binaryDictionary.addUnigramWord("bcc", unigramProbability);
+ binaryDictionary.addBigramWords("aaa", "abb", bigramProbability);
+ binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability);
+ binaryDictionary.addBigramWords("abb", "aaa", bigramProbability);
+ binaryDictionary.addBigramWords("abb", "bcc", bigramProbability);
+
+ final int probability = binaryDictionary.calculateProbability(unigramProbability,
+ bigramProbability);
+ assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb"));
+ assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc"));
+ assertEquals(true, binaryDictionary.isValidBigram("abb", "aaa"));
+ assertEquals(true, binaryDictionary.isValidBigram("abb", "bcc"));
+ assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "abb"));
+ assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "bcc"));
+ assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa"));
+ assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc"));
+ }
}