aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp47
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h7
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp18
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp14
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp7
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp26
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h8
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp18
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp33
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h8
-rw-r--r--tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java175
11 files changed, 303 insertions, 58 deletions
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp
index 00c614c46..1926b9831 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp
@@ -16,6 +16,7 @@
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
@@ -78,8 +79,10 @@ const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
offset = ByteArrayUtils::readUint24AndAdvancePosition(bigramsBuf, pos);
break;
}
- if (offset == 0) {
+ if (offset == DynamicPatriciaTrieReadingUtils::DICT_OFFSET_INVALID) {
return NOT_A_DICT_POS;
+ } else if (offset == DynamicPatriciaTrieReadingUtils::DICT_OFFSET_ZERO_OFFSET) {
+ return origin;
}
if (isOffsetNegative(flags)) {
return origin - offset;
@@ -88,6 +91,24 @@ const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
}
}
+/* static */ bool BigramListReadWriteUtils::setHasNextFlag(
+ BufferWithExtendableBuffer *const buffer, const bool hasNext, const int entryPos) {
+ const bool usesAdditionalBuffer = buffer->isInAdditionalBuffer(entryPos);
+ int readingPos = entryPos;
+ if (usesAdditionalBuffer) {
+ readingPos -= buffer->getOriginalBufferSize();
+ }
+ BigramFlags bigramFlags = ByteArrayUtils::readUint8AndAdvancePosition(
+ buffer->getBuffer(usesAdditionalBuffer), &readingPos);
+ if (hasNext) {
+ bigramFlags = bigramFlags | FLAG_ATTRIBUTE_HAS_NEXT;
+ } else {
+ bigramFlags = bigramFlags & (~FLAG_ATTRIBUTE_HAS_NEXT);
+ }
+ int writingPos = entryPos;
+ return buffer->writeUintAndAdvancePosition(bigramFlags, 1 /* size */, &writingPos);
+}
+
/* static */ bool BigramListReadWriteUtils::createAndWriteBigramEntry(
BufferWithExtendableBuffer *const buffer, const int targetPos, const int probability,
const bool hasNext, int *const writingPos) {
@@ -101,10 +122,9 @@ const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
/* static */ bool BigramListReadWriteUtils::writeBigramEntry(
BufferWithExtendableBuffer *const bufferToWrite, const BigramFlags flags,
const int targetPtNodePos, int *const writingPos) {
- const int offset = (targetPtNodePos != NOT_A_DICT_POS) ?
- targetPtNodePos - (*writingPos + 1) : 0;
+ const int offset = getBigramTargetOffset(targetPtNodePos, *writingPos);
const BigramFlags flagsToWrite = (offset < 0) ?
- (flags | FLAG_ATTRIBUTE_OFFSET_NEGATIVE) : flags;
+ (flags | FLAG_ATTRIBUTE_OFFSET_NEGATIVE) : (flags & ~FLAG_ATTRIBUTE_OFFSET_NEGATIVE);
if (!bufferToWrite->writeUintAndAdvancePosition(flagsToWrite, 1 /* size */, writingPos)) {
return false;
}
@@ -116,14 +136,13 @@ const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
// Returns true if the bigram entry is valid and put entry flags into out*.
/* static */ bool BigramListReadWriteUtils::createAndGetBigramFlags(const int entryPos,
- const int targetPos, const int probability, const bool hasNext,
+ const int targetPtNodePos, const int probability, const bool hasNext,
BigramFlags *const outBigramFlags) {
BigramFlags flags = probability & MASK_ATTRIBUTE_PROBABILITY;
if (hasNext) {
flags |= FLAG_ATTRIBUTE_HAS_NEXT;
}
- const int targetFieldPos = entryPos + 1;
- const int offset = (targetPos != NOT_A_DICT_POS) ? targetPos - targetFieldPos : 0;
+ const int offset = getBigramTargetOffset(targetPtNodePos, entryPos);
if (offset < 0) {
flags |= FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
}
@@ -146,4 +165,18 @@ const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
return true;
}
+/* static */ int BigramListReadWriteUtils::getBigramTargetOffset(const int targetPtNodePos,
+ const int entryPos) {
+ if (targetPtNodePos == NOT_A_DICT_POS) {
+ return DynamicPatriciaTrieReadingUtils::DICT_OFFSET_INVALID;
+ } else {
+ const int offset = targetPtNodePos - (entryPos + 1 /* bigramFlagsField */);
+ if (offset == 0) {
+ return DynamicPatriciaTrieReadingUtils::DICT_OFFSET_ZERO_OFFSET;
+ } else {
+ return offset;
+ }
+ }
+}
+
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h
index 234a0ea58..eabe4e099 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h
@@ -59,9 +59,8 @@ public:
*/
}
- static AK_FORCE_INLINE BigramFlags setHasNextFlag(const BigramFlags flags) {
- return flags | FLAG_ATTRIBUTE_HAS_NEXT;
- }
+ static bool setHasNextFlag(BufferWithExtendableBuffer *const buffer,
+ const bool hasNext, const int entryPos);
static AK_FORCE_INLINE BigramFlags setProbabilityInFlags(const BigramFlags flags,
const int probability) {
@@ -96,6 +95,8 @@ private:
static int getBigramAddressAndAdvancePosition(const uint8_t *const bigramsBuf,
const BigramFlags flags, int *const pos);
+
+ static int getBigramTargetOffset(const int targetPtNodePos, const int entryPos);
};
} // namespace latinime
#endif // LATINIME_BIGRAM_LIST_READ_WRITE_UTILS_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
index 2e0f97792..bc2f5ee58 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
@@ -70,6 +70,7 @@ bool DynamicBigramListPolicy::copyAllBigrams(BufferWithExtendableBuffer *const b
*outBigramsCount = 0;
BigramListReadWriteUtils::BigramFlags bigramFlags;
int bigramEntryCount = 0;
+ int lastWrittenEntryPos = NOT_A_DICT_POS;
do {
if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
@@ -90,6 +91,11 @@ bool DynamicBigramListPolicy::copyAllBigrams(BufferWithExtendableBuffer *const b
originalBigramPos += mBuffer->getOriginalBufferSize();
}
const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
+ if (bigramPos == NOT_A_DICT_POS) {
+ // Target PtNode has been invalidated.
+ continue;
+ }
+ lastWrittenEntryPos = *toPos;
if (!BigramListReadWriteUtils::createAndWriteBigramEntry(bufferToWrite, bigramPos,
BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags),
BigramListReadWriteUtils::hasNext(bigramFlags), toPos)) {
@@ -97,6 +103,13 @@ bool DynamicBigramListPolicy::copyAllBigrams(BufferWithExtendableBuffer *const b
}
(*outBigramsCount)++;
} while(BigramListReadWriteUtils::hasNext(bigramFlags));
+ // Makes the last entry the terminal of the list. Updates the flags.
+ if (lastWrittenEntryPos != NOT_A_DICT_POS) {
+ if (!BigramListReadWriteUtils::setHasNextFlag(bufferToWrite, false /* hasNext */,
+ lastWrittenEntryPos)) {
+ return false;
+ }
+ }
if (usesAdditionalBuffer) {
*fromPos += mBuffer->getOriginalBufferSize();
}
@@ -240,10 +253,7 @@ bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTarg
}
// The current last entry is found.
// First, update the flags of the last entry.
- const BigramListReadWriteUtils::BigramFlags updatedFlags =
- BigramListReadWriteUtils::setHasNextFlag(bigramFlags);
- if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, updatedFlags, originalBigramPos,
- &entryPos)) {
+ if (!BigramListReadWriteUtils::setHasNextFlag(mBuffer, true /* hasNext */, entryPos)) {
return false;
}
if (usesAdditionalBuffer) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
index 51374102e..c60e45819 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
@@ -96,7 +96,7 @@ bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNo
mValidPtNodeCount++;
// Writes current PtNode.
return mWritingHelper->writePtNodeToBufferByCopyingPtNodeInfo(mBufferToWrite, node,
- node->getParentPos(), nodeCodePoints, node->getCodePointCount(),
+ node->getParentPos(), nodeCodePoints, node->getCodePointCount(),
node->getProbability(), &writingPos);
}
@@ -113,11 +113,9 @@ bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionField
}
}
int writingPos = node->getHeadPos() + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
- const int parentPosOffset = (parentPos != NOT_A_DICT_POS) ?
- parentPos - node->getHeadPos() : NOT_A_DICT_POS;
// Write updated parent offset.
- if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition(mBufferToWrite,
- parentPosOffset, &writingPos)) {
+ if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mBufferToWrite,
+ parentPos, node->getHeadPos(), &writingPos)) {
return false;
}
@@ -139,8 +137,10 @@ bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionField
// Updates bigram target PtNode positions in the bigram list.
int bigramsPos = node->getBigramsPos();
if (bigramsPos != NOT_A_DICT_POS) {
- mBigramPolicy->updateAllBigramTargetPtNodePositions(&bigramsPos,
- &mDictPositionRelocationMap->mPtNodePositionRelocationMap);
+ if (!mBigramPolicy->updateAllBigramTargetPtNodePositions(&bigramsPos,
+ &mDictPositionRelocationMap->mPtNodePositionRelocationMap)) {
+ return false;
+ }
}
return true;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp
index d52de7ada..456352c17 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp
@@ -40,9 +40,10 @@ void DynamicPatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNo
pos -= mBuffer->getOriginalBufferSize();
}
mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
- const int parentPos =
- DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(dictBuf, &pos);
- mParentPos = (parentPos != 0) ? ptNodePos + parentPos : NOT_A_DICT_POS;
+ const int parentPosOffset =
+ DynamicPatriciaTrieReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(dictBuf,
+ &pos);
+ mParentPos = DynamicPatriciaTrieReadingUtils::getParentPtNodePos(parentPosOffset, mHeadPos);
if (outCodePoints != 0) {
mCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
dictBuf, mFlags, maxCodePointCount, outCodePoints, &pos);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp
index 8428c0b15..d68446db6 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp
@@ -28,24 +28,42 @@ const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_NOT_MOVED = 0xC0;
const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_MOVED = 0x40;
const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_DELETED = 0x80;
+// TODO: Make DICT_OFFSET_ZERO_OFFSET = 0.
+// Currently, DICT_OFFSET_INVALID is 0 in Java side but offset can be 0 during GC. So, the maximum
+// value of offsets, which is 0x7FFFFF is used to represent 0 offset.
+const int DptReadingUtils::DICT_OFFSET_INVALID = 0;
+const int DptReadingUtils::DICT_OFFSET_ZERO_OFFSET = 0x7FFFFF;
+
/* static */ int DptReadingUtils::getForwardLinkPosition(const uint8_t *const buffer,
const int pos) {
int linkAddressPos = pos;
return ByteArrayUtils::readSint24AndAdvancePosition(buffer, &linkAddressPos);
}
-/* static */ int DptReadingUtils::getParentPosAndAdvancePosition(const uint8_t *const buffer,
- int *const pos) {
+/* static */ int DptReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(
+ const uint8_t *const buffer, int *const pos) {
return ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos);
}
+/* static */ int DptReadingUtils::getParentPtNodePos(const int parentOffset, const int ptNodePos) {
+ if (parentOffset == DICT_OFFSET_INVALID) {
+ return NOT_A_DICT_POS;
+ } else if (parentOffset == DICT_OFFSET_ZERO_OFFSET) {
+ return ptNodePos;
+ } else {
+ return parentOffset + ptNodePos;
+ }
+}
+
/* static */ int DptReadingUtils::readChildrenPositionAndAdvancePosition(
const uint8_t *const buffer, int *const pos) {
const int base = *pos;
const int offset = ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos);
- if (offset == 0) {
- // 0 offset means that the node does not have children.
+ if (offset == DICT_OFFSET_INVALID) {
+ // The PtNode does not have children.
return NOT_A_DICT_POS;
+ } else if (offset == DICT_OFFSET_ZERO_OFFSET) {
+ return base;
} else {
return base + offset;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h
index db5f9b1bd..67c3cc57e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h
@@ -27,13 +27,19 @@ class DynamicPatriciaTrieReadingUtils {
public:
typedef uint8_t NodeFlags;
+ static const int DICT_OFFSET_INVALID;
+ static const int DICT_OFFSET_ZERO_OFFSET;
+
static int getForwardLinkPosition(const uint8_t *const buffer, const int pos);
static AK_FORCE_INLINE bool isValidForwardLinkPosition(const int forwardLinkAddress) {
return forwardLinkAddress != 0;
}
- static int getParentPosAndAdvancePosition(const uint8_t *const buffer, int *const pos);
+ static int getParentPtNodePosOffsetAndAdvancePosition(const uint8_t *const buffer,
+ int *const pos);
+
+ static int getParentPtNodePos(const int parentOffset, const int ptNodePos);
static int readChildrenPositionAndAdvancePosition(const uint8_t *const buffer, int *const pos);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
index 53075e686..d2b9a5e97 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
@@ -203,9 +203,8 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
return false;
}
// Update moved position, which is stored in the parent offset field.
- const int movedPosOffset = movedPos - originalNode->getHeadPos();
- if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition(
- mBuffer, movedPosOffset, &writingPos)) {
+ if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
+ mBuffer, movedPos, originalNode->getHeadPos(), &writingPos)) {
return false;
}
// Update bigram linked node position, which is stored in the children position field.
@@ -220,12 +219,10 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
readingHelper.initWithPtNodeArrayPos(originalNode->getChildrenPos());
while (!readingHelper.isEnd()) {
- const int childPtNodeWrittenPos = nodeReader->getHeadPos();
- const int parentOffset = movedPos - childPtNodeWrittenPos;
- int parentOffsetFieldPos = childPtNodeWrittenPos
+ int parentOffsetFieldPos = nodeReader->getHeadPos()
+ DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
- if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition(
- mBuffer, parentOffset, &parentOffsetFieldPos)) {
+ if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
+ mBuffer, movedPos, nodeReader->getHeadPos(), &parentOffsetFieldPos)) {
// Parent offset cannot be written because of a bug or a broken dictionary; thus,
// we give up to update dictionary.
return false;
@@ -251,9 +248,8 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(
return false;
}
// Calculate a parent offset and write the offset.
- const int parentOffset = (parentPos != NOT_A_DICT_POS) ? parentPos - nodePos : NOT_A_DICT_POS;
- if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition(bufferToWrite,
- parentOffset, writingPos)) {
+ if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(bufferToWrite,
+ parentPos, nodePos, writingPos)) {
return false;
}
// Write code points
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp
index b261e594d..5a3983776 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp
@@ -39,18 +39,20 @@ const int DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE = 1;
/* static */ bool DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(
BufferWithExtendableBuffer *const buffer, const int forwardLinkPos,
int *const forwardLinkFieldPos) {
- const int offset = (forwardLinkPos != NOT_A_DICT_POS) ?
- forwardLinkPos - (*forwardLinkFieldPos) : 0;
- return writeDictOffset(buffer, offset, forwardLinkFieldPos);
+ return writeDictOffset(buffer, forwardLinkPos, (*forwardLinkFieldPos), forwardLinkFieldPos);
}
/* static */ bool DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(
BufferWithExtendableBuffer *const buffer, const size_t arraySize,
int *const arraySizeFieldPos) {
- if (arraySize <= MAX_PTNODE_ARRAY_SIZE_TO_USE_SMALL_SIZE_FIELD) {
+ // Currently, all array size field to be created has LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE to
+ // simplify updating process.
+ // TODO: Use SMALL_PTNODE_ARRAY_SIZE_FIELD_SIZE for small arrays.
+ /*if (arraySize <= MAX_PTNODE_ARRAY_SIZE_TO_USE_SMALL_SIZE_FIELD) {
return buffer->writeUintAndAdvancePosition(arraySize, SMALL_PTNODE_ARRAY_SIZE_FIELD_SIZE,
arraySizeFieldPos);
- } else if (arraySize <= MAX_PTNODE_ARRAY_SIZE) {
+ } else */
+ if (arraySize <= MAX_PTNODE_ARRAY_SIZE) {
uint32_t data = arraySize | LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE_FLAG;
return buffer->writeUintAndAdvancePosition(data, LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE,
arraySizeFieldPos);
@@ -69,11 +71,10 @@ const int DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE = 1;
}
// Note that parentOffset is offset from node's head position.
-/* static */ bool DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition(
- BufferWithExtendableBuffer *const buffer, const int parentOffset,
+/* static */ bool DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
+ BufferWithExtendableBuffer *const buffer, const int parentPos, const int basePos,
int *const parentPosFieldPos) {
- int offset = (parentOffset != NOT_A_DICT_POS) ? parentOffset : 0;
- return writeDictOffset(buffer, offset, parentPosFieldPos);
+ return writeDictOffset(buffer, parentPos, basePos, parentPosFieldPos);
}
/* static */ bool DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(
@@ -106,13 +107,19 @@ const int DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE = 1;
/* static */ bool DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
BufferWithExtendableBuffer *const buffer, const int childrenPosition,
int *const childrenPositionFieldPos) {
- int offset = (childrenPosition != NOT_A_DICT_POS) ?
- childrenPosition - (*childrenPositionFieldPos) : 0;
- return writeDictOffset(buffer, offset, childrenPositionFieldPos);
+ return writeDictOffset(buffer, childrenPosition, (*childrenPositionFieldPos),
+ childrenPositionFieldPos);
}
/* static */ bool DynamicPatriciaTrieWritingUtils::writeDictOffset(
- BufferWithExtendableBuffer *const buffer, const int offset, int *const offsetFieldPos) {
+ BufferWithExtendableBuffer *const buffer, const int targetPos, const int basePos,
+ int *const offsetFieldPos) {
+ int offset = targetPos - basePos;
+ if (targetPos == NOT_A_DICT_POS) {
+ offset = DynamicPatriciaTrieReadingUtils::DICT_OFFSET_INVALID;
+ } else if (offset == 0) {
+ offset = DynamicPatriciaTrieReadingUtils::DICT_OFFSET_ZERO_OFFSET;
+ }
if (offset > MAX_DICT_OFFSET_VALUE || offset < MIN_DICT_OFFSET_VALUE) {
AKLOGI("offset cannot be written because the offset is too large or too small: %d",
offset);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h
index 536ad3005..a37e9fb3d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h
@@ -41,8 +41,8 @@ class DynamicPatriciaTrieWritingUtils {
const DynamicPatriciaTrieReadingUtils::NodeFlags nodeFlags,
int *const nodeFlagsFieldPos);
- static bool writeParentOffsetAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
- const int parentPosition, int *const parentPosFieldPos);
+ static bool writeParentPosOffsetAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
+ const int parentPosition, const int basePos, int *const parentPosFieldPos);
static bool writeCodePointsAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
const int *const codePoints, const int codePointCount, int *const codePointFieldPos);
@@ -67,8 +67,8 @@ class DynamicPatriciaTrieWritingUtils {
static const int DICT_OFFSET_NEGATIVE_FLAG;
static const int PROBABILITY_FIELD_SIZE;
- static bool writeDictOffset(BufferWithExtendableBuffer *const buffer, const int offset,
- int *const offsetFieldPos);
+ static bool writeDictOffset(BufferWithExtendableBuffer *const buffer, const int targetPos,
+ const int basePos, int *const offsetFieldPos);
};
} // namespace latinime
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_UTILS_H */
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
index 87901839f..e26b300a8 100644
--- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
+++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
@@ -18,6 +18,7 @@ package com.android.inputmethod.latin;
import android.test.AndroidTestCase;
import android.test.suitebuilder.annotation.LargeTest;
+import android.util.Pair;
import com.android.inputmethod.latin.makedict.CodePointUtils;
import com.android.inputmethod.latin.makedict.DictEncoder;
@@ -385,7 +386,6 @@ public class BinaryDictionaryTests extends AndroidTestCase {
dictFile.delete();
}
- // TODO: Add large tests for BinaryDictionary.flushWithGC().
public void testFlushWithGCDictionary() {
File dictFile = null;
try {
@@ -431,4 +431,177 @@ public class BinaryDictionaryTests extends AndroidTestCase {
dictFile.delete();
}
+
+ // TODO: Evaluate performance of GC
+ public void testAddBigramWordsAndFlashWithGC() {
+ final int wordCount = 100;
+ final int bigramCount = 1000;
+ final int codePointSetSize = 30;
+ // TODO: Use various seeds such as a current timestamp to make this test more random.
+ final int seed = 314159265;
+
+ File dictFile = null;
+ try {
+ dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
+ } catch (IOException e) {
+ fail("IOException while writing an initial dictionary : " + e);
+ } catch (UnsupportedFormatException e) {
+ fail("UnsupportedFormatException while writing an initial dictionary : " + e);
+ }
+
+ BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
+ 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
+ Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+ final ArrayList<String> words = new ArrayList<String>();
+ // Test a word that isn't contained within the dictionary.
+ final Random random = new Random(seed);
+ final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
+ final int[] unigramProbabilities = new int[wordCount];
+ for (int i = 0; i < wordCount; ++i) {
+ final String word = CodePointUtils.generateWord(random, codePointSet);
+ words.add(word);
+ final int unigramProbability = random.nextInt(0xFF);
+ unigramProbabilities[i] = unigramProbability;
+ binaryDictionary.addUnigramWord(word, unigramProbability);
+ }
+
+ final int[][] probabilities = new int[wordCount][wordCount];
+
+ for (int i = 0; i < wordCount; ++i) {
+ for (int j = 0; j < wordCount; ++j) {
+ probabilities[i][j] = Dictionary.NOT_A_PROBABILITY;
+ }
+ }
+
+ for (int i = 0; i < bigramCount; i++) {
+ final int word0Index = random.nextInt(wordCount);
+ final int word1Index = random.nextInt(wordCount);
+ final String word0 = words.get(word0Index);
+ final String word1 = words.get(word1Index);
+ final int bigramProbability = random.nextInt(0xF);
+ probabilities[word0Index][word1Index] = binaryDictionary.calculateProbability(
+ unigramProbabilities[word1Index], bigramProbability);
+ binaryDictionary.addBigramWords(word0, word1, bigramProbability);
+ }
+
+ binaryDictionary.flushWithGC();
+ binaryDictionary.close();
+ binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
+ 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
+ Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+
+ for (int i = 0; i < words.size(); i++) {
+ for (int j = 0; j < words.size(); j++) {
+ assertEquals(probabilities[i][j],
+ binaryDictionary.getBigramProbability(words.get(i), words.get(j)));
+ }
+ }
+ dictFile.delete();
+ }
+
+ public void testRandomOperetionsAndFlashWithGC() {
+ final int flashWithGCIterationCount = 50;
+ final int operationCountInEachIteration = 200;
+ final int initialUnigramCount = 100;
+ final float addUnigramProb = 0.5f;
+ final float addBigramProb = 0.8f;
+ final float removeBigramProb = 0.2f;
+ final int codePointSetSize = 30;
+ final int seed = 141421356;
+
+ final Random random = new Random(seed);
+
+ File dictFile = null;
+ try {
+ dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
+ } catch (IOException e) {
+ fail("IOException while writing an initial dictionary : " + e);
+ } catch (UnsupportedFormatException e) {
+ fail("UnsupportedFormatException while writing an initial dictionary : " + e);
+ }
+
+ BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
+ 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
+ Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+ final ArrayList<String> words = new ArrayList<String>();
+ final ArrayList<Pair<String, String>> bigramWords = new ArrayList<Pair<String,String>>();
+ final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
+ final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>();
+ final HashMap<Pair<String, String>, Integer> bigramProbabilities =
+ new HashMap<Pair<String, String>, Integer>();
+ for (int i = 0; i < initialUnigramCount; ++i) {
+ final String word = CodePointUtils.generateWord(random, codePointSet);
+ words.add(word);
+ final int unigramProbability = random.nextInt(0xFF);
+ unigramProbabilities.put(word, unigramProbability);
+ binaryDictionary.addUnigramWord(word, unigramProbability);
+ }
+ binaryDictionary.flushWithGC();
+ binaryDictionary.close();
+
+ for (int gcCount = 0; gcCount < flashWithGCIterationCount; gcCount++) {
+ binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
+ 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
+ Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+ for (int opCount = 0; opCount < operationCountInEachIteration; opCount++) {
+ // Add unigram.
+ if (random.nextFloat() < addUnigramProb) {
+ final String word = CodePointUtils.generateWord(random, codePointSet);
+ words.add(word);
+ final int unigramProbability = random.nextInt(0xFF);
+ unigramProbabilities.put(word, unigramProbability);
+ binaryDictionary.addUnigramWord(word, unigramProbability);
+ }
+ // Add bigram.
+ if (random.nextFloat() < addBigramProb && words.size() > 2) {
+ final int word0Index = random.nextInt(words.size());
+ int word1Index = random.nextInt(words.size() - 1);
+ if (word0Index <= word1Index) {
+ word1Index++;
+ }
+ final String word0 = words.get(word0Index);
+ final String word1 = words.get(word1Index);
+ final int bigramProbability = random.nextInt(0xF);
+ final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
+ bigramWords.add(bigram);
+ bigramProbabilities.put(bigram, bigramProbability);
+ binaryDictionary.addBigramWords(word0, word1, bigramProbability);
+ }
+ // Remove bigram.
+ if (random.nextFloat() < removeBigramProb && !bigramWords.isEmpty()) {
+ final int bigramIndex = random.nextInt(bigramWords.size());
+ final Pair<String, String> bigram = bigramWords.get(bigramIndex);
+ bigramWords.remove(bigramIndex);
+ bigramProbabilities.remove(bigram);
+ binaryDictionary.removeBigramWords(bigram.first, bigram.second);
+ }
+ }
+
+ // Test whether the all unigram operations are collectlly handled.
+ for (int i = 0; i < words.size(); i++) {
+ final String word = words.get(i);
+ final int unigramProbability = unigramProbabilities.get(word);
+ assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word));
+ }
+ // Test whether the all bigram operations are collectlly handled.
+ for (int i = 0; i < bigramWords.size(); i++) {
+ final Pair<String, String> bigram = bigramWords.get(i);
+ final int unigramProbability = unigramProbabilities.get(bigram.second);
+ final int probability;
+ if (bigramProbabilities.containsKey(bigram)) {
+ final int bigramProbability = bigramProbabilities.get(bigram);
+ probability = binaryDictionary.calculateProbability(unigramProbability,
+ bigramProbability);
+ } else {
+ probability = Dictionary.NOT_A_PROBABILITY;
+ }
+ assertEquals(probability,
+ binaryDictionary.getBigramProbability(bigram.first, bigram.second));
+ }
+ binaryDictionary.flushWithGC();
+ binaryDictionary.close();
+ }
+
+ dictFile.delete();
+ }
}