about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- java/src/com/android/inputmethod/latin/BinaryDictionary.java | 7
-rw-r--r-- java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java | 5
-rw-r--r-- java/src/com/android/inputmethod/latin/makedict/FormatSpec.java | 5
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp | 100
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h | 3
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h | 2
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp | 3
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp | 16
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h | 4
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp | 11
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h | 6
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp | 12
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h | 4
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp | 14
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h | 2
-rw-r--r-- tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java | 42
16 files changed, 169 insertions, 67 deletions
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
index 83ee982b1..94a1e3658 100644
--- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
@@ -373,8 +373,7 @@ public final class BinaryDictionary extends Dictionary {
return getProbabilityNative(mNativeDict, codePoints);
}
- // TODO: Add a batch process version (isValidBigramMultiple?) to avoid excessive numbers of jni
- // calls when checking for changes in an entire dictionary.
+ @UsedForTesting
public boolean isValidBigram(final String word0, final String word1) {
return getBigramProbability(word0, word1) != NOT_A_PROBABILITY;
}
@@ -541,7 +540,9 @@ public final class BinaryDictionary extends Dictionary {
close();
final File dictFile = new File(mDictFilePath);
final File tmpDictFile = new File(tmpDictFilePath);
- FileUtils.deleteRecursively(dictFile);
+ if (!FileUtils.deleteRecursively(dictFile)) {
+ return false;
+ }
if (!BinaryDictionaryUtils.renameDict(tmpDictFile, dictFile)) {
return false;
}
diff --git a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
index b41981712..6818c156e 100644
--- a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
@@ -112,8 +112,9 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
}
private boolean needsToMigrateDictionary(final int formatVersion) {
- // TODO: Check version.
- return false;
+ // When we bump up the dictionary format version, the old version should be added here
+ // to support migration. Note that native code has to support reading such formats.
+ return formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING;
}
public boolean isValidDictionaryLocked() {
diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
index f25503488..613ff2ba4 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
@@ -186,7 +186,12 @@ public final class FormatSpec {
// From version 4 on, we use version * 100 + revision as a version number. That allows
// us to change the format during development while having testing devices remove
// older files with each upgrade, while still having a readable versioning scheme.
+ // When we bump up the dictionary format version, we should update
+ // ExpandableBinaryDictionary.needsToMigrateDictionary() and
+ // ExpandableBinaryDictionary.matchesExpectedBinaryDictFormatVersionForThisType().
public static final int VERSION2 = 2;
+ // Dictionary version used for testing.
+ public static final int VERSION4_ONLY_FOR_TESTING = 399;
public static final int VERSION4 = 401;
static final int MINIMUM_SUPPORTED_VERSION = VERSION2;
static final int MAXIMUM_SUPPORTED_VERSION = VERSION4;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp
index 04e768fbd..4975512ff 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp
@@ -50,12 +50,18 @@ void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const out
bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId,
const int newProbability, const int timestamp, bool *const outAddedNewEntry) {
+ // 1. The word has no bigrams yet.
+ // 2. The word has bigrams, and there is the target in the list.
+ // 3. The word has bigrams, and there is an invalid entry that can be reclaimed.
+ // 4. The word has bigrams. We have to append a new bigram entry to the list.
+ // 5. Same as 4, but the list is the last entry of the content file.
+
if (outAddedNewEntry) {
*outAddedNewEntry = false;
}
const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
if (bigramListPos == NOT_A_DICT_POS) {
- // Updating PtNode that doesn't have a bigram list.
+ // Case 1. PtNode that doesn't have a bigram list.
// Create new bigram list.
if (!mBigramDictContent->createNewBigramList(terminalId)) {
return false;
@@ -75,50 +81,55 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
return true;
}
- const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos);
- if (entryPosToUpdate != NOT_A_DICT_POS) {
- // Overwrite existing entry.
- const BigramEntry originalBigramEntry =
- mBigramDictContent->getBigramEntry(entryPosToUpdate);
- if (!originalBigramEntry.isValid()) {
- // Reuse invalid entry.
- if (outAddedNewEntry) {
- *outAddedNewEntry = true;
+ int tailEntryPos = NOT_A_DICT_POS;
+ const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos,
+ &tailEntryPos);
+ if (tailEntryPos != NOT_A_DICT_POS || entryPosToUpdate == NOT_A_DICT_POS) {
+ // Case 4, 5.
+ // Add new entry to the bigram list.
+ if (tailEntryPos == NOT_A_DICT_POS) {
+ // Case 4. Create new bigram list.
+ if (!mBigramDictContent->createNewBigramList(terminalId)) {
+ return false;
+ }
+ const int destPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+ // Copy existing bigram list.
+ if (!mBigramDictContent->copyBigramList(bigramListPos, destPos, &tailEntryPos)) {
+ return false;
}
}
- const BigramEntry updatedBigramEntry =
- originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId);
+ // Write new entry at the tail position of the bigram content.
+ const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
+ newTargetTerminalId);
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
- &updatedBigramEntry, newProbability, timestamp);
- return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate);
+ &newBigramEntry, newProbability, timestamp);
+ if (!mBigramDictContent->writeBigramEntryAtTail(&bigramEntryToWrite)) {
+ return false;
+ }
+ // Update has next flag of the tail entry.
+ if (!updateHasNextFlag(true /* hasNext */, tailEntryPos)) {
+ return false;
+ }
+ if (outAddedNewEntry) {
+ *outAddedNewEntry = true;
+ }
+ return true;
}
- // Add new entry to the bigram list.
- // Create new bigram list.
- if (!mBigramDictContent->createNewBigramList(terminalId)) {
- return false;
- }
- int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
- int tailEntryPos = NOT_A_DICT_POS;
- // Copy existing bigram list.
- if (!mBigramDictContent->copyBigramList(bigramListPos, writingPos, &tailEntryPos)) {
- return false;
+ // Case 2. Overwrite the existing entry. Case 3. Reclaim and reuse the existing invalid entry.
+ const BigramEntry originalBigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate);
+ if (!originalBigramEntry.isValid()) {
+ // Case 3. Reuse the existing invalid entry, which counts as adding a new entry.
+ // outAddedNewEntry stays false only when a valid existing entry is updated (case 2).
+ if (outAddedNewEntry) {
+ *outAddedNewEntry = true;
+ }
}
- // Write new entry at the tail position of the bigram content.
- const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY, newTargetTerminalId);
+ const BigramEntry updatedBigramEntry =
+ originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId);
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
- &newBigramEntry, newProbability, timestamp);
- if (!mBigramDictContent->writeBigramEntryAtTail(&bigramEntryToWrite)) {
- return false;
- }
- // Update has next flag of the tail entry.
- if (!updateHasNextFlag(true /* hasNext */, tailEntryPos)) {
- return false;
- }
- if (outAddedNewEntry) {
- *outAddedNewEntry = true;
- }
- return true;
+ &updatedBigramEntry, newProbability, timestamp);
+ return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate);
}
bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) {
@@ -127,7 +138,8 @@ bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTer
// Bigram list doesn't exist.
return false;
}
- const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos);
+ const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos,
+ nullptr /* outTailEntryPos */);
if (entryPosToUpdate == NOT_A_DICT_POS) {
// Bigram entry doesn't exist.
return false;
@@ -212,7 +224,10 @@ int Ver4BigramListPolicy::getBigramEntryConut(const int terminalId) {
}
int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
- const int bigramListPos) const {
+ const int bigramListPos, int *const outTailEntryPos) const {
+ if (outTailEntryPos) {
+ *outTailEntryPos = NOT_A_DICT_POS;
+ }
bool hasNext = true;
int invalidEntryPos = NOT_A_DICT_POS;
int readingPos = bigramListPos;
@@ -228,6 +243,11 @@ int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
// Invalid entry that can be reused is found.
invalidEntryPos = entryPos;
}
+ if (!hasNext && mBigramDictContent->isContentTailPos(readingPos)) {
+ if (outTailEntryPos) {
+ *outTailEntryPos = entryPos;
+ }
+ }
}
return invalidEntryPos;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h
index d8f7be631..c1f33359b 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h
@@ -56,7 +56,8 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy);
- int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos) const;
+ int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos,
+ int *const outTailEntryPos) const;
const BigramEntry createUpdatedBigramEntryFrom(const BigramEntry *const originalBigramEntry,
const int newProbability, const int timestamp) const;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
index e4a6dc594..da24302c2 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
@@ -139,6 +139,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
switch (mDictFormatVersion) {
case FormatUtils::VERSION_2:
return FormatUtils::VERSION_2;
+ case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
+ return FormatUtils::VERSION_4_ONLY_FOR_TESTING;
case FormatUtils::VERSION_4:
return FormatUtils::VERSION_4;
default:
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
index 5608e27d4..2a9028a9e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
@@ -98,8 +98,9 @@ typedef DictionaryHeaderStructurePolicy::AttributeMap AttributeMap;
case FormatUtils::VERSION_2:
// Version 2 dictionary writing is not supported.
return false;
+ case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
case FormatUtils::VERSION_4:
- return buffer->writeUintAndAdvancePosition(FormatUtils::VERSION_4 /* data */,
+ return buffer->writeUintAndAdvancePosition(version /* data */,
HEADER_DICTIONARY_VERSION_SIZE, writingPos);
default:
return false;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
index be7e43b98..c4d18608c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
@@ -52,9 +52,11 @@ namespace latinime {
DictionaryStructureWithBufferPolicyFactory:: newPolicyForOnMemoryDict(
const int formatVersion, const std::vector<int> &locale,
const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap) {
- switch (formatVersion) {
+ FormatUtils::FORMAT_VERSION dictFormatVersion = FormatUtils::getFormatVersion(formatVersion);
+ switch (dictFormatVersion) {
+ case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
case FormatUtils::VERSION_4: {
- HeaderPolicy headerPolicy(FormatUtils::VERSION_4, locale, attributeMap);
+ HeaderPolicy headerPolicy(dictFormatVersion, locale, attributeMap);
Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers =
Ver4DictBuffers::createVer4DictBuffers(&headerPolicy,
Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE);
@@ -87,11 +89,13 @@ namespace latinime {
if (!mmappedBuffer) {
return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(nullptr);
}
- switch (FormatUtils::detectFormatVersion(mmappedBuffer->getBuffer(),
- mmappedBuffer->getBufferSize())) {
+ const FormatUtils::FORMAT_VERSION formatVersion = FormatUtils::detectFormatVersion(
+ mmappedBuffer->getBuffer(), mmappedBuffer->getBufferSize());
+ switch (formatVersion) {
case FormatUtils::VERSION_2:
AKLOGE("Given path is a directory but the format is version 2. path: %s", path);
break;
+ case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
case FormatUtils::VERSION_4: {
const int dictDirPathBufSize = strlen(headerFilePath) + 1 /* terminator */;
char dictPath[dictDirPathBufSize];
@@ -102,7 +106,8 @@ namespace latinime {
return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(nullptr);
}
Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers(
- Ver4DictBuffers::openVer4DictBuffers(dictPath, std::move(mmappedBuffer)));
+ Ver4DictBuffers::openVer4DictBuffers(dictPath, std::move(mmappedBuffer),
+ formatVersion));
if (!dictBuffers || !dictBuffers->isValid()) {
AKLOGE("DICT: The dictionary doesn't satisfy ver4 format requirements. path: %s",
path);
@@ -135,6 +140,7 @@ namespace latinime {
case FormatUtils::VERSION_2:
return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(
new PatriciaTriePolicy(std::move(mmappedBuffer)));
+ case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
case FormatUtils::VERSION_4:
AKLOGE("Given path is a file but the format is version 4. path: %s", path);
break;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
index 40ece7636..944e0f9e2 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
@@ -88,6 +88,10 @@ class BigramDictContent : public SparseTableDictContent {
const BigramDictContent *const originalBigramDictContent,
int *const outBigramEntryCount);
+ bool isContentTailPos(const int pos) const {
+ return pos == getContentBuffer()->getTailPosition();
+ }
+
private:
DISALLOW_COPY_AND_ASSIGN(BigramDictContent);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
index 95f654498..77ed38b89 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
@@ -27,7 +27,8 @@
namespace latinime {
/* static */ Ver4DictBuffers::Ver4DictBuffersPtr Ver4DictBuffers::openVer4DictBuffers(
- const char *const dictPath, MmappedBuffer::MmappedBufferPtr headerBuffer) {
+ const char *const dictPath, MmappedBuffer::MmappedBufferPtr headerBuffer,
+ const FormatUtils::FORMAT_VERSION formatVersion) {
if (!headerBuffer) {
ASSERT(false);
AKLOGE("The header buffer must be valid to open ver4 dict buffers.");
@@ -35,7 +36,8 @@ namespace latinime {
}
// TODO: take only dictDirPath, and open both header and trie files in the constructor below
const bool isUpdatable = headerBuffer->isUpdatable();
- return Ver4DictBuffersPtr(new Ver4DictBuffers(dictPath, std::move(headerBuffer), isUpdatable));
+ return Ver4DictBuffersPtr(new Ver4DictBuffers(dictPath, std::move(headerBuffer), isUpdatable,
+ formatVersion));
}
bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath,
@@ -113,11 +115,12 @@ bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath,
}
Ver4DictBuffers::Ver4DictBuffers(const char *const dictPath,
- MmappedBuffer::MmappedBufferPtr headerBuffer, const bool isUpdatable)
+ MmappedBuffer::MmappedBufferPtr headerBuffer, const bool isUpdatable,
+ const FormatUtils::FORMAT_VERSION formatVersion)
: mHeaderBuffer(std::move(headerBuffer)),
mDictBuffer(MmappedBuffer::openBuffer(dictPath,
Ver4DictConstants::TRIE_FILE_EXTENSION, isUpdatable)),
- mHeaderPolicy(mHeaderBuffer->getBuffer(), FormatUtils::VERSION_4),
+ mHeaderPolicy(mHeaderBuffer->getBuffer(), formatVersion),
mExpandableHeaderBuffer(mHeaderBuffer ? mHeaderBuffer->getBuffer() : nullptr,
mHeaderPolicy.getSize(),
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
index fc41432f4..df177c14a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
@@ -36,7 +36,8 @@ class Ver4DictBuffers {
typedef std::unique_ptr<Ver4DictBuffers> Ver4DictBuffersPtr;
static Ver4DictBuffersPtr openVer4DictBuffers(const char *const dictDirPath,
- MmappedBuffer::MmappedBufferPtr headerBuffer);
+ MmappedBuffer::MmappedBufferPtr headerBuffer,
+ const FormatUtils::FORMAT_VERSION formatVersion);
static AK_FORCE_INLINE Ver4DictBuffersPtr createVer4DictBuffers(
const HeaderPolicy *const headerPolicy, const int maxTrieSize) {
@@ -120,7 +121,8 @@ class Ver4DictBuffers {
DISALLOW_COPY_AND_ASSIGN(Ver4DictBuffers);
Ver4DictBuffers(const char *const dictDirPath,
- const MmappedBuffer::MmappedBufferPtr headerBuffer, const bool isUpdatable);
+ const MmappedBuffer::MmappedBufferPtr headerBuffer, const bool isUpdatable,
+ const FormatUtils::FORMAT_VERSION formatVersion);
Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
index 87fa5994c..7bc7b0a48 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
@@ -34,9 +34,12 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
const int dictVersion, const std::vector<int> localeAsCodePointVector,
const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap) {
TimeKeeper::setCurrentTime();
- switch (dictVersion) {
+ const FormatUtils::FORMAT_VERSION formatVersion = FormatUtils::getFormatVersion(dictVersion);
+ switch (formatVersion) {
+ case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
case FormatUtils::VERSION_4:
- return createEmptyV4DictFile(filePath, localeAsCodePointVector, attributeMap);
+ return createEmptyV4DictFile(filePath, localeAsCodePointVector, attributeMap,
+ formatVersion);
default:
AKLOGE("Cannot create dictionary %s because format version %d is not supported.",
filePath, dictVersion);
@@ -46,8 +49,9 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
/* static */ bool DictFileWritingUtils::createEmptyV4DictFile(const char *const dirPath,
const std::vector<int> localeAsCodePointVector,
- const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap) {
- HeaderPolicy headerPolicy(FormatUtils::VERSION_4, localeAsCodePointVector, attributeMap);
+ const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap,
+ const FormatUtils::FORMAT_VERSION formatVersion) {
+ HeaderPolicy headerPolicy(formatVersion, localeAsCodePointVector, attributeMap);
Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers(
Ver4DictBuffers::createVer4DictBuffers(&headerPolicy,
Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE));
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h
index 54ec651f7..a822989db 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h
@@ -21,6 +21,7 @@
#include "defines.h"
#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h"
+#include "suggest/policyimpl/dictionary/utils/format_utils.h"
namespace latinime {
@@ -46,7 +47,8 @@ class DictFileWritingUtils {
static bool createEmptyV4DictFile(const char *const filePath,
const std::vector<int> localeAsCodePointVector,
- const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap);
+ const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap,
+ const FormatUtils::FORMAT_VERSION formatVersion);
static bool flushBufferToFile(const char *const filePath,
const BufferWithExtendableBuffer *const buffer);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
index cd3c403fa..a8518cdca 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
@@ -25,6 +25,18 @@ const uint32_t FormatUtils::MAGIC_NUMBER = 0x9BC13AFE;
// Magic number (4 bytes), version (2 bytes), flags (2 bytes), header size (4 bytes) = 12
const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 12;
+/* static */ FormatUtils::FORMAT_VERSION FormatUtils::getFormatVersion(const int formatVersion) {
+ switch (formatVersion) {
+ case VERSION_2:
+ return VERSION_2;
+ case VERSION_4_ONLY_FOR_TESTING:
+ return VERSION_4_ONLY_FOR_TESTING;
+ case VERSION_4:
+ return VERSION_4;
+ default:
+ return UNKNOWN_VERSION;
+ }
+}
/* static */ FormatUtils::FORMAT_VERSION FormatUtils::detectFormatVersion(
const uint8_t *const dict, const int dictSize) {
// The magic number is stored big-endian.
@@ -46,6 +58,8 @@ const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 12;
// same so we use them for both here.
if (ByteArrayUtils::readUint16(dict, 4) == VERSION_2) {
return VERSION_2;
+ } else if (ByteArrayUtils::readUint16(dict, 4) == VERSION_4_ONLY_FOR_TESTING) {
+ return VERSION_4_ONLY_FOR_TESTING;
} else if (ByteArrayUtils::readUint16(dict, 4) == VERSION_4) {
return VERSION_4;
} else {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
index 759b1c9b2..20dfb9d8c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
@@ -31,6 +31,7 @@ class FormatUtils {
enum FORMAT_VERSION {
// These MUST have the same values as the relevant constants in FormatSpec.java.
VERSION_2 = 2,
+ VERSION_4_ONLY_FOR_TESTING = 399,
VERSION_4 = 401,
UNKNOWN_VERSION = -1
};
@@ -39,6 +40,7 @@ class FormatUtils {
// unsupported or obsolete dictionary formats.
static const uint32_t MAGIC_NUMBER;
+ static FORMAT_VERSION getFormatVersion(const int formatVersion);
static FORMAT_VERSION detectFormatVersion(const uint8_t *const dict, const int dictSize);
private:
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
index 0fb0fa587..9ceafa705 100644
--- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
+++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
@@ -46,21 +46,23 @@ public class BinaryDictionaryTests extends AndroidTestCase {
private File createEmptyDictionaryAndGetFile(final String dictId,
final int formatVersion) throws IOException {
- if (formatVersion == FormatSpec.VERSION4) {
- return createEmptyVer4DictionaryAndGetFile(dictId);
+ if (formatVersion == FormatSpec.VERSION4
+ || formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING) {
+ return createEmptyVer4DictionaryAndGetFile(dictId, formatVersion);
} else {
throw new IOException("Dictionary format version " + formatVersion
+ " is not supported.");
}
}
- private File createEmptyVer4DictionaryAndGetFile(final String dictId) throws IOException {
+ private File createEmptyVer4DictionaryAndGetFile(final String dictId,
+ final int formatVersion) throws IOException {
final File file = File.createTempFile(dictId, TEST_DICT_FILE_EXTENSION,
getContext().getCacheDir());
file.delete();
file.mkdir();
Map<String, String> attributeMap = new HashMap<String, String>();
- if (BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), FormatSpec.VERSION4,
+ if (BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), formatVersion,
Locale.ENGLISH, attributeMap)) {
return file;
} else {
@@ -1223,4 +1225,36 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
}
}
+
+ public void testDictMigration() {
+ testDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, FormatSpec.VERSION4);
+ }
+
+ private void testDictMigration(final int fromFormatVersion, final int toFormatVersion) {
+ File dictFile = null;
+ try {
+ dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", fromFormatVersion);
+ } catch (IOException e) {
+ fail("IOException while writing an initial dictionary : " + e);
+ }
+ final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
+ 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
+ Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+ final int unigramProbability = 100;
+ addUnigramWord(binaryDictionary, "aaa", unigramProbability);
+ addUnigramWord(binaryDictionary, "bbb", unigramProbability);
+ final int bigramProbability = 10;
+ addBigramWords(binaryDictionary, "aaa", "bbb", bigramProbability);
+ assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
+ assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb"));
+ assertTrue(binaryDictionary.isValidBigram("aaa", "bbb"));
+ assertEquals(fromFormatVersion, binaryDictionary.getFormatVersion());
+ assertTrue(binaryDictionary.migrateTo(toFormatVersion));
+ assertTrue(binaryDictionary.isValidDictionary());
+ assertEquals(toFormatVersion, binaryDictionary.getFormatVersion());
+ assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
+ assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb"));
+ // TODO: Add tests for bigram frequency once the implementation is ready.
+ assertTrue(binaryDictionary.isValidBigram("aaa", "bbb"));
+ }
}