aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--java/src/com/android/inputmethod/latin/Constants.java2
-rw-r--r--java/src/com/android/inputmethod/latin/DictionaryFacilitatorForSuggest.java40
-rw-r--r--java/src/com/android/inputmethod/latin/inputlogic/InputLogic.java9
-rw-r--r--native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp6
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp6
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp5
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp12
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp50
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h9
-rw-r--r--tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java49
10 files changed, 144 insertions, 44 deletions
diff --git a/java/src/com/android/inputmethod/latin/Constants.java b/java/src/com/android/inputmethod/latin/Constants.java
index 5bc9b6281..67ca59540 100644
--- a/java/src/com/android/inputmethod/latin/Constants.java
+++ b/java/src/com/android/inputmethod/latin/Constants.java
@@ -169,6 +169,8 @@ public final class Constants {
// How many continuous deletes at which to start deleting at a higher speed.
public static final int DELETE_ACCELERATE_AT = 20;
+ public static final String WORD_SEPARATOR = " ";
+
public static boolean isValidCoordinate(final int coordinate) {
// Detect {@link NOT_A_COORDINATE}, {@link SUGGESTION_STRIP_COORDINATE},
// and {@link SPELL_CHECKER_COORDINATE}.
diff --git a/java/src/com/android/inputmethod/latin/DictionaryFacilitatorForSuggest.java b/java/src/com/android/inputmethod/latin/DictionaryFacilitatorForSuggest.java
index ea9691a5a..e0220e137 100644
--- a/java/src/com/android/inputmethod/latin/DictionaryFacilitatorForSuggest.java
+++ b/java/src/com/android/inputmethod/latin/DictionaryFacilitatorForSuggest.java
@@ -56,7 +56,7 @@ public class DictionaryFacilitatorForSuggest {
private boolean mIsUserDictEnabled = false;
private volatile CountDownLatch mLatchForWaitingLoadingMainDictionary = new CountDownLatch(0);
// To synchronize assigning mDictionaries to ensure closing dictionaries.
- private Object mLock = new Object();
+ private final Object mLock = new Object();
private static final String[] DICT_TYPES_ORDERED_TO_GET_SUGGESTION =
new String[] {
@@ -370,32 +370,46 @@ public class DictionaryFacilitatorForSuggest {
}
public void addToUserHistory(final String suggestion, final boolean wasAutoCapitalized,
- final String previousWord, final int timeStampInSeconds) {
+ final String previousWord, final int timeStampInSeconds,
+ final boolean blockPotentiallyOffensive) {
final Dictionaries dictionaries = mDictionaries;
+ final String[] words = suggestion.split(Constants.WORD_SEPARATOR);
+ for (int i = 0; i < words.length; i++) {
+ final String currentWord = words[i];
+ final String prevWord = (i == 0) ? previousWord : words[i - 1];
+ final boolean wasCurrentWordAutoCapitalized = (i == 0) ? wasAutoCapitalized : false;
+ addWordToUserHistory(dictionaries, prevWord, currentWord,
+ wasCurrentWordAutoCapitalized, timeStampInSeconds, blockPotentiallyOffensive);
+ }
+ }
+
+ private void addWordToUserHistory(final Dictionaries dictionaries, final String prevWord,
+ final String word, final boolean wasAutoCapitalized, final int timeStampInSeconds,
+ final boolean blockPotentiallyOffensive) {
final ExpandableBinaryDictionary userHistoryDictionary =
dictionaries.getSubDict(Dictionary.TYPE_USER_HISTORY);
if (userHistoryDictionary == null) {
return;
}
- final int maxFreq = getMaxFrequency(suggestion);
- if (maxFreq == 0) {
+ final int maxFreq = getMaxFrequency(word);
+ if (maxFreq == 0 && blockPotentiallyOffensive) {
return;
}
- final String suggestionLowerCase = suggestion.toLowerCase(dictionaries.mLocale);
+ final String lowerCasedWord = word.toLowerCase(dictionaries.mLocale);
final String secondWord;
if (wasAutoCapitalized) {
- if (isValidWord(suggestion, false /* ignoreCase */)
- && !isValidWord(suggestionLowerCase, false /* ignoreCase */)) {
+ if (isValidWord(word, false /* ignoreCase */)
+ && !isValidWord(lowerCasedWord, false /* ignoreCase */)) {
// If the word was auto-capitalized and exists only as a capitalized word in the
// dictionary, then we must not downcase it before registering it. For example,
// the name of the contacts in start-of-sentence position would come here with the
// wasAutoCapitalized flag: if we downcase it, we'd register a lower-case version
// of that contact's name which would end up popping in suggestions.
- secondWord = suggestion;
+ secondWord = word;
} else {
// If however the word is not in the dictionary, or exists as a lower-case word
// only, then we consider that was a lower-case word that had been auto-capitalized.
- secondWord = suggestionLowerCase;
+ secondWord = lowerCasedWord;
}
} else {
// HACK: We'd like to avoid adding the capitalized form of common words to the User
@@ -403,20 +417,20 @@ public class DictionaryFacilitatorForSuggest {
// consolidation is done.
// TODO: Remove this hack when ready.
final int lowerCaseFreqInMainDict = dictionaries.hasDict(Dictionary.TYPE_MAIN) ?
- dictionaries.getDict(Dictionary.TYPE_MAIN).getFrequency(suggestionLowerCase) :
+ dictionaries.getDict(Dictionary.TYPE_MAIN).getFrequency(lowerCasedWord) :
Dictionary.NOT_A_PROBABILITY;
if (maxFreq < lowerCaseFreqInMainDict
&& lowerCaseFreqInMainDict >= CAPITALIZED_FORM_MAX_PROBABILITY_FOR_INSERT) {
// Use lower cased word as the word can be a distracter of the popular word.
- secondWord = suggestionLowerCase;
+ secondWord = lowerCasedWord;
} else {
- secondWord = suggestion;
+ secondWord = word;
}
}
// We demote unrecognized words (frequency < 0, below) by specifying them as "invalid".
// We don't add words with 0-frequency (assuming they would be profanity etc.).
final boolean isValid = maxFreq > 0;
- UserHistoryDictionary.addToDictionary(userHistoryDictionary, previousWord, secondWord,
+ UserHistoryDictionary.addToDictionary(userHistoryDictionary, prevWord, secondWord,
isValid, timeStampInSeconds);
}
diff --git a/java/src/com/android/inputmethod/latin/inputlogic/InputLogic.java b/java/src/com/android/inputmethod/latin/inputlogic/InputLogic.java
index e80ee35bd..8b795b82f 100644
--- a/java/src/com/android/inputmethod/latin/inputlogic/InputLogic.java
+++ b/java/src/com/android/inputmethod/latin/inputlogic/InputLogic.java
@@ -604,7 +604,7 @@ public final class InputLogic {
if (null != candidate
&& mSuggestedWords.mSequenceNumber >= mAutoCommitSequenceNumber) {
if (candidate.mSourceDict.shouldAutoCommit(candidate)) {
- final String[] commitParts = candidate.mWord.split(" ", 2);
+ final String[] commitParts = candidate.mWord.split(Constants.WORD_SEPARATOR, 2);
batchPointers.shift(candidate.mIndexOfTouchPointOfSecondWord);
promotePhantomSpace(settingsValues);
mConnection.commitText(commitParts[0], 0);
@@ -1241,7 +1241,7 @@ public final class InputLogic {
final int timeStampInSeconds = (int)TimeUnit.MILLISECONDS.toSeconds(
System.currentTimeMillis());
mSuggest.mDictionaryFacilitator.addToUserHistory(suggestion, wasAutoCapitalized, prevWord,
- timeStampInSeconds);
+ timeStampInSeconds, settingsValues.mBlockPotentiallyOffensive);
}
public void performUpdateSuggestionStripSync(final SettingsValues settingsValues) {
@@ -1962,10 +1962,11 @@ public final class InputLogic {
final CharSequence chosenWordWithSuggestions =
SuggestionSpanUtils.getTextWithSuggestionSpan(mLatinIME, chosenWord,
suggestedWords);
- mConnection.commitText(chosenWordWithSuggestions, 1);
- // TODO: we pass 2 here, but would it be better to move this above and pass 1 instead?
+ // Use the 2nd previous word as the previous word because the 1st previous word is the word
+ // to be committed.
final String prevWord = mConnection.getNthPreviousWord(
settingsValues.mSpacingAndPunctuations, 2);
+ mConnection.commitText(chosenWordWithSuggestions, 1);
// Add the word to the user history dictionary
performAdditionToUserHistoryDictionary(settingsValues, chosenWord, prevWord);
// TODO: figure out here if this is an auto-correct or if the best word is actually
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index 3ac424fea..a3d8ec158 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -335,8 +335,9 @@ static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz,
if (!shortcutTargetCodePoints.empty()) {
shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
}
+ // Use 1 for count to indicate the word has inputed.
const UnigramProperty unigramProperty(isNotAWord, isBlacklisted,
- probability, timestamp, 0 /* level */, 0 /* count */, &shortcuts);
+ probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
dictionary->addUnigramWord(codePoints, codePointCount, &unigramProperty);
}
@@ -436,8 +437,9 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
env->GetIntField(languageModelParam, shortcutProbabilityFieldId);
shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
}
+ // Use 1 for count to indicate the word has inputed.
const UnigramProperty unigramProperty(isNotAWord, isBlacklisted,
- unigramProbability, timestamp, 0 /* level */, 0 /* count */, &shortcuts);
+ unigramProbability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
dictionary->addUnigramWord(word1CodePoints, word1Length, &unigramProperty);
if (word0) {
jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp
index 4975512ff..1645039d3 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp
@@ -257,10 +257,12 @@ const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
const int timestamp) const {
// TODO: Consolidate historical info and probability.
if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
+ // Use 1 for count to indicate the bigram has inputed.
+ const HistoricalInfo historicalInfoForUpdate(timestamp, 0 /* level */, 1 /* count */);
const HistoricalInfo updatedHistoricalInfo =
ForgettingCurveUtils::createUpdatedHistoricalInfo(
- originalBigramEntry->getHistoricalInfo(), newProbability, timestamp,
- mHeaderPolicy);
+ originalBigramEntry->getHistoricalInfo(), newProbability,
+ &historicalInfoForUpdate, mHeaderPolicy);
return originalBigramEntry->updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo);
} else {
return originalBigramEntry->updateProbabilityAndGetEntry(newProbability);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
index 50a3e56e3..cc3a24a22 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
@@ -387,11 +387,12 @@ const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom(
const UnigramProperty *const unigramProperty) const {
// TODO: Consolidate historical info and probability.
if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
+ const HistoricalInfo historicalInfoForUpdate(unigramProperty->getTimestamp(),
+ unigramProperty->getLevel(), unigramProperty->getCount());
const HistoricalInfo updatedHistoricalInfo =
ForgettingCurveUtils::createUpdatedHistoricalInfo(
originalProbabilityEntry->getHistoricalInfo(),
- unigramProperty->getProbability(), unigramProperty->getTimestamp(),
- mHeaderPolicy);
+ unigramProperty->getProbability(), &historicalInfoForUpdate, mHeaderPolicy);
return originalProbabilityEntry->createEntryWithUpdatedHistoricalInfo(
&updatedHistoricalInfo);
} else {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index 2584fe5b7..9999e0692 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -425,6 +425,9 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
}
int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints) {
+ // TODO: Return code point count like other methods.
+ // Null termination.
+ outCodePoints[0] = 0;
if (token == 0) {
mTerminalPtNodePositionsForIteratingWords.clear();
DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy(
@@ -441,8 +444,13 @@ int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const
}
const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token];
int unigramProbability = NOT_A_PROBABILITY;
- getCodePointsAndProbabilityAndReturnCodePointCount(terminalPtNodePos, MAX_WORD_LENGTH,
- outCodePoints, &unigramProbability);
+ const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
+ terminalPtNodePos, MAX_WORD_LENGTH, outCodePoints, &unigramProbability);
+ if (codePointCount < MAX_WORD_LENGTH) {
+ // Null termination. outCodePoints have to be null terminated or contain MAX_WORD_LENGTH
+ // code points.
+ outCodePoints[codePointCount] = 0;
+ }
const int nextToken = token + 1;
if (nextToken >= terminalPtNodePositionsVectorSize) {
// All words have been iterated.
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
index c7d3df984..fed0ae77e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
@@ -30,7 +30,7 @@ const int ForgettingCurveUtils::MULTIPLIER_TWO_IN_PROBABILITY_SCALE = 8;
const int ForgettingCurveUtils::DECAY_INTERVAL_SECONDS = 2 * 60 * 60;
const int ForgettingCurveUtils::MAX_LEVEL = 3;
-const int ForgettingCurveUtils::MIN_VALID_LEVEL = 1;
+const int ForgettingCurveUtils::MIN_VISIBLE_LEVEL = 1;
const int ForgettingCurveUtils::MAX_ELAPSED_TIME_STEP_COUNT = 15;
const int ForgettingCurveUtils::DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD = 14;
@@ -41,25 +41,34 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
// TODO: Revise the logic to decide the initial probability depending on the given probability.
/* static */ const HistoricalInfo ForgettingCurveUtils::createUpdatedHistoricalInfo(
- const HistoricalInfo *const originalHistoricalInfo,
- const int newProbability, const int timestamp, const HeaderPolicy *const headerPolicy) {
+ const HistoricalInfo *const originalHistoricalInfo, const int newProbability,
+ const HistoricalInfo *const newHistoricalInfo, const HeaderPolicy *const headerPolicy) {
+ const int timestamp = newHistoricalInfo->getTimeStamp();
if (newProbability != NOT_A_PROBABILITY && originalHistoricalInfo->getLevel() == 0) {
- return HistoricalInfo(timestamp, MIN_VALID_LEVEL /* level */, 0 /* count */);
- } else if (!originalHistoricalInfo->isValid()) {
+ // Add entry as a valid word.
+ const int level = clampToVisibleEntryLevelRange(newHistoricalInfo->getLevel());
+ const int count = clampToValidCountRange(newHistoricalInfo->getCount(), headerPolicy);
+ return HistoricalInfo(timestamp, level, count);
+ } else if (!originalHistoricalInfo->isValid()
+ || originalHistoricalInfo->getLevel() < newHistoricalInfo->getLevel()
+ || (originalHistoricalInfo->getLevel() == newHistoricalInfo->getLevel()
+ && originalHistoricalInfo->getCount() < newHistoricalInfo->getCount())) {
// Initial information.
- return HistoricalInfo(timestamp, 0 /* level */, 1 /* count */);
+ const int level = clampToValidLevelRange(newHistoricalInfo->getLevel());
+ const int count = clampToValidCountRange(newHistoricalInfo->getCount(), headerPolicy);
+ return HistoricalInfo(timestamp, level, count);
} else {
const int updatedCount = originalHistoricalInfo->getCount() + 1;
if (updatedCount >= headerPolicy->getForgettingCurveOccurrencesToLevelUp()) {
// The count exceeds the max value the level can be incremented.
if (originalHistoricalInfo->getLevel() >= MAX_LEVEL) {
// The level is already max.
- return HistoricalInfo(timestamp, originalHistoricalInfo->getLevel(),
- originalHistoricalInfo->getCount());
+ return HistoricalInfo(timestamp,
+ originalHistoricalInfo->getLevel(), originalHistoricalInfo->getCount());
} else {
// Level up.
- return HistoricalInfo(timestamp, originalHistoricalInfo->getLevel() + 1,
- 0 /* count */);
+ return HistoricalInfo(timestamp,
+ originalHistoricalInfo->getLevel() + 1, 0 /* count */);
}
} else {
return HistoricalInfo(timestamp, originalHistoricalInfo->getLevel(), updatedCount);
@@ -73,8 +82,8 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
headerPolicy->getForgettingCurveDurationToLevelDown());
return sProbabilityTable.getProbability(
headerPolicy->getForgettingCurveProbabilityValuesTableId(),
- std::min(std::max(historicalInfo->getLevel(), 0), MAX_LEVEL),
- std::min(std::max(elapsedTimeStepCount, 0), MAX_ELAPSED_TIME_STEP_COUNT));
+ clampToValidLevelRange(historicalInfo->getLevel()),
+ clampToValidTimeStepCountRange(elapsedTimeStepCount));
}
/* static */ int ForgettingCurveUtils::getProbability(const int unigramProbability,
@@ -155,6 +164,23 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
return elapsedTimeInSeconds / timeStepDurationInSeconds;
}
+/* static */ int ForgettingCurveUtils::clampToVisibleEntryLevelRange(const int level) {
+ return std::min(std::max(level, MIN_VISIBLE_LEVEL), MAX_LEVEL);
+}
+
+/* static */ int ForgettingCurveUtils::clampToValidCountRange(const int count,
+ const HeaderPolicy *const headerPolicy) {
+ return std::min(std::max(count, 0), headerPolicy->getForgettingCurveOccurrencesToLevelUp() - 1);
+}
+
+/* static */ int ForgettingCurveUtils::clampToValidLevelRange(const int level) {
+ return std::min(std::max(level, 0), MAX_LEVEL);
+}
+
+/* static */ int ForgettingCurveUtils::clampToValidTimeStepCountRange(const int timeStepCount) {
+ return std::min(std::max(timeStepCount, 0), MAX_ELAPSED_TIME_STEP_COUNT);
+}
+
const int ForgettingCurveUtils::ProbabilityTable::PROBABILITY_TABLE_COUNT = 4;
const int ForgettingCurveUtils::ProbabilityTable::WEAK_PROBABILITY_TABLE_ID = 0;
const int ForgettingCurveUtils::ProbabilityTable::MODEST_PROBABILITY_TABLE_ID = 1;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
index bb8690939..3ff80aeec 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
@@ -30,7 +30,7 @@ class ForgettingCurveUtils {
public:
static const HistoricalInfo createUpdatedHistoricalInfo(
const HistoricalInfo *const originalHistoricalInfo, const int newProbability,
- const int timestamp, const HeaderPolicy *const headerPolicy);
+ const HistoricalInfo *const newHistoricalInfo, const HeaderPolicy *const headerPolicy);
static const HistoricalInfo createHistoricalInfoToSave(
const HistoricalInfo *const originalHistoricalInfo,
@@ -93,7 +93,7 @@ class ForgettingCurveUtils {
static const int DECAY_INTERVAL_SECONDS;
static const int MAX_LEVEL;
- static const int MIN_VALID_LEVEL;
+ static const int MIN_VISIBLE_LEVEL;
static const int MAX_ELAPSED_TIME_STEP_COUNT;
static const int DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD;
@@ -103,8 +103,11 @@ class ForgettingCurveUtils {
static const ProbabilityTable sProbabilityTable;
static int backoff(const int unigramProbability);
-
static int getElapsedTimeStepCount(const int timestamp, const int durationToLevelDown);
+ static int clampToVisibleEntryLevelRange(const int level);
+ static int clampToValidLevelRange(const int level);
+ static int clampToValidCountRange(const int count, const HeaderPolicy *const headerPolicy);
+ static int clampToValidTimeStepCountRange(const int timeStepCount);
};
} // namespace latinime
#endif /* LATINIME_FORGETTING_CURVE_UTILS_H */
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java
index ae2205b36..aed24c56e 100644
--- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java
+++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java
@@ -93,15 +93,17 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
private File createEmptyDictionaryAndGetFile(final String dictId,
final int formatVersion) throws IOException {
- if (formatVersion == FormatSpec.VERSION4) {
- return createEmptyVer4DictionaryAndGetFile(dictId);
+ if (formatVersion == FormatSpec.VERSION4
+ || formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING) {
+ return createEmptyVer4DictionaryAndGetFile(dictId, formatVersion);
} else {
throw new IOException("Dictionary format version " + formatVersion
+ " is not supported.");
}
}
- private File createEmptyVer4DictionaryAndGetFile(final String dictId) throws IOException {
+ private File createEmptyVer4DictionaryAndGetFile(final String dictId, final int formatVersion)
+ throws IOException {
final File file = File.createTempFile(dictId, TEST_DICT_FILE_EXTENSION,
getContext().getCacheDir());
FileUtils.deleteRecursively(file);
@@ -113,7 +115,7 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
DictionaryHeader.ATTRIBUTE_VALUE_TRUE);
attributeMap.put(DictionaryHeader.HAS_HISTORICAL_INFO_KEY,
DictionaryHeader.ATTRIBUTE_VALUE_TRUE);
- if (BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), FormatSpec.VERSION4,
+ if (BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), formatVersion,
LocaleUtils.constructLocaleFromString(TEST_LOCALE), attributeMap)) {
return file;
} else {
@@ -562,4 +564,43 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
}
}
}
+
+ public void testDictMigration() {
+ testDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, FormatSpec.VERSION4);
+ }
+
+ private void testDictMigration(final int fromFormatVersion, final int toFormatVersion) {
+ setCurrentTimeForTestMode(mCurrentTime);
+ File dictFile = null;
+ try {
+ dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", fromFormatVersion);
+ } catch (IOException e) {
+ fail("IOException while writing an initial dictionary : " + e);
+ }
+ final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
+ 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
+ Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+ // TODO: Add tests for bigrams when the implementation gets ready.
+ addUnigramWord(binaryDictionary, "aaa", DUMMY_PROBABILITY);
+ assertTrue(binaryDictionary.isValidWord("aaa"));
+ addUnigramWord(binaryDictionary, "bbb", Dictionary.NOT_A_PROBABILITY);
+ assertFalse(binaryDictionary.isValidWord("bbb"));
+ addUnigramWord(binaryDictionary, "ccc", DUMMY_PROBABILITY);
+ addUnigramWord(binaryDictionary, "ccc", DUMMY_PROBABILITY);
+ addUnigramWord(binaryDictionary, "ccc", DUMMY_PROBABILITY);
+ addUnigramWord(binaryDictionary, "ccc", DUMMY_PROBABILITY);
+ addUnigramWord(binaryDictionary, "ccc", DUMMY_PROBABILITY);
+
+ assertEquals(fromFormatVersion, binaryDictionary.getFormatVersion());
+ assertTrue(binaryDictionary.migrateTo(toFormatVersion));
+ assertTrue(binaryDictionary.isValidDictionary());
+ assertEquals(toFormatVersion, binaryDictionary.getFormatVersion());
+ assertTrue(binaryDictionary.isValidWord("aaa"));
+ assertFalse(binaryDictionary.isValidWord("bbb"));
+ assertTrue(binaryDictionary.getFrequency("aaa") < binaryDictionary.getFrequency("ccc"));
+ addUnigramWord(binaryDictionary, "bbb", Dictionary.NOT_A_PROBABILITY);
+ assertTrue(binaryDictionary.isValidWord("bbb"));
+ binaryDictionary.close();
+ dictFile.delete();
+ }
}