diff options
10 files changed, 144 insertions, 44 deletions
diff --git a/java/src/com/android/inputmethod/latin/Constants.java b/java/src/com/android/inputmethod/latin/Constants.java index 5bc9b6281..67ca59540 100644 --- a/java/src/com/android/inputmethod/latin/Constants.java +++ b/java/src/com/android/inputmethod/latin/Constants.java @@ -169,6 +169,8 @@ public final class Constants { // How many continuous deletes at which to start deleting at a higher speed. public static final int DELETE_ACCELERATE_AT = 20; + public static final String WORD_SEPARATOR = " "; + public static boolean isValidCoordinate(final int coordinate) { // Detect {@link NOT_A_COORDINATE}, {@link SUGGESTION_STRIP_COORDINATE}, // and {@link SPELL_CHECKER_COORDINATE}. diff --git a/java/src/com/android/inputmethod/latin/DictionaryFacilitatorForSuggest.java b/java/src/com/android/inputmethod/latin/DictionaryFacilitatorForSuggest.java index ea9691a5a..e0220e137 100644 --- a/java/src/com/android/inputmethod/latin/DictionaryFacilitatorForSuggest.java +++ b/java/src/com/android/inputmethod/latin/DictionaryFacilitatorForSuggest.java @@ -56,7 +56,7 @@ public class DictionaryFacilitatorForSuggest { private boolean mIsUserDictEnabled = false; private volatile CountDownLatch mLatchForWaitingLoadingMainDictionary = new CountDownLatch(0); // To synchronize assigning mDictionaries to ensure closing dictionaries. - private Object mLock = new Object(); + private final Object mLock = new Object(); private static final String[] DICT_TYPES_ORDERED_TO_GET_SUGGESTION = new String[] { @@ -370,32 +370,46 @@ public class DictionaryFacilitatorForSuggest { } public void addToUserHistory(final String suggestion, final boolean wasAutoCapitalized, - final String previousWord, final int timeStampInSeconds) { + final String previousWord, final int timeStampInSeconds, + final boolean blockPotentiallyOffensive) { final Dictionaries dictionaries = mDictionaries; + final String[] words = suggestion.split(Constants.WORD_SEPARATOR); + for (int i = 0; i < words.length; i++) { + final String currentWord = words[i]; + final String prevWord = (i == 0) ? previousWord : words[i - 1]; + final boolean wasCurrentWordAutoCapitalized = (i == 0) ? wasAutoCapitalized : false; + addWordToUserHistory(dictionaries, prevWord, currentWord, + wasCurrentWordAutoCapitalized, timeStampInSeconds, blockPotentiallyOffensive); + } + } + + private void addWordToUserHistory(final Dictionaries dictionaries, final String prevWord, + final String word, final boolean wasAutoCapitalized, final int timeStampInSeconds, + final boolean blockPotentiallyOffensive) { final ExpandableBinaryDictionary userHistoryDictionary = dictionaries.getSubDict(Dictionary.TYPE_USER_HISTORY); if (userHistoryDictionary == null) { return; } - final int maxFreq = getMaxFrequency(suggestion); - if (maxFreq == 0) { + final int maxFreq = getMaxFrequency(word); + if (maxFreq == 0 && blockPotentiallyOffensive) { return; } - final String suggestionLowerCase = suggestion.toLowerCase(dictionaries.mLocale); + final String lowerCasedWord = word.toLowerCase(dictionaries.mLocale); final String secondWord; if (wasAutoCapitalized) { - if (isValidWord(suggestion, false /* ignoreCase */) - && !isValidWord(suggestionLowerCase, false /* ignoreCase */)) { + if (isValidWord(word, false /* ignoreCase */) + && !isValidWord(lowerCasedWord, false /* ignoreCase */)) { // If the word was auto-capitalized and exists only as a capitalized word in the // dictionary, then we must not downcase it before registering it. For example, // the name of the contacts in start-of-sentence position would come here with the // wasAutoCapitalized flag: if we downcase it, we'd register a lower-case version // of that contact's name which would end up popping in suggestions. - secondWord = suggestion; + secondWord = word; } else { // If however the word is not in the dictionary, or exists as a lower-case word // only, then we consider that was a lower-case word that had been auto-capitalized. - secondWord = suggestionLowerCase; + secondWord = lowerCasedWord; } } else { // HACK: We'd like to avoid adding the capitalized form of common words to the User @@ -403,20 +417,20 @@ public class DictionaryFacilitatorForSuggest { // consolidation is done. // TODO: Remove this hack when ready. final int lowerCaseFreqInMainDict = dictionaries.hasDict(Dictionary.TYPE_MAIN) ? - dictionaries.getDict(Dictionary.TYPE_MAIN).getFrequency(suggestionLowerCase) : + dictionaries.getDict(Dictionary.TYPE_MAIN).getFrequency(lowerCasedWord) : Dictionary.NOT_A_PROBABILITY; if (maxFreq < lowerCaseFreqInMainDict && lowerCaseFreqInMainDict >= CAPITALIZED_FORM_MAX_PROBABILITY_FOR_INSERT) { // Use lower cased word as the word can be a distracter of the popular word. - secondWord = suggestionLowerCase; + secondWord = lowerCasedWord; } else { - secondWord = suggestion; + secondWord = word; } } // We demote unrecognized words (frequency < 0, below) by specifying them as "invalid". // We don't add words with 0-frequency (assuming they would be profanity etc.). final boolean isValid = maxFreq > 0; - UserHistoryDictionary.addToDictionary(userHistoryDictionary, previousWord, secondWord, + UserHistoryDictionary.addToDictionary(userHistoryDictionary, prevWord, secondWord, isValid, timeStampInSeconds); } diff --git a/java/src/com/android/inputmethod/latin/inputlogic/InputLogic.java b/java/src/com/android/inputmethod/latin/inputlogic/InputLogic.java index e80ee35bd..8b795b82f 100644 --- a/java/src/com/android/inputmethod/latin/inputlogic/InputLogic.java +++ b/java/src/com/android/inputmethod/latin/inputlogic/InputLogic.java @@ -604,7 +604,7 @@ public final class InputLogic { if (null != candidate && mSuggestedWords.mSequenceNumber >= mAutoCommitSequenceNumber) { if (candidate.mSourceDict.shouldAutoCommit(candidate)) { - final String[] commitParts = candidate.mWord.split(" ", 2); + final String[] commitParts = candidate.mWord.split(Constants.WORD_SEPARATOR, 2); batchPointers.shift(candidate.mIndexOfTouchPointOfSecondWord); promotePhantomSpace(settingsValues); mConnection.commitText(commitParts[0], 0); @@ -1241,7 +1241,7 @@ public final class InputLogic { final int timeStampInSeconds = (int)TimeUnit.MILLISECONDS.toSeconds( System.currentTimeMillis()); mSuggest.mDictionaryFacilitator.addToUserHistory(suggestion, wasAutoCapitalized, prevWord, - timeStampInSeconds); + timeStampInSeconds, settingsValues.mBlockPotentiallyOffensive); } public void performUpdateSuggestionStripSync(final SettingsValues settingsValues) { @@ -1962,10 +1962,11 @@ public final class InputLogic { final CharSequence chosenWordWithSuggestions = SuggestionSpanUtils.getTextWithSuggestionSpan(mLatinIME, chosenWord, suggestedWords); - mConnection.commitText(chosenWordWithSuggestions, 1); - // TODO: we pass 2 here, but would it be better to move this above and pass 1 instead? + // Use the 2nd previous word as the previous word because the 1st previous word is the word + // to be committed. final String prevWord = mConnection.getNthPreviousWord( settingsValues.mSpacingAndPunctuations, 2); + mConnection.commitText(chosenWordWithSuggestions, 1); // Add the word to the user history dictionary performAdditionToUserHistoryDictionary(settingsValues, chosenWord, prevWord); // TODO: figure out here if this is an auto-correct or if the best word is actually diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index 3ac424fea..a3d8ec158 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -335,8 +335,9 @@ static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz, if (!shortcutTargetCodePoints.empty()) { shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability); } + // Use 1 for count to indicate the word has inputed. const UnigramProperty unigramProperty(isNotAWord, isBlacklisted, - probability, timestamp, 0 /* level */, 0 /* count */, &shortcuts); + probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts); dictionary->addUnigramWord(codePoints, codePointCount, &unigramProperty); } @@ -436,8 +437,9 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j env->GetIntField(languageModelParam, shortcutProbabilityFieldId); shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability); } + // Use 1 for count to indicate the word has inputed. const UnigramProperty unigramProperty(isNotAWord, isBlacklisted, - unigramProbability, timestamp, 0 /* level */, 0 /* count */, &shortcuts); + unigramProbability, timestamp, 0 /* level */, 1 /* count */, &shortcuts); dictionary->addUnigramWord(word1CodePoints, word1Length, &unigramProperty); if (word0) { jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId); diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp index 4975512ff..1645039d3 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp @@ -257,10 +257,12 @@ const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom( const int timestamp) const { // TODO: Consolidate historical info and probability. if (mHeaderPolicy->hasHistoricalInfoOfWords()) { + // Use 1 for count to indicate the bigram has inputed. + const HistoricalInfo historicalInfoForUpdate(timestamp, 0 /* level */, 1 /* count */); const HistoricalInfo updatedHistoricalInfo = ForgettingCurveUtils::createUpdatedHistoricalInfo( - originalBigramEntry->getHistoricalInfo(), newProbability, timestamp, - mHeaderPolicy); + originalBigramEntry->getHistoricalInfo(), newProbability, + &historicalInfoForUpdate, mHeaderPolicy); return originalBigramEntry->updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo); } else { return originalBigramEntry->updateProbabilityAndGetEntry(newProbability); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp index 50a3e56e3..cc3a24a22 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp @@ -387,11 +387,12 @@ const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom( const UnigramProperty *const unigramProperty) const { // TODO: Consolidate historical info and probability. if (mHeaderPolicy->hasHistoricalInfoOfWords()) { + const HistoricalInfo historicalInfoForUpdate(unigramProperty->getTimestamp(), + unigramProperty->getLevel(), unigramProperty->getCount()); const HistoricalInfo updatedHistoricalInfo = ForgettingCurveUtils::createUpdatedHistoricalInfo( originalProbabilityEntry->getHistoricalInfo(), - unigramProperty->getProbability(), unigramProperty->getTimestamp(), - mHeaderPolicy); + unigramProperty->getProbability(), &historicalInfoForUpdate, mHeaderPolicy); return originalProbabilityEntry->createEntryWithUpdatedHistoricalInfo( &updatedHistoricalInfo); } else { diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index 2584fe5b7..9999e0692 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -425,6 +425,9 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code } int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints) { + // TODO: Return code point count like other methods. + // Null termination. + outCodePoints[0] = 0; if (token == 0) { mTerminalPtNodePositionsForIteratingWords.clear(); DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy( @@ -441,8 +444,13 @@ int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const } const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token]; int unigramProbability = NOT_A_PROBABILITY; - getCodePointsAndProbabilityAndReturnCodePointCount(terminalPtNodePos, MAX_WORD_LENGTH, - outCodePoints, &unigramProbability); + const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount( + terminalPtNodePos, MAX_WORD_LENGTH, outCodePoints, &unigramProbability); + if (codePointCount < MAX_WORD_LENGTH) { + // Null termination. outCodePoints have to be null terminated or contain MAX_WORD_LENGTH + // code points. + outCodePoints[codePointCount] = 0; + } const int nextToken = token + 1; if (nextToken >= terminalPtNodePositionsVectorSize) { // All words have been iterated. diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp index c7d3df984..fed0ae77e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp @@ -30,7 +30,7 @@ const int ForgettingCurveUtils::MULTIPLIER_TWO_IN_PROBABILITY_SCALE = 8; const int ForgettingCurveUtils::DECAY_INTERVAL_SECONDS = 2 * 60 * 60; const int ForgettingCurveUtils::MAX_LEVEL = 3; -const int ForgettingCurveUtils::MIN_VALID_LEVEL = 1; +const int ForgettingCurveUtils::MIN_VISIBLE_LEVEL = 1; const int ForgettingCurveUtils::MAX_ELAPSED_TIME_STEP_COUNT = 15; const int ForgettingCurveUtils::DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD = 14; @@ -41,25 +41,34 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT // TODO: Revise the logic to decide the initial probability depending on the given probability. /* static */ const HistoricalInfo ForgettingCurveUtils::createUpdatedHistoricalInfo( - const HistoricalInfo *const originalHistoricalInfo, - const int newProbability, const int timestamp, const HeaderPolicy *const headerPolicy) { + const HistoricalInfo *const originalHistoricalInfo, const int newProbability, + const HistoricalInfo *const newHistoricalInfo, const HeaderPolicy *const headerPolicy) { + const int timestamp = newHistoricalInfo->getTimeStamp(); if (newProbability != NOT_A_PROBABILITY && originalHistoricalInfo->getLevel() == 0) { - return HistoricalInfo(timestamp, MIN_VALID_LEVEL /* level */, 0 /* count */); - } else if (!originalHistoricalInfo->isValid()) { + // Add entry as a valid word. + const int level = clampToVisibleEntryLevelRange(newHistoricalInfo->getLevel()); + const int count = clampToValidCountRange(newHistoricalInfo->getCount(), headerPolicy); + return HistoricalInfo(timestamp, level, count); + } else if (!originalHistoricalInfo->isValid() + || originalHistoricalInfo->getLevel() < newHistoricalInfo->getLevel() + || (originalHistoricalInfo->getLevel() == newHistoricalInfo->getLevel() + && originalHistoricalInfo->getCount() < newHistoricalInfo->getCount())) { // Initial information. - return HistoricalInfo(timestamp, 0 /* level */, 1 /* count */); + const int level = clampToValidLevelRange(newHistoricalInfo->getLevel()); + const int count = clampToValidCountRange(newHistoricalInfo->getCount(), headerPolicy); + return HistoricalInfo(timestamp, level, count); } else { const int updatedCount = originalHistoricalInfo->getCount() + 1; if (updatedCount >= headerPolicy->getForgettingCurveOccurrencesToLevelUp()) { // The count exceeds the max value the level can be incremented. if (originalHistoricalInfo->getLevel() >= MAX_LEVEL) { // The level is already max. - return HistoricalInfo(timestamp, originalHistoricalInfo->getLevel(), - originalHistoricalInfo->getCount()); + return HistoricalInfo(timestamp, + originalHistoricalInfo->getLevel(), originalHistoricalInfo->getCount()); } else { // Level up. - return HistoricalInfo(timestamp, originalHistoricalInfo->getLevel() + 1, - 0 /* count */); + return HistoricalInfo(timestamp, + originalHistoricalInfo->getLevel() + 1, 0 /* count */); } } else { return HistoricalInfo(timestamp, originalHistoricalInfo->getLevel(), updatedCount); @@ -73,8 +82,8 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT headerPolicy->getForgettingCurveDurationToLevelDown()); return sProbabilityTable.getProbability( headerPolicy->getForgettingCurveProbabilityValuesTableId(), - std::min(std::max(historicalInfo->getLevel(), 0), MAX_LEVEL), - std::min(std::max(elapsedTimeStepCount, 0), MAX_ELAPSED_TIME_STEP_COUNT)); + clampToValidLevelRange(historicalInfo->getLevel()), + clampToValidTimeStepCountRange(elapsedTimeStepCount)); } /* static */ int ForgettingCurveUtils::getProbability(const int unigramProbability, @@ -155,6 +164,23 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT return elapsedTimeInSeconds / timeStepDurationInSeconds; } +/* static */ int ForgettingCurveUtils::clampToVisibleEntryLevelRange(const int level) { + return std::min(std::max(level, MIN_VISIBLE_LEVEL), MAX_LEVEL); +} + +/* static */ int ForgettingCurveUtils::clampToValidCountRange(const int count, + const HeaderPolicy *const headerPolicy) { + return std::min(std::max(count, 0), headerPolicy->getForgettingCurveOccurrencesToLevelUp() - 1); +} + +/* static */ int ForgettingCurveUtils::clampToValidLevelRange(const int level) { + return std::min(std::max(level, 0), MAX_LEVEL); +} + +/* static */ int ForgettingCurveUtils::clampToValidTimeStepCountRange(const int timeStepCount) { + return std::min(std::max(timeStepCount, 0), MAX_ELAPSED_TIME_STEP_COUNT); +} + const int ForgettingCurveUtils::ProbabilityTable::PROBABILITY_TABLE_COUNT = 4; const int ForgettingCurveUtils::ProbabilityTable::WEAK_PROBABILITY_TABLE_ID = 0; const int ForgettingCurveUtils::ProbabilityTable::MODEST_PROBABILITY_TABLE_ID = 1; diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h index bb8690939..3ff80aeec 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h @@ -30,7 +30,7 @@ class ForgettingCurveUtils { public: static const HistoricalInfo createUpdatedHistoricalInfo( const HistoricalInfo *const originalHistoricalInfo, const int newProbability, - const int timestamp, const HeaderPolicy *const headerPolicy); + const HistoricalInfo *const newHistoricalInfo, const HeaderPolicy *const headerPolicy); static const HistoricalInfo createHistoricalInfoToSave( const HistoricalInfo *const originalHistoricalInfo, @@ -93,7 +93,7 @@ class ForgettingCurveUtils { static const int DECAY_INTERVAL_SECONDS; static const int MAX_LEVEL; - static const int MIN_VALID_LEVEL; + static const int MIN_VISIBLE_LEVEL; static const int MAX_ELAPSED_TIME_STEP_COUNT; static const int DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD; @@ -103,8 +103,11 @@ class ForgettingCurveUtils { static const ProbabilityTable sProbabilityTable; static int backoff(const int unigramProbability); - static int getElapsedTimeStepCount(const int timestamp, const int durationToLevelDown); + static int clampToVisibleEntryLevelRange(const int level); + static int clampToValidLevelRange(const int level); + static int clampToValidCountRange(const int count, const HeaderPolicy *const headerPolicy); + static int clampToValidTimeStepCountRange(const int timeStepCount); }; } // namespace latinime #endif /* LATINIME_FORGETTING_CURVE_UTILS_H */ diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java index ae2205b36..aed24c56e 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java @@ -93,15 +93,17 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { private File createEmptyDictionaryAndGetFile(final String dictId, final int formatVersion) throws IOException { - if (formatVersion == FormatSpec.VERSION4) { - return createEmptyVer4DictionaryAndGetFile(dictId); + if (formatVersion == FormatSpec.VERSION4 + || formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING) { + return createEmptyVer4DictionaryAndGetFile(dictId, formatVersion); } else { throw new IOException("Dictionary format version " + formatVersion + " is not supported."); } } - private File createEmptyVer4DictionaryAndGetFile(final String dictId) throws IOException { + private File createEmptyVer4DictionaryAndGetFile(final String dictId, final int formatVersion) + throws IOException { final File file = File.createTempFile(dictId, TEST_DICT_FILE_EXTENSION, getContext().getCacheDir()); FileUtils.deleteRecursively(file); @@ -113,7 +115,7 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { DictionaryHeader.ATTRIBUTE_VALUE_TRUE); attributeMap.put(DictionaryHeader.HAS_HISTORICAL_INFO_KEY, DictionaryHeader.ATTRIBUTE_VALUE_TRUE); - if (BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), FormatSpec.VERSION4, + if (BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), formatVersion, LocaleUtils.constructLocaleFromString(TEST_LOCALE), attributeMap)) { return file; } else { @@ -562,4 +564,43 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { } } } + + public void testDictMigration() { + testDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, FormatSpec.VERSION4); + } + + private void testDictMigration(final int fromFormatVersion, final int toFormatVersion) { + setCurrentTimeForTestMode(mCurrentTime); + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", fromFormatVersion); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + // TODO: Add tests for bigrams when the implementation gets ready. + addUnigramWord(binaryDictionary, "aaa", DUMMY_PROBABILITY); + assertTrue(binaryDictionary.isValidWord("aaa")); + addUnigramWord(binaryDictionary, "bbb", Dictionary.NOT_A_PROBABILITY); + assertFalse(binaryDictionary.isValidWord("bbb")); + addUnigramWord(binaryDictionary, "ccc", DUMMY_PROBABILITY); + addUnigramWord(binaryDictionary, "ccc", DUMMY_PROBABILITY); + addUnigramWord(binaryDictionary, "ccc", DUMMY_PROBABILITY); + addUnigramWord(binaryDictionary, "ccc", DUMMY_PROBABILITY); + addUnigramWord(binaryDictionary, "ccc", DUMMY_PROBABILITY); + + assertEquals(fromFormatVersion, binaryDictionary.getFormatVersion()); + assertTrue(binaryDictionary.migrateTo(toFormatVersion)); + assertTrue(binaryDictionary.isValidDictionary()); + assertEquals(toFormatVersion, binaryDictionary.getFormatVersion()); + assertTrue(binaryDictionary.isValidWord("aaa")); + assertFalse(binaryDictionary.isValidWord("bbb")); + assertTrue(binaryDictionary.getFrequency("aaa") < binaryDictionary.getFrequency("ccc")); + addUnigramWord(binaryDictionary, "bbb", Dictionary.NOT_A_PROBABILITY); + assertTrue(binaryDictionary.isValidWord("bbb")); + binaryDictionary.close(); + dictFile.delete(); + } } |