diff options
Diffstat (limited to 'tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java')
-rw-r--r-- | tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java | 215 |
1 files changed, 158 insertions, 57 deletions
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java index fa70f9988..039330c87 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java @@ -21,8 +21,8 @@ import android.test.suitebuilder.annotation.LargeTest; import android.util.Pair; import com.android.inputmethod.latin.NgramContext.WordInfo; +import com.android.inputmethod.latin.common.CodePointUtils; import com.android.inputmethod.latin.makedict.BinaryDictIOUtils; -import com.android.inputmethod.latin.makedict.CodePointUtils; import com.android.inputmethod.latin.makedict.DictDecoder; import com.android.inputmethod.latin.makedict.DictionaryHeader; import com.android.inputmethod.latin.makedict.FormatSpec; @@ -32,6 +32,7 @@ import com.android.inputmethod.latin.makedict.UnsupportedFormatException; import com.android.inputmethod.latin.utils.BinaryDictionaryUtils; import com.android.inputmethod.latin.utils.FileUtils; import com.android.inputmethod.latin.utils.LocaleUtils; +import com.android.inputmethod.latin.utils.WordInputEventForPersonalization; import java.io.File; import java.io.IOException; @@ -39,7 +40,6 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.Locale; -import java.util.Map; import java.util.Random; import java.util.concurrent.TimeUnit; @@ -49,7 +49,7 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { private static final String TEST_LOCALE = "test"; private static final int DUMMY_PROBABILITY = 0; private static final int[] DICT_FORMAT_VERSIONS = - new int[] { FormatSpec.VERSION4, FormatSpec.VERSION4_DEV }; + new int[] { FormatSpec.VERSION402, FormatSpec.VERSION403, FormatSpec.VERSION4_DEV }; private static final String DICTIONARY_ID = "TestDecayingBinaryDictionary"; private int mCurrentTime = 0; @@ -59,6 +59,7 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { super.setUp(); mCurrentTime = 0; mDictFilesToBeDeleted.clear(); + setCurrentTimeForTestMode(mCurrentTime); } @Override @@ -71,12 +72,12 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { super.tearDown(); } - private static boolean supportsBeginningOfSentence(final int formatVersion) { - return formatVersion > FormatSpec.VERSION401; + private static boolean supportsCountBasedNgram(final int formatVersion) { + return formatVersion >= FormatSpec.VERSION403; } private static boolean supportsNgram(final int formatVersion) { - return formatVersion >= FormatSpec.VERSION4_DEV; + return formatVersion >= FormatSpec.VERSION403; } private void onInputWord(final BinaryDictionary binaryDictionary, final String word, @@ -136,28 +137,29 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { private HashSet<File> mDictFilesToBeDeleted = new HashSet<>(); private File createEmptyDictionaryAndGetFile(final int formatVersion) { - if (formatVersion == FormatSpec.VERSION4 - || formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING - || formatVersion == FormatSpec.VERSION4_DEV) { - try { - final File dictFile = createEmptyVer4DictionaryAndGetFile(formatVersion); - mDictFilesToBeDeleted.add(dictFile); - return dictFile; - } catch (final IOException e) { - fail(e.toString()); - } - } else { - fail("Dictionary format version " + formatVersion + " is not supported."); + return createEmptyDictionaryWithAttributeMapAndGetFile(formatVersion, + new HashMap<String, String>()); + } + + private File createEmptyDictionaryWithAttributeMapAndGetFile(final int formatVersion, + final HashMap<String, String> attributeMap) { + try { + final File dictFile = createEmptyVer4DictionaryAndGetFile(formatVersion, + attributeMap); + mDictFilesToBeDeleted.add(dictFile); + return dictFile; + } catch (final IOException e) { + fail(e.toString()); } return null; } - private File createEmptyVer4DictionaryAndGetFile(final int formatVersion) + private File createEmptyVer4DictionaryAndGetFile(final int formatVersion, + final HashMap<String, String> attributeMap) throws IOException { final File file = File.createTempFile(DICTIONARY_ID, TEST_DICT_FILE_EXTENSION, getContext().getCacheDir()); FileUtils.deleteRecursively(file); - Map<String, String> attributeMap = new HashMap<>(); attributeMap.put(DictionaryHeader.DICTIONARY_ID_KEY, DICTIONARY_ID); attributeMap.put(DictionaryHeader.DICTIONARY_VERSION_KEY, String.valueOf(TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis()))); @@ -168,13 +170,12 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { if (BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), formatVersion, LocaleUtils.constructLocaleFromString(TEST_LOCALE), attributeMap)) { return file; - } else { - throw new IOException("Empty dictionary " + file.getAbsolutePath() - + " cannot be created. Foramt version: " + formatVersion); } + throw new IOException("Empty dictionary " + file.getAbsolutePath() + + " cannot be created. Foramt version: " + formatVersion); } - private BinaryDictionary getBinaryDictionary(final File dictFile) { + private static BinaryDictionary getBinaryDictionary(final File dictFile) { return new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); @@ -257,12 +258,10 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { onInputWord(binaryDictionary, "a", false /* isValidWord */); assertTrue(binaryDictionary.isValidWord("a")); - onInputWord(binaryDictionary, "b", true /* isValidWord */); - assertTrue(binaryDictionary.isValidWord("b")); - onInputWordWithPrevWord(binaryDictionary, "b", false /* isValidWord */, "a"); assertFalse(isValidBigram(binaryDictionary, "a", "b")); onInputWordWithPrevWord(binaryDictionary, "b", false /* isValidWord */, "a"); + assertTrue(binaryDictionary.isValidWord("b")); assertTrue(isValidBigram(binaryDictionary, "a", "b")); onInputWordWithPrevWord(binaryDictionary, "c", true /* isValidWord */, "a"); @@ -278,16 +277,12 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { return; } - onInputWordWithPrevWords(binaryDictionary, "c", false /* isValidWord */, "b", "a"); - assertFalse(isValidTrigram(binaryDictionary, "a", "b", "c")); - assertFalse(isValidBigram(binaryDictionary, "b", "c")); - onInputWordWithPrevWords(binaryDictionary, "c", false /* isValidWord */, "b", "a"); + onInputWordWithPrevWords(binaryDictionary, "c", true /* isValidWord */, "b", "a"); assertTrue(isValidTrigram(binaryDictionary, "a", "b", "c")); assertTrue(isValidBigram(binaryDictionary, "b", "c")); - - onInputWordWithPrevWords(binaryDictionary, "d", true /* isValidWord */, "b", "a"); - assertTrue(isValidTrigram(binaryDictionary, "a", "b", "d")); - assertTrue(isValidBigram(binaryDictionary, "b", "d")); + onInputWordWithPrevWords(binaryDictionary, "d", false /* isValidWord */, "c", "b"); + assertFalse(isValidTrigram(binaryDictionary, "b", "c", "d")); + assertFalse(isValidBigram(binaryDictionary, "c", "d")); onInputWordWithPrevWords(binaryDictionary, "cd", true /* isValidWord */, "b", "a"); assertTrue(isValidTrigram(binaryDictionary, "a", "b", "cd")); @@ -306,6 +301,13 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { onInputWord(binaryDictionary, "a", true /* isValidWord */); assertTrue(binaryDictionary.isValidWord("a")); forcePassingShortTime(binaryDictionary); + if (supportsCountBasedNgram(formatVersion)) { + // Count based ngram language model doesn't support decaying based on the elapsed time. + assertTrue(binaryDictionary.isValidWord("a")); + } else { + assertFalse(binaryDictionary.isValidWord("a")); + } + forcePassingLongTime(binaryDictionary); assertFalse(binaryDictionary.isValidWord("a")); onInputWord(binaryDictionary, "a", true /* isValidWord */); @@ -321,6 +323,12 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { onInputWordWithPrevWord(binaryDictionary, "b", true /* isValidWord */, "a"); assertTrue(isValidBigram(binaryDictionary, "a", "b")); forcePassingShortTime(binaryDictionary); + if (supportsCountBasedNgram(formatVersion)) { + assertTrue(isValidBigram(binaryDictionary, "a", "b")); + } else { + assertFalse(isValidBigram(binaryDictionary, "a", "b")); + } + forcePassingLongTime(binaryDictionary); assertFalse(isValidBigram(binaryDictionary, "a", "b")); onInputWord(binaryDictionary, "a", true /* isValidWord */); @@ -343,7 +351,7 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { onInputWordWithPrevWord(binaryDictionary, "bc", true /* isValidWord */, "ab"); onInputWordWithPrevWords(binaryDictionary, "cd", true /* isValidWord */, "bc", "ab"); assertTrue(isValidTrigram(binaryDictionary, "ab", "bc", "cd")); - forcePassingShortTime(binaryDictionary); + forcePassingLongTime(binaryDictionary); assertFalse(isValidTrigram(binaryDictionary, "ab", "bc", "cd")); onInputWord(binaryDictionary, "ab", true /* isValidWord */); @@ -388,7 +396,8 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { } final int maxUnigramCount = Integer.parseInt( - binaryDictionary.getPropertyForGettingStats(BinaryDictionary.MAX_UNIGRAM_COUNT_QUERY)); + binaryDictionary.getPropertyForGettingStats( + BinaryDictionary.MAX_UNIGRAM_COUNT_QUERY)); for (int i = 0; i < unigramTypedCount; i++) { final String word = words.get(random.nextInt(words.size())); onInputWord(binaryDictionary, word, true /* isValidWord */); @@ -476,6 +485,12 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { } private void testAddManyBigramsToDecayingDict(final int formatVersion) { + final int maxUnigramCount = 5000; + final int maxBigramCount = 10000; + final HashMap<String, String> attributeMap = new HashMap<>(); + attributeMap.put(DictionaryHeader.MAX_UNIGRAM_COUNT_KEY, String.valueOf(maxUnigramCount)); + attributeMap.put(DictionaryHeader.MAX_BIGRAM_COUNT_KEY, String.valueOf(maxBigramCount)); + final int unigramCount = 5000; final int bigramCount = 30000; final int bigramTypedCount = 100000; @@ -484,7 +499,8 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { final Random random = new Random(seed); setCurrentTimeForTestMode(mCurrentTime); - final File dictFile = createEmptyDictionaryAndGetFile(formatVersion); + final File dictFile = createEmptyDictionaryWithAttributeMapAndGetFile(formatVersion, + attributeMap); final BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile); final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); @@ -507,9 +523,6 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { bigrams.add(bigram); } - final int maxBigramCount = Integer.parseInt( - binaryDictionary.getPropertyForGettingStats( - BinaryDictionary.MAX_BIGRAM_COUNT_QUERY)); for (int i = 0; i < bigramTypedCount; ++i) { final Pair<String, String> bigram = bigrams.get(random.nextInt(bigrams.size())); onInputWord(binaryDictionary, bigram.first, true /* isValidWord */); @@ -529,7 +542,7 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { assertTrue(bigramCountBeforeGC > bigramCountAfterGC); } } - + forcePassingShortTime(binaryDictionary); assertTrue(Integer.parseInt(binaryDictionary.getPropertyForGettingStats( BinaryDictionary.BIGRAM_COUNT_QUERY)) > 0); assertTrue(Integer.parseInt(binaryDictionary.getPropertyForGettingStats( @@ -546,6 +559,12 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { } private void testOverflowBigrams(final int formatVersion) { + final int maxUnigramCount = 5000; + final int maxBigramCount = 10000; + final HashMap<String, String> attributeMap = new HashMap<>(); + attributeMap.put(DictionaryHeader.MAX_UNIGRAM_COUNT_KEY, String.valueOf(maxUnigramCount)); + attributeMap.put(DictionaryHeader.MAX_BIGRAM_COUNT_KEY, String.valueOf(maxBigramCount)); + final int bigramCount = 20000; final int unigramCount = 1000; final int unigramTypedCount = 20; @@ -556,7 +575,8 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { final long seed = System.currentTimeMillis(); final Random random = new Random(seed); setCurrentTimeForTestMode(mCurrentTime); - final File dictFile = createEmptyDictionaryAndGetFile(formatVersion); + final File dictFile = createEmptyDictionaryWithAttributeMapAndGetFile(formatVersion, + attributeMap); final BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile); final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); @@ -635,27 +655,43 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { assertFalse(binaryDictionary.isValidWord("bbb")); assertFalse(isValidBigram(binaryDictionary, "aaa", "bbb")); + if (supportsNgram(toFormatVersion)) { + onInputWordWithPrevWords(binaryDictionary, "xyz", true, "abc", "aaa"); + assertTrue(isValidTrigram(binaryDictionary, "aaa", "abc", "xyz")); + onInputWordWithPrevWords(binaryDictionary, "def", false, "abc", "aaa"); + assertFalse(isValidTrigram(binaryDictionary, "aaa", "abc", "def")); + } + assertEquals(fromFormatVersion, binaryDictionary.getFormatVersion()); assertTrue(binaryDictionary.migrateTo(toFormatVersion)); assertTrue(binaryDictionary.isValidDictionary()); assertEquals(toFormatVersion, binaryDictionary.getFormatVersion()); assertTrue(binaryDictionary.isValidWord("aaa")); assertFalse(binaryDictionary.isValidWord("bbb")); - assertTrue(binaryDictionary.getFrequency("aaa") < binaryDictionary.getFrequency("ccc")); - onInputWord(binaryDictionary, "bbb", false /* isValidWord */); - assertTrue(binaryDictionary.isValidWord("bbb")); + if (supportsCountBasedNgram(toFormatVersion)) { + assertTrue(binaryDictionary.getFrequency("aaa") < binaryDictionary.getFrequency("ccc")); + onInputWord(binaryDictionary, "bbb", false /* isValidWord */); + assertTrue(binaryDictionary.isValidWord("bbb")); + } assertTrue(isValidBigram(binaryDictionary, "aaa", "abc")); assertFalse(isValidBigram(binaryDictionary, "aaa", "bbb")); - onInputWordWithPrevWord(binaryDictionary, "bbb", false /* isValidWord */, "aaa"); - assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb")); + if (supportsCountBasedNgram(toFormatVersion)) { + onInputWordWithPrevWord(binaryDictionary, "bbb", false /* isValidWord */, "aaa"); + assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb")); + } + if (supportsNgram(toFormatVersion)) { + assertTrue(isValidTrigram(binaryDictionary, "aaa", "abc", "xyz")); + assertFalse(isValidTrigram(binaryDictionary, "aaa", "abc", "def")); + onInputWordWithPrevWords(binaryDictionary, "def", false, "abc", "aaa"); + assertTrue(isValidTrigram(binaryDictionary, "aaa", "abc", "def")); + } + binaryDictionary.close(); } public void testBeginningOfSentence() { for (final int formatVersion : DICT_FORMAT_VERSIONS) { - if (supportsBeginningOfSentence(formatVersion)) { - testBeginningOfSentence(formatVersion); - } + testBeginningOfSentence(formatVersion); } } @@ -665,9 +701,9 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { final BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile); binaryDictionary.addUnigramEntry("", DUMMY_PROBABILITY, "" /* shortcutTarget */, - BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */, - true /* isBeginningOfSentence */, true /* isNotAWord */, false /* isBlacklisted */, - mCurrentTime); + Dictionary.NOT_A_PROBABILITY /* shortcutProbability */, + true /* isBeginningOfSentence */, true /* isNotAWord */, + false /* isPossiblyOffensive */, mCurrentTime); final NgramContext beginningOfSentenceContext = NgramContext.BEGINNING_OF_SENTENCE; onInputWordWithBeginningOfSentenceContext(binaryDictionary, "aaa", true /* isValidWord */); assertFalse(binaryDictionary.isValidNgram(beginningOfSentenceContext, "aaa")); @@ -683,10 +719,8 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { assertFalse(binaryDictionary.isValidNgram(beginningOfSentenceContext, "aaa")); assertFalse(binaryDictionary.isValidNgram(beginningOfSentenceContext, "bbb")); onInputWordWithBeginningOfSentenceContext(binaryDictionary, "aaa", true /* isValidWord */); - assertFalse(binaryDictionary.isValidNgram(beginningOfSentenceContext, "aaa")); onInputWordWithBeginningOfSentenceContext(binaryDictionary, "aaa", true /* isValidWord */); onInputWordWithBeginningOfSentenceContext(binaryDictionary, "bbb", true /* isValidWord */); - assertFalse(binaryDictionary.isValidNgram(beginningOfSentenceContext, "bbb")); onInputWordWithBeginningOfSentenceContext(binaryDictionary, "bbb", true /* isValidWord */); assertTrue(binaryDictionary.isValidNgram(beginningOfSentenceContext, "aaa")); assertTrue(binaryDictionary.isValidNgram(beginningOfSentenceContext, "bbb")); @@ -713,7 +747,74 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { assertTrue(binaryDictionary.isValidWord("aaa")); assertTrue(binaryDictionary.removeUnigramEntry("aaa")); assertFalse(binaryDictionary.isValidWord("aaa")); - + onInputWord(binaryDictionary, "aaa", false /* isValidWord */); + assertFalse(binaryDictionary.isValidWord("aaa")); + onInputWord(binaryDictionary, "aaa", false /* isValidWord */); + assertTrue(binaryDictionary.isValidWord("aaa")); + assertTrue(binaryDictionary.removeUnigramEntry("aaa")); + assertFalse(binaryDictionary.isValidWord("aaa")); binaryDictionary.close(); } + + public void testUpdateEntriesForInputEvents() { + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testUpdateEntriesForInputEvents(formatVersion); + } + } + + private void testUpdateEntriesForInputEvents(final int formatVersion) { + setCurrentTimeForTestMode(mCurrentTime); + final int codePointSetSize = 20; + final int EVENT_COUNT = 1000; + final double CONTINUE_RATE = 0.9; + final long seed = System.currentTimeMillis(); + final Random random = new Random(seed); + final File dictFile = createEmptyDictionaryAndGetFile(formatVersion); + + final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); + final ArrayList<String> unigrams = new ArrayList<>(); + final ArrayList<Pair<String, String>> bigrams = new ArrayList<>(); + final ArrayList<Pair<Pair<String, String>, String>> trigrams = new ArrayList<>(); + + final WordInputEventForPersonalization[] inputEvents = + new WordInputEventForPersonalization[EVENT_COUNT]; + NgramContext ngramContext = NgramContext.EMPTY_PREV_WORDS_INFO; + int prevWordCount = 0; + for (int i = 0; i < inputEvents.length; i++) { + final String word = CodePointUtils.generateWord(random, codePointSet); + inputEvents[i] = new WordInputEventForPersonalization(word, ngramContext, + true /* isValid */, mCurrentTime); + unigrams.add(word); + if (prevWordCount >= 2) { + final Pair<String, String> prevWordsPair = bigrams.get(bigrams.size() - 1); + trigrams.add(new Pair<>(prevWordsPair, word)); + } + if (prevWordCount >= 1) { + bigrams.add(new Pair<>(ngramContext.getNthPrevWord(1 /* n */).toString(), word)); + } + if (random.nextDouble() > CONTINUE_RATE) { + ngramContext = NgramContext.EMPTY_PREV_WORDS_INFO; + prevWordCount = 0; + } else { + ngramContext = ngramContext.getNextNgramContext(new WordInfo(word)); + prevWordCount++; + } + } + final BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile); + binaryDictionary.updateEntriesForInputEvents(inputEvents); + + for (final String word : unigrams) { + assertTrue(binaryDictionary.isInDictionary(word)); + } + for (final Pair<String, String> bigram : bigrams) { + assertTrue(isValidBigram(binaryDictionary, bigram.first, bigram.second)); + } + if (!supportsNgram(formatVersion)) { + return; + } + for (final Pair<Pair<String, String>, String> trigram : trigrams) { + assertTrue(isValidTrigram(binaryDictionary, trigram.first.first, trigram.first.second, + trigram.second)); + } + } } |