diff options
Diffstat (limited to 'tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java')
-rw-r--r-- | tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java | 110 |
1 files changed, 30 insertions, 80 deletions
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java index a640a9835..a1ae93c2f 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java @@ -22,14 +22,14 @@ import android.text.TextUtils; import android.util.Pair; import com.android.inputmethod.latin.NgramContext.WordInfo; -import com.android.inputmethod.latin.makedict.CodePointUtils; +import com.android.inputmethod.latin.common.CodePointUtils; +import com.android.inputmethod.latin.common.Constants; import com.android.inputmethod.latin.makedict.DictionaryHeader; import com.android.inputmethod.latin.makedict.FormatSpec; import com.android.inputmethod.latin.makedict.WeightedString; import com.android.inputmethod.latin.makedict.WordProperty; import com.android.inputmethod.latin.utils.BinaryDictionaryUtils; import com.android.inputmethod.latin.utils.FileUtils; -import com.android.inputmethod.latin.utils.LanguageModelParam; import java.io.File; import java.io.IOException; @@ -37,7 +37,6 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.Locale; -import java.util.Map; import java.util.Random; // TODO Use the seed passed as an argument for makedict test. @@ -111,13 +110,12 @@ public class BinaryDictionaryTests extends AndroidTestCase { if (BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), formatVersion, Locale.ENGLISH, attributeMap)) { return file; - } else { - throw new IOException("Empty dictionary " + file.getAbsolutePath() - + " cannot be created. Format version: " + formatVersion); } + throw new IOException("Empty dictionary " + file.getAbsolutePath() + + " cannot be created. Format version: " + formatVersion); } - private BinaryDictionary getBinaryDictionary(final File dictFile) { + private static BinaryDictionary getBinaryDictionary(final File dictFile) { return new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); @@ -200,7 +198,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { // Too long short cut. binaryDictionary.addUnigramEntry("a", probability, invalidLongWord, 10 /* shortcutProbability */, false /* isBeginningOfSentence */, - false /* isNotAWord */, false /* isBlacklisted */, + false /* isNotAWord */, false /* isPossiblyOffensive */, BinaryDictionary.NOT_A_VALID_TIMESTAMP); addUnigramWord(binaryDictionary, "abc", probability); final int updatedProbability = 200; @@ -211,17 +209,17 @@ public class BinaryDictionaryTests extends AndroidTestCase { assertEquals(probability, binaryDictionary.getFrequency("aaa")); assertEquals(updatedProbability, binaryDictionary.getFrequency(validLongWord)); - assertEquals(BinaryDictionary.NOT_A_PROBABILITY, - binaryDictionary.getFrequency(invalidLongWord)); + assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency(invalidLongWord)); assertEquals(updatedProbability, binaryDictionary.getFrequency("abc")); } private static void addUnigramWord(final BinaryDictionary binaryDictionary, final String word, final int probability) { binaryDictionary.addUnigramEntry(word, probability, "" /* shortcutTarget */, - BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */, + Dictionary.NOT_A_PROBABILITY /* shortcutProbability */, false /* isBeginningOfSentence */, false /* isNotAWord */, - false /* isBlacklisted */, BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); + false /* isPossiblyOffensive */, + BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); } private static void addBigramWords(final BinaryDictionary binaryDictionary, final String word0, @@ -885,63 +883,6 @@ public class BinaryDictionaryTests extends AndroidTestCase { } } - public void testAddMultipleDictionaryEntries() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testAddMultipleDictionaryEntries(formatVersion); - } - } - - private void testAddMultipleDictionaryEntries(final int formatVersion) { - final int codePointSetSize = 20; - final int lmParamCount = 1000; - final double bigramContinueRate = 0.9; - final long seed = System.currentTimeMillis(); - final Random random = new Random(seed); - final File dictFile = createEmptyDictionaryAndGetFile(formatVersion); - - final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); - final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); - final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); - - final LanguageModelParam[] languageModelParams = new LanguageModelParam[lmParamCount]; - String prevWord = null; - for (int i = 0; i < languageModelParams.length; i++) { - final String word = CodePointUtils.generateWord(random, codePointSet); - final int probability = random.nextInt(0xFF); - final int bigramProbability = probability + random.nextInt(0xFF - probability); - unigramProbabilities.put(word, probability); - if (prevWord == null) { - languageModelParams[i] = new LanguageModelParam(word, probability, - BinaryDictionary.NOT_A_VALID_TIMESTAMP); - } else { - languageModelParams[i] = new LanguageModelParam(prevWord, word, probability, - bigramProbability, BinaryDictionary.NOT_A_VALID_TIMESTAMP); - bigramProbabilities.put(new Pair<>(prevWord, word), - bigramProbability); - } - prevWord = (random.nextDouble() < bigramContinueRate) ? word : null; - } - - final BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile); - binaryDictionary.addMultipleDictionaryEntries(languageModelParams); - - for (Map.Entry<String, Integer> entry : unigramProbabilities.entrySet()) { - assertEquals((int)entry.getValue(), binaryDictionary.getFrequency(entry.getKey())); - } - - for (Map.Entry<Pair<String, String>, Integer> entry : bigramProbabilities.entrySet()) { - final String word0 = entry.getKey().first; - final String word1 = entry.getKey().second; - final int bigramProbability = entry.getValue(); - assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY, - isValidBigram(binaryDictionary, word0, word1)); - if (canCheckBigramProbability(formatVersion)) { - assertEquals(bigramProbability, - getBigramProbability(binaryDictionary, word0, word1)); - } - } - } - public void testGetWordProperties() { for (final int formatVersion : DICT_FORMAT_VERSIONS) { testGetWordProperties(formatVersion); @@ -971,11 +912,11 @@ public class BinaryDictionaryTests extends AndroidTestCase { final String word = CodePointUtils.generateWord(random, codePointSet); final int unigramProbability = random.nextInt(0xFF); final boolean isNotAWord = random.nextBoolean(); - final boolean isBlacklisted = random.nextBoolean(); + final boolean isPossiblyOffensive = random.nextBoolean(); // TODO: Add tests for historical info. binaryDictionary.addUnigramEntry(word, unigramProbability, - null /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY, - false /* isBeginningOfSentence */, isNotAWord, isBlacklisted, + null /* shortcutTarget */, Dictionary.NOT_A_PROBABILITY, + false /* isBeginningOfSentence */, isNotAWord, isPossiblyOffensive, BinaryDictionary.NOT_A_VALID_TIMESTAMP); if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { binaryDictionary.flushWithGC(); @@ -987,7 +928,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { assertEquals(word, wordProperty.mWord); assertTrue(wordProperty.isValid()); assertEquals(isNotAWord, wordProperty.mIsNotAWord); - assertEquals(isBlacklisted, wordProperty.mIsBlacklistEntry); + assertEquals(isPossiblyOffensive, wordProperty.mIsPossiblyOffensive); assertEquals(false, wordProperty.mHasNgrams); assertEquals(false, wordProperty.mHasShortcuts); assertEquals(unigramProbability, wordProperty.mProbabilityInfo.mProbability); @@ -1142,7 +1083,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { final int shortcutProbability = 10; binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz", shortcutProbability, false /* isBeginningOfSentence */, - false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */); + false /* isNotAWord */, false /* isPossiblyOffensive */, 0 /* timestamp */); WordProperty wordProperty = binaryDictionary.getWordProperty("aaa", false /* isBeginningOfSentence */); assertEquals(1, wordProperty.mShortcutTargets.size()); @@ -1151,7 +1092,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { final int updatedShortcutProbability = 2; binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz", updatedShortcutProbability, false /* isBeginningOfSentence */, - false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */); + false /* isNotAWord */, false /* isPossiblyOffensive */, 0 /* timestamp */); wordProperty = binaryDictionary.getWordProperty("aaa", false /* isBeginningOfSentence */); assertEquals(1, wordProperty.mShortcutTargets.size()); @@ -1160,7 +1101,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { wordProperty.mShortcutTargets.get(0).getProbability()); binaryDictionary.addUnigramEntry("aaa", unigramProbability, "yyy", shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */, - false /* isBlacklisted */, 0 /* timestamp */); + false /* isPossiblyOffensive */, 0 /* timestamp */); final HashMap<String, Integer> shortcutTargets = new HashMap<>(); shortcutTargets.put("zzz", updatedShortcutProbability); shortcutTargets.put("yyy", shortcutProbability); @@ -1223,7 +1164,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { final int unigramProbability = unigramProbabilities.get(word); binaryDictionary.addUnigramEntry(word, unigramProbability, shortcutTarget, shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */, - false /* isBlacklisted */, 0 /* timestamp */); + false /* isPossiblyOffensive */, 0 /* timestamp */); if (shortcutTargets.containsKey(word)) { final HashMap<String, Integer> shortcutTargetsOfWord = shortcutTargets.get(word); shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability); @@ -1255,6 +1196,15 @@ public class BinaryDictionaryTests extends AndroidTestCase { } } + public void testPossiblyOffensiveAttributeMaintained() { + final BinaryDictionary binaryDictionary = + getEmptyBinaryDictionary(FormatSpec.VERSION4_DEV); + binaryDictionary.addUnigramEntry("ddd", 100, null, Dictionary.NOT_A_PROBABILITY, + false, true, true, 0); + WordProperty wordProperty = binaryDictionary.getWordProperty("ddd", false); + assertEquals(true, wordProperty.mIsPossiblyOffensive); + } + public void testDictMigration() { for (final int formatVersion : DICT_FORMAT_VERSIONS) { testDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, formatVersion); @@ -1271,10 +1221,10 @@ public class BinaryDictionaryTests extends AndroidTestCase { final int shortcutProbability = 10; binaryDictionary.addUnigramEntry("ccc", unigramProbability, "xxx", shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */, - false /* isBlacklisted */, 0 /* timestamp */); + false /* isPossiblyOffensive */, 0 /* timestamp */); binaryDictionary.addUnigramEntry("ddd", unigramProbability, null /* shortcutTarget */, Dictionary.NOT_A_PROBABILITY, false /* isBeginningOfSentence */, - true /* isNotAWord */, true /* isBlacklisted */, 0 /* timestamp */); + true /* isNotAWord */, true /* isPossiblyOffensive */, 0 /* timestamp */); binaryDictionary.addNgramEntry(NgramContext.BEGINNING_OF_SENTENCE, "aaa", bigramProbability, 0 /* timestamp */); assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa")); @@ -1298,7 +1248,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { assertEquals("xxx", wordProperty.mShortcutTargets.get(0).mWord); wordProperty = binaryDictionary.getWordProperty("ddd", false /* isBeginningOfSentence */); - assertTrue(wordProperty.mIsBlacklistEntry); + assertTrue(wordProperty.mIsPossiblyOffensive); assertTrue(wordProperty.mIsNotAWord); } |