diff options
Diffstat (limited to 'tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java')
-rw-r--r-- | tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java | 114 |
1 files changed, 84 insertions, 30 deletions
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java index 73b6fdc3b..e39b46f94 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java @@ -26,7 +26,7 @@ import com.android.inputmethod.latin.makedict.FormatSpec; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.utils.FileUtils; import com.android.inputmethod.latin.utils.LanguageModelParam; -import com.android.inputmethod.latin.utils.UnigramProperty; +import com.android.inputmethod.latin.utils.WordProperty; import java.io.File; import java.io.IOException; @@ -871,14 +871,15 @@ public class BinaryDictionaryTests extends AndroidTestCase { } } - public void testGetUnigramProperties() { - testGetUnigramProperties(FormatSpec.VERSION4); + public void testGetWordProperties() { + testGetWordProperties(FormatSpec.VERSION4); } - private void testGetUnigramProperties(final int formatVersion) { + private void testGetWordProperties(final int formatVersion) { final long seed = System.currentTimeMillis(); final Random random = new Random(seed); - final int ITERATION_COUNT = 1000; + final int UNIGRAM_COUNT = 1000; + final int BIGRAM_COUNT = 1000; final int codePointSetSize = 20; final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); @@ -892,11 +893,16 @@ public class BinaryDictionaryTests extends AndroidTestCase { 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); - final UnigramProperty invalidUnigramProperty = - binaryDictionary.getUnigramProperty("dummyWord"); - assertFalse(invalidUnigramProperty.isValid()); + final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord"); + assertFalse(invalidWordProperty.isValid()); + + final ArrayList<String> words = new ArrayList<String>(); + final HashMap<String, Integer> wordProbabilities = new HashMap<String, Integer>(); + final HashMap<String, HashSet<String>> bigrams = new HashMap<String, HashSet<String>>(); + final HashMap<Pair<String, String>, Integer> bigramProbabilities = + new HashMap<Pair<String, String>, Integer>(); - for (int i = 0; i < ITERATION_COUNT; i++) { + for (int i = 0; i < UNIGRAM_COUNT; i++) { final String word = CodePointUtils.generateWord(random, codePointSet); final int unigramProbability = random.nextInt(0xFF); final boolean isNotAWord = random.nextBoolean(); @@ -905,17 +911,64 @@ public class BinaryDictionaryTests extends AndroidTestCase { binaryDictionary.addUnigramWord(word, unigramProbability, null /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY, isNotAWord, isBlacklisted, BinaryDictionary.NOT_A_VALID_TIMESTAMP); - final UnigramProperty unigramProperty = - binaryDictionary.getUnigramProperty(word); + if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { + binaryDictionary.flushWithGC(); + } + words.add(word); + wordProbabilities.put(word, unigramProbability); + final WordProperty unigramProperty = binaryDictionary.getWordProperty(word); assertEquals(word, unigramProperty.mCodePoints); assertTrue(unigramProperty.isValid()); assertEquals(isNotAWord, unigramProperty.mIsNotAWord); assertEquals(isBlacklisted, unigramProperty.mIsBlacklisted); assertEquals(false, unigramProperty.mHasBigrams); assertEquals(false, unigramProperty.mHasShortcuts); - assertEquals(unigramProbability, unigramProperty.mProbability); + assertEquals(unigramProbability, unigramProperty.mProbabilityInfo.mProbability); assertTrue(unigramProperty.mShortcutTargets.isEmpty()); } + + for (int i = 0; i < BIGRAM_COUNT; i++) { + final int word0Index = random.nextInt(wordProbabilities.size()); + final int word1Index = random.nextInt(wordProbabilities.size()); + if (word0Index == word1Index) { + continue; + } + final String word0 = words.get(word0Index); + final String word1 = words.get(word1Index); + final int bigramProbability = random.nextInt(0xF); + binaryDictionary.addBigramWords(word0, word1, bigramProbability, + BinaryDictionary.NOT_A_VALID_TIMESTAMP); + if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { + binaryDictionary.flushWithGC(); + } + if (!bigrams.containsKey(word0)) { + final HashSet<String> bigramWord1s = new HashSet<String>(); + bigrams.put(word0, bigramWord1s); + } + bigrams.get(word0).add(word1); + bigramProbabilities.put(new Pair<String, String>(word0, word1), bigramProbability); + } + + for (int i = 0; i < words.size(); i++) { + final String word0 = words.get(i); + if (!bigrams.containsKey(word0)) { + continue; + } + final HashSet<String> bigramWord1s = bigrams.get(word0); + final WordProperty unigramProperty = binaryDictionary.getWordProperty(word0); + assertEquals(bigramWord1s.size(), unigramProperty.mBigramTargets.size()); + assertEquals(unigramProperty.mBigramTargets.size(), + unigramProperty.mBigramProbabilityInfo.size()); + for (int j = 0; j < unigramProperty.mBigramTargets.size(); j++) { + final String word1 = unigramProperty.mBigramTargets.get(j).mWord; + assertTrue(bigramWord1s.contains(word1)); + final int probability = unigramProperty.mBigramTargets.get(j).mFrequency; + assertEquals((int)bigramProbabilities.get(new Pair<String, String>(word0, word1)), + probability); + assertEquals(unigramProperty.mBigramProbabilityInfo.get(j).mProbability, + probability); + } + } } public void testAddShortcuts() { @@ -938,28 +991,28 @@ public class BinaryDictionaryTests extends AndroidTestCase { binaryDictionary.addUnigramWord("aaa", unigramProbability, "zzz", shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */); - UnigramProperty unigramProperty = binaryDictionary.getUnigramProperty("aaa"); - assertEquals(1, unigramProperty.mShortcutTargets.size()); - assertEquals("zzz", unigramProperty.mShortcutTargets.get(0).mWord); - assertEquals(shortcutProbability, unigramProperty.mShortcutTargets.get(0).mFrequency); + WordProperty wordProperty = binaryDictionary.getWordProperty("aaa"); + assertEquals(1, wordProperty.mShortcutTargets.size()); + assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord); + assertEquals(shortcutProbability, wordProperty.mShortcutTargets.get(0).mFrequency); final int updatedShortcutProbability = 2; binaryDictionary.addUnigramWord("aaa", unigramProbability, "zzz", updatedShortcutProbability, false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */); - unigramProperty = binaryDictionary.getUnigramProperty("aaa"); - assertEquals(1, unigramProperty.mShortcutTargets.size()); - assertEquals("zzz", unigramProperty.mShortcutTargets.get(0).mWord); + wordProperty = binaryDictionary.getWordProperty("aaa"); + assertEquals(1, wordProperty.mShortcutTargets.size()); + assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord); assertEquals(updatedShortcutProbability, - unigramProperty.mShortcutTargets.get(0).mFrequency); + wordProperty.mShortcutTargets.get(0).mFrequency); binaryDictionary.addUnigramWord("aaa", unigramProbability, "yyy", shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */); final HashMap<String, Integer> shortcutTargets = new HashMap<String, Integer>(); shortcutTargets.put("zzz", updatedShortcutProbability); shortcutTargets.put("yyy", shortcutProbability); - unigramProperty = binaryDictionary.getUnigramProperty("aaa"); - assertEquals(2, unigramProperty.mShortcutTargets.size()); - for (WeightedString shortcutTarget : unigramProperty.mShortcutTargets) { + wordProperty = binaryDictionary.getWordProperty("aaa"); + assertEquals(2, wordProperty.mShortcutTargets.size()); + for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) { assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord)); assertEquals((int)shortcutTargets.get(shortcutTarget.mWord), shortcutTarget.mFrequency); shortcutTargets.remove(shortcutTarget.mWord); @@ -967,9 +1020,9 @@ public class BinaryDictionaryTests extends AndroidTestCase { shortcutTargets.put("zzz", updatedShortcutProbability); shortcutTargets.put("yyy", shortcutProbability); binaryDictionary.flushWithGC(); - unigramProperty = binaryDictionary.getUnigramProperty("aaa"); - assertEquals(2, unigramProperty.mShortcutTargets.size()); - for (WeightedString shortcutTarget : unigramProperty.mShortcutTargets) { + wordProperty = binaryDictionary.getWordProperty("aaa"); + assertEquals(2, wordProperty.mShortcutTargets.size()); + for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) { assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord)); assertEquals((int)shortcutTargets.get(shortcutTarget.mWord), shortcutTarget.mFrequency); shortcutTargets.remove(shortcutTarget.mWord); @@ -1036,14 +1089,15 @@ public class BinaryDictionaryTests extends AndroidTestCase { } for (final String word : words) { - final UnigramProperty unigramProperty = binaryDictionary.getUnigramProperty(word); - assertEquals((int)unigramProbabilities.get(word), unigramProperty.mProbability); + final WordProperty wordProperty = binaryDictionary.getWordProperty(word); + assertEquals((int)unigramProbabilities.get(word), + wordProperty.mProbabilityInfo.mProbability); if (!shortcutTargets.containsKey(word)) { // The word does not have shortcut targets. continue; } - assertEquals(shortcutTargets.get(word).size(), unigramProperty.mShortcutTargets.size()); - for (final WeightedString shortcutTarget : unigramProperty.mShortcutTargets) { + assertEquals(shortcutTargets.get(word).size(), wordProperty.mShortcutTargets.size()); + for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) { final String targetCodePonts = shortcutTarget.mWord; assertEquals((int)shortcutTargets.get(word).get(targetCodePonts), shortcutTarget.mFrequency); |