diff options
Diffstat (limited to 'tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java')
-rw-r--r-- | tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java | 712 |
1 files changed, 625 insertions, 87 deletions
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java index 5b8f0e977..0fb0fa587 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java @@ -23,6 +23,11 @@ import android.util.Pair; import com.android.inputmethod.latin.makedict.CodePointUtils; import com.android.inputmethod.latin.makedict.FormatSpec; +import com.android.inputmethod.latin.makedict.WeightedString; +import com.android.inputmethod.latin.makedict.WordProperty; +import com.android.inputmethod.latin.utils.BinaryDictionaryUtils; +import com.android.inputmethod.latin.utils.FileUtils; +import com.android.inputmethod.latin.utils.LanguageModelParam; import java.io.File; import java.io.IOException; @@ -33,39 +38,45 @@ import java.util.Locale; import java.util.Map; import java.util.Random; +// TODO Use the seed passed as an argument for makedict test. @LargeTest public class BinaryDictionaryTests extends AndroidTestCase { private static final String TEST_DICT_FILE_EXTENSION = ".testDict"; private static final String TEST_LOCALE = "test"; - @Override - protected void setUp() throws Exception { - super.setUp(); - } - - @Override - protected void tearDown() throws Exception { - super.tearDown(); + private File createEmptyDictionaryAndGetFile(final String dictId, + final int formatVersion) throws IOException { + if (formatVersion == FormatSpec.VERSION4) { + return createEmptyVer4DictionaryAndGetFile(dictId); + } else { + throw new IOException("Dictionary format version " + formatVersion + + " is not supported."); + } } - private File createEmptyDictionaryAndGetFile(final String filename) throws IOException { - final File file = File.createTempFile(filename, TEST_DICT_FILE_EXTENSION, + private File createEmptyVer4DictionaryAndGetFile(final String dictId) throws IOException { + final File file = File.createTempFile(dictId, TEST_DICT_FILE_EXTENSION, getContext().getCacheDir()); + file.delete(); + file.mkdir(); Map<String, String> attributeMap = new HashMap<String, String>(); - attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE, - FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE); - if (BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(), - 3 /* dictVersion */, attributeMap)) { + if (BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), FormatSpec.VERSION4, + Locale.ENGLISH, attributeMap)) { return file; } else { - throw new IOException("Empty dictionary cannot be created."); + throw new IOException("Empty dictionary " + file.getAbsolutePath() + + " cannot be created."); } } public void testIsValidDictionary() { + testIsValidDictionary(FormatSpec.VERSION4); + } + + private void testIsValidDictionary(final int formatVersion) { File dictFile = null; try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); } @@ -77,7 +88,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { binaryDictionary.close(); assertFalse("binaryDictionary must be invalid after closing.", binaryDictionary.isValidDictionary()); - dictFile.delete(); + FileUtils.deleteRecursively(dictFile); binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); @@ -86,10 +97,104 @@ public class BinaryDictionaryTests extends AndroidTestCase { binaryDictionary.close(); } + public void testConstructingDictionaryOnMemory() { + testConstructingDictionaryOnMemory(FormatSpec.VERSION4); + } + + private void testConstructingDictionaryOnMemory(final int formatVersion) { + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + FileUtils.deleteRecursively(dictFile); + assertFalse(dictFile.exists()); + BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, formatVersion, + new HashMap<String, String>()); + assertTrue(binaryDictionary.isValidDictionary()); + assertEquals(formatVersion, binaryDictionary.getFormatVersion()); + final int probability = 100; + addUnigramWord(binaryDictionary, "word", probability); + assertEquals(probability, binaryDictionary.getFrequency("word")); + assertFalse(dictFile.exists()); + binaryDictionary.flush(); + assertTrue(dictFile.exists()); + assertTrue(binaryDictionary.isValidDictionary()); + assertEquals(formatVersion, binaryDictionary.getFormatVersion()); + assertEquals(probability, binaryDictionary.getFrequency("word")); + binaryDictionary.close(); + dictFile.delete(); + } + + public void testAddTooLongWord() { + testAddTooLongWord(FormatSpec.VERSION4); + } + + private void testAddTooLongWord(final int formatVersion) { + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + + final StringBuffer stringBuilder = new StringBuffer(); + for (int i = 0; i < Constants.DICTIONARY_MAX_WORD_LENGTH; i++) { + stringBuilder.append('a'); + } + final String validLongWord = stringBuilder.toString(); + stringBuilder.append('a'); + final String invalidLongWord = stringBuilder.toString(); + final int probability = 100; + addUnigramWord(binaryDictionary, "aaa", probability); + addUnigramWord(binaryDictionary, validLongWord, probability); + addUnigramWord(binaryDictionary, invalidLongWord, probability); + // Too long short cut. + binaryDictionary.addUnigramWord("a", probability, invalidLongWord, + 10 /* shortcutProbability */, false /* isNotAWord */, false /* isBlacklisted */, + BinaryDictionary.NOT_A_VALID_TIMESTAMP); + addUnigramWord(binaryDictionary, "abc", probability); + final int updatedProbability = 200; + // Update. + addUnigramWord(binaryDictionary, validLongWord, updatedProbability); + addUnigramWord(binaryDictionary, invalidLongWord, updatedProbability); + addUnigramWord(binaryDictionary, "abc", updatedProbability); + + assertEquals(probability, binaryDictionary.getFrequency("aaa")); + assertEquals(updatedProbability, binaryDictionary.getFrequency(validLongWord)); + assertEquals(BinaryDictionary.NOT_A_PROBABILITY, + binaryDictionary.getFrequency(invalidLongWord)); + assertEquals(updatedProbability, binaryDictionary.getFrequency("abc")); + dictFile.delete(); + } + + private void addUnigramWord(final BinaryDictionary binaryDictionary, final String word, + final int probability) { + binaryDictionary.addUnigramWord(word, probability, "" /* shortcutTarget */, + BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */, + false /* isNotAWord */, false /* isBlacklisted */, + BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); + } + + private void addBigramWords(final BinaryDictionary binaryDictionary, final String word0, + final String word1, final int probability) { + binaryDictionary.addBigramWords(word0, word1, probability, + BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); + } + public void testAddUnigramWord() { + testAddUnigramWord(FormatSpec.VERSION4); + } + + private void testAddUnigramWord(final int formatVersion) { File dictFile = null; try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); } @@ -98,21 +203,21 @@ public class BinaryDictionaryTests extends AndroidTestCase { Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); final int probability = 100; - binaryDictionary.addUnigramWord("aaa", probability); + addUnigramWord(binaryDictionary, "aaa", probability); // Reallocate and create. - binaryDictionary.addUnigramWord("aab", probability); + addUnigramWord(binaryDictionary, "aab", probability); // Insert into children. - binaryDictionary.addUnigramWord("aac", probability); + addUnigramWord(binaryDictionary, "aac", probability); // Make terminal. - binaryDictionary.addUnigramWord("aa", probability); + addUnigramWord(binaryDictionary, "aa", probability); // Create children. - binaryDictionary.addUnigramWord("aaaa", probability); + addUnigramWord(binaryDictionary, "aaaa", probability); // Reallocate and make termianl. - binaryDictionary.addUnigramWord("a", probability); + addUnigramWord(binaryDictionary, "a", probability); final int updatedProbability = 200; // Update. - binaryDictionary.addUnigramWord("aaa", updatedProbability); + addUnigramWord(binaryDictionary, "aaa", updatedProbability); assertEquals(probability, binaryDictionary.getFrequency("aab")); assertEquals(probability, binaryDictionary.getFrequency("aac")); @@ -125,13 +230,17 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testRandomlyAddUnigramWord() { + testRandomlyAddUnigramWord(FormatSpec.VERSION4); + } + + private void testRandomlyAddUnigramWord(final int formatVersion) { final int wordCount = 1000; final int codePointSetSize = 50; final long seed = System.currentTimeMillis(); File dictFile = null; try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); } @@ -148,7 +257,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { probabilityMap.put(word, random.nextInt(0xFF)); } for (String word : probabilityMap.keySet()) { - binaryDictionary.addUnigramWord(word, probabilityMap.get(word)); + addUnigramWord(binaryDictionary, word, probabilityMap.get(word)); } for (String word : probabilityMap.keySet()) { assertEquals(word, (int)probabilityMap.get(word), binaryDictionary.getFrequency(word)); @@ -157,9 +266,13 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testAddBigramWords() { + testAddBigramWords(FormatSpec.VERSION4); + } + + private void testAddBigramWords(final int formatVersion) { File dictFile = null; try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); } @@ -170,13 +283,13 @@ public class BinaryDictionaryTests extends AndroidTestCase { final int unigramProbability = 100; final int bigramProbability = 10; final int updatedBigramProbability = 15; - binaryDictionary.addUnigramWord("aaa", unigramProbability); - binaryDictionary.addUnigramWord("abb", unigramProbability); - binaryDictionary.addUnigramWord("bcc", unigramProbability); - binaryDictionary.addBigramWords("aaa", "abb", bigramProbability); - binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability); - binaryDictionary.addBigramWords("abb", "aaa", bigramProbability); - binaryDictionary.addBigramWords("abb", "bcc", bigramProbability); + addUnigramWord(binaryDictionary, "aaa", unigramProbability); + addUnigramWord(binaryDictionary, "abb", unigramProbability); + addUnigramWord(binaryDictionary, "bcc", unigramProbability); + addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability); + addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability); + addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability); + addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability); final int probability = binaryDictionary.calculateProbability(unigramProbability, bigramProbability); @@ -189,7 +302,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa")); assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc")); - binaryDictionary.addBigramWords("aaa", "abb", updatedBigramProbability); + addBigramWords(binaryDictionary, "aaa", "abb", updatedBigramProbability); final int updatedProbability = binaryDictionary.calculateProbability(unigramProbability, updatedBigramProbability); assertEquals(updatedProbability, binaryDictionary.getBigramProbability("aaa", "abb")); @@ -205,22 +318,26 @@ public class BinaryDictionaryTests extends AndroidTestCase { binaryDictionary.getBigramProbability("aaa", "aaa")); // Testing bigram link. - binaryDictionary.addUnigramWord("abcde", unigramProbability); - binaryDictionary.addUnigramWord("fghij", unigramProbability); - binaryDictionary.addBigramWords("abcde", "fghij", bigramProbability); - binaryDictionary.addUnigramWord("fgh", unigramProbability); - binaryDictionary.addUnigramWord("abc", unigramProbability); - binaryDictionary.addUnigramWord("f", unigramProbability); + addUnigramWord(binaryDictionary, "abcde", unigramProbability); + addUnigramWord(binaryDictionary, "fghij", unigramProbability); + addBigramWords(binaryDictionary, "abcde", "fghij", bigramProbability); + addUnigramWord(binaryDictionary, "fgh", unigramProbability); + addUnigramWord(binaryDictionary, "abc", unigramProbability); + addUnigramWord(binaryDictionary, "f", unigramProbability); assertEquals(probability, binaryDictionary.getBigramProbability("abcde", "fghij")); assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getBigramProbability("abcde", "fgh")); - binaryDictionary.addBigramWords("abcde", "fghij", updatedBigramProbability); + addBigramWords(binaryDictionary, "abcde", "fghij", updatedBigramProbability); assertEquals(updatedProbability, binaryDictionary.getBigramProbability("abcde", "fghij")); dictFile.delete(); } public void testRandomlyAddBigramWords() { + testRandomlyAddBigramWords(FormatSpec.VERSION4); + } + + private void testRandomlyAddBigramWords(final int formatVersion) { final int wordCount = 100; final int bigramCount = 1000; final int codePointSetSize = 50; @@ -229,7 +346,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { File dictFile = null; try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); } @@ -249,7 +366,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { words.add(word); final int unigramProbability = random.nextInt(0xFF); unigramProbabilities.put(word, unigramProbability); - binaryDictionary.addUnigramWord(word, unigramProbability); + addUnigramWord(binaryDictionary, word, unigramProbability); } for (int i = 0; i < bigramCount; i++) { @@ -262,7 +379,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { bigramWords.add(bigram); final int bigramProbability = random.nextInt(0xF); bigramProbabilities.put(bigram, bigramProbability); - binaryDictionary.addBigramWords(word0, word1, bigramProbability); + addBigramWords(binaryDictionary, word0, word1, bigramProbability); } for (final Pair<String, String> bigram : bigramWords) { @@ -278,9 +395,13 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testRemoveBigramWords() { + testRemoveBigramWords(FormatSpec.VERSION4); + } + + private void testRemoveBigramWords(final int formatVersion) { File dictFile = null; try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); } @@ -289,13 +410,13 @@ public class BinaryDictionaryTests extends AndroidTestCase { Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); final int unigramProbability = 100; final int bigramProbability = 10; - binaryDictionary.addUnigramWord("aaa", unigramProbability); - binaryDictionary.addUnigramWord("abb", unigramProbability); - binaryDictionary.addUnigramWord("bcc", unigramProbability); - binaryDictionary.addBigramWords("aaa", "abb", bigramProbability); - binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability); - binaryDictionary.addBigramWords("abb", "aaa", bigramProbability); - binaryDictionary.addBigramWords("abb", "bcc", bigramProbability); + addUnigramWord(binaryDictionary, "aaa", unigramProbability); + addUnigramWord(binaryDictionary, "abb", unigramProbability); + addUnigramWord(binaryDictionary, "bcc", unigramProbability); + addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability); + addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability); + addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability); + addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability); assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb")); assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc")); @@ -304,7 +425,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { binaryDictionary.removeBigramWords("aaa", "abb"); assertEquals(false, binaryDictionary.isValidBigram("aaa", "abb")); - binaryDictionary.addBigramWords("aaa", "abb", bigramProbability); + addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability); assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb")); @@ -324,9 +445,13 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testFlushDictionary() { + testFlushDictionary(FormatSpec.VERSION4); + } + + private void testFlushDictionary(final int formatVersion) { File dictFile = null; try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); } @@ -335,8 +460,8 @@ public class BinaryDictionaryTests extends AndroidTestCase { Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); final int probability = 100; - binaryDictionary.addUnigramWord("aaa", probability); - binaryDictionary.addUnigramWord("abcd", probability); + addUnigramWord(binaryDictionary, "aaa", probability); + addUnigramWord(binaryDictionary, "abcd", probability); // Close without flushing. binaryDictionary.close(); @@ -347,8 +472,8 @@ public class BinaryDictionaryTests extends AndroidTestCase { assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("aaa")); assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("abcd")); - binaryDictionary.addUnigramWord("aaa", probability); - binaryDictionary.addUnigramWord("abcd", probability); + addUnigramWord(binaryDictionary, "aaa", probability); + addUnigramWord(binaryDictionary, "abcd", probability); binaryDictionary.flush(); binaryDictionary.close(); @@ -358,7 +483,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { assertEquals(probability, binaryDictionary.getFrequency("aaa")); assertEquals(probability, binaryDictionary.getFrequency("abcd")); - binaryDictionary.addUnigramWord("bcde", probability); + addUnigramWord(binaryDictionary, "bcde", probability); binaryDictionary.flush(); binaryDictionary.close(); @@ -372,9 +497,13 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testFlushWithGCDictionary() { + testFlushWithGCDictionary(FormatSpec.VERSION4); + } + + private void testFlushWithGCDictionary(final int formatVersion) { File dictFile = null; try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); } @@ -384,13 +513,13 @@ public class BinaryDictionaryTests extends AndroidTestCase { final int unigramProbability = 100; final int bigramProbability = 10; - binaryDictionary.addUnigramWord("aaa", unigramProbability); - binaryDictionary.addUnigramWord("abb", unigramProbability); - binaryDictionary.addUnigramWord("bcc", unigramProbability); - binaryDictionary.addBigramWords("aaa", "abb", bigramProbability); - binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability); - binaryDictionary.addBigramWords("abb", "aaa", bigramProbability); - binaryDictionary.addBigramWords("abb", "bcc", bigramProbability); + addUnigramWord(binaryDictionary, "aaa", unigramProbability); + addUnigramWord(binaryDictionary, "abb", unigramProbability); + addUnigramWord(binaryDictionary, "bcc", unigramProbability); + addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability); + addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability); + addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability); + addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability); binaryDictionary.flushWithGC(); binaryDictionary.close(); @@ -415,8 +544,12 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile.delete(); } - // TODO: Evaluate performance of GC public void testAddBigramWordsAndFlashWithGC() { + testAddBigramWordsAndFlashWithGC(FormatSpec.VERSION4); + } + + // TODO: Evaluate performance of GC + private void testAddBigramWordsAndFlashWithGC(final int formatVersion) { final int wordCount = 100; final int bigramCount = 1000; final int codePointSetSize = 30; @@ -425,7 +558,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { File dictFile = null; try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); } @@ -446,7 +579,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { words.add(word); final int unigramProbability = random.nextInt(0xFF); unigramProbabilities.put(word, unigramProbability); - binaryDictionary.addUnigramWord(word, unigramProbability); + addUnigramWord(binaryDictionary, word, unigramProbability); } for (int i = 0; i < bigramCount; i++) { @@ -459,7 +592,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { bigramWords.add(bigram); final int bigramProbability = random.nextInt(0xF); bigramProbabilities.put(bigram, bigramProbability); - binaryDictionary.addBigramWords(word0, word1, bigramProbability); + addBigramWords(binaryDictionary, word0, word1, bigramProbability); } binaryDictionary.flushWithGC(); @@ -480,7 +613,11 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile.delete(); } - public void testRandomOperetionsAndFlashWithGC() { + public void testRandomOperationsAndFlashWithGC() { + testRandomOperationsAndFlashWithGC(FormatSpec.VERSION4); + } + + private void testRandomOperationsAndFlashWithGC(final int formatVersion) { final int flashWithGCIterationCount = 50; final int operationCountInEachIteration = 200; final int initialUnigramCount = 100; @@ -494,7 +631,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { File dictFile = null; try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); } @@ -513,7 +650,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { words.add(word); final int unigramProbability = random.nextInt(0xFF); unigramProbabilities.put(word, unigramProbability); - binaryDictionary.addUnigramWord(word, unigramProbability); + addUnigramWord(binaryDictionary, word, unigramProbability); } binaryDictionary.flushWithGC(); binaryDictionary.close(); @@ -529,7 +666,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { words.add(word); final int unigramProbability = random.nextInt(0xFF); unigramProbabilities.put(word, unigramProbability); - binaryDictionary.addUnigramWord(word, unigramProbability); + addUnigramWord(binaryDictionary, word, unigramProbability); } // Add bigram. if (random.nextFloat() < addBigramProb && words.size() > 2) { @@ -547,7 +684,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { final Pair<String, String> bigram = new Pair<String, String>(word0, word1); bigramWords.add(bigram); bigramProbabilities.put(bigram, bigramProbability); - binaryDictionary.addBigramWords(word0, word1, bigramProbability); + addBigramWords(binaryDictionary, word0, word1, bigramProbability); } // Remove bigram. if (random.nextFloat() < removeBigramProb && !bigramWords.isEmpty()) { @@ -588,6 +725,10 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testAddManyUnigramsAndFlushWithGC() { + testAddManyUnigramsAndFlushWithGC(FormatSpec.VERSION4); + } + + private void testAddManyUnigramsAndFlushWithGC(final int formatVersion) { final int flashWithGCIterationCount = 3; final int codePointSetSize = 50; @@ -596,7 +737,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { File dictFile = null; try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); } @@ -615,7 +756,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { words.add(word); final int unigramProbability = random.nextInt(0xFF); unigramProbabilities.put(word, unigramProbability); - binaryDictionary.addUnigramWord(word, unigramProbability); + addUnigramWord(binaryDictionary, word, unigramProbability); } for (int j = 0; j < words.size(); j++) { @@ -632,6 +773,10 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testUnigramAndBigramCount() { + testUnigramAndBigramCount(FormatSpec.VERSION4); + } + + private void testUnigramAndBigramCount(final int formatVersion) { final int flashWithGCIterationCount = 10; final int codePointSetSize = 50; final int unigramCountPerIteration = 1000; @@ -641,7 +786,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { File dictFile = null; try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); } @@ -659,7 +804,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { final String word = CodePointUtils.generateWord(random, codePointSet); words.add(word); final int unigramProbability = random.nextInt(0xFF); - binaryDictionary.addUnigramWord(word, unigramProbability); + addUnigramWord(binaryDictionary, word, unigramProbability); } for (int j = 0; j < bigramCountPerIteration; j++) { final String word0 = words.get(random.nextInt(words.size())); @@ -669,20 +814,413 @@ public class BinaryDictionaryTests extends AndroidTestCase { } bigrams.add(new Pair<String, String>(word0, word1)); final int bigramProbability = random.nextInt(0xF); - binaryDictionary.addBigramWords(word0, word1, bigramProbability); + addBigramWords(binaryDictionary, word0, word1, bigramProbability); } assertEquals(new HashSet<String>(words).size(), Integer.parseInt( - binaryDictionary.getPropertyForTests(BinaryDictionary.UNIGRAM_COUNT_QUERY))); + binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY))); assertEquals(new HashSet<Pair<String, String>>(bigrams).size(), Integer.parseInt( - binaryDictionary.getPropertyForTests(BinaryDictionary.BIGRAM_COUNT_QUERY))); + binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY))); binaryDictionary.flushWithGC(); assertEquals(new HashSet<String>(words).size(), Integer.parseInt( - binaryDictionary.getPropertyForTests(BinaryDictionary.UNIGRAM_COUNT_QUERY))); + binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY))); assertEquals(new HashSet<Pair<String, String>>(bigrams).size(), Integer.parseInt( - binaryDictionary.getPropertyForTests(BinaryDictionary.BIGRAM_COUNT_QUERY))); + binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY))); binaryDictionary.close(); } dictFile.delete(); } + + public void testAddMultipleDictionaryEntries() { + testAddMultipleDictionaryEntries(FormatSpec.VERSION4); + } + + private void testAddMultipleDictionaryEntries(final int formatVersion) { + final int codePointSetSize = 20; + final int lmParamCount = 1000; + final double bigramContinueRate = 0.9; + final long seed = System.currentTimeMillis(); + final Random random = new Random(seed); + + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + + final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); + final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>(); + final HashMap<Pair<String, String>, Integer> bigramProbabilities = + new HashMap<Pair<String, String>, Integer>(); + + final LanguageModelParam[] languageModelParams = new LanguageModelParam[lmParamCount]; + String prevWord = null; + for (int i = 0; i < languageModelParams.length; i++) { + final String word = CodePointUtils.generateWord(random, codePointSet); + final int probability = random.nextInt(0xFF); + final int bigramProbability = random.nextInt(0xF); + unigramProbabilities.put(word, probability); + if (prevWord == null) { + languageModelParams[i] = new LanguageModelParam(word, probability, + BinaryDictionary.NOT_A_VALID_TIMESTAMP); + } else { + languageModelParams[i] = new LanguageModelParam(prevWord, word, probability, + bigramProbability, BinaryDictionary.NOT_A_VALID_TIMESTAMP); + bigramProbabilities.put(new Pair<String, String>(prevWord, word), + bigramProbability); + } + prevWord = (random.nextDouble() < bigramContinueRate) ? word : null; + } + + final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + binaryDictionary.addMultipleDictionaryEntries(languageModelParams); + + for (Map.Entry<String, Integer> entry : unigramProbabilities.entrySet()) { + assertEquals((int)entry.getValue(), binaryDictionary.getFrequency(entry.getKey())); + } + + for (Map.Entry<Pair<String, String>, Integer> entry : bigramProbabilities.entrySet()) { + final String word0 = entry.getKey().first; + final String word1 = entry.getKey().second; + final int unigramProbability = unigramProbabilities.get(word1); + final int bigramProbability = entry.getValue(); + final int probability = binaryDictionary.calculateProbability( + unigramProbability, bigramProbability); + assertEquals(probability, binaryDictionary.getBigramProbability(word0, word1)); + } + } + + public void testGetWordProperties() { + testGetWordProperties(FormatSpec.VERSION4); + } + + private void testGetWordProperties(final int formatVersion) { + final long seed = System.currentTimeMillis(); + final Random random = new Random(seed); + final int UNIGRAM_COUNT = 1000; + final int BIGRAM_COUNT = 1000; + final int codePointSetSize = 20; + final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); + + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + + final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord"); + assertFalse(invalidWordProperty.isValid()); + + final ArrayList<String> words = new ArrayList<String>(); + final HashMap<String, Integer> wordProbabilities = new HashMap<String, Integer>(); + final HashMap<String, HashSet<String>> bigrams = new HashMap<String, HashSet<String>>(); + final HashMap<Pair<String, String>, Integer> bigramProbabilities = + new HashMap<Pair<String, String>, Integer>(); + + for (int i = 0; i < UNIGRAM_COUNT; i++) { + final String word = CodePointUtils.generateWord(random, codePointSet); + final int unigramProbability = random.nextInt(0xFF); + final boolean isNotAWord = random.nextBoolean(); + final boolean isBlacklisted = random.nextBoolean(); + // TODO: Add tests for historical info. + binaryDictionary.addUnigramWord(word, unigramProbability, + null /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY, + isNotAWord, isBlacklisted, BinaryDictionary.NOT_A_VALID_TIMESTAMP); + if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { + binaryDictionary.flushWithGC(); + } + words.add(word); + wordProbabilities.put(word, unigramProbability); + final WordProperty wordProperty = binaryDictionary.getWordProperty(word); + assertEquals(word, wordProperty.mWord); + assertTrue(wordProperty.isValid()); + assertEquals(isNotAWord, wordProperty.mIsNotAWord); + assertEquals(isBlacklisted, wordProperty.mIsBlacklistEntry); + assertEquals(false, wordProperty.mHasBigrams); + assertEquals(false, wordProperty.mHasShortcuts); + assertEquals(unigramProbability, wordProperty.mProbabilityInfo.mProbability); + assertTrue(wordProperty.mShortcutTargets.isEmpty()); + } + + for (int i = 0; i < BIGRAM_COUNT; i++) { + final int word0Index = random.nextInt(wordProbabilities.size()); + final int word1Index = random.nextInt(wordProbabilities.size()); + if (word0Index == word1Index) { + continue; + } + final String word0 = words.get(word0Index); + final String word1 = words.get(word1Index); + final int bigramProbability = random.nextInt(0xF); + binaryDictionary.addBigramWords(word0, word1, bigramProbability, + BinaryDictionary.NOT_A_VALID_TIMESTAMP); + if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { + binaryDictionary.flushWithGC(); + } + if (!bigrams.containsKey(word0)) { + final HashSet<String> bigramWord1s = new HashSet<String>(); + bigrams.put(word0, bigramWord1s); + } + bigrams.get(word0).add(word1); + bigramProbabilities.put(new Pair<String, String>(word0, word1), bigramProbability); + } + + for (int i = 0; i < words.size(); i++) { + final String word0 = words.get(i); + if (!bigrams.containsKey(word0)) { + continue; + } + final HashSet<String> bigramWord1s = bigrams.get(word0); + final WordProperty wordProperty = binaryDictionary.getWordProperty(word0); + assertEquals(bigramWord1s.size(), wordProperty.mBigrams.size()); + for (int j = 0; j < wordProperty.mBigrams.size(); j++) { + final String word1 = wordProperty.mBigrams.get(j).mWord; + assertTrue(bigramWord1s.contains(word1)); + final int bigramProbabilityDelta = bigramProbabilities.get( + new Pair<String, String>(word0, word1)); + final int unigramProbability = wordProbabilities.get(word1); + final int bigramProbablity = binaryDictionary.calculateProbability( + unigramProbability, bigramProbabilityDelta); + assertEquals(wordProperty.mBigrams.get(j).getProbability(), bigramProbablity); + } + } + } + + public void testIterateAllWords() { + testIterateAllWords(FormatSpec.VERSION4); + } + + private void testIterateAllWords(final int formatVersion) { + final long seed = System.currentTimeMillis(); + final Random random = new Random(seed); + final int UNIGRAM_COUNT = 1000; + final int BIGRAM_COUNT = 1000; + final int codePointSetSize = 20; + final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); + + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + + final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord"); + assertFalse(invalidWordProperty.isValid()); + + final ArrayList<String> words = new ArrayList<String>(); + final HashMap<String, Integer> wordProbabilitiesToCheckLater = + new HashMap<String, Integer>(); + final HashMap<String, HashSet<String>> bigrams = new HashMap<String, HashSet<String>>(); + final HashMap<Pair<String, String>, Integer> bigramProbabilitiesToCheckLater = + new HashMap<Pair<String, String>, Integer>(); + + for (int i = 0; i < UNIGRAM_COUNT; i++) { + final String word = CodePointUtils.generateWord(random, codePointSet); + final int unigramProbability = random.nextInt(0xFF); + addUnigramWord(binaryDictionary, word, unigramProbability); + if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { + binaryDictionary.flushWithGC(); + } + words.add(word); + wordProbabilitiesToCheckLater.put(word, unigramProbability); + } + + for (int i = 0; i < BIGRAM_COUNT; i++) { + final int word0Index = random.nextInt(wordProbabilitiesToCheckLater.size()); + final int word1Index = random.nextInt(wordProbabilitiesToCheckLater.size()); + if (word0Index == word1Index) { + continue; + } + final String word0 = words.get(word0Index); + final String word1 = words.get(word1Index); + final int bigramProbability = random.nextInt(0xF); + binaryDictionary.addBigramWords(word0, word1, bigramProbability, + BinaryDictionary.NOT_A_VALID_TIMESTAMP); + if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { + binaryDictionary.flushWithGC(); + } + if (!bigrams.containsKey(word0)) { + final HashSet<String> bigramWord1s = new HashSet<String>(); + bigrams.put(word0, bigramWord1s); + } + bigrams.get(word0).add(word1); + bigramProbabilitiesToCheckLater.put( + new Pair<String, String>(word0, word1), bigramProbability); + } + + final HashSet<String> wordSet = new HashSet<String>(words); + final HashSet<Pair<String, String>> bigramSet = + new HashSet<Pair<String,String>>(bigramProbabilitiesToCheckLater.keySet()); + int token = 0; + do { + final BinaryDictionary.GetNextWordPropertyResult result = + binaryDictionary.getNextWordProperty(token); + final WordProperty wordProperty = result.mWordProperty; + final String word0 = wordProperty.mWord; + assertEquals((int)wordProbabilitiesToCheckLater.get(word0), + wordProperty.mProbabilityInfo.mProbability); + wordSet.remove(word0); + final HashSet<String> bigramWord1s = bigrams.get(word0); + for (int j = 0; j < wordProperty.mBigrams.size(); j++) { + final String word1 = wordProperty.mBigrams.get(j).mWord; + assertTrue(bigramWord1s.contains(word1)); + final int unigramProbability = wordProbabilitiesToCheckLater.get(word1); + final Pair<String, String> bigram = new Pair<String, String>(word0, word1); + final int bigramProbabilityDelta = bigramProbabilitiesToCheckLater.get(bigram); + final int bigramProbablity = binaryDictionary.calculateProbability( + unigramProbability, bigramProbabilityDelta); + assertEquals(wordProperty.mBigrams.get(j).getProbability(), bigramProbablity); + bigramSet.remove(bigram); + } + token = result.mNextToken; + } while (token != 0); + assertTrue(wordSet.isEmpty()); + assertTrue(bigramSet.isEmpty()); + } + + public void testAddShortcuts() { + testAddShortcuts(FormatSpec.VERSION4); + } + + private void testAddShortcuts(final int formatVersion) { + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + + final int unigramProbability = 100; + final int shortcutProbability = 10; + binaryDictionary.addUnigramWord("aaa", unigramProbability, "zzz", + shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */, + 0 /* timestamp */); + WordProperty wordProperty = binaryDictionary.getWordProperty("aaa"); + assertEquals(1, wordProperty.mShortcutTargets.size()); + assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord); + assertEquals(shortcutProbability, wordProperty.mShortcutTargets.get(0).getProbability()); + final int updatedShortcutProbability = 2; + binaryDictionary.addUnigramWord("aaa", unigramProbability, "zzz", + updatedShortcutProbability, false /* isNotAWord */, false /* isBlacklisted */, + 0 /* timestamp */); + wordProperty = binaryDictionary.getWordProperty("aaa"); + assertEquals(1, wordProperty.mShortcutTargets.size()); + assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord); + assertEquals(updatedShortcutProbability, + wordProperty.mShortcutTargets.get(0).getProbability()); + binaryDictionary.addUnigramWord("aaa", unigramProbability, "yyy", + shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */, + 0 /* timestamp */); + final HashMap<String, Integer> shortcutTargets = new HashMap<String, Integer>(); + shortcutTargets.put("zzz", updatedShortcutProbability); + shortcutTargets.put("yyy", shortcutProbability); + wordProperty = binaryDictionary.getWordProperty("aaa"); + assertEquals(2, wordProperty.mShortcutTargets.size()); + for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) { + assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord)); + assertEquals((int)shortcutTargets.get(shortcutTarget.mWord), + shortcutTarget.getProbability()); + shortcutTargets.remove(shortcutTarget.mWord); + } + shortcutTargets.put("zzz", updatedShortcutProbability); + shortcutTargets.put("yyy", shortcutProbability); + binaryDictionary.flushWithGC(); + wordProperty = binaryDictionary.getWordProperty("aaa"); + assertEquals(2, wordProperty.mShortcutTargets.size()); + for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) { + assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord)); + assertEquals((int)shortcutTargets.get(shortcutTarget.mWord), + shortcutTarget.getProbability()); + shortcutTargets.remove(shortcutTarget.mWord); + } + } + + public void testAddManyShortcuts() { + testAddManyShortcuts(FormatSpec.VERSION4); + } + + private void testAddManyShortcuts(final int formatVersion) { + final long seed = System.currentTimeMillis(); + final Random random = new Random(seed); + final int UNIGRAM_COUNT = 1000; + final int SHORTCUT_COUNT = 10000; + final int codePointSetSize = 20; + final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); + + final ArrayList<String> words = new ArrayList<String>(); + final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>(); + final HashMap<String, HashMap<String, Integer>> shortcutTargets = + new HashMap<String, HashMap<String, Integer>>(); + + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + + for (int i = 0; i < UNIGRAM_COUNT; i++) { + final String word = CodePointUtils.generateWord(random, codePointSet); + final int unigramProbability = random.nextInt(0xFF); + addUnigramWord(binaryDictionary, word, unigramProbability); + words.add(word); + unigramProbabilities.put(word, unigramProbability); + if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { + binaryDictionary.flushWithGC(); + } + } + for (int i = 0; i < SHORTCUT_COUNT; i++) { + final String shortcutTarget = CodePointUtils.generateWord(random, codePointSet); + final int shortcutProbability = random.nextInt(0xF); + final String word = words.get(random.nextInt(words.size())); + final int unigramProbability = unigramProbabilities.get(word); + binaryDictionary.addUnigramWord(word, unigramProbability, shortcutTarget, + shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */, + 0 /* timestamp */); + if (shortcutTargets.containsKey(word)) { + final HashMap<String, Integer> shortcutTargetsOfWord = shortcutTargets.get(word); + shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability); + } else { + final HashMap<String, Integer> shortcutTargetsOfWord = + new HashMap<String, Integer>(); + shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability); + shortcutTargets.put(word, shortcutTargetsOfWord); + } + if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { + binaryDictionary.flushWithGC(); + } + } + + for (final String word : words) { + final WordProperty wordProperty = binaryDictionary.getWordProperty(word); + assertEquals((int)unigramProbabilities.get(word), + wordProperty.mProbabilityInfo.mProbability); + if (!shortcutTargets.containsKey(word)) { + // The word does not have shortcut targets. + continue; + } + assertEquals(shortcutTargets.get(word).size(), wordProperty.mShortcutTargets.size()); + for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) { + final String targetCodePonts = shortcutTarget.mWord; + assertEquals((int)shortcutTargets.get(word).get(targetCodePonts), + shortcutTarget.getProbability()); + } + } + } } |