diff options
author | 2013-12-13 17:09:16 +0900 | |
---|---|---|
committer | 2013-12-13 17:13:32 +0900 | |
commit | 2fa3693c264a4c150ac307d9bb7f6f8f18cc4ffc (patch) | |
tree | 56a5652edf71dd19d04161f72e3e013608cc2a9c /tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java | |
parent | 18d033405c18a8dc28f60ca22d1d0df23a679384 (diff) | |
download | latinime-2fa3693c264a4c150ac307d9bb7f6f8f18cc4ffc.tar.gz latinime-2fa3693c264a4c150ac307d9bb7f6f8f18cc4ffc.tar.xz latinime-2fa3693c264a4c150ac307d9bb7f6f8f18cc4ffc.zip |
Reset to 9bd6dac4708ad94fd0257c53e977df62b152e20c
The bulk merge from -bayo to klp-dev should not have been merged to master.
Change-Id: I527a03a76f5247e4939a672f27c314dc11cbb854
Diffstat (limited to 'tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java')
-rw-r--r-- | tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java | 462 |
1 file changed, 390 insertions, 72 deletions
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java index 5b8f0e977..cfc4c762a 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java @@ -21,8 +21,12 @@ import android.test.suitebuilder.annotation.LargeTest; import android.text.TextUtils; import android.util.Pair; +import com.android.inputmethod.latin.BinaryDictionary.LanguageModelParam; import com.android.inputmethod.latin.makedict.CodePointUtils; import com.android.inputmethod.latin.makedict.FormatSpec; +import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; +import com.android.inputmethod.latin.utils.FileUtils; +import com.android.inputmethod.latin.utils.UnigramProperty; import java.io.File; import java.io.IOException; @@ -33,6 +37,7 @@ import java.util.Locale; import java.util.Map; import java.util.Random; +// TODO Use the seed passed as an argument for makedict test. 
@LargeTest public class BinaryDictionaryTests extends AndroidTestCase { private static final String TEST_DICT_FILE_EXTENSION = ".testDict"; @@ -48,24 +53,41 @@ public class BinaryDictionaryTests extends AndroidTestCase { super.tearDown(); } - private File createEmptyDictionaryAndGetFile(final String filename) throws IOException { - final File file = File.createTempFile(filename, TEST_DICT_FILE_EXTENSION, + private File createEmptyDictionaryAndGetFile(final String dictId, + final int formatVersion) throws IOException { + if (formatVersion == FormatSpec.VERSION4) { + return createEmptyVer4DictionaryAndGetFile(dictId); + } else { + throw new IOException("Dictionary format version " + formatVersion + + " is not supported."); + } + } + + private File createEmptyVer4DictionaryAndGetFile(final String dictId) throws IOException { + final File file = File.createTempFile(dictId, TEST_DICT_FILE_EXTENSION, getContext().getCacheDir()); + file.delete(); + file.mkdir(); Map<String, String> attributeMap = new HashMap<String, String>(); attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE, FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE); if (BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(), - 3 /* dictVersion */, attributeMap)) { + FormatSpec.VERSION4, attributeMap)) { return file; } else { - throw new IOException("Empty dictionary cannot be created."); + throw new IOException("Empty dictionary " + file.getAbsolutePath() + + " cannot be created."); } } public void testIsValidDictionary() { + testIsValidDictionary(FormatSpec.VERSION4); + } + + private void testIsValidDictionary(final int formatVersion) { File dictFile = null; try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); } @@ -77,7 +99,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { 
binaryDictionary.close(); assertFalse("binaryDictionary must be invalid after closing.", binaryDictionary.isValidDictionary()); - dictFile.delete(); + FileUtils.deleteRecursively(dictFile); binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); @@ -86,10 +108,28 @@ public class BinaryDictionaryTests extends AndroidTestCase { binaryDictionary.close(); } + private void addUnigramWord(final BinaryDictionary binaryDictionary, final String word, + final int probability) { + binaryDictionary.addUnigramWord(word, probability, "" /* shortcutTarget */, + BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */, + false /* isNotAWord */, false /* isBlacklisted */, + BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); + } + + private void addBigramWords(final BinaryDictionary binaryDictionary, final String word0, + final String word1, final int probability) { + binaryDictionary.addBigramWords(word0, word1, probability, + BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); + } + public void testAddUnigramWord() { + testAddUnigramWord(FormatSpec.VERSION4); + } + + private void testAddUnigramWord(final int formatVersion) { File dictFile = null; try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); } @@ -98,21 +138,21 @@ public class BinaryDictionaryTests extends AndroidTestCase { Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); final int probability = 100; - binaryDictionary.addUnigramWord("aaa", probability); + addUnigramWord(binaryDictionary, "aaa", probability); // Reallocate and create. - binaryDictionary.addUnigramWord("aab", probability); + addUnigramWord(binaryDictionary, "aab", probability); // Insert into children. 
- binaryDictionary.addUnigramWord("aac", probability); + addUnigramWord(binaryDictionary, "aac", probability); // Make terminal. - binaryDictionary.addUnigramWord("aa", probability); + addUnigramWord(binaryDictionary, "aa", probability); // Create children. - binaryDictionary.addUnigramWord("aaaa", probability); + addUnigramWord(binaryDictionary, "aaaa", probability); // Reallocate and make termianl. - binaryDictionary.addUnigramWord("a", probability); + addUnigramWord(binaryDictionary, "a", probability); final int updatedProbability = 200; // Update. - binaryDictionary.addUnigramWord("aaa", updatedProbability); + addUnigramWord(binaryDictionary, "aaa", updatedProbability); assertEquals(probability, binaryDictionary.getFrequency("aab")); assertEquals(probability, binaryDictionary.getFrequency("aac")); @@ -125,13 +165,17 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testRandomlyAddUnigramWord() { + testRandomlyAddUnigramWord(FormatSpec.VERSION4); + } + + private void testRandomlyAddUnigramWord(final int formatVersion) { final int wordCount = 1000; final int codePointSetSize = 50; final long seed = System.currentTimeMillis(); File dictFile = null; try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); } @@ -148,7 +192,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { probabilityMap.put(word, random.nextInt(0xFF)); } for (String word : probabilityMap.keySet()) { - binaryDictionary.addUnigramWord(word, probabilityMap.get(word)); + addUnigramWord(binaryDictionary, word, probabilityMap.get(word)); } for (String word : probabilityMap.keySet()) { assertEquals(word, (int)probabilityMap.get(word), binaryDictionary.getFrequency(word)); @@ -157,9 +201,13 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void 
testAddBigramWords() { + testAddBigramWords(FormatSpec.VERSION4); + } + + private void testAddBigramWords(final int formatVersion) { File dictFile = null; try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); } @@ -170,13 +218,13 @@ public class BinaryDictionaryTests extends AndroidTestCase { final int unigramProbability = 100; final int bigramProbability = 10; final int updatedBigramProbability = 15; - binaryDictionary.addUnigramWord("aaa", unigramProbability); - binaryDictionary.addUnigramWord("abb", unigramProbability); - binaryDictionary.addUnigramWord("bcc", unigramProbability); - binaryDictionary.addBigramWords("aaa", "abb", bigramProbability); - binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability); - binaryDictionary.addBigramWords("abb", "aaa", bigramProbability); - binaryDictionary.addBigramWords("abb", "bcc", bigramProbability); + addUnigramWord(binaryDictionary, "aaa", unigramProbability); + addUnigramWord(binaryDictionary, "abb", unigramProbability); + addUnigramWord(binaryDictionary, "bcc", unigramProbability); + addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability); + addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability); + addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability); + addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability); final int probability = binaryDictionary.calculateProbability(unigramProbability, bigramProbability); @@ -189,7 +237,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa")); assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc")); - binaryDictionary.addBigramWords("aaa", "abb", updatedBigramProbability); + addBigramWords(binaryDictionary, "aaa", "abb", 
updatedBigramProbability); final int updatedProbability = binaryDictionary.calculateProbability(unigramProbability, updatedBigramProbability); assertEquals(updatedProbability, binaryDictionary.getBigramProbability("aaa", "abb")); @@ -205,22 +253,26 @@ public class BinaryDictionaryTests extends AndroidTestCase { binaryDictionary.getBigramProbability("aaa", "aaa")); // Testing bigram link. - binaryDictionary.addUnigramWord("abcde", unigramProbability); - binaryDictionary.addUnigramWord("fghij", unigramProbability); - binaryDictionary.addBigramWords("abcde", "fghij", bigramProbability); - binaryDictionary.addUnigramWord("fgh", unigramProbability); - binaryDictionary.addUnigramWord("abc", unigramProbability); - binaryDictionary.addUnigramWord("f", unigramProbability); + addUnigramWord(binaryDictionary, "abcde", unigramProbability); + addUnigramWord(binaryDictionary, "fghij", unigramProbability); + addBigramWords(binaryDictionary, "abcde", "fghij", bigramProbability); + addUnigramWord(binaryDictionary, "fgh", unigramProbability); + addUnigramWord(binaryDictionary, "abc", unigramProbability); + addUnigramWord(binaryDictionary, "f", unigramProbability); assertEquals(probability, binaryDictionary.getBigramProbability("abcde", "fghij")); assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getBigramProbability("abcde", "fgh")); - binaryDictionary.addBigramWords("abcde", "fghij", updatedBigramProbability); + addBigramWords(binaryDictionary, "abcde", "fghij", updatedBigramProbability); assertEquals(updatedProbability, binaryDictionary.getBigramProbability("abcde", "fghij")); dictFile.delete(); } public void testRandomlyAddBigramWords() { + testRandomlyAddBigramWords(FormatSpec.VERSION4); + } + + private void testRandomlyAddBigramWords(final int formatVersion) { final int wordCount = 100; final int bigramCount = 1000; final int codePointSetSize = 50; @@ -229,7 +281,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { File dictFile = null; try { - dictFile = 
createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); } @@ -249,7 +301,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { words.add(word); final int unigramProbability = random.nextInt(0xFF); unigramProbabilities.put(word, unigramProbability); - binaryDictionary.addUnigramWord(word, unigramProbability); + addUnigramWord(binaryDictionary, word, unigramProbability); } for (int i = 0; i < bigramCount; i++) { @@ -262,7 +314,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { bigramWords.add(bigram); final int bigramProbability = random.nextInt(0xF); bigramProbabilities.put(bigram, bigramProbability); - binaryDictionary.addBigramWords(word0, word1, bigramProbability); + addBigramWords(binaryDictionary, word0, word1, bigramProbability); } for (final Pair<String, String> bigram : bigramWords) { @@ -278,9 +330,13 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testRemoveBigramWords() { + testRemoveBigramWords(FormatSpec.VERSION4); + } + + private void testRemoveBigramWords(final int formatVersion) { File dictFile = null; try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); } @@ -289,13 +345,13 @@ public class BinaryDictionaryTests extends AndroidTestCase { Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); final int unigramProbability = 100; final int bigramProbability = 10; - binaryDictionary.addUnigramWord("aaa", unigramProbability); - binaryDictionary.addUnigramWord("abb", unigramProbability); - binaryDictionary.addUnigramWord("bcc", unigramProbability); - binaryDictionary.addBigramWords("aaa", "abb", bigramProbability); - 
binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability); - binaryDictionary.addBigramWords("abb", "aaa", bigramProbability); - binaryDictionary.addBigramWords("abb", "bcc", bigramProbability); + addUnigramWord(binaryDictionary, "aaa", unigramProbability); + addUnigramWord(binaryDictionary, "abb", unigramProbability); + addUnigramWord(binaryDictionary, "bcc", unigramProbability); + addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability); + addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability); + addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability); + addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability); assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb")); assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc")); @@ -304,7 +360,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { binaryDictionary.removeBigramWords("aaa", "abb"); assertEquals(false, binaryDictionary.isValidBigram("aaa", "abb")); - binaryDictionary.addBigramWords("aaa", "abb", bigramProbability); + addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability); assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb")); @@ -324,9 +380,13 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testFlushDictionary() { + testFlushDictionary(FormatSpec.VERSION4); + } + + private void testFlushDictionary(final int formatVersion) { File dictFile = null; try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); } @@ -335,8 +395,8 @@ public class BinaryDictionaryTests extends AndroidTestCase { Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); final int probability = 100; - binaryDictionary.addUnigramWord("aaa", probability); - binaryDictionary.addUnigramWord("abcd", probability); + 
addUnigramWord(binaryDictionary, "aaa", probability); + addUnigramWord(binaryDictionary, "abcd", probability); // Close without flushing. binaryDictionary.close(); @@ -347,8 +407,8 @@ public class BinaryDictionaryTests extends AndroidTestCase { assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("aaa")); assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("abcd")); - binaryDictionary.addUnigramWord("aaa", probability); - binaryDictionary.addUnigramWord("abcd", probability); + addUnigramWord(binaryDictionary, "aaa", probability); + addUnigramWord(binaryDictionary, "abcd", probability); binaryDictionary.flush(); binaryDictionary.close(); @@ -358,7 +418,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { assertEquals(probability, binaryDictionary.getFrequency("aaa")); assertEquals(probability, binaryDictionary.getFrequency("abcd")); - binaryDictionary.addUnigramWord("bcde", probability); + addUnigramWord(binaryDictionary, "bcde", probability); binaryDictionary.flush(); binaryDictionary.close(); @@ -372,9 +432,13 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testFlushWithGCDictionary() { + testFlushWithGCDictionary(FormatSpec.VERSION4); + } + + private void testFlushWithGCDictionary(final int formatVersion) { File dictFile = null; try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); } @@ -384,13 +448,13 @@ public class BinaryDictionaryTests extends AndroidTestCase { final int unigramProbability = 100; final int bigramProbability = 10; - binaryDictionary.addUnigramWord("aaa", unigramProbability); - binaryDictionary.addUnigramWord("abb", unigramProbability); - binaryDictionary.addUnigramWord("bcc", unigramProbability); - binaryDictionary.addBigramWords("aaa", "abb", bigramProbability); - 
binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability); - binaryDictionary.addBigramWords("abb", "aaa", bigramProbability); - binaryDictionary.addBigramWords("abb", "bcc", bigramProbability); + addUnigramWord(binaryDictionary, "aaa", unigramProbability); + addUnigramWord(binaryDictionary, "abb", unigramProbability); + addUnigramWord(binaryDictionary, "bcc", unigramProbability); + addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability); + addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability); + addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability); + addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability); binaryDictionary.flushWithGC(); binaryDictionary.close(); @@ -415,8 +479,12 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile.delete(); } - // TODO: Evaluate performance of GC public void testAddBigramWordsAndFlashWithGC() { + testAddBigramWordsAndFlashWithGC(FormatSpec.VERSION4); + } + + // TODO: Evaluate performance of GC + private void testAddBigramWordsAndFlashWithGC(final int formatVersion) { final int wordCount = 100; final int bigramCount = 1000; final int codePointSetSize = 30; @@ -425,7 +493,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { File dictFile = null; try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); } @@ -446,7 +514,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { words.add(word); final int unigramProbability = random.nextInt(0xFF); unigramProbabilities.put(word, unigramProbability); - binaryDictionary.addUnigramWord(word, unigramProbability); + addUnigramWord(binaryDictionary, word, unigramProbability); } for (int i = 0; i < bigramCount; i++) { @@ -459,7 +527,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { 
bigramWords.add(bigram); final int bigramProbability = random.nextInt(0xF); bigramProbabilities.put(bigram, bigramProbability); - binaryDictionary.addBigramWords(word0, word1, bigramProbability); + addBigramWords(binaryDictionary, word0, word1, bigramProbability); } binaryDictionary.flushWithGC(); @@ -480,7 +548,11 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile.delete(); } - public void testRandomOperetionsAndFlashWithGC() { + public void testRandomOperationsAndFlashWithGC() { + testRandomOperationsAndFlashWithGC(FormatSpec.VERSION4); + } + + private void testRandomOperationsAndFlashWithGC(final int formatVersion) { final int flashWithGCIterationCount = 50; final int operationCountInEachIteration = 200; final int initialUnigramCount = 100; @@ -494,7 +566,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { File dictFile = null; try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); } @@ -513,7 +585,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { words.add(word); final int unigramProbability = random.nextInt(0xFF); unigramProbabilities.put(word, unigramProbability); - binaryDictionary.addUnigramWord(word, unigramProbability); + addUnigramWord(binaryDictionary, word, unigramProbability); } binaryDictionary.flushWithGC(); binaryDictionary.close(); @@ -529,7 +601,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { words.add(word); final int unigramProbability = random.nextInt(0xFF); unigramProbabilities.put(word, unigramProbability); - binaryDictionary.addUnigramWord(word, unigramProbability); + addUnigramWord(binaryDictionary, word, unigramProbability); } // Add bigram. 
if (random.nextFloat() < addBigramProb && words.size() > 2) { @@ -547,7 +619,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { final Pair<String, String> bigram = new Pair<String, String>(word0, word1); bigramWords.add(bigram); bigramProbabilities.put(bigram, bigramProbability); - binaryDictionary.addBigramWords(word0, word1, bigramProbability); + addBigramWords(binaryDictionary, word0, word1, bigramProbability); } // Remove bigram. if (random.nextFloat() < removeBigramProb && !bigramWords.isEmpty()) { @@ -588,6 +660,10 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testAddManyUnigramsAndFlushWithGC() { + testAddManyUnigramsAndFlushWithGC(FormatSpec.VERSION4); + } + + private void testAddManyUnigramsAndFlushWithGC(final int formatVersion) { final int flashWithGCIterationCount = 3; final int codePointSetSize = 50; @@ -596,7 +672,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { File dictFile = null; try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); } @@ -615,7 +691,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { words.add(word); final int unigramProbability = random.nextInt(0xFF); unigramProbabilities.put(word, unigramProbability); - binaryDictionary.addUnigramWord(word, unigramProbability); + addUnigramWord(binaryDictionary, word, unigramProbability); } for (int j = 0; j < words.size(); j++) { @@ -632,6 +708,10 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testUnigramAndBigramCount() { + testUnigramAndBigramCount(FormatSpec.VERSION4); + } + + private void testUnigramAndBigramCount(final int formatVersion) { final int flashWithGCIterationCount = 10; final int codePointSetSize = 50; final int unigramCountPerIteration = 1000; @@ -641,7 +721,7 @@ public class 
BinaryDictionaryTests extends AndroidTestCase { File dictFile = null; try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); } @@ -659,7 +739,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { final String word = CodePointUtils.generateWord(random, codePointSet); words.add(word); final int unigramProbability = random.nextInt(0xFF); - binaryDictionary.addUnigramWord(word, unigramProbability); + addUnigramWord(binaryDictionary, word, unigramProbability); } for (int j = 0; j < bigramCountPerIteration; j++) { final String word0 = words.get(random.nextInt(words.size())); @@ -669,7 +749,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { } bigrams.add(new Pair<String, String>(word0, word1)); final int bigramProbability = random.nextInt(0xF); - binaryDictionary.addBigramWords(word0, word1, bigramProbability); + addBigramWords(binaryDictionary, word0, word1, bigramProbability); } assertEquals(new HashSet<String>(words).size(), Integer.parseInt( binaryDictionary.getPropertyForTests(BinaryDictionary.UNIGRAM_COUNT_QUERY))); @@ -685,4 +765,242 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile.delete(); } + + public void testAddMultipleDictionaryEntries() { + testAddMultipleDictionaryEntries(FormatSpec.VERSION4); + } + + private void testAddMultipleDictionaryEntries(final int formatVersion) { + final int codePointSetSize = 20; + final int lmParamCount = 1000; + final double bigramContinueRate = 0.9; + final long seed = System.currentTimeMillis(); + final Random random = new Random(seed); + + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + + final int[] codePointSet = 
CodePointUtils.generateCodePointSet(codePointSetSize, random); + final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>(); + final HashMap<Pair<String, String>, Integer> bigramProbabilities = + new HashMap<Pair<String, String>, Integer>(); + + final LanguageModelParam[] languageModelParams = new LanguageModelParam[lmParamCount]; + String prevWord = null; + for (int i = 0; i < languageModelParams.length; i++) { + final String word = CodePointUtils.generateWord(random, codePointSet); + final int probability = random.nextInt(0xFF); + final int bigramProbability = random.nextInt(0xF); + unigramProbabilities.put(word, probability); + if (prevWord == null) { + languageModelParams[i] = new LanguageModelParam(word, probability, + BinaryDictionary.NOT_A_VALID_TIMESTAMP); + } else { + languageModelParams[i] = new LanguageModelParam(prevWord, word, probability, + bigramProbability, BinaryDictionary.NOT_A_VALID_TIMESTAMP); + bigramProbabilities.put(new Pair<String, String>(prevWord, word), + bigramProbability); + } + prevWord = (random.nextDouble() < bigramContinueRate) ? 
word : null; + } + + final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + binaryDictionary.addMultipleDictionaryEntries(languageModelParams); + + for (Map.Entry<String, Integer> entry : unigramProbabilities.entrySet()) { + assertEquals((int)entry.getValue(), binaryDictionary.getFrequency(entry.getKey())); + } + + for (Map.Entry<Pair<String, String>, Integer> entry : bigramProbabilities.entrySet()) { + final String word0 = entry.getKey().first; + final String word1 = entry.getKey().second; + final int unigramProbability = unigramProbabilities.get(word1); + final int bigramProbability = entry.getValue(); + final int probability = binaryDictionary.calculateProbability( + unigramProbability, bigramProbability); + assertEquals(probability, binaryDictionary.getBigramProbability(word0, word1)); + } + } + + public void testGetUnigramProperties() { + testGetUnigramProperties(FormatSpec.VERSION4); + } + + private void testGetUnigramProperties(final int formatVersion) { + final long seed = System.currentTimeMillis(); + final Random random = new Random(seed); + final int ITERATION_COUNT = 1000; + final int codePointSetSize = 20; + final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); + + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + + final UnigramProperty invalidUnigramProperty = + binaryDictionary.getUnigramProperty("dummyWord"); + assertFalse(invalidUnigramProperty.isValid()); + + for 
(int i = 0; i < ITERATION_COUNT; i++) {
            final String word = CodePointUtils.generateWord(random, codePointSet);
            final int unigramProbability = random.nextInt(0xFF);
            final boolean isNotAWord = random.nextBoolean();
            final boolean isBlacklisted = random.nextBoolean();
            // TODO: Add tests for historical info.
            binaryDictionary.addUnigramWord(word, unigramProbability,
                    null /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY,
                    isNotAWord, isBlacklisted, BinaryDictionary.NOT_A_VALID_TIMESTAMP);
            final UnigramProperty unigramProperty =
                    binaryDictionary.getUnigramProperty(word);
            assertEquals(word, unigramProperty.mCodePoints);
            assertTrue(unigramProperty.isValid());
            assertEquals(isNotAWord, unigramProperty.mIsNotAWord);
            assertEquals(isBlacklisted, unigramProperty.mIsBlacklisted);
            assertEquals(false, unigramProperty.mHasBigrams);
            assertEquals(false, unigramProperty.mHasShortcuts);
            assertEquals(unigramProbability, unigramProperty.mProbability);
            assertTrue(unigramProperty.mShortcutTargets.isEmpty());
        }
    }

    /** Runs the shortcut add/update test against the version 4 dictionary format. */
    public void testAddShortcuts() {
        testAddShortcuts(FormatSpec.VERSION4);
    }

    /**
     * Checks that shortcut targets attached to a single word can be added, updated and
     * extended, and that they are still reported after {@code flushWithGC()}.
     *
     * @param formatVersion the dictionary format version used to create the empty dictionary.
     */
    private void testAddShortcuts(final int formatVersion) {
        File dictFile = null;
        try {
            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
        } catch (IOException e) {
            fail("IOException while writing an initial dictionary : " + e);
        }
        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);

        // Step 1: a word with a single shortcut target.
        final int unigramProbability = 100;
        final int shortcutProbability = 10;
        binaryDictionary.addUnigramWord("aaa", unigramProbability, "zzz",
                shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
                0 /* timestamp */);
        UnigramProperty unigramProperty = binaryDictionary.getUnigramProperty("aaa");
        assertEquals(1, unigramProperty.mShortcutTargets.size());
        assertEquals("zzz", unigramProperty.mShortcutTargets.get(0).mWord);
        assertEquals(shortcutProbability, unigramProperty.mShortcutTargets.get(0).mFrequency);

        // Step 2: adding the same target again must update its probability, not duplicate it.
        final int updatedShortcutProbability = 2;
        binaryDictionary.addUnigramWord("aaa", unigramProbability, "zzz",
                updatedShortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
                0 /* timestamp */);
        unigramProperty = binaryDictionary.getUnigramProperty("aaa");
        assertEquals(1, unigramProperty.mShortcutTargets.size());
        assertEquals("zzz", unigramProperty.mShortcutTargets.get(0).mWord);
        assertEquals(updatedShortcutProbability,
                unigramProperty.mShortcutTargets.get(0).mFrequency);

        // Step 3: a second, different target is kept alongside the first one.
        binaryDictionary.addUnigramWord("aaa", unigramProbability, "yyy",
                shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
                0 /* timestamp */);
        final HashMap<String, Integer> shortcutTargets = new HashMap<String, Integer>();
        shortcutTargets.put("zzz", updatedShortcutProbability);
        shortcutTargets.put("yyy", shortcutProbability);
        unigramProperty = binaryDictionary.getUnigramProperty("aaa");
        assertEquals(2, unigramProperty.mShortcutTargets.size());
        for (WeightedString shortcutTarget : unigramProperty.mShortcutTargets) {
            assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord));
            assertEquals((int)shortcutTargets.get(shortcutTarget.mWord), shortcutTarget.mFrequency);
            // Each expected target may be matched only once.
            shortcutTargets.remove(shortcutTarget.mWord);
        }

        // Step 4: the same expectations must hold after flushWithGC().
        shortcutTargets.put("zzz", updatedShortcutProbability);
        shortcutTargets.put("yyy", shortcutProbability);
        binaryDictionary.flushWithGC();
        unigramProperty = binaryDictionary.getUnigramProperty("aaa");
        assertEquals(2, unigramProperty.mShortcutTargets.size());
        for (WeightedString shortcutTarget : unigramProperty.mShortcutTargets) {
            assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord));
            assertEquals((int)shortcutTargets.get(shortcutTarget.mWord), shortcutTarget.mFrequency);
            shortcutTargets.remove(shortcutTarget.mWord);
        }
    }

    /** Runs the bulk shortcut test against the version 4 dictionary format. */
    public void testAddManyShortcuts() {
        testAddManyShortcuts(FormatSpec.VERSION4);
    }

    /**
     * Adds randomly generated words, attaches randomly generated shortcut targets to randomly
     * picked words, and checks that the dictionary reports the probability and the shortcut
     * targets recorded for every word.
     *
     * The random seed is included in every assertion message so that a failing run can be
     * reproduced.
     *
     * @param formatVersion the dictionary format version used to create the empty dictionary.
     */
    private void testAddManyShortcuts(final int formatVersion) {
        final long seed = System.currentTimeMillis();
        final Random random = new Random(seed);
        final String seedInfo = " (random seed: " + seed + ")";
        final int UNIGRAM_COUNT = 1000;
        final int SHORTCUT_COUNT = 10000;
        final int codePointSetSize = 20;
        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);

        final ArrayList<String> words = new ArrayList<String>();
        final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>();
        final HashMap<String, HashMap<String, Integer>> shortcutTargets =
                new HashMap<String, HashMap<String, Integer>>();

        File dictFile = null;
        try {
            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
        } catch (IOException e) {
            fail("IOException while writing an initial dictionary : " + e);
        }
        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);

        // Populate the dictionary with random unigrams, remembering the expected probability.
        for (int i = 0; i < UNIGRAM_COUNT; i++) {
            final String word = CodePointUtils.generateWord(random, codePointSet);
            final int unigramProbability = random.nextInt(0xFF);
            addUnigramWord(binaryDictionary, word, unigramProbability);
            words.add(word);
            unigramProbabilities.put(word, unigramProbability);
            if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
                binaryDictionary.flushWithGC();
            }
        }

        // Attach random shortcut targets to randomly picked words, remembering the expectation.
        for (int i = 0; i < SHORTCUT_COUNT; i++) {
            final String shortcutTarget = CodePointUtils.generateWord(random, codePointSet);
            final int shortcutProbability = random.nextInt(0xF);
            final String word = words.get(random.nextInt(words.size()));
            final int unigramProbability = unigramProbabilities.get(word);
            binaryDictionary.addUnigramWord(word, unigramProbability, shortcutTarget,
                    shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
                    0 /* timestamp */);
            HashMap<String, Integer> shortcutTargetsOfWord = shortcutTargets.get(word);
            if (shortcutTargetsOfWord == null) {
                shortcutTargetsOfWord = new HashMap<String, Integer>();
                shortcutTargets.put(word, shortcutTargetsOfWord);
            }
            shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability);
            if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
                binaryDictionary.flushWithGC();
            }
        }

        // Words are picked at random above, so nothing guarantees that every word received a
        // shortcut. Such words have no entry in shortcutTargets and are expected to report no
        // shortcut target at all.
        final HashMap<String, Integer> noShortcutTargets = new HashMap<String, Integer>();
        for (final String word : words) {
            final UnigramProperty unigramProperty = binaryDictionary.getUnigramProperty(word);
            assertEquals("Unigram probability of \"" + word + "\"" + seedInfo,
                    (int)unigramProbabilities.get(word), unigramProperty.mProbability);
            final HashMap<String, Integer> expectedTargets = shortcutTargets.containsKey(word)
                    ? shortcutTargets.get(word) : noShortcutTargets;
            assertEquals("Shortcut target count of \"" + word + "\"" + seedInfo,
                    expectedTargets.size(), unigramProperty.mShortcutTargets.size());
            for (final WeightedString shortcutTarget : unigramProperty.mShortcutTargets) {
                final String targetCodePoints = shortcutTarget.mWord;
                // Fail with an assertion rather than a NullPointerException on unboxing when
                // the dictionary reports a target that was never added.
                assertTrue("Unexpected shortcut target \"" + targetCodePoints + "\" of \""
                        + word + "\"" + seedInfo, expectedTargets.containsKey(targetCodePoints));
                assertEquals("Shortcut probability of \"" + targetCodePoints + "\" of \""
                        + word + "\"" + seedInfo,
                        (int)expectedTargets.get(targetCodePoints), shortcutTarget.mFrequency);
            }
        }
    }
}