diff options
Diffstat (limited to 'tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java')
-rw-r--r-- | tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java | 666 |
1 files changed, 470 insertions, 196 deletions
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java index 0fb0fa587..160b08c4f 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java @@ -43,34 +43,49 @@ import java.util.Random; public class BinaryDictionaryTests extends AndroidTestCase { private static final String TEST_DICT_FILE_EXTENSION = ".testDict"; private static final String TEST_LOCALE = "test"; + private static final int[] DICT_FORMAT_VERSIONS = + new int[] { FormatSpec.VERSION4, FormatSpec.VERSION4_DEV }; + + private static boolean canCheckBigramProbability(final int formatVersion) { + return formatVersion > FormatSpec.VERSION401; + } + + private static boolean supportsBeginningOfSentence(final int formatVersion) { + return formatVersion > FormatSpec.VERSION401; + } private File createEmptyDictionaryAndGetFile(final String dictId, final int formatVersion) throws IOException { - if (formatVersion == FormatSpec.VERSION4) { - return createEmptyVer4DictionaryAndGetFile(dictId); + if (formatVersion == FormatSpec.VERSION4 + || formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING + || formatVersion == FormatSpec.VERSION4_DEV) { + return createEmptyVer4DictionaryAndGetFile(dictId, formatVersion); } else { throw new IOException("Dictionary format version " + formatVersion + " is not supported."); } } - private File createEmptyVer4DictionaryAndGetFile(final String dictId) throws IOException { + private File createEmptyVer4DictionaryAndGetFile(final String dictId, + final int formatVersion) throws IOException { final File file = File.createTempFile(dictId, TEST_DICT_FILE_EXTENSION, getContext().getCacheDir()); file.delete(); file.mkdir(); - Map<String, String> attributeMap = new HashMap<String, String>(); - if (BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), FormatSpec.VERSION4, + Map<String, String> attributeMap = new HashMap<>(); + if (BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), formatVersion, Locale.ENGLISH, attributeMap)) { return file; } else { throw new IOException("Empty dictionary " + file.getAbsolutePath() - + " cannot be created."); + + " cannot be created. Format version: " + formatVersion); } } public void testIsValidDictionary() { - testIsValidDictionary(FormatSpec.VERSION4); + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testIsValidDictionary(formatVersion); + } } private void testIsValidDictionary(final int formatVersion) { @@ -98,7 +113,9 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testConstructingDictionaryOnMemory() { - testConstructingDictionaryOnMemory(FormatSpec.VERSION4); + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testConstructingDictionaryOnMemory(formatVersion); + } } private void testConstructingDictionaryOnMemory(final int formatVersion) { @@ -129,7 +146,9 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testAddTooLongWord() { - testAddTooLongWord(FormatSpec.VERSION4); + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testAddTooLongWord(formatVersion); + } } private void testAddTooLongWord(final int formatVersion) { @@ -155,8 +174,9 @@ public class BinaryDictionaryTests extends AndroidTestCase { addUnigramWord(binaryDictionary, validLongWord, probability); addUnigramWord(binaryDictionary, invalidLongWord, probability); // Too long short cut. - binaryDictionary.addUnigramWord("a", probability, invalidLongWord, - 10 /* shortcutProbability */, false /* isNotAWord */, false /* isBlacklisted */, + binaryDictionary.addUnigramEntry("a", probability, invalidLongWord, + 10 /* shortcutProbability */, false /* isBeginningOfSentence */, + false /* isNotAWord */, false /* isBlacklisted */, BinaryDictionary.NOT_A_VALID_TIMESTAMP); addUnigramWord(binaryDictionary, "abc", probability); final int updatedProbability = 200; @@ -173,22 +193,39 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile.delete(); } - private void addUnigramWord(final BinaryDictionary binaryDictionary, final String word, + private static void addUnigramWord(final BinaryDictionary binaryDictionary, final String word, final int probability) { - binaryDictionary.addUnigramWord(word, probability, "" /* shortcutTarget */, + binaryDictionary.addUnigramEntry(word, probability, "" /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */, - false /* isNotAWord */, false /* isBlacklisted */, - BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); + false /* isBeginningOfSentence */, false /* isNotAWord */, + false /* isBlacklisted */, BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); } - private void addBigramWords(final BinaryDictionary binaryDictionary, final String word0, + private static void addBigramWords(final BinaryDictionary binaryDictionary, final String word0, final String word1, final int probability) { - binaryDictionary.addBigramWords(word0, word1, probability, + binaryDictionary.addNgramEntry(new PrevWordsInfo(word0), word1, probability, BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); } + private static boolean isValidBigram(final BinaryDictionary binaryDictionary, + final String word0, final String word1) { + return binaryDictionary.isValidNgram(new PrevWordsInfo(word0), word1); + } + + private static void removeBigramEntry(final BinaryDictionary binaryDictionary, + final String word0, final String word1) { + binaryDictionary.removeNgramEntry(new PrevWordsInfo(word0), word1); + } + + private static int getBigramProbability(final BinaryDictionary binaryDictionary, + final String word0, final String word1) { + return binaryDictionary.getNgramProbability(new PrevWordsInfo(word0), word1); + } + public void testAddUnigramWord() { - testAddUnigramWord(FormatSpec.VERSION4); + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testAddUnigramWord(formatVersion); + } } private void testAddUnigramWord(final int formatVersion) { @@ -230,7 +267,9 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testRandomlyAddUnigramWord() { - testRandomlyAddUnigramWord(FormatSpec.VERSION4); + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testRandomlyAddUnigramWord(formatVersion); + } } private void testRandomlyAddUnigramWord(final int formatVersion) { @@ -248,7 +287,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); - final HashMap<String, Integer> probabilityMap = new HashMap<String, Integer>(); + final HashMap<String, Integer> probabilityMap = new HashMap<>(); // Test a word that isn't contained within the dictionary. final Random random = new Random(seed); final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); @@ -266,7 +305,9 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testAddBigramWords() { - testAddBigramWords(FormatSpec.VERSION4); + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testAddBigramWords(formatVersion); + } } private void testAddBigramWords(final int formatVersion) { @@ -281,8 +322,8 @@ public class BinaryDictionaryTests extends AndroidTestCase { Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); final int unigramProbability = 100; - final int bigramProbability = 10; - final int updatedBigramProbability = 15; + final int bigramProbability = 150; + final int updatedBigramProbability = 200; addUnigramWord(binaryDictionary, "aaa", unigramProbability); addUnigramWord(binaryDictionary, "abb", unigramProbability); addUnigramWord(binaryDictionary, "bcc", unigramProbability); @@ -291,31 +332,32 @@ public class BinaryDictionaryTests extends AndroidTestCase { addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability); addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability); - final int probability = binaryDictionary.calculateProbability(unigramProbability, - bigramProbability); - assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb")); - assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc")); - assertEquals(true, binaryDictionary.isValidBigram("abb", "aaa")); - assertEquals(true, binaryDictionary.isValidBigram("abb", "bcc")); - assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "abb")); - assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "bcc")); - assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa")); - assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc")); + assertTrue(isValidBigram(binaryDictionary, "aaa", "abb")); + assertTrue(isValidBigram(binaryDictionary, "aaa", "bcc")); + assertTrue(isValidBigram(binaryDictionary, "abb", "aaa")); + assertTrue(isValidBigram(binaryDictionary, "abb", "bcc")); + if (canCheckBigramProbability(formatVersion)) { + assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "abb")); + assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bcc")); + assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa")); + assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc")); + } addBigramWords(binaryDictionary, "aaa", "abb", updatedBigramProbability); - final int updatedProbability = binaryDictionary.calculateProbability(unigramProbability, - updatedBigramProbability); - assertEquals(updatedProbability, binaryDictionary.getBigramProbability("aaa", "abb")); + if (canCheckBigramProbability(formatVersion)) { + assertEquals(updatedBigramProbability, + getBigramProbability(binaryDictionary, "aaa", "abb")); + } - assertEquals(false, binaryDictionary.isValidBigram("bcc", "aaa")); - assertEquals(false, binaryDictionary.isValidBigram("bcc", "bbc")); - assertEquals(false, binaryDictionary.isValidBigram("aaa", "aaa")); + assertFalse(isValidBigram(binaryDictionary, "bcc", "aaa")); + assertFalse(isValidBigram(binaryDictionary, "bcc", "bbc")); + assertFalse(isValidBigram(binaryDictionary, "aaa", "aaa")); assertEquals(Dictionary.NOT_A_PROBABILITY, - binaryDictionary.getBigramProbability("bcc", "aaa")); + getBigramProbability(binaryDictionary, "bcc", "aaa")); assertEquals(Dictionary.NOT_A_PROBABILITY, - binaryDictionary.getBigramProbability("bcc", "bbc")); + getBigramProbability(binaryDictionary, "bcc", "bbc")); assertEquals(Dictionary.NOT_A_PROBABILITY, - binaryDictionary.getBigramProbability("aaa", "aaa")); + getBigramProbability(binaryDictionary, "aaa", "aaa")); // Testing bigram link. addUnigramWord(binaryDictionary, "abcde", unigramProbability); @@ -324,17 +366,26 @@ public class BinaryDictionaryTests extends AndroidTestCase { addUnigramWord(binaryDictionary, "fgh", unigramProbability); addUnigramWord(binaryDictionary, "abc", unigramProbability); addUnigramWord(binaryDictionary, "f", unigramProbability); - assertEquals(probability, binaryDictionary.getBigramProbability("abcde", "fghij")); + + if (canCheckBigramProbability(formatVersion)) { + assertEquals(bigramProbability, + getBigramProbability(binaryDictionary, "abcde", "fghij")); + } assertEquals(Dictionary.NOT_A_PROBABILITY, - binaryDictionary.getBigramProbability("abcde", "fgh")); + getBigramProbability(binaryDictionary, "abcde", "fgh")); addBigramWords(binaryDictionary, "abcde", "fghij", updatedBigramProbability); - assertEquals(updatedProbability, binaryDictionary.getBigramProbability("abcde", "fghij")); + if (canCheckBigramProbability(formatVersion)) { + assertEquals(updatedBigramProbability, + getBigramProbability(binaryDictionary, "abcde", "fghij")); + } dictFile.delete(); } public void testRandomlyAddBigramWords() { - testRandomlyAddBigramWords(FormatSpec.VERSION4); + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testRandomlyAddBigramWords(formatVersion); + } } private void testRandomlyAddBigramWords(final int formatVersion) { @@ -354,12 +405,11 @@ public class BinaryDictionaryTests extends AndroidTestCase { 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); - final ArrayList<String> words = new ArrayList<String>(); - final ArrayList<Pair<String, String>> bigramWords = new ArrayList<Pair<String,String>>(); + final ArrayList<String> words = new ArrayList<>(); + final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>(); final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); - final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>(); - final HashMap<Pair<String, String>, Integer> bigramProbabilities = - new HashMap<Pair<String, String>, Integer>(); + final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); + final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); for (int i = 0; i < wordCount; ++i) { final String word = CodePointUtils.generateWord(random, codePointSet); @@ -375,27 +425,32 @@ public class BinaryDictionaryTests extends AndroidTestCase { if (TextUtils.equals(word0, word1)) { continue; } - final Pair<String, String> bigram = new Pair<String, String>(word0, word1); + final Pair<String, String> bigram = new Pair<>(word0, word1); bigramWords.add(bigram); - final int bigramProbability = random.nextInt(0xF); + final int unigramProbability = unigramProbabilities.get(word1); + final int bigramProbability = + unigramProbability + random.nextInt(0xFF - unigramProbability); bigramProbabilities.put(bigram, bigramProbability); addBigramWords(binaryDictionary, word0, word1, bigramProbability); } for (final Pair<String, String> bigram : bigramWords) { - final int unigramProbability = unigramProbabilities.get(bigram.second); final int bigramProbability = bigramProbabilities.get(bigram); - final int probability = binaryDictionary.calculateProbability(unigramProbability, - bigramProbability); - assertEquals(probability, - binaryDictionary.getBigramProbability(bigram.first, bigram.second)); + assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY, + isValidBigram(binaryDictionary, bigram.first, bigram.second)); + if (canCheckBigramProbability(formatVersion)) { + assertEquals(bigramProbability, + getBigramProbability(binaryDictionary, bigram.first, bigram.second)); + } } dictFile.delete(); } public void testRemoveBigramWords() { - testRemoveBigramWords(FormatSpec.VERSION4); + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testRemoveBigramWords(formatVersion); + } } private void testRemoveBigramWords(final int formatVersion) { @@ -409,7 +464,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); final int unigramProbability = 100; - final int bigramProbability = 10; + final int bigramProbability = 150; addUnigramWord(binaryDictionary, "aaa", unigramProbability); addUnigramWord(binaryDictionary, "abb", unigramProbability); addUnigramWord(binaryDictionary, "bcc", unigramProbability); @@ -418,34 +473,36 @@ public class BinaryDictionaryTests extends AndroidTestCase { addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability); addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability); - assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb")); - assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc")); - assertEquals(true, binaryDictionary.isValidBigram("abb", "aaa")); - assertEquals(true, binaryDictionary.isValidBigram("abb", "bcc")); + assertTrue(isValidBigram(binaryDictionary, "aaa", "abb")); + assertTrue(isValidBigram(binaryDictionary, "aaa", "bcc")); + assertTrue(isValidBigram(binaryDictionary, "abb", "aaa")); + assertTrue(isValidBigram(binaryDictionary, "abb", "bcc")); - binaryDictionary.removeBigramWords("aaa", "abb"); - assertEquals(false, binaryDictionary.isValidBigram("aaa", "abb")); + removeBigramEntry(binaryDictionary, "aaa", "abb"); + assertFalse(isValidBigram(binaryDictionary, "aaa", "abb")); addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability); - assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb")); + assertTrue(isValidBigram(binaryDictionary, "aaa", "abb")); - binaryDictionary.removeBigramWords("aaa", "bcc"); - assertEquals(false, binaryDictionary.isValidBigram("aaa", "bcc")); - binaryDictionary.removeBigramWords("abb", "aaa"); - assertEquals(false, binaryDictionary.isValidBigram("abb", "aaa")); - binaryDictionary.removeBigramWords("abb", "bcc"); - assertEquals(false, binaryDictionary.isValidBigram("abb", "bcc")); + removeBigramEntry(binaryDictionary, "aaa", "bcc"); + assertFalse(isValidBigram(binaryDictionary, "aaa", "bcc")); + removeBigramEntry(binaryDictionary, "abb", "aaa"); + assertFalse(isValidBigram(binaryDictionary, "abb", "aaa")); + removeBigramEntry(binaryDictionary, "abb", "bcc"); + assertFalse(isValidBigram(binaryDictionary, "abb", "bcc")); - binaryDictionary.removeBigramWords("aaa", "abb"); + removeBigramEntry(binaryDictionary, "aaa", "abb"); // Test remove non-existing bigram operation. - binaryDictionary.removeBigramWords("aaa", "abb"); - binaryDictionary.removeBigramWords("bcc", "aaa"); + removeBigramEntry(binaryDictionary, "aaa", "abb"); + removeBigramEntry(binaryDictionary, "bcc", "aaa"); dictFile.delete(); } public void testFlushDictionary() { - testFlushDictionary(FormatSpec.VERSION4); + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testFlushDictionary(formatVersion); + } } private void testFlushDictionary(final int formatVersion) { @@ -497,7 +554,9 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testFlushWithGCDictionary() { - testFlushWithGCDictionary(FormatSpec.VERSION4); + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testFlushWithGCDictionary(formatVersion); + } } private void testFlushWithGCDictionary(final int formatVersion) { @@ -512,7 +571,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); final int unigramProbability = 100; - final int bigramProbability = 10; + final int bigramProbability = 150; addUnigramWord(binaryDictionary, "aaa", unigramProbability); addUnigramWord(binaryDictionary, "abb", unigramProbability); addUnigramWord(binaryDictionary, "bcc", unigramProbability); @@ -526,18 +585,18 @@ public class BinaryDictionaryTests extends AndroidTestCase { binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); - final int probability = binaryDictionary.calculateProbability(unigramProbability, - bigramProbability); assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa")); assertEquals(unigramProbability, binaryDictionary.getFrequency("abb")); assertEquals(unigramProbability, binaryDictionary.getFrequency("bcc")); - assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "abb")); - assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "bcc")); - assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa")); - assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc")); - assertEquals(false, binaryDictionary.isValidBigram("bcc", "aaa")); - assertEquals(false, binaryDictionary.isValidBigram("bcc", "bbc")); - assertEquals(false, binaryDictionary.isValidBigram("aaa", "aaa")); + if (canCheckBigramProbability(formatVersion)) { + assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "abb")); + assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bcc")); + assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa")); + assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc")); + } + assertFalse(isValidBigram(binaryDictionary, "bcc", "aaa")); + assertFalse(isValidBigram(binaryDictionary, "bcc", "bbc")); + assertFalse(isValidBigram(binaryDictionary, "aaa", "aaa")); binaryDictionary.flushWithGC(); binaryDictionary.close(); @@ -545,7 +604,9 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testAddBigramWordsAndFlashWithGC() { - testAddBigramWordsAndFlashWithGC(FormatSpec.VERSION4); + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testAddBigramWordsAndFlashWithGC(formatVersion); + } } // TODO: Evaluate performance of GC @@ -567,12 +628,11 @@ public class BinaryDictionaryTests extends AndroidTestCase { 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); - final ArrayList<String> words = new ArrayList<String>(); - final ArrayList<Pair<String, String>> bigramWords = new ArrayList<Pair<String,String>>(); + final ArrayList<String> words = new ArrayList<>(); + final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>(); final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); - final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>(); - final HashMap<Pair<String, String>, Integer> bigramProbabilities = - new HashMap<Pair<String, String>, Integer>(); + final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); + final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); for (int i = 0; i < wordCount; ++i) { final String word = CodePointUtils.generateWord(random, codePointSet); @@ -588,9 +648,11 @@ public class BinaryDictionaryTests extends AndroidTestCase { if (TextUtils.equals(word0, word1)) { continue; } - final Pair<String, String> bigram = new Pair<String, String>(word0, word1); + final Pair<String, String> bigram = new Pair<>(word0, word1); bigramWords.add(bigram); - final int bigramProbability = random.nextInt(0xF); + final int unigramProbability = unigramProbabilities.get(word1); + final int bigramProbability = + unigramProbability + random.nextInt(0xFF - unigramProbability); bigramProbabilities.put(bigram, bigramProbability); addBigramWords(binaryDictionary, word0, word1, bigramProbability); } @@ -601,20 +663,24 @@ public class BinaryDictionaryTests extends AndroidTestCase { 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + for (final Pair<String, String> bigram : bigramWords) { - final int unigramProbability = unigramProbabilities.get(bigram.second); final int bigramProbability = bigramProbabilities.get(bigram); - final int probability = binaryDictionary.calculateProbability(unigramProbability, - bigramProbability); - assertEquals(probability, - binaryDictionary.getBigramProbability(bigram.first, bigram.second)); + assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY, + isValidBigram(binaryDictionary, bigram.first, bigram.second)); + if (canCheckBigramProbability(formatVersion)) { + assertEquals(bigramProbability, + getBigramProbability(binaryDictionary, bigram.first, bigram.second)); + } } dictFile.delete(); } public void testRandomOperationsAndFlashWithGC() { - testRandomOperationsAndFlashWithGC(FormatSpec.VERSION4); + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testRandomOperationsAndFlashWithGC(formatVersion); + } } private void testRandomOperationsAndFlashWithGC(final int formatVersion) { @@ -639,12 +705,11 @@ public class BinaryDictionaryTests extends AndroidTestCase { BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); - final ArrayList<String> words = new ArrayList<String>(); - final ArrayList<Pair<String, String>> bigramWords = new ArrayList<Pair<String,String>>(); + final ArrayList<String> words = new ArrayList<>(); + final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>(); final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); - final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>(); - final HashMap<Pair<String, String>, Integer> bigramProbabilities = - new HashMap<Pair<String, String>, Integer>(); + final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); + final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); for (int i = 0; i < initialUnigramCount; ++i) { final String word = CodePointUtils.generateWord(random, codePointSet); words.add(word); @@ -680,8 +745,10 @@ public class BinaryDictionaryTests extends AndroidTestCase { if (TextUtils.equals(word0, word1)) { continue; } - final int bigramProbability = random.nextInt(0xF); - final Pair<String, String> bigram = new Pair<String, String>(word0, word1); + final int unigramProbability = unigramProbabilities.get(word1); + final int bigramProbability = + unigramProbability + random.nextInt(0xFF - unigramProbability); + final Pair<String, String> bigram = new Pair<>(word0, word1); bigramWords.add(bigram); bigramProbabilities.put(bigram, bigramProbability); addBigramWords(binaryDictionary, word0, word1, bigramProbability); @@ -692,7 +759,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { final Pair<String, String> bigram = bigramWords.get(bigramIndex); bigramWords.remove(bigramIndex); bigramProbabilities.remove(bigram); - binaryDictionary.removeBigramWords(bigram.first, bigram.second); + removeBigramEntry(binaryDictionary, bigram.first, bigram.second); } } @@ -705,17 +772,20 @@ public class BinaryDictionaryTests extends AndroidTestCase { // Test whether the all bigram operations are collectlly handled. for (int i = 0; i < bigramWords.size(); i++) { final Pair<String, String> bigram = bigramWords.get(i); - final int unigramProbability = unigramProbabilities.get(bigram.second); final int probability; if (bigramProbabilities.containsKey(bigram)) { final int bigramProbability = bigramProbabilities.get(bigram); - probability = binaryDictionary.calculateProbability(unigramProbability, - bigramProbability); + probability = bigramProbability; } else { probability = Dictionary.NOT_A_PROBABILITY; } - assertEquals(probability, - binaryDictionary.getBigramProbability(bigram.first, bigram.second)); + + if (canCheckBigramProbability(formatVersion)) { + assertEquals(probability, + getBigramProbability(binaryDictionary, bigram.first, bigram.second)); + } + assertEquals(probability != Dictionary.NOT_A_PROBABILITY, + isValidBigram(binaryDictionary, bigram.first, bigram.second)); } binaryDictionary.flushWithGC(); binaryDictionary.close(); @@ -725,7 +795,9 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testAddManyUnigramsAndFlushWithGC() { - testAddManyUnigramsAndFlushWithGC(FormatSpec.VERSION4); + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testAddManyUnigramsAndFlushWithGC(formatVersion); + } } private void testAddManyUnigramsAndFlushWithGC(final int formatVersion) { @@ -742,8 +814,8 @@ public class BinaryDictionaryTests extends AndroidTestCase { fail("IOException while writing an initial dictionary : " + e); } - final ArrayList<String> words = new ArrayList<String>(); - final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>(); + final ArrayList<String> words = new ArrayList<>(); + final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); BinaryDictionary binaryDictionary; @@ -773,7 +845,9 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testUnigramAndBigramCount() { - testUnigramAndBigramCount(FormatSpec.VERSION4); + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testUnigramAndBigramCount(formatVersion); + } } private void testUnigramAndBigramCount(final int formatVersion) { @@ -791,8 +865,8 @@ public class BinaryDictionaryTests extends AndroidTestCase { fail("IOException while writing an initial dictionary : " + e); } - final ArrayList<String> words = new ArrayList<String>(); - final HashSet<Pair<String, String>> bigrams = new HashSet<Pair<String, String>>(); + final ArrayList<String> words = new ArrayList<>(); + final HashSet<Pair<String, String>> bigrams = new HashSet<>(); final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); BinaryDictionary binaryDictionary; @@ -812,18 +886,18 @@ public class BinaryDictionaryTests extends AndroidTestCase { if (TextUtils.equals(word0, word1)) { continue; } - bigrams.add(new Pair<String, String>(word0, word1)); + bigrams.add(new Pair<>(word0, word1)); final int bigramProbability = random.nextInt(0xF); addBigramWords(binaryDictionary, word0, word1, bigramProbability); } - assertEquals(new HashSet<String>(words).size(), Integer.parseInt( + assertEquals(new HashSet<>(words).size(), Integer.parseInt( binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY))); - assertEquals(new HashSet<Pair<String, String>>(bigrams).size(), Integer.parseInt( + assertEquals(new HashSet<>(bigrams).size(), Integer.parseInt( binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY))); binaryDictionary.flushWithGC(); - assertEquals(new HashSet<String>(words).size(), Integer.parseInt( + assertEquals(new HashSet<>(words).size(), Integer.parseInt( binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY))); - assertEquals(new HashSet<Pair<String, String>>(bigrams).size(), Integer.parseInt( + assertEquals(new HashSet<>(bigrams).size(), Integer.parseInt( binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY))); binaryDictionary.close(); } @@ -832,7 +906,9 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testAddMultipleDictionaryEntries() { - testAddMultipleDictionaryEntries(FormatSpec.VERSION4); + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testAddMultipleDictionaryEntries(formatVersion); + } } private void testAddMultipleDictionaryEntries(final int formatVersion) { @@ -850,16 +926,15 @@ public class BinaryDictionaryTests extends AndroidTestCase { } final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); - final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>(); - final HashMap<Pair<String, String>, Integer> bigramProbabilities = - new HashMap<Pair<String, String>, Integer>(); + final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); + final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); final LanguageModelParam[] languageModelParams = new LanguageModelParam[lmParamCount]; String prevWord = null; for (int i = 0; i < languageModelParams.length; i++) { final String word = CodePointUtils.generateWord(random, codePointSet); final int probability = random.nextInt(0xFF); - final int bigramProbability = random.nextInt(0xF); + final int bigramProbability = probability + random.nextInt(0xFF - probability); unigramProbabilities.put(word, probability); if (prevWord == null) { languageModelParams[i] = new LanguageModelParam(word, probability, @@ -867,7 +942,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { } else { languageModelParams[i] = new LanguageModelParam(prevWord, word, probability, bigramProbability, BinaryDictionary.NOT_A_VALID_TIMESTAMP); - bigramProbabilities.put(new Pair<String, String>(prevWord, word), + bigramProbabilities.put(new Pair<>(prevWord, word), bigramProbability); } prevWord = (random.nextDouble() < bigramContinueRate) ? word : null; @@ -885,16 +960,20 @@ public class BinaryDictionaryTests extends AndroidTestCase { for (Map.Entry<Pair<String, String>, Integer> entry : bigramProbabilities.entrySet()) { final String word0 = entry.getKey().first; final String word1 = entry.getKey().second; - final int unigramProbability = unigramProbabilities.get(word1); final int bigramProbability = entry.getValue(); - final int probability = binaryDictionary.calculateProbability( - unigramProbability, bigramProbability); - assertEquals(probability, binaryDictionary.getBigramProbability(word0, word1)); + assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY, + isValidBigram(binaryDictionary, word0, word1)); + if (canCheckBigramProbability(formatVersion)) { + assertEquals(bigramProbability, + getBigramProbability(binaryDictionary, word0, word1)); + } } } public void testGetWordProperties() { - testGetWordProperties(FormatSpec.VERSION4); + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testGetWordProperties(formatVersion); + } } private void testGetWordProperties(final int formatVersion) { @@ -918,11 +997,10 @@ public class BinaryDictionaryTests extends AndroidTestCase { final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord"); assertFalse(invalidWordProperty.isValid()); - final ArrayList<String> words = new ArrayList<String>(); - final HashMap<String, Integer> wordProbabilities = new HashMap<String, Integer>(); - final HashMap<String, HashSet<String>> bigrams = new HashMap<String, HashSet<String>>(); - final HashMap<Pair<String, String>, Integer> bigramProbabilities = - new HashMap<Pair<String, String>, Integer>(); + final ArrayList<String> words = new ArrayList<>(); + final HashMap<String, Integer> wordProbabilities = new HashMap<>(); + final HashMap<String, HashSet<String>> bigrams = new HashMap<>(); + final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); for (int i = 0; i < UNIGRAM_COUNT; i++) { final String word = CodePointUtils.generateWord(random, codePointSet); @@ -930,9 +1008,10 @@ public class BinaryDictionaryTests extends AndroidTestCase { final boolean isNotAWord = random.nextBoolean(); final boolean isBlacklisted = random.nextBoolean(); // TODO: Add tests for historical info. - binaryDictionary.addUnigramWord(word, unigramProbability, + binaryDictionary.addUnigramEntry(word, unigramProbability, null /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY, - isNotAWord, isBlacklisted, BinaryDictionary.NOT_A_VALID_TIMESTAMP); + false /* isBeginningOfSentence */, isNotAWord, isBlacklisted, + BinaryDictionary.NOT_A_VALID_TIMESTAMP); if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { binaryDictionary.flushWithGC(); } @@ -957,18 +1036,19 @@ public class BinaryDictionaryTests extends AndroidTestCase { } final String word0 = words.get(word0Index); final String word1 = words.get(word1Index); - final int bigramProbability = random.nextInt(0xF); - binaryDictionary.addBigramWords(word0, word1, bigramProbability, - BinaryDictionary.NOT_A_VALID_TIMESTAMP); + final int unigramProbability = wordProbabilities.get(word1); + final int bigramProbability = + unigramProbability + random.nextInt(0xFF - unigramProbability); + addBigramWords(binaryDictionary, word0, word1, bigramProbability); if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { binaryDictionary.flushWithGC(); } if (!bigrams.containsKey(word0)) { - final HashSet<String> bigramWord1s = new HashSet<String>(); + final HashSet<String> bigramWord1s = new HashSet<>(); bigrams.put(word0, bigramWord1s); } bigrams.get(word0).add(word1); - bigramProbabilities.put(new Pair<String, String>(word0, word1), bigramProbability); + bigramProbabilities.put(new Pair<>(word0, word1), bigramProbability); } for (int i = 0; i < words.size(); i++) { @@ -982,18 +1062,18 @@ public class BinaryDictionaryTests extends AndroidTestCase { for (int j = 0; j < wordProperty.mBigrams.size(); j++) { final String word1 = wordProperty.mBigrams.get(j).mWord; assertTrue(bigramWord1s.contains(word1)); - final int bigramProbabilityDelta = bigramProbabilities.get( - new Pair<String, String>(word0, word1)); - final int unigramProbability = wordProbabilities.get(word1); - final int bigramProbablity = binaryDictionary.calculateProbability( - unigramProbability, bigramProbabilityDelta); - assertEquals(wordProperty.mBigrams.get(j).getProbability(), bigramProbablity); + if (canCheckBigramProbability(formatVersion)) { + final int bigramProbability = bigramProbabilities.get(new Pair<>(word0, word1)); + assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability()); + } } } } public void testIterateAllWords() { - testIterateAllWords(FormatSpec.VERSION4); + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testIterateAllWords(formatVersion); + } } private void testIterateAllWords(final int formatVersion) { @@ -1017,12 +1097,11 @@ public class BinaryDictionaryTests extends AndroidTestCase { final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord"); assertFalse(invalidWordProperty.isValid()); - final ArrayList<String> words = new ArrayList<String>(); - final HashMap<String, Integer> wordProbabilitiesToCheckLater = - new HashMap<String, Integer>(); - final HashMap<String, HashSet<String>> bigrams = new HashMap<String, HashSet<String>>(); + final ArrayList<String> words = new ArrayList<>(); + final HashMap<String, Integer> wordProbabilitiesToCheckLater = new HashMap<>(); + final HashMap<String, HashSet<String>> bigrams = new HashMap<>(); final HashMap<Pair<String, String>, Integer> bigramProbabilitiesToCheckLater = - new HashMap<Pair<String, String>, Integer>(); + new HashMap<>(); for (int i = 0; i < UNIGRAM_COUNT; i++) { final String word = CodePointUtils.generateWord(random, codePointSet); @@ -1043,24 +1122,24 @@ public class BinaryDictionaryTests extends AndroidTestCase { } final String word0 = words.get(word0Index); final String word1 = words.get(word1Index); - final int bigramProbability = random.nextInt(0xF); - binaryDictionary.addBigramWords(word0, word1, bigramProbability, - BinaryDictionary.NOT_A_VALID_TIMESTAMP); + final int unigramProbability = wordProbabilitiesToCheckLater.get(word1); + final int bigramProbability = + unigramProbability + random.nextInt(0xFF - unigramProbability); + addBigramWords(binaryDictionary, word0, word1, bigramProbability); if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { binaryDictionary.flushWithGC(); } if (!bigrams.containsKey(word0)) { - final HashSet<String> bigramWord1s = new HashSet<String>(); + final HashSet<String> bigramWord1s = new HashSet<>(); bigrams.put(word0, bigramWord1s); } bigrams.get(word0).add(word1); - bigramProbabilitiesToCheckLater.put( - new Pair<String, String>(word0, word1), bigramProbability); + bigramProbabilitiesToCheckLater.put(new Pair<>(word0, word1), bigramProbability); } - final HashSet<String> wordSet = new HashSet<String>(words); + final HashSet<String> wordSet = new HashSet<>(words); final HashSet<Pair<String, String>> bigramSet = - new HashSet<Pair<String,String>>(bigramProbabilitiesToCheckLater.keySet()); + new HashSet<>(bigramProbabilitiesToCheckLater.keySet()); int token = 0; do { final BinaryDictionary.GetNextWordPropertyResult result = @@ -1074,12 +1153,11 @@ public class BinaryDictionaryTests extends AndroidTestCase { for (int j = 0; j < wordProperty.mBigrams.size(); j++) { final String word1 = wordProperty.mBigrams.get(j).mWord; assertTrue(bigramWord1s.contains(word1)); - final int unigramProbability = wordProbabilitiesToCheckLater.get(word1); - final Pair<String, String> bigram = new Pair<String, String>(word0, word1); - final int bigramProbabilityDelta = bigramProbabilitiesToCheckLater.get(bigram); - final int bigramProbablity = binaryDictionary.calculateProbability( - unigramProbability, bigramProbabilityDelta); - assertEquals(wordProperty.mBigrams.get(j).getProbability(), bigramProbablity); + final Pair<String, String> bigram = new Pair<>(word0, word1); + if (canCheckBigramProbability(formatVersion)) { + final int bigramProbability = bigramProbabilitiesToCheckLater.get(bigram); + assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability()); + } bigramSet.remove(bigram); } token = result.mNextToken; @@ -1089,7 +1167,9 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testAddShortcuts() { - testAddShortcuts(FormatSpec.VERSION4); + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testAddShortcuts(formatVersion); + } } private void testAddShortcuts(final int formatVersion) { @@ -1105,26 +1185,26 @@ public class BinaryDictionaryTests extends AndroidTestCase { final int unigramProbability = 100; final int shortcutProbability = 10; - binaryDictionary.addUnigramWord("aaa", unigramProbability, "zzz", - shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */, - 0 /* timestamp */); + binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz", + shortcutProbability, false /* isBeginningOfSentence */, + false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */); WordProperty wordProperty = binaryDictionary.getWordProperty("aaa"); assertEquals(1, wordProperty.mShortcutTargets.size()); assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord); assertEquals(shortcutProbability, wordProperty.mShortcutTargets.get(0).getProbability()); final int updatedShortcutProbability = 2; - binaryDictionary.addUnigramWord("aaa", unigramProbability, "zzz", - updatedShortcutProbability, false /* isNotAWord */, false /* isBlacklisted */, - 0 /* timestamp */); + binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz", + updatedShortcutProbability, false /* isBeginningOfSentence */, + false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */); wordProperty = binaryDictionary.getWordProperty("aaa"); assertEquals(1, wordProperty.mShortcutTargets.size()); assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord); assertEquals(updatedShortcutProbability, wordProperty.mShortcutTargets.get(0).getProbability()); - binaryDictionary.addUnigramWord("aaa", unigramProbability, "yyy", - shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */, - 0 /* timestamp */); - final HashMap<String, Integer> shortcutTargets = new HashMap<String, Integer>(); + binaryDictionary.addUnigramEntry("aaa", unigramProbability, "yyy", + shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */, + false /* isBlacklisted */, 0 /* timestamp */); + final HashMap<String, Integer> shortcutTargets = new HashMap<>(); shortcutTargets.put("zzz", updatedShortcutProbability); shortcutTargets.put("yyy", shortcutProbability); wordProperty = binaryDictionary.getWordProperty("aaa"); @@ -1149,7 +1229,9 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testAddManyShortcuts() { - testAddManyShortcuts(FormatSpec.VERSION4); + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testAddManyShortcuts(formatVersion); + } } private void testAddManyShortcuts(final int formatVersion) { @@ -1160,10 +1242,9 @@ public class BinaryDictionaryTests extends AndroidTestCase { final int codePointSetSize = 20; final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); - final ArrayList<String> words = new ArrayList<String>(); - final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>(); - final HashMap<String, HashMap<String, Integer>> shortcutTargets = - new HashMap<String, HashMap<String, Integer>>(); + final ArrayList<String> words = new ArrayList<>(); + final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); + final HashMap<String, HashMap<String, Integer>> shortcutTargets = new HashMap<>(); File dictFile = null; try { @@ -1190,15 +1271,14 @@ public class BinaryDictionaryTests extends AndroidTestCase { final int shortcutProbability = random.nextInt(0xF); final String word = words.get(random.nextInt(words.size())); final int unigramProbability = unigramProbabilities.get(word); - binaryDictionary.addUnigramWord(word, unigramProbability, shortcutTarget, - shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */, - 0 /* timestamp */); + binaryDictionary.addUnigramEntry(word, unigramProbability, shortcutTarget, + shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */, + false /* isBlacklisted */, 0 /* timestamp */); if (shortcutTargets.containsKey(word)) { final HashMap<String, Integer> shortcutTargetsOfWord = shortcutTargets.get(word); shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability); } else { - final HashMap<String, Integer> shortcutTargetsOfWord = - new HashMap<String, Integer>(); + final HashMap<String, Integer> shortcutTargetsOfWord = new HashMap<>(); shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability); shortcutTargets.put(word, shortcutTargetsOfWord); } @@ -1223,4 +1303,198 @@ public class BinaryDictionaryTests extends AndroidTestCase { } } } + + public void testDictMigration() { + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, formatVersion); + } + } + + private void testDictMigration(final int fromFormatVersion, final int toFormatVersion) { + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", fromFormatVersion); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + final int unigramProbability = 100; + addUnigramWord(binaryDictionary, "aaa", unigramProbability); + addUnigramWord(binaryDictionary, "bbb", unigramProbability); + final int bigramProbability = 150; + addBigramWords(binaryDictionary, "aaa", "bbb", bigramProbability); + final int shortcutProbability = 10; + binaryDictionary.addUnigramEntry("ccc", unigramProbability, "xxx", shortcutProbability, + false /* isBeginningOfSentence */, false /* isNotAWord */, + false /* isBlacklisted */, 0 /* timestamp */); + binaryDictionary.addUnigramEntry("ddd", unigramProbability, null /* shortcutTarget */, + Dictionary.NOT_A_PROBABILITY, false /* isBeginningOfSentence */, + true /* isNotAWord */, true /* isBlacklisted */, 0 /* timestamp */); + assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa")); + assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb")); + assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb")); + assertEquals(fromFormatVersion, binaryDictionary.getFormatVersion()); + assertTrue(binaryDictionary.migrateTo(toFormatVersion)); + assertTrue(binaryDictionary.isValidDictionary()); + assertEquals(toFormatVersion, binaryDictionary.getFormatVersion()); + assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa")); + assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb")); + if (canCheckBigramProbability(toFormatVersion)) { + assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bbb")); + } + assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb")); + WordProperty wordProperty = binaryDictionary.getWordProperty("ccc"); + assertEquals(1, wordProperty.mShortcutTargets.size()); + assertEquals("xxx", wordProperty.mShortcutTargets.get(0).mWord); + wordProperty = binaryDictionary.getWordProperty("ddd"); + assertTrue(wordProperty.mIsBlacklistEntry); + assertTrue(wordProperty.mIsNotAWord); + } + + public void testLargeDictMigration() { + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testLargeDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, formatVersion); + } + } + + private void testLargeDictMigration(final int fromFormatVersion, final int toFormatVersion) { + final int UNIGRAM_COUNT = 3000; + final int BIGRAM_COUNT = 3000; + final int codePointSetSize = 50; + final long seed = System.currentTimeMillis(); + final Random random = new Random(seed); + + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", fromFormatVersion); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + + final ArrayList<String> words = new ArrayList<>(); + final ArrayList<Pair<String, String>> bigrams = new ArrayList<>(); + final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); + final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); + final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); + + for (int i = 0; i < UNIGRAM_COUNT; i++) { + final String word = CodePointUtils.generateWord(random, codePointSet); + final int unigramProbability = random.nextInt(0xFF); + addUnigramWord(binaryDictionary, word, unigramProbability); + if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { + binaryDictionary.flushWithGC(); + } + words.add(word); + unigramProbabilities.put(word, unigramProbability); + } + + for (int i = 0; i < BIGRAM_COUNT; i++) { + final int word0Index = random.nextInt(words.size()); + final int word1Index = random.nextInt(words.size()); + if (word0Index == word1Index) { + continue; + } + final String word0 = words.get(word0Index); + final String word1 = words.get(word1Index); + final int unigramProbability = unigramProbabilities.get(word1); + final int bigramProbability = + random.nextInt(0xFF - unigramProbability) + unigramProbability; + addBigramWords(binaryDictionary, word0, word1, bigramProbability); + if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { + binaryDictionary.flushWithGC(); + } + final Pair<String, String> bigram = new Pair<>(word0, word1); + bigrams.add(bigram); + bigramProbabilities.put(bigram, bigramProbability); + } + assertTrue(binaryDictionary.migrateTo(toFormatVersion)); + + for (final String word : words) { + assertEquals((int)unigramProbabilities.get(word), binaryDictionary.getFrequency(word)); + } + assertEquals(unigramProbabilities.size(), Integer.parseInt( + binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY))); + + for (final Pair<String, String> bigram : bigrams) { + if (canCheckBigramProbability(toFormatVersion)) { + assertEquals((int)bigramProbabilities.get(bigram), + getBigramProbability(binaryDictionary, bigram.first, bigram.second)); + } + assertTrue(isValidBigram(binaryDictionary, bigram.first, bigram.second)); + } + assertEquals(bigramProbabilities.size(), Integer.parseInt( + binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY))); + } + + public void testBeginningOfSentence() { + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + if (supportsBeginningOfSentence(formatVersion)) { + testBeginningOfSentence(formatVersion); + } + } + } + + private void testBeginningOfSentence(final int formatVersion) { + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + final int dummyProbability = 0; + final PrevWordsInfo prevWordsInfoBeginningOfSentence = PrevWordsInfo.BEGINNING_OF_SENTENCE; + final int bigramProbability = 200; + addUnigramWord(binaryDictionary, "aaa", dummyProbability); + binaryDictionary.addNgramEntry(prevWordsInfoBeginningOfSentence, "aaa", bigramProbability, + BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); + assertEquals(bigramProbability, + binaryDictionary.getNgramProbability(prevWordsInfoBeginningOfSentence, "aaa")); + binaryDictionary.addNgramEntry(prevWordsInfoBeginningOfSentence, "aaa", bigramProbability, + BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); + addUnigramWord(binaryDictionary, "bbb", dummyProbability); + binaryDictionary.addNgramEntry(prevWordsInfoBeginningOfSentence, "bbb", bigramProbability, + BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); + binaryDictionary.flushWithGC(); + assertEquals(bigramProbability, + binaryDictionary.getNgramProbability(prevWordsInfoBeginningOfSentence, "aaa")); + assertEquals(bigramProbability, + binaryDictionary.getNgramProbability(prevWordsInfoBeginningOfSentence, "bbb")); + } + + public void testGetMaxFrequencyOfExactMatches() { + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testGetMaxFrequencyOfExactMatches(formatVersion); + } + } + + private void testGetMaxFrequencyOfExactMatches(final int formatVersion) { + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + addUnigramWord(binaryDictionary, "abc", 10); + addUnigramWord(binaryDictionary, "aBc", 15); + assertEquals(15, binaryDictionary.getMaxFrequencyOfExactMatches("abc")); + addUnigramWord(binaryDictionary, "ab'c", 20); + assertEquals(20, binaryDictionary.getMaxFrequencyOfExactMatches("abc")); + addUnigramWord(binaryDictionary, "a-b-c", 25); + assertEquals(25, binaryDictionary.getMaxFrequencyOfExactMatches("abc")); + addUnigramWord(binaryDictionary, "ab-'-'-'-c", 30); + assertEquals(30, binaryDictionary.getMaxFrequencyOfExactMatches("abc")); + addUnigramWord(binaryDictionary, "ab c", 255); + assertEquals(30, binaryDictionary.getMaxFrequencyOfExactMatches("abc")); + } } |