diff options
Diffstat (limited to 'tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java')
-rw-r--r-- | tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java | 1212 |
1 files changed, 1020 insertions, 192 deletions
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java index 5b8f0e977..6ba18d665 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java @@ -21,8 +21,14 @@ import android.test.suitebuilder.annotation.LargeTest; import android.text.TextUtils; import android.util.Pair; +import com.android.inputmethod.latin.PrevWordsInfo.WordInfo; import com.android.inputmethod.latin.makedict.CodePointUtils; import com.android.inputmethod.latin.makedict.FormatSpec; +import com.android.inputmethod.latin.makedict.WeightedString; +import com.android.inputmethod.latin.makedict.WordProperty; +import com.android.inputmethod.latin.utils.BinaryDictionaryUtils; +import com.android.inputmethod.latin.utils.FileUtils; +import com.android.inputmethod.latin.utils.LanguageModelParam; import java.io.File; import java.io.IOException; @@ -33,39 +39,60 @@ import java.util.Locale; import java.util.Map; import java.util.Random; +// TODO Use the seed passed as an argument for makedict test. 
@LargeTest public class BinaryDictionaryTests extends AndroidTestCase { private static final String TEST_DICT_FILE_EXTENSION = ".testDict"; private static final String TEST_LOCALE = "test"; + private static final int[] DICT_FORMAT_VERSIONS = + new int[] { FormatSpec.VERSION4, FormatSpec.VERSION4_DEV }; - @Override - protected void setUp() throws Exception { - super.setUp(); + private static boolean canCheckBigramProbability(final int formatVersion) { + return formatVersion > FormatSpec.VERSION401; } - @Override - protected void tearDown() throws Exception { - super.tearDown(); + private static boolean supportsBeginningOfSentence(final int formatVersion) { + return formatVersion > FormatSpec.VERSION401; } - private File createEmptyDictionaryAndGetFile(final String filename) throws IOException { - final File file = File.createTempFile(filename, TEST_DICT_FILE_EXTENSION, + private File createEmptyDictionaryAndGetFile(final String dictId, + final int formatVersion) throws IOException { + if (formatVersion == FormatSpec.VERSION4 + || formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING + || formatVersion == FormatSpec.VERSION4_DEV) { + return createEmptyVer4DictionaryAndGetFile(dictId, formatVersion); + } else { + throw new IOException("Dictionary format version " + formatVersion + + " is not supported."); + } + } + + private File createEmptyVer4DictionaryAndGetFile(final String dictId, + final int formatVersion) throws IOException { + final File file = File.createTempFile(dictId, TEST_DICT_FILE_EXTENSION, getContext().getCacheDir()); - Map<String, String> attributeMap = new HashMap<String, String>(); - attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE, - FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE); - if (BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(), - 3 /* dictVersion */, attributeMap)) { + file.delete(); + file.mkdir(); + Map<String, String> attributeMap = new HashMap<>(); + if 
(BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), formatVersion, + Locale.ENGLISH, attributeMap)) { return file; } else { - throw new IOException("Empty dictionary cannot be created."); + throw new IOException("Empty dictionary " + file.getAbsolutePath() + + " cannot be created. Format version: " + formatVersion); } } public void testIsValidDictionary() { + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testIsValidDictionary(formatVersion); + } + } + + private void testIsValidDictionary(final int formatVersion) { File dictFile = null; try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); } @@ -77,7 +104,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { binaryDictionary.close(); assertFalse("binaryDictionary must be invalid after closing.", binaryDictionary.isValidDictionary()); - dictFile.delete(); + FileUtils.deleteRecursively(dictFile); binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); @@ -86,10 +113,126 @@ public class BinaryDictionaryTests extends AndroidTestCase { binaryDictionary.close(); } + public void testConstructingDictionaryOnMemory() { + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testConstructingDictionaryOnMemory(formatVersion); + } + } + + private void testConstructingDictionaryOnMemory(final int formatVersion) { + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + FileUtils.deleteRecursively(dictFile); + assertFalse(dictFile.exists()); + BinaryDictionary binaryDictionary = new 
BinaryDictionary(dictFile.getAbsolutePath(), + true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, formatVersion, + new HashMap<String, String>()); + assertTrue(binaryDictionary.isValidDictionary()); + assertEquals(formatVersion, binaryDictionary.getFormatVersion()); + final int probability = 100; + addUnigramWord(binaryDictionary, "word", probability); + assertEquals(probability, binaryDictionary.getFrequency("word")); + assertFalse(dictFile.exists()); + binaryDictionary.flush(); + assertTrue(dictFile.exists()); + assertTrue(binaryDictionary.isValidDictionary()); + assertEquals(formatVersion, binaryDictionary.getFormatVersion()); + assertEquals(probability, binaryDictionary.getFrequency("word")); + binaryDictionary.close(); + dictFile.delete(); + } + + public void testAddTooLongWord() { + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testAddTooLongWord(formatVersion); + } + } + + private void testAddTooLongWord(final int formatVersion) { + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + + final StringBuffer stringBuilder = new StringBuffer(); + for (int i = 0; i < Constants.DICTIONARY_MAX_WORD_LENGTH; i++) { + stringBuilder.append('a'); + } + final String validLongWord = stringBuilder.toString(); + stringBuilder.append('a'); + final String invalidLongWord = stringBuilder.toString(); + final int probability = 100; + addUnigramWord(binaryDictionary, "aaa", probability); + addUnigramWord(binaryDictionary, validLongWord, probability); + addUnigramWord(binaryDictionary, invalidLongWord, probability); + // Too long short cut. 
+ binaryDictionary.addUnigramEntry("a", probability, invalidLongWord, + 10 /* shortcutProbability */, false /* isBeginningOfSentence */, + false /* isNotAWord */, false /* isBlacklisted */, + BinaryDictionary.NOT_A_VALID_TIMESTAMP); + addUnigramWord(binaryDictionary, "abc", probability); + final int updatedProbability = 200; + // Update. + addUnigramWord(binaryDictionary, validLongWord, updatedProbability); + addUnigramWord(binaryDictionary, invalidLongWord, updatedProbability); + addUnigramWord(binaryDictionary, "abc", updatedProbability); + + assertEquals(probability, binaryDictionary.getFrequency("aaa")); + assertEquals(updatedProbability, binaryDictionary.getFrequency(validLongWord)); + assertEquals(BinaryDictionary.NOT_A_PROBABILITY, + binaryDictionary.getFrequency(invalidLongWord)); + assertEquals(updatedProbability, binaryDictionary.getFrequency("abc")); + dictFile.delete(); + } + + private static void addUnigramWord(final BinaryDictionary binaryDictionary, final String word, + final int probability) { + binaryDictionary.addUnigramEntry(word, probability, "" /* shortcutTarget */, + BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */, + false /* isBeginningOfSentence */, false /* isNotAWord */, + false /* isBlacklisted */, BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); + } + + private static void addBigramWords(final BinaryDictionary binaryDictionary, final String word0, + final String word1, final int probability) { + binaryDictionary.addNgramEntry(new PrevWordsInfo(new WordInfo(word0)), word1, probability, + BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); + } + + private static boolean isValidBigram(final BinaryDictionary binaryDictionary, + final String word0, final String word1) { + return binaryDictionary.isValidNgram(new PrevWordsInfo(new WordInfo(word0)), word1); + } + + private static void removeBigramEntry(final BinaryDictionary binaryDictionary, + final String word0, final String word1) { + 
binaryDictionary.removeNgramEntry(new PrevWordsInfo(new WordInfo(word0)), word1); + } + + private static int getBigramProbability(final BinaryDictionary binaryDictionary, + final String word0, final String word1) { + return binaryDictionary.getNgramProbability(new PrevWordsInfo(new WordInfo(word0)), word1); + } + public void testAddUnigramWord() { + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testAddUnigramWord(formatVersion); + } + } + + private void testAddUnigramWord(final int formatVersion) { File dictFile = null; try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); } @@ -98,21 +241,21 @@ public class BinaryDictionaryTests extends AndroidTestCase { Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); final int probability = 100; - binaryDictionary.addUnigramWord("aaa", probability); + addUnigramWord(binaryDictionary, "aaa", probability); // Reallocate and create. - binaryDictionary.addUnigramWord("aab", probability); + addUnigramWord(binaryDictionary, "aab", probability); // Insert into children. - binaryDictionary.addUnigramWord("aac", probability); + addUnigramWord(binaryDictionary, "aac", probability); // Make terminal. - binaryDictionary.addUnigramWord("aa", probability); + addUnigramWord(binaryDictionary, "aa", probability); // Create children. - binaryDictionary.addUnigramWord("aaaa", probability); + addUnigramWord(binaryDictionary, "aaaa", probability); // Reallocate and make termianl. - binaryDictionary.addUnigramWord("a", probability); + addUnigramWord(binaryDictionary, "a", probability); final int updatedProbability = 200; // Update. 
- binaryDictionary.addUnigramWord("aaa", updatedProbability); + addUnigramWord(binaryDictionary, "aaa", updatedProbability); assertEquals(probability, binaryDictionary.getFrequency("aab")); assertEquals(probability, binaryDictionary.getFrequency("aac")); @@ -125,13 +268,19 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testRandomlyAddUnigramWord() { + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testRandomlyAddUnigramWord(formatVersion); + } + } + + private void testRandomlyAddUnigramWord(final int formatVersion) { final int wordCount = 1000; final int codePointSetSize = 50; final long seed = System.currentTimeMillis(); File dictFile = null; try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); } @@ -139,7 +288,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); - final HashMap<String, Integer> probabilityMap = new HashMap<String, Integer>(); + final HashMap<String, Integer> probabilityMap = new HashMap<>(); // Test a word that isn't contained within the dictionary. 
final Random random = new Random(seed); final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); @@ -148,7 +297,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { probabilityMap.put(word, random.nextInt(0xFF)); } for (String word : probabilityMap.keySet()) { - binaryDictionary.addUnigramWord(word, probabilityMap.get(word)); + addUnigramWord(binaryDictionary, word, probabilityMap.get(word)); } for (String word : probabilityMap.keySet()) { assertEquals(word, (int)probabilityMap.get(word), binaryDictionary.getFrequency(word)); @@ -157,9 +306,15 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testAddBigramWords() { + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testAddBigramWords(formatVersion); + } + } + + private void testAddBigramWords(final int formatVersion) { File dictFile = null; try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); } @@ -168,59 +323,73 @@ public class BinaryDictionaryTests extends AndroidTestCase { Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); final int unigramProbability = 100; - final int bigramProbability = 10; - final int updatedBigramProbability = 15; - binaryDictionary.addUnigramWord("aaa", unigramProbability); - binaryDictionary.addUnigramWord("abb", unigramProbability); - binaryDictionary.addUnigramWord("bcc", unigramProbability); - binaryDictionary.addBigramWords("aaa", "abb", bigramProbability); - binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability); - binaryDictionary.addBigramWords("abb", "aaa", bigramProbability); - binaryDictionary.addBigramWords("abb", "bcc", bigramProbability); - - final int probability = binaryDictionary.calculateProbability(unigramProbability, - bigramProbability); - assertEquals(true, 
binaryDictionary.isValidBigram("aaa", "abb")); - assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc")); - assertEquals(true, binaryDictionary.isValidBigram("abb", "aaa")); - assertEquals(true, binaryDictionary.isValidBigram("abb", "bcc")); - assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "abb")); - assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "bcc")); - assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa")); - assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc")); - - binaryDictionary.addBigramWords("aaa", "abb", updatedBigramProbability); - final int updatedProbability = binaryDictionary.calculateProbability(unigramProbability, - updatedBigramProbability); - assertEquals(updatedProbability, binaryDictionary.getBigramProbability("aaa", "abb")); - - assertEquals(false, binaryDictionary.isValidBigram("bcc", "aaa")); - assertEquals(false, binaryDictionary.isValidBigram("bcc", "bbc")); - assertEquals(false, binaryDictionary.isValidBigram("aaa", "aaa")); + final int bigramProbability = 150; + final int updatedBigramProbability = 200; + addUnigramWord(binaryDictionary, "aaa", unigramProbability); + addUnigramWord(binaryDictionary, "abb", unigramProbability); + addUnigramWord(binaryDictionary, "bcc", unigramProbability); + addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability); + addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability); + addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability); + addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability); + + assertTrue(isValidBigram(binaryDictionary, "aaa", "abb")); + assertTrue(isValidBigram(binaryDictionary, "aaa", "bcc")); + assertTrue(isValidBigram(binaryDictionary, "abb", "aaa")); + assertTrue(isValidBigram(binaryDictionary, "abb", "bcc")); + if (canCheckBigramProbability(formatVersion)) { + assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", 
"abb")); + assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bcc")); + assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa")); + assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc")); + } + + addBigramWords(binaryDictionary, "aaa", "abb", updatedBigramProbability); + if (canCheckBigramProbability(formatVersion)) { + assertEquals(updatedBigramProbability, + getBigramProbability(binaryDictionary, "aaa", "abb")); + } + + assertFalse(isValidBigram(binaryDictionary, "bcc", "aaa")); + assertFalse(isValidBigram(binaryDictionary, "bcc", "bbc")); + assertFalse(isValidBigram(binaryDictionary, "aaa", "aaa")); assertEquals(Dictionary.NOT_A_PROBABILITY, - binaryDictionary.getBigramProbability("bcc", "aaa")); + getBigramProbability(binaryDictionary, "bcc", "aaa")); assertEquals(Dictionary.NOT_A_PROBABILITY, - binaryDictionary.getBigramProbability("bcc", "bbc")); + getBigramProbability(binaryDictionary, "bcc", "bbc")); assertEquals(Dictionary.NOT_A_PROBABILITY, - binaryDictionary.getBigramProbability("aaa", "aaa")); + getBigramProbability(binaryDictionary, "aaa", "aaa")); // Testing bigram link. 
- binaryDictionary.addUnigramWord("abcde", unigramProbability); - binaryDictionary.addUnigramWord("fghij", unigramProbability); - binaryDictionary.addBigramWords("abcde", "fghij", bigramProbability); - binaryDictionary.addUnigramWord("fgh", unigramProbability); - binaryDictionary.addUnigramWord("abc", unigramProbability); - binaryDictionary.addUnigramWord("f", unigramProbability); - assertEquals(probability, binaryDictionary.getBigramProbability("abcde", "fghij")); + addUnigramWord(binaryDictionary, "abcde", unigramProbability); + addUnigramWord(binaryDictionary, "fghij", unigramProbability); + addBigramWords(binaryDictionary, "abcde", "fghij", bigramProbability); + addUnigramWord(binaryDictionary, "fgh", unigramProbability); + addUnigramWord(binaryDictionary, "abc", unigramProbability); + addUnigramWord(binaryDictionary, "f", unigramProbability); + + if (canCheckBigramProbability(formatVersion)) { + assertEquals(bigramProbability, + getBigramProbability(binaryDictionary, "abcde", "fghij")); + } assertEquals(Dictionary.NOT_A_PROBABILITY, - binaryDictionary.getBigramProbability("abcde", "fgh")); - binaryDictionary.addBigramWords("abcde", "fghij", updatedBigramProbability); - assertEquals(updatedProbability, binaryDictionary.getBigramProbability("abcde", "fghij")); + getBigramProbability(binaryDictionary, "abcde", "fgh")); + addBigramWords(binaryDictionary, "abcde", "fghij", updatedBigramProbability); + if (canCheckBigramProbability(formatVersion)) { + assertEquals(updatedBigramProbability, + getBigramProbability(binaryDictionary, "abcde", "fghij")); + } dictFile.delete(); } public void testRandomlyAddBigramWords() { + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testRandomlyAddBigramWords(formatVersion); + } + } + + private void testRandomlyAddBigramWords(final int formatVersion) { final int wordCount = 100; final int bigramCount = 1000; final int codePointSetSize = 50; @@ -229,7 +398,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { File 
dictFile = null; try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); } @@ -237,19 +406,18 @@ public class BinaryDictionaryTests extends AndroidTestCase { 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); - final ArrayList<String> words = new ArrayList<String>(); - final ArrayList<Pair<String, String>> bigramWords = new ArrayList<Pair<String,String>>(); + final ArrayList<String> words = new ArrayList<>(); + final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>(); final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); - final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>(); - final HashMap<Pair<String, String>, Integer> bigramProbabilities = - new HashMap<Pair<String, String>, Integer>(); + final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); + final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); for (int i = 0; i < wordCount; ++i) { final String word = CodePointUtils.generateWord(random, codePointSet); words.add(word); final int unigramProbability = random.nextInt(0xFF); unigramProbabilities.put(word, unigramProbability); - binaryDictionary.addUnigramWord(word, unigramProbability); + addUnigramWord(binaryDictionary, word, unigramProbability); } for (int i = 0; i < bigramCount; i++) { @@ -258,29 +426,38 @@ public class BinaryDictionaryTests extends AndroidTestCase { if (TextUtils.equals(word0, word1)) { continue; } - final Pair<String, String> bigram = new Pair<String, String>(word0, word1); + final Pair<String, String> bigram = new Pair<>(word0, word1); bigramWords.add(bigram); - final int bigramProbability = random.nextInt(0xF); + final int unigramProbability = 
unigramProbabilities.get(word1); + final int bigramProbability = + unigramProbability + random.nextInt(0xFF - unigramProbability); bigramProbabilities.put(bigram, bigramProbability); - binaryDictionary.addBigramWords(word0, word1, bigramProbability); + addBigramWords(binaryDictionary, word0, word1, bigramProbability); } for (final Pair<String, String> bigram : bigramWords) { - final int unigramProbability = unigramProbabilities.get(bigram.second); final int bigramProbability = bigramProbabilities.get(bigram); - final int probability = binaryDictionary.calculateProbability(unigramProbability, - bigramProbability); - assertEquals(probability, - binaryDictionary.getBigramProbability(bigram.first, bigram.second)); + assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY, + isValidBigram(binaryDictionary, bigram.first, bigram.second)); + if (canCheckBigramProbability(formatVersion)) { + assertEquals(bigramProbability, + getBigramProbability(binaryDictionary, bigram.first, bigram.second)); + } } dictFile.delete(); } public void testRemoveBigramWords() { + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testRemoveBigramWords(formatVersion); + } + } + + private void testRemoveBigramWords(final int formatVersion) { File dictFile = null; try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); } @@ -288,45 +465,51 @@ public class BinaryDictionaryTests extends AndroidTestCase { 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); final int unigramProbability = 100; - final int bigramProbability = 10; - binaryDictionary.addUnigramWord("aaa", unigramProbability); - binaryDictionary.addUnigramWord("abb", unigramProbability); - binaryDictionary.addUnigramWord("bcc", unigramProbability); - 
binaryDictionary.addBigramWords("aaa", "abb", bigramProbability); - binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability); - binaryDictionary.addBigramWords("abb", "aaa", bigramProbability); - binaryDictionary.addBigramWords("abb", "bcc", bigramProbability); - - assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb")); - assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc")); - assertEquals(true, binaryDictionary.isValidBigram("abb", "aaa")); - assertEquals(true, binaryDictionary.isValidBigram("abb", "bcc")); - - binaryDictionary.removeBigramWords("aaa", "abb"); - assertEquals(false, binaryDictionary.isValidBigram("aaa", "abb")); - binaryDictionary.addBigramWords("aaa", "abb", bigramProbability); - assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb")); - - - binaryDictionary.removeBigramWords("aaa", "bcc"); - assertEquals(false, binaryDictionary.isValidBigram("aaa", "bcc")); - binaryDictionary.removeBigramWords("abb", "aaa"); - assertEquals(false, binaryDictionary.isValidBigram("abb", "aaa")); - binaryDictionary.removeBigramWords("abb", "bcc"); - assertEquals(false, binaryDictionary.isValidBigram("abb", "bcc")); - - binaryDictionary.removeBigramWords("aaa", "abb"); + final int bigramProbability = 150; + addUnigramWord(binaryDictionary, "aaa", unigramProbability); + addUnigramWord(binaryDictionary, "abb", unigramProbability); + addUnigramWord(binaryDictionary, "bcc", unigramProbability); + addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability); + addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability); + addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability); + addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability); + + assertTrue(isValidBigram(binaryDictionary, "aaa", "abb")); + assertTrue(isValidBigram(binaryDictionary, "aaa", "bcc")); + assertTrue(isValidBigram(binaryDictionary, "abb", "aaa")); + assertTrue(isValidBigram(binaryDictionary, "abb", "bcc")); + + 
removeBigramEntry(binaryDictionary, "aaa", "abb"); + assertFalse(isValidBigram(binaryDictionary, "aaa", "abb")); + addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability); + assertTrue(isValidBigram(binaryDictionary, "aaa", "abb")); + + + removeBigramEntry(binaryDictionary, "aaa", "bcc"); + assertFalse(isValidBigram(binaryDictionary, "aaa", "bcc")); + removeBigramEntry(binaryDictionary, "abb", "aaa"); + assertFalse(isValidBigram(binaryDictionary, "abb", "aaa")); + removeBigramEntry(binaryDictionary, "abb", "bcc"); + assertFalse(isValidBigram(binaryDictionary, "abb", "bcc")); + + removeBigramEntry(binaryDictionary, "aaa", "abb"); // Test remove non-existing bigram operation. - binaryDictionary.removeBigramWords("aaa", "abb"); - binaryDictionary.removeBigramWords("bcc", "aaa"); + removeBigramEntry(binaryDictionary, "aaa", "abb"); + removeBigramEntry(binaryDictionary, "bcc", "aaa"); dictFile.delete(); } public void testFlushDictionary() { + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testFlushDictionary(formatVersion); + } + } + + private void testFlushDictionary(final int formatVersion) { File dictFile = null; try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); } @@ -335,8 +518,8 @@ public class BinaryDictionaryTests extends AndroidTestCase { Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); final int probability = 100; - binaryDictionary.addUnigramWord("aaa", probability); - binaryDictionary.addUnigramWord("abcd", probability); + addUnigramWord(binaryDictionary, "aaa", probability); + addUnigramWord(binaryDictionary, "abcd", probability); // Close without flushing. 
binaryDictionary.close(); @@ -347,8 +530,8 @@ public class BinaryDictionaryTests extends AndroidTestCase { assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("aaa")); assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("abcd")); - binaryDictionary.addUnigramWord("aaa", probability); - binaryDictionary.addUnigramWord("abcd", probability); + addUnigramWord(binaryDictionary, "aaa", probability); + addUnigramWord(binaryDictionary, "abcd", probability); binaryDictionary.flush(); binaryDictionary.close(); @@ -358,7 +541,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { assertEquals(probability, binaryDictionary.getFrequency("aaa")); assertEquals(probability, binaryDictionary.getFrequency("abcd")); - binaryDictionary.addUnigramWord("bcde", probability); + addUnigramWord(binaryDictionary, "bcde", probability); binaryDictionary.flush(); binaryDictionary.close(); @@ -372,9 +555,15 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testFlushWithGCDictionary() { + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testFlushWithGCDictionary(formatVersion); + } + } + + private void testFlushWithGCDictionary(final int formatVersion) { File dictFile = null; try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); } @@ -383,40 +572,46 @@ public class BinaryDictionaryTests extends AndroidTestCase { Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); final int unigramProbability = 100; - final int bigramProbability = 10; - binaryDictionary.addUnigramWord("aaa", unigramProbability); - binaryDictionary.addUnigramWord("abb", unigramProbability); - binaryDictionary.addUnigramWord("bcc", unigramProbability); - binaryDictionary.addBigramWords("aaa", "abb", bigramProbability); - 
binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability); - binaryDictionary.addBigramWords("abb", "aaa", bigramProbability); - binaryDictionary.addBigramWords("abb", "bcc", bigramProbability); + final int bigramProbability = 150; + addUnigramWord(binaryDictionary, "aaa", unigramProbability); + addUnigramWord(binaryDictionary, "abb", unigramProbability); + addUnigramWord(binaryDictionary, "bcc", unigramProbability); + addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability); + addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability); + addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability); + addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability); binaryDictionary.flushWithGC(); binaryDictionary.close(); binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); - final int probability = binaryDictionary.calculateProbability(unigramProbability, - bigramProbability); assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa")); assertEquals(unigramProbability, binaryDictionary.getFrequency("abb")); assertEquals(unigramProbability, binaryDictionary.getFrequency("bcc")); - assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "abb")); - assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "bcc")); - assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa")); - assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc")); - assertEquals(false, binaryDictionary.isValidBigram("bcc", "aaa")); - assertEquals(false, binaryDictionary.isValidBigram("bcc", "bbc")); - assertEquals(false, binaryDictionary.isValidBigram("aaa", "aaa")); + if (canCheckBigramProbability(formatVersion)) { + assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "abb")); + assertEquals(bigramProbability, 
getBigramProbability(binaryDictionary, "aaa", "bcc")); + assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa")); + assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc")); + } + assertFalse(isValidBigram(binaryDictionary, "bcc", "aaa")); + assertFalse(isValidBigram(binaryDictionary, "bcc", "bbc")); + assertFalse(isValidBigram(binaryDictionary, "aaa", "aaa")); binaryDictionary.flushWithGC(); binaryDictionary.close(); dictFile.delete(); } - // TODO: Evaluate performance of GC public void testAddBigramWordsAndFlashWithGC() { + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testAddBigramWordsAndFlashWithGC(formatVersion); + } + } + + // TODO: Evaluate performance of GC + private void testAddBigramWordsAndFlashWithGC(final int formatVersion) { final int wordCount = 100; final int bigramCount = 1000; final int codePointSetSize = 30; @@ -425,7 +620,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { File dictFile = null; try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); } @@ -434,19 +629,18 @@ public class BinaryDictionaryTests extends AndroidTestCase { 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); - final ArrayList<String> words = new ArrayList<String>(); - final ArrayList<Pair<String, String>> bigramWords = new ArrayList<Pair<String,String>>(); + final ArrayList<String> words = new ArrayList<>(); + final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>(); final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); - final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>(); - final HashMap<Pair<String, String>, Integer> bigramProbabilities = 
- new HashMap<Pair<String, String>, Integer>(); + final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); + final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); for (int i = 0; i < wordCount; ++i) { final String word = CodePointUtils.generateWord(random, codePointSet); words.add(word); final int unigramProbability = random.nextInt(0xFF); unigramProbabilities.put(word, unigramProbability); - binaryDictionary.addUnigramWord(word, unigramProbability); + addUnigramWord(binaryDictionary, word, unigramProbability); } for (int i = 0; i < bigramCount; i++) { @@ -455,11 +649,13 @@ public class BinaryDictionaryTests extends AndroidTestCase { if (TextUtils.equals(word0, word1)) { continue; } - final Pair<String, String> bigram = new Pair<String, String>(word0, word1); + final Pair<String, String> bigram = new Pair<>(word0, word1); bigramWords.add(bigram); - final int bigramProbability = random.nextInt(0xF); + final int unigramProbability = unigramProbabilities.get(word1); + final int bigramProbability = + unigramProbability + random.nextInt(0xFF - unigramProbability); bigramProbabilities.put(bigram, bigramProbability); - binaryDictionary.addBigramWords(word0, word1, bigramProbability); + addBigramWords(binaryDictionary, word0, word1, bigramProbability); } binaryDictionary.flushWithGC(); @@ -468,19 +664,27 @@ public class BinaryDictionaryTests extends AndroidTestCase { 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + for (final Pair<String, String> bigram : bigramWords) { - final int unigramProbability = unigramProbabilities.get(bigram.second); final int bigramProbability = bigramProbabilities.get(bigram); - final int probability = binaryDictionary.calculateProbability(unigramProbability, - bigramProbability); - assertEquals(probability, - binaryDictionary.getBigramProbability(bigram.first, bigram.second)); + assertEquals(bigramProbability != 
Dictionary.NOT_A_PROBABILITY, + isValidBigram(binaryDictionary, bigram.first, bigram.second)); + if (canCheckBigramProbability(formatVersion)) { + assertEquals(bigramProbability, + getBigramProbability(binaryDictionary, bigram.first, bigram.second)); + } } dictFile.delete(); } - public void testRandomOperetionsAndFlashWithGC() { + public void testRandomOperationsAndFlashWithGC() { + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testRandomOperationsAndFlashWithGC(formatVersion); + } + } + + private void testRandomOperationsAndFlashWithGC(final int formatVersion) { final int flashWithGCIterationCount = 50; final int operationCountInEachIteration = 200; final int initialUnigramCount = 100; @@ -494,7 +698,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { File dictFile = null; try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); } @@ -502,18 +706,17 @@ public class BinaryDictionaryTests extends AndroidTestCase { BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); - final ArrayList<String> words = new ArrayList<String>(); - final ArrayList<Pair<String, String>> bigramWords = new ArrayList<Pair<String,String>>(); + final ArrayList<String> words = new ArrayList<>(); + final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>(); final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); - final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>(); - final HashMap<Pair<String, String>, Integer> bigramProbabilities = - new HashMap<Pair<String, String>, Integer>(); + final HashMap<String, Integer> unigramProbabilities = new 
HashMap<>(); + final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); for (int i = 0; i < initialUnigramCount; ++i) { final String word = CodePointUtils.generateWord(random, codePointSet); words.add(word); final int unigramProbability = random.nextInt(0xFF); unigramProbabilities.put(word, unigramProbability); - binaryDictionary.addUnigramWord(word, unigramProbability); + addUnigramWord(binaryDictionary, word, unigramProbability); } binaryDictionary.flushWithGC(); binaryDictionary.close(); @@ -529,7 +732,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { words.add(word); final int unigramProbability = random.nextInt(0xFF); unigramProbabilities.put(word, unigramProbability); - binaryDictionary.addUnigramWord(word, unigramProbability); + addUnigramWord(binaryDictionary, word, unigramProbability); } // Add bigram. if (random.nextFloat() < addBigramProb && words.size() > 2) { @@ -543,11 +746,13 @@ public class BinaryDictionaryTests extends AndroidTestCase { if (TextUtils.equals(word0, word1)) { continue; } - final int bigramProbability = random.nextInt(0xF); - final Pair<String, String> bigram = new Pair<String, String>(word0, word1); + final int unigramProbability = unigramProbabilities.get(word1); + final int bigramProbability = + unigramProbability + random.nextInt(0xFF - unigramProbability); + final Pair<String, String> bigram = new Pair<>(word0, word1); bigramWords.add(bigram); bigramProbabilities.put(bigram, bigramProbability); - binaryDictionary.addBigramWords(word0, word1, bigramProbability); + addBigramWords(binaryDictionary, word0, word1, bigramProbability); } // Remove bigram. 
if (random.nextFloat() < removeBigramProb && !bigramWords.isEmpty()) { @@ -555,7 +760,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { final Pair<String, String> bigram = bigramWords.get(bigramIndex); bigramWords.remove(bigramIndex); bigramProbabilities.remove(bigram); - binaryDictionary.removeBigramWords(bigram.first, bigram.second); + removeBigramEntry(binaryDictionary, bigram.first, bigram.second); } } @@ -568,17 +773,20 @@ public class BinaryDictionaryTests extends AndroidTestCase { // Test whether the all bigram operations are collectlly handled. for (int i = 0; i < bigramWords.size(); i++) { final Pair<String, String> bigram = bigramWords.get(i); - final int unigramProbability = unigramProbabilities.get(bigram.second); final int probability; if (bigramProbabilities.containsKey(bigram)) { final int bigramProbability = bigramProbabilities.get(bigram); - probability = binaryDictionary.calculateProbability(unigramProbability, - bigramProbability); + probability = bigramProbability; } else { probability = Dictionary.NOT_A_PROBABILITY; } - assertEquals(probability, - binaryDictionary.getBigramProbability(bigram.first, bigram.second)); + + if (canCheckBigramProbability(formatVersion)) { + assertEquals(probability, + getBigramProbability(binaryDictionary, bigram.first, bigram.second)); + } + assertEquals(probability != Dictionary.NOT_A_PROBABILITY, + isValidBigram(binaryDictionary, bigram.first, bigram.second)); } binaryDictionary.flushWithGC(); binaryDictionary.close(); @@ -588,6 +796,12 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testAddManyUnigramsAndFlushWithGC() { + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testAddManyUnigramsAndFlushWithGC(formatVersion); + } + } + + private void testAddManyUnigramsAndFlushWithGC(final int formatVersion) { final int flashWithGCIterationCount = 3; final int codePointSetSize = 50; @@ -596,13 +810,13 @@ public class BinaryDictionaryTests extends AndroidTestCase { 
File dictFile = null; try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); } - final ArrayList<String> words = new ArrayList<String>(); - final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>(); + final ArrayList<String> words = new ArrayList<>(); + final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); BinaryDictionary binaryDictionary; @@ -615,7 +829,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { words.add(word); final int unigramProbability = random.nextInt(0xFF); unigramProbabilities.put(word, unigramProbability); - binaryDictionary.addUnigramWord(word, unigramProbability); + addUnigramWord(binaryDictionary, word, unigramProbability); } for (int j = 0; j < words.size(); j++) { @@ -632,6 +846,12 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testUnigramAndBigramCount() { + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testUnigramAndBigramCount(formatVersion); + } + } + + private void testUnigramAndBigramCount(final int formatVersion) { final int flashWithGCIterationCount = 10; final int codePointSetSize = 50; final int unigramCountPerIteration = 1000; @@ -641,13 +861,13 @@ public class BinaryDictionaryTests extends AndroidTestCase { File dictFile = null; try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); } - final ArrayList<String> words = new ArrayList<String>(); - final HashSet<Pair<String, String>> bigrams = new HashSet<Pair<String, String>>(); + final ArrayList<String> words 
= new ArrayList<>(); + final HashSet<Pair<String, String>> bigrams = new HashSet<>(); final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); BinaryDictionary binaryDictionary; @@ -659,7 +879,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { final String word = CodePointUtils.generateWord(random, codePointSet); words.add(word); final int unigramProbability = random.nextInt(0xFF); - binaryDictionary.addUnigramWord(word, unigramProbability); + addUnigramWord(binaryDictionary, word, unigramProbability); } for (int j = 0; j < bigramCountPerIteration; j++) { final String word0 = words.get(random.nextInt(words.size())); @@ -667,22 +887,630 @@ public class BinaryDictionaryTests extends AndroidTestCase { if (TextUtils.equals(word0, word1)) { continue; } - bigrams.add(new Pair<String, String>(word0, word1)); + bigrams.add(new Pair<>(word0, word1)); final int bigramProbability = random.nextInt(0xF); - binaryDictionary.addBigramWords(word0, word1, bigramProbability); + addBigramWords(binaryDictionary, word0, word1, bigramProbability); } - assertEquals(new HashSet<String>(words).size(), Integer.parseInt( - binaryDictionary.getPropertyForTests(BinaryDictionary.UNIGRAM_COUNT_QUERY))); - assertEquals(new HashSet<Pair<String, String>>(bigrams).size(), Integer.parseInt( - binaryDictionary.getPropertyForTests(BinaryDictionary.BIGRAM_COUNT_QUERY))); + assertEquals(new HashSet<>(words).size(), Integer.parseInt( + binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY))); + assertEquals(new HashSet<>(bigrams).size(), Integer.parseInt( + binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY))); binaryDictionary.flushWithGC(); - assertEquals(new HashSet<String>(words).size(), Integer.parseInt( - binaryDictionary.getPropertyForTests(BinaryDictionary.UNIGRAM_COUNT_QUERY))); - assertEquals(new HashSet<Pair<String, String>>(bigrams).size(), Integer.parseInt( - 
binaryDictionary.getPropertyForTests(BinaryDictionary.BIGRAM_COUNT_QUERY))); + assertEquals(new HashSet<>(words).size(), Integer.parseInt( + binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY))); + assertEquals(new HashSet<>(bigrams).size(), Integer.parseInt( + binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY))); binaryDictionary.close(); } dictFile.delete(); } + + public void testAddMultipleDictionaryEntries() { + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testAddMultipleDictionaryEntries(formatVersion); + } + } + + private void testAddMultipleDictionaryEntries(final int formatVersion) { + final int codePointSetSize = 20; + final int lmParamCount = 1000; + final double bigramContinueRate = 0.9; + final long seed = System.currentTimeMillis(); + final Random random = new Random(seed); + + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + + final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); + final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); + final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); + + final LanguageModelParam[] languageModelParams = new LanguageModelParam[lmParamCount]; + String prevWord = null; + for (int i = 0; i < languageModelParams.length; i++) { + final String word = CodePointUtils.generateWord(random, codePointSet); + final int probability = random.nextInt(0xFF); + final int bigramProbability = probability + random.nextInt(0xFF - probability); + unigramProbabilities.put(word, probability); + if (prevWord == null) { + languageModelParams[i] = new LanguageModelParam(word, probability, + BinaryDictionary.NOT_A_VALID_TIMESTAMP); + } else { + languageModelParams[i] = new LanguageModelParam(prevWord, word, probability, + bigramProbability, 
BinaryDictionary.NOT_A_VALID_TIMESTAMP); + bigramProbabilities.put(new Pair<>(prevWord, word), + bigramProbability); + } + prevWord = (random.nextDouble() < bigramContinueRate) ? word : null; + } + + final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + binaryDictionary.addMultipleDictionaryEntries(languageModelParams); + + for (Map.Entry<String, Integer> entry : unigramProbabilities.entrySet()) { + assertEquals((int)entry.getValue(), binaryDictionary.getFrequency(entry.getKey())); + } + + for (Map.Entry<Pair<String, String>, Integer> entry : bigramProbabilities.entrySet()) { + final String word0 = entry.getKey().first; + final String word1 = entry.getKey().second; + final int bigramProbability = entry.getValue(); + assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY, + isValidBigram(binaryDictionary, word0, word1)); + if (canCheckBigramProbability(formatVersion)) { + assertEquals(bigramProbability, + getBigramProbability(binaryDictionary, word0, word1)); + } + } + } + + public void testGetWordProperties() { + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testGetWordProperties(formatVersion); + } + } + + private void testGetWordProperties(final int formatVersion) { + final long seed = System.currentTimeMillis(); + final Random random = new Random(seed); + final int UNIGRAM_COUNT = 1000; + final int BIGRAM_COUNT = 1000; + final int codePointSetSize = 20; + final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); + + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, 
dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + + final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord", + false /* isBeginningOfSentence */); + assertFalse(invalidWordProperty.isValid()); + + final ArrayList<String> words = new ArrayList<>(); + final HashMap<String, Integer> wordProbabilities = new HashMap<>(); + final HashMap<String, HashSet<String>> bigrams = new HashMap<>(); + final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); + + for (int i = 0; i < UNIGRAM_COUNT; i++) { + final String word = CodePointUtils.generateWord(random, codePointSet); + final int unigramProbability = random.nextInt(0xFF); + final boolean isNotAWord = random.nextBoolean(); + final boolean isBlacklisted = random.nextBoolean(); + // TODO: Add tests for historical info. + binaryDictionary.addUnigramEntry(word, unigramProbability, + null /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY, + false /* isBeginningOfSentence */, isNotAWord, isBlacklisted, + BinaryDictionary.NOT_A_VALID_TIMESTAMP); + if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { + binaryDictionary.flushWithGC(); + } + words.add(word); + wordProbabilities.put(word, unigramProbability); + final WordProperty wordProperty = binaryDictionary.getWordProperty(word, + false /* isBeginningOfSentence */); + assertEquals(word, wordProperty.mWord); + assertTrue(wordProperty.isValid()); + assertEquals(isNotAWord, wordProperty.mIsNotAWord); + assertEquals(isBlacklisted, wordProperty.mIsBlacklistEntry); + assertEquals(false, wordProperty.mHasBigrams); + assertEquals(false, wordProperty.mHasShortcuts); + assertEquals(unigramProbability, wordProperty.mProbabilityInfo.mProbability); + assertTrue(wordProperty.mShortcutTargets.isEmpty()); + } + + for (int i = 0; i < BIGRAM_COUNT; i++) { + final int word0Index = random.nextInt(wordProbabilities.size()); + final int word1Index = 
random.nextInt(wordProbabilities.size()); + if (word0Index == word1Index) { + continue; + } + final String word0 = words.get(word0Index); + final String word1 = words.get(word1Index); + final int unigramProbability = wordProbabilities.get(word1); + final int bigramProbability = + unigramProbability + random.nextInt(0xFF - unigramProbability); + addBigramWords(binaryDictionary, word0, word1, bigramProbability); + if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { + binaryDictionary.flushWithGC(); + } + if (!bigrams.containsKey(word0)) { + final HashSet<String> bigramWord1s = new HashSet<>(); + bigrams.put(word0, bigramWord1s); + } + bigrams.get(word0).add(word1); + bigramProbabilities.put(new Pair<>(word0, word1), bigramProbability); + } + + for (int i = 0; i < words.size(); i++) { + final String word0 = words.get(i); + if (!bigrams.containsKey(word0)) { + continue; + } + final HashSet<String> bigramWord1s = bigrams.get(word0); + final WordProperty wordProperty = binaryDictionary.getWordProperty(word0, + false /* isBeginningOfSentence */); + assertEquals(bigramWord1s.size(), wordProperty.mBigrams.size()); + for (int j = 0; j < wordProperty.mBigrams.size(); j++) { + final String word1 = wordProperty.mBigrams.get(j).mWord; + assertTrue(bigramWord1s.contains(word1)); + if (canCheckBigramProbability(formatVersion)) { + final int bigramProbability = bigramProbabilities.get(new Pair<>(word0, word1)); + assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability()); + } + } + } + } + + public void testIterateAllWords() { + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testIterateAllWords(formatVersion); + } + } + + private void testIterateAllWords(final int formatVersion) { + final long seed = System.currentTimeMillis(); + final Random random = new Random(seed); + final int UNIGRAM_COUNT = 1000; + final int BIGRAM_COUNT = 1000; + final int codePointSetSize = 20; + final int[] codePointSet = 
CodePointUtils.generateCodePointSet(codePointSetSize, random); + + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + + final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord", + false /* isBeginningOfSentence */); + assertFalse(invalidWordProperty.isValid()); + + final ArrayList<String> words = new ArrayList<>(); + final HashMap<String, Integer> wordProbabilitiesToCheckLater = new HashMap<>(); + final HashMap<String, HashSet<String>> bigrams = new HashMap<>(); + final HashMap<Pair<String, String>, Integer> bigramProbabilitiesToCheckLater = + new HashMap<>(); + + for (int i = 0; i < UNIGRAM_COUNT; i++) { + final String word = CodePointUtils.generateWord(random, codePointSet); + final int unigramProbability = random.nextInt(0xFF); + addUnigramWord(binaryDictionary, word, unigramProbability); + if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { + binaryDictionary.flushWithGC(); + } + words.add(word); + wordProbabilitiesToCheckLater.put(word, unigramProbability); + } + + for (int i = 0; i < BIGRAM_COUNT; i++) { + final int word0Index = random.nextInt(wordProbabilitiesToCheckLater.size()); + final int word1Index = random.nextInt(wordProbabilitiesToCheckLater.size()); + if (word0Index == word1Index) { + continue; + } + final String word0 = words.get(word0Index); + final String word1 = words.get(word1Index); + final int unigramProbability = wordProbabilitiesToCheckLater.get(word1); + final int bigramProbability = + unigramProbability + random.nextInt(0xFF - unigramProbability); + addBigramWords(binaryDictionary, word0, word1, bigramProbability); + 
if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { + binaryDictionary.flushWithGC(); + } + if (!bigrams.containsKey(word0)) { + final HashSet<String> bigramWord1s = new HashSet<>(); + bigrams.put(word0, bigramWord1s); + } + bigrams.get(word0).add(word1); + bigramProbabilitiesToCheckLater.put(new Pair<>(word0, word1), bigramProbability); + } + + final HashSet<String> wordSet = new HashSet<>(words); + final HashSet<Pair<String, String>> bigramSet = + new HashSet<>(bigramProbabilitiesToCheckLater.keySet()); + int token = 0; + do { + final BinaryDictionary.GetNextWordPropertyResult result = + binaryDictionary.getNextWordProperty(token); + final WordProperty wordProperty = result.mWordProperty; + final String word0 = wordProperty.mWord; + assertEquals((int)wordProbabilitiesToCheckLater.get(word0), + wordProperty.mProbabilityInfo.mProbability); + wordSet.remove(word0); + final HashSet<String> bigramWord1s = bigrams.get(word0); + for (int j = 0; j < wordProperty.mBigrams.size(); j++) { + final String word1 = wordProperty.mBigrams.get(j).mWord; + assertTrue(bigramWord1s.contains(word1)); + final Pair<String, String> bigram = new Pair<>(word0, word1); + if (canCheckBigramProbability(formatVersion)) { + final int bigramProbability = bigramProbabilitiesToCheckLater.get(bigram); + assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability()); + } + bigramSet.remove(bigram); + } + token = result.mNextToken; + } while (token != 0); + assertTrue(wordSet.isEmpty()); + assertTrue(bigramSet.isEmpty()); + } + + public void testAddShortcuts() { + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testAddShortcuts(formatVersion); + } + } + + private void testAddShortcuts(final int formatVersion) { + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + final BinaryDictionary binaryDictionary 
= new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + + final int unigramProbability = 100; + final int shortcutProbability = 10; + binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz", + shortcutProbability, false /* isBeginningOfSentence */, + false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */); + WordProperty wordProperty = binaryDictionary.getWordProperty("aaa", + false /* isBeginningOfSentence */); + assertEquals(1, wordProperty.mShortcutTargets.size()); + assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord); + assertEquals(shortcutProbability, wordProperty.mShortcutTargets.get(0).getProbability()); + final int updatedShortcutProbability = 2; + binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz", + updatedShortcutProbability, false /* isBeginningOfSentence */, + false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */); + wordProperty = binaryDictionary.getWordProperty("aaa", + false /* isBeginningOfSentence */); + assertEquals(1, wordProperty.mShortcutTargets.size()); + assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord); + assertEquals(updatedShortcutProbability, + wordProperty.mShortcutTargets.get(0).getProbability()); + binaryDictionary.addUnigramEntry("aaa", unigramProbability, "yyy", + shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */, + false /* isBlacklisted */, 0 /* timestamp */); + final HashMap<String, Integer> shortcutTargets = new HashMap<>(); + shortcutTargets.put("zzz", updatedShortcutProbability); + shortcutTargets.put("yyy", shortcutProbability); + wordProperty = binaryDictionary.getWordProperty("aaa", + false /* isBeginningOfSentence */); + assertEquals(2, wordProperty.mShortcutTargets.size()); + for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) { + 
assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord)); + assertEquals((int)shortcutTargets.get(shortcutTarget.mWord), + shortcutTarget.getProbability()); + shortcutTargets.remove(shortcutTarget.mWord); + } + shortcutTargets.put("zzz", updatedShortcutProbability); + shortcutTargets.put("yyy", shortcutProbability); + binaryDictionary.flushWithGC(); + wordProperty = binaryDictionary.getWordProperty("aaa", + false /* isBeginningOfSentence */); + assertEquals(2, wordProperty.mShortcutTargets.size()); + for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) { + assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord)); + assertEquals((int)shortcutTargets.get(shortcutTarget.mWord), + shortcutTarget.getProbability()); + shortcutTargets.remove(shortcutTarget.mWord); + } + } + + public void testAddManyShortcuts() { + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testAddManyShortcuts(formatVersion); + } + } + + private void testAddManyShortcuts(final int formatVersion) { + final long seed = System.currentTimeMillis(); + final Random random = new Random(seed); + final int UNIGRAM_COUNT = 1000; + final int SHORTCUT_COUNT = 10000; + final int codePointSetSize = 20; + final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); + + final ArrayList<String> words = new ArrayList<>(); + final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); + final HashMap<String, HashMap<String, Integer>> shortcutTargets = new HashMap<>(); + + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + + for (int i = 0; i < UNIGRAM_COUNT; i++) { + 
final String word = CodePointUtils.generateWord(random, codePointSet); + final int unigramProbability = random.nextInt(0xFF); + addUnigramWord(binaryDictionary, word, unigramProbability); + words.add(word); + unigramProbabilities.put(word, unigramProbability); + if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { + binaryDictionary.flushWithGC(); + } + } + for (int i = 0; i < SHORTCUT_COUNT; i++) { + final String shortcutTarget = CodePointUtils.generateWord(random, codePointSet); + final int shortcutProbability = random.nextInt(0xF); + final String word = words.get(random.nextInt(words.size())); + final int unigramProbability = unigramProbabilities.get(word); + binaryDictionary.addUnigramEntry(word, unigramProbability, shortcutTarget, + shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */, + false /* isBlacklisted */, 0 /* timestamp */); + if (shortcutTargets.containsKey(word)) { + final HashMap<String, Integer> shortcutTargetsOfWord = shortcutTargets.get(word); + shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability); + } else { + final HashMap<String, Integer> shortcutTargetsOfWord = new HashMap<>(); + shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability); + shortcutTargets.put(word, shortcutTargetsOfWord); + } + if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { + binaryDictionary.flushWithGC(); + } + } + + for (final String word : words) { + final WordProperty wordProperty = binaryDictionary.getWordProperty(word, + false /* isBeginningOfSentence */); + assertEquals((int)unigramProbabilities.get(word), + wordProperty.mProbabilityInfo.mProbability); + if (!shortcutTargets.containsKey(word)) { + // The word does not have shortcut targets. 
+ continue; + } + assertEquals(shortcutTargets.get(word).size(), wordProperty.mShortcutTargets.size()); + for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) { + final String targetCodePonts = shortcutTarget.mWord; + assertEquals((int)shortcutTargets.get(word).get(targetCodePonts), + shortcutTarget.getProbability()); + } + } + } + + public void testDictMigration() { + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, formatVersion); + } + } + + private void testDictMigration(final int fromFormatVersion, final int toFormatVersion) { + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", fromFormatVersion); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + final int unigramProbability = 100; + addUnigramWord(binaryDictionary, "aaa", unigramProbability); + addUnigramWord(binaryDictionary, "bbb", unigramProbability); + final int bigramProbability = 150; + addBigramWords(binaryDictionary, "aaa", "bbb", bigramProbability); + final int shortcutProbability = 10; + binaryDictionary.addUnigramEntry("ccc", unigramProbability, "xxx", shortcutProbability, + false /* isBeginningOfSentence */, false /* isNotAWord */, + false /* isBlacklisted */, 0 /* timestamp */); + binaryDictionary.addUnigramEntry("ddd", unigramProbability, null /* shortcutTarget */, + Dictionary.NOT_A_PROBABILITY, false /* isBeginningOfSentence */, + true /* isNotAWord */, true /* isBlacklisted */, 0 /* timestamp */); + binaryDictionary.addNgramEntry(PrevWordsInfo.BEGINNING_OF_SENTENCE, + "aaa", bigramProbability, 0 /* timestamp */); + assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa")); + 
assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb")); + assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb")); + assertEquals(fromFormatVersion, binaryDictionary.getFormatVersion()); + assertTrue(binaryDictionary.migrateTo(toFormatVersion)); + assertTrue(binaryDictionary.isValidDictionary()); + assertEquals(toFormatVersion, binaryDictionary.getFormatVersion()); + assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa")); + assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb")); + if (canCheckBigramProbability(toFormatVersion)) { + assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bbb")); + assertEquals(bigramProbability, binaryDictionary.getNgramProbability( + PrevWordsInfo.BEGINNING_OF_SENTENCE, "aaa")); + } + assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb")); + WordProperty wordProperty = binaryDictionary.getWordProperty("ccc", + false /* isBeginningOfSentence */); + assertEquals(1, wordProperty.mShortcutTargets.size()); + assertEquals("xxx", wordProperty.mShortcutTargets.get(0).mWord); + wordProperty = binaryDictionary.getWordProperty("ddd", + false /* isBeginningOfSentence */); + assertTrue(wordProperty.mIsBlacklistEntry); + assertTrue(wordProperty.mIsNotAWord); + } + + /** Runs the large-dictionary migration check from FormatSpec.VERSION4_ONLY_FOR_TESTING to each format version listed in DICT_FORMAT_VERSIONS. */ public void testLargeDictMigration() { + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testLargeDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, formatVersion); + } + } + + /** Fills a dictionary created in fromFormatVersion with randomly generated unigrams and bigrams, migrates it to toFormatVersion, then asserts that unigram probabilities, bigram validity and the unigram and bigram counts match what was added. Bigram probabilities are compared only when canCheckBigramProbability(toFormatVersion) is true. */ private void testLargeDictMigration(final int fromFormatVersion, final int toFormatVersion) { + final int UNIGRAM_COUNT = 3000; + final int BIGRAM_COUNT = 3000; + final int codePointSetSize = 50; + /* NOTE(review): seed is only passed to Random below and appears in no assertion message in this method, so a failing run cannot be replayed from the test output. */ final long seed = System.currentTimeMillis(); + final Random random = new Random(seed); + + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", fromFormatVersion); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + final BinaryDictionary
binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + + final ArrayList<String> words = new ArrayList<>(); + final ArrayList<Pair<String, String>> bigrams = new ArrayList<>(); + final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); + final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); + final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); + + /* Add UNIGRAM_COUNT random words. If generateWord returns a word that was already added, it is appended to words again and the map keeps only its latest probability. */ for (int i = 0; i < UNIGRAM_COUNT; i++) { + final String word = CodePointUtils.generateWord(random, codePointSet); + final int unigramProbability = random.nextInt(0xFF); + addUnigramWord(binaryDictionary, word, unigramProbability); + /* Flush with GC whenever the dictionary reports that GC is needed. */ if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { + binaryDictionary.flushWithGC(); + } + words.add(word); + unigramProbabilities.put(word, unigramProbability); + } + + /* Add random bigrams. Iterations that pick the same index twice are skipped, so fewer than BIGRAM_COUNT pairs may be added. */ for (int i = 0; i < BIGRAM_COUNT; i++) { + final int word0Index = random.nextInt(words.size()); + final int word1Index = random.nextInt(words.size()); + if (word0Index == word1Index) { + continue; + } + final String word0 = words.get(word0Index); + final String word1 = words.get(word1Index); + final int unigramProbability = unigramProbabilities.get(word1); + /* The bigram probability is drawn from the range starting at word1's unigram probability (inclusive) and ending at 0xFF (exclusive). */ final int bigramProbability = + random.nextInt(0xFF - unigramProbability) + unigramProbability; + addBigramWords(binaryDictionary, word0, word1, bigramProbability); + if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { + binaryDictionary.flushWithGC(); + } + final Pair<String, String> bigram = new Pair<>(word0, word1); + bigrams.add(bigram); + bigramProbabilities.put(bigram, bigramProbability); + } + /* Migrate, then verify through the same BinaryDictionary object. */ assertTrue(binaryDictionary.migrateTo(toFormatVersion)); + + for (final String word : words) { + assertEquals((int)unigramProbabilities.get(word), binaryDictionary.getFrequency(word)); + } +
/* The expected unigram count is the number of distinct words (the map size), not words.size(). */ assertEquals(unigramProbabilities.size(), Integer.parseInt( + binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY))); + + for (final Pair<String, String> bigram : bigrams) { + /* Exact probabilities are compared only for format versions accepted by canCheckBigramProbability; validity is asserted for every version. */ if (canCheckBigramProbability(toFormatVersion)) { + assertEquals((int)bigramProbabilities.get(bigram), + getBigramProbability(binaryDictionary, bigram.first, bigram.second)); + } + assertTrue(isValidBigram(binaryDictionary, bigram.first, bigram.second)); + } + assertEquals(bigramProbabilities.size(), Integer.parseInt( + binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY))); + } + + /** Runs the beginning-of-sentence test for each entry of DICT_FORMAT_VERSIONS for which supportsBeginningOfSentence returns true. */ public void testBeginningOfSentence() { + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + if (supportsBeginningOfSentence(formatVersion)) { + testBeginningOfSentence(formatVersion); + } + } + } + + /** Adds n-gram entries for "aaa" and "bbb" with PrevWordsInfo.BEGINNING_OF_SENTENCE as the previous-word context and asserts that getNgramProbability returns the added probability: for "aaa" before flushWithGC, and for both words after it. */ private void testBeginningOfSentence(final int formatVersion) { + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + final int dummyProbability = 0; + final PrevWordsInfo prevWordsInfoBeginningOfSentence = PrevWordsInfo.BEGINNING_OF_SENTENCE; + final int bigramProbability = 200; + addUnigramWord(binaryDictionary, "aaa", dummyProbability); + binaryDictionary.addNgramEntry(prevWordsInfoBeginningOfSentence, "aaa", bigramProbability, + BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); + assertEquals(bigramProbability, + binaryDictionary.getNgramProbability(prevWordsInfoBeginningOfSentence, "aaa")); + /* The same "aaa" entry is added a second time with the same probability. */ binaryDictionary.addNgramEntry(prevWordsInfoBeginningOfSentence, "aaa", bigramProbability, + BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); + addUnigramWord(binaryDictionary, "bbb",
dummyProbability); + binaryDictionary.addNgramEntry(prevWordsInfoBeginningOfSentence, "bbb", bigramProbability, + BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); + /* Both entries are re-checked after flushing with GC. */ binaryDictionary.flushWithGC(); + assertEquals(bigramProbability, + binaryDictionary.getNgramProbability(prevWordsInfoBeginningOfSentence, "aaa")); + assertEquals(bigramProbability, + binaryDictionary.getNgramProbability(prevWordsInfoBeginningOfSentence, "bbb")); + } + + /** Runs the getMaxFrequencyOfExactMatches test for each format version listed in DICT_FORMAT_VERSIONS. */ public void testGetMaxFrequencyOfExactMatches() { + for (final int formatVersion : DICT_FORMAT_VERSIONS) { + testGetMaxFrequencyOfExactMatches(formatVersion); + } + } + + /** Adds "abc" and then variants that differ by case, apostrophes and hyphens, each with a higher probability, asserting after each variant that getMaxFrequencyOfExactMatches("abc") equals the probability just added. Finally adds "ab c" with probability 255 and asserts that the result stays at 30. */ private void testGetMaxFrequencyOfExactMatches(final int formatVersion) { + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + addUnigramWord(binaryDictionary, "abc", 10); + addUnigramWord(binaryDictionary, "aBc", 15); + assertEquals(15, binaryDictionary.getMaxFrequencyOfExactMatches("abc")); + addUnigramWord(binaryDictionary, "ab'c", 20); + assertEquals(20, binaryDictionary.getMaxFrequencyOfExactMatches("abc")); + addUnigramWord(binaryDictionary, "a-b-c", 25); + assertEquals(25, binaryDictionary.getMaxFrequencyOfExactMatches("abc")); + addUnigramWord(binaryDictionary, "ab-'-'-'-c", 30); + assertEquals(30, binaryDictionary.getMaxFrequencyOfExactMatches("abc")); + /* The entry containing a space is asserted not to change the result, despite its higher probability. */ addUnigramWord(binaryDictionary, "ab c", 255); + assertEquals(30, binaryDictionary.getMaxFrequencyOfExactMatches("abc")); + } } |