aboutsummaryrefslogtreecommitdiffstats
path: root/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
diff options
context:
space:
mode:
Diffstat (limited to 'tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java')
-rw-r--r--tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java666
1 files changed, 470 insertions, 196 deletions
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
index 0fb0fa587..160b08c4f 100644
--- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
+++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
@@ -43,34 +43,49 @@ import java.util.Random;
public class BinaryDictionaryTests extends AndroidTestCase {
private static final String TEST_DICT_FILE_EXTENSION = ".testDict";
private static final String TEST_LOCALE = "test";
+ private static final int[] DICT_FORMAT_VERSIONS =
+ new int[] { FormatSpec.VERSION4, FormatSpec.VERSION4_DEV };
+
+ private static boolean canCheckBigramProbability(final int formatVersion) {
+ return formatVersion > FormatSpec.VERSION401;
+ }
+
+ private static boolean supportsBeginningOfSentence(final int formatVersion) {
+ return formatVersion > FormatSpec.VERSION401;
+ }
private File createEmptyDictionaryAndGetFile(final String dictId,
final int formatVersion) throws IOException {
- if (formatVersion == FormatSpec.VERSION4) {
- return createEmptyVer4DictionaryAndGetFile(dictId);
+ if (formatVersion == FormatSpec.VERSION4
+ || formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING
+ || formatVersion == FormatSpec.VERSION4_DEV) {
+ return createEmptyVer4DictionaryAndGetFile(dictId, formatVersion);
} else {
throw new IOException("Dictionary format version " + formatVersion
+ " is not supported.");
}
}
- private File createEmptyVer4DictionaryAndGetFile(final String dictId) throws IOException {
+ private File createEmptyVer4DictionaryAndGetFile(final String dictId,
+ final int formatVersion) throws IOException {
final File file = File.createTempFile(dictId, TEST_DICT_FILE_EXTENSION,
getContext().getCacheDir());
file.delete();
file.mkdir();
- Map<String, String> attributeMap = new HashMap<String, String>();
- if (BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), FormatSpec.VERSION4,
+ Map<String, String> attributeMap = new HashMap<>();
+ if (BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), formatVersion,
Locale.ENGLISH, attributeMap)) {
return file;
} else {
throw new IOException("Empty dictionary " + file.getAbsolutePath()
- + " cannot be created.");
+ + " cannot be created. Format version: " + formatVersion);
}
}
public void testIsValidDictionary() {
- testIsValidDictionary(FormatSpec.VERSION4);
+ for (final int formatVersion : DICT_FORMAT_VERSIONS) {
+ testIsValidDictionary(formatVersion);
+ }
}
private void testIsValidDictionary(final int formatVersion) {
@@ -98,7 +113,9 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testConstructingDictionaryOnMemory() {
- testConstructingDictionaryOnMemory(FormatSpec.VERSION4);
+ for (final int formatVersion : DICT_FORMAT_VERSIONS) {
+ testConstructingDictionaryOnMemory(formatVersion);
+ }
}
private void testConstructingDictionaryOnMemory(final int formatVersion) {
@@ -129,7 +146,9 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testAddTooLongWord() {
- testAddTooLongWord(FormatSpec.VERSION4);
+ for (final int formatVersion : DICT_FORMAT_VERSIONS) {
+ testAddTooLongWord(formatVersion);
+ }
}
private void testAddTooLongWord(final int formatVersion) {
@@ -155,8 +174,9 @@ public class BinaryDictionaryTests extends AndroidTestCase {
addUnigramWord(binaryDictionary, validLongWord, probability);
addUnigramWord(binaryDictionary, invalidLongWord, probability);
// Too long short cut.
- binaryDictionary.addUnigramWord("a", probability, invalidLongWord,
- 10 /* shortcutProbability */, false /* isNotAWord */, false /* isBlacklisted */,
+ binaryDictionary.addUnigramEntry("a", probability, invalidLongWord,
+ 10 /* shortcutProbability */, false /* isBeginningOfSentence */,
+ false /* isNotAWord */, false /* isBlacklisted */,
BinaryDictionary.NOT_A_VALID_TIMESTAMP);
addUnigramWord(binaryDictionary, "abc", probability);
final int updatedProbability = 200;
@@ -173,22 +193,39 @@ public class BinaryDictionaryTests extends AndroidTestCase {
dictFile.delete();
}
- private void addUnigramWord(final BinaryDictionary binaryDictionary, final String word,
+ private static void addUnigramWord(final BinaryDictionary binaryDictionary, final String word,
final int probability) {
- binaryDictionary.addUnigramWord(word, probability, "" /* shortcutTarget */,
+ binaryDictionary.addUnigramEntry(word, probability, "" /* shortcutTarget */,
BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
- false /* isNotAWord */, false /* isBlacklisted */,
- BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
+ false /* isBeginningOfSentence */, false /* isNotAWord */,
+ false /* isBlacklisted */, BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
}
- private void addBigramWords(final BinaryDictionary binaryDictionary, final String word0,
+ private static void addBigramWords(final BinaryDictionary binaryDictionary, final String word0,
final String word1, final int probability) {
- binaryDictionary.addBigramWords(word0, word1, probability,
+ binaryDictionary.addNgramEntry(new PrevWordsInfo(word0), word1, probability,
BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
}
+ private static boolean isValidBigram(final BinaryDictionary binaryDictionary,
+ final String word0, final String word1) {
+ return binaryDictionary.isValidNgram(new PrevWordsInfo(word0), word1);
+ }
+
+ private static void removeBigramEntry(final BinaryDictionary binaryDictionary,
+ final String word0, final String word1) {
+ binaryDictionary.removeNgramEntry(new PrevWordsInfo(word0), word1);
+ }
+
+ private static int getBigramProbability(final BinaryDictionary binaryDictionary,
+ final String word0, final String word1) {
+ return binaryDictionary.getNgramProbability(new PrevWordsInfo(word0), word1);
+ }
+
public void testAddUnigramWord() {
- testAddUnigramWord(FormatSpec.VERSION4);
+ for (final int formatVersion : DICT_FORMAT_VERSIONS) {
+ testAddUnigramWord(formatVersion);
+ }
}
private void testAddUnigramWord(final int formatVersion) {
@@ -230,7 +267,9 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testRandomlyAddUnigramWord() {
- testRandomlyAddUnigramWord(FormatSpec.VERSION4);
+ for (final int formatVersion : DICT_FORMAT_VERSIONS) {
+ testRandomlyAddUnigramWord(formatVersion);
+ }
}
private void testRandomlyAddUnigramWord(final int formatVersion) {
@@ -248,7 +287,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
- final HashMap<String, Integer> probabilityMap = new HashMap<String, Integer>();
+ final HashMap<String, Integer> probabilityMap = new HashMap<>();
// Test a word that isn't contained within the dictionary.
final Random random = new Random(seed);
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
@@ -266,7 +305,9 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testAddBigramWords() {
- testAddBigramWords(FormatSpec.VERSION4);
+ for (final int formatVersion : DICT_FORMAT_VERSIONS) {
+ testAddBigramWords(formatVersion);
+ }
}
private void testAddBigramWords(final int formatVersion) {
@@ -281,8 +322,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
final int unigramProbability = 100;
- final int bigramProbability = 10;
- final int updatedBigramProbability = 15;
+ final int bigramProbability = 150;
+ final int updatedBigramProbability = 200;
addUnigramWord(binaryDictionary, "aaa", unigramProbability);
addUnigramWord(binaryDictionary, "abb", unigramProbability);
addUnigramWord(binaryDictionary, "bcc", unigramProbability);
@@ -291,31 +332,32 @@ public class BinaryDictionaryTests extends AndroidTestCase {
addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability);
addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability);
- final int probability = binaryDictionary.calculateProbability(unigramProbability,
- bigramProbability);
- assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb"));
- assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc"));
- assertEquals(true, binaryDictionary.isValidBigram("abb", "aaa"));
- assertEquals(true, binaryDictionary.isValidBigram("abb", "bcc"));
- assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "abb"));
- assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "bcc"));
- assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa"));
- assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc"));
+ assertTrue(isValidBigram(binaryDictionary, "aaa", "abb"));
+ assertTrue(isValidBigram(binaryDictionary, "aaa", "bcc"));
+ assertTrue(isValidBigram(binaryDictionary, "abb", "aaa"));
+ assertTrue(isValidBigram(binaryDictionary, "abb", "bcc"));
+ if (canCheckBigramProbability(formatVersion)) {
+ assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "abb"));
+ assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bcc"));
+ assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa"));
+ assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc"));
+ }
addBigramWords(binaryDictionary, "aaa", "abb", updatedBigramProbability);
- final int updatedProbability = binaryDictionary.calculateProbability(unigramProbability,
- updatedBigramProbability);
- assertEquals(updatedProbability, binaryDictionary.getBigramProbability("aaa", "abb"));
+ if (canCheckBigramProbability(formatVersion)) {
+ assertEquals(updatedBigramProbability,
+ getBigramProbability(binaryDictionary, "aaa", "abb"));
+ }
- assertEquals(false, binaryDictionary.isValidBigram("bcc", "aaa"));
- assertEquals(false, binaryDictionary.isValidBigram("bcc", "bbc"));
- assertEquals(false, binaryDictionary.isValidBigram("aaa", "aaa"));
+ assertFalse(isValidBigram(binaryDictionary, "bcc", "aaa"));
+ assertFalse(isValidBigram(binaryDictionary, "bcc", "bbc"));
+ assertFalse(isValidBigram(binaryDictionary, "aaa", "aaa"));
assertEquals(Dictionary.NOT_A_PROBABILITY,
- binaryDictionary.getBigramProbability("bcc", "aaa"));
+ getBigramProbability(binaryDictionary, "bcc", "aaa"));
assertEquals(Dictionary.NOT_A_PROBABILITY,
- binaryDictionary.getBigramProbability("bcc", "bbc"));
+ getBigramProbability(binaryDictionary, "bcc", "bbc"));
assertEquals(Dictionary.NOT_A_PROBABILITY,
- binaryDictionary.getBigramProbability("aaa", "aaa"));
+ getBigramProbability(binaryDictionary, "aaa", "aaa"));
// Testing bigram link.
addUnigramWord(binaryDictionary, "abcde", unigramProbability);
@@ -324,17 +366,26 @@ public class BinaryDictionaryTests extends AndroidTestCase {
addUnigramWord(binaryDictionary, "fgh", unigramProbability);
addUnigramWord(binaryDictionary, "abc", unigramProbability);
addUnigramWord(binaryDictionary, "f", unigramProbability);
- assertEquals(probability, binaryDictionary.getBigramProbability("abcde", "fghij"));
+
+ if (canCheckBigramProbability(formatVersion)) {
+ assertEquals(bigramProbability,
+ getBigramProbability(binaryDictionary, "abcde", "fghij"));
+ }
assertEquals(Dictionary.NOT_A_PROBABILITY,
- binaryDictionary.getBigramProbability("abcde", "fgh"));
+ getBigramProbability(binaryDictionary, "abcde", "fgh"));
addBigramWords(binaryDictionary, "abcde", "fghij", updatedBigramProbability);
- assertEquals(updatedProbability, binaryDictionary.getBigramProbability("abcde", "fghij"));
+ if (canCheckBigramProbability(formatVersion)) {
+ assertEquals(updatedBigramProbability,
+ getBigramProbability(binaryDictionary, "abcde", "fghij"));
+ }
dictFile.delete();
}
public void testRandomlyAddBigramWords() {
- testRandomlyAddBigramWords(FormatSpec.VERSION4);
+ for (final int formatVersion : DICT_FORMAT_VERSIONS) {
+ testRandomlyAddBigramWords(formatVersion);
+ }
}
private void testRandomlyAddBigramWords(final int formatVersion) {
@@ -354,12 +405,11 @@ public class BinaryDictionaryTests extends AndroidTestCase {
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
- final ArrayList<String> words = new ArrayList<String>();
- final ArrayList<Pair<String, String>> bigramWords = new ArrayList<Pair<String,String>>();
+ final ArrayList<String> words = new ArrayList<>();
+ final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>();
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
- final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>();
- final HashMap<Pair<String, String>, Integer> bigramProbabilities =
- new HashMap<Pair<String, String>, Integer>();
+ final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
+ final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
for (int i = 0; i < wordCount; ++i) {
final String word = CodePointUtils.generateWord(random, codePointSet);
@@ -375,27 +425,32 @@ public class BinaryDictionaryTests extends AndroidTestCase {
if (TextUtils.equals(word0, word1)) {
continue;
}
- final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
+ final Pair<String, String> bigram = new Pair<>(word0, word1);
bigramWords.add(bigram);
- final int bigramProbability = random.nextInt(0xF);
+ final int unigramProbability = unigramProbabilities.get(word1);
+ final int bigramProbability =
+ unigramProbability + random.nextInt(0xFF - unigramProbability);
bigramProbabilities.put(bigram, bigramProbability);
addBigramWords(binaryDictionary, word0, word1, bigramProbability);
}
for (final Pair<String, String> bigram : bigramWords) {
- final int unigramProbability = unigramProbabilities.get(bigram.second);
final int bigramProbability = bigramProbabilities.get(bigram);
- final int probability = binaryDictionary.calculateProbability(unigramProbability,
- bigramProbability);
- assertEquals(probability,
- binaryDictionary.getBigramProbability(bigram.first, bigram.second));
+ assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY,
+ isValidBigram(binaryDictionary, bigram.first, bigram.second));
+ if (canCheckBigramProbability(formatVersion)) {
+ assertEquals(bigramProbability,
+ getBigramProbability(binaryDictionary, bigram.first, bigram.second));
+ }
}
dictFile.delete();
}
public void testRemoveBigramWords() {
- testRemoveBigramWords(FormatSpec.VERSION4);
+ for (final int formatVersion : DICT_FORMAT_VERSIONS) {
+ testRemoveBigramWords(formatVersion);
+ }
}
private void testRemoveBigramWords(final int formatVersion) {
@@ -409,7 +464,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
final int unigramProbability = 100;
- final int bigramProbability = 10;
+ final int bigramProbability = 150;
addUnigramWord(binaryDictionary, "aaa", unigramProbability);
addUnigramWord(binaryDictionary, "abb", unigramProbability);
addUnigramWord(binaryDictionary, "bcc", unigramProbability);
@@ -418,34 +473,36 @@ public class BinaryDictionaryTests extends AndroidTestCase {
addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability);
addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability);
- assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb"));
- assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc"));
- assertEquals(true, binaryDictionary.isValidBigram("abb", "aaa"));
- assertEquals(true, binaryDictionary.isValidBigram("abb", "bcc"));
+ assertTrue(isValidBigram(binaryDictionary, "aaa", "abb"));
+ assertTrue(isValidBigram(binaryDictionary, "aaa", "bcc"));
+ assertTrue(isValidBigram(binaryDictionary, "abb", "aaa"));
+ assertTrue(isValidBigram(binaryDictionary, "abb", "bcc"));
- binaryDictionary.removeBigramWords("aaa", "abb");
- assertEquals(false, binaryDictionary.isValidBigram("aaa", "abb"));
+ removeBigramEntry(binaryDictionary, "aaa", "abb");
+ assertFalse(isValidBigram(binaryDictionary, "aaa", "abb"));
addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability);
- assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb"));
+ assertTrue(isValidBigram(binaryDictionary, "aaa", "abb"));
- binaryDictionary.removeBigramWords("aaa", "bcc");
- assertEquals(false, binaryDictionary.isValidBigram("aaa", "bcc"));
- binaryDictionary.removeBigramWords("abb", "aaa");
- assertEquals(false, binaryDictionary.isValidBigram("abb", "aaa"));
- binaryDictionary.removeBigramWords("abb", "bcc");
- assertEquals(false, binaryDictionary.isValidBigram("abb", "bcc"));
+ removeBigramEntry(binaryDictionary, "aaa", "bcc");
+ assertFalse(isValidBigram(binaryDictionary, "aaa", "bcc"));
+ removeBigramEntry(binaryDictionary, "abb", "aaa");
+ assertFalse(isValidBigram(binaryDictionary, "abb", "aaa"));
+ removeBigramEntry(binaryDictionary, "abb", "bcc");
+ assertFalse(isValidBigram(binaryDictionary, "abb", "bcc"));
- binaryDictionary.removeBigramWords("aaa", "abb");
+ removeBigramEntry(binaryDictionary, "aaa", "abb");
// Test remove non-existing bigram operation.
- binaryDictionary.removeBigramWords("aaa", "abb");
- binaryDictionary.removeBigramWords("bcc", "aaa");
+ removeBigramEntry(binaryDictionary, "aaa", "abb");
+ removeBigramEntry(binaryDictionary, "bcc", "aaa");
dictFile.delete();
}
public void testFlushDictionary() {
- testFlushDictionary(FormatSpec.VERSION4);
+ for (final int formatVersion : DICT_FORMAT_VERSIONS) {
+ testFlushDictionary(formatVersion);
+ }
}
private void testFlushDictionary(final int formatVersion) {
@@ -497,7 +554,9 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testFlushWithGCDictionary() {
- testFlushWithGCDictionary(FormatSpec.VERSION4);
+ for (final int formatVersion : DICT_FORMAT_VERSIONS) {
+ testFlushWithGCDictionary(formatVersion);
+ }
}
private void testFlushWithGCDictionary(final int formatVersion) {
@@ -512,7 +571,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
final int unigramProbability = 100;
- final int bigramProbability = 10;
+ final int bigramProbability = 150;
addUnigramWord(binaryDictionary, "aaa", unigramProbability);
addUnigramWord(binaryDictionary, "abb", unigramProbability);
addUnigramWord(binaryDictionary, "bcc", unigramProbability);
@@ -526,18 +585,18 @@ public class BinaryDictionaryTests extends AndroidTestCase {
binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
- final int probability = binaryDictionary.calculateProbability(unigramProbability,
- bigramProbability);
assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
assertEquals(unigramProbability, binaryDictionary.getFrequency("abb"));
assertEquals(unigramProbability, binaryDictionary.getFrequency("bcc"));
- assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "abb"));
- assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "bcc"));
- assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa"));
- assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc"));
- assertEquals(false, binaryDictionary.isValidBigram("bcc", "aaa"));
- assertEquals(false, binaryDictionary.isValidBigram("bcc", "bbc"));
- assertEquals(false, binaryDictionary.isValidBigram("aaa", "aaa"));
+ if (canCheckBigramProbability(formatVersion)) {
+ assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "abb"));
+ assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bcc"));
+ assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa"));
+ assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc"));
+ }
+ assertFalse(isValidBigram(binaryDictionary, "bcc", "aaa"));
+ assertFalse(isValidBigram(binaryDictionary, "bcc", "bbc"));
+ assertFalse(isValidBigram(binaryDictionary, "aaa", "aaa"));
binaryDictionary.flushWithGC();
binaryDictionary.close();
@@ -545,7 +604,9 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testAddBigramWordsAndFlashWithGC() {
- testAddBigramWordsAndFlashWithGC(FormatSpec.VERSION4);
+ for (final int formatVersion : DICT_FORMAT_VERSIONS) {
+ testAddBigramWordsAndFlashWithGC(formatVersion);
+ }
}
// TODO: Evaluate performance of GC
@@ -567,12 +628,11 @@ public class BinaryDictionaryTests extends AndroidTestCase {
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
- final ArrayList<String> words = new ArrayList<String>();
- final ArrayList<Pair<String, String>> bigramWords = new ArrayList<Pair<String,String>>();
+ final ArrayList<String> words = new ArrayList<>();
+ final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>();
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
- final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>();
- final HashMap<Pair<String, String>, Integer> bigramProbabilities =
- new HashMap<Pair<String, String>, Integer>();
+ final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
+ final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
for (int i = 0; i < wordCount; ++i) {
final String word = CodePointUtils.generateWord(random, codePointSet);
@@ -588,9 +648,11 @@ public class BinaryDictionaryTests extends AndroidTestCase {
if (TextUtils.equals(word0, word1)) {
continue;
}
- final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
+ final Pair<String, String> bigram = new Pair<>(word0, word1);
bigramWords.add(bigram);
- final int bigramProbability = random.nextInt(0xF);
+ final int unigramProbability = unigramProbabilities.get(word1);
+ final int bigramProbability =
+ unigramProbability + random.nextInt(0xFF - unigramProbability);
bigramProbabilities.put(bigram, bigramProbability);
addBigramWords(binaryDictionary, word0, word1, bigramProbability);
}
@@ -601,20 +663,24 @@ public class BinaryDictionaryTests extends AndroidTestCase {
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+
for (final Pair<String, String> bigram : bigramWords) {
- final int unigramProbability = unigramProbabilities.get(bigram.second);
final int bigramProbability = bigramProbabilities.get(bigram);
- final int probability = binaryDictionary.calculateProbability(unigramProbability,
- bigramProbability);
- assertEquals(probability,
- binaryDictionary.getBigramProbability(bigram.first, bigram.second));
+ assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY,
+ isValidBigram(binaryDictionary, bigram.first, bigram.second));
+ if (canCheckBigramProbability(formatVersion)) {
+ assertEquals(bigramProbability,
+ getBigramProbability(binaryDictionary, bigram.first, bigram.second));
+ }
}
dictFile.delete();
}
public void testRandomOperationsAndFlashWithGC() {
- testRandomOperationsAndFlashWithGC(FormatSpec.VERSION4);
+ for (final int formatVersion : DICT_FORMAT_VERSIONS) {
+ testRandomOperationsAndFlashWithGC(formatVersion);
+ }
}
private void testRandomOperationsAndFlashWithGC(final int formatVersion) {
@@ -639,12 +705,11 @@ public class BinaryDictionaryTests extends AndroidTestCase {
BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
- final ArrayList<String> words = new ArrayList<String>();
- final ArrayList<Pair<String, String>> bigramWords = new ArrayList<Pair<String,String>>();
+ final ArrayList<String> words = new ArrayList<>();
+ final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>();
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
- final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>();
- final HashMap<Pair<String, String>, Integer> bigramProbabilities =
- new HashMap<Pair<String, String>, Integer>();
+ final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
+ final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
for (int i = 0; i < initialUnigramCount; ++i) {
final String word = CodePointUtils.generateWord(random, codePointSet);
words.add(word);
@@ -680,8 +745,10 @@ public class BinaryDictionaryTests extends AndroidTestCase {
if (TextUtils.equals(word0, word1)) {
continue;
}
- final int bigramProbability = random.nextInt(0xF);
- final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
+ final int unigramProbability = unigramProbabilities.get(word1);
+ final int bigramProbability =
+ unigramProbability + random.nextInt(0xFF - unigramProbability);
+ final Pair<String, String> bigram = new Pair<>(word0, word1);
bigramWords.add(bigram);
bigramProbabilities.put(bigram, bigramProbability);
addBigramWords(binaryDictionary, word0, word1, bigramProbability);
@@ -692,7 +759,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final Pair<String, String> bigram = bigramWords.get(bigramIndex);
bigramWords.remove(bigramIndex);
bigramProbabilities.remove(bigram);
- binaryDictionary.removeBigramWords(bigram.first, bigram.second);
+ removeBigramEntry(binaryDictionary, bigram.first, bigram.second);
}
}
@@ -705,17 +772,20 @@ public class BinaryDictionaryTests extends AndroidTestCase {
// Test whether the all bigram operations are collectlly handled.
for (int i = 0; i < bigramWords.size(); i++) {
final Pair<String, String> bigram = bigramWords.get(i);
- final int unigramProbability = unigramProbabilities.get(bigram.second);
final int probability;
if (bigramProbabilities.containsKey(bigram)) {
final int bigramProbability = bigramProbabilities.get(bigram);
- probability = binaryDictionary.calculateProbability(unigramProbability,
- bigramProbability);
+ probability = bigramProbability;
} else {
probability = Dictionary.NOT_A_PROBABILITY;
}
- assertEquals(probability,
- binaryDictionary.getBigramProbability(bigram.first, bigram.second));
+
+ if (canCheckBigramProbability(formatVersion)) {
+ assertEquals(probability,
+ getBigramProbability(binaryDictionary, bigram.first, bigram.second));
+ }
+ assertEquals(probability != Dictionary.NOT_A_PROBABILITY,
+ isValidBigram(binaryDictionary, bigram.first, bigram.second));
}
binaryDictionary.flushWithGC();
binaryDictionary.close();
@@ -725,7 +795,9 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testAddManyUnigramsAndFlushWithGC() {
- testAddManyUnigramsAndFlushWithGC(FormatSpec.VERSION4);
+ for (final int formatVersion : DICT_FORMAT_VERSIONS) {
+ testAddManyUnigramsAndFlushWithGC(formatVersion);
+ }
}
private void testAddManyUnigramsAndFlushWithGC(final int formatVersion) {
@@ -742,8 +814,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
fail("IOException while writing an initial dictionary : " + e);
}
- final ArrayList<String> words = new ArrayList<String>();
- final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>();
+ final ArrayList<String> words = new ArrayList<>();
+ final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
BinaryDictionary binaryDictionary;
@@ -773,7 +845,9 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testUnigramAndBigramCount() {
- testUnigramAndBigramCount(FormatSpec.VERSION4);
+ for (final int formatVersion : DICT_FORMAT_VERSIONS) {
+ testUnigramAndBigramCount(formatVersion);
+ }
}
private void testUnigramAndBigramCount(final int formatVersion) {
@@ -791,8 +865,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
fail("IOException while writing an initial dictionary : " + e);
}
- final ArrayList<String> words = new ArrayList<String>();
- final HashSet<Pair<String, String>> bigrams = new HashSet<Pair<String, String>>();
+ final ArrayList<String> words = new ArrayList<>();
+ final HashSet<Pair<String, String>> bigrams = new HashSet<>();
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
BinaryDictionary binaryDictionary;
@@ -812,18 +886,18 @@ public class BinaryDictionaryTests extends AndroidTestCase {
if (TextUtils.equals(word0, word1)) {
continue;
}
- bigrams.add(new Pair<String, String>(word0, word1));
+ bigrams.add(new Pair<>(word0, word1));
final int bigramProbability = random.nextInt(0xF);
addBigramWords(binaryDictionary, word0, word1, bigramProbability);
}
- assertEquals(new HashSet<String>(words).size(), Integer.parseInt(
+ assertEquals(new HashSet<>(words).size(), Integer.parseInt(
binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
- assertEquals(new HashSet<Pair<String, String>>(bigrams).size(), Integer.parseInt(
+ assertEquals(new HashSet<>(bigrams).size(), Integer.parseInt(
binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY)));
binaryDictionary.flushWithGC();
- assertEquals(new HashSet<String>(words).size(), Integer.parseInt(
+ assertEquals(new HashSet<>(words).size(), Integer.parseInt(
binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
- assertEquals(new HashSet<Pair<String, String>>(bigrams).size(), Integer.parseInt(
+ assertEquals(new HashSet<>(bigrams).size(), Integer.parseInt(
binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY)));
binaryDictionary.close();
}
@@ -832,7 +906,9 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testAddMultipleDictionaryEntries() {
- testAddMultipleDictionaryEntries(FormatSpec.VERSION4);
+ for (final int formatVersion : DICT_FORMAT_VERSIONS) {
+ testAddMultipleDictionaryEntries(formatVersion);
+ }
}
private void testAddMultipleDictionaryEntries(final int formatVersion) {
@@ -850,16 +926,15 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
- final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>();
- final HashMap<Pair<String, String>, Integer> bigramProbabilities =
- new HashMap<Pair<String, String>, Integer>();
+ final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
+ final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
final LanguageModelParam[] languageModelParams = new LanguageModelParam[lmParamCount];
String prevWord = null;
for (int i = 0; i < languageModelParams.length; i++) {
final String word = CodePointUtils.generateWord(random, codePointSet);
final int probability = random.nextInt(0xFF);
- final int bigramProbability = random.nextInt(0xF);
+ final int bigramProbability = probability + random.nextInt(0xFF - probability);
unigramProbabilities.put(word, probability);
if (prevWord == null) {
languageModelParams[i] = new LanguageModelParam(word, probability,
@@ -867,7 +942,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
} else {
languageModelParams[i] = new LanguageModelParam(prevWord, word, probability,
bigramProbability, BinaryDictionary.NOT_A_VALID_TIMESTAMP);
- bigramProbabilities.put(new Pair<String, String>(prevWord, word),
+ bigramProbabilities.put(new Pair<>(prevWord, word),
bigramProbability);
}
prevWord = (random.nextDouble() < bigramContinueRate) ? word : null;
@@ -885,16 +960,20 @@ public class BinaryDictionaryTests extends AndroidTestCase {
for (Map.Entry<Pair<String, String>, Integer> entry : bigramProbabilities.entrySet()) {
final String word0 = entry.getKey().first;
final String word1 = entry.getKey().second;
- final int unigramProbability = unigramProbabilities.get(word1);
final int bigramProbability = entry.getValue();
- final int probability = binaryDictionary.calculateProbability(
- unigramProbability, bigramProbability);
- assertEquals(probability, binaryDictionary.getBigramProbability(word0, word1));
+ assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY,
+ isValidBigram(binaryDictionary, word0, word1));
+ if (canCheckBigramProbability(formatVersion)) {
+ assertEquals(bigramProbability,
+ getBigramProbability(binaryDictionary, word0, word1));
+ }
}
}
public void testGetWordProperties() {
- testGetWordProperties(FormatSpec.VERSION4);
+ for (final int formatVersion : DICT_FORMAT_VERSIONS) {
+ testGetWordProperties(formatVersion);
+ }
}
private void testGetWordProperties(final int formatVersion) {
@@ -918,11 +997,10 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord");
assertFalse(invalidWordProperty.isValid());
- final ArrayList<String> words = new ArrayList<String>();
- final HashMap<String, Integer> wordProbabilities = new HashMap<String, Integer>();
- final HashMap<String, HashSet<String>> bigrams = new HashMap<String, HashSet<String>>();
- final HashMap<Pair<String, String>, Integer> bigramProbabilities =
- new HashMap<Pair<String, String>, Integer>();
+ final ArrayList<String> words = new ArrayList<>();
+ final HashMap<String, Integer> wordProbabilities = new HashMap<>();
+ final HashMap<String, HashSet<String>> bigrams = new HashMap<>();
+ final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
for (int i = 0; i < UNIGRAM_COUNT; i++) {
final String word = CodePointUtils.generateWord(random, codePointSet);
@@ -930,9 +1008,10 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final boolean isNotAWord = random.nextBoolean();
final boolean isBlacklisted = random.nextBoolean();
// TODO: Add tests for historical info.
- binaryDictionary.addUnigramWord(word, unigramProbability,
+ binaryDictionary.addUnigramEntry(word, unigramProbability,
null /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY,
- isNotAWord, isBlacklisted, BinaryDictionary.NOT_A_VALID_TIMESTAMP);
+ false /* isBeginningOfSentence */, isNotAWord, isBlacklisted,
+ BinaryDictionary.NOT_A_VALID_TIMESTAMP);
if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
binaryDictionary.flushWithGC();
}
@@ -957,18 +1036,19 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
final String word0 = words.get(word0Index);
final String word1 = words.get(word1Index);
- final int bigramProbability = random.nextInt(0xF);
- binaryDictionary.addBigramWords(word0, word1, bigramProbability,
- BinaryDictionary.NOT_A_VALID_TIMESTAMP);
+ final int unigramProbability = wordProbabilities.get(word1);
+ final int bigramProbability =
+ unigramProbability + random.nextInt(0xFF - unigramProbability);
+ addBigramWords(binaryDictionary, word0, word1, bigramProbability);
if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
binaryDictionary.flushWithGC();
}
if (!bigrams.containsKey(word0)) {
- final HashSet<String> bigramWord1s = new HashSet<String>();
+ final HashSet<String> bigramWord1s = new HashSet<>();
bigrams.put(word0, bigramWord1s);
}
bigrams.get(word0).add(word1);
- bigramProbabilities.put(new Pair<String, String>(word0, word1), bigramProbability);
+ bigramProbabilities.put(new Pair<>(word0, word1), bigramProbability);
}
for (int i = 0; i < words.size(); i++) {
@@ -982,18 +1062,18 @@ public class BinaryDictionaryTests extends AndroidTestCase {
for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
final String word1 = wordProperty.mBigrams.get(j).mWord;
assertTrue(bigramWord1s.contains(word1));
- final int bigramProbabilityDelta = bigramProbabilities.get(
- new Pair<String, String>(word0, word1));
- final int unigramProbability = wordProbabilities.get(word1);
- final int bigramProbablity = binaryDictionary.calculateProbability(
- unigramProbability, bigramProbabilityDelta);
- assertEquals(wordProperty.mBigrams.get(j).getProbability(), bigramProbablity);
+ if (canCheckBigramProbability(formatVersion)) {
+ final int bigramProbability = bigramProbabilities.get(new Pair<>(word0, word1));
+ assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability());
+ }
}
}
}
public void testIterateAllWords() {
- testIterateAllWords(FormatSpec.VERSION4);
+ for (final int formatVersion : DICT_FORMAT_VERSIONS) {
+ testIterateAllWords(formatVersion);
+ }
}
private void testIterateAllWords(final int formatVersion) {
@@ -1017,12 +1097,11 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord");
assertFalse(invalidWordProperty.isValid());
- final ArrayList<String> words = new ArrayList<String>();
- final HashMap<String, Integer> wordProbabilitiesToCheckLater =
- new HashMap<String, Integer>();
- final HashMap<String, HashSet<String>> bigrams = new HashMap<String, HashSet<String>>();
+ final ArrayList<String> words = new ArrayList<>();
+ final HashMap<String, Integer> wordProbabilitiesToCheckLater = new HashMap<>();
+ final HashMap<String, HashSet<String>> bigrams = new HashMap<>();
final HashMap<Pair<String, String>, Integer> bigramProbabilitiesToCheckLater =
- new HashMap<Pair<String, String>, Integer>();
+ new HashMap<>();
for (int i = 0; i < UNIGRAM_COUNT; i++) {
final String word = CodePointUtils.generateWord(random, codePointSet);
@@ -1043,24 +1122,24 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
final String word0 = words.get(word0Index);
final String word1 = words.get(word1Index);
- final int bigramProbability = random.nextInt(0xF);
- binaryDictionary.addBigramWords(word0, word1, bigramProbability,
- BinaryDictionary.NOT_A_VALID_TIMESTAMP);
+ final int unigramProbability = wordProbabilitiesToCheckLater.get(word1);
+ final int bigramProbability =
+ unigramProbability + random.nextInt(0xFF - unigramProbability);
+ addBigramWords(binaryDictionary, word0, word1, bigramProbability);
if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
binaryDictionary.flushWithGC();
}
if (!bigrams.containsKey(word0)) {
- final HashSet<String> bigramWord1s = new HashSet<String>();
+ final HashSet<String> bigramWord1s = new HashSet<>();
bigrams.put(word0, bigramWord1s);
}
bigrams.get(word0).add(word1);
- bigramProbabilitiesToCheckLater.put(
- new Pair<String, String>(word0, word1), bigramProbability);
+ bigramProbabilitiesToCheckLater.put(new Pair<>(word0, word1), bigramProbability);
}
- final HashSet<String> wordSet = new HashSet<String>(words);
+ final HashSet<String> wordSet = new HashSet<>(words);
final HashSet<Pair<String, String>> bigramSet =
- new HashSet<Pair<String,String>>(bigramProbabilitiesToCheckLater.keySet());
+ new HashSet<>(bigramProbabilitiesToCheckLater.keySet());
int token = 0;
do {
final BinaryDictionary.GetNextWordPropertyResult result =
@@ -1074,12 +1153,11 @@ public class BinaryDictionaryTests extends AndroidTestCase {
for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
final String word1 = wordProperty.mBigrams.get(j).mWord;
assertTrue(bigramWord1s.contains(word1));
- final int unigramProbability = wordProbabilitiesToCheckLater.get(word1);
- final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
- final int bigramProbabilityDelta = bigramProbabilitiesToCheckLater.get(bigram);
- final int bigramProbablity = binaryDictionary.calculateProbability(
- unigramProbability, bigramProbabilityDelta);
- assertEquals(wordProperty.mBigrams.get(j).getProbability(), bigramProbablity);
+ final Pair<String, String> bigram = new Pair<>(word0, word1);
+ if (canCheckBigramProbability(formatVersion)) {
+ final int bigramProbability = bigramProbabilitiesToCheckLater.get(bigram);
+ assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability());
+ }
bigramSet.remove(bigram);
}
token = result.mNextToken;
@@ -1089,7 +1167,9 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testAddShortcuts() {
- testAddShortcuts(FormatSpec.VERSION4);
+ for (final int formatVersion : DICT_FORMAT_VERSIONS) {
+ testAddShortcuts(formatVersion);
+ }
}
private void testAddShortcuts(final int formatVersion) {
@@ -1105,26 +1185,26 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final int unigramProbability = 100;
final int shortcutProbability = 10;
- binaryDictionary.addUnigramWord("aaa", unigramProbability, "zzz",
- shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
- 0 /* timestamp */);
+ binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz",
+ shortcutProbability, false /* isBeginningOfSentence */,
+ false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */);
WordProperty wordProperty = binaryDictionary.getWordProperty("aaa");
assertEquals(1, wordProperty.mShortcutTargets.size());
assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord);
assertEquals(shortcutProbability, wordProperty.mShortcutTargets.get(0).getProbability());
final int updatedShortcutProbability = 2;
- binaryDictionary.addUnigramWord("aaa", unigramProbability, "zzz",
- updatedShortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
- 0 /* timestamp */);
+ binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz",
+ updatedShortcutProbability, false /* isBeginningOfSentence */,
+ false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */);
wordProperty = binaryDictionary.getWordProperty("aaa");
assertEquals(1, wordProperty.mShortcutTargets.size());
assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord);
assertEquals(updatedShortcutProbability,
wordProperty.mShortcutTargets.get(0).getProbability());
- binaryDictionary.addUnigramWord("aaa", unigramProbability, "yyy",
- shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
- 0 /* timestamp */);
- final HashMap<String, Integer> shortcutTargets = new HashMap<String, Integer>();
+ binaryDictionary.addUnigramEntry("aaa", unigramProbability, "yyy",
+ shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */,
+ false /* isBlacklisted */, 0 /* timestamp */);
+ final HashMap<String, Integer> shortcutTargets = new HashMap<>();
shortcutTargets.put("zzz", updatedShortcutProbability);
shortcutTargets.put("yyy", shortcutProbability);
wordProperty = binaryDictionary.getWordProperty("aaa");
@@ -1149,7 +1229,9 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testAddManyShortcuts() {
- testAddManyShortcuts(FormatSpec.VERSION4);
+ for (final int formatVersion : DICT_FORMAT_VERSIONS) {
+ testAddManyShortcuts(formatVersion);
+ }
}
private void testAddManyShortcuts(final int formatVersion) {
@@ -1160,10 +1242,9 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final int codePointSetSize = 20;
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
- final ArrayList<String> words = new ArrayList<String>();
- final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>();
- final HashMap<String, HashMap<String, Integer>> shortcutTargets =
- new HashMap<String, HashMap<String, Integer>>();
+ final ArrayList<String> words = new ArrayList<>();
+ final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
+ final HashMap<String, HashMap<String, Integer>> shortcutTargets = new HashMap<>();
File dictFile = null;
try {
@@ -1190,15 +1271,14 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final int shortcutProbability = random.nextInt(0xF);
final String word = words.get(random.nextInt(words.size()));
final int unigramProbability = unigramProbabilities.get(word);
- binaryDictionary.addUnigramWord(word, unigramProbability, shortcutTarget,
- shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
- 0 /* timestamp */);
+ binaryDictionary.addUnigramEntry(word, unigramProbability, shortcutTarget,
+ shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */,
+ false /* isBlacklisted */, 0 /* timestamp */);
if (shortcutTargets.containsKey(word)) {
final HashMap<String, Integer> shortcutTargetsOfWord = shortcutTargets.get(word);
shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability);
} else {
- final HashMap<String, Integer> shortcutTargetsOfWord =
- new HashMap<String, Integer>();
+ final HashMap<String, Integer> shortcutTargetsOfWord = new HashMap<>();
shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability);
shortcutTargets.put(word, shortcutTargetsOfWord);
}
@@ -1223,4 +1303,198 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
}
}
+
+ public void testDictMigration() {
+ for (final int formatVersion : DICT_FORMAT_VERSIONS) {
+ testDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, formatVersion);
+ }
+ }
+
+ private void testDictMigration(final int fromFormatVersion, final int toFormatVersion) {
+ File dictFile = null;
+ try {
+ dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", fromFormatVersion);
+ } catch (IOException e) {
+ fail("IOException while writing an initial dictionary : " + e);
+ }
+ final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
+ 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
+ Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+ final int unigramProbability = 100;
+ addUnigramWord(binaryDictionary, "aaa", unigramProbability);
+ addUnigramWord(binaryDictionary, "bbb", unigramProbability);
+ final int bigramProbability = 150;
+ addBigramWords(binaryDictionary, "aaa", "bbb", bigramProbability);
+ final int shortcutProbability = 10;
+ binaryDictionary.addUnigramEntry("ccc", unigramProbability, "xxx", shortcutProbability,
+ false /* isBeginningOfSentence */, false /* isNotAWord */,
+ false /* isBlacklisted */, 0 /* timestamp */);
+ binaryDictionary.addUnigramEntry("ddd", unigramProbability, null /* shortcutTarget */,
+ Dictionary.NOT_A_PROBABILITY, false /* isBeginningOfSentence */,
+ true /* isNotAWord */, true /* isBlacklisted */, 0 /* timestamp */);
+ assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
+ assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb"));
+ assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb"));
+ assertEquals(fromFormatVersion, binaryDictionary.getFormatVersion());
+ assertTrue(binaryDictionary.migrateTo(toFormatVersion));
+ assertTrue(binaryDictionary.isValidDictionary());
+ assertEquals(toFormatVersion, binaryDictionary.getFormatVersion());
+ assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
+ assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb"));
+ if (canCheckBigramProbability(toFormatVersion)) {
+ assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bbb"));
+ }
+ assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb"));
+ WordProperty wordProperty = binaryDictionary.getWordProperty("ccc");
+ assertEquals(1, wordProperty.mShortcutTargets.size());
+ assertEquals("xxx", wordProperty.mShortcutTargets.get(0).mWord);
+ wordProperty = binaryDictionary.getWordProperty("ddd");
+ assertTrue(wordProperty.mIsBlacklistEntry);
+ assertTrue(wordProperty.mIsNotAWord);
+ }
+
+ public void testLargeDictMigration() {
+ for (final int formatVersion : DICT_FORMAT_VERSIONS) {
+ testLargeDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, formatVersion);
+ }
+ }
+
+ private void testLargeDictMigration(final int fromFormatVersion, final int toFormatVersion) {
+ final int UNIGRAM_COUNT = 3000;
+ final int BIGRAM_COUNT = 3000;
+ final int codePointSetSize = 50;
+ final long seed = System.currentTimeMillis();
+ final Random random = new Random(seed);
+
+ File dictFile = null;
+ try {
+ dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", fromFormatVersion);
+ } catch (IOException e) {
+ fail("IOException while writing an initial dictionary : " + e);
+ }
+ final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
+ 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
+ Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+
+ final ArrayList<String> words = new ArrayList<>();
+ final ArrayList<Pair<String, String>> bigrams = new ArrayList<>();
+ final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
+ final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
+ final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
+
+ for (int i = 0; i < UNIGRAM_COUNT; i++) {
+ final String word = CodePointUtils.generateWord(random, codePointSet);
+ final int unigramProbability = random.nextInt(0xFF);
+ addUnigramWord(binaryDictionary, word, unigramProbability);
+ if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
+ binaryDictionary.flushWithGC();
+ }
+ words.add(word);
+ unigramProbabilities.put(word, unigramProbability);
+ }
+
+ for (int i = 0; i < BIGRAM_COUNT; i++) {
+ final int word0Index = random.nextInt(words.size());
+ final int word1Index = random.nextInt(words.size());
+ if (word0Index == word1Index) {
+ continue;
+ }
+ final String word0 = words.get(word0Index);
+ final String word1 = words.get(word1Index);
+ final int unigramProbability = unigramProbabilities.get(word1);
+ final int bigramProbability =
+ random.nextInt(0xFF - unigramProbability) + unigramProbability;
+ addBigramWords(binaryDictionary, word0, word1, bigramProbability);
+ if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
+ binaryDictionary.flushWithGC();
+ }
+ final Pair<String, String> bigram = new Pair<>(word0, word1);
+ bigrams.add(bigram);
+ bigramProbabilities.put(bigram, bigramProbability);
+ }
+ assertTrue(binaryDictionary.migrateTo(toFormatVersion));
+
+ for (final String word : words) {
+ assertEquals((int)unigramProbabilities.get(word), binaryDictionary.getFrequency(word));
+ }
+ assertEquals(unigramProbabilities.size(), Integer.parseInt(
+ binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
+
+ for (final Pair<String, String> bigram : bigrams) {
+ if (canCheckBigramProbability(toFormatVersion)) {
+ assertEquals((int)bigramProbabilities.get(bigram),
+ getBigramProbability(binaryDictionary, bigram.first, bigram.second));
+ }
+ assertTrue(isValidBigram(binaryDictionary, bigram.first, bigram.second));
+ }
+ assertEquals(bigramProbabilities.size(), Integer.parseInt(
+ binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY)));
+ }
+
+ public void testBeginningOfSentence() {
+ for (final int formatVersion : DICT_FORMAT_VERSIONS) {
+ if (supportsBeginningOfSentence(formatVersion)) {
+ testBeginningOfSentence(formatVersion);
+ }
+ }
+ }
+
+ private void testBeginningOfSentence(final int formatVersion) {
+ File dictFile = null;
+ try {
+ dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
+ } catch (IOException e) {
+ fail("IOException while writing an initial dictionary : " + e);
+ }
+ final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
+ 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
+ Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+ final int dummyProbability = 0;
+ final PrevWordsInfo prevWordsInfoBeginningOfSentence = PrevWordsInfo.BEGINNING_OF_SENTENCE;
+ final int bigramProbability = 200;
+ addUnigramWord(binaryDictionary, "aaa", dummyProbability);
+ binaryDictionary.addNgramEntry(prevWordsInfoBeginningOfSentence, "aaa", bigramProbability,
+ BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
+ assertEquals(bigramProbability,
+ binaryDictionary.getNgramProbability(prevWordsInfoBeginningOfSentence, "aaa"));
+ binaryDictionary.addNgramEntry(prevWordsInfoBeginningOfSentence, "aaa", bigramProbability,
+ BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
+ addUnigramWord(binaryDictionary, "bbb", dummyProbability);
+ binaryDictionary.addNgramEntry(prevWordsInfoBeginningOfSentence, "bbb", bigramProbability,
+ BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
+ binaryDictionary.flushWithGC();
+ assertEquals(bigramProbability,
+ binaryDictionary.getNgramProbability(prevWordsInfoBeginningOfSentence, "aaa"));
+ assertEquals(bigramProbability,
+ binaryDictionary.getNgramProbability(prevWordsInfoBeginningOfSentence, "bbb"));
+ }
+
+ public void testGetMaxFrequencyOfExactMatches() {
+ for (final int formatVersion : DICT_FORMAT_VERSIONS) {
+ testGetMaxFrequencyOfExactMatches(formatVersion);
+ }
+ }
+
+ private void testGetMaxFrequencyOfExactMatches(final int formatVersion) {
+ File dictFile = null;
+ try {
+ dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
+ } catch (IOException e) {
+ fail("IOException while writing an initial dictionary : " + e);
+ }
+ final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
+ 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
+ Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+ addUnigramWord(binaryDictionary, "abc", 10);
+ addUnigramWord(binaryDictionary, "aBc", 15);
+ assertEquals(15, binaryDictionary.getMaxFrequencyOfExactMatches("abc"));
+ addUnigramWord(binaryDictionary, "ab'c", 20);
+ assertEquals(20, binaryDictionary.getMaxFrequencyOfExactMatches("abc"));
+ addUnigramWord(binaryDictionary, "a-b-c", 25);
+ assertEquals(25, binaryDictionary.getMaxFrequencyOfExactMatches("abc"));
+ addUnigramWord(binaryDictionary, "ab-'-'-'-c", 30);
+ assertEquals(30, binaryDictionary.getMaxFrequencyOfExactMatches("abc"));
+ addUnigramWord(binaryDictionary, "ab c", 255);
+ assertEquals(30, binaryDictionary.getMaxFrequencyOfExactMatches("abc"));
+ }
}