aboutsummaryrefslogtreecommitdiffstats
path: root/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
diff options
context:
space:
mode:
Diffstat (limited to 'tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java')
-rw-r--r--tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java712
1 files changed, 625 insertions, 87 deletions
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
index 5b8f0e977..0fb0fa587 100644
--- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
+++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
@@ -23,6 +23,11 @@ import android.util.Pair;
import com.android.inputmethod.latin.makedict.CodePointUtils;
import com.android.inputmethod.latin.makedict.FormatSpec;
+import com.android.inputmethod.latin.makedict.WeightedString;
+import com.android.inputmethod.latin.makedict.WordProperty;
+import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
+import com.android.inputmethod.latin.utils.FileUtils;
+import com.android.inputmethod.latin.utils.LanguageModelParam;
import java.io.File;
import java.io.IOException;
@@ -33,39 +38,45 @@ import java.util.Locale;
import java.util.Map;
import java.util.Random;
+// TODO Use the seed passed as an argument for makedict test.
@LargeTest
public class BinaryDictionaryTests extends AndroidTestCase {
private static final String TEST_DICT_FILE_EXTENSION = ".testDict";
private static final String TEST_LOCALE = "test";
- @Override
- protected void setUp() throws Exception {
- super.setUp();
- }
-
- @Override
- protected void tearDown() throws Exception {
- super.tearDown();
+ private File createEmptyDictionaryAndGetFile(final String dictId,
+ final int formatVersion) throws IOException {
+ if (formatVersion == FormatSpec.VERSION4) {
+ return createEmptyVer4DictionaryAndGetFile(dictId);
+ } else {
+ throw new IOException("Dictionary format version " + formatVersion
+ + " is not supported.");
+ }
}
- private File createEmptyDictionaryAndGetFile(final String filename) throws IOException {
- final File file = File.createTempFile(filename, TEST_DICT_FILE_EXTENSION,
+ private File createEmptyVer4DictionaryAndGetFile(final String dictId) throws IOException {
+ final File file = File.createTempFile(dictId, TEST_DICT_FILE_EXTENSION,
getContext().getCacheDir());
+ file.delete();
+ file.mkdir();
Map<String, String> attributeMap = new HashMap<String, String>();
- attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE,
- FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
- if (BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(),
- 3 /* dictVersion */, attributeMap)) {
+ if (BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), FormatSpec.VERSION4,
+ Locale.ENGLISH, attributeMap)) {
return file;
} else {
- throw new IOException("Empty dictionary cannot be created.");
+ throw new IOException("Empty dictionary " + file.getAbsolutePath()
+ + " cannot be created.");
}
}
public void testIsValidDictionary() {
+ testIsValidDictionary(FormatSpec.VERSION4);
+ }
+
+ private void testIsValidDictionary(final int formatVersion) {
File dictFile = null;
try {
- dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
+ dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
} catch (IOException e) {
fail("IOException while writing an initial dictionary : " + e);
}
@@ -77,7 +88,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
binaryDictionary.close();
assertFalse("binaryDictionary must be invalid after closing.",
binaryDictionary.isValidDictionary());
- dictFile.delete();
+ FileUtils.deleteRecursively(dictFile);
binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */,
dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(),
TEST_LOCALE, true /* isUpdatable */);
@@ -86,10 +97,104 @@ public class BinaryDictionaryTests extends AndroidTestCase {
binaryDictionary.close();
}
+ public void testConstructingDictionaryOnMemory() {
+ testConstructingDictionaryOnMemory(FormatSpec.VERSION4);
+ }
+
+ private void testConstructingDictionaryOnMemory(final int formatVersion) {
+ File dictFile = null;
+ try {
+ dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
+ } catch (IOException e) {
+ fail("IOException while writing an initial dictionary : " + e);
+ }
+ FileUtils.deleteRecursively(dictFile);
+ assertFalse(dictFile.exists());
+ BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
+ true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, formatVersion,
+ new HashMap<String, String>());
+ assertTrue(binaryDictionary.isValidDictionary());
+ assertEquals(formatVersion, binaryDictionary.getFormatVersion());
+ final int probability = 100;
+ addUnigramWord(binaryDictionary, "word", probability);
+ assertEquals(probability, binaryDictionary.getFrequency("word"));
+ assertFalse(dictFile.exists());
+ binaryDictionary.flush();
+ assertTrue(dictFile.exists());
+ assertTrue(binaryDictionary.isValidDictionary());
+ assertEquals(formatVersion, binaryDictionary.getFormatVersion());
+ assertEquals(probability, binaryDictionary.getFrequency("word"));
+ binaryDictionary.close();
+ dictFile.delete();
+ }
+
+ public void testAddTooLongWord() {
+ testAddTooLongWord(FormatSpec.VERSION4);
+ }
+
+ private void testAddTooLongWord(final int formatVersion) {
+ File dictFile = null;
+ try {
+ dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
+ } catch (IOException e) {
+ fail("IOException while writing an initial dictionary : " + e);
+ }
+ final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
+ 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
+ Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+
+ final StringBuffer stringBuilder = new StringBuffer();
+ for (int i = 0; i < Constants.DICTIONARY_MAX_WORD_LENGTH; i++) {
+ stringBuilder.append('a');
+ }
+ final String validLongWord = stringBuilder.toString();
+ stringBuilder.append('a');
+ final String invalidLongWord = stringBuilder.toString();
+ final int probability = 100;
+ addUnigramWord(binaryDictionary, "aaa", probability);
+ addUnigramWord(binaryDictionary, validLongWord, probability);
+ addUnigramWord(binaryDictionary, invalidLongWord, probability);
+ // Too long short cut.
+ binaryDictionary.addUnigramWord("a", probability, invalidLongWord,
+ 10 /* shortcutProbability */, false /* isNotAWord */, false /* isBlacklisted */,
+ BinaryDictionary.NOT_A_VALID_TIMESTAMP);
+ addUnigramWord(binaryDictionary, "abc", probability);
+ final int updatedProbability = 200;
+ // Update.
+ addUnigramWord(binaryDictionary, validLongWord, updatedProbability);
+ addUnigramWord(binaryDictionary, invalidLongWord, updatedProbability);
+ addUnigramWord(binaryDictionary, "abc", updatedProbability);
+
+ assertEquals(probability, binaryDictionary.getFrequency("aaa"));
+ assertEquals(updatedProbability, binaryDictionary.getFrequency(validLongWord));
+ assertEquals(BinaryDictionary.NOT_A_PROBABILITY,
+ binaryDictionary.getFrequency(invalidLongWord));
+ assertEquals(updatedProbability, binaryDictionary.getFrequency("abc"));
+ dictFile.delete();
+ }
+
+ private void addUnigramWord(final BinaryDictionary binaryDictionary, final String word,
+ final int probability) {
+ binaryDictionary.addUnigramWord(word, probability, "" /* shortcutTarget */,
+ BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
+ false /* isNotAWord */, false /* isBlacklisted */,
+ BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
+ }
+
+ private void addBigramWords(final BinaryDictionary binaryDictionary, final String word0,
+ final String word1, final int probability) {
+ binaryDictionary.addBigramWords(word0, word1, probability,
+ BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
+ }
+
public void testAddUnigramWord() {
+ testAddUnigramWord(FormatSpec.VERSION4);
+ }
+
+ private void testAddUnigramWord(final int formatVersion) {
File dictFile = null;
try {
- dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
+ dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
} catch (IOException e) {
fail("IOException while writing an initial dictionary : " + e);
}
@@ -98,21 +203,21 @@ public class BinaryDictionaryTests extends AndroidTestCase {
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
final int probability = 100;
- binaryDictionary.addUnigramWord("aaa", probability);
+ addUnigramWord(binaryDictionary, "aaa", probability);
// Reallocate and create.
- binaryDictionary.addUnigramWord("aab", probability);
+ addUnigramWord(binaryDictionary, "aab", probability);
// Insert into children.
- binaryDictionary.addUnigramWord("aac", probability);
+ addUnigramWord(binaryDictionary, "aac", probability);
// Make terminal.
- binaryDictionary.addUnigramWord("aa", probability);
+ addUnigramWord(binaryDictionary, "aa", probability);
// Create children.
- binaryDictionary.addUnigramWord("aaaa", probability);
+ addUnigramWord(binaryDictionary, "aaaa", probability);
// Reallocate and make termianl.
- binaryDictionary.addUnigramWord("a", probability);
+ addUnigramWord(binaryDictionary, "a", probability);
final int updatedProbability = 200;
// Update.
- binaryDictionary.addUnigramWord("aaa", updatedProbability);
+ addUnigramWord(binaryDictionary, "aaa", updatedProbability);
assertEquals(probability, binaryDictionary.getFrequency("aab"));
assertEquals(probability, binaryDictionary.getFrequency("aac"));
@@ -125,13 +230,17 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testRandomlyAddUnigramWord() {
+ testRandomlyAddUnigramWord(FormatSpec.VERSION4);
+ }
+
+ private void testRandomlyAddUnigramWord(final int formatVersion) {
final int wordCount = 1000;
final int codePointSetSize = 50;
final long seed = System.currentTimeMillis();
File dictFile = null;
try {
- dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
+ dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
} catch (IOException e) {
fail("IOException while writing an initial dictionary : " + e);
}
@@ -148,7 +257,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
probabilityMap.put(word, random.nextInt(0xFF));
}
for (String word : probabilityMap.keySet()) {
- binaryDictionary.addUnigramWord(word, probabilityMap.get(word));
+ addUnigramWord(binaryDictionary, word, probabilityMap.get(word));
}
for (String word : probabilityMap.keySet()) {
assertEquals(word, (int)probabilityMap.get(word), binaryDictionary.getFrequency(word));
@@ -157,9 +266,13 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testAddBigramWords() {
+ testAddBigramWords(FormatSpec.VERSION4);
+ }
+
+ private void testAddBigramWords(final int formatVersion) {
File dictFile = null;
try {
- dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
+ dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
} catch (IOException e) {
fail("IOException while writing an initial dictionary : " + e);
}
@@ -170,13 +283,13 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final int unigramProbability = 100;
final int bigramProbability = 10;
final int updatedBigramProbability = 15;
- binaryDictionary.addUnigramWord("aaa", unigramProbability);
- binaryDictionary.addUnigramWord("abb", unigramProbability);
- binaryDictionary.addUnigramWord("bcc", unigramProbability);
- binaryDictionary.addBigramWords("aaa", "abb", bigramProbability);
- binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability);
- binaryDictionary.addBigramWords("abb", "aaa", bigramProbability);
- binaryDictionary.addBigramWords("abb", "bcc", bigramProbability);
+ addUnigramWord(binaryDictionary, "aaa", unigramProbability);
+ addUnigramWord(binaryDictionary, "abb", unigramProbability);
+ addUnigramWord(binaryDictionary, "bcc", unigramProbability);
+ addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability);
+ addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability);
+ addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability);
+ addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability);
final int probability = binaryDictionary.calculateProbability(unigramProbability,
bigramProbability);
@@ -189,7 +302,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa"));
assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc"));
- binaryDictionary.addBigramWords("aaa", "abb", updatedBigramProbability);
+ addBigramWords(binaryDictionary, "aaa", "abb", updatedBigramProbability);
final int updatedProbability = binaryDictionary.calculateProbability(unigramProbability,
updatedBigramProbability);
assertEquals(updatedProbability, binaryDictionary.getBigramProbability("aaa", "abb"));
@@ -205,22 +318,26 @@ public class BinaryDictionaryTests extends AndroidTestCase {
binaryDictionary.getBigramProbability("aaa", "aaa"));
// Testing bigram link.
- binaryDictionary.addUnigramWord("abcde", unigramProbability);
- binaryDictionary.addUnigramWord("fghij", unigramProbability);
- binaryDictionary.addBigramWords("abcde", "fghij", bigramProbability);
- binaryDictionary.addUnigramWord("fgh", unigramProbability);
- binaryDictionary.addUnigramWord("abc", unigramProbability);
- binaryDictionary.addUnigramWord("f", unigramProbability);
+ addUnigramWord(binaryDictionary, "abcde", unigramProbability);
+ addUnigramWord(binaryDictionary, "fghij", unigramProbability);
+ addBigramWords(binaryDictionary, "abcde", "fghij", bigramProbability);
+ addUnigramWord(binaryDictionary, "fgh", unigramProbability);
+ addUnigramWord(binaryDictionary, "abc", unigramProbability);
+ addUnigramWord(binaryDictionary, "f", unigramProbability);
assertEquals(probability, binaryDictionary.getBigramProbability("abcde", "fghij"));
assertEquals(Dictionary.NOT_A_PROBABILITY,
binaryDictionary.getBigramProbability("abcde", "fgh"));
- binaryDictionary.addBigramWords("abcde", "fghij", updatedBigramProbability);
+ addBigramWords(binaryDictionary, "abcde", "fghij", updatedBigramProbability);
assertEquals(updatedProbability, binaryDictionary.getBigramProbability("abcde", "fghij"));
dictFile.delete();
}
public void testRandomlyAddBigramWords() {
+ testRandomlyAddBigramWords(FormatSpec.VERSION4);
+ }
+
+ private void testRandomlyAddBigramWords(final int formatVersion) {
final int wordCount = 100;
final int bigramCount = 1000;
final int codePointSetSize = 50;
@@ -229,7 +346,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
File dictFile = null;
try {
- dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
+ dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
} catch (IOException e) {
fail("IOException while writing an initial dictionary : " + e);
}
@@ -249,7 +366,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
words.add(word);
final int unigramProbability = random.nextInt(0xFF);
unigramProbabilities.put(word, unigramProbability);
- binaryDictionary.addUnigramWord(word, unigramProbability);
+ addUnigramWord(binaryDictionary, word, unigramProbability);
}
for (int i = 0; i < bigramCount; i++) {
@@ -262,7 +379,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
bigramWords.add(bigram);
final int bigramProbability = random.nextInt(0xF);
bigramProbabilities.put(bigram, bigramProbability);
- binaryDictionary.addBigramWords(word0, word1, bigramProbability);
+ addBigramWords(binaryDictionary, word0, word1, bigramProbability);
}
for (final Pair<String, String> bigram : bigramWords) {
@@ -278,9 +395,13 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testRemoveBigramWords() {
+ testRemoveBigramWords(FormatSpec.VERSION4);
+ }
+
+ private void testRemoveBigramWords(final int formatVersion) {
File dictFile = null;
try {
- dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
+ dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
} catch (IOException e) {
fail("IOException while writing an initial dictionary : " + e);
}
@@ -289,13 +410,13 @@ public class BinaryDictionaryTests extends AndroidTestCase {
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
final int unigramProbability = 100;
final int bigramProbability = 10;
- binaryDictionary.addUnigramWord("aaa", unigramProbability);
- binaryDictionary.addUnigramWord("abb", unigramProbability);
- binaryDictionary.addUnigramWord("bcc", unigramProbability);
- binaryDictionary.addBigramWords("aaa", "abb", bigramProbability);
- binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability);
- binaryDictionary.addBigramWords("abb", "aaa", bigramProbability);
- binaryDictionary.addBigramWords("abb", "bcc", bigramProbability);
+ addUnigramWord(binaryDictionary, "aaa", unigramProbability);
+ addUnigramWord(binaryDictionary, "abb", unigramProbability);
+ addUnigramWord(binaryDictionary, "bcc", unigramProbability);
+ addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability);
+ addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability);
+ addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability);
+ addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability);
assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb"));
assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc"));
@@ -304,7 +425,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
binaryDictionary.removeBigramWords("aaa", "abb");
assertEquals(false, binaryDictionary.isValidBigram("aaa", "abb"));
- binaryDictionary.addBigramWords("aaa", "abb", bigramProbability);
+ addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability);
assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb"));
@@ -324,9 +445,13 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testFlushDictionary() {
+ testFlushDictionary(FormatSpec.VERSION4);
+ }
+
+ private void testFlushDictionary(final int formatVersion) {
File dictFile = null;
try {
- dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
+ dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
} catch (IOException e) {
fail("IOException while writing an initial dictionary : " + e);
}
@@ -335,8 +460,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
final int probability = 100;
- binaryDictionary.addUnigramWord("aaa", probability);
- binaryDictionary.addUnigramWord("abcd", probability);
+ addUnigramWord(binaryDictionary, "aaa", probability);
+ addUnigramWord(binaryDictionary, "abcd", probability);
// Close without flushing.
binaryDictionary.close();
@@ -347,8 +472,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("aaa"));
assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("abcd"));
- binaryDictionary.addUnigramWord("aaa", probability);
- binaryDictionary.addUnigramWord("abcd", probability);
+ addUnigramWord(binaryDictionary, "aaa", probability);
+ addUnigramWord(binaryDictionary, "abcd", probability);
binaryDictionary.flush();
binaryDictionary.close();
@@ -358,7 +483,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
assertEquals(probability, binaryDictionary.getFrequency("aaa"));
assertEquals(probability, binaryDictionary.getFrequency("abcd"));
- binaryDictionary.addUnigramWord("bcde", probability);
+ addUnigramWord(binaryDictionary, "bcde", probability);
binaryDictionary.flush();
binaryDictionary.close();
@@ -372,9 +497,13 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testFlushWithGCDictionary() {
+ testFlushWithGCDictionary(FormatSpec.VERSION4);
+ }
+
+ private void testFlushWithGCDictionary(final int formatVersion) {
File dictFile = null;
try {
- dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
+ dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
} catch (IOException e) {
fail("IOException while writing an initial dictionary : " + e);
}
@@ -384,13 +513,13 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final int unigramProbability = 100;
final int bigramProbability = 10;
- binaryDictionary.addUnigramWord("aaa", unigramProbability);
- binaryDictionary.addUnigramWord("abb", unigramProbability);
- binaryDictionary.addUnigramWord("bcc", unigramProbability);
- binaryDictionary.addBigramWords("aaa", "abb", bigramProbability);
- binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability);
- binaryDictionary.addBigramWords("abb", "aaa", bigramProbability);
- binaryDictionary.addBigramWords("abb", "bcc", bigramProbability);
+ addUnigramWord(binaryDictionary, "aaa", unigramProbability);
+ addUnigramWord(binaryDictionary, "abb", unigramProbability);
+ addUnigramWord(binaryDictionary, "bcc", unigramProbability);
+ addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability);
+ addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability);
+ addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability);
+ addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability);
binaryDictionary.flushWithGC();
binaryDictionary.close();
@@ -415,8 +544,12 @@ public class BinaryDictionaryTests extends AndroidTestCase {
dictFile.delete();
}
- // TODO: Evaluate performance of GC
public void testAddBigramWordsAndFlashWithGC() {
+ testAddBigramWordsAndFlashWithGC(FormatSpec.VERSION4);
+ }
+
+ // TODO: Evaluate performance of GC
+ private void testAddBigramWordsAndFlashWithGC(final int formatVersion) {
final int wordCount = 100;
final int bigramCount = 1000;
final int codePointSetSize = 30;
@@ -425,7 +558,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
File dictFile = null;
try {
- dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
+ dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
} catch (IOException e) {
fail("IOException while writing an initial dictionary : " + e);
}
@@ -446,7 +579,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
words.add(word);
final int unigramProbability = random.nextInt(0xFF);
unigramProbabilities.put(word, unigramProbability);
- binaryDictionary.addUnigramWord(word, unigramProbability);
+ addUnigramWord(binaryDictionary, word, unigramProbability);
}
for (int i = 0; i < bigramCount; i++) {
@@ -459,7 +592,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
bigramWords.add(bigram);
final int bigramProbability = random.nextInt(0xF);
bigramProbabilities.put(bigram, bigramProbability);
- binaryDictionary.addBigramWords(word0, word1, bigramProbability);
+ addBigramWords(binaryDictionary, word0, word1, bigramProbability);
}
binaryDictionary.flushWithGC();
@@ -480,7 +613,11 @@ public class BinaryDictionaryTests extends AndroidTestCase {
dictFile.delete();
}
- public void testRandomOperetionsAndFlashWithGC() {
+ public void testRandomOperationsAndFlashWithGC() {
+ testRandomOperationsAndFlashWithGC(FormatSpec.VERSION4);
+ }
+
+ private void testRandomOperationsAndFlashWithGC(final int formatVersion) {
final int flashWithGCIterationCount = 50;
final int operationCountInEachIteration = 200;
final int initialUnigramCount = 100;
@@ -494,7 +631,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
File dictFile = null;
try {
- dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
+ dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
} catch (IOException e) {
fail("IOException while writing an initial dictionary : " + e);
}
@@ -513,7 +650,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
words.add(word);
final int unigramProbability = random.nextInt(0xFF);
unigramProbabilities.put(word, unigramProbability);
- binaryDictionary.addUnigramWord(word, unigramProbability);
+ addUnigramWord(binaryDictionary, word, unigramProbability);
}
binaryDictionary.flushWithGC();
binaryDictionary.close();
@@ -529,7 +666,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
words.add(word);
final int unigramProbability = random.nextInt(0xFF);
unigramProbabilities.put(word, unigramProbability);
- binaryDictionary.addUnigramWord(word, unigramProbability);
+ addUnigramWord(binaryDictionary, word, unigramProbability);
}
// Add bigram.
if (random.nextFloat() < addBigramProb && words.size() > 2) {
@@ -547,7 +684,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
bigramWords.add(bigram);
bigramProbabilities.put(bigram, bigramProbability);
- binaryDictionary.addBigramWords(word0, word1, bigramProbability);
+ addBigramWords(binaryDictionary, word0, word1, bigramProbability);
}
// Remove bigram.
if (random.nextFloat() < removeBigramProb && !bigramWords.isEmpty()) {
@@ -588,6 +725,10 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testAddManyUnigramsAndFlushWithGC() {
+ testAddManyUnigramsAndFlushWithGC(FormatSpec.VERSION4);
+ }
+
+ private void testAddManyUnigramsAndFlushWithGC(final int formatVersion) {
final int flashWithGCIterationCount = 3;
final int codePointSetSize = 50;
@@ -596,7 +737,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
File dictFile = null;
try {
- dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
+ dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
} catch (IOException e) {
fail("IOException while writing an initial dictionary : " + e);
}
@@ -615,7 +756,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
words.add(word);
final int unigramProbability = random.nextInt(0xFF);
unigramProbabilities.put(word, unigramProbability);
- binaryDictionary.addUnigramWord(word, unigramProbability);
+ addUnigramWord(binaryDictionary, word, unigramProbability);
}
for (int j = 0; j < words.size(); j++) {
@@ -632,6 +773,10 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testUnigramAndBigramCount() {
+ testUnigramAndBigramCount(FormatSpec.VERSION4);
+ }
+
+ private void testUnigramAndBigramCount(final int formatVersion) {
final int flashWithGCIterationCount = 10;
final int codePointSetSize = 50;
final int unigramCountPerIteration = 1000;
@@ -641,7 +786,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
File dictFile = null;
try {
- dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
+ dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
} catch (IOException e) {
fail("IOException while writing an initial dictionary : " + e);
}
@@ -659,7 +804,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final String word = CodePointUtils.generateWord(random, codePointSet);
words.add(word);
final int unigramProbability = random.nextInt(0xFF);
- binaryDictionary.addUnigramWord(word, unigramProbability);
+ addUnigramWord(binaryDictionary, word, unigramProbability);
}
for (int j = 0; j < bigramCountPerIteration; j++) {
final String word0 = words.get(random.nextInt(words.size()));
@@ -669,20 +814,413 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
bigrams.add(new Pair<String, String>(word0, word1));
final int bigramProbability = random.nextInt(0xF);
- binaryDictionary.addBigramWords(word0, word1, bigramProbability);
+ addBigramWords(binaryDictionary, word0, word1, bigramProbability);
}
assertEquals(new HashSet<String>(words).size(), Integer.parseInt(
- binaryDictionary.getPropertyForTests(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
+ binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
assertEquals(new HashSet<Pair<String, String>>(bigrams).size(), Integer.parseInt(
- binaryDictionary.getPropertyForTests(BinaryDictionary.BIGRAM_COUNT_QUERY)));
+ binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY)));
binaryDictionary.flushWithGC();
assertEquals(new HashSet<String>(words).size(), Integer.parseInt(
- binaryDictionary.getPropertyForTests(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
+ binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
assertEquals(new HashSet<Pair<String, String>>(bigrams).size(), Integer.parseInt(
- binaryDictionary.getPropertyForTests(BinaryDictionary.BIGRAM_COUNT_QUERY)));
+ binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY)));
binaryDictionary.close();
}
dictFile.delete();
}
+
+ public void testAddMultipleDictionaryEntries() {
+ testAddMultipleDictionaryEntries(FormatSpec.VERSION4);
+ }
+
+ private void testAddMultipleDictionaryEntries(final int formatVersion) {
+ final int codePointSetSize = 20;
+ final int lmParamCount = 1000;
+ final double bigramContinueRate = 0.9;
+ final long seed = System.currentTimeMillis();
+ final Random random = new Random(seed);
+
+ File dictFile = null;
+ try {
+ dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
+ } catch (IOException e) {
+ fail("IOException while writing an initial dictionary : " + e);
+ }
+
+ final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
+ final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>();
+ final HashMap<Pair<String, String>, Integer> bigramProbabilities =
+ new HashMap<Pair<String, String>, Integer>();
+
+ final LanguageModelParam[] languageModelParams = new LanguageModelParam[lmParamCount];
+ String prevWord = null;
+ for (int i = 0; i < languageModelParams.length; i++) {
+ final String word = CodePointUtils.generateWord(random, codePointSet);
+ final int probability = random.nextInt(0xFF);
+ final int bigramProbability = random.nextInt(0xF);
+ unigramProbabilities.put(word, probability);
+ if (prevWord == null) {
+ languageModelParams[i] = new LanguageModelParam(word, probability,
+ BinaryDictionary.NOT_A_VALID_TIMESTAMP);
+ } else {
+ languageModelParams[i] = new LanguageModelParam(prevWord, word, probability,
+ bigramProbability, BinaryDictionary.NOT_A_VALID_TIMESTAMP);
+ bigramProbabilities.put(new Pair<String, String>(prevWord, word),
+ bigramProbability);
+ }
+ prevWord = (random.nextDouble() < bigramContinueRate) ? word : null;
+ }
+
+ final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
+ 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
+ Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+ binaryDictionary.addMultipleDictionaryEntries(languageModelParams);
+
+ for (Map.Entry<String, Integer> entry : unigramProbabilities.entrySet()) {
+ assertEquals((int)entry.getValue(), binaryDictionary.getFrequency(entry.getKey()));
+ }
+
+ for (Map.Entry<Pair<String, String>, Integer> entry : bigramProbabilities.entrySet()) {
+ final String word0 = entry.getKey().first;
+ final String word1 = entry.getKey().second;
+ final int unigramProbability = unigramProbabilities.get(word1);
+ final int bigramProbability = entry.getValue();
+ final int probability = binaryDictionary.calculateProbability(
+ unigramProbability, bigramProbability);
+ assertEquals(probability, binaryDictionary.getBigramProbability(word0, word1));
+ }
+ }
+
+ public void testGetWordProperties() {
+ testGetWordProperties(FormatSpec.VERSION4);
+ }
+
+ private void testGetWordProperties(final int formatVersion) {
+ final long seed = System.currentTimeMillis();
+ final Random random = new Random(seed);
+ final int UNIGRAM_COUNT = 1000;
+ final int BIGRAM_COUNT = 1000;
+ final int codePointSetSize = 20;
+ final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
+
+ File dictFile = null;
+ try {
+ dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
+ } catch (IOException e) {
+ fail("IOException while writing an initial dictionary : " + e);
+ }
+ final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
+ 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
+ Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+
+ final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord");
+ assertFalse(invalidWordProperty.isValid());
+
+ final ArrayList<String> words = new ArrayList<String>();
+ final HashMap<String, Integer> wordProbabilities = new HashMap<String, Integer>();
+ final HashMap<String, HashSet<String>> bigrams = new HashMap<String, HashSet<String>>();
+ final HashMap<Pair<String, String>, Integer> bigramProbabilities =
+ new HashMap<Pair<String, String>, Integer>();
+
+ for (int i = 0; i < UNIGRAM_COUNT; i++) {
+ final String word = CodePointUtils.generateWord(random, codePointSet);
+ final int unigramProbability = random.nextInt(0xFF);
+ final boolean isNotAWord = random.nextBoolean();
+ final boolean isBlacklisted = random.nextBoolean();
+ // TODO: Add tests for historical info.
+ binaryDictionary.addUnigramWord(word, unigramProbability,
+ null /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY,
+ isNotAWord, isBlacklisted, BinaryDictionary.NOT_A_VALID_TIMESTAMP);
+ if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
+ binaryDictionary.flushWithGC();
+ }
+ words.add(word);
+ wordProbabilities.put(word, unigramProbability);
+ final WordProperty wordProperty = binaryDictionary.getWordProperty(word);
+ assertEquals(word, wordProperty.mWord);
+ assertTrue(wordProperty.isValid());
+ assertEquals(isNotAWord, wordProperty.mIsNotAWord);
+ assertEquals(isBlacklisted, wordProperty.mIsBlacklistEntry);
+ assertEquals(false, wordProperty.mHasBigrams);
+ assertEquals(false, wordProperty.mHasShortcuts);
+ assertEquals(unigramProbability, wordProperty.mProbabilityInfo.mProbability);
+ assertTrue(wordProperty.mShortcutTargets.isEmpty());
+ }
+
+ for (int i = 0; i < BIGRAM_COUNT; i++) {
+ final int word0Index = random.nextInt(wordProbabilities.size());
+ final int word1Index = random.nextInt(wordProbabilities.size());
+ if (word0Index == word1Index) {
+ continue;
+ }
+ final String word0 = words.get(word0Index);
+ final String word1 = words.get(word1Index);
+ final int bigramProbability = random.nextInt(0xF);
+ binaryDictionary.addBigramWords(word0, word1, bigramProbability,
+ BinaryDictionary.NOT_A_VALID_TIMESTAMP);
+ if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
+ binaryDictionary.flushWithGC();
+ }
+ if (!bigrams.containsKey(word0)) {
+ final HashSet<String> bigramWord1s = new HashSet<String>();
+ bigrams.put(word0, bigramWord1s);
+ }
+ bigrams.get(word0).add(word1);
+ bigramProbabilities.put(new Pair<String, String>(word0, word1), bigramProbability);
+ }
+
+ for (int i = 0; i < words.size(); i++) {
+ final String word0 = words.get(i);
+ if (!bigrams.containsKey(word0)) {
+ continue;
+ }
+ final HashSet<String> bigramWord1s = bigrams.get(word0);
+ final WordProperty wordProperty = binaryDictionary.getWordProperty(word0);
+ assertEquals(bigramWord1s.size(), wordProperty.mBigrams.size());
+ for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
+ final String word1 = wordProperty.mBigrams.get(j).mWord;
+ assertTrue(bigramWord1s.contains(word1));
+ final int bigramProbabilityDelta = bigramProbabilities.get(
+ new Pair<String, String>(word0, word1));
+ final int unigramProbability = wordProbabilities.get(word1);
+ final int bigramProbablity = binaryDictionary.calculateProbability(
+ unigramProbability, bigramProbabilityDelta);
+ assertEquals(wordProperty.mBigrams.get(j).getProbability(), bigramProbablity);
+ }
+ }
+ }
+
+ public void testIterateAllWords() {
+ testIterateAllWords(FormatSpec.VERSION4);
+ }
+
+ private void testIterateAllWords(final int formatVersion) {
+ final long seed = System.currentTimeMillis();
+ final Random random = new Random(seed);
+ final int UNIGRAM_COUNT = 1000;
+ final int BIGRAM_COUNT = 1000;
+ final int codePointSetSize = 20;
+ final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
+
+ File dictFile = null;
+ try {
+ dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
+ } catch (IOException e) {
+ fail("IOException while writing an initial dictionary : " + e);
+ }
+ final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
+ 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
+ Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+
+ final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord");
+ assertFalse(invalidWordProperty.isValid());
+
+ final ArrayList<String> words = new ArrayList<String>();
+ final HashMap<String, Integer> wordProbabilitiesToCheckLater =
+ new HashMap<String, Integer>();
+ final HashMap<String, HashSet<String>> bigrams = new HashMap<String, HashSet<String>>();
+ final HashMap<Pair<String, String>, Integer> bigramProbabilitiesToCheckLater =
+ new HashMap<Pair<String, String>, Integer>();
+
+ for (int i = 0; i < UNIGRAM_COUNT; i++) {
+ final String word = CodePointUtils.generateWord(random, codePointSet);
+ final int unigramProbability = random.nextInt(0xFF);
+ addUnigramWord(binaryDictionary, word, unigramProbability);
+ if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
+ binaryDictionary.flushWithGC();
+ }
+ words.add(word);
+ wordProbabilitiesToCheckLater.put(word, unigramProbability);
+ }
+
+ for (int i = 0; i < BIGRAM_COUNT; i++) {
+ final int word0Index = random.nextInt(wordProbabilitiesToCheckLater.size());
+ final int word1Index = random.nextInt(wordProbabilitiesToCheckLater.size());
+ if (word0Index == word1Index) {
+ continue;
+ }
+ final String word0 = words.get(word0Index);
+ final String word1 = words.get(word1Index);
+ final int bigramProbability = random.nextInt(0xF);
+ binaryDictionary.addBigramWords(word0, word1, bigramProbability,
+ BinaryDictionary.NOT_A_VALID_TIMESTAMP);
+ if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
+ binaryDictionary.flushWithGC();
+ }
+ if (!bigrams.containsKey(word0)) {
+ final HashSet<String> bigramWord1s = new HashSet<String>();
+ bigrams.put(word0, bigramWord1s);
+ }
+ bigrams.get(word0).add(word1);
+ bigramProbabilitiesToCheckLater.put(
+ new Pair<String, String>(word0, word1), bigramProbability);
+ }
+
+ final HashSet<String> wordSet = new HashSet<String>(words);
+ final HashSet<Pair<String, String>> bigramSet =
+ new HashSet<Pair<String,String>>(bigramProbabilitiesToCheckLater.keySet());
+ int token = 0;
+ do {
+ final BinaryDictionary.GetNextWordPropertyResult result =
+ binaryDictionary.getNextWordProperty(token);
+ final WordProperty wordProperty = result.mWordProperty;
+ final String word0 = wordProperty.mWord;
+ assertEquals((int)wordProbabilitiesToCheckLater.get(word0),
+ wordProperty.mProbabilityInfo.mProbability);
+ wordSet.remove(word0);
+ final HashSet<String> bigramWord1s = bigrams.get(word0);
+ for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
+ final String word1 = wordProperty.mBigrams.get(j).mWord;
+ assertTrue(bigramWord1s.contains(word1));
+ final int unigramProbability = wordProbabilitiesToCheckLater.get(word1);
+ final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
+ final int bigramProbabilityDelta = bigramProbabilitiesToCheckLater.get(bigram);
+ final int bigramProbablity = binaryDictionary.calculateProbability(
+ unigramProbability, bigramProbabilityDelta);
+ assertEquals(wordProperty.mBigrams.get(j).getProbability(), bigramProbablity);
+ bigramSet.remove(bigram);
+ }
+ token = result.mNextToken;
+ } while (token != 0);
+ assertTrue(wordSet.isEmpty());
+ assertTrue(bigramSet.isEmpty());
+ }
+
+ public void testAddShortcuts() {
+ testAddShortcuts(FormatSpec.VERSION4);
+ }
+
+ private void testAddShortcuts(final int formatVersion) {
+ File dictFile = null;
+ try {
+ dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
+ } catch (IOException e) {
+ fail("IOException while writing an initial dictionary : " + e);
+ }
+ final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
+ 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
+ Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+
+ final int unigramProbability = 100;
+ final int shortcutProbability = 10;
+ binaryDictionary.addUnigramWord("aaa", unigramProbability, "zzz",
+ shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
+ 0 /* timestamp */);
+ WordProperty wordProperty = binaryDictionary.getWordProperty("aaa");
+ assertEquals(1, wordProperty.mShortcutTargets.size());
+ assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord);
+ assertEquals(shortcutProbability, wordProperty.mShortcutTargets.get(0).getProbability());
+ final int updatedShortcutProbability = 2;
+ binaryDictionary.addUnigramWord("aaa", unigramProbability, "zzz",
+ updatedShortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
+ 0 /* timestamp */);
+ wordProperty = binaryDictionary.getWordProperty("aaa");
+ assertEquals(1, wordProperty.mShortcutTargets.size());
+ assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord);
+ assertEquals(updatedShortcutProbability,
+ wordProperty.mShortcutTargets.get(0).getProbability());
+ binaryDictionary.addUnigramWord("aaa", unigramProbability, "yyy",
+ shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
+ 0 /* timestamp */);
+ final HashMap<String, Integer> shortcutTargets = new HashMap<String, Integer>();
+ shortcutTargets.put("zzz", updatedShortcutProbability);
+ shortcutTargets.put("yyy", shortcutProbability);
+ wordProperty = binaryDictionary.getWordProperty("aaa");
+ assertEquals(2, wordProperty.mShortcutTargets.size());
+ for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
+ assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord));
+ assertEquals((int)shortcutTargets.get(shortcutTarget.mWord),
+ shortcutTarget.getProbability());
+ shortcutTargets.remove(shortcutTarget.mWord);
+ }
+ shortcutTargets.put("zzz", updatedShortcutProbability);
+ shortcutTargets.put("yyy", shortcutProbability);
+ binaryDictionary.flushWithGC();
+ wordProperty = binaryDictionary.getWordProperty("aaa");
+ assertEquals(2, wordProperty.mShortcutTargets.size());
+ for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
+ assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord));
+ assertEquals((int)shortcutTargets.get(shortcutTarget.mWord),
+ shortcutTarget.getProbability());
+ shortcutTargets.remove(shortcutTarget.mWord);
+ }
+ }
+
+ public void testAddManyShortcuts() {
+ testAddManyShortcuts(FormatSpec.VERSION4);
+ }
+
+ private void testAddManyShortcuts(final int formatVersion) {
+ final long seed = System.currentTimeMillis();
+ final Random random = new Random(seed);
+ final int UNIGRAM_COUNT = 1000;
+ final int SHORTCUT_COUNT = 10000;
+ final int codePointSetSize = 20;
+ final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
+
+ final ArrayList<String> words = new ArrayList<String>();
+ final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>();
+ final HashMap<String, HashMap<String, Integer>> shortcutTargets =
+ new HashMap<String, HashMap<String, Integer>>();
+
+ File dictFile = null;
+ try {
+ dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
+ } catch (IOException e) {
+ fail("IOException while writing an initial dictionary : " + e);
+ }
+ final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
+ 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
+ Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+
+ for (int i = 0; i < UNIGRAM_COUNT; i++) {
+ final String word = CodePointUtils.generateWord(random, codePointSet);
+ final int unigramProbability = random.nextInt(0xFF);
+ addUnigramWord(binaryDictionary, word, unigramProbability);
+ words.add(word);
+ unigramProbabilities.put(word, unigramProbability);
+ if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
+ binaryDictionary.flushWithGC();
+ }
+ }
+ for (int i = 0; i < SHORTCUT_COUNT; i++) {
+ final String shortcutTarget = CodePointUtils.generateWord(random, codePointSet);
+ final int shortcutProbability = random.nextInt(0xF);
+ final String word = words.get(random.nextInt(words.size()));
+ final int unigramProbability = unigramProbabilities.get(word);
+ binaryDictionary.addUnigramWord(word, unigramProbability, shortcutTarget,
+ shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
+ 0 /* timestamp */);
+ if (shortcutTargets.containsKey(word)) {
+ final HashMap<String, Integer> shortcutTargetsOfWord = shortcutTargets.get(word);
+ shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability);
+ } else {
+ final HashMap<String, Integer> shortcutTargetsOfWord =
+ new HashMap<String, Integer>();
+ shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability);
+ shortcutTargets.put(word, shortcutTargetsOfWord);
+ }
+ if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
+ binaryDictionary.flushWithGC();
+ }
+ }
+
+ for (final String word : words) {
+ final WordProperty wordProperty = binaryDictionary.getWordProperty(word);
+ assertEquals((int)unigramProbabilities.get(word),
+ wordProperty.mProbabilityInfo.mProbability);
+ if (!shortcutTargets.containsKey(word)) {
+ // The word does not have shortcut targets.
+ continue;
+ }
+ assertEquals(shortcutTargets.get(word).size(), wordProperty.mShortcutTargets.size());
+ for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
+ final String targetCodePonts = shortcutTarget.mWord;
+ assertEquals((int)shortcutTargets.get(word).get(targetCodePonts),
+ shortcutTarget.getProbability());
+ }
+ }
+ }
}