aboutsummaryrefslogtreecommitdiffstats
path: root/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
diff options
context:
space:
mode:
Diffstat (limited to 'tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java')
-rw-r--r--tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java1014
1 files changed, 191 insertions, 823 deletions
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
index 6ba18d665..e96c934cb 100644
--- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
+++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
@@ -21,14 +21,14 @@ import android.test.suitebuilder.annotation.LargeTest;
import android.text.TextUtils;
import android.util.Pair;
-import com.android.inputmethod.latin.PrevWordsInfo.WordInfo;
-import com.android.inputmethod.latin.makedict.CodePointUtils;
+import com.android.inputmethod.latin.NgramContext.WordInfo;
+import com.android.inputmethod.latin.common.CodePointUtils;
+import com.android.inputmethod.latin.common.FileUtils;
+import com.android.inputmethod.latin.makedict.DictionaryHeader;
import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.makedict.WeightedString;
import com.android.inputmethod.latin.makedict.WordProperty;
import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
-import com.android.inputmethod.latin.utils.FileUtils;
-import com.android.inputmethod.latin.utils.LanguageModelParam;
import java.io.File;
import java.io.IOException;
@@ -36,103 +36,100 @@ import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
-import java.util.Map;
import java.util.Random;
-// TODO Use the seed passed as an argument for makedict test.
@LargeTest
public class BinaryDictionaryTests extends AndroidTestCase {
private static final String TEST_DICT_FILE_EXTENSION = ".testDict";
private static final String TEST_LOCALE = "test";
- private static final int[] DICT_FORMAT_VERSIONS =
- new int[] { FormatSpec.VERSION4, FormatSpec.VERSION4_DEV };
+ private static final String DICTIONARY_ID = "TestBinaryDictionary";
- private static boolean canCheckBigramProbability(final int formatVersion) {
- return formatVersion > FormatSpec.VERSION401;
+ private HashSet<File> mDictFilesToBeDeleted = new HashSet<>();
+
+ @Override
+ protected void setUp() throws Exception {
+ super.setUp();
+ mDictFilesToBeDeleted.clear();
+ }
+
+ @Override
+ protected void tearDown() throws Exception {
+ for (final File dictFile : mDictFilesToBeDeleted) {
+ dictFile.delete();
+ }
+ mDictFilesToBeDeleted.clear();
+ super.tearDown();
}
- private static boolean supportsBeginningOfSentence(final int formatVersion) {
- return formatVersion > FormatSpec.VERSION401;
+ private File createEmptyDictionaryAndGetFile(final int formatVersion) {
+ return createEmptyDictionaryWithAttributesAndGetFile(formatVersion,
+ new HashMap<String, String>());
}
- private File createEmptyDictionaryAndGetFile(final String dictId,
- final int formatVersion) throws IOException {
- if (formatVersion == FormatSpec.VERSION4
- || formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING
- || formatVersion == FormatSpec.VERSION4_DEV) {
- return createEmptyVer4DictionaryAndGetFile(dictId, formatVersion);
- } else {
- throw new IOException("Dictionary format version " + formatVersion
- + " is not supported.");
+ private File createEmptyDictionaryWithAttributesAndGetFile(final int formatVersion,
+ final HashMap<String, String> attributeMap) {
+ try {
+ final File dictFile = createEmptyVer4DictionaryAndGetFile(formatVersion,
+ attributeMap);
+ mDictFilesToBeDeleted.add(dictFile);
+ return dictFile;
+ } catch (final IOException e) {
+ fail(e.toString());
}
+ return null;
}
- private File createEmptyVer4DictionaryAndGetFile(final String dictId,
- final int formatVersion) throws IOException {
- final File file = File.createTempFile(dictId, TEST_DICT_FILE_EXTENSION,
+ private File createEmptyVer4DictionaryAndGetFile(final int formatVersion,
+ final HashMap<String, String> attributeMap) throws IOException {
+ final File file = File.createTempFile(DICTIONARY_ID, TEST_DICT_FILE_EXTENSION,
getContext().getCacheDir());
file.delete();
file.mkdir();
- Map<String, String> attributeMap = new HashMap<>();
if (BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), formatVersion,
Locale.ENGLISH, attributeMap)) {
return file;
- } else {
- throw new IOException("Empty dictionary " + file.getAbsolutePath()
- + " cannot be created. Format version: " + formatVersion);
}
+ throw new IOException("Empty dictionary " + file.getAbsolutePath()
+ + " cannot be created. Format version: " + formatVersion);
}
- public void testIsValidDictionary() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- testIsValidDictionary(formatVersion);
- }
+ private static BinaryDictionary getBinaryDictionary(final File dictFile) {
+ return new BinaryDictionary(dictFile.getAbsolutePath(),
+ 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
+ Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
}
- private void testIsValidDictionary(final int formatVersion) {
- File dictFile = null;
- try {
- dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
- } catch (IOException e) {
- fail("IOException while writing an initial dictionary : " + e);
- }
- BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
+ private BinaryDictionary getEmptyBinaryDictionary(final int formatVersion) {
+ final File dictFile = createEmptyDictionaryAndGetFile(formatVersion);
+ return new BinaryDictionary(dictFile.getAbsolutePath(),
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+ }
+
+ public void testIsValidDictionary() {
+ final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403);
+ BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);
assertTrue("binaryDictionary must be valid for existing valid dictionary file.",
binaryDictionary.isValidDictionary());
binaryDictionary.close();
assertFalse("binaryDictionary must be invalid after closing.",
binaryDictionary.isValidDictionary());
FileUtils.deleteRecursively(dictFile);
- binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */,
- dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(),
- TEST_LOCALE, true /* isUpdatable */);
+ binaryDictionary = getBinaryDictionary(dictFile);
assertFalse("binaryDictionary must be invalid for not existing dictionary file.",
binaryDictionary.isValidDictionary());
binaryDictionary.close();
}
public void testConstructingDictionaryOnMemory() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- testConstructingDictionaryOnMemory(formatVersion);
- }
- }
-
- private void testConstructingDictionaryOnMemory(final int formatVersion) {
- File dictFile = null;
- try {
- dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
- } catch (IOException e) {
- fail("IOException while writing an initial dictionary : " + e);
- }
+ final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403);
FileUtils.deleteRecursively(dictFile);
assertFalse(dictFile.exists());
- BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
- true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, formatVersion,
- new HashMap<String, String>());
+ final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
+ true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE,
+ FormatSpec.VERSION403, new HashMap<String, String>());
assertTrue(binaryDictionary.isValidDictionary());
- assertEquals(formatVersion, binaryDictionary.getFormatVersion());
+ assertEquals(FormatSpec.VERSION403, binaryDictionary.getFormatVersion());
final int probability = 100;
addUnigramWord(binaryDictionary, "word", probability);
assertEquals(probability, binaryDictionary.getFrequency("word"));
@@ -140,31 +137,15 @@ public class BinaryDictionaryTests extends AndroidTestCase {
binaryDictionary.flush();
assertTrue(dictFile.exists());
assertTrue(binaryDictionary.isValidDictionary());
- assertEquals(formatVersion, binaryDictionary.getFormatVersion());
+ assertEquals(FormatSpec.VERSION403, binaryDictionary.getFormatVersion());
assertEquals(probability, binaryDictionary.getFrequency("word"));
binaryDictionary.close();
- dictFile.delete();
}
public void testAddTooLongWord() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- testAddTooLongWord(formatVersion);
- }
- }
-
- private void testAddTooLongWord(final int formatVersion) {
- File dictFile = null;
- try {
- dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
- } catch (IOException e) {
- fail("IOException while writing an initial dictionary : " + e);
- }
- final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
- 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
- Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
-
+ final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);
final StringBuffer stringBuilder = new StringBuffer();
- for (int i = 0; i < Constants.DICTIONARY_MAX_WORD_LENGTH; i++) {
+ for (int i = 0; i < BinaryDictionary.DICTIONARY_MAX_WORD_LENGTH; i++) {
stringBuilder.append('a');
}
final String validLongWord = stringBuilder.toString();
@@ -175,9 +156,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
addUnigramWord(binaryDictionary, validLongWord, probability);
addUnigramWord(binaryDictionary, invalidLongWord, probability);
// Too long short cut.
- binaryDictionary.addUnigramEntry("a", probability, invalidLongWord,
- 10 /* shortcutProbability */, false /* isBeginningOfSentence */,
- false /* isNotAWord */, false /* isBlacklisted */,
+ binaryDictionary.addUnigramEntry("a", probability, false /* isBeginningOfSentence */,
+ false /* isNotAWord */, false /* isPossiblyOffensive */,
BinaryDictionary.NOT_A_VALID_TIMESTAMP);
addUnigramWord(binaryDictionary, "abc", probability);
final int updatedProbability = 200;
@@ -188,58 +168,49 @@ public class BinaryDictionaryTests extends AndroidTestCase {
assertEquals(probability, binaryDictionary.getFrequency("aaa"));
assertEquals(updatedProbability, binaryDictionary.getFrequency(validLongWord));
- assertEquals(BinaryDictionary.NOT_A_PROBABILITY,
- binaryDictionary.getFrequency(invalidLongWord));
+ assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency(invalidLongWord));
assertEquals(updatedProbability, binaryDictionary.getFrequency("abc"));
- dictFile.delete();
}
private static void addUnigramWord(final BinaryDictionary binaryDictionary, final String word,
final int probability) {
- binaryDictionary.addUnigramEntry(word, probability, "" /* shortcutTarget */,
- BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
+ binaryDictionary.addUnigramEntry(word, probability,
false /* isBeginningOfSentence */, false /* isNotAWord */,
- false /* isBlacklisted */, BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
+ false /* isPossiblyOffensive */,
+ BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
}
private static void addBigramWords(final BinaryDictionary binaryDictionary, final String word0,
final String word1, final int probability) {
- binaryDictionary.addNgramEntry(new PrevWordsInfo(new WordInfo(word0)), word1, probability,
+ binaryDictionary.addNgramEntry(new NgramContext(new WordInfo(word0)), word1, probability,
BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
}
- private static boolean isValidBigram(final BinaryDictionary binaryDictionary,
- final String word0, final String word1) {
- return binaryDictionary.isValidNgram(new PrevWordsInfo(new WordInfo(word0)), word1);
+ private static void addTrigramEntry(final BinaryDictionary binaryDictionary, final String word0,
+ final String word1, final String word2, final int probability) {
+ binaryDictionary.addNgramEntry(
+ new NgramContext(new WordInfo(word1), new WordInfo(word0)), word2,
+ probability, BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
}
- private static void removeBigramEntry(final BinaryDictionary binaryDictionary,
+ private static boolean isValidBigram(final BinaryDictionary binaryDictionary,
final String word0, final String word1) {
- binaryDictionary.removeNgramEntry(new PrevWordsInfo(new WordInfo(word0)), word1);
+ return binaryDictionary.isValidNgram(new NgramContext(new WordInfo(word0)), word1);
}
private static int getBigramProbability(final BinaryDictionary binaryDictionary,
final String word0, final String word1) {
- return binaryDictionary.getNgramProbability(new PrevWordsInfo(new WordInfo(word0)), word1);
+ return binaryDictionary.getNgramProbability(new NgramContext(new WordInfo(word0)), word1);
}
- public void testAddUnigramWord() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- testAddUnigramWord(formatVersion);
- }
+ private static int getTrigramProbability(final BinaryDictionary binaryDictionary,
+ final String word0, final String word1, final String word2) {
+ return binaryDictionary.getNgramProbability(
+ new NgramContext(new WordInfo(word1), new WordInfo(word0)), word2);
}
- private void testAddUnigramWord(final int formatVersion) {
- File dictFile = null;
- try {
- dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
- } catch (IOException e) {
- fail("IOException while writing an initial dictionary : " + e);
- }
- BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
- 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
- Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
-
+ public void testAddUnigramWord() {
+ final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);
final int probability = 100;
addUnigramWord(binaryDictionary, "aaa", probability);
// Reallocate and create.
@@ -263,30 +234,13 @@ public class BinaryDictionaryTests extends AndroidTestCase {
assertEquals(probability, binaryDictionary.getFrequency("aaaa"));
assertEquals(probability, binaryDictionary.getFrequency("a"));
assertEquals(updatedProbability, binaryDictionary.getFrequency("aaa"));
-
- dictFile.delete();
}
public void testRandomlyAddUnigramWord() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- testRandomlyAddUnigramWord(formatVersion);
- }
- }
-
- private void testRandomlyAddUnigramWord(final int formatVersion) {
final int wordCount = 1000;
final int codePointSetSize = 50;
final long seed = System.currentTimeMillis();
-
- File dictFile = null;
- try {
- dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
- } catch (IOException e) {
- fail("IOException while writing an initial dictionary : " + e);
- }
- BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
- 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
- Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+ final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);
final HashMap<String, Integer> probabilityMap = new HashMap<>();
// Test a word that isn't contained within the dictionary.
@@ -302,25 +256,10 @@ public class BinaryDictionaryTests extends AndroidTestCase {
for (String word : probabilityMap.keySet()) {
assertEquals(word, (int)probabilityMap.get(word), binaryDictionary.getFrequency(word));
}
- dictFile.delete();
}
public void testAddBigramWords() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- testAddBigramWords(formatVersion);
- }
- }
-
- private void testAddBigramWords(final int formatVersion) {
- File dictFile = null;
- try {
- dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
- } catch (IOException e) {
- fail("IOException while writing an initial dictionary : " + e);
- }
- BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
- 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
- Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+ final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);
final int unigramProbability = 100;
final int bigramProbability = 150;
@@ -337,18 +276,14 @@ public class BinaryDictionaryTests extends AndroidTestCase {
assertTrue(isValidBigram(binaryDictionary, "aaa", "bcc"));
assertTrue(isValidBigram(binaryDictionary, "abb", "aaa"));
assertTrue(isValidBigram(binaryDictionary, "abb", "bcc"));
- if (canCheckBigramProbability(formatVersion)) {
- assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "abb"));
- assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bcc"));
- assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa"));
- assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc"));
- }
+ assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "abb"));
+ assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bcc"));
+ assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa"));
+ assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc"));
addBigramWords(binaryDictionary, "aaa", "abb", updatedBigramProbability);
- if (canCheckBigramProbability(formatVersion)) {
- assertEquals(updatedBigramProbability,
- getBigramProbability(binaryDictionary, "aaa", "abb"));
- }
+ assertEquals(updatedBigramProbability,
+ getBigramProbability(binaryDictionary, "aaa", "abb"));
assertFalse(isValidBigram(binaryDictionary, "bcc", "aaa"));
assertFalse(isValidBigram(binaryDictionary, "bcc", "bbc"));
@@ -368,43 +303,21 @@ public class BinaryDictionaryTests extends AndroidTestCase {
addUnigramWord(binaryDictionary, "abc", unigramProbability);
addUnigramWord(binaryDictionary, "f", unigramProbability);
- if (canCheckBigramProbability(formatVersion)) {
- assertEquals(bigramProbability,
- getBigramProbability(binaryDictionary, "abcde", "fghij"));
- }
+ assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abcde", "fghij"));
assertEquals(Dictionary.NOT_A_PROBABILITY,
getBigramProbability(binaryDictionary, "abcde", "fgh"));
addBigramWords(binaryDictionary, "abcde", "fghij", updatedBigramProbability);
- if (canCheckBigramProbability(formatVersion)) {
- assertEquals(updatedBigramProbability,
- getBigramProbability(binaryDictionary, "abcde", "fghij"));
- }
-
- dictFile.delete();
+ assertEquals(updatedBigramProbability,
+ getBigramProbability(binaryDictionary, "abcde", "fghij"));
}
public void testRandomlyAddBigramWords() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- testRandomlyAddBigramWords(formatVersion);
- }
- }
-
- private void testRandomlyAddBigramWords(final int formatVersion) {
final int wordCount = 100;
final int bigramCount = 1000;
final int codePointSetSize = 50;
final long seed = System.currentTimeMillis();
final Random random = new Random(seed);
-
- File dictFile = null;
- try {
- dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
- } catch (IOException e) {
- fail("IOException while writing an initial dictionary : " + e);
- }
- BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
- 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
- Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+ final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);
final ArrayList<String> words = new ArrayList<>();
final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>();
@@ -439,83 +352,40 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final int bigramProbability = bigramProbabilities.get(bigram);
assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY,
isValidBigram(binaryDictionary, bigram.first, bigram.second));
- if (canCheckBigramProbability(formatVersion)) {
- assertEquals(bigramProbability,
- getBigramProbability(binaryDictionary, bigram.first, bigram.second));
- }
- }
-
- dictFile.delete();
- }
-
- public void testRemoveBigramWords() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- testRemoveBigramWords(formatVersion);
+ assertEquals(bigramProbability,
+ getBigramProbability(binaryDictionary, bigram.first, bigram.second));
}
}
- private void testRemoveBigramWords(final int formatVersion) {
- File dictFile = null;
- try {
- dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
- } catch (IOException e) {
- fail("IOException while writing an initial dictionary : " + e);
- }
- BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
- 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
- Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+ public void testAddTrigramWords() {
+ final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);
final int unigramProbability = 100;
- final int bigramProbability = 150;
+ final int trigramProbability = 150;
+ final int updatedTrigramProbability = 200;
addUnigramWord(binaryDictionary, "aaa", unigramProbability);
addUnigramWord(binaryDictionary, "abb", unigramProbability);
addUnigramWord(binaryDictionary, "bcc", unigramProbability);
- addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability);
- addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability);
- addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability);
- addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability);
-
- assertTrue(isValidBigram(binaryDictionary, "aaa", "abb"));
- assertTrue(isValidBigram(binaryDictionary, "aaa", "bcc"));
- assertTrue(isValidBigram(binaryDictionary, "abb", "aaa"));
- assertTrue(isValidBigram(binaryDictionary, "abb", "bcc"));
-
- removeBigramEntry(binaryDictionary, "aaa", "abb");
- assertFalse(isValidBigram(binaryDictionary, "aaa", "abb"));
- addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability);
- assertTrue(isValidBigram(binaryDictionary, "aaa", "abb"));
+ addBigramWords(binaryDictionary, "abb", "bcc", 10);
+ addBigramWords(binaryDictionary, "abb", "aaa", 10);
- removeBigramEntry(binaryDictionary, "aaa", "bcc");
- assertFalse(isValidBigram(binaryDictionary, "aaa", "bcc"));
- removeBigramEntry(binaryDictionary, "abb", "aaa");
- assertFalse(isValidBigram(binaryDictionary, "abb", "aaa"));
- removeBigramEntry(binaryDictionary, "abb", "bcc");
- assertFalse(isValidBigram(binaryDictionary, "abb", "bcc"));
+ addTrigramEntry(binaryDictionary, "aaa", "abb", "bcc", trigramProbability);
+ addTrigramEntry(binaryDictionary, "bcc", "abb", "aaa", trigramProbability);
- removeBigramEntry(binaryDictionary, "aaa", "abb");
- // Test remove non-existing bigram operation.
- removeBigramEntry(binaryDictionary, "aaa", "abb");
- removeBigramEntry(binaryDictionary, "bcc", "aaa");
+ assertEquals(trigramProbability,
+ getTrigramProbability(binaryDictionary, "aaa", "abb", "bcc"));
+ assertEquals(trigramProbability,
+ getTrigramProbability(binaryDictionary, "bcc", "abb", "aaa"));
+ assertFalse(isValidBigram(binaryDictionary, "aaa", "abb"));
- dictFile.delete();
+ addTrigramEntry(binaryDictionary, "bcc", "abb", "aaa", updatedTrigramProbability);
+ assertEquals(updatedTrigramProbability,
+ getTrigramProbability(binaryDictionary, "bcc", "abb", "aaa"));
}
public void testFlushDictionary() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- testFlushDictionary(formatVersion);
- }
- }
-
- private void testFlushDictionary(final int formatVersion) {
- File dictFile = null;
- try {
- dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
- } catch (IOException e) {
- fail("IOException while writing an initial dictionary : " + e);
- }
- BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
- 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
- Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+ final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403);
+ BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);
final int probability = 100;
addUnigramWord(binaryDictionary, "aaa", probability);
@@ -535,42 +405,21 @@ public class BinaryDictionaryTests extends AndroidTestCase {
binaryDictionary.flush();
binaryDictionary.close();
- binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
- 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
- Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
-
+ binaryDictionary = getBinaryDictionary(dictFile);
assertEquals(probability, binaryDictionary.getFrequency("aaa"));
assertEquals(probability, binaryDictionary.getFrequency("abcd"));
addUnigramWord(binaryDictionary, "bcde", probability);
binaryDictionary.flush();
binaryDictionary.close();
- binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
- 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
- Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+ binaryDictionary = getBinaryDictionary(dictFile);
assertEquals(probability, binaryDictionary.getFrequency("bcde"));
binaryDictionary.close();
-
- dictFile.delete();
}
public void testFlushWithGCDictionary() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- testFlushWithGCDictionary(formatVersion);
- }
- }
-
- private void testFlushWithGCDictionary(final int formatVersion) {
- File dictFile = null;
- try {
- dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
- } catch (IOException e) {
- fail("IOException while writing an initial dictionary : " + e);
- }
- BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
- 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
- Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
-
+ final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403);
+ BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);
final int unigramProbability = 100;
final int bigramProbability = 150;
addUnigramWord(binaryDictionary, "aaa", unigramProbability);
@@ -583,51 +432,30 @@ public class BinaryDictionaryTests extends AndroidTestCase {
binaryDictionary.flushWithGC();
binaryDictionary.close();
- binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
- 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
- Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+ binaryDictionary = getBinaryDictionary(dictFile);
assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
assertEquals(unigramProbability, binaryDictionary.getFrequency("abb"));
assertEquals(unigramProbability, binaryDictionary.getFrequency("bcc"));
- if (canCheckBigramProbability(formatVersion)) {
- assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "abb"));
- assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bcc"));
- assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa"));
- assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc"));
- }
+ assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "abb"));
+ assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bcc"));
+ assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa"));
+ assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc"));
assertFalse(isValidBigram(binaryDictionary, "bcc", "aaa"));
assertFalse(isValidBigram(binaryDictionary, "bcc", "bbc"));
assertFalse(isValidBigram(binaryDictionary, "aaa", "aaa"));
binaryDictionary.flushWithGC();
binaryDictionary.close();
-
- dictFile.delete();
}
public void testAddBigramWordsAndFlashWithGC() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- testAddBigramWordsAndFlashWithGC(formatVersion);
- }
- }
-
- // TODO: Evaluate performance of GC
- private void testAddBigramWordsAndFlashWithGC(final int formatVersion) {
final int wordCount = 100;
final int bigramCount = 1000;
final int codePointSetSize = 30;
final long seed = System.currentTimeMillis();
final Random random = new Random(seed);
- File dictFile = null;
- try {
- dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
- } catch (IOException e) {
- fail("IOException while writing an initial dictionary : " + e);
- }
-
- BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
- 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
- Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+ final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403);
+ BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);
final ArrayList<String> words = new ArrayList<>();
final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>();
@@ -660,52 +488,37 @@ public class BinaryDictionaryTests extends AndroidTestCase {
binaryDictionary.flushWithGC();
binaryDictionary.close();
- binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
- 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
- Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
-
+ binaryDictionary = getBinaryDictionary(dictFile);
for (final Pair<String, String> bigram : bigramWords) {
final int bigramProbability = bigramProbabilities.get(bigram);
assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY,
isValidBigram(binaryDictionary, bigram.first, bigram.second));
- if (canCheckBigramProbability(formatVersion)) {
- assertEquals(bigramProbability,
- getBigramProbability(binaryDictionary, bigram.first, bigram.second));
- }
+ assertEquals(bigramProbability,
+ getBigramProbability(binaryDictionary, bigram.first, bigram.second));
}
-
- dictFile.delete();
}
public void testRandomOperationsAndFlashWithGC() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- testRandomOperationsAndFlashWithGC(formatVersion);
- }
- }
+ final int maxUnigramCount = 5000;
+ final int maxBigramCount = 10000;
+ final HashMap<String, String> attributeMap = new HashMap<>();
+ attributeMap.put(DictionaryHeader.MAX_UNIGRAM_COUNT_KEY, String.valueOf(maxUnigramCount));
+ attributeMap.put(DictionaryHeader.MAX_BIGRAM_COUNT_KEY, String.valueOf(maxBigramCount));
- private void testRandomOperationsAndFlashWithGC(final int formatVersion) {
final int flashWithGCIterationCount = 50;
final int operationCountInEachIteration = 200;
final int initialUnigramCount = 100;
final float addUnigramProb = 0.5f;
final float addBigramProb = 0.8f;
- final float removeBigramProb = 0.2f;
final int codePointSetSize = 30;
final long seed = System.currentTimeMillis();
final Random random = new Random(seed);
+ final File dictFile = createEmptyDictionaryWithAttributesAndGetFile(FormatSpec.VERSION403,
+ attributeMap);
+ BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);
- File dictFile = null;
- try {
- dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
- } catch (IOException e) {
- fail("IOException while writing an initial dictionary : " + e);
- }
-
- BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
- 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
- Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
final ArrayList<String> words = new ArrayList<>();
final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>();
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
@@ -722,9 +535,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
binaryDictionary.close();
for (int gcCount = 0; gcCount < flashWithGCIterationCount; gcCount++) {
- binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
- 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
- Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+ binaryDictionary = getBinaryDictionary(dictFile);
for (int opCount = 0; opCount < operationCountInEachIteration; opCount++) {
// Add unigram.
if (random.nextFloat() < addUnigramProb) {
@@ -754,14 +565,6 @@ public class BinaryDictionaryTests extends AndroidTestCase {
bigramProbabilities.put(bigram, bigramProbability);
addBigramWords(binaryDictionary, word0, word1, bigramProbability);
}
- // Remove bigram.
- if (random.nextFloat() < removeBigramProb && !bigramWords.isEmpty()) {
- final int bigramIndex = random.nextInt(bigramWords.size());
- final Pair<String, String> bigram = bigramWords.get(bigramIndex);
- bigramWords.remove(bigramIndex);
- bigramProbabilities.remove(bigram);
- removeBigramEntry(binaryDictionary, bigram.first, bigram.second);
- }
}
// Test whether the all unigram operations are collectlly handled.
@@ -775,45 +578,29 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final Pair<String, String> bigram = bigramWords.get(i);
final int probability;
if (bigramProbabilities.containsKey(bigram)) {
- final int bigramProbability = bigramProbabilities.get(bigram);
- probability = bigramProbability;
+ probability = bigramProbabilities.get(bigram);
} else {
probability = Dictionary.NOT_A_PROBABILITY;
}
- if (canCheckBigramProbability(formatVersion)) {
- assertEquals(probability,
- getBigramProbability(binaryDictionary, bigram.first, bigram.second));
- }
+ assertEquals(probability,
+ getBigramProbability(binaryDictionary, bigram.first, bigram.second));
assertEquals(probability != Dictionary.NOT_A_PROBABILITY,
isValidBigram(binaryDictionary, bigram.first, bigram.second));
}
binaryDictionary.flushWithGC();
binaryDictionary.close();
}
-
- dictFile.delete();
}
public void testAddManyUnigramsAndFlushWithGC() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- testAddManyUnigramsAndFlushWithGC(formatVersion);
- }
- }
-
- private void testAddManyUnigramsAndFlushWithGC(final int formatVersion) {
final int flashWithGCIterationCount = 3;
final int codePointSetSize = 50;
final long seed = System.currentTimeMillis();
final Random random = new Random(seed);
- File dictFile = null;
- try {
- dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
- } catch (IOException e) {
- fail("IOException while writing an initial dictionary : " + e);
- }
+ final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403);
final ArrayList<String> words = new ArrayList<>();
final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
@@ -821,9 +608,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
BinaryDictionary binaryDictionary;
for (int i = 0; i < flashWithGCIterationCount; i++) {
- binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
- 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
- Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+ binaryDictionary = getBinaryDictionary(dictFile);
while(!binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
final String word = CodePointUtils.generateWord(random, codePointSet);
words.add(word);
@@ -841,30 +626,23 @@ public class BinaryDictionaryTests extends AndroidTestCase {
binaryDictionary.flushWithGC();
binaryDictionary.close();
}
-
- dictFile.delete();
}
public void testUnigramAndBigramCount() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- testUnigramAndBigramCount(formatVersion);
- }
- }
+ final int maxUnigramCount = 5000;
+ final int maxBigramCount = 10000;
+ final HashMap<String, String> attributeMap = new HashMap<>();
+ attributeMap.put(DictionaryHeader.MAX_UNIGRAM_COUNT_KEY, String.valueOf(maxUnigramCount));
+ attributeMap.put(DictionaryHeader.MAX_BIGRAM_COUNT_KEY, String.valueOf(maxBigramCount));
- private void testUnigramAndBigramCount(final int formatVersion) {
final int flashWithGCIterationCount = 10;
final int codePointSetSize = 50;
final int unigramCountPerIteration = 1000;
final int bigramCountPerIteration = 2000;
final long seed = System.currentTimeMillis();
final Random random = new Random(seed);
-
- File dictFile = null;
- try {
- dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
- } catch (IOException e) {
- fail("IOException while writing an initial dictionary : " + e);
- }
+ final File dictFile = createEmptyDictionaryWithAttributesAndGetFile(FormatSpec.VERSION403,
+ attributeMap);
final ArrayList<String> words = new ArrayList<>();
final HashSet<Pair<String, String>> bigrams = new HashSet<>();
@@ -872,9 +650,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
BinaryDictionary binaryDictionary;
for (int i = 0; i < flashWithGCIterationCount; i++) {
- binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
- 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
- Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+ binaryDictionary = getBinaryDictionary(dictFile);
for (int j = 0; j < unigramCountPerIteration; j++) {
final String word = CodePointUtils.generateWord(random, codePointSet);
words.add(word);
@@ -892,108 +668,31 @@ public class BinaryDictionaryTests extends AndroidTestCase {
addBigramWords(binaryDictionary, word0, word1, bigramProbability);
}
assertEquals(new HashSet<>(words).size(), Integer.parseInt(
- binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
+ binaryDictionary.getPropertyForGettingStats(
+ BinaryDictionary.UNIGRAM_COUNT_QUERY)));
assertEquals(new HashSet<>(bigrams).size(), Integer.parseInt(
- binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY)));
+ binaryDictionary.getPropertyForGettingStats(
+ BinaryDictionary.BIGRAM_COUNT_QUERY)));
binaryDictionary.flushWithGC();
assertEquals(new HashSet<>(words).size(), Integer.parseInt(
- binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
+ binaryDictionary.getPropertyForGettingStats(
+ BinaryDictionary.UNIGRAM_COUNT_QUERY)));
assertEquals(new HashSet<>(bigrams).size(), Integer.parseInt(
- binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY)));
+ binaryDictionary.getPropertyForGettingStats(
+ BinaryDictionary.BIGRAM_COUNT_QUERY)));
binaryDictionary.close();
}
-
- dictFile.delete();
- }
-
- public void testAddMultipleDictionaryEntries() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- testAddMultipleDictionaryEntries(formatVersion);
- }
- }
-
- private void testAddMultipleDictionaryEntries(final int formatVersion) {
- final int codePointSetSize = 20;
- final int lmParamCount = 1000;
- final double bigramContinueRate = 0.9;
- final long seed = System.currentTimeMillis();
- final Random random = new Random(seed);
-
- File dictFile = null;
- try {
- dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
- } catch (IOException e) {
- fail("IOException while writing an initial dictionary : " + e);
- }
-
- final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
- final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
- final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
-
- final LanguageModelParam[] languageModelParams = new LanguageModelParam[lmParamCount];
- String prevWord = null;
- for (int i = 0; i < languageModelParams.length; i++) {
- final String word = CodePointUtils.generateWord(random, codePointSet);
- final int probability = random.nextInt(0xFF);
- final int bigramProbability = probability + random.nextInt(0xFF - probability);
- unigramProbabilities.put(word, probability);
- if (prevWord == null) {
- languageModelParams[i] = new LanguageModelParam(word, probability,
- BinaryDictionary.NOT_A_VALID_TIMESTAMP);
- } else {
- languageModelParams[i] = new LanguageModelParam(prevWord, word, probability,
- bigramProbability, BinaryDictionary.NOT_A_VALID_TIMESTAMP);
- bigramProbabilities.put(new Pair<>(prevWord, word),
- bigramProbability);
- }
- prevWord = (random.nextDouble() < bigramContinueRate) ? word : null;
- }
-
- final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
- 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
- Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
- binaryDictionary.addMultipleDictionaryEntries(languageModelParams);
-
- for (Map.Entry<String, Integer> entry : unigramProbabilities.entrySet()) {
- assertEquals((int)entry.getValue(), binaryDictionary.getFrequency(entry.getKey()));
- }
-
- for (Map.Entry<Pair<String, String>, Integer> entry : bigramProbabilities.entrySet()) {
- final String word0 = entry.getKey().first;
- final String word1 = entry.getKey().second;
- final int bigramProbability = entry.getValue();
- assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY,
- isValidBigram(binaryDictionary, word0, word1));
- if (canCheckBigramProbability(formatVersion)) {
- assertEquals(bigramProbability,
- getBigramProbability(binaryDictionary, word0, word1));
- }
- }
}
public void testGetWordProperties() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- testGetWordProperties(formatVersion);
- }
- }
-
- private void testGetWordProperties(final int formatVersion) {
final long seed = System.currentTimeMillis();
final Random random = new Random(seed);
final int UNIGRAM_COUNT = 1000;
final int BIGRAM_COUNT = 1000;
final int codePointSetSize = 20;
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
-
- File dictFile = null;
- try {
- dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
- } catch (IOException e) {
- fail("IOException while writing an initial dictionary : " + e);
- }
- final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
- 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
- Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+ final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403);
+ final BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);
final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord",
false /* isBeginningOfSentence */);
@@ -1008,11 +707,10 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final String word = CodePointUtils.generateWord(random, codePointSet);
final int unigramProbability = random.nextInt(0xFF);
final boolean isNotAWord = random.nextBoolean();
- final boolean isBlacklisted = random.nextBoolean();
+ final boolean isPossiblyOffensive = random.nextBoolean();
// TODO: Add tests for historical info.
binaryDictionary.addUnigramEntry(word, unigramProbability,
- null /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY,
- false /* isBeginningOfSentence */, isNotAWord, isBlacklisted,
+ false /* isBeginningOfSentence */, isNotAWord, isPossiblyOffensive,
BinaryDictionary.NOT_A_VALID_TIMESTAMP);
if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
binaryDictionary.flushWithGC();
@@ -1024,11 +722,9 @@ public class BinaryDictionaryTests extends AndroidTestCase {
assertEquals(word, wordProperty.mWord);
assertTrue(wordProperty.isValid());
assertEquals(isNotAWord, wordProperty.mIsNotAWord);
- assertEquals(isBlacklisted, wordProperty.mIsBlacklistEntry);
- assertEquals(false, wordProperty.mHasBigrams);
- assertEquals(false, wordProperty.mHasShortcuts);
+ assertEquals(isPossiblyOffensive, wordProperty.mIsPossiblyOffensive);
+ assertEquals(false, wordProperty.mHasNgrams);
assertEquals(unigramProbability, wordProperty.mProbabilityInfo.mProbability);
- assertTrue(wordProperty.mShortcutTargets.isEmpty());
}
for (int i = 0; i < BIGRAM_COUNT; i++) {
@@ -1062,41 +758,25 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final HashSet<String> bigramWord1s = bigrams.get(word0);
final WordProperty wordProperty = binaryDictionary.getWordProperty(word0,
false /* isBeginningOfSentence */);
- assertEquals(bigramWord1s.size(), wordProperty.mBigrams.size());
- for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
- final String word1 = wordProperty.mBigrams.get(j).mWord;
+ assertEquals(bigramWord1s.size(), wordProperty.mNgrams.size());
+ // TODO: Support ngram.
+ for (final WeightedString bigramTarget : wordProperty.getBigrams()) {
+ final String word1 = bigramTarget.mWord;
assertTrue(bigramWord1s.contains(word1));
- if (canCheckBigramProbability(formatVersion)) {
- final int bigramProbability = bigramProbabilities.get(new Pair<>(word0, word1));
- assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability());
- }
+ final int bigramProbability = bigramProbabilities.get(new Pair<>(word0, word1));
+ assertEquals(bigramProbability, bigramTarget.getProbability());
}
}
}
public void testIterateAllWords() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- testIterateAllWords(formatVersion);
- }
- }
-
- private void testIterateAllWords(final int formatVersion) {
final long seed = System.currentTimeMillis();
final Random random = new Random(seed);
final int UNIGRAM_COUNT = 1000;
final int BIGRAM_COUNT = 1000;
final int codePointSetSize = 20;
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
-
- File dictFile = null;
- try {
- dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
- } catch (IOException e) {
- fail("IOException while writing an initial dictionary : " + e);
- }
- final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
- 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
- Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+ final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);
final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord",
false /* isBeginningOfSentence */);
@@ -1155,15 +835,16 @@ public class BinaryDictionaryTests extends AndroidTestCase {
wordProperty.mProbabilityInfo.mProbability);
wordSet.remove(word0);
final HashSet<String> bigramWord1s = bigrams.get(word0);
- for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
- final String word1 = wordProperty.mBigrams.get(j).mWord;
- assertTrue(bigramWord1s.contains(word1));
- final Pair<String, String> bigram = new Pair<>(word0, word1);
- if (canCheckBigramProbability(formatVersion)) {
+ // TODO: Support ngram.
+ if (wordProperty.mHasNgrams) {
+ for (final WeightedString bigramTarget : wordProperty.getBigrams()) {
+ final String word1 = bigramTarget.mWord;
+ assertTrue(bigramWord1s.contains(word1));
+ final Pair<String, String> bigram = new Pair<>(word0, word1);
final int bigramProbability = bigramProbabilitiesToCheckLater.get(bigram);
- assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability());
+ assertEquals(bigramProbability, bigramTarget.getProbability());
+ bigramSet.remove(bigram);
}
- bigramSet.remove(bigram);
}
token = result.mNextToken;
} while (token != 0);
@@ -1171,346 +852,33 @@ public class BinaryDictionaryTests extends AndroidTestCase {
assertTrue(bigramSet.isEmpty());
}
- public void testAddShortcuts() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- testAddShortcuts(formatVersion);
- }
- }
-
- private void testAddShortcuts(final int formatVersion) {
- File dictFile = null;
- try {
- dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
- } catch (IOException e) {
- fail("IOException while writing an initial dictionary : " + e);
- }
- final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
- 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
- Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
-
- final int unigramProbability = 100;
- final int shortcutProbability = 10;
- binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz",
- shortcutProbability, false /* isBeginningOfSentence */,
- false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */);
- WordProperty wordProperty = binaryDictionary.getWordProperty("aaa",
- false /* isBeginningOfSentence */);
- assertEquals(1, wordProperty.mShortcutTargets.size());
- assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord);
- assertEquals(shortcutProbability, wordProperty.mShortcutTargets.get(0).getProbability());
- final int updatedShortcutProbability = 2;
- binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz",
- updatedShortcutProbability, false /* isBeginningOfSentence */,
- false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */);
- wordProperty = binaryDictionary.getWordProperty("aaa",
- false /* isBeginningOfSentence */);
- assertEquals(1, wordProperty.mShortcutTargets.size());
- assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord);
- assertEquals(updatedShortcutProbability,
- wordProperty.mShortcutTargets.get(0).getProbability());
- binaryDictionary.addUnigramEntry("aaa", unigramProbability, "yyy",
- shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */,
- false /* isBlacklisted */, 0 /* timestamp */);
- final HashMap<String, Integer> shortcutTargets = new HashMap<>();
- shortcutTargets.put("zzz", updatedShortcutProbability);
- shortcutTargets.put("yyy", shortcutProbability);
- wordProperty = binaryDictionary.getWordProperty("aaa",
- false /* isBeginningOfSentence */);
- assertEquals(2, wordProperty.mShortcutTargets.size());
- for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
- assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord));
- assertEquals((int)shortcutTargets.get(shortcutTarget.mWord),
- shortcutTarget.getProbability());
- shortcutTargets.remove(shortcutTarget.mWord);
- }
- shortcutTargets.put("zzz", updatedShortcutProbability);
- shortcutTargets.put("yyy", shortcutProbability);
- binaryDictionary.flushWithGC();
- wordProperty = binaryDictionary.getWordProperty("aaa",
- false /* isBeginningOfSentence */);
- assertEquals(2, wordProperty.mShortcutTargets.size());
- for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
- assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord));
- assertEquals((int)shortcutTargets.get(shortcutTarget.mWord),
- shortcutTarget.getProbability());
- shortcutTargets.remove(shortcutTarget.mWord);
- }
- }
-
- public void testAddManyShortcuts() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- testAddManyShortcuts(formatVersion);
- }
- }
-
- private void testAddManyShortcuts(final int formatVersion) {
- final long seed = System.currentTimeMillis();
- final Random random = new Random(seed);
- final int UNIGRAM_COUNT = 1000;
- final int SHORTCUT_COUNT = 10000;
- final int codePointSetSize = 20;
- final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
-
- final ArrayList<String> words = new ArrayList<>();
- final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
- final HashMap<String, HashMap<String, Integer>> shortcutTargets = new HashMap<>();
-
- File dictFile = null;
- try {
- dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
- } catch (IOException e) {
- fail("IOException while writing an initial dictionary : " + e);
- }
- final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
- 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
- Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
-
- for (int i = 0; i < UNIGRAM_COUNT; i++) {
- final String word = CodePointUtils.generateWord(random, codePointSet);
- final int unigramProbability = random.nextInt(0xFF);
- addUnigramWord(binaryDictionary, word, unigramProbability);
- words.add(word);
- unigramProbabilities.put(word, unigramProbability);
- if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
- binaryDictionary.flushWithGC();
- }
- }
- for (int i = 0; i < SHORTCUT_COUNT; i++) {
- final String shortcutTarget = CodePointUtils.generateWord(random, codePointSet);
- final int shortcutProbability = random.nextInt(0xF);
- final String word = words.get(random.nextInt(words.size()));
- final int unigramProbability = unigramProbabilities.get(word);
- binaryDictionary.addUnigramEntry(word, unigramProbability, shortcutTarget,
- shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */,
- false /* isBlacklisted */, 0 /* timestamp */);
- if (shortcutTargets.containsKey(word)) {
- final HashMap<String, Integer> shortcutTargetsOfWord = shortcutTargets.get(word);
- shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability);
- } else {
- final HashMap<String, Integer> shortcutTargetsOfWord = new HashMap<>();
- shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability);
- shortcutTargets.put(word, shortcutTargetsOfWord);
- }
- if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
- binaryDictionary.flushWithGC();
- }
- }
-
- for (final String word : words) {
- final WordProperty wordProperty = binaryDictionary.getWordProperty(word,
- false /* isBeginningOfSentence */);
- assertEquals((int)unigramProbabilities.get(word),
- wordProperty.mProbabilityInfo.mProbability);
- if (!shortcutTargets.containsKey(word)) {
- // The word does not have shortcut targets.
- continue;
- }
- assertEquals(shortcutTargets.get(word).size(), wordProperty.mShortcutTargets.size());
- for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
- final String targetCodePonts = shortcutTarget.mWord;
- assertEquals((int)shortcutTargets.get(word).get(targetCodePonts),
- shortcutTarget.getProbability());
- }
- }
- }
-
- public void testDictMigration() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- testDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, formatVersion);
- }
- }
-
- private void testDictMigration(final int fromFormatVersion, final int toFormatVersion) {
- File dictFile = null;
- try {
- dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", fromFormatVersion);
- } catch (IOException e) {
- fail("IOException while writing an initial dictionary : " + e);
- }
- final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
- 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
- Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
- final int unigramProbability = 100;
- addUnigramWord(binaryDictionary, "aaa", unigramProbability);
- addUnigramWord(binaryDictionary, "bbb", unigramProbability);
- final int bigramProbability = 150;
- addBigramWords(binaryDictionary, "aaa", "bbb", bigramProbability);
- final int shortcutProbability = 10;
- binaryDictionary.addUnigramEntry("ccc", unigramProbability, "xxx", shortcutProbability,
- false /* isBeginningOfSentence */, false /* isNotAWord */,
- false /* isBlacklisted */, 0 /* timestamp */);
- binaryDictionary.addUnigramEntry("ddd", unigramProbability, null /* shortcutTarget */,
- Dictionary.NOT_A_PROBABILITY, false /* isBeginningOfSentence */,
- true /* isNotAWord */, true /* isBlacklisted */, 0 /* timestamp */);
- binaryDictionary.addNgramEntry(PrevWordsInfo.BEGINNING_OF_SENTENCE,
- "aaa", bigramProbability, 0 /* timestamp */);
- assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
- assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb"));
- assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb"));
- assertEquals(fromFormatVersion, binaryDictionary.getFormatVersion());
- assertTrue(binaryDictionary.migrateTo(toFormatVersion));
- assertTrue(binaryDictionary.isValidDictionary());
- assertEquals(toFormatVersion, binaryDictionary.getFormatVersion());
- assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
- assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb"));
- if (canCheckBigramProbability(toFormatVersion)) {
- assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bbb"));
- assertEquals(bigramProbability, binaryDictionary.getNgramProbability(
- PrevWordsInfo.BEGINNING_OF_SENTENCE, "aaa"));
- }
- assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb"));
- WordProperty wordProperty = binaryDictionary.getWordProperty("ccc",
- false /* isBeginningOfSentence */);
- assertEquals(1, wordProperty.mShortcutTargets.size());
- assertEquals("xxx", wordProperty.mShortcutTargets.get(0).mWord);
- wordProperty = binaryDictionary.getWordProperty("ddd",
- false /* isBeginningOfSentence */);
- assertTrue(wordProperty.mIsBlacklistEntry);
- assertTrue(wordProperty.mIsNotAWord);
- }
-
- public void testLargeDictMigration() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- testLargeDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, formatVersion);
- }
- }
-
- private void testLargeDictMigration(final int fromFormatVersion, final int toFormatVersion) {
- final int UNIGRAM_COUNT = 3000;
- final int BIGRAM_COUNT = 3000;
- final int codePointSetSize = 50;
- final long seed = System.currentTimeMillis();
- final Random random = new Random(seed);
-
- File dictFile = null;
- try {
- dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", fromFormatVersion);
- } catch (IOException e) {
- fail("IOException while writing an initial dictionary : " + e);
- }
- final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
- 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
- Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
-
- final ArrayList<String> words = new ArrayList<>();
- final ArrayList<Pair<String, String>> bigrams = new ArrayList<>();
- final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
- final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
- final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
-
- for (int i = 0; i < UNIGRAM_COUNT; i++) {
- final String word = CodePointUtils.generateWord(random, codePointSet);
- final int unigramProbability = random.nextInt(0xFF);
- addUnigramWord(binaryDictionary, word, unigramProbability);
- if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
- binaryDictionary.flushWithGC();
- }
- words.add(word);
- unigramProbabilities.put(word, unigramProbability);
- }
-
- for (int i = 0; i < BIGRAM_COUNT; i++) {
- final int word0Index = random.nextInt(words.size());
- final int word1Index = random.nextInt(words.size());
- if (word0Index == word1Index) {
- continue;
- }
- final String word0 = words.get(word0Index);
- final String word1 = words.get(word1Index);
- final int unigramProbability = unigramProbabilities.get(word1);
- final int bigramProbability =
- random.nextInt(0xFF - unigramProbability) + unigramProbability;
- addBigramWords(binaryDictionary, word0, word1, bigramProbability);
- if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
- binaryDictionary.flushWithGC();
- }
- final Pair<String, String> bigram = new Pair<>(word0, word1);
- bigrams.add(bigram);
- bigramProbabilities.put(bigram, bigramProbability);
- }
- assertTrue(binaryDictionary.migrateTo(toFormatVersion));
-
- for (final String word : words) {
- assertEquals((int)unigramProbabilities.get(word), binaryDictionary.getFrequency(word));
- }
- assertEquals(unigramProbabilities.size(), Integer.parseInt(
- binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
-
- for (final Pair<String, String> bigram : bigrams) {
- if (canCheckBigramProbability(toFormatVersion)) {
- assertEquals((int)bigramProbabilities.get(bigram),
- getBigramProbability(binaryDictionary, bigram.first, bigram.second));
- }
- assertTrue(isValidBigram(binaryDictionary, bigram.first, bigram.second));
- }
- assertEquals(bigramProbabilities.size(), Integer.parseInt(
- binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY)));
+ public void testPossiblyOffensiveAttributeMaintained() {
+ final BinaryDictionary binaryDictionary =
+ getEmptyBinaryDictionary(FormatSpec.VERSION403);
+ binaryDictionary.addUnigramEntry("ddd", 100, false, true, true, 0);
+ WordProperty wordProperty = binaryDictionary.getWordProperty("ddd", false);
+ assertEquals(true, wordProperty.mIsPossiblyOffensive);
}
public void testBeginningOfSentence() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- if (supportsBeginningOfSentence(formatVersion)) {
- testBeginningOfSentence(formatVersion);
- }
- }
- }
-
- private void testBeginningOfSentence(final int formatVersion) {
- File dictFile = null;
- try {
- dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
- } catch (IOException e) {
- fail("IOException while writing an initial dictionary : " + e);
- }
- final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
- 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
- Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+ final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);
final int dummyProbability = 0;
- final PrevWordsInfo prevWordsInfoBeginningOfSentence = PrevWordsInfo.BEGINNING_OF_SENTENCE;
+ final NgramContext beginningOfSentenceContext = NgramContext.BEGINNING_OF_SENTENCE;
final int bigramProbability = 200;
addUnigramWord(binaryDictionary, "aaa", dummyProbability);
- binaryDictionary.addNgramEntry(prevWordsInfoBeginningOfSentence, "aaa", bigramProbability,
+ binaryDictionary.addNgramEntry(beginningOfSentenceContext, "aaa", bigramProbability,
BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
assertEquals(bigramProbability,
- binaryDictionary.getNgramProbability(prevWordsInfoBeginningOfSentence, "aaa"));
- binaryDictionary.addNgramEntry(prevWordsInfoBeginningOfSentence, "aaa", bigramProbability,
+ binaryDictionary.getNgramProbability(beginningOfSentenceContext, "aaa"));
+ binaryDictionary.addNgramEntry(beginningOfSentenceContext, "aaa", bigramProbability,
BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
addUnigramWord(binaryDictionary, "bbb", dummyProbability);
- binaryDictionary.addNgramEntry(prevWordsInfoBeginningOfSentence, "bbb", bigramProbability,
+ binaryDictionary.addNgramEntry(beginningOfSentenceContext, "bbb", bigramProbability,
BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
binaryDictionary.flushWithGC();
assertEquals(bigramProbability,
- binaryDictionary.getNgramProbability(prevWordsInfoBeginningOfSentence, "aaa"));
+ binaryDictionary.getNgramProbability(beginningOfSentenceContext, "aaa"));
assertEquals(bigramProbability,
- binaryDictionary.getNgramProbability(prevWordsInfoBeginningOfSentence, "bbb"));
- }
-
- public void testGetMaxFrequencyOfExactMatches() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- testGetMaxFrequencyOfExactMatches(formatVersion);
- }
- }
-
- private void testGetMaxFrequencyOfExactMatches(final int formatVersion) {
- File dictFile = null;
- try {
- dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
- } catch (IOException e) {
- fail("IOException while writing an initial dictionary : " + e);
- }
- final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
- 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
- Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
- addUnigramWord(binaryDictionary, "abc", 10);
- addUnigramWord(binaryDictionary, "aBc", 15);
- assertEquals(15, binaryDictionary.getMaxFrequencyOfExactMatches("abc"));
- addUnigramWord(binaryDictionary, "ab'c", 20);
- assertEquals(20, binaryDictionary.getMaxFrequencyOfExactMatches("abc"));
- addUnigramWord(binaryDictionary, "a-b-c", 25);
- assertEquals(25, binaryDictionary.getMaxFrequencyOfExactMatches("abc"));
- addUnigramWord(binaryDictionary, "ab-'-'-'-c", 30);
- assertEquals(30, binaryDictionary.getMaxFrequencyOfExactMatches("abc"));
- addUnigramWord(binaryDictionary, "ab c", 255);
- assertEquals(30, binaryDictionary.getMaxFrequencyOfExactMatches("abc"));
+ binaryDictionary.getNgramProbability(beginningOfSentenceContext, "bbb"));
}
}