diff options
Diffstat (limited to 'tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java')
-rw-r--r-- | tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java | 1014 |
1 files changed, 191 insertions, 823 deletions
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java index 6ba18d665..e96c934cb 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java @@ -21,14 +21,14 @@ import android.test.suitebuilder.annotation.LargeTest; import android.text.TextUtils; import android.util.Pair; -import com.android.inputmethod.latin.PrevWordsInfo.WordInfo; -import com.android.inputmethod.latin.makedict.CodePointUtils; +import com.android.inputmethod.latin.NgramContext.WordInfo; +import com.android.inputmethod.latin.common.CodePointUtils; +import com.android.inputmethod.latin.common.FileUtils; +import com.android.inputmethod.latin.makedict.DictionaryHeader; import com.android.inputmethod.latin.makedict.FormatSpec; import com.android.inputmethod.latin.makedict.WeightedString; import com.android.inputmethod.latin.makedict.WordProperty; import com.android.inputmethod.latin.utils.BinaryDictionaryUtils; -import com.android.inputmethod.latin.utils.FileUtils; -import com.android.inputmethod.latin.utils.LanguageModelParam; import java.io.File; import java.io.IOException; @@ -36,103 +36,100 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.Locale; -import java.util.Map; import java.util.Random; -// TODO Use the seed passed as an argument for makedict test. @LargeTest public class BinaryDictionaryTests extends AndroidTestCase { private static final String TEST_DICT_FILE_EXTENSION = ".testDict"; private static final String TEST_LOCALE = "test"; - private static final int[] DICT_FORMAT_VERSIONS = - new int[] { FormatSpec.VERSION4, FormatSpec.VERSION4_DEV }; + private static final String DICTIONARY_ID = "TestBinaryDictionary"; - private static boolean canCheckBigramProbability(final int formatVersion) { - return formatVersion > FormatSpec.VERSION401; + private HashSet<File> mDictFilesToBeDeleted = new HashSet<>(); + + @Override + protected void setUp() throws Exception { + super.setUp(); + mDictFilesToBeDeleted.clear(); + } + + @Override + protected void tearDown() throws Exception { + for (final File dictFile : mDictFilesToBeDeleted) { + dictFile.delete(); + } + mDictFilesToBeDeleted.clear(); + super.tearDown(); } - private static boolean supportsBeginningOfSentence(final int formatVersion) { - return formatVersion > FormatSpec.VERSION401; + private File createEmptyDictionaryAndGetFile(final int formatVersion) { + return createEmptyDictionaryWithAttributesAndGetFile(formatVersion, + new HashMap<String, String>()); } - private File createEmptyDictionaryAndGetFile(final String dictId, - final int formatVersion) throws IOException { - if (formatVersion == FormatSpec.VERSION4 - || formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING - || formatVersion == FormatSpec.VERSION4_DEV) { - return createEmptyVer4DictionaryAndGetFile(dictId, formatVersion); - } else { - throw new IOException("Dictionary format version " + formatVersion - + " is not supported."); + private File createEmptyDictionaryWithAttributesAndGetFile(final int formatVersion, + final HashMap<String, String> attributeMap) { + try { + final File dictFile = createEmptyVer4DictionaryAndGetFile(formatVersion, + attributeMap); + mDictFilesToBeDeleted.add(dictFile); + return dictFile; + } catch (final IOException e) { + fail(e.toString()); } + return null; } - private File createEmptyVer4DictionaryAndGetFile(final String dictId, - final int formatVersion) throws IOException { - final File file = File.createTempFile(dictId, TEST_DICT_FILE_EXTENSION, + private File createEmptyVer4DictionaryAndGetFile(final int formatVersion, + final HashMap<String, String> attributeMap) throws IOException { + final File file = File.createTempFile(DICTIONARY_ID, TEST_DICT_FILE_EXTENSION, getContext().getCacheDir()); file.delete(); file.mkdir(); - Map<String, String> attributeMap = new HashMap<>(); if (BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), formatVersion, Locale.ENGLISH, attributeMap)) { return file; - } else { - throw new IOException("Empty dictionary " + file.getAbsolutePath() - + " cannot be created. Format version: " + formatVersion); } + throw new IOException("Empty dictionary " + file.getAbsolutePath() + + " cannot be created. Format version: " + formatVersion); } - public void testIsValidDictionary() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testIsValidDictionary(formatVersion); - } + private static BinaryDictionary getBinaryDictionary(final File dictFile) { + return new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); } - private void testIsValidDictionary(final int formatVersion) { - File dictFile = null; - try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); - } catch (IOException e) { - fail("IOException while writing an initial dictionary : " + e); - } - BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + private BinaryDictionary getEmptyBinaryDictionary(final int formatVersion) { + final File dictFile = createEmptyDictionaryAndGetFile(formatVersion); + return new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + } + + public void testIsValidDictionary() { + final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403); + BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile); assertTrue("binaryDictionary must be valid for existing valid dictionary file.", binaryDictionary.isValidDictionary()); binaryDictionary.close(); assertFalse("binaryDictionary must be invalid after closing.", binaryDictionary.isValidDictionary()); FileUtils.deleteRecursively(dictFile); - binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, - dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(), - TEST_LOCALE, true /* isUpdatable */); + binaryDictionary = getBinaryDictionary(dictFile); assertFalse("binaryDictionary must be invalid for not existing dictionary file.", binaryDictionary.isValidDictionary()); binaryDictionary.close(); } public void testConstructingDictionaryOnMemory() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testConstructingDictionaryOnMemory(formatVersion); - } - } - - private void testConstructingDictionaryOnMemory(final int formatVersion) { - File dictFile = null; - try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); - } catch (IOException e) { - fail("IOException while writing an initial dictionary : " + e); - } + final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403); FileUtils.deleteRecursively(dictFile); assertFalse(dictFile.exists()); - BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), - true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, formatVersion, - new HashMap<String, String>()); + final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, + FormatSpec.VERSION403, new HashMap<String, String>()); assertTrue(binaryDictionary.isValidDictionary()); - assertEquals(formatVersion, binaryDictionary.getFormatVersion()); + assertEquals(FormatSpec.VERSION403, binaryDictionary.getFormatVersion()); final int probability = 100; addUnigramWord(binaryDictionary, "word", probability); assertEquals(probability, binaryDictionary.getFrequency("word")); @@ -140,31 +137,15 @@ public class BinaryDictionaryTests extends AndroidTestCase { binaryDictionary.flush(); assertTrue(dictFile.exists()); assertTrue(binaryDictionary.isValidDictionary()); - assertEquals(formatVersion, binaryDictionary.getFormatVersion()); + assertEquals(FormatSpec.VERSION403, binaryDictionary.getFormatVersion()); assertEquals(probability, binaryDictionary.getFrequency("word")); binaryDictionary.close(); - dictFile.delete(); } public void testAddTooLongWord() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testAddTooLongWord(formatVersion); - } - } - - private void testAddTooLongWord(final int formatVersion) { - File dictFile = null; - try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); - } catch (IOException e) { - fail("IOException while writing an initial dictionary : " + e); - } - final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), - 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, - Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); - + final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403); final StringBuffer stringBuilder = new StringBuffer(); - for (int i = 0; i < Constants.DICTIONARY_MAX_WORD_LENGTH; i++) { + for (int i = 0; i < BinaryDictionary.DICTIONARY_MAX_WORD_LENGTH; i++) { stringBuilder.append('a'); } final String validLongWord = stringBuilder.toString(); @@ -175,9 +156,8 @@ public class BinaryDictionaryTests extends AndroidTestCase { addUnigramWord(binaryDictionary, validLongWord, probability); addUnigramWord(binaryDictionary, invalidLongWord, probability); // Too long short cut. - binaryDictionary.addUnigramEntry("a", probability, invalidLongWord, - 10 /* shortcutProbability */, false /* isBeginningOfSentence */, - false /* isNotAWord */, false /* isBlacklisted */, + binaryDictionary.addUnigramEntry("a", probability, false /* isBeginningOfSentence */, + false /* isNotAWord */, false /* isPossiblyOffensive */, BinaryDictionary.NOT_A_VALID_TIMESTAMP); addUnigramWord(binaryDictionary, "abc", probability); final int updatedProbability = 200; @@ -188,58 +168,49 @@ public class BinaryDictionaryTests extends AndroidTestCase { assertEquals(probability, binaryDictionary.getFrequency("aaa")); assertEquals(updatedProbability, binaryDictionary.getFrequency(validLongWord)); - assertEquals(BinaryDictionary.NOT_A_PROBABILITY, - binaryDictionary.getFrequency(invalidLongWord)); + assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency(invalidLongWord)); assertEquals(updatedProbability, binaryDictionary.getFrequency("abc")); - dictFile.delete(); } private static void addUnigramWord(final BinaryDictionary binaryDictionary, final String word, final int probability) { - binaryDictionary.addUnigramEntry(word, probability, "" /* shortcutTarget */, - BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */, + binaryDictionary.addUnigramEntry(word, probability, false /* isBeginningOfSentence */, false /* isNotAWord */, - false /* isBlacklisted */, BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); + false /* isPossiblyOffensive */, + BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); } private static void addBigramWords(final BinaryDictionary binaryDictionary, final String word0, final String word1, final int probability) { - binaryDictionary.addNgramEntry(new PrevWordsInfo(new WordInfo(word0)), word1, probability, + binaryDictionary.addNgramEntry(new NgramContext(new WordInfo(word0)), word1, probability, BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); } - private static boolean isValidBigram(final BinaryDictionary binaryDictionary, - final String word0, final String word1) { - return binaryDictionary.isValidNgram(new PrevWordsInfo(new WordInfo(word0)), word1); + private static void addTrigramEntry(final BinaryDictionary binaryDictionary, final String word0, + final String word1, final String word2, final int probability) { + binaryDictionary.addNgramEntry( + new NgramContext(new WordInfo(word1), new WordInfo(word0)), word2, + probability, BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); } - private static void removeBigramEntry(final BinaryDictionary binaryDictionary, + private static boolean isValidBigram(final BinaryDictionary binaryDictionary, final String word0, final String word1) { - binaryDictionary.removeNgramEntry(new PrevWordsInfo(new WordInfo(word0)), word1); + return binaryDictionary.isValidNgram(new NgramContext(new WordInfo(word0)), word1); } private static int getBigramProbability(final BinaryDictionary binaryDictionary, final String word0, final String word1) { - return binaryDictionary.getNgramProbability(new PrevWordsInfo(new WordInfo(word0)), word1); + return binaryDictionary.getNgramProbability(new NgramContext(new WordInfo(word0)), word1); } - public void testAddUnigramWord() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testAddUnigramWord(formatVersion); - } + private static int getTrigramProbability(final BinaryDictionary binaryDictionary, + final String word0, final String word1, final String word2) { + return binaryDictionary.getNgramProbability( + new NgramContext(new WordInfo(word1), new WordInfo(word0)), word2); } - private void testAddUnigramWord(final int formatVersion) { - File dictFile = null; - try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); - } catch (IOException e) { - fail("IOException while writing an initial dictionary : " + e); - } - BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), - 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, - Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); - + public void testAddUnigramWord() { + final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403); final int probability = 100; addUnigramWord(binaryDictionary, "aaa", probability); // Reallocate and create. @@ -263,30 +234,13 @@ public class BinaryDictionaryTests extends AndroidTestCase { assertEquals(probability, binaryDictionary.getFrequency("aaaa")); assertEquals(probability, binaryDictionary.getFrequency("a")); assertEquals(updatedProbability, binaryDictionary.getFrequency("aaa")); - - dictFile.delete(); } public void testRandomlyAddUnigramWord() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testRandomlyAddUnigramWord(formatVersion); - } - } - - private void testRandomlyAddUnigramWord(final int formatVersion) { final int wordCount = 1000; final int codePointSetSize = 50; final long seed = System.currentTimeMillis(); - - File dictFile = null; - try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); - } catch (IOException e) { - fail("IOException while writing an initial dictionary : " + e); - } - BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), - 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, - Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403); final HashMap<String, Integer> probabilityMap = new HashMap<>(); // Test a word that isn't contained within the dictionary. @@ -302,25 +256,10 @@ public class BinaryDictionaryTests extends AndroidTestCase { for (String word : probabilityMap.keySet()) { assertEquals(word, (int)probabilityMap.get(word), binaryDictionary.getFrequency(word)); } - dictFile.delete(); } public void testAddBigramWords() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testAddBigramWords(formatVersion); - } - } - - private void testAddBigramWords(final int formatVersion) { - File dictFile = null; - try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); - } catch (IOException e) { - fail("IOException while writing an initial dictionary : " + e); - } - BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), - 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, - Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403); final int unigramProbability = 100; final int bigramProbability = 150; @@ -337,18 +276,14 @@ public class BinaryDictionaryTests extends AndroidTestCase { assertTrue(isValidBigram(binaryDictionary, "aaa", "bcc")); assertTrue(isValidBigram(binaryDictionary, "abb", "aaa")); assertTrue(isValidBigram(binaryDictionary, "abb", "bcc")); - if (canCheckBigramProbability(formatVersion)) { - assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "abb")); - assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bcc")); - assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa")); - assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc")); - } + assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "abb")); + assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bcc")); + assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa")); + assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc")); addBigramWords(binaryDictionary, "aaa", "abb", updatedBigramProbability); - if (canCheckBigramProbability(formatVersion)) { - assertEquals(updatedBigramProbability, - getBigramProbability(binaryDictionary, "aaa", "abb")); - } + assertEquals(updatedBigramProbability, + getBigramProbability(binaryDictionary, "aaa", "abb")); assertFalse(isValidBigram(binaryDictionary, "bcc", "aaa")); assertFalse(isValidBigram(binaryDictionary, "bcc", "bbc")); @@ -368,43 +303,21 @@ public class BinaryDictionaryTests extends AndroidTestCase { addUnigramWord(binaryDictionary, "abc", unigramProbability); addUnigramWord(binaryDictionary, "f", unigramProbability); - if (canCheckBigramProbability(formatVersion)) { - assertEquals(bigramProbability, - getBigramProbability(binaryDictionary, "abcde", "fghij")); - } + assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abcde", "fghij")); assertEquals(Dictionary.NOT_A_PROBABILITY, getBigramProbability(binaryDictionary, "abcde", "fgh")); addBigramWords(binaryDictionary, "abcde", "fghij", updatedBigramProbability); - if (canCheckBigramProbability(formatVersion)) { - assertEquals(updatedBigramProbability, - getBigramProbability(binaryDictionary, "abcde", "fghij")); - } - - dictFile.delete(); + assertEquals(updatedBigramProbability, + getBigramProbability(binaryDictionary, "abcde", "fghij")); } public void testRandomlyAddBigramWords() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testRandomlyAddBigramWords(formatVersion); - } - } - - private void testRandomlyAddBigramWords(final int formatVersion) { final int wordCount = 100; final int bigramCount = 1000; final int codePointSetSize = 50; final long seed = System.currentTimeMillis(); final Random random = new Random(seed); - - File dictFile = null; - try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); - } catch (IOException e) { - fail("IOException while writing an initial dictionary : " + e); - } - BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), - 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, - Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403); final ArrayList<String> words = new ArrayList<>(); final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>(); @@ -439,83 +352,40 @@ public class BinaryDictionaryTests extends AndroidTestCase { final int bigramProbability = bigramProbabilities.get(bigram); assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY, isValidBigram(binaryDictionary, bigram.first, bigram.second)); - if (canCheckBigramProbability(formatVersion)) { - assertEquals(bigramProbability, - getBigramProbability(binaryDictionary, bigram.first, bigram.second)); - } - } - - dictFile.delete(); - } - - public void testRemoveBigramWords() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testRemoveBigramWords(formatVersion); + assertEquals(bigramProbability, + getBigramProbability(binaryDictionary, bigram.first, bigram.second)); } } - private void testRemoveBigramWords(final int formatVersion) { - File dictFile = null; - try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); - } catch (IOException e) { - fail("IOException while writing an initial dictionary : " + e); - } - BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), - 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, - Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + public void testAddTrigramWords() { + final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403); final int unigramProbability = 100; - final int bigramProbability = 150; + final int trigramProbability = 150; + final int updatedTrigramProbability = 200; addUnigramWord(binaryDictionary, "aaa", unigramProbability); addUnigramWord(binaryDictionary, "abb", unigramProbability); addUnigramWord(binaryDictionary, "bcc", unigramProbability); - addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability); - addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability); - addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability); - addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability); - - assertTrue(isValidBigram(binaryDictionary, "aaa", "abb")); - assertTrue(isValidBigram(binaryDictionary, "aaa", "bcc")); - assertTrue(isValidBigram(binaryDictionary, "abb", "aaa")); - assertTrue(isValidBigram(binaryDictionary, "abb", "bcc")); - - removeBigramEntry(binaryDictionary, "aaa", "abb"); - assertFalse(isValidBigram(binaryDictionary, "aaa", "abb")); - addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability); - assertTrue(isValidBigram(binaryDictionary, "aaa", "abb")); + addBigramWords(binaryDictionary, "abb", "bcc", 10); + addBigramWords(binaryDictionary, "abb", "aaa", 10); - removeBigramEntry(binaryDictionary, "aaa", "bcc"); - assertFalse(isValidBigram(binaryDictionary, "aaa", "bcc")); - removeBigramEntry(binaryDictionary, "abb", "aaa"); - assertFalse(isValidBigram(binaryDictionary, "abb", "aaa")); - removeBigramEntry(binaryDictionary, "abb", "bcc"); - assertFalse(isValidBigram(binaryDictionary, "abb", "bcc")); + addTrigramEntry(binaryDictionary, "aaa", "abb", "bcc", trigramProbability); + addTrigramEntry(binaryDictionary, "bcc", "abb", "aaa", trigramProbability); - removeBigramEntry(binaryDictionary, "aaa", "abb"); - // Test remove non-existing bigram operation. - removeBigramEntry(binaryDictionary, "aaa", "abb"); - removeBigramEntry(binaryDictionary, "bcc", "aaa"); + assertEquals(trigramProbability, + getTrigramProbability(binaryDictionary, "aaa", "abb", "bcc")); + assertEquals(trigramProbability, + getTrigramProbability(binaryDictionary, "bcc", "abb", "aaa")); + assertFalse(isValidBigram(binaryDictionary, "aaa", "abb")); - dictFile.delete(); + addTrigramEntry(binaryDictionary, "bcc", "abb", "aaa", updatedTrigramProbability); + assertEquals(updatedTrigramProbability, + getTrigramProbability(binaryDictionary, "bcc", "abb", "aaa")); } public void testFlushDictionary() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testFlushDictionary(formatVersion); - } - } - - private void testFlushDictionary(final int formatVersion) { - File dictFile = null; - try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); - } catch (IOException e) { - fail("IOException while writing an initial dictionary : " + e); - } - BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), - 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, - Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403); + BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile); final int probability = 100; addUnigramWord(binaryDictionary, "aaa", probability); @@ -535,42 +405,21 @@ public class BinaryDictionaryTests extends AndroidTestCase { binaryDictionary.flush(); binaryDictionary.close(); - binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), - 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, - Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); - + binaryDictionary = getBinaryDictionary(dictFile); assertEquals(probability, binaryDictionary.getFrequency("aaa")); assertEquals(probability, binaryDictionary.getFrequency("abcd")); addUnigramWord(binaryDictionary, "bcde", probability); binaryDictionary.flush(); binaryDictionary.close(); - binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), - 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, - Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + binaryDictionary = getBinaryDictionary(dictFile); assertEquals(probability, binaryDictionary.getFrequency("bcde")); binaryDictionary.close(); - - dictFile.delete(); } public void testFlushWithGCDictionary() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testFlushWithGCDictionary(formatVersion); - } - } - - private void testFlushWithGCDictionary(final int formatVersion) { - File dictFile = null; - try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); - } catch (IOException e) { - fail("IOException while writing an initial dictionary : " + e); - } - BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), - 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, - Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); - + final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403); + BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile); final int unigramProbability = 100; final int bigramProbability = 150; addUnigramWord(binaryDictionary, "aaa", unigramProbability); @@ -583,51 +432,30 @@ public class BinaryDictionaryTests extends AndroidTestCase { binaryDictionary.flushWithGC(); binaryDictionary.close(); - binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), - 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, - Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + binaryDictionary = getBinaryDictionary(dictFile); assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa")); assertEquals(unigramProbability, binaryDictionary.getFrequency("abb")); assertEquals(unigramProbability, binaryDictionary.getFrequency("bcc")); - if (canCheckBigramProbability(formatVersion)) { - assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "abb")); - assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bcc")); - assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa")); - assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc")); - } + assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "abb")); + assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bcc")); + assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa")); + assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc")); assertFalse(isValidBigram(binaryDictionary, "bcc", "aaa")); assertFalse(isValidBigram(binaryDictionary, "bcc", "bbc")); assertFalse(isValidBigram(binaryDictionary, "aaa", "aaa")); binaryDictionary.flushWithGC(); binaryDictionary.close(); - - dictFile.delete(); } public void testAddBigramWordsAndFlashWithGC() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testAddBigramWordsAndFlashWithGC(formatVersion); - } - } - - // TODO: Evaluate performance of GC - private void testAddBigramWordsAndFlashWithGC(final int formatVersion) { final int wordCount = 100; final int bigramCount = 1000; final int codePointSetSize = 30; final long seed = System.currentTimeMillis(); final Random random = new Random(seed); - File dictFile = null; - try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); - } catch (IOException e) { - fail("IOException while writing an initial dictionary : " + e); - } - - BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), - 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, - Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403); + BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile); final ArrayList<String> words = new ArrayList<>(); final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>(); @@ -660,52 +488,37 @@ public class BinaryDictionaryTests extends AndroidTestCase { binaryDictionary.flushWithGC(); binaryDictionary.close(); - binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), - 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, - Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); - + binaryDictionary = getBinaryDictionary(dictFile); for (final Pair<String, String> bigram : bigramWords) { final int bigramProbability = bigramProbabilities.get(bigram); assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY, isValidBigram(binaryDictionary, bigram.first, bigram.second)); - if (canCheckBigramProbability(formatVersion)) { - assertEquals(bigramProbability, - getBigramProbability(binaryDictionary, bigram.first, bigram.second)); - } + assertEquals(bigramProbability, + getBigramProbability(binaryDictionary, bigram.first, bigram.second)); } - - dictFile.delete(); } public void testRandomOperationsAndFlashWithGC() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testRandomOperationsAndFlashWithGC(formatVersion); - } - } + final int maxUnigramCount = 5000; + final int maxBigramCount = 10000; + final HashMap<String, String> attributeMap = new HashMap<>(); + attributeMap.put(DictionaryHeader.MAX_UNIGRAM_COUNT_KEY, String.valueOf(maxUnigramCount)); + attributeMap.put(DictionaryHeader.MAX_BIGRAM_COUNT_KEY, String.valueOf(maxBigramCount)); - private void testRandomOperationsAndFlashWithGC(final int formatVersion) { final int flashWithGCIterationCount = 50; final int operationCountInEachIteration = 200; final int initialUnigramCount = 100; final float addUnigramProb = 0.5f; final float addBigramProb = 0.8f; - final float removeBigramProb = 0.2f; final int codePointSetSize = 30; final long seed = System.currentTimeMillis(); final Random random = new Random(seed); + final File dictFile = createEmptyDictionaryWithAttributesAndGetFile(FormatSpec.VERSION403, + attributeMap); + BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile); - File dictFile = null; - try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); - } catch (IOException e) { - fail("IOException while writing an initial dictionary : " + e); - } - - BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), - 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, - Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); final ArrayList<String> words = new ArrayList<>(); final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>(); final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); @@ -722,9 +535,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { binaryDictionary.close(); for (int gcCount = 0; gcCount < flashWithGCIterationCount; gcCount++) { - binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), - 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, - Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + binaryDictionary = getBinaryDictionary(dictFile); for (int opCount = 0; opCount < operationCountInEachIteration; opCount++) { // Add unigram. if (random.nextFloat() < addUnigramProb) { @@ -754,14 +565,6 @@ public class BinaryDictionaryTests extends AndroidTestCase { bigramProbabilities.put(bigram, bigramProbability); addBigramWords(binaryDictionary, word0, word1, bigramProbability); } - // Remove bigram. - if (random.nextFloat() < removeBigramProb && !bigramWords.isEmpty()) { - final int bigramIndex = random.nextInt(bigramWords.size()); - final Pair<String, String> bigram = bigramWords.get(bigramIndex); - bigramWords.remove(bigramIndex); - bigramProbabilities.remove(bigram); - removeBigramEntry(binaryDictionary, bigram.first, bigram.second); - } } // Test whether the all unigram operations are collectlly handled. @@ -775,45 +578,29 @@ public class BinaryDictionaryTests extends AndroidTestCase { final Pair<String, String> bigram = bigramWords.get(i); final int probability; if (bigramProbabilities.containsKey(bigram)) { - final int bigramProbability = bigramProbabilities.get(bigram); - probability = bigramProbability; + probability = bigramProbabilities.get(bigram); } else { probability = Dictionary.NOT_A_PROBABILITY; } - if (canCheckBigramProbability(formatVersion)) { - assertEquals(probability, - getBigramProbability(binaryDictionary, bigram.first, bigram.second)); - } + assertEquals(probability, + getBigramProbability(binaryDictionary, bigram.first, bigram.second)); assertEquals(probability != Dictionary.NOT_A_PROBABILITY, isValidBigram(binaryDictionary, bigram.first, bigram.second)); } binaryDictionary.flushWithGC(); binaryDictionary.close(); } - - dictFile.delete(); } public void testAddManyUnigramsAndFlushWithGC() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testAddManyUnigramsAndFlushWithGC(formatVersion); - } - } - - private void testAddManyUnigramsAndFlushWithGC(final int formatVersion) { final int flashWithGCIterationCount = 3; final int codePointSetSize = 50; final long seed = System.currentTimeMillis(); final Random random = new Random(seed); - File dictFile = null; - try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); - } catch (IOException e) { - fail("IOException while writing an initial dictionary : " + e); - } + final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403); final ArrayList<String> words = new ArrayList<>(); final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); @@ -821,9 +608,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { BinaryDictionary binaryDictionary; for (int i = 0; i < flashWithGCIterationCount; i++) { - binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), - 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, - Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + binaryDictionary = getBinaryDictionary(dictFile); while(!binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { final String word = CodePointUtils.generateWord(random, codePointSet); words.add(word); @@ -841,30 +626,23 @@ public class BinaryDictionaryTests extends AndroidTestCase { binaryDictionary.flushWithGC(); binaryDictionary.close(); } - - dictFile.delete(); } public void testUnigramAndBigramCount() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testUnigramAndBigramCount(formatVersion); - } - } + final int maxUnigramCount = 5000; + final int maxBigramCount = 10000; + final HashMap<String, String> attributeMap = new HashMap<>(); + attributeMap.put(DictionaryHeader.MAX_UNIGRAM_COUNT_KEY, String.valueOf(maxUnigramCount)); + attributeMap.put(DictionaryHeader.MAX_BIGRAM_COUNT_KEY, String.valueOf(maxBigramCount)); - private void testUnigramAndBigramCount(final int formatVersion) { final int flashWithGCIterationCount = 10; final int codePointSetSize = 50; final int unigramCountPerIteration = 1000; final int bigramCountPerIteration = 2000; final long seed = System.currentTimeMillis(); final Random random = new Random(seed); - - File dictFile = null; - try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); - } catch (IOException e) { - fail("IOException while writing an initial dictionary : " + e); - } + final File dictFile = createEmptyDictionaryWithAttributesAndGetFile(FormatSpec.VERSION403, + attributeMap); final ArrayList<String> words = new ArrayList<>(); final HashSet<Pair<String, String>> bigrams = new HashSet<>(); @@ -872,9 +650,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { BinaryDictionary binaryDictionary; for (int i = 0; i < flashWithGCIterationCount; i++) { - binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), - 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, - Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + binaryDictionary = getBinaryDictionary(dictFile); for (int j = 0; j < unigramCountPerIteration; j++) { final String word = CodePointUtils.generateWord(random, codePointSet); words.add(word); @@ -892,108 +668,31 @@ public class BinaryDictionaryTests extends AndroidTestCase { addBigramWords(binaryDictionary, word0, word1, bigramProbability); } assertEquals(new HashSet<>(words).size(), Integer.parseInt( - binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY))); + binaryDictionary.getPropertyForGettingStats( + BinaryDictionary.UNIGRAM_COUNT_QUERY))); assertEquals(new HashSet<>(bigrams).size(), Integer.parseInt( - binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY))); + binaryDictionary.getPropertyForGettingStats( + BinaryDictionary.BIGRAM_COUNT_QUERY))); binaryDictionary.flushWithGC(); assertEquals(new HashSet<>(words).size(), Integer.parseInt( - binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY))); + binaryDictionary.getPropertyForGettingStats( + BinaryDictionary.UNIGRAM_COUNT_QUERY))); assertEquals(new HashSet<>(bigrams).size(), Integer.parseInt( - binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY))); + binaryDictionary.getPropertyForGettingStats( + BinaryDictionary.BIGRAM_COUNT_QUERY))); binaryDictionary.close(); } - - dictFile.delete(); - } - - public void testAddMultipleDictionaryEntries() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testAddMultipleDictionaryEntries(formatVersion); - } - } - - private void testAddMultipleDictionaryEntries(final int formatVersion) { - final int codePointSetSize = 20; - final int lmParamCount = 1000; - final double bigramContinueRate = 0.9; - final long seed = System.currentTimeMillis(); - final Random random = new Random(seed); - - File dictFile = null; - try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); - } catch (IOException e) { - fail("IOException while writing an initial dictionary : " + e); - } - - final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); - final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); - final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); - - final LanguageModelParam[] languageModelParams = new LanguageModelParam[lmParamCount]; - String prevWord = null; - for (int i = 0; i < languageModelParams.length; i++) { - final String word = CodePointUtils.generateWord(random, codePointSet); - final int probability = random.nextInt(0xFF); - final int bigramProbability = probability + random.nextInt(0xFF - probability); - unigramProbabilities.put(word, probability); - if (prevWord == null) { - languageModelParams[i] = new LanguageModelParam(word, probability, - BinaryDictionary.NOT_A_VALID_TIMESTAMP); - } else { - languageModelParams[i] = new LanguageModelParam(prevWord, word, probability, - bigramProbability, BinaryDictionary.NOT_A_VALID_TIMESTAMP); - bigramProbabilities.put(new Pair<>(prevWord, word), - bigramProbability); - } - prevWord = (random.nextDouble() < bigramContinueRate) ? word : null; - } - - final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), - 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, - Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); - binaryDictionary.addMultipleDictionaryEntries(languageModelParams); - - for (Map.Entry<String, Integer> entry : unigramProbabilities.entrySet()) { - assertEquals((int)entry.getValue(), binaryDictionary.getFrequency(entry.getKey())); - } - - for (Map.Entry<Pair<String, String>, Integer> entry : bigramProbabilities.entrySet()) { - final String word0 = entry.getKey().first; - final String word1 = entry.getKey().second; - final int bigramProbability = entry.getValue(); - assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY, - isValidBigram(binaryDictionary, word0, word1)); - if (canCheckBigramProbability(formatVersion)) { - assertEquals(bigramProbability, - getBigramProbability(binaryDictionary, word0, word1)); - } - } } public void testGetWordProperties() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testGetWordProperties(formatVersion); - } - } - - private void testGetWordProperties(final int formatVersion) { final long seed = System.currentTimeMillis(); final Random random = new Random(seed); final int UNIGRAM_COUNT = 1000; final int BIGRAM_COUNT = 1000; final int codePointSetSize = 20; final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); - - File dictFile = null; - try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); - } catch (IOException e) { - fail("IOException while writing an initial dictionary : " + e); - } - final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), - 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, - Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403); + final BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile); final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord", false /* isBeginningOfSentence */); @@ -1008,11 +707,10 @@ public class BinaryDictionaryTests extends AndroidTestCase { final String word = CodePointUtils.generateWord(random, codePointSet); final int unigramProbability = random.nextInt(0xFF); final boolean isNotAWord = random.nextBoolean(); - final boolean isBlacklisted = random.nextBoolean(); + final boolean isPossiblyOffensive = random.nextBoolean(); // TODO: Add tests for historical info. binaryDictionary.addUnigramEntry(word, unigramProbability, - null /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY, - false /* isBeginningOfSentence */, isNotAWord, isBlacklisted, + false /* isBeginningOfSentence */, isNotAWord, isPossiblyOffensive, BinaryDictionary.NOT_A_VALID_TIMESTAMP); if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { binaryDictionary.flushWithGC(); @@ -1024,11 +722,9 @@ public class BinaryDictionaryTests extends AndroidTestCase { assertEquals(word, wordProperty.mWord); assertTrue(wordProperty.isValid()); assertEquals(isNotAWord, wordProperty.mIsNotAWord); - assertEquals(isBlacklisted, wordProperty.mIsBlacklistEntry); - assertEquals(false, wordProperty.mHasBigrams); - assertEquals(false, wordProperty.mHasShortcuts); + assertEquals(isPossiblyOffensive, wordProperty.mIsPossiblyOffensive); + assertEquals(false, wordProperty.mHasNgrams); assertEquals(unigramProbability, wordProperty.mProbabilityInfo.mProbability); - assertTrue(wordProperty.mShortcutTargets.isEmpty()); } for (int i = 0; i < BIGRAM_COUNT; i++) { @@ -1062,41 +758,25 @@ public class BinaryDictionaryTests extends AndroidTestCase { final HashSet<String> bigramWord1s = bigrams.get(word0); final WordProperty wordProperty = binaryDictionary.getWordProperty(word0, false /* isBeginningOfSentence */); - assertEquals(bigramWord1s.size(), wordProperty.mBigrams.size()); - for (int j = 0; j < wordProperty.mBigrams.size(); j++) { - final String word1 = wordProperty.mBigrams.get(j).mWord; + assertEquals(bigramWord1s.size(), wordProperty.mNgrams.size()); + // TODO: Support ngram. + for (final WeightedString bigramTarget : wordProperty.getBigrams()) { + final String word1 = bigramTarget.mWord; assertTrue(bigramWord1s.contains(word1)); - if (canCheckBigramProbability(formatVersion)) { - final int bigramProbability = bigramProbabilities.get(new Pair<>(word0, word1)); - assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability()); - } + final int bigramProbability = bigramProbabilities.get(new Pair<>(word0, word1)); + assertEquals(bigramProbability, bigramTarget.getProbability()); } } } public void testIterateAllWords() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testIterateAllWords(formatVersion); - } - } - - private void testIterateAllWords(final int formatVersion) { final long seed = System.currentTimeMillis(); final Random random = new Random(seed); final int UNIGRAM_COUNT = 1000; final int BIGRAM_COUNT = 1000; final int codePointSetSize = 20; final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); - - File dictFile = null; - try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); - } catch (IOException e) { - fail("IOException while writing an initial dictionary : " + e); - } - final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), - 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, - Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403); final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord", false /* isBeginningOfSentence */); @@ -1155,15 +835,16 @@ public class BinaryDictionaryTests extends AndroidTestCase { wordProperty.mProbabilityInfo.mProbability); wordSet.remove(word0); final HashSet<String> bigramWord1s = bigrams.get(word0); - for (int j = 0; j < wordProperty.mBigrams.size(); j++) { - final String word1 = wordProperty.mBigrams.get(j).mWord; - assertTrue(bigramWord1s.contains(word1)); - final Pair<String, String> bigram = new Pair<>(word0, word1); - if (canCheckBigramProbability(formatVersion)) { + // TODO: Support ngram. + if (wordProperty.mHasNgrams) { + for (final WeightedString bigramTarget : wordProperty.getBigrams()) { + final String word1 = bigramTarget.mWord; + assertTrue(bigramWord1s.contains(word1)); + final Pair<String, String> bigram = new Pair<>(word0, word1); final int bigramProbability = bigramProbabilitiesToCheckLater.get(bigram); - assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability()); + assertEquals(bigramProbability, bigramTarget.getProbability()); + bigramSet.remove(bigram); } - bigramSet.remove(bigram); } token = result.mNextToken; } while (token != 0); @@ -1171,346 +852,33 @@ public class BinaryDictionaryTests extends AndroidTestCase { assertTrue(bigramSet.isEmpty()); } - public void testAddShortcuts() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testAddShortcuts(formatVersion); - } - } - - private void testAddShortcuts(final int formatVersion) { - File dictFile = null; - try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); - } catch (IOException e) { - fail("IOException while writing an initial dictionary : " + e); - } - final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), - 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, - Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); - - final int unigramProbability = 100; - final int shortcutProbability = 10; - binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz", - shortcutProbability, false /* isBeginningOfSentence */, - false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */); - WordProperty wordProperty = binaryDictionary.getWordProperty("aaa", - false /* isBeginningOfSentence */); - assertEquals(1, wordProperty.mShortcutTargets.size()); - assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord); - assertEquals(shortcutProbability, wordProperty.mShortcutTargets.get(0).getProbability()); - final int updatedShortcutProbability = 2; - binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz", - updatedShortcutProbability, false /* isBeginningOfSentence */, - false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */); - wordProperty = binaryDictionary.getWordProperty("aaa", - false /* isBeginningOfSentence */); - assertEquals(1, wordProperty.mShortcutTargets.size()); - assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord); - assertEquals(updatedShortcutProbability, - wordProperty.mShortcutTargets.get(0).getProbability()); - binaryDictionary.addUnigramEntry("aaa", unigramProbability, "yyy", - shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */, - false /* isBlacklisted */, 0 /* timestamp */); - final HashMap<String, Integer> shortcutTargets = new HashMap<>(); - shortcutTargets.put("zzz", updatedShortcutProbability); - shortcutTargets.put("yyy", shortcutProbability); - wordProperty = binaryDictionary.getWordProperty("aaa", - false /* isBeginningOfSentence */); - assertEquals(2, wordProperty.mShortcutTargets.size()); - for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) { - assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord)); - assertEquals((int)shortcutTargets.get(shortcutTarget.mWord), - shortcutTarget.getProbability()); - shortcutTargets.remove(shortcutTarget.mWord); - } - shortcutTargets.put("zzz", updatedShortcutProbability); - shortcutTargets.put("yyy", shortcutProbability); - binaryDictionary.flushWithGC(); - wordProperty = binaryDictionary.getWordProperty("aaa", - false /* isBeginningOfSentence */); - assertEquals(2, wordProperty.mShortcutTargets.size()); - for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) { - assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord)); - assertEquals((int)shortcutTargets.get(shortcutTarget.mWord), - shortcutTarget.getProbability()); - shortcutTargets.remove(shortcutTarget.mWord); - } - } - - public void testAddManyShortcuts() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testAddManyShortcuts(formatVersion); - } - } - - private void testAddManyShortcuts(final int formatVersion) { - final long seed = System.currentTimeMillis(); - final Random random = new Random(seed); - final int UNIGRAM_COUNT = 1000; - final int SHORTCUT_COUNT = 10000; - final int codePointSetSize = 20; - final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); - - final ArrayList<String> words = new ArrayList<>(); - final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); - final HashMap<String, HashMap<String, Integer>> shortcutTargets = new HashMap<>(); - - File dictFile = null; - try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); - } catch (IOException e) { - fail("IOException while writing an initial dictionary : " + e); - } - final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), - 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, - Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); - - for (int i = 0; i < UNIGRAM_COUNT; i++) { - final String word = CodePointUtils.generateWord(random, codePointSet); - final int unigramProbability = random.nextInt(0xFF); - addUnigramWord(binaryDictionary, word, unigramProbability); - words.add(word); - unigramProbabilities.put(word, unigramProbability); - if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { - binaryDictionary.flushWithGC(); - } - } - for (int i = 0; i < SHORTCUT_COUNT; i++) { - final String shortcutTarget = CodePointUtils.generateWord(random, codePointSet); - final int shortcutProbability = random.nextInt(0xF); - final String word = words.get(random.nextInt(words.size())); - final int unigramProbability = unigramProbabilities.get(word); - binaryDictionary.addUnigramEntry(word, unigramProbability, shortcutTarget, - shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */, - false /* isBlacklisted */, 0 /* timestamp */); - if (shortcutTargets.containsKey(word)) { - final HashMap<String, Integer> shortcutTargetsOfWord = shortcutTargets.get(word); - shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability); - } else { - final HashMap<String, Integer> shortcutTargetsOfWord = new HashMap<>(); - shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability); - shortcutTargets.put(word, shortcutTargetsOfWord); - } - if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { - binaryDictionary.flushWithGC(); - } - } - - for (final String word : words) { - final WordProperty wordProperty = binaryDictionary.getWordProperty(word, - false /* isBeginningOfSentence */); - assertEquals((int)unigramProbabilities.get(word), - wordProperty.mProbabilityInfo.mProbability); - if (!shortcutTargets.containsKey(word)) { - // The word does not have shortcut targets. - continue; - } - assertEquals(shortcutTargets.get(word).size(), wordProperty.mShortcutTargets.size()); - for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) { - final String targetCodePonts = shortcutTarget.mWord; - assertEquals((int)shortcutTargets.get(word).get(targetCodePonts), - shortcutTarget.getProbability()); - } - } - } - - public void testDictMigration() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, formatVersion); - } - } - - private void testDictMigration(final int fromFormatVersion, final int toFormatVersion) { - File dictFile = null; - try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", fromFormatVersion); - } catch (IOException e) { - fail("IOException while writing an initial dictionary : " + e); - } - final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), - 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, - Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); - final int unigramProbability = 100; - addUnigramWord(binaryDictionary, "aaa", unigramProbability); - addUnigramWord(binaryDictionary, "bbb", unigramProbability); - final int bigramProbability = 150; - addBigramWords(binaryDictionary, "aaa", "bbb", bigramProbability); - final int shortcutProbability = 10; - binaryDictionary.addUnigramEntry("ccc", unigramProbability, "xxx", shortcutProbability, - false /* isBeginningOfSentence */, false /* isNotAWord */, - false /* isBlacklisted */, 0 /* timestamp */); - binaryDictionary.addUnigramEntry("ddd", unigramProbability, null /* shortcutTarget */, - Dictionary.NOT_A_PROBABILITY, false /* isBeginningOfSentence */, - true /* isNotAWord */, true /* isBlacklisted */, 0 /* timestamp */); - binaryDictionary.addNgramEntry(PrevWordsInfo.BEGINNING_OF_SENTENCE, - "aaa", bigramProbability, 0 /* timestamp */); - assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa")); - assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb")); - assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb")); - assertEquals(fromFormatVersion, binaryDictionary.getFormatVersion()); - assertTrue(binaryDictionary.migrateTo(toFormatVersion)); - assertTrue(binaryDictionary.isValidDictionary()); - assertEquals(toFormatVersion, binaryDictionary.getFormatVersion()); - assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa")); - assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb")); - if (canCheckBigramProbability(toFormatVersion)) { - assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bbb")); - assertEquals(bigramProbability, binaryDictionary.getNgramProbability( - PrevWordsInfo.BEGINNING_OF_SENTENCE, "aaa")); - } - assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb")); - WordProperty wordProperty = binaryDictionary.getWordProperty("ccc", - false /* isBeginningOfSentence */); - assertEquals(1, wordProperty.mShortcutTargets.size()); - assertEquals("xxx", wordProperty.mShortcutTargets.get(0).mWord); - wordProperty = binaryDictionary.getWordProperty("ddd", - false /* isBeginningOfSentence */); - assertTrue(wordProperty.mIsBlacklistEntry); - assertTrue(wordProperty.mIsNotAWord); - } - - public void testLargeDictMigration() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testLargeDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, formatVersion); - } - } - - private void testLargeDictMigration(final int fromFormatVersion, final int toFormatVersion) { - final int UNIGRAM_COUNT = 3000; - final int BIGRAM_COUNT = 3000; - final int codePointSetSize = 50; - final long seed = System.currentTimeMillis(); - final Random random = new Random(seed); - - File dictFile = null; - try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", fromFormatVersion); - } catch (IOException e) { - fail("IOException while writing an initial dictionary : " + e); - } - final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), - 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, - Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); - - final ArrayList<String> words = new ArrayList<>(); - final ArrayList<Pair<String, String>> bigrams = new ArrayList<>(); - final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); - final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); - final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); - - for (int i = 0; i < UNIGRAM_COUNT; i++) { - final String word = CodePointUtils.generateWord(random, codePointSet); - final int unigramProbability = random.nextInt(0xFF); - addUnigramWord(binaryDictionary, word, unigramProbability); - if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { - binaryDictionary.flushWithGC(); - } - words.add(word); - unigramProbabilities.put(word, unigramProbability); - } - - for (int i = 0; i < BIGRAM_COUNT; i++) { - final int word0Index = random.nextInt(words.size()); - final int word1Index = random.nextInt(words.size()); - if (word0Index == word1Index) { - continue; - } - final String word0 = words.get(word0Index); - final String word1 = words.get(word1Index); - final int unigramProbability = unigramProbabilities.get(word1); - final int bigramProbability = - random.nextInt(0xFF - unigramProbability) + unigramProbability; - addBigramWords(binaryDictionary, word0, word1, bigramProbability); - if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { - binaryDictionary.flushWithGC(); - } - final Pair<String, String> bigram = new Pair<>(word0, word1); - bigrams.add(bigram); - bigramProbabilities.put(bigram, bigramProbability); - } - assertTrue(binaryDictionary.migrateTo(toFormatVersion)); - - for (final String word : words) { - assertEquals((int)unigramProbabilities.get(word), binaryDictionary.getFrequency(word)); - } - assertEquals(unigramProbabilities.size(), Integer.parseInt( - binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY))); - - for (final Pair<String, String> bigram : bigrams) { - if (canCheckBigramProbability(toFormatVersion)) { - assertEquals((int)bigramProbabilities.get(bigram), - getBigramProbability(binaryDictionary, bigram.first, bigram.second)); - } - assertTrue(isValidBigram(binaryDictionary, bigram.first, bigram.second)); - } - assertEquals(bigramProbabilities.size(), Integer.parseInt( - binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY))); + public void testPossiblyOffensiveAttributeMaintained() { + final BinaryDictionary binaryDictionary = + getEmptyBinaryDictionary(FormatSpec.VERSION403); + binaryDictionary.addUnigramEntry("ddd", 100, false, true, true, 0); + WordProperty wordProperty = binaryDictionary.getWordProperty("ddd", false); + assertEquals(true, wordProperty.mIsPossiblyOffensive); } public void testBeginningOfSentence() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - if (supportsBeginningOfSentence(formatVersion)) { - testBeginningOfSentence(formatVersion); - } - } - } - - private void testBeginningOfSentence(final int formatVersion) { - File dictFile = null; - try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); - } catch (IOException e) { - fail("IOException while writing an initial dictionary : " + e); - } - final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), - 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, - Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403); final int dummyProbability = 0; - final PrevWordsInfo prevWordsInfoBeginningOfSentence = PrevWordsInfo.BEGINNING_OF_SENTENCE; + final NgramContext beginningOfSentenceContext = NgramContext.BEGINNING_OF_SENTENCE; final int bigramProbability = 200; addUnigramWord(binaryDictionary, "aaa", dummyProbability); - binaryDictionary.addNgramEntry(prevWordsInfoBeginningOfSentence, "aaa", bigramProbability, + binaryDictionary.addNgramEntry(beginningOfSentenceContext, "aaa", bigramProbability, BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); assertEquals(bigramProbability, - binaryDictionary.getNgramProbability(prevWordsInfoBeginningOfSentence, "aaa")); - binaryDictionary.addNgramEntry(prevWordsInfoBeginningOfSentence, "aaa", bigramProbability, + binaryDictionary.getNgramProbability(beginningOfSentenceContext, "aaa")); + binaryDictionary.addNgramEntry(beginningOfSentenceContext, "aaa", bigramProbability, BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); addUnigramWord(binaryDictionary, "bbb", dummyProbability); - binaryDictionary.addNgramEntry(prevWordsInfoBeginningOfSentence, "bbb", bigramProbability, + binaryDictionary.addNgramEntry(beginningOfSentenceContext, "bbb", bigramProbability, BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); binaryDictionary.flushWithGC(); assertEquals(bigramProbability, - binaryDictionary.getNgramProbability(prevWordsInfoBeginningOfSentence, "aaa")); + binaryDictionary.getNgramProbability(beginningOfSentenceContext, "aaa")); assertEquals(bigramProbability, - binaryDictionary.getNgramProbability(prevWordsInfoBeginningOfSentence, "bbb")); - } - - public void testGetMaxFrequencyOfExactMatches() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testGetMaxFrequencyOfExactMatches(formatVersion); - } - } - - private void testGetMaxFrequencyOfExactMatches(final int formatVersion) { - File dictFile = null; - try { - dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); - } catch (IOException e) { - fail("IOException while writing an initial dictionary : " + e); - } - final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), - 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, - Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); - addUnigramWord(binaryDictionary, "abc", 10); - addUnigramWord(binaryDictionary, "aBc", 15); - assertEquals(15, binaryDictionary.getMaxFrequencyOfExactMatches("abc")); - addUnigramWord(binaryDictionary, "ab'c", 20); - assertEquals(20, binaryDictionary.getMaxFrequencyOfExactMatches("abc")); - addUnigramWord(binaryDictionary, "a-b-c", 25); - assertEquals(25, binaryDictionary.getMaxFrequencyOfExactMatches("abc")); - addUnigramWord(binaryDictionary, "ab-'-'-'-c", 30); - assertEquals(30, binaryDictionary.getMaxFrequencyOfExactMatches("abc")); - addUnigramWord(binaryDictionary, "ab c", 255); - assertEquals(30, binaryDictionary.getMaxFrequencyOfExactMatches("abc")); + binaryDictionary.getNgramProbability(beginningOfSentenceContext, "bbb")); } } |