diff options
Diffstat (limited to 'tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java')
-rw-r--r-- | tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java | 297 |
1 files changed, 269 insertions, 28 deletions
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java index 00d76c990..7ed3ee180 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java @@ -18,26 +18,21 @@ package com.android.inputmethod.latin; import android.test.AndroidTestCase; import android.test.suitebuilder.annotation.LargeTest; +import android.util.Pair; import com.android.inputmethod.latin.makedict.CodePointUtils; -import com.android.inputmethod.latin.makedict.DictEncoder; import com.android.inputmethod.latin.makedict.FormatSpec; -import com.android.inputmethod.latin.makedict.FusionDictionary; -import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; -import com.android.inputmethod.latin.makedict.UnsupportedFormatException; -import com.android.inputmethod.latin.makedict.Ver3DictEncoder; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.Locale; +import java.util.Map; import java.util.Random; @LargeTest public class BinaryDictionaryTests extends AndroidTestCase { - private static final FormatSpec.FormatOptions FORMAT_OPTIONS = - new FormatSpec.FormatOptions(3 /* version */, true /* supportsDynamicUpdate */); private static final String TEST_DICT_FILE_EXTENSION = ".testDict"; private static final String TEST_LOCALE = "test"; @@ -51,15 +46,18 @@ public class BinaryDictionaryTests extends AndroidTestCase { super.tearDown(); } - private File createEmptyDictionaryAndGetFile(final String filename) throws IOException, - UnsupportedFormatException { - final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), - new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false)); + private File createEmptyDictionaryAndGetFile(final String filename) throws IOException { final File file = File.createTempFile(filename, TEST_DICT_FILE_EXTENSION, getContext().getCacheDir()); - final DictEncoder dictEncoder = new Ver3DictEncoder(file); - dictEncoder.writeDictionary(dict, FORMAT_OPTIONS); - return file; + Map<String, String> attributeMap = new HashMap<String, String>(); + attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE, + FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE); + if (BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(), + 3 /* dictVersion */, attributeMap)) { + return file; + } else { + throw new IOException("Empty dictionary cannot be created."); + } } public void testIsValidDictionary() { @@ -68,8 +66,6 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); - } catch (UnsupportedFormatException e) { - fail("UnsupportedFormatException while writing an initial dictionary : " + e); } BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, @@ -94,8 +90,6 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); - } catch (UnsupportedFormatException e) { - fail("UnsupportedFormatException while writing an initial dictionary : " + e); } BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, @@ -138,8 +132,6 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); - } catch (UnsupportedFormatException e) { - fail("UnsupportedFormatException while writing an initial dictionary : " + e); } BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, @@ -168,8 +160,6 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); - } catch (UnsupportedFormatException e) { - fail("UnsupportedFormatException while writing an initial dictionary : " + e); } BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, @@ -239,8 +229,6 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); - } catch (UnsupportedFormatException e) { - fail("UnsupportedFormatException while writing an initial dictionary : " + e); } BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, @@ -293,8 +281,6 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); - } catch (UnsupportedFormatException e) { - fail("UnsupportedFormatException while writing an initial dictionary : " + e); } BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, @@ -341,8 +327,6 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); - } catch (UnsupportedFormatException e) { - fail("UnsupportedFormatException while writing an initial dictionary : " + e); } BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, @@ -384,4 +368,261 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile.delete(); } + + public void testFlushWithGCDictionary() { + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + + final int unigramProbability = 100; + final int bigramProbability = 10; + binaryDictionary.addUnigramWord("aaa", unigramProbability); + binaryDictionary.addUnigramWord("abb", unigramProbability); + binaryDictionary.addUnigramWord("bcc", unigramProbability); + binaryDictionary.addBigramWords("aaa", "abb", bigramProbability); + binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability); + binaryDictionary.addBigramWords("abb", "aaa", bigramProbability); + binaryDictionary.addBigramWords("abb", "bcc", bigramProbability); + binaryDictionary.flushWithGC(); + binaryDictionary.close(); + + binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + final int probability = binaryDictionary.calculateProbability(unigramProbability, + bigramProbability); + assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa")); + assertEquals(unigramProbability, binaryDictionary.getFrequency("abb")); + assertEquals(unigramProbability, binaryDictionary.getFrequency("bcc")); + assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "abb")); + assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "bcc")); + assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa")); + assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc")); + assertEquals(false, binaryDictionary.isValidBigram("bcc", "aaa")); + assertEquals(false, binaryDictionary.isValidBigram("bcc", "bbc")); + assertEquals(false, binaryDictionary.isValidBigram("aaa", "aaa")); + binaryDictionary.flushWithGC(); + binaryDictionary.close(); + + dictFile.delete(); + } + + // TODO: Evaluate performance of GC + public void testAddBigramWordsAndFlashWithGC() { + final int wordCount = 100; + final int bigramCount = 1000; + final int codePointSetSize = 30; + // TODO: Use various seeds such as a current timestamp to make this test more random. + final int seed = 314159265; + + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + + BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + final ArrayList<String> words = new ArrayList<String>(); + // Test a word that isn't contained within the dictionary. + final Random random = new Random(seed); + final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); + final int[] unigramProbabilities = new int[wordCount]; + for (int i = 0; i < wordCount; ++i) { + final String word = CodePointUtils.generateWord(random, codePointSet); + words.add(word); + final int unigramProbability = random.nextInt(0xFF); + unigramProbabilities[i] = unigramProbability; + binaryDictionary.addUnigramWord(word, unigramProbability); + } + + final int[][] probabilities = new int[wordCount][wordCount]; + + for (int i = 0; i < wordCount; ++i) { + for (int j = 0; j < wordCount; ++j) { + probabilities[i][j] = Dictionary.NOT_A_PROBABILITY; + } + } + + for (int i = 0; i < bigramCount; i++) { + final int word0Index = random.nextInt(wordCount); + final int word1Index = random.nextInt(wordCount); + final String word0 = words.get(word0Index); + final String word1 = words.get(word1Index); + final int bigramProbability = random.nextInt(0xF); + probabilities[word0Index][word1Index] = binaryDictionary.calculateProbability( + unigramProbabilities[word1Index], bigramProbability); + binaryDictionary.addBigramWords(word0, word1, bigramProbability); + } + + binaryDictionary.flushWithGC(); + binaryDictionary.close(); + binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + + for (int i = 0; i < words.size(); i++) { + for (int j = 0; j < words.size(); j++) { + assertEquals(probabilities[i][j], + binaryDictionary.getBigramProbability(words.get(i), words.get(j))); + } + } + dictFile.delete(); + } + + public void testRandomOperetionsAndFlashWithGC() { + final int flashWithGCIterationCount = 50; + final int operationCountInEachIteration = 200; + final int initialUnigramCount = 100; + final float addUnigramProb = 0.5f; + final float addBigramProb = 0.8f; + final float removeBigramProb = 0.2f; + final int codePointSetSize = 30; + final int seed = 141421356; + + final Random random = new Random(seed); + + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + + BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + final ArrayList<String> words = new ArrayList<String>(); + final ArrayList<Pair<String, String>> bigramWords = new ArrayList<Pair<String,String>>(); + final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); + final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>(); + final HashMap<Pair<String, String>, Integer> bigramProbabilities = + new HashMap<Pair<String, String>, Integer>(); + for (int i = 0; i < initialUnigramCount; ++i) { + final String word = CodePointUtils.generateWord(random, codePointSet); + words.add(word); + final int unigramProbability = random.nextInt(0xFF); + unigramProbabilities.put(word, unigramProbability); + binaryDictionary.addUnigramWord(word, unigramProbability); + } + binaryDictionary.flushWithGC(); + binaryDictionary.close(); + + for (int gcCount = 0; gcCount < flashWithGCIterationCount; gcCount++) { + binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + for (int opCount = 0; opCount < operationCountInEachIteration; opCount++) { + // Add unigram. + if (random.nextFloat() < addUnigramProb) { + final String word = CodePointUtils.generateWord(random, codePointSet); + words.add(word); + final int unigramProbability = random.nextInt(0xFF); + unigramProbabilities.put(word, unigramProbability); + binaryDictionary.addUnigramWord(word, unigramProbability); + } + // Add bigram. + if (random.nextFloat() < addBigramProb && words.size() > 2) { + final int word0Index = random.nextInt(words.size()); + int word1Index = random.nextInt(words.size() - 1); + if (word0Index <= word1Index) { + word1Index++; + } + final String word0 = words.get(word0Index); + final String word1 = words.get(word1Index); + final int bigramProbability = random.nextInt(0xF); + final Pair<String, String> bigram = new Pair<String, String>(word0, word1); + bigramWords.add(bigram); + bigramProbabilities.put(bigram, bigramProbability); + binaryDictionary.addBigramWords(word0, word1, bigramProbability); + } + // Remove bigram. + if (random.nextFloat() < removeBigramProb && !bigramWords.isEmpty()) { + final int bigramIndex = random.nextInt(bigramWords.size()); + final Pair<String, String> bigram = bigramWords.get(bigramIndex); + bigramWords.remove(bigramIndex); + bigramProbabilities.remove(bigram); + binaryDictionary.removeBigramWords(bigram.first, bigram.second); + } + } + + // Test whether the all unigram operations are collectlly handled. + for (int i = 0; i < words.size(); i++) { + final String word = words.get(i); + final int unigramProbability = unigramProbabilities.get(word); + assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word)); + } + // Test whether the all bigram operations are collectlly handled. + for (int i = 0; i < bigramWords.size(); i++) { + final Pair<String, String> bigram = bigramWords.get(i); + final int unigramProbability = unigramProbabilities.get(bigram.second); + final int probability; + if (bigramProbabilities.containsKey(bigram)) { + final int bigramProbability = bigramProbabilities.get(bigram); + probability = binaryDictionary.calculateProbability(unigramProbability, + bigramProbability); + } else { + probability = Dictionary.NOT_A_PROBABILITY; + } + assertEquals(probability, + binaryDictionary.getBigramProbability(bigram.first, bigram.second)); + } + binaryDictionary.flushWithGC(); + binaryDictionary.close(); + } + + dictFile.delete(); + } + + public void testAddManyUnigramsAndFlushWithGC() { + final int flashWithGCIterationCount = 3; + final int codePointSetSize = 50; + final int seed = 22360679; + + final Random random = new Random(seed); + + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + + final ArrayList<String> words = new ArrayList<String>(); + final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>(); + final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); + + BinaryDictionary binaryDictionary; + for (int i = 0; i < flashWithGCIterationCount; i++) { + binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + while(!binaryDictionary.needsToRunGC()) { + final String word = CodePointUtils.generateWord(random, codePointSet); + words.add(word); + final int unigramProbability = random.nextInt(0xFF); + unigramProbabilities.put(word, unigramProbability); + binaryDictionary.addUnigramWord(word, unigramProbability); + } + + for (int j = 0; j < words.size(); j++) { + final String word = words.get(j); + final int unigramProbability = unigramProbabilities.get(word); + assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word)); + } + + binaryDictionary.flushWithGC(); + binaryDictionary.close(); + } + + dictFile.delete(); + } } |