diff options
Diffstat (limited to 'tests/src')
-rw-r--r-- | tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOTests.java | 281 |
1 files changed, 177 insertions, 104 deletions
diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOTests.java index 6036562a8..523287b48 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOTests.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOTests.java @@ -17,23 +17,22 @@ package com.android.inputmethod.latin.makedict; import com.android.inputmethod.latin.CollectionUtils; -import com.android.inputmethod.latin.makedict.BinaryDictInputOutput; -import com.android.inputmethod.latin.makedict.FusionDictionary; +import com.android.inputmethod.latin.UserHistoryDictIOUtils; +import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface; import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import com.android.inputmethod.latin.makedict.FusionDictionary.Node; -import com.android.inputmethod.latin.makedict.PendingAttribute; -import com.android.inputmethod.latin.makedict.UnsupportedFormatException; +import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import android.test.AndroidTestCase; import android.util.Log; import android.util.SparseArray; -import java.nio.ByteBuffer; -import java.nio.channels.FileChannel; import java.io.File; -import java.io.FileOutputStream; import java.io.FileInputStream; +import java.io.FileOutputStream; import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; @@ -53,16 +52,76 @@ public class BinaryDictIOTests extends AndroidTestCase { private static final int BIGRAM_FREQ = 50; private static final int TOLERANCE_OF_BIGRAM_FREQ = 5; + private static final int USE_BYTE_ARRAY = 1; + private static final int USE_BYTE_BUFFER = 2; + + private static final List<String> sWords = CollectionUtils.newArrayList(); + private static final SparseArray<List<Integer>> sEmptyBigrams = + CollectionUtils.newSparseArray(); + private static final SparseArray<List<Integer>> sStarBigrams = CollectionUtils.newSparseArray(); + private static final SparseArray<List<Integer>> sChainBigrams = + CollectionUtils.newSparseArray(); + private static final BinaryDictInputOutput.FormatOptions VERSION2 = new BinaryDictInputOutput.FormatOptions(2); - private static final String[] CHARACTERS = - { + private static final String[] CHARACTERS = { "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z" - }; + }; + + public BinaryDictIOTests() { + super(); + + final Random random = new Random(123456); + sWords.clear(); + generateWords(MAX_UNIGRAMS, random); + + for (int i = 0; i < sWords.size(); ++i) { + sChainBigrams.put(i, new ArrayList<Integer>()); + if (i > 0) { + sChainBigrams.get(i-1).add(i); + } + } + + sStarBigrams.put(0, new ArrayList<Integer>()); + for (int i = 1; i < sWords.size(); ++i) { + sStarBigrams.get(0).add(i); + } + } // Utilities for test + + /** + * Makes new buffer according to BUFFER_TYPE. + */ + private FusionDictionaryBufferInterface getBuffer(final File file,final int bufferType) { + FileInputStream inStream = null; + try { + inStream = new FileInputStream(file); + if (bufferType == USE_BYTE_ARRAY) { + final byte[] array = new byte[(int)file.length()]; + inStream.read(array); + return new UserHistoryDictIOUtils.ByteArrayWrapper(array); + } else if (bufferType == USE_BYTE_BUFFER){ + final ByteBuffer buffer = inStream.getChannel().map( + FileChannel.MapMode.READ_ONLY, 0, file.length()); + return new BinaryDictInputOutput.ByteBufferWrapper(buffer); + } + } catch (IOException e) { + Log.e(TAG, "IOException while making buffer: " + e); + } finally { + if (inStream != null) { + try { + inStream.close(); + } catch (IOException e) { + Log.e(TAG, "IOException while closing stream: " + e); + } + } + } + return null; + } + /** * Generates a random word. */ @@ -77,23 +136,29 @@ public class BinaryDictIOTests extends AndroidTestCase { return builder.toString(); } - private List<String> generateWords(final int number, final Random random) { + private void generateWords(final int number, final Random random) { final Set<String> wordSet = CollectionUtils.newHashSet(); while (wordSet.size() < number) { wordSet.add(generateWord(random.nextInt())); } - return new ArrayList<String>(wordSet); + sWords.addAll(wordSet); } /** * Adds unigrams to the dictionary. */ - private void addUnigrams(final int number, - final FusionDictionary dict, - final List<String> words) { + private void addUnigrams(final int number, final FusionDictionary dict, + final List<String> words, final Map<String, List<String>> shortcutMap) { for (int i = 0; i < number; ++i) { final String word = words.get(i); - dict.add(word, UNIGRAM_FREQ, null, false /* isNotAWord */); + final ArrayList<WeightedString> shortcuts = CollectionUtils.newArrayList(); + if (shortcutMap != null && shortcutMap.containsKey(word)) { + for (final String shortcut : shortcutMap.get(word)) { + shortcuts.add(new WeightedString(shortcut, UNIGRAM_FREQ)); + } + } + dict.add(word, UNIGRAM_FREQ, (shortcutMap == null) ? null : shortcuts, + false /* isNotAWord */); } } @@ -130,9 +195,8 @@ public class BinaryDictIOTests extends AndroidTestCase { return diff; } - private void checkDictionary(final FusionDictionary dict, - final List<String> words, - final SparseArray<List<Integer>> bigrams) { + private void checkDictionary(final FusionDictionary dict, final List<String> words, + final SparseArray<List<Integer>> bigrams, final Map<String, List<String>> shortcutMap) { assertNotNull(dict); // check unigram @@ -149,94 +213,93 @@ public class BinaryDictIOTests extends AndroidTestCase { assertNotNull(words.get(w1) + "," + words.get(w2), cg.getBigram(words.get(w2))); } } + + // check shortcut + if (shortcutMap != null) { + for (final Map.Entry<String, List<String>> entry : shortcutMap.entrySet()) { + final CharGroup group = FusionDictionary.findWordInTree(dict.mRoot, entry.getKey()); + for (final String word : entry.getValue()) { + assertNotNull("shortcut not found: " + entry.getKey() + ", " + word, + group.getShortcut(word)); + } + } + } } // Tests for readDictionaryBinary and writeDictionaryBinary private long timeReadingAndCheckDict(final File file, final List<String> words, - final SparseArray<List<Integer>> bigrams) { - + final SparseArray<List<Integer>> bigrams, final Map<String, List<String>> shortcutMap, + final int bufferType) { long now, diff = -1; + final FusionDictionaryBufferInterface buffer = getBuffer(file, bufferType); + assertNotNull(buffer); - FileInputStream inStream = null; + FusionDictionary dict = null; try { - inStream = new FileInputStream(file); - final ByteBuffer buffer = inStream.getChannel().map( - FileChannel.MapMode.READ_ONLY, 0, file.length()); - now = System.currentTimeMillis(); - - final FusionDictionary dict = - BinaryDictInputOutput.readDictionaryBinary(buffer, null); - - diff = System.currentTimeMillis() - now; - - checkDictionary(dict, words, bigrams); - return diff; - + dict = BinaryDictInputOutput.readDictionaryBinary(buffer, null); + diff = System.currentTimeMillis() - now; } catch (IOException e) { - Log.e(TAG, "raise IOException while reading file " + e); + Log.e(TAG, "IOException while reading dictionary: " + e); } catch (UnsupportedFormatException e) { - Log.e(TAG, "Unsupported format: " + e); - } finally { - if (inStream != null) { - try { - inStream.close(); - } catch (IOException e) { - // do nothing - } - } + Log.e(TAG, "Unsupported format: "+ e); } + checkDictionary(dict, words, bigrams, shortcutMap); return diff; } + // Tests for readDictionaryBinary and writeDictionaryBinary private String runReadAndWrite(final List<String> words, - final SparseArray<List<Integer>> bigrams, - final String message) { - final FusionDictionary dict = new FusionDictionary(new Node(), - new FusionDictionary.DictionaryOptions( - new HashMap<String,String>(), false, false)); - + final SparseArray<List<Integer>> bigrams, final Map<String, List<String>> shortcuts, + final int bufferType, final String message) { File file = null; try { file = File.createTempFile("runReadAndWrite", ".dict"); } catch (IOException e) { Log.e(TAG, "IOException: " + e); } - assertNotNull(file); - addUnigrams(words.size(), dict, words); + final FusionDictionary dict = new FusionDictionary(new Node(), + new FusionDictionary.DictionaryOptions( + new HashMap<String,String>(), false, false)); + addUnigrams(words.size(), dict, words, shortcuts); addBigrams(dict, words, bigrams); - // check original dictionary - checkDictionary(dict, words, bigrams); + checkDictionary(dict, words, bigrams, shortcuts); final long write = timeWritingDictToFile(file, dict); - final long read = timeReadingAndCheckDict(file, words, bigrams); + final long read = timeReadingAndCheckDict(file, words, bigrams, shortcuts, bufferType); - return "PROF: read=" + read + "ms, write=" + write + "ms :" + message; + return "PROF: read=" + read + "ms, write=" + write + "ms :" + message + + " : buffer type = " + bufferType; } - public void testReadAndWrite() { - final List<String> results = new ArrayList<String>(); + public void testReadAndWriteWithByteBuffer() { + final List<String> results = CollectionUtils.newArrayList(); - final Random random = new Random(123456); - final List<String> words = generateWords(MAX_UNIGRAMS, random); - final SparseArray<List<Integer>> emptyArray = CollectionUtils.newSparseArray(); + results.add(runReadAndWrite(sWords, sEmptyBigrams, null /* shortcuts */, USE_BYTE_BUFFER, + "unigram")); + results.add(runReadAndWrite(sWords, sChainBigrams, null /* shortcuts */, USE_BYTE_BUFFER, + "chain")); + results.add(runReadAndWrite(sWords, sStarBigrams, null /* shortcuts */, USE_BYTE_BUFFER, + "star")); - final SparseArray<List<Integer>> chain = CollectionUtils.newSparseArray(); - for (int i = 0; i < words.size(); ++i) chain.put(i, new ArrayList<Integer>()); - for (int i = 1; i < words.size(); ++i) chain.get(i-1).add(i); + for (final String result : results) { + Log.d(TAG, result); + } + } - final SparseArray<List<Integer>> star = CollectionUtils.newSparseArray(); - final List<Integer> list0 = CollectionUtils.newArrayList(); - star.put(0, list0); - for (int i = 1; i < words.size(); ++i) star.get(0).add(i); + public void testReadAndWriteWithByteArray() { + final List<String> results = CollectionUtils.newArrayList(); - results.add(runReadAndWrite(words, emptyArray, "only unigram")); - results.add(runReadAndWrite(words, chain, "chain")); - results.add(runReadAndWrite(words, star, "star")); + results.add(runReadAndWrite(sWords, sEmptyBigrams, null /* shortcuts */, USE_BYTE_ARRAY, + "unigram")); + results.add(runReadAndWrite(sWords, sChainBigrams, null /* shortcuts */, USE_BYTE_ARRAY, + "chain")); + results.add(runReadAndWrite(sWords, sStarBigrams, null /* shortcuts */, USE_BYTE_ARRAY, + "star")); for (final String result : results) { Log.d(TAG, result); @@ -292,7 +355,7 @@ public class BinaryDictIOTests extends AndroidTestCase { } private long timeAndCheckReadUnigramsAndBigramsBinary(final File file, final List<String> words, - final SparseArray<List<Integer>> bigrams) { + final SparseArray<List<Integer>> bigrams, final int bufferType) { FileInputStream inStream = null; final Map<Integer, String> resultWords = CollectionUtils.newTreeMap(); @@ -301,17 +364,13 @@ public class BinaryDictIOTests extends AndroidTestCase { final Map<Integer, Integer> resultFreqs = CollectionUtils.newTreeMap(); long now = -1, diff = -1; + final FusionDictionaryBufferInterface buffer = getBuffer(file, bufferType); + assertNotNull("Can't get buffer.", buffer); try { - inStream = new FileInputStream(file); - final ByteBuffer buffer = inStream.getChannel().map( - FileChannel.MapMode.READ_ONLY, 0, file.length()); - now = System.currentTimeMillis(); - BinaryDictInputOutput.readUnigramsAndBigramsBinary( - new BinaryDictInputOutput.ByteBufferWrapper(buffer), resultWords, resultFreqs, + BinaryDictInputOutput.readUnigramsAndBigramsBinary(buffer, resultWords, resultFreqs, resultBigrams); diff = System.currentTimeMillis() - now; - checkWordMap(words, bigrams, resultWords, resultFreqs, resultBigrams); } catch (IOException e) { Log.e(TAG, "IOException " + e); } catch (UnsupportedFormatException e) { @@ -326,50 +385,64 @@ public class BinaryDictIOTests extends AndroidTestCase { } } + checkWordMap(words, bigrams, resultWords, resultFreqs, resultBigrams); return diff; } - private void runReadUnigramsAndBigramsBinary(final List<String> words, - final SparseArray<List<Integer>> bigrams) { - - // making the dictionary from lists of words. - final FusionDictionary dict = new FusionDictionary(new Node(), - new FusionDictionary.DictionaryOptions( - new HashMap<String, String>(), false, false)); - + private String runReadUnigramsAndBigramsBinary(final List<String> words, + final SparseArray<List<Integer>> bigrams, final int bufferType, + final String message) { File file = null; try { file = File.createTempFile("runReadUnigrams", ".dict"); } catch (IOException e) { Log.e(TAG, "IOException: " + e); } - assertNotNull(file); - addUnigrams(words.size(), dict, words); + // making the dictionary from lists of words. + final FusionDictionary dict = new FusionDictionary(new Node(), + new FusionDictionary.DictionaryOptions( + new HashMap<String, String>(), false, false)); + addUnigrams(words.size(), dict, words, null /* shortcutMap */); addBigrams(dict, words, bigrams); + timeWritingDictToFile(file, dict); - long wordMap = timeAndCheckReadUnigramsAndBigramsBinary(file, words, bigrams); - long fullReading = timeReadingAndCheckDict(file, words, bigrams); + long wordMap = timeAndCheckReadUnigramsAndBigramsBinary(file, words, bigrams, bufferType); + long fullReading = timeReadingAndCheckDict(file, words, bigrams, null /* shortcutMap */, + bufferType); - Log.d(TAG, "read=" + fullReading + ", bytearray=" + wordMap); + return "readDictionaryBinary=" + fullReading + ", readUnigramsAndBigramsBinary=" + wordMap + + " : " + message + " : buffer type = " + bufferType; } - public void testReadUnigramsAndBigramsBinary() { - final List<String> results = new ArrayList<String>(); + public void testReadUnigramsAndBigramsBinaryWithByteBuffer() { + final List<String> results = CollectionUtils.newArrayList(); - final Random random = new Random(123456); - final List<String> words = generateWords(MAX_UNIGRAMS, random); - final SparseArray<List<Integer>> emptyArray = CollectionUtils.newSparseArray(); + results.add(runReadUnigramsAndBigramsBinary(sWords, sEmptyBigrams, USE_BYTE_BUFFER, + "unigram")); + results.add(runReadUnigramsAndBigramsBinary(sWords, sChainBigrams, USE_BYTE_BUFFER, + "chain")); + results.add(runReadUnigramsAndBigramsBinary(sWords, sStarBigrams, USE_BYTE_BUFFER, + "star")); + + for (final String result : results) { + Log.d(TAG, result); + } + } - runReadUnigramsAndBigramsBinary(words, emptyArray); + public void testReadUnigramsAndBigramsBinaryWithByteArray() { + final List<String> results = CollectionUtils.newArrayList(); - final SparseArray<List<Integer>> star = CollectionUtils.newSparseArray(); - for (int i = 1; i < words.size(); ++i) { - star.put(i-1, new ArrayList<Integer>()); - star.get(i-1).add(i); + results.add(runReadUnigramsAndBigramsBinary(sWords, sEmptyBigrams, USE_BYTE_ARRAY, + "unigram")); + results.add(runReadUnigramsAndBigramsBinary(sWords, sChainBigrams, USE_BYTE_ARRAY, + "chain")); + results.add(runReadUnigramsAndBigramsBinary(sWords, sStarBigrams, USE_BYTE_ARRAY, "star")); + + for (final String result : results) { + Log.d(TAG, result); } - runReadUnigramsAndBigramsBinary(words, star); } } |