diff options
Diffstat (limited to 'tests/src/com/android/inputmethod/latin/makedict')
6 files changed, 1160 insertions, 717 deletions
diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java new file mode 100644 index 000000000..a4d94262f --- /dev/null +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java @@ -0,0 +1,684 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.inputmethod.latin.makedict; + +import android.test.AndroidTestCase; +import android.test.MoreAsserts; +import android.test.suitebuilder.annotation.LargeTest; +import android.util.Log; +import android.util.SparseArray; + +import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; +import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; +import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; +import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; +import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; +import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; +import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; +import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; +import com.android.inputmethod.latin.utils.ByteArrayDictBuffer; +import com.android.inputmethod.latin.utils.CollectionUtils; + +import java.io.File; 
+import java.io.FileInputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map.Entry; +import java.util.Random; +import java.util.Set; +import java.util.TreeMap; + +/** + * Unit tests for BinaryDictDecoderUtils and BinaryDictEncoderUtils. + */ +@LargeTest +public class BinaryDictDecoderEncoderTests extends AndroidTestCase { + private static final String TAG = BinaryDictDecoderEncoderTests.class.getSimpleName(); + private static final int DEFAULT_MAX_UNIGRAMS = 100; + private static final int DEFAULT_CODE_POINT_SET_SIZE = 50; + private static final int UNIGRAM_FREQ = 10; + private static final int BIGRAM_FREQ = 50; + private static final int TOLERANCE_OF_BIGRAM_FREQ = 5; + private static final int NUM_OF_NODES_HAVING_SHORTCUTS = 50; + private static final int NUM_OF_SHORTCUTS = 5; + + private static final int USE_BYTE_ARRAY = 1; + private static final int USE_BYTE_BUFFER = 2; + + private static final ArrayList<String> sWords = CollectionUtils.newArrayList(); + private static final SparseArray<List<Integer>> sEmptyBigrams = + CollectionUtils.newSparseArray(); + private static final SparseArray<List<Integer>> sStarBigrams = CollectionUtils.newSparseArray(); + private static final SparseArray<List<Integer>> sChainBigrams = + CollectionUtils.newSparseArray(); + private static final HashMap<String, List<String>> sShortcuts = CollectionUtils.newHashMap(); + + private static final FormatSpec.FormatOptions VERSION2 = new FormatSpec.FormatOptions(2); + private static final FormatSpec.FormatOptions VERSION3_WITHOUT_DYNAMIC_UPDATE = + new FormatSpec.FormatOptions(3, false /* supportsDynamicUpdate */); + private static final FormatSpec.FormatOptions VERSION3_WITH_DYNAMIC_UPDATE = + new FormatSpec.FormatOptions(3, true /* supportsDynamicUpdate */); + private static final FormatSpec.FormatOptions VERSION4_WITHOUT_DYNAMIC_UPDATE = + new 
FormatSpec.FormatOptions(4, false /* supportsDynamicUpdate */); + private static final FormatSpec.FormatOptions VERSION4_WITH_DYNAMIC_UPDATE = + new FormatSpec.FormatOptions(4, true /* supportsDynamicUpdate */); + + private static final String TEST_DICT_FILE_EXTENSION = ".testDict"; + + public BinaryDictDecoderEncoderTests() { + this(System.currentTimeMillis(), DEFAULT_MAX_UNIGRAMS); + } + + public BinaryDictDecoderEncoderTests(final long seed, final int maxUnigrams) { + super(); + Log.e(TAG, "Testing dictionary: seed is " + seed); + final Random random = new Random(seed); + sWords.clear(); + final int[] codePointSet = CodePointUtils.generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE, + random); + generateWords(maxUnigrams, random, codePointSet); + + for (int i = 0; i < sWords.size(); ++i) { + sChainBigrams.put(i, new ArrayList<Integer>()); + if (i > 0) { + sChainBigrams.get(i - 1).add(i); + } + } + + sStarBigrams.put(0, new ArrayList<Integer>()); + for (int i = 1; i < sWords.size(); ++i) { + sStarBigrams.get(0).add(i); + } + + sShortcuts.clear(); + for (int i = 0; i < NUM_OF_NODES_HAVING_SHORTCUTS; ++i) { + final int from = Math.abs(random.nextInt()) % sWords.size(); + sShortcuts.put(sWords.get(from), new ArrayList<String>()); + for (int j = 0; j < NUM_OF_SHORTCUTS; ++j) { + final int to = Math.abs(random.nextInt()) % sWords.size(); + sShortcuts.get(sWords.get(from)).add(sWords.get(to)); + } + } + } + + private DictEncoder getDictEncoder(final File file, final FormatOptions formatOptions) { + if (formatOptions.mVersion == FormatSpec.VERSION4) { + return new Ver4DictEncoder(getContext().getCacheDir()); + } else if (formatOptions.mVersion == 3 || formatOptions.mVersion == 2) { + return new Ver3DictEncoder(file); + } else { + throw new RuntimeException("The format option has a wrong version : " + + formatOptions.mVersion); + } + } + + private void generateWords(final int number, final Random random, final int[] codePointSet) { + final Set<String> wordSet = 
CollectionUtils.newHashSet(); + while (wordSet.size() < number) { + wordSet.add(CodePointUtils.generateWord(random, codePointSet)); + } + sWords.addAll(wordSet); + } + + /** + * Adds unigrams to the dictionary. + */ + private void addUnigrams(final int number, final FusionDictionary dict, + final List<String> words, final HashMap<String, List<String>> shortcutMap) { + for (int i = 0; i < number; ++i) { + final String word = words.get(i); + final ArrayList<WeightedString> shortcuts = CollectionUtils.newArrayList(); + if (shortcutMap != null && shortcutMap.containsKey(word)) { + for (final String shortcut : shortcutMap.get(word)) { + shortcuts.add(new WeightedString(shortcut, UNIGRAM_FREQ)); + } + } + dict.add(word, UNIGRAM_FREQ, (shortcutMap == null) ? null : shortcuts, + false /* isNotAWord */); + } + } + + private void addBigrams(final FusionDictionary dict, + final List<String> words, + final SparseArray<List<Integer>> bigrams) { + for (int i = 0; i < bigrams.size(); ++i) { + final int w1 = bigrams.keyAt(i); + for (int w2 : bigrams.valueAt(i)) { + dict.setBigram(words.get(w1), words.get(w2), BIGRAM_FREQ); + } + } + } + +// The following is useful to dump the dictionary into a textual file, but it can't compile +// on-device, so it's commented out. 
+// private void dumpToCombinedFileForDebug(final FusionDictionary dict, final String filename) +// throws IOException { +// com.android.inputmethod.latin.dicttool.CombinedInputOutput.writeDictionaryCombined( +// new java.io.FileWriter(new File(filename)), dict); +// } + + private long timeWritingDictToFile(final File file, final FusionDictionary dict, + final FormatSpec.FormatOptions formatOptions) { + + long now = -1, diff = -1; + + try { + final DictEncoder dictEncoder = getDictEncoder(file, formatOptions); + + now = System.currentTimeMillis(); + // If you need to dump the dict to a textual file, uncomment the line below and the + // function above + // dumpToCombinedFileForDebug(file, "/tmp/foo"); + dictEncoder.writeDictionary(dict, formatOptions); + diff = System.currentTimeMillis() - now; + } catch (IOException e) { + Log.e(TAG, "IO exception while writing file", e); + } catch (UnsupportedFormatException e) { + Log.e(TAG, "UnsupportedFormatException", e); + } + + return diff; + } + + private void checkDictionary(final FusionDictionary dict, final List<String> words, + final SparseArray<List<Integer>> bigrams, + final HashMap<String, List<String>> shortcutMap) { + assertNotNull(dict); + + // check unigram + for (final String word : words) { + final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, word); + assertNotNull(ptNode); + } + + // check bigram + for (int i = 0; i < bigrams.size(); ++i) { + final int w1 = bigrams.keyAt(i); + for (final int w2 : bigrams.valueAt(i)) { + final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, + words.get(w1)); + assertNotNull(words.get(w1) + "," + words.get(w2), ptNode.getBigram(words.get(w2))); + } + } + + // check shortcut + if (shortcutMap != null) { + for (final Entry<String, List<String>> entry : shortcutMap.entrySet()) { + assertTrue(words.contains(entry.getKey())); + final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, + entry.getKey()); + for (final 
String word : entry.getValue()) { + assertNotNull("shortcut not found: " + entry.getKey() + ", " + word, + ptNode.getShortcut(word)); + } + } + } + } + + private String outputOptions(final int bufferType, + final FormatSpec.FormatOptions formatOptions) { + String result = " : buffer type = " + + ((bufferType == USE_BYTE_BUFFER) ? "byte buffer" : "byte array"); + result += " : version = " + formatOptions.mVersion; + return result + ", supportsDynamicUpdate = " + formatOptions.mSupportsDynamicUpdate; + } + + private DictionaryOptions getDictionaryOptions(final String id, final String version) { + final DictionaryOptions options = new DictionaryOptions(new HashMap<String, String>(), + false, false); + options.mAttributes.put("version", version); + options.mAttributes.put("dictionary", id); + return options; + } + + private File setUpDictionaryFile(final String name, final String version) { + File file = null; + try { + file = new File(getContext().getCacheDir(), name + "." + version + + TEST_DICT_FILE_EXTENSION); + file.createNewFile(); + } catch (IOException e) { + // do nothing + } + assertTrue("Failed to create the dictionary file.", file.exists()); + return file; + } + + private DictDecoder getDictDecoder(final File file, final int bufferType, + final FormatOptions formatOptions, final DictionaryOptions dictOptions) { + if (formatOptions.mVersion == FormatSpec.VERSION4) { + final FileHeader header = new FileHeader(0, dictOptions, formatOptions); + return FormatSpec.getDictDecoder(new File(getContext().getCacheDir(), + header.getId() + "." 
+ header.getVersion()), bufferType); + } else { + return FormatSpec.getDictDecoder(file, bufferType); + } + } + // Tests for readDictionaryBinary and writeDictionaryBinary + + private long timeReadingAndCheckDict(final File file, final List<String> words, + final SparseArray<List<Integer>> bigrams, + final HashMap<String, List<String>> shortcutMap, final int bufferType, + final FormatOptions formatOptions, final DictionaryOptions dictOptions) { + long now, diff = -1; + + FusionDictionary dict = null; + try { + final DictDecoder dictDecoder = getDictDecoder(file, bufferType, formatOptions, + dictOptions); + now = System.currentTimeMillis(); + dict = dictDecoder.readDictionaryBinary(null, false /* deleteDictIfBroken */); + diff = System.currentTimeMillis() - now; + } catch (IOException e) { + Log.e(TAG, "IOException while reading dictionary", e); + } catch (UnsupportedFormatException e) { + Log.e(TAG, "Unsupported format", e); + } + + checkDictionary(dict, words, bigrams, shortcutMap); + return diff; + } + + // Tests for readDictionaryBinary and writeDictionaryBinary + private String runReadAndWrite(final List<String> words, + final SparseArray<List<Integer>> bigrams, final HashMap<String, List<String>> shortcuts, + final int bufferType, final FormatSpec.FormatOptions formatOptions, + final String message) { + + final String dictName = "runReadAndWrite"; + final String dictVersion = Long.toString(System.currentTimeMillis()); + final File file = setUpDictionaryFile(dictName, dictVersion); + + final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), + getDictionaryOptions(dictName, dictVersion)); + addUnigrams(words.size(), dict, words, shortcuts); + addBigrams(dict, words, bigrams); + checkDictionary(dict, words, bigrams, shortcuts); + + final long write = timeWritingDictToFile(file, dict, formatOptions); + final long read = timeReadingAndCheckDict(file, words, bigrams, shortcuts, bufferType, + formatOptions, dict.mOptions); + + return "PROF: read=" + 
read + "ms, write=" + write + "ms :" + message + + " : " + outputOptions(bufferType, formatOptions); + } + + private void runReadAndWriteTests(final List<String> results, final int bufferType, + final FormatSpec.FormatOptions formatOptions) { + results.add(runReadAndWrite(sWords, sEmptyBigrams, null /* shortcuts */, bufferType, + formatOptions, "unigram")); + results.add(runReadAndWrite(sWords, sChainBigrams, null /* shortcuts */, bufferType, + formatOptions, "chain")); + results.add(runReadAndWrite(sWords, sStarBigrams, null /* shortcuts */, bufferType, + formatOptions, "star")); + results.add(runReadAndWrite(sWords, sEmptyBigrams, sShortcuts, bufferType, formatOptions, + "unigram with shortcuts")); + results.add(runReadAndWrite(sWords, sChainBigrams, sShortcuts, bufferType, formatOptions, + "chain with shortcuts")); + results.add(runReadAndWrite(sWords, sStarBigrams, sShortcuts, bufferType, formatOptions, + "star with shortcuts")); + } + + // Unit test for CharEncoding.readString and CharEncoding.writeString. + public void testCharEncoding() { + // the max length of a word in sWords is less than 50. + // See generateWords. 
+ final byte[] buffer = new byte[50 * 3]; + final DictBuffer dictBuffer = new ByteArrayDictBuffer(buffer); + for (final String word : sWords) { + Log.d("testReadAndWriteString", "write : " + word); + Arrays.fill(buffer, (byte)0); + CharEncoding.writeString(buffer, 0, word); + dictBuffer.position(0); + final String str = CharEncoding.readString(dictBuffer); + assertEquals(word, str); + } + } + + public void testReadAndWriteWithByteBuffer() { + final List<String> results = CollectionUtils.newArrayList(); + + runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION2); + runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION3_WITHOUT_DYNAMIC_UPDATE); + runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION3_WITH_DYNAMIC_UPDATE); + runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION4_WITHOUT_DYNAMIC_UPDATE); + runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION4_WITH_DYNAMIC_UPDATE); + + for (final String result : results) { + Log.d(TAG, result); + } + } + + public void testReadAndWriteWithByteArray() { + final List<String> results = CollectionUtils.newArrayList(); + + runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION2); + runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION3_WITHOUT_DYNAMIC_UPDATE); + runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION3_WITH_DYNAMIC_UPDATE); + runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION4_WITHOUT_DYNAMIC_UPDATE); + runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION4_WITH_DYNAMIC_UPDATE); + + for (final String result : results) { + Log.d(TAG, result); + } + } + + // Tests for readUnigramsAndBigramsBinary + + private void checkWordMap(final List<String> expectedWords, + final SparseArray<List<Integer>> expectedBigrams, + final TreeMap<Integer, String> resultWords, + final TreeMap<Integer, Integer> resultFrequencies, + final TreeMap<Integer, ArrayList<PendingAttribute>> resultBigrams) { + // check unigrams + final Set<String> actualWordsSet = new HashSet<String>(resultWords.values()); + final Set<String> 
expectedWordsSet = new HashSet<String>(expectedWords); + assertEquals(actualWordsSet, expectedWordsSet); + + for (int freq : resultFrequencies.values()) { + assertEquals(freq, UNIGRAM_FREQ); + } + + // check bigrams + final HashMap<String, List<String>> expBigrams = new HashMap<String, List<String>>(); + for (int i = 0; i < expectedBigrams.size(); ++i) { + final String word1 = expectedWords.get(expectedBigrams.keyAt(i)); + for (int w2 : expectedBigrams.valueAt(i)) { + if (expBigrams.get(word1) == null) { + expBigrams.put(word1, new ArrayList<String>()); + } + expBigrams.get(word1).add(expectedWords.get(w2)); + } + } + + final HashMap<String, List<String>> actBigrams = new HashMap<String, List<String>>(); + for (Entry<Integer, ArrayList<PendingAttribute>> entry : resultBigrams.entrySet()) { + final String word1 = resultWords.get(entry.getKey()); + final int unigramFreq = resultFrequencies.get(entry.getKey()); + for (PendingAttribute attr : entry.getValue()) { + final String word2 = resultWords.get(attr.mAddress); + if (actBigrams.get(word1) == null) { + actBigrams.put(word1, new ArrayList<String>()); + } + actBigrams.get(word1).add(word2); + + final int bigramFreq = BinaryDictIOUtils.reconstructBigramFrequency( + unigramFreq, attr.mFrequency); + assertTrue(Math.abs(bigramFreq - BIGRAM_FREQ) < TOLERANCE_OF_BIGRAM_FREQ); + } + } + + assertEquals(actBigrams, expBigrams); + } + + private long timeAndCheckReadUnigramsAndBigramsBinary(final File file, final List<String> words, + final SparseArray<List<Integer>> bigrams, final int bufferType, + final FormatOptions formatOptions, final DictionaryOptions dictOptions) { + FileInputStream inStream = null; + + final TreeMap<Integer, String> resultWords = CollectionUtils.newTreeMap(); + final TreeMap<Integer, ArrayList<PendingAttribute>> resultBigrams = + CollectionUtils.newTreeMap(); + final TreeMap<Integer, Integer> resultFreqs = CollectionUtils.newTreeMap(); + + long now = -1, diff = -1; + try { + final DictDecoder 
dictDecoder = getDictDecoder(file, bufferType, formatOptions, + dictOptions); + now = System.currentTimeMillis(); + dictDecoder.readUnigramsAndBigramsBinary(resultWords, resultFreqs, resultBigrams); + diff = System.currentTimeMillis() - now; + } catch (IOException e) { + Log.e(TAG, "IOException", e); + } catch (UnsupportedFormatException e) { + Log.e(TAG, "UnsupportedFormatException", e); + } finally { + if (inStream != null) { + try { + inStream.close(); + } catch (IOException e) { + // do nothing + } + } + } + + checkWordMap(words, bigrams, resultWords, resultFreqs, resultBigrams); + return diff; + } + + private String runReadUnigramsAndBigramsBinary(final ArrayList<String> words, + final SparseArray<List<Integer>> bigrams, final int bufferType, + final FormatSpec.FormatOptions formatOptions, final String message) { + final String dictName = "runReadUnigrams"; + final String dictVersion = Long.toString(System.currentTimeMillis()); + final File file = setUpDictionaryFile(dictName, dictVersion); + + // making the dictionary from lists of words. 
+ final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), + getDictionaryOptions(dictName, dictVersion)); + addUnigrams(words.size(), dict, words, null /* shortcutMap */); + addBigrams(dict, words, bigrams); + + timeWritingDictToFile(file, dict, formatOptions); + + long wordMap = timeAndCheckReadUnigramsAndBigramsBinary(file, words, bigrams, bufferType, + formatOptions, dict.mOptions); + long fullReading = timeReadingAndCheckDict(file, words, bigrams, null /* shortcutMap */, + bufferType, formatOptions, dict.mOptions); + + return "readDictionaryBinary=" + fullReading + ", readUnigramsAndBigramsBinary=" + wordMap + + " : " + message + " : " + outputOptions(bufferType, formatOptions); + } + + private void runReadUnigramsAndBigramsTests(final ArrayList<String> results, + final int bufferType, final FormatSpec.FormatOptions formatOptions) { + results.add(runReadUnigramsAndBigramsBinary(sWords, sEmptyBigrams, bufferType, + formatOptions, "unigram")); + results.add(runReadUnigramsAndBigramsBinary(sWords, sChainBigrams, bufferType, + formatOptions, "chain")); + results.add(runReadUnigramsAndBigramsBinary(sWords, sStarBigrams, bufferType, + formatOptions, "star")); + } + + public void testReadUnigramsAndBigramsBinaryWithByteBuffer() { + final ArrayList<String> results = CollectionUtils.newArrayList(); + + runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION2); + runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION3_WITHOUT_DYNAMIC_UPDATE); + runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION3_WITH_DYNAMIC_UPDATE); + runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION4_WITHOUT_DYNAMIC_UPDATE); + runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION4_WITH_DYNAMIC_UPDATE); + + for (final String result : results) { + Log.d(TAG, result); + } + } + + public void testReadUnigramsAndBigramsBinaryWithByteArray() { + final ArrayList<String> results = CollectionUtils.newArrayList(); + + 
runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION2); + runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION3_WITHOUT_DYNAMIC_UPDATE); + runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION3_WITH_DYNAMIC_UPDATE); + runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION4_WITHOUT_DYNAMIC_UPDATE); + runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION4_WITH_DYNAMIC_UPDATE); + + for (final String result : results) { + Log.d(TAG, result); + } + } + + // Tests for getTerminalPosition + private String getWordFromBinary(final DictDecoder dictDecoder, final int address) { + if (dictDecoder.getPosition() != 0) dictDecoder.setPosition(0); + + FileHeader fileHeader = null; + try { + fileHeader = dictDecoder.readHeader(); + } catch (IOException e) { + return null; + } catch (UnsupportedFormatException e) { + return null; + } + if (fileHeader == null) return null; + return BinaryDictDecoderUtils.getWordAtPosition(dictDecoder, fileHeader.mHeaderSize, + address, fileHeader.mFormatOptions).mWord; + } + + private long checkGetTerminalPosition(final DictDecoder dictDecoder, final String word, + int index, boolean contained) { + final int expectedFrequency = (UNIGRAM_FREQ + index) % 255; + long diff = -1; + int position = -1; + try { + final long now = System.nanoTime(); + position = dictDecoder.getTerminalPosition(word); + diff = System.nanoTime() - now; + } catch (IOException e) { + Log.e(TAG, "IOException while getTerminalPosition", e); + } catch (UnsupportedFormatException e) { + Log.e(TAG, "UnsupportedFormatException while getTerminalPosition", e); + } + + assertEquals(FormatSpec.NOT_VALID_WORD != position, contained); + if (contained) assertEquals(getWordFromBinary(dictDecoder, position), word); + return diff; + } + + private void runGetTerminalPosition(final ArrayList<String> words, + final SparseArray<List<Integer>> bigrams, final int bufferType, + final FormatOptions formatOptions, final String message) { + final String 
dictName = "testGetTerminalPosition"; + final String dictVersion = Long.toString(System.currentTimeMillis()); + final File file = setUpDictionaryFile(dictName, dictVersion); + + final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), + getDictionaryOptions(dictName, dictVersion)); + addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */); + addBigrams(dict, words, bigrams); + timeWritingDictToFile(file, dict, formatOptions); + + final DictDecoder dictDecoder = getDictDecoder(file, DictDecoder.USE_BYTEARRAY, + formatOptions, dict.mOptions); + try { + dictDecoder.openDictBuffer(); + } catch (IOException e) { + // ignore + Log.e(TAG, "IOException while opening the buffer", e); + } + assertTrue("Can't get the buffer", dictDecoder.isDictBufferOpen()); + + try { + // too long word + final String longWord = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz"; + assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(longWord)); + + // null + assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(null)); + + // empty string + assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition("")); + } catch (IOException e) { + } catch (UnsupportedFormatException e) { + } + + // Test a word that is contained within the dictionary. + long sum = 0; + for (int i = 0; i < sWords.size(); ++i) { + final long time = checkGetTerminalPosition(dictDecoder, sWords.get(i), i, true); + sum += time == -1 ? 0 : time; + } + Log.d(TAG, "per search : " + (((double)sum) / sWords.size() / 1000000) + " : " + message + + " : " + outputOptions(bufferType, formatOptions)); + + // Test a word that isn't contained within the dictionary. 
+ final Random random = new Random((int)System.currentTimeMillis()); + final int[] codePointSet = CodePointUtils.generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE, + random); + for (int i = 0; i < 1000; ++i) { + final String word = CodePointUtils.generateWord(random, codePointSet); + if (sWords.indexOf(word) != -1) continue; + checkGetTerminalPosition(dictDecoder, word, i, false); + } + } + + private void runGetTerminalPositionTests(final ArrayList<String> results, final int bufferType, + final FormatOptions formatOptions) { + runGetTerminalPosition(sWords, sEmptyBigrams, bufferType, formatOptions, "unigram"); + } + + public void testGetTerminalPosition() { + final ArrayList<String> results = CollectionUtils.newArrayList(); + + runGetTerminalPositionTests(results, USE_BYTE_ARRAY, VERSION2); + runGetTerminalPositionTests(results, USE_BYTE_ARRAY, VERSION3_WITHOUT_DYNAMIC_UPDATE); + runGetTerminalPositionTests(results, USE_BYTE_ARRAY, VERSION3_WITH_DYNAMIC_UPDATE); + runGetTerminalPositionTests(results, USE_BYTE_ARRAY, VERSION4_WITHOUT_DYNAMIC_UPDATE); + runGetTerminalPositionTests(results, USE_BYTE_ARRAY, VERSION4_WITH_DYNAMIC_UPDATE); + + runGetTerminalPositionTests(results, USE_BYTE_BUFFER, VERSION2); + runGetTerminalPositionTests(results, USE_BYTE_BUFFER, VERSION3_WITHOUT_DYNAMIC_UPDATE); + runGetTerminalPositionTests(results, USE_BYTE_BUFFER, VERSION3_WITH_DYNAMIC_UPDATE); + runGetTerminalPositionTests(results, USE_BYTE_BUFFER, VERSION4_WITHOUT_DYNAMIC_UPDATE); + runGetTerminalPositionTests(results, USE_BYTE_BUFFER, VERSION4_WITH_DYNAMIC_UPDATE); + + for (final String result : results) { + Log.d(TAG, result); + } + } + + public void testDeleteWord() { + final String dictName = "testDeleteWord"; + final String dictVersion = Long.toString(System.currentTimeMillis()); + final File file = setUpDictionaryFile(dictName, dictVersion); + + final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), + new FusionDictionary.DictionaryOptions( + new HashMap<String, 
String>(), false, false)); + addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */); + timeWritingDictToFile(file, dict, VERSION3_WITH_DYNAMIC_UPDATE); + + final Ver3DictDecoder dictDecoder = new Ver3DictDecoder(file, DictDecoder.USE_BYTEARRAY); + try { + dictDecoder.openDictBuffer(); + } catch (IOException e) { + // ignore + Log.e(TAG, "IOException while opening the buffer", e); + } + assertTrue("Can't get the buffer", dictDecoder.isDictBufferOpen()); + + try { + MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, + dictDecoder.getTerminalPosition(sWords.get(0))); + DynamicBinaryDictIOUtils.deleteWord(dictDecoder, sWords.get(0)); + assertEquals(FormatSpec.NOT_VALID_WORD, + dictDecoder.getTerminalPosition(sWords.get(0))); + + MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, + dictDecoder.getTerminalPosition(sWords.get(5))); + DynamicBinaryDictIOUtils.deleteWord(dictDecoder, sWords.get(5)); + assertEquals(FormatSpec.NOT_VALID_WORD, + dictDecoder.getTerminalPosition(sWords.get(5))); + } catch (IOException e) { + } catch (UnsupportedFormatException e) { + } + } +} diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOTests.java deleted file mode 100644 index b704d08b3..000000000 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOTests.java +++ /dev/null @@ -1,601 +0,0 @@ -/* - * Copyright (C) 2012 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.android.inputmethod.latin.makedict; - -import android.test.AndroidTestCase; -import android.test.MoreAsserts; -import android.test.suitebuilder.annotation.LargeTest; -import android.util.Log; -import android.util.SparseArray; - -import com.android.inputmethod.latin.CollectionUtils; -import com.android.inputmethod.latin.UserHistoryDictIOUtils; -import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface; -import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; -import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; -import com.android.inputmethod.latin.makedict.FusionDictionary.Node; -import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; - -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.channels.FileChannel; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Random; -import java.util.Set; - -/** - * Unit tests for BinaryDictInputOutput - */ -@LargeTest -public class BinaryDictIOTests extends AndroidTestCase { - private static final String TAG = BinaryDictIOTests.class.getSimpleName(); - private static final int MAX_UNIGRAMS = 100; - private static final int UNIGRAM_FREQ = 10; - private static final int BIGRAM_FREQ = 50; - private static final int TOLERANCE_OF_BIGRAM_FREQ = 5; - - private static final int USE_BYTE_ARRAY = 1; - private static final int USE_BYTE_BUFFER = 2; - - private static final List<String> sWords = CollectionUtils.newArrayList(); - private static final SparseArray<List<Integer>> sEmptyBigrams = - CollectionUtils.newSparseArray(); - private static final SparseArray<List<Integer>> 
sStarBigrams = CollectionUtils.newSparseArray(); - private static final SparseArray<List<Integer>> sChainBigrams = - CollectionUtils.newSparseArray(); - - private static final FormatSpec.FormatOptions VERSION2 = new FormatSpec.FormatOptions(2); - private static final FormatSpec.FormatOptions VERSION3_WITHOUT_DYNAMIC_UPDATE = - new FormatSpec.FormatOptions(3, false /* supportsDynamicUpdate */); - private static final FormatSpec.FormatOptions VERSION3_WITH_DYNAMIC_UPDATE = - new FormatSpec.FormatOptions(3, true /* supportsDynamicUpdate */); - - public BinaryDictIOTests() { - super(); - - final long time = System.currentTimeMillis(); - Log.e(TAG, "Testing dictionary: seed is " + time); - final Random random = new Random(time); - sWords.clear(); - generateWords(MAX_UNIGRAMS, random); - - for (int i = 0; i < sWords.size(); ++i) { - sChainBigrams.put(i, new ArrayList<Integer>()); - if (i > 0) { - sChainBigrams.get(i - 1).add(i); - } - } - - sStarBigrams.put(0, new ArrayList<Integer>()); - for (int i = 1; i < sWords.size(); ++i) { - sStarBigrams.get(0).add(i); - } - } - - // Utilities for test - - /** - * Makes new buffer according to BUFFER_TYPE. 
- */ - private FusionDictionaryBufferInterface getBuffer(final File file, final int bufferType) { - FileInputStream inStream = null; - try { - inStream = new FileInputStream(file); - if (bufferType == USE_BYTE_ARRAY) { - final byte[] array = new byte[(int)file.length()]; - inStream.read(array); - return new UserHistoryDictIOUtils.ByteArrayWrapper(array); - } else if (bufferType == USE_BYTE_BUFFER){ - final ByteBuffer buffer = inStream.getChannel().map( - FileChannel.MapMode.READ_ONLY, 0, file.length()); - return new BinaryDictInputOutput.ByteBufferWrapper(buffer); - } - } catch (IOException e) { - Log.e(TAG, "IOException while making buffer", e); - } finally { - if (inStream != null) { - try { - inStream.close(); - } catch (IOException e) { - Log.e(TAG, "IOException while closing stream", e); - } - } - } - return null; - } - - /** - * Generates a random word. - */ - private String generateWord(final Random random) { - StringBuilder builder = new StringBuilder("a"); - int count = random.nextInt() % 30; // Arbitrarily 30 chars max - while (count > 0) { - final long r = Math.abs(random.nextInt()); - if (r < 0) continue; - // Don't insert 0~0x20, but insert any other code point. - // Code points are in the range 0~0x10FFFF. - final int candidateCodePoint = (int)(0x20 + r % (Character.MAX_CODE_POINT - 0x20)); - // Code points between MIN_ and MAX_SURROGATE are not valid on their own. - if (candidateCodePoint >= Character.MIN_SURROGATE - && candidateCodePoint <= Character.MAX_SURROGATE) continue; - builder.appendCodePoint(candidateCodePoint); - --count; - } - return builder.toString(); - } - - private void generateWords(final int number, final Random random) { - final Set<String> wordSet = CollectionUtils.newHashSet(); - while (wordSet.size() < number) { - wordSet.add(generateWord(random)); - } - sWords.addAll(wordSet); - } - - /** - * Adds unigrams to the dictionary. 
- */ - private void addUnigrams(final int number, final FusionDictionary dict, - final List<String> words, final Map<String, List<String>> shortcutMap) { - for (int i = 0; i < number; ++i) { - final String word = words.get(i); - final ArrayList<WeightedString> shortcuts = CollectionUtils.newArrayList(); - if (shortcutMap != null && shortcutMap.containsKey(word)) { - for (final String shortcut : shortcutMap.get(word)) { - shortcuts.add(new WeightedString(shortcut, UNIGRAM_FREQ)); - } - } - dict.add(word, UNIGRAM_FREQ, (shortcutMap == null) ? null : shortcuts, - false /* isNotAWord */); - } - } - - private void addBigrams(final FusionDictionary dict, - final List<String> words, - final SparseArray<List<Integer>> bigrams) { - for (int i = 0; i < bigrams.size(); ++i) { - final int w1 = bigrams.keyAt(i); - for (int w2 : bigrams.valueAt(i)) { - dict.setBigram(words.get(w1), words.get(w2), BIGRAM_FREQ); - } - } - } - - private long timeWritingDictToFile(final File file, final FusionDictionary dict, - final FormatSpec.FormatOptions formatOptions) { - - long now = -1, diff = -1; - - try { - final FileOutputStream out = new FileOutputStream(file); - - now = System.currentTimeMillis(); - BinaryDictInputOutput.writeDictionaryBinary(out, dict, formatOptions); - diff = System.currentTimeMillis() - now; - - out.flush(); - out.close(); - } catch (IOException e) { - Log.e(TAG, "IO exception while writing file", e); - } catch (UnsupportedFormatException e) { - Log.e(TAG, "UnsupportedFormatException", e); - } - - return diff; - } - - private void checkDictionary(final FusionDictionary dict, final List<String> words, - final SparseArray<List<Integer>> bigrams, final Map<String, List<String>> shortcutMap) { - assertNotNull(dict); - - // check unigram - for (final String word : words) { - final CharGroup cg = FusionDictionary.findWordInTree(dict.mRoot, word); - assertNotNull(cg); - } - - // check bigram - for (int i = 0; i < bigrams.size(); ++i) { - final int w1 = bigrams.keyAt(i); - 
for (final int w2 : bigrams.valueAt(i)) { - final CharGroup cg = FusionDictionary.findWordInTree(dict.mRoot, words.get(w1)); - assertNotNull(words.get(w1) + "," + words.get(w2), cg.getBigram(words.get(w2))); - } - } - - // check shortcut - if (shortcutMap != null) { - for (final Map.Entry<String, List<String>> entry : shortcutMap.entrySet()) { - final CharGroup group = FusionDictionary.findWordInTree(dict.mRoot, entry.getKey()); - for (final String word : entry.getValue()) { - assertNotNull("shortcut not found: " + entry.getKey() + ", " + word, - group.getShortcut(word)); - } - } - } - } - - private String outputOptions(final int bufferType, - final FormatSpec.FormatOptions formatOptions) { - String result = " : buffer type = " - + ((bufferType == USE_BYTE_BUFFER) ? "byte buffer" : "byte array"); - result += " : version = " + formatOptions.mVersion; - return result + ", supportsDynamicUpdate = " + formatOptions.mSupportsDynamicUpdate; - } - - // Tests for readDictionaryBinary and writeDictionaryBinary - - private long timeReadingAndCheckDict(final File file, final List<String> words, - final SparseArray<List<Integer>> bigrams, final Map<String, List<String>> shortcutMap, - final int bufferType) { - long now, diff = -1; - final FusionDictionaryBufferInterface buffer = getBuffer(file, bufferType); - assertNotNull(buffer); - - FusionDictionary dict = null; - try { - now = System.currentTimeMillis(); - dict = BinaryDictInputOutput.readDictionaryBinary(buffer, null); - diff = System.currentTimeMillis() - now; - } catch (IOException e) { - Log.e(TAG, "IOException while reading dictionary", e); - } catch (UnsupportedFormatException e) { - Log.e(TAG, "Unsupported format", e); - } - - checkDictionary(dict, words, bigrams, shortcutMap); - return diff; - } - - // Tests for readDictionaryBinary and writeDictionaryBinary - private String runReadAndWrite(final List<String> words, - final SparseArray<List<Integer>> bigrams, final Map<String, List<String>> shortcuts, - final int 
bufferType, final FormatSpec.FormatOptions formatOptions, - final String message) { - File file = null; - try { - file = File.createTempFile("runReadAndWrite", ".dict", getContext().getCacheDir()); - } catch (IOException e) { - Log.e(TAG, "IOException", e); - } - assertNotNull(file); - - final FusionDictionary dict = new FusionDictionary(new Node(), - new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false)); - addUnigrams(words.size(), dict, words, shortcuts); - addBigrams(dict, words, bigrams); - checkDictionary(dict, words, bigrams, shortcuts); - - final long write = timeWritingDictToFile(file, dict, formatOptions); - final long read = timeReadingAndCheckDict(file, words, bigrams, shortcuts, bufferType); - - return "PROF: read=" + read + "ms, write=" + write + "ms :" + message - + " : " + outputOptions(bufferType, formatOptions); - } - - private void runReadAndWriteTests(final List<String> results, final int bufferType, - final FormatSpec.FormatOptions formatOptions) { - results.add(runReadAndWrite(sWords, sEmptyBigrams, null /* shortcuts */, bufferType, - formatOptions, "unigram")); - results.add(runReadAndWrite(sWords, sChainBigrams, null /* shortcuts */, bufferType, - formatOptions, "chain")); - results.add(runReadAndWrite(sWords, sStarBigrams, null /* shortcuts */, bufferType, - formatOptions, "star")); - } - - public void testReadAndWriteWithByteBuffer() { - final List<String> results = CollectionUtils.newArrayList(); - - runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION2); - runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION3_WITHOUT_DYNAMIC_UPDATE); - runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION3_WITH_DYNAMIC_UPDATE); - - for (final String result : results) { - Log.d(TAG, result); - } - } - - public void testReadAndWriteWithByteArray() { - final List<String> results = CollectionUtils.newArrayList(); - - runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION2); - runReadAndWriteTests(results, USE_BYTE_ARRAY, 
VERSION3_WITHOUT_DYNAMIC_UPDATE); - runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION3_WITH_DYNAMIC_UPDATE); - - for (final String result : results) { - Log.d(TAG, result); - } - } - - // Tests for readUnigramsAndBigramsBinary - - private void checkWordMap(final List<String> expectedWords, - final SparseArray<List<Integer>> expectedBigrams, - final Map<Integer, String> resultWords, - final Map<Integer, Integer> resultFrequencies, - final Map<Integer, ArrayList<PendingAttribute>> resultBigrams) { - // check unigrams - final Set<String> actualWordsSet = new HashSet<String>(resultWords.values()); - final Set<String> expectedWordsSet = new HashSet<String>(expectedWords); - assertEquals(actualWordsSet, expectedWordsSet); - - for (int freq : resultFrequencies.values()) { - assertEquals(freq, UNIGRAM_FREQ); - } - - // check bigrams - final Map<String, List<String>> expBigrams = new HashMap<String, List<String>>(); - for (int i = 0; i < expectedBigrams.size(); ++i) { - final String word1 = expectedWords.get(expectedBigrams.keyAt(i)); - for (int w2 : expectedBigrams.valueAt(i)) { - if (expBigrams.get(word1) == null) { - expBigrams.put(word1, new ArrayList<String>()); - } - expBigrams.get(word1).add(expectedWords.get(w2)); - } - } - - final Map<String, List<String>> actBigrams = new HashMap<String, List<String>>(); - for (Entry<Integer, ArrayList<PendingAttribute>> entry : resultBigrams.entrySet()) { - final String word1 = resultWords.get(entry.getKey()); - final int unigramFreq = resultFrequencies.get(entry.getKey()); - for (PendingAttribute attr : entry.getValue()) { - final String word2 = resultWords.get(attr.mAddress); - if (actBigrams.get(word1) == null) { - actBigrams.put(word1, new ArrayList<String>()); - } - actBigrams.get(word1).add(word2); - - final int bigramFreq = BinaryDictInputOutput.reconstructBigramFrequency( - unigramFreq, attr.mFrequency); - assertTrue(Math.abs(bigramFreq - BIGRAM_FREQ) < TOLERANCE_OF_BIGRAM_FREQ); - } - } - - assertEquals(actBigrams, 
expBigrams); - } - - private long timeAndCheckReadUnigramsAndBigramsBinary(final File file, final List<String> words, - final SparseArray<List<Integer>> bigrams, final int bufferType) { - FileInputStream inStream = null; - - final Map<Integer, String> resultWords = CollectionUtils.newTreeMap(); - final Map<Integer, ArrayList<PendingAttribute>> resultBigrams = - CollectionUtils.newTreeMap(); - final Map<Integer, Integer> resultFreqs = CollectionUtils.newTreeMap(); - - long now = -1, diff = -1; - final FusionDictionaryBufferInterface buffer = getBuffer(file, bufferType); - assertNotNull("Can't get buffer.", buffer); - try { - now = System.currentTimeMillis(); - BinaryDictIOUtils.readUnigramsAndBigramsBinary(buffer, resultWords, resultFreqs, - resultBigrams); - diff = System.currentTimeMillis() - now; - } catch (IOException e) { - Log.e(TAG, "IOException", e); - } catch (UnsupportedFormatException e) { - Log.e(TAG, "UnsupportedFormatException", e); - } finally { - if (inStream != null) { - try { - inStream.close(); - } catch (IOException e) { - // do nothing - } - } - } - - checkWordMap(words, bigrams, resultWords, resultFreqs, resultBigrams); - return diff; - } - - private String runReadUnigramsAndBigramsBinary(final List<String> words, - final SparseArray<List<Integer>> bigrams, final int bufferType, - final FormatSpec.FormatOptions formatOptions, final String message) { - File file = null; - try { - file = File.createTempFile("runReadUnigrams", ".dict", getContext().getCacheDir()); - } catch (IOException e) { - Log.e(TAG, "IOException", e); - } - assertNotNull(file); - - // making the dictionary from lists of words. 
- final FusionDictionary dict = new FusionDictionary(new Node(), - new FusionDictionary.DictionaryOptions( - new HashMap<String, String>(), false, false)); - addUnigrams(words.size(), dict, words, null /* shortcutMap */); - addBigrams(dict, words, bigrams); - - timeWritingDictToFile(file, dict, formatOptions); - - long wordMap = timeAndCheckReadUnigramsAndBigramsBinary(file, words, bigrams, bufferType); - long fullReading = timeReadingAndCheckDict(file, words, bigrams, null /* shortcutMap */, - bufferType); - - return "readDictionaryBinary=" + fullReading + ", readUnigramsAndBigramsBinary=" + wordMap - + " : " + message + " : " + outputOptions(bufferType, formatOptions); - } - - private void runReadUnigramsAndBigramsTests(final List<String> results, final int bufferType, - final FormatSpec.FormatOptions formatOptions) { - results.add(runReadUnigramsAndBigramsBinary(sWords, sEmptyBigrams, bufferType, - formatOptions, "unigram")); - results.add(runReadUnigramsAndBigramsBinary(sWords, sChainBigrams, bufferType, - formatOptions, "chain")); - results.add(runReadUnigramsAndBigramsBinary(sWords, sChainBigrams, bufferType, - formatOptions, "star")); - } - - public void testReadUnigramsAndBigramsBinaryWithByteBuffer() { - final List<String> results = CollectionUtils.newArrayList(); - - runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION2); - runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION3_WITHOUT_DYNAMIC_UPDATE); - runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION3_WITH_DYNAMIC_UPDATE); - - for (final String result : results) { - Log.d(TAG, result); - } - } - - public void testReadUnigramsAndBigramsBinaryWithByteArray() { - final List<String> results = CollectionUtils.newArrayList(); - - runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION2); - runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION3_WITHOUT_DYNAMIC_UPDATE); - runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, 
VERSION3_WITH_DYNAMIC_UPDATE); - - for (final String result : results) { - Log.d(TAG, result); - } - } - - // Tests for getTerminalPosition - private String getWordFromBinary(final FusionDictionaryBufferInterface buffer, - final int address) { - if (buffer.position() != 0) buffer.position(0); - - FileHeader header = null; - try { - header = BinaryDictInputOutput.readHeader(buffer); - } catch (IOException e) { - return null; - } catch (UnsupportedFormatException e) { - return null; - } - if (header == null) return null; - return BinaryDictInputOutput.getWordAtAddress(buffer, header.mHeaderSize, - address - header.mHeaderSize, header.mFormatOptions).mWord; - } - - private long runGetTerminalPosition(final FusionDictionaryBufferInterface buffer, - final String word, int index, boolean contained) { - final int expectedFrequency = (UNIGRAM_FREQ + index) % 255; - long diff = -1; - int position = -1; - try { - final long now = System.nanoTime(); - position = BinaryDictIOUtils.getTerminalPosition(buffer, word); - diff = System.nanoTime() - now; - } catch (IOException e) { - Log.e(TAG, "IOException while getTerminalPosition", e); - } catch (UnsupportedFormatException e) { - Log.e(TAG, "UnsupportedFormatException while getTerminalPosition", e); - } - - assertEquals(FormatSpec.NOT_VALID_WORD != position, contained); - if (contained) assertEquals(getWordFromBinary(buffer, position), word); - return diff; - } - - public void testGetTerminalPosition() { - File file = null; - try { - file = File.createTempFile("testGetTerminalPosition", ".dict", - getContext().getCacheDir()); - } catch (IOException e) { - // do nothing - } - assertNotNull(file); - - final FusionDictionary dict = new FusionDictionary(new Node(), - new FusionDictionary.DictionaryOptions( - new HashMap<String, String>(), false, false)); - addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */); - timeWritingDictToFile(file, dict, VERSION3_WITH_DYNAMIC_UPDATE); - - final FusionDictionaryBufferInterface 
buffer = getBuffer(file, USE_BYTE_ARRAY); - - try { - // too long word - final String longWord = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz"; - assertEquals(FormatSpec.NOT_VALID_WORD, - BinaryDictIOUtils.getTerminalPosition(buffer, longWord)); - - // null - assertEquals(FormatSpec.NOT_VALID_WORD, - BinaryDictIOUtils.getTerminalPosition(buffer, null)); - - // empty string - assertEquals(FormatSpec.NOT_VALID_WORD, - BinaryDictIOUtils.getTerminalPosition(buffer, "")); - } catch (IOException e) { - } catch (UnsupportedFormatException e) { - } - - // Test a word that is contained within the dictionary. - long sum = 0; - for (int i = 0; i < sWords.size(); ++i) { - final long time = runGetTerminalPosition(buffer, sWords.get(i), i, true); - sum += time == -1 ? 0 : time; - } - Log.d(TAG, "per a search : " + (((double)sum) / sWords.size() / 1000000)); - - // Test a word that isn't contained within the dictionary. - final Random random = new Random((int)System.currentTimeMillis()); - for (int i = 0; i < 1000; ++i) { - final String word = generateWord(random); - if (sWords.indexOf(word) != -1) continue; - runGetTerminalPosition(buffer, word, i, false); - } - } - - public void testDeleteWord() { - File file = null; - try { - file = File.createTempFile("testDeleteWord", ".dict", getContext().getCacheDir()); - } catch (IOException e) { - // do nothing - } - assertNotNull(file); - - final FusionDictionary dict = new FusionDictionary(new Node(), - new FusionDictionary.DictionaryOptions( - new HashMap<String, String>(), false, false)); - addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */); - timeWritingDictToFile(file, dict, VERSION3_WITH_DYNAMIC_UPDATE); - - final FusionDictionaryBufferInterface buffer = getBuffer(file, USE_BYTE_ARRAY); - - try { - MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, - BinaryDictIOUtils.getTerminalPosition(buffer, sWords.get(0))); - BinaryDictIOUtils.deleteWord(buffer, sWords.get(0)); - 
assertEquals(FormatSpec.NOT_VALID_WORD, - BinaryDictIOUtils.getTerminalPosition(buffer, sWords.get(0))); - - MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, - BinaryDictIOUtils.getTerminalPosition(buffer, sWords.get(5))); - BinaryDictIOUtils.deleteWord(buffer, sWords.get(5)); - assertEquals(FormatSpec.NOT_VALID_WORD, - BinaryDictIOUtils.getTerminalPosition(buffer, sWords.get(5))); - } catch (IOException e) { - } catch (UnsupportedFormatException e) { - } - } -} diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java index 47885f023..a83749499 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java @@ -21,46 +21,52 @@ import android.test.MoreAsserts; import android.test.suitebuilder.annotation.LargeTest; import android.util.Log; -import com.android.inputmethod.latin.CollectionUtils; -import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.ByteBufferWrapper; -import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface; +import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; -import com.android.inputmethod.latin.makedict.FusionDictionary.Node; +import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; +import com.android.inputmethod.latin.utils.CollectionUtils; import java.io.BufferedOutputStream; import java.io.File; -import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; -import java.io.RandomAccessFile; -import java.nio.channels.FileChannel; import java.util.ArrayList; import java.util.HashMap; import java.util.Random; @LargeTest -public class 
BinaryDictIOUtilsTests extends AndroidTestCase { +public class BinaryDictIOUtilsTests extends AndroidTestCase { private static final String TAG = BinaryDictIOUtilsTests.class.getSimpleName(); private static final FormatSpec.FormatOptions FORMAT_OPTIONS = new FormatSpec.FormatOptions(3, true); - private static final int MAX_UNIGRAMS = 1500; private static final ArrayList<String> sWords = CollectionUtils.newArrayList(); + public static final int DEFAULT_MAX_UNIGRAMS = 1500; + private final int mMaxUnigrams; + + private static final String TEST_DICT_FILE_EXTENSION = ".testDict"; private static final String[] CHARACTERS = { "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "\u00FC" /* ü */, "\u00E2" /* â */, "\u00F1" /* ñ */, // accented characters "\u4E9C" /* 亜 */, "\u4F0A" /* 伊 */, "\u5B87" /* 宇 */, // kanji - "\uD841\uDE28" /* 𠘨 */, "\uD840\uDC0B" /* 𠀋 */, "\uD861\uDeD7" /* 𨛗 */ // surrogate pair + "\uD841\uDE28" /* 𠘨 */, "\uD840\uDC0B" /* 𠀋 */, "\uD861\uDED7" /* 𨛗 */ // surrogate pair }; public BinaryDictIOUtilsTests() { + // 1500 is the default max unigrams + this(System.currentTimeMillis(), DEFAULT_MAX_UNIGRAMS); + } + + public BinaryDictIOUtilsTests(final long seed, final int maxUnigrams) { super(); - final Random random = new Random(123456); + Log.d(TAG, "Seed for test is " + seed + ", maxUnigrams is " + maxUnigrams); + mMaxUnigrams = maxUnigrams; + final Random random = new Random(seed); sWords.clear(); - for (int i = 0; i < MAX_UNIGRAMS; ++i) { + for (int i = 0; i < maxUnigrams; ++i) { sWords.add(generateWord(random.nextInt())); } } @@ -78,8 +84,8 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase { return builder.toString(); } - private static void printCharGroup(final CharGroupInfo info) { - Log.d(TAG, " CharGroup at " + info.mOriginalAddress); + private static void printPtNode(final PtNodeInfo info) { + Log.d(TAG, " PtNode at " + info.mOriginalAddress); Log.d(TAG, " 
flags = " + info.mFlags); Log.d(TAG, " parentAddress = " + info.mParentAddress); Log.d(TAG, " characters = " + new String(info.mCharacters, 0, @@ -103,70 +109,75 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase { Log.d(TAG, " end address = " + info.mEndAddress); } - private static void printNode(final FusionDictionaryBufferInterface buffer, + private static void printNode(final Ver3DictDecoder dictDecoder, final FormatSpec.FormatOptions formatOptions) { - Log.d(TAG, "Node at " + buffer.position()); - final int count = BinaryDictInputOutput.readCharGroupCount(buffer); - Log.d(TAG, " charGroupCount = " + count); + final DictBuffer dictBuffer = dictDecoder.getDictBuffer(); + Log.d(TAG, "Node at " + dictBuffer.position()); + final int count = BinaryDictDecoderUtils.readPtNodeCount(dictBuffer); + Log.d(TAG, " ptNodeCount = " + count); for (int i = 0; i < count; ++i) { - final CharGroupInfo currentInfo = BinaryDictInputOutput.readCharGroup(buffer, - buffer.position(), formatOptions); - printCharGroup(currentInfo); + final PtNodeInfo currentInfo = dictDecoder.readPtNode(dictBuffer.position(), + formatOptions); + printPtNode(currentInfo); } if (formatOptions.mSupportsDynamicUpdate) { - final int forwardLinkAddress = buffer.readUnsignedInt24(); + final int forwardLinkAddress = dictBuffer.readUnsignedInt24(); Log.d(TAG, " forwardLinkAddress = " + forwardLinkAddress); } } - private static void printBinaryFile(final FusionDictionaryBufferInterface buffer) + @SuppressWarnings("unused") + private static void printBinaryFile(final Ver3DictDecoder dictDecoder) throws IOException, UnsupportedFormatException { - FileHeader header = BinaryDictInputOutput.readHeader(buffer); - while (buffer.position() < buffer.limit()) { - printNode(buffer, header.mFormatOptions); + final FileHeader fileHeader = dictDecoder.readHeader(); + final DictBuffer dictBuffer = dictDecoder.getDictBuffer(); + while (dictBuffer.position() < dictBuffer.limit()) { + printNode(dictDecoder, 
fileHeader.mFormatOptions); } } private int getWordPosition(final File file, final String word) { int position = FormatSpec.NOT_VALID_WORD; - FileInputStream inStream = null; + try { - inStream = new FileInputStream(file); - final FusionDictionaryBufferInterface buffer = new ByteBufferWrapper( - inStream.getChannel().map(FileChannel.MapMode.READ_ONLY, 0, file.length())); - position = BinaryDictIOUtils.getTerminalPosition(buffer, word); + final Ver3DictDecoder dictDecoder = new Ver3DictDecoder(file, + DictDecoder.USE_READONLY_BYTEBUFFER); + position = dictDecoder.getTerminalPosition(word); } catch (IOException e) { } catch (UnsupportedFormatException e) { - } finally { - if (inStream != null) { - try { - inStream.close(); - } catch (IOException e) { - // do nothing - } - } } return position; } - private CharGroupInfo findWordFromFile(final File file, final String word) { - FileInputStream inStream = null; - CharGroupInfo info = null; + /** + * Find a word using the DictDecoder. + * + * @param dictDecoder the dict decoder + * @param word the word searched + * @return the found ptNodeInfo + * @throws IOException + * @throws UnsupportedFormatException + */ + private static PtNodeInfo findWordByBinaryDictReader(final DictDecoder dictDecoder, + final String word) throws IOException, UnsupportedFormatException { + int position = dictDecoder.getTerminalPosition(word); + if (position != FormatSpec.NOT_VALID_WORD) { + dictDecoder.setPosition(0); + final FileHeader header = dictDecoder.readHeader(); + dictDecoder.setPosition(position); + return dictDecoder.readPtNode(position, header.mFormatOptions); + } + return null; + } + + private PtNodeInfo findWordFromFile(final File file, final String word) { + final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file); + PtNodeInfo info = null; try { - inStream = new FileInputStream(file); - final FusionDictionaryBufferInterface buffer = new ByteBufferWrapper( - inStream.getChannel().map(FileChannel.MapMode.READ_ONLY, 0, 
file.length())); - info = BinaryDictIOUtils.findWordFromBuffer(buffer, word); + dictDecoder.openDictBuffer(); + info = findWordByBinaryDictReader(dictDecoder, word); } catch (IOException e) { } catch (UnsupportedFormatException e) { - } finally { - if (inStream != null) { - try { - inStream.close(); - } catch (IOException e) { - // do nothing - } - } } return info; } @@ -175,42 +186,34 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase { private long insertAndCheckWord(final File file, final String word, final int frequency, final boolean exist, final ArrayList<WeightedString> bigrams, final ArrayList<WeightedString> shortcuts) { - RandomAccessFile raFile = null; BufferedOutputStream outStream = null; - FusionDictionaryBufferInterface buffer = null; long amountOfTime = -1; try { - raFile = new RandomAccessFile(file, "rw"); - buffer = new ByteBufferWrapper(raFile.getChannel().map( - FileChannel.MapMode.READ_WRITE, 0, file.length())); + final Ver3DictDecoder dictDecoder = new Ver3DictDecoder(file, + DictDecoder.USE_WRITABLE_BYTEBUFFER); + dictDecoder.openDictBuffer(); outStream = new BufferedOutputStream(new FileOutputStream(file, true)); if (!exist) { assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word)); } final long now = System.nanoTime(); - BinaryDictIOUtils.insertWord(buffer, outStream, word, frequency, bigrams, shortcuts, - false, false); + DynamicBinaryDictIOUtils.insertWord(dictDecoder, outStream, word, frequency, bigrams, + shortcuts, false, false); amountOfTime = System.nanoTime() - now; outStream.flush(); MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word)); outStream.close(); - raFile.close(); } catch (IOException e) { + Log.e(TAG, "Raised an IOException while inserting a word", e); } catch (UnsupportedFormatException e) { + Log.e(TAG, "Raised an UnsupportedFormatException error while inserting a word", e); } finally { if (outStream != null) { try { outStream.close(); } catch (IOException e) { - // 
do nothing - } - } - if (raFile != null) { - try { - raFile.close(); - } catch (IOException e) { - // do nothing + Log.e(TAG, "Failed to close the output stream", e); } } } @@ -218,65 +221,48 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase { } private void deleteWord(final File file, final String word) { - RandomAccessFile raFile = null; - FusionDictionaryBufferInterface buffer = null; try { - raFile = new RandomAccessFile(file, "rw"); - buffer = new ByteBufferWrapper(raFile.getChannel().map( - FileChannel.MapMode.READ_WRITE, 0, file.length())); - BinaryDictIOUtils.deleteWord(buffer, word); + final Ver3DictDecoder dictDecoder = new Ver3DictDecoder(file, + DictDecoder.USE_WRITABLE_BYTEBUFFER); + dictDecoder.openDictBuffer(); + DynamicBinaryDictIOUtils.deleteWord(dictDecoder, word); } catch (IOException e) { } catch (UnsupportedFormatException e) { - } finally { - if (raFile != null) { - try { - raFile.close(); - } catch (IOException e) { - // do nothing - } - } } } private void checkReverseLookup(final File file, final String word, final int position) { - FileInputStream inStream = null; + try { - inStream = new FileInputStream(file); - final FusionDictionaryBufferInterface buffer = new ByteBufferWrapper( - inStream.getChannel().map(FileChannel.MapMode.READ_ONLY, 0, file.length())); - final FileHeader header = BinaryDictInputOutput.readHeader(buffer); - assertEquals(word, BinaryDictInputOutput.getWordAtAddress(buffer, header.mHeaderSize, - position - header.mHeaderSize, header.mFormatOptions).mWord); + final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file); + final FileHeader fileHeader = dictDecoder.readHeader(); + assertEquals(word, + BinaryDictDecoderUtils.getWordAtPosition(dictDecoder, fileHeader.mHeaderSize, + position, fileHeader.mFormatOptions).mWord); } catch (IOException e) { + Log.e(TAG, "Raised an IOException while looking up a word", e); } catch (UnsupportedFormatException e) { - } finally { - if (inStream != null) { - try { - 
inStream.close(); - } catch (IOException e) { - // do nothing - } - } + Log.e(TAG, "Raised an UnsupportedFormatException error while looking up a word", e); } } public void testInsertWord() { File file = null; try { - file = File.createTempFile("testInsertWord", ".dict", getContext().getCacheDir()); + file = File.createTempFile("testInsertWord", TEST_DICT_FILE_EXTENSION, + getContext().getCacheDir()); } catch (IOException e) { fail("IOException while creating temporary file: " + e); } // set an initial dictionary. - final FusionDictionary dict = new FusionDictionary(new Node(), + final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false)); dict.add("abcd", 10, null, false); try { - final FileOutputStream out = new FileOutputStream(file); - BinaryDictInputOutput.writeDictionaryBinary(out, dict, FORMAT_OPTIONS); - out.close(); + final DictEncoder dictEncoder = new Ver3DictEncoder(file); + dictEncoder.writeDictionary(dict, FORMAT_OPTIONS); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); } catch (UnsupportedFormatException e) { @@ -313,22 +299,21 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase { public void testInsertWordWithBigrams() { File file = null; try { - file = File.createTempFile("testInsertWordWithBigrams", ".dict", + file = File.createTempFile("testInsertWordWithBigrams", TEST_DICT_FILE_EXTENSION, getContext().getCacheDir()); } catch (IOException e) { fail("IOException while creating temporary file: " + e); } // set an initial dictionary. 
- final FusionDictionary dict = new FusionDictionary(new Node(), + final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false)); dict.add("abcd", 10, null, false); dict.add("efgh", 15, null, false); try { - final FileOutputStream out = new FileOutputStream(file); - BinaryDictInputOutput.writeDictionaryBinary(out, dict, FORMAT_OPTIONS); - out.close(); + final DictEncoder dictEncoder = new Ver3DictEncoder(file); + dictEncoder.writeDictionary(dict, FORMAT_OPTIONS); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); } catch (UnsupportedFormatException e) { @@ -341,7 +326,7 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase { insertAndCheckWord(file, "banana", 0, false, null, null); insertAndCheckWord(file, "recursive", 60, true, banana, null); - final CharGroupInfo info = findWordFromFile(file, "recursive"); + final PtNodeInfo info = findWordFromFile(file, "recursive"); int bananaPos = getWordPosition(file, "banana"); assertNotNull(info.mBigrams); assertEquals(info.mBigrams.size(), 1); @@ -351,21 +336,21 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase { public void testRandomWords() { File file = null; try { - file = File.createTempFile("testRandomWord", ".dict", getContext().getCacheDir()); + file = File.createTempFile("testRandomWord", TEST_DICT_FILE_EXTENSION, + getContext().getCacheDir()); } catch (IOException e) { } assertNotNull(file); // set an initial dictionary. 
- final FusionDictionary dict = new FusionDictionary(new Node(), + final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), new FusionDictionary.DictionaryOptions(new HashMap<String, String>(), false, false)); dict.add("initial", 10, null, false); try { - final FileOutputStream out = new FileOutputStream(file); - BinaryDictInputOutput.writeDictionaryBinary(out, dict, FORMAT_OPTIONS); - out.close(); + final DictEncoder dictEncoder = new Ver3DictEncoder(file); + dictEncoder.writeDictionary(dict, FORMAT_OPTIONS); } catch (IOException e) { assertTrue(false); } catch (UnsupportedFormatException e) { @@ -390,6 +375,6 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase { Log.d(TAG, "max = " + ((double)maxTimeToInsert/1000000) + " ms."); Log.d(TAG, "min = " + ((double)minTimeToInsert/1000000) + " ms."); - Log.d(TAG, "avg = " + ((double)sum/MAX_UNIGRAMS/1000000) + " ms."); + Log.d(TAG, "avg = " + ((double)sum/mMaxUnigrams/1000000) + " ms."); } } diff --git a/tests/src/com/android/inputmethod/latin/makedict/CodePointUtils.java b/tests/src/com/android/inputmethod/latin/makedict/CodePointUtils.java new file mode 100644 index 000000000..36b958af8 --- /dev/null +++ b/tests/src/com/android/inputmethod/latin/makedict/CodePointUtils.java @@ -0,0 +1,65 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package com.android.inputmethod.latin.makedict;
+
+import java.util.Random;
+
+/**
+ * Utility methods related with code points used for tests.
+ */
+public class CodePointUtils {
+    private CodePointUtils() {
+        // This utility class is not publicly instantiable.
+    }
+
+    /**
+     * Draws one int from {@code random} and reduces it to the range [0, bound).
+     *
+     * For every draw except Integer.MIN_VALUE this is the same value as
+     * {@code Math.abs(random.nextInt()) % bound}. Math.abs(Integer.MIN_VALUE) is still negative,
+     * so that form can return a negative number; taking the remainder first cannot.
+     *
+     * @param random the source of randomness; exactly one int is consumed.
+     * @param bound the exclusive upper bound, must be strictly positive.
+     * @return a value in [0, bound).
+     */
+    private static int nextNonNegative(final Random random, final int bound) {
+        return Math.abs(random.nextInt() % bound);
+    }
+
+    /**
+     * Generates an array of random code points usable on their own in a string.
+     *
+     * Each entry is in the range 0x20 ~ 0x10FFFE and is never a surrogate. Entries are drawn
+     * independently, so the same code point may appear more than once.
+     *
+     * @param codePointSetSize the number of code points to generate.
+     * @param random the source of randomness.
+     * @return a newly allocated array of codePointSetSize code points.
+     */
+    public static int[] generateCodePointSet(final int codePointSetSize, final Random random) {
+        final int[] codePointSet = new int[codePointSetSize];
+        // The array is filled from the end; i only moves when a candidate is accepted.
+        for (int i = codePointSet.length - 1; i >= 0; ) {
+            final int r = Math.abs(random.nextInt());
+            // Math.abs(Integer.MIN_VALUE) is negative: reject that draw and try again.
+            if (r < 0) continue;
+            // Don't insert code points below 0x20, but insert any other code point.
+            // Code points are in the range 0~0x10FFFF.
+            final int candidateCodePoint = 0x20 + r % (Character.MAX_CODE_POINT - 0x20);
+            // Code points between MIN_ and MAX_SURROGATE are not valid on their own.
+            if (candidateCodePoint >= Character.MIN_SURROGATE
+                    && candidateCodePoint <= Character.MAX_SURROGATE) continue;
+            codePointSet[i] = candidateCodePoint;
+            --i;
+        }
+        return codePointSet;
+    }
+
+    /**
+     * Generates a random word.
+     *
+     * @param random the source of randomness.
+     * @param codePointSet the code points to pick from; must not be empty.
+     * @return a non-empty word made only of code points from codePointSet.
+     */
+    public static String generateWord(final Random random, final int[] codePointSet) {
+        final StringBuilder builder = new StringBuilder();
+        // 1 + 8 * 4 = 33 chars at most for the target length, but we do it the following way so
+        // as to bias the random toward longer words. This should be closer to natural language,
+        // and more importantly, it will exercise the algorithms in dicttool much more.
+        int count = 1;
+        for (int i = 0; i < 8; ++i) {
+            count += nextNonNegative(random, 5);
+        }
+        // The length is measured in chars, so a trailing supplementary code point may make the
+        // word one char longer than count.
+        while (builder.length() < count) {
+            builder.appendCodePoint(codePointSet[nextNonNegative(random, codePointSet.length)]);
+        }
+        return builder.toString();
+    }
+}
diff --git a/tests/src/com/android/inputmethod/latin/makedict/SparseTableTests.java b/tests/src/com/android/inputmethod/latin/makedict/SparseTableTests.java
new file mode 100644
index 000000000..132483d5e
--- /dev/null
+++ b/tests/src/com/android/inputmethod/latin/makedict/SparseTableTests.java
@@ -0,0 +1,160 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin.makedict;
+
+import android.test.AndroidTestCase;
+import android.test.suitebuilder.annotation.LargeTest;
+import android.util.Log;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.Random;
+
+/**
+ * Unit tests for SparseTable.
+ */
+@LargeTest
+public class SparseTableTests extends AndroidTestCase {
+    private static final String TAG = SparseTableTests.class.getSimpleName();
+
+    // Lookup and content tables used by testInitializeWithArray(). With a block size of 8 the
+    // test expects indices 0~8 to be absent and indices 9~15 to map to 1~7.
+    private static final int[] SMALL_INDEX = { SparseTable.NOT_EXIST, 0 };
+    private static final int[] BIG_INDEX = { SparseTable.NOT_EXIST, 1, 2, 3, 4, 5, 6, 7};
+
+    private final Random mRandom;
+    // Expected content of the table under test; its size is the size of the table.
+    private final ArrayList<Integer> mRandomIndex;
+
+    private static final int DEFAULT_SIZE = 10000;
+    private static final int BLOCK_SIZE = 8;
+
+    public SparseTableTests() {
+        this(System.currentTimeMillis(), DEFAULT_SIZE);
+    }
+
+    /**
+     * @param seed the seed of the random generator, logged so that a run can be reproduced.
+     * @param tableSize the number of entries of the table used by the random tests.
+     */
+    public SparseTableTests(final long seed, final int tableSize) {
+        super();
+        Log.d(TAG, "Seed for test is " + seed + ", size is " + tableSize);
+        mRandom = new Random(seed);
+        mRandomIndex = new ArrayList<Integer>(tableSize);
+        for (int i = 0; i < tableSize; ++i) {
+            mRandomIndex.add(SparseTable.NOT_EXIST);
+        }
+    }
+
+    public void testInitializeWithArray() {
+        final SparseTable table = new SparseTable(SMALL_INDEX, BIG_INDEX, BLOCK_SIZE);
+        for (int i = 0; i < 8; ++i) {
+            assertEquals(SparseTable.NOT_EXIST, table.get(i));
+        }
+        assertEquals(SparseTable.NOT_EXIST, table.get(8));
+        for (int i = 9; i < 16; ++i) {
+            assertEquals(i - 8, table.get(i));
+        }
+    }
+
+    public void testSet() {
+        final SparseTable table = new SparseTable(16, BLOCK_SIZE);
+        table.set(3, 6);
+        table.set(8, 16);
+        for (int i = 0; i < 16; ++i) {
+            if (i == 3 || i == 8) {
+                assertEquals(i * 2, table.get(i));
+            } else {
+                assertEquals(SparseTable.NOT_EXIST, table.get(i));
+            }
+        }
+    }
+
+    /**
+     * Refills the first size entries of mRandomIndex.
+     *
+     * @param size the number of entries to regenerate; must not exceed mRandomIndex.size().
+     * @param prop the probability, in percent, that an entry gets a random value rather than
+     *        SparseTable.NOT_EXIST.
+     */
+    private void generateRandomIndex(final int size, final int prop) {
+        for (int i = 0; i < size; ++i) {
+            if (mRandom.nextInt(100) < prop) {
+                mRandomIndex.set(i, mRandom.nextInt());
+            } else {
+                mRandomIndex.set(i, SparseTable.NOT_EXIST);
+            }
+        }
+    }
+
+    /**
+     * Reads the whole content of a file, failing if the file ends before its reported length.
+     */
+    private static byte[] readFully(final File file) throws IOException {
+        final byte[] buffer = new byte[(int) file.length()];
+        final InputStream inStream = new FileInputStream(file);
+        try {
+            int offset = 0;
+            // A single read() is allowed to return fewer bytes than requested.
+            while (offset < buffer.length) {
+                final int readCount = inStream.read(buffer, offset, buffer.length - offset);
+                if (readCount < 0) {
+                    throw new IOException("Unexpected end of file : " + file);
+                }
+                offset += readCount;
+            }
+        } finally {
+            inStream.close();
+        }
+        return buffer;
+    }
+
+    /**
+     * Closes a stream, logging the failure instead of propagating it.
+     */
+    private static void closeAndLog(final OutputStream stream, final String message) {
+        if (stream == null) return;
+        try {
+            stream.close();
+        } catch (IOException e) {
+            Log.d(TAG, message, e);
+        }
+    }
+
+    /**
+     * Writes the lookup and content tables to the given files and closes both streams.
+     */
+    private static void writeTable(final SparseTable table, final File lookupIndexFile,
+            final File contentFile) throws IOException {
+        OutputStream lookupOutStream = null;
+        OutputStream contentOutStream = null;
+        try {
+            lookupOutStream = new FileOutputStream(lookupIndexFile);
+            contentOutStream = new FileOutputStream(contentFile);
+            table.write(lookupOutStream, contentOutStream);
+        } finally {
+            closeAndLog(lookupOutStream, "IOException while closing the stream");
+            closeAndLog(contentOutStream, "IOException while closing contentStream.");
+        }
+    }
+
+    /**
+     * Fills a table from mRandomIndex, checks that it returns the same values, then writes it
+     * to temporary files and checks that a table reloaded from them is identical.
+     */
+    private void runTestRandomSet() {
+        final int size = mRandomIndex.size();
+        final SparseTable table = new SparseTable(size, BLOCK_SIZE);
+        int elementCount = 0;
+        for (int i = 0; i < size; ++i) {
+            if (mRandomIndex.get(i) != SparseTable.NOT_EXIST) {
+                table.set(i, mRandomIndex.get(i));
+                elementCount++;
+            }
+        }
+
+        Log.d(TAG, "table size = " + table.getLookupTableSize() + " + "
+                + table.getContentTableSize());
+        Log.d(TAG, "the table has " + elementCount + " elements");
+        for (int i = 0; i < size; ++i) {
+            assertEquals((int)mRandomIndex.get(i), table.get(i));
+        }
+
+        // flush and reload
+        File lookupIndexFile = null;
+        File contentFile = null;
+        try {
+            lookupIndexFile = File.createTempFile("testRandomSet", ".small");
+            contentFile = File.createTempFile("testRandomSet", ".big");
+            writeTable(table, lookupIndexFile, contentFile);
+            final byte[] lookupArray = readFully(lookupIndexFile);
+            final byte[] contentArray = readFully(contentFile);
+            final SparseTable newTable = new SparseTable(lookupArray, contentArray, BLOCK_SIZE);
+            for (int i = 0; i < size; ++i) {
+                assertEquals(table.get(i), newTable.get(i));
+            }
+        } catch (IOException e) {
+            // Without the files the reload check did not run, so the test must not pass.
+            Log.d(TAG, "IOException while flushing and reloading", e);
+            fail("IOException while flushing and reloading : " + e);
+        } finally {
+            if (lookupIndexFile != null) {
+                lookupIndexFile.delete();
+            }
+            if (contentFile != null) {
+                contentFile.delete();
+            }
+        }
+    }
+
+    public void testRandomSet() {
+        // Exercise the table at densities from 0% to 100%.
+        for (int i = 0; i <= 100; i += 10) {
+            generateRandomIndex(mRandomIndex.size(), i);
+            runTestRandomSet();
+        }
+    }
+}
diff --git
a/tests/src/com/android/inputmethod/latin/makedict/Ver3DictDecoderTests.java b/tests/src/com/android/inputmethod/latin/makedict/Ver3DictDecoderTests.java
new file mode 100644
index 000000000..9611599b9
--- /dev/null
+++ b/tests/src/com/android/inputmethod/latin/makedict/Ver3DictDecoderTests.java
@@ -0,0 +1,150 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin.makedict;
+
+import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
+import com.android.inputmethod.latin.makedict.DictDecoder.DictionaryBufferFactory;
+import com.android.inputmethod.latin.makedict.DictDecoder.DictionaryBufferFromByteArrayFactory;
+import com.android.inputmethod.latin.makedict.DictDecoder.
+        DictionaryBufferFromReadOnlyByteBufferFactory;
+import com.android.inputmethod.latin.makedict.DictDecoder.
+        DictionaryBufferFromWritableByteBufferFactory;
+
+import android.test.AndroidTestCase;
+import android.util.Log;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+
+/**
+ * Unit tests for Ver3DictDecoder
+ */
+public class Ver3DictDecoderTests extends AndroidTestCase {
+    private static final String TAG = Ver3DictDecoderTests.class.getSimpleName();
+
+    // Content written to the test file and expected back from the buffer.
+    private final byte[] data = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+
+    // Utilities for testing
+    /**
+     * Writes the test data to the given file, failing the test if the write fails.
+     */
+    public void writeDataToFile(final File file) {
+        FileOutputStream outStream = null;
+        try {
+            outStream = new FileOutputStream(file);
+            outStream.write(data);
+        } catch (IOException e) {
+            fail ("Can't write data to the test file");
+        } finally {
+            if (outStream != null) {
+                try {
+                    outStream.close();
+                } catch (IOException e) {
+                    Log.e(TAG, "Failed to close the output stream", e);
+                }
+            }
+        }
+    }
+
+    /**
+     * Checks that the buffer opened with the given factory has the size of the file.
+     *
+     * @param testName the prefix of the temporary file name.
+     * @param factory the buffer factory handed to the decoder under test.
+     */
+    @SuppressWarnings("null")
+    public void runTestOpenBuffer(final String testName, final DictionaryBufferFactory factory) {
+        File testFile = null;
+        try {
+            testFile = File.createTempFile(testName, ".tmp", getContext().getCacheDir());
+        } catch (IOException e) {
+            Log.e(TAG, "IOException while the creating temporary file", e);
+        }
+
+        assertNotNull(testFile);
+        try {
+            final Ver3DictDecoder dictDecoder = new Ver3DictDecoder(testFile, factory);
+            try {
+                dictDecoder.openDictBuffer();
+            } catch (Exception e) {
+                // NOTE(review): the file is still empty at this point, so a failure here is
+                // presumably tolerated; it is only logged, as before.
+                Log.e(TAG, "Failed to open the buffer", e);
+            }
+
+            writeDataToFile(testFile);
+
+            try {
+                dictDecoder.openDictBuffer();
+            } catch (Exception e) {
+                Log.e(TAG, "Raised the exception while opening buffer", e);
+                fail("Raised the exception while opening buffer : " + e);
+            }
+
+            assertNotNull("the buffer must not be null", dictDecoder.getDictBuffer());
+            assertEquals(testFile.length(), dictDecoder.getDictBuffer().capacity());
+        } finally {
+            // Don't leave the temporary file behind in the cache directory.
+            testFile.delete();
+        }
+    }
+
+    public void testOpenBufferWithByteBuffer() {
+        runTestOpenBuffer("testOpenBufferWithByteBuffer",
+                new DictionaryBufferFromReadOnlyByteBufferFactory());
+    }
+
+    public void testOpenBufferWithByteArray() {
+        runTestOpenBuffer("testOpenBufferWithByteArray",
+                new DictionaryBufferFromByteArrayFactory());
+    }
+
+    public void testOpenBufferWithWritableByteBuffer() {
+        runTestOpenBuffer("testOpenBufferWithWritableByteBuffer",
+                new DictionaryBufferFromWritableByteBufferFactory());
+    }
+
+    /**
+     * Checks that the decoder has no buffer before opening, and that the buffer obtained with
+     * the given factory yields the bytes written to the file.
+     *
+     * @param testName the prefix of the temporary file name.
+     * @param factory the buffer factory handed to the decoder under test.
+     */
+    @SuppressWarnings("null")
+    public void runTestGetBuffer(final String testName, final DictionaryBufferFactory factory) {
+        File testFile = null;
+        try {
+            testFile = File.createTempFile(testName, ".tmp", getContext().getCacheDir());
+        } catch (IOException e) {
+            Log.e(TAG, "IOException while the creating temporary file", e);
+        }
+
+        assertNotNull(testFile);
+        try {
+            final Ver3DictDecoder dictDecoder = new Ver3DictDecoder(testFile, factory);
+
+            // the default return value of getBuffer() must be null.
+            assertNull("the default return value of getBuffer() is not null",
+                    dictDecoder.getDictBuffer());
+
+            writeDataToFile(testFile);
+            assertTrue(testFile.exists());
+            Log.d(TAG, "file length = " + testFile.length());
+
+            DictBuffer dictBuffer = null;
+            try {
+                dictBuffer = dictDecoder.openAndGetDictBuffer();
+            } catch (IOException e) {
+                Log.e(TAG, "Failed to open and get the buffer", e);
+            }
+            assertNotNull("the buffer must not be null", dictBuffer);
+
+            for (int i = 0; i < data.length; ++i) {
+                assertEquals(data[i], dictBuffer.readUnsignedByte());
+            }
+        } finally {
+            // Don't leave the temporary file behind in the cache directory.
+            testFile.delete();
+        }
+    }
+
+    public void testGetBufferWithByteBuffer() {
+        runTestGetBuffer("testGetBufferWithByteBuffer",
+                new DictionaryBufferFromReadOnlyByteBufferFactory());
+    }
+
+    public void testGetBufferWithByteArray() {
+        runTestGetBuffer("testGetBufferWithByteArray",
+                new DictionaryBufferFromByteArrayFactory());
+    }
+
+    public void testGetBufferWithWritableByteBuffer() {
+        runTestGetBuffer("testGetBufferWithWritableByteBuffer",
+                new DictionaryBufferFromWritableByteBufferFactory());
+    }
+}
|