aboutsummaryrefslogtreecommitdiffstats
path: root/tests/src/com/android/inputmethod/latin/makedict
diff options
context:
space:
mode:
Diffstat (limited to 'tests/src/com/android/inputmethod/latin/makedict')
-rw-r--r--tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java684
-rw-r--r--tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOTests.java601
-rw-r--r--tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java217
-rw-r--r--tests/src/com/android/inputmethod/latin/makedict/CodePointUtils.java65
-rw-r--r--tests/src/com/android/inputmethod/latin/makedict/SparseTableTests.java160
-rw-r--r--tests/src/com/android/inputmethod/latin/makedict/Ver3DictDecoderTests.java150
6 files changed, 1160 insertions, 717 deletions
diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java
new file mode 100644
index 000000000..a4d94262f
--- /dev/null
+++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java
@@ -0,0 +1,684 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin.makedict;
+
+import android.test.AndroidTestCase;
+import android.test.MoreAsserts;
+import android.test.suitebuilder.annotation.LargeTest;
+import android.util.Log;
+import android.util.SparseArray;
+
+import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
+import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
+import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
+import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
+import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
+import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
+import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
+import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
+import com.android.inputmethod.latin.utils.ByteArrayDictBuffer;
+import com.android.inputmethod.latin.utils.CollectionUtils;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map.Entry;
+import java.util.Random;
+import java.util.Set;
+import java.util.TreeMap;
+
+/**
+ * Unit tests for BinaryDictDecoderUtils and BinaryDictEncoderUtils.
+ */
+@LargeTest
+public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
+ // Log tag used by every Log call in this class.
+ private static final String TAG = BinaryDictDecoderEncoderTests.class.getSimpleName();
+ // Number of words generated when the no-argument constructor is used.
+ private static final int DEFAULT_MAX_UNIGRAMS = 100;
+ // Size handed to CodePointUtils.generateCodePointSet when building the random code points.
+ private static final int DEFAULT_CODE_POINT_SET_SIZE = 50;
+ // Frequency given to every unigram and to every shortcut target added by addUnigrams.
+ private static final int UNIGRAM_FREQ = 10;
+ // Frequency given to every bigram added by addBigrams.
+ private static final int BIGRAM_FREQ = 50;
+ // checkWordMap requires a reconstructed bigram frequency to differ from BIGRAM_FREQ by
+ // less than this value.
+ private static final int TOLERANCE_OF_BIGRAM_FREQ = 5;
+ // Number of times the constructor picks a random word to receive shortcuts.
+ private static final int NUM_OF_NODES_HAVING_SHORTCUTS = 50;
+ // Number of shortcut targets attached to each picked word.
+ private static final int NUM_OF_SHORTCUTS = 5;
+
+ // Buffer type values passed as bufferType to FormatSpec.getDictDecoder.
+ private static final int USE_BYTE_ARRAY = 1;
+ private static final int USE_BYTE_BUFFER = 2;
+
+ // Shared fixtures, filled in by the constructor. The bigram tables map the index of a
+ // first word in sWords to the indices of the words that follow it.
+ private static final ArrayList<String> sWords = CollectionUtils.newArrayList();
+ // Never populated: stands for "no bigrams".
+ private static final SparseArray<List<Integer>> sEmptyBigrams =
+ CollectionUtils.newSparseArray();
+ // Word 0 is followed by every other word.
+ private static final SparseArray<List<Integer>> sStarBigrams = CollectionUtils.newSparseArray();
+ // Word i - 1 is followed by word i.
+ private static final SparseArray<List<Integer>> sChainBigrams =
+ CollectionUtils.newSparseArray();
+ // Maps a word to the words registered as its shortcut targets.
+ private static final HashMap<String, List<String>> sShortcuts = CollectionUtils.newHashMap();
+
+ // Format options covering every version / dynamic-update combination the tests run with.
+ private static final FormatSpec.FormatOptions VERSION2 = new FormatSpec.FormatOptions(2);
+ private static final FormatSpec.FormatOptions VERSION3_WITHOUT_DYNAMIC_UPDATE =
+ new FormatSpec.FormatOptions(3, false /* supportsDynamicUpdate */);
+ private static final FormatSpec.FormatOptions VERSION3_WITH_DYNAMIC_UPDATE =
+ new FormatSpec.FormatOptions(3, true /* supportsDynamicUpdate */);
+ private static final FormatSpec.FormatOptions VERSION4_WITHOUT_DYNAMIC_UPDATE =
+ new FormatSpec.FormatOptions(4, false /* supportsDynamicUpdate */);
+ private static final FormatSpec.FormatOptions VERSION4_WITH_DYNAMIC_UPDATE =
+ new FormatSpec.FormatOptions(4, true /* supportsDynamicUpdate */);
+
+ // Suffix of the files created by setUpDictionaryFile.
+ private static final String TEST_DICT_FILE_EXTENSION = ".testDict";
+
+ /**
+  * Creates the test case with the current time as random seed and DEFAULT_MAX_UNIGRAMS words.
+  */
+ public BinaryDictDecoderEncoderTests() {
+ this(System.currentTimeMillis(), DEFAULT_MAX_UNIGRAMS);
+ }
+
+ /**
+  * Builds the shared random fixtures: the word list, the chain and star bigram tables and
+  * the shortcut map.
+  *
+  * @param seed seed of the random generator; it is logged so that a run can be reproduced
+  * @param maxUnigrams number of distinct words to generate
+  */
+ public BinaryDictDecoderEncoderTests(final long seed, final int maxUnigrams) {
+     super();
+     Log.e(TAG, "Testing dictionary: seed is " + seed);
+     final Random random = new Random(seed);
+     sWords.clear();
+     final int[] codePointSet = CodePointUtils.generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE,
+             random);
+     generateWords(maxUnigrams, random, codePointSet);
+     final int wordCount = sWords.size();
+
+     // The bigram tables are static like sWords and sShortcuts, so they are reset too:
+     // entries left over from an earlier, larger word list would index past the end of
+     // sWords.
+     // Chain: word i - 1 is followed by word i.
+     sChainBigrams.clear();
+     for (int i = 0; i < wordCount; ++i) {
+         sChainBigrams.put(i, new ArrayList<Integer>());
+         if (i > 0) {
+             sChainBigrams.get(i - 1).add(i);
+         }
+     }
+
+     // Star: word 0 is followed by every other word.
+     sStarBigrams.clear();
+     final List<Integer> starTargets = new ArrayList<Integer>();
+     sStarBigrams.put(0, starTargets);
+     for (int i = 1; i < wordCount; ++i) {
+         starTargets.add(i);
+     }
+
+     sShortcuts.clear();
+     for (int i = 0; i < NUM_OF_NODES_HAVING_SHORTCUTS; ++i) {
+         // Random.nextInt(bound) always lies in [0, bound). The previous
+         // Math.abs(random.nextInt()) % size is negative when nextInt() returns
+         // Integer.MIN_VALUE, which made sWords.get() throw.
+         final String source = sWords.get(random.nextInt(wordCount));
+         final List<String> targets = new ArrayList<String>();
+         sShortcuts.put(source, targets);
+         for (int j = 0; j < NUM_OF_SHORTCUTS; ++j) {
+             targets.add(sWords.get(random.nextInt(wordCount)));
+         }
+     }
+ }
+
+ private DictEncoder getDictEncoder(final File file, final FormatOptions formatOptions) {
+ if (formatOptions.mVersion == FormatSpec.VERSION4) {
+ return new Ver4DictEncoder(getContext().getCacheDir());
+ } else if (formatOptions.mVersion == 3 || formatOptions.mVersion == 2) {
+ return new Ver3DictEncoder(file);
+ } else {
+ throw new RuntimeException("The format option has a wrong version : "
+ + formatOptions.mVersion);
+ }
+ }
+
+ /**
+  * Appends {@code number} distinct generated words to sWords.
+  *
+  * @param number how many distinct words to generate
+  * @param random source of randomness handed to CodePointUtils.generateWord
+  * @param codePointSet code points handed to CodePointUtils.generateWord
+  */
+ private void generateWords(final int number, final Random random, final int[] codePointSet) {
+     final Set<String> uniqueWords = CollectionUtils.newHashSet();
+     int remaining = number;
+     while (remaining > 0) {
+         // add() reports whether the word was new, so duplicates do not count.
+         if (uniqueWords.add(CodePointUtils.generateWord(random, codePointSet))) {
+             --remaining;
+         }
+     }
+     sWords.addAll(uniqueWords);
+ }
+
+ /**
+  * Adds the first {@code number} entries of {@code words} to the dictionary as unigrams
+  * with frequency UNIGRAM_FREQ.
+  *
+  * @param number how many entries of {@code words} to add
+  * @param dict dictionary receiving the words
+  * @param words source list
+  * @param shortcutMap shortcut targets per word, or null to add every word with a null
+  *         shortcut list
+  */
+ private void addUnigrams(final int number, final FusionDictionary dict,
+         final List<String> words, final HashMap<String, List<String>> shortcutMap) {
+     final boolean hasShortcutMap = (shortcutMap != null);
+     for (int index = 0; index < number; ++index) {
+         final String word = words.get(index);
+         final ArrayList<WeightedString> shortcuts = CollectionUtils.newArrayList();
+         if (hasShortcutMap && shortcutMap.containsKey(word)) {
+             for (final String target : shortcutMap.get(word)) {
+                 shortcuts.add(new WeightedString(target, UNIGRAM_FREQ));
+             }
+         }
+         // A word without registered shortcuts still gets an (empty) list when a map is given.
+         final ArrayList<WeightedString> shortcutsOrNull = hasShortcutMap ? shortcuts : null;
+         dict.add(word, UNIGRAM_FREQ, shortcutsOrNull, false /* isNotAWord */);
+     }
+ }
+
+ /**
+  * Registers every bigram of the table in the dictionary with frequency BIGRAM_FREQ.
+  *
+  * @param dict dictionary receiving the bigrams
+  * @param words list used to turn the table's indices into words
+  * @param bigrams maps the index of a first word to the indices of its following words
+  */
+ private void addBigrams(final FusionDictionary dict,
+         final List<String> words,
+         final SparseArray<List<Integer>> bigrams) {
+     final int entryCount = bigrams.size();
+     int entry = 0;
+     while (entry < entryCount) {
+         final int firstIndex = bigrams.keyAt(entry);
+         final List<Integer> secondIndices = bigrams.valueAt(entry);
+         for (final int secondIndex : secondIndices) {
+             dict.setBigram(words.get(firstIndex), words.get(secondIndex), BIGRAM_FREQ);
+         }
+         ++entry;
+     }
+ }
+
+// The following is useful to dump the dictionary into a textual file, but it can't compile
+// on-device, so it's commented out.
+// private void dumpToCombinedFileForDebug(final FusionDictionary dict, final String filename)
+// throws IOException {
+// com.android.inputmethod.latin.dicttool.CombinedInputOutput.writeDictionaryCombined(
+// new java.io.FileWriter(new File(filename)), dict);
+// }
+
+ /**
+  * Writes the dictionary with the encoder matching the format options and times the write.
+  *
+  * @param file destination handed to getDictEncoder
+  * @param dict dictionary to write
+  * @param formatOptions format to write in
+  * @return the elapsed time in milliseconds, or -1 when an IOException or
+  *         UnsupportedFormatException was thrown (the exception is logged)
+  */
+ private long timeWritingDictToFile(final File file, final FusionDictionary dict,
+         final FormatSpec.FormatOptions formatOptions) {
+     try {
+         final DictEncoder dictEncoder = getDictEncoder(file, formatOptions);
+
+         final long start = System.currentTimeMillis();
+         // To dump the dictionary to a textual file, uncomment the line below together with
+         // the dumpToCombinedFileForDebug function above.
+         // dumpToCombinedFileForDebug(dict, "/tmp/foo");
+         dictEncoder.writeDictionary(dict, formatOptions);
+         return System.currentTimeMillis() - start;
+     } catch (IOException e) {
+         Log.e(TAG, "IO exception while writing file", e);
+     } catch (UnsupportedFormatException e) {
+         Log.e(TAG, "UnsupportedFormatException", e);
+     }
+     return -1;
+ }
+
+ /**
+  * Asserts that the dictionary contains every expected word, bigram and shortcut.
+  *
+  * @param dict dictionary to inspect; must not be null
+  * @param words words that must be found in the dictionary
+  * @param bigrams maps the index of a first word in {@code words} to the indices of the
+  *         words that must be registered as its bigrams
+  * @param shortcutMap maps a word to its expected shortcut targets, or null to skip the
+  *         shortcut check
+  */
+ private void checkDictionary(final FusionDictionary dict, final List<String> words,
+         final SparseArray<List<Integer>> bigrams,
+         final HashMap<String, List<String>> shortcutMap) {
+     assertNotNull(dict);
+
+     // check unigram
+     for (final String word : words) {
+         final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, word);
+         assertNotNull("unigram not found: " + word, ptNode);
+     }
+
+     // check bigram
+     for (int i = 0; i < bigrams.size(); ++i) {
+         // The node of the first word is the same for all of its bigrams: look it up once
+         // and fail with a message rather than a NullPointerException when it is missing.
+         final String word1 = words.get(bigrams.keyAt(i));
+         final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, word1);
+         assertNotNull("first word of bigram not found: " + word1, ptNode);
+         for (final int w2 : bigrams.valueAt(i)) {
+             final String word2 = words.get(w2);
+             assertNotNull(word1 + "," + word2, ptNode.getBigram(word2));
+         }
+     }
+
+     // check shortcut
+     if (shortcutMap != null) {
+         for (final Entry<String, List<String>> entry : shortcutMap.entrySet()) {
+             final String source = entry.getKey();
+             assertTrue(words.contains(source));
+             final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray,
+                     source);
+             assertNotNull("word with shortcuts not found: " + source, ptNode);
+             for (final String word : entry.getValue()) {
+                 assertNotNull("shortcut not found: " + source + ", " + word,
+                         ptNode.getShortcut(word));
+             }
+         }
+     }
+ }
+
+ private String outputOptions(final int bufferType,
+ final FormatSpec.FormatOptions formatOptions) {
+ String result = " : buffer type = "
+ + ((bufferType == USE_BYTE_BUFFER) ? "byte buffer" : "byte array");
+ result += " : version = " + formatOptions.mVersion;
+ return result + ", supportsDynamicUpdate = " + formatOptions.mSupportsDynamicUpdate;
+ }
+
+ /**
+  * Creates dictionary options whose attributes carry the given id and version.
+  *
+  * @param id stored under the "dictionary" attribute
+  * @param version stored under the "version" attribute
+  * @return the new options
+  */
+ private DictionaryOptions getDictionaryOptions(final String id, final String version) {
+     final HashMap<String, String> initialAttributes = new HashMap<String, String>();
+     final DictionaryOptions dictOptions = new DictionaryOptions(initialAttributes,
+             false, false);
+     dictOptions.mAttributes.put("version", version);
+     dictOptions.mAttributes.put("dictionary", id);
+     return dictOptions;
+ }
+
+ private File setUpDictionaryFile(final String name, final String version) {
+ File file = null;
+ try {
+ file = new File(getContext().getCacheDir(), name + "." + version
+ + TEST_DICT_FILE_EXTENSION);
+ file.createNewFile();
+ } catch (IOException e) {
+ // do nothing
+ }
+ assertTrue("Failed to create the dictionary file.", file.exists());
+ return file;
+ }
+
+ /**
+  * Returns a decoder for the dictionary written by the matching encoder.
+  *
+  * For FormatSpec.VERSION4 the {@code file} argument is not used: the decoder is pointed at
+  * "<id>.<version>" inside the context's cache directory, with both parts taken from a
+  * FileHeader built from the options. For every other version the decoder reads
+  * {@code file}.
+  *
+  * @param file dictionary file for the non-version-4 formats
+  * @param bufferType buffer type passed through to FormatSpec.getDictDecoder
+  * @param formatOptions options whose mVersion selects the branch
+  * @param dictOptions options used to build the version 4 header
+  * @return the decoder
+  */
+ private DictDecoder getDictDecoder(final File file, final int bufferType,
+         final FormatOptions formatOptions, final DictionaryOptions dictOptions) {
+     if (formatOptions.mVersion != FormatSpec.VERSION4) {
+         return FormatSpec.getDictDecoder(file, bufferType);
+     }
+     final FileHeader header = new FileHeader(0, dictOptions, formatOptions);
+     final File cacheDir = getContext().getCacheDir();
+     final String dictName = header.getId() + "." + header.getVersion();
+     return FormatSpec.getDictDecoder(new File(cacheDir, dictName), bufferType);
+ }
+ // Tests for readDictionaryBinary and writeDictionaryBinary
+
+ /**
+  * Reads the whole dictionary back, times the read and verifies the decoded content with
+  * checkDictionary.
+  *
+  * @return the read time in milliseconds, or -1 when reading threw (the exception is
+  *         logged; checkDictionary then fails on the null dictionary)
+  */
+ private long timeReadingAndCheckDict(final File file, final List<String> words,
+         final SparseArray<List<Integer>> bigrams,
+         final HashMap<String, List<String>> shortcutMap, final int bufferType,
+         final FormatOptions formatOptions, final DictionaryOptions dictOptions) {
+     long elapsed = -1;
+     FusionDictionary decoded = null;
+     try {
+         final DictDecoder dictDecoder = getDictDecoder(file, bufferType, formatOptions,
+                 dictOptions);
+         final long start = System.currentTimeMillis();
+         decoded = dictDecoder.readDictionaryBinary(null, false /* deleteDictIfBroken */);
+         elapsed = System.currentTimeMillis() - start;
+     } catch (IOException e) {
+         Log.e(TAG, "IOException while reading dictionary", e);
+     } catch (UnsupportedFormatException e) {
+         Log.e(TAG, "Unsupported format", e);
+     }
+
+     checkDictionary(decoded, words, bigrams, shortcutMap);
+     return elapsed;
+ }
+
+ // Tests for readDictionaryBinary and writeDictionaryBinary
+ /**
+  * Builds a dictionary from the fixtures, checks it, writes it out, reads it back (checking
+  * it again) and returns a profiling line with both timings.
+  *
+  * @param message label inserted into the returned line
+  * @return the profiling line
+  */
+ private String runReadAndWrite(final List<String> words,
+         final SparseArray<List<Integer>> bigrams, final HashMap<String, List<String>> shortcuts,
+         final int bufferType, final FormatSpec.FormatOptions formatOptions,
+         final String message) {
+     final String dictName = "runReadAndWrite";
+     final String dictVersion = Long.toString(System.currentTimeMillis());
+     final File file = setUpDictionaryFile(dictName, dictVersion);
+
+     final DictionaryOptions dictOptions = getDictionaryOptions(dictName, dictVersion);
+     final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), dictOptions);
+     addUnigrams(words.size(), dict, words, shortcuts);
+     addBigrams(dict, words, bigrams);
+     checkDictionary(dict, words, bigrams, shortcuts);
+
+     final long writeMillis = timeWritingDictToFile(file, dict, formatOptions);
+     final long readMillis = timeReadingAndCheckDict(file, words, bigrams, shortcuts,
+             bufferType, formatOptions, dict.mOptions);
+
+     final StringBuilder line = new StringBuilder("PROF: read=");
+     line.append(readMillis).append("ms, write=").append(writeMillis).append("ms :");
+     line.append(message).append(" : ").append(outputOptions(bufferType, formatOptions));
+     return line.toString();
+ }
+
+ private void runReadAndWriteTests(final List<String> results, final int bufferType,
+ final FormatSpec.FormatOptions formatOptions) {
+ results.add(runReadAndWrite(sWords, sEmptyBigrams, null /* shortcuts */, bufferType,
+ formatOptions, "unigram"));
+ results.add(runReadAndWrite(sWords, sChainBigrams, null /* shortcuts */, bufferType,
+ formatOptions, "chain"));
+ results.add(runReadAndWrite(sWords, sStarBigrams, null /* shortcuts */, bufferType,
+ formatOptions, "star"));
+ results.add(runReadAndWrite(sWords, sEmptyBigrams, sShortcuts, bufferType, formatOptions,
+ "unigram with shortcuts"));
+ results.add(runReadAndWrite(sWords, sChainBigrams, sShortcuts, bufferType, formatOptions,
+ "chain with shortcuts"));
+ results.add(runReadAndWrite(sWords, sStarBigrams, sShortcuts, bufferType, formatOptions,
+ "star with shortcuts"));
+ }
+
+ // Unit test for CharEncoding.readString and CharEncoding.writeString.
+ public void testCharEncoding() {
+     // Words in sWords are expected to be shorter than 50 characters (see generateWords);
+     // the buffer presumably allows three bytes for each of them.
+     final byte[] bytes = new byte[50 * 3];
+     final DictBuffer dictBuffer = new ByteArrayDictBuffer(bytes);
+     for (int i = 0; i < sWords.size(); ++i) {
+         final String original = sWords.get(i);
+         Log.d("testReadAndWriteString", "write : " + original);
+         // Start every round trip from a zeroed buffer and from position 0.
+         Arrays.fill(bytes, (byte)0);
+         CharEncoding.writeString(bytes, 0, original);
+         dictBuffer.position(0);
+         final String decoded = CharEncoding.readString(dictBuffer);
+         assertEquals(original, decoded);
+     }
+ }
+
+ /** Runs the read/write round trips for every format with the byte buffer type. */
+ public void testReadAndWriteWithByteBuffer() {
+     final List<String> results = CollectionUtils.newArrayList();
+     final FormatSpec.FormatOptions[] formats = {
+         VERSION2,
+         VERSION3_WITHOUT_DYNAMIC_UPDATE,
+         VERSION3_WITH_DYNAMIC_UPDATE,
+         VERSION4_WITHOUT_DYNAMIC_UPDATE,
+         VERSION4_WITH_DYNAMIC_UPDATE,
+     };
+
+     for (final FormatSpec.FormatOptions format : formats) {
+         runReadAndWriteTests(results, USE_BYTE_BUFFER, format);
+     }
+
+     for (int i = 0; i < results.size(); ++i) {
+         Log.d(TAG, results.get(i));
+     }
+ }
+
+ /** Runs the read/write round trips for every format with the byte array type. */
+ public void testReadAndWriteWithByteArray() {
+     final List<String> results = CollectionUtils.newArrayList();
+     final FormatSpec.FormatOptions[] formats = {
+         VERSION2,
+         VERSION3_WITHOUT_DYNAMIC_UPDATE,
+         VERSION3_WITH_DYNAMIC_UPDATE,
+         VERSION4_WITHOUT_DYNAMIC_UPDATE,
+         VERSION4_WITH_DYNAMIC_UPDATE,
+     };
+
+     for (final FormatSpec.FormatOptions format : formats) {
+         runReadAndWriteTests(results, USE_BYTE_ARRAY, format);
+     }
+
+     for (int i = 0; i < results.size(); ++i) {
+         Log.d(TAG, results.get(i));
+     }
+ }
+
+ // Tests for readUnigramsAndBigramsBinary
+
+ /**
+  * Compares the maps filled by readUnigramsAndBigramsBinary with the expected fixtures.
+  *
+  * @param expectedWords words that were written
+  * @param expectedBigrams maps the index of a first word in {@code expectedWords} to the
+  *         indices of its following words
+  * @param resultWords words that were read, keyed by the same keys as the other result maps
+  * @param resultFrequencies unigram frequencies that were read
+  * @param resultBigrams bigram attributes that were read; each attribute's mAddress is
+  *         used as a key into {@code resultWords}
+  */
+ private void checkWordMap(final List<String> expectedWords,
+         final SparseArray<List<Integer>> expectedBigrams,
+         final TreeMap<Integer, String> resultWords,
+         final TreeMap<Integer, Integer> resultFrequencies,
+         final TreeMap<Integer, ArrayList<PendingAttribute>> resultBigrams) {
+     // check unigrams
+     // assertEquals takes (expected, actual); the arguments used to be swapped, which
+     // produced inverted failure messages.
+     final Set<String> actualWordsSet = new HashSet<String>(resultWords.values());
+     final Set<String> expectedWordsSet = new HashSet<String>(expectedWords);
+     assertEquals(expectedWordsSet, actualWordsSet);
+
+     for (final int freq : resultFrequencies.values()) {
+         assertEquals(UNIGRAM_FREQ, freq);
+     }
+
+     // check bigrams
+     final HashMap<String, List<String>> expBigrams = new HashMap<String, List<String>>();
+     for (int i = 0; i < expectedBigrams.size(); ++i) {
+         final String word1 = expectedWords.get(expectedBigrams.keyAt(i));
+         for (final int w2 : expectedBigrams.valueAt(i)) {
+             if (expBigrams.get(word1) == null) {
+                 expBigrams.put(word1, new ArrayList<String>());
+             }
+             expBigrams.get(word1).add(expectedWords.get(w2));
+         }
+     }
+
+     final HashMap<String, List<String>> actBigrams = new HashMap<String, List<String>>();
+     for (final Entry<Integer, ArrayList<PendingAttribute>> entry : resultBigrams.entrySet()) {
+         final String word1 = resultWords.get(entry.getKey());
+         final int unigramFreq = resultFrequencies.get(entry.getKey());
+         for (final PendingAttribute attr : entry.getValue()) {
+             final String word2 = resultWords.get(attr.mAddress);
+             if (actBigrams.get(word1) == null) {
+                 actBigrams.put(word1, new ArrayList<String>());
+             }
+             actBigrams.get(word1).add(word2);
+
+             final int bigramFreq = BinaryDictIOUtils.reconstructBigramFrequency(
+                     unigramFreq, attr.mFrequency);
+             assertTrue("bigram frequency out of tolerance: " + bigramFreq,
+                     Math.abs(bigramFreq - BIGRAM_FREQ) < TOLERANCE_OF_BIGRAM_FREQ);
+         }
+     }
+
+     assertEquals(expBigrams, actBigrams);
+ }
+
+ /**
+  * Reads the unigrams and bigrams with readUnigramsAndBigramsBinary, times the read and
+  * verifies the result with checkWordMap.
+  *
+  * @return the read time in milliseconds, or -1 when reading threw (the exception is
+  *         logged and checkWordMap then runs on whatever was read)
+  */
+ private long timeAndCheckReadUnigramsAndBigramsBinary(final File file, final List<String> words,
+         final SparseArray<List<Integer>> bigrams, final int bufferType,
+         final FormatOptions formatOptions, final DictionaryOptions dictOptions) {
+     final TreeMap<Integer, String> resultWords = CollectionUtils.newTreeMap();
+     final TreeMap<Integer, ArrayList<PendingAttribute>> resultBigrams =
+             CollectionUtils.newTreeMap();
+     final TreeMap<Integer, Integer> resultFreqs = CollectionUtils.newTreeMap();
+
+     // The former FileInputStream local was never assigned, so the finally block that
+     // closed it could not do anything; both were removed.
+     long diff = -1;
+     try {
+         final DictDecoder dictDecoder = getDictDecoder(file, bufferType, formatOptions,
+                 dictOptions);
+         final long now = System.currentTimeMillis();
+         dictDecoder.readUnigramsAndBigramsBinary(resultWords, resultFreqs, resultBigrams);
+         diff = System.currentTimeMillis() - now;
+     } catch (IOException e) {
+         Log.e(TAG, "IOException", e);
+     } catch (UnsupportedFormatException e) {
+         Log.e(TAG, "UnsupportedFormatException", e);
+     }
+
+     checkWordMap(words, bigrams, resultWords, resultFreqs, resultBigrams);
+     return diff;
+ }
+
+ /**
+  * Writes a dictionary built from the fixtures, then reads it back twice: once through
+  * readUnigramsAndBigramsBinary and once in full, checking the content each time.
+  *
+  * @param message label inserted into the returned line
+  * @return a line with both read timings
+  */
+ private String runReadUnigramsAndBigramsBinary(final ArrayList<String> words,
+         final SparseArray<List<Integer>> bigrams, final int bufferType,
+         final FormatSpec.FormatOptions formatOptions, final String message) {
+     final String dictName = "runReadUnigrams";
+     final String dictVersion = Long.toString(System.currentTimeMillis());
+     final File file = setUpDictionaryFile(dictName, dictVersion);
+
+     // Build the dictionary from the word and bigram lists.
+     final DictionaryOptions dictOptions = getDictionaryOptions(dictName, dictVersion);
+     final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), dictOptions);
+     addUnigrams(words.size(), dict, words, null /* shortcutMap */);
+     addBigrams(dict, words, bigrams);
+
+     timeWritingDictToFile(file, dict, formatOptions);
+
+     final long wordMapMillis = timeAndCheckReadUnigramsAndBigramsBinary(file, words,
+             bigrams, bufferType, formatOptions, dict.mOptions);
+     final long fullReadMillis = timeReadingAndCheckDict(file, words, bigrams,
+             null /* shortcutMap */, bufferType, formatOptions, dict.mOptions);
+
+     final StringBuilder line = new StringBuilder("readDictionaryBinary=");
+     line.append(fullReadMillis).append(", readUnigramsAndBigramsBinary=");
+     line.append(wordMapMillis).append(" : ").append(message).append(" : ");
+     line.append(outputOptions(bufferType, formatOptions));
+     return line.toString();
+ }
+
+ private void runReadUnigramsAndBigramsTests(final ArrayList<String> results,
+ final int bufferType, final FormatSpec.FormatOptions formatOptions) {
+ results.add(runReadUnigramsAndBigramsBinary(sWords, sEmptyBigrams, bufferType,
+ formatOptions, "unigram"));
+ results.add(runReadUnigramsAndBigramsBinary(sWords, sChainBigrams, bufferType,
+ formatOptions, "chain"));
+ results.add(runReadUnigramsAndBigramsBinary(sWords, sStarBigrams, bufferType,
+ formatOptions, "star"));
+ }
+
+ /** Runs the unigram/bigram read tests for every format with the byte buffer type. */
+ public void testReadUnigramsAndBigramsBinaryWithByteBuffer() {
+     final ArrayList<String> results = CollectionUtils.newArrayList();
+     final FormatSpec.FormatOptions[] formats = {
+         VERSION2,
+         VERSION3_WITHOUT_DYNAMIC_UPDATE,
+         VERSION3_WITH_DYNAMIC_UPDATE,
+         VERSION4_WITHOUT_DYNAMIC_UPDATE,
+         VERSION4_WITH_DYNAMIC_UPDATE,
+     };
+
+     for (final FormatSpec.FormatOptions format : formats) {
+         runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, format);
+     }
+
+     for (int i = 0; i < results.size(); ++i) {
+         Log.d(TAG, results.get(i));
+     }
+ }
+
+ /** Runs the unigram/bigram read tests for every format with the byte array type. */
+ public void testReadUnigramsAndBigramsBinaryWithByteArray() {
+     final ArrayList<String> results = CollectionUtils.newArrayList();
+     final FormatSpec.FormatOptions[] formats = {
+         VERSION2,
+         VERSION3_WITHOUT_DYNAMIC_UPDATE,
+         VERSION3_WITH_DYNAMIC_UPDATE,
+         VERSION4_WITHOUT_DYNAMIC_UPDATE,
+         VERSION4_WITH_DYNAMIC_UPDATE,
+     };
+
+     for (final FormatSpec.FormatOptions format : formats) {
+         runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, format);
+     }
+
+     for (int i = 0; i < results.size(); ++i) {
+         Log.d(TAG, results.get(i));
+     }
+ }
+
+ // Tests for getTerminalPosition
+ private String getWordFromBinary(final DictDecoder dictDecoder, final int address) {
+ if (dictDecoder.getPosition() != 0) dictDecoder.setPosition(0);
+
+ FileHeader fileHeader = null;
+ try {
+ fileHeader = dictDecoder.readHeader();
+ } catch (IOException e) {
+ return null;
+ } catch (UnsupportedFormatException e) {
+ return null;
+ }
+ if (fileHeader == null) return null;
+ return BinaryDictDecoderUtils.getWordAtPosition(dictDecoder, fileHeader.mHeaderSize,
+ address, fileHeader.mFormatOptions).mWord;
+ }
+
+ /**
+  * Looks the word up with getTerminalPosition, checks that it was found exactly when
+  * expected and, if so, that the position leads back to the same word.
+  *
+  * @param dictDecoder decoder to search in
+  * @param word word to look up
+  * @param index not used by the check; kept so that existing callers keep compiling
+  * @param contained whether the word is expected to be in the dictionary
+  * @return the lookup time in nanoseconds, or -1 when the lookup threw (the exception is
+  *         logged)
+  */
+ private long checkGetTerminalPosition(final DictDecoder dictDecoder, final String word,
+         int index, boolean contained) {
+     long diff = -1;
+     int position = -1;
+     try {
+         final long now = System.nanoTime();
+         position = dictDecoder.getTerminalPosition(word);
+         diff = System.nanoTime() - now;
+     } catch (IOException e) {
+         Log.e(TAG, "IOException while getTerminalPosition", e);
+     } catch (UnsupportedFormatException e) {
+         Log.e(TAG, "UnsupportedFormatException while getTerminalPosition", e);
+     }
+
+     // assertEquals takes (expected, actual); the arguments used to be swapped.
+     assertEquals(contained, FormatSpec.NOT_VALID_WORD != position);
+     if (contained) {
+         assertEquals(word, getWordFromBinary(dictDecoder, position));
+     }
+     return diff;
+ }
+
+ /**
+  * Writes a dictionary built from {@code words} and {@code bigrams}, then checks
+  * getTerminalPosition against invalid input, every contained word and random words that
+  * are not contained.
+  *
+  * @param words words to put into the dictionary and to look up afterwards
+  * @param bigrams bigram table added to the dictionary
+  * @param bufferType only printed in the log line (see the review note in the body)
+  * @param formatOptions format to write the dictionary in
+  * @param message label printed in the log line
+  */
+ private void runGetTerminalPosition(final ArrayList<String> words,
+         final SparseArray<List<Integer>> bigrams, final int bufferType,
+         final FormatOptions formatOptions, final String message) {
+     final String dictName = "testGetTerminalPosition";
+     final String dictVersion = Long.toString(System.currentTimeMillis());
+     final File file = setUpDictionaryFile(dictName, dictVersion);
+
+     // Use the words parameter throughout; the method used to mix it with sWords.
+     final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
+             getDictionaryOptions(dictName, dictVersion));
+     addUnigrams(words.size(), dict, words, null /* shortcutMap */);
+     addBigrams(dict, words, bigrams);
+     timeWritingDictToFile(file, dict, formatOptions);
+
+     // NOTE(review): the decoder is always opened with DictDecoder.USE_BYTEARRAY, so the
+     // USE_BYTE_BUFFER runs of this test do not exercise a different buffer. Confirm
+     // whether bufferType should be passed here instead.
+     final DictDecoder dictDecoder = getDictDecoder(file, DictDecoder.USE_BYTEARRAY,
+             formatOptions, dict.mOptions);
+     try {
+         dictDecoder.openDictBuffer();
+     } catch (IOException e) {
+         // The assertion below reports the failure.
+         Log.e(TAG, "IOException while opening the buffer", e);
+     }
+     assertTrue("Can't get the buffer", dictDecoder.isDictBufferOpen());
+
+     try {
+         // too long word
+         final String longWord = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
+         assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(longWord));
+
+         // null
+         assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(null));
+
+         // empty string
+         assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(""));
+     } catch (IOException e) {
+         // An exception used to be swallowed here, silently skipping the remaining checks.
+         fail("IOException while looking up invalid words: " + e);
+     } catch (UnsupportedFormatException e) {
+         fail("UnsupportedFormatException while looking up invalid words: " + e);
+     }
+
+     // Test a word that is contained within the dictionary.
+     long sum = 0;
+     for (int i = 0; i < words.size(); ++i) {
+         final long time = checkGetTerminalPosition(dictDecoder, words.get(i), i, true);
+         sum += time == -1 ? 0 : time;
+     }
+     Log.d(TAG, "per search : " + (((double)sum) / words.size() / 1000000) + " : " + message
+             + " : " + outputOptions(bufferType, formatOptions));
+
+     // Test a word that isn't contained within the dictionary.
+     // Random takes a long seed; the former (int) cast only discarded bits.
+     final Random random = new Random(System.currentTimeMillis());
+     final int[] codePointSet = CodePointUtils.generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE,
+             random);
+     for (int i = 0; i < 1000; ++i) {
+         final String word = CodePointUtils.generateWord(random, codePointSet);
+         if (words.contains(word)) {
+             continue;
+         }
+         checkGetTerminalPosition(dictDecoder, word, i, false);
+     }
+ }
+
+ private void runGetTerminalPositionTests(final ArrayList<String> results, final int bufferType,
+ final FormatOptions formatOptions) {
+ runGetTerminalPosition(sWords, sEmptyBigrams, bufferType, formatOptions, "unigram");
+ }
+
+ /** Runs the getTerminalPosition checks for every format, first as byte array, then as byte buffer. */
+ public void testGetTerminalPosition() {
+     final ArrayList<String> results = CollectionUtils.newArrayList();
+     final int[] bufferTypes = { USE_BYTE_ARRAY, USE_BYTE_BUFFER };
+     final FormatOptions[] formats = {
+         VERSION2,
+         VERSION3_WITHOUT_DYNAMIC_UPDATE,
+         VERSION3_WITH_DYNAMIC_UPDATE,
+         VERSION4_WITHOUT_DYNAMIC_UPDATE,
+         VERSION4_WITH_DYNAMIC_UPDATE,
+     };
+
+     for (final int bufferType : bufferTypes) {
+         for (final FormatOptions format : formats) {
+             runGetTerminalPositionTests(results, bufferType, format);
+         }
+     }
+
+     for (int i = 0; i < results.size(); ++i) {
+         Log.d(TAG, results.get(i));
+     }
+ }
+
+ /**
+  * Writes sWords as a version 3 dictionary with dynamic update support and checks that
+  * DynamicBinaryDictIOUtils.deleteWord makes a word unreachable through
+  * getTerminalPosition.
+  */
+ public void testDeleteWord() {
+     final String dictName = "testDeleteWord";
+     final String dictVersion = Long.toString(System.currentTimeMillis());
+     final File file = setUpDictionaryFile(dictName, dictVersion);
+
+     final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
+             new FusionDictionary.DictionaryOptions(
+                     new HashMap<String, String>(), false, false));
+     addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */);
+     timeWritingDictToFile(file, dict, VERSION3_WITH_DYNAMIC_UPDATE);
+
+     final Ver3DictDecoder dictDecoder = new Ver3DictDecoder(file, DictDecoder.USE_BYTEARRAY);
+     try {
+         dictDecoder.openDictBuffer();
+     } catch (IOException e) {
+         // The assertion below reports the failure.
+         Log.e(TAG, "IOException while opening the buffer", e);
+     }
+     assertTrue("Can't get the buffer", dictDecoder.isDictBufferOpen());
+
+     try {
+         // Same check for both words: present before the deletion, gone after it.
+         for (final int index : new int[] { 0, 5 }) {
+             final String word = sWords.get(index);
+             MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD,
+                     dictDecoder.getTerminalPosition(word));
+             DynamicBinaryDictIOUtils.deleteWord(dictDecoder, word);
+             assertEquals(FormatSpec.NOT_VALID_WORD,
+                     dictDecoder.getTerminalPosition(word));
+         }
+     } catch (IOException e) {
+         // An exception used to be swallowed here, letting the test pass without checking
+         // anything.
+         fail("IOException while deleting a word: " + e);
+     } catch (UnsupportedFormatException e) {
+         fail("UnsupportedFormatException while deleting a word: " + e);
+     }
+ }
+}
diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOTests.java
deleted file mode 100644
index b704d08b3..000000000
--- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOTests.java
+++ /dev/null
@@ -1,601 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.inputmethod.latin.makedict;
-
-import android.test.AndroidTestCase;
-import android.test.MoreAsserts;
-import android.test.suitebuilder.annotation.LargeTest;
-import android.util.Log;
-import android.util.SparseArray;
-
-import com.android.inputmethod.latin.CollectionUtils;
-import com.android.inputmethod.latin.UserHistoryDictIOUtils;
-import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface;
-import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
-import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
-import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
-import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.nio.channels.FileChannel;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Random;
-import java.util.Set;
-
-/**
- * Unit tests for BinaryDictInputOutput
- */
-@LargeTest
-public class BinaryDictIOTests extends AndroidTestCase {
- private static final String TAG = BinaryDictIOTests.class.getSimpleName();
- private static final int MAX_UNIGRAMS = 100;
- private static final int UNIGRAM_FREQ = 10;
- private static final int BIGRAM_FREQ = 50;
- private static final int TOLERANCE_OF_BIGRAM_FREQ = 5;
-
- private static final int USE_BYTE_ARRAY = 1;
- private static final int USE_BYTE_BUFFER = 2;
-
- private static final List<String> sWords = CollectionUtils.newArrayList();
- private static final SparseArray<List<Integer>> sEmptyBigrams =
- CollectionUtils.newSparseArray();
- private static final SparseArray<List<Integer>> sStarBigrams = CollectionUtils.newSparseArray();
- private static final SparseArray<List<Integer>> sChainBigrams =
- CollectionUtils.newSparseArray();
-
- private static final FormatSpec.FormatOptions VERSION2 = new FormatSpec.FormatOptions(2);
- private static final FormatSpec.FormatOptions VERSION3_WITHOUT_DYNAMIC_UPDATE =
- new FormatSpec.FormatOptions(3, false /* supportsDynamicUpdate */);
- private static final FormatSpec.FormatOptions VERSION3_WITH_DYNAMIC_UPDATE =
- new FormatSpec.FormatOptions(3, true /* supportsDynamicUpdate */);
-
- public BinaryDictIOTests() {
- super();
-
- final long time = System.currentTimeMillis();
- Log.e(TAG, "Testing dictionary: seed is " + time);
- final Random random = new Random(time);
- sWords.clear();
- generateWords(MAX_UNIGRAMS, random);
-
- for (int i = 0; i < sWords.size(); ++i) {
- sChainBigrams.put(i, new ArrayList<Integer>());
- if (i > 0) {
- sChainBigrams.get(i - 1).add(i);
- }
- }
-
- sStarBigrams.put(0, new ArrayList<Integer>());
- for (int i = 1; i < sWords.size(); ++i) {
- sStarBigrams.get(0).add(i);
- }
- }
-
- // Utilities for test
-
- /**
- * Makes new buffer according to BUFFER_TYPE.
- */
- private FusionDictionaryBufferInterface getBuffer(final File file, final int bufferType) {
- FileInputStream inStream = null;
- try {
- inStream = new FileInputStream(file);
- if (bufferType == USE_BYTE_ARRAY) {
- final byte[] array = new byte[(int)file.length()];
- inStream.read(array);
- return new UserHistoryDictIOUtils.ByteArrayWrapper(array);
- } else if (bufferType == USE_BYTE_BUFFER){
- final ByteBuffer buffer = inStream.getChannel().map(
- FileChannel.MapMode.READ_ONLY, 0, file.length());
- return new BinaryDictInputOutput.ByteBufferWrapper(buffer);
- }
- } catch (IOException e) {
- Log.e(TAG, "IOException while making buffer", e);
- } finally {
- if (inStream != null) {
- try {
- inStream.close();
- } catch (IOException e) {
- Log.e(TAG, "IOException while closing stream", e);
- }
- }
- }
- return null;
- }
-
- /**
- * Generates a random word.
- */
- private String generateWord(final Random random) {
- StringBuilder builder = new StringBuilder("a");
- int count = random.nextInt() % 30; // Arbitrarily 30 chars max
- while (count > 0) {
- final long r = Math.abs(random.nextInt());
- if (r < 0) continue;
- // Don't insert 0~0x20, but insert any other code point.
- // Code points are in the range 0~0x10FFFF.
- final int candidateCodePoint = (int)(0x20 + r % (Character.MAX_CODE_POINT - 0x20));
- // Code points between MIN_ and MAX_SURROGATE are not valid on their own.
- if (candidateCodePoint >= Character.MIN_SURROGATE
- && candidateCodePoint <= Character.MAX_SURROGATE) continue;
- builder.appendCodePoint(candidateCodePoint);
- --count;
- }
- return builder.toString();
- }
-
- private void generateWords(final int number, final Random random) {
- final Set<String> wordSet = CollectionUtils.newHashSet();
- while (wordSet.size() < number) {
- wordSet.add(generateWord(random));
- }
- sWords.addAll(wordSet);
- }
-
- /**
- * Adds unigrams to the dictionary.
- */
- private void addUnigrams(final int number, final FusionDictionary dict,
- final List<String> words, final Map<String, List<String>> shortcutMap) {
- for (int i = 0; i < number; ++i) {
- final String word = words.get(i);
- final ArrayList<WeightedString> shortcuts = CollectionUtils.newArrayList();
- if (shortcutMap != null && shortcutMap.containsKey(word)) {
- for (final String shortcut : shortcutMap.get(word)) {
- shortcuts.add(new WeightedString(shortcut, UNIGRAM_FREQ));
- }
- }
- dict.add(word, UNIGRAM_FREQ, (shortcutMap == null) ? null : shortcuts,
- false /* isNotAWord */);
- }
- }
-
- private void addBigrams(final FusionDictionary dict,
- final List<String> words,
- final SparseArray<List<Integer>> bigrams) {
- for (int i = 0; i < bigrams.size(); ++i) {
- final int w1 = bigrams.keyAt(i);
- for (int w2 : bigrams.valueAt(i)) {
- dict.setBigram(words.get(w1), words.get(w2), BIGRAM_FREQ);
- }
- }
- }
-
- private long timeWritingDictToFile(final File file, final FusionDictionary dict,
- final FormatSpec.FormatOptions formatOptions) {
-
- long now = -1, diff = -1;
-
- try {
- final FileOutputStream out = new FileOutputStream(file);
-
- now = System.currentTimeMillis();
- BinaryDictInputOutput.writeDictionaryBinary(out, dict, formatOptions);
- diff = System.currentTimeMillis() - now;
-
- out.flush();
- out.close();
- } catch (IOException e) {
- Log.e(TAG, "IO exception while writing file", e);
- } catch (UnsupportedFormatException e) {
- Log.e(TAG, "UnsupportedFormatException", e);
- }
-
- return diff;
- }
-
- private void checkDictionary(final FusionDictionary dict, final List<String> words,
- final SparseArray<List<Integer>> bigrams, final Map<String, List<String>> shortcutMap) {
- assertNotNull(dict);
-
- // check unigram
- for (final String word : words) {
- final CharGroup cg = FusionDictionary.findWordInTree(dict.mRoot, word);
- assertNotNull(cg);
- }
-
- // check bigram
- for (int i = 0; i < bigrams.size(); ++i) {
- final int w1 = bigrams.keyAt(i);
- for (final int w2 : bigrams.valueAt(i)) {
- final CharGroup cg = FusionDictionary.findWordInTree(dict.mRoot, words.get(w1));
- assertNotNull(words.get(w1) + "," + words.get(w2), cg.getBigram(words.get(w2)));
- }
- }
-
- // check shortcut
- if (shortcutMap != null) {
- for (final Map.Entry<String, List<String>> entry : shortcutMap.entrySet()) {
- final CharGroup group = FusionDictionary.findWordInTree(dict.mRoot, entry.getKey());
- for (final String word : entry.getValue()) {
- assertNotNull("shortcut not found: " + entry.getKey() + ", " + word,
- group.getShortcut(word));
- }
- }
- }
- }
-
- private String outputOptions(final int bufferType,
- final FormatSpec.FormatOptions formatOptions) {
- String result = " : buffer type = "
- + ((bufferType == USE_BYTE_BUFFER) ? "byte buffer" : "byte array");
- result += " : version = " + formatOptions.mVersion;
- return result + ", supportsDynamicUpdate = " + formatOptions.mSupportsDynamicUpdate;
- }
-
- // Tests for readDictionaryBinary and writeDictionaryBinary
-
- private long timeReadingAndCheckDict(final File file, final List<String> words,
- final SparseArray<List<Integer>> bigrams, final Map<String, List<String>> shortcutMap,
- final int bufferType) {
- long now, diff = -1;
- final FusionDictionaryBufferInterface buffer = getBuffer(file, bufferType);
- assertNotNull(buffer);
-
- FusionDictionary dict = null;
- try {
- now = System.currentTimeMillis();
- dict = BinaryDictInputOutput.readDictionaryBinary(buffer, null);
- diff = System.currentTimeMillis() - now;
- } catch (IOException e) {
- Log.e(TAG, "IOException while reading dictionary", e);
- } catch (UnsupportedFormatException e) {
- Log.e(TAG, "Unsupported format", e);
- }
-
- checkDictionary(dict, words, bigrams, shortcutMap);
- return diff;
- }
-
- // Tests for readDictionaryBinary and writeDictionaryBinary
- private String runReadAndWrite(final List<String> words,
- final SparseArray<List<Integer>> bigrams, final Map<String, List<String>> shortcuts,
- final int bufferType, final FormatSpec.FormatOptions formatOptions,
- final String message) {
- File file = null;
- try {
- file = File.createTempFile("runReadAndWrite", ".dict", getContext().getCacheDir());
- } catch (IOException e) {
- Log.e(TAG, "IOException", e);
- }
- assertNotNull(file);
-
- final FusionDictionary dict = new FusionDictionary(new Node(),
- new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false));
- addUnigrams(words.size(), dict, words, shortcuts);
- addBigrams(dict, words, bigrams);
- checkDictionary(dict, words, bigrams, shortcuts);
-
- final long write = timeWritingDictToFile(file, dict, formatOptions);
- final long read = timeReadingAndCheckDict(file, words, bigrams, shortcuts, bufferType);
-
- return "PROF: read=" + read + "ms, write=" + write + "ms :" + message
- + " : " + outputOptions(bufferType, formatOptions);
- }
-
- private void runReadAndWriteTests(final List<String> results, final int bufferType,
- final FormatSpec.FormatOptions formatOptions) {
- results.add(runReadAndWrite(sWords, sEmptyBigrams, null /* shortcuts */, bufferType,
- formatOptions, "unigram"));
- results.add(runReadAndWrite(sWords, sChainBigrams, null /* shortcuts */, bufferType,
- formatOptions, "chain"));
- results.add(runReadAndWrite(sWords, sStarBigrams, null /* shortcuts */, bufferType,
- formatOptions, "star"));
- }
-
- public void testReadAndWriteWithByteBuffer() {
- final List<String> results = CollectionUtils.newArrayList();
-
- runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION2);
- runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION3_WITHOUT_DYNAMIC_UPDATE);
- runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION3_WITH_DYNAMIC_UPDATE);
-
- for (final String result : results) {
- Log.d(TAG, result);
- }
- }
-
- public void testReadAndWriteWithByteArray() {
- final List<String> results = CollectionUtils.newArrayList();
-
- runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION2);
- runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION3_WITHOUT_DYNAMIC_UPDATE);
- runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION3_WITH_DYNAMIC_UPDATE);
-
- for (final String result : results) {
- Log.d(TAG, result);
- }
- }
-
- // Tests for readUnigramsAndBigramsBinary
-
- private void checkWordMap(final List<String> expectedWords,
- final SparseArray<List<Integer>> expectedBigrams,
- final Map<Integer, String> resultWords,
- final Map<Integer, Integer> resultFrequencies,
- final Map<Integer, ArrayList<PendingAttribute>> resultBigrams) {
- // check unigrams
- final Set<String> actualWordsSet = new HashSet<String>(resultWords.values());
- final Set<String> expectedWordsSet = new HashSet<String>(expectedWords);
- assertEquals(actualWordsSet, expectedWordsSet);
-
- for (int freq : resultFrequencies.values()) {
- assertEquals(freq, UNIGRAM_FREQ);
- }
-
- // check bigrams
- final Map<String, List<String>> expBigrams = new HashMap<String, List<String>>();
- for (int i = 0; i < expectedBigrams.size(); ++i) {
- final String word1 = expectedWords.get(expectedBigrams.keyAt(i));
- for (int w2 : expectedBigrams.valueAt(i)) {
- if (expBigrams.get(word1) == null) {
- expBigrams.put(word1, new ArrayList<String>());
- }
- expBigrams.get(word1).add(expectedWords.get(w2));
- }
- }
-
- final Map<String, List<String>> actBigrams = new HashMap<String, List<String>>();
- for (Entry<Integer, ArrayList<PendingAttribute>> entry : resultBigrams.entrySet()) {
- final String word1 = resultWords.get(entry.getKey());
- final int unigramFreq = resultFrequencies.get(entry.getKey());
- for (PendingAttribute attr : entry.getValue()) {
- final String word2 = resultWords.get(attr.mAddress);
- if (actBigrams.get(word1) == null) {
- actBigrams.put(word1, new ArrayList<String>());
- }
- actBigrams.get(word1).add(word2);
-
- final int bigramFreq = BinaryDictInputOutput.reconstructBigramFrequency(
- unigramFreq, attr.mFrequency);
- assertTrue(Math.abs(bigramFreq - BIGRAM_FREQ) < TOLERANCE_OF_BIGRAM_FREQ);
- }
- }
-
- assertEquals(actBigrams, expBigrams);
- }
-
- private long timeAndCheckReadUnigramsAndBigramsBinary(final File file, final List<String> words,
- final SparseArray<List<Integer>> bigrams, final int bufferType) {
- FileInputStream inStream = null;
-
- final Map<Integer, String> resultWords = CollectionUtils.newTreeMap();
- final Map<Integer, ArrayList<PendingAttribute>> resultBigrams =
- CollectionUtils.newTreeMap();
- final Map<Integer, Integer> resultFreqs = CollectionUtils.newTreeMap();
-
- long now = -1, diff = -1;
- final FusionDictionaryBufferInterface buffer = getBuffer(file, bufferType);
- assertNotNull("Can't get buffer.", buffer);
- try {
- now = System.currentTimeMillis();
- BinaryDictIOUtils.readUnigramsAndBigramsBinary(buffer, resultWords, resultFreqs,
- resultBigrams);
- diff = System.currentTimeMillis() - now;
- } catch (IOException e) {
- Log.e(TAG, "IOException", e);
- } catch (UnsupportedFormatException e) {
- Log.e(TAG, "UnsupportedFormatException", e);
- } finally {
- if (inStream != null) {
- try {
- inStream.close();
- } catch (IOException e) {
- // do nothing
- }
- }
- }
-
- checkWordMap(words, bigrams, resultWords, resultFreqs, resultBigrams);
- return diff;
- }
-
- private String runReadUnigramsAndBigramsBinary(final List<String> words,
- final SparseArray<List<Integer>> bigrams, final int bufferType,
- final FormatSpec.FormatOptions formatOptions, final String message) {
- File file = null;
- try {
- file = File.createTempFile("runReadUnigrams", ".dict", getContext().getCacheDir());
- } catch (IOException e) {
- Log.e(TAG, "IOException", e);
- }
- assertNotNull(file);
-
- // making the dictionary from lists of words.
- final FusionDictionary dict = new FusionDictionary(new Node(),
- new FusionDictionary.DictionaryOptions(
- new HashMap<String, String>(), false, false));
- addUnigrams(words.size(), dict, words, null /* shortcutMap */);
- addBigrams(dict, words, bigrams);
-
- timeWritingDictToFile(file, dict, formatOptions);
-
- long wordMap = timeAndCheckReadUnigramsAndBigramsBinary(file, words, bigrams, bufferType);
- long fullReading = timeReadingAndCheckDict(file, words, bigrams, null /* shortcutMap */,
- bufferType);
-
- return "readDictionaryBinary=" + fullReading + ", readUnigramsAndBigramsBinary=" + wordMap
- + " : " + message + " : " + outputOptions(bufferType, formatOptions);
- }
-
- private void runReadUnigramsAndBigramsTests(final List<String> results, final int bufferType,
- final FormatSpec.FormatOptions formatOptions) {
- results.add(runReadUnigramsAndBigramsBinary(sWords, sEmptyBigrams, bufferType,
- formatOptions, "unigram"));
- results.add(runReadUnigramsAndBigramsBinary(sWords, sChainBigrams, bufferType,
- formatOptions, "chain"));
- results.add(runReadUnigramsAndBigramsBinary(sWords, sChainBigrams, bufferType,
- formatOptions, "star"));
- }
-
- public void testReadUnigramsAndBigramsBinaryWithByteBuffer() {
- final List<String> results = CollectionUtils.newArrayList();
-
- runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION2);
- runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION3_WITHOUT_DYNAMIC_UPDATE);
- runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION3_WITH_DYNAMIC_UPDATE);
-
- for (final String result : results) {
- Log.d(TAG, result);
- }
- }
-
- public void testReadUnigramsAndBigramsBinaryWithByteArray() {
- final List<String> results = CollectionUtils.newArrayList();
-
- runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION2);
- runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION3_WITHOUT_DYNAMIC_UPDATE);
- runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION3_WITH_DYNAMIC_UPDATE);
-
- for (final String result : results) {
- Log.d(TAG, result);
- }
- }
-
- // Tests for getTerminalPosition
- private String getWordFromBinary(final FusionDictionaryBufferInterface buffer,
- final int address) {
- if (buffer.position() != 0) buffer.position(0);
-
- FileHeader header = null;
- try {
- header = BinaryDictInputOutput.readHeader(buffer);
- } catch (IOException e) {
- return null;
- } catch (UnsupportedFormatException e) {
- return null;
- }
- if (header == null) return null;
- return BinaryDictInputOutput.getWordAtAddress(buffer, header.mHeaderSize,
- address - header.mHeaderSize, header.mFormatOptions).mWord;
- }
-
- private long runGetTerminalPosition(final FusionDictionaryBufferInterface buffer,
- final String word, int index, boolean contained) {
- final int expectedFrequency = (UNIGRAM_FREQ + index) % 255;
- long diff = -1;
- int position = -1;
- try {
- final long now = System.nanoTime();
- position = BinaryDictIOUtils.getTerminalPosition(buffer, word);
- diff = System.nanoTime() - now;
- } catch (IOException e) {
- Log.e(TAG, "IOException while getTerminalPosition", e);
- } catch (UnsupportedFormatException e) {
- Log.e(TAG, "UnsupportedFormatException while getTerminalPosition", e);
- }
-
- assertEquals(FormatSpec.NOT_VALID_WORD != position, contained);
- if (contained) assertEquals(getWordFromBinary(buffer, position), word);
- return diff;
- }
-
- public void testGetTerminalPosition() {
- File file = null;
- try {
- file = File.createTempFile("testGetTerminalPosition", ".dict",
- getContext().getCacheDir());
- } catch (IOException e) {
- // do nothing
- }
- assertNotNull(file);
-
- final FusionDictionary dict = new FusionDictionary(new Node(),
- new FusionDictionary.DictionaryOptions(
- new HashMap<String, String>(), false, false));
- addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */);
- timeWritingDictToFile(file, dict, VERSION3_WITH_DYNAMIC_UPDATE);
-
- final FusionDictionaryBufferInterface buffer = getBuffer(file, USE_BYTE_ARRAY);
-
- try {
- // too long word
- final String longWord = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
- assertEquals(FormatSpec.NOT_VALID_WORD,
- BinaryDictIOUtils.getTerminalPosition(buffer, longWord));
-
- // null
- assertEquals(FormatSpec.NOT_VALID_WORD,
- BinaryDictIOUtils.getTerminalPosition(buffer, null));
-
- // empty string
- assertEquals(FormatSpec.NOT_VALID_WORD,
- BinaryDictIOUtils.getTerminalPosition(buffer, ""));
- } catch (IOException e) {
- } catch (UnsupportedFormatException e) {
- }
-
- // Test a word that is contained within the dictionary.
- long sum = 0;
- for (int i = 0; i < sWords.size(); ++i) {
- final long time = runGetTerminalPosition(buffer, sWords.get(i), i, true);
- sum += time == -1 ? 0 : time;
- }
- Log.d(TAG, "per a search : " + (((double)sum) / sWords.size() / 1000000));
-
- // Test a word that isn't contained within the dictionary.
- final Random random = new Random((int)System.currentTimeMillis());
- for (int i = 0; i < 1000; ++i) {
- final String word = generateWord(random);
- if (sWords.indexOf(word) != -1) continue;
- runGetTerminalPosition(buffer, word, i, false);
- }
- }
-
- public void testDeleteWord() {
- File file = null;
- try {
- file = File.createTempFile("testDeleteWord", ".dict", getContext().getCacheDir());
- } catch (IOException e) {
- // do nothing
- }
- assertNotNull(file);
-
- final FusionDictionary dict = new FusionDictionary(new Node(),
- new FusionDictionary.DictionaryOptions(
- new HashMap<String, String>(), false, false));
- addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */);
- timeWritingDictToFile(file, dict, VERSION3_WITH_DYNAMIC_UPDATE);
-
- final FusionDictionaryBufferInterface buffer = getBuffer(file, USE_BYTE_ARRAY);
-
- try {
- MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD,
- BinaryDictIOUtils.getTerminalPosition(buffer, sWords.get(0)));
- BinaryDictIOUtils.deleteWord(buffer, sWords.get(0));
- assertEquals(FormatSpec.NOT_VALID_WORD,
- BinaryDictIOUtils.getTerminalPosition(buffer, sWords.get(0)));
-
- MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD,
- BinaryDictIOUtils.getTerminalPosition(buffer, sWords.get(5)));
- BinaryDictIOUtils.deleteWord(buffer, sWords.get(5));
- assertEquals(FormatSpec.NOT_VALID_WORD,
- BinaryDictIOUtils.getTerminalPosition(buffer, sWords.get(5)));
- } catch (IOException e) {
- } catch (UnsupportedFormatException e) {
- }
- }
-}
diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java
index 47885f023..a83749499 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java
@@ -21,46 +21,52 @@ import android.test.MoreAsserts;
import android.test.suitebuilder.annotation.LargeTest;
import android.util.Log;
-import com.android.inputmethod.latin.CollectionUtils;
-import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.ByteBufferWrapper;
-import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface;
+import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
-import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
+import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
+import com.android.inputmethod.latin.utils.CollectionUtils;
import java.io.BufferedOutputStream;
import java.io.File;
-import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
-import java.io.RandomAccessFile;
-import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Random;
@LargeTest
-public class BinaryDictIOUtilsTests extends AndroidTestCase {
+public class BinaryDictIOUtilsTests extends AndroidTestCase {
private static final String TAG = BinaryDictIOUtilsTests.class.getSimpleName();
private static final FormatSpec.FormatOptions FORMAT_OPTIONS =
new FormatSpec.FormatOptions(3, true);
- private static final int MAX_UNIGRAMS = 1500;
private static final ArrayList<String> sWords = CollectionUtils.newArrayList();
+ public static final int DEFAULT_MAX_UNIGRAMS = 1500;
+ private final int mMaxUnigrams;
+
+ private static final String TEST_DICT_FILE_EXTENSION = ".testDict";
private static final String[] CHARACTERS = {
"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
"n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
"\u00FC" /* ü */, "\u00E2" /* â */, "\u00F1" /* ñ */, // accented characters
"\u4E9C" /* 亜 */, "\u4F0A" /* 伊 */, "\u5B87" /* 宇 */, // kanji
- "\uD841\uDE28" /* 𠘨 */, "\uD840\uDC0B" /* 𠀋 */, "\uD861\uDeD7" /* 𨛗 */ // surrogate pair
+ "\uD841\uDE28" /* 𠘨 */, "\uD840\uDC0B" /* 𠀋 */, "\uD861\uDED7" /* 𨛗 */ // surrogate pair
};
public BinaryDictIOUtilsTests() {
+ // 1500 is the default max unigrams
+ this(System.currentTimeMillis(), DEFAULT_MAX_UNIGRAMS);
+ }
+
+ public BinaryDictIOUtilsTests(final long seed, final int maxUnigrams) {
super();
- final Random random = new Random(123456);
+ Log.d(TAG, "Seed for test is " + seed + ", maxUnigrams is " + maxUnigrams);
+ mMaxUnigrams = maxUnigrams;
+ final Random random = new Random(seed);
sWords.clear();
- for (int i = 0; i < MAX_UNIGRAMS; ++i) {
+ for (int i = 0; i < maxUnigrams; ++i) {
sWords.add(generateWord(random.nextInt()));
}
}
@@ -78,8 +84,8 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
return builder.toString();
}
- private static void printCharGroup(final CharGroupInfo info) {
- Log.d(TAG, " CharGroup at " + info.mOriginalAddress);
+ private static void printPtNode(final PtNodeInfo info) {
+ Log.d(TAG, " PtNode at " + info.mOriginalAddress);
Log.d(TAG, " flags = " + info.mFlags);
Log.d(TAG, " parentAddress = " + info.mParentAddress);
Log.d(TAG, " characters = " + new String(info.mCharacters, 0,
@@ -103,70 +109,75 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
Log.d(TAG, " end address = " + info.mEndAddress);
}
- private static void printNode(final FusionDictionaryBufferInterface buffer,
+ private static void printNode(final Ver3DictDecoder dictDecoder,
final FormatSpec.FormatOptions formatOptions) {
- Log.d(TAG, "Node at " + buffer.position());
- final int count = BinaryDictInputOutput.readCharGroupCount(buffer);
- Log.d(TAG, " charGroupCount = " + count);
+ final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
+ Log.d(TAG, "Node at " + dictBuffer.position());
+ final int count = BinaryDictDecoderUtils.readPtNodeCount(dictBuffer);
+ Log.d(TAG, " ptNodeCount = " + count);
for (int i = 0; i < count; ++i) {
- final CharGroupInfo currentInfo = BinaryDictInputOutput.readCharGroup(buffer,
- buffer.position(), formatOptions);
- printCharGroup(currentInfo);
+ final PtNodeInfo currentInfo = dictDecoder.readPtNode(dictBuffer.position(),
+ formatOptions);
+ printPtNode(currentInfo);
}
if (formatOptions.mSupportsDynamicUpdate) {
- final int forwardLinkAddress = buffer.readUnsignedInt24();
+ final int forwardLinkAddress = dictBuffer.readUnsignedInt24();
Log.d(TAG, " forwardLinkAddress = " + forwardLinkAddress);
}
}
- private static void printBinaryFile(final FusionDictionaryBufferInterface buffer)
+ @SuppressWarnings("unused")
+ private static void printBinaryFile(final Ver3DictDecoder dictDecoder)
throws IOException, UnsupportedFormatException {
- FileHeader header = BinaryDictInputOutput.readHeader(buffer);
- while (buffer.position() < buffer.limit()) {
- printNode(buffer, header.mFormatOptions);
+ final FileHeader fileHeader = dictDecoder.readHeader();
+ final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
+ while (dictBuffer.position() < dictBuffer.limit()) {
+ printNode(dictDecoder, fileHeader.mFormatOptions);
}
}
private int getWordPosition(final File file, final String word) {
int position = FormatSpec.NOT_VALID_WORD;
- FileInputStream inStream = null;
+
try {
- inStream = new FileInputStream(file);
- final FusionDictionaryBufferInterface buffer = new ByteBufferWrapper(
- inStream.getChannel().map(FileChannel.MapMode.READ_ONLY, 0, file.length()));
- position = BinaryDictIOUtils.getTerminalPosition(buffer, word);
+ final Ver3DictDecoder dictDecoder = new Ver3DictDecoder(file,
+ DictDecoder.USE_READONLY_BYTEBUFFER);
+ position = dictDecoder.getTerminalPosition(word);
} catch (IOException e) {
} catch (UnsupportedFormatException e) {
- } finally {
- if (inStream != null) {
- try {
- inStream.close();
- } catch (IOException e) {
- // do nothing
- }
- }
}
return position;
}
- private CharGroupInfo findWordFromFile(final File file, final String word) {
- FileInputStream inStream = null;
- CharGroupInfo info = null;
+ /**
+ * Find a word using the DictDecoder.
+ *
+ * @param dictDecoder the dict decoder
+ * @param word the word searched
+ * @return the found ptNodeInfo
+ * @throws IOException
+ * @throws UnsupportedFormatException
+ */
+ private static PtNodeInfo findWordByBinaryDictReader(final DictDecoder dictDecoder,
+ final String word) throws IOException, UnsupportedFormatException {
+ int position = dictDecoder.getTerminalPosition(word);
+ if (position != FormatSpec.NOT_VALID_WORD) {
+ dictDecoder.setPosition(0);
+ final FileHeader header = dictDecoder.readHeader();
+ dictDecoder.setPosition(position);
+ return dictDecoder.readPtNode(position, header.mFormatOptions);
+ }
+ return null;
+ }
+
+ private PtNodeInfo findWordFromFile(final File file, final String word) {
+ final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file);
+ PtNodeInfo info = null;
try {
- inStream = new FileInputStream(file);
- final FusionDictionaryBufferInterface buffer = new ByteBufferWrapper(
- inStream.getChannel().map(FileChannel.MapMode.READ_ONLY, 0, file.length()));
- info = BinaryDictIOUtils.findWordFromBuffer(buffer, word);
+ dictDecoder.openDictBuffer();
+ info = findWordByBinaryDictReader(dictDecoder, word);
} catch (IOException e) {
} catch (UnsupportedFormatException e) {
- } finally {
- if (inStream != null) {
- try {
- inStream.close();
- } catch (IOException e) {
- // do nothing
- }
- }
}
return info;
}
@@ -175,42 +186,34 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
private long insertAndCheckWord(final File file, final String word, final int frequency,
final boolean exist, final ArrayList<WeightedString> bigrams,
final ArrayList<WeightedString> shortcuts) {
- RandomAccessFile raFile = null;
BufferedOutputStream outStream = null;
- FusionDictionaryBufferInterface buffer = null;
long amountOfTime = -1;
try {
- raFile = new RandomAccessFile(file, "rw");
- buffer = new ByteBufferWrapper(raFile.getChannel().map(
- FileChannel.MapMode.READ_WRITE, 0, file.length()));
+ final Ver3DictDecoder dictDecoder = new Ver3DictDecoder(file,
+ DictDecoder.USE_WRITABLE_BYTEBUFFER);
+ dictDecoder.openDictBuffer();
outStream = new BufferedOutputStream(new FileOutputStream(file, true));
if (!exist) {
assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
}
final long now = System.nanoTime();
- BinaryDictIOUtils.insertWord(buffer, outStream, word, frequency, bigrams, shortcuts,
- false, false);
+ DynamicBinaryDictIOUtils.insertWord(dictDecoder, outStream, word, frequency, bigrams,
+ shortcuts, false, false);
amountOfTime = System.nanoTime() - now;
outStream.flush();
MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
outStream.close();
- raFile.close();
} catch (IOException e) {
+ Log.e(TAG, "Raised an IOException while inserting a word", e);
} catch (UnsupportedFormatException e) {
+ Log.e(TAG, "Raised an UnsupportedFormatException error while inserting a word", e);
} finally {
if (outStream != null) {
try {
outStream.close();
} catch (IOException e) {
- // do nothing
- }
- }
- if (raFile != null) {
- try {
- raFile.close();
- } catch (IOException e) {
- // do nothing
+ Log.e(TAG, "Failed to close the output stream", e);
}
}
}
@@ -218,65 +221,48 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
}
private void deleteWord(final File file, final String word) {
- RandomAccessFile raFile = null;
- FusionDictionaryBufferInterface buffer = null;
try {
- raFile = new RandomAccessFile(file, "rw");
- buffer = new ByteBufferWrapper(raFile.getChannel().map(
- FileChannel.MapMode.READ_WRITE, 0, file.length()));
- BinaryDictIOUtils.deleteWord(buffer, word);
+ final Ver3DictDecoder dictDecoder = new Ver3DictDecoder(file,
+ DictDecoder.USE_WRITABLE_BYTEBUFFER);
+ dictDecoder.openDictBuffer();
+ DynamicBinaryDictIOUtils.deleteWord(dictDecoder, word);
} catch (IOException e) {
} catch (UnsupportedFormatException e) {
- } finally {
- if (raFile != null) {
- try {
- raFile.close();
- } catch (IOException e) {
- // do nothing
- }
- }
}
}
private void checkReverseLookup(final File file, final String word, final int position) {
- FileInputStream inStream = null;
+ // Checks that the word stored at |position| in |file| is |word|; exceptions are only logged.
try {
- inStream = new FileInputStream(file);
- final FusionDictionaryBufferInterface buffer = new ByteBufferWrapper(
- inStream.getChannel().map(FileChannel.MapMode.READ_ONLY, 0, file.length()));
- final FileHeader header = BinaryDictInputOutput.readHeader(buffer);
- assertEquals(word, BinaryDictInputOutput.getWordAtAddress(buffer, header.mHeaderSize,
- position - header.mHeaderSize, header.mFormatOptions).mWord);
+ final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file);
+ final FileHeader fileHeader = dictDecoder.readHeader();
+ assertEquals(word,
+ BinaryDictDecoderUtils.getWordAtPosition(dictDecoder, fileHeader.mHeaderSize,
+ position, fileHeader.mFormatOptions).mWord); // position is passed as-is here
} catch (IOException e) {
+ Log.e(TAG, "Raised an IOException while looking up a word", e);
} catch (UnsupportedFormatException e) {
- } finally {
- if (inStream != null) {
- try {
- inStream.close();
- } catch (IOException e) {
- // do nothing
- }
- }
+ Log.e(TAG, "Raised an UnsupportedFormatException error while looking up a word", e);
}
}
public void testInsertWord() {
File file = null;
try {
- file = File.createTempFile("testInsertWord", ".dict", getContext().getCacheDir());
+ file = File.createTempFile("testInsertWord", TEST_DICT_FILE_EXTENSION,
+ getContext().getCacheDir());
} catch (IOException e) {
fail("IOException while creating temporary file: " + e);
}
// set an initial dictionary.
- final FusionDictionary dict = new FusionDictionary(new Node(),
+ final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false));
dict.add("abcd", 10, null, false);
try {
- final FileOutputStream out = new FileOutputStream(file);
- BinaryDictInputOutput.writeDictionaryBinary(out, dict, FORMAT_OPTIONS);
- out.close();
+ final DictEncoder dictEncoder = new Ver3DictEncoder(file);
+ dictEncoder.writeDictionary(dict, FORMAT_OPTIONS);
} catch (IOException e) {
fail("IOException while writing an initial dictionary : " + e);
} catch (UnsupportedFormatException e) {
@@ -313,22 +299,21 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
public void testInsertWordWithBigrams() {
File file = null;
try {
- file = File.createTempFile("testInsertWordWithBigrams", ".dict",
+ file = File.createTempFile("testInsertWordWithBigrams", TEST_DICT_FILE_EXTENSION,
getContext().getCacheDir());
} catch (IOException e) {
fail("IOException while creating temporary file: " + e);
}
// set an initial dictionary.
- final FusionDictionary dict = new FusionDictionary(new Node(),
+ final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false));
dict.add("abcd", 10, null, false);
dict.add("efgh", 15, null, false);
try {
- final FileOutputStream out = new FileOutputStream(file);
- BinaryDictInputOutput.writeDictionaryBinary(out, dict, FORMAT_OPTIONS);
- out.close();
+ final DictEncoder dictEncoder = new Ver3DictEncoder(file);
+ dictEncoder.writeDictionary(dict, FORMAT_OPTIONS);
} catch (IOException e) {
fail("IOException while writing an initial dictionary : " + e);
} catch (UnsupportedFormatException e) {
@@ -341,7 +326,7 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
insertAndCheckWord(file, "banana", 0, false, null, null);
insertAndCheckWord(file, "recursive", 60, true, banana, null);
- final CharGroupInfo info = findWordFromFile(file, "recursive");
+ final PtNodeInfo info = findWordFromFile(file, "recursive");
int bananaPos = getWordPosition(file, "banana");
assertNotNull(info.mBigrams);
assertEquals(info.mBigrams.size(), 1);
@@ -351,21 +336,21 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
public void testRandomWords() {
File file = null;
try {
- file = File.createTempFile("testRandomWord", ".dict", getContext().getCacheDir());
+ file = File.createTempFile("testRandomWord", TEST_DICT_FILE_EXTENSION,
+ getContext().getCacheDir());
} catch (IOException e) {
}
assertNotNull(file);
// set an initial dictionary.
- final FusionDictionary dict = new FusionDictionary(new Node(),
+ final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
new FusionDictionary.DictionaryOptions(new HashMap<String, String>(), false,
false));
dict.add("initial", 10, null, false);
try {
- final FileOutputStream out = new FileOutputStream(file);
- BinaryDictInputOutput.writeDictionaryBinary(out, dict, FORMAT_OPTIONS);
- out.close();
+ final DictEncoder dictEncoder = new Ver3DictEncoder(file);
+ dictEncoder.writeDictionary(dict, FORMAT_OPTIONS);
} catch (IOException e) {
assertTrue(false);
} catch (UnsupportedFormatException e) {
@@ -390,6 +375,6 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
Log.d(TAG, "max = " + ((double)maxTimeToInsert/1000000) + " ms.");
Log.d(TAG, "min = " + ((double)minTimeToInsert/1000000) + " ms.");
- Log.d(TAG, "avg = " + ((double)sum/MAX_UNIGRAMS/1000000) + " ms.");
+ Log.d(TAG, "avg = " + ((double)sum/mMaxUnigrams/1000000) + " ms.");
}
}
diff --git a/tests/src/com/android/inputmethod/latin/makedict/CodePointUtils.java b/tests/src/com/android/inputmethod/latin/makedict/CodePointUtils.java
new file mode 100644
index 000000000..36b958af8
--- /dev/null
+++ b/tests/src/com/android/inputmethod/latin/makedict/CodePointUtils.java
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin.makedict;
+
+import java.util.Random;
+
+// Utility methods related to code points, used by tests.
+public class CodePointUtils {
+    private CodePointUtils() {
+        // This utility class is not publicly instantiable.
+    }
+
+    /** Fills an array of the given size with random non-surrogate code points >= 0x20. */
+    public static int[] generateCodePointSet(final int codePointSetSize, final Random random) {
+        final int[] codePointSet = new int[codePointSetSize];
+        for (int i = codePointSet.length - 1; i >= 0; ) {
+            final int r = Math.abs(random.nextInt());
+            if (r < 0) continue; // Math.abs(Integer.MIN_VALUE) stays negative: draw again.
+            // Skip code points below 0x20: candidates span 0x20 up to (excluding) 0x10FFFF.
+            final int candidateCodePoint = 0x20 + r % (Character.MAX_CODE_POINT - 0x20);
+            // Code points between MIN_ and MAX_SURROGATE are not valid on their own.
+            if (candidateCodePoint >= Character.MIN_SURROGATE
+                    && candidateCodePoint <= Character.MAX_SURROGATE) continue;
+            codePointSet[i] = candidateCodePoint;
+            --i;
+        }
+        return codePointSet;
+    }
+
+    /**
+     * Generates a random word made of code points picked from {@code codePointSet}.
+     * Draws are reduced with Math.abs(x % n) rather than Math.abs(x) % n: the latter goes
+     * negative when x is Integer.MIN_VALUE, which would yield a bad length or array index.
+     */
+    public static String generateWord(final Random random, final int[] codePointSet) {
+        final StringBuilder builder = new StringBuilder();
+        // Summing 8 draws in 0..4 (33 max with the leading 1) biases the length toward longer
+        // words. This should be closer to natural language, and more importantly, it will
+        // exercise the algorithms in dicttool much more.
+        int count = 1;
+        for (int i = 0; i < 8; ++i) {
+            count += Math.abs(random.nextInt() % 5);
+        }
+        // The length is measured in chars, so a final supplementary code point may exceed count.
+        while (builder.length() < count) {
+            final int index = Math.abs(random.nextInt() % codePointSet.length);
+            builder.appendCodePoint(codePointSet[index]);
+        }
+        return builder.toString();
+    }
+}
diff --git a/tests/src/com/android/inputmethod/latin/makedict/SparseTableTests.java b/tests/src/com/android/inputmethod/latin/makedict/SparseTableTests.java
new file mode 100644
index 000000000..132483d5e
--- /dev/null
+++ b/tests/src/com/android/inputmethod/latin/makedict/SparseTableTests.java
@@ -0,0 +1,160 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin.makedict;
+
+import android.test.AndroidTestCase;
+import android.test.suitebuilder.annotation.LargeTest;
+import android.util.Log;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.Random;
+
+/**
+ * Unit tests for SparseTable.
+ */
+@LargeTest
+public class SparseTableTests extends AndroidTestCase {
+    private static final String TAG = SparseTableTests.class.getSimpleName();
+
+    // Hand-built tables for testInitializeWithArray. With BLOCK_SIZE 8, that test expects
+    // indices 0..8 to be absent and indices 9..15 to hold 1..7.
+    private static final int[] SMALL_INDEX = { SparseTable.NOT_EXIST, 0 };
+    private static final int[] BIG_INDEX = { SparseTable.NOT_EXIST, 1, 2, 3, 4, 5, 6, 7};
+
+    private final Random mRandom;
+    // Expected content for each table index; SparseTable.NOT_EXIST marks an entry left unset.
+    // Its size is the size of the tables built by the random tests.
+    private final ArrayList<Integer> mRandomIndex;
+
+    private static final int DEFAULT_SIZE = 10000;
+    private static final int BLOCK_SIZE = 8;
+
+    public SparseTableTests() {
+        this(System.currentTimeMillis(), DEFAULT_SIZE);
+    }
+
+    /**
+     * @param seed the seed of the random generator, logged so that a run can be reproduced.
+     * @param tableSize the number of entries of the randomly filled tables.
+     */
+    public SparseTableTests(final long seed, final int tableSize) {
+        super();
+        Log.d(TAG, "Seed for test is " + seed + ", size is " + tableSize);
+        mRandom = new Random(seed);
+        mRandomIndex = new ArrayList<Integer>(tableSize);
+        for (int i = 0; i < tableSize; ++i) {
+            mRandomIndex.add(SparseTable.NOT_EXIST);
+        }
+    }
+
+    public void testInitializeWithArray() {
+        final SparseTable table = new SparseTable(SMALL_INDEX, BIG_INDEX, BLOCK_SIZE);
+        for (int i = 0; i < 8; ++i) {
+            assertEquals(SparseTable.NOT_EXIST, table.get(i));
+        }
+        assertEquals(SparseTable.NOT_EXIST, table.get(8));
+        for (int i = 9; i < 16; ++i) {
+            assertEquals(i - 8, table.get(i));
+        }
+    }
+
+    public void testSet() {
+        final SparseTable table = new SparseTable(16, BLOCK_SIZE);
+        table.set(3, 6);
+        table.set(8, 16);
+        for (int i = 0; i < 16; ++i) {
+            if (i == 3 || i == 8) {
+                assertEquals(i * 2, table.get(i));
+            } else {
+                assertEquals(SparseTable.NOT_EXIST, table.get(i));
+            }
+        }
+    }
+
+    /**
+     * Refills the first {@code size} entries of mRandomIndex.
+     *
+     * @param size the number of entries to fill; must not exceed mRandomIndex.size().
+     * @param prop the chance, in percent, that an entry receives a random value instead of
+     *        SparseTable.NOT_EXIST.
+     */
+    private void generateRandomIndex(final int size, final int prop) {
+        for (int i = 0; i < size; ++i) {
+            if (mRandom.nextInt(100) < prop) {
+                mRandomIndex.set(i, mRandom.nextInt());
+            } else {
+                mRandomIndex.set(i, SparseTable.NOT_EXIST);
+            }
+        }
+    }
+
+    /**
+     * Reads the whole file into a byte array. A single read() call may return fewer bytes
+     * than requested, hence the loop. The stream is always closed.
+     */
+    private static byte[] readFully(final File file) throws IOException {
+        final byte[] bytes = new byte[(int) file.length()];
+        final InputStream inStream = new FileInputStream(file);
+        try {
+            int offset = 0;
+            while (offset < bytes.length) {
+                final int readCount = inStream.read(bytes, offset, bytes.length - offset);
+                if (readCount < 0) {
+                    throw new IOException("Unexpected end of file: " + file);
+                }
+                offset += readCount;
+            }
+        } finally {
+            inStream.close();
+        }
+        return bytes;
+    }
+
+    /**
+     * Writes the table into the two files. Both streams are closed before returning so that
+     * the files are complete when they are read back.
+     */
+    private static void writeTable(final SparseTable table, final File lookupFile,
+            final File contentFile) throws IOException {
+        final OutputStream lookupOutStream = new FileOutputStream(lookupFile);
+        try {
+            final OutputStream contentOutStream = new FileOutputStream(contentFile);
+            try {
+                table.write(lookupOutStream, contentOutStream);
+            } finally {
+                contentOutStream.close();
+            }
+        } finally {
+            lookupOutStream.close();
+        }
+    }
+
+    /**
+     * Builds a table from mRandomIndex, checks its content, then writes it to temporary files
+     * and checks that a table reloaded from those files has the same content.
+     */
+    private void runTestRandomSet() {
+        final int tableSize = mRandomIndex.size();
+        final SparseTable table = new SparseTable(tableSize, BLOCK_SIZE);
+        int elementCount = 0;
+        for (int i = 0; i < tableSize; ++i) {
+            if (mRandomIndex.get(i) != SparseTable.NOT_EXIST) {
+                table.set(i, mRandomIndex.get(i));
+                elementCount++;
+            }
+        }
+
+        Log.d(TAG, "table size = " + table.getLookupTableSize() + " + "
+                + table.getContentTableSize());
+        Log.d(TAG, "the table has " + elementCount + " elements");
+        for (int i = 0; i < tableSize; ++i) {
+            assertEquals((int) mRandomIndex.get(i), table.get(i));
+        }
+
+        // flush and reload
+        File lookupIndexFile = null;
+        File contentFile = null;
+        try {
+            // Use the cache directory like the other tests of this package.
+            final File cacheDir = getContext().getCacheDir();
+            lookupIndexFile = File.createTempFile("testRandomSet", ".small", cacheDir);
+            contentFile = File.createTempFile("testRandomSet", ".big", cacheDir);
+            writeTable(table, lookupIndexFile, contentFile);
+            final SparseTable newTable = new SparseTable(readFully(lookupIndexFile),
+                    readFully(contentFile), BLOCK_SIZE);
+            for (int i = 0; i < tableSize; ++i) {
+                assertEquals(table.get(i), newTable.get(i));
+            }
+        } catch (IOException e) {
+            // Without this the reload check would be skipped silently.
+            Log.e(TAG, "IOException while flushing and reloading", e);
+            fail("IOException while flushing and reloading: " + e);
+        } finally {
+            // Best-effort cleanup of the temporary files.
+            if (lookupIndexFile != null) {
+                lookupIndexFile.delete();
+            }
+            if (contentFile != null) {
+                contentFile.delete();
+            }
+        }
+    }
+
+    public void testRandomSet() {
+        // Exercise fill rates from 0% to 100% in steps of 10%.
+        for (int i = 0; i <= 100; i += 10) {
+            generateRandomIndex(mRandomIndex.size(), i);
+            runTestRandomSet();
+        }
+    }
+}
diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver3DictDecoderTests.java b/tests/src/com/android/inputmethod/latin/makedict/Ver3DictDecoderTests.java
new file mode 100644
index 000000000..9611599b9
--- /dev/null
+++ b/tests/src/com/android/inputmethod/latin/makedict/Ver3DictDecoderTests.java
@@ -0,0 +1,150 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin.makedict;
+
+import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
+import com.android.inputmethod.latin.makedict.DictDecoder.DictionaryBufferFactory;
+import com.android.inputmethod.latin.makedict.DictDecoder.DictionaryBufferFromByteArrayFactory;
+import com.android.inputmethod.latin.makedict.DictDecoder.
+ DictionaryBufferFromReadOnlyByteBufferFactory;
+import com.android.inputmethod.latin.makedict.DictDecoder.
+ DictionaryBufferFromWritableByteBufferFactory;
+
+import android.test.AndroidTestCase;
+import android.util.Log;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+
+/**
+ * Unit tests for Ver3DictDecoder
+ */
+public class Ver3DictDecoderTests extends AndroidTestCase {
+    private static final String TAG = Ver3DictDecoderTests.class.getSimpleName();
+
+    // The content written to the test files.
+    private final byte[] data = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+
+    // Utilities for testing
+    /**
+     * Overwrites the file with the test data, failing the test if the write fails.
+     */
+    public void writeDataToFile(final File file) {
+        FileOutputStream outStream = null;
+        try {
+            outStream = new FileOutputStream(file);
+            outStream.write(data);
+        } catch (IOException e) {
+            fail ("Can't write data to the test file");
+        } finally {
+            if (outStream != null) {
+                try {
+                    outStream.close();
+                } catch (IOException e) {
+                    Log.e(TAG, "Failed to close the output stream", e);
+                }
+            }
+        }
+    }
+
+    /**
+     * Checks that, once the test data has been written, reopening the buffer gives a buffer
+     * whose capacity is the length of the file.
+     *
+     * @param testName used as the prefix of the temporary file name.
+     * @param factory the buffer factory handed to the decoder under test.
+     */
+    @SuppressWarnings("null")
+    public void runTestOpenBuffer(final String testName, final DictionaryBufferFactory factory) {
+        File testFile = null;
+        try {
+            testFile = File.createTempFile(testName, ".tmp", getContext().getCacheDir());
+        } catch (IOException e) {
+            Log.e(TAG, "IOException while the creating temporary file", e);
+        }
+
+        assertNotNull(testFile);
+        try {
+            final Ver3DictDecoder dictDecoder = new Ver3DictDecoder(testFile, factory);
+            // The file is still empty at this point; a failure here is only logged.
+            try {
+                dictDecoder.openDictBuffer();
+            } catch (Exception e) {
+                Log.e(TAG, "Failed to open the buffer", e);
+            }
+
+            writeDataToFile(testFile);
+
+            try {
+                dictDecoder.openDictBuffer();
+            } catch (Exception e) {
+                // Fail here with the cause instead of tripping on a missing buffer below.
+                Log.e(TAG, "Raised the exception while opening buffer", e);
+                fail("Raised the exception while opening buffer: " + e);
+            }
+
+            assertEquals(testFile.length(), dictDecoder.getDictBuffer().capacity());
+        } finally {
+            // Best-effort cleanup of the temporary file.
+            testFile.delete();
+        }
+    }
+
+    public void testOpenBufferWithByteBuffer() {
+        runTestOpenBuffer("testOpenBufferWithByteBuffer",
+                new DictionaryBufferFromReadOnlyByteBufferFactory());
+    }
+
+    public void testOpenBufferWithByteArray() {
+        runTestOpenBuffer("testOpenBufferWithByteArray",
+                new DictionaryBufferFromByteArrayFactory());
+    }
+
+    public void testOpenBufferWithWritableByteBuffer() {
+        runTestOpenBuffer("testOpenBufferWithWritableByteBuffer",
+                new DictionaryBufferFromWritableByteBufferFactory());
+    }
+
+    /**
+     * Checks that getDictBuffer() returns null before the buffer is opened, and that the
+     * buffer returned by openAndGetDictBuffer() yields the test data byte by byte.
+     *
+     * @param testName used as the prefix of the temporary file name.
+     * @param factory the buffer factory handed to the decoder under test.
+     */
+    @SuppressWarnings("null")
+    public void runTestGetBuffer(final String testName, final DictionaryBufferFactory factory) {
+        File testFile = null;
+        try {
+            testFile = File.createTempFile(testName, ".tmp", getContext().getCacheDir());
+        } catch (IOException e) {
+            Log.e(TAG, "IOException while the creating temporary file", e);
+        }
+
+        // Same guard as in runTestOpenBuffer: stop here if the file could not be created.
+        assertNotNull(testFile);
+        try {
+            final Ver3DictDecoder dictDecoder = new Ver3DictDecoder(testFile, factory);
+
+            // the default return value of getBuffer() must be null.
+            assertNull("the default return value of getBuffer() is not null",
+                    dictDecoder.getDictBuffer());
+
+            writeDataToFile(testFile);
+            assertTrue(testFile.exists());
+            Log.d(TAG, "file length = " + testFile.length());
+
+            DictBuffer dictBuffer = null;
+            try {
+                dictBuffer = dictDecoder.openAndGetDictBuffer();
+            } catch (IOException e) {
+                Log.e(TAG, "Failed to open and get the buffer", e);
+            }
+            assertNotNull("the buffer must not be null", dictBuffer);
+
+            for (int i = 0; i < data.length; ++i) {
+                assertEquals(data[i], dictBuffer.readUnsignedByte());
+            }
+        } finally {
+            // Best-effort cleanup of the temporary file.
+            testFile.delete();
+        }
+    }
+
+    public void testGetBufferWithByteBuffer() {
+        runTestGetBuffer("testGetBufferWithByteBuffer",
+                new DictionaryBufferFromReadOnlyByteBufferFactory());
+    }
+
+    public void testGetBufferWithByteArray() {
+        runTestGetBuffer("testGetBufferWithByteArray",
+                new DictionaryBufferFromByteArrayFactory());
+    }
+
+    public void testGetBufferWithWritableByteBuffer() {
+        runTestGetBuffer("testGetBufferWithWritableByteBuffer",
+                new DictionaryBufferFromWritableByteBufferFactory());
+    }
+}