aboutsummaryrefslogtreecommitdiffstats
path: root/tests
diff options
context:
space:
mode:
authorYuichiro Hanada <yhanada@google.com>2012-10-01 14:50:58 +0900
committerYuichiro Hanada <yhanada@google.com>2012-10-04 17:19:47 +0900
commit3c6d9fe14840fd2c455ec65b6481ed78f99a5460 (patch)
tree2cb58f68b11553296c7edb48e88ab370ffa32f52 /tests
parentc3a98ca306d5d6a3dfce3585b73f7431dbf90bfc (diff)
downloadlatinime-3c6d9fe14840fd2c455ec65b6481ed78f99a5460.tar.gz
latinime-3c6d9fe14840fd2c455ec65b6481ed78f99a5460.tar.xz
latinime-3c6d9fe14840fd2c455ec65b6481ed78f99a5460.zip
Add insertWord.
bug: 6669677 Change-Id: Ide55a4931071de9cd42c1cddae63ddd531d2feba
Diffstat (limited to 'tests')
-rw-r--r--tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java335
1 files changed, 335 insertions, 0 deletions
diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java
new file mode 100644
index 000000000..7607b58eb
--- /dev/null
+++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java
@@ -0,0 +1,335 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin.makedict;
+
+import com.android.inputmethod.latin.CollectionUtils;
+import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.ByteBufferWrapper;
+import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.CharEncoding;
+import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface;
+import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
+import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
+import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
+
+import android.test.AndroidTestCase;
+import android.test.MoreAsserts;
+import android.util.Log;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.channels.FileChannel;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Random;
+
+public class BinaryDictIOUtilsTests extends AndroidTestCase{
+ private static final String TAG = BinaryDictIOUtilsTests.class.getSimpleName();
+ private static final FormatSpec.FormatOptions FORMAT_OPTIONS =
+ new FormatSpec.FormatOptions(3, true);
+ private static final int MAX_UNIGRAMS = 1500;
+
+ private static final ArrayList<String> sWords = CollectionUtils.newArrayList();
+
+ private static final String[] CHARACTERS = {
+ "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
+ "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
+ "\u00FC" /* ü */, "\u00E2" /* â */, "\u00F1" /* ñ */, // accented characters
+ "\u4E9C" /* 亜 */, "\u4F0A" /* 伊 */, "\u5B87" /* 宇 */, // kanji
+ "\uD841\uDE28" /* 𠘨 */, "\uD840\uDC0B" /* 𠀋 */, "\uD861\uDeD7" /* 𨛗 */ // surrogate pair
+ };
+
+ public BinaryDictIOUtilsTests() {
+ super();
+ final Random random = new Random(123456);
+ sWords.clear();
+ for (int i = 0; i < MAX_UNIGRAMS; ++i) {
+ sWords.add(generateWord(random.nextInt()));
+ }
+ }
+
+ // Utilities for test
+ private String generateWord(final int value) {
+ final int lengthOfChars = CHARACTERS.length;
+ StringBuilder builder = new StringBuilder("");
+ long lvalue = Math.abs((long)value);
+ while (lvalue > 0) {
+ builder.append(CHARACTERS[(int)(lvalue % lengthOfChars)]);
+ lvalue /= lengthOfChars;
+ }
+ if (builder.toString().equals("")) return "a";
+ return builder.toString();
+ }
+
+ private static void printCharGroup(final CharGroupInfo info) {
+ Log.d(TAG, " CharGroup at " + info.mOriginalAddress);
+ Log.d(TAG, " flags = " + info.mFlags);
+ Log.d(TAG, " parentAddress = " + info.mParentAddress);
+ Log.d(TAG, " characters = " + new String(info.mCharacters, 0,
+ info.mCharacters.length));
+ if (info.mFrequency != -1) Log.d(TAG, " frequency = " + info.mFrequency);
+ if (info.mChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS) {
+ Log.d(TAG, " children address = no children address");
+ } else {
+ Log.d(TAG, " children address = " + info.mChildrenAddress);
+ }
+ if (info.mShortcutTargets != null) {
+ for (final WeightedString ws : info.mShortcutTargets) {
+ Log.d(TAG, " shortcuts = " + ws.mWord);
+ }
+ }
+ if (info.mBigrams != null) {
+ for (final PendingAttribute attr : info.mBigrams) {
+ Log.d(TAG, " bigram = " + attr.mAddress);
+ }
+ }
+ Log.d(TAG, " end address = " + info.mEndAddress);
+ }
+
+ private static void printNode(final FusionDictionaryBufferInterface buffer,
+ final FormatSpec.FormatOptions formatOptions) {
+ Log.d(TAG, "Node at " + buffer.position());
+ final int count = BinaryDictInputOutput.readCharGroupCount(buffer);
+ Log.d(TAG, " charGroupCount = " + count);
+ for (int i = 0; i < count; ++i) {
+ final CharGroupInfo currentInfo = BinaryDictInputOutput.readCharGroup(buffer,
+ buffer.position(), formatOptions);
+ printCharGroup(currentInfo);
+ }
+ if (formatOptions.mSupportsDynamicUpdate) {
+ final int forwardLinkAddress = buffer.readUnsignedInt24();
+ Log.d(TAG, " forwardLinkAddress = " + forwardLinkAddress);
+ }
+ }
+
+ private static void printBinaryFile(final FusionDictionaryBufferInterface buffer)
+ throws IOException, UnsupportedFormatException {
+ FileHeader header = BinaryDictInputOutput.readHeader(buffer);
+ while (buffer.position() < buffer.limit()) {
+ printNode(buffer, header.mFormatOptions);
+ }
+ }
+
+ private int getWordPosition(final File file, final String word) {
+ int position = FormatSpec.NOT_VALID_WORD;
+ FileInputStream inStream = null;
+ try {
+ inStream = new FileInputStream(file);
+ final FusionDictionaryBufferInterface buffer = new ByteBufferWrapper(
+ inStream.getChannel().map(FileChannel.MapMode.READ_ONLY, 0, file.length()));
+ position = BinaryDictIOUtils.getTerminalPosition(buffer, word);
+ } catch (IOException e) {
+ } catch (UnsupportedFormatException e) {
+ } finally {
+ if (inStream != null) {
+ try {
+ inStream.close();
+ } catch (IOException e) {
+ // do nothing
+ }
+ }
+ }
+ return position;
+ }
+
+ // return amount of time to insert a word
+ private long insertAndCheckWord(final File file, final String word, final int frequency,
+ final boolean exist) {
+ RandomAccessFile raFile = null;
+ FileOutputStream outStream = null;
+ FusionDictionaryBufferInterface buffer = null;
+ long amountOfTime = -1;
+ try {
+ raFile = new RandomAccessFile(file, "rw");
+ buffer = new ByteBufferWrapper(raFile.getChannel().map(
+ FileChannel.MapMode.READ_WRITE, 0, file.length()));
+ outStream = new FileOutputStream(file, true);
+
+ if (!exist) {
+ assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
+ }
+ final long now = System.nanoTime();
+ BinaryDictIOUtils.insertWord(buffer, outStream, word, frequency, null, null, false,
+ false);
+ amountOfTime = System.nanoTime() - now;
+ MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
+ outStream.close();
+ raFile.close();
+ } catch (IOException e) {
+ } catch (UnsupportedFormatException e) {
+ } finally {
+ if (outStream != null) {
+ try {
+ outStream.close();
+ } catch (IOException e) {
+ // do nothing
+ }
+ }
+ if (raFile != null) {
+ try {
+ raFile.close();
+ } catch (IOException e) {
+ // do nothing
+ }
+ }
+ }
+ return amountOfTime;
+ }
+
+ private void deleteWord(final File file, final String word) {
+ RandomAccessFile raFile = null;
+ FusionDictionaryBufferInterface buffer = null;
+ try {
+ raFile = new RandomAccessFile(file, "rw");
+ buffer = new ByteBufferWrapper(raFile.getChannel().map(
+ FileChannel.MapMode.READ_WRITE, 0, file.length()));
+ BinaryDictIOUtils.deleteWord(buffer, word);
+ } catch (IOException e) {
+ } catch (UnsupportedFormatException e) {
+ } finally {
+ if (raFile != null) {
+ try {
+ raFile.close();
+ } catch (IOException e) {
+ // do nothing
+ }
+ }
+ }
+ }
+
+
+
+ private void checkReverseLookup(final File file, final String word, final int position) {
+ FileInputStream inStream = null;
+ try {
+ inStream = new FileInputStream(file);
+ final FusionDictionaryBufferInterface buffer = new ByteBufferWrapper(
+ inStream.getChannel().map(FileChannel.MapMode.READ_ONLY, 0, file.length()));
+ final FileHeader header = BinaryDictInputOutput.readHeader(buffer);
+ assertEquals(word, BinaryDictInputOutput.getWordAtAddress(buffer, header.mHeaderSize,
+ position - header.mHeaderSize, header.mFormatOptions));
+ } catch (IOException e) {
+ } catch (UnsupportedFormatException e) {
+ } finally {
+ if (inStream != null) {
+ try {
+ inStream.close();
+ } catch (IOException e) {
+ // do nothing
+ }
+ }
+ }
+ }
+
+ public void testInsertWord() {
+ File file = null;
+ try {
+ file = File.createTempFile("testInsertWord", ".dict");
+ } catch (IOException e) {
+ fail("IOException while creating temporary file: " + e);
+ }
+
+ // set an initial dictionary.
+ final FusionDictionary dict = new FusionDictionary(new Node(),
+ new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false));
+ dict.add("abcd", 10, null, false);
+
+ try {
+ final FileOutputStream out = new FileOutputStream(file);
+ BinaryDictInputOutput.writeDictionaryBinary(out, dict, FORMAT_OPTIONS);
+ out.close();
+ } catch (IOException e) {
+ fail("IOException while writing an initial dictionary : " + e);
+ } catch (UnsupportedFormatException e) {
+ fail("UnsupportedFormatException while writing an initial dictionary : " + e);
+ }
+
+ MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "abcd"));
+ insertAndCheckWord(file, "abcde", 10, false);
+
+ insertAndCheckWord(file, "abcdefghijklmn", 10, false);
+ checkReverseLookup(file, "abcdefghijklmn", getWordPosition(file, "abcdefghijklmn"));
+
+ insertAndCheckWord(file, "abcdabcd", 10, false);
+ checkReverseLookup(file, "abcdabcd", getWordPosition(file, "abcdabcd"));
+
+ // update the existing word.
+ insertAndCheckWord(file, "abcdabcd", 15, true);
+
+ // split 1
+ insertAndCheckWord(file, "ab", 20, false);
+
+ // split 2
+ insertAndCheckWord(file, "ami", 30, false);
+
+ deleteWord(file, "ami");
+ assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "ami"));
+
+ insertAndCheckWord(file, "abcdabfg", 30, false);
+
+ deleteWord(file, "abcd");
+ assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "abcd"));
+ }
+
+ public void testRandomWords() {
+ File file = null;
+ try {
+ file = File.createTempFile("testRandomWord", ".dict");
+ } catch (IOException e) {
+ }
+ assertNotNull(file);
+
+ // set an initial dictionary.
+ final FusionDictionary dict = new FusionDictionary(new Node(),
+ new FusionDictionary.DictionaryOptions(new HashMap<String, String>(), false,
+ false));
+ dict.add("initial", 10, null, false);
+
+ try {
+ final FileOutputStream out = new FileOutputStream(file);
+ BinaryDictInputOutput.writeDictionaryBinary(out, dict, FORMAT_OPTIONS);
+ out.close();
+ } catch (IOException e) {
+ assertTrue(false);
+ } catch (UnsupportedFormatException e) {
+ assertTrue(false);
+ }
+
+ long maxTimeToInsert = 0, sum = 0;
+ long minTimeToInsert = 100000000; // 1000000000 is an upper bound for minTimeToInsert.
+ int cnt = 0;
+ for (final String word : sWords) {
+ final long diff = insertAndCheckWord(file, word, cnt%255, false);
+ maxTimeToInsert = Math.max(maxTimeToInsert, diff);
+ minTimeToInsert = Math.min(minTimeToInsert, diff);
+ sum += diff;
+ cnt++;
+ }
+ cnt = 0;
+ for (final String word : sWords) {
+ MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
+ }
+
+ Log.d(TAG, "max = " + ((double)maxTimeToInsert/1000000) + " ms.");
+ Log.d(TAG, "min = " + ((double)minTimeToInsert/1000000) + " ms.");
+ Log.d(TAG, "avg = " + ((double)sum/MAX_UNIGRAMS/1000000) + " ms.");
+ }
+}