aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java | 11
-rw-r--r-- java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java | 6
-rw-r--r-- tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java | 82
-rw-r--r-- tools/dicttool/src/com/android/inputmethod/latin/dicttool/Compress.java | 14
-rw-r--r-- tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java | 106
-rwxr-xr-x tools/dicttool/tests/etc/test-dicttool.sh | 1
6 files changed, 206 insertions, 14 deletions
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
index da5236974..031306e1d 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
@@ -1699,6 +1699,14 @@ public final class BinaryDictInputOutput {
}
/**
+ * Helper method to pass a file name instead of a File object to isBinaryDictionary.
+ */
+ public static boolean isBinaryDictionary(final String filename) {
+ final File file = new File(filename);
+ return isBinaryDictionary(file);
+ }
+
+ /**
* Basic test to find out whether the file is a binary dictionary or not.
*
* Concretely this only tests the magic number.
@@ -1706,10 +1714,9 @@ public final class BinaryDictInputOutput {
* @param filename The name of the file to test.
* @return true if it's a binary dictionary, false otherwise
*/
- public static boolean isBinaryDictionary(final String filename) {
+ public static boolean isBinaryDictionary(final File file) {
FileInputStream inStream = null;
try {
- final File file = new File(filename);
inStream = new FileInputStream(file);
final ByteBuffer buffer = inStream.getChannel().map(
FileChannel.MapMode.READ_ONLY, 0, file.length());
diff --git a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
index 7fd13d78b..44537986b 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
@@ -16,6 +16,7 @@
package com.android.inputmethod.latin.makedict;
+import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.Constants;
import java.util.ArrayList;
@@ -141,6 +142,11 @@ public final class FusionDictionary implements Iterable<Word> {
return NOT_A_TERMINAL != mFrequency;
}
+ @UsedForTesting
+ public int getFrequency() {
+ return mFrequency;
+ }
+
public boolean hasSeveralChars() {
assert(mChars.length > 0);
return 1 < mChars.length;
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
index 83c5d9ac6..9dcd7eb42 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
@@ -16,19 +16,42 @@
package com.android.inputmethod.latin.dicttool;
+import com.android.inputmethod.latin.makedict.BinaryDictInputOutput;
+
+import java.io.File;
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
import java.io.IOException;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
+import java.util.ArrayList;
/**
-* Class grouping utilities for offline dictionary making.
-*
-* Those should not be used on-device, essentially because they are quite
-* liberal about I/O and performance.
-*/
-public class BinaryDictOffdeviceUtils {
+ * Class grouping utilities for offline dictionary making.
+ *
+ * Those should not be used on-device, essentially because they are quite
+ * liberal about I/O and performance.
+ */
+public final class BinaryDictOffdeviceUtils {
+ // Prefix and suffix are arbitrary, the values do not really matter
+ private final static String PREFIX = "dicttool";
+ private final static String SUFFIX = ".tmp";
+
+ public final static String COMPRESSION = "compression";
+
+ public static class DecoderChainSpec {
+ ArrayList<String> mDecoderSpec = new ArrayList<String>();
+ File mFile;
+ public DecoderChainSpec addStep(final String stepDescription) {
+ mDecoderSpec.add(stepDescription);
+ return this;
+ }
+ }
+
public static void copy(final InputStream input, final OutputStream output) throws IOException {
final byte[] buffer = new byte[1000];
final BufferedInputStream in = new BufferedInputStream(input);
@@ -38,4 +61,51 @@ public class BinaryDictOffdeviceUtils {
in.close();
out.close();
}
+
+ /**
+ * Returns the raw binary dictionary file together with the decoding steps that produced it.
+ *
+ * This will decrypt/uncompress any number of times as necessary until it finds the binary
+ * dictionary signature, and copy the decoded file to a temporary place.
+ * If this is not a binary dictionary, the method returns null.
+ */
+ public static DecoderChainSpec getRawBinaryDictionaryOrNull(final File src) {
+ return getRawBinaryDictionaryOrNullInternal(new DecoderChainSpec(), src);
+ }
+
+ private static DecoderChainSpec getRawBinaryDictionaryOrNullInternal(
+ final DecoderChainSpec spec, final File src) {
+ // TODO: arrange for the intermediary files to be deleted
+ if (BinaryDictInputOutput.isBinaryDictionary(src)) {
+ spec.mFile = src;
+ return spec;
+ }
+ // It's not a raw dictionary - try to see if it's compressed.
+ final File uncompressedFile = tryGetUncompressedFile(src);
+ if (null != uncompressedFile) {
+ final DecoderChainSpec newSpec =
+ getRawBinaryDictionaryOrNullInternal(spec, uncompressedFile);
+ if (null == newSpec) return null;
+ return newSpec.addStep(COMPRESSION);
+ }
+ return null;
+ }
+
+ /** Try to uncompress the file passed as an argument.
+ *
+ * If the file can be uncompressed, the uncompressed version is returned. Otherwise, null
+ * is returned.
+ */
+ private static File tryGetUncompressedFile(final File src) {
+ try {
+ final File dst = File.createTempFile(PREFIX, SUFFIX);
+ final FileOutputStream dstStream = new FileOutputStream(dst);
+ copy(Compress.getUncompressedStream(new BufferedInputStream(new FileInputStream(src))),
+ new BufferedOutputStream(dstStream)); // #copy() closes the streams
+ return dst;
+ } catch (IOException e) {
+ // Could not uncompress the file: presumably the file is simply not a compressed file
+ return null;
+ }
+ }
}
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Compress.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Compress.java
index 49e90ada2..072de5c01 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Compress.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Compress.java
@@ -16,6 +16,8 @@
package com.android.inputmethod.latin.dicttool;
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
@@ -27,12 +29,12 @@ import java.util.zip.GZIPOutputStream;
public class Compress {
- private static OutputStream getCompressedStream(final OutputStream out)
+ public static OutputStream getCompressedStream(final OutputStream out)
throws java.io.IOException {
return new GZIPOutputStream(out);
}
- private static InputStream getUncompressedStream(final InputStream in) throws IOException {
+ public static InputStream getUncompressedStream(final InputStream in) throws IOException {
return new GZIPInputStream(in);
}
@@ -55,9 +57,9 @@ public class Compress {
final String inFilename = mArgs.length >= 1 ? mArgs[0] : STDIN_OR_STDOUT;
final String outFilename = mArgs.length >= 2 ? mArgs[1] : STDIN_OR_STDOUT;
final InputStream input = inFilename.equals(STDIN_OR_STDOUT) ? System.in
- : new FileInputStream(new File(inFilename));
+ : new BufferedInputStream(new FileInputStream(new File(inFilename)));
final OutputStream output = outFilename.equals(STDIN_OR_STDOUT) ? System.out
- : new FileOutputStream(new File(outFilename));
+ : new BufferedOutputStream(new FileOutputStream(new File(outFilename)));
BinaryDictOffdeviceUtils.copy(input, new GZIPOutputStream(output));
}
}
@@ -81,9 +83,9 @@ public class Compress {
final String inFilename = mArgs.length >= 1 ? mArgs[0] : STDIN_OR_STDOUT;
final String outFilename = mArgs.length >= 2 ? mArgs[1] : STDIN_OR_STDOUT;
final InputStream input = inFilename.equals(STDIN_OR_STDOUT) ? System.in
- : new FileInputStream(new File(inFilename));
+ : new BufferedInputStream(new FileInputStream(new File(inFilename)));
final OutputStream output = outFilename.equals(STDIN_OR_STDOUT) ? System.out
- : new FileOutputStream(new File(outFilename));
+ : new BufferedOutputStream(new FileOutputStream(new File(outFilename)));
BinaryDictOffdeviceUtils.copy(new GZIPInputStream(input), output);
}
}
diff --git a/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java b/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java
new file mode 100644
index 000000000..7a686e556
--- /dev/null
+++ b/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin.dicttool;
+
+import com.android.inputmethod.latin.makedict.BinaryDictInputOutput;
+import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
+import com.android.inputmethod.latin.makedict.FusionDictionary;
+import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
+import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
+import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
+
+import junit.framework.TestCase;
+
+import java.io.File;
+import java.io.BufferedOutputStream;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+import java.util.ArrayList;
+import java.util.HashMap;
+
+/**
+ * Unit tests for BinaryDictOffdeviceUtils
+ */
+public class BinaryDictOffdeviceUtilsTests extends TestCase {
+ private static final int TEST_FREQ = 37; // Some arbitrary value unlikely to happen by chance
+
+ public void testGetRawDictWorks() throws IOException, UnsupportedFormatException {
+ // Create a thrice-compressed dictionary file.
+ final FusionDictionary dict = new FusionDictionary(new Node(),
+ new DictionaryOptions(new HashMap<String, String>(),
+ false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */));
+ dict.add("foo", TEST_FREQ, null, false /* isNotAWord */);
+ dict.add("fta", 1, null, false /* isNotAWord */);
+ dict.add("ftb", 1, null, false /* isNotAWord */);
+ dict.add("bar", 1, null, false /* isNotAWord */);
+ dict.add("fool", 1, null, false /* isNotAWord */);
+
+ final File dst = File.createTempFile("testGetRawDict", ".tmp");
+ final OutputStream out = Compress.getCompressedStream(
+ Compress.getCompressedStream(
+ Compress.getCompressedStream(
+ new BufferedOutputStream(new FileOutputStream(dst)))));
+
+ BinaryDictInputOutput.writeDictionaryBinary(out, dict, new FormatOptions(2, false));
+
+ // Test for an actually compressed dictionary and its contents
+ final BinaryDictOffdeviceUtils.DecoderChainSpec decodeSpec =
+ BinaryDictOffdeviceUtils.getRawBinaryDictionaryOrNull(dst);
+ for (final String step : decodeSpec.mDecoderSpec) {
+ assertEquals("Wrong decode spec", BinaryDictOffdeviceUtils.COMPRESSION, step);
+ }
+ assertEquals("Wrong decode spec", 3, decodeSpec.mDecoderSpec.size());
+ final FileInputStream inStream = new FileInputStream(decodeSpec.mFile);
+ final ByteBuffer buffer = inStream.getChannel().map(
+ FileChannel.MapMode.READ_ONLY, 0, decodeSpec.mFile.length());
+ final FusionDictionary resultDict = BinaryDictInputOutput.readDictionaryBinary(
+ new BinaryDictInputOutput.ByteBufferWrapper(buffer),
+ null /* dict : an optional dictionary to add words to, or null */);
+ assertEquals("Dictionary can't be read back correctly",
+ resultDict.findWordInTree(resultDict.mRoot, "foo").getFrequency(), TEST_FREQ);
+ }
+
+ public void testGetRawDictFails() throws IOException {
+ // Create a 1k file of garbage: write(int) only emits the low-order byte, 1024 times
+ final File dst = File.createTempFile("testGetRawDict", ".tmp");
+ final OutputStream out = new BufferedOutputStream(new FileOutputStream(dst));
+ for (int i = 0; i < 1024; ++i) {
+ out.write(0x12345678);
+ }
+ out.close();
+
+ // Test that a random data file actually fails
+ assertNull("Wrongly identified data file",
+ BinaryDictOffdeviceUtils.getRawBinaryDictionaryOrNull(dst));
+
+ final File gzDst = File.createTempFile("testGetRawDict", ".tmp");
+ final OutputStream gzOut =
+ Compress.getCompressedStream(new BufferedOutputStream(new FileOutputStream(gzDst)));
+ for (int i = 0; i < 1024; ++i) {
+ gzOut.write(0x12345678);
+ }
+ gzOut.close();
+
+ // Test that a compressed random data file actually fails
+ assertNull("Wrongly identified data file",
+ BinaryDictOffdeviceUtils.getRawBinaryDictionaryOrNull(gzDst));
+ }
+}
diff --git a/tools/dicttool/tests/etc/test-dicttool.sh b/tools/dicttool/tests/etc/test-dicttool.sh
index 1283be21a..0f3ed6d62 100755
--- a/tools/dicttool/tests/etc/test-dicttool.sh
+++ b/tools/dicttool/tests/etc/test-dicttool.sh
@@ -14,3 +14,4 @@
# limitations under the License.
java -classpath ${ANDROID_HOST_OUT}/framework/junit.jar:${ANDROID_HOST_OUT}/framework/dicttool_aosp.jar junit.textui.TestRunner com.android.inputmethod.latin.makedict.BinaryDictInputOutputTest
+java -classpath ${ANDROID_HOST_OUT}/framework/junit.jar:${ANDROID_HOST_OUT}/framework/dicttool_aosp.jar junit.textui.TestRunner com.android.inputmethod.latin.dicttool.BinaryDictOffdeviceUtilsTests