aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java75
-rw-r--r--tools/dicttool/src/com/android/inputmethod/latin/dicttool/CommandList.java1
-rw-r--r--tools/dicttool/src/com/android/inputmethod/latin/dicttool/Header.java69
-rw-r--r--tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java93
4 files changed, 232 insertions, 6 deletions
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
index 49a6e8e14..3ec28f313 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
@@ -19,6 +19,10 @@ package com.android.inputmethod.latin.dicttool;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils;
import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
import com.android.inputmethod.latin.makedict.DictDecoder;
+import com.android.inputmethod.latin.makedict.DictionaryHeader;
+import com.android.inputmethod.latin.makedict.FormatSpec;
+import com.android.inputmethod.latin.makedict.FormatSpec.DictionaryOptions;
+import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
@@ -34,6 +38,8 @@ import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.util.Arrays;
+import java.util.ArrayList;
+import java.util.HashMap;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
@@ -59,7 +65,7 @@ public final class BinaryDictOffdeviceUtils {
};
private final int mDecoderSpecIndex;
- T mResult;
+ public T mResult;
public DecoderChainSpec() {
mDecoderSpecIndex = 0;
@@ -142,6 +148,54 @@ public final class BinaryDictOffdeviceUtils {
}
}
+ public static class HeaderReaderProcessor implements InputProcessor<DictionaryHeader> {
+ // Arbitrarily limit the header length to 32k. Sounds like it would never be larger
+ // than this. Revisit this if needed later.
+ private final int MAX_HEADER_LENGTH = 32 * 1024;
+ @Override @Nonnull
+ public DictionaryHeader process(final InputStream input) throws IOException,
+ UnsupportedFormatException {
+ // Do everything as curtly and ad-hoc as possible for performance.
+ final byte[] tmpBuffer = new byte[12];
+ if (tmpBuffer.length != input.read(tmpBuffer)) {
+ throw new UnsupportedFormatException("File too short, not a dictionary");
+ }
+ // Ad-hoc check for the magic number. See FormatSpec.java as well as
+ // byte_array_utils.h and BinaryDictEncoderUtils#writeDictionaryHeader().
+ final int MAGIC_NUMBER_START_OFFSET = 0;
+ final int VERSION_START_OFFSET = 4;
+ final int HEADER_SIZE_OFFSET = 8;
+ final int magicNumber = ((tmpBuffer[MAGIC_NUMBER_START_OFFSET] & 0xFF) << 24)
+ + ((tmpBuffer[MAGIC_NUMBER_START_OFFSET + 1] & 0xFF) << 16)
+ + ((tmpBuffer[MAGIC_NUMBER_START_OFFSET + 2] & 0xFF) << 8)
+ + (tmpBuffer[MAGIC_NUMBER_START_OFFSET + 3] & 0xFF);
+ if (magicNumber != FormatSpec.MAGIC_NUMBER) {
+ throw new UnsupportedFormatException("Wrong magic number");
+ }
+ final int version = ((tmpBuffer[VERSION_START_OFFSET] & 0xFF) << 8)
+ + (tmpBuffer[VERSION_START_OFFSET + 1] & 0xFF);
+ if (version != FormatSpec.VERSION2 && version != FormatSpec.VERSION201
+ && version != FormatSpec.VERSION202) {
+ throw new UnsupportedFormatException("Only versions 2, 201, 202 are supported");
+ }
+ final int totalHeaderSize = ((tmpBuffer[HEADER_SIZE_OFFSET] & 0xFF) << 24)
+ + ((tmpBuffer[HEADER_SIZE_OFFSET + 1] & 0xFF) << 16)
+ + ((tmpBuffer[HEADER_SIZE_OFFSET + 2] & 0xFF) << 8)
+ + (tmpBuffer[HEADER_SIZE_OFFSET + 3] & 0xFF);
+ if (totalHeaderSize > MAX_HEADER_LENGTH) {
+ throw new UnsupportedFormatException("Header too large");
+ }
+ final byte[] headerBuffer = new byte[totalHeaderSize - tmpBuffer.length];
+ if (headerBuffer.length != input.read(headerBuffer)) {
+ throw new UnsupportedFormatException("File shorter than specified in the header");
+ }
+ final HashMap<String, String> attributes =
+ BinaryDictDecoderUtils.decodeHeaderAttributes(headerBuffer);
+ return new DictionaryHeader(totalHeaderSize, new DictionaryOptions(attributes),
+ new FormatOptions(version, false /* hasTimestamp */));
+ }
+ }
+
public static void copy(final InputStream input, final OutputStream output) throws IOException {
final byte[] buffer = new byte[COPY_BUFFER_SIZE];
for (int readBytes = input.read(buffer); readBytes >= 0; readBytes = input.read(buffer)) {
@@ -162,11 +216,22 @@ public final class BinaryDictOffdeviceUtils {
@Nonnull DecoderChainSpec spec = new DecoderChainSpec();
while (null != spec) {
try {
- try (final InputStream input = spec.getStream(src)) {
- spec.mResult = processor.process(input);
- return spec;
+ final InputStream input = spec.getStream(src);
+ spec.mResult = processor.process(input);
+ try {
+ input.close();
+ } catch (IOException e) {
+ // CipherInputStream doesn't like being closed without having read the
+ // entire stream, for some reason. But we don't want to because it's a waste
+ // of resources. We really, really don't care about this.
+ // However on close() CipherInputStream does throw this exception, wrapped
+ // in an IOException so we need to catch it.
+ if (!(e.getCause() instanceof javax.crypto.BadPaddingException)) {
+ throw e;
+ }
}
- } catch (IOException | UnsupportedFormatException e) {
+ return spec;
+ } catch (IOException | UnsupportedFormatException | ArrayIndexOutOfBoundsException e) {
// If the format is not the right one for this file, the processor will throw one
// of these exceptions. In our case, that means we should try the next spec,
// since it may still be at another format we haven't tried yet.
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CommandList.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CommandList.java
index 07450ca51..8fdf7633f 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CommandList.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CommandList.java
@@ -20,6 +20,7 @@ public class CommandList {
public static void populate() {
// TODO: Move some commands to native code.
Dicttool.addCommand("info", Info.class);
+ Dicttool.addCommand("header", Header.class);
Dicttool.addCommand("diff", Diff.class);
Dicttool.addCommand("compress", Compress.Compressor.class);
Dicttool.addCommand("uncompress", Compress.Uncompressor.class);
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Header.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Header.java
new file mode 100644
index 000000000..51efdec33
--- /dev/null
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Header.java
@@ -0,0 +1,69 @@
+/**
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.android.inputmethod.latin.dicttool;
+
+import com.android.inputmethod.latin.BinaryDictionary;
+import com.android.inputmethod.latin.dicttool.BinaryDictOffdeviceUtils.DecoderChainSpec;
+import com.android.inputmethod.latin.makedict.DictionaryHeader;
+import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
+
+import java.io.File;
+import java.util.Arrays;
+import java.util.Locale;
+
+public class Header extends Dicttool.Command {
+ public static final String COMMAND = "header";
+
+ public Header() {
+ }
+
+ @Override
+ public String getHelp() {
+ return COMMAND + " <filename>: prints the header contents of a dictionary file";
+ }
+
+ @Override
+ public void run() throws UnsupportedFormatException {
+ final boolean plumbing;
+ if (mArgs.length > 0 && "-p".equals(mArgs[0])) {
+ plumbing = true;
+ mArgs = Arrays.copyOfRange(mArgs, 1, mArgs.length);
+ } else {
+ plumbing = false;
+ }
+ if (mArgs.length < 1) {
+ throw new RuntimeException("Not enough arguments for command " + COMMAND);
+ }
+ final String filename = mArgs[0];
+ final File dictFile = new File(filename);
+ final DecoderChainSpec<DictionaryHeader> spec =
+ BinaryDictOffdeviceUtils.decodeDictionaryForProcess(dictFile,
+ new BinaryDictOffdeviceUtils.HeaderReaderProcessor());
+ if (null == spec) {
+ throw new UnsupportedFormatException(filename
+ + " doesn't seem to be a valid version 2 dictionary file");
+ }
+
+ final DictionaryHeader header = spec.mResult;
+ System.out.println("Dictionary : " + dictFile.getAbsolutePath());
+ System.out.println("Size : " + dictFile.length() + " bytes");
+ System.out.println("Format : Binary dictionary format");
+ System.out.println("Packaging : " + spec.describeChain());
+ System.out.println("Header attributes :");
+ System.out.print(header.mDictionaryOptions.toString(2 /* indentCount */, plumbing));
+ }
+}
diff --git a/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java b/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java
index 0fb4cf59c..ea9d4cc19 100644
--- a/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java
+++ b/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java
@@ -16,10 +16,17 @@
package com.android.inputmethod.latin.dicttool;
+import com.android.inputmethod.latin.common.CodePointUtils;
+import com.android.inputmethod.latin.dicttool.BinaryDictOffdeviceUtils;
+import com.android.inputmethod.latin.dicttool.Compress;
+import com.android.inputmethod.latin.dicttool.Crypt;
+import com.android.inputmethod.latin.dicttool.BinaryDictOffdeviceUtils.DecoderChainSpec;
import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
+import com.android.inputmethod.latin.makedict.BinaryDictUtils;
import com.android.inputmethod.latin.makedict.DictDecoder;
import com.android.inputmethod.latin.makedict.DictEncoder;
import com.android.inputmethod.latin.makedict.DictionaryHeader;
+import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.makedict.FormatSpec.DictionaryOptions;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary;
@@ -35,13 +42,37 @@ import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
+import java.util.ArrayList;
import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Random;
+import java.util.Set;
/**
* Unit tests for BinaryDictOffdeviceUtils
*/
public class BinaryDictOffdeviceUtilsTests extends TestCase {
private static final int TEST_FREQ = 37; // Some arbitrary value unlikely to happen by chance
+ private static final int CODE_POINT_SET_SIZE = 300;
+ final Random mRandom;
+ private static final ArrayList<String> sWords = new ArrayList<>();
+
+ public BinaryDictOffdeviceUtilsTests(final long seed, final int maxUnigrams) {
+ super();
+ mRandom = new Random(seed);
+ sWords.clear();
+ generateWords(maxUnigrams, mRandom);
+ }
+
+ private static void generateWords(final int maxUnigrams, final Random random) {
+ final int[] codePointSet = CodePointUtils.generateCodePointSet(
+ CODE_POINT_SET_SIZE, random);
+ final Set<String> wordSet = new HashSet<>();
+ while (wordSet.size() < maxUnigrams) {
+ wordSet.add(CodePointUtils.generateWord(random, codePointSet));
+ }
+ sWords.addAll(wordSet);
+ }
public void testGetRawDictWorks() throws IOException, UnsupportedFormatException {
final String VERSION = "1";
@@ -70,7 +101,7 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase {
try (final OutputStream out = Compress.getCompressedStream(
new BufferedOutputStream(new FileOutputStream(dst)))) {
final DictEncoder dictEncoder = new Ver2DictEncoder(out);
- dictEncoder.writeDictionary(dict, new FormatOptions(2, false));
+ dictEncoder.writeDictionary(dict, new FormatOptions(FormatSpec.VERSION202, false));
}
// Test for an actually compressed dictionary and its contents
@@ -119,4 +150,64 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase {
assertNull("Wrongly identified data file",
BinaryDictOffdeviceUtils.getRawDictionaryOrNull(gzDst));
}
+
+ public void runTestHeaderReaderProcessorWithOneSpec(final boolean compress, final boolean crypt)
+ throws IOException, UnsupportedFormatException {
+ final String dictName = "testHeaderReaderProcessor";
+ final String dictVersion = Long.toString(System.currentTimeMillis());
+ final FormatOptions formatOptions = BinaryDictUtils.STATIC_OPTIONS;
+ final int MAX_NUMBER_OF_OPTIONS_TO_ADD = 5;
+ final HashMap<String, String> options = new HashMap<>();
+ // Required attributes
+ options.put("dictionary", "main:en_US");
+ options.put("locale", "en_US");
+ options.put("version", Integer.toString(mRandom.nextInt()));
+ // Add some random options for test
+ final int numberOfOptionsToAdd = mRandom.nextInt() % (MAX_NUMBER_OF_OPTIONS_TO_ADD + 1);
+ for (int i = 0; i < numberOfOptionsToAdd; ++i) {
+ options.put(sWords.get(2 * i), sWords.get(2 * 1 + 1));
+ }
+ final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
+ new DictionaryOptions(options));
+
+ for (int i = 0; i < sWords.size(); ++i) {
+ final String word = sWords.get(i);
+ dict.add(word, new ProbabilityInfo(TEST_FREQ), null /* shortcuts */,
+ false /* isNotAWord */, false /* isPossiblyOffensive */);
+ }
+
+ File file = File.createTempFile(dictName, ".tmp");
+ final DictEncoder dictEncoder = BinaryDictUtils.getDictEncoder(file, formatOptions);
+ dictEncoder.writeDictionary(dict, formatOptions);
+
+ if (compress) {
+ final File rawFile = file;
+ file = File.createTempFile(dictName + ".compress", ".tmp");
+ final Compress.Compressor compressCommand = new Compress.Compressor();
+ compressCommand.setArgs(new String[] { rawFile.getPath(), file.getPath() });
+ compressCommand.run();
+ }
+ if (crypt) {
+ final File rawFile = file;
+ file = File.createTempFile(dictName + ".crypt", ".tmp");
+ final Crypt.Encrypter cryptCommand = new Crypt.Encrypter();
+ cryptCommand.setArgs(new String[] { rawFile.getPath(), file.getPath() });
+ cryptCommand.run();
+ }
+
+ final DecoderChainSpec<DictionaryHeader> spec =
+ BinaryDictOffdeviceUtils.decodeDictionaryForProcess(file,
+ new BinaryDictOffdeviceUtils.HeaderReaderProcessor());
+ assertNotNull("Can't decode a dictionary we just wrote : " + file, spec);
+ final DictionaryHeader header = spec.mResult;
+ assertEquals("raw" + (crypt ? " > encryption" : "") + (compress ? " > compression" : ""),
+ spec.describeChain());
+ assertEquals(header.mDictionaryOptions.mAttributes, options);
+ }
+
+ public void testHeaderReaderProcessor() throws IOException, UnsupportedFormatException {
+ runTestHeaderReaderProcessorWithOneSpec(false /* compress */, false /* crypt */);
+ runTestHeaderReaderProcessorWithOneSpec(true /* compress */, false /* crypt */);
+ runTestHeaderReaderProcessorWithOneSpec(true /* compress */, true /* crypt */);
+ }
}