aboutsummaryrefslogtreecommitdiffstats
path: root/tools/dicttool/src
diff options
context:
space:
mode:
Diffstat (limited to 'tools/dicttool/src')
-rw-r--r--tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java262
-rw-r--r--tools/dicttool/src/com/android/inputmethod/latin/dicttool/CommandList.java1
-rw-r--r--tools/dicttool/src/com/android/inputmethod/latin/dicttool/Header.java69
-rw-r--r--tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java6
-rw-r--r--tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java2
5 files changed, 238 insertions, 102 deletions
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
index 1c5dfa9fb..3ec28f313 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
@@ -19,6 +19,10 @@ package com.android.inputmethod.latin.dicttool;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils;
import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
import com.android.inputmethod.latin.makedict.DictDecoder;
+import com.android.inputmethod.latin.makedict.DictionaryHeader;
+import com.android.inputmethod.latin.makedict.FormatSpec;
+import com.android.inputmethod.latin.makedict.FormatSpec.DictionaryOptions;
+import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
@@ -27,12 +31,18 @@ import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
+import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.util.Arrays;
+import java.util.ArrayList;
+import java.util.HashMap;
+
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
/**
* Class grouping utilities for offline dictionary making.
@@ -44,26 +54,27 @@ public final class BinaryDictOffdeviceUtils {
// Prefix and suffix are arbitrary, the values do not really matter
private final static String PREFIX = "dicttool";
private final static String SUFFIX = ".tmp";
-
private final static int COPY_BUFFER_SIZE = 8192;
- public static class DecoderChainSpec {
+ public static class DecoderChainSpec<T> {
public final static int COMPRESSION = 1;
public final static int ENCRYPTION = 2;
- private final static int MAX_DECODE_DEPTH = 4;
- final int[] mDecoderSpec;
- File mFile;
+ private final static int[][] VALID_DECODER_CHAINS = {
+ { }, { COMPRESSION }, { ENCRYPTION, COMPRESSION }
+ };
+
+ private final int mDecoderSpecIndex;
+ public T mResult;
+ /**
+ * Creates the first spec to try: entry 0 of VALID_DECODER_CHAINS (the empty chain, so the
+ * file is read without any decoding step), with no result yet.
+ */
public DecoderChainSpec() {
- mDecoderSpec = new int[0];
- mFile = null;
+ mDecoderSpecIndex = 0;
+ mResult = null;
}
- public DecoderChainSpec(final DecoderChainSpec src, final int newStep) {
- mDecoderSpec = Arrays.copyOf(src.mDecoderSpec, src.mDecoderSpec.length + 1);
- mDecoderSpec[src.mDecoderSpec.length] = newStep;
- mFile = src.mFile;
+ /**
+ * Creates the spec that follows src: it selects the entry of VALID_DECODER_CHAINS right after
+ * the one src uses, and starts out with whatever result src currently holds.
+ * This constructor does not check that such an entry exists.
+ */
+ private DecoderChainSpec(final DecoderChainSpec<T> src) {
+ mDecoderSpecIndex = src.mDecoderSpecIndex + 1;
+ mResult = src.mResult;
}
private String getStepDescription(final int step) {
@@ -79,110 +90,165 @@ public final class BinaryDictOffdeviceUtils {
+ /**
+ * Describes this chain for display: the word "raw", followed for each decoding step of the
+ * chain, in order, by " > " and the description of that step.
+ */
public String describeChain() {
final StringBuilder s = new StringBuilder("raw");
- for (final int step : mDecoderSpec) {
+ for (final int step : VALID_DECODER_CHAINS[mDecoderSpecIndex]) {
s.append(" > ");
s.append(getStepDescription(step));
}
return s.toString();
}
- }
- public static void copy(final InputStream input, final OutputStream output) throws IOException {
- final byte[] buffer = new byte[COPY_BUFFER_SIZE];
- for (int readBytes = input.read(buffer); readBytes >= 0; readBytes = input.read(buffer)) {
- output.write(buffer, 0, readBytes);
+ /**
+  * Returns the next sequential spec, or null if every valid decoder chain has been used.
+  *
+  * The returned spec carries over the current value of mResult.
+  *
+  * @return a new spec for the following entry of VALID_DECODER_CHAINS, or null if exhausted.
+  */
+ @Nullable
+ public DecoderChainSpec<T> next() {
+     if (mDecoderSpecIndex + 1 >= VALID_DECODER_CHAINS.length) {
+         return null;
+     }
+     // Keep the type parameter: a raw DecoderChainSpec would make mResult untyped for callers.
+     return new DecoderChainSpec<T>(this);
}
- }
- /**
- * Returns a decrypted/uncompressed dictionary.
- *
- * This will decrypt/uncompress any number of times as necessary until it finds the
- * dictionary signature, and copy the decoded file to a temporary place.
- * If this is not a dictionary, the method returns null.
- */
- public static DecoderChainSpec getRawDictionaryOrNull(final File src) {
- return getRawDictionaryOrNullInternal(new DecoderChainSpec(), src, 0);
+ /**
+  * Opens src and wraps it in the decoders of this chain, in chain order.
+  *
+  * If the stream cannot be fully set up, whatever was opened so far is closed before the
+  * exception is propagated, so that trying several chains on the same file does not leave
+  * open file handles behind.
+  *
+  * @param src the file to read.
+  * @return the decoded stream; the caller is responsible for closing it.
+  * @throws FileNotFoundException if src cannot be opened.
+  * @throws IOException if opening or wrapping the stream fails.
+  */
+ public InputStream getStream(final File src) throws FileNotFoundException, IOException {
+     InputStream input = null;
+     try {
+         input = new BufferedInputStream(new FileInputStream(src));
+         for (final int step : VALID_DECODER_CHAINS[mDecoderSpecIndex]) {
+             switch (step) {
+             case COMPRESSION:
+                 input = Compress.getUncompressedStream(input);
+                 break;
+             case ENCRYPTION:
+                 input = Crypt.getDecryptedStream(input);
+                 break;
+             }
+         }
+         return input;
+     } catch (final IOException | RuntimeException e) {
+         if (null != input) {
+             try {
+                 input.close();
+             } catch (final IOException ignored) {
+                 // The original failure is the one worth reporting.
+             }
+         }
+         throw e;
+     }
+ }
}
- private static DecoderChainSpec getRawDictionaryOrNullInternal(
- final DecoderChainSpec spec, final File src, final int depth) {
- // Unfortunately the decoding scheme we use can consider any data to be encrypted
- // and will produce some output, meaning it's not possible to reliably detect encrypted
- // data. Thus, some non-dictionary files (especially small) ones may successfully decrypt
- // over and over, ending in a stack overflow. Hence we limit the depth at which we try
- // decoding the file.
- if (depth > DecoderChainSpec.MAX_DECODE_DEPTH) {
- return null;
- }
- if (BinaryDictDecoderUtils.isBinaryDictionary(src)
- || CombinedInputOutput.isCombinedDictionary(src.getAbsolutePath())) {
- spec.mFile = src;
- return spec;
- }
- // It's not a raw dictionary - try to see if it's compressed.
- final File uncompressedFile = tryGetUncompressedFile(src);
- if (null != uncompressedFile) {
- final DecoderChainSpec newSpec =
- getRawDictionaryOrNullInternal(spec, uncompressedFile, depth + 1);
- if (null == newSpec) return null;
- return new DecoderChainSpec(newSpec, DecoderChainSpec.COMPRESSION);
- }
- // It's not a compressed either - try to see if it's crypted.
- final File decryptedFile = tryGetDecryptedFile(src);
- if (null != decryptedFile) {
- final DecoderChainSpec newSpec =
- getRawDictionaryOrNullInternal(spec, decryptedFile, depth + 1);
- if (null == newSpec) return null;
- return new DecoderChainSpec(newSpec, DecoderChainSpec.ENCRYPTION);
- }
- return null;
+ /**
+  * A consumer of a decoded dictionary stream that produces a result of type T.
+  */
+ public interface InputProcessor<T> {
+     /**
+      * Reads the given stream and builds the result.
+      *
+      * @param input the stream to read.
+      * @return the result of processing the stream.
+      * @throws IOException declared for failures while reading the stream.
+      * @throws UnsupportedFormatException declared for contents that are not in the format
+      *         this processor handles.
+      */
+     @Nonnull
+     T process(@Nonnull InputStream input) throws IOException, UnsupportedFormatException;
}
- /* Try to uncompress the file passed as an argument.
- *
- * If the file can be uncompressed, the uncompressed version is returned. Otherwise, null
- * is returned.
- */
- private static File tryGetUncompressedFile(final File src) {
- try {
+ /**
+ * Processor that copies the stream it is given to a temporary file, and accepts the copy
+ * only if it is recognized as a binary dictionary or as a combined dictionary.
+ */
+ public static class CopyProcessor implements InputProcessor<File> {
+ /**
+ * Copies input to a new temporary file and returns that file if it is a dictionary.
+ *
+ * @throws UnsupportedFormatException if the copy is not recognized as a dictionary.
+ */
+ @Override @Nonnull
+ public File process(@Nonnull final InputStream input) throws IOException,
+ UnsupportedFormatException {
final File dst = File.createTempFile(PREFIX, SUFFIX);
+ // The copy is only scheduled for deletion at exit; it is not deleted here, even when it
+ // is rejected below.
dst.deleteOnExit();
- try (
- final InputStream input = Compress.getUncompressedStream(
- new BufferedInputStream(new FileInputStream(src)));
- final OutputStream output = new BufferedOutputStream(new FileOutputStream(dst))
- ) {
+ try (final OutputStream output = new BufferedOutputStream(new FileOutputStream(dst))) {
copy(input, output);
- return dst;
+ // Flush and close before the checks below: they are given the file itself, not this
+ // stream, so everything must have reached the file by then.
+ output.flush();
+ output.close();
+ if (BinaryDictDecoderUtils.isBinaryDictionary(dst)
+ || CombinedInputOutput.isCombinedDictionary(dst.getAbsolutePath())) {
+ return dst;
+ }
}
- } catch (final IOException e) {
- // Could not uncompress the file: presumably the file is simply not a compressed file
- return null;
+ throw new UnsupportedFormatException("Input stream not at the expected format");
}
}
- /* Try to decrypt the file passed as an argument.
+ /**
+  * Processor that reads only the header of a binary dictionary from the stream.
+  */
+ public static class HeaderReaderProcessor implements InputProcessor<DictionaryHeader> {
+     // Arbitrarily limit the header length to 32k. Sounds like it would never be larger
+     // than this. Revisit this if needed later.
+     private static final int MAX_HEADER_LENGTH = 32 * 1024;
+     // Number of bytes read up front: they hold the magic number, the version and the total
+     // header size at the offsets below.
+     private static final int FIXED_PART_LENGTH = 12;
+     // Ad-hoc offsets. See FormatSpec.java as well as byte_array_utils.h and
+     // BinaryDictEncoderUtils#writeDictionaryHeader().
+     private static final int MAGIC_NUMBER_START_OFFSET = 0;
+     private static final int VERSION_START_OFFSET = 4;
+     private static final int HEADER_SIZE_OFFSET = 8;
+
+     /**
+      * Reads and checks the header at the start of input.
+      *
+      * @param input a stream positioned at the start of the candidate dictionary.
+      * @return the decoded header.
+      * @throws IOException if reading the stream fails.
+      * @throws UnsupportedFormatException if the stream is too short, does not start with the
+      *         dictionary magic number, has an unsupported version, or declares a header size
+      *         that is smaller than the fixed part or larger than MAX_HEADER_LENGTH.
+      */
+     @Override @Nonnull
+     public DictionaryHeader process(final InputStream input) throws IOException,
+             UnsupportedFormatException {
+         // Do everything as curtly and ad-hoc as possible for performance.
+         final byte[] tmpBuffer = new byte[FIXED_PART_LENGTH];
+         if (tmpBuffer.length != readFully(input, tmpBuffer)) {
+             throw new UnsupportedFormatException("File too short, not a dictionary");
+         }
+         final int magicNumber = readInt32(tmpBuffer, MAGIC_NUMBER_START_OFFSET);
+         if (magicNumber != FormatSpec.MAGIC_NUMBER) {
+             throw new UnsupportedFormatException("Wrong magic number");
+         }
+         final int version = ((tmpBuffer[VERSION_START_OFFSET] & 0xFF) << 8)
+                 + (tmpBuffer[VERSION_START_OFFSET + 1] & 0xFF);
+         if (version != FormatSpec.VERSION2 && version != FormatSpec.VERSION201
+                 && version != FormatSpec.VERSION202) {
+             throw new UnsupportedFormatException("Only versions 2, 201, 202 are supported");
+         }
+         final int totalHeaderSize = readInt32(tmpBuffer, HEADER_SIZE_OFFSET);
+         // The declared size includes the bytes already read. Anything smaller (including a
+         // negative value from a set high bit) is corrupt and would otherwise end up as a
+         // negative array size below.
+         if (totalHeaderSize < tmpBuffer.length) {
+             throw new UnsupportedFormatException("Invalid header size");
+         }
+         if (totalHeaderSize > MAX_HEADER_LENGTH) {
+             throw new UnsupportedFormatException("Header too large");
+         }
+         final byte[] headerBuffer = new byte[totalHeaderSize - tmpBuffer.length];
+         if (headerBuffer.length != readFully(input, headerBuffer)) {
+             throw new UnsupportedFormatException("File shorter than specified in the header");
+         }
+         final HashMap<String, String> attributes =
+                 BinaryDictDecoderUtils.decodeHeaderAttributes(headerBuffer);
+         return new DictionaryHeader(totalHeaderSize, new DictionaryOptions(attributes),
+                 new FormatOptions(version, false /* hasTimestamp */));
+     }
+
+     /**
+      * Reads from input until buffer is full or the stream ends, and returns the number of
+      * bytes read. A single read() call may return fewer bytes than requested even when more
+      * data follows, so it cannot be used to detect a short file.
+      */
+     private static int readFully(final InputStream input, final byte[] buffer)
+             throws IOException {
+         int total = 0;
+         while (total < buffer.length) {
+             final int count = input.read(buffer, total, buffer.length - total);
+             if (count < 0) {
+                 break;
+             }
+             total += count;
+         }
+         return total;
+     }
+
+     /** Decodes the four bytes starting at offset as a big-endian 32-bit integer. */
+     private static int readInt32(final byte[] buffer, final int offset) {
+         return ((buffer[offset] & 0xFF) << 24)
+                 + ((buffer[offset + 1] & 0xFF) << 16)
+                 + ((buffer[offset + 2] & 0xFF) << 8)
+                 + (buffer[offset + 3] & 0xFF);
+     }
+ }
+
+ /**
+  * Copies everything that can be read from input to output, COPY_BUFFER_SIZE bytes at a time.
+  * Neither stream is closed or flushed by this method.
+  *
+  * @param input the stream to read until its end.
+  * @param output the stream to write the bytes to.
+  * @throws IOException if reading or writing fails.
+  */
+ public static void copy(final InputStream input, final OutputStream output) throws IOException {
+     final byte[] chunk = new byte[COPY_BUFFER_SIZE];
+     int count;
+     while ((count = input.read(chunk)) >= 0) {
+         output.write(chunk, 0, count);
+     }
+ }
+
+ /**
+ * Process a dictionary, decrypting/uncompressing it on the fly as necessary.
*
- * If the file can be decrypted, the decrypted version is returned. Otherwise, null
- * is returned.
+ * This will execute the given processor repeatedly with the possible alternatives
+ * for dictionary format until the processor does not throw an exception.
+ * If the processor succeeds for none of the possible formats, the method returns null.
*/
- private static File tryGetDecryptedFile(final File src) {
- try {
- final File dst = File.createTempFile(PREFIX, SUFFIX);
- dst.deleteOnExit();
- try (
- final InputStream input = Crypt.getDecryptedStream(
- new BufferedInputStream(new FileInputStream(src)));
- final OutputStream output = new BufferedOutputStream(new FileOutputStream(dst))
- ) {
- copy(input, output);
- return dst;
+ @Nullable
+ public static <T> DecoderChainSpec<T> decodeDictionaryForProcess(@Nonnull final File src,
+         @Nonnull final InputProcessor<T> processor) {
+     // Deliberately not annotated @Nonnull: this becomes null once every chain was tried.
+     DecoderChainSpec<T> spec = new DecoderChainSpec<T>();
+     while (null != spec) {
+         InputStream input = null;
+         try {
+             input = spec.getStream(src);
+             spec.mResult = processor.process(input);
+             try {
+                 input.close();
+             } catch (IOException e) {
+                 // CipherInputStream doesn't like being closed without having read the
+                 // entire stream, for some reason. But we don't want to because it's a waste
+                 // of resources. We really, really don't care about this.
+                 // However on close() CipherInputStream does throw this exception, wrapped
+                 // in an IOException so we need to catch it.
+                 if (!(e.getCause() instanceof javax.crypto.BadPaddingException)) {
+                     throw e;
+                 }
+             }
+             return spec;
+         } catch (IOException | UnsupportedFormatException | ArrayIndexOutOfBoundsException e) {
+             // If the format is not the right one for this file, the processor will throw one
+             // of these exceptions. In our case, that means we should try the next spec,
+             // since it may still be at another format we haven't tried yet.
+             // TODO: stop using exceptions for this non-exceptional case.
+             if (null != input) {
+                 // Release the stream of the rejected chain; otherwise every failed attempt
+                 // would leave the source file open.
+                 try {
+                     input.close();
+                 } catch (final IOException ignored) {
+                     // This chain is being abandoned anyway.
+                 }
+             }
}
- } catch (final IOException e) {
- // Could not decrypt the file: presumably the file is simply not a crypted file
- return null;
+         spec = spec.next();
}
+     return null;
+ }
+
+ /**
+  * Decodes src into a new temporary file holding the raw dictionary, ready for any treatment
+  * the client wants.
+  *
+  * @param src the file to decode.
+  * @return the spec of the chain that produced a dictionary, with the new file as its result,
+  *         or null if no chain did.
+  */
+ @Nullable
+ public static DecoderChainSpec<File> getRawDictionaryOrNull(@Nonnull final File src) {
+     final CopyProcessor copier = new CopyProcessor();
+     return decodeDictionaryForProcess(src, copier);
}
static FusionDictionary getDictionary(final String filename, final boolean report) {
@@ -192,28 +258,28 @@ public final class BinaryDictOffdeviceUtils {
System.out.println("Size : " + file.length() + " bytes");
}
try {
- final DecoderChainSpec decodedSpec = getRawDictionaryOrNull(file);
+ final DecoderChainSpec<File> decodedSpec = getRawDictionaryOrNull(file);
if (null == decodedSpec) {
throw new RuntimeException("Does not seem to be a dictionary file " + filename);
}
- if (CombinedInputOutput.isCombinedDictionary(decodedSpec.mFile.getAbsolutePath())) {
+ if (CombinedInputOutput.isCombinedDictionary(decodedSpec.mResult.getAbsolutePath())) {
if (report) {
System.out.println("Format : Combined format");
System.out.println("Packaging : " + decodedSpec.describeChain());
- System.out.println("Uncompressed size : " + decodedSpec.mFile.length());
+ System.out.println("Uncompressed size : " + decodedSpec.mResult.length());
}
try (final BufferedReader reader = new BufferedReader(
- new InputStreamReader(new FileInputStream(decodedSpec.mFile), "UTF-8"))) {
+ new InputStreamReader(new FileInputStream(decodedSpec.mResult), "UTF-8"))) {
return CombinedInputOutput.readDictionaryCombined(reader);
}
}
final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(
- decodedSpec.mFile, 0, decodedSpec.mFile.length(),
+ decodedSpec.mResult, 0, decodedSpec.mResult.length(),
DictDecoder.USE_BYTEARRAY);
if (report) {
System.out.println("Format : Binary dictionary format");
System.out.println("Packaging : " + decodedSpec.describeChain());
- System.out.println("Uncompressed size : " + decodedSpec.mFile.length());
+ System.out.println("Uncompressed size : " + decodedSpec.mResult.length());
}
return dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */);
} catch (final IOException | UnsupportedFormatException e) {
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CommandList.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CommandList.java
index 07450ca51..8fdf7633f 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CommandList.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CommandList.java
@@ -20,6 +20,7 @@ public class CommandList {
public static void populate() {
// TODO: Move some commands to native code.
Dicttool.addCommand("info", Info.class);
+ Dicttool.addCommand("header", Header.class);
Dicttool.addCommand("diff", Diff.class);
Dicttool.addCommand("compress", Compress.Compressor.class);
Dicttool.addCommand("uncompress", Compress.Uncompressor.class);
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Header.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Header.java
new file mode 100644
index 000000000..51efdec33
--- /dev/null
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Header.java
@@ -0,0 +1,69 @@
+/**
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.android.inputmethod.latin.dicttool;
+
+import com.android.inputmethod.latin.BinaryDictionary;
+import com.android.inputmethod.latin.dicttool.BinaryDictOffdeviceUtils.DecoderChainSpec;
+import com.android.inputmethod.latin.makedict.DictionaryHeader;
+import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
+
+import java.io.File;
+import java.util.Arrays;
+import java.util.Locale;
+
+/**
+ * Dicttool command that prints the header attributes of a dictionary file.
+ *
+ * A leading "-p" argument is consumed and passed on as the plumbing flag when formatting the
+ * attributes; the next argument is the name of the file to read.
+ */
+public class Header extends Dicttool.Command {
+    public static final String COMMAND = "header";
+
+    public Header() {
+    }
+
+    @Override
+    public String getHelp() {
+        return COMMAND + " <filename>: prints the header contents of a dictionary file";
+    }
+
+    /**
+     * Reads the header of the file named on the command line and prints it to standard output.
+     *
+     * @throws UnsupportedFormatException if no header could be read from the file.
+     */
+    @Override
+    public void run() throws UnsupportedFormatException {
+        final boolean plumbing = mArgs.length > 0 && "-p".equals(mArgs[0]);
+        if (plumbing) {
+            // Drop the option so that the file name is always the first argument below.
+            mArgs = Arrays.copyOfRange(mArgs, 1, mArgs.length);
+        }
+        if (mArgs.length < 1) {
+            throw new RuntimeException("Not enough arguments for command " + COMMAND);
+        }
+
+        final String path = mArgs[0];
+        final File dictFile = new File(path);
+        final BinaryDictOffdeviceUtils.HeaderReaderProcessor headerReader =
+                new BinaryDictOffdeviceUtils.HeaderReaderProcessor();
+        final DecoderChainSpec<DictionaryHeader> spec =
+                BinaryDictOffdeviceUtils.decodeDictionaryForProcess(dictFile, headerReader);
+        if (null == spec) {
+            throw new UnsupportedFormatException(path
+                    + " doesn't seem to be a valid version 2 dictionary file");
+        }
+
+        final DictionaryHeader header = spec.mResult;
+        System.out.println("Dictionary : " + dictFile.getAbsolutePath());
+        System.out.println("Size : " + dictFile.length() + " bytes");
+        System.out.println("Format : Binary dictionary format");
+        System.out.println("Packaging : " + spec.describeChain());
+        System.out.println("Header attributes :");
+        System.out.print(header.mDictionaryOptions.toString(2 /* indentCount */, plumbing));
+    }
+}
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java
index 47ea70629..4e5c0742e 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java
@@ -77,16 +77,16 @@ public class Package {
if (mArgs.length != 2) {
throw new RuntimeException("Too many/too few arguments for command " + COMMAND);
}
- final BinaryDictOffdeviceUtils.DecoderChainSpec decodedSpec =
+ final BinaryDictOffdeviceUtils.DecoderChainSpec<File> decodedSpec =
BinaryDictOffdeviceUtils.getRawDictionaryOrNull(new File(mArgs[0]));
if (null == decodedSpec) {
System.out.println(mArgs[0] + " does not seem to be a dictionary");
return;
}
System.out.println("Packaging : " + decodedSpec.describeChain());
- System.out.println("Uncompressed size : " + decodedSpec.mFile.length());
+ System.out.println("Uncompressed size : " + decodedSpec.mResult.length());
try (
- final InputStream input = getFileInputStream(decodedSpec.mFile);
+ final InputStream input = getFileInputStream(decodedSpec.mResult);
final OutputStream output = new BufferedOutputStream(
getFileOutputStreamOrStdOut(mArgs[1]))
) {
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java
index b6383d788..e2dd5199b 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java
@@ -16,10 +16,10 @@
package com.android.inputmethod.latin.dicttool;
+import com.android.inputmethod.latin.common.FileUtils;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderEncoderTests;
import com.android.inputmethod.latin.makedict.BinaryDictEncoderFlattenTreeTests;
import com.android.inputmethod.latin.makedict.FusionDictionaryTest;
-import com.android.inputmethod.latin.utils.FileUtils;
import java.io.File;
import java.io.IOException;