diff options
Diffstat (limited to 'tools/dicttool')
4 files changed, 283 insertions, 6 deletions
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
index 49a6e8e14..3ec28f313 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
@@ -19,6 +19,10 @@ package com.android.inputmethod.latin.dicttool;
 import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils;
 import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
 import com.android.inputmethod.latin.makedict.DictDecoder;
+import com.android.inputmethod.latin.makedict.DictionaryHeader;
+import com.android.inputmethod.latin.makedict.FormatSpec;
+import com.android.inputmethod.latin.makedict.FormatSpec.DictionaryOptions;
+import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
 import com.android.inputmethod.latin.makedict.FusionDictionary;
 import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
 
@@ -34,6 +38,8 @@ import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.OutputStream;
 import java.util.Arrays;
+import java.util.ArrayList;
+import java.util.HashMap;
 
 import javax.annotation.Nonnull;
 import javax.annotation.Nullable;
@@ -59,7 +65,7 @@ public final class BinaryDictOffdeviceUtils {
         };
 
         private final int mDecoderSpecIndex;
-        T mResult;
+        public T mResult;
 
         public DecoderChainSpec() {
             mDecoderSpecIndex = 0;
@@ -142,6 +148,101 @@ public final class BinaryDictOffdeviceUtils {
         }
     }
 
+    /**
+     * Reads and decodes only the header of a binary dictionary.
+     *
+     * The magic number, the format version and the declared header size are checked before
+     * the header attributes are decoded. Nothing past the header is read from the stream.
+     */
+    public static class HeaderReaderProcessor implements InputProcessor<DictionaryHeader> {
+        // Arbitrarily limit the header length to 32k. Sounds like it would never be larger
+        // than this. Revisit this if needed later.
+        private static final int MAX_HEADER_LENGTH = 32 * 1024;
+        // Size of the fixed prefix holding the magic number, the version and the header size.
+        private static final int FIXED_HEADER_LENGTH = 12;
+
+        /**
+         * Fills the whole buffer from the stream.
+         *
+         * A single call to InputStream#read may return fewer bytes than requested even when
+         * the stream has not ended, so keep reading until the buffer is full.
+         *
+         * @param input the stream to read from.
+         * @param buffer the buffer to fill.
+         * @return false if the stream ended before the buffer was full, true otherwise.
+         * @throws IOException if reading from the stream fails.
+         */
+        private static boolean readFully(final InputStream input, final byte[] buffer)
+                throws IOException {
+            int offset = 0;
+            while (offset < buffer.length) {
+                final int readBytes = input.read(buffer, offset, buffer.length - offset);
+                if (readBytes < 0) {
+                    return false;
+                }
+                offset += readBytes;
+            }
+            return true;
+        }
+
+        /**
+         * Reads a dictionary header from the start of the stream.
+         *
+         * @param input the stream to read the header from.
+         * @return the decoded header.
+         * @throws IOException if reading from the stream fails.
+         * @throws UnsupportedFormatException if the data is not a dictionary in a supported
+         *         version, or if the header is truncated or has an invalid size.
+         */
+        @Override @Nonnull
+        public DictionaryHeader process(final InputStream input) throws IOException,
+                UnsupportedFormatException {
+            // Do everything as curtly and ad-hoc as possible for performance.
+            final byte[] tmpBuffer = new byte[FIXED_HEADER_LENGTH];
+            if (!readFully(input, tmpBuffer)) {
+                throw new UnsupportedFormatException("File too short, not a dictionary");
+            }
+            // Ad-hoc check for the magic number. See FormatSpec.java as well as
+            // byte_array_utils.h and BinaryDictEncoderUtils#writeDictionaryHeader().
+            final int MAGIC_NUMBER_START_OFFSET = 0;
+            final int VERSION_START_OFFSET = 4;
+            final int HEADER_SIZE_OFFSET = 8;
+            final int magicNumber = ((tmpBuffer[MAGIC_NUMBER_START_OFFSET] & 0xFF) << 24)
+                    + ((tmpBuffer[MAGIC_NUMBER_START_OFFSET + 1] & 0xFF) << 16)
+                    + ((tmpBuffer[MAGIC_NUMBER_START_OFFSET + 2] & 0xFF) << 8)
+                    + (tmpBuffer[MAGIC_NUMBER_START_OFFSET + 3] & 0xFF);
+            if (magicNumber != FormatSpec.MAGIC_NUMBER) {
+                throw new UnsupportedFormatException("Wrong magic number");
+            }
+            final int version = ((tmpBuffer[VERSION_START_OFFSET] & 0xFF) << 8)
+                    + (tmpBuffer[VERSION_START_OFFSET + 1] & 0xFF);
+            if (version != FormatSpec.VERSION2 && version != FormatSpec.VERSION201
+                    && version != FormatSpec.VERSION202) {
+                throw new UnsupportedFormatException("Only versions 2, 201, 202 are supported");
+            }
+            final int totalHeaderSize = ((tmpBuffer[HEADER_SIZE_OFFSET] & 0xFF) << 24)
+                    + ((tmpBuffer[HEADER_SIZE_OFFSET + 1] & 0xFF) << 16)
+                    + ((tmpBuffer[HEADER_SIZE_OFFSET + 2] & 0xFF) << 8)
+                    + (tmpBuffer[HEADER_SIZE_OFFSET + 3] & 0xFF);
+            if (totalHeaderSize > MAX_HEADER_LENGTH) {
+                throw new UnsupportedFormatException("Header too large");
+            }
+            // The total size includes the prefix already read. A smaller (or negative) value is
+            // corrupt and would make the array allocation below throw.
+            if (totalHeaderSize < tmpBuffer.length) {
+                throw new UnsupportedFormatException("Header too small");
+            }
+            final byte[] headerBuffer = new byte[totalHeaderSize - tmpBuffer.length];
+            if (!readFully(input, headerBuffer)) {
+                throw new UnsupportedFormatException("File shorter than specified in the header");
+            }
+            final HashMap<String, String> attributes =
+                    BinaryDictDecoderUtils.decodeHeaderAttributes(headerBuffer);
+            return new DictionaryHeader(totalHeaderSize, new DictionaryOptions(attributes),
+                    new FormatOptions(version, false /* hasTimestamp */));
+        }
+    }
+
     public static void copy(final InputStream input, final OutputStream output) throws IOException {
         final byte[] buffer = new byte[COPY_BUFFER_SIZE];
         for (int readBytes = input.read(buffer); readBytes >= 0; readBytes = input.read(buffer)) {
@@ -162,11 +263,26 @@ public final class BinaryDictOffdeviceUtils {
         @Nonnull DecoderChainSpec spec = new DecoderChainSpec();
         while (null != spec) {
             try {
-                try (final InputStream input = spec.getStream(src)) {
-                    spec.mResult = processor.process(input);
-                    return spec;
+                final InputStream input = spec.getStream(src);
+                try {
+                    spec.mResult = processor.process(input);
+                } finally {
+                    // Always close the stream, even when the processor rejects the format.
+                    try {
+                        input.close();
+                    } catch (IOException e) {
+                        // CipherInputStream doesn't like being closed without having read the
+                        // entire stream, for some reason. But we don't want to because it's a
+                        // waste of resources. We really, really don't care about this.
+                        // However on close() CipherInputStream does throw this exception,
+                        // wrapped in an IOException so we need to catch it.
+                        if (!(e.getCause() instanceof javax.crypto.BadPaddingException)) {
+                            throw e;
+                        }
+                    }
                 }
-            } catch (IOException | UnsupportedFormatException e) {
+                return spec;
+            } catch (IOException | UnsupportedFormatException | ArrayIndexOutOfBoundsException e) {
                 // If the format is not the right one for this file, the processor will throw one
                 // of these exceptions. In our case, that means we should try the next spec,
                 // since it may still be at another format we haven't tried yet.
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CommandList.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CommandList.java
index 07450ca51..8fdf7633f 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CommandList.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CommandList.java
@@ -20,6 +20,7 @@ public class CommandList {
     public static void populate() {
         // TODO: Move some commands to native code.
         Dicttool.addCommand("info", Info.class);
+        Dicttool.addCommand("header", Header.class);
         Dicttool.addCommand("diff", Diff.class);
         Dicttool.addCommand("compress", Compress.Compressor.class);
         Dicttool.addCommand("uncompress", Compress.Uncompressor.class);
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Header.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Header.java
new file mode 100644
index 000000000..51efdec33
--- /dev/null
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Header.java
@@ -0,0 +1,69 @@
+/**
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.android.inputmethod.latin.dicttool;
+
+import com.android.inputmethod.latin.BinaryDictionary;
+import com.android.inputmethod.latin.dicttool.BinaryDictOffdeviceUtils.DecoderChainSpec;
+import com.android.inputmethod.latin.makedict.DictionaryHeader;
+import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
+
+import java.io.File;
+import java.util.Arrays;
+import java.util.Locale;
+
+/**
+ * Dicttool command that prints the header contents of a dictionary file.
+ */
+public class Header extends Dicttool.Command {
+    public static final String COMMAND = "header";
+
+    @Override
+    public String getHelp() {
+        return COMMAND + " [-p] <filename>: prints the header contents of a dictionary file";
+    }
+
+    /** Prints a summary of the file followed by its header attributes. */
+    @Override
+    public void run() throws UnsupportedFormatException {
+        // An optional leading "-p" sets the plumbing flag handed to toString(); consume it.
+        final boolean plumbing = mArgs.length > 0 && "-p".equals(mArgs[0]);
+        if (plumbing) {
+            mArgs = Arrays.copyOfRange(mArgs, 1, mArgs.length);
+        }
+        if (mArgs.length < 1) {
+            throw new IllegalArgumentException("Not enough arguments for command " + COMMAND);
+        }
+        final String filename = mArgs[0];
+        final File dictFile = new File(filename);
+        final DecoderChainSpec<DictionaryHeader> spec =
+                BinaryDictOffdeviceUtils.decodeDictionaryForProcess(dictFile,
+                        new BinaryDictOffdeviceUtils.HeaderReaderProcessor());
+        // No spec came back, so the header could not be read with any decoding that was tried.
+        if (null == spec) {
+            throw new UnsupportedFormatException(filename
+                    + " doesn't seem to be a valid version 2 dictionary file");
+        }
+
+        final DictionaryHeader header = spec.mResult;
+        System.out.println("Dictionary : " + dictFile.getAbsolutePath());
+        System.out.println("Size : " + dictFile.length() + " bytes");
+        System.out.println("Format : Binary dictionary format");
+        System.out.println("Packaging : " + spec.describeChain());
+        System.out.println("Header attributes :");
+        System.out.print(header.mDictionaryOptions.toString(2 /* indentCount */, plumbing));
+    }
+}
diff --git a/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java
b/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java
index 0fb4cf59c..ea9d4cc19 100644
--- a/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java
+++ b/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java
@@ -16,10 +16,17 @@
 
 package com.android.inputmethod.latin.dicttool;
 
+import com.android.inputmethod.latin.common.CodePointUtils;
+import com.android.inputmethod.latin.dicttool.BinaryDictOffdeviceUtils;
+import com.android.inputmethod.latin.dicttool.Compress;
+import com.android.inputmethod.latin.dicttool.Crypt;
+import com.android.inputmethod.latin.dicttool.BinaryDictOffdeviceUtils.DecoderChainSpec;
 import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
+import com.android.inputmethod.latin.makedict.BinaryDictUtils;
 import com.android.inputmethod.latin.makedict.DictDecoder;
 import com.android.inputmethod.latin.makedict.DictEncoder;
 import com.android.inputmethod.latin.makedict.DictionaryHeader;
+import com.android.inputmethod.latin.makedict.FormatSpec;
 import com.android.inputmethod.latin.makedict.FormatSpec.DictionaryOptions;
 import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
 import com.android.inputmethod.latin.makedict.FusionDictionary;
@@ -35,13 +42,37 @@ import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
+import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Random;
+import java.util.Set;
 
 /**
  * Unit tests for BinaryDictOffdeviceUtils
  */
 public class BinaryDictOffdeviceUtilsTests extends TestCase {
     private static final int TEST_FREQ = 37; // Some arbitrary value unlikely to happen by chance
+    private static final int CODE_POINT_SET_SIZE = 300;
+    final Random mRandom;
+    private static final ArrayList<String> sWords = new ArrayList<>();
+
+    public BinaryDictOffdeviceUtilsTests(final long seed, final int maxUnigrams) {
+        super();
+        mRandom = new Random(seed);
+        sWords.clear();
+        generateWords(maxUnigrams, mRandom);
+    }
+
+    private static void generateWords(final int maxUnigrams, final Random random) {
+        final int[] codePointSet = CodePointUtils.generateCodePointSet(
+                CODE_POINT_SET_SIZE, random);
+        final Set<String> wordSet = new HashSet<>();
+        while (wordSet.size() < maxUnigrams) {
+            wordSet.add(CodePointUtils.generateWord(random, codePointSet));
+        }
+        sWords.addAll(wordSet);
+    }
 
     public void testGetRawDictWorks() throws IOException, UnsupportedFormatException {
         final String VERSION = "1";
@@ -70,7 +101,7 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase {
         try (final OutputStream out = Compress.getCompressedStream(
                 new BufferedOutputStream(new FileOutputStream(dst)))) {
             final DictEncoder dictEncoder = new Ver2DictEncoder(out);
-            dictEncoder.writeDictionary(dict, new FormatOptions(2, false));
+            dictEncoder.writeDictionary(dict, new FormatOptions(FormatSpec.VERSION202, false));
         }
 
         // Test for an actually compressed dictionary and its contents
@@ -119,4 +150,64 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase {
         assertNull("Wrongly identified data file",
                 BinaryDictOffdeviceUtils.getRawDictionaryOrNull(gzDst));
     }
+
+    // Writes a dictionary, optionally compressed and encrypted, then checks its decoded header.
+    public void runTestHeaderReaderProcessorWithOneSpec(final boolean compress, final boolean crypt)
+            throws IOException, UnsupportedFormatException {
+        final String dictName = "testHeaderReaderProcessor";
+        final FormatOptions formatOptions = BinaryDictUtils.STATIC_OPTIONS;
+        final int MAX_NUMBER_OF_OPTIONS_TO_ADD = 5;
+        final HashMap<String, String> options = new HashMap<>();
+        // Required attributes
+        options.put("dictionary", "main:en_US");
+        options.put("locale", "en_US");
+        options.put("version", Integer.toString(mRandom.nextInt()));
+        // Add some random options for test
+        final int numberOfOptionsToAdd = mRandom.nextInt(MAX_NUMBER_OF_OPTIONS_TO_ADD + 1);
+        for (int i = 0; i < numberOfOptionsToAdd; ++i) {
+            options.put(sWords.get(2 * i), sWords.get(2 * i + 1));
+        }
+        final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
+                new DictionaryOptions(options));
+
+        for (int i = 0; i < sWords.size(); ++i) {
+            final String word = sWords.get(i);
+            dict.add(word, new ProbabilityInfo(TEST_FREQ), null /* shortcuts */,
+                    false /* isNotAWord */, false /* isPossiblyOffensive */);
+        }
+
+        File file = File.createTempFile(dictName, ".tmp");
+        final DictEncoder dictEncoder = BinaryDictUtils.getDictEncoder(file, formatOptions);
+        dictEncoder.writeDictionary(dict, formatOptions);
+
+        if (compress) {
+            final File rawFile = file;
+            file = File.createTempFile(dictName + ".compress", ".tmp");
+            final Compress.Compressor compressCommand = new Compress.Compressor();
+            compressCommand.setArgs(new String[] { rawFile.getPath(), file.getPath() });
+            compressCommand.run();
+        }
+        if (crypt) {
+            final File rawFile = file;
+            file = File.createTempFile(dictName + ".crypt", ".tmp");
+            final Crypt.Encrypter cryptCommand = new Crypt.Encrypter();
+            cryptCommand.setArgs(new String[] { rawFile.getPath(), file.getPath() });
+            cryptCommand.run();
+        }
+
+        final DecoderChainSpec<DictionaryHeader> spec =
+                BinaryDictOffdeviceUtils.decodeDictionaryForProcess(file,
+                        new BinaryDictOffdeviceUtils.HeaderReaderProcessor());
+        assertNotNull("Can't decode a dictionary we just wrote : " + file, spec);
+        final DictionaryHeader header = spec.mResult;
+        assertEquals("raw" + (crypt ? " > encryption" : "") + (compress ? " > compression" : ""),
+                spec.describeChain());
+        assertEquals(options, header.mDictionaryOptions.mAttributes);
+    }
+
+    public void testHeaderReaderProcessor() throws IOException, UnsupportedFormatException {
+        runTestHeaderReaderProcessorWithOneSpec(false /* compress */, false /* crypt */);
+        runTestHeaderReaderProcessorWithOneSpec(true /* compress */, false /* crypt */);
+        runTestHeaderReaderProcessorWithOneSpec(true /* compress */, true /* crypt */);
+    }
 }