10 files changed, 384 insertions, 198 deletions
diff --git a/tools/dicttool/Android.mk b/tools/dicttool/Android.mk
index 81c0706c1..1a9f029ae 100644
--- a/tools/dicttool/Android.mk
+++ b/tools/dicttool/Android.mk
@@ -42,23 +42,15 @@ LATINIME_TESTS_SRC_DIR := $(LATINIME_LOCAL_DIR)/tests/src/com/android/inputmetho
 # a significant part of the dependencies are mocked in the compat/ directory, with empty or
 # nearly-empty implementations, for parts that we don't use in Dicttool.
 LATINIME_SRC_FILES_FOR_DICTTOOL := \
-        event/Combiner.java \
-        event/Event.java \
         latin/BinaryDictionary.java \
         latin/DicTraverseSession.java \
         latin/Dictionary.java \
-        latin/LastComposedWord.java \
         latin/NgramContext.java \
         latin/SuggestedWords.java \
-        latin/WordComposer.java \
-        latin/settings/NativeSuggestOptions.java \
         latin/settings/SettingsValuesForSuggestion.java \
         latin/utils/BinaryDictionaryUtils.java \
         latin/utils/CombinedFormatUtils.java \
-        latin/utils/CoordinateUtils.java \
-        latin/utils/FileUtils.java \
-        latin/utils/JniUtils.java \
-        latin/utils/LocaleUtils.java
+        latin/utils/JniUtils.java
 
 LATINIME_OVERRIDABLE_SRC_FILES_FOR_DICTTOOL := \
         latin/define/DebugFlags.java
diff --git a/tools/dicttool/compat/com/android/inputmethod/event/CombinerChain.java b/tools/dicttool/compat/com/android/inputmethod/event/CombinerChain.java
deleted file mode 100644
index c4457a1b7..000000000
--- a/tools/dicttool/compat/com/android/inputmethod/event/CombinerChain.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.inputmethod.event;
-
-import java.util.ArrayList;
-
-/**
- * Compatibility class that stands in for the combiner chain in LatinIME.
- *
- * This is not used by dicttool, it's just needed by the dependency chain.
- */
-// TODO: there should not be a dependency to this in dicttool, so there
-// should be a sensible way to separate them cleanly.
-public class CombinerChain {
-    private StringBuilder mComposingWord;
-    public CombinerChain(final String initialText, final Combiner... combinerList) {
-        mComposingWord = new StringBuilder(initialText);
-    }
-
-    public Event processEvent(final ArrayList<Event> previousEvents, final Event newEvent) {
-        return newEvent;
-    }
-
-    public void applyProcessedEvent(final Event event) {
-        mComposingWord.append(event.getTextToCommit());
-    }
-
-    public CharSequence getComposingWordWithCombiningFeedback() {
-        return mComposingWord;
-    }
-
-    public void reset() {
-        mComposingWord.setLength(0);
-    }
-
-    public static Combiner[] createCombiners(final String spec) {
-        // Dicttool never uses a combiner at all, so we just return a zero-sized array.
-        return new Combiner[0];
-    }
-}
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
index 1c5dfa9fb..84c3956f7 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
@@ -19,6 +19,10 @@ package com.android.inputmethod.latin.dicttool;
 import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils;
 import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
 import com.android.inputmethod.latin.makedict.DictDecoder;
+import com.android.inputmethod.latin.makedict.DictionaryHeader;
+import com.android.inputmethod.latin.makedict.FormatSpec;
+import com.android.inputmethod.latin.makedict.FormatSpec.DictionaryOptions;
+import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
 import com.android.inputmethod.latin.makedict.FusionDictionary;
 import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
 
@@ -27,12 +31,18 @@ import java.io.BufferedOutputStream;
 import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileInputStream;
+import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.OutputStream;
 import java.util.Arrays;
+import java.util.ArrayList;
+import java.util.HashMap;
+
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
 
 /**
  * Class grouping utilities for offline dictionary making.
@@ -44,26 +54,27 @@ public final class BinaryDictOffdeviceUtils {
     // Prefix and suffix are arbitrary, the values do not really matter
     private final static String PREFIX = "dicttool";
     private final static String SUFFIX = ".tmp";
-
     private final static int COPY_BUFFER_SIZE = 8192;
 
-    public static class DecoderChainSpec {
+    public static class DecoderChainSpec<T> {
         public final static int COMPRESSION = 1;
         public final static int ENCRYPTION = 2;
-        private final static int MAX_DECODE_DEPTH = 4;
 
-        final int[] mDecoderSpec;
-        File mFile;
+        private final static int[][] VALID_DECODER_CHAINS = {
+            { }, { COMPRESSION }, { ENCRYPTION, COMPRESSION }
+        };
+
+        private final int mDecoderSpecIndex;
+        public T mResult;
 
         public DecoderChainSpec() {
-            mDecoderSpec = new int[0];
-            mFile = null;
+            mDecoderSpecIndex = 0;
+            mResult = null;
         }
 
-        public DecoderChainSpec(final DecoderChainSpec src, final int newStep) {
-            mDecoderSpec = Arrays.copyOf(src.mDecoderSpec, src.mDecoderSpec.length + 1);
-            mDecoderSpec[src.mDecoderSpec.length] = newStep;
-            mFile = src.mFile;
+        private DecoderChainSpec(final DecoderChainSpec<T> src) {
+            mDecoderSpecIndex = src.mDecoderSpecIndex + 1;
+            mResult = src.mResult;
         }
 
         private String getStepDescription(final int step) {
@@ -79,110 +90,177 @@ public final class BinaryDictOffdeviceUtils {
 
         public String describeChain() {
             final StringBuilder s = new StringBuilder("raw");
-            for (final int step : mDecoderSpec) {
+            for (final int step : VALID_DECODER_CHAINS[mDecoderSpecIndex]) {
                 s.append(" > ");
                 s.append(getStepDescription(step));
             }
             return s.toString();
         }
-    }
 
-    public static void copy(final InputStream input, final OutputStream output) throws IOException {
-        final byte[] buffer = new byte[COPY_BUFFER_SIZE];
-        for (int readBytes = input.read(buffer); readBytes >= 0; readBytes = input.read(buffer)) {
-            output.write(buffer, 0, readBytes);
+        /**
+         * Returns the spec for the next valid decoder chain, or null once all have been tried.
+         */
+        public DecoderChainSpec<T> next() {
+            if (mDecoderSpecIndex + 1 >= VALID_DECODER_CHAINS.length) {
+                return null;
+            }
+            return new DecoderChainSpec<>(this);
+        }
+
+        public InputStream getStream(final File src) throws FileNotFoundException, IOException {
+            InputStream input = new BufferedInputStream(new FileInputStream(src));
+            for (final int step : VALID_DECODER_CHAINS[mDecoderSpecIndex]) {
+                switch (step) {
+                case COMPRESSION:
+                    input = Compress.getUncompressedStream(input);
+                    break;
+                case ENCRYPTION:
+                    input = Crypt.getDecryptedStream(input);
+                    break;
+                }
+            }
+            return input;
         }
     }
 
-    /**
-     * Returns a decrypted/uncompressed dictionary.
-     *
-     * This will decrypt/uncompress any number of times as necessary until it finds the
-     * dictionary signature, and copy the decoded file to a temporary place.
-     * If this is not a dictionary, the method returns null.
-     */
-    public static DecoderChainSpec getRawDictionaryOrNull(final File src) {
-        return getRawDictionaryOrNullInternal(new DecoderChainSpec(), src, 0);
+    public interface InputProcessor<T> {
+        @Nonnull
+        public T process(@Nonnull final InputStream input)
+                throws IOException, UnsupportedFormatException;
     }
 
-    private static DecoderChainSpec getRawDictionaryOrNullInternal(
-            final DecoderChainSpec spec, final File src, final int depth) {
-        // Unfortunately the decoding scheme we use can consider any data to be encrypted
-        // and will produce some output, meaning it's not possible to reliably detect encrypted
-        // data. Thus, some non-dictionary files (especially small) ones may successfully decrypt
-        // over and over, ending in a stack overflow. Hence we limit the depth at which we try
-        // decoding the file.
-        if (depth > DecoderChainSpec.MAX_DECODE_DEPTH) {
-            return null;
+    public static class CopyProcessor implements InputProcessor<File> {
+        @Override @Nonnull
+        public File process(@Nonnull final InputStream input) throws IOException,
+                UnsupportedFormatException {
+            final File dst = File.createTempFile(PREFIX, SUFFIX);
+            dst.deleteOnExit();
+            try (final OutputStream output = new BufferedOutputStream(new FileOutputStream(dst))) {
+                copy(input, output);
+                output.flush();
+                output.close();
+                if (BinaryDictDecoderUtils.isBinaryDictionary(dst)
+                        || CombinedInputOutput.isCombinedDictionary(dst.getAbsolutePath())) {
+                    return dst;
+                }
+            }
+            throw new UnsupportedFormatException("Input stream not at the expected format");
         }
-        if (BinaryDictDecoderUtils.isBinaryDictionary(src)
-                || CombinedInputOutput.isCombinedDictionary(src.getAbsolutePath())) {
-            spec.mFile = src;
-            return spec;
+    }
+
+    public static class HeaderReaderProcessor implements InputProcessor<DictionaryHeader> {
+        // Arbitrarily limit the header length to 32k. Sounds like it would never be larger
+        // than this. Revisit this if needed later.
+        private final int MAX_HEADER_LENGTH = 32 * 1024;
+        @Override @Nonnull
+        public DictionaryHeader process(final InputStream input) throws IOException,
+                UnsupportedFormatException {
+            // Do everything as curtly and ad-hoc as possible for performance.
+            final byte[] tmpBuffer = new byte[12];
+            // read() may return fewer bytes than requested, so loop until the prefix is complete.
+            // Throws UnsupportedFormatException if the stream ends before 12 bytes are read.
+            readStreamExhaustively(input, tmpBuffer);
+            // Ad-hoc check for the magic number. See FormatSpec.java as well as
+            // byte_array_utils.h and BinaryDictEncoderUtils#writeDictionaryHeader().
+            final int MAGIC_NUMBER_START_OFFSET = 0;
+            final int VERSION_START_OFFSET = 4;
+            final int HEADER_SIZE_OFFSET = 8;
+            final int magicNumber = ((tmpBuffer[MAGIC_NUMBER_START_OFFSET] & 0xFF) << 24)
+                    + ((tmpBuffer[MAGIC_NUMBER_START_OFFSET + 1] & 0xFF) << 16)
+                    + ((tmpBuffer[MAGIC_NUMBER_START_OFFSET + 2] & 0xFF) << 8)
+                    + (tmpBuffer[MAGIC_NUMBER_START_OFFSET + 3] & 0xFF);
+            if (magicNumber != FormatSpec.MAGIC_NUMBER) {
+                throw new UnsupportedFormatException("Wrong magic number");
+            }
+            final int version = ((tmpBuffer[VERSION_START_OFFSET] & 0xFF) << 8)
+                    + (tmpBuffer[VERSION_START_OFFSET + 1] & 0xFF);
+            if (version != FormatSpec.VERSION2 && version != FormatSpec.VERSION201
+                    && version != FormatSpec.VERSION202) {
+                throw new UnsupportedFormatException("Only versions 2, 201, 202 are supported");
+            }
+            final int totalHeaderSize = ((tmpBuffer[HEADER_SIZE_OFFSET] & 0xFF) << 24)
+                    + ((tmpBuffer[HEADER_SIZE_OFFSET + 1] & 0xFF) << 16)
+                    + ((tmpBuffer[HEADER_SIZE_OFFSET + 2] & 0xFF) << 8)
+                    + (tmpBuffer[HEADER_SIZE_OFFSET + 3] & 0xFF);
+            if (totalHeaderSize < tmpBuffer.length || totalHeaderSize > MAX_HEADER_LENGTH) {
+                throw new UnsupportedFormatException("Invalid header size " + totalHeaderSize);
+            }
+            final byte[] headerBuffer = new byte[totalHeaderSize - tmpBuffer.length];
+            readStreamExhaustively(input, headerBuffer);
+            final HashMap<String, String> attributes =
+                    BinaryDictDecoderUtils.decodeHeaderAttributes(headerBuffer);
+            return new DictionaryHeader(totalHeaderSize, new DictionaryOptions(attributes),
+                    new FormatOptions(version, false /* hasTimestamp */));
+        }
-        // It's not a raw dictionary - try to see if it's compressed.
-        final File uncompressedFile = tryGetUncompressedFile(src);
-        if (null != uncompressedFile) {
-            final DecoderChainSpec newSpec =
-                    getRawDictionaryOrNullInternal(spec, uncompressedFile, depth + 1);
-            if (null == newSpec) return null;
-            return new DecoderChainSpec(newSpec, DecoderChainSpec.COMPRESSION);
+    }
+
+    private static void readStreamExhaustively(final InputStream inputStream,
+            final byte[] outBuffer) throws IOException, UnsupportedFormatException {
+        int readBytes = 0;
+        int readBytesLastCycle = -1;
+        while (readBytes != outBuffer.length) {
+            readBytesLastCycle = inputStream.read(outBuffer, readBytes,
+                    outBuffer.length - readBytes);
+            if (readBytesLastCycle == -1)
+                throw new UnsupportedFormatException("File shorter than specified in the header"
+                        + " (expected " + outBuffer.length + ", read " + readBytes + ")");
+            readBytes += readBytesLastCycle;
         }
-        // It's not a compressed either - try to see if it's crypted.
-        final File decryptedFile = tryGetDecryptedFile(src);
-        if (null != decryptedFile) {
-            final DecoderChainSpec newSpec =
-                    getRawDictionaryOrNullInternal(spec, decryptedFile, depth + 1);
-            if (null == newSpec) return null;
-            return new DecoderChainSpec(newSpec, DecoderChainSpec.ENCRYPTION);
+    }
+
+    public static void copy(final InputStream input, final OutputStream output) throws IOException {
+        final byte[] buffer = new byte[COPY_BUFFER_SIZE];
+        for (int readBytes = input.read(buffer); readBytes >= 0; readBytes = input.read(buffer)) {
+            output.write(buffer, 0, readBytes);
         }
-        return null;
     }
 
-    /* Try to uncompress the file passed as an argument.
+    /**
+     * Process a dictionary, decrypting/uncompressing it on the fly as necessary.
      *
-     * If the file can be uncompressed, the uncompressed version is returned. Otherwise, null
-     * is returned.
+     * This will execute the given processor repeatedly with the possible alternatives
+     * for dictionary format until the processor does not throw an exception.
+     * If the processor succeeds for none of the possible formats, the method returns null.
      */
-    private static File tryGetUncompressedFile(final File src) {
-        try {
-            final File dst = File.createTempFile(PREFIX, SUFFIX);
-            dst.deleteOnExit();
-            try (
-                final InputStream input = Compress.getUncompressedStream(
-                        new BufferedInputStream(new FileInputStream(src)));
-                final OutputStream output = new BufferedOutputStream(new FileOutputStream(dst))
-            ) {
-                copy(input, output);
-                return dst;
+    @Nullable
+    public static <T> DecoderChainSpec<T> decodeDictionaryForProcess(@Nonnull final File src,
+            @Nonnull final InputProcessor<T> processor) {
+        DecoderChainSpec<T> spec = new DecoderChainSpec<>();
+        while (null != spec) {
+            try {
+                final InputStream input = spec.getStream(src);
+                spec.mResult = processor.process(input);
+                try {
+                    input.close();
+                } catch (IOException e) {
+                    // CipherInputStream throws a BadPaddingException, wrapped in an
+                    // IOException, when it is closed before the whole stream has been read.
+                    // Reading the rest just to please it would be a waste of resources, so
+                    // this specific failure is deliberately ignored; any other IOException
+                    // is rethrown.
+                    if (!(e.getCause() instanceof javax.crypto.BadPaddingException)) {
+                        throw e;
+                    }
+                }
+                return spec;
+            } catch (IOException | UnsupportedFormatException | ArrayIndexOutOfBoundsException e) {
+                // If the format is not the right one for this file, the processor will throw one
+                // of these exceptions. In our case, that means we should try the next spec,
+                // since it may still be at another format we haven't tried yet.
+                // TODO: stop using exceptions for this non-exceptional case.
             }
-        } catch (final IOException e) {
-            // Could not uncompress the file: presumably the file is simply not a compressed file
-            return null;
+            spec = spec.next();
         }
+        return null;
     }
 
-    /* Try to decrypt the file passed as an argument.
-     *
-     * If the file can be decrypted, the decrypted version is returned. Otherwise, null
-     * is returned.
+    /**
+     * Get a decoder chain spec with a raw dictionary file. This makes a new file on the
+     * disk ready for any treatment the client wants.
      */
-    private static File tryGetDecryptedFile(final File src) {
-        try {
-            final File dst = File.createTempFile(PREFIX, SUFFIX);
-            dst.deleteOnExit();
-            try (
-                final InputStream input = Crypt.getDecryptedStream(
-                        new BufferedInputStream(new FileInputStream(src)));
-                final OutputStream output = new BufferedOutputStream(new FileOutputStream(dst))
-            ) {
-                copy(input, output);
-                return dst;
-            }
-        } catch (final IOException e) {
-            // Could not decrypt the file: presumably the file is simply not a crypted file
-            return null;
-        }
+    @Nullable
+    public static DecoderChainSpec<File> getRawDictionaryOrNull(@Nonnull final File src) {
+        return decodeDictionaryForProcess(src, new CopyProcessor());
     }
 
     static FusionDictionary getDictionary(final String filename, final boolean report) {
@@ -192,28 +270,28 @@ public final class BinaryDictOffdeviceUtils {
             System.out.println("Size : " + file.length() + " bytes");
         }
         try {
-            final DecoderChainSpec decodedSpec = getRawDictionaryOrNull(file);
+            final DecoderChainSpec<File> decodedSpec = getRawDictionaryOrNull(file);
             if (null == decodedSpec) {
                 throw new RuntimeException("Does not seem to be a dictionary file " + filename);
             }
-            if (CombinedInputOutput.isCombinedDictionary(decodedSpec.mFile.getAbsolutePath())) {
+            if (CombinedInputOutput.isCombinedDictionary(decodedSpec.mResult.getAbsolutePath())) {
                 if (report) {
                     System.out.println("Format : Combined format");
                     System.out.println("Packaging : " + decodedSpec.describeChain());
-                    System.out.println("Uncompressed size : " + decodedSpec.mFile.length());
+                    System.out.println("Uncompressed size : " + decodedSpec.mResult.length());
                 }
                 try (final BufferedReader reader = new BufferedReader(
-                        new InputStreamReader(new FileInputStream(decodedSpec.mFile), "UTF-8"))) {
+                        new InputStreamReader(new FileInputStream(decodedSpec.mResult), "UTF-8"))) {
                     return CombinedInputOutput.readDictionaryCombined(reader);
                 }
             }
             final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(
-                    decodedSpec.mFile, 0, decodedSpec.mFile.length(),
+                    decodedSpec.mResult, 0, decodedSpec.mResult.length(),
                     DictDecoder.USE_BYTEARRAY);
             if (report) {
                 System.out.println("Format : Binary dictionary format");
                 System.out.println("Packaging : " + decodedSpec.describeChain());
-                System.out.println("Uncompressed size : " + decodedSpec.mFile.length());
+                System.out.println("Uncompressed size : " + decodedSpec.mResult.length());
             }
             return dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */);
         } catch (final IOException | UnsupportedFormatException e) {
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java
index 48d2e5922..955c5728c 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java
@@ -98,6 +98,7 @@ public class CombinedInputOutput {
         String word = null;
         ProbabilityInfo probabilityInfo = new ProbabilityInfo(0);
         boolean isNotAWord = false;
+        boolean isPossiblyOffensive = false;
         ArrayList<WeightedString> bigrams = new ArrayList<>();
         ArrayList<WeightedString> shortcuts = new ArrayList<>();
         while (null != (line = reader.readLine())) {
@@ -106,7 +107,7 @@ public class CombinedInputOutput {
             if (args[0].matches(CombinedFormatUtils.WORD_TAG + "=.*")) {
                 if (null != word) {
                     dict.add(word, probabilityInfo, shortcuts.isEmpty() ? null : shortcuts,
-                            isNotAWord, false /* isPossiblyOffensive */);
+                            isNotAWord, isPossiblyOffensive);
                     for (WeightedString s : bigrams) {
                         dict.setBigram(word, s.mWord, s.mProbabilityInfo);
                     }
@@ -114,27 +115,37 @@ public class CombinedInputOutput {
                 if (!shortcuts.isEmpty()) shortcuts = new ArrayList<>();
                 if (!bigrams.isEmpty()) bigrams = new ArrayList<>();
                 isNotAWord = false;
+                isPossiblyOffensive = false;
                 for (String param : args) {
                     final String params[] = param.split("=", 2);
                     if (2 != params.length) throw new RuntimeException("Wrong format : " + line);
-                    if (CombinedFormatUtils.WORD_TAG.equals(params[0])) {
-                        word = params[1];
-                    } else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) {
-                        probabilityInfo = new ProbabilityInfo(Integer.parseInt(params[1]),
-                                probabilityInfo.mTimestamp, probabilityInfo.mLevel,
-                                probabilityInfo.mCount);
-                    } else if (CombinedFormatUtils.HISTORICAL_INFO_TAG.equals(params[0])) {
-                        final String[] historicalInfoParams =
-                                params[1].split(CombinedFormatUtils.HISTORICAL_INFO_SEPARATOR);
-                        if (historicalInfoParams.length != HISTORICAL_INFO_ELEMENT_COUNT) {
-                            throw new RuntimeException("Wrong format (historical info) : " + line);
-                        }
-                        probabilityInfo = new ProbabilityInfo(probabilityInfo.mProbability,
-                                Integer.parseInt(historicalInfoParams[0]),
-                                Integer.parseInt(historicalInfoParams[1]),
-                                Integer.parseInt(historicalInfoParams[2]));
-                    } else if (CombinedFormatUtils.NOT_A_WORD_TAG.equals(params[0])) {
-                        isNotAWord = "true".equals(params[1]);
+                    switch (params[0]) {
+                        case CombinedFormatUtils.WORD_TAG:
+                            word = params[1];
+                            break;
+                        case CombinedFormatUtils.PROBABILITY_TAG:
+                            probabilityInfo = new ProbabilityInfo(Integer.parseInt(params[1]),
+                                    probabilityInfo.mTimestamp, probabilityInfo.mLevel,
+                                    probabilityInfo.mCount);
+                            break;
+                        case CombinedFormatUtils.HISTORICAL_INFO_TAG:
+                            final String[] historicalInfoParams = params[1].split(
+                                    CombinedFormatUtils.HISTORICAL_INFO_SEPARATOR);
+                            if (historicalInfoParams.length != HISTORICAL_INFO_ELEMENT_COUNT) {
+                                throw new RuntimeException("Wrong format (historical info) : "
+                                        + line);
+                            }
+                            probabilityInfo = new ProbabilityInfo(probabilityInfo.mProbability,
+                                    Integer.parseInt(historicalInfoParams[0]),
+                                    Integer.parseInt(historicalInfoParams[1]),
+                                    Integer.parseInt(historicalInfoParams[2]));
+                            break;
+                        case CombinedFormatUtils.NOT_A_WORD_TAG:
+                            isNotAWord = CombinedFormatUtils.isLiteralTrue(params[1]);
+                            break;
+                        case CombinedFormatUtils.POSSIBLY_OFFENSIVE_TAG:
+                            isPossiblyOffensive = CombinedFormatUtils.isLiteralTrue(params[1]);
+                            break;
                     }
                 }
             } else if (args[0].matches(CombinedFormatUtils.SHORTCUT_TAG + "=.*")) {
@@ -190,7 +201,7 @@ public class CombinedInputOutput {
         }
         if (null != word) {
             dict.add(word, probabilityInfo, shortcuts.isEmpty() ? null : shortcuts, isNotAWord,
-                    false /* isPossiblyOffensive */);
+                    isPossiblyOffensive);
             for (WeightedString s : bigrams) {
                 dict.setBigram(word, s.mWord, s.mProbabilityInfo);
             }
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CommandList.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CommandList.java
index 0d93c7fa9..8fdf7633f 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CommandList.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CommandList.java
@@ -18,7 +18,9 @@ package com.android.inputmethod.latin.dicttool;
 
 public class CommandList {
     public static void populate() {
+        // TODO: Move some commands to native code.
         Dicttool.addCommand("info", Info.class);
+        Dicttool.addCommand("header", Header.class);
         Dicttool.addCommand("diff", Diff.class);
         Dicttool.addCommand("compress", Compress.Compressor.class);
         Dicttool.addCommand("uncompress", Compress.Uncompressor.class);
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java
index 8f9e4a3a6..6187853c8 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java
@@ -120,7 +120,7 @@ public class DictionaryMaker {
             String inputCombined = null;
             String outputBinary = null;
             String outputCombined = null;
-            int outputBinaryFormatVersion = FormatSpec.VERSION201; // the default version is 201.
+            int outputBinaryFormatVersion = FormatSpec.VERSION202; // the default version is 202.
             // Don't use code point table by default.
             int codePointTableMode = Ver2DictEncoder.CODE_POINT_TABLE_OFF;
 
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Header.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Header.java
new file mode 100644
index 000000000..ba96c0aeb
--- /dev/null
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Header.java
@@ -0,0 +1,70 @@
+/**
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.android.inputmethod.latin.dicttool;
+
+import com.android.inputmethod.latin.BinaryDictionary;
+import com.android.inputmethod.latin.dicttool.BinaryDictOffdeviceUtils.DecoderChainSpec;
+import com.android.inputmethod.latin.makedict.DictionaryHeader;
+import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
+
+import java.io.File;
+import java.util.Arrays;
+import java.util.Locale;
+
+public class Header extends Dicttool.Command {
+    public static final String COMMAND = "header";
+
+    public Header() {
+    }
+
+    @Override
+    public String getHelp() {
+        return COMMAND + " <filename>: prints the header contents of a dictionary file";
+    }
+
+    @Override
+    public void run() throws UnsupportedFormatException {
+        final boolean plumbing;
+        if (mArgs.length > 0 && "-p".equals(mArgs[0])) {
+            plumbing = true;
+            mArgs = Arrays.copyOfRange(mArgs, 1, mArgs.length);
+        } else {
+            plumbing = false;
+        }
+        if (mArgs.length < 1) {
+            throw new RuntimeException("Not enough arguments for command " + COMMAND);
+        }
+        final String filename = mArgs[0];
+        final File dictFile = new File(filename);
+        final DecoderChainSpec<DictionaryHeader> spec =
+                BinaryDictOffdeviceUtils.decodeDictionaryForProcess(dictFile,
+                        new BinaryDictOffdeviceUtils.HeaderReaderProcessor());
+        if (null == spec) {
+            throw new UnsupportedFormatException(filename
+                    + " doesn't seem to be a valid version 2 dictionary file");
+        }
+
+        final DictionaryHeader header = spec.mResult;
+        System.out.println("Dictionary : " + dictFile.getAbsolutePath());
+        System.out.println("Size : " + dictFile.length() + " bytes");
+        System.out.println("Format : Binary dictionary format");
+        System.out.println("Format version : " + header.mFormatOptions.mVersion);
+        System.out.println("Packaging : " + spec.describeChain());
+        System.out.println("Header attributes :");
+        System.out.print(header.mDictionaryOptions.toString(2 /* indentCount */, plumbing));
+    }
+}
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java
index 47ea70629..3efa10a80 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java
@@ -16,6 +16,8 @@
 
 package com.android.inputmethod.latin.dicttool;
 
+import com.android.inputmethod.latin.makedict.DictionaryHeader;
+
 import java.io.BufferedOutputStream;
 import java.io.File;
 import java.io.FileNotFoundException;
@@ -77,16 +79,16 @@ public class Package {
             if (mArgs.length != 2) {
                 throw new RuntimeException("Too many/too few arguments for command " + COMMAND);
             }
-            final BinaryDictOffdeviceUtils.DecoderChainSpec decodedSpec =
-                    BinaryDictOffdeviceUtils.getRawDictionaryOrNull(new File(mArgs[0]));
+            final BinaryDictOffdeviceUtils.DecoderChainSpec<DictionaryHeader> decodedSpec =
+                    BinaryDictOffdeviceUtils.decodeDictionaryForProcess(new File(mArgs[0]),
+                            new BinaryDictOffdeviceUtils.HeaderReaderProcessor());
             if (null == decodedSpec) {
                 System.out.println(mArgs[0] + " does not seem to be a dictionary");
                 return;
             }
             System.out.println("Packaging : " + decodedSpec.describeChain());
-            System.out.println("Uncompressed size : " + decodedSpec.mFile.length());
             try (
-                final InputStream input = getFileInputStream(decodedSpec.mFile);
+                final InputStream input = decodedSpec.getStream(new File(mArgs[0]));
                 final OutputStream output = new BufferedOutputStream(
                         getFileOutputStreamOrStdOut(mArgs[1]))
             ) {
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java
index b6383d788..e2dd5199b 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java
@@ -16,10 +16,10 @@
 
 package com.android.inputmethod.latin.dicttool;
 
+import com.android.inputmethod.latin.common.FileUtils;
 import com.android.inputmethod.latin.makedict.BinaryDictDecoderEncoderTests;
 import com.android.inputmethod.latin.makedict.BinaryDictEncoderFlattenTreeTests;
 import com.android.inputmethod.latin.makedict.FusionDictionaryTest;
-import com.android.inputmethod.latin.utils.FileUtils;
 
 import java.io.File;
 import java.io.IOException;
diff --git a/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java b/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java
index 6cdbff7e5..ea9d4cc19 100644
--- a/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java
+++ b/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java
@@ -16,10 +16,17 @@
 
 package com.android.inputmethod.latin.dicttool;
 
+import com.android.inputmethod.latin.common.CodePointUtils;
+import com.android.inputmethod.latin.dicttool.BinaryDictOffdeviceUtils;
+import com.android.inputmethod.latin.dicttool.Compress;
+import com.android.inputmethod.latin.dicttool.Crypt;
+import com.android.inputmethod.latin.dicttool.BinaryDictOffdeviceUtils.DecoderChainSpec;
 import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
+import com.android.inputmethod.latin.makedict.BinaryDictUtils;
 import com.android.inputmethod.latin.makedict.DictDecoder;
 import com.android.inputmethod.latin.makedict.DictEncoder;
 import com.android.inputmethod.latin.makedict.DictionaryHeader;
+import com.android.inputmethod.latin.makedict.FormatSpec;
 import com.android.inputmethod.latin.makedict.FormatSpec.DictionaryOptions;
 import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
 import com.android.inputmethod.latin.makedict.FusionDictionary;
@@ -35,13 +42,37 @@ import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
+import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Random;
+import java.util.Set;
 
 /**
  * Unit tests for BinaryDictOffdeviceUtils
  */
 public class BinaryDictOffdeviceUtilsTests extends TestCase {
     private static final int TEST_FREQ = 37; // Some arbitrary value unlikely to happen by chance
+    private static final int CODE_POINT_SET_SIZE = 300;
+    final Random mRandom;
+    private static final ArrayList<String> sWords = new ArrayList<>();
+
+    /** Seeds this instance's Random and refills the shared static word list from it. */
+    public BinaryDictOffdeviceUtilsTests(final long seed, final int maxUnigrams) {
+        mRandom = new Random(seed);
+        sWords.clear();
+        generateWords(maxUnigrams, mRandom);
+    }
+
+    /** Appends maxUnigrams distinct random words to sWords (none if maxUnigrams <= 0). */
+    private static void generateWords(final int maxUnigrams, final Random random) {
+        final int[] codePoints = CodePointUtils.generateCodePointSet(CODE_POINT_SET_SIZE, random);
+        final Set<String> distinctWords = new HashSet<>();
+        while (distinctWords.size() < maxUnigrams) {
+            distinctWords.add(CodePointUtils.generateWord(random, codePoints));
+        }
+        sWords.addAll(distinctWords);
+    }
 
     public void testGetRawDictWorks() throws IOException, UnsupportedFormatException {
         final String VERSION = "1";
@@ -68,23 +99,17 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase {
         final File dst = File.createTempFile("testGetRawDict", ".tmp");
         dst.deleteOnExit();
         try (final OutputStream out = Compress.getCompressedStream(
-                Compress.getCompressedStream(
-                        Compress.getCompressedStream(
-                                new BufferedOutputStream(new FileOutputStream(dst)))))) {
+                new BufferedOutputStream(new FileOutputStream(dst)))) {
             final DictEncoder dictEncoder = new Ver2DictEncoder(out);
-            dictEncoder.writeDictionary(dict, new FormatOptions(2, false));
+            dictEncoder.writeDictionary(dict, new FormatOptions(FormatSpec.VERSION202, false));
         }
 
         // Test for an actually compressed dictionary and its contents
-        final BinaryDictOffdeviceUtils.DecoderChainSpec decodeSpec =
+        final BinaryDictOffdeviceUtils.DecoderChainSpec<File> decodeSpec =
                 BinaryDictOffdeviceUtils.getRawDictionaryOrNull(dst);
-        for (final int step : decodeSpec.mDecoderSpec) {
-            assertEquals("Wrong decode spec",
-                    BinaryDictOffdeviceUtils.DecoderChainSpec.COMPRESSION, step);
-        }
-        assertEquals("Wrong decode spec", 3, decodeSpec.mDecoderSpec.length);
-        final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(decodeSpec.mFile, 0,
-                decodeSpec.mFile.length());
+        assertEquals("Wrong decode spec", "raw > compression", decodeSpec.describeChain());
+        final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(decodeSpec.mResult, 0,
+                decodeSpec.mResult.length());
         final FusionDictionary resultDict =
                 dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */);
         assertEquals("Wrong version attribute", VERSION, resultDict.mOptions.mAttributes.get(
@@ -125,4 +150,64 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase {
         assertNull("Wrongly identified data file",
                 BinaryDictOffdeviceUtils.getRawDictionaryOrNull(gzDst));
     }
+
+    /** Round-trips a random dictionary through optional compress/crypt steps; checks header. */
+    public void runTestHeaderReaderProcessorWithOneSpec(final boolean compress, final boolean crypt)
+            throws IOException, UnsupportedFormatException {
+        final String dictName = "testHeaderReaderProcessor";
+        final FormatOptions formatOptions = BinaryDictUtils.STATIC_OPTIONS;
+        final int MAX_NUMBER_OF_OPTIONS_TO_ADD = 5;
+        final HashMap<String, String> options = new HashMap<>();
+        // Required attributes
+        options.put("dictionary", "main:en_US");
+        options.put("locale", "en_US");
+        options.put("version", Integer.toString(mRandom.nextInt()));
+        // Add 0..MAX random options (word 2i -> word 2i+1), never more than sWords can supply.
+        final int numberOfOptionsToAdd = Math.min(sWords.size() / 2,
+                mRandom.nextInt(MAX_NUMBER_OF_OPTIONS_TO_ADD + 1));
+        for (int i = 0; i < numberOfOptionsToAdd; ++i) {
+            options.put(sWords.get(2 * i), sWords.get(2 * i + 1));
+        }
+        final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
+                new DictionaryOptions(options));
+        for (final String word : sWords) {
+            dict.add(word, new ProbabilityInfo(TEST_FREQ), null /* shortcuts */,
+                    false /* isNotAWord */, false /* isPossiblyOffensive */);
+        }
+
+        File file = File.createTempFile(dictName, ".tmp");
+        file.deleteOnExit();
+        final DictEncoder dictEncoder = BinaryDictUtils.getDictEncoder(file, formatOptions);
+        dictEncoder.writeDictionary(dict, formatOptions);
+        if (compress) {
+            final File rawFile = file;
+            file = File.createTempFile(dictName + ".compress", ".tmp");
+            file.deleteOnExit();
+            final Compress.Compressor compressCommand = new Compress.Compressor();
+            compressCommand.setArgs(new String[] { rawFile.getPath(), file.getPath() });
+            compressCommand.run();
+        }
+        if (crypt) {
+            final File rawFile = file;
+            file = File.createTempFile(dictName + ".crypt", ".tmp");
+            file.deleteOnExit();
+            final Crypt.Encrypter cryptCommand = new Crypt.Encrypter();
+            cryptCommand.setArgs(new String[] { rawFile.getPath(), file.getPath() });
+            cryptCommand.run();
+        }
+
+        final DecoderChainSpec<DictionaryHeader> spec =
+                BinaryDictOffdeviceUtils.decodeDictionaryForProcess(file,
+                        new BinaryDictOffdeviceUtils.HeaderReaderProcessor());
+        assertNotNull("Can't decode a dictionary we just wrote : " + file, spec);
+        assertEquals("raw" + (crypt ? " > encryption" : "") + (compress ? " > compression" : ""),
+                spec.describeChain());
+        assertEquals(options, spec.mResult.mDictionaryOptions.mAttributes);
+    }
+
+    public void testHeaderReaderProcessor() throws IOException, UnsupportedFormatException {
+        runTestHeaderReaderProcessorWithOneSpec(false /* compress */, false /* crypt */);
+        runTestHeaderReaderProcessorWithOneSpec(true /* compress */, false /* crypt */);
+        runTestHeaderReaderProcessorWithOneSpec(true /* compress */, true /* crypt */);
+    } // NOTE(review): the crypt-only case (compress=false, crypt=true) is not exercised.
 }