6 files changed, 1157 insertions, 424 deletions
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java
new file mode 100644
index 000000000..397532933
--- /dev/null
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java
@@ -0,0 +1,226 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.android.inputmethod.latin.makedict;
+
+import com.android.inputmethod.latin.Constants;
+import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface;
+import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
+import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
+import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Map;
+import java.util.Stack;
+
+public class BinaryDictIOUtils {
+    private static final boolean DBG = false;
+
+    private static class Position { // cursor for one node on the traversal stack
+        public static final int NOT_READ_GROUPCOUNT = -1; // group count not read yet
+
+        public int mAddress; // buffer position to seek to when this entry is processed
+        public int mNumOfCharGroup; // group count of the node, or NOT_READ_GROUPCOUNT
+        public int mPosition; // number of groups of this node read so far
+        public int mLength; // length of the character prefix accumulated above this node
+
+        public Position(int address, int length) {
+            mAddress = address;
+            mLength = length;
+            mNumOfCharGroup = NOT_READ_GROUPCOUNT; // mPosition is set once the count is read
+        }
+    }
+
+    /**
+     * Visits every node iteratively (explicit stack, no recursion), recording each terminal.
+     */
+    private static void readUnigramsAndBigramsBinaryInner(
+            final FusionDictionaryBufferInterface buffer, final int headerSize,
+            final Map<Integer, String> words, final Map<Integer, Integer> frequencies,
+            final Map<Integer, ArrayList<PendingAttribute>> bigrams,
+            final FormatOptions formatOptions) {
+        int[] pushedChars = new int[FormatSpec.MAX_WORD_LENGTH + 1]; // chars of current path
+
+        Stack<Position> stack = new Stack<Position>();
+        int index = 0; // number of valid entries in pushedChars
+
+        Position initPos = new Position(headerSize, 0); // start at headerSize, empty prefix
+        stack.push(initPos);
+
+        while (!stack.empty()) {
+            Position p = stack.peek(); // left on the stack until all its groups are read
+
+            if (DBG) {
+                MakedictLog.d("read: address=" + p.mAddress + ", numOfCharGroup=" +
+                        p.mNumOfCharGroup + ", position=" + p.mPosition + ", length=" + p.mLength);
+            }
+
+            if (buffer.position() != p.mAddress) buffer.position(p.mAddress);
+            if (index != p.mLength) index = p.mLength; // drop chars pushed by other branches
+
+            if (p.mNumOfCharGroup == Position.NOT_READ_GROUPCOUNT) { // first visit
+                p.mNumOfCharGroup = BinaryDictInputOutput.readCharGroupCount(buffer);
+                p.mAddress += BinaryDictInputOutput.getGroupCountSize(p.mNumOfCharGroup);
+                p.mPosition = 0;
+            }
+
+            CharGroupInfo info = BinaryDictInputOutput.readCharGroup(buffer,
+                    p.mAddress - headerSize, formatOptions); // offset past the header
+            for (int i = 0; i < info.mCharacters.length; ++i) {
+                pushedChars[index++] = info.mCharacters[i];
+            }
+            p.mPosition++;
+
+            if (info.mFrequency != FusionDictionary.CharGroup.NOT_A_TERMINAL) { // found word
+                words.put(info.mOriginalAddress, new String(pushedChars, 0, index));
+                frequencies.put(info.mOriginalAddress, info.mFrequency);
+                if (info.mBigrams != null) bigrams.put(info.mOriginalAddress, info.mBigrams);
+            }
+
+            if (p.mPosition == p.mNumOfCharGroup) { // that was the last group of this node
+                if (formatOptions.mHasLinkedListNode) {
+                    final int forwardLinkAddress = buffer.readUnsignedInt24();
+                    if (forwardLinkAddress != FormatSpec.NO_FORWARD_LINK_ADDRESS) {
+                        // the node has a forward link: reuse this entry for the linked node.
+                        p.mNumOfCharGroup = Position.NOT_READ_GROUPCOUNT;
+                        p.mAddress = forwardLinkAddress; // NOTE(review): headerSize not added
+                    } else {
+                        stack.pop();
+                    }
+                } else {
+                    stack.pop();
+                }
+            } else {
+                // the node has more groups: resume from here on the next visit.
+                p.mAddress = buffer.position();
+            }
+
+            if (BinaryDictInputOutput.hasChildrenAddress(info.mChildrenAddress)) {
+                Position childrenPos = new Position(info.mChildrenAddress + headerSize, index);
+                stack.push(childrenPos); // children are visited before the remaining groups
+            }
+        }
+    }
+
+    /**
+     * Reads unigrams and bigrams from the binary file.
+     * Doesn't make the memory representation of the dictionary.
+     *
+     * @param buffer the buffer to read; it is not repositioned here before the header is read.
+     * @param words the map to store the address as a key and the word as a value.
+     * @param frequencies the map to store the address as a key and the frequency as a value.
+     * @param bigrams the map to store the address as a key and the bigram list as a value.
+     * @throws IOException
+     * @throws UnsupportedFormatException
+     */
+    public static void readUnigramsAndBigramsBinary(final FusionDictionaryBufferInterface buffer,
+            final Map<Integer, String> words, final Map<Integer, Integer> frequencies,
+            final Map<Integer, ArrayList<PendingAttribute>> bigrams) throws IOException,
+            UnsupportedFormatException {
+        // Read header, then walk the nodes using its size and format options
+        final FileHeader header = BinaryDictInputOutput.readHeader(buffer);
+        readUnigramsAndBigramsBinaryInner(buffer, header.mHeaderSize, words, frequencies, bigrams,
+                header.mFormatOptions);
+    }
+
+    /**
+     * Gets the address of the last CharGroup of the exact matching word in the dictionary.
+     * If no match is found, returns NOT_VALID_WORD.
+     *
+     * @param buffer the buffer to read; repositioned to 0 before the header is read.
+     * @param word the word we search for; null yields NOT_VALID_WORD.
+     * @return the buffer position of the matching terminal CharGroup, or NOT_VALID_WORD.
+     * @throws IOException
+     * @throws UnsupportedFormatException
+     */
+    public static int getTerminalPosition(final FusionDictionaryBufferInterface buffer,
+            final String word) throws IOException, UnsupportedFormatException {
+        if (word == null) return FormatSpec.NOT_VALID_WORD;
+        if (buffer.position() != 0) buffer.position(0);
+
+        final FileHeader header = BinaryDictInputOutput.readHeader(buffer);
+        int wordPos = 0; // code points of word matched so far
+        final int wordLen = word.codePointCount(0, word.length());
+        for (int depth = 0; depth < Constants.Dictionary.MAX_WORD_LENGTH; ++depth) {
+            if (wordPos >= wordLen) return FormatSpec.NOT_VALID_WORD;
+            int groupOffset = buffer.position() - header.mHeaderSize; // NOTE(review): unused
+            final int charGroupCount = BinaryDictInputOutput.readCharGroupCount(buffer);
+            groupOffset += BinaryDictInputOutput.getGroupCountSize(charGroupCount);
+
+            for (int i = 0; i < charGroupCount; ++i) {
+                final int charGroupPos = buffer.position(); // where this group starts
+                final CharGroupInfo currentInfo = BinaryDictInputOutput.readCharGroup(buffer,
+                        buffer.position(), header.mFormatOptions);
+                boolean same = true;
+                for (int p = 0, j = word.offsetByCodePoints(0, wordPos); // j: char index in word
+                        p < currentInfo.mCharacters.length;
+                        ++p, j = word.offsetByCodePoints(j, 1)) {
+                    if (wordPos + p >= wordLen
+                            || word.codePointAt(j) != currentInfo.mCharacters[p]) {
+                        same = false;
+                        break;
+                    }
+                }
+
+                if (same) {
+                    if (wordPos + currentInfo.mCharacters.length == wordLen) { // all matched
+                        if (currentInfo.mFrequency == CharGroup.NOT_A_TERMINAL) {
+                            return FormatSpec.NOT_VALID_WORD; // a prefix only, not a word
+                        } else {
+                            return charGroupPos;
+                        }
+                    }
+                    wordPos += currentInfo.mCharacters.length;
+                    if (currentInfo.mChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS) {
+                        return FormatSpec.NOT_VALID_WORD;
+                    }
+                    buffer.position(currentInfo.mChildrenAddress); // descend to the children
+                    break;
+                }
+                groupOffset = currentInfo.mEndAddress;
+
+                // not found: the last group of this node did not match either.
+                if (i >= charGroupCount - 1) {
+                    return FormatSpec.NOT_VALID_WORD;
+                }
+            }
+        }
+        return FormatSpec.NOT_VALID_WORD;
+    }
+
+    /**
+     * Deletes the word by flipping the terminal flag of its last CharGroup in the buffer.
+     * Does nothing when getTerminalPosition does not find the word.
+     * @param buffer the buffer to write.
+     * @param word the word we delete
+     * @throws IOException
+     * @throws UnsupportedFormatException
+     */
+    public static void deleteWord(final FusionDictionaryBufferInterface buffer,
+            final String word) throws IOException, UnsupportedFormatException {
+        buffer.position(0);
+        final FileHeader header = BinaryDictInputOutput.readHeader(buffer); // result unused
+        final int wordPosition = getTerminalPosition(buffer, word);
+        if (wordPosition == FormatSpec.NOT_VALID_WORD) return;
+
+        buffer.position(wordPosition);
+        final int flags = buffer.readUnsignedByte();
+        final int newFlags = flags ^ FormatSpec.FLAG_IS_TERMINAL; // XOR toggles the bit
+        buffer.position(wordPosition); // seek back to overwrite the byte just read
+        buffer.put((byte)newFlags);
+    }
+}
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
index 89c59f809..7b8dc5cc5 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
@@ -16,21 +16,27 @@
 
 package com.android.inputmethod.latin.makedict;
 
+import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
+import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
 import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
 import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
 import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
 import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
 
 import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.OutputStream;
-import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.Map;
+import java.util.Stack;
 import java.util.TreeMap;
 
 /**
@@ -40,143 +46,7 @@ import java.util.TreeMap;
  */
 public class BinaryDictInputOutput {
 
-    final static boolean DBG = MakedictLog.DBG;
-
-    /* Node layout is as follows:
-     *   | addressType                         xx     : mask with MASK_GROUP_ADDRESS_TYPE
-     *                                 2 bits, 00 = no children : FLAG_GROUP_ADDRESS_TYPE_NOADDRESS
-     * f |                                     01 = 1 byte      : FLAG_GROUP_ADDRESS_TYPE_ONEBYTE
-     * l |                                     10 = 2 bytes     : FLAG_GROUP_ADDRESS_TYPE_TWOBYTES
-     * a |                                     11 = 3 bytes     : FLAG_GROUP_ADDRESS_TYPE_THREEBYTES
-     * g | has several chars ?         1 bit, 1 = yes, 0 = no   : FLAG_HAS_MULTIPLE_CHARS
-     * s | has a terminal ?            1 bit, 1 = yes, 0 = no   : FLAG_IS_TERMINAL
-     *   | has shortcut targets ?      1 bit, 1 = yes, 0 = no   : FLAG_HAS_SHORTCUT_TARGETS
-     *   | has bigrams ?               1 bit, 1 = yes, 0 = no   : FLAG_HAS_BIGRAMS
-     *
-     * c | IF FLAG_HAS_MULTIPLE_CHARS
-     * h |   char, char, char, char    n * (1 or 3 bytes) : use CharGroupInfo for i/o helpers
-     * a |   end                       1 byte, = 0
-     * r | ELSE
-     * s |   char                      1 or 3 bytes
-     *   | END
-     *
-     * f |
-     * r | IF FLAG_IS_TERMINAL
-     * e |   frequency                 1 byte
-     * q |
-     *
-     * c | IF 00 = FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = addressType
-     * h |   // nothing
-     * i | ELSIF 01 = FLAG_GROUP_ADDRESS_TYPE_ONEBYTE == addressType
-     * l |   children address, 1 byte
-     * d | ELSIF 10 = FLAG_GROUP_ADDRESS_TYPE_TWOBYTES == addressType
-     * r |   children address, 2 bytes
-     * e | ELSE // 11 = FLAG_GROUP_ADDRESS_TYPE_THREEBYTES = addressType
-     * n |   children address, 3 bytes
-     * A | END
-     * d
-     * dress
-     *
-     *   | IF FLAG_IS_TERMINAL && FLAG_HAS_SHORTCUT_TARGETS
-     *   | shortcut string list
-     *   | IF FLAG_IS_TERMINAL && FLAG_HAS_BIGRAMS
-     *   | bigrams address list
-     *
-     * Char format is:
-     * 1 byte = bbbbbbbb match
-     * case 000xxxxx: xxxxx << 16 + next byte << 8 + next byte
-     * else: if 00011111 (= 0x1F) : this is the terminator. This is a relevant choice because
-     *       unicode code points range from 0 to 0x10FFFF, so any 3-byte value starting with
-     *       00011111 would be outside unicode.
-     * else: iso-latin-1 code
-     * This allows for the whole unicode range to be encoded, including chars outside of
-     * the BMP. Also everything in the iso-latin-1 charset is only 1 byte, except control
-     * characters which should never happen anyway (and still work, but take 3 bytes).
-     *
-     * bigram address list is:
-     * <flags> = | hasNext = 1 bit, 1 = yes, 0 = no     : FLAG_ATTRIBUTE_HAS_NEXT
-     *           | addressSign = 1 bit,                 : FLAG_ATTRIBUTE_OFFSET_NEGATIVE
-     *           |                      1 = must take -address, 0 = must take +address
-     *           |                         xx : mask with MASK_ATTRIBUTE_ADDRESS_TYPE
-     *           | addressFormat = 2 bits, 00 = unused  : FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE
-     *           |                         01 = 1 byte  : FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE
-     *           |                         10 = 2 bytes : FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES
-     *           |                         11 = 3 bytes : FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES
-     *           | 4 bits : frequency         : mask with FLAG_ATTRIBUTE_FREQUENCY
-     * <address> | IF (01 == FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE == addressFormat)
-     *           |   read 1 byte, add top 4 bits
-     *           | ELSIF (10 == FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES == addressFormat)
-     *           |   read 2 bytes, add top 4 bits
-     *           | ELSE // 11 == FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES == addressFormat
-     *           |   read 3 bytes, add top 4 bits
-     *           | END
-     *           | if (FLAG_ATTRIBUTE_OFFSET_NEGATIVE) then address = -address
-     * if (FLAG_ATTRIBUTE_HAS_NEXT) goto bigram_and_shortcut_address_list_is
-     *
-     * shortcut string list is:
-     * <byte size> = GROUP_SHORTCUT_LIST_SIZE_SIZE bytes, big-endian: size of the list, in bytes.
-     * <flags>     = | hasNext = 1 bit, 1 = yes, 0 = no : FLAG_ATTRIBUTE_HAS_NEXT
-     *               | reserved = 3 bits, must be 0
-     *               | 4 bits : frequency : mask with FLAG_ATTRIBUTE_FREQUENCY
-     * <shortcut>  = | string of characters at the char format described above, with the terminator
-     *               | used to signal the end of the string.
-     * if (FLAG_ATTRIBUTE_HAS_NEXT goto flags
-     */
-
-    private static final int VERSION_1_MAGIC_NUMBER = 0x78B1;
-    private static final int VERSION_2_MAGIC_NUMBER = 0x9BC13AFE;
-    private static final int MINIMUM_SUPPORTED_VERSION = 1;
-    private static final int MAXIMUM_SUPPORTED_VERSION = 2;
-    private static final int NOT_A_VERSION_NUMBER = -1;
-    private static final int FIRST_VERSION_WITH_HEADER_SIZE = 2;
-
-    // These options need to be the same numeric values as the one in the native reading code.
-    private static final int GERMAN_UMLAUT_PROCESSING_FLAG = 0x1;
-    private static final int FRENCH_LIGATURE_PROCESSING_FLAG = 0x4;
-    private static final int CONTAINS_BIGRAMS_FLAG = 0x8;
-
-    // TODO: Make this value adaptative to content data, store it in the header, and
-    // use it in the reading code.
-    private static final int MAX_WORD_LENGTH = 48;
-
-    private static final int MASK_GROUP_ADDRESS_TYPE = 0xC0;
-    private static final int FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00;
-    private static final int FLAG_GROUP_ADDRESS_TYPE_ONEBYTE = 0x40;
-    private static final int FLAG_GROUP_ADDRESS_TYPE_TWOBYTES = 0x80;
-    private static final int FLAG_GROUP_ADDRESS_TYPE_THREEBYTES = 0xC0;
-
-    private static final int FLAG_HAS_MULTIPLE_CHARS = 0x20;
-
-    private static final int FLAG_IS_TERMINAL = 0x10;
-    private static final int FLAG_HAS_SHORTCUT_TARGETS = 0x08;
-    private static final int FLAG_HAS_BIGRAMS = 0x04;
-
-    private static final int FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
-    private static final int FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
-    private static final int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30;
-    private static final int FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10;
-    private static final int FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20;
-    private static final int FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30;
-    private static final int FLAG_ATTRIBUTE_FREQUENCY = 0x0F;
-
-    private static final int GROUP_CHARACTERS_TERMINATOR = 0x1F;
-
-    private static final int GROUP_TERMINATOR_SIZE = 1;
-    private static final int GROUP_FLAGS_SIZE = 1;
-    private static final int GROUP_FREQUENCY_SIZE = 1;
-    private static final int GROUP_MAX_ADDRESS_SIZE = 3;
-    private static final int GROUP_ATTRIBUTE_FLAGS_SIZE = 1;
-    private static final int GROUP_ATTRIBUTE_MAX_ADDRESS_SIZE = 3;
-    private static final int GROUP_SHORTCUT_LIST_SIZE_SIZE = 2;
-
-    private static final int NO_CHILDREN_ADDRESS = Integer.MIN_VALUE;
-    private static final int INVALID_CHARACTER = -1;
-
-    private static final int MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT = 0x7F; // 127
-    private static final int MAX_CHARGROUPS_IN_A_NODE = 0x7FFF; // 32767
-
-    private static final int MAX_TERMINAL_FREQUENCY = 255;
-    private static final int MAX_BIGRAM_FREQUENCY = 15;
+    private static final boolean DBG = MakedictLog.DBG;
 
     // Arbitrary limit to how much passes we consider address size compression should
     // terminate in. At the time of this writing, our largest dictionary completes
@@ -185,6 +55,66 @@ public class BinaryDictInputOutput {
     // suspicion that a bug might be causing an infinite loop.
     private static final int MAX_PASSES = 24;
 
+    public interface FusionDictionaryBufferInterface { // positional byte-buffer abstraction
+        public int readUnsignedByte();
+        public int readUnsignedShort();
+        public int readUnsignedInt24(); // 3-byte unsigned value
+        public int readInt();
+        public int position(); // current position
+        public void position(int newPosition); // moves to newPosition
+        public void put(final byte b); // writes one byte
+        public int limit();
+    }
+
+    public static final class ByteBufferWrapper implements FusionDictionaryBufferInterface {
+        private ByteBuffer mBuffer; // backing buffer; every method below delegates to it
+
+        public ByteBufferWrapper(final ByteBuffer buffer) {
+            mBuffer = buffer;
+        }
+
+        @Override
+        public int readUnsignedByte() {
+            return ((int)mBuffer.get()) & 0xFF; // mask undoes sign extension of the byte
+        }
+
+        @Override
+        public int readUnsignedShort() {
+            return ((int)mBuffer.getShort()) & 0xFFFF; // mask undoes sign extension
+        }
+
+        @Override
+        public int readUnsignedInt24() {
+            final int retval = readUnsignedByte(); // first byte is the most significant
+            return (retval << 16) + readUnsignedShort();
+        }
+
+        @Override
+        public int readInt() {
+            return mBuffer.getInt();
+        }
+
+        @Override
+        public int position() {
+            return mBuffer.position();
+        }
+
+        @Override
+        public void position(int newPos) {
+            mBuffer.position(newPos);
+        }
+
+        @Override
+        public void put(final byte b) {
+            mBuffer.put(b); // relative put: writes at the current position
+        }
+
+        @Override
+        public int limit() {
+            return mBuffer.limit();
+        }
+    }
+
     /**
      * A class grouping utility function for our specific character encoding.
      */
@@ -196,7 +126,7 @@ public class BinaryDictInputOutput {
         /**
          * Helper method to find out whether this code fits on one byte
          */
-        private static boolean fitsOnOneByte(int character) {
+        private static boolean fitsOnOneByte(final int character) {
             return character >= MINIMAL_ONE_BYTE_CHARACTER_VALUE
                     && character <= MAXIMAL_ONE_BYTE_CHARACTER_VALUE;
         }
@@ -218,10 +148,10 @@ public class BinaryDictInputOutput {
          * @param character the character code.
          * @return the size in binary encoded-form, either 1 or 3 bytes.
          */
-        private static int getCharSize(int character) {
+        private static int getCharSize(final int character) {
             // See char encoding in FusionDictionary.java
             if (fitsOnOneByte(character)) return 1;
-            if (INVALID_CHARACTER == character) return 1;
+            if (FormatSpec.INVALID_CHARACTER == character) return 1;
             return 3;
         }
 
@@ -279,7 +209,7 @@ public class BinaryDictInputOutput {
                     buffer[index++] = (byte)(0xFF & codePoint);
                 }
             }
-            buffer[index++] = GROUP_CHARACTERS_TERMINATOR;
+            buffer[index++] = FormatSpec.GROUP_CHARACTERS_TERMINATOR;
             return index - origin;
         }
 
@@ -291,7 +221,7 @@ public class BinaryDictInputOutput {
          * @param buffer the ByteArrayOutputStream to write to.
          * @param word the string to write.
          */
-        private static void writeString(ByteArrayOutputStream buffer, final String word) {
+        private static void writeString(final ByteArrayOutputStream buffer, final String word) {
             final int length = word.length();
             for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) {
                 final int codePoint = word.codePointAt(i);
@@ -303,37 +233,38 @@ public class BinaryDictInputOutput {
                     buffer.write((byte) (0xFF & codePoint));
                 }
             }
-            buffer.write(GROUP_CHARACTERS_TERMINATOR);
+            buffer.write(FormatSpec.GROUP_CHARACTERS_TERMINATOR);
         }
 
         /**
-         * Reads a string from a RandomAccessFile. This is the converse of the above method.
+         * Reads a string from a buffer. This is the converse of the above method.
          */
-        private static String readString(final RandomAccessFile source) throws IOException {
+        private static String readString(final FusionDictionaryBufferInterface buffer) {
             final StringBuilder s = new StringBuilder();
-            int character = readChar(source);
-            while (character != INVALID_CHARACTER) {
+            int character = readChar(buffer);
+            while (character != FormatSpec.INVALID_CHARACTER) {
                 s.appendCodePoint(character);
-                character = readChar(source);
+                character = readChar(buffer);
             }
             return s.toString();
         }
 
         /**
-         * Reads a character from the file.
+         * Reads a character from the buffer.
          *
          * This follows the character format documented earlier in this source file.
          *
-         * @param source the file, positioned over an encoded character.
+         * @param buffer the buffer, positioned over an encoded character.
          * @return the character code.
          */
-        private static int readChar(RandomAccessFile source) throws IOException {
-            int character = source.readUnsignedByte();
+        private static int readChar(final FusionDictionaryBufferInterface buffer) {
+            int character = buffer.readUnsignedByte();
             if (!fitsOnOneByte(character)) {
-                if (GROUP_CHARACTERS_TERMINATOR == character)
-                    return INVALID_CHARACTER;
+                if (FormatSpec.GROUP_CHARACTERS_TERMINATOR == character) {
+                    return FormatSpec.INVALID_CHARACTER;
+                }
                 character <<= 16;
-                character += source.readUnsignedShort();
+                character += buffer.readUnsignedShort();
             }
             return character;
         }
@@ -348,9 +279,9 @@ public class BinaryDictInputOutput {
      * @param group the group
      * @return the size of the char array, including the terminator if any
      */
-    private static int getGroupCharactersSize(CharGroup group) {
+    private static int getGroupCharactersSize(final CharGroup group) {
         int size = CharEncoding.getCharArraySize(group.mChars);
-        if (group.hasSeveralChars()) size += GROUP_TERMINATOR_SIZE;
+        if (group.hasSeveralChars()) size += FormatSpec.GROUP_TERMINATOR_SIZE;
         return size;
     }
 
@@ -359,14 +290,15 @@ public class BinaryDictInputOutput {
      * @param count the group count
      * @return the size of the group count, either 1 or 2 bytes.
      */
-    private static int getGroupCountSize(final int count) {
-        if (MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT >= count) {
+    public static int getGroupCountSize(final int count) {
+        if (FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT >= count) {
             return 1;
-        } else if (MAX_CHARGROUPS_IN_A_NODE >= count) {
+        } else if (FormatSpec.MAX_CHARGROUPS_IN_A_NODE >= count) {
             return 2;
         } else {
-            throw new RuntimeException("Can't have more than " + MAX_CHARGROUPS_IN_A_NODE
-                    + " groups in a node (found " + count +")");
+            throw new RuntimeException("Can't have more than "
+                    + FormatSpec.MAX_CHARGROUPS_IN_A_NODE + " groups in a node (found " + count
+                    + ")");
         }
     }
 
@@ -383,14 +315,14 @@ public class BinaryDictInputOutput {
      * Compute the size of a shortcut in bytes.
      */
     private static int getShortcutSize(final WeightedString shortcut) {
-        int size = GROUP_ATTRIBUTE_FLAGS_SIZE;
+        int size = FormatSpec.GROUP_ATTRIBUTE_FLAGS_SIZE;
         final String word = shortcut.mWord;
         final int length = word.length();
         for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) {
             final int codePoint = word.codePointAt(i);
             size += CharEncoding.getCharSize(codePoint);
         }
-        size += GROUP_TERMINATOR_SIZE;
+        size += FormatSpec.GROUP_TERMINATOR_SIZE;
         return size;
     }
 
@@ -402,7 +334,7 @@ public class BinaryDictInputOutput {
      */
     private static int getShortcutListSize(final ArrayList<WeightedString> shortcutList) {
         if (null == shortcutList) return 0;
-        int size = GROUP_SHORTCUT_LIST_SIZE_SIZE;
+        int size = FormatSpec.GROUP_SHORTCUT_LIST_SIZE_SIZE;
         for (final WeightedString shortcut : shortcutList) {
             size += getShortcutSize(shortcut);
         }
@@ -413,16 +345,18 @@ public class BinaryDictInputOutput {
      * Compute the maximum size of a CharGroup, assuming 3-byte addresses for everything.
      *
      * @param group the CharGroup to compute the size of.
+     * @param options file format options.
      * @return the maximum size of the group.
      */
-    private static int getCharGroupMaximumSize(CharGroup group) {
-        int size = getGroupCharactersSize(group) + GROUP_FLAGS_SIZE;
+    private static int getCharGroupMaximumSize(final CharGroup group, final FormatOptions options) {
+        int size = getGroupHeaderSize(group, options);
         // If terminal, one byte for the frequency
-        if (group.isTerminal()) size += GROUP_FREQUENCY_SIZE;
-        size += GROUP_MAX_ADDRESS_SIZE; // For children address
+        if (group.isTerminal()) size += FormatSpec.GROUP_FREQUENCY_SIZE;
+        size += FormatSpec.GROUP_MAX_ADDRESS_SIZE; // For children address
         size += getShortcutListSize(group.mShortcutTargets);
         if (null != group.mBigrams) {
-            size += (GROUP_ATTRIBUTE_FLAGS_SIZE + GROUP_ATTRIBUTE_MAX_ADDRESS_SIZE)
+            size += (FormatSpec.GROUP_ATTRIBUTE_FLAGS_SIZE
+                    + FormatSpec.GROUP_ATTRIBUTE_MAX_ADDRESS_SIZE)
                     * group.mBigrams.size();
         }
         return size;
@@ -433,22 +367,49 @@ public class BinaryDictInputOutput {
      * it in the 'actualSize' member of the node.
      *
      * @param node the node to compute the maximum size of.
+     * @param options file format options.
      */
-    private static void setNodeMaximumSize(Node node) {
+    private static void setNodeMaximumSize(final Node node, final FormatOptions options) {
         int size = getGroupCountSize(node);
         for (CharGroup g : node.mData) {
-            final int groupSize = getCharGroupMaximumSize(g);
+            final int groupSize = getCharGroupMaximumSize(g, options);
             g.mCachedSize = groupSize;
             size += groupSize;
         }
+        if (options.mHasLinkedListNode) {
+            size += FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
+        }
         node.mCachedSize = size;
     }
 
     /**
      * Helper method to hide the actual value of the no children address.
      */
-    private static boolean hasChildrenAddress(int address) {
-        return NO_CHILDREN_ADDRESS != address;
+    public static boolean hasChildrenAddress(final int address) {
+        return FormatSpec.NO_CHILDREN_ADDRESS != address;
+    }
+
+    /**
+     * Tells whether the given format options put a parent address in each CharGroup.
+     */
+    public static boolean hasParentAddress(final FormatOptions options) {
+        return options.mVersion >= FormatSpec.FIRST_VERSION_WITH_PARENT_ADDRESS
+                && options.mHasParentAddress; // both the version and the flag must allow it
+    }
+
+    /**
+     * Compute the size of the header (flag + [parent address] + characters size) of a CharGroup.
+     * @param group the group of which to compute the size of the header
+     * @param options file format options.
+     * @return the header size; includes the parent address only if hasParentAddress(options).
+     */
+    private static int getGroupHeaderSize(final CharGroup group, final FormatOptions options) {
+        if (hasParentAddress(options)) {
+            return FormatSpec.GROUP_FLAGS_SIZE + FormatSpec.PARENT_ADDRESS_SIZE
+                    + getGroupCharactersSize(group);
+        } else {
+            return FormatSpec.GROUP_FLAGS_SIZE + getGroupCharactersSize(group);
+        }
+    }
 
     /**
@@ -461,7 +422,7 @@ public class BinaryDictInputOutput {
      * @param address the address
      * @return the byte size.
      */
-    private static int getByteSize(int address) {
+    private static int getByteSize(final int address) {
         assert(address < 0x1000000);
         if (!hasChildrenAddress(address)) {
             return 0;
@@ -477,14 +438,14 @@ public class BinaryDictInputOutput {
 
     // This method is responsible for finding a nice ordering of the nodes that favors run-time
     // cache performance and dictionary size.
-    /* package for tests */ static ArrayList<Node> flattenTree(Node root) {
+    /* package for tests */ static ArrayList<Node> flattenTree(final Node root) {
         final int treeSize = FusionDictionary.countCharGroups(root);
         MakedictLog.i("Counted nodes : " + treeSize);
         final ArrayList<Node> flatTree = new ArrayList<Node>(treeSize);
         return flattenTreeInner(flatTree, root);
     }
 
-    private static ArrayList<Node> flattenTreeInner(ArrayList<Node> list, Node node) {
+    private static ArrayList<Node> flattenTreeInner(final ArrayList<Node> list, final Node node) {
         // Removing the node is necessary if the tails are merged, because we would then
         // add the same node several times when we only want it once. A number of places in
         // the code also depends on any node being only once in the list.
@@ -534,9 +495,11 @@ public class BinaryDictInputOutput {
      *
      * @param node the node to compute the size of.
      * @param dict the dictionary in which the word/attributes are to be found.
+     * @param formatOptions file format options.
      * @return false if none of the cached addresses inside the node changed, true otherwise.
      */
-    private static boolean computeActualNodeSize(Node node, FusionDictionary dict) {
+    private static boolean computeActualNodeSize(final Node node, final FusionDictionary dict,
+            final FormatOptions formatOptions) {
         boolean changed = false;
         int size = getGroupCountSize(node);
         for (CharGroup group : node.mData) {
@@ -544,26 +507,32 @@ public class BinaryDictInputOutput {
                 changed = true;
                 group.mCachedAddress = node.mCachedAddress + size;
             }
-            int groupSize = GROUP_FLAGS_SIZE + getGroupCharactersSize(group);
-            if (group.isTerminal()) groupSize += GROUP_FREQUENCY_SIZE;
+            int groupSize = getGroupHeaderSize(group, formatOptions);
+            if (group.isTerminal()) groupSize += FormatSpec.GROUP_FREQUENCY_SIZE;
             if (null != group.mChildren) {
-                final int offsetBasePoint= groupSize + node.mCachedAddress + size;
+                final int offsetBasePoint = groupSize + node.mCachedAddress + size;
                 final int offset = group.mChildren.mCachedAddress - offsetBasePoint;
+                // assign my address to children's parent address
+                group.mChildren.mCachedParentAddress = group.mCachedAddress
+                        - group.mChildren.mCachedAddress;
                 groupSize += getByteSize(offset);
             }
             groupSize += getShortcutListSize(group.mShortcutTargets);
             if (null != group.mBigrams) {
                 for (WeightedString bigram : group.mBigrams) {
                     final int offsetBasePoint = groupSize + node.mCachedAddress + size
-                            + GROUP_FLAGS_SIZE;
+                            + FormatSpec.GROUP_FLAGS_SIZE;
                     final int addressOfBigram = findAddressOfWord(dict, bigram.mWord);
                     final int offset = addressOfBigram - offsetBasePoint;
-                    groupSize += getByteSize(offset) + GROUP_FLAGS_SIZE;
+                    groupSize += getByteSize(offset) + FormatSpec.GROUP_FLAGS_SIZE;
                 }
             }
             group.mCachedSize = groupSize;
             size += groupSize;
         }
+        if (formatOptions.mHasLinkedListNode) {
+            size += FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
+        }
         if (node.mCachedSize != size) {
             node.mCachedSize = size;
             changed = true;
@@ -575,9 +544,11 @@ public class BinaryDictInputOutput {
      * Computes the byte size of a list of nodes and updates each node cached position.
      *
      * @param flatNodes the array of nodes.
+     * @param formatOptions file format options.
      * @return the byte size of the entire stack.
      */
-    private static int stackNodes(ArrayList<Node> flatNodes) {
+    private static int stackNodes(final ArrayList<Node> flatNodes,
+            final FormatOptions formatOptions) {
         int nodeOffset = 0;
         for (Node n : flatNodes) {
             n.mCachedAddress = nodeOffset;
@@ -587,7 +558,9 @@ public class BinaryDictInputOutput {
                 g.mCachedAddress = groupCountSize + nodeOffset + groupOffset;
                 groupOffset += g.mCachedSize;
             }
-            if (groupOffset + groupCountSize != n.mCachedSize) {
+            final int nodeSize = groupCountSize + groupOffset
+                    + (formatOptions.mHasLinkedListNode ? FormatSpec.FORWARD_LINK_ADDRESS_SIZE : 0);
+            if (nodeSize != n.mCachedSize) {
                 throw new RuntimeException("Bug : Stored and computed node size differ");
             }
             nodeOffset += n.mCachedSize;
@@ -607,13 +580,14 @@ public class BinaryDictInputOutput {
      *
      * @param dict the dictionary
      * @param flatNodes the ordered array of nodes
+     * @param formatOptions file format options.
      * @return the same array it was passed. The nodes have been updated for address and size.
      */
-    private static ArrayList<Node> computeAddresses(FusionDictionary dict,
-            ArrayList<Node> flatNodes) {
+    private static ArrayList<Node> computeAddresses(final FusionDictionary dict,
+            final ArrayList<Node> flatNodes, final FormatOptions formatOptions) {
         // First get the worst sizes and offsets
-        for (Node n : flatNodes) setNodeMaximumSize(n);
-        final int offset = stackNodes(flatNodes);
+        for (Node n : flatNodes) setNodeMaximumSize(n, formatOptions);
+        final int offset = stackNodes(flatNodes, formatOptions);
 
         MakedictLog.i("Compressing the array addresses. Original size : " + offset);
         MakedictLog.i("(Recursively seen size : " + offset + ")");
@@ -624,12 +598,12 @@ public class BinaryDictInputOutput {
             changesDone = false;
             for (Node n : flatNodes) {
                 final int oldNodeSize = n.mCachedSize;
-                final boolean changed = computeActualNodeSize(n, dict);
+                final boolean changed = computeActualNodeSize(n, dict, formatOptions);
                 final int newNodeSize = n.mCachedSize;
                 if (oldNodeSize < newNodeSize) throw new RuntimeException("Increased size ?!");
                 changesDone |= changed;
             }
-            stackNodes(flatNodes);
+            stackNodes(flatNodes, formatOptions);
             ++passes;
             if (passes > MAX_PASSES) throw new RuntimeException("Too many passes - probably a bug");
         } while (changesDone);
@@ -652,7 +626,7 @@ public class BinaryDictInputOutput {
      *
      * @param array the array node to check
      */
-    private static void checkFlatNodeArray(ArrayList<Node> array) {
+    private static void checkFlatNodeArray(final ArrayList<Node> array) {
         int offset = 0;
         int index = 0;
         for (Node n : array) {
@@ -697,20 +671,20 @@ public class BinaryDictInputOutput {
     private static byte makeCharGroupFlags(final CharGroup group, final int groupAddress,
             final int childrenOffset) {
         byte flags = 0;
-        if (group.mChars.length > 1) flags |= FLAG_HAS_MULTIPLE_CHARS;
+        if (group.mChars.length > 1) flags |= FormatSpec.FLAG_HAS_MULTIPLE_CHARS;
         if (group.mFrequency >= 0) {
-            flags |= FLAG_IS_TERMINAL;
+            flags |= FormatSpec.FLAG_IS_TERMINAL;
         }
         if (null != group.mChildren) {
             switch (getByteSize(childrenOffset)) {
              case 1:
-                 flags |= FLAG_GROUP_ADDRESS_TYPE_ONEBYTE;
+                 flags |= FormatSpec.FLAG_GROUP_ADDRESS_TYPE_ONEBYTE;
                  break;
              case 2:
-                 flags |= FLAG_GROUP_ADDRESS_TYPE_TWOBYTES;
+                 flags |= FormatSpec.FLAG_GROUP_ADDRESS_TYPE_TWOBYTES;
                  break;
              case 3:
-                 flags |= FLAG_GROUP_ADDRESS_TYPE_THREEBYTES;
+                 flags |= FormatSpec.FLAG_GROUP_ADDRESS_TYPE_THREEBYTES;
                  break;
              default:
                  throw new RuntimeException("Node with a strange address");
@@ -720,13 +694,19 @@ public class BinaryDictInputOutput {
             if (DBG && 0 == group.mShortcutTargets.size()) {
                 throw new RuntimeException("0-sized shortcut list must be null");
             }
-            flags |= FLAG_HAS_SHORTCUT_TARGETS;
+            flags |= FormatSpec.FLAG_HAS_SHORTCUT_TARGETS;
         }
         if (null != group.mBigrams) {
             if (DBG && 0 == group.mBigrams.size()) {
                 throw new RuntimeException("0-sized bigram list must be null");
             }
-            flags |= FLAG_HAS_BIGRAMS;
+            flags |= FormatSpec.FLAG_HAS_BIGRAMS;
+        }
+        if (group.mIsNotAWord) {
+            flags |= FormatSpec.FLAG_IS_NOT_A_WORD;
+        }
+        if (group.mIsBlacklistEntry) {
+            flags |= FormatSpec.FLAG_IS_BLACKLISTED;
         }
         return flags;
     }
@@ -743,17 +723,17 @@ public class BinaryDictInputOutput {
      */
     private static final int makeBigramFlags(final boolean more, final int offset,
             int bigramFrequency, final int unigramFrequency, final String word) {
-        int bigramFlags = (more ? FLAG_ATTRIBUTE_HAS_NEXT : 0)
-                + (offset < 0 ? FLAG_ATTRIBUTE_OFFSET_NEGATIVE : 0);
+        int bigramFlags = (more ? FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT : 0)
+                + (offset < 0 ? FormatSpec.FLAG_ATTRIBUTE_OFFSET_NEGATIVE : 0);
         switch (getByteSize(offset)) {
         case 1:
-            bigramFlags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE;
+            bigramFlags |= FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE;
             break;
         case 2:
-            bigramFlags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES;
+            bigramFlags |= FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES;
             break;
         case 3:
-            bigramFlags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
+            bigramFlags |= FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
             break;
         default:
             throw new RuntimeException("Strange offset size");
@@ -783,13 +763,14 @@ public class BinaryDictInputOutput {
         // their lower bound and exclude their higher bound so we need to have the first step
         // start at exactly 1 unit higher than floor(unigramFreq + half a step).
         // Note : to reconstruct the score, the dictionary reader will need to divide
-        // MAX_TERMINAL_FREQUENCY - unigramFreq by 16.5 likewise, and add
-        // (discretizedFrequency + 0.5) times this value to get the median value of the step,
-        // which is the best approximation. This is how we get the most precise result with
-        // only four bits.
-        final double stepSize =
-                (double)(MAX_TERMINAL_FREQUENCY - unigramFrequency) / (1.5 + MAX_BIGRAM_FREQUENCY);
-        final double firstStepStart = 1 + unigramFrequency + (stepSize / 2.0);
+        // MAX_TERMINAL_FREQUENCY - unigramFreq by 16.5 likewise to get the value of the step,
+        // and add (discretizedFrequency + 0.5 + 0.5) times this value to get the best
+        // approximation. (0.5 to get the first step start, and 0.5 to get the middle of the
+        // step pointed to by the discretized frequency.)
+        final float stepSize =
+                (FormatSpec.MAX_TERMINAL_FREQUENCY - unigramFrequency)
+                / (1.5f + FormatSpec.MAX_BIGRAM_FREQUENCY);
+        final float firstStepStart = 1 + unigramFrequency + (stepSize / 2.0f);
         final int discretizedFrequency = (int)((bigramFrequency - firstStepStart) / stepSize);
         // If the bigram freq is less than half-a-step higher than the unigram freq, we get -1
         // here. The best approximation would be the unigram freq itself, so we should not
@@ -797,19 +778,22 @@ public class BinaryDictInputOutput {
         // small over-estimation that we get in this case. TODO: actually remove this bigram
         // if discretizedFrequency < 0.
         final int finalBigramFrequency = discretizedFrequency > 0 ? discretizedFrequency : 0;
-        bigramFlags += finalBigramFrequency & FLAG_ATTRIBUTE_FREQUENCY;
+        bigramFlags += finalBigramFrequency & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY;
         return bigramFlags;
     }
 
     /**
      * Makes the 2-byte value for options flags.
      */
-    private static final int makeOptionsValue(final FusionDictionary dictionary) {
+    private static final int makeOptionsValue(final FusionDictionary dictionary,
+            final FormatOptions formatOptions) {
         final DictionaryOptions options = dictionary.mOptions;
         final boolean hasBigrams = dictionary.hasBigrams();
-        return (options.mFrenchLigatureProcessing ? FRENCH_LIGATURE_PROCESSING_FLAG : 0)
-                + (options.mGermanUmlautProcessing ? GERMAN_UMLAUT_PROCESSING_FLAG : 0)
-                + (hasBigrams ? CONTAINS_BIGRAMS_FLAG : 0);
+        return (options.mFrenchLigatureProcessing ? FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG : 0)
+                + (options.mGermanUmlautProcessing ? FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG : 0)
+                + (hasBigrams ? FormatSpec.CONTAINS_BIGRAMS_FLAG : 0)
+                + (formatOptions.mHasParentAddress ? FormatSpec.HAS_PARENT_ADDRESS : 0)
+                + (formatOptions.mHasLinkedListNode ? FormatSpec.HAS_LINKEDLIST_NODE : 0);
     }
 
     /**
@@ -820,7 +804,8 @@ public class BinaryDictInputOutput {
      * @return the flags
      */
     private static final int makeShortcutFlags(final boolean more, final int frequency) {
-        return (more ? FLAG_ATTRIBUTE_HAS_NEXT : 0) + (frequency & FLAG_ATTRIBUTE_FREQUENCY);
+        return (more ? FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT : 0)
+                + (frequency & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY);
     }
 
     /**
@@ -832,13 +817,16 @@ public class BinaryDictInputOutput {
      * @param dict the dictionary the node is a part of (for relative offsets).
      * @param buffer the memory buffer to write to.
      * @param node the node to write.
+     * @param formatOptions file format options.
      * @return the address of the END of the node.
      */
-    private static int writePlacedNode(FusionDictionary dict, byte[] buffer, Node node) {
+    private static int writePlacedNode(final FusionDictionary dict, byte[] buffer,
+            final Node node, final FormatOptions formatOptions) {
         int index = node.mCachedAddress;
 
         final int groupCount = node.mData.size();
         final int countSize = getGroupCountSize(node);
+        final int parentAddress = node.mCachedParentAddress;
         if (1 == countSize) {
             buffer[index++] = (byte)groupCount;
         } else if (2 == countSize) {
@@ -855,20 +843,38 @@ public class BinaryDictInputOutput {
             if (index != group.mCachedAddress) throw new RuntimeException("Bug: write index is not "
                     + "the same as the cached address of the group : "
                     + index + " <> " + group.mCachedAddress);
-            groupAddress += GROUP_FLAGS_SIZE + getGroupCharactersSize(group);
+            groupAddress += getGroupHeaderSize(group, formatOptions);
             // Sanity checks.
-            if (DBG && group.mFrequency > MAX_TERMINAL_FREQUENCY) {
-                throw new RuntimeException("A node has a frequency > " + MAX_TERMINAL_FREQUENCY
+            if (DBG && group.mFrequency > FormatSpec.MAX_TERMINAL_FREQUENCY) {
+                throw new RuntimeException("A node has a frequency > "
+                        + FormatSpec.MAX_TERMINAL_FREQUENCY
                         + " : " + group.mFrequency);
             }
-            if (group.mFrequency >= 0) groupAddress += GROUP_FREQUENCY_SIZE;
+            if (group.mFrequency >= 0) groupAddress += FormatSpec.GROUP_FREQUENCY_SIZE;
             final int childrenOffset = null == group.mChildren
-                    ? NO_CHILDREN_ADDRESS : group.mChildren.mCachedAddress - groupAddress;
+                    ? FormatSpec.NO_CHILDREN_ADDRESS
+                            : group.mChildren.mCachedAddress - groupAddress;
             byte flags = makeCharGroupFlags(group, groupAddress, childrenOffset);
             buffer[index++] = flags;
+
+            if (hasParentAddress(formatOptions)) {
+                if (parentAddress == FormatSpec.NO_PARENT_ADDRESS) {
+                    // this node is the root node.
+                    buffer[index] = buffer[index + 1] = buffer[index + 2] = 0;
+                } else {
+                    // write parent address. (version 3)
+                    final int actualParentAddress = Math.abs(parentAddress
+                            + (node.mCachedAddress - group.mCachedAddress));
+                    buffer[index] = (byte)((actualParentAddress >> 16) & 0xFF);
+                    buffer[index + 1] = (byte)((actualParentAddress >> 8) & 0xFF);
+                    buffer[index + 2] = (byte)(actualParentAddress & 0xFF);
+                }
+                index += 3;
+            }
+
             index = CharEncoding.writeCharArray(group.mChars, buffer, index);
             if (group.hasSeveralChars()) {
-                buffer[index++] = GROUP_CHARACTERS_TERMINATOR;
+                buffer[index++] = FormatSpec.GROUP_CHARACTERS_TERMINATOR;
             }
             if (group.mFrequency >= 0) {
                 buffer[index++] = (byte) group.mFrequency;
@@ -880,8 +886,8 @@ public class BinaryDictInputOutput {
             // Write shortcuts
             if (null != group.mShortcutTargets) {
                 final int indexOfShortcutByteSize = index;
-                index += GROUP_SHORTCUT_LIST_SIZE_SIZE;
-                groupAddress += GROUP_SHORTCUT_LIST_SIZE_SIZE;
+                index += FormatSpec.GROUP_SHORTCUT_LIST_SIZE_SIZE;
+                groupAddress += FormatSpec.GROUP_SHORTCUT_LIST_SIZE_SIZE;
                 final Iterator<WeightedString> shortcutIterator = group.mShortcutTargets.iterator();
                 while (shortcutIterator.hasNext()) {
                     final WeightedString target = shortcutIterator.next();
@@ -921,6 +927,11 @@ public class BinaryDictInputOutput {
             }
 
         }
+        if (formatOptions.mHasLinkedListNode) {
+            buffer[index] = buffer[index + 1] = buffer[index + 2]
+                    = FormatSpec.NO_FORWARD_LINK_ADDRESS;
+            index += FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
+        }
         if (index != node.mCachedAddress + node.mCachedSize) throw new RuntimeException(
                 "Not the same size : written "
                 + (index - node.mCachedAddress) + " bytes out of a node that should have "
@@ -990,10 +1001,10 @@ public class BinaryDictInputOutput {
      *
      * @param destination the stream to write the binary data to.
      * @param dict the dictionary to write.
-     * @param version the version of the format to write, currently either 1 or 2.
+     * @param formatOptions file format options.
      */
     public static void writeDictionaryBinary(final OutputStream destination,
-            final FusionDictionary dict, final int version)
+            final FusionDictionary dict, final FormatOptions formatOptions)
             throws IOException, UnsupportedFormatException {
 
         // Addresses are limited to 3 bytes, but since addresses can be relative to each node, the
@@ -1002,36 +1013,39 @@ public class BinaryDictInputOutput {
         // does not have a size limit, each node must still be within 16MB of all its children and
         // parents. As long as this is ensured, the dictionary file may grow to any size.
 
-        if (version < MINIMUM_SUPPORTED_VERSION || version > MAXIMUM_SUPPORTED_VERSION) {
+        final int version = formatOptions.mVersion;
+        if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION
+                || version > FormatSpec.MAXIMUM_SUPPORTED_VERSION) {
             throw new UnsupportedFormatException("Requested file format version " + version
                     + ", but this implementation only supports versions "
-                    + MINIMUM_SUPPORTED_VERSION + " through " + MAXIMUM_SUPPORTED_VERSION);
+                    + FormatSpec.MINIMUM_SUPPORTED_VERSION + " through "
+                    + FormatSpec.MAXIMUM_SUPPORTED_VERSION);
         }
 
         ByteArrayOutputStream headerBuffer = new ByteArrayOutputStream(256);
 
         // The magic number in big-endian order.
-        if (version >= FIRST_VERSION_WITH_HEADER_SIZE) {
+        if (version >= FormatSpec.FIRST_VERSION_WITH_HEADER_SIZE) {
             // Magic number for version 2+.
-            headerBuffer.write((byte) (0xFF & (VERSION_2_MAGIC_NUMBER >> 24)));
-            headerBuffer.write((byte) (0xFF & (VERSION_2_MAGIC_NUMBER >> 16)));
-            headerBuffer.write((byte) (0xFF & (VERSION_2_MAGIC_NUMBER >> 8)));
-            headerBuffer.write((byte) (0xFF & VERSION_2_MAGIC_NUMBER));
+            headerBuffer.write((byte) (0xFF & (FormatSpec.VERSION_2_MAGIC_NUMBER >> 24)));
+            headerBuffer.write((byte) (0xFF & (FormatSpec.VERSION_2_MAGIC_NUMBER >> 16)));
+            headerBuffer.write((byte) (0xFF & (FormatSpec.VERSION_2_MAGIC_NUMBER >> 8)));
+            headerBuffer.write((byte) (0xFF & FormatSpec.VERSION_2_MAGIC_NUMBER));
             // Dictionary version.
             headerBuffer.write((byte) (0xFF & (version >> 8)));
             headerBuffer.write((byte) (0xFF & version));
         } else {
             // Magic number for version 1.
-            headerBuffer.write((byte) (0xFF & (VERSION_1_MAGIC_NUMBER >> 8)));
-            headerBuffer.write((byte) (0xFF & VERSION_1_MAGIC_NUMBER));
+            headerBuffer.write((byte) (0xFF & (FormatSpec.VERSION_1_MAGIC_NUMBER >> 8)));
+            headerBuffer.write((byte) (0xFF & FormatSpec.VERSION_1_MAGIC_NUMBER));
             // Dictionary version.
             headerBuffer.write((byte) (0xFF & version));
         }
         // Options flags
-        final int options = makeOptionsValue(dict);
+        final int options = makeOptionsValue(dict, formatOptions);
         headerBuffer.write((byte) (0xFF & (options >> 8)));
         headerBuffer.write((byte) (0xFF & options));
-        if (version >= FIRST_VERSION_WITH_HEADER_SIZE) {
+        if (version >= FormatSpec.FIRST_VERSION_WITH_HEADER_SIZE) {
             final int headerSizeOffset = headerBuffer.size();
             // Placeholder to be written later with header size.
             for (int i = 0; i < 4; ++i) {
@@ -1062,20 +1076,20 @@ public class BinaryDictInputOutput {
         ArrayList<Node> flatNodes = flattenTree(dict.mRoot);
 
         MakedictLog.i("Computing addresses...");
-        computeAddresses(dict, flatNodes);
+        computeAddresses(dict, flatNodes, formatOptions);
         MakedictLog.i("Checking array...");
         if (DBG) checkFlatNodeArray(flatNodes);
 
         // Create a buffer that matches the final dictionary size.
         final Node lastNode = flatNodes.get(flatNodes.size() - 1);
-        final int bufferSize =(lastNode.mCachedAddress + lastNode.mCachedSize);
+        final int bufferSize = lastNode.mCachedAddress + lastNode.mCachedSize;
         final byte[] buffer = new byte[bufferSize];
         int index = 0;
 
         MakedictLog.i("Writing file...");
         int dataEndOffset = 0;
         for (Node n : flatNodes) {
-            dataEndOffset = writePlacedNode(dict, buffer, n);
+            dataEndOffset = writePlacedNode(dict, buffer, n, formatOptions);
         }
 
         if (DBG) showStatistics(flatNodes);
@@ -1090,113 +1104,127 @@ public class BinaryDictInputOutput {
     // Input methods: Read a binary dictionary to memory.
     // readDictionaryBinary is the public entry point for them.
 
-    static final int[] characterBuffer = new int[MAX_WORD_LENGTH];
-    private static CharGroupInfo readCharGroup(RandomAccessFile source,
-            final int originalGroupAddress) throws IOException {
+    private static final int[] CHARACTER_BUFFER = new int[FormatSpec.MAX_WORD_LENGTH];
+    public static CharGroupInfo readCharGroup(final FusionDictionaryBufferInterface buffer,
+            final int originalGroupAddress, final FormatOptions options) {
         int addressPointer = originalGroupAddress;
-        final int flags = source.readUnsignedByte();
+        final int flags = buffer.readUnsignedByte();
         ++addressPointer;
+
+        final int parentAddress;
+        if (hasParentAddress(options)) {
+            // read the parent address. (version 3)
+            parentAddress = -buffer.readUnsignedInt24();
+            addressPointer += 3;
+        } else {
+            parentAddress = FormatSpec.NO_PARENT_ADDRESS;
+        }
+
         final int characters[];
-        if (0 != (flags & FLAG_HAS_MULTIPLE_CHARS)) {
+        if (0 != (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS)) {
             int index = 0;
-            int character = CharEncoding.readChar(source);
+            int character = CharEncoding.readChar(buffer);
             addressPointer += CharEncoding.getCharSize(character);
             while (-1 != character) {
-                characterBuffer[index++] = character;
-                character = CharEncoding.readChar(source);
+                // FusionDictionary is making sure that the length of the word is smaller than
+                // MAX_WORD_LENGTH.
+                // So we'll never write past the end of CHARACTER_BUFFER.
+                CHARACTER_BUFFER[index++] = character;
+                character = CharEncoding.readChar(buffer);
                 addressPointer += CharEncoding.getCharSize(character);
             }
-            characters = Arrays.copyOfRange(characterBuffer, 0, index);
+            characters = Arrays.copyOfRange(CHARACTER_BUFFER, 0, index);
         } else {
-            final int character = CharEncoding.readChar(source);
+            final int character = CharEncoding.readChar(buffer);
             addressPointer += CharEncoding.getCharSize(character);
             characters = new int[] { character };
         }
         final int frequency;
-        if (0 != (FLAG_IS_TERMINAL & flags)) {
+        if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) {
             ++addressPointer;
-            frequency = source.readUnsignedByte();
+            frequency = buffer.readUnsignedByte();
         } else {
             frequency = CharGroup.NOT_A_TERMINAL;
         }
         int childrenAddress = addressPointer;
-        switch (flags & MASK_GROUP_ADDRESS_TYPE) {
-        case FLAG_GROUP_ADDRESS_TYPE_ONEBYTE:
-            childrenAddress += source.readUnsignedByte();
+        switch (flags & FormatSpec.MASK_GROUP_ADDRESS_TYPE) {
+        case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_ONEBYTE:
+            childrenAddress += buffer.readUnsignedByte();
             addressPointer += 1;
             break;
-        case FLAG_GROUP_ADDRESS_TYPE_TWOBYTES:
-            childrenAddress += source.readUnsignedShort();
+        case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_TWOBYTES:
+            childrenAddress += buffer.readUnsignedShort();
             addressPointer += 2;
             break;
-        case FLAG_GROUP_ADDRESS_TYPE_THREEBYTES:
-            childrenAddress += (source.readUnsignedByte() << 16) + source.readUnsignedShort();
+        case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_THREEBYTES:
+            childrenAddress += buffer.readUnsignedInt24();
             addressPointer += 3;
             break;
-        case FLAG_GROUP_ADDRESS_TYPE_NOADDRESS:
+        case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_NOADDRESS:
         default:
-            childrenAddress = NO_CHILDREN_ADDRESS;
+            childrenAddress = FormatSpec.NO_CHILDREN_ADDRESS;
             break;
         }
         ArrayList<WeightedString> shortcutTargets = null;
-        if (0 != (flags & FLAG_HAS_SHORTCUT_TARGETS)) {
-            final long pointerBefore = source.getFilePointer();
+        if (0 != (flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS)) {
+            final int pointerBefore = buffer.position();
             shortcutTargets = new ArrayList<WeightedString>();
-            source.readUnsignedShort(); // Skip the size
+            buffer.readUnsignedShort(); // Skip the size
             while (true) {
-                final int targetFlags = source.readUnsignedByte();
-                final String word = CharEncoding.readString(source);
+                final int targetFlags = buffer.readUnsignedByte();
+                final String word = CharEncoding.readString(buffer);
                 shortcutTargets.add(new WeightedString(word,
-                        targetFlags & FLAG_ATTRIBUTE_FREQUENCY));
-                if (0 == (targetFlags & FLAG_ATTRIBUTE_HAS_NEXT)) break;
+                        targetFlags & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY));
+                if (0 == (targetFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT)) break;
             }
-            addressPointer += (source.getFilePointer() - pointerBefore);
+            addressPointer += buffer.position() - pointerBefore;
         }
         ArrayList<PendingAttribute> bigrams = null;
-        if (0 != (flags & FLAG_HAS_BIGRAMS)) {
+        if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) {
             bigrams = new ArrayList<PendingAttribute>();
             while (true) {
-                final int bigramFlags = source.readUnsignedByte();
+                final int bigramFlags = buffer.readUnsignedByte();
                 ++addressPointer;
-                final int sign = 0 == (bigramFlags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) ? 1 : -1;
+                final int sign = 0 == (bigramFlags & FormatSpec.FLAG_ATTRIBUTE_OFFSET_NEGATIVE)
+                        ? 1 : -1;
                 int bigramAddress = addressPointer;
-                switch (bigramFlags & MASK_ATTRIBUTE_ADDRESS_TYPE) {
-                case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
-                    bigramAddress += sign * source.readUnsignedByte();
+                switch (bigramFlags & FormatSpec.MASK_ATTRIBUTE_ADDRESS_TYPE) {
+                case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
+                    bigramAddress += sign * buffer.readUnsignedByte();
                     addressPointer += 1;
                     break;
-                case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
-                    bigramAddress += sign * source.readUnsignedShort();
+                case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
+                    bigramAddress += sign * buffer.readUnsignedShort();
                     addressPointer += 2;
                     break;
-                case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
-                    final int offset = ((source.readUnsignedByte() << 16)
-                            + source.readUnsignedShort());
+                case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
+                    final int offset = (buffer.readUnsignedByte() << 16)
+                            + buffer.readUnsignedShort();
                     bigramAddress += sign * offset;
                     addressPointer += 3;
                     break;
                 default:
                     throw new RuntimeException("Has bigrams with no address");
                 }
-                bigrams.add(new PendingAttribute(bigramFlags & FLAG_ATTRIBUTE_FREQUENCY,
+                bigrams.add(new PendingAttribute(bigramFlags & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY,
                         bigramAddress));
-                if (0 == (bigramFlags & FLAG_ATTRIBUTE_HAS_NEXT)) break;
+                if (0 == (bigramFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT)) break;
             }
         }
         return new CharGroupInfo(originalGroupAddress, addressPointer, flags, characters, frequency,
-                childrenAddress, shortcutTargets, bigrams);
+                parentAddress, childrenAddress, shortcutTargets, bigrams);
     }
 
     /**
-     * Reads and returns the char group count out of a file and forwards the pointer.
+     * Reads and returns the char group count out of a buffer and forwards the pointer.
      */
-    private static int readCharGroupCount(RandomAccessFile source) throws IOException {
-        final int msb = source.readUnsignedByte();
-        if (MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT >= msb) {
+    public static int readCharGroupCount(final FusionDictionaryBufferInterface buffer) {
+        final int msb = buffer.readUnsignedByte();
+        if (FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT >= msb) {
             return msb;
         } else {
-            return ((MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT & msb) << 8)
-                    + source.readUnsignedByte();
+            return ((FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT & msb) << 8)
+                    + buffer.readUnsignedByte();
         }
     }
 
@@ -1204,31 +1232,73 @@ public class BinaryDictInputOutput {
     // of this method. Since it performs direct, unbuffered random access to the file and
     // may be called hundreds of thousands of times, the resulting performance is not
     // reasonable without some kind of cache. Thus:
-    // TODO: perform buffered I/O here and in other places in the code.
     private static TreeMap<Integer, String> wordCache = new TreeMap<Integer, String>();
     /**
      * Finds, as a string, the word at the address passed as an argument.
      *
-     * @param source the file to read from.
+     * @param buffer the buffer to read from.
      * @param headerSize the size of the header.
      * @param address the address to seek.
+     * @param formatOptions file format options.
      * @return the word, as a string.
-     * @throws IOException if the file can't be read.
      */
-    private static String getWordAtAddress(final RandomAccessFile source, final long headerSize,
-            int address) throws IOException {
+    /* packages for tests */ static String getWordAtAddress(
+            final FusionDictionaryBufferInterface buffer, final int headerSize, final int address,
+            final FormatOptions formatOptions) {
         final String cachedString = wordCache.get(address);
         if (null != cachedString) return cachedString;
-        final long originalPointer = source.getFilePointer();
-        source.seek(headerSize);
-        final int count = readCharGroupCount(source);
+
+        final String result;
+        final int originalPointer = buffer.position();
+
+        if (hasParentAddress(formatOptions)) {
+            result = getWordAtAddressWithParentAddress(buffer, headerSize, address, formatOptions);
+        } else {
+            result = getWordAtAddressWithoutParentAddress(buffer, headerSize, address,
+                    formatOptions);
+        }
+
+        wordCache.put(address, result);
+        buffer.position(originalPointer);
+        return result;
+    }
+
+    private static int[] sGetWordBuffer = new int[FormatSpec.MAX_WORD_LENGTH];
+    private static String getWordAtAddressWithParentAddress(
+            final FusionDictionaryBufferInterface buffer, final int headerSize, final int address,
+            final FormatOptions options) {
+        // Walk parent links from the group at `address` towards the root, filling the shared
+        // sGetWordBuffer from its end so the characters come out in reading order.
+        int currentAddress = address;
+        int index = FormatSpec.MAX_WORD_LENGTH - 1;
+        // the length of the path from the root to the leaf is limited by MAX_WORD_LENGTH
+        for (int count = 0; count < FormatSpec.MAX_WORD_LENGTH; ++count) {
+            buffer.position(currentAddress + headerSize);
+            final CharGroupInfo currentInfo = readCharGroup(buffer, currentAddress, options);
+            for (int i = 0; i < currentInfo.mCharacters.length; ++i) {
+                sGetWordBuffer[index--] =
+                        currentInfo.mCharacters[currentInfo.mCharacters.length - i - 1];
+            }
+
+            if (currentInfo.mParentAddress == FormatSpec.NO_PARENT_ADDRESS) break;
+            currentAddress = currentInfo.mParentAddress + currentInfo.mOriginalAddress;
+        }
+
+        return new String(sGetWordBuffer, index + 1, FormatSpec.MAX_WORD_LENGTH - index - 1);
+    }
+
+    private static String getWordAtAddressWithoutParentAddress(
+            final FusionDictionaryBufferInterface buffer, final int headerSize, final int address,
+            final FormatOptions options) {
+        buffer.position(headerSize);
+        final int count = readCharGroupCount(buffer);
         int groupOffset = getGroupCountSize(count);
         final StringBuilder builder = new StringBuilder();
         String result = null;
 
         CharGroupInfo last = null;
         for (int i = count - 1; i >= 0; --i) {
-            CharGroupInfo info = readCharGroup(source, groupOffset);
+            CharGroupInfo info = readCharGroup(buffer, groupOffset, options);
             groupOffset = info.mEndAddress;
             if (info.mOriginalAddress == address) {
                 builder.append(new String(info.mCharacters, 0, info.mCharacters.length));
@@ -1239,9 +1309,9 @@ public class BinaryDictInputOutput {
                 if (info.mChildrenAddress > address) {
                     if (null == last) continue;
                     builder.append(new String(last.mCharacters, 0, last.mCharacters.length));
-                    source.seek(last.mChildrenAddress + headerSize);
+                    buffer.position(last.mChildrenAddress + headerSize);
                     groupOffset = last.mChildrenAddress + 1;
-                    i = source.readUnsignedByte();
+                    i = buffer.readUnsignedByte();
                     last = null;
                     continue;
                 }
@@ -1249,67 +1319,90 @@ public class BinaryDictInputOutput {
             }
             if (0 == i && hasChildrenAddress(last.mChildrenAddress)) {
                 builder.append(new String(last.mCharacters, 0, last.mCharacters.length));
-                source.seek(last.mChildrenAddress + headerSize);
+                buffer.position(last.mChildrenAddress + headerSize);
                 groupOffset = last.mChildrenAddress + 1;
-                i = source.readUnsignedByte();
+                i = buffer.readUnsignedByte();
                 last = null;
                 continue;
             }
         }
-        source.seek(originalPointer);
-        wordCache.put(address, result);
         return result;
     }
 
     /**
-     * Reads a single node from a binary file.
+     * Reads a single node from a buffer.
      *
-     * This methods reads the file at the current position of its file pointer. A node is
-     * fully expected to start at the current position.
+     * This method reads the buffer at the current position. A node is fully expected to start
+     * at the current position.
      * This will recursively read other nodes into the structure, populating the reverse
      * maps on the fly and using them to keep track of already read nodes.
      *
-     * @param source the data file, correctly positioned at the start of a node.
+     * @param buffer the buffer, correctly positioned at the start of a node.
      * @param headerSize the size, in bytes, of the file header.
      * @param reverseNodeMap a mapping from addresses to already read nodes.
      * @param reverseGroupMap a mapping from addresses to already read character groups.
+     * @param options file format options.
      * @return the read node with all his children already read.
      */
-    private static Node readNode(RandomAccessFile source, long headerSize,
-            Map<Integer, Node> reverseNodeMap, Map<Integer, CharGroup> reverseGroupMap)
+    private static Node readNode(final FusionDictionaryBufferInterface buffer, final int headerSize,
+            final Map<Integer, Node> reverseNodeMap, final Map<Integer, CharGroup> reverseGroupMap,
+            final FormatOptions options)
             throws IOException {
-        final int nodeOrigin = (int)(source.getFilePointer() - headerSize);
-        final int count = readCharGroupCount(source);
         final ArrayList<CharGroup> nodeContents = new ArrayList<CharGroup>();
-        int groupOffset = nodeOrigin + getGroupCountSize(count);
-        for (int i = count; i > 0; --i) {
-            CharGroupInfo info = readCharGroup(source, groupOffset);
-            ArrayList<WeightedString> shortcutTargets = info.mShortcutTargets;
-            ArrayList<WeightedString> bigrams = null;
-            if (null != info.mBigrams) {
-                bigrams = new ArrayList<WeightedString>();
-                for (PendingAttribute bigram : info.mBigrams) {
-                    final String word = getWordAtAddress(source, headerSize, bigram.mAddress);
-                    bigrams.add(new WeightedString(word, bigram.mFrequency));
+        final int nodeOrigin = buffer.position() - headerSize;
+
+        do { // Scan the linked-list node.
+            final int nodeHeadPosition = buffer.position() - headerSize;
+            final int count = readCharGroupCount(buffer);
+            int groupOffset = nodeHeadPosition + getGroupCountSize(count);
+            for (int i = count; i > 0; --i) { // Scan the array of CharGroup.
+                CharGroupInfo info = readCharGroup(buffer, groupOffset, options);
+                ArrayList<WeightedString> shortcutTargets = info.mShortcutTargets;
+                ArrayList<WeightedString> bigrams = null;
+                if (null != info.mBigrams) {
+                    bigrams = new ArrayList<WeightedString>();
+                    for (PendingAttribute bigram : info.mBigrams) {
+                        final String word = getWordAtAddress(
+                                buffer, headerSize, bigram.mAddress, options);
+                        bigrams.add(new WeightedString(word, bigram.mFrequency));
+                    }
+                }
+                if (hasChildrenAddress(info.mChildrenAddress)) {
+                    Node children = reverseNodeMap.get(info.mChildrenAddress);
+                    if (null == children) {
+                        final int currentPosition = buffer.position();
+                        buffer.position(info.mChildrenAddress + headerSize);
+                        children = readNode(
+                                buffer, headerSize, reverseNodeMap, reverseGroupMap, options);
+                        buffer.position(currentPosition);
+                    }
+                    nodeContents.add(
+                            new CharGroup(info.mCharacters, shortcutTargets, bigrams,
+                                    info.mFrequency,
+                                    0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD),
+                                    0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED), children));
+                } else {
+                    nodeContents.add(
+                            new CharGroup(info.mCharacters, shortcutTargets, bigrams,
+                                    info.mFrequency,
+                                    0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD),
+                                    0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED)));
                 }
+                groupOffset = info.mEndAddress;
             }
-            if (hasChildrenAddress(info.mChildrenAddress)) {
-                Node children = reverseNodeMap.get(info.mChildrenAddress);
-                if (null == children) {
-                    final long currentPosition = source.getFilePointer();
-                    source.seek(info.mChildrenAddress + headerSize);
-                    children = readNode(source, headerSize, reverseNodeMap, reverseGroupMap);
-                    source.seek(currentPosition);
+
+            // reach the end of the array.
+            if (options.mHasLinkedListNode) {
+                final int nextAddress = buffer.readUnsignedInt24();
+                if (nextAddress >= 0 && nextAddress < buffer.limit()) {
+                    buffer.position(nextAddress);
+                } else {
+                    break;
                 }
-                nodeContents.add(
-                        new CharGroup(info.mCharacters, shortcutTargets, bigrams, info.mFrequency,
-                                children));
-            } else {
-                nodeContents.add(
-                        new CharGroup(info.mCharacters, shortcutTargets, bigrams, info.mFrequency));
             }
-            groupOffset = info.mEndAddress;
-        }
+        } while (options.mHasLinkedListNode &&
+                buffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS);
+
         final Node node = new Node(nodeContents);
         node.mCachedAddress = nodeOrigin;
         reverseNodeMap.put(node.mCachedAddress, node);
@@ -1318,65 +1411,117 @@ public class BinaryDictInputOutput {
 
     /**
      * Helper function to get the binary format version from the header.
+     * @throws IOException
      */
-    private static int getFormatVersion(final RandomAccessFile source) throws IOException {
-        final int magic_v1 = source.readUnsignedShort();
-        if (VERSION_1_MAGIC_NUMBER == magic_v1) return source.readUnsignedByte();
-        final int magic_v2 = (magic_v1 << 16) + source.readUnsignedShort();
-        if (VERSION_2_MAGIC_NUMBER == magic_v2) return source.readUnsignedShort();
-        return NOT_A_VERSION_NUMBER;
+    private static int getFormatVersion(final FusionDictionaryBufferInterface buffer)
+            throws IOException {
+        final int magic_v1 = buffer.readUnsignedShort();
+        if (FormatSpec.VERSION_1_MAGIC_NUMBER == magic_v1) return buffer.readUnsignedByte();
+        final int magic_v2 = (magic_v1 << 16) + buffer.readUnsignedShort();
+        if (FormatSpec.VERSION_2_MAGIC_NUMBER == magic_v2) return buffer.readUnsignedShort();
+        return FormatSpec.NOT_A_VERSION_NUMBER;
     }
 
     /**
-     * Reads a random access file and returns the memory representation of the dictionary.
-     *
-     * This high-level method takes a binary file and reads its contents, populating a
-     * FusionDictionary structure. The optional dict argument is an existing dictionary to
-     * which words from the file should be added. If it is null, a new dictionary is created.
-     *
-     * @param source the file to read.
-     * @param dict an optional dictionary to add words to, or null.
-     * @return the created (or merged) dictionary.
+     * Helper function to get and validate the binary format version.
+     * @throws UnsupportedFormatException
+     * @throws IOException
      */
-    public static FusionDictionary readDictionaryBinary(final RandomAccessFile source,
-            final FusionDictionary dict) throws IOException, UnsupportedFormatException {
-        // Check file version
-        final int version = getFormatVersion(source);
-        if (version < MINIMUM_SUPPORTED_VERSION || version > MAXIMUM_SUPPORTED_VERSION ) {
+    private static int checkFormatVersion(final FusionDictionaryBufferInterface buffer)
+            throws IOException, UnsupportedFormatException {
+        final int version = getFormatVersion(buffer);
+        if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION
+                || version > FormatSpec.MAXIMUM_SUPPORTED_VERSION) {
             throw new UnsupportedFormatException("This file has version " + version
                     + ", but this implementation does not support versions above "
-                    + MAXIMUM_SUPPORTED_VERSION);
+                    + FormatSpec.MAXIMUM_SUPPORTED_VERSION);
         }
+        return version;
+    }
 
-        // Read options
-        final int optionsFlags = source.readUnsignedShort();
+    /**
+     * Reads a header from a buffer.
+     * @param buffer the buffer to read.
+     * @return the header: its size, the dictionary options and the format options.
+     * @throws IOException
+     * @throws UnsupportedFormatException if the version is unsupported or the size is negative.
+     */
+    public static FileHeader readHeader(final FusionDictionaryBufferInterface buffer)
+            throws IOException, UnsupportedFormatException {
+        final int version = checkFormatVersion(buffer);
+        final int optionsFlags = buffer.readUnsignedShort();
 
-        final long headerSize;
-        final HashMap<String, String> options = new HashMap<String, String>();
-        if (version < FIRST_VERSION_WITH_HEADER_SIZE) {
-            headerSize = source.getFilePointer();
+        final HashMap<String, String> attributes = new HashMap<String, String>();
+        final int headerSize;
+        if (version < FormatSpec.FIRST_VERSION_WITH_HEADER_SIZE) {
+            headerSize = buffer.position();
         } else {
-            headerSize = (source.readUnsignedByte() << 24) + (source.readUnsignedByte() << 16)
-                    + (source.readUnsignedByte() << 8) + source.readUnsignedByte();
-            while (source.getFilePointer() < headerSize) {
-                final String key = CharEncoding.readString(source);
-                final String value = CharEncoding.readString(source);
-                options.put(key, value);
-            }
-            source.seek(headerSize);
+            headerSize = buffer.readInt();
+            if (headerSize < 0) { // Reject before the size is used to seek.
+                throw new UnsupportedFormatException("header size can't be negative.");
+            }
+            populateOptions(buffer, headerSize, attributes);
+            buffer.position(headerSize);
+        }
+
+        final FileHeader header = new FileHeader(headerSize,
+                new FusionDictionary.DictionaryOptions(attributes,
+                        0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG),
+                        0 != (optionsFlags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG)),
+                new FormatOptions(version,
+                        0 != (optionsFlags & FormatSpec.HAS_PARENT_ADDRESS),
+                        0 != (optionsFlags & FormatSpec.HAS_LINKEDLIST_NODE)));
+        return header;
+    }
+
+    /**
+     * Reads options from a buffer and populates a map with their contents.
+     *
+     * The buffer is read at the current position, so the caller must take care the pointer
+     * is in the right place before calling this.
+     */
+    public static void populateOptions(final FusionDictionaryBufferInterface buffer,
+            final int headerSize, final HashMap<String, String> options) {
+        while (buffer.position() < headerSize) {
+            final String key = CharEncoding.readString(buffer);
+            final String value = CharEncoding.readString(buffer);
+            options.put(key, value);
         }
+    }
+
+    /**
+     * Reads a buffer and returns the memory representation of the dictionary.
+     *
+     * This high-level method takes a buffer and reads its contents, populating a
+     * FusionDictionary structure. The optional dict argument is an existing dictionary to
+     * which words from the buffer should be added. If it is null, a new dictionary is created.
+     *
+     * @param buffer the buffer to read.
+     * @param dict an optional dictionary to add words to, or null.
+     * @return the created (or merged) dictionary.
+     */
+    public static FusionDictionary readDictionaryBinary(
+            final FusionDictionaryBufferInterface buffer, final FusionDictionary dict)
+                    throws IOException, UnsupportedFormatException {
+        // clear cache
+        wordCache.clear();
+
+        // Read header
+        final FileHeader header = readHeader(buffer);
 
         Map<Integer, Node> reverseNodeMapping = new TreeMap<Integer, Node>();
         Map<Integer, CharGroup> reverseGroupMapping = new TreeMap<Integer, CharGroup>();
-        final Node root = readNode(source, headerSize, reverseNodeMapping, reverseGroupMapping);
+        final Node root = readNode(buffer, header.mHeaderSize, reverseNodeMapping,
+                reverseGroupMapping, header.mFormatOptions);
 
-        FusionDictionary newDict = new FusionDictionary(root,
-                new FusionDictionary.DictionaryOptions(options,
-                        0 != (optionsFlags & GERMAN_UMLAUT_PROCESSING_FLAG),
-                        0 != (optionsFlags & FRENCH_LIGATURE_PROCESSING_FLAG)));
+        FusionDictionary newDict = new FusionDictionary(root, header.mDictionaryOptions);
         if (null != dict) {
             for (final Word w : dict) {
-                newDict.add(w.mWord, w.mFrequency, w.mShortcutTargets);
+                if (w.mIsBlacklistEntry) {
+                    newDict.addBlacklistEntry(w.mWord, w.mShortcutTargets, w.mIsNotAWord);
+                } else {
+                    newDict.add(w.mWord, w.mFrequency, w.mShortcutTargets, w.mIsNotAWord);
+                }
             }
             for (final Word w : dict) {
                 // By construction a binary dictionary may not have bigrams pointing to
@@ -1400,14 +1545,45 @@ public class BinaryDictInputOutput {
      * @return true if it's a binary dictionary, false otherwise
      */
     public static boolean isBinaryDictionary(final String filename) {
+        FileInputStream inStream = null;
         try {
-            RandomAccessFile f = new RandomAccessFile(filename, "r");
-            final int version = getFormatVersion(f);
-            return (version >= MINIMUM_SUPPORTED_VERSION && version <= MAXIMUM_SUPPORTED_VERSION);
+            final File file = new File(filename);
+            inStream = new FileInputStream(file);
+            final ByteBuffer buffer = inStream.getChannel().map(
+                    FileChannel.MapMode.READ_ONLY, 0, file.length());
+            final int version = getFormatVersion(new ByteBufferWrapper(buffer));
+            return (version >= FormatSpec.MINIMUM_SUPPORTED_VERSION
+                    && version <= FormatSpec.MAXIMUM_SUPPORTED_VERSION);
         } catch (FileNotFoundException e) {
             return false;
         } catch (IOException e) {
             return false;
+        } finally {
+            if (inStream != null) {
+                try {
+                    inStream.close();
+                } catch (IOException e) {
+                    // do nothing
+                }
+            }
         }
     }
+
+    /**
+     * Expands a compressed bigram frequency back into an approximate frequency.
+     *
+     * @see #makeBigramFlags
+     *
+     * @param unigramFrequency the unigram frequency the result starts from
+     * @param bigramFrequency the compressed bigram frequency
+     * @return the approximate bigram frequency, truncated to an int
+     */
+    public static int reconstructBigramFrequency(final int unigramFrequency,
+            final int bigramFrequency) {
+        final float headroom = FormatSpec.MAX_TERMINAL_FREQUENCY - unigramFrequency;
+        final float step = headroom / (1.5f + FormatSpec.MAX_BIGRAM_FREQUENCY);
+        final float steps = bigramFrequency + 1.0f;
+        // The result lies (bigramFrequency + 1) steps above the unigram frequency.
+        return (int)(unigramFrequency + step * steps);
+    }
 }
diff --git a/java/src/com/android/inputmethod/latin/makedict/CharGroupInfo.java b/java/src/com/android/inputmethod/latin/makedict/CharGroupInfo.java
index ef7dbb251..ed9388409 100644
--- a/java/src/com/android/inputmethod/latin/makedict/CharGroupInfo.java
+++ b/java/src/com/android/inputmethod/latin/makedict/CharGroupInfo.java
@@ -31,18 +31,20 @@ public class CharGroupInfo {
     public final int[] mCharacters;
     public final int mFrequency;
     public final int mChildrenAddress;
+    public final int mParentAddress;
     public final ArrayList<WeightedString> mShortcutTargets;
     public final ArrayList<PendingAttribute> mBigrams;
 
     public CharGroupInfo(final int originalAddress, final int endAddress, final int flags,
-            final int[] characters, final int frequency, final int childrenAddress,
-            final ArrayList<WeightedString> shortcutTargets,
+            final int[] characters, final int frequency, final int parentAddress,
+            final int childrenAddress, final ArrayList<WeightedString> shortcutTargets,
             final ArrayList<PendingAttribute> bigrams) {
         mOriginalAddress = originalAddress;
         mEndAddress = endAddress;
         mFlags = flags;
         mCharacters = characters;
         mFrequency = frequency;
+        mParentAddress = parentAddress;
         mChildrenAddress = childrenAddress;
         mShortcutTargets = shortcutTargets;
         mBigrams = bigrams;
diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
new file mode 100644
index 000000000..adc6037bb
--- /dev/null
+++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
@@ -0,0 +1,264 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.android.inputmethod.latin.makedict;
+
+import com.android.inputmethod.latin.Constants;
+import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
+
+/**
+ * Dictionary File Format Specification.
+ */
+public final class FormatSpec {
+
+    /*
+     * Array of Node(FusionDictionary.Node) layout is as follows:
+     *
+     * g |
+     * r | the number of groups, 1 or 2 bytes.
+     * o | 1 byte = bbbbbbbb match
+     * u |   case 1xxxxxxx => xxxxxxx << 8 + next byte
+     * p |   otherwise => bbbbbbbb
+     * c |
+     * ount
+     *
+     * g |
+     * r | sequence of groups,
+     * o | the layout of each group is described below.
+     * u |
+     * ps
+     *
+     * f |
+     * o | IF HAS_LINKEDLIST_NODE (defined in the file header)
+     * r |     forward link address, 3byte
+     * w | the address must be positive.
+     * a |
+     * rdlinkaddress
+     */
+
+    /* Node(CharGroup) layout is as follows:
+     *   | addressType                         xx     : mask with MASK_GROUP_ADDRESS_TYPE
+     *                                 2 bits, 00 = no children : FLAG_GROUP_ADDRESS_TYPE_NOADDRESS
+     * f |                                     01 = 1 byte      : FLAG_GROUP_ADDRESS_TYPE_ONEBYTE
+     * l |                                     10 = 2 bytes     : FLAG_GROUP_ADDRESS_TYPE_TWOBYTES
+     * a |                                     11 = 3 bytes     : FLAG_GROUP_ADDRESS_TYPE_THREEBYTES
+     * g | has several chars ?         1 bit, 1 = yes, 0 = no   : FLAG_HAS_MULTIPLE_CHARS
+     * s | has a terminal ?            1 bit, 1 = yes, 0 = no   : FLAG_IS_TERMINAL
+     *   | has shortcut targets ?      1 bit, 1 = yes, 0 = no   : FLAG_HAS_SHORTCUT_TARGETS
+     *   | has bigrams ?               1 bit, 1 = yes, 0 = no   : FLAG_HAS_BIGRAMS
+     *   | is not a word ?             1 bit, 1 = yes, 0 = no   : FLAG_IS_NOT_A_WORD
+     *   | is blacklisted ?            1 bit, 1 = yes, 0 = no   : FLAG_IS_BLACKLISTED
+     *
+     * p |
+     * a | IF HAS_PARENT_ADDRESS (defined in the file header)
+     * r |     parent address, 3byte
+     * e | the address must be negative, so the absolute value of the address is stored.
+     * n |
+     * taddress
+     *
+     * c | IF FLAG_HAS_MULTIPLE_CHARS
+     * h |   char, char, char, char    n * (1 or 3 bytes) : use CharGroupInfo for i/o helpers
+     * a |   end                       1 byte, = 0
+     * r | ELSE
+     * s |   char                      1 or 3 bytes
+     *   | END
+     *
+     * f |
+     * r | IF FLAG_IS_TERMINAL
+     * e |   frequency                 1 byte
+     * q |
+     *
+     * c | IF 00 = FLAG_GROUP_ADDRESS_TYPE_NOADDRESS == addressType
+     * h |   // nothing
+     * i | ELSIF 01 = FLAG_GROUP_ADDRESS_TYPE_ONEBYTE == addressType
+     * l |   children address, 1 byte
+     * d | ELSIF 10 = FLAG_GROUP_ADDRESS_TYPE_TWOBYTES == addressType
+     * r |   children address, 2 bytes
+     * e | ELSE // 11 = FLAG_GROUP_ADDRESS_TYPE_THREEBYTES == addressType
+     * n |   children address, 3 bytes
+     * A | END
+     * d
+     * dress
+     *
+     *   | IF FLAG_IS_TERMINAL && FLAG_HAS_SHORTCUT_TARGETS
+     *   | shortcut string list
+     *   | IF FLAG_IS_TERMINAL && FLAG_HAS_BIGRAMS
+     *   | bigrams address list
+     *
+     * Char format is:
+     * 1 byte = bbbbbbbb match
+     * case 000xxxxx: xxxxx << 16 + next byte << 8 + next byte
+     * else: if 00011111 (= 0x1F) : this is the terminator. This is a relevant choice because
+     *       unicode code points range from 0 to 0x10FFFF, so any 3-byte value starting with
+     *       00011111 would be outside unicode.
+     * else: iso-latin-1 code
+     * This allows for the whole unicode range to be encoded, including chars outside of
+     * the BMP. Also everything in the iso-latin-1 charset is only 1 byte, except control
+     * characters which should never happen anyway (and still work, but take 3 bytes).
+     *
+     * bigram address list is:
+     * <flags> = | hasNext = 1 bit, 1 = yes, 0 = no     : FLAG_ATTRIBUTE_HAS_NEXT
+     *           | addressSign = 1 bit,                 : FLAG_ATTRIBUTE_OFFSET_NEGATIVE
+     *           |                      1 = must take -address, 0 = must take +address
+     *           |                         xx : mask with MASK_ATTRIBUTE_ADDRESS_TYPE
+     *           | addressFormat = 2 bits, 00 = unused
+     *           |                         01 = 1 byte  : FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE
+     *           |                         10 = 2 bytes : FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES
+     *           |                         11 = 3 bytes : FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES
+     *           | 4 bits : frequency         : mask with FLAG_ATTRIBUTE_FREQUENCY
+     * <address> | IF (01 == FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE == addressFormat)
+     *           |   read 1 byte, add top 4 bits
+     *           | ELSIF (10 == FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES == addressFormat)
+     *           |   read 2 bytes, add top 4 bits
+     *           | ELSE // 11 == FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES == addressFormat
+     *           |   read 3 bytes, add top 4 bits
+     *           | END
+     *           | if (FLAG_ATTRIBUTE_OFFSET_NEGATIVE) then address = -address
+     * if (FLAG_ATTRIBUTE_HAS_NEXT) goto flags
+     *
+     * shortcut string list is:
+     * <byte size> = GROUP_SHORTCUT_LIST_SIZE_SIZE bytes, big-endian: size of the list, in bytes.
+     * <flags>     = | hasNext = 1 bit, 1 = yes, 0 = no : FLAG_ATTRIBUTE_HAS_NEXT
+     *               | reserved = 3 bits, must be 0
+     *               | 4 bits : frequency : mask with FLAG_ATTRIBUTE_FREQUENCY
+     * <shortcut>  = | string of characters at the char format described above, with the terminator
+     *               | used to signal the end of the string.
+     * if (FLAG_ATTRIBUTE_HAS_NEXT) goto flags
+     */
+
+    static final int VERSION_1_MAGIC_NUMBER = 0x78B1;
+    public static final int VERSION_2_MAGIC_NUMBER = 0x9BC13AFE;
+    static final int MINIMUM_SUPPORTED_VERSION = 1;
+    static final int MAXIMUM_SUPPORTED_VERSION = 3;
+    static final int NOT_A_VERSION_NUMBER = -1;
+    static final int FIRST_VERSION_WITH_HEADER_SIZE = 2;
+    static final int FIRST_VERSION_WITH_PARENT_ADDRESS = 3;
+    static final int FIRST_VERSION_WITH_LINKEDLIST_NODE = 3;
+
+    // These options need to be the same numeric values as the one in the native reading code.
+    static final int GERMAN_UMLAUT_PROCESSING_FLAG = 0x1;
+    // TODO: Make the native reading code read this variable.
+    static final int HAS_PARENT_ADDRESS = 0x2;
+    static final int FRENCH_LIGATURE_PROCESSING_FLAG = 0x4;
+    static final int CONTAINS_BIGRAMS_FLAG = 0x8;
+    // TODO: Make the native reading code read this variable.
+    static final int HAS_LINKEDLIST_NODE = 0x10;
+
+    // TODO: Make this value adaptive to content data, store it in the header, and
+    // use it in the reading code.
+    static final int MAX_WORD_LENGTH = Constants.Dictionary.MAX_WORD_LENGTH;
+
+    static final int PARENT_ADDRESS_SIZE = 3;
+    static final int FORWARD_LINK_ADDRESS_SIZE = 3;
+
+    static final int MASK_GROUP_ADDRESS_TYPE = 0xC0;
+    static final int FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00;
+    static final int FLAG_GROUP_ADDRESS_TYPE_ONEBYTE = 0x40;
+    static final int FLAG_GROUP_ADDRESS_TYPE_TWOBYTES = 0x80;
+    static final int FLAG_GROUP_ADDRESS_TYPE_THREEBYTES = 0xC0;
+
+    static final int FLAG_HAS_MULTIPLE_CHARS = 0x20;
+
+    static final int FLAG_IS_TERMINAL = 0x10;
+    static final int FLAG_HAS_SHORTCUT_TARGETS = 0x08;
+    static final int FLAG_HAS_BIGRAMS = 0x04;
+    static final int FLAG_IS_NOT_A_WORD = 0x02;
+    static final int FLAG_IS_BLACKLISTED = 0x01;
+
+    static final int FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
+    static final int FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
+    static final int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30;
+    static final int FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10;
+    static final int FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20;
+    static final int FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30;
+    static final int FLAG_ATTRIBUTE_FREQUENCY = 0x0F;
+
+    static final int GROUP_CHARACTERS_TERMINATOR = 0x1F;
+
+    static final int GROUP_TERMINATOR_SIZE = 1;
+    static final int GROUP_FLAGS_SIZE = 1;
+    static final int GROUP_FREQUENCY_SIZE = 1;
+    static final int GROUP_MAX_ADDRESS_SIZE = 3;
+    static final int GROUP_ATTRIBUTE_FLAGS_SIZE = 1;
+    static final int GROUP_ATTRIBUTE_MAX_ADDRESS_SIZE = 3;
+    static final int GROUP_SHORTCUT_LIST_SIZE_SIZE = 2;
+
+    static final int NO_CHILDREN_ADDRESS = Integer.MIN_VALUE;
+    static final int NO_PARENT_ADDRESS = 0;
+    static final int NO_FORWARD_LINK_ADDRESS = 0;
+    static final int INVALID_CHARACTER = -1;
+
+    static final int MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT = 0x7F; // 127
+    static final int MAX_CHARGROUPS_IN_A_NODE = 0x7FFF; // 32767
+
+    static final int MAX_TERMINAL_FREQUENCY = 255;
+    static final int MAX_BIGRAM_FREQUENCY = 15;
+
+    // This option needs to be the same numeric value as the one in binary_format.h.
+    static final int NOT_VALID_WORD = -99;
+
+    /**
+     * Options about file format.
+     */
+    public static class FormatOptions {
+        public final int mVersion;
+        public final boolean mHasParentAddress;
+        public final boolean mHasLinkedListNode;
+        public FormatOptions(final int version) {
+            this(version, false);
+        }
+        public FormatOptions(final int version, final boolean hasParentAddress) {
+            this(version, hasParentAddress, false);
+        }
+        public FormatOptions(final int version, final boolean hasParentAddress,
+                final boolean hasLinkedListNode) {
+            mVersion = version;
+            if (version < FIRST_VERSION_WITH_PARENT_ADDRESS && hasParentAddress) {
+                throw new RuntimeException("Parent addresses are only supported with versions "
+                        + FIRST_VERSION_WITH_PARENT_ADDRESS + " and ulterior.");
+            }
+            mHasParentAddress = hasParentAddress;
+
+            if (version < FIRST_VERSION_WITH_LINKEDLIST_NODE && hasLinkedListNode) {
+                throw new RuntimeException("Linked list nodes are only supported with versions "
+                        + FIRST_VERSION_WITH_LINKEDLIST_NODE + " and ulterior.");
+            }
+            if (!hasParentAddress && hasLinkedListNode) {
+                throw new RuntimeException("Linked list nodes need parent addresses.");
+            }
+            mHasLinkedListNode = hasLinkedListNode;
+        }
+    }
+
+    /**
+     * Class representing file header.
+     */
+    static final class FileHeader {
+        public final int mHeaderSize;
+        public final DictionaryOptions mDictionaryOptions;
+        public final FormatOptions mFormatOptions;
+        public FileHeader(final int headerSize, final DictionaryOptions dictionaryOptions,
+                final FormatOptions formatOptions) {
+            mHeaderSize = headerSize;
+            mDictionaryOptions = dictionaryOptions;
+            mFormatOptions = formatOptions;
+        }
+    }
+
+    private FormatSpec() {
+        // This utility class is not publicly instantiable.
+    }
+}
diff --git a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
index 8b53c9427..98cf308c8 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
@@ -16,6 +16,8 @@
 
 package com.android.inputmethod.latin.makedict;
 
+import com.android.inputmethod.latin.Constants;
+
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -41,17 +43,15 @@ public class FusionDictionary implements Iterable<Word> {
     public static class Node {
         ArrayList<CharGroup> mData;
         // To help with binary generation
-        int mCachedSize;
-        int mCachedAddress;
+        int mCachedSize = Integer.MIN_VALUE;
+        int mCachedAddress = Integer.MIN_VALUE;
+        int mCachedParentAddress = 0;
+
         public Node() {
             mData = new ArrayList<CharGroup>();
-            mCachedSize = Integer.MIN_VALUE;
-            mCachedAddress = Integer.MIN_VALUE;
         }
         public Node(ArrayList<CharGroup> data) {
             mData = data;
-            mCachedSize = Integer.MIN_VALUE;
-            mCachedAddress = Integer.MIN_VALUE;
         }
     }
 
@@ -61,8 +61,8 @@ public class FusionDictionary implements Iterable<Word> {
      * This represents an "attribute", that is either a bigram or a shortcut.
      */
     public static class WeightedString {
-        final String mWord;
-        int mFrequency;
+        public final String mWord;
+        public int mFrequency;
         public WeightedString(String word, int frequency) {
             mWord = word;
             mFrequency = frequency;
@@ -101,26 +101,34 @@ public class FusionDictionary implements Iterable<Word> {
         ArrayList<WeightedString> mBigrams;
         int mFrequency; // NOT_A_TERMINAL == mFrequency indicates this is not a terminal.
         Node mChildren;
+        boolean mIsNotAWord; // Only a shortcut
+        boolean mIsBlacklistEntry;
         // The two following members to help with binary generation
         int mCachedSize;
         int mCachedAddress;
 
         public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
-                final ArrayList<WeightedString> bigrams, final int frequency) {
+                final ArrayList<WeightedString> bigrams, final int frequency,
+                final boolean isNotAWord, final boolean isBlacklistEntry) {
             mChars = chars;
             mFrequency = frequency;
             mShortcutTargets = shortcutTargets;
             mBigrams = bigrams;
             mChildren = null;
+            mIsNotAWord = isNotAWord;
+            mIsBlacklistEntry = isBlacklistEntry;
         }
 
         public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
-                final ArrayList<WeightedString> bigrams, final int frequency, final Node children) {
+                final ArrayList<WeightedString> bigrams, final int frequency,
+                final boolean isNotAWord, final boolean isBlacklistEntry, final Node children) {
             mChars = chars;
             mFrequency = frequency;
             mShortcutTargets = shortcutTargets;
             mBigrams = bigrams;
             mChildren = children;
+            mIsNotAWord = isNotAWord;
+            mIsBlacklistEntry = isBlacklistEntry;
         }
 
         public void addChild(CharGroup n) {
@@ -197,8 +205,9 @@ public class FusionDictionary implements Iterable<Word> {
          * the existing ones if any. Note: unigram, bigram, and shortcut frequencies are only
          * updated if they are higher than the existing ones.
          */
-        public void update(int frequency, ArrayList<WeightedString> shortcutTargets,
-                ArrayList<WeightedString> bigrams) {
+        public void update(final int frequency, final ArrayList<WeightedString> shortcutTargets,
+                final ArrayList<WeightedString> bigrams,
+                final boolean isNotAWord, final boolean isBlacklistEntry) {
             if (frequency > mFrequency) {
                 mFrequency = frequency;
             }
@@ -234,6 +243,8 @@ public class FusionDictionary implements Iterable<Word> {
                     }
                 }
             }
+            mIsNotAWord = isNotAWord;
+            mIsBlacklistEntry = isBlacklistEntry;
         }
     }
 
@@ -296,10 +307,24 @@ public class FusionDictionary implements Iterable<Word> {
      * @param word the word to add.
      * @param frequency the frequency of the word, in the range [0..255].
      * @param shortcutTargets a list of shortcut targets for this word, or null.
+     * @param isNotAWord true if this should not be considered a word (e.g. shortcut only)
      */
     public void add(final String word, final int frequency,
-            final ArrayList<WeightedString> shortcutTargets) {
-        add(getCodePoints(word), frequency, shortcutTargets);
+            final ArrayList<WeightedString> shortcutTargets, final boolean isNotAWord) {
+        add(getCodePoints(word), frequency, shortcutTargets, isNotAWord,
+                false /* isBlacklistEntry */);
+    }
+
+    /**
+     * Helper method to add a blacklist entry as a string.
+     *
+     * @param word the word to add as a blacklist entry.
+     * @param shortcutTargets a list of shortcut targets for this word, or null.
+     * @param isNotAWord true if this is not a word for spellchecking purposes (shortcut only or so)
+     */
+    public void addBlacklistEntry(final String word,
+            final ArrayList<WeightedString> shortcutTargets, final boolean isNotAWord) {
+        add(getCodePoints(word), 0, shortcutTargets, isNotAWord, true /* isBlacklistEntry */);
     }
 
     /**
@@ -332,7 +357,8 @@ public class FusionDictionary implements Iterable<Word> {
         if (charGroup != null) {
             final CharGroup charGroup2 = findWordInTree(mRoot, word2);
             if (charGroup2 == null) {
-                add(getCodePoints(word2), 0, null);
+                add(getCodePoints(word2), 0, null, false /* isNotAWord */,
+                        false /* isBlacklistEntry */);
             }
             charGroup.addBigram(word2, frequency);
         } else {
@@ -349,10 +375,18 @@ public class FusionDictionary implements Iterable<Word> {
      * @param word the word, as an int array.
      * @param frequency the frequency of the word, in the range [0..255].
      * @param shortcutTargets an optional list of shortcut targets for this word (null if none).
+     * @param isNotAWord true if this is not a word for spellchecking purposes (shortcut only or so)
+     * @param isBlacklistEntry true if this is a blacklisted word, false otherwise
      */
     private void add(final int[] word, final int frequency,
-            final ArrayList<WeightedString> shortcutTargets) {
+            final ArrayList<WeightedString> shortcutTargets,
+            final boolean isNotAWord, final boolean isBlacklistEntry) {
         assert(frequency >= 0 && frequency <= 255);
+        if (word.length >= Constants.Dictionary.MAX_WORD_LENGTH) {
+            MakedictLog.w("Ignoring a word that is too long: word.length = " + word.length);
+            return;
+        }
+
         Node currentNode = mRoot;
         int charIndex = 0;
 
@@ -376,7 +410,7 @@ public class FusionDictionary implements Iterable<Word> {
             final int insertionIndex = findInsertionIndex(currentNode, word[charIndex]);
             final CharGroup newGroup = new CharGroup(
                     Arrays.copyOfRange(word, charIndex, word.length),
-                    shortcutTargets, null /* bigrams */, frequency);
+                    shortcutTargets, null /* bigrams */, frequency, isNotAWord, isBlacklistEntry);
             currentNode.mData.add(insertionIndex, newGroup);
             if (DBG) checkStack(currentNode);
         } else {
@@ -386,13 +420,15 @@ public class FusionDictionary implements Iterable<Word> {
                     // The new word is a prefix of an existing word, but the node on which it
                     // should end already exists as is. Since the old CharNode was not a terminal, 
                     // make it one by filling in its frequency and other attributes
-                    currentGroup.update(frequency, shortcutTargets, null);
+                    currentGroup.update(frequency, shortcutTargets, null, isNotAWord,
+                            isBlacklistEntry);
                 } else {
                     // The new word matches the full old word and extends past it.
                     // We only have to create a new node and add it to the end of this.
                     final CharGroup newNode = new CharGroup(
                             Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length),
-                                    shortcutTargets, null /* bigrams */, frequency);
+                                    shortcutTargets, null /* bigrams */, frequency, isNotAWord,
+                                    isBlacklistEntry);
                     currentGroup.mChildren = new Node();
                     currentGroup.mChildren.mData.add(newNode);
                 }
@@ -400,7 +436,9 @@ public class FusionDictionary implements Iterable<Word> {
                 if (0 == differentCharIndex) {
                     // Exact same word. Update the frequency if higher. This will also add the
                     // new shortcuts to the existing shortcut list if it already exists.
-                    currentGroup.update(frequency, shortcutTargets, null);
+                    currentGroup.update(frequency, shortcutTargets, null,
+                            currentGroup.mIsNotAWord && isNotAWord,
+                            currentGroup.mIsBlacklistEntry || isBlacklistEntry);
                 } else {
                     // Partial prefix match only. We have to replace the current node with a node
                     // containing the current prefix and create two new ones for the tails.
@@ -408,21 +446,26 @@ public class FusionDictionary implements Iterable<Word> {
                     final CharGroup newOldWord = new CharGroup(
                             Arrays.copyOfRange(currentGroup.mChars, differentCharIndex,
                                     currentGroup.mChars.length), currentGroup.mShortcutTargets,
-                            currentGroup.mBigrams, currentGroup.mFrequency, currentGroup.mChildren);
+                            currentGroup.mBigrams, currentGroup.mFrequency,
+                            currentGroup.mIsNotAWord, currentGroup.mIsBlacklistEntry,
+                            currentGroup.mChildren);
                     newChildren.mData.add(newOldWord);
 
                     final CharGroup newParent;
                     if (charIndex + differentCharIndex >= word.length) {
                         newParent = new CharGroup(
                                 Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex),
-                                shortcutTargets, null /* bigrams */, frequency, newChildren);
+                                shortcutTargets, null /* bigrams */, frequency,
+                                isNotAWord, isBlacklistEntry, newChildren);
                     } else {
                         newParent = new CharGroup(
                                 Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex),
-                                null /* shortcutTargets */, null /* bigrams */, -1, newChildren);
+                                null /* shortcutTargets */, null /* bigrams */, -1, 
+                                false /* isNotAWord */, false /* isBlacklistEntry */, newChildren);
                         final CharGroup newWord = new CharGroup(Arrays.copyOfRange(word,
                                 charIndex + differentCharIndex, word.length),
-                                shortcutTargets, null /* bigrams */, frequency);
+                                shortcutTargets, null /* bigrams */, frequency,
+                                isNotAWord, isBlacklistEntry);
                         final int addIndex = word[charIndex + differentCharIndex]
                                 > currentGroup.mChars[differentCharIndex] ? 1 : 0;
                         newChildren.mData.add(addIndex, newWord);
@@ -483,7 +526,8 @@ public class FusionDictionary implements Iterable<Word> {
     private static int findInsertionIndex(final Node node, int character) {
         final ArrayList<CharGroup> data = node.mData;
         final CharGroup reference = new CharGroup(new int[] { character },
-                null /* shortcutTargets */, null /* bigrams */, 0);
+                null /* shortcutTargets */, null /* bigrams */, 0, false /* isNotAWord */,
+                false /* isBlacklistEntry */);
         int result = Collections.binarySearch(data, reference, CHARGROUP_COMPARATOR);
         return result >= 0 ? result : -result - 1;
     }
@@ -512,17 +556,28 @@ public class FusionDictionary implements Iterable<Word> {
         final StringBuilder checker = DBG ? new StringBuilder() : null;
 
         CharGroup currentGroup;
+        final int codePointCountInS = s.codePointCount(0, s.length());
         do {
             int indexOfGroup = findIndexOfChar(node, s.codePointAt(index));
             if (CHARACTER_NOT_FOUND == indexOfGroup) return null;
             currentGroup = node.mData.get(indexOfGroup);
+
+            if (s.length() - index < currentGroup.mChars.length) return null;
+            int newIndex = index;
+            while (newIndex < s.length() && newIndex - index < currentGroup.mChars.length) {
+                if (currentGroup.mChars[newIndex - index] != s.codePointAt(newIndex)) return null;
+                newIndex++;
+            }
+            index = newIndex;
+
             if (DBG) checker.append(new String(currentGroup.mChars, 0, currentGroup.mChars.length));
-            index += currentGroup.mChars.length;
-            if (index < s.length()) {
+            if (index < codePointCountInS) {
                 node = currentGroup.mChildren;
             }
-        } while (null != node && index < s.length());
+        } while (null != node && index < codePointCountInS);
 
+        if (index < codePointCountInS) return null;
+        if (!currentGroup.isTerminal()) return null;
         if (DBG && !s.equals(checker.toString())) return null;
         return currentGroup;
     }
@@ -679,7 +734,7 @@ public class FusionDictionary implements Iterable<Word> {
 //      StringBuilder s = new StringBuilder();
 //      for (CharGroup g : node.data) {
 //          s.append(g.frequency);
-//          for (int ch : g.chars){
+//          for (int ch : g.chars) {
 //              s.append(Character.toChars(ch));
 //          }
 //      }
@@ -738,13 +793,14 @@ public class FusionDictionary implements Iterable<Word> {
                     }
                     if (currentGroup.mFrequency >= 0)
                         return new Word(mCurrentString.toString(), currentGroup.mFrequency,
-                                currentGroup.mShortcutTargets, currentGroup.mBigrams);
+                                currentGroup.mShortcutTargets, currentGroup.mBigrams,
+                                currentGroup.mIsNotAWord, currentGroup.mIsBlacklistEntry);
                 } else {
                     mPositions.removeLast();
                     currentPos = mPositions.getLast();
                     mCurrentString.setLength(mCurrentString.length() - mPositions.getLast().length);
                 }
-            } while(true);
+            } while (true);
         }
 
         @Override
diff --git a/java/src/com/android/inputmethod/latin/makedict/Word.java b/java/src/com/android/inputmethod/latin/makedict/Word.java
index d07826757..4683ef154 100644
--- a/java/src/com/android/inputmethod/latin/makedict/Word.java
+++ b/java/src/com/android/inputmethod/latin/makedict/Word.java
@@ -27,20 +27,25 @@ import java.util.Arrays;
  * This is chiefly used to iterate a dictionary.
  */
 public class Word implements Comparable<Word> {
-    final String mWord;
-    final int mFrequency;
-    final ArrayList<WeightedString> mShortcutTargets;
-    final ArrayList<WeightedString> mBigrams;
+    public final String mWord;
+    public final int mFrequency;
+    public final ArrayList<WeightedString> mShortcutTargets;
+    public final ArrayList<WeightedString> mBigrams;
+    public final boolean mIsNotAWord;
+    public final boolean mIsBlacklistEntry;
 
     private int mHashCode = 0;
 
     public Word(final String word, final int frequency,
             final ArrayList<WeightedString> shortcutTargets,
-            final ArrayList<WeightedString> bigrams) {
+            final ArrayList<WeightedString> bigrams,
+            final boolean isNotAWord, final boolean isBlacklistEntry) {
         mWord = word;
         mFrequency = frequency;
         mShortcutTargets = shortcutTargets;
         mBigrams = bigrams;
+        mIsNotAWord = isNotAWord;
+        mIsBlacklistEntry = isBlacklistEntry;
     }
 
     private static int computeHashCode(Word word) {
@@ -48,7 +53,9 @@ public class Word implements Comparable<Word> {
                 word.mWord,
                 word.mFrequency,
                 word.mShortcutTargets.hashCode(),
-                word.mBigrams.hashCode()
+                word.mBigrams.hashCode(),
+                word.mIsNotAWord,
+                word.mIsBlacklistEntry
         });
     }
 
@@ -78,7 +85,9 @@ public class Word implements Comparable<Word> {
         Word w = (Word)o;
         return mFrequency == w.mFrequency && mWord.equals(w.mWord)
                 && mShortcutTargets.equals(w.mShortcutTargets)
-                && mBigrams.equals(w.mBigrams);
+                && mBigrams.equals(w.mBigrams)
+                && mIsNotAWord == w.mIsNotAWord
+                && mIsBlacklistEntry == w.mIsBlacklistEntry;
     }
 
     @Override