Change binary dictionary output buffer size to match dictionary size.

Bug: 6355943 Change-Id: Iaab7bc16ba0dbc7bfde70b06e7bd355519838831
author: Tom Ouyang <ouyang@google.com> 2012-04-18 14:15:34 -0700
committer: Tom Ouyang <ouyang@google.com> 2012-04-19 10:18:57 -0700
commit: df7ebbbd616fa5aff569d00b16cd3f85ddf2da6d (patch)
tree: 6070f4a3fa45f33331033ba8d4af3124183b7b52 /java/src
parent: f5a0bd2c28fd9ec1de8fe49196e7f7ae38566f90 (diff)
download: latinime-df7ebbbd616fa5aff569d00b16cd3f85ddf2da6d.tar.gz
latinime-df7ebbbd616fa5aff569d00b16cd3f85ddf2da6d.tar.xz
latinime-df7ebbbd616fa5aff569d00b16cd3f85ddf2da6d.zip
1 files changed, 65 insertions, 32 deletions
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
index d22332116..97df98e34 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
@@ -21,6 +21,7 @@ import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions
 import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
 import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
 
+import java.io.ByteArrayOutputStream;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.OutputStream;
@@ -272,6 +273,29 @@ public class BinaryDictInputOutput {
         }
 
         /**
+         * Writes a string with our character format to a ByteArrayOutputStream.
+         *
+         * This will also write the terminator byte.
+         *
+         * @param buffer the ByteArrayOutputStream to write to.
+         * @param word the string to write.
+         */
+        private static void writeString(ByteArrayOutputStream buffer, final String word) {
+            final int length = word.length();
+            for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) {
+                final int codePoint = word.codePointAt(i);
+                if (1 == getCharSize(codePoint)) {
+                    buffer.write((byte) codePoint);
+                } else {
+                    buffer.write((byte) (0xFF & (codePoint >> 16)));
+                    buffer.write((byte) (0xFF & (codePoint >> 8)));
+                    buffer.write((byte) (0xFF & codePoint));
+                }
+            }
+            buffer.write(GROUP_CHARACTERS_TERMINATOR);
+        }
+
+        /**
          * Reads a string from a RandomAccessFile. This is the converse of the above method.
          */
         private static String readString(final RandomAccessFile source) throws IOException {
@@ -894,15 +918,11 @@ public class BinaryDictInputOutput {
             final FusionDictionary dict, final int version)
             throws IOException, UnsupportedFormatException {
 
-        // Addresses are limited to 3 bytes, so we'll just make a 16MB buffer. Since addresses
-        // can be relative to each node, the structure itself is not limited to 16MB at all, but
-        // I doubt this will ever be shot. If it is, deciding the order of the nodes becomes
-        // a quite complicated problem, because though the dictionary itself does not have a
-        // size limit, each node must still be within 16MB of all its children and parents.
-        // As long as this is ensured, the dictionary file may grow to any size.
-        // Anyway, to make a dictionary bigger than 16MB just increase the size of this buffer.
-        final byte[] buffer = new byte[1 << 24];
-        int index = 0;
+        // Addresses are limited to 3 bytes, but since addresses can be relative to each node, the
+        // structure itself is not limited to 16MB. However, if it is over 16MB deciding the order
+        // of the nodes becomes a quite complicated problem, because though the dictionary itself
+        // does not have a size limit, each node must still be within 16MB of all its children and
+        // parents. As long as this is ensured, the dictionary file may grow to any size.
 
         if (version < MINIMUM_SUPPORTED_VERSION || version > MAXIMUM_SUPPORTED_VERSION) {
             throw new UnsupportedFormatException("Requested file format version " + version
@@ -910,47 +930,54 @@ public class BinaryDictInputOutput {
                     + MINIMUM_SUPPORTED_VERSION + " through " + MAXIMUM_SUPPORTED_VERSION);
         }
 
+        ByteArrayOutputStream headerBuffer = new ByteArrayOutputStream(256);
+
         // The magic number in big-endian order.
         if (version >= FIRST_VERSION_WITH_HEADER_SIZE) {
             // Magic number for version 2+.
-            buffer[index++] = (byte) (0xFF & (VERSION_2_MAGIC_NUMBER >> 24));
-            buffer[index++] = (byte) (0xFF & (VERSION_2_MAGIC_NUMBER >> 16));
-            buffer[index++] = (byte) (0xFF & (VERSION_2_MAGIC_NUMBER >> 8));
-            buffer[index++] = (byte) (0xFF & VERSION_2_MAGIC_NUMBER);
+            headerBuffer.write((byte) (0xFF & (VERSION_2_MAGIC_NUMBER >> 24)));
+            headerBuffer.write((byte) (0xFF & (VERSION_2_MAGIC_NUMBER >> 16)));
+            headerBuffer.write((byte) (0xFF & (VERSION_2_MAGIC_NUMBER >> 8)));
+            headerBuffer.write((byte) (0xFF & VERSION_2_MAGIC_NUMBER));
             // Dictionary version.
-            buffer[index++] = (byte) (0xFF & (version >> 8));
-            buffer[index++] = (byte) (0xFF & version);
+            headerBuffer.write((byte) (0xFF & (version >> 8)));
+            headerBuffer.write((byte) (0xFF & version));
         } else {
             // Magic number for version 1.
-            buffer[index++] = (byte) (0xFF & (VERSION_1_MAGIC_NUMBER >> 8));
-            buffer[index++] = (byte) (0xFF & VERSION_1_MAGIC_NUMBER);
+            headerBuffer.write((byte) (0xFF & (VERSION_1_MAGIC_NUMBER >> 8)));
+            headerBuffer.write((byte) (0xFF & VERSION_1_MAGIC_NUMBER));
             // Dictionary version.
-            buffer[index++] = (byte) (0xFF & version);
+            headerBuffer.write((byte) (0xFF & version));
         }
         // Options flags
         final int options = makeOptionsValue(dict.mOptions);
-        buffer[index++] = (byte) (0xFF & (options >> 8));
-        buffer[index++] = (byte) (0xFF & options);
+        headerBuffer.write((byte) (0xFF & (options >> 8)));
+        headerBuffer.write((byte) (0xFF & options));
         if (version >= FIRST_VERSION_WITH_HEADER_SIZE) {
-            final int headerSizeOffset = index;
-            index += 4; // Size of the header size
-
+            final int headerSizeOffset = headerBuffer.size();
+            // Placeholder to be written later with header size.
+            for (int i = 0; i < 4; ++i) {
+                headerBuffer.write(0);
+            }
             // Write out the options.
             for (final String key : dict.mOptions.mAttributes.keySet()) {
                 final String value = dict.mOptions.mAttributes.get(key);
-                index += CharEncoding.writeString(buffer, index, key);
-                index += CharEncoding.writeString(buffer, index, value);
+                CharEncoding.writeString(headerBuffer, key);
+                CharEncoding.writeString(headerBuffer, value);
             }
-
+            final int size = headerBuffer.size();
+            final byte[] bytes = headerBuffer.toByteArray();
             // Write out the header size.
-            buffer[headerSizeOffset] = (byte) (0xFF & (index >> 24));
-            buffer[headerSizeOffset + 1] = (byte) (0xFF & (index >> 16));
-            buffer[headerSizeOffset + 2] = (byte) (0xFF & (index >> 8));
-            buffer[headerSizeOffset + 3] = (byte) (0xFF & (index >> 0));
+            bytes[headerSizeOffset] = (byte) (0xFF & (size >> 24));
+            bytes[headerSizeOffset + 1] = (byte) (0xFF & (size >> 16));
+            bytes[headerSizeOffset + 2] = (byte) (0xFF & (size >> 8));
+            bytes[headerSizeOffset + 3] = (byte) (0xFF & (size >> 0));
+            destination.write(bytes);
+        } else {
+            headerBuffer.writeTo(destination);
         }
 
-        destination.write(buffer, 0, index);
-        index = 0;
+        headerBuffer.close();
 
         // Leave the choice of the optimal node order to the flattenTree function.
         MakedictLog.i("Flattening the tree...");
@@ -961,6 +988,12 @@ public class BinaryDictInputOutput {
         MakedictLog.i("Checking array...");
         checkFlatNodeArray(flatNodes);
 
+        // Create a buffer that matches the final dictionary size.
+        final Node lastNode = flatNodes.get(flatNodes.size() - 1);
+        final int bufferSize =(lastNode.mCachedAddress + lastNode.mCachedSize);
+        final byte[] buffer = new byte[bufferSize];
+        int index = 0;
+
         MakedictLog.i("Writing file...");
         int dataEndOffset = 0;
         for (Node n : flatNodes) {
author	Tom Ouyang <ouyang@google.com>	2012-04-18 14:15:34 -0700
committer	Tom Ouyang <ouyang@google.com>	2012-04-19 10:18:57 -0700
commit	df7ebbbd616fa5aff569d00b16cd3f85ddf2da6d (patch)
tree	6070f4a3fa45f33331033ba8d4af3124183b7b52 /java/src
parent	f5a0bd2c28fd9ec1de8fe49196e7f7ae38566f90 (diff)
download	latinime-df7ebbbd616fa5aff569d00b16cd3f85ddf2da6d.tar.gz latinime-df7ebbbd616fa5aff569d00b16cd3f85ddf2da6d.tar.xz latinime-df7ebbbd616fa5aff569d00b16cd3f85ddf2da6d.zip