aboutsummaryrefslogtreecommitdiffstats
path: root/java/src
diff options
context:
space:
mode:
authorTom Ouyang <ouyang@google.com>2012-04-18 14:15:34 -0700
committerTom Ouyang <ouyang@google.com>2012-04-19 10:18:57 -0700
commitdf7ebbbd616fa5aff569d00b16cd3f85ddf2da6d (patch)
tree6070f4a3fa45f33331033ba8d4af3124183b7b52 /java/src
parentf5a0bd2c28fd9ec1de8fe49196e7f7ae38566f90 (diff)
downloadlatinime-df7ebbbd616fa5aff569d00b16cd3f85ddf2da6d.tar.gz
latinime-df7ebbbd616fa5aff569d00b16cd3f85ddf2da6d.tar.xz
latinime-df7ebbbd616fa5aff569d00b16cd3f85ddf2da6d.zip
Change binary dictionary output buffer size to match dictionary size.
Bug: 6355943 Change-Id: Iaab7bc16ba0dbc7bfde70b06e7bd355519838831
Diffstat (limited to 'java/src')
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java97
1 files changed, 65 insertions, 32 deletions
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
index d22332116..97df98e34 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
@@ -21,6 +21,7 @@ import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
+import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.OutputStream;
@@ -272,6 +273,29 @@ public class BinaryDictInputOutput {
}
/**
+ * Writes a string with our character format to a ByteArrayOutputStream.
+ *
+ * This will also write the terminator byte.
+ *
+ * @param buffer the ByteArrayOutputStream to write to.
+ * @param word the string to write.
+ */
+ private static void writeString(ByteArrayOutputStream buffer, final String word) { // appends to buffer; returns nothing
+ final int length = word.length(); // counted in UTF-16 chars, not code points
+ for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) { // step one whole code point at a time
+ final int codePoint = word.codePointAt(i);
+ if (1 == getCharSize(codePoint)) { // getCharSize is defined outside this hunk; 1 selects the one-byte form
+ buffer.write((byte) codePoint);
+ } else {
+ buffer.write((byte) (0xFF & (codePoint >> 16))); // otherwise three bytes, most significant first
+ buffer.write((byte) (0xFF & (codePoint >> 8)));
+ buffer.write((byte) (0xFF & codePoint));
+ }
+ }
+ buffer.write(GROUP_CHARACTERS_TERMINATOR); // terminator follows the last character, even for an empty word
+ }
+
+ /**
* Reads a string from a RandomAccessFile. This is the converse of the above method.
*/
private static String readString(final RandomAccessFile source) throws IOException {
@@ -894,15 +918,11 @@ public class BinaryDictInputOutput {
final FusionDictionary dict, final int version)
throws IOException, UnsupportedFormatException {
- // Addresses are limited to 3 bytes, so we'll just make a 16MB buffer. Since addresses
- // can be relative to each node, the structure itself is not limited to 16MB at all, but
- // I doubt this will ever be shot. If it is, deciding the order of the nodes becomes
- // a quite complicated problem, because though the dictionary itself does not have a
- // size limit, each node must still be within 16MB of all its children and parents.
- // As long as this is ensured, the dictionary file may grow to any size.
- // Anyway, to make a dictionary bigger than 16MB just increase the size of this buffer.
- final byte[] buffer = new byte[1 << 24];
- int index = 0;
+ // Addresses are limited to 3 bytes, but since addresses can be relative to each node, the
+ // structure itself is not limited to 16MB. However, once it exceeds 16MB, deciding the
+ // order of the nodes becomes quite a complicated problem: although the dictionary itself
+ // has no size limit, each node must still be within 16MB of all its children and
+ // parents. As long as this is ensured, the dictionary file may grow to any size.
if (version < MINIMUM_SUPPORTED_VERSION || version > MAXIMUM_SUPPORTED_VERSION) {
throw new UnsupportedFormatException("Requested file format version " + version
@@ -910,47 +930,54 @@ public class BinaryDictInputOutput {
+ MINIMUM_SUPPORTED_VERSION + " through " + MAXIMUM_SUPPORTED_VERSION);
}
+ ByteArrayOutputStream headerBuffer = new ByteArrayOutputStream(256);
+
// The magic number in big-endian order.
if (version >= FIRST_VERSION_WITH_HEADER_SIZE) {
// Magic number for version 2+.
- buffer[index++] = (byte) (0xFF & (VERSION_2_MAGIC_NUMBER >> 24));
- buffer[index++] = (byte) (0xFF & (VERSION_2_MAGIC_NUMBER >> 16));
- buffer[index++] = (byte) (0xFF & (VERSION_2_MAGIC_NUMBER >> 8));
- buffer[index++] = (byte) (0xFF & VERSION_2_MAGIC_NUMBER);
+ headerBuffer.write((byte) (0xFF & (VERSION_2_MAGIC_NUMBER >> 24)));
+ headerBuffer.write((byte) (0xFF & (VERSION_2_MAGIC_NUMBER >> 16)));
+ headerBuffer.write((byte) (0xFF & (VERSION_2_MAGIC_NUMBER >> 8)));
+ headerBuffer.write((byte) (0xFF & VERSION_2_MAGIC_NUMBER));
// Dictionary version.
- buffer[index++] = (byte) (0xFF & (version >> 8));
- buffer[index++] = (byte) (0xFF & version);
+ headerBuffer.write((byte) (0xFF & (version >> 8)));
+ headerBuffer.write((byte) (0xFF & version));
} else {
// Magic number for version 1.
- buffer[index++] = (byte) (0xFF & (VERSION_1_MAGIC_NUMBER >> 8));
- buffer[index++] = (byte) (0xFF & VERSION_1_MAGIC_NUMBER);
+ headerBuffer.write((byte) (0xFF & (VERSION_1_MAGIC_NUMBER >> 8)));
+ headerBuffer.write((byte) (0xFF & VERSION_1_MAGIC_NUMBER));
// Dictionary version.
- buffer[index++] = (byte) (0xFF & version);
+ headerBuffer.write((byte) (0xFF & version));
}
// Options flags
final int options = makeOptionsValue(dict.mOptions);
- buffer[index++] = (byte) (0xFF & (options >> 8));
- buffer[index++] = (byte) (0xFF & options);
+ headerBuffer.write((byte) (0xFF & (options >> 8)));
+ headerBuffer.write((byte) (0xFF & options));
if (version >= FIRST_VERSION_WITH_HEADER_SIZE) {
- final int headerSizeOffset = index;
- index += 4; // Size of the header size
-
+ final int headerSizeOffset = headerBuffer.size();
+ // Placeholder to be written later with header size.
+ for (int i = 0; i < 4; ++i) {
+ headerBuffer.write(0);
+ }
// Write out the options.
for (final String key : dict.mOptions.mAttributes.keySet()) {
final String value = dict.mOptions.mAttributes.get(key);
- index += CharEncoding.writeString(buffer, index, key);
- index += CharEncoding.writeString(buffer, index, value);
+ CharEncoding.writeString(headerBuffer, key);
+ CharEncoding.writeString(headerBuffer, value);
}
-
+ final int size = headerBuffer.size();
+ final byte[] bytes = headerBuffer.toByteArray();
// Write out the header size.
- buffer[headerSizeOffset] = (byte) (0xFF & (index >> 24));
- buffer[headerSizeOffset + 1] = (byte) (0xFF & (index >> 16));
- buffer[headerSizeOffset + 2] = (byte) (0xFF & (index >> 8));
- buffer[headerSizeOffset + 3] = (byte) (0xFF & (index >> 0));
+ bytes[headerSizeOffset] = (byte) (0xFF & (size >> 24));
+ bytes[headerSizeOffset + 1] = (byte) (0xFF & (size >> 16));
+ bytes[headerSizeOffset + 2] = (byte) (0xFF & (size >> 8));
+ bytes[headerSizeOffset + 3] = (byte) (0xFF & (size >> 0));
+ destination.write(bytes);
+ } else {
+ headerBuffer.writeTo(destination);
}
- destination.write(buffer, 0, index);
- index = 0;
+ headerBuffer.close();
// Leave the choice of the optimal node order to the flattenTree function.
MakedictLog.i("Flattening the tree...");
@@ -961,6 +988,12 @@ public class BinaryDictInputOutput {
MakedictLog.i("Checking array...");
checkFlatNodeArray(flatNodes);
+ // Create a buffer that matches the final dictionary size.
+ final Node lastNode = flatNodes.get(flatNodes.size() - 1);
+ final int bufferSize =(lastNode.mCachedAddress + lastNode.mCachedSize);
+ final byte[] buffer = new byte[bufferSize];
+ int index = 0;
+
MakedictLog.i("Writing file...");
int dataEndOffset = 0;
for (Node n : flatNodes) {