aboutsummaryrefslogtreecommitdiffstats
path: root/java/src/com/android/inputmethod/latin/makedict
diff options
context:
space:
mode:
Diffstat (limited to 'java/src/com/android/inputmethod/latin/makedict')
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java10
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java104
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/BinaryDictReader.java109
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/FormatSpec.java44
4 files changed, 202 insertions, 65 deletions
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java
index 167c6915c..c7b063daf 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java
@@ -145,21 +145,21 @@ public final class BinaryDictIOUtils {
* Reads unigrams and bigrams from the binary file.
* Doesn't make the memory representation of the dictionary.
*
- * @param buffer the buffer to read.
+ * @param reader the reader.
* @param words the map to store the address as a key and the word as a value.
* @param frequencies the map to store the address as a key and the frequency as a value.
* @param bigrams the map to store the address as a key and the list of address as a value.
* @throws IOException
* @throws UnsupportedFormatException
*/
- public static void readUnigramsAndBigramsBinary(final FusionDictionaryBufferInterface buffer,
+ public static void readUnigramsAndBigramsBinary(final BinaryDictReader reader,
final Map<Integer, String> words, final Map<Integer, Integer> frequencies,
final Map<Integer, ArrayList<PendingAttribute>> bigrams) throws IOException,
UnsupportedFormatException {
// Read header
- final FileHeader header = BinaryDictInputOutput.readHeader(buffer);
- readUnigramsAndBigramsBinaryInner(buffer, header.mHeaderSize, words, frequencies, bigrams,
- header.mFormatOptions);
+ final FileHeader header = BinaryDictInputOutput.readHeader(reader.getBuffer());
+ readUnigramsAndBigramsBinaryInner(reader.getBuffer(), header.mHeaderSize, words,
+ frequencies, bigrams, header.mFormatOptions);
}
/**
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
index 1b187d85d..504349a0b 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
@@ -66,6 +66,7 @@ public final class BinaryDictInputOutput {
public void position(int newPosition);
public void put(final byte b);
public int limit();
+ @UsedForTesting
public int capacity();
}
@@ -1210,49 +1211,38 @@ public final class BinaryDictInputOutput {
ByteArrayOutputStream headerBuffer = new ByteArrayOutputStream(256);
// The magic number in big-endian order.
- if (version >= FormatSpec.FIRST_VERSION_WITH_HEADER_SIZE) {
- // Magic number for version 2+.
- headerBuffer.write((byte) (0xFF & (FormatSpec.VERSION_2_MAGIC_NUMBER >> 24)));
- headerBuffer.write((byte) (0xFF & (FormatSpec.VERSION_2_MAGIC_NUMBER >> 16)));
- headerBuffer.write((byte) (0xFF & (FormatSpec.VERSION_2_MAGIC_NUMBER >> 8)));
- headerBuffer.write((byte) (0xFF & FormatSpec.VERSION_2_MAGIC_NUMBER));
- // Dictionary version.
- headerBuffer.write((byte) (0xFF & (version >> 8)));
- headerBuffer.write((byte) (0xFF & version));
- } else {
- // Magic number for version 1.
- headerBuffer.write((byte) (0xFF & (FormatSpec.VERSION_1_MAGIC_NUMBER >> 8)));
- headerBuffer.write((byte) (0xFF & FormatSpec.VERSION_1_MAGIC_NUMBER));
- // Dictionary version.
- headerBuffer.write((byte) (0xFF & version));
- }
+ // Magic number for all versions.
+ headerBuffer.write((byte) (0xFF & (FormatSpec.MAGIC_NUMBER >> 24)));
+ headerBuffer.write((byte) (0xFF & (FormatSpec.MAGIC_NUMBER >> 16)));
+ headerBuffer.write((byte) (0xFF & (FormatSpec.MAGIC_NUMBER >> 8)));
+ headerBuffer.write((byte) (0xFF & FormatSpec.MAGIC_NUMBER));
+ // Dictionary version.
+ headerBuffer.write((byte) (0xFF & (version >> 8)));
+ headerBuffer.write((byte) (0xFF & version));
+
// Options flags
final int options = makeOptionsValue(dict, formatOptions);
headerBuffer.write((byte) (0xFF & (options >> 8)));
headerBuffer.write((byte) (0xFF & options));
- if (version >= FormatSpec.FIRST_VERSION_WITH_HEADER_SIZE) {
- final int headerSizeOffset = headerBuffer.size();
- // Placeholder to be written later with header size.
- for (int i = 0; i < 4; ++i) {
- headerBuffer.write(0);
- }
- // Write out the options.
- for (final String key : dict.mOptions.mAttributes.keySet()) {
- final String value = dict.mOptions.mAttributes.get(key);
- CharEncoding.writeString(headerBuffer, key);
- CharEncoding.writeString(headerBuffer, value);
- }
- final int size = headerBuffer.size();
- final byte[] bytes = headerBuffer.toByteArray();
- // Write out the header size.
- bytes[headerSizeOffset] = (byte) (0xFF & (size >> 24));
- bytes[headerSizeOffset + 1] = (byte) (0xFF & (size >> 16));
- bytes[headerSizeOffset + 2] = (byte) (0xFF & (size >> 8));
- bytes[headerSizeOffset + 3] = (byte) (0xFF & (size >> 0));
- destination.write(bytes);
- } else {
- headerBuffer.writeTo(destination);
- }
+ final int headerSizeOffset = headerBuffer.size();
+ // Placeholder to be written later with header size.
+ for (int i = 0; i < 4; ++i) {
+ headerBuffer.write(0);
+ }
+ // Write out the options.
+ for (final String key : dict.mOptions.mAttributes.keySet()) {
+ final String value = dict.mOptions.mAttributes.get(key);
+ CharEncoding.writeString(headerBuffer, key);
+ CharEncoding.writeString(headerBuffer, value);
+ }
+ final int size = headerBuffer.size();
+ final byte[] bytes = headerBuffer.toByteArray();
+ // Write out the header size.
+ bytes[headerSizeOffset] = (byte) (0xFF & (size >> 24));
+ bytes[headerSizeOffset + 1] = (byte) (0xFF & (size >> 16));
+ bytes[headerSizeOffset + 2] = (byte) (0xFF & (size >> 8));
+ bytes[headerSizeOffset + 3] = (byte) (0xFF & (size >> 0));
+ destination.write(bytes);
headerBuffer.close();
@@ -1658,10 +1648,8 @@ public final class BinaryDictInputOutput {
*/
private static int getFormatVersion(final FusionDictionaryBufferInterface buffer)
throws IOException {
- final int magic_v1 = buffer.readUnsignedShort();
- if (FormatSpec.VERSION_1_MAGIC_NUMBER == magic_v1) return buffer.readUnsignedByte();
- final int magic_v2 = (magic_v1 << 16) + buffer.readUnsignedShort();
- if (FormatSpec.VERSION_2_MAGIC_NUMBER == magic_v2) return buffer.readUnsignedShort();
+ final int magic = buffer.readInt();
+ if (FormatSpec.MAGIC_NUMBER == magic) return buffer.readUnsignedShort();
return FormatSpec.NOT_A_VERSION_NUMBER;
}
@@ -1695,18 +1683,15 @@ public final class BinaryDictInputOutput {
final HashMap<String, String> attributes = new HashMap<String, String>();
final int headerSize;
- if (version < FormatSpec.FIRST_VERSION_WITH_HEADER_SIZE) {
- headerSize = buffer.position();
- } else {
- headerSize = buffer.readInt();
- populateOptions(buffer, headerSize, attributes);
- buffer.position(headerSize);
- }
+ headerSize = buffer.readInt();
if (headerSize < 0) {
throw new UnsupportedFormatException("header size can't be negative.");
}
+ populateOptions(buffer, headerSize, attributes);
+ buffer.position(headerSize);
+
final FileHeader header = new FileHeader(headerSize,
new FusionDictionary.DictionaryOptions(attributes,
0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG),
@@ -1738,23 +1723,30 @@ public final class BinaryDictInputOutput {
* FusionDictionary structure. The optional dict argument is an existing dictionary to
* which words from the buffer should be added. If it is null, a new dictionary is created.
*
- * @param buffer the buffer to read.
+ * @param reader the reader.
* @param dict an optional dictionary to add words to, or null.
* @return the created (or merged) dictionary.
*/
@UsedForTesting
- public static FusionDictionary readDictionaryBinary(
- final FusionDictionaryBufferInterface buffer, final FusionDictionary dict)
- throws IOException, UnsupportedFormatException {
+ public static FusionDictionary readDictionaryBinary(final BinaryDictReader reader,
+ final FusionDictionary dict) throws FileNotFoundException, IOException,
+ UnsupportedFormatException {
// clear cache
wordCache.clear();
+ // if the buffer has not been opened, open the buffer with bytebuffer.
+ if (reader.getBuffer() == null) reader.openBuffer(
+ new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory());
+ if (reader.getBuffer() == null) {
+ MakedictLog.e("Cannot open the buffer");
+ }
+
// Read header
- final FileHeader header = readHeader(buffer);
+ final FileHeader header = readHeader(reader.getBuffer());
Map<Integer, Node> reverseNodeMapping = new TreeMap<Integer, Node>();
Map<Integer, CharGroup> reverseGroupMapping = new TreeMap<Integer, CharGroup>();
- final Node root = readNode(buffer, header.mHeaderSize, reverseNodeMapping,
+ final Node root = readNode(reader.getBuffer(), header.mHeaderSize, reverseNodeMapping,
reverseGroupMapping, header.mFormatOptions);
FusionDictionary newDict = new FusionDictionary(root, header.mDictionaryOptions);
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictReader.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictReader.java
new file mode 100644
index 000000000..57a583228
--- /dev/null
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictReader.java
@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin.makedict;
+
+import com.android.inputmethod.annotations.UsedForTesting;
+import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface;
+import com.android.inputmethod.latin.utils.ByteArrayWrapper;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+
+public class BinaryDictReader {
+
+ public interface FusionDictionaryBufferFactory {
+ public FusionDictionaryBufferInterface getFusionDictionaryBuffer(final File file)
+ throws FileNotFoundException, IOException;
+ }
+
+ /**
+ * Creates FusionDictionaryBuffer from a ByteBuffer
+ */
+ public static final class FusionDictionaryBufferFromByteBufferFactory
+ implements FusionDictionaryBufferFactory {
+ @Override
+ public FusionDictionaryBufferInterface getFusionDictionaryBuffer(final File file)
+ throws FileNotFoundException, IOException {
+ FileInputStream inStream = null;
+ ByteBuffer buffer = null;
+ try {
+ inStream = new FileInputStream(file);
+ buffer = inStream.getChannel().map(FileChannel.MapMode.READ_ONLY,
+ 0, file.length());
+ } finally {
+ if (inStream != null) {
+ inStream.close();
+ }
+ }
+ if (buffer != null) {
+ return new BinaryDictInputOutput.ByteBufferWrapper(buffer);
+ }
+ return null;
+ }
+ }
+
+ /**
+ * Creates FusionDictionaryBuffer from a byte array
+ */
+ public static final class FusionDictionaryBufferFromByteArrayFactory
+ implements FusionDictionaryBufferFactory {
+ @Override
+ public FusionDictionaryBufferInterface getFusionDictionaryBuffer(final File file)
+ throws FileNotFoundException, IOException {
+ FileInputStream inStream = null;
+ try {
+ inStream = new FileInputStream(file);
+ final byte[] array = new byte[(int) file.length()];
+ inStream.read(array);
+ return new ByteArrayWrapper(array);
+ } finally {
+ if (inStream != null) {
+ inStream.close();
+ }
+ }
+ }
+ }
+
+ private final File mDictionaryBinaryFile;
+ private FusionDictionaryBufferInterface mFusionDictionaryBuffer;
+
+ public BinaryDictReader(final File file) {
+ mDictionaryBinaryFile = file;
+ mFusionDictionaryBuffer = null;
+ }
+
+ public void openBuffer(final FusionDictionaryBufferFactory factory)
+ throws FileNotFoundException, IOException {
+ mFusionDictionaryBuffer = factory.getFusionDictionaryBuffer(mDictionaryBinaryFile);
+ }
+
+ public FusionDictionaryBufferInterface getBuffer() {
+ return mFusionDictionaryBuffer;
+ }
+
+ @UsedForTesting
+ public FusionDictionaryBufferInterface openAndGetBuffer(
+ final FusionDictionaryBufferFactory factory)
+ throws FileNotFoundException, IOException {
+ openBuffer(factory);
+ return getBuffer();
+ }
+}
diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
index feadcda76..2bb5d8b6e 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
@@ -16,6 +16,7 @@
package com.android.inputmethod.latin.makedict;
+import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.Constants;
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
@@ -25,6 +26,40 @@ import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions
public final class FormatSpec {
/*
+ * File header layout is as follows:
+ *
+ * v |
+ * e | MAGIC_NUMBER + version of the file format, 2 bytes.
+ * r |
+ * sion
+ *
+ * o |
+ * p | not used 4 bits
+ * t | has bigrams ? 1 bit, 1 = yes, 0 = no : CONTAINS_BIGRAMS_FLAG
+ * i | FRENCH_LIGATURE_PROCESSING_FLAG
+ * o | supports dynamic updates ? 1 bit, 1 = yes, 0 = no : SUPPORTS_DYNAMIC_UPDATE
+ * n | GERMAN_UMLAUT_PROCESSING_FLAG
+ * f |
+ * lags
+ *
+ * h |
+ * e | size of the file header, 4bytes
+ * a | including the size of the magic number, the option flags and the header size
+ * d |
+ * ersize
+ *
+ * | attributes list
+ *
+ * attributes list is:
+ * <key> = | string of characters at the char format described below, with the terminator used
+ * | to signal the end of the string.
+ * <value> = | string of characters at the char format described below, with the terminator used
+ * | to signal the end of the string.
+ * if the size of already read < headersize, goto key.
+ *
+ */
+
+ /*
* Array of Node(FusionDictionary.Node) layout is as follows:
*
* g |
@@ -150,12 +185,10 @@ public final class FormatSpec {
* if (FLAG_ATTRIBUTE_HAS_NEXT goto flags
*/
- static final int VERSION_1_MAGIC_NUMBER = 0x78B1;
- public static final int VERSION_2_MAGIC_NUMBER = 0x9BC13AFE;
- static final int MINIMUM_SUPPORTED_VERSION = 1;
+ public static final int MAGIC_NUMBER = 0x9BC13AFE;
+ static final int MINIMUM_SUPPORTED_VERSION = 2;
static final int MAXIMUM_SUPPORTED_VERSION = 3;
static final int NOT_A_VERSION_NUMBER = -1;
- static final int FIRST_VERSION_WITH_HEADER_SIZE = 2;
static final int FIRST_VERSION_WITH_DYNAMIC_UPDATE = 3;
// These options need to be the same numeric values as the one in the native reading code.
@@ -236,9 +269,12 @@ public final class FormatSpec {
public static final class FormatOptions {
public final int mVersion;
public final boolean mSupportsDynamicUpdate;
+ @UsedForTesting
public FormatOptions(final int version) {
this(version, false);
}
+
+ @UsedForTesting
public FormatOptions(final int version, final boolean supportsDynamicUpdate) {
mVersion = version;
if (version < FIRST_VERSION_WITH_DYNAMIC_UPDATE && supportsDynamicUpdate) {