diff options
Diffstat (limited to 'java/src/com/android/inputmethod/latin/makedict')
4 files changed, 202 insertions, 65 deletions
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java index 167c6915c..c7b063daf 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java @@ -145,21 +145,21 @@ public final class BinaryDictIOUtils { * Reads unigrams and bigrams from the binary file. * Doesn't make the memory representation of the dictionary. * - * @param buffer the buffer to read. + * @param reader the reader. * @param words the map to store the address as a key and the word as a value. * @param frequencies the map to store the address as a key and the frequency as a value. * @param bigrams the map to store the address as a key and the list of address as a value. * @throws IOException * @throws UnsupportedFormatException */ - public static void readUnigramsAndBigramsBinary(final FusionDictionaryBufferInterface buffer, + public static void readUnigramsAndBigramsBinary(final BinaryDictReader reader, final Map<Integer, String> words, final Map<Integer, Integer> frequencies, final Map<Integer, ArrayList<PendingAttribute>> bigrams) throws IOException, UnsupportedFormatException { // Read header - final FileHeader header = BinaryDictInputOutput.readHeader(buffer); - readUnigramsAndBigramsBinaryInner(buffer, header.mHeaderSize, words, frequencies, bigrams, - header.mFormatOptions); + final FileHeader header = BinaryDictInputOutput.readHeader(reader.getBuffer()); + readUnigramsAndBigramsBinaryInner(reader.getBuffer(), header.mHeaderSize, words, + frequencies, bigrams, header.mFormatOptions); } /** diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java index 1b187d85d..504349a0b 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java @@ -66,6 +66,7 @@ public final class BinaryDictInputOutput { public void position(int newPosition); public void put(final byte b); public int limit(); + @UsedForTesting public int capacity(); } @@ -1210,49 +1211,38 @@ public final class BinaryDictInputOutput { ByteArrayOutputStream headerBuffer = new ByteArrayOutputStream(256); // The magic number in big-endian order. - if (version >= FormatSpec.FIRST_VERSION_WITH_HEADER_SIZE) { - // Magic number for version 2+. - headerBuffer.write((byte) (0xFF & (FormatSpec.VERSION_2_MAGIC_NUMBER >> 24))); - headerBuffer.write((byte) (0xFF & (FormatSpec.VERSION_2_MAGIC_NUMBER >> 16))); - headerBuffer.write((byte) (0xFF & (FormatSpec.VERSION_2_MAGIC_NUMBER >> 8))); - headerBuffer.write((byte) (0xFF & FormatSpec.VERSION_2_MAGIC_NUMBER)); - // Dictionary version. - headerBuffer.write((byte) (0xFF & (version >> 8))); - headerBuffer.write((byte) (0xFF & version)); - } else { - // Magic number for version 1. - headerBuffer.write((byte) (0xFF & (FormatSpec.VERSION_1_MAGIC_NUMBER >> 8))); - headerBuffer.write((byte) (0xFF & FormatSpec.VERSION_1_MAGIC_NUMBER)); - // Dictionary version. - headerBuffer.write((byte) (0xFF & version)); - } + // Magic number for all versions. + headerBuffer.write((byte) (0xFF & (FormatSpec.MAGIC_NUMBER >> 24))); + headerBuffer.write((byte) (0xFF & (FormatSpec.MAGIC_NUMBER >> 16))); + headerBuffer.write((byte) (0xFF & (FormatSpec.MAGIC_NUMBER >> 8))); + headerBuffer.write((byte) (0xFF & FormatSpec.MAGIC_NUMBER)); + // Dictionary version. + headerBuffer.write((byte) (0xFF & (version >> 8))); + headerBuffer.write((byte) (0xFF & version)); + // Options flags final int options = makeOptionsValue(dict, formatOptions); headerBuffer.write((byte) (0xFF & (options >> 8))); headerBuffer.write((byte) (0xFF & options)); - if (version >= FormatSpec.FIRST_VERSION_WITH_HEADER_SIZE) { - final int headerSizeOffset = headerBuffer.size(); - // Placeholder to be written later with header size. - for (int i = 0; i < 4; ++i) { - headerBuffer.write(0); - } - // Write out the options. - for (final String key : dict.mOptions.mAttributes.keySet()) { - final String value = dict.mOptions.mAttributes.get(key); - CharEncoding.writeString(headerBuffer, key); - CharEncoding.writeString(headerBuffer, value); - } - final int size = headerBuffer.size(); - final byte[] bytes = headerBuffer.toByteArray(); - // Write out the header size. - bytes[headerSizeOffset] = (byte) (0xFF & (size >> 24)); - bytes[headerSizeOffset + 1] = (byte) (0xFF & (size >> 16)); - bytes[headerSizeOffset + 2] = (byte) (0xFF & (size >> 8)); - bytes[headerSizeOffset + 3] = (byte) (0xFF & (size >> 0)); - destination.write(bytes); - } else { - headerBuffer.writeTo(destination); - } + final int headerSizeOffset = headerBuffer.size(); + // Placeholder to be written later with header size. + for (int i = 0; i < 4; ++i) { + headerBuffer.write(0); + } + // Write out the options. + for (final String key : dict.mOptions.mAttributes.keySet()) { + final String value = dict.mOptions.mAttributes.get(key); + CharEncoding.writeString(headerBuffer, key); + CharEncoding.writeString(headerBuffer, value); + } + final int size = headerBuffer.size(); + final byte[] bytes = headerBuffer.toByteArray(); + // Write out the header size. + bytes[headerSizeOffset] = (byte) (0xFF & (size >> 24)); + bytes[headerSizeOffset + 1] = (byte) (0xFF & (size >> 16)); + bytes[headerSizeOffset + 2] = (byte) (0xFF & (size >> 8)); + bytes[headerSizeOffset + 3] = (byte) (0xFF & (size >> 0)); + destination.write(bytes); headerBuffer.close(); @@ -1658,10 +1648,8 @@ public final class BinaryDictInputOutput { */ private static int getFormatVersion(final FusionDictionaryBufferInterface buffer) throws IOException { - final int magic_v1 = buffer.readUnsignedShort(); - if (FormatSpec.VERSION_1_MAGIC_NUMBER == magic_v1) return buffer.readUnsignedByte(); - final int magic_v2 = (magic_v1 << 16) + buffer.readUnsignedShort(); - if (FormatSpec.VERSION_2_MAGIC_NUMBER == magic_v2) return buffer.readUnsignedShort(); + final int magic = buffer.readInt(); + if (FormatSpec.MAGIC_NUMBER == magic) return buffer.readUnsignedShort(); return FormatSpec.NOT_A_VERSION_NUMBER; } @@ -1695,18 +1683,15 @@ public final class BinaryDictInputOutput { final HashMap<String, String> attributes = new HashMap<String, String>(); final int headerSize; - if (version < FormatSpec.FIRST_VERSION_WITH_HEADER_SIZE) { - headerSize = buffer.position(); - } else { - headerSize = buffer.readInt(); - populateOptions(buffer, headerSize, attributes); - buffer.position(headerSize); - } + headerSize = buffer.readInt(); if (headerSize < 0) { throw new UnsupportedFormatException("header size can't be negative."); } + populateOptions(buffer, headerSize, attributes); + buffer.position(headerSize); + final FileHeader header = new FileHeader(headerSize, new FusionDictionary.DictionaryOptions(attributes, 0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG), @@ -1738,23 +1723,30 @@ public final class BinaryDictInputOutput { * FusionDictionary structure. The optional dict argument is an existing dictionary to * which words from the buffer should be added. If it is null, a new dictionary is created. * - * @param buffer the buffer to read. + * @param reader the reader. * @param dict an optional dictionary to add words to, or null. * @return the created (or merged) dictionary. */ @UsedForTesting - public static FusionDictionary readDictionaryBinary( - final FusionDictionaryBufferInterface buffer, final FusionDictionary dict) - throws IOException, UnsupportedFormatException { + public static FusionDictionary readDictionaryBinary(final BinaryDictReader reader, + final FusionDictionary dict) throws FileNotFoundException, IOException, + UnsupportedFormatException { // clear cache wordCache.clear(); + // if the buffer has not been opened, open the buffer with bytebuffer. + if (reader.getBuffer() == null) reader.openBuffer( + new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory()); + if (reader.getBuffer() == null) { + MakedictLog.e("Cannot open the buffer"); + } + // Read header - final FileHeader header = readHeader(buffer); + final FileHeader header = readHeader(reader.getBuffer()); Map<Integer, Node> reverseNodeMapping = new TreeMap<Integer, Node>(); Map<Integer, CharGroup> reverseGroupMapping = new TreeMap<Integer, CharGroup>(); - final Node root = readNode(buffer, header.mHeaderSize, reverseNodeMapping, + final Node root = readNode(reader.getBuffer(), header.mHeaderSize, reverseNodeMapping, reverseGroupMapping, header.mFormatOptions); FusionDictionary newDict = new FusionDictionary(root, header.mDictionaryOptions); diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictReader.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictReader.java new file mode 100644 index 000000000..57a583228 --- /dev/null +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictReader.java @@ -0,0 +1,109 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.inputmethod.latin.makedict; + +import com.android.inputmethod.annotations.UsedForTesting; +import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface; +import com.android.inputmethod.latin.utils.ByteArrayWrapper; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; + +public class BinaryDictReader { + + public interface FusionDictionaryBufferFactory { + public FusionDictionaryBufferInterface getFusionDictionaryBuffer(final File file) + throws FileNotFoundException, IOException; + } + + /** + * Creates FusionDictionaryBuffer from a ByteBuffer + */ + public static final class FusionDictionaryBufferFromByteBufferFactory + implements FusionDictionaryBufferFactory { + @Override + public FusionDictionaryBufferInterface getFusionDictionaryBuffer(final File file) + throws FileNotFoundException, IOException { + FileInputStream inStream = null; + ByteBuffer buffer = null; + try { + inStream = new FileInputStream(file); + buffer = inStream.getChannel().map(FileChannel.MapMode.READ_ONLY, + 0, file.length()); + } finally { + if (inStream != null) { + inStream.close(); + } + } + if (buffer != null) { + return new BinaryDictInputOutput.ByteBufferWrapper(buffer); + } + return null; + } + } + + /** + * Creates FusionDictionaryBuffer from a byte array + */ + public static final class FusionDictionaryBufferFromByteArrayFactory + implements FusionDictionaryBufferFactory { + @Override + public FusionDictionaryBufferInterface getFusionDictionaryBuffer(final File file) + throws FileNotFoundException, IOException { + FileInputStream inStream = null; + try { + inStream = new FileInputStream(file); + final byte[] array = new byte[(int) file.length()]; + inStream.read(array); + return new ByteArrayWrapper(array); + } finally { + if (inStream != null) { + inStream.close(); + } + } + } + } + + private final File mDictionaryBinaryFile; + private FusionDictionaryBufferInterface mFusionDictionaryBuffer; + + public BinaryDictReader(final File file) { + mDictionaryBinaryFile = file; + mFusionDictionaryBuffer = null; + } + + public void openBuffer(final FusionDictionaryBufferFactory factory) + throws FileNotFoundException, IOException { + mFusionDictionaryBuffer = factory.getFusionDictionaryBuffer(mDictionaryBinaryFile); + } + + public FusionDictionaryBufferInterface getBuffer() { + return mFusionDictionaryBuffer; + } + + @UsedForTesting + public FusionDictionaryBufferInterface openAndGetBuffer( + final FusionDictionaryBufferFactory factory) + throws FileNotFoundException, IOException { + openBuffer(factory); + return getBuffer(); + } +} diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java index feadcda76..2bb5d8b6e 100644 --- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java +++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java @@ -16,6 +16,7 @@ package com.android.inputmethod.latin.makedict; +import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.Constants; import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; @@ -25,6 +26,40 @@ import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions public final class FormatSpec { /* + * File header layout is as follows: + * + * v | + * e | MAGIC_NUMBER + version of the file format, 2 bytes. + * r | + * sion + * + * o | + * p | not used 4 bits + * t | has bigrams ? 1 bit, 1 = yes, 0 = no : CONTAINS_BIGRAMS_FLAG + * i | FRENCH_LIGATURE_PROCESSING_FLAG + * o | supports dynamic updates ? 1 bit, 1 = yes, 0 = no : SUPPORTS_DYNAMIC_UPDATE + * n | GERMAN_UMLAUT_PROCESSING_FLAG + * f | + * lags + * + * h | + * e | size of the file header, 4bytes + * a | including the size of the magic number, the option flags and the header size + * d | + * ersize + * + * | attributes list + * + * attributes list is: + * <key> = | string of characters at the char format described below, with the terminator used + * | to signal the end of the string. + * <value> = | string of characters at the char format described below, with the terminator used + * | to signal the end of the string. + * if the size of already read < headersize, goto key. + * + */ + + /* * Array of Node(FusionDictionary.Node) layout is as follows: * * g | @@ -150,12 +185,10 @@ public final class FormatSpec { * if (FLAG_ATTRIBUTE_HAS_NEXT goto flags */ - static final int VERSION_1_MAGIC_NUMBER = 0x78B1; - public static final int VERSION_2_MAGIC_NUMBER = 0x9BC13AFE; - static final int MINIMUM_SUPPORTED_VERSION = 1; + public static final int MAGIC_NUMBER = 0x9BC13AFE; + static final int MINIMUM_SUPPORTED_VERSION = 2; static final int MAXIMUM_SUPPORTED_VERSION = 3; static final int NOT_A_VERSION_NUMBER = -1; - static final int FIRST_VERSION_WITH_HEADER_SIZE = 2; static final int FIRST_VERSION_WITH_DYNAMIC_UPDATE = 3; // These options need to be the same numeric values as the one in the native reading code. @@ -236,9 +269,12 @@ public final class FormatSpec { public static final class FormatOptions { public final int mVersion; public final boolean mSupportsDynamicUpdate; + @UsedForTesting public FormatOptions(final int version) { this(version, false); } + + @UsedForTesting public FormatOptions(final int version, final boolean supportsDynamicUpdate) { mVersion = version; if (version < FIRST_VERSION_WITH_DYNAMIC_UPDATE && supportsDynamicUpdate) { |