diff options
Diffstat (limited to 'java/src/com/android/inputmethod/latin/makedict')
-rw-r--r-- | java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java | 143 | ||||
-rw-r--r-- | java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java (renamed from java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoder.java) | 4 | ||||
-rw-r--r-- | java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java | 45 | ||||
-rw-r--r-- | java/src/com/android/inputmethod/latin/makedict/DictDecoder.java (renamed from java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoder.java) | 100 | ||||
-rw-r--r-- | java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java | 22 | ||||
-rw-r--r-- | java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java | 274 |
6 files changed, 347 insertions, 241 deletions
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java index d1974c8d4..5d3695eff 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java @@ -40,7 +40,7 @@ import java.util.TreeMap; * * All the methods in this class are static. * - * TODO: Remove calls from classes except BinaryDictDecoder + * TODO: Remove calls from classes except Ver3DictDecoder * TODO: Move this file to makedict/internal. */ public final class BinaryDictDecoderUtils { @@ -278,6 +278,12 @@ public final class BinaryDictDecoderUtils { // Input methods: Read a binary dictionary to memory. // readDictionaryBinary is the public entry point for them. + static int readSInt24(final DictBuffer dictBuffer) { + final int retval = dictBuffer.readUnsignedInt24(); + final int sign = ((retval & FormatSpec.MSB24) != 0) ? -1 : 1; + return sign * (retval & FormatSpec.SINT24_MAX); + } + static int readChildrenAddress(final DictBuffer dictBuffer, final int optionFlags, final FormatOptions options) { if (options.mSupportsDynamicUpdate) { @@ -314,103 +320,6 @@ public final class BinaryDictDecoderUtils { } } - private static final int[] CHARACTER_BUFFER = new int[FormatSpec.MAX_WORD_LENGTH]; - public static CharGroupInfo readCharGroup(final DictBuffer dictBuffer, - final int originalGroupAddress, final FormatOptions options) { - int addressPointer = originalGroupAddress; - final int flags = dictBuffer.readUnsignedByte(); - ++addressPointer; - - final int parentAddress = readParentAddress(dictBuffer, options); - if (BinaryDictIOUtils.supportsDynamicUpdate(options)) { - addressPointer += 3; - } - - final int characters[]; - if (0 != (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS)) { - int index = 0; - int character = CharEncoding.readChar(dictBuffer); - addressPointer += CharEncoding.getCharSize(character); - while (-1 != character) { - // FusionDictionary is making sure that the length of the word is smaller than - // MAX_WORD_LENGTH. - // So we'll never write past the end of CHARACTER_BUFFER. - CHARACTER_BUFFER[index++] = character; - character = CharEncoding.readChar(dictBuffer); - addressPointer += CharEncoding.getCharSize(character); - } - characters = Arrays.copyOfRange(CHARACTER_BUFFER, 0, index); - } else { - final int character = CharEncoding.readChar(dictBuffer); - addressPointer += CharEncoding.getCharSize(character); - characters = new int[] { character }; - } - final int frequency; - if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) { - ++addressPointer; - frequency = dictBuffer.readUnsignedByte(); - } else { - frequency = CharGroup.NOT_A_TERMINAL; - } - int childrenAddress = readChildrenAddress(dictBuffer, flags, options); - if (childrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) { - childrenAddress += addressPointer; - } - addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options); - ArrayList<WeightedString> shortcutTargets = null; - if (0 != (flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS)) { - final int pointerBefore = dictBuffer.position(); - shortcutTargets = new ArrayList<WeightedString>(); - dictBuffer.readUnsignedShort(); // Skip the size - while (true) { - final int targetFlags = dictBuffer.readUnsignedByte(); - final String word = CharEncoding.readString(dictBuffer); - shortcutTargets.add(new WeightedString(word, - targetFlags & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY)); - if (0 == (targetFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT)) break; - } - addressPointer += dictBuffer.position() - pointerBefore; - } - ArrayList<PendingAttribute> bigrams = null; - if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) { - bigrams = new ArrayList<PendingAttribute>(); - int bigramCount = 0; - while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_GROUP) { - final int bigramFlags = dictBuffer.readUnsignedByte(); - ++addressPointer; - final int sign = 0 == (bigramFlags & FormatSpec.FLAG_ATTRIBUTE_OFFSET_NEGATIVE) - ? 1 : -1; - int bigramAddress = addressPointer; - switch (bigramFlags & FormatSpec.MASK_ATTRIBUTE_ADDRESS_TYPE) { - case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: - bigramAddress += sign * dictBuffer.readUnsignedByte(); - addressPointer += 1; - break; - case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: - bigramAddress += sign * dictBuffer.readUnsignedShort(); - addressPointer += 2; - break; - case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES: - final int offset = (dictBuffer.readUnsignedByte() << 16) - + dictBuffer.readUnsignedShort(); - bigramAddress += sign * offset; - addressPointer += 3; - break; - default: - throw new RuntimeException("Has bigrams with no address"); - } - bigrams.add(new PendingAttribute(bigramFlags & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY, - bigramAddress)); - if (0 == (bigramFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT)) break; - } - if (bigramCount >= FormatSpec.MAX_BIGRAMS_IN_A_GROUP) { - MakedictLog.d("too many bigrams in a group."); - } - } - return new CharGroupInfo(originalGroupAddress, addressPointer, flags, characters, frequency, - parentAddress, childrenAddress, shortcutTargets, bigrams); - } - /** * Reads and returns the char group count out of a buffer and forwards the pointer. */ @@ -427,24 +336,25 @@ public final class BinaryDictDecoderUtils { /** * Finds, as a string, the word at the address passed as an argument. * - * @param dictBuffer the buffer to read from. + * @param dictDecoder the dict decoder. * @param headerSize the size of the header. * @param address the address to seek. * @param formatOptions file format options. * @return the word with its frequency, as a weighted string. */ /* package for tests */ static WeightedString getWordAtAddress( - final DictBuffer dictBuffer, final int headerSize, final int address, + final Ver3DictDecoder dictDecoder, final int headerSize, final int address, final FormatOptions formatOptions) { + final DictBuffer dictBuffer = dictDecoder.getDictBuffer(); final WeightedString result; final int originalPointer = dictBuffer.position(); dictBuffer.position(address); if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) { - result = getWordAtAddressWithParentAddress(dictBuffer, headerSize, address, + result = getWordAtAddressWithParentAddress(dictDecoder, headerSize, address, formatOptions); } else { - result = getWordAtAddressWithoutParentAddress(dictBuffer, headerSize, address, + result = getWordAtAddressWithoutParentAddress(dictDecoder, headerSize, address, formatOptions); } @@ -454,8 +364,9 @@ public final class BinaryDictDecoderUtils { @SuppressWarnings("unused") private static WeightedString getWordAtAddressWithParentAddress( - final DictBuffer dictBuffer, final int headerSize, final int address, + final Ver3DictDecoder dictDecoder, final int headerSize, final int address, final FormatOptions options) { + final DictBuffer dictBuffer = dictDecoder.getDictBuffer(); int currentAddress = address; int frequency = Integer.MIN_VALUE; final StringBuilder builder = new StringBuilder(); @@ -465,7 +376,7 @@ public final class BinaryDictDecoderUtils { int loopCounter = 0; do { dictBuffer.position(currentAddress + headerSize); - currentInfo = readCharGroup(dictBuffer, currentAddress, options); + currentInfo = dictDecoder.readPtNode(currentAddress, options); if (BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags, options)) { currentAddress = currentInfo.mParentAddress + currentInfo.mOriginalAddress; } @@ -483,8 +394,9 @@ public final class BinaryDictDecoderUtils { } private static WeightedString getWordAtAddressWithoutParentAddress( - final DictBuffer dictBuffer, final int headerSize, final int address, + final Ver3DictDecoder dictDecoder, final int headerSize, final int address, final FormatOptions options) { + final DictBuffer dictBuffer = dictDecoder.getDictBuffer(); dictBuffer.position(headerSize); final int count = readCharGroupCount(dictBuffer); int groupOffset = BinaryDictIOUtils.getGroupCountSize(count); @@ -493,7 +405,7 @@ public final class BinaryDictDecoderUtils { CharGroupInfo last = null; for (int i = count - 1; i >= 0; --i) { - CharGroupInfo info = readCharGroup(dictBuffer, groupOffset, options); + CharGroupInfo info = dictDecoder.readPtNode(groupOffset, options); groupOffset = info.mEndAddress; if (info.mOriginalAddress == address) { builder.append(new String(info.mCharacters, 0, info.mCharacters.length)); @@ -532,17 +444,18 @@ public final class BinaryDictDecoderUtils { * This will recursively read other node arrays into the structure, populating the reverse * maps on the fly and using them to keep track of already read nodes. * - * @param dictBuffer the buffer, correctly positioned at the start of a node array. + * @param dictDecoder the dict decoder, correctly positioned at the start of a node array. * @param headerSize the size, in bytes, of the file header. * @param reverseNodeArrayMap a mapping from addresses to already read node arrays. * @param reverseGroupMap a mapping from addresses to already read character groups. * @param options file format options. * @return the read node array with all his children already read. */ - private static PtNodeArray readNodeArray(final DictBuffer dictBuffer, + private static PtNodeArray readNodeArray(final Ver3DictDecoder dictDecoder, final int headerSize, final Map<Integer, PtNodeArray> reverseNodeArrayMap, final Map<Integer, CharGroup> reverseGroupMap, final FormatOptions options) throws IOException { + final DictBuffer dictBuffer = dictDecoder.getDictBuffer(); final ArrayList<CharGroup> nodeArrayContents = new ArrayList<CharGroup>(); final int nodeArrayOrigin = dictBuffer.position() - headerSize; @@ -551,15 +464,15 @@ public final class BinaryDictDecoderUtils { final int count = readCharGroupCount(dictBuffer); int groupOffset = nodeArrayHeadPosition + BinaryDictIOUtils.getGroupCountSize(count); for (int i = count; i > 0; --i) { // Scan the array of CharGroup. - CharGroupInfo info = readCharGroup(dictBuffer, groupOffset, options); + CharGroupInfo info = dictDecoder.readPtNode(groupOffset, options); if (BinaryDictIOUtils.isMovedGroup(info.mFlags, options)) continue; ArrayList<WeightedString> shortcutTargets = info.mShortcutTargets; ArrayList<WeightedString> bigrams = null; if (null != info.mBigrams) { bigrams = new ArrayList<WeightedString>(); for (PendingAttribute bigram : info.mBigrams) { - final WeightedString word = getWordAtAddress( - dictBuffer, headerSize, bigram.mAddress, options); + final WeightedString word = getWordAtAddress(dictDecoder, headerSize, + bigram.mAddress, options); final int reconstructedFrequency = BinaryDictIOUtils.reconstructBigramFrequency(word.mFrequency, bigram.mFrequency); @@ -571,7 +484,7 @@ public final class BinaryDictDecoderUtils { if (null == children) { final int currentPosition = dictBuffer.position(); dictBuffer.position(info.mChildrenAddress + headerSize); - children = readNodeArray(dictBuffer, headerSize, reverseNodeArrayMap, + children = readNodeArray(dictDecoder, headerSize, reverseNodeArrayMap, reverseGroupMap, options); dictBuffer.position(currentPosition); } @@ -649,13 +562,13 @@ public final class BinaryDictDecoderUtils { * @return the created (or merged) dictionary. */ @UsedForTesting - public static FusionDictionary readDictionaryBinary(final BinaryDictDecoder dictDecoder, + public static FusionDictionary readDictionaryBinary(final Ver3DictDecoder dictDecoder, final FusionDictionary dict) throws FileNotFoundException, IOException, UnsupportedFormatException { // if the buffer has not been opened, open the buffer with bytebuffer. if (dictDecoder.getDictBuffer() == null) dictDecoder.openDictBuffer( - new BinaryDictDecoder.DictionaryBufferFromReadOnlyByteBufferFactory()); + new Ver3DictDecoder.DictionaryBufferFromReadOnlyByteBufferFactory()); if (dictDecoder.getDictBuffer() == null) { MakedictLog.e("Cannot open the buffer"); } @@ -665,7 +578,7 @@ public final class BinaryDictDecoderUtils { Map<Integer, PtNodeArray> reverseNodeArrayMapping = new TreeMap<Integer, PtNodeArray>(); Map<Integer, CharGroup> reverseGroupMapping = new TreeMap<Integer, CharGroup>(); - final PtNodeArray root = readNodeArray(dictDecoder.getDictBuffer(), fileHeader.mHeaderSize, + final PtNodeArray root = readNodeArray(dictDecoder, fileHeader.mHeaderSize, reverseNodeArrayMapping, reverseGroupMapping, fileHeader.mFormatOptions); FusionDictionary newDict = new FusionDictionary(root, fileHeader.mDictionaryOptions); diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoder.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java index ff11cde39..3fe3ae6ce 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java @@ -34,11 +34,11 @@ import java.util.Iterator; * * All the methods in this class are static. */ -public class BinaryDictEncoder { +public class BinaryDictEncoderUtils { private static final boolean DBG = MakedictLog.DBG; - private BinaryDictEncoder() { + private BinaryDictEncoderUtils() { // This utility class is not publicly instantiable. } diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java index 1abc779d0..a54fc8c21 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java @@ -62,10 +62,11 @@ public final class BinaryDictIOUtils { * Retrieves all node arrays without recursive call. */ private static void readUnigramsAndBigramsBinaryInner( - final DictBuffer dictBuffer, final int headerSize, + final Ver3DictDecoder dictDecoder, final int headerSize, final Map<Integer, String> words, final Map<Integer, Integer> frequencies, final Map<Integer, ArrayList<PendingAttribute>> bigrams, final FormatOptions formatOptions) { + final DictBuffer dictBuffer = dictDecoder.getDictBuffer(); int[] pushedChars = new int[FormatSpec.MAX_WORD_LENGTH + 1]; Stack<Position> stack = new Stack<Position>(); @@ -94,8 +95,7 @@ public final class BinaryDictIOUtils { stack.pop(); continue; } - CharGroupInfo info = BinaryDictDecoderUtils.readCharGroup(dictBuffer, - p.mAddress - headerSize, formatOptions); + CharGroupInfo info = dictDecoder.readPtNode(p.mAddress - headerSize, formatOptions); for (int i = 0; i < info.mCharacters.length; ++i) { pushedChars[index++] = info.mCharacters[i]; } @@ -148,13 +148,13 @@ public final class BinaryDictIOUtils { * @throws IOException if the file can't be read. * @throws UnsupportedFormatException if the format of the file is not recognized. */ - public static void readUnigramsAndBigramsBinary(final BinaryDictDecoder dictDecoder, + public static void readUnigramsAndBigramsBinary(final Ver3DictDecoder dictDecoder, final Map<Integer, String> words, final Map<Integer, Integer> frequencies, final Map<Integer, ArrayList<PendingAttribute>> bigrams) throws IOException, UnsupportedFormatException { // Read header final FileHeader header = dictDecoder.readHeader(); - readUnigramsAndBigramsBinaryInner(dictDecoder.getDictBuffer(), header.mHeaderSize, words, + readUnigramsAndBigramsBinaryInner(dictDecoder, header.mHeaderSize, words, frequencies, bigrams, header.mFormatOptions); } @@ -169,7 +169,7 @@ public final class BinaryDictIOUtils { * @throws UnsupportedFormatException if the format of the file is not recognized. */ @UsedForTesting - public static int getTerminalPosition(final BinaryDictDecoder dictDecoder, + public static int getTerminalPosition(final Ver3DictDecoder dictDecoder, final String word) throws IOException, UnsupportedFormatException { final DictBuffer dictBuffer = dictDecoder.getDictBuffer(); if (word == null) return FormatSpec.NOT_VALID_WORD; @@ -186,8 +186,8 @@ public final class BinaryDictIOUtils { boolean foundNextCharGroup = false; for (int i = 0; i < charGroupCount; ++i) { final int charGroupPos = dictBuffer.position(); - final CharGroupInfo currentInfo = BinaryDictDecoderUtils.readCharGroup( - dictBuffer, dictBuffer.position(), header.mFormatOptions); + final CharGroupInfo currentInfo = dictDecoder.readPtNode(charGroupPos, + header.mFormatOptions); final boolean isMovedGroup = isMovedGroup(currentInfo.mFlags, header.mFormatOptions); final boolean isDeletedGroup = isDeletedGroup(currentInfo.mFlags, @@ -272,7 +272,7 @@ public final class BinaryDictIOUtils { */ private static int writeVariableAddress(final OutputStream destination, final int value) throws IOException { - switch (BinaryDictEncoder.getByteSize(value)) { + switch (BinaryDictEncoderUtils.getByteSize(value)) { case 1: destination.write((byte)value); break; @@ -286,7 +286,7 @@ public final class BinaryDictIOUtils { destination.write((byte)(0xFF & value)); break; } - return BinaryDictEncoder.getByteSize(value); + return BinaryDictEncoderUtils.getByteSize(value); } static void skipCharGroup(final DictBuffer dictBuffer, @@ -413,14 +413,14 @@ public final class BinaryDictIOUtils { if (info.mShortcutTargets != null && info.mShortcutTargets.size() > 0) { final int shortcutListSize = - BinaryDictEncoder.getShortcutListSize(info.mShortcutTargets); + BinaryDictEncoderUtils.getShortcutListSize(info.mShortcutTargets); destination.write((byte)(shortcutListSize >> 8)); destination.write((byte)(shortcutListSize & 0xFF)); size += 2; final Iterator<WeightedString> shortcutIterator = info.mShortcutTargets.iterator(); while (shortcutIterator.hasNext()) { final WeightedString target = shortcutIterator.next(); - destination.write((byte)BinaryDictEncoder.makeShortcutFlags( + destination.write((byte)BinaryDictEncoderUtils.makeShortcutFlags( shortcutIterator.hasNext(), target.mFrequency)); size++; size += writeString(destination, target.mWord); @@ -429,7 +429,7 @@ public final class BinaryDictIOUtils { if (info.mBigrams != null) { // TODO: Consolidate this code with the code that computes the size of the bigram list - // in BinaryDictEncoder#computeActualNodeArraySize + // in BinaryDictEncoderUtils#computeActualNodeArraySize for (int i = 0; i < info.mBigrams.size(); ++i) { final int bigramFrequency = info.mBigrams.get(i).mFrequency; @@ -439,7 +439,7 @@ public final class BinaryDictIOUtils { final int bigramOffset = info.mBigrams.get(i).mAddress - (info.mOriginalAddress + size); bigramFlags |= (bigramOffset < 0) ? FormatSpec.FLAG_ATTRIBUTE_OFFSET_NEGATIVE : 0; - switch (BinaryDictEncoder.getByteSize(bigramOffset)) { + switch (BinaryDictEncoderUtils.getByteSize(bigramOffset)) { case 1: bigramFlags |= FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE; break; @@ -463,18 +463,18 @@ public final class BinaryDictIOUtils { */ static int computeGroupSize(final CharGroupInfo info, final FormatOptions formatOptions) { int size = FormatSpec.GROUP_FLAGS_SIZE + FormatSpec.PARENT_ADDRESS_SIZE - + BinaryDictEncoder.getGroupCharactersSize(info.mCharacters) + + BinaryDictEncoderUtils.getGroupCharactersSize(info.mCharacters) + getChildrenAddressSize(info.mFlags, formatOptions); if ((info.mFlags & FormatSpec.FLAG_IS_TERMINAL) != 0) { size += FormatSpec.GROUP_FREQUENCY_SIZE; } if (info.mShortcutTargets != null && !info.mShortcutTargets.isEmpty()) { - size += BinaryDictEncoder.getShortcutListSize(info.mShortcutTargets); + size += BinaryDictEncoderUtils.getShortcutListSize(info.mShortcutTargets); } if (info.mBigrams != null) { for (final PendingAttribute attr : info.mBigrams) { size += FormatSpec.GROUP_FLAGS_SIZE; - size += BinaryDictEncoder.getByteSize(attr.mAddress); + size += BinaryDictEncoderUtils.getByteSize(attr.mAddress); } } return size; @@ -508,7 +508,7 @@ public final class BinaryDictIOUtils { } /** - * Find a word using the BinaryDictDecoder. + * Find a word using the Ver3DictDecoder. * * @param dictDecoder the dict reader * @param word the word searched @@ -517,7 +517,7 @@ public final class BinaryDictIOUtils { * @throws UnsupportedFormatException */ @UsedForTesting - public static CharGroupInfo findWordByBinaryDictReader(final BinaryDictDecoder dictDecoder, + public static CharGroupInfo findWordByBinaryDictReader(final Ver3DictDecoder dictDecoder, final String word) throws IOException, UnsupportedFormatException { int position = getTerminalPosition(dictDecoder, word); final DictBuffer dictBuffer = dictDecoder.getDictBuffer(); @@ -525,8 +525,7 @@ public final class BinaryDictIOUtils { dictBuffer.position(0); final FileHeader header = dictDecoder.readHeader(); dictBuffer.position(position); - return BinaryDictDecoderUtils.readCharGroup(dictBuffer, position, - header.mFormatOptions); + return dictDecoder.readPtNode(position, header.mFormatOptions); } return null; } @@ -545,8 +544,8 @@ public final class BinaryDictIOUtils { final File file, final long offset, final long length) throws FileNotFoundException, IOException, UnsupportedFormatException { final byte[] buffer = new byte[HEADER_READING_BUFFER_SIZE]; - final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file); - dictDecoder.openDictBuffer(new BinaryDictDecoder.DictionaryBufferFactory() { + final Ver3DictDecoder dictDecoder = new Ver3DictDecoder(file); + dictDecoder.openDictBuffer(new DictDecoder.DictionaryBufferFactory() { @Override public DictBuffer getDictionaryBuffer(File file) throws FileNotFoundException, IOException { diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java index 2007b6284..144f91618 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java @@ -17,12 +17,10 @@ package com.android.inputmethod.latin.makedict; import com.android.inputmethod.annotations.UsedForTesting; -import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.utils.ByteArrayDictBuffer; -import com.android.inputmethod.latin.utils.JniUtils; import java.io.File; import java.io.FileInputStream; @@ -31,18 +29,17 @@ import java.io.IOException; import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; -import java.util.HashMap; -// TODO: Rename this class to "Ver3DictDecoder" or something, and make an interface "DictDecoder". -@UsedForTesting -public class BinaryDictDecoder { - - static { - JniUtils.loadNativeLibrary(); - } - - // TODO: implement something sensical instead of just a phony method - private static native int doNothing(); +/** + * An interface of binary dictionary decoder. + */ +public interface DictDecoder { + public FileHeader readHeader() throws IOException, UnsupportedFormatException; + /** + * Reads a PtNode and returns CharGroupInfo. + */ + public CharGroupInfo readPtNode(final int originalGroupAddress, + final FormatOptions formatOptions); public interface DictionaryBufferFactory { public DictBuffer getDictionaryBuffer(final File file) @@ -133,81 +130,4 @@ public class BinaryDictDecoder { return null; } } - - private final static class HeaderReader { - protected static int readVersion(final DictBuffer dictBuffer) - throws IOException, UnsupportedFormatException { - return BinaryDictDecoderUtils.checkFormatVersion(dictBuffer); - } - - protected static int readOptionFlags(final DictBuffer dictBuffer) { - return dictBuffer.readUnsignedShort(); - } - - protected static int readHeaderSize(final DictBuffer dictBuffer) { - return dictBuffer.readInt(); - } - - protected static HashMap<String, String> readAttributes(final DictBuffer dictBuffer, - final int headerSize) { - final HashMap<String, String> attributes = new HashMap<String, String>(); - while (dictBuffer.position() < headerSize) { - // We can avoid an infinite loop here since dictBuffer.position() is always - // increased by calling CharEncoding.readString. - final String key = CharEncoding.readString(dictBuffer); - final String value = CharEncoding.readString(dictBuffer); - attributes.put(key, value); - } - dictBuffer.position(headerSize); - return attributes; - } - } - - private final File mDictionaryBinaryFile; - private DictBuffer mDictBuffer; - - public BinaryDictDecoder(final File file) { - mDictionaryBinaryFile = file; - mDictBuffer = null; - } - - public void openDictBuffer(final DictionaryBufferFactory factory) - throws FileNotFoundException, IOException { - mDictBuffer = factory.getDictionaryBuffer(mDictionaryBinaryFile); - } - - public DictBuffer getDictBuffer() { - return mDictBuffer; - } - - @UsedForTesting - public DictBuffer openAndGetDictBuffer( - final DictionaryBufferFactory factory) - throws FileNotFoundException, IOException { - openDictBuffer(factory); - return getDictBuffer(); - } - - // TODO : Define public functions of decoders - public FileHeader readHeader() throws IOException, UnsupportedFormatException { - final int version = HeaderReader.readVersion(mDictBuffer); - final int optionsFlags = HeaderReader.readOptionFlags(mDictBuffer); - - final int headerSize = HeaderReader.readHeaderSize(mDictBuffer); - - if (headerSize < 0) { - throw new UnsupportedFormatException("header size can't be negative."); - } - - final HashMap<String, String> attributes = HeaderReader.readAttributes(mDictBuffer, - headerSize); - - final FileHeader header = new FileHeader(headerSize, - new FusionDictionary.DictionaryOptions(attributes, - 0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG), - 0 != (optionsFlags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG)), - new FormatOptions(version, - 0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE))); - return header; - } } diff --git a/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java index 6c1e75cbb..f976c8152 100644 --- a/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java @@ -55,7 +55,7 @@ public final class DynamicBinaryDictIOUtils { * @throws UnsupportedFormatException */ @UsedForTesting - public static void deleteWord(final BinaryDictDecoder dictDecoder, final String word) + public static void deleteWord(final Ver3DictDecoder dictDecoder, final String word) throws IOException, UnsupportedFormatException { final DictBuffer dictBuffer = dictDecoder.getDictBuffer(); dictBuffer.position(0); @@ -253,7 +253,7 @@ public final class DynamicBinaryDictIOUtils { // TODO: Support batch insertion. // TODO: Remove @UsedForTesting once UserHistoryDictionary is implemented by BinaryDictionary. @UsedForTesting - public static void insertWord(final BinaryDictDecoder dictDecoder, + public static void insertWord(final Ver3DictDecoder dictDecoder, final OutputStream destination, final String word, final int frequency, final ArrayList<WeightedString> bigramStrings, final ArrayList<WeightedString> shortcuts, final boolean isNotAWord, @@ -293,8 +293,8 @@ public final class DynamicBinaryDictIOUtils { for (int i = 0; i < charGroupCount; ++i) { address = dictBuffer.position(); - final CharGroupInfo currentInfo = BinaryDictDecoderUtils.readCharGroup(dictBuffer, - dictBuffer.position(), fileHeader.mFormatOptions); + final CharGroupInfo currentInfo = dictDecoder.readPtNode(address, + fileHeader.mFormatOptions); final boolean isMovedGroup = BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags, fileHeader.mFormatOptions); if (isMovedGroup) continue; @@ -314,7 +314,7 @@ public final class DynamicBinaryDictIOUtils { * abc - d - ef */ final int newNodeAddress = dictBuffer.limit(); - final int flags = BinaryDictEncoder.makeCharGroupFlags(p > 1, + final int flags = BinaryDictEncoderUtils.makeCharGroupFlags(p > 1, isTerminal, 0, hasShortcuts, hasBigrams, false /* isNotAWord */, false /* isBlackListEntry */, fileHeader.mFormatOptions); int written = moveGroup(newNodeAddress, currentInfo.mCharacters, p, flags, @@ -353,7 +353,7 @@ public final class DynamicBinaryDictIOUtils { final int childrenAddress = currentInfo.mChildrenAddress; // move prefix - final int prefixFlags = BinaryDictEncoder.makeCharGroupFlags(p > 1, + final int prefixFlags = BinaryDictEncoderUtils.makeCharGroupFlags(p > 1, false /* isTerminal */, 0 /* childrenAddressSize*/, false /* hasShortcut */, false /* hasBigrams */, false /* isNotAWord */, false /* isBlackListEntry */, @@ -369,7 +369,7 @@ public final class DynamicBinaryDictIOUtils { updateParentAddresses(dictBuffer, currentInfo.mChildrenAddress, newNodeAddress + written + 1, fileHeader.mFormatOptions); } - final int suffixFlags = BinaryDictEncoder.makeCharGroupFlags( + final int suffixFlags = BinaryDictEncoderUtils.makeCharGroupFlags( suffixCharacters.length > 1, (currentInfo.mFlags & FormatSpec.FLAG_IS_TERMINAL) != 0, 0 /* childrenAddressSize */, @@ -387,7 +387,7 @@ public final class DynamicBinaryDictIOUtils { final int[] newCharacters = Arrays.copyOfRange(codePoints, wordPos + p, codePoints.length); - final int flags = BinaryDictEncoder.makeCharGroupFlags( + final int flags = BinaryDictEncoderUtils.makeCharGroupFlags( newCharacters.length > 1, isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams, isNotAWord, isBlackListEntry, fileHeader.mFormatOptions); @@ -410,7 +410,7 @@ public final class DynamicBinaryDictIOUtils { // only update group. final int newNodeAddress = dictBuffer.limit(); final boolean hasMultipleChars = currentInfo.mCharacters.length > 1; - final int flags = BinaryDictEncoder.makeCharGroupFlags(hasMultipleChars, + final int flags = BinaryDictEncoderUtils.makeCharGroupFlags(hasMultipleChars, isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams, isNotAWord, isBlackListEntry, fileHeader.mFormatOptions); final CharGroupInfo newInfo = new CharGroupInfo(newNodeAddress + 1, @@ -440,7 +440,7 @@ public final class DynamicBinaryDictIOUtils { fileHeader.mFormatOptions); final int newGroupAddress = newNodeAddress + 1; final boolean hasMultipleChars = (wordLen - wordPos) > 1; - final int flags = BinaryDictEncoder.makeCharGroupFlags(hasMultipleChars, + final int flags = BinaryDictEncoderUtils.makeCharGroupFlags(hasMultipleChars, isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams, isNotAWord, isBlackListEntry, fileHeader.mFormatOptions); final int[] characters = Arrays.copyOfRange(codePoints, wordPos, wordLen); @@ -485,7 +485,7 @@ public final class DynamicBinaryDictIOUtils { BinaryDictIOUtils.writeSInt24ToBuffer(dictBuffer, newNodeAddress); final int[] characters = Arrays.copyOfRange(codePoints, wordPos, wordLen); - final int flags = BinaryDictEncoder.makeCharGroupFlags(characters.length > 1, + final int flags = BinaryDictEncoderUtils.makeCharGroupFlags(characters.length > 1, isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams, isNotAWord, isBlackListEntry, fileHeader.mFormatOptions); final CharGroupInfo newInfo = new CharGroupInfo(newNodeAddress + 1, diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java new file mode 100644 index 000000000..8373ae0bd --- /dev/null +++ b/java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java @@ -0,0 +1,274 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.inputmethod.latin.makedict; + +import com.android.inputmethod.annotations.UsedForTesting; +import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; +import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; +import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; +import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; +import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; +import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; +import com.android.inputmethod.latin.utils.JniUtils; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; + +/** + * An implementation of DictDecoder for version 3 binary dictionary. + */ +@UsedForTesting +public class Ver3DictDecoder implements DictDecoder { + + static { + JniUtils.loadNativeLibrary(); + } + + // TODO: implement something sensical instead of just a phony method + private static native int doNothing(); + + private final static class HeaderReader { + protected static int readVersion(final DictBuffer dictBuffer) + throws IOException, UnsupportedFormatException { + return BinaryDictDecoderUtils.checkFormatVersion(dictBuffer); + } + + protected static int readOptionFlags(final DictBuffer dictBuffer) { + return dictBuffer.readUnsignedShort(); + } + + protected static int readHeaderSize(final DictBuffer dictBuffer) { + return dictBuffer.readInt(); + } + + protected static HashMap<String, String> readAttributes(final DictBuffer dictBuffer, + final int headerSize) { + final HashMap<String, String> attributes = new HashMap<String, String>(); + while (dictBuffer.position() < headerSize) { + // We can avoid an infinite loop here since dictBuffer.position() is always + // increased by calling CharEncoding.readString. + final String key = CharEncoding.readString(dictBuffer); + final String value = CharEncoding.readString(dictBuffer); + attributes.put(key, value); + } + dictBuffer.position(headerSize); + return attributes; + } + } + + private final static class PtNodeReader { + protected static int readPtNodeOptionFlags(final DictBuffer dictBuffer) { + return dictBuffer.readUnsignedByte(); + } + + protected static int readParentAddress(final DictBuffer dictBuffer, + final FormatOptions formatOptions) { + if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) { + return BinaryDictDecoderUtils.readSInt24(dictBuffer); + } else { + return FormatSpec.NO_PARENT_ADDRESS; + } + } + + protected static int readFrequency(final DictBuffer dictBuffer) { + return dictBuffer.readUnsignedByte(); + } + + protected static int readChildrenAddress(final DictBuffer dictBuffer, final int optionFlags, + final FormatOptions formatOptions) { + if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) { + final int address = BinaryDictDecoderUtils.readSInt24(dictBuffer); + if (address == 0) return FormatSpec.NO_CHILDREN_ADDRESS; + return address; + } else { + switch (optionFlags & FormatSpec.MASK_GROUP_ADDRESS_TYPE) { + case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_ONEBYTE: + return dictBuffer.readUnsignedByte(); + case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_TWOBYTES: + return dictBuffer.readUnsignedShort(); + case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_THREEBYTES: + return dictBuffer.readUnsignedInt24(); + case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_NOADDRESS: + default: + return FormatSpec.NO_CHILDREN_ADDRESS; + } + } + } + + // Reads shortcuts and returns the read length. + protected static int readShortcut(final DictBuffer dictBuffer, + final ArrayList<WeightedString> shortcutTargets) { + final int pointerBefore = dictBuffer.position(); + dictBuffer.readUnsignedShort(); // skip the size + while (true) { + final int targetFlags = dictBuffer.readUnsignedByte(); + final String word = CharEncoding.readString(dictBuffer); + shortcutTargets.add(new WeightedString(word, + targetFlags & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY)); + if (0 == (targetFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT)) break; + } + return dictBuffer.position() - pointerBefore; + } + + protected static int readBigrams(final DictBuffer dictBuffer, + final ArrayList<PendingAttribute> bigrams, final int baseAddress) { + int readLength = 0; + int bigramCount = 0; + while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_GROUP) { + final int bigramFlags = dictBuffer.readUnsignedByte(); + ++readLength; + final int sign = 0 == (bigramFlags & FormatSpec.FLAG_ATTRIBUTE_OFFSET_NEGATIVE) + ? 1 : -1; + int bigramAddress = baseAddress + readLength; + switch (bigramFlags & FormatSpec.MASK_ATTRIBUTE_ADDRESS_TYPE) { + case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: + bigramAddress += sign * dictBuffer.readUnsignedByte(); + readLength += 1; + break; + case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: + bigramAddress += sign * dictBuffer.readUnsignedShort(); + readLength += 2; + break; + case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES: + final int offset = (dictBuffer.readUnsignedByte() << 16) + + dictBuffer.readUnsignedShort(); + bigramAddress += sign * offset; + readLength += 3; + break; + default: + throw new RuntimeException("Has bigrams with no address"); + } + bigrams.add(new PendingAttribute(bigramFlags & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY, + bigramAddress)); + if (0 == (bigramFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT)) break; + } + return readLength; + } + } + + private final File mDictionaryBinaryFile; + private DictBuffer mDictBuffer; + + public Ver3DictDecoder(final File file) { + mDictionaryBinaryFile = file; + mDictBuffer = null; + } + + public void openDictBuffer(final DictDecoder.DictionaryBufferFactory factory) + throws FileNotFoundException, IOException { + mDictBuffer = factory.getDictionaryBuffer(mDictionaryBinaryFile); + } + + public DictBuffer getDictBuffer() { + return mDictBuffer; + } + + @UsedForTesting + public DictBuffer openAndGetDictBuffer(final DictDecoder.DictionaryBufferFactory factory) + throws FileNotFoundException, IOException { + openDictBuffer(factory); + return getDictBuffer(); + } + + @Override + public FileHeader readHeader() throws IOException, UnsupportedFormatException { + final int version = HeaderReader.readVersion(mDictBuffer); + final int optionsFlags = HeaderReader.readOptionFlags(mDictBuffer); + + final int headerSize = HeaderReader.readHeaderSize(mDictBuffer); + + if (headerSize < 0) { + throw new UnsupportedFormatException("header size can't be negative."); + } + + final HashMap<String, String> attributes = HeaderReader.readAttributes(mDictBuffer, + headerSize); + + final FileHeader header = new FileHeader(headerSize, + new FusionDictionary.DictionaryOptions(attributes, + 0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG), + 0 != (optionsFlags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG)), + new FormatOptions(version, + 0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE))); + return header; + } + + // TODO: Make this buffer multi thread safe. + private final int[] mCharacterBuffer = new int[FormatSpec.MAX_WORD_LENGTH]; + @Override + public CharGroupInfo readPtNode(final int originalGroupAddress, + final FormatOptions options) { + int addressPointer = originalGroupAddress; + final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer); + ++addressPointer; + + final int parentAddress = PtNodeReader.readParentAddress(mDictBuffer, options); + if (BinaryDictIOUtils.supportsDynamicUpdate(options)) { + addressPointer += 3; + } + + final int characters[]; + if (0 != (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS)) { + int index = 0; + int character = CharEncoding.readChar(mDictBuffer); + addressPointer += CharEncoding.getCharSize(character); + while (-1 != character) { + // FusionDictionary is making sure that the length of the word is smaller than + // MAX_WORD_LENGTH. + // So we'll never write past the end of mCharacterBuffer. + mCharacterBuffer[index++] = character; + character = CharEncoding.readChar(mDictBuffer); + addressPointer += CharEncoding.getCharSize(character); + } + characters = Arrays.copyOfRange(mCharacterBuffer, 0, index); + } else { + final int character = CharEncoding.readChar(mDictBuffer); + addressPointer += CharEncoding.getCharSize(character); + characters = new int[] { character }; + } + final int frequency; + if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) { + ++addressPointer; + frequency = PtNodeReader.readFrequency(mDictBuffer); + } else { + frequency = CharGroup.NOT_A_TERMINAL; + } + int childrenAddress = PtNodeReader.readChildrenAddress(mDictBuffer, flags, options); + if (childrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) { + childrenAddress += addressPointer; + } + addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options); + ArrayList<WeightedString> shortcutTargets = null; + if (0 != (flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS)) { + addressPointer += PtNodeReader.readShortcut(mDictBuffer, shortcutTargets); + } + ArrayList<PendingAttribute> bigrams = null; + if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) { + bigrams = new ArrayList<PendingAttribute>(); + addressPointer += PtNodeReader.readBigrams(mDictBuffer, bigrams, addressPointer); + if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_GROUP) { + MakedictLog.d("too many bigrams in a group."); + } + } + return new CharGroupInfo(originalGroupAddress, addressPointer, flags, characters, frequency, + parentAddress, childrenAddress, shortcutTargets, bigrams); + } +} |