diff options
-rw-r--r-- | java/src/com/android/inputmethod/latin/BinaryDictionary.java | 2 | ||||
-rw-r--r-- | java/src/com/android/inputmethod/latin/Constants.java | 12 | ||||
-rw-r--r-- | java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java | 260 | ||||
-rw-r--r-- | java/src/com/android/inputmethod/latin/makedict/CharGroupInfo.java | 6 | ||||
-rw-r--r-- | java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java | 17 | ||||
-rw-r--r-- | native/jni/src/defines.h | 35 | ||||
-rw-r--r-- | native/jni/src/proximity_info.cpp | 8 | ||||
-rw-r--r-- | native/jni/src/proximity_info.h | 9 | ||||
-rw-r--r-- | native/jni/src/proximity_info_state.cpp | 75 | ||||
-rw-r--r-- | native/jni/src/proximity_info_state.h | 17 | ||||
-rw-r--r-- | tools/dicttool/Android.mk | 7 | ||||
-rw-r--r-- | tools/dicttool/src/android/inputmethod/latin/dicttool/DictionaryMaker.java | 48 |
12 files changed, 364 insertions, 132 deletions
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java index 8909526d8..c3ae81f3a 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java @@ -41,7 +41,7 @@ public class BinaryDictionary extends Dictionary { * It is necessary to keep it at this value because some languages e.g. German have * really long words. */ - public static final int MAX_WORD_LENGTH = 48; + public static final int MAX_WORD_LENGTH = Constants.Dictionary.MAX_WORD_LENGTH; public static final int MAX_WORDS = 18; public static final int MAX_SPACES = 16; diff --git a/java/src/com/android/inputmethod/latin/Constants.java b/java/src/com/android/inputmethod/latin/Constants.java index d71c0f995..57e12a64f 100644 --- a/java/src/com/android/inputmethod/latin/Constants.java +++ b/java/src/com/android/inputmethod/latin/Constants.java @@ -16,8 +16,6 @@ package com.android.inputmethod.latin; -import android.view.inputmethod.EditorInfo; - public final class Constants { public static final class Color { /** @@ -54,7 +52,7 @@ public final class Constants { * The private IME option used to indicate that the given text field needs ASCII code points * input. * - * @deprecated Use {@link EditorInfo#IME_FLAG_FORCE_ASCII}. + * @deprecated Use EditorInfo#IME_FLAG_FORCE_ASCII. */ @SuppressWarnings("dep-ann") public static final String FORCE_ASCII = "forceAscii"; @@ -128,6 +126,14 @@ public final class Constants { } } + public static class Dictionary { + public static final int MAX_WORD_LENGTH = 48; + + private Dictionary() { + // This utility class is not publicly instantiable. + } + } + public static final int NOT_A_CODE = -1; // See {@link KeyboardActionListener.Adapter#isInvalidCoordinate(int)}. 
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java index 7de5cf340..6775144de 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java @@ -16,6 +16,7 @@ package com.android.inputmethod.latin.makedict; +import com.android.inputmethod.latin.Constants; import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.Node; @@ -44,9 +45,28 @@ import java.util.TreeMap; */ public class BinaryDictInputOutput { - final static boolean DBG = MakedictLog.DBG; + private static final boolean DBG = MakedictLog.DBG; - /* Node layout is as follows: + /* + * Array of Node(FusionDictionary.Node) layout is as follows: + * + * g | + * r | the number of groups, 1 or 2 bytes. + * o | 1 byte = bbbbbbbb match + * u | case 1xxxxxxx => xxxxxxx << 8 + next byte + * p | otherwise => bbbbbbbb + * c | + * ount + * + * g | + * r | sequence of groups, + * o | the layout of each group is described below. + * u | + * ps + * + */ + + /* Node(CharGroup) layout is as follows: * | addressType xx : mask with MASK_GROUP_ADDRESS_TYPE * 2 bits, 00 = no children : FLAG_GROUP_ADDRESS_TYPE_NOADDRESS * f | 01 = 1 byte : FLAG_GROUP_ADDRESS_TYPE_ONEBYTE @@ -59,6 +79,13 @@ public class BinaryDictInputOutput { * | is not a word ? 1 bit, 1 = yes, 0 = no : FLAG_IS_NOT_A_WORD * | is blacklisted ? 1 bit, 1 = yes, 0 = no : FLAG_IS_BLACKLISTED * + * p | + * a | IF HAS_PARENT_ADDRESS (defined in the file header) + * r | parent address, 3 bytes + * e | the address must be negative, so the absolute value of the address is stored. 
+ * n | + * taddress + * * c | IF FLAG_HAS_MULTIPLE_CHARS * h | char, char, char, char n * (1 or 3 bytes) : use CharGroupInfo for i/o helpers * a | end 1 byte, = 0 @@ -132,18 +159,22 @@ public class BinaryDictInputOutput { private static final int VERSION_1_MAGIC_NUMBER = 0x78B1; public static final int VERSION_2_MAGIC_NUMBER = 0x9BC13AFE; private static final int MINIMUM_SUPPORTED_VERSION = 1; - private static final int MAXIMUM_SUPPORTED_VERSION = 2; + private static final int MAXIMUM_SUPPORTED_VERSION = 3; private static final int NOT_A_VERSION_NUMBER = -1; private static final int FIRST_VERSION_WITH_HEADER_SIZE = 2; + private static final int FIRST_VERSION_WITH_PARENT_ADDRESS = 3; // These options need to be the same numeric values as the one in the native reading code. private static final int GERMAN_UMLAUT_PROCESSING_FLAG = 0x1; + private static final int HAS_PARENT_ADDRESS = 0x2; private static final int FRENCH_LIGATURE_PROCESSING_FLAG = 0x4; private static final int CONTAINS_BIGRAMS_FLAG = 0x8; // TODO: Make this value adaptative to content data, store it in the header, and // use it in the reading code. 
- private static final int MAX_WORD_LENGTH = 48; + private static final int MAX_WORD_LENGTH = Constants.Dictionary.MAX_WORD_LENGTH; + + private static final int PARENT_ADDRESS_SIZE = 3; private static final int MASK_GROUP_ADDRESS_TYPE = 0xC0; private static final int FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00; @@ -178,6 +209,7 @@ public class BinaryDictInputOutput { private static final int GROUP_SHORTCUT_LIST_SIZE_SIZE = 2; private static final int NO_CHILDREN_ADDRESS = Integer.MIN_VALUE; + private static final int NO_PARENT_ADDRESS = 0; private static final int INVALID_CHARACTER = -1; private static final int MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT = 0x7F; // 127 @@ -246,8 +278,17 @@ public class BinaryDictInputOutput { */ public static class FormatOptions { public final int mVersion; + public final boolean mHasParentAddress; public FormatOptions(final int version) { + this(version, false); + } + public FormatOptions(final int version, final boolean hasParentAddress) { mVersion = version; + if (version < FIRST_VERSION_WITH_PARENT_ADDRESS && hasParentAddress) { + throw new RuntimeException("Parent addresses are only supported with versions " + + FIRST_VERSION_WITH_PARENT_ADDRESS + " and ulterior."); + } + mHasParentAddress = hasParentAddress; } } @@ -277,7 +318,7 @@ public class BinaryDictInputOutput { /** * Helper method to find out whether this code fits on one byte */ - private static boolean fitsOnOneByte(int character) { + private static boolean fitsOnOneByte(final int character) { return character >= MINIMAL_ONE_BYTE_CHARACTER_VALUE && character <= MAXIMAL_ONE_BYTE_CHARACTER_VALUE; } @@ -299,7 +340,7 @@ public class BinaryDictInputOutput { * @param character the character code. * @return the size in binary encoded-form, either 1 or 3 bytes. 
*/ - private static int getCharSize(int character) { + private static int getCharSize(final int character) { // See char encoding in FusionDictionary.java if (fitsOnOneByte(character)) return 1; if (INVALID_CHARACTER == character) return 1; @@ -372,7 +413,7 @@ public class BinaryDictInputOutput { * @param buffer the ByteArrayOutputStream to write to. * @param word the string to write. */ - private static void writeString(ByteArrayOutputStream buffer, final String word) { + private static void writeString(final ByteArrayOutputStream buffer, final String word) { final int length = word.length(); for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) { final int codePoint = word.codePointAt(i); @@ -428,7 +469,7 @@ public class BinaryDictInputOutput { * @param group the group * @return the size of the char array, including the terminator if any */ - private static int getGroupCharactersSize(CharGroup group) { + private static int getGroupCharactersSize(final CharGroup group) { int size = CharEncoding.getCharArraySize(group.mChars); if (group.hasSeveralChars()) size += GROUP_TERMINATOR_SIZE; return size; @@ -446,7 +487,7 @@ public class BinaryDictInputOutput { return 2; } else { throw new RuntimeException("Can't have more than " + MAX_CHARGROUPS_IN_A_NODE - + " groups in a node (found " + count +")"); + + " groups in a node (found " + count + ")"); } } @@ -493,10 +534,11 @@ public class BinaryDictInputOutput { * Compute the maximum size of a CharGroup, assuming 3-byte addresses for everything. * * @param group the CharGroup to compute the size of. + * @param options file format options. * @return the maximum size of the group. 
*/ - private static int getCharGroupMaximumSize(CharGroup group) { - int size = getGroupCharactersSize(group) + GROUP_FLAGS_SIZE; + private static int getCharGroupMaximumSize(final CharGroup group, final FormatOptions options) { + int size = getGroupHeaderSize(group, options); // If terminal, one byte for the frequency if (group.isTerminal()) size += GROUP_FREQUENCY_SIZE; size += GROUP_MAX_ADDRESS_SIZE; // For children address @@ -513,11 +555,12 @@ public class BinaryDictInputOutput { * it in the 'actualSize' member of the node. * * @param node the node to compute the maximum size of. + * @param options file format options. */ - private static void setNodeMaximumSize(Node node) { + private static void setNodeMaximumSize(final Node node, final FormatOptions options) { int size = getGroupCountSize(node); for (CharGroup g : node.mData) { - final int groupSize = getCharGroupMaximumSize(g); + final int groupSize = getCharGroupMaximumSize(g, options); g.mCachedSize = groupSize; size += groupSize; } @@ -527,11 +570,33 @@ public class BinaryDictInputOutput { /** * Helper method to hide the actual value of the no children address. */ - private static boolean hasChildrenAddress(int address) { + private static boolean hasChildrenAddress(final int address) { return NO_CHILDREN_ADDRESS != address; } /** + * Helper method to check whether the CharGroup has a parent address. + */ + private static boolean hasParentAddress(final FormatOptions options) { + return options.mVersion >= FIRST_VERSION_WITH_PARENT_ADDRESS + && options.mHasParentAddress; + } + + /** + * Compute the size of the header (flag + [parent address] + characters size) of a CharGroup. + * + * @param group the group of which to compute the size of the header + * @param options file format options. 
+ */ + private static int getGroupHeaderSize(final CharGroup group, final FormatOptions options) { + if (hasParentAddress(options)) { + return GROUP_FLAGS_SIZE + PARENT_ADDRESS_SIZE + getGroupCharactersSize(group); + } else { + return GROUP_FLAGS_SIZE + getGroupCharactersSize(group); + } + } + + /** * Compute the size, in bytes, that an address will occupy. * * This can be used either for children addresses (which are always positive) or for @@ -541,7 +606,7 @@ public class BinaryDictInputOutput { * @param address the address * @return the byte size. */ - private static int getByteSize(int address) { + private static int getByteSize(final int address) { assert(address < 0x1000000); if (!hasChildrenAddress(address)) { return 0; @@ -557,14 +622,14 @@ public class BinaryDictInputOutput { // This method is responsible for finding a nice ordering of the nodes that favors run-time // cache performance and dictionary size. - /* package for tests */ static ArrayList<Node> flattenTree(Node root) { + /* package for tests */ static ArrayList<Node> flattenTree(final Node root) { final int treeSize = FusionDictionary.countCharGroups(root); MakedictLog.i("Counted nodes : " + treeSize); final ArrayList<Node> flatTree = new ArrayList<Node>(treeSize); return flattenTreeInner(flatTree, root); } - private static ArrayList<Node> flattenTreeInner(ArrayList<Node> list, Node node) { + private static ArrayList<Node> flattenTreeInner(final ArrayList<Node> list, final Node node) { // Removing the node is necessary if the tails are merged, because we would then // add the same node several times when we only want it once. A number of places in // the code also depends on any node being only once in the list. @@ -614,9 +679,11 @@ public class BinaryDictInputOutput { * * @param node the node to compute the size of. * @param dict the dictionary in which the word/attributes are to be found. + * @param formatOptions file format options. 
* @return false if none of the cached addresses inside the node changed, true otherwise. */ - private static boolean computeActualNodeSize(Node node, FusionDictionary dict) { + private static boolean computeActualNodeSize(final Node node, final FusionDictionary dict, + final FormatOptions formatOptions) { boolean changed = false; int size = getGroupCountSize(node); for (CharGroup group : node.mData) { @@ -624,11 +691,14 @@ public class BinaryDictInputOutput { changed = true; group.mCachedAddress = node.mCachedAddress + size; } - int groupSize = GROUP_FLAGS_SIZE + getGroupCharactersSize(group); + int groupSize = getGroupHeaderSize(group, formatOptions); if (group.isTerminal()) groupSize += GROUP_FREQUENCY_SIZE; if (null != group.mChildren) { - final int offsetBasePoint= groupSize + node.mCachedAddress + size; + final int offsetBasePoint = groupSize + node.mCachedAddress + size; final int offset = group.mChildren.mCachedAddress - offsetBasePoint; + // assign my address to children's parent address + group.mChildren.mCachedParentAddress = group.mCachedAddress + - group.mChildren.mCachedAddress; groupSize += getByteSize(offset); } groupSize += getShortcutListSize(group.mShortcutTargets); @@ -657,7 +727,7 @@ public class BinaryDictInputOutput { * @param flatNodes the array of nodes. * @return the byte size of the entire stack. */ - private static int stackNodes(ArrayList<Node> flatNodes) { + private static int stackNodes(final ArrayList<Node> flatNodes) { int nodeOffset = 0; for (Node n : flatNodes) { n.mCachedAddress = nodeOffset; @@ -687,12 +757,13 @@ public class BinaryDictInputOutput { * * @param dict the dictionary * @param flatNodes the ordered array of nodes + * @param formatOptions file format options. * @return the same array it was passed. The nodes have been updated for address and size. 
*/ - private static ArrayList<Node> computeAddresses(FusionDictionary dict, - ArrayList<Node> flatNodes) { + private static ArrayList<Node> computeAddresses(final FusionDictionary dict, + final ArrayList<Node> flatNodes, final FormatOptions formatOptions) { // First get the worst sizes and offsets - for (Node n : flatNodes) setNodeMaximumSize(n); + for (Node n : flatNodes) setNodeMaximumSize(n, formatOptions); final int offset = stackNodes(flatNodes); MakedictLog.i("Compressing the array addresses. Original size : " + offset); @@ -704,7 +775,7 @@ public class BinaryDictInputOutput { changesDone = false; for (Node n : flatNodes) { final int oldNodeSize = n.mCachedSize; - final boolean changed = computeActualNodeSize(n, dict); + final boolean changed = computeActualNodeSize(n, dict, formatOptions); final int newNodeSize = n.mCachedSize; if (oldNodeSize < newNodeSize) throw new RuntimeException("Increased size ?!"); changesDone |= changed; @@ -732,7 +803,7 @@ public class BinaryDictInputOutput { * * @param array the array node to check */ - private static void checkFlatNodeArray(ArrayList<Node> array) { + private static void checkFlatNodeArray(final ArrayList<Node> array) { int offset = 0; int index = 0; for (Node n : array) { @@ -890,12 +961,14 @@ public class BinaryDictInputOutput { /** * Makes the 2-byte value for options flags. */ - private static final int makeOptionsValue(final FusionDictionary dictionary) { + private static final int makeOptionsValue(final FusionDictionary dictionary, + final FormatOptions formatOptions) { final DictionaryOptions options = dictionary.mOptions; final boolean hasBigrams = dictionary.hasBigrams(); return (options.mFrenchLigatureProcessing ? FRENCH_LIGATURE_PROCESSING_FLAG : 0) + (options.mGermanUmlautProcessing ? GERMAN_UMLAUT_PROCESSING_FLAG : 0) - + (hasBigrams ? CONTAINS_BIGRAMS_FLAG : 0); + + (hasBigrams ? CONTAINS_BIGRAMS_FLAG : 0) + + (formatOptions.mHasParentAddress ? 
HAS_PARENT_ADDRESS : 0); } /** @@ -918,13 +991,16 @@ public class BinaryDictInputOutput { * @param dict the dictionary the node is a part of (for relative offsets). * @param buffer the memory buffer to write to. * @param node the node to write. + * @param formatOptions file format options. * @return the address of the END of the node. */ - private static int writePlacedNode(FusionDictionary dict, byte[] buffer, Node node) { + private static int writePlacedNode(final FusionDictionary dict, byte[] buffer, + final Node node, final FormatOptions formatOptions) { int index = node.mCachedAddress; final int groupCount = node.mData.size(); final int countSize = getGroupCountSize(node); + final int parentAddress = node.mCachedParentAddress; if (1 == countSize) { buffer[index++] = (byte)groupCount; } else if (2 == countSize) { @@ -941,7 +1017,7 @@ public class BinaryDictInputOutput { if (index != group.mCachedAddress) throw new RuntimeException("Bug: write index is not " + "the same as the cached address of the group : " + index + " <> " + group.mCachedAddress); - groupAddress += GROUP_FLAGS_SIZE + getGroupCharactersSize(group); + groupAddress += getGroupHeaderSize(group, formatOptions); // Sanity checks. if (DBG && group.mFrequency > MAX_TERMINAL_FREQUENCY) { throw new RuntimeException("A node has a frequency > " + MAX_TERMINAL_FREQUENCY @@ -952,6 +1028,22 @@ public class BinaryDictInputOutput { ? NO_CHILDREN_ADDRESS : group.mChildren.mCachedAddress - groupAddress; byte flags = makeCharGroupFlags(group, groupAddress, childrenOffset); buffer[index++] = flags; + + if (hasParentAddress(formatOptions)) { + if (parentAddress == NO_PARENT_ADDRESS) { + // this node is the root node. + buffer[index] = buffer[index + 1] = buffer[index + 2] = 0; + } else { + // write parent address. 
(version 3) + final int actualParentAddress = Math.abs(parentAddress + + (node.mCachedAddress - group.mCachedAddress)); + buffer[index] = (byte)((actualParentAddress >> 16) & 0xFF); + buffer[index + 1] = (byte)((actualParentAddress >> 8) & 0xFF); + buffer[index + 2] = (byte)(actualParentAddress & 0xFF); + } + index += 3; + } + index = CharEncoding.writeCharArray(group.mChars, buffer, index); if (group.hasSeveralChars()) { buffer[index++] = GROUP_CHARACTERS_TERMINATOR; @@ -1076,7 +1168,7 @@ public class BinaryDictInputOutput { * * @param destination the stream to write the binary data to. * @param dict the dictionary to write. - * @param formatOptions the options of file format. + * @param formatOptions file format options. */ public static void writeDictionaryBinary(final OutputStream destination, final FusionDictionary dict, final FormatOptions formatOptions) @@ -1115,7 +1207,7 @@ public class BinaryDictInputOutput { headerBuffer.write((byte) (0xFF & version)); } // Options flags - final int options = makeOptionsValue(dict); + final int options = makeOptionsValue(dict, formatOptions); headerBuffer.write((byte) (0xFF & (options >> 8))); headerBuffer.write((byte) (0xFF & options)); if (version >= FIRST_VERSION_WITH_HEADER_SIZE) { @@ -1149,20 +1241,20 @@ public class BinaryDictInputOutput { ArrayList<Node> flatNodes = flattenTree(dict.mRoot); MakedictLog.i("Computing addresses..."); - computeAddresses(dict, flatNodes); + computeAddresses(dict, flatNodes, formatOptions); MakedictLog.i("Checking array..."); if (DBG) checkFlatNodeArray(flatNodes); // Create a buffer that matches the final dictionary size. 
final Node lastNode = flatNodes.get(flatNodes.size() - 1); - final int bufferSize =(lastNode.mCachedAddress + lastNode.mCachedSize); + final int bufferSize = lastNode.mCachedAddress + lastNode.mCachedSize; final byte[] buffer = new byte[bufferSize]; int index = 0; MakedictLog.i("Writing file..."); int dataEndOffset = 0; for (Node n : flatNodes) { - dataEndOffset = writePlacedNode(dict, buffer, n); + dataEndOffset = writePlacedNode(dict, buffer, n, formatOptions); } if (DBG) showStatistics(flatNodes); @@ -1177,23 +1269,36 @@ public class BinaryDictInputOutput { // Input methods: Read a binary dictionary to memory. // readDictionaryBinary is the public entry point for them. - static final int[] characterBuffer = new int[MAX_WORD_LENGTH]; + private static final int[] CHARACTER_BUFFER = new int[MAX_WORD_LENGTH]; private static CharGroupInfo readCharGroup(final FusionDictionaryBufferInterface buffer, - final int originalGroupAddress) { + final int originalGroupAddress, final FormatOptions options) { int addressPointer = originalGroupAddress; final int flags = buffer.readUnsignedByte(); ++addressPointer; + + final int parentAddress; + if (hasParentAddress(options)) { + // read the parent address. (version 3) + parentAddress = -buffer.readUnsignedInt24(); + addressPointer += 3; + } else { + parentAddress = NO_PARENT_ADDRESS; + } + final int characters[]; if (0 != (flags & FLAG_HAS_MULTIPLE_CHARS)) { int index = 0; int character = CharEncoding.readChar(buffer); addressPointer += CharEncoding.getCharSize(character); while (-1 != character) { - characterBuffer[index++] = character; + // FusionDictionary is making sure that the length of the word is smaller than + // MAX_WORD_LENGTH. + // So we'll never write past the end of CHARACTER_BUFFER. 
+ CHARACTER_BUFFER[index++] = character; character = CharEncoding.readChar(buffer); addressPointer += CharEncoding.getCharSize(character); } - characters = Arrays.copyOfRange(characterBuffer, 0, index); + characters = Arrays.copyOfRange(CHARACTER_BUFFER, 0, index); } else { final int character = CharEncoding.readChar(buffer); addressPointer += CharEncoding.getCharSize(character); @@ -1271,7 +1376,7 @@ public class BinaryDictInputOutput { } } return new CharGroupInfo(originalGroupAddress, addressPointer, flags, characters, frequency, - childrenAddress, shortcutTargets, bigrams); + parentAddress, childrenAddress, shortcutTargets, bigrams); } /** @@ -1298,13 +1403,56 @@ public class BinaryDictInputOutput { * @param buffer the buffer to read from. * @param headerSize the size of the header. * @param address the address to seek. + * @param formatOptions file format options. * @return the word, as a string. */ private static String getWordAtAddress(final FusionDictionaryBufferInterface buffer, - final int headerSize, final int address) { + final int headerSize, final int address, final FormatOptions formatOptions) { final String cachedString = wordCache.get(address); if (null != cachedString) return cachedString; + + final String result; final int originalPointer = buffer.position(); + + if (hasParentAddress(formatOptions)) { + result = getWordAtAddressWithParentAddress(buffer, headerSize, address, formatOptions); + } else { + result = getWordAtAddressWithoutParentAddress(buffer, headerSize, address, + formatOptions); + } + + wordCache.put(address, result); + buffer.position(originalPointer); + return result; + } + + private static int[] sGetWordBuffer = new int[MAX_WORD_LENGTH]; + private static String getWordAtAddressWithParentAddress( + final FusionDictionaryBufferInterface buffer, final int headerSize, final int address, + final FormatOptions options) { + final StringBuilder builder = new StringBuilder(); + + int currentAddress = address; + int index = 
MAX_WORD_LENGTH - 1; + // the length of the path from the root to the leaf is limited by MAX_WORD_LENGTH + for (int count = 0; count < MAX_WORD_LENGTH; ++count) { + buffer.position(currentAddress + headerSize); + final CharGroupInfo currentInfo = readCharGroup(buffer, currentAddress, options); + for (int i = 0; i < currentInfo.mCharacters.length; ++i) { + sGetWordBuffer[index--] = + currentInfo.mCharacters[currentInfo.mCharacters.length - i - 1]; + } + + if (currentInfo.mParentAddress == NO_PARENT_ADDRESS) break; + currentAddress = currentInfo.mParentAddress + currentInfo.mOriginalAddress; + } + + return new String(sGetWordBuffer, index + 1, MAX_WORD_LENGTH - index - 1); + } + + private static String getWordAtAddressWithoutParentAddress( + final FusionDictionaryBufferInterface buffer, final int headerSize, final int address, + final FormatOptions options) { buffer.position(headerSize); final int count = readCharGroupCount(buffer); int groupOffset = getGroupCountSize(count); @@ -1313,7 +1461,7 @@ public class BinaryDictInputOutput { CharGroupInfo last = null; for (int i = count - 1; i >= 0; --i) { - CharGroupInfo info = readCharGroup(buffer, groupOffset); + CharGroupInfo info = readCharGroup(buffer, groupOffset, options); groupOffset = info.mEndAddress; if (info.mOriginalAddress == address) { builder.append(new String(info.mCharacters, 0, info.mCharacters.length)); @@ -1341,8 +1489,6 @@ public class BinaryDictInputOutput { continue; } } - buffer.position(originalPointer); - wordCache.put(address, result); return result; } @@ -1358,24 +1504,26 @@ public class BinaryDictInputOutput { * @param headerSize the size, in bytes, of the file header. * @param reverseNodeMap a mapping from addresses to already read nodes. * @param reverseGroupMap a mapping from addresses to already read character groups. + * @param options file format options. * @return the read node with all his children already read. 
*/ private static Node readNode(final FusionDictionaryBufferInterface buffer, final int headerSize, - final Map<Integer, Node> reverseNodeMap, final Map<Integer, CharGroup> reverseGroupMap) + final Map<Integer, Node> reverseNodeMap, final Map<Integer, CharGroup> reverseGroupMap, + final FormatOptions options) throws IOException { final int nodeOrigin = buffer.position() - headerSize; final int count = readCharGroupCount(buffer); final ArrayList<CharGroup> nodeContents = new ArrayList<CharGroup>(); int groupOffset = nodeOrigin + getGroupCountSize(count); for (int i = count; i > 0; --i) { - CharGroupInfo info = readCharGroup(buffer, groupOffset); + CharGroupInfo info = readCharGroup(buffer, groupOffset, options); ArrayList<WeightedString> shortcutTargets = info.mShortcutTargets; ArrayList<WeightedString> bigrams = null; if (null != info.mBigrams) { bigrams = new ArrayList<WeightedString>(); for (PendingAttribute bigram : info.mBigrams) { final String word = getWordAtAddress( - buffer, headerSize, bigram.mAddress); + buffer, headerSize, bigram.mAddress, options); bigrams.add(new WeightedString(word, bigram.mFrequency)); } } @@ -1385,7 +1533,7 @@ public class BinaryDictInputOutput { final int currentPosition = buffer.position(); buffer.position(info.mChildrenAddress + headerSize); children = readNode( - buffer, headerSize, reverseNodeMap, reverseGroupMap); + buffer, headerSize, reverseNodeMap, reverseGroupMap, options); buffer.position(currentPosition); } nodeContents.add( @@ -1429,7 +1577,8 @@ public class BinaryDictInputOutput { private static void readUnigramsAndBigramsBinaryInner( final FusionDictionaryBufferInterface buffer, final int headerSize, final Map<Integer, String> words, final Map<Integer, Integer> frequencies, - final Map<Integer, ArrayList<PendingAttribute>> bigrams) { + final Map<Integer, ArrayList<PendingAttribute>> bigrams, + final FormatOptions formatOptions) { int[] pushedChars = new int[MAX_WORD_LENGTH + 1]; Stack<Position> stack = new 
Stack<Position>(); @@ -1455,7 +1604,7 @@ public class BinaryDictInputOutput { p.mPosition = 0; } - CharGroupInfo info = readCharGroup(buffer, p.mAddress - headerSize); + CharGroupInfo info = readCharGroup(buffer, p.mAddress - headerSize, formatOptions); for (int i = 0; i < info.mCharacters.length; ++i) { pushedChars[index++] = info.mCharacters[i]; } @@ -1497,11 +1646,9 @@ public class BinaryDictInputOutput { final Map<Integer, ArrayList<PendingAttribute>> bigrams) throws IOException, UnsupportedFormatException { // Read header - FormatOptions formatOptions = null; - DictionaryOptions dictionaryOptions = null; final FileHeader header = readHeader(buffer); - - readUnigramsAndBigramsBinaryInner(buffer, header.mHeaderSize, words, frequencies, bigrams); + readUnigramsAndBigramsBinaryInner(buffer, header.mHeaderSize, words, frequencies, bigrams, + header.mFormatOptions); } /** @@ -1562,7 +1709,8 @@ public class BinaryDictInputOutput { new FusionDictionary.DictionaryOptions(attributes, 0 != (optionsFlags & GERMAN_UMLAUT_PROCESSING_FLAG), 0 != (optionsFlags & FRENCH_LIGATURE_PROCESSING_FLAG)), - new FormatOptions(version)); + new FormatOptions(version, + 0 != (optionsFlags & HAS_PARENT_ADDRESS))); return header; } @@ -1609,7 +1757,7 @@ public class BinaryDictInputOutput { Map<Integer, Node> reverseNodeMapping = new TreeMap<Integer, Node>(); Map<Integer, CharGroup> reverseGroupMapping = new TreeMap<Integer, CharGroup>(); final Node root = readNode(buffer, header.mHeaderSize, reverseNodeMapping, - reverseGroupMapping); + reverseGroupMapping, header.mFormatOptions); FusionDictionary newDict = new FusionDictionary(root, header.mDictionaryOptions); if (null != dict) { diff --git a/java/src/com/android/inputmethod/latin/makedict/CharGroupInfo.java b/java/src/com/android/inputmethod/latin/makedict/CharGroupInfo.java index ef7dbb251..ed9388409 100644 --- a/java/src/com/android/inputmethod/latin/makedict/CharGroupInfo.java +++ 
b/java/src/com/android/inputmethod/latin/makedict/CharGroupInfo.java @@ -31,18 +31,20 @@ public class CharGroupInfo { public final int[] mCharacters; public final int mFrequency; public final int mChildrenAddress; + public final int mParentAddress; public final ArrayList<WeightedString> mShortcutTargets; public final ArrayList<PendingAttribute> mBigrams; public CharGroupInfo(final int originalAddress, final int endAddress, final int flags, - final int[] characters, final int frequency, final int childrenAddress, - final ArrayList<WeightedString> shortcutTargets, + final int[] characters, final int frequency, final int parentAddress, + final int childrenAddress, final ArrayList<WeightedString> shortcutTargets, final ArrayList<PendingAttribute> bigrams) { mOriginalAddress = originalAddress; mEndAddress = endAddress; mFlags = flags; mCharacters = characters; mFrequency = frequency; + mParentAddress = parentAddress; mChildrenAddress = childrenAddress; mShortcutTargets = shortcutTargets; mBigrams = bigrams; diff --git a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java index 61f7371b8..6775de8a8 100644 --- a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java +++ b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java @@ -16,6 +16,8 @@ package com.android.inputmethod.latin.makedict; +import com.android.inputmethod.latin.Constants; + import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -41,17 +43,15 @@ public class FusionDictionary implements Iterable<Word> { public static class Node { ArrayList<CharGroup> mData; // To help with binary generation - int mCachedSize; - int mCachedAddress; + int mCachedSize = Integer.MIN_VALUE; + int mCachedAddress = Integer.MIN_VALUE; + int mCachedParentAddress = 0; + public Node() { mData = new ArrayList<CharGroup>(); - mCachedSize = Integer.MIN_VALUE; - mCachedAddress = 
Integer.MIN_VALUE; } public Node(ArrayList<CharGroup> data) { mData = data; - mCachedSize = Integer.MIN_VALUE; - mCachedAddress = Integer.MIN_VALUE; } } @@ -382,6 +382,11 @@ public class FusionDictionary implements Iterable<Word> { final ArrayList<WeightedString> shortcutTargets, final boolean isNotAWord, final boolean isBlacklistEntry) { assert(frequency >= 0 && frequency <= 255); + if (word.length >= Constants.Dictionary.MAX_WORD_LENGTH) { + MakedictLog.w("Ignoring a word that is too long: word.length = " + word.length); + return; + } + Node currentNode = mRoot; int charIndex = 0; diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h index 0286365bc..95a90275d 100644 --- a/native/jni/src/defines.h +++ b/native/jni/src/defines.h @@ -31,6 +31,9 @@ dumpResult(words, frequencies, maxWordCount, maxWordLength); } while (0) #define DUMP_WORD(word, length) do { dumpWord(word, length); } while (0) #define DUMP_WORD_INT(word, length) do { dumpWordInt(word, length); } while (0) +// TODO: INTS_TO_CHARS +#define SHORTS_TO_CHARS(input, length, output) do { \ + shortArrayToCharArray(input, length, output); } while (0) static inline void dumpWordInfo(const unsigned short *word, const int length, const int rank, const int frequency) { @@ -87,12 +90,29 @@ static inline void dumpWordInt(const int *word, const int length) { AKLOGI("i[ %s ]", charBuf); } -#ifndef __ANDROID__ -#define ASSERT(success) do { if (!success) { showStackTrace(); assert(success);};} while (0) -#define SHOW_STACK_TRACE do { showStackTrace(); } while (0) +// TODO: Change this to intArrayToCharArray +static inline void shortArrayToCharArray( + const unsigned short *input, const int length, char *output) { + int i = 0; + for (;i < length; ++i) { + const unsigned short c = input[i]; + if (c == 0) { + break; + } + // static_cast only for debugging + output[i] = static_cast<char>(c); + } + output[i] = 0; +} +#ifndef __ANDROID__ +#include <cassert> #include <execinfo.h> #include <stdlib.h> + +#define 
ASSERT(success) do { if (!(success)) { showStackTrace(); assert(success);} } while (0) +#define SHOW_STACK_TRACE do { showStackTrace(); } while (0) + static inline void showStackTrace() { void *callstack[128]; int i, frames = backtrace(callstack, 128); @@ -107,7 +127,8 @@ static inline void showStackTrace() { free(strs); } #else -#define ASSERT(success) +#include <cassert> +#define ASSERT(success) assert(success) #define SHOW_STACK_TRACE #endif @@ -119,6 +140,8 @@ static inline void showStackTrace() { #define DUMP_WORD_INT(word, length) #define ASSERT(success) #define SHOW_STACK_TRACE +// TODO: INTS_TO_CHARS +#define SHORTS_TO_CHARS(input, length, output) #endif #ifdef FLAG_DO_PROFILE @@ -197,6 +220,8 @@ static inline void prof_out(void) { #define DEBUG_CORRECTION_FREQ false #define DEBUG_WORDS_PRIORITY_QUEUE false +#define DEBUG_GEO_FULL false + #else // FLAG_DBG #define DEBUG_DICT false @@ -211,6 +236,8 @@ static inline void prof_out(void) { #define DEBUG_CORRECTION_FREQ false #define DEBUG_WORDS_PRIORITY_QUEUE false +#define DEBUG_GEO_FULL false + #endif // FLAG_DBG #ifndef U_SHORT_MAX diff --git a/native/jni/src/proximity_info.cpp b/native/jni/src/proximity_info.cpp index a8c04300f..9bb8b29ae 100644 --- a/native/jni/src/proximity_info.cpp +++ b/native/jni/src/proximity_info.cpp @@ -275,22 +275,22 @@ void ProximityInfo::initializeG() { } } -float ProximityInfo::getKeyCenterXOfCodePointG(int charCode) const { +int ProximityInfo::getKeyCenterXOfCodePointG(int charCode) const { return getKeyCenterXOfKeyIdG(getKeyIndexOf(charCode)); } -float ProximityInfo::getKeyCenterYOfCodePointG(int charCode) const { +int ProximityInfo::getKeyCenterYOfCodePointG(int charCode) const { return getKeyCenterYOfKeyIdG(getKeyIndexOf(charCode)); } -float ProximityInfo::getKeyCenterXOfKeyIdG(int keyId) const { +int ProximityInfo::getKeyCenterXOfKeyIdG(int keyId) const { if (keyId >= 0) { return mCenterXsG[keyId]; } return 0; } -float ProximityInfo::getKeyCenterYOfKeyIdG(int keyId) const { 
+int ProximityInfo::getKeyCenterYOfKeyIdG(int keyId) const { if (keyId >= 0) { return mCenterYsG[keyId]; } diff --git a/native/jni/src/proximity_info.h b/native/jni/src/proximity_info.h index 7c22e108b..45df6ff6a 100644 --- a/native/jni/src/proximity_info.h +++ b/native/jni/src/proximity_info.h @@ -104,11 +104,10 @@ class ProximityInfo { return KEYBOARD_HEIGHT; } - // TODO: These should return int. - float getKeyCenterXOfCodePointG(int charCode) const; - float getKeyCenterYOfCodePointG(int charCode) const; - float getKeyCenterXOfKeyIdG(int keyId) const; - float getKeyCenterYOfKeyIdG(int keyId) const; + int getKeyCenterXOfCodePointG(int charCode) const; + int getKeyCenterYOfCodePointG(int charCode) const; + int getKeyCenterXOfKeyIdG(int keyId) const; + int getKeyCenterYOfKeyIdG(int keyId) const; int getKeyKeyDistanceG(int key0, int key1) const; private: diff --git a/native/jni/src/proximity_info_state.cpp b/native/jni/src/proximity_info_state.cpp index 8c3174c0a..a4eb7e353 100644 --- a/native/jni/src/proximity_info_state.cpp +++ b/native/jni/src/proximity_info_state.cpp @@ -29,6 +29,14 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi const ProximityInfo *proximityInfo, const int32_t *const inputCodes, const int inputSize, const int *const xCoordinates, const int *const yCoordinates, const int *const times, const int *const pointerIds, const bool isGeometric) { + + if (isGeometric) { + mIsContinuationPossible = checkAndReturnIsContinuationPossible( + inputSize, xCoordinates, yCoordinates, times); + } else { + mIsContinuationPossible = false; + } + mProximityInfo = proximityInfo; mHasTouchPositionCorrectionData = proximityInfo->hasTouchPositionCorrectionData(); mMostCommonKeyWidthSquare = proximityInfo->getMostCommonKeyWidthSquare(); @@ -70,19 +78,32 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi /////////////////////// // Setup touch points + int pushTouchPointStartIndex = 0; + int 
lastSavedInputSize = 0; mMaxPointToKeyLength = maxPointToKeyLength; - mInputXs.clear(); - mInputYs.clear(); - mTimes.clear(); - mLengthCache.clear(); - mDistanceCache.clear(); - mNearKeysVector.clear(); + if (mIsContinuationPossible && mInputIndice.size() > 1) { + // Just update difference. + // Two points prior is never skipped. Thus, we pop 2 input point data here. + pushTouchPointStartIndex = mInputIndice[mInputIndice.size() - 2]; + popInputData(); + popInputData(); + lastSavedInputSize = mInputXs.size(); + } else { + // Clear all data. + mInputXs.clear(); + mInputYs.clear(); + mTimes.clear(); + mInputIndice.clear(); + mLengthCache.clear(); + mDistanceCache.clear(); + mNearKeysVector.clear(); + } mInputSize = 0; if (xCoordinates && yCoordinates) { const bool proximityOnly = !isGeometric && (xCoordinates[0] < 0 || yCoordinates[0] < 0); - int lastInputIndex = 0; - for (int i = 0; i < inputSize; ++i) { + int lastInputIndex = pushTouchPointStartIndex; + for (int i = lastInputIndex; i < inputSize; ++i) { const int pid = pointerIds ? pointerIds[i] : 0; if (pointerId == pid) { lastInputIndex = i; @@ -95,7 +116,7 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi NearKeysDistanceMap *prevNearKeysDistances = &nearKeysDistances[1]; NearKeysDistanceMap *prevPrevNearKeysDistances = &nearKeysDistances[2]; - for (int i = 0; i < inputSize; ++i) { + for (int i = pushTouchPointStartIndex; i <= lastInputIndex; ++i) { // Assuming pointerId == 0 if pointerIds is null. const int pid = pointerIds ? pointerIds[i] : 0; if (pointerId == pid) { @@ -103,7 +124,7 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi const int x = proximityOnly ? NOT_A_COORDINATE : xCoordinates[i]; const int y = proximityOnly ? NOT_A_COORDINATE : yCoordinates[i]; const int time = times ? 
times[i] : -1; - if (pushTouchPoint(c, x, y, time, isGeometric, i == lastInputIndex, + if (pushTouchPoint(i, c, x, y, time, isGeometric, i == lastInputIndex, currentNearKeysDistances, prevNearKeysDistances, prevPrevNearKeysDistances)) { // Previous point information was popped. @@ -125,7 +146,7 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi const int keyCount = mProximityInfo->getKeyCount(); mNearKeysVector.resize(mInputSize); mDistanceCache.resize(mInputSize * keyCount); - for (int i = 0; i < mInputSize; ++i) { + for (int i = lastSavedInputSize; i < mInputSize; ++i) { mNearKeysVector[i].reset(); static const float NEAR_KEY_NORMALIZED_SQUARED_THRESHOLD = 4.0f; for (int k = 0; k < keyCount; ++k) { @@ -146,7 +167,7 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi hypotf(mProximityInfo->getKeyboardWidth(), mProximityInfo->getKeyboardHeight()) * READ_FORWORD_LENGTH_SCALE); for (int i = 0; i < mInputSize; ++i) { - for (int j = i + 1; j < mInputSize; ++j) { + for (int j = max(i + 1, lastSavedInputSize); j < mInputSize; ++j) { if (mLengthCache[j] - mLengthCache[i] >= readForwordLength) { break; } @@ -199,6 +220,18 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi } } +bool ProximityInfoState::checkAndReturnIsContinuationPossible(const int inputSize, + const int *const xCoordinates, const int *const yCoordinates, const int *const times) { + for (int i = 0; i < mInputSize; ++i) { + const int index = mInputIndice[i]; + if (index > inputSize || xCoordinates[index] != mInputXs[i] || + yCoordinates[index] != mInputYs[i] || times[index] != mTimes[i]) { + return false; + } + } + return true; +} + // Calculating point to key distance for all near keys and returning the distance between // the given point and the nearest key position. 
float ProximityInfoState::updateNearKeysDistances(const int x, const int y, @@ -305,8 +338,8 @@ float ProximityInfoState::getPointScore( // Sampling touch point and pushing information to vectors. // Returning if previous point is popped or not. -bool ProximityInfoState::pushTouchPoint(const int nodeChar, int x, int y, const int time, - const bool sample, const bool isLastPoint, +bool ProximityInfoState::pushTouchPoint(const int inputIndex, const int nodeChar, int x, int y, + const int time, const bool sample, const bool isLastPoint, NearKeysDistanceMap *const currentNearKeysDistances, const NearKeysDistanceMap *const prevNearKeysDistances, const NearKeysDistanceMap *const prevPrevNearKeysDistances) { @@ -320,10 +353,7 @@ bool ProximityInfoState::pushTouchPoint(const int nodeChar, int x, int y, const currentNearKeysDistances, prevNearKeysDistances, prevPrevNearKeysDistances); if (score < 0) { // Pop previous point because it would be useless. - mInputXs.pop_back(); - mInputYs.pop_back(); - mTimes.pop_back(); - mLengthCache.pop_back(); + popInputData(); size = mInputXs.size(); popped = true; } else { @@ -371,6 +401,7 @@ bool ProximityInfoState::pushTouchPoint(const int nodeChar, int x, int y, const mInputXs.push_back(x); mInputYs.push_back(y); mTimes.push_back(time); + mInputIndice.push_back(inputIndex); return popped; } @@ -461,4 +492,12 @@ float ProximityInfoState::getAveragePointDuration() const { return static_cast<float>(mTimes[mInputSize - 1] - mTimes[0]) / static_cast<float>(mInputSize); } +void ProximityInfoState::popInputData() { + mInputXs.pop_back(); + mInputYs.pop_back(); + mTimes.pop_back(); + mLengthCache.pop_back(); + mInputIndice.pop_back(); +} + } // namespace latinime diff --git a/native/jni/src/proximity_info_state.h b/native/jni/src/proximity_info_state.h index 1d5777347..fce4b5bdc 100644 --- a/native/jni/src/proximity_info_state.h +++ b/native/jni/src/proximity_info_state.h @@ -57,9 +57,9 @@ class ProximityInfoState { : mProximityInfo(0), 
mMaxPointToKeyLength(0), mHasTouchPositionCorrectionData(false), mMostCommonKeyWidthSquare(0), mLocaleStr(), mKeyCount(0), mCellHeight(0), mCellWidth(0), mGridHeight(0), mGridWidth(0), - mInputXs(), mInputYs(), mTimes(), mDistanceCache(), mLengthCache(), - mNearKeysVector(), mTouchPositionCorrectionEnabled(false), - mInputSize(0) { + mIsContinuationPossible(false), mInputXs(), mInputYs(), mTimes(), mInputIndice(), + mDistanceCache(), mLengthCache(), mNearKeysVector(), + mTouchPositionCorrectionEnabled(false), mInputSize(0) { memset(mInputCodes, 0, sizeof(mInputCodes)); memset(mNormalizedSquaredDistances, 0, sizeof(mNormalizedSquaredDistances)); memset(mPrimaryInputWord, 0, sizeof(mPrimaryInputWord)); @@ -212,6 +212,10 @@ class ProximityInfoState { return mLengthCache[index]; } + bool isContinuationPossible() const { + return mIsContinuationPossible; + } + float getPointToKeyLength(const int inputIndex, const int charCode, const float scale) const; int getSpaceY() const; @@ -231,7 +235,7 @@ class ProximityInfoState { float calculateSquaredDistanceFromSweetSpotCenter( const int keyIndex, const int inputIndex) const; - bool pushTouchPoint(const int nodeChar, int x, int y, const int time, + bool pushTouchPoint(const int inputIndex, const int nodeChar, int x, int y, const int time, const bool sample, const bool isLastPoint, NearKeysDistanceMap *const currentNearKeysDistances, const NearKeysDistanceMap *const prevNearKeysDistances, @@ -259,6 +263,9 @@ class ProximityInfoState { const NearKeysDistanceMap *const currentNearKeysDistances, const NearKeysDistanceMap *const prevNearKeysDistances, const NearKeysDistanceMap *const prevPrevNearKeysDistances) const; + bool checkAndReturnIsContinuationPossible(const int inputSize, const int *const xCoordinates, + const int *const yCoordinates, const int *const times); + void popInputData(); // const const ProximityInfo *mProximityInfo; @@ -271,10 +278,12 @@ class ProximityInfoState { int mCellWidth; int mGridHeight; int mGridWidth; 
+ bool mIsContinuationPossible; std::vector<int> mInputXs; std::vector<int> mInputYs; std::vector<int> mTimes; + std::vector<int> mInputIndice; std::vector<float> mDistanceCache; std::vector<int> mLengthCache; std::vector<NearKeycodesSet> mNearKeysVector; diff --git a/tools/dicttool/Android.mk b/tools/dicttool/Android.mk index b0b47af00..5bd836a01 100644 --- a/tools/dicttool/Android.mk +++ b/tools/dicttool/Android.mk @@ -16,13 +16,16 @@ LOCAL_PATH := $(call my-dir) include $(CLEAR_VARS) -MAKEDICT_CORE_SOURCE_DIRECTORY := ../../java/src/com/android/inputmethod/latin/makedict +LATINIME_CORE_SOURCE_DIRECTORY := ../../java/src/com/android/inputmethod/latin +MAKEDICT_CORE_SOURCE_DIRECTORY := $(LATINIME_CORE_SOURCE_DIRECTORY)/makedict LOCAL_MAIN_SRC_FILES := $(call all-java-files-under,$(MAKEDICT_CORE_SOURCE_DIRECTORY)) LOCAL_TOOL_SRC_FILES := $(call all-java-files-under,src) LOCAL_SRC_FILES := $(LOCAL_TOOL_SRC_FILES) \ $(filter-out $(addprefix %/, $(notdir $(LOCAL_TOOL_SRC_FILES))), $(LOCAL_MAIN_SRC_FILES)) \ - $(call all-java-files-under,tests) + $(call all-java-files-under,tests) \ + $(LATINIME_CORE_SOURCE_DIRECTORY)/Constants.java + LOCAL_JAR_MANIFEST := etc/manifest.txt LOCAL_MODULE := dicttool_aosp LOCAL_JAVA_LIBRARIES := junit diff --git a/tools/dicttool/src/android/inputmethod/latin/dicttool/DictionaryMaker.java b/tools/dicttool/src/android/inputmethod/latin/dicttool/DictionaryMaker.java index f72385259..7cd4564f3 100644 --- a/tools/dicttool/src/android/inputmethod/latin/dicttool/DictionaryMaker.java +++ b/tools/dicttool/src/android/inputmethod/latin/dicttool/DictionaryMaker.java @@ -42,40 +42,35 @@ import org.xml.sax.SAXException; public class DictionaryMaker { static class Arguments { - private final static String OPTION_VERSION_2 = "-2"; - private final static String OPTION_INPUT_SOURCE = "-s"; - private final static String OPTION_INPUT_BIGRAM_XML = "-b"; - private final static String OPTION_INPUT_SHORTCUT_XML = "-c"; - private final static String 
OPTION_OUTPUT_BINARY = "-d"; - private final static String OPTION_OUTPUT_BINARY_FORMAT_VERSION_1 = "-d1"; - private final static String OPTION_OUTPUT_XML = "-x"; - private final static String OPTION_HELP = "-h"; + private static final String OPTION_VERSION_1 = "-1"; + private static final String OPTION_VERSION_2 = "-2"; + private static final String OPTION_VERSION_3 = "-3"; + private static final String OPTION_INPUT_SOURCE = "-s"; + private static final String OPTION_INPUT_BIGRAM_XML = "-b"; + private static final String OPTION_INPUT_SHORTCUT_XML = "-c"; + private static final String OPTION_OUTPUT_BINARY = "-d"; + private static final String OPTION_OUTPUT_XML = "-x"; + private static final String OPTION_HELP = "-h"; public final String mInputBinary; public final String mInputUnigramXml; public final String mInputShortcutXml; public final String mInputBigramXml; public final String mOutputBinary; - public final String mOutputBinaryFormat1; public final String mOutputXml; + public final int mOutputBinaryFormatVersion; private void checkIntegrity() throws IOException { checkHasExactlyOneInput(); checkHasAtLeastOneOutput(); checkNotSameFile(mInputBinary, mOutputBinary); - checkNotSameFile(mInputBinary, mOutputBinaryFormat1); checkNotSameFile(mInputBinary, mOutputXml); checkNotSameFile(mInputUnigramXml, mOutputBinary); - checkNotSameFile(mInputUnigramXml, mOutputBinaryFormat1); checkNotSameFile(mInputUnigramXml, mOutputXml); checkNotSameFile(mInputShortcutXml, mOutputBinary); - checkNotSameFile(mInputShortcutXml, mOutputBinaryFormat1); checkNotSameFile(mInputShortcutXml, mOutputXml); checkNotSameFile(mInputBigramXml, mOutputBinary); - checkNotSameFile(mInputBigramXml, mOutputBinaryFormat1); checkNotSameFile(mInputBigramXml, mOutputXml); - checkNotSameFile(mOutputBinary, mOutputBinaryFormat1); checkNotSameFile(mOutputBinary, mOutputXml); - checkNotSameFile(mOutputBinaryFormat1, mOutputXml); } private void checkHasExactlyOneInput() { @@ -90,7 +85,7 @@ public class 
DictionaryMaker { } private void checkHasAtLeastOneOutput() { - if (null == mOutputBinary && null == mOutputBinaryFormat1 && null == mOutputXml) { + if (null == mOutputBinary && null == mOutputXml) { throw new RuntimeException("No output specified"); } } @@ -114,13 +109,13 @@ public class DictionaryMaker { public static String getHelp() { return "Usage: makedict " + "[-s <unigrams.xml> [-b <bigrams.xml>] [-c <shortcuts_and_whitelist.xml>] " - + "| -s <binary input>] [-d <binary output format version 2>] " - + "[-d1 <binary output format version 1>] [-x <xml output>] [-2]\n" + + "| [-s <binary input>] [-d <binary output>] [-x <xml output>] " + + "[-1] [-2] [-3]\n" + "\n" + " Converts a source dictionary file to one or several outputs.\n" + " Source can be an XML file, with an optional XML bigrams file, or a\n" + " binary dictionary file.\n" - + " Binary version 1 (Ice Cream Sandwich), 2 (Jelly Bean) and XML outputs\n" + + " Binary version 1 (Ice Cream Sandwich), 2 (Jelly Bean), 3 and XML outputs\n" + " are supported. All three can be output at the same time, but the same\n" + " output format cannot be specified several times. The behavior is\n" + " unspecified if the same file is specified for input and output, or for\n" @@ -137,8 +132,8 @@ public class DictionaryMaker { String inputShortcutXml = null; String inputBigramXml = null; String outputBinary = null; - String outputBinaryFormat1 = null; String outputXml = null; + int outputBinaryFormatVersion = 2; // the default version is 2. 
while (!args.isEmpty()) { final String arg = args.get(0); @@ -146,6 +141,10 @@ public class DictionaryMaker { if (arg.charAt(0) == '-') { if (OPTION_VERSION_2.equals(arg)) { // Do nothing, this is the default + } else if (OPTION_VERSION_3.equals(arg)) { + outputBinaryFormatVersion = 3; + } else if (OPTION_VERSION_1.equals(arg)) { + outputBinaryFormatVersion = 1; } else if (OPTION_HELP.equals(arg)) { displayHelp(); } else { @@ -168,8 +167,6 @@ public class DictionaryMaker { inputBigramXml = filename; } else if (OPTION_OUTPUT_BINARY.equals(arg)) { outputBinary = filename; - } else if (OPTION_OUTPUT_BINARY_FORMAT_VERSION_1.equals(arg)) { - outputBinaryFormat1 = filename; } else if (OPTION_OUTPUT_XML.equals(arg)) { outputXml = filename; } else { @@ -196,8 +193,8 @@ public class DictionaryMaker { mInputShortcutXml = inputShortcutXml; mInputBigramXml = inputBigramXml; mOutputBinary = outputBinary; - mOutputBinaryFormat1 = outputBinaryFormat1; mOutputXml = outputXml; + mOutputBinaryFormatVersion = outputBinaryFormatVersion; checkIntegrity(); } } @@ -294,10 +291,7 @@ public class DictionaryMaker { throws FileNotFoundException, IOException, UnsupportedFormatException, IllegalArgumentException { if (null != args.mOutputBinary) { - writeBinaryDictionary(args.mOutputBinary, dict, 2); - } - if (null != args.mOutputBinaryFormat1) { - writeBinaryDictionary(args.mOutputBinaryFormat1, dict, 1); + writeBinaryDictionary(args.mOutputBinary, dict, args.mOutputBinaryFormatVersion); } if (null != args.mOutputXml) { writeXmlDictionary(args.mOutputXml, dict); |