diff options
Diffstat (limited to 'java/src/com/android/inputmethod/latin/makedict')
3 files changed, 55 insertions, 16 deletions
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java index 6f508695e..72d12299b 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java @@ -370,6 +370,9 @@ public class BinaryDictInputOutput { g.mCachedSize = groupSize; size += groupSize; } + if (options.mHasLinkedListNode) { + size += FormatSpec.FORWARD_LINK_ADDRESS_SIZE; + } node.mCachedSize = size; } @@ -521,6 +524,9 @@ public class BinaryDictInputOutput { group.mCachedSize = groupSize; size += groupSize; } + if (formatOptions.mHasLinkedListNode) { + size += FormatSpec.FORWARD_LINK_ADDRESS_SIZE; + } if (node.mCachedSize != size) { node.mCachedSize = size; changed = true; @@ -532,9 +538,11 @@ public class BinaryDictInputOutput { * Computes the byte size of a list of nodes and updates each node cached position. * * @param flatNodes the array of nodes. + * @param formatOptions file format options. * @return the byte size of the entire stack. */ - private static int stackNodes(final ArrayList<Node> flatNodes) { + private static int stackNodes(final ArrayList<Node> flatNodes, + final FormatOptions formatOptions) { int nodeOffset = 0; for (Node n : flatNodes) { n.mCachedAddress = nodeOffset; @@ -544,7 +552,9 @@ public class BinaryDictInputOutput { g.mCachedAddress = groupCountSize + nodeOffset + groupOffset; groupOffset += g.mCachedSize; } - if (groupOffset + groupCountSize != n.mCachedSize) { + final int nodeSize = groupCountSize + groupOffset + + (formatOptions.mHasLinkedListNode ? FormatSpec.FORWARD_LINK_ADDRESS_SIZE : 0); + if (nodeSize != n.mCachedSize) { throw new RuntimeException("Bug : Stored and computed node size differ"); } nodeOffset += n.mCachedSize; @@ -571,7 +581,7 @@ public class BinaryDictInputOutput { final ArrayList<Node> flatNodes, final FormatOptions formatOptions) { // First get the worst sizes and offsets for (Node n : flatNodes) setNodeMaximumSize(n, formatOptions); - final int offset = stackNodes(flatNodes); + final int offset = stackNodes(flatNodes, formatOptions); MakedictLog.i("Compressing the array addresses. Original size : " + offset); MakedictLog.i("(Recursively seen size : " + offset + ")"); @@ -587,7 +597,7 @@ public class BinaryDictInputOutput { if (oldNodeSize < newNodeSize) throw new RuntimeException("Increased size ?!"); changesDone |= changed; } - stackNodes(flatNodes); + stackNodes(flatNodes, formatOptions); ++passes; if (passes > MAX_PASSES) throw new RuntimeException("Too many passes - probably a bug"); } while (changesDone); @@ -776,7 +786,8 @@ public class BinaryDictInputOutput { return (options.mFrenchLigatureProcessing ? FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG : 0) + (options.mGermanUmlautProcessing ? FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG : 0) + (hasBigrams ? FormatSpec.CONTAINS_BIGRAMS_FLAG : 0) - + (formatOptions.mHasParentAddress ? FormatSpec.HAS_PARENT_ADDRESS : 0); + + (formatOptions.mHasParentAddress ? FormatSpec.HAS_PARENT_ADDRESS : 0) + + (formatOptions.mHasLinkedListNode ? FormatSpec.HAS_LINKEDLIST_NODE : 0); } /** @@ -910,6 +921,11 @@ public class BinaryDictInputOutput { } } + if (formatOptions.mHasLinkedListNode) { + buffer[index] = buffer[index + 1] = buffer[index + 2] + = FormatSpec.NO_FORWARD_LINK_ADDRESS; + index += FormatSpec.FORWARD_LINK_ADDRESS_SIZE; + } if (index != node.mCachedAddress + node.mCachedSize) throw new RuntimeException( "Not the same size : written " + (index - node.mCachedAddress) + " bytes out of a node that should have " @@ -1525,7 +1541,8 @@ public class BinaryDictInputOutput { 0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG), 0 != (optionsFlags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG)), new FormatOptions(version, - 0 != (optionsFlags & FormatSpec.HAS_PARENT_ADDRESS))); + 0 != (optionsFlags & FormatSpec.HAS_PARENT_ADDRESS), + 0 != (optionsFlags & FormatSpec.HAS_LINKEDLIST_NODE))); return header; } @@ -1543,11 +1560,6 @@ public class BinaryDictInputOutput { options.put(key, value); } } - // TODO: remove this method. - public static void populateOptions(final ByteBuffer buffer, final int headerSize, - final HashMap<String, String> options) { - populateOptions(new ByteBufferWrapper(buffer), headerSize, options); - } /** * Reads a buffer and returns the memory representation of the dictionary. diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java index 1707ccc39..f8f13b197 100644 --- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java +++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java @@ -41,6 +41,12 @@ public final class FormatSpec { * u | * ps * + * f | + * o | IF HAS_LINKEDLIST_NODE (defined in the file header) + * r | forward link address, 3byte + * w | the address must be positive. + * a | + * rdlinkaddress */ /* Node(CharGroup) layout is as follows: @@ -140,18 +146,23 @@ public final class FormatSpec { static final int NOT_A_VERSION_NUMBER = -1; static final int FIRST_VERSION_WITH_HEADER_SIZE = 2; static final int FIRST_VERSION_WITH_PARENT_ADDRESS = 3; + static final int FIRST_VERSION_WITH_LINKEDLIST_NODE = 3; // These options need to be the same numeric values as the one in the native reading code. static final int GERMAN_UMLAUT_PROCESSING_FLAG = 0x1; + // TODO: Make the native reading code read this variable. static final int HAS_PARENT_ADDRESS = 0x2; static final int FRENCH_LIGATURE_PROCESSING_FLAG = 0x4; static final int CONTAINS_BIGRAMS_FLAG = 0x8; + // TODO: Make the native reading code read this variable. + static final int HAS_LINKEDLIST_NODE = 0x10; // TODO: Make this value adaptative to content data, store it in the header, and // use it in the reading code. static final int MAX_WORD_LENGTH = Constants.Dictionary.MAX_WORD_LENGTH; static final int PARENT_ADDRESS_SIZE = 3; + static final int FORWARD_LINK_ADDRESS_SIZE = 3; static final int MASK_GROUP_ADDRESS_TYPE = 0xC0; static final int FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00; @@ -187,6 +198,7 @@ public final class FormatSpec { static final int NO_CHILDREN_ADDRESS = Integer.MIN_VALUE; static final int NO_PARENT_ADDRESS = 0; + static final int NO_FORWARD_LINK_ADDRESS = 0; static final int INVALID_CHARACTER = -1; static final int MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT = 0x7F; // 127 @@ -201,16 +213,30 @@ public final class FormatSpec { public static class FormatOptions { public final int mVersion; public final boolean mHasParentAddress; + public final boolean mHasLinkedListNode; public FormatOptions(final int version) { this(version, false); } public FormatOptions(final int version, final boolean hasParentAddress) { + this(version, hasParentAddress, false); + } + public FormatOptions(final int version, final boolean hasParentAddress, + final boolean hasLinkedListNode) { mVersion = version; - if (version < FormatSpec.FIRST_VERSION_WITH_PARENT_ADDRESS && hasParentAddress) { + if (version < FIRST_VERSION_WITH_PARENT_ADDRESS && hasParentAddress) { throw new RuntimeException("Parent addresses are only supported with versions " - + FormatSpec.FIRST_VERSION_WITH_PARENT_ADDRESS + " and ulterior."); + + FIRST_VERSION_WITH_PARENT_ADDRESS + " and ulterior."); } mHasParentAddress = hasParentAddress; + + if (version < FIRST_VERSION_WITH_LINKEDLIST_NODE && hasLinkedListNode) { + throw new RuntimeException("Linked list nodes are only supported with versions " + + FIRST_VERSION_WITH_LINKEDLIST_NODE + " and ulterior."); + } + if (!hasParentAddress && hasLinkedListNode) { + throw new RuntimeException("Linked list nodes need parent addresses."); + } + mHasLinkedListNode = hasLinkedListNode; } } diff --git a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java index 6775de8a8..98cf308c8 100644 --- a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java +++ b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java @@ -556,6 +556,7 @@ public class FusionDictionary implements Iterable<Word> { final StringBuilder checker = DBG ? new StringBuilder() : null; CharGroup currentGroup; + final int codePointCountInS = s.codePointCount(0, s.length()); do { int indexOfGroup = findIndexOfChar(node, s.codePointAt(index)); if (CHARACTER_NOT_FOUND == indexOfGroup) return null; @@ -570,12 +571,12 @@ public class FusionDictionary implements Iterable<Word> { index = newIndex; if (DBG) checker.append(new String(currentGroup.mChars, 0, currentGroup.mChars.length)); - if (index < s.length()) { + if (index < codePointCountInS) { node = currentGroup.mChildren; } - } while (null != node && index < s.length()); + } while (null != node && index < codePointCountInS); - if (index < s.length()) return null; + if (index < codePointCountInS) return null; if (!currentGroup.isTerminal()) return null; if (DBG && !s.equals(checker.toString())) return null; return currentGroup; |