diff options
Diffstat (limited to 'java/src')
3 files changed, 73 insertions, 11 deletions
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java index 19da5124a..b97be0543 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java @@ -85,7 +85,10 @@ public class BinaryDictIOUtils { } p.mPosition++; - if (info.mFrequency != FusionDictionary.CharGroup.NOT_A_TERMINAL) { // found word + final boolean isMovedGroup = BinaryDictInputOutput.isMovedGroup(info.mFlags, + formatOptions); + if (!isMovedGroup + && info.mFrequency != FusionDictionary.CharGroup.NOT_A_TERMINAL) {// found word words.put(info.mOriginalAddress, new String(pushedChars, 0, index)); frequencies.put(info.mOriginalAddress, info.mFrequency); if (info.mBigrams != null) bigrams.put(info.mOriginalAddress, info.mBigrams); @@ -109,7 +112,7 @@ public class BinaryDictIOUtils { p.mAddress = buffer.position(); } - if (BinaryDictInputOutput.hasChildrenAddress(info.mChildrenAddress)) { + if (!isMovedGroup && BinaryDictInputOutput.hasChildrenAddress(info.mChildrenAddress)) { Position childrenPos = new Position(info.mChildrenAddress + headerSize, index); stack.push(childrenPos); } @@ -168,6 +171,10 @@ public class BinaryDictIOUtils { final int charGroupPos = buffer.position(); final CharGroupInfo currentInfo = BinaryDictInputOutput.readCharGroup(buffer, buffer.position(), header.mFormatOptions); + if (BinaryDictInputOutput.isMovedGroup(currentInfo.mFlags, + header.mFormatOptions)) { + continue; + } boolean same = true; for (int p = 0, j = word.offsetByCodePoints(0, wordPos); p < currentInfo.mCharacters.length; @@ -239,4 +246,34 @@ public class BinaryDictIOUtils { buffer.position(wordPosition); buffer.put((byte)newFlags); } + + private static void putSInt24(final FusionDictionaryBufferInterface buffer, + final int value) { + final int absValue = Math.abs(value); + buffer.put((byte)(((value < 0 ? 0x80 : 0) | (absValue >> 16)) & 0xFF)); + buffer.put((byte)((absValue >> 8) & 0xFF)); + buffer.put((byte)(absValue & 0xFF)); + } + + /** + * Update a parent address in a CharGroup that is addressed by groupOriginAddress. + * + * @param buffer the buffer to write. + * @param groupOriginAddress the address of the group. + * @param newParentAddress the absolute address of the parent. + * @param formatOptions file format options. + */ + public static void updateParentAddress(final FusionDictionaryBufferInterface buffer, + final int groupOriginAddress, final int newParentAddress, + final FormatOptions formatOptions) { + final int originalPosition = buffer.position(); + buffer.position(groupOriginAddress); + if (!formatOptions.mSupportsDynamicUpdate) { + throw new RuntimeException("this file format does not support parent addresses"); + } + final int flags = buffer.readUnsignedByte(); + final int parentOffset = newParentAddress - groupOriginAddress; + putSInt24(buffer, parentOffset); + buffer.position(originalPosition); + } } diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java index f9339de08..9fc694218 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java @@ -53,6 +53,7 @@ public class BinaryDictInputOutput { // If the number of passes exceeds this number, makedict bails with an exception on // suspicion that a bug might be causing an infinite loop. private static final int MAX_PASSES = 24; + private static final int MAX_JUMPS = 12; public interface FusionDictionaryBufferInterface { public int readUnsignedByte(); @@ -395,6 +396,13 @@ public class BinaryDictInputOutput { } /** + * Helper method to check whether the group is moved. + */ + public static boolean isMovedGroup(final int flags, final FormatOptions options) { + return options.mSupportsDynamicUpdate && ((flags & FormatSpec.FLAG_IS_MOVED) == 1); + } + + /** * Helper method to check whether the dictionary can be updated dynamically. */ public static boolean supportsDynamicUpdate(final FormatOptions options) { @@ -1374,8 +1382,18 @@ public class BinaryDictInputOutput { int index = FormatSpec.MAX_WORD_LENGTH - 1; // the length of the path from the root to the leaf is limited by MAX_WORD_LENGTH for (int count = 0; count < FormatSpec.MAX_WORD_LENGTH; ++count) { - buffer.position(currentAddress + headerSize); - final CharGroupInfo currentInfo = readCharGroup(buffer, currentAddress, options); + CharGroupInfo currentInfo; + int loopCounter = 0; + do { + buffer.position(currentAddress + headerSize); + currentInfo = readCharGroup(buffer, currentAddress, options); + if (isMovedGroup(currentInfo.mFlags, options)) { + currentAddress = currentInfo.mParentAddress + currentInfo.mOriginalAddress; + } + if (DBG && loopCounter++ > MAX_JUMPS) { + MakedictLog.d("Too many jumps - probably a bug"); + } + } while (isMovedGroup(currentInfo.mFlags, options)); for (int i = 0; i < currentInfo.mCharacters.length; ++i) { sGetWordBuffer[index--] = currentInfo.mCharacters[currentInfo.mCharacters.length - i - 1]; @@ -1457,6 +1475,7 @@ public class BinaryDictInputOutput { int groupOffset = nodeHeadPosition + getGroupCountSize(count); for (int i = count; i > 0; --i) { // Scan the array of CharGroup. CharGroupInfo info = readCharGroup(buffer, groupOffset, options); + if (isMovedGroup(info.mFlags, options)) continue; ArrayList<WeightedString> shortcutTargets = info.mShortcutTargets; ArrayList<WeightedString> bigrams = null; if (null != info.mBigrams) { diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java index cab0661f6..35311f0c2 100644 --- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java +++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java @@ -52,13 +52,18 @@ public final class FormatSpec { */ /* Node(CharGroup) layout is as follows: - * | addressType xx : mask with MASK_GROUP_ADDRESS_TYPE - * 2 bits, 00 = no children : FLAG_GROUP_ADDRESS_TYPE_NOADDRESS - * f | 01 = 1 byte : FLAG_GROUP_ADDRESS_TYPE_ONEBYTE - * l | 10 = 2 bytes : FLAG_GROUP_ADDRESS_TYPE_TWOBYTES - * a | 11 = 3 bytes : FLAG_GROUP_ADDRESS_TYPE_THREEBYTES - * g | has several chars ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_MULTIPLE_CHARS - * s | has a terminal ? 1 bit, 1 = yes, 0 = no : FLAG_IS_TERMINAL + * | IF !SUPPORTS_DYNAMIC_UPDATE + * | addressType xx : mask with MASK_GROUP_ADDRESS_TYPE + * | 2 bits, 00 = no children : FLAG_GROUP_ADDRESS_TYPE_NOADDRESS + * f | 01 = 1 byte : FLAG_GROUP_ADDRESS_TYPE_ONEBYTE + * l | 10 = 2 bytes : FLAG_GROUP_ADDRESS_TYPE_TWOBYTES + * a | 11 = 3 bytes : FLAG_GROUP_ADDRESS_TYPE_THREEBYTES + * g | ELSE + * s | is moved ? 2 bits, 11 = no + * | 01 = yes + * | the new address is stored in the same place as the parent address + * | has several chars ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_MULTIPLE_CHARS + * | has a terminal ? 1 bit, 1 = yes, 0 = no : FLAG_IS_TERMINAL * | has shortcut targets ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_SHORTCUT_TARGETS * | has bigrams ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_BIGRAMS * | is not a word ? 1 bit, 1 = yes, 0 = no : FLAG_IS_NOT_A_WORD @@ -178,6 +183,7 @@ public final class FormatSpec { static final int FLAG_HAS_BIGRAMS = 0x04; static final int FLAG_IS_NOT_A_WORD = 0x02; static final int FLAG_IS_BLACKLISTED = 0x01; + static final int FLAG_IS_MOVED = 0x40; static final int FLAG_ATTRIBUTE_HAS_NEXT = 0x80; static final int FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40; |