aboutsummaryrefslogtreecommitdiffstats
path: root/java/src/com/android/inputmethod/latin/makedict
diff options
context:
space:
mode:
Diffstat (limited to 'java/src/com/android/inputmethod/latin/makedict')
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java105
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java105
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/FormatSpec.java15
3 files changed, 164 insertions, 61 deletions
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java
index 5ed910c50..6210c923e 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java
@@ -28,7 +28,6 @@ import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Iterator;
-import java.util.List;
import java.util.Map;
import java.util.Stack;
@@ -95,7 +94,9 @@ public final class BinaryDictIOUtils {
final boolean isMovedGroup = BinaryDictInputOutput.isMovedGroup(info.mFlags,
formatOptions);
- if (!isMovedGroup
+ final boolean isDeletedGroup = BinaryDictInputOutput.isDeletedGroup(info.mFlags,
+ formatOptions);
+ if (!isMovedGroup && !isDeletedGroup
&& info.mFrequency != FusionDictionary.CharGroup.NOT_A_TERMINAL) {// found word
words.put(info.mOriginalAddress, new String(pushedChars, 0, index));
frequencies.put(info.mOriginalAddress, info.mFrequency);
@@ -170,19 +171,19 @@ public final class BinaryDictIOUtils {
if (wordPos >= wordLen) return FormatSpec.NOT_VALID_WORD;
do {
- int groupOffset = buffer.position() - header.mHeaderSize;
final int charGroupCount = BinaryDictInputOutput.readCharGroupCount(buffer);
- groupOffset += BinaryDictInputOutput.getGroupCountSize(charGroupCount);
-
boolean foundNextCharGroup = false;
for (int i = 0; i < charGroupCount; ++i) {
final int charGroupPos = buffer.position();
final CharGroupInfo currentInfo = BinaryDictInputOutput.readCharGroup(buffer,
buffer.position(), header.mFormatOptions);
- if (BinaryDictInputOutput.isMovedGroup(currentInfo.mFlags,
- header.mFormatOptions)) {
- continue;
- }
+ final boolean isMovedGroup =
+ BinaryDictInputOutput.isMovedGroup(currentInfo.mFlags,
+ header.mFormatOptions);
+ final boolean isDeletedGroup =
+ BinaryDictInputOutput.isDeletedGroup(currentInfo.mFlags,
+ header.mFormatOptions);
+ if (isMovedGroup) continue;
boolean same = true;
for (int p = 0, j = word.offsetByCodePoints(0, wordPos);
p < currentInfo.mCharacters.length;
@@ -197,7 +198,8 @@ public final class BinaryDictIOUtils {
if (same) {
// found the group matches the word.
if (wordPos + currentInfo.mCharacters.length == wordLen) {
- if (currentInfo.mFrequency == CharGroup.NOT_A_TERMINAL) {
+ if (currentInfo.mFrequency == CharGroup.NOT_A_TERMINAL
+ || isDeletedGroup) {
return FormatSpec.NOT_VALID_WORD;
} else {
return charGroupPos;
@@ -211,7 +213,6 @@ public final class BinaryDictIOUtils {
buffer.position(currentInfo.mChildrenAddress);
break;
}
- groupOffset = currentInfo.mEndAddress;
}
// If we found the next char group, it is under the file pointer.
@@ -233,6 +234,10 @@ public final class BinaryDictIOUtils {
return FormatSpec.NOT_VALID_WORD;
}
+ private static int markAsDeleted(final int flags) {
+ return (flags & (~FormatSpec.MASK_GROUP_ADDRESS_TYPE)) | FormatSpec.FLAG_IS_DELETED;
+ }
+
/**
* Delete the word from the binary file.
*
@@ -250,9 +255,8 @@ public final class BinaryDictIOUtils {
buffer.position(wordPosition);
final int flags = buffer.readUnsignedByte();
- final int newFlags = flags ^ FormatSpec.FLAG_IS_TERMINAL;
buffer.position(wordPosition);
- buffer.put((byte)newFlags);
+ buffer.put((byte)markAsDeleted(flags));
}
/**
@@ -302,7 +306,7 @@ public final class BinaryDictIOUtils {
}
/**
- * Update a parent address in a CharGroup that is addressed by groupOriginAddress.
+ * Update a parent address in a CharGroup that is referred to by groupOriginAddress.
*
* @param buffer the buffer to write.
* @param groupOriginAddress the address of the group.
@@ -318,11 +322,82 @@ public final class BinaryDictIOUtils {
throw new RuntimeException("this file format does not support parent addresses");
}
final int flags = buffer.readUnsignedByte();
+ if (BinaryDictInputOutput.isMovedGroup(flags, formatOptions)) {
+ // if the group is moved, the parent address is stored in the destination group.
+ // We are guaranteed to process the destination group later, so there is no need to
+ // update anything here.
+ buffer.position(originalPosition);
+ return;
+ }
+ if (DBG) {
+ MakedictLog.d("update parent address flags=" + flags + ", " + groupOriginAddress);
+ }
final int parentOffset = newParentAddress - groupOriginAddress;
writeSInt24ToBuffer(buffer, parentOffset);
buffer.position(originalPosition);
}
+ private static void skipCharGroup(final FusionDictionaryBufferInterface buffer,
+ final FormatOptions formatOptions) {
+ final int flags = buffer.readUnsignedByte();
+ BinaryDictInputOutput.readParentAddress(buffer, formatOptions);
+ skipString(buffer, (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0);
+ BinaryDictInputOutput.readChildrenAddress(buffer, flags, formatOptions);
+ if ((flags & FormatSpec.FLAG_IS_TERMINAL) != 0) buffer.readUnsignedByte();
+ if ((flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS) != 0) {
+ final int shortcutsSize = buffer.readUnsignedShort();
+ buffer.position(buffer.position() + shortcutsSize
+ - FormatSpec.GROUP_SHORTCUT_LIST_SIZE_SIZE);
+ }
+ if ((flags & FormatSpec.FLAG_HAS_BIGRAMS) != 0) {
+ int bigramCount = 0;
+ while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_GROUP) {
+ final int bigramFlags = buffer.readUnsignedByte();
+ switch (bigramFlags & FormatSpec.MASK_ATTRIBUTE_ADDRESS_TYPE) {
+ case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
+ buffer.readUnsignedByte();
+ break;
+ case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
+ buffer.readUnsignedShort();
+ break;
+ case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
+ buffer.readUnsignedInt24();
+ break;
+ }
+ if ((bigramFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT) == 0) break;
+ }
+ if (bigramCount >= FormatSpec.MAX_BIGRAMS_IN_A_GROUP) {
+ throw new RuntimeException("Too many bigrams in a group.");
+ }
+ }
+ }
+
+ /**
+ * Update parent addresses in a Node that is referred to by nodeOriginAddress.
+ *
+ * @param buffer the buffer to be modified.
+ * @param nodeOriginAddress the address of a modified Node.
+ * @param newParentAddress the address to be written
+ * @param formatOptions file format options
+ */
+ public static void updateParentAddresses(final FusionDictionaryBufferInterface buffer,
+ final int nodeOriginAddress, final int newParentAddress,
+ final FormatOptions formatOptions) {
+ final int originalPosition = buffer.position();
+ buffer.position(nodeOriginAddress);
+ do {
+ final int count = BinaryDictInputOutput.readCharGroupCount(buffer);
+ for (int i = 0; i < count; ++i) {
+ updateParentAddress(buffer, buffer.position(), newParentAddress, formatOptions);
+ skipCharGroup(buffer, formatOptions);
+ }
+ final int forwardLinkAddress = buffer.readUnsignedInt24();
+ buffer.position(forwardLinkAddress);
+ } while (formatOptions.mSupportsDynamicUpdate
+ && buffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS);
+ buffer.position(originalPosition);
+ }
+
private static void skipString(final FusionDictionaryBufferInterface buffer,
final boolean hasMultipleChars) {
if (hasMultipleChars) {
@@ -380,7 +455,7 @@ public final class BinaryDictIOUtils {
final int flags = buffer.readUnsignedByte();
final int parentAddress = BinaryDictInputOutput.readParentAddress(buffer, formatOptions);
skipString(buffer, (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0);
- if ((FormatSpec.FLAG_IS_TERMINAL) != 0) buffer.readUnsignedByte();
+ if ((flags & FormatSpec.FLAG_IS_TERMINAL) != 0) buffer.readUnsignedByte();
final int childrenOffset = newChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS
? FormatSpec.NO_CHILDREN_ADDRESS : newChildrenAddress - buffer.position();
writeSInt24ToBuffer(buffer, childrenOffset);
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
index ba29523c8..78171d03c 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
@@ -402,6 +402,14 @@ public final class BinaryDictInputOutput {
}
/**
+ * Helper method to check whether the group is deleted.
+ */
+ public static boolean isDeletedGroup(final int flags, final FormatOptions formatOptions) {
+ return formatOptions.mSupportsDynamicUpdate
+ && ((flags & FormatSpec.MASK_GROUP_ADDRESS_TYPE) == FormatSpec.FLAG_IS_DELETED);
+ }
+
+ /**
* Helper method to check whether the dictionary can be updated dynamically.
*/
public static boolean supportsDynamicUpdate(final FormatOptions options) {
@@ -720,53 +728,60 @@ public final class BinaryDictInputOutput {
return 3;
}
- private static byte makeCharGroupFlags(final CharGroup group, final int groupAddress,
- final int childrenOffset, final FormatOptions formatOptions) {
+ /**
+ * Makes the flag value for a char group.
+ *
+ * @param hasMultipleChars whether the group has multiple chars.
+ * @param isTerminal whether the group is terminal.
+ * @param childrenAddressSize the size of a children address.
+ * @param hasShortcuts whether the group has shortcuts.
+ * @param hasBigrams whether the group has bigrams.
+ * @param isNotAWord whether the group is not a word.
+ * @param isBlackListEntry whether the group is a blacklist entry.
+ * @param formatOptions file format options.
+ * @return the flags
+ */
+ static int makeCharGroupFlags(final boolean hasMultipleChars, final boolean isTerminal,
+ final int childrenAddressSize, final boolean hasShortcuts, final boolean hasBigrams,
+ final boolean isNotAWord, final boolean isBlackListEntry,
+ final FormatOptions formatOptions) {
byte flags = 0;
- if (group.mChars.length > 1) flags |= FormatSpec.FLAG_HAS_MULTIPLE_CHARS;
- if (group.mFrequency >= 0) {
- flags |= FormatSpec.FLAG_IS_TERMINAL;
- }
- if (null != group.mChildren) {
- final int byteSize = formatOptions.mSupportsDynamicUpdate
- ? FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE : getByteSize(childrenOffset);
- switch (byteSize) {
- case 1:
- flags |= FormatSpec.FLAG_GROUP_ADDRESS_TYPE_ONEBYTE;
- break;
- case 2:
- flags |= FormatSpec.FLAG_GROUP_ADDRESS_TYPE_TWOBYTES;
- break;
- case 3:
- flags |= FormatSpec.FLAG_GROUP_ADDRESS_TYPE_THREEBYTES;
- break;
- default:
- throw new RuntimeException("Node with a strange address");
- }
- } else if (formatOptions.mSupportsDynamicUpdate) {
- flags |= FormatSpec.FLAG_GROUP_ADDRESS_TYPE_THREEBYTES;
- }
- if (null != group.mShortcutTargets) {
- if (DBG && 0 == group.mShortcutTargets.size()) {
- throw new RuntimeException("0-sized shortcut list must be null");
- }
- flags |= FormatSpec.FLAG_HAS_SHORTCUT_TARGETS;
- }
- if (null != group.mBigrams) {
- if (DBG && 0 == group.mBigrams.size()) {
- throw new RuntimeException("0-sized bigram list must be null");
+ if (hasMultipleChars) flags |= FormatSpec.FLAG_HAS_MULTIPLE_CHARS;
+ if (isTerminal) flags |= FormatSpec.FLAG_IS_TERMINAL;
+ if (formatOptions.mSupportsDynamicUpdate) {
+ flags |= FormatSpec.FLAG_IS_NOT_MOVED;
+ } else if (true) {
+ switch (childrenAddressSize) {
+ case 1:
+ flags |= FormatSpec.FLAG_GROUP_ADDRESS_TYPE_ONEBYTE;
+ break;
+ case 2:
+ flags |= FormatSpec.FLAG_GROUP_ADDRESS_TYPE_TWOBYTES;
+ break;
+ case 3:
+ flags |= FormatSpec.FLAG_GROUP_ADDRESS_TYPE_THREEBYTES;
+ break;
+ case 0:
+ flags |= FormatSpec.FLAG_GROUP_ADDRESS_TYPE_NOADDRESS;
+ break;
+ default:
+ throw new RuntimeException("Node with a strange address");
}
- flags |= FormatSpec.FLAG_HAS_BIGRAMS;
- }
- if (group.mIsNotAWord) {
- flags |= FormatSpec.FLAG_IS_NOT_A_WORD;
- }
- if (group.mIsBlacklistEntry) {
- flags |= FormatSpec.FLAG_IS_BLACKLISTED;
}
+ if (hasShortcuts) flags |= FormatSpec.FLAG_HAS_SHORTCUT_TARGETS;
+ if (hasBigrams) flags |= FormatSpec.FLAG_HAS_BIGRAMS;
+ if (isNotAWord) flags |= FormatSpec.FLAG_IS_NOT_A_WORD;
+ if (isBlackListEntry) flags |= FormatSpec.FLAG_IS_BLACKLISTED;
return flags;
}
+ private static byte makeCharGroupFlags(final CharGroup group, final int groupAddress,
+ final int childrenOffset, final FormatOptions formatOptions) {
+ return (byte) makeCharGroupFlags(group.mChars.length > 1, group.mFrequency >= 0,
+ getByteSize(childrenOffset), group.mShortcutTargets != null, group.mBigrams != null,
+ group.mIsNotAWord, group.mIsBlacklistEntry, formatOptions);
+ }
+
/**
* Makes the flag value for a bigram.
*
@@ -1194,7 +1209,7 @@ public final class BinaryDictInputOutput {
}
}
- private static int readChildrenAddress(final FusionDictionaryBufferInterface buffer,
+ static int readChildrenAddress(final FusionDictionaryBufferInterface buffer,
final int optionFlags, final FormatOptions options) {
if (options.mSupportsDynamicUpdate) {
final int address = buffer.readUnsignedInt24();
@@ -1290,7 +1305,8 @@ public final class BinaryDictInputOutput {
ArrayList<PendingAttribute> bigrams = null;
if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) {
bigrams = new ArrayList<PendingAttribute>();
- while (true) {
+ int bigramCount = 0;
+ while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_GROUP) {
final int bigramFlags = buffer.readUnsignedByte();
++addressPointer;
final int sign = 0 == (bigramFlags & FormatSpec.FLAG_ATTRIBUTE_OFFSET_NEGATIVE)
@@ -1318,6 +1334,9 @@ public final class BinaryDictInputOutput {
bigramAddress));
if (0 == (bigramFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT)) break;
}
+ if (bigramCount >= FormatSpec.MAX_BIGRAMS_IN_A_GROUP) {
+ MakedictLog.d("too many bigrams in a group.");
+ }
}
return new CharGroupInfo(originalGroupAddress, addressPointer, flags, characters, frequency,
parentAddress, childrenAddress, shortcutTargets, bigrams);
diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
index b3fbb9fb5..ca851c6a9 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
@@ -59,9 +59,11 @@ public final class FormatSpec {
* l | 10 = 2 bytes : FLAG_GROUP_ADDRESS_TYPE_TWOBYTES
* a | 11 = 3 bytes : FLAG_GROUP_ADDRESS_TYPE_THREEBYTES
* g | ELSE
- * s | is moved ? 2 bits, 11 = no
- * | 01 = yes
+ * s | is moved ? 2 bits, 11 = no : FLAG_IS_NOT_MOVED
+ * | This must be the same as FLAG_GROUP_ADDRESS_TYPE_THREEBYTES
+ * | 01 = yes : FLAG_IS_MOVED
* | the new address is stored in the same place as the parent address
+ * | is deleted? 10 = yes : FLAG_IS_DELETED
* | has several chars ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_MULTIPLE_CHARS
* | has a terminal ? 1 bit, 1 = yes, 0 = no : FLAG_IS_TERMINAL
* | has shortcut targets ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_SHORTCUT_TARGETS
@@ -170,6 +172,7 @@ public final class FormatSpec {
static final int PARENT_ADDRESS_SIZE = 3;
static final int FORWARD_LINK_ADDRESS_SIZE = 3;
+ // These flags are used only in the static dictionary.
static final int MASK_GROUP_ADDRESS_TYPE = 0xC0;
static final int FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00;
static final int FLAG_GROUP_ADDRESS_TYPE_ONEBYTE = 0x40;
@@ -183,7 +186,12 @@ public final class FormatSpec {
static final int FLAG_HAS_BIGRAMS = 0x04;
static final int FLAG_IS_NOT_A_WORD = 0x02;
static final int FLAG_IS_BLACKLISTED = 0x01;
- static final int FLAG_IS_MOVED = 0x40;
+
+ // These flags are used only in the dynamic dictionary.
+ static final int FIXED_BIT_OF_DYNAMIC_UPDATE_MOVE = 0x40;
+ static final int FLAG_IS_MOVED = 0x00 | FIXED_BIT_OF_DYNAMIC_UPDATE_MOVE;
+ static final int FLAG_IS_NOT_MOVED = 0x80 | FIXED_BIT_OF_DYNAMIC_UPDATE_MOVE;
+ static final int FLAG_IS_DELETED = 0x80;
static final int FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
static final int FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
@@ -210,6 +218,7 @@ public final class FormatSpec {
static final int MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT = 0x7F; // 127
static final int MAX_CHARGROUPS_IN_A_NODE = 0x7FFF; // 32767
+ static final int MAX_BIGRAMS_IN_A_GROUP = 10000;
static final int MAX_TERMINAL_FREQUENCY = 255;
static final int MAX_BIGRAM_FREQUENCY = 15;