about summary refs log tree commit diff stats
path: root/java/src/com/android/inputmethod/latin
diff options
context:
space:
mode:
Diffstat (limited to 'java/src/com/android/inputmethod/latin')
-rw-r--r-- java/src/com/android/inputmethod/latin/UserHistoryDictIOUtils.java 5
-rw-r--r-- java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java 80
-rw-r--r-- java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java 218
-rw-r--r-- java/src/com/android/inputmethod/latin/makedict/FormatSpec.java 21
4 files changed, 225 insertions, 99 deletions
diff --git a/java/src/com/android/inputmethod/latin/UserHistoryDictIOUtils.java b/java/src/com/android/inputmethod/latin/UserHistoryDictIOUtils.java
index 4a3d11aa1..05255a6b3 100644
--- a/java/src/com/android/inputmethod/latin/UserHistoryDictIOUtils.java
+++ b/java/src/com/android/inputmethod/latin/UserHistoryDictIOUtils.java
@@ -100,6 +100,11 @@ public class UserHistoryDictIOUtils {
@Override
public int limit() {
+ return mBuffer.length - 1; // NOTE(review): one less than capacity(); confirm callers expect the last valid index rather than the element count
+ }
+
+ @Override
+ public int capacity() {
return mBuffer.length; // full length of the backing array
}
}
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java
index ac0fb0ece..19da5124a 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java
@@ -157,47 +157,63 @@ public class BinaryDictIOUtils {
final int wordLen = word.codePointCount(0, word.length());
for (int depth = 0; depth < Constants.Dictionary.MAX_WORD_LENGTH; ++depth) {
if (wordPos >= wordLen) return FormatSpec.NOT_VALID_WORD;
- int groupOffset = buffer.position() - header.mHeaderSize;
- final int charGroupCount = BinaryDictInputOutput.readCharGroupCount(buffer);
- groupOffset += BinaryDictInputOutput.getGroupCountSize(charGroupCount);
-
- for (int i = 0; i < charGroupCount; ++i) {
- final int charGroupPos = buffer.position();
- final CharGroupInfo currentInfo = BinaryDictInputOutput.readCharGroup(buffer,
- buffer.position(), header.mFormatOptions);
- boolean same = true;
- for (int p = 0, j = word.offsetByCodePoints(0, wordPos);
- p < currentInfo.mCharacters.length;
- ++p, j = word.offsetByCodePoints(j, 1)) {
- if (wordPos + p >= wordLen
- || word.codePointAt(j) != currentInfo.mCharacters[p]) {
- same = false;
- break;
+
+ do {
+ int groupOffset = buffer.position() - header.mHeaderSize;
+ final int charGroupCount = BinaryDictInputOutput.readCharGroupCount(buffer);
+ groupOffset += BinaryDictInputOutput.getGroupCountSize(charGroupCount);
+
+ boolean foundNextCharGroup = false;
+ for (int i = 0; i < charGroupCount; ++i) {
+ final int charGroupPos = buffer.position();
+ final CharGroupInfo currentInfo = BinaryDictInputOutput.readCharGroup(buffer,
+ buffer.position(), header.mFormatOptions);
+ boolean same = true;
+ for (int p = 0, j = word.offsetByCodePoints(0, wordPos);
+ p < currentInfo.mCharacters.length;
+ ++p, j = word.offsetByCodePoints(j, 1)) {
+ if (wordPos + p >= wordLen
+ || word.codePointAt(j) != currentInfo.mCharacters[p]) {
+ same = false;
+ break;
+ }
}
- }
- if (same) {
- if (wordPos + currentInfo.mCharacters.length == wordLen) {
- if (currentInfo.mFrequency == CharGroup.NOT_A_TERMINAL) {
+ if (same) {
+ // Found a group whose characters match the word.
+ if (wordPos + currentInfo.mCharacters.length == wordLen) {
+ if (currentInfo.mFrequency == CharGroup.NOT_A_TERMINAL) {
+ return FormatSpec.NOT_VALID_WORD;
+ } else {
+ return charGroupPos;
+ }
+ }
+ wordPos += currentInfo.mCharacters.length;
+ if (currentInfo.mChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS) {
return FormatSpec.NOT_VALID_WORD;
- } else {
- return charGroupPos;
}
+ foundNextCharGroup = true;
+ buffer.position(currentInfo.mChildrenAddress);
+ break;
}
- wordPos += currentInfo.mCharacters.length;
- if (currentInfo.mChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS) {
- return FormatSpec.NOT_VALID_WORD;
- }
- buffer.position(currentInfo.mChildrenAddress);
- break;
+ groupOffset = currentInfo.mEndAddress;
}
- groupOffset = currentInfo.mEndAddress;
- // not found
- if (i >= charGroupCount - 1) {
+ // If we found the next char group, it is under the file pointer.
+ // But if not, we are at the end of this node so we expect to have
+ // a forward link address that we need to consult and possibly resume
+ // search on the next node in the linked list.
+ if (foundNextCharGroup) break;
+ if (!header.mFormatOptions.mSupportsDynamicUpdate) {
return FormatSpec.NOT_VALID_WORD;
}
- }
+
+ final int forwardLinkAddress = buffer.readUnsignedInt24();
+ if (forwardLinkAddress == FormatSpec.NO_FORWARD_LINK_ADDRESS) {
+ return FormatSpec.NOT_VALID_WORD;
+ }
+ buffer.position(forwardLinkAddress);
+ } while(true);
}
return FormatSpec.NOT_VALID_WORD;
}
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
index 4806bf9dc..f9339de08 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
@@ -36,7 +36,6 @@ import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
-import java.util.Stack;
import java.util.TreeMap;
/**
@@ -64,6 +63,7 @@ public class BinaryDictInputOutput {
public void position(int newPosition);
public void put(final byte b);
public int limit();
+ public int capacity();
}
public static final class ByteBufferWrapper implements FusionDictionaryBufferInterface {
@@ -113,6 +113,11 @@ public class BinaryDictInputOutput {
public int limit() {
return mBuffer.limit();
}
+
+ @Override
+ public int capacity() {
+ return mBuffer.capacity();
+ }
}
/**
@@ -412,6 +417,10 @@ public class BinaryDictInputOutput {
}
}
+ private static final int UINT8_MAX = 0xFF;
+ private static final int UINT16_MAX = 0xFFFF;
+ private static final int UINT24_MAX = 0xFFFFFF;
+
/**
* Compute the size, in bytes, that an address will occupy.
*
@@ -423,17 +432,25 @@ public class BinaryDictInputOutput {
* @return the byte size.
*/
private static int getByteSize(final int address) { // Number of bytes (0 to 3) the address occupies, judged by its absolute value.
- assert(address < 0x1000000);
+ assert(address <= UINT24_MAX); // same bound as the removed line, now via the named constant; only the upper side is checked
if (!hasChildrenAddress(address)) {
return 0;
- } else if (Math.abs(address) < 0x100) {
+ } else if (Math.abs(address) <= UINT8_MAX) { // magnitude fits in one byte
return 1;
- } else if (Math.abs(address) < 0x10000) {
+ } else if (Math.abs(address) <= UINT16_MAX) { // magnitude fits in two bytes
return 2;
} else {
return 3;
}
}
+
+ private static final int SINT8_MAX = 0x7F;
+ private static final int SINT16_MAX = 0x7FFF;
+ private static final int SINT24_MAX = 0x7FFFFF;
+ private static final int MSB8 = 0x80;
+ private static final int MSB16 = 0x8000;
+ private static final int MSB24 = 0x800000;
+
// End utility methods.
// This method is responsible for finding a nice ordering of the nodes that favors run-time
@@ -509,13 +526,19 @@ public class BinaryDictInputOutput {
}
int groupSize = getGroupHeaderSize(group, formatOptions);
if (group.isTerminal()) groupSize += FormatSpec.GROUP_FREQUENCY_SIZE;
- if (null != group.mChildren) {
+ if (null == group.mChildren && formatOptions.mSupportsDynamicUpdate) {
+ groupSize += FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE;
+ } else if (null != group.mChildren) {
final int offsetBasePoint = groupSize + node.mCachedAddress + size;
final int offset = group.mChildren.mCachedAddress - offsetBasePoint;
// assign my address to children's parent address
group.mChildren.mCachedParentAddress = group.mCachedAddress
- group.mChildren.mCachedAddress;
- groupSize += getByteSize(offset);
+ if (formatOptions.mSupportsDynamicUpdate) {
+ groupSize += FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE;
+ } else {
+ groupSize += getByteSize(offset);
+ }
}
groupSize += getShortcutListSize(group.mShortcutTargets);
if (null != group.mBigrams) {
@@ -669,27 +692,52 @@ public class BinaryDictInputOutput {
}
}
+ /**
+ * Helper method to write a signed address to a byte array.
+ *
+ * Despite the name, the encoding is not variable-size: exactly three bytes are always
+ * written. If hasChildrenAddress(address) is false, the three bytes are zero. Otherwise
+ * the absolute value of the address is stored big-endian, and the most significant bit of
+ * the first byte is set when the address is negative.
+ *
+ * @param buffer the buffer to write to.
+ * @param index the index in the buffer to write the address to.
+ * @param address the address to write.
+ * @return the size in bytes the address actually took, which is always 3.
+ */
+ private static int writeVariableSignedAddress(final byte[] buffer, int index,
+ final int address) {
+ if (!hasChildrenAddress(address)) {
+ buffer[index] = buffer[index + 1] = buffer[index + 2] = 0;
+ } else {
+ final int absAddress = Math.abs(address); // NOTE(review): no check that this fits in 23 bits; a larger value would overlap the sign bit
+ buffer[index++] = (byte)((address < 0 ? MSB8 : 0) | (0xFF & (absAddress >> 16))); // sign flag shares the first byte with the top magnitude bits
+ buffer[index++] = (byte)(0xFF & (absAddress >> 8));
+ buffer[index++] = (byte)(0xFF & absAddress);
+ }
+ return 3;
+ }
+
private static byte makeCharGroupFlags(final CharGroup group, final int groupAddress,
- final int childrenOffset) {
+ final int childrenOffset, final FormatOptions formatOptions) {
byte flags = 0;
if (group.mChars.length > 1) flags |= FormatSpec.FLAG_HAS_MULTIPLE_CHARS;
if (group.mFrequency >= 0) {
flags |= FormatSpec.FLAG_IS_TERMINAL;
}
if (null != group.mChildren) {
- switch (getByteSize(childrenOffset)) {
- case 1:
- flags |= FormatSpec.FLAG_GROUP_ADDRESS_TYPE_ONEBYTE;
- break;
- case 2:
- flags |= FormatSpec.FLAG_GROUP_ADDRESS_TYPE_TWOBYTES;
- break;
- case 3:
- flags |= FormatSpec.FLAG_GROUP_ADDRESS_TYPE_THREEBYTES;
- break;
- default:
- throw new RuntimeException("Node with a strange address");
- }
+ final int byteSize = formatOptions.mSupportsDynamicUpdate
+ ? FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE : getByteSize(childrenOffset);
+ switch (byteSize) {
+ case 1:
+ flags |= FormatSpec.FLAG_GROUP_ADDRESS_TYPE_ONEBYTE;
+ break;
+ case 2:
+ flags |= FormatSpec.FLAG_GROUP_ADDRESS_TYPE_TWOBYTES;
+ break;
+ case 3:
+ flags |= FormatSpec.FLAG_GROUP_ADDRESS_TYPE_THREEBYTES;
+ break;
+ default:
+ throw new RuntimeException("Node with a strange address");
+ }
+ } else if (formatOptions.mSupportsDynamicUpdate) {
+ flags |= FormatSpec.FLAG_GROUP_ADDRESS_TYPE_THREEBYTES;
}
if (null != group.mShortcutTargets) {
if (DBG && 0 == group.mShortcutTargets.size()) {
@@ -808,6 +856,25 @@ public class BinaryDictInputOutput {
+ (frequency & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY);
}
+ private static final int writeParentAddress(final byte[] buffer, final int index, // Writes a 3-byte parent address when the format supports dynamic update.
+ final int address, final FormatOptions formatOptions) { // Returns the index just past the written bytes, or index unchanged if nothing is written.
+ if (supportsDynamicUpdate(formatOptions)) {
+ if (address == FormatSpec.NO_PARENT_ADDRESS) {
+ buffer[index] = buffer[index + 1] = buffer[index + 2] = 0; // no parent: all three bytes are zero
+ } else {
+ final int absAddress = Math.abs(address);
+ assert(absAddress <= SINT24_MAX); // magnitude must fit in 23 bits because the top bit carries the sign
+ buffer[index] = (byte)((address < 0 ? MSB8 : 0) // sign flag goes in the high bit of the first byte
+ | ((absAddress >> 16) & 0xFF));
+ buffer[index + 1] = (byte)((absAddress >> 8) & 0xFF);
+ buffer[index + 2] = (byte)(absAddress & 0xFF);
+ }
+ return index + 3;
+ } else {
+ return index; // formats without dynamic update store no parent address
+ }
+ }
+
/**
* Write a node to memory. The node is expected to have its final position cached.
*
@@ -854,22 +921,15 @@ public class BinaryDictInputOutput {
final int childrenOffset = null == group.mChildren
? FormatSpec.NO_CHILDREN_ADDRESS
: group.mChildren.mCachedAddress - groupAddress;
- byte flags = makeCharGroupFlags(group, groupAddress, childrenOffset);
+ byte flags = makeCharGroupFlags(group, groupAddress, childrenOffset, formatOptions);
buffer[index++] = flags;
- if (supportsDynamicUpdate(formatOptions)) {
- if (parentAddress == FormatSpec.NO_PARENT_ADDRESS) {
- // this node is the root node.
- buffer[index] = buffer[index + 1] = buffer[index + 2] = 0;
- } else {
- // write parent address. (version 3)
- final int actualParentAddress = Math.abs(parentAddress
- + (node.mCachedAddress - group.mCachedAddress));
- buffer[index] = (byte)((actualParentAddress >> 16) & 0xFF);
- buffer[index + 1] = (byte)((actualParentAddress >> 8) & 0xFF);
- buffer[index + 2] = (byte)(actualParentAddress & 0xFF);
- }
- index += 3;
+ if (parentAddress == FormatSpec.NO_PARENT_ADDRESS) {
+ index = writeParentAddress(buffer, index, parentAddress, formatOptions);
+ } else {
+ index = writeParentAddress(buffer, index,
+ parentAddress + (node.mCachedAddress - group.mCachedAddress),
+ formatOptions);
}
index = CharEncoding.writeCharArray(group.mChars, buffer, index);
@@ -879,7 +939,13 @@ public class BinaryDictInputOutput {
if (group.mFrequency >= 0) {
buffer[index++] = (byte) group.mFrequency;
}
- final int shift = writeVariableAddress(buffer, index, childrenOffset);
+
+ final int shift;
+ if (formatOptions.mSupportsDynamicUpdate) {
+ shift = writeVariableSignedAddress(buffer, index, childrenOffset);
+ } else {
+ shift = writeVariableAddress(buffer, index, childrenOffset);
+ }
index += shift;
groupAddress += shift;
@@ -1104,6 +1170,58 @@ public class BinaryDictInputOutput {
// Input methods: Read a binary dictionary to memory.
// readDictionaryBinary is the public entry point for them.
+ private static int getChildrenAddressSize(final int optionFlags, // Size in bytes of the children address selected by the flags and format options.
+ final FormatOptions formatOptions) {
+ if (formatOptions.mSupportsDynamicUpdate) return FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE; // fixed size in this mode, regardless of the flags
+ switch (optionFlags & FormatSpec.MASK_GROUP_ADDRESS_TYPE) { // otherwise the size is encoded in the address-type bits of the flags
+ case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_ONEBYTE:
+ return 1;
+ case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_TWOBYTES:
+ return 2;
+ case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_THREEBYTES:
+ return 3;
+ case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_NOADDRESS:
+ default:
+ return 0; // no address bytes are present
+ }
+ }
+
+ private static int readChildrenAddress(final FusionDictionaryBufferInterface buffer, // Reads a children address from buffer, or returns NO_CHILDREN_ADDRESS when there is none.
+ final int optionFlags, final FormatOptions options) {
+ if (options.mSupportsDynamicUpdate) {
+ final int address = buffer.readUnsignedInt24(); // always three bytes in this mode; optionFlags is not consulted
+ if (address == 0) return FormatSpec.NO_CHILDREN_ADDRESS; // a stored zero means "no children"
+ if ((address & MSB24) != 0) { // top bit set: the low 23 bits are the magnitude of a negative value
+ return -(address & SINT24_MAX);
+ } else {
+ return address;
+ }
+ }
+ int address; // NOTE(review): never assigned or read below
+ switch (optionFlags & FormatSpec.MASK_GROUP_ADDRESS_TYPE) { // width comes from the flags; the value is read as unsigned
+ case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_ONEBYTE:
+ return buffer.readUnsignedByte();
+ case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_TWOBYTES:
+ return buffer.readUnsignedShort();
+ case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_THREEBYTES:
+ return buffer.readUnsignedInt24();
+ case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_NOADDRESS:
+ default:
+ return FormatSpec.NO_CHILDREN_ADDRESS; // nothing is read from buffer in this case
+ }
+ }
+
+ private static int readParentAddress(final FusionDictionaryBufferInterface buffer, // Reads the 3-byte signed parent address if the format stores one.
+ final FormatOptions formatOptions) {
+ if (supportsDynamicUpdate(formatOptions)) {
+ final int parentAddress = buffer.readUnsignedInt24();
+ final int sign = ((parentAddress & MSB24) != 0) ? -1 : 1; // top bit is the sign flag
+ return sign * (parentAddress & SINT24_MAX); // low 23 bits are the magnitude. NOTE(review): a stored 0 decodes to 0; presumably NO_PARENT_ADDRESS is 0 — confirm
+ } else {
+ return FormatSpec.NO_PARENT_ADDRESS; // nothing is read from buffer in this case
+ }
+ }
+
private static final int[] CHARACTER_BUFFER = new int[FormatSpec.MAX_WORD_LENGTH];
public static CharGroupInfo readCharGroup(final FusionDictionaryBufferInterface buffer,
final int originalGroupAddress, final FormatOptions options) {
@@ -1111,13 +1229,9 @@ public class BinaryDictInputOutput {
final int flags = buffer.readUnsignedByte();
++addressPointer;
- final int parentAddress;
+ final int parentAddress = readParentAddress(buffer, options);
if (supportsDynamicUpdate(options)) {
- // read the parent address. (version 3)
- parentAddress = -buffer.readUnsignedInt24();
addressPointer += 3;
- } else {
- parentAddress = FormatSpec.NO_PARENT_ADDRESS;
}
final int characters[];
@@ -1146,25 +1260,11 @@ public class BinaryDictInputOutput {
} else {
frequency = CharGroup.NOT_A_TERMINAL;
}
- int childrenAddress = addressPointer;
- switch (flags & FormatSpec.MASK_GROUP_ADDRESS_TYPE) {
- case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_ONEBYTE:
- childrenAddress += buffer.readUnsignedByte();
- addressPointer += 1;
- break;
- case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_TWOBYTES:
- childrenAddress += buffer.readUnsignedShort();
- addressPointer += 2;
- break;
- case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_THREEBYTES:
- childrenAddress += buffer.readUnsignedInt24();
- addressPointer += 3;
- break;
- case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_NOADDRESS:
- default:
- childrenAddress = FormatSpec.NO_CHILDREN_ADDRESS;
- break;
+ int childrenAddress = readChildrenAddress(buffer, flags, options);
+ if (childrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) {
+ childrenAddress += addressPointer;
}
+ addressPointer += getChildrenAddressSize(flags, options);
ArrayList<WeightedString> shortcutTargets = null;
if (0 != (flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS)) {
final int pointerBefore = buffer.position();
@@ -1250,6 +1350,7 @@ public class BinaryDictInputOutput {
final String result;
final int originalPointer = buffer.position();
+ buffer.position(address);
if (supportsDynamicUpdate(formatOptions)) {
result = getWordAtAddressWithParentAddress(buffer, headerSize, address, formatOptions);
@@ -1279,7 +1380,6 @@ public class BinaryDictInputOutput {
sGetWordBuffer[index--] =
currentInfo.mCharacters[currentInfo.mCharacters.length - i - 1];
}
-
if (currentInfo.mParentAddress == FormatSpec.NO_PARENT_ADDRESS) break;
currentAddress = currentInfo.mParentAddress + currentInfo.mOriginalAddress;
}
diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
index 63a61b46f..cab0661f6 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
@@ -42,11 +42,13 @@ public final class FormatSpec {
* ps
*
* f |
- * o | IF HAS_LINKEDLIST_NODE (defined in the file header)
+ * o | IF SUPPORTS_DYNAMIC_UPDATE (defined in the file header)
* r | forward link address, 3byte
- * w | the address must be positive.
- * a |
- * rdlinkaddress
+ * w | 1 byte = bbbbbbbb match
+ * a | case 1xxxxxxx => -((xxxxxxx << 16) + (next byte << 8) + next byte)
+ * r | otherwise => (xxxxxxx << 16) + (next byte << 8) + next byte
+ * d |
+ * linkaddress
*/
/* Node(CharGroup) layout is as follows:
@@ -63,11 +65,13 @@ public final class FormatSpec {
* | is blacklisted ? 1 bit, 1 = yes, 0 = no : FLAG_IS_BLACKLISTED
*
* p |
- * a | IF HAS_PARENT_ADDRESS (defined in the file header)
+ * a | IF SUPPORTS_DYNAMIC_UPDATE (defined in the file header)
* r | parent address, 3byte
- * e | the address must be negative, so the absolute value of the address is stored.
- * n |
- * taddress
+ * e | 1 byte = bbbbbbbb match
+ * n | case 1xxxxxxx => -((0xxxxxxx << 16) + (next byte << 8) + next byte)
+ * t | otherwise => (bbbbbbbb << 16) + (next byte << 8) + next byte
+ * a |
+ * ddress
*
* c | IF FLAG_HAS_MULTIPLE_CHARS
* h | char, char, char, char n * (1 or 3 bytes) : use CharGroupInfo for i/o helpers
@@ -206,6 +210,7 @@ public final class FormatSpec {
// This option needs to be the same numeric value as the one in binary_format.h.
static final int NOT_VALID_WORD = -99;
+ static final int SIGNED_CHILDREN_ADDRESS_SIZE = 3;
/**
* Options about file format.