aboutsummaryrefslogtreecommitdiffstats
path: root/java/src
diff options
context:
space:
mode:
authorYuichiro Hanada <yhanada@google.com>2012-09-28 17:03:23 +0900
committerYuichiro Hanada <yhanada@google.com>2012-10-01 21:50:38 +0900
commitfb7e08ea8f40ebb4ac57e443f837043e7b57fd87 (patch)
treef4115eef567acccb963db9bcfe34b351b91b3e56 /java/src
parentc3a7d483caa6d547618fc579e1439c0b274198a2 (diff)
downloadlatinime-fb7e08ea8f40ebb4ac57e443f837043e7b57fd87.tar.gz
latinime-fb7e08ea8f40ebb4ac57e443f837043e7b57fd87.tar.xz
latinime-fb7e08ea8f40ebb4ac57e443f837043e7b57fd87.zip
Add writeCharGroup.
bug: 6669677 Change-Id: I36792ba9c511a5148c963096cc93ca8c2e0ee04e
Diffstat (limited to 'java/src')
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java127
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java9
2 files changed, 129 insertions, 7 deletions
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java
index c34d19d22..5c6417b04 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java
@@ -22,9 +22,13 @@ import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictio
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
+import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import java.io.IOException;
+import java.io.OutputStream;
import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
import java.util.Map;
import java.util.Stack;
@@ -251,7 +255,7 @@ public final class BinaryDictIOUtils {
buffer.put((byte)newFlags);
}
- private static void putSInt24(final FusionDictionaryBufferInterface buffer,
+ private static void writeSInt24ToBuffer(final FusionDictionaryBufferInterface buffer,
final int value) {
final int absValue = Math.abs(value);
buffer.put((byte)(((value < 0 ? 0x80 : 0) | (absValue >> 16)) & 0xFF));
@@ -259,6 +263,32 @@ public final class BinaryDictIOUtils {
buffer.put((byte)(absValue & 0xFF));
}
+ private static void writeSInt24ToStream(final OutputStream destination, final int value)
+ throws IOException {
+ final int absValue = Math.abs(value);
+ destination.write((byte)(((value < 0 ? 0x80 : 0) | (absValue >> 16)) & 0xFF));
+ destination.write((byte)((absValue >> 8) & 0xFF));
+ destination.write((byte)(absValue & 0xFF));
+ }
+
+ private static void writeVariableAddress(final OutputStream destination, final int value)
+ throws IOException {
+ switch (BinaryDictInputOutput.getByteSize(value)) {
+ case 1:
+ destination.write((byte)value);
+ break;
+ case 2:
+ destination.write((byte)(0xFF & (value >> 8)));
+ destination.write((byte)(0xFF & value));
+ break;
+ case 3:
+ destination.write((byte)(0xFF & (value >> 16)));
+ destination.write((byte)(0xFF & (value >> 8)));
+ destination.write((byte)(0xFF & value));
+ break;
+ }
+ }
+
/**
* Update a parent address in a CharGroup that is addressed by groupOriginAddress.
*
@@ -277,7 +307,7 @@ public final class BinaryDictIOUtils {
}
final int flags = buffer.readUnsignedByte();
final int parentOffset = newParentAddress - groupOriginAddress;
- putSInt24(buffer, parentOffset);
+ writeSInt24ToBuffer(buffer, parentOffset);
buffer.position(originalPosition);
}
@@ -293,6 +323,22 @@ public final class BinaryDictIOUtils {
}
}
+ private static void writeString(final OutputStream destination, final String word)
+ throws IOException {
+ final int length = word.length();
+ for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) {
+ final int codePoint = word.codePointAt(i);
+ if (CharEncoding.getCharSize(codePoint) == 1) {
+ destination.write((byte)codePoint);
+ } else {
+ destination.write((byte)(0xFF & (codePoint >> 16)));
+ destination.write((byte)(0xFF & (codePoint >> 8)));
+ destination.write((byte)(0xFF & codePoint));
+ }
+ }
+ destination.write((byte)FormatSpec.GROUP_CHARACTERS_TERMINATOR);
+ }
+
/**
* Update a children address in a CharGroup that is addressed by groupOriginAddress.
*
@@ -312,7 +358,82 @@ public final class BinaryDictIOUtils {
if ((FormatSpec.FLAG_IS_TERMINAL) != 0) buffer.readUnsignedByte();
final int childrenOffset = newChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS
? FormatSpec.NO_CHILDREN_ADDRESS : newChildrenAddress - buffer.position();
- putSInt24(buffer, childrenOffset);
+ writeSInt24ToBuffer(buffer, childrenOffset);
buffer.position(originalPosition);
}
+
+ /**
+ * Write a char group to an output stream.
+ * A char group is an in-memory representation of a node in trie.
+ * A char group info is an on-disk representation of a node.
+ *
+ * @param destination the stream to write.
+ * @param info the char group info to be written.
+ */
+ public static void writeCharGroup(final OutputStream destination, final CharGroupInfo info)
+ throws IOException {
+ destination.write((byte)info.mFlags);
+ final int parentOffset = info.mParentAddress == FormatSpec.NO_PARENT_ADDRESS ?
+ FormatSpec.NO_PARENT_ADDRESS : info.mParentAddress - info.mOriginalAddress;
+ writeSInt24ToStream(destination, parentOffset);
+
+ for (int i = 0; i < info.mCharacters.length; ++i) {
+ if (CharEncoding.getCharSize(info.mCharacters[i]) == 1) {
+ destination.write((byte)info.mCharacters[i]);
+ } else {
+ writeSInt24ToStream(destination, info.mCharacters[i]);
+ }
+ }
+ if (info.mCharacters.length > 1) {
+ destination.write((byte)FormatSpec.GROUP_CHARACTERS_TERMINATOR);
+ }
+
+ if ((info.mFlags & FormatSpec.FLAG_IS_TERMINAL) != 0) {
+ destination.write((byte)info.mFrequency);
+ }
+
+ final int childrenOffset = info.mChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS ?
+ 0 : info.mChildrenAddress - info.mOriginalAddress;
+ writeSInt24ToStream(destination, childrenOffset);
+
+ if (info.mShortcutTargets != null && info.mShortcutTargets.size() > 0) {
+ final int shortcutListSize =
+ BinaryDictInputOutput.getShortcutListSize(info.mShortcutTargets);
+ destination.write((byte)(shortcutListSize >> 8));
+ destination.write((byte)(shortcutListSize & 0xFF));
+ final Iterator<WeightedString> shortcutIterator = info.mShortcutTargets.iterator();
+ while (shortcutIterator.hasNext()) {
+ final WeightedString target = shortcutIterator.next();
+ destination.write((byte)BinaryDictInputOutput.makeShortcutFlags(
+ shortcutIterator.hasNext(), target.mFrequency));
+ writeString(destination, target.mWord);
+ }
+ }
+
+ if (info.mBigrams != null) {
+ // TODO: Consolidate this code with the code that computes the size of the bigram list
+ // in BinaryDictionaryInputOutput#computeActualNodeSize
+ for (int i = 0; i < info.mBigrams.size(); ++i) {
+ final int bigramOffset = info.mBigrams.get(i).mAddress - info.mOriginalAddress;
+ final int bigramFrequency = info.mBigrams.get(i).mFrequency;
+ int bigramFlags = (i < info.mBigrams.size() - 1)
+ ? FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT : 0;
+ bigramFlags |= (bigramOffset < 0) ? FormatSpec.FLAG_ATTRIBUTE_OFFSET_NEGATIVE : 0;
+ switch (BinaryDictInputOutput.getByteSize(bigramOffset)) {
+ case 1:
+ bigramFlags |= FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE;
+ break;
+ case 2:
+ bigramFlags |= FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES;
+ break;
+ case 3:
+ bigramFlags |= FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
+ break;
+ }
+ bigramFlags |= bigramFrequency & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY;
+ destination.write((byte)bigramFlags);
+ writeVariableAddress(destination, Math.abs(bigramOffset));
+ }
+ }
+ }
}
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
index 3e6965a17..ba29523c8 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
@@ -153,7 +153,7 @@ public final class BinaryDictInputOutput {
* @param character the character code.
* @return the size in binary encoded-form, either 1 or 3 bytes.
*/
- private static int getCharSize(final int character) {
+ static int getCharSize(final int character) {
// See char encoding in FusionDictionary.java
if (fitsOnOneByte(character)) return 1;
if (FormatSpec.INVALID_CHARACTER == character) return 1;
@@ -337,7 +337,7 @@ public final class BinaryDictInputOutput {
* This is known in advance and does not change according to position in the file
* like address lists do.
*/
- private static int getShortcutListSize(final ArrayList<WeightedString> shortcutList) {
+ static int getShortcutListSize(final ArrayList<WeightedString> shortcutList) {
if (null == shortcutList) return 0;
int size = FormatSpec.GROUP_SHORTCUT_LIST_SIZE_SIZE;
for (final WeightedString shortcut : shortcutList) {
@@ -438,7 +438,7 @@ public final class BinaryDictInputOutput {
* @param address the address
* @return the byte size.
*/
- private static int getByteSize(final int address) {
+ static int getByteSize(final int address) {
assert(address <= UINT24_MAX);
if (!hasChildrenAddress(address)) {
return 0;
@@ -858,7 +858,7 @@ public final class BinaryDictInputOutput {
* @param frequency the frequency of the attribute, 0..15
* @return the flags
*/
- private static final int makeShortcutFlags(final boolean more, final int frequency) {
+ static final int makeShortcutFlags(final boolean more, final int frequency) {
return (more ? FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT : 0)
+ (frequency & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY);
}
@@ -896,6 +896,7 @@ public final class BinaryDictInputOutput {
*/
private static int writePlacedNode(final FusionDictionary dict, byte[] buffer,
final Node node, final FormatOptions formatOptions) {
+ // TODO: Make the code in common with BinaryDictIOUtils#writeCharGroup
int index = node.mCachedAddress;
final int groupCount = node.mData.size();