about summary refs log tree commit diff stats
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r-- tools/Android.mk 4
-rw-r--r-- tools/makedict/src/com/android/inputmethod/latin/BinaryDictInputOutput.java 214
-rw-r--r-- tools/makedict/src/com/android/inputmethod/latin/CharGroupInfo.java 3
-rw-r--r-- tools/makedict/src/com/android/inputmethod/latin/DictionaryMaker.java 29
-rw-r--r-- tools/makedict/src/com/android/inputmethod/latin/FusionDictionary.java 131
-rw-r--r-- tools/makedict/src/com/android/inputmethod/latin/Word.java 9
-rw-r--r-- tools/makedict/src/com/android/inputmethod/latin/XmlDictInputOutput.java 137
-rw-r--r-- tools/makedict/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java 10
8 files changed, 427 insertions, 110 deletions
diff --git a/tools/Android.mk b/tools/Android.mk
index 8f1acc55a..91b2fbbb0 100644
--- a/tools/Android.mk
+++ b/tools/Android.mk
@@ -12,6 +12,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-LOCAL_PATH := $(call my-dir)
-
-include $(call all-makefiles-under,$(LOCAL_PATH))
+include $(call all-subdir-makefiles)
diff --git a/tools/makedict/src/com/android/inputmethod/latin/BinaryDictInputOutput.java b/tools/makedict/src/com/android/inputmethod/latin/BinaryDictInputOutput.java
index 92f402d3e..7aadc677b 100644
--- a/tools/makedict/src/com/android/inputmethod/latin/BinaryDictInputOutput.java
+++ b/tools/makedict/src/com/android/inputmethod/latin/BinaryDictInputOutput.java
@@ -26,6 +26,7 @@ import java.io.OutputStream;
import java.io.RandomAccessFile;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;
@@ -44,8 +45,9 @@ public class BinaryDictInputOutput {
* a | 11 = 3 bytes : FLAG_GROUP_ADDRESS_TYPE_THREEBYTES
* g | has several chars ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_MULTIPLE_CHARS
* s | has a terminal ? 1 bit, 1 = yes, 0 = no : FLAG_IS_TERMINAL
- * | reserved 1 bit, 1 = yes, 0 = no
+ * | has shortcut targets ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_SHORTCUT_TARGETS
* | has bigrams ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_BIGRAMS
+ * | is shortcut only ? 1 bit, 1 = yes, 0 = no : FLAG_IS_SHORTCUT_ONLY
*
* c | IF FLAG_HAS_MULTIPLE_CHARS
* h | char, char, char, char n * (1 or 3 bytes) : use CharGroupInfo for i/o helpers
@@ -71,6 +73,8 @@ public class BinaryDictInputOutput {
* d
* dress
*
+ * | IF FLAG_IS_TERMINAL && FLAG_HAS_SHORTCUT_TARGETS
+ * | shortcut targets address list
* | IF FLAG_IS_TERMINAL && FLAG_HAS_BIGRAMS
* | bigrams address list
*
@@ -126,7 +130,9 @@ public class BinaryDictInputOutput {
private static final int FLAG_HAS_MULTIPLE_CHARS = 0x20;
private static final int FLAG_IS_TERMINAL = 0x10;
+ private static final int FLAG_HAS_SHORTCUT_TARGETS = 0x08;
private static final int FLAG_HAS_BIGRAMS = 0x04;
+ private static final int FLAG_IS_SHORTCUT_ONLY = 0x02;
private static final int FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
private static final int FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
@@ -138,7 +144,6 @@ public class BinaryDictInputOutput {
private static final int GROUP_CHARACTERS_TERMINATOR = 0x1F;
- private static final int GROUP_COUNT_SIZE = 1;
private static final int GROUP_TERMINATOR_SIZE = 1;
private static final int GROUP_FLAGS_SIZE = 1;
private static final int GROUP_FREQUENCY_SIZE = 1;
@@ -149,9 +154,8 @@ public class BinaryDictInputOutput {
private static final int NO_CHILDREN_ADDRESS = Integer.MIN_VALUE;
private static final int INVALID_CHARACTER = -1;
- // Limiting to 127 for upward compatibility
- // TODO: implement a scheme to be able to shoot 256 chargroups in a node
- private static final int MAX_CHARGROUPS_IN_A_NODE = 127;
+ private static final int MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT = 0x7F; // 127
+ private static final int MAX_CHARGROUPS_IN_A_NODE = 0x7FFF; // 32767
private static final int MAX_TERMINAL_FREQUENCY = 255;
@@ -261,6 +265,31 @@ public class BinaryDictInputOutput {
}
/**
+ * Compute the binary size of the group count
+ * @param count the group count
+ * @return the size of the group count, either 1 or 2 bytes.
+ */
+ private static int getGroupCountSize(final int count) {
+ if (MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT >= count) {
+ return 1;
+ } else if (MAX_CHARGROUPS_IN_A_NODE >= count) {
+ return 2;
+ } else {
+ throw new RuntimeException("Can't have more than " + MAX_CHARGROUPS_IN_A_NODE
+ + " groups in a node (found " + count +")");
+ }
+ }
+
+ /**
+ * Compute the binary size of the group count for a node
+ * @param node the node
+ * @return the size of the group count, either 1 or 2 bytes.
+ */
+ private static int getGroupCountSize(final Node node) {
+ return getGroupCountSize(node.mData.size());
+ }
+
+ /**
* Compute the maximum size of a CharGroup, assuming 3-byte addresses for everything.
*
* @param group the CharGroup to compute the size of.
@@ -271,10 +300,13 @@ public class BinaryDictInputOutput {
// If terminal, one byte for the frequency
if (group.isTerminal()) size += GROUP_FREQUENCY_SIZE;
size += GROUP_MAX_ADDRESS_SIZE; // For children address
+ if (null != group.mShortcutTargets) {
+ size += (GROUP_ATTRIBUTE_FLAGS_SIZE + GROUP_ATTRIBUTE_MAX_ADDRESS_SIZE)
+ * group.mShortcutTargets.size();
+ }
if (null != group.mBigrams) {
- for (WeightedString bigram : group.mBigrams) {
- size += GROUP_ATTRIBUTE_FLAGS_SIZE + GROUP_ATTRIBUTE_MAX_ADDRESS_SIZE;
- }
+ size += (GROUP_ATTRIBUTE_FLAGS_SIZE + GROUP_ATTRIBUTE_MAX_ADDRESS_SIZE)
+ * group.mBigrams.size();
}
return size;
}
@@ -286,7 +318,7 @@ public class BinaryDictInputOutput {
* @param node the node to compute the maximum size of.
*/
private static void setNodeMaximumSize(Node node) {
- int size = GROUP_COUNT_SIZE;
+ int size = getGroupCountSize(node);
for (CharGroup g : node.mData) {
final int groupSize = getCharGroupMaximumSize(g);
g.mCachedSize = groupSize;
@@ -303,6 +335,13 @@ public class BinaryDictInputOutput {
}
/**
+ * Helper method to find out whether a char group info is flagged as shortcut only.
+ */
+ private static boolean isShortcutOnly(final CharGroupInfo info) {
+ return 0 != (info.mFlags & FLAG_IS_SHORTCUT_ONLY);
+ }
+
+ /**
* Compute the size, in bytes, that an address will occupy.
*
* This can be used either for children addresses (which are always positive) or for
@@ -378,7 +417,7 @@ public class BinaryDictInputOutput {
* @param dict the dictionary in which the word/attributes are to be found.
*/
private static void computeActualNodeSize(Node node, FusionDictionary dict) {
- int size = GROUP_COUNT_SIZE;
+ int size = getGroupCountSize(node);
for (CharGroup group : node.mData) {
int groupSize = GROUP_FLAGS_SIZE + getGroupCharactersSize(group);
if (group.isTerminal()) groupSize += GROUP_FREQUENCY_SIZE;
@@ -387,6 +426,15 @@ public class BinaryDictInputOutput {
final int offset = group.mChildren.mCachedAddress - offsetBasePoint;
groupSize += getByteSize(offset);
}
+ if (null != group.mShortcutTargets) {
+ for (WeightedString target : group.mShortcutTargets) {
+ final int offsetBasePoint = groupSize + node.mCachedAddress + size
+ + GROUP_FLAGS_SIZE;
+ final int addressOfTarget = findAddressOfWord(dict, target.mWord);
+ final int offset = addressOfTarget - offsetBasePoint;
+ groupSize += getByteSize(offset) + GROUP_FLAGS_SIZE;
+ }
+ }
if (null != group.mBigrams) {
for (WeightedString bigram : group.mBigrams) {
final int offsetBasePoint = groupSize + node.mCachedAddress + size
@@ -412,12 +460,13 @@ public class BinaryDictInputOutput {
int nodeOffset = 0;
for (Node n : flatNodes) {
n.mCachedAddress = nodeOffset;
+ int groupCountSize = getGroupCountSize(n);
int groupOffset = 0;
for (CharGroup g : n.mData) {
- g.mCachedAddress = GROUP_COUNT_SIZE + nodeOffset + groupOffset;
+ g.mCachedAddress = groupCountSize + nodeOffset + groupOffset;
groupOffset += g.mCachedSize;
}
- if (groupOffset + GROUP_COUNT_SIZE != n.mCachedSize) {
+ if (groupOffset + groupCountSize != n.mCachedSize) {
throw new RuntimeException("Bug : Stored and computed node size differ");
}
nodeOffset += n.mCachedSize;
@@ -545,7 +594,21 @@ public class BinaryDictInputOutput {
throw new RuntimeException("Node with a strange address");
}
}
- if (null != group.mBigrams) flags |= FLAG_HAS_BIGRAMS;
+ if (null != group.mShortcutTargets) {
+ if (0 == group.mShortcutTargets.size()) {
+ throw new RuntimeException("0-sized shortcut list must be null");
+ }
+ flags |= FLAG_HAS_SHORTCUT_TARGETS;
+ }
+ if (null != group.mBigrams) {
+ if (0 == group.mBigrams.size()) {
+ throw new RuntimeException("0-sized bigram list must be null");
+ }
+ flags |= FLAG_HAS_BIGRAMS;
+ }
+ if (group.mIsShortcutOnly) {
+ flags |= FLAG_IS_SHORTCUT_ONLY;
+ }
return flags;
}
@@ -592,13 +655,20 @@ public class BinaryDictInputOutput {
private static int writePlacedNode(FusionDictionary dict, byte[] buffer, Node node) {
int index = node.mCachedAddress;
- final int size = node.mData.size();
- if (size > MAX_CHARGROUPS_IN_A_NODE)
- throw new RuntimeException("A node has a group count over 127 (" + size + ").");
-
- buffer[index++] = (byte)size;
+ final int groupCount = node.mData.size();
+ final int countSize = getGroupCountSize(node);
+ if (1 == countSize) {
+ buffer[index++] = (byte)groupCount;
+ } else if (2 == countSize) {
+ // We need to signal 2-byte size by setting the top bit of the MSB to 1, so
+ // we | 0x80 to do this.
+ buffer[index++] = (byte)((groupCount >> 8) | 0x80);
+ buffer[index++] = (byte)(groupCount & 0xFF);
+ } else {
+ throw new RuntimeException("Strange size from getGroupCountSize : " + countSize);
+ }
int groupAddress = index;
- for (int i = 0; i < size; ++i) {
+ for (int i = 0; i < groupCount; ++i) {
CharGroup group = node.mData.get(i);
if (index != group.mCachedAddress) throw new RuntimeException("Bug: write index is not "
+ "the same as the cached address of the group");
@@ -624,20 +694,36 @@ public class BinaryDictInputOutput {
index += shift;
groupAddress += shift;
+ // Write shortcuts
+ if (null != group.mShortcutTargets) {
+ final Iterator shortcutIterator = group.mShortcutTargets.iterator();
+ while (shortcutIterator.hasNext()) {
+ final WeightedString target = (WeightedString)shortcutIterator.next();
+ final int addressOfTarget = findAddressOfWord(dict, target.mWord);
+ ++groupAddress;
+ final int offset = addressOfTarget - groupAddress;
+ int shortcutFlags = makeAttributeFlags(shortcutIterator.hasNext(), offset,
+ target.mFrequency);
+ buffer[index++] = (byte)shortcutFlags;
+ final int shortcutShift = writeVariableAddress(buffer, index, Math.abs(offset));
+ index += shortcutShift;
+ groupAddress += shortcutShift;
+ }
+ }
// Write bigrams
if (null != group.mBigrams) {
- int remainingBigrams = group.mBigrams.size();
- for (WeightedString bigram : group.mBigrams) {
- boolean more = remainingBigrams > 1;
+ final Iterator bigramIterator = group.mBigrams.iterator();
+ while (bigramIterator.hasNext()) {
+ final WeightedString bigram = (WeightedString)bigramIterator.next();
final int addressOfBigram = findAddressOfWord(dict, bigram.mWord);
++groupAddress;
final int offset = addressOfBigram - groupAddress;
- int bigramFlags = makeAttributeFlags(more, offset, bigram.mFrequency);
+ int bigramFlags = makeAttributeFlags(bigramIterator.hasNext(), offset,
+ bigram.mFrequency);
buffer[index++] = (byte)bigramFlags;
final int bigramShift = writeVariableAddress(buffer, index, Math.abs(offset));
index += bigramShift;
groupAddress += bigramShift;
- --remainingBigrams;
}
}
@@ -814,14 +900,43 @@ public class BinaryDictInputOutput {
childrenAddress = NO_CHILDREN_ADDRESS;
break;
}
+ ArrayList<PendingAttribute> shortcutTargets = null;
+ if (0 != (flags & FLAG_HAS_SHORTCUT_TARGETS)) {
+ shortcutTargets = new ArrayList<PendingAttribute>();
+ while (true) {
+ final int targetFlags = source.readUnsignedByte();
+ ++addressPointer;
+ final int sign = 0 == (targetFlags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) ? 1 : -1;
+ int targetAddress = addressPointer;
+ switch (targetFlags & MASK_ATTRIBUTE_ADDRESS_TYPE) {
+ case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
+ targetAddress += sign * source.readUnsignedByte();
+ addressPointer += 1;
+ break;
+ case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
+ targetAddress += sign * source.readUnsignedShort();
+ addressPointer += 2;
+ break;
+ case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
+ final int offset = ((source.readUnsignedByte() << 16)
+ + source.readUnsignedShort());
+ targetAddress += sign * offset;
+ addressPointer += 3;
+ break;
+ default:
+ throw new RuntimeException("Has shortcut targets with no address");
+ }
+ shortcutTargets.add(new PendingAttribute(targetFlags & FLAG_ATTRIBUTE_FREQUENCY,
+ targetAddress));
+ if (0 == (targetFlags & FLAG_ATTRIBUTE_HAS_NEXT)) break;
+ }
+ }
ArrayList<PendingAttribute> bigrams = null;
if (0 != (flags & FLAG_HAS_BIGRAMS)) {
bigrams = new ArrayList<PendingAttribute>();
- boolean more = true;
- while (more) {
- int bigramFlags = source.readUnsignedByte();
+ while (true) {
+ final int bigramFlags = source.readUnsignedByte();
++addressPointer;
- more = (0 != (bigramFlags & FLAG_ATTRIBUTE_HAS_NEXT));
final int sign = 0 == (bigramFlags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) ? 1 : -1;
int bigramAddress = addressPointer;
switch (bigramFlags & MASK_ATTRIBUTE_ADDRESS_TYPE) {
@@ -840,14 +955,28 @@ public class BinaryDictInputOutput {
addressPointer += 3;
break;
default:
- throw new RuntimeException("Has attribute with no address");
+ throw new RuntimeException("Has bigrams with no address");
}
bigrams.add(new PendingAttribute(bigramFlags & FLAG_ATTRIBUTE_FREQUENCY,
bigramAddress));
+ if (0 == (bigramFlags & FLAG_ATTRIBUTE_HAS_NEXT)) break;
}
}
return new CharGroupInfo(originalGroupAddress, addressPointer, flags, characters, frequency,
- childrenAddress, bigrams);
+ childrenAddress, shortcutTargets, bigrams);
+ }
+
+ /**
+ * Reads and returns the char group count out of a file and forwards the pointer.
+ */
+ private static int readCharGroupCount(RandomAccessFile source) throws IOException {
+ final int msb = source.readUnsignedByte();
+ if (MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT >= msb) {
+ return msb;
+ } else {
+ return ((MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT & msb) << 8)
+ + source.readUnsignedByte();
+ }
}
/**
@@ -863,8 +992,8 @@ public class BinaryDictInputOutput {
int address) throws IOException {
final long originalPointer = source.getFilePointer();
source.seek(headerSize);
- final int count = source.readUnsignedByte();
- int groupOffset = 1; // 1 for the group count
+ final int count = readCharGroupCount(source);
+ int groupOffset = getGroupCountSize(count);
final StringBuilder builder = new StringBuilder();
String result = null;
@@ -920,11 +1049,19 @@ public class BinaryDictInputOutput {
Map<Integer, Node> reverseNodeMap, Map<Integer, CharGroup> reverseGroupMap)
throws IOException {
final int nodeOrigin = (int)(source.getFilePointer() - headerSize);
- final int count = source.readUnsignedByte();
+ final int count = readCharGroupCount(source);
final ArrayList<CharGroup> nodeContents = new ArrayList<CharGroup>();
- int groupOffset = nodeOrigin + 1; // 1 byte for the group count
+ int groupOffset = nodeOrigin + getGroupCountSize(count);
for (int i = count; i > 0; --i) {
CharGroupInfo info = readCharGroup(source, groupOffset);
+ ArrayList<WeightedString> shortcutTargets = null;
+ if (null != info.mShortcutTargets) {
+ shortcutTargets = new ArrayList<WeightedString>();
+ for (PendingAttribute target : info.mShortcutTargets) {
+ final String word = getWordAtAddress(source, headerSize, target.mAddress);
+ shortcutTargets.add(new WeightedString(word, target.mFrequency));
+ }
+ }
ArrayList<WeightedString> bigrams = null;
if (null != info.mBigrams) {
bigrams = new ArrayList<WeightedString>();
@@ -942,11 +1079,12 @@ public class BinaryDictInputOutput {
source.seek(currentPosition);
}
nodeContents.add(
- new CharGroup(info.mCharacters, bigrams, info.mFrequency,
- children));
+ new CharGroup(info.mCharacters, shortcutTargets, bigrams, info.mFrequency,
+ children, isShortcutOnly(info)));
} else {
nodeContents.add(
- new CharGroup(info.mCharacters, bigrams, info.mFrequency));
+ new CharGroup(info.mCharacters, shortcutTargets, bigrams, info.mFrequency,
+ isShortcutOnly(info)));
}
groupOffset = info.mEndAddress;
}
@@ -996,7 +1134,7 @@ public class BinaryDictInputOutput {
new FusionDictionary.DictionaryOptions());
if (null != dict) {
for (Word w : dict) {
- newDict.add(w.mWord, w.mFrequency, w.mBigrams);
+ newDict.add(w.mWord, w.mFrequency, w.mShortcutTargets, w.mBigrams);
}
}
diff --git a/tools/makedict/src/com/android/inputmethod/latin/CharGroupInfo.java b/tools/makedict/src/com/android/inputmethod/latin/CharGroupInfo.java
index 6badfd13a..759cd452d 100644
--- a/tools/makedict/src/com/android/inputmethod/latin/CharGroupInfo.java
+++ b/tools/makedict/src/com/android/inputmethod/latin/CharGroupInfo.java
@@ -29,10 +29,12 @@ public class CharGroupInfo {
public final int[] mCharacters;
public final int mFrequency;
public final int mChildrenAddress;
+ public final ArrayList<PendingAttribute> mShortcutTargets;
public final ArrayList<PendingAttribute> mBigrams;
public CharGroupInfo(final int originalAddress, final int endAddress, final int flags,
final int[] characters, final int frequency, final int childrenAddress,
+ final ArrayList<PendingAttribute> shortcutTargets,
final ArrayList<PendingAttribute> bigrams) {
mOriginalAddress = originalAddress;
mEndAddress = endAddress;
@@ -40,6 +42,7 @@ public class CharGroupInfo {
mCharacters = characters;
mFrequency = frequency;
mChildrenAddress = childrenAddress;
+ mShortcutTargets = shortcutTargets;
mBigrams = bigrams;
}
}
diff --git a/tools/makedict/src/com/android/inputmethod/latin/DictionaryMaker.java b/tools/makedict/src/com/android/inputmethod/latin/DictionaryMaker.java
index 1ba01075e..2fcd5750a 100644
--- a/tools/makedict/src/com/android/inputmethod/latin/DictionaryMaker.java
+++ b/tools/makedict/src/com/android/inputmethod/latin/DictionaryMaker.java
@@ -39,11 +39,13 @@ public class DictionaryMaker {
private final static String OPTION_VERSION_2 = "-2";
private final static String OPTION_INPUT_SOURCE = "-s";
private final static String OPTION_INPUT_BIGRAM_XML = "-b";
+ private final static String OPTION_INPUT_SHORTCUT_XML = "-c";
private final static String OPTION_OUTPUT_BINARY = "-d";
private final static String OPTION_OUTPUT_XML = "-x";
private final static String OPTION_HELP = "-h";
public final String mInputBinary;
public final String mInputUnigramXml;
+ public final String mInputShortcutXml;
public final String mInputBigramXml;
public final String mOutputBinary;
public final String mOutputXml;
@@ -72,8 +74,9 @@ public class DictionaryMaker {
private void displayHelp() {
MakedictLog.i("Usage: makedict "
- + "[-s <unigrams.xml> [-b <bigrams.xml>] | -s <binary input>] "
- + " [-d <binary output>] [-x <xml output>] [-2]\n"
+ + "[-s <unigrams.xml> [-b <bigrams.xml>] [-c <shortcuts.xml>] "
+ + "| -s <binary input>] "
+ + "[-d <binary output>] [-x <xml output>] [-2]\n"
+ "\n"
+ " Converts a source dictionary file to one or several outputs.\n"
+ " Source can be an XML file, with an optional XML bigrams file, or a\n"
@@ -90,6 +93,7 @@ public class DictionaryMaker {
}
String inputBinary = null;
String inputUnigramXml = null;
+ String inputShortcutXml = null;
String inputBigramXml = null;
String outputBinary = null;
String outputXml = null;
@@ -105,7 +109,8 @@ public class DictionaryMaker {
} else {
// All these options need an argument
if (args.isEmpty()) {
- throw new RuntimeException("Option " + arg + " requires an argument");
+ throw new IllegalArgumentException("Option " + arg + " is unknown or "
+ + "requires an argument");
}
String filename = args.get(0);
args.remove(0);
@@ -115,12 +120,16 @@ public class DictionaryMaker {
} else {
inputUnigramXml = filename;
}
+ } else if (OPTION_INPUT_SHORTCUT_XML.equals(arg)) {
+ inputShortcutXml = filename;
} else if (OPTION_INPUT_BIGRAM_XML.equals(arg)) {
inputBigramXml = filename;
} else if (OPTION_OUTPUT_BINARY.equals(arg)) {
outputBinary = filename;
} else if (OPTION_OUTPUT_XML.equals(arg)) {
outputXml = filename;
+ } else {
+ throw new IllegalArgumentException("Unknown option : " + arg);
}
}
} else {
@@ -133,13 +142,14 @@ public class DictionaryMaker {
} else if (null == outputBinary) {
outputBinary = arg;
} else {
- throw new RuntimeException("Several output binary files specified");
+ throw new IllegalArgumentException("Several output binary files specified");
}
}
}
mInputBinary = inputBinary;
mInputUnigramXml = inputUnigramXml;
+ mInputShortcutXml = inputShortcutXml;
mInputBigramXml = inputBigramXml;
mOutputBinary = outputBinary;
mOutputXml = outputXml;
@@ -167,7 +177,7 @@ public class DictionaryMaker {
if (null != args.mInputBinary) {
return readBinaryFile(args.mInputBinary);
} else if (null != args.mInputUnigramXml) {
- return readXmlFile(args.mInputUnigramXml, args.mInputBigramXml);
+ return readXmlFile(args.mInputUnigramXml, args.mInputShortcutXml, args.mInputBigramXml);
} else {
throw new RuntimeException("No input file specified");
}
@@ -192,6 +202,7 @@ public class DictionaryMaker {
* Read a dictionary from a unigram XML file, and optionally a bigram XML file.
*
* @param unigramXmlFilename the name of the unigram XML file. May not be null.
+ * @param shortcutXmlFilename the name of the shortcut XML file, or null if there is none.
* @param bigramXmlFilename the name of the bigram XML file. Pass null if there are no bigrams.
* @return the read dictionary.
* @throws FileNotFoundException if one of the files can't be found
@@ -200,12 +211,14 @@ public class DictionaryMaker {
* @throws ParserConfigurationException if the system can't create a SAX parser
*/
private static FusionDictionary readXmlFile(final String unigramXmlFilename,
- final String bigramXmlFilename) throws FileNotFoundException, SAXException,
- IOException, ParserConfigurationException {
+ final String shortcutXmlFilename, final String bigramXmlFilename)
+ throws FileNotFoundException, SAXException, IOException, ParserConfigurationException {
final FileInputStream unigrams = new FileInputStream(new File(unigramXmlFilename));
+ final FileInputStream shortcuts = null == shortcutXmlFilename ? null :
+ new FileInputStream(new File(shortcutXmlFilename));
final FileInputStream bigrams = null == bigramXmlFilename ? null :
new FileInputStream(new File(bigramXmlFilename));
- return XmlDictInputOutput.readDictionaryXml(unigrams, bigrams);
+ return XmlDictInputOutput.readDictionaryXml(unigrams, shortcuts, bigrams);
}
/**
diff --git a/tools/makedict/src/com/android/inputmethod/latin/FusionDictionary.java b/tools/makedict/src/com/android/inputmethod/latin/FusionDictionary.java
index f6220eea2..08143d3ea 100644
--- a/tools/makedict/src/com/android/inputmethod/latin/FusionDictionary.java
+++ b/tools/makedict/src/com/android/inputmethod/latin/FusionDictionary.java
@@ -68,7 +68,7 @@ public class FusionDictionary implements Iterable<Word> {
}
/**
- * A group of characters, with a frequency, shortcuts, bigrams, and children.
+ * A group of characters, with a frequency, shortcut targets, bigrams, and children.
*
* This is the central class of the in-memory representation. A CharGroup is what can
* be seen as a traditional "trie node", except it can hold several characters at the
@@ -82,25 +82,39 @@ public class FusionDictionary implements Iterable<Word> {
public static class CharGroup {
public static final int NOT_A_TERMINAL = -1;
final int mChars[];
+ final ArrayList<WeightedString> mShortcutTargets;
final ArrayList<WeightedString> mBigrams;
final int mFrequency; // NOT_A_TERMINAL == mFrequency indicates this is not a terminal.
+ final boolean mIsShortcutOnly; // Only valid if this is a terminal.
Node mChildren;
// The two following members to help with binary generation
int mCachedSize;
int mCachedAddress;
- public CharGroup(final int[] chars,
- final ArrayList<WeightedString> bigrams, final int frequency) {
+ public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
+ final ArrayList<WeightedString> bigrams, final int frequency,
+ final boolean isShortcutOnly) {
mChars = chars;
mFrequency = frequency;
+ mIsShortcutOnly = isShortcutOnly;
+ if (mIsShortcutOnly && NOT_A_TERMINAL == mFrequency) {
+ throw new RuntimeException("A node must be a terminal to be a shortcut only");
+ }
+ mShortcutTargets = shortcutTargets;
mBigrams = bigrams;
mChildren = null;
}
- public CharGroup(final int[] chars,
- final ArrayList<WeightedString> bigrams, final int frequency, final Node children) {
+ public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
+ final ArrayList<WeightedString> bigrams, final int frequency, final Node children,
+ final boolean isShortcutOnly) {
mChars = chars;
mFrequency = frequency;
+ mIsShortcutOnly = isShortcutOnly;
+ if (mIsShortcutOnly && NOT_A_TERMINAL == mFrequency) {
+ throw new RuntimeException("A node must be a terminal to be a shortcut only");
+ }
+ mShortcutTargets = shortcutTargets;
mBigrams = bigrams;
mChildren = children;
}
@@ -150,13 +164,31 @@ public class FusionDictionary implements Iterable<Word> {
static private int[] getCodePoints(String word) {
final int wordLength = word.length();
int[] array = new int[word.codePointCount(0, wordLength)];
- for (int i = 0; i < wordLength; ++i) {
- array[i] = word.codePointAt(i);
+ for (int i = 0, cp = 0; i < wordLength; i = word.offsetByCodePoints(i, 1), ++cp) {
+ array[cp] = word.codePointAt(i); // cp indexes code points; i is a char offset
}
return array;
}
/**
+ * Helper method to add all words in a list as 0-frequency entries
+ *
+ * These words are added when shortcut targets or bigrams are not found in the dictionary
+ * yet. The same words may be added later with an actual frequency - this is handled by
+ * the private version of add().
+ */
+ private void addNeutralWords(final ArrayList<WeightedString> words) {
+ if (null != words) {
+ for (WeightedString word : words) {
+ final CharGroup t = findWordInTree(mRoot, word.mWord);
+ if (null == t) {
+ add(getCodePoints(word.mWord), 0, null, null, false /* isShortcutOnly */);
+ }
+ }
+ }
+ }
+
+ /**
* Helper method to add a word as a string.
*
* This method adds a word to the dictionary with the given frequency. Optional
@@ -165,18 +197,19 @@ public class FusionDictionary implements Iterable<Word> {
*
* @param word the word to add.
* @param frequency the frequency of the word, in the range [0..255].
+ * @param shortcutTargets a list of shortcut targets for this word, or null.
* @param bigrams a list of bigrams, or null.
*/
- public void add(String word, int frequency, ArrayList<WeightedString> bigrams) {
+ public void add(final String word, final int frequency,
+ final ArrayList<WeightedString> shortcutTargets,
+ final ArrayList<WeightedString> bigrams) {
+ if (null != shortcutTargets) {
+ addNeutralWords(shortcutTargets);
+ }
if (null != bigrams) {
- for (WeightedString bigram : bigrams) {
- final CharGroup t = findWordInTree(mRoot, bigram.mWord);
- if (null == t) {
- add(getCodePoints(bigram.mWord), 0, null);
- }
- }
+ addNeutralWords(bigrams);
}
- add(getCodePoints(word), frequency, bigrams);
+ add(getCodePoints(word), frequency, shortcutTargets, bigrams, false /* isShortcutOnly */);
}
/**
@@ -198,16 +231,37 @@ public class FusionDictionary implements Iterable<Word> {
}
/**
+ * Helper method to add a shortcut that should not be a dictionary word.
+ *
+ * @param word the word to add.
+ * @param frequency the frequency of the word, in the range [0..255].
+ * @param shortcutTargets a list of shortcut targets. May not be null.
+ */
+ public void addShortcutOnly(final String word, final int frequency,
+ final ArrayList<WeightedString> shortcutTargets) {
+ if (null == shortcutTargets) {
+ throw new RuntimeException("Can't add a shortcut without targets");
+ }
+ addNeutralWords(shortcutTargets);
+ add(getCodePoints(word), frequency, shortcutTargets, null, true /* isShortcutOnly */);
+ }
+
+ /**
* Add a word to this dictionary.
*
- * The bigrams, if any, have to be in the dictionary already. If they aren't,
+ * The shortcuts and bigrams, if any, have to be in the dictionary already. If they aren't,
* an exception is thrown.
*
* @param word the word, as an int array.
* @param frequency the frequency of the word, in the range [0..255].
+ * @param shortcutTargets an optional list of shortcut targets for this word (null if none).
* @param bigrams an optional list of bigrams for this word (null if none).
+ * @param isShortcutOnly whether this should be a shortcut only.
*/
- private void add(int[] word, int frequency, ArrayList<WeightedString> bigrams) {
+ private void add(final int[] word, final int frequency,
+ final ArrayList<WeightedString> shortcutTargets,
+ final ArrayList<WeightedString> bigrams,
+ final boolean isShortcutOnly) {
assert(frequency >= 0 && frequency <= 255);
Node currentNode = mRoot;
int charIndex = 0;
@@ -231,7 +285,8 @@ public class FusionDictionary implements Iterable<Word> {
// No node at this point to accept the word. Create one.
final int insertionIndex = findInsertionIndex(currentNode, word[charIndex]);
final CharGroup newGroup = new CharGroup(
- Arrays.copyOfRange(word, charIndex, word.length), bigrams, frequency);
+ Arrays.copyOfRange(word, charIndex, word.length),
+ shortcutTargets, bigrams, frequency, isShortcutOnly);
currentNode.mData.add(insertionIndex, newGroup);
checkStack(currentNode);
} else {
@@ -245,7 +300,8 @@ public class FusionDictionary implements Iterable<Word> {
+ new String(word, 0, word.length));
} else {
final CharGroup newNode = new CharGroup(currentGroup.mChars,
- bigrams, frequency, currentGroup.mChildren);
+ shortcutTargets, bigrams, frequency, currentGroup.mChildren,
+ isShortcutOnly);
currentNode.mData.set(nodeIndex, newNode);
checkStack(currentNode);
}
@@ -254,13 +310,13 @@ public class FusionDictionary implements Iterable<Word> {
// We only have to create a new node and add it to the end of this.
final CharGroup newNode = new CharGroup(
Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length),
- bigrams, frequency);
+ shortcutTargets, bigrams, frequency, isShortcutOnly);
currentGroup.mChildren = new Node();
currentGroup.mChildren.mData.add(newNode);
}
} else {
if (0 == differentCharIndex) {
- // Exact same word. Check the frequency is 0 or -1, and update.
+ // Exact same word. Check the frequency is 0 or NOT_A_TERMINAL, and update.
if (0 != frequency) {
if (0 < currentGroup.mFrequency) {
throw new RuntimeException("This word already exists with frequency "
@@ -268,7 +324,9 @@ public class FusionDictionary implements Iterable<Word> {
+ new String(word, 0, word.length));
}
final CharGroup newGroup = new CharGroup(word,
- currentGroup.mBigrams, frequency, currentGroup.mChildren);
+ currentGroup.mShortcutTargets, currentGroup.mBigrams,
+ frequency, currentGroup.mChildren,
+ currentGroup.mIsShortcutOnly && isShortcutOnly);
currentNode.mData.set(nodeIndex, newGroup);
}
} else {
@@ -277,22 +335,27 @@ public class FusionDictionary implements Iterable<Word> {
Node newChildren = new Node();
final CharGroup newOldWord = new CharGroup(
Arrays.copyOfRange(currentGroup.mChars, differentCharIndex,
- currentGroup.mChars.length),
- currentGroup.mBigrams, currentGroup.mFrequency, currentGroup.mChildren);
+ currentGroup.mChars.length), currentGroup.mShortcutTargets,
+ currentGroup.mBigrams, currentGroup.mFrequency, currentGroup.mChildren,
+ currentGroup.mIsShortcutOnly);
newChildren.mData.add(newOldWord);
final CharGroup newParent;
if (charIndex + differentCharIndex >= word.length) {
newParent = new CharGroup(
Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex),
- bigrams, frequency, newChildren);
+ shortcutTargets, bigrams, frequency, newChildren, isShortcutOnly);
} else {
+ // isShortcutOnly makes no sense for non-terminal nodes. The following node
+ // is non-terminal (frequency -1, i.e. NOT_A_TERMINAL, in FusionDictionary
+ // representation) so we pass false for isShortcutOnly
newParent = new CharGroup(
Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex),
- null, -1, newChildren);
+ null, null, -1, newChildren, false /* isShortcutOnly */);
final CharGroup newWord = new CharGroup(
Arrays.copyOfRange(word, charIndex + differentCharIndex,
- word.length), bigrams, frequency);
+ word.length), shortcutTargets, bigrams, frequency,
+ isShortcutOnly);
final int addIndex = word[charIndex + differentCharIndex]
> currentGroup.mChars[differentCharIndex] ? 1 : 0;
newChildren.mData.add(addIndex, newWord);
@@ -355,7 +418,8 @@ public class FusionDictionary implements Iterable<Word> {
*/
private static int findInsertionIndex(final Node node, int character) {
final List data = node.mData;
- final CharGroup reference = new CharGroup(new int[] { character }, null, 0);
+ final CharGroup reference = new CharGroup(new int[] { character }, null, null, 0,
+ false /* isShortcutOnly */);
int result = Collections.binarySearch(data, reference, CHARGROUP_COMPARATOR);
return result >= 0 ? result : -result - 1;
}
@@ -399,6 +463,16 @@ public class FusionDictionary implements Iterable<Word> {
}
/**
+ * Helper method to find out whether a word is in the dict or not.
+ */
+ public boolean hasWord(final String s) {
+ if (null == s || "".equals(s)) {
+ throw new RuntimeException("Can't search for a null or empty string");
+ }
+ return null != findWordInTree(mRoot, s);
+ }
+
+ /**
* Recursively count the number of character groups in a given branch of the trie.
*
* @param node the parent node.
@@ -573,7 +647,8 @@ public class FusionDictionary implements Iterable<Word> {
}
if (currentGroup.mFrequency >= 0)
return new Word(mCurrentString.toString(), currentGroup.mFrequency,
- currentGroup.mBigrams);
+ currentGroup.mShortcutTargets, currentGroup.mBigrams,
+ currentGroup.mIsShortcutOnly);
} else {
mPositions.removeLast();
currentPos = mPositions.getLast();
diff --git a/tools/makedict/src/com/android/inputmethod/latin/Word.java b/tools/makedict/src/com/android/inputmethod/latin/Word.java
index 916165a41..cf6116f91 100644
--- a/tools/makedict/src/com/android/inputmethod/latin/Word.java
+++ b/tools/makedict/src/com/android/inputmethod/latin/Word.java
@@ -28,12 +28,18 @@ import java.util.ArrayList;
public class Word implements Comparable<Word> {
final String mWord;
final int mFrequency;
+ final boolean mIsShortcutOnly;
+ final ArrayList<WeightedString> mShortcutTargets;
final ArrayList<WeightedString> mBigrams;
- public Word(String word, int frequency, ArrayList<WeightedString> bigrams) {
+ public Word(final String word, final int frequency,
+ final ArrayList<WeightedString> shortcutTargets,
+ final ArrayList<WeightedString> bigrams, final boolean isShortcutOnly) {
mWord = word;
mFrequency = frequency;
+ mShortcutTargets = shortcutTargets;
mBigrams = bigrams;
+ mIsShortcutOnly = isShortcutOnly;
}
/**
@@ -60,6 +66,7 @@ public class Word implements Comparable<Word> {
if (!(o instanceof Word)) return false;
Word w = (Word)o;
return mFrequency == w.mFrequency && mWord.equals(w.mWord)
+ && mShortcutTargets.equals(w.mShortcutTargets)
&& mBigrams.equals(w.mBigrams);
}
}
diff --git a/tools/makedict/src/com/android/inputmethod/latin/XmlDictInputOutput.java b/tools/makedict/src/com/android/inputmethod/latin/XmlDictInputOutput.java
index 35a7b51d6..77c536668 100644
--- a/tools/makedict/src/com/android/inputmethod/latin/XmlDictInputOutput.java
+++ b/tools/makedict/src/com/android/inputmethod/latin/XmlDictInputOutput.java
@@ -42,8 +42,12 @@ public class XmlDictInputOutput {
private static final String WORD_TAG = "w";
private static final String BIGRAM_TAG = "bigram";
+ private static final String SHORTCUT_TAG = "shortcut";
private static final String FREQUENCY_ATTR = "f";
private static final String WORD_ATTR = "word";
+ private static final String SHORTCUT_ONLY_ATTR = "shortcutOnly";
+
+ private static final int SHORTCUT_ONLY_DEFAULT_FREQ = 1;
/**
* SAX handler for a unigram XML file.
@@ -61,6 +65,7 @@ public class XmlDictInputOutput {
int mState; // the state of the parser
int mFreq; // the currently read freq
String mWord; // the current word
+ final HashMap<String, ArrayList<WeightedString>> mShortcutsMap;
final HashMap<String, ArrayList<WeightedString>> mBigramsMap;
/**
@@ -69,9 +74,11 @@ public class XmlDictInputOutput {
* @param dict the dictionary to construct.
* @param bigrams the bigrams as a map. This may be empty, but may not be null.
*/
- public UnigramHandler(FusionDictionary dict,
- HashMap<String, ArrayList<WeightedString>> bigrams) {
+ public UnigramHandler(final FusionDictionary dict,
+ final HashMap<String, ArrayList<WeightedString>> shortcuts,
+ final HashMap<String, ArrayList<WeightedString>> bigrams) {
mDictionary = dict;
+ mShortcutsMap = shortcuts;
mBigramsMap = bigrams;
mWord = "";
mState = START;
@@ -107,47 +114,96 @@ public class XmlDictInputOutput {
@Override
public void endElement(String uri, String localName, String qName) {
if (WORD == mState) {
- mDictionary.add(mWord, mFreq, mBigramsMap.get(mWord));
+ mDictionary.add(mWord, mFreq, mShortcutsMap.get(mWord), mBigramsMap.get(mWord));
mState = START;
}
}
}
+ static private class AssociativeListHandler extends DefaultHandler {
+ private final String SRC_TAG;
+ private final String SRC_ATTRIBUTE;
+ private final String DST_TAG;
+ private final String DST_ATTRIBUTE;
+ private final String DST_FREQ;
+
+ // In this version of the XML file, the frequency attribute is given as an int 0..XML_MAX
+ private final static int XML_MAX = 256;
+ // In memory and in the binary dictionary the frequency is 0..MEMORY_MAX
+ private final static int MEMORY_MAX = 16;
+ private final static int XML_TO_MEMORY_RATIO = XML_MAX / MEMORY_MAX;
+
+ private String mSrc;
+ private final HashMap<String, ArrayList<WeightedString>> mAssocMap;
+
+ public AssociativeListHandler(final String srcTag, final String srcAttribute,
+ final String dstTag, final String dstAttribute, final String dstFreq) {
+ SRC_TAG = srcTag;
+ SRC_ATTRIBUTE = srcAttribute;
+ DST_TAG = dstTag;
+ DST_ATTRIBUTE = dstAttribute;
+ DST_FREQ = dstFreq;
+ mSrc = null;
+ mAssocMap = new HashMap<String, ArrayList<WeightedString>>();
+ }
+
+ @Override
+ public void startElement(String uri, String localName, String qName, Attributes attrs) {
+ if (SRC_TAG.equals(localName)) {
+ mSrc = attrs.getValue(uri, SRC_ATTRIBUTE);
+ } else if (DST_TAG.equals(localName)) {
+ String dst = attrs.getValue(uri, DST_ATTRIBUTE);
+ int freq = Integer.parseInt(attrs.getValue(uri, DST_FREQ));
+ WeightedString bigram = new WeightedString(dst, freq / XML_TO_MEMORY_RATIO);
+ ArrayList<WeightedString> bigramList = mAssocMap.get(mSrc);
+ if (null == bigramList) bigramList = new ArrayList<WeightedString>();
+ bigramList.add(bigram);
+ mAssocMap.put(mSrc, bigramList);
+ }
+ }
+
+ public HashMap<String, ArrayList<WeightedString>> getAssocMap() {
+ return mAssocMap;
+ }
+ }
+
/**
* SAX handler for a bigram XML file.
*/
- static private class BigramHandler extends DefaultHandler {
+ static private class BigramHandler extends AssociativeListHandler {
private final static String BIGRAM_W1_TAG = "bi";
private final static String BIGRAM_W2_TAG = "w";
private final static String BIGRAM_W1_ATTRIBUTE = "w1";
private final static String BIGRAM_W2_ATTRIBUTE = "w2";
private final static String BIGRAM_FREQ_ATTRIBUTE = "p";
- String mW1;
- final HashMap<String, ArrayList<WeightedString>> mBigramsMap;
-
public BigramHandler() {
- mW1 = null;
- mBigramsMap = new HashMap<String, ArrayList<WeightedString>>();
+ super(BIGRAM_W1_TAG, BIGRAM_W1_ATTRIBUTE, BIGRAM_W2_TAG, BIGRAM_W2_ATTRIBUTE,
+ BIGRAM_FREQ_ATTRIBUTE);
}
- @Override
- public void startElement(String uri, String localName, String qName, Attributes attrs) {
- if (BIGRAM_W1_TAG.equals(localName)) {
- mW1 = attrs.getValue(uri, BIGRAM_W1_ATTRIBUTE);
- } else if (BIGRAM_W2_TAG.equals(localName)) {
- String w2 = attrs.getValue(uri, BIGRAM_W2_ATTRIBUTE);
- int freq = Integer.parseInt(attrs.getValue(uri, BIGRAM_FREQ_ATTRIBUTE));
- WeightedString bigram = new WeightedString(w2, freq / 8);
- ArrayList<WeightedString> bigramList = mBigramsMap.get(mW1);
- if (null == bigramList) bigramList = new ArrayList<WeightedString>();
- bigramList.add(bigram);
- mBigramsMap.put(mW1, bigramList);
- }
+ public HashMap<String, ArrayList<WeightedString>> getBigramMap() {
+ return getAssocMap();
}
+ }
- public HashMap<String, ArrayList<WeightedString>> getBigramMap() {
- return mBigramsMap;
+ /**
+ * SAX handler for a shortcut XML file.
+ */
+ static private class ShortcutHandler extends AssociativeListHandler {
+ private final static String ENTRY_TAG = "entry";
+ private final static String ENTRY_ATTRIBUTE = "shortcut";
+ private final static String TARGET_TAG = "target";
+ private final static String REPLACEMENT_ATTRIBUTE = "replacement";
+ private final static String TARGET_PRIORITY_ATTRIBUTE = "priority";
+
+ public ShortcutHandler() {
+ super(ENTRY_TAG, ENTRY_ATTRIBUTE, TARGET_TAG, REPLACEMENT_ATTRIBUTE,
+ TARGET_PRIORITY_ATTRIBUTE);
+ }
+
+ public HashMap<String, ArrayList<WeightedString>> getShortcutMap() {
+ return getAssocMap();
}
}
@@ -158,9 +214,12 @@ public class XmlDictInputOutput {
* representation.
*
* @param unigrams the file to read the data from.
+ * @param shortcuts the file to read the shortcuts from, or null.
+ * @param bigrams the file to read the bigrams from, or null.
* @return the in-memory representation of the dictionary.
*/
- public static FusionDictionary readDictionaryXml(InputStream unigrams, InputStream bigrams)
+ public static FusionDictionary readDictionaryXml(final InputStream unigrams,
+ final InputStream shortcuts, final InputStream bigrams)
throws SAXException, IOException, ParserConfigurationException {
final SAXParserFactory factory = SAXParserFactory.newInstance();
factory.setNamespaceAware(true);
@@ -168,10 +227,23 @@ public class XmlDictInputOutput {
final BigramHandler bigramHandler = new BigramHandler();
if (null != bigrams) parser.parse(bigrams, bigramHandler);
+ final ShortcutHandler shortcutHandler = new ShortcutHandler();
+ if (null != shortcuts) parser.parse(shortcuts, shortcutHandler);
+
final FusionDictionary dict = new FusionDictionary();
final UnigramHandler unigramHandler =
- new UnigramHandler(dict, bigramHandler.getBigramMap());
+ new UnigramHandler(dict, shortcutHandler.getShortcutMap(),
+ bigramHandler.getBigramMap());
parser.parse(unigrams, unigramHandler);
+
+ final HashMap<String, ArrayList<WeightedString>> shortcutMap =
+ shortcutHandler.getShortcutMap();
+ for (final String shortcut : shortcutMap.keySet()) {
+ if (dict.hasWord(shortcut)) continue;
+ // TODO: list a frequency in the shortcut file and use it here, instead of
+ // a constant freq
+ dict.addShortcutOnly(shortcut, SHORTCUT_ONLY_DEFAULT_FREQ, shortcutMap.get(shortcut));
+ }
return dict;
}
@@ -204,9 +276,20 @@ public class XmlDictInputOutput {
}
// TODO: use an XMLSerializer if this gets big
destination.write("<wordlist format=\"2\">\n");
+ destination.write("<!-- Warning: there is no code to read this format yet. -->\n");
for (Word word : set) {
destination.write(" <" + WORD_TAG + " " + WORD_ATTR + "=\"" + word.mWord + "\" "
- + FREQUENCY_ATTR + "=\"" + word.mFrequency + "\">");
+ + FREQUENCY_ATTR + "=\"" + word.mFrequency + "\" " + SHORTCUT_ONLY_ATTR
+ + "=\"" + word.mIsShortcutOnly + "\">");
+ if (null != word.mShortcutTargets) {
+ destination.write("\n");
+ for (WeightedString target : word.mShortcutTargets) {
+ destination.write(" <" + SHORTCUT_TAG + " " + FREQUENCY_ATTR + "=\""
+ + target.mFrequency + "\">" + target.mWord + "</" + SHORTCUT_TAG
+ + ">\n");
+ }
+ destination.write(" ");
+ }
if (null != word.mBigrams) {
destination.write("\n");
for (WeightedString bigram : word.mBigrams) {
diff --git a/tools/makedict/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java b/tools/makedict/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java
index 79cf14b2b..6ac046bbf 100644
--- a/tools/makedict/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java
+++ b/tools/makedict/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java
@@ -39,11 +39,11 @@ public class BinaryDictInputOutputTest extends TestCase {
// that it does not contain any duplicates.
public void testFlattenNodes() {
final FusionDictionary dict = new FusionDictionary();
- dict.add("foo", 1, null);
- dict.add("fta", 1, null);
- dict.add("ftb", 1, null);
- dict.add("bar", 1, null);
- dict.add("fool", 1, null);
+ dict.add("foo", 1, null, null);
+ dict.add("fta", 1, null, null);
+ dict.add("ftb", 1, null, null);
+ dict.add("bar", 1, null, null);
+ dict.add("fool", 1, null, null);
final ArrayList<Node> result = BinaryDictInputOutput.flattenTree(dict.mRoot);
assertEquals(4, result.size());
while (!result.isEmpty()) {