about | summary | refs | log | tree | commit | diff | stats
path: root/tools/dicttool
diff options
context:
space:
mode:
Diffstat (limited to 'tools/dicttool')
-rw-r--r-- tools/dicttool/Android.mk | 31
-rw-r--r-- tools/dicttool/etc/Android.mk | 19
-rwxr-xr-x tools/dicttool/etc/dicttool_aosp | 62
-rwxr-xr-x tools/dicttool/etc/makedict_aosp | 18
-rw-r--r-- tools/dicttool/etc/manifest.txt | 1
-rw-r--r-- tools/dicttool/src/android/inputmethod/latin/dicttool/AdditionalCommandList.java | 22
-rw-r--r-- tools/dicttool/src/android/inputmethod/latin/dicttool/CommandList.java | 26
-rw-r--r-- tools/dicttool/src/android/inputmethod/latin/dicttool/Compress.java | 98
-rw-r--r-- tools/dicttool/src/android/inputmethod/latin/dicttool/DictionaryMaker.java | 336
-rw-r--r-- tools/dicttool/src/android/inputmethod/latin/dicttool/Dicttool.java | 94
-rw-r--r-- tools/dicttool/src/android/inputmethod/latin/dicttool/Info.java | 36
-rw-r--r-- tools/dicttool/src/android/inputmethod/latin/dicttool/Makedict.java | 40
-rw-r--r-- tools/dicttool/src/android/inputmethod/latin/dicttool/MakedictLog.java | 44
-rw-r--r-- tools/dicttool/src/android/inputmethod/latin/dicttool/XmlDictInputOutput.java | 371
-rw-r--r-- tools/dicttool/tests/com/android/inputmethod/latin/makedict/BinaryDictInputOutputTest.java | 59
-rwxr-xr-x tools/dicttool/tests/etc/test-dicttool.sh | 16
16 files changed, 1273 insertions, 0 deletions
diff --git a/tools/dicttool/Android.mk b/tools/dicttool/Android.mk
new file mode 100644
index 000000000..b0b47af00
--- /dev/null
+++ b/tools/dicttool/Android.mk
@@ -0,0 +1,31 @@
+#
+# Copyright (C) 2012 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Host Java library module for dicttool_aosp.
+LOCAL_PATH := $(call my-dir)
+include $(CLEAR_VARS)
+
+# Location, relative to LOCAL_PATH, of the makedict sources shared with the
+# main java/ tree.
+MAKEDICT_CORE_SOURCE_DIRECTORY := ../../java/src/com/android/inputmethod/latin/makedict
+
+LOCAL_MAIN_SRC_FILES := $(call all-java-files-under,$(MAKEDICT_CORE_SOURCE_DIRECTORY))
+LOCAL_TOOL_SRC_FILES := $(call all-java-files-under,src)
+# Source list = every tool file under src, plus the shared makedict files
+# except those whose base name also exists under src (the filter-out on
+# %/<basename> drops them), plus everything under tests.
+LOCAL_SRC_FILES := $(LOCAL_TOOL_SRC_FILES) \
+ $(filter-out $(addprefix %/, $(notdir $(LOCAL_TOOL_SRC_FILES))), $(LOCAL_MAIN_SRC_FILES)) \
+ $(call all-java-files-under,tests)
+LOCAL_JAR_MANIFEST := etc/manifest.txt
+LOCAL_MODULE := dicttool_aosp
+# junit is listed because the sources under tests are part of LOCAL_SRC_FILES.
+LOCAL_JAVA_LIBRARIES := junit
+
+include $(BUILD_HOST_JAVA_LIBRARY)
+# Explicitly include the makefile in etc/, which declares the wrapper scripts.
+include $(LOCAL_PATH)/etc/Android.mk
diff --git a/tools/dicttool/etc/Android.mk b/tools/dicttool/etc/Android.mk
new file mode 100644
index 000000000..0c611b7e9
--- /dev/null
+++ b/tools/dicttool/etc/Android.mk
@@ -0,0 +1,19 @@
+# Copyright (C) 2012 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Declares the dicttool_aosp and makedict_aosp scripts found in this directory
+# as prebuilt host executables.
+LOCAL_PATH := $(call my-dir)
+include $(CLEAR_VARS)
+
+LOCAL_PREBUILT_EXECUTABLES := dicttool_aosp makedict_aosp
+include $(BUILD_HOST_PREBUILT)
diff --git a/tools/dicttool/etc/dicttool_aosp b/tools/dicttool/etc/dicttool_aosp
new file mode 100755
index 000000000..a4879a279
--- /dev/null
+++ b/tools/dicttool/etc/dicttool_aosp
@@ -0,0 +1,62 @@
+#!/bin/sh
+# Copyright 2011, The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Set up prog to be the path of this script, including following symlinks,
+# and set up progdir to be the fully-qualified pathname of its directory.
+prog="$0"
+while [ -h "${prog}" ]; do
+    # For a symlink, `ls -ld` ends with "name -> target"; keep only the target.
+    newProg=`/bin/ls -ld "${prog}"`
+    newProg=`expr "${newProg}" : ".* -> \(.*\)$"`
+    if expr "x${newProg}" : 'x/' >/dev/null; then
+        # Absolute link target: follow it as is.
+        prog="${newProg}"
+    else
+        # Relative link target: resolve it against the directory of the link.
+        progdir=`dirname "${prog}"`
+        prog="${progdir}/${newProg}"
+    fi
+done
+# Turn progdir into an absolute path by cd-ing into it, then go back to the
+# directory the caller started from.
+oldwd=`pwd`
+progdir=`dirname "${prog}"`
+cd "${progdir}"
+progdir=`pwd`
+prog="${progdir}"/`basename "${prog}"`
+cd "${oldwd}"
+
+# Locate the jar: first next to this script, then in ../tools/lib, then in
+# ../framework (both relative to the script's directory).
+jarfile=dicttool_aosp.jar
+frameworkdir="$progdir"
+if [ ! -r "$frameworkdir/$jarfile" ]
+then
+    frameworkdir=`dirname "$progdir"`/tools/lib
+fi
+if [ ! -r "$frameworkdir/$jarfile" ]
+then
+    frameworkdir=`dirname "$progdir"`/framework
+fi
+if [ ! -r "$frameworkdir/$jarfile" ]
+then
+    # Report on stderr: the tool itself can write payload data to stdout
+    # (e.g. "compress <file> -"), so stdout must stay free of diagnostics.
+    echo `basename "$prog"`": can't find $jarfile" >&2
+    exit 1
+fi
+
+if [ "$OSTYPE" = "cygwin" ] ; then
+    # Under Cygwin, hand java a Windows-style path.
+    jarpath=`cygpath -w "$frameworkdir/$jarfile"`
+    progdir=`cygpath -w "$progdir"`
+else
+    jarpath="$frameworkdir/$jarfile"
+fi
+
+# might need more memory, e.g. -Xmx128M
+exec java -ea -jar "$jarpath" "$@"
diff --git a/tools/dicttool/etc/makedict_aosp b/tools/dicttool/etc/makedict_aosp
new file mode 100755
index 000000000..095c50538
--- /dev/null
+++ b/tools/dicttool/etc/makedict_aosp
@@ -0,0 +1,18 @@
+#!/bin/sh
+# Copyright (C) 2012, The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Dicttool supports making the dictionary using the 'makedict' command and
+# the same arguments that the old 'makedict' command used to accept.
+# "$@" must be quoted so that arguments containing spaces or glob characters
+# (e.g. file names) reach dicttool_aosp unchanged instead of being re-split.
+dicttool_aosp makedict "$@"
diff --git a/tools/dicttool/etc/manifest.txt b/tools/dicttool/etc/manifest.txt
new file mode 100644
index 000000000..67c85214c
--- /dev/null
+++ b/tools/dicttool/etc/manifest.txt
@@ -0,0 +1 @@
+Main-Class: com.android.inputmethod.latin.dicttool.Dicttool
diff --git a/tools/dicttool/src/android/inputmethod/latin/dicttool/AdditionalCommandList.java b/tools/dicttool/src/android/inputmethod/latin/dicttool/AdditionalCommandList.java
new file mode 100644
index 000000000..8d4eb751b
--- /dev/null
+++ b/tools/dicttool/src/android/inputmethod/latin/dicttool/AdditionalCommandList.java
@@ -0,0 +1,22 @@
+/**
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.android.inputmethod.latin.dicttool;
+
+/**
+ * Second command registry consulted by Dicttool's static initializer, right after
+ * CommandList. This version registers nothing.
+ */
+public class AdditionalCommandList {
+ /**
+  * Registers additional commands with Dicttool. Currently a no-op.
+  */
+ public static void populate() {
+ }
+}
diff --git a/tools/dicttool/src/android/inputmethod/latin/dicttool/CommandList.java b/tools/dicttool/src/android/inputmethod/latin/dicttool/CommandList.java
new file mode 100644
index 000000000..d16b069fe
--- /dev/null
+++ b/tools/dicttool/src/android/inputmethod/latin/dicttool/CommandList.java
@@ -0,0 +1,26 @@
+/**
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.android.inputmethod.latin.dicttool;
+
+/**
+ * Registry of the commands that ship with dicttool.
+ */
+public class CommandList {
+    /**
+     * Registers every built-in command with {@link Dicttool}, keyed by the name the user
+     * types on the command line. Each name is taken from the command's own COMMAND constant.
+     */
+    public static void populate() {
+        Dicttool.addCommand(Info.COMMAND, Info.class);
+        Dicttool.addCommand(Compress.Compressor.COMMAND, Compress.Compressor.class);
+        Dicttool.addCommand(Compress.Uncompressor.COMMAND, Compress.Uncompressor.class);
+        Dicttool.addCommand(Makedict.COMMAND, Makedict.class);
+    }
+}
diff --git a/tools/dicttool/src/android/inputmethod/latin/dicttool/Compress.java b/tools/dicttool/src/android/inputmethod/latin/dicttool/Compress.java
new file mode 100644
index 000000000..3cb0a12c4
--- /dev/null
+++ b/tools/dicttool/src/android/inputmethod/latin/dicttool/Compress.java
@@ -0,0 +1,98 @@
+/**
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.android.inputmethod.latin.dicttool;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.zip.GZIPInputStream;
+import java.util.zip.GZIPOutputStream;
+
+/**
+ * gzip compression and decompression commands for dicttool.
+ *
+ * Both commands take an optional source and an optional destination file name; a missing
+ * name or "-" selects standard input / standard output respectively.
+ */
+public class Compress {
+
+    /**
+     * Wraps a stream so that everything written to it is gzip-compressed.
+     *
+     * @param out the stream that receives the compressed bytes.
+     * @return the compressing stream.
+     * @throws IOException if the gzip stream cannot be set up on {@code out}.
+     */
+    private static OutputStream getCompressedStream(final OutputStream out)
+            throws java.io.IOException {
+        return new GZIPOutputStream(out);
+    }
+
+    /**
+     * Wraps a gzip-compressed stream so that reads return the uncompressed bytes.
+     *
+     * @param in the stream holding gzip data.
+     * @return the decompressing stream.
+     * @throws IOException if the gzip stream cannot be set up on {@code in}.
+     */
+    private static InputStream getUncompressedStream(final InputStream in) throws IOException {
+        return new GZIPInputStream(in);
+    }
+
+    /**
+     * Copies everything from input to output, then closes both streams.
+     *
+     * Both streams are closed even when reading or writing fails, so a failed copy does not
+     * leak file handles. Closing the output is what lets a gzip stream finish its data.
+     *
+     * @param input the stream to read from. Closed on return.
+     * @param output the stream to write to. Closed on return.
+     * @throws IOException if reading, writing or closing fails.
+     */
+    public static void copy(final InputStream input, final OutputStream output) throws IOException {
+        try {
+            final byte[] buffer = new byte[1000];
+            for (int readBytes = input.read(buffer); readBytes >= 0;
+                    readBytes = input.read(buffer)) {
+                output.write(buffer, 0, readBytes);
+            }
+        } finally {
+            // Same close order as on the success path: input first, then output. The nested
+            // finally makes sure output is closed even if closing input throws.
+            try {
+                input.close();
+            } finally {
+                output.close();
+            }
+        }
+    }
+
+    /**
+     * The "compress" command: gzips the source into the destination.
+     */
+    static public class Compressor extends Dicttool.Command {
+        public static final String COMMAND = "compress";
+        public static final String STDIN_OR_STDOUT = "-";
+
+        public Compressor() {
+        }
+
+        public String getHelp() {
+            return COMMAND + " <src_filename> <dst_filename>: "
+                    + "Compresses a file using gzip compression";
+        }
+
+        /**
+         * Runs the command on mArgs: [source [destination]].
+         *
+         * @throws RuntimeException if more than two arguments were given.
+         * @throws IOException if a file cannot be opened, read or written.
+         */
+        public void run() throws IOException {
+            if (mArgs.length > 2) {
+                throw new RuntimeException("Too many arguments for command " + COMMAND);
+            }
+            final String inFilename = mArgs.length >= 1 ? mArgs[0] : STDIN_OR_STDOUT;
+            final String outFilename = mArgs.length >= 2 ? mArgs[1] : STDIN_OR_STDOUT;
+            final InputStream input = inFilename.equals(STDIN_OR_STDOUT) ? System.in
+                    : new FileInputStream(new File(inFilename));
+            final OutputStream output = outFilename.equals(STDIN_OR_STDOUT) ? System.out
+                    : new FileOutputStream(new File(outFilename));
+            copy(input, getCompressedStream(output));
+        }
+    }
+
+    /**
+     * The "uncompress" command: gunzips the source into the destination.
+     */
+    static public class Uncompressor extends Dicttool.Command {
+        public static final String COMMAND = "uncompress";
+        public static final String STDIN_OR_STDOUT = "-";
+
+        public Uncompressor() {
+        }
+
+        public String getHelp() {
+            return COMMAND + " <src_filename> <dst_filename>: "
+                    + "Uncompresses a file compressed with gzip compression";
+        }
+
+        /**
+         * Runs the command on mArgs: [source [destination]].
+         *
+         * @throws RuntimeException if more than two arguments were given.
+         * @throws IOException if a file cannot be opened, read or written, or if the source
+         *         cannot be read as gzip data.
+         */
+        public void run() throws IOException {
+            if (mArgs.length > 2) {
+                throw new RuntimeException("Too many arguments for command " + COMMAND);
+            }
+            final String inFilename = mArgs.length >= 1 ? mArgs[0] : STDIN_OR_STDOUT;
+            final String outFilename = mArgs.length >= 2 ? mArgs[1] : STDIN_OR_STDOUT;
+            final InputStream input = inFilename.equals(STDIN_OR_STDOUT) ? System.in
+                    : new FileInputStream(new File(inFilename));
+            final OutputStream output = outFilename.equals(STDIN_OR_STDOUT) ? System.out
+                    : new FileOutputStream(new File(outFilename));
+            copy(getUncompressedStream(input), output);
+        }
+    }
+}
diff --git a/tools/dicttool/src/android/inputmethod/latin/dicttool/DictionaryMaker.java b/tools/dicttool/src/android/inputmethod/latin/dicttool/DictionaryMaker.java
new file mode 100644
index 000000000..fbfc1dabb
--- /dev/null
+++ b/tools/dicttool/src/android/inputmethod/latin/dicttool/DictionaryMaker.java
@@ -0,0 +1,336 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.android.inputmethod.latin.dicttool;
+
+import com.android.inputmethod.latin.makedict.BinaryDictInputOutput;
+import com.android.inputmethod.latin.makedict.FusionDictionary;
+import com.android.inputmethod.latin.makedict.MakedictLog;
+import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+import java.util.Arrays;
+import java.util.LinkedList;
+
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.xml.sax.SAXException;
+
+/**
+ * Main class/method for DictionaryMaker.
+ *
+ * Parses the makedict command line, reads one source dictionary (XML or binary) and writes
+ * it to every output that was requested.
+ */
+public class DictionaryMaker {
+
+    /**
+     * Parsed and validated makedict command-line arguments.
+     *
+     * Each public field holds the file name given for that role, or null when the role was
+     * not specified on the command line.
+     */
+    static class Arguments {
+        private final static String OPTION_VERSION_2 = "-2";
+        private final static String OPTION_INPUT_SOURCE = "-s";
+        private final static String OPTION_INPUT_BIGRAM_XML = "-b";
+        private final static String OPTION_INPUT_SHORTCUT_XML = "-c";
+        private final static String OPTION_OUTPUT_BINARY = "-d";
+        private final static String OPTION_OUTPUT_BINARY_FORMAT_VERSION_1 = "-d1";
+        private final static String OPTION_OUTPUT_XML = "-x";
+        private final static String OPTION_HELP = "-h";
+        public final String mInputBinary;
+        public final String mInputUnigramXml;
+        public final String mInputShortcutXml;
+        public final String mInputBigramXml;
+        public final String mOutputBinary;
+        public final String mOutputBinaryFormat1;
+        public final String mOutputXml;
+
+        /**
+         * Validates the combination of inputs and outputs.
+         *
+         * @throws RuntimeException if there is not exactly one input, if there is no output,
+         *         or if any two of the given paths resolve to the same file.
+         * @throws IOException if a path cannot be canonicalized.
+         */
+        private void checkIntegrity() throws IOException {
+            checkHasExactlyOneInput();
+            checkHasAtLeastOneOutput();
+            checkNotSameFile(mInputBinary, mOutputBinary);
+            checkNotSameFile(mInputBinary, mOutputBinaryFormat1);
+            checkNotSameFile(mInputBinary, mOutputXml);
+            checkNotSameFile(mInputUnigramXml, mOutputBinary);
+            checkNotSameFile(mInputUnigramXml, mOutputBinaryFormat1);
+            checkNotSameFile(mInputUnigramXml, mOutputXml);
+            checkNotSameFile(mInputShortcutXml, mOutputBinary);
+            checkNotSameFile(mInputShortcutXml, mOutputBinaryFormat1);
+            checkNotSameFile(mInputShortcutXml, mOutputXml);
+            checkNotSameFile(mInputBigramXml, mOutputBinary);
+            checkNotSameFile(mInputBigramXml, mOutputBinaryFormat1);
+            checkNotSameFile(mInputBigramXml, mOutputXml);
+            checkNotSameFile(mOutputBinary, mOutputBinaryFormat1);
+            checkNotSameFile(mOutputBinary, mOutputXml);
+            checkNotSameFile(mOutputBinaryFormat1, mOutputXml);
+        }
+
+        /**
+         * Checks that exactly one main input (unigram XML or binary) was given, and that a
+         * binary input is not combined with a separate bigram file.
+         */
+        private void checkHasExactlyOneInput() {
+            if (null == mInputUnigramXml && null == mInputBinary) {
+                throw new RuntimeException("No input file specified");
+            } else if (null != mInputUnigramXml && null != mInputBinary) {
+                throw new RuntimeException("Both input XML and binary specified");
+            } else if (null != mInputBinary && null != mInputBigramXml) {
+                throw new RuntimeException("Cannot specify a binary input and a separate bigram "
+                        + "file");
+            }
+        }
+
+        /**
+         * Checks that at least one of the three possible outputs was requested.
+         */
+        private void checkHasAtLeastOneOutput() {
+            if (null == mOutputBinary && null == mOutputBinaryFormat1 && null == mOutputXml) {
+                throw new RuntimeException("No output specified");
+            }
+        }
+
+        /**
+         * Utility method that throws an exception if path1 and path2 point to the same file.
+         *
+         * A null path means "not specified" and never matches anything.
+         */
+        private static void checkNotSameFile(final String path1, final String path2)
+                throws IOException {
+            if (null == path1 || null == path2) return;
+            if (new File(path1).getCanonicalPath().equals(new File(path2).getCanonicalPath())) {
+                throw new RuntimeException(path1 + " and " + path2 + " are the same file: "
+                        + " refusing to process.");
+            }
+        }
+
+        private void displayHelp() {
+            MakedictLog.i(getHelp());
+        }
+
+        /**
+         * @return the usage text of the makedict command.
+         */
+        public static String getHelp() {
+            return "Usage: makedict "
+                    + "[-s <unigrams.xml> [-b <bigrams.xml>] [-c <shortcuts_and_whitelist.xml>] "
+                    + "| -s <binary input>] [-d <binary output format version 2>] "
+                    + "[-d1 <binary output format version 1>] [-x <xml output>] [-2]\n"
+                    + "\n"
+                    + " Converts a source dictionary file to one or several outputs.\n"
+                    + " Source can be an XML file, with an optional XML bigrams file, or a\n"
+                    + " binary dictionary file.\n"
+                    + " Binary version 1 (Ice Cream Sandwich), 2 (Jelly Bean) and XML outputs\n"
+                    + " are supported. All three can be output at the same time, but the same\n"
+                    + " output format cannot be specified several times. The behavior is\n"
+                    + " unspecified if the same file is specified for input and output, or for\n"
+                    + " several outputs.";
+        }
+
+        /**
+         * Parses the command line.
+         *
+         * Options starting with '-' are matched against the OPTION_* constants; every option
+         * except -2 and -h consumes the following argument as a file name. A bare argument is
+         * taken as the input if none is set yet, otherwise as the version 2 binary output.
+         *
+         * @param argsArray the arguments following the command name.
+         * @throws IllegalArgumentException on an empty argument, an unknown option, an option
+         *         missing its file name, or a surplus bare argument.
+         * @throws RuntimeException if the resulting combination fails the integrity checks.
+         * @throws IOException if a path cannot be canonicalized during the integrity checks.
+         */
+        public Arguments(String[] argsArray) throws IOException {
+            final LinkedList<String> args = new LinkedList<String>(Arrays.asList(argsArray));
+            if (args.isEmpty()) {
+                displayHelp();
+            }
+            String inputBinary = null;
+            String inputUnigramXml = null;
+            String inputShortcutXml = null;
+            String inputBigramXml = null;
+            String outputBinary = null;
+            String outputBinaryFormat1 = null;
+            String outputXml = null;
+
+            while (!args.isEmpty()) {
+                final String arg = args.removeFirst();
+                if (arg.isEmpty()) {
+                    // An empty argument (e.g. from an unset shell variable) would make
+                    // charAt(0) below fail with an opaque StringIndexOutOfBoundsException.
+                    throw new IllegalArgumentException("Empty argument");
+                }
+                if (arg.charAt(0) == '-') {
+                    if (OPTION_VERSION_2.equals(arg)) {
+                        // Do nothing, this is the default
+                    } else if (OPTION_HELP.equals(arg)) {
+                        displayHelp();
+                    } else {
+                        // All these options need an argument
+                        if (args.isEmpty()) {
+                            throw new IllegalArgumentException("Option " + arg + " is unknown or "
+                                    + "requires an argument");
+                        }
+                        final String filename = args.removeFirst();
+                        if (OPTION_INPUT_SOURCE.equals(arg)) {
+                            if (BinaryDictInputOutput.isBinaryDictionary(filename)) {
+                                inputBinary = filename;
+                            } else {
+                                inputUnigramXml = filename;
+                            }
+                        } else if (OPTION_INPUT_SHORTCUT_XML.equals(arg)) {
+                            inputShortcutXml = filename;
+                        } else if (OPTION_INPUT_BIGRAM_XML.equals(arg)) {
+                            inputBigramXml = filename;
+                        } else if (OPTION_OUTPUT_BINARY.equals(arg)) {
+                            outputBinary = filename;
+                        } else if (OPTION_OUTPUT_BINARY_FORMAT_VERSION_1.equals(arg)) {
+                            outputBinaryFormat1 = filename;
+                        } else if (OPTION_OUTPUT_XML.equals(arg)) {
+                            outputXml = filename;
+                        } else {
+                            throw new IllegalArgumentException("Unknown option : " + arg);
+                        }
+                    }
+                } else {
+                    if (null == inputBinary && null == inputUnigramXml) {
+                        if (BinaryDictInputOutput.isBinaryDictionary(arg)) {
+                            inputBinary = arg;
+                        } else {
+                            inputUnigramXml = arg;
+                        }
+                    } else if (null == outputBinary) {
+                        outputBinary = arg;
+                    } else {
+                        throw new IllegalArgumentException("Several output binary files specified");
+                    }
+                }
+            }
+
+            mInputBinary = inputBinary;
+            mInputUnigramXml = inputUnigramXml;
+            mInputShortcutXml = inputShortcutXml;
+            mInputBigramXml = inputBigramXml;
+            mOutputBinary = outputBinary;
+            mOutputBinaryFormat1 = outputBinaryFormat1;
+            mOutputXml = outputXml;
+            checkIntegrity();
+        }
+    }
+
+    /**
+     * Entry point: parses args, reads the input dictionary and writes the requested outputs.
+     */
+    public static void main(String[] args)
+            throws FileNotFoundException, ParserConfigurationException, SAXException, IOException,
+            UnsupportedFormatException {
+        final Arguments parsedArgs = new Arguments(args);
+        FusionDictionary dictionary = readInputFromParsedArgs(parsedArgs);
+        writeOutputToParsedArgs(parsedArgs, dictionary);
+    }
+
+    /**
+     * Closes an input stream, ignoring a null stream and any error raised by close().
+     *
+     * Only meant for streams that were read from: a failure to close them loses no data.
+     */
+    private static void closeQuietly(final FileInputStream stream) {
+        if (null == stream) return;
+        try {
+            stream.close();
+        } catch (IOException e) {
+            // do nothing
+        }
+    }
+
+    /**
+     * Invoke the right input method according to args.
+     *
+     * @param args the parsed command line arguments.
+     * @return the read dictionary.
+     */
+    private static FusionDictionary readInputFromParsedArgs(final Arguments args)
+            throws IOException, UnsupportedFormatException, ParserConfigurationException,
+            SAXException, FileNotFoundException {
+        if (null != args.mInputBinary) {
+            return readBinaryFile(args.mInputBinary);
+        } else if (null != args.mInputUnigramXml) {
+            return readXmlFile(args.mInputUnigramXml, args.mInputShortcutXml, args.mInputBigramXml);
+        } else {
+            throw new RuntimeException("No input file specified");
+        }
+    }
+
+    /**
+     * Read a dictionary from the name of a binary file.
+     *
+     * @param binaryFilename the name of the file in the binary dictionary format.
+     * @return the read dictionary.
+     * @throws FileNotFoundException if the file can't be found
+     * @throws IOException if the input file can't be read
+     * @throws UnsupportedFormatException if the binary file is not in the expected format
+     */
+    private static FusionDictionary readBinaryFile(final String binaryFilename)
+            throws FileNotFoundException, IOException, UnsupportedFormatException {
+        FileInputStream inStream = null;
+
+        try {
+            final File file = new File(binaryFilename);
+            inStream = new FileInputStream(file);
+            final ByteBuffer buffer = inStream.getChannel().map(
+                    FileChannel.MapMode.READ_ONLY, 0, file.length());
+            return BinaryDictInputOutput.readDictionaryBinary(buffer, null);
+        } finally {
+            closeQuietly(inStream);
+        }
+    }
+
+    /**
+     * Read a dictionary from a unigram XML file, and optionally a bigram XML file.
+     *
+     * All streams opened here are closed before returning, whether or not reading succeeded.
+     * NOTE(review): this assumes readDictionaryXml has consumed its streams by the time it
+     * returns - confirm against XmlDictInputOutput.
+     *
+     * @param unigramXmlFilename the name of the unigram XML file. May not be null.
+     * @param shortcutXmlFilename the name of the shortcut/whitelist XML file, or null if none.
+     * @param bigramXmlFilename the name of the bigram XML file. Pass null if there are no bigrams.
+     * @return the read dictionary.
+     * @throws FileNotFoundException if one of the files can't be found
+     * @throws SAXException if one or more of the XML files is not well-formed
+     * @throws IOException if one the input files can't be read
+     * @throws ParserConfigurationException if the system can't create a SAX parser
+     */
+    private static FusionDictionary readXmlFile(final String unigramXmlFilename,
+            final String shortcutXmlFilename, final String bigramXmlFilename)
+            throws FileNotFoundException, SAXException, IOException, ParserConfigurationException {
+        FileInputStream unigrams = null;
+        FileInputStream shortcuts = null;
+        FileInputStream bigrams = null;
+        try {
+            // Opened inside the try so that a failure to open a later file still closes the
+            // ones that were already opened.
+            unigrams = new FileInputStream(new File(unigramXmlFilename));
+            if (null != shortcutXmlFilename) {
+                shortcuts = new FileInputStream(new File(shortcutXmlFilename));
+            }
+            if (null != bigramXmlFilename) {
+                bigrams = new FileInputStream(new File(bigramXmlFilename));
+            }
+            return XmlDictInputOutput.readDictionaryXml(unigrams, shortcuts, bigrams);
+        } finally {
+            closeQuietly(unigrams);
+            closeQuietly(shortcuts);
+            closeQuietly(bigrams);
+        }
+    }
+
+    /**
+     * Invoke the right output method according to args.
+     *
+     * This will write the passed dictionary to the file(s) passed in the command line arguments.
+     * @param args the parsed arguments.
+     * @param dict the dictionary to output.
+     * @throws FileNotFoundException if one of the output files can't be created.
+     * @throws IOException if one of the output files can't be written to.
+     */
+    private static void writeOutputToParsedArgs(final Arguments args, final FusionDictionary dict)
+            throws FileNotFoundException, IOException, UnsupportedFormatException,
+            IllegalArgumentException {
+        if (null != args.mOutputBinary) {
+            writeBinaryDictionary(args.mOutputBinary, dict, 2);
+        }
+        if (null != args.mOutputBinaryFormat1) {
+            writeBinaryDictionary(args.mOutputBinaryFormat1, dict, 1);
+        }
+        if (null != args.mOutputXml) {
+            writeXmlDictionary(args.mOutputXml, dict);
+        }
+    }
+
+    /**
+     * Write the dictionary in binary format to the specified filename.
+     *
+     * @param outputFilename the name of the file to write to.
+     * @param dict the dictionary to write.
+     * @param version the binary format version to use.
+     * @throws FileNotFoundException if the output file can't be created.
+     * @throws IOException if the output file can't be written to.
+     */
+    private static void writeBinaryDictionary(final String outputFilename,
+            final FusionDictionary dict, final int version)
+            throws FileNotFoundException, IOException, UnsupportedFormatException {
+        final FileOutputStream outStream = new FileOutputStream(outputFilename);
+        try {
+            BinaryDictInputOutput.writeDictionaryBinary(outStream, dict, version);
+        } finally {
+            // Releases the file even when writing fails. Closing an already-closed
+            // FileOutputStream has no effect, so this is safe if the writer closed it too.
+            outStream.close();
+        }
+    }
+
+    /**
+     * Write the dictionary in XML format to the specified filename.
+     *
+     * @param outputFilename the name of the file to write to.
+     * @param dict the dictionary to write.
+     * @throws FileNotFoundException if the output file can't be created.
+     * @throws IOException if the output file can't be written to.
+     */
+    private static void writeXmlDictionary(final String outputFilename,
+            final FusionDictionary dict) throws FileNotFoundException, IOException {
+        final FileWriter writer = new FileWriter(outputFilename);
+        try {
+            XmlDictInputOutput.writeDictionaryXml(writer, dict);
+        } finally {
+            // Flushes and releases the file even when writing fails; a second close on an
+            // already-closed writer has no effect.
+            writer.close();
+        }
+    }
+}
diff --git a/tools/dicttool/src/android/inputmethod/latin/dicttool/Dicttool.java b/tools/dicttool/src/android/inputmethod/latin/dicttool/Dicttool.java
new file mode 100644
index 000000000..bf417fb5a
--- /dev/null
+++ b/tools/dicttool/src/android/inputmethod/latin/dicttool/Dicttool.java
@@ -0,0 +1,94 @@
+/**
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.android.inputmethod.latin.dicttool;
+
+import java.util.Arrays;
+import java.util.HashMap;
+
+/**
+ * Command-line entry point of dicttool.
+ *
+ * Keeps a registry of named commands, instantiates the one named by the first argument and
+ * runs it with the remaining arguments.
+ */
+public class Dicttool {
+
+    /**
+     * Base class of every dicttool command. Subclasses must have a public no-argument
+     * constructor, since instances are created reflectively with newInstance().
+     */
+    public static abstract class Command {
+        /** Arguments following the command name; set through {@link #setArgs}. */
+        protected String[] mArgs;
+        public void setArgs(String[] args) throws IllegalArgumentException {
+            mArgs = args;
+        }
+        /** @return the usage text shown for this command in the global help. */
+        abstract public String getHelp();
+        /** Executes the command on mArgs. */
+        abstract public void run() throws Exception;
+    }
+
+    /** Registered commands, keyed by command-line name. */
+    static HashMap<String, Class<? extends Command>> sCommands =
+            new HashMap<String, Class<? extends Command>>();
+    static {
+        // Must stay below the sCommands initializer: populate() calls addCommand(), which
+        // writes to the map.
+        CommandList.populate();
+        AdditionalCommandList.populate();
+    }
+
+    /**
+     * Registers a command class under the given name, replacing any previous registration.
+     */
+    public static void addCommand(final String commandName, final Class<? extends Command> cls) {
+        sCommands.put(commandName, cls);
+    }
+
+    /**
+     * Creates a fresh instance of the command registered under commandName.
+     *
+     * The name must be registered; callers check with {@link #isCommand} or iterate over the
+     * registry's own keys.
+     *
+     * @throws RuntimeException wrapping the reflection failure if the class cannot be
+     *         instantiated.
+     */
+    private static Command getCommandInstance(final String commandName) {
+        try {
+            return sCommands.get(commandName).newInstance();
+        } catch (InstantiationException e) {
+            // Keep the cause: it says why instantiation failed.
+            throw new RuntimeException(commandName + " is not installed", e);
+        } catch (IllegalAccessException e) {
+            throw new RuntimeException(commandName + " is not installed", e);
+        }
+    }
+
+    /**
+     * Prints the general syntax followed by the help text of every registered command.
+     */
+    private static void help() {
+        System.out.println("Syntax: dicttool <command [arguments]>\nAvailable commands:\n");
+        for (final String commandName : sCommands.keySet()) {
+            System.out.println("*** " + commandName);
+            System.out.println(getCommandInstance(commandName).getHelp());
+            System.out.println("");
+        }
+    }
+
+    private static boolean isCommand(final String commandName) {
+        return sCommands.containsKey(commandName);
+    }
+
+    /**
+     * Builds the command named by arguments[0] and hands it the remaining arguments.
+     *
+     * @param arguments the full command line; must not be empty.
+     * @throws RuntimeException if arguments[0] is not a registered command.
+     */
+    private Command getCommand(final String[] arguments) {
+        final String commandName = arguments[0];
+        if (!isCommand(commandName)) {
+            throw new RuntimeException("Unknown command : " + commandName);
+        }
+        final Command command = getCommandInstance(commandName);
+        final String[] argsArray = Arrays.copyOfRange(arguments, 1, arguments.length);
+        command.setArgs(argsArray);
+        return command;
+    }
+
+    /**
+     * Runs the command described by arguments and reports any exception it throws.
+     */
+    private void execute(final String[] arguments) {
+        final Command command = getCommand(arguments);
+        try {
+            command.run();
+        } catch (Exception e) {
+            // Report on stderr: compress/uncompress send their payload to stdout when the
+            // destination is "-", so a message on stdout would end up inside that data.
+            System.err.println("Exception while processing command "
+                    + command.getClass().getSimpleName() + " : " + e);
+            return;
+        }
+    }
+
+    /**
+     * Program entry point. With no arguments, prints the help and returns.
+     */
+    public static void main(final String[] arguments) {
+        if (0 == arguments.length) {
+            help();
+            return;
+        }
+        new Dicttool().execute(arguments);
+    }
+}
diff --git a/tools/dicttool/src/android/inputmethod/latin/dicttool/Info.java b/tools/dicttool/src/android/inputmethod/latin/dicttool/Info.java
new file mode 100644
index 000000000..e59261706
--- /dev/null
+++ b/tools/dicttool/src/android/inputmethod/latin/dicttool/Info.java
@@ -0,0 +1,36 @@
+/**
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.android.inputmethod.latin.dicttool;
+
+/**
+ * The "info" command. Placeholder: it validates its argument count and reports that the
+ * feature is not implemented.
+ */
+public class Info extends Dicttool.Command {
+    public static final String COMMAND = "info";
+
+    public Info() {
+    }
+
+    @Override
+    public String getHelp() {
+        return COMMAND + " <filename>: prints various information about a dictionary file";
+    }
+
+    /**
+     * @throws RuntimeException if no file name was given.
+     */
+    @Override
+    public void run() {
+        // TODO: implement this
+        final boolean hasFilename = mArgs.length >= 1;
+        if (!hasFilename) {
+            throw new RuntimeException("Not enough arguments for command " + COMMAND);
+        }
+        System.out.println("Not implemented yet");
+    }
+}
diff --git a/tools/dicttool/src/android/inputmethod/latin/dicttool/Makedict.java b/tools/dicttool/src/android/inputmethod/latin/dicttool/Makedict.java
new file mode 100644
index 000000000..c004cfbe4
--- /dev/null
+++ b/tools/dicttool/src/android/inputmethod/latin/dicttool/Makedict.java
@@ -0,0 +1,40 @@
+/**
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.android.inputmethod.latin.dicttool;
+
+import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import javax.xml.parsers.ParserConfigurationException;
+import org.xml.sax.SAXException;
+
+/**
+ * The "makedict" command of dicttool.
+ *
+ * This class is a thin adapter: both the help text and the actual work are
+ * delegated to DictionaryMaker.
+ */
+public class Makedict extends Dicttool.Command {
+    public static final String COMMAND = "makedict";
+
+    public Makedict() {
+    }
+
+    /**
+     * @return the help text provided by DictionaryMaker.Arguments.
+     */
+    public String getHelp() {
+        final String help = DictionaryMaker.Arguments.getHelp();
+        return help;
+    }
+
+    /**
+     * Runs DictionaryMaker with the arguments stored in this command.
+     *
+     * Any exception declared here comes from DictionaryMaker.main and is passed
+     * through unchanged.
+     */
+    public void run() throws FileNotFoundException, IOException, ParserConfigurationException,
+            SAXException, UnsupportedFormatException {
+        final String[] makedictArguments = mArgs;
+        DictionaryMaker.main(makedictArguments);
+    }
+}
diff --git a/tools/dicttool/src/android/inputmethod/latin/dicttool/MakedictLog.java b/tools/dicttool/src/android/inputmethod/latin/dicttool/MakedictLog.java
new file mode 100644
index 000000000..7eccff2b4
--- /dev/null
+++ b/tools/dicttool/src/android/inputmethod/latin/dicttool/MakedictLog.java
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.android.inputmethod.latin.makedict;
+
+/**
+ * Minimal logging facade for the dictionary tools.
+ *
+ * Every level (debug, info, warning and error) is written, unchanged, as one
+ * line on standard output.
+ */
+public class MakedictLog {
+    public static final boolean DBG = true;
+
+    /** Logs a debug message to standard output. */
+    public static void d(String message) {
+        System.out.println(message);
+    }
+
+    /** Logs an informational message to standard output. */
+    public static void i(String message) {
+        System.out.println(message);
+    }
+
+    /** Logs a warning message to standard output. */
+    public static void w(String message) {
+        System.out.println(message);
+    }
+
+    /** Logs an error message to standard output. */
+    public static void e(String message) {
+        System.out.println(message);
+    }
+}
diff --git a/tools/dicttool/src/android/inputmethod/latin/dicttool/XmlDictInputOutput.java b/tools/dicttool/src/android/inputmethod/latin/dicttool/XmlDictInputOutput.java
new file mode 100644
index 000000000..9ce8c4934
--- /dev/null
+++ b/tools/dicttool/src/android/inputmethod/latin/dicttool/XmlDictInputOutput.java
@@ -0,0 +1,371 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.android.inputmethod.latin.dicttool;
+
+import com.android.inputmethod.latin.makedict.FusionDictionary;
+import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
+import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
+import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
+import com.android.inputmethod.latin.makedict.Word;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Writer;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.TreeSet;
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+/**
+ * Reads and writes XML files for a FusionDictionary.
+ *
+ * All functions in this class are static.
+ */
+public class XmlDictInputOutput {
+
+ private static final String ROOT_TAG = "wordlist"; // root element of a word list document
+ private static final String WORD_TAG = "w"; // element holding a single word
+ private static final String BIGRAM_TAG = "bigram"; // child element written for each bigram of a word
+ private static final String SHORTCUT_TAG = "shortcut"; // child element written for each shortcut target
+ private static final String FREQUENCY_ATTR = "f"; // attribute carrying a frequency
+ private static final String WORD_ATTR = "word"; // attribute carrying the word text when writing
+
+ private static final int SHORTCUT_ONLY_DEFAULT_FREQ = 1; // NOTE(review): not referenced anywhere in this class
+
+ private static final String OPTIONS_KEY = "options"; // root attribute selecting special processing
+ private static final String GERMAN_UMLAUT_PROCESSING_OPTION = "german_umlaut_processing"; // value of OPTIONS_KEY
+ private static final String FRENCH_LIGATURE_PROCESSING_OPTION = "french_ligature_processing"; // value of OPTIONS_KEY
+
+ /**
+ * SAX handler for a unigram XML file.
+ */
+ static private class UnigramHandler extends DefaultHandler {
+ // Parser states
+ private static final int NONE = 0;
+ private static final int START = 1;
+ private static final int WORD = 2;
+ private static final int BIGRAM = 4;
+ private static final int END = 5;
+ private static final int UNKNOWN = 6;
+
+ FusionDictionary mDictionary;
+ int mState; // the state of the parser
+ int mFreq; // the currently read freq
+ String mWord; // the current word
+ final HashMap<String, ArrayList<WeightedString>> mShortcutsMap;
+
+ /**
+ * Create the handler.
+ *
+ * @param shortcuts the shortcuts as a map. This may be empty, but may not be null.
+ */
+ public UnigramHandler(final HashMap<String, ArrayList<WeightedString>> shortcuts) {
+ mDictionary = null;
+ mShortcutsMap = shortcuts;
+ mWord = "";
+ mState = START;
+ mFreq = 0;
+ }
+
+ public FusionDictionary getFinalDictionary() {
+ final FusionDictionary dict = mDictionary;
+ for (final String shortcutOnly : mShortcutsMap.keySet()) {
+ if (dict.hasWord(shortcutOnly)) continue;
+ dict.add(shortcutOnly, 0, mShortcutsMap.get(shortcutOnly));
+ }
+ mDictionary = null;
+ mShortcutsMap.clear();
+ mWord = "";
+ mState = START;
+ mFreq = 0;
+ return dict;
+ }
+
+ @Override
+ public void startElement(String uri, String localName, String qName, Attributes attrs) {
+ if (WORD_TAG.equals(localName)) {
+ mState = WORD;
+ mWord = "";
+ for (int attrIndex = 0; attrIndex < attrs.getLength(); ++attrIndex) {
+ final String attrName = attrs.getLocalName(attrIndex);
+ if (FREQUENCY_ATTR.equals(attrName)) {
+ mFreq = Integer.parseInt(attrs.getValue(attrIndex));
+ }
+ }
+ } else if (ROOT_TAG.equals(localName)) {
+ final HashMap<String, String> attributes = new HashMap<String, String>();
+ for (int attrIndex = 0; attrIndex < attrs.getLength(); ++attrIndex) {
+ final String attrName = attrs.getLocalName(attrIndex);
+ attributes.put(attrName, attrs.getValue(attrIndex));
+ }
+ final String optionsString = attributes.get(OPTIONS_KEY);
+ final boolean processUmlauts =
+ GERMAN_UMLAUT_PROCESSING_OPTION.equals(optionsString);
+ final boolean processLigatures =
+ FRENCH_LIGATURE_PROCESSING_OPTION.equals(optionsString);
+ mDictionary = new FusionDictionary(new Node(), new DictionaryOptions(attributes,
+ processUmlauts, processLigatures));
+ } else {
+ mState = UNKNOWN;
+ }
+ }
+
+ @Override
+ public void characters(char[] ch, int start, int length) {
+ if (WORD == mState) {
+ // The XML parser is free to return text in arbitrary chunks one after the
+ // other. In particular, this happens in some implementations when it finds
+ // an escape code like "&amp;".
+ mWord += String.copyValueOf(ch, start, length);
+ }
+ }
+
+ @Override
+ public void endElement(String uri, String localName, String qName) {
+ if (WORD == mState) {
+ mDictionary.add(mWord, mFreq, mShortcutsMap.get(mWord));
+ mState = START;
+ }
+ }
+ }
+
+    /**
+     * SAX handler that collects "source word -> list of weighted targets" associations.
+     *
+     * The names of the source element, the target element and their attributes are given
+     * to the constructor, so the same logic can serve several file formats. The last source
+     * element seen is the one that subsequent target elements are attached to.
+     */
+    static private class AssociativeListHandler extends DefaultHandler {
+        // In this version of the XML file, the bigram frequency is given as an int 0..XML_MAX
+        private final static int XML_MAX = 256;
+        // In memory and in the binary dictionary the bigram frequency is 0..MEMORY_MAX
+        private final static int MEMORY_MAX = 256;
+        private final static int XML_TO_MEMORY_RATIO = XML_MAX / MEMORY_MAX;
+
+        private final String mSrcTag;
+        private final String mSrcAttribute;
+        private final String mDstTag;
+        private final String mDstAttribute;
+        private final String mDstFreqAttribute;
+
+        private final HashMap<String, ArrayList<WeightedString>> mAssocMap =
+                new HashMap<String, ArrayList<WeightedString>>();
+        private String mSrc = null;
+
+        /**
+         * @param srcTag name of the element that introduces a source word.
+         * @param srcAttribute attribute of the source element holding the source word.
+         * @param dstTag name of the element that describes one target.
+         * @param dstAttribute attribute of the target element holding the target word.
+         * @param dstFreq attribute of the target element holding the frequency string.
+         */
+        public AssociativeListHandler(final String srcTag, final String srcAttribute,
+                final String dstTag, final String dstAttribute, final String dstFreq) {
+            mSrcTag = srcTag;
+            mSrcAttribute = srcAttribute;
+            mDstTag = dstTag;
+            mDstAttribute = dstAttribute;
+            mDstFreqAttribute = dstFreq;
+        }
+
+        @Override
+        public void startElement(String uri, String localName, String qName, Attributes attrs) {
+            if (mSrcTag.equals(localName)) {
+                // Remember the source; the targets that follow are attached to it.
+                mSrc = attrs.getValue(uri, mSrcAttribute);
+                return;
+            }
+            if (!mDstTag.equals(localName)) {
+                return;
+            }
+            final String target = attrs.getValue(uri, mDstAttribute);
+            final int xmlFreq = getValueFromFreqString(attrs.getValue(uri, mDstFreqAttribute));
+            final WeightedString weighted =
+                    new WeightedString(target, xmlFreq / XML_TO_MEMORY_RATIO);
+            ArrayList<WeightedString> targets = mAssocMap.get(mSrc);
+            if (null == targets) {
+                targets = new ArrayList<WeightedString>();
+                mAssocMap.put(mSrc, targets);
+            }
+            targets.add(weighted);
+        }
+
+        /**
+         * Converts the frequency attribute string into an int. Subclasses may override
+         * this to accept other notations.
+         */
+        protected int getValueFromFreqString(final String freqString) {
+            return Integer.parseInt(freqString);
+        }
+
+        // This may return an empty map, but will never return null.
+        public HashMap<String, ArrayList<WeightedString>> getAssocMap() {
+            return mAssocMap;
+        }
+    }
+
+    /**
+     * SAX handler for a bigram XML file.
+     *
+     * Configures the generic associative handler with the bigram vocabulary: a "bi"
+     * element names the first word in its "w1" attribute, and each "w" element names a
+     * second word in "w2" together with its frequency in "p".
+     */
+    private static class BigramHandler extends AssociativeListHandler {
+        private static final String BIGRAM_W1_TAG = "bi";
+        private static final String BIGRAM_W1_ATTRIBUTE = "w1";
+        private static final String BIGRAM_W2_TAG = "w";
+        private static final String BIGRAM_W2_ATTRIBUTE = "w2";
+        private static final String BIGRAM_FREQ_ATTRIBUTE = "p";
+
+        public BigramHandler() {
+            super(BIGRAM_W1_TAG, BIGRAM_W1_ATTRIBUTE,
+                    BIGRAM_W2_TAG, BIGRAM_W2_ATTRIBUTE,
+                    BIGRAM_FREQ_ATTRIBUTE);
+        }
+
+        // As per getAssocMap(), this never returns null.
+        public HashMap<String, ArrayList<WeightedString>> getBigramMap() {
+            final HashMap<String, ArrayList<WeightedString>> bigrams = getAssocMap();
+            return bigrams;
+        }
+    }
+
+    /**
+     * SAX handler for a shortcut &amp; whitelist XML file.
+     *
+     * An "entry" element names the shortcut in its "shortcut" attribute; each "target"
+     * element gives a "replacement" and a "priority". The priority is either the literal
+     * string "whitelist" or an integer within MIN_FREQ..MAX_FREQ.
+     */
+    private static class ShortcutAndWhitelistHandler extends AssociativeListHandler {
+        private static final String ENTRY_TAG = "entry";
+        private static final String ENTRY_ATTRIBUTE = "shortcut";
+        private static final String TARGET_TAG = "target";
+        private static final String REPLACEMENT_ATTRIBUTE = "replacement";
+        private static final String TARGET_PRIORITY_ATTRIBUTE = "priority";
+        private static final String WHITELIST_MARKER = "whitelist";
+        private static final int WHITELIST_FREQ_VALUE = 15;
+        private static final int MIN_FREQ = 0;
+        private static final int MAX_FREQ = 14;
+
+        public ShortcutAndWhitelistHandler() {
+            super(ENTRY_TAG, ENTRY_ATTRIBUTE,
+                    TARGET_TAG, REPLACEMENT_ATTRIBUTE,
+                    TARGET_PRIORITY_ATTRIBUTE);
+        }
+
+        /**
+         * Maps the whitelist marker to WHITELIST_FREQ_VALUE and parses anything else as an
+         * integer priority.
+         *
+         * @throws RuntimeException if the parsed priority is outside MIN_FREQ..MAX_FREQ.
+         */
+        @Override
+        protected int getValueFromFreqString(final String freqString) {
+            if (WHITELIST_MARKER.equals(freqString)) {
+                return WHITELIST_FREQ_VALUE;
+            }
+            final int priority = super.getValueFromFreqString(freqString);
+            final boolean inRange = MIN_FREQ <= priority && priority <= MAX_FREQ;
+            if (!inRange) {
+                throw new RuntimeException("Shortcut freq out of range. Accepted range is "
+                        + MIN_FREQ + ".." + MAX_FREQ);
+            }
+            return priority;
+        }
+
+        // As per getAssocMap(), this never returns null.
+        public HashMap<String, ArrayList<WeightedString>> getShortcutAndWhitelistMap() {
+            final HashMap<String, ArrayList<WeightedString>> shortcuts = getAssocMap();
+            return shortcuts;
+        }
+    }
+
+    /**
+     * Reads a dictionary from XML input.
+     *
+     * The bigram and shortcut streams, when supplied, are parsed first; the unigram stream
+     * is parsed last so that shortcuts can be attached to words as they are added. Bigrams
+     * are then set only for pairs whose two words are both present in the dictionary.
+     * All three streams are parsed with the same namespace-aware SAX parser.
+     *
+     * @param unigrams the file to read the data from.
+     * @param shortcuts the file to read the shortcuts &amp; whitelist from, or null.
+     * @param bigrams the file to read the bigrams from, or null.
+     * @return the in-memory representation of the dictionary.
+     */
+    public static FusionDictionary readDictionaryXml(final InputStream unigrams,
+            final InputStream shortcuts, final InputStream bigrams)
+            throws SAXException, IOException, ParserConfigurationException {
+        final SAXParserFactory parserFactory = SAXParserFactory.newInstance();
+        parserFactory.setNamespaceAware(true);
+        final SAXParser saxParser = parserFactory.newSAXParser();
+
+        final BigramHandler bigramReader = new BigramHandler();
+        if (bigrams != null) {
+            saxParser.parse(bigrams, bigramReader);
+        }
+
+        final ShortcutAndWhitelistHandler shortcutReader = new ShortcutAndWhitelistHandler();
+        if (shortcuts != null) {
+            saxParser.parse(shortcuts, shortcutReader);
+        }
+
+        final UnigramHandler unigramReader =
+                new UnigramHandler(shortcutReader.getShortcutAndWhitelistMap());
+        saxParser.parse(unigrams, unigramReader);
+        final FusionDictionary dict = unigramReader.getFinalDictionary();
+
+        // Attach the bigrams, ignoring any pair that mentions a word the dictionary lacks.
+        final HashMap<String, ArrayList<WeightedString>> bigramsByFirstWord =
+                bigramReader.getBigramMap();
+        for (final String firstWord : bigramsByFirstWord.keySet()) {
+            if (dict.hasWord(firstWord)) {
+                for (final WeightedString second : bigramsByFirstWord.get(firstWord)) {
+                    if (dict.hasWord(second.mWord)) {
+                        dict.setBigram(firstWord, second.mWord, second.mFrequency);
+                    }
+                }
+            }
+        }
+        return dict;
+    }
+
+    /*
+     * NOTE: the description below refers to a reader for the first, legacy XML format,
+     * but no such method is defined in this class. It is kept for reference only.
+     *
+     * Reads a dictionary in the first, legacy XML format.
+     *
+     * This method reads data from the parser and creates a new FusionDictionary with it.
+     * The format parsed by this method is the format used before Ice Cream Sandwich,
+     * which has no support for bigrams or shortcuts/whitelist.
+     * It is important to note that this method expects the parser to have already eaten
+     * the first, all-encompassing tag.
+     *
+     * Parameter xpp: the parser to read the data from.
+     * Returns: the parsed dictionary.
+     */
+
+ /**
+ * Writes a dictionary to an XML file.
+ *
+ * The output format is the "second" format, which supports bigrams and shortcuts/whitelist.
+ *
+ * @param destination a destination stream to write to.
+ * @param dict the dictionary to write.
+ */
+ public static void writeDictionaryXml(Writer destination, FusionDictionary dict)
+ throws IOException {
+ final TreeSet<Word> set = new TreeSet<Word>();
+ for (Word word : dict) {
+ set.add(word);
+ }
+ // TODO: use an XMLSerializer if this gets big
+ destination.write("<wordlist format=\"2\"");
+ final HashMap<String, String> options = dict.mOptions.mAttributes;
+ if (dict.mOptions.mGermanUmlautProcessing) {
+ destination.write(" " + OPTIONS_KEY + "=\"" + GERMAN_UMLAUT_PROCESSING_OPTION + "\"");
+ } else if (dict.mOptions.mFrenchLigatureProcessing) {
+ destination.write(" " + OPTIONS_KEY + "=\"" + FRENCH_LIGATURE_PROCESSING_OPTION + "\"");
+ }
+ for (final String key : dict.mOptions.mAttributes.keySet()) {
+ final String value = dict.mOptions.mAttributes.get(key);
+ destination.write(" " + key + "=\"" + value + "\"");
+ }
+ destination.write(">\n");
+ destination.write("<!-- Warning: there is no code to read this format yet. -->\n");
+ for (Word word : set) {
+ destination.write(" <" + WORD_TAG + " " + WORD_ATTR + "=\"" + word.mWord + "\" "
+ + FREQUENCY_ATTR + "=\"" + word.mFrequency + "\">");
+ if (null != word.mShortcutTargets) {
+ destination.write("\n");
+ for (WeightedString target : word.mShortcutTargets) {
+ destination.write(" <" + SHORTCUT_TAG + " " + FREQUENCY_ATTR + "=\""
+ + target.mFrequency + "\">" + target.mWord + "</" + SHORTCUT_TAG
+ + ">\n");
+ }
+ destination.write(" ");
+ }
+ if (null != word.mBigrams) {
+ destination.write("\n");
+ for (WeightedString bigram : word.mBigrams) {
+ destination.write(" <" + BIGRAM_TAG + " " + FREQUENCY_ATTR + "=\""
+ + bigram.mFrequency + "\">" + bigram.mWord + "</" + BIGRAM_TAG + ">\n");
+ }
+ destination.write(" ");
+ }
+ destination.write("</" + WORD_TAG + ">\n");
+ }
+ destination.write("</wordlist>\n");
+ destination.close();
+ }
+}
diff --git a/tools/dicttool/tests/com/android/inputmethod/latin/makedict/BinaryDictInputOutputTest.java b/tools/dicttool/tests/com/android/inputmethod/latin/makedict/BinaryDictInputOutputTest.java
new file mode 100644
index 000000000..24042f120
--- /dev/null
+++ b/tools/dicttool/tests/com/android/inputmethod/latin/makedict/BinaryDictInputOutputTest.java
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin.makedict;
+
+import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
+import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+
+import junit.framework.TestCase;
+
+/**
+ * Unit tests for BinaryDictInputOutput.
+ */
+public class BinaryDictInputOutputTest extends TestCase {
+
+    public void setUp() throws Exception {
+        super.setUp();
+    }
+
+    public void tearDown() throws Exception {
+        super.tearDown();
+    }
+
+    /**
+     * Builds a small dictionary and checks that flattening its tree yields the expected
+     * number of nodes, none of which appears more than once.
+     */
+    public void testFlattenNodes() {
+        final Node root = new Node();
+        final DictionaryOptions options = new DictionaryOptions(new HashMap<String, String>(),
+                false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */);
+        final FusionDictionary dict = new FusionDictionary(root, options);
+        final String[] words = { "foo", "fta", "ftb", "bar", "fool" };
+        for (final String word : words) {
+            dict.add(word, 1, null);
+        }
+
+        final ArrayList<Node> flattened = BinaryDictInputOutput.flattenTree(dict.mRoot);
+        assertEquals(4, flattened.size());
+
+        // Each node must not show up again among the nodes that follow it.
+        final int nodeCount = flattened.size();
+        for (int i = 0; i < nodeCount; ++i) {
+            final Node current = flattened.get(i);
+            assertFalse("Flattened array contained the same node twice",
+                    flattened.subList(i + 1, nodeCount).contains(current));
+        }
+    }
+
+}
diff --git a/tools/dicttool/tests/etc/test-dicttool.sh b/tools/dicttool/tests/etc/test-dicttool.sh
new file mode 100755
index 000000000..8834611cd
--- /dev/null
+++ b/tools/dicttool/tests/etc/test-dicttool.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+# Copyright 2012, The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Runs the BinaryDictInputOutputTest JUnit test against the dicttool classes.
+
+# Abort with a clear message instead of running java with a bogus classpath
+# when the output directory variable is missing or empty.
+: "${ANDROID_HOST_OUT:?ANDROID_HOST_OUT is not set}"
+
+# The classpath is quoted so that a path containing spaces is not word-split.
+java -classpath "${ANDROID_HOST_OUT}/framework/junit.jar:${ANDROID_HOST_OUT}/../common/obj/JAVA_LIBRARIES/dicttool_intermediates/classes" junit.textui.TestRunner com.android.inputmethod.latin.makedict.BinaryDictInputOutputTest