aboutsummaryrefslogtreecommitdiffstats
path: root/tools/dicttool/src/android/inputmethod/latin/dicttool/DictionaryMaker.java
diff options
context:
space:
mode:
Diffstat (limited to 'tools/dicttool/src/android/inputmethod/latin/dicttool/DictionaryMaker.java')
-rw-r--r--tools/dicttool/src/android/inputmethod/latin/dicttool/DictionaryMaker.java320
1 files changed, 320 insertions, 0 deletions
diff --git a/tools/dicttool/src/android/inputmethod/latin/dicttool/DictionaryMaker.java b/tools/dicttool/src/android/inputmethod/latin/dicttool/DictionaryMaker.java
new file mode 100644
index 000000000..9ebd3bbdd
--- /dev/null
+++ b/tools/dicttool/src/android/inputmethod/latin/dicttool/DictionaryMaker.java
@@ -0,0 +1,320 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.android.inputmethod.latin.dicttool;
+
+import com.android.inputmethod.latin.makedict.BinaryDictInputOutput;
+import com.android.inputmethod.latin.makedict.FusionDictionary;
+import com.android.inputmethod.latin.makedict.MakedictLog;
+import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.RandomAccessFile;
+import java.util.Arrays;
+import java.util.LinkedList;
+
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.xml.sax.SAXException;
+
+/**
+ * Main class/method for DictionaryMaker.
+ */
+public class DictionaryMaker {
+
+ static class Arguments {
+ // Command-line switches recognized by the argument parser.
+ private static final String OPTION_VERSION_2 = "-2";
+ private static final String OPTION_INPUT_SOURCE = "-s";
+ private static final String OPTION_INPUT_BIGRAM_XML = "-b";
+ private static final String OPTION_INPUT_SHORTCUT_XML = "-c";
+ private static final String OPTION_OUTPUT_BINARY = "-d";
+ private static final String OPTION_OUTPUT_BINARY_FORMAT_VERSION_1 = "-d1";
+ private static final String OPTION_OUTPUT_XML = "-x";
+ private static final String OPTION_HELP = "-h";
+
+ // Input file names; a field is null when the matching input was not given.
+ public final String mInputBinary;
+ public final String mInputUnigramXml;
+ public final String mInputShortcutXml;
+ public final String mInputBigramXml;
+
+ // Output file names; a field is null when the matching output was not requested.
+ public final String mOutputBinary;
+ public final String mOutputBinaryFormat1;
+ public final String mOutputXml;
+
+ private void checkIntegrity() throws IOException {
+ checkHasExactlyOneInput();
+ checkHasAtLeastOneOutput();
+ checkNotSameFile(mInputBinary, mOutputBinary);
+ checkNotSameFile(mInputBinary, mOutputBinaryFormat1);
+ checkNotSameFile(mInputBinary, mOutputXml);
+ checkNotSameFile(mInputUnigramXml, mOutputBinary);
+ checkNotSameFile(mInputUnigramXml, mOutputBinaryFormat1);
+ checkNotSameFile(mInputUnigramXml, mOutputXml);
+ checkNotSameFile(mInputShortcutXml, mOutputBinary);
+ checkNotSameFile(mInputShortcutXml, mOutputBinaryFormat1);
+ checkNotSameFile(mInputShortcutXml, mOutputXml);
+ checkNotSameFile(mInputBigramXml, mOutputBinary);
+ checkNotSameFile(mInputBigramXml, mOutputBinaryFormat1);
+ checkNotSameFile(mInputBigramXml, mOutputXml);
+ checkNotSameFile(mOutputBinary, mOutputBinaryFormat1);
+ checkNotSameFile(mOutputBinary, mOutputXml);
+ checkNotSameFile(mOutputBinaryFormat1, mOutputXml);
+ }
+
+ /**
+  * Verifies that the combination of input files is usable.
+  *
+  * Exactly one main source (unigram XML or binary) must be given. The separate bigram
+  * and shortcut XML files are only meaningful next to a unigram XML source, so both are
+  * rejected when the source is a binary dictionary rather than being silently ignored.
+  *
+  * @throws RuntimeException if the inputs are missing or inconsistent.
+  */
+ private void checkHasExactlyOneInput() {
+     if (null == mInputUnigramXml && null == mInputBinary) {
+         throw new RuntimeException("No input file specified");
+     } else if (null != mInputUnigramXml && null != mInputBinary) {
+         throw new RuntimeException("Both input XML and binary specified");
+     } else if (null != mInputBinary && null != mInputBigramXml) {
+         throw new RuntimeException("Cannot specify a binary input and a separate bigram "
+                 + "file");
+     } else if (null != mInputBinary && null != mInputShortcutXml) {
+         // Mirrors the bigram check: a shortcut file would otherwise be dropped unnoticed.
+         throw new RuntimeException("Cannot specify a binary input and a separate shortcut "
+                 + "file");
+     }
+ }
+
+ private void checkHasAtLeastOneOutput() {
+ if (null == mOutputBinary && null == mOutputBinaryFormat1 && null == mOutputXml) {
+ throw new RuntimeException("No output specified");
+ }
+ }
+
+ /**
+ * Utility method that throws an exception if path1 and path2 point to the same file.
+ */
+ private static void checkNotSameFile(final String path1, final String path2)
+ throws IOException {
+ if (null == path1 || null == path2) return;
+ if (new File(path1).getCanonicalPath().equals(new File(path2).getCanonicalPath())) {
+ throw new RuntimeException(path1 + " and " + path2 + " are the same file: "
+ + " refusing to process.");
+ }
+ }
+
+ private void displayHelp() {
+ MakedictLog.i(getHelp());
+ }
+
+ /**
+  * Builds the usage message of the command line tool.
+  *
+  * NOTE(review): the text states that the behavior is unspecified when the same file is
+  * used for input and output, yet checkNotSameFile refuses such arguments - consider
+  * updating the wording.
+  *
+  * @return the usage line followed by a description of the supported inputs and outputs.
+  */
+ public static String getHelp() {
+     final String usage = "Usage: makedict "
+             + "[-s <unigrams.xml> [-b <bigrams.xml>] [-c <shortcuts.xml>] "
+             + "| -s <binary input>] [-d <binary output format version 2>] "
+             + "[-d1 <binary output format version 1>] [-x <xml output>] [-2]\n";
+     final String description = "\n"
+             + " Converts a source dictionary file to one or several outputs.\n"
+             + " Source can be an XML file, with an optional XML bigrams file, or a\n"
+             + " binary dictionary file.\n"
+             + " Binary version 1 (Ice Cream Sandwich), 2 (Jelly Bean) and XML outputs\n"
+             + " are supported. All three can be output at the same time, but the same\n"
+             + " output format cannot be specified several times. The behavior is\n"
+             + " unspecified if the same file is specified for input and output, or for\n"
+             + " several outputs.";
+     return usage + description;
+ }
+
+ /**
+  * Parses the command line into input and output file names, then validates them.
+  *
+  * Recognized switches: -s (input source, stored as binary or unigram XML depending on
+  * BinaryDictInputOutput.isBinaryDictionary), -c (shortcut XML), -b (bigram XML),
+  * -d (binary output), -d1 (binary output, format version 1), -x (XML output),
+  * -2 (accepted and ignored, it is the default) and -h (logs the help text, parsing
+  * continues). An argument that is not a switch is taken as the input if none has been
+  * seen yet, otherwise as the binary output. The help text is also logged when the
+  * command line is empty.
+  *
+  * @param argsArray the raw command line arguments.
+  * @throws IllegalArgumentException if an argument is empty, a switch is unknown or lacks
+  *         its file name, or too many positional arguments are given.
+  * @throws RuntimeException if the resulting combination of files is rejected by
+  *         checkIntegrity().
+  * @throws IOException if an I/O error occurs while validating the file names.
+  */
+ public Arguments(String[] argsArray) throws IOException {
+     final LinkedList<String> args = new LinkedList<String>(Arrays.asList(argsArray));
+     if (args.isEmpty()) {
+         displayHelp();
+     }
+     String inputBinary = null;
+     String inputUnigramXml = null;
+     String inputShortcutXml = null;
+     String inputBigramXml = null;
+     String outputBinary = null;
+     String outputBinaryFormat1 = null;
+     String outputXml = null;
+
+     while (!args.isEmpty()) {
+         final String arg = args.removeFirst();
+         // An empty argument (e.g. "" from a shell) has no first character to inspect;
+         // report it as an argument error rather than failing in charAt(0).
+         if (arg.isEmpty()) {
+             throw new IllegalArgumentException("Empty argument is not a valid option or "
+                     + "file name");
+         }
+         if (arg.charAt(0) == '-') {
+             if (OPTION_VERSION_2.equals(arg)) {
+                 // Do nothing, this is the default
+             } else if (OPTION_HELP.equals(arg)) {
+                 displayHelp();
+             } else {
+                 // All these options need an argument
+                 if (args.isEmpty()) {
+                     throw new IllegalArgumentException("Option " + arg + " is unknown or "
+                             + "requires an argument");
+                 }
+                 final String filename = args.removeFirst();
+                 if (OPTION_INPUT_SOURCE.equals(arg)) {
+                     if (BinaryDictInputOutput.isBinaryDictionary(filename)) {
+                         inputBinary = filename;
+                     } else {
+                         inputUnigramXml = filename;
+                     }
+                 } else if (OPTION_INPUT_SHORTCUT_XML.equals(arg)) {
+                     inputShortcutXml = filename;
+                 } else if (OPTION_INPUT_BIGRAM_XML.equals(arg)) {
+                     inputBigramXml = filename;
+                 } else if (OPTION_OUTPUT_BINARY.equals(arg)) {
+                     outputBinary = filename;
+                 } else if (OPTION_OUTPUT_BINARY_FORMAT_VERSION_1.equals(arg)) {
+                     outputBinaryFormat1 = filename;
+                 } else if (OPTION_OUTPUT_XML.equals(arg)) {
+                     outputXml = filename;
+                 } else {
+                     throw new IllegalArgumentException("Unknown option : " + arg);
+                 }
+             }
+         } else {
+             // Positional form: first the input, then the binary output.
+             if (null == inputBinary && null == inputUnigramXml) {
+                 if (BinaryDictInputOutput.isBinaryDictionary(arg)) {
+                     inputBinary = arg;
+                 } else {
+                     inputUnigramXml = arg;
+                 }
+             } else if (null == outputBinary) {
+                 outputBinary = arg;
+             } else {
+                 throw new IllegalArgumentException("Several output binary files specified");
+             }
+         }
+     }
+
+     mInputBinary = inputBinary;
+     mInputUnigramXml = inputUnigramXml;
+     mInputShortcutXml = inputShortcutXml;
+     mInputBigramXml = inputBigramXml;
+     mOutputBinary = outputBinary;
+     mOutputBinaryFormat1 = outputBinaryFormat1;
+     mOutputXml = outputXml;
+     checkIntegrity();
+ }
+ }
+
+ /**
+  * Command-line entry point: parses the arguments, reads the source dictionary and
+  * writes every requested output.
+  *
+  * @param args the raw command line arguments.
+  */
+ public static void main(String[] args)
+         throws FileNotFoundException, ParserConfigurationException, SAXException, IOException,
+         UnsupportedFormatException {
+     final Arguments options = new Arguments(args);
+     final FusionDictionary source = readInputFromParsedArgs(options);
+     writeOutputToParsedArgs(options, source);
+ }
+
+ /**
+ * Invoke the right input method according to args.
+ *
+ * @param args the parsed command line arguments.
+ * @return the read dictionary.
+ */
+ private static FusionDictionary readInputFromParsedArgs(final Arguments args)
+ throws IOException, UnsupportedFormatException, ParserConfigurationException,
+ SAXException, FileNotFoundException {
+ if (null != args.mInputBinary) {
+ return readBinaryFile(args.mInputBinary);
+ } else if (null != args.mInputUnigramXml) {
+ return readXmlFile(args.mInputUnigramXml, args.mInputShortcutXml, args.mInputBigramXml);
+ } else {
+ throw new RuntimeException("No input file specified");
+ }
+ }
+
+ /**
+  * Read a dictionary from the name of a binary file.
+  *
+  * The file is opened read-only and is closed before this method returns, whether the
+  * read succeeds or fails.
+  *
+  * @param binaryFilename the name of the file in the binary dictionary format.
+  * @return the read dictionary.
+  * @throws FileNotFoundException if the file can't be found
+  * @throws IOException if the input file can't be read
+  * @throws UnsupportedFormatException if the binary file is not in the expected format
+  */
+ private static FusionDictionary readBinaryFile(final String binaryFilename)
+         throws FileNotFoundException, IOException, UnsupportedFormatException {
+     final RandomAccessFile inputFile = new RandomAccessFile(binaryFilename, "r");
+     try {
+         // Assumes the returned dictionary no longer needs the file once the call
+         // returns - TODO confirm against BinaryDictInputOutput.
+         return BinaryDictInputOutput.readDictionaryBinary(inputFile, null);
+     } finally {
+         // Release the file handle even when reading throws.
+         inputFile.close();
+     }
+ }
+
+ /**
+  * Read a dictionary from a unigram XML file, and optionally shortcut and bigram XML files.
+  *
+  * Every stream opened here is closed before this method returns, including when one of
+  * the later files cannot be opened or when parsing fails.
+  *
+  * @param unigramXmlFilename the name of the unigram XML file. May not be null.
+  * @param shortcutXmlFilename the name of the shortcut XML file, or null if there is none.
+  * @param bigramXmlFilename the name of the bigram XML file. Pass null if there are no bigrams.
+  * @return the read dictionary.
+  * @throws FileNotFoundException if one of the files can't be found
+  * @throws SAXException if one or more of the XML files is not well-formed
+  * @throws IOException if one the input files can't be read
+  * @throws ParserConfigurationException if the system can't create a SAX parser
+  */
+ private static FusionDictionary readXmlFile(final String unigramXmlFilename,
+         final String shortcutXmlFilename, final String bigramXmlFilename)
+         throws FileNotFoundException, SAXException, IOException, ParserConfigurationException {
+     FileInputStream unigrams = null;
+     FileInputStream shortcuts = null;
+     FileInputStream bigrams = null;
+     try {
+         unigrams = new FileInputStream(new File(unigramXmlFilename));
+         if (null != shortcutXmlFilename) {
+             shortcuts = new FileInputStream(new File(shortcutXmlFilename));
+         }
+         if (null != bigramXmlFilename) {
+             bigrams = new FileInputStream(new File(bigramXmlFilename));
+         }
+         return XmlDictInputOutput.readDictionaryXml(unigrams, shortcuts, bigrams);
+     } finally {
+         closeXmlInput(bigrams);
+         closeXmlInput(shortcuts);
+         closeXmlInput(unigrams);
+     }
+ }
+
+ /**
+  * Closes an XML input stream if it was opened, ignoring any failure to close.
+  *
+  * @param stream the stream to close, or null if it was never opened.
+  */
+ private static void closeXmlInput(final FileInputStream stream) {
+     if (null == stream) {
+         return;
+     }
+     try {
+         stream.close();
+     } catch (IOException ignored) {
+         // The stream was only read from; a failure to close it loses no data and must
+         // not mask the result or the exception of the read itself.
+     }
+ }
+
+ /**
+ * Invoke the right output method according to args.
+ *
+ * This will write the passed dictionary to the file(s) passed in the command line arguments.
+ * @param args the parsed arguments.
+ * @param dict the file to output.
+ * @throws FileNotFoundException if one of the output files can't be created.
+ * @throws IOException if one of the output files can't be written to.
+ */
+ private static void writeOutputToParsedArgs(final Arguments args, final FusionDictionary dict)
+ throws FileNotFoundException, IOException, UnsupportedFormatException,
+ IllegalArgumentException {
+ if (null != args.mOutputBinary) {
+ writeBinaryDictionary(args.mOutputBinary, dict, 2);
+ }
+ if (null != args.mOutputBinaryFormat1) {
+ writeBinaryDictionary(args.mOutputBinaryFormat1, dict, 1);
+ }
+ if (null != args.mOutputXml) {
+ writeXmlDictionary(args.mOutputXml, dict);
+ }
+ }
+
+ /**
+  * Write the dictionary in binary format to the specified filename.
+  *
+  * The output stream is closed before this method returns, whether writing succeeds or
+  * fails.
+  *
+  * @param outputFilename the name of the file to write to.
+  * @param dict the dictionary to write.
+  * @param version the binary format version to use.
+  * @throws FileNotFoundException if the output file can't be created.
+  * @throws IOException if the output file can't be written to.
+  * @throws UnsupportedFormatException as declared by the binary writer; presumably when
+  *         the requested version is not supported - TODO confirm.
+  */
+ private static void writeBinaryDictionary(final String outputFilename,
+         final FusionDictionary dict, final int version)
+         throws FileNotFoundException, IOException, UnsupportedFormatException {
+     final FileOutputStream destination = new FileOutputStream(outputFilename);
+     try {
+         BinaryDictInputOutput.writeDictionaryBinary(destination, dict, version);
+     } finally {
+         // Closing an already-closed FileOutputStream is harmless, so this is safe even
+         // if the writer closed the stream itself.
+         destination.close();
+     }
+ }
+
+ /**
+ * Write the dictionary in XML format to the specified filename.
+ *
+ * @param outputFilename the name of the file to write to.
+ * @param dict the dictionary to write.
+ * @throws FileNotFoundException if the output file can't be created.
+ * @throws IOException if the output file can't be written to.
+ */
+ private static void writeXmlDictionary(final String outputFilename,
+ final FusionDictionary dict) throws FileNotFoundException, IOException {
+ XmlDictInputOutput.writeDictionaryXml(new FileWriter(outputFilename), dict);
+ }
+}