diff options
Diffstat (limited to 'tools/dicttool')
-rw-r--r-- | tools/dicttool/Android.mk | 2 | ||||
-rw-r--r-- | tools/dicttool/etc/Android.mk | 2 | ||||
-rwxr-xr-x | tools/dicttool/etc/dicttool_aosp (renamed from tools/dicttool/etc/dicttool) | 2 | ||||
-rwxr-xr-x | tools/dicttool/etc/makedict_aosp | 18 | ||||
-rw-r--r-- | tools/dicttool/src/android/inputmethod/latin/dicttool/AdditionalCommandList.java | 22 | ||||
-rw-r--r-- | tools/dicttool/src/android/inputmethod/latin/dicttool/CommandList.java | 26 | ||||
-rw-r--r-- | tools/dicttool/src/android/inputmethod/latin/dicttool/Compress.java | 38 | ||||
-rw-r--r-- | tools/dicttool/src/android/inputmethod/latin/dicttool/DictionaryMaker.java | 4 | ||||
-rw-r--r-- | tools/dicttool/src/android/inputmethod/latin/dicttool/Dicttool.java | 9 | ||||
-rw-r--r-- | tools/dicttool/src/android/inputmethod/latin/dicttool/XmlDictInputOutput.java | 49 |
10 files changed, 136 insertions, 36 deletions
diff --git a/tools/dicttool/Android.mk b/tools/dicttool/Android.mk index e9c11acc4..df8cb1030 100644 --- a/tools/dicttool/Android.mk +++ b/tools/dicttool/Android.mk @@ -24,7 +24,7 @@ LOCAL_SRC_FILES := $(LOCAL_TOOL_SRC_FILES) \ $(filter-out $(addprefix %/, $(notdir $(LOCAL_TOOL_SRC_FILES))), $(LOCAL_MAIN_SRC_FILES)) \ $(call all-java-files-under,tests) LOCAL_JAR_MANIFEST := etc/manifest.txt -LOCAL_MODULE := dicttool +LOCAL_MODULE := dicttool_aosp LOCAL_JAVA_LIBRARIES := junit LOCAL_MODULE_TAGS := eng diff --git a/tools/dicttool/etc/Android.mk b/tools/dicttool/etc/Android.mk index 03d4a96ee..8952827ab 100644 --- a/tools/dicttool/etc/Android.mk +++ b/tools/dicttool/etc/Android.mk @@ -16,5 +16,5 @@ LOCAL_PATH := $(call my-dir) include $(CLEAR_VARS) LOCAL_MODULE_TAGS := eng -LOCAL_PREBUILT_EXECUTABLES := dicttool +LOCAL_PREBUILT_EXECUTABLES := dicttool_aosp makedict_aosp include $(BUILD_HOST_PREBUILT) diff --git a/tools/dicttool/etc/dicttool b/tools/dicttool/etc/dicttool_aosp index 8a39694f7..a4879a279 100755 --- a/tools/dicttool/etc/dicttool +++ b/tools/dicttool/etc/dicttool_aosp @@ -33,7 +33,7 @@ progdir=`pwd` prog="${progdir}"/`basename "${prog}"` cd "${oldwd}" -jarfile=dicttool.jar +jarfile=dicttool_aosp.jar frameworkdir="$progdir" if [ ! -r "$frameworkdir/$jarfile" ] then diff --git a/tools/dicttool/etc/makedict_aosp b/tools/dicttool/etc/makedict_aosp new file mode 100755 index 000000000..095c50538 --- /dev/null +++ b/tools/dicttool/etc/makedict_aosp @@ -0,0 +1,18 @@ +#!/bin/sh +# Copyright (C) 2012, The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Dicttool supports making the dictionary using the 'makedict' command and +# the same arguments that the old 'makedict' command used to accept. +dicttool_aosp makedict $@ diff --git a/tools/dicttool/src/android/inputmethod/latin/dicttool/AdditionalCommandList.java b/tools/dicttool/src/android/inputmethod/latin/dicttool/AdditionalCommandList.java new file mode 100644 index 000000000..8d4eb751b --- /dev/null +++ b/tools/dicttool/src/android/inputmethod/latin/dicttool/AdditionalCommandList.java @@ -0,0 +1,22 @@ +/** + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.android.inputmethod.latin.dicttool; + +public class AdditionalCommandList { + public static void populate() { + } +} diff --git a/tools/dicttool/src/android/inputmethod/latin/dicttool/CommandList.java b/tools/dicttool/src/android/inputmethod/latin/dicttool/CommandList.java new file mode 100644 index 000000000..d16b069fe --- /dev/null +++ b/tools/dicttool/src/android/inputmethod/latin/dicttool/CommandList.java @@ -0,0 +1,26 @@ +/** + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.android.inputmethod.latin.dicttool; + +public class CommandList { + public static void populate() { + Dicttool.addCommand("info", Info.class); + Dicttool.addCommand("compress", Compress.Compressor.class); + Dicttool.addCommand("uncompress", Compress.Uncompressor.class); + Dicttool.addCommand("makedict", Makedict.class); + } +} diff --git a/tools/dicttool/src/android/inputmethod/latin/dicttool/Compress.java b/tools/dicttool/src/android/inputmethod/latin/dicttool/Compress.java index a76ec50e0..3cb0a12c4 100644 --- a/tools/dicttool/src/android/inputmethod/latin/dicttool/Compress.java +++ b/tools/dicttool/src/android/inputmethod/latin/dicttool/Compress.java @@ -46,46 +46,52 @@ public class Compress { static public class Compressor extends Dicttool.Command { public static final String COMMAND = "compress"; - private static final String SUFFIX = ".compressed"; + public static final String STDIN_OR_STDOUT = "-"; public Compressor() { } public String getHelp() { - return "compress <filename>: Compresses a file using gzip compression"; + return COMMAND + " <src_filename> <dst_filename>: " + + "Compresses a file using gzip compression"; } public void run() throws IOException { - if (mArgs.length < 1) { - throw new RuntimeException("Not enough arguments for command " + COMMAND); + if (mArgs.length > 2) { + throw new RuntimeException("Too many arguments for command " + COMMAND); } - final String inFilename = mArgs[0]; - final String outFilename = inFilename + SUFFIX; - final FileInputStream input = new FileInputStream(new File(inFilename)); - final FileOutputStream output = new FileOutputStream(new File(outFilename)); + final String inFilename = mArgs.length >= 1 ? mArgs[0] : STDIN_OR_STDOUT; + final String outFilename = mArgs.length >= 2 ? mArgs[1] : STDIN_OR_STDOUT; + final InputStream input = inFilename.equals(STDIN_OR_STDOUT) ? System.in + : new FileInputStream(new File(inFilename)); + final OutputStream output = outFilename.equals(STDIN_OR_STDOUT) ? System.out + : new FileOutputStream(new File(outFilename)); copy(input, new GZIPOutputStream(output)); } } static public class Uncompressor extends Dicttool.Command { public static final String COMMAND = "uncompress"; - private static final String SUFFIX = ".uncompressed"; + public static final String STDIN_OR_STDOUT = "-"; public Uncompressor() { } public String getHelp() { - return "uncompress <filename>: Uncompresses a file compressed with gzip compression"; + return COMMAND + " <src_filename> <dst_filename>: " + + "Uncompresses a file compressed with gzip compression"; } public void run() throws IOException { - if (mArgs.length < 1) { - throw new RuntimeException("Not enough arguments for command " + COMMAND); + if (mArgs.length > 2) { + throw new RuntimeException("Too many arguments for command " + COMMAND); } - final String inFilename = mArgs[0]; - final String outFilename = inFilename + SUFFIX; - final FileInputStream input = new FileInputStream(new File(inFilename)); - final FileOutputStream output = new FileOutputStream(new File(outFilename)); + final String inFilename = mArgs.length >= 1 ? mArgs[0] : STDIN_OR_STDOUT; + final String outFilename = mArgs.length >= 2 ? mArgs[1] : STDIN_OR_STDOUT; + final InputStream input = inFilename.equals(STDIN_OR_STDOUT) ? System.in + : new FileInputStream(new File(inFilename)); + final OutputStream output = outFilename.equals(STDIN_OR_STDOUT) ? System.out + : new FileOutputStream(new File(outFilename)); copy(new GZIPInputStream(input), output); } } diff --git a/tools/dicttool/src/android/inputmethod/latin/dicttool/DictionaryMaker.java b/tools/dicttool/src/android/inputmethod/latin/dicttool/DictionaryMaker.java index 9ebd3bbdd..25e1740cb 100644 --- a/tools/dicttool/src/android/inputmethod/latin/dicttool/DictionaryMaker.java +++ b/tools/dicttool/src/android/inputmethod/latin/dicttool/DictionaryMaker.java @@ -112,7 +112,7 @@ public class DictionaryMaker { public static String getHelp() { return "Usage: makedict " - + "[-s <unigrams.xml> [-b <bigrams.xml>] [-c <shortcuts.xml>] " + + "[-s <unigrams.xml> [-b <bigrams.xml>] [-c <shortcuts_and_whitelist.xml>] " + "| -s <binary input>] [-d <binary output format version 2>] " + "[-d1 <binary output format version 1>] [-x <xml output>] [-2]\n" + "\n" @@ -246,7 +246,7 @@ public class DictionaryMaker { * Read a dictionary from a unigram XML file, and optionally a bigram XML file. * * @param unigramXmlFilename the name of the unigram XML file. May not be null. - * @param shortcutXmlFilename the name of the shortcut XML file, or null if there is none. + * @param shortcutXmlFilename the name of the shortcut/whitelist XML file, or null if none. * @param bigramXmlFilename the name of the bigram XML file. Pass null if there are no bigrams. * @return the read dictionary. * @throws FileNotFoundException if one of the files can't be found diff --git a/tools/dicttool/src/android/inputmethod/latin/dicttool/Dicttool.java b/tools/dicttool/src/android/inputmethod/latin/dicttool/Dicttool.java index c14ce7b88..bf417fb5a 100644 --- a/tools/dicttool/src/android/inputmethod/latin/dicttool/Dicttool.java +++ b/tools/dicttool/src/android/inputmethod/latin/dicttool/Dicttool.java @@ -32,10 +32,11 @@ public class Dicttool { static HashMap<String, Class<? extends Command>> sCommands = new HashMap<String, Class<? extends Command>>(); static { - sCommands.put("info", Info.class); - sCommands.put("compress", Compress.Compressor.class); - sCommands.put("uncompress", Compress.Uncompressor.class); - sCommands.put("makedict", Makedict.class); + CommandList.populate(); + AdditionalCommandList.populate(); + } + public static void addCommand(final String commandName, final Class<? extends Command> cls) { + sCommands.put(commandName, cls); } private static Command getCommandInstance(final String commandName) { diff --git a/tools/dicttool/src/android/inputmethod/latin/dicttool/XmlDictInputOutput.java b/tools/dicttool/src/android/inputmethod/latin/dicttool/XmlDictInputOutput.java index 8e2e73505..9ce8c4934 100644 --- a/tools/dicttool/src/android/inputmethod/latin/dicttool/XmlDictInputOutput.java +++ b/tools/dicttool/src/android/inputmethod/latin/dicttool/XmlDictInputOutput.java @@ -90,6 +90,10 @@ public class XmlDictInputOutput { public FusionDictionary getFinalDictionary() { final FusionDictionary dict = mDictionary; + for (final String shortcutOnly : mShortcutsMap.keySet()) { + if (dict.hasWord(shortcutOnly)) continue; + dict.add(shortcutOnly, 0, mShortcutsMap.get(shortcutOnly)); + } mDictionary = null; mShortcutsMap.clear(); mWord = ""; @@ -179,7 +183,7 @@ public class XmlDictInputOutput { mSrc = attrs.getValue(uri, SRC_ATTRIBUTE); } else if (DST_TAG.equals(localName)) { String dst = attrs.getValue(uri, DST_ATTRIBUTE); - int freq = Integer.parseInt(attrs.getValue(uri, DST_FREQ)); + int freq = getValueFromFreqString(attrs.getValue(uri, DST_FREQ)); WeightedString bigram = new WeightedString(dst, freq / XML_TO_MEMORY_RATIO); ArrayList<WeightedString> bigramList = mAssocMap.get(mSrc); if (null == bigramList) bigramList = new ArrayList<WeightedString>(); @@ -188,6 +192,10 @@ public class XmlDictInputOutput { } } + protected int getValueFromFreqString(final String freqString) { + return Integer.parseInt(freqString); + } + // This may return an empty map, but will never return null. public HashMap<String, ArrayList<WeightedString>> getAssocMap() { return mAssocMap; @@ -216,22 +224,40 @@ public class XmlDictInputOutput { } /** - * SAX handler for a shortcut XML file. + * SAX handler for a shortcut & whitelist XML file. */ - static private class ShortcutHandler extends AssociativeListHandler { + static private class ShortcutAndWhitelistHandler extends AssociativeListHandler { private final static String ENTRY_TAG = "entry"; private final static String ENTRY_ATTRIBUTE = "shortcut"; private final static String TARGET_TAG = "target"; private final static String REPLACEMENT_ATTRIBUTE = "replacement"; private final static String TARGET_PRIORITY_ATTRIBUTE = "priority"; + private final static String WHITELIST_MARKER = "whitelist"; + private final static int WHITELIST_FREQ_VALUE = 15; + private final static int MIN_FREQ = 0; + private final static int MAX_FREQ = 14; - public ShortcutHandler() { + public ShortcutAndWhitelistHandler() { super(ENTRY_TAG, ENTRY_ATTRIBUTE, TARGET_TAG, REPLACEMENT_ATTRIBUTE, TARGET_PRIORITY_ATTRIBUTE); } + @Override + protected int getValueFromFreqString(final String freqString) { + if (WHITELIST_MARKER.equals(freqString)) { + return WHITELIST_FREQ_VALUE; + } else { + final int intValue = super.getValueFromFreqString(freqString); + if (intValue < MIN_FREQ || intValue > MAX_FREQ) { + throw new RuntimeException("Shortcut freq out of range. Accepted range is " + + MIN_FREQ + ".." + MAX_FREQ); + } + return intValue; + } + } + // As per getAssocMap(), this never returns null. - public HashMap<String, ArrayList<WeightedString>> getShortcutMap() { + public HashMap<String, ArrayList<WeightedString>> getShortcutAndWhitelistMap() { return getAssocMap(); } } @@ -243,7 +269,7 @@ public class XmlDictInputOutput { * representation. * * @param unigrams the file to read the data from. - * @param shortcuts the file to read the shortcuts from, or null. + * @param shortcuts the file to read the shortcuts & whitelist from, or null. * @param bigrams the file to read the bigrams from, or null. * @return the in-memory representation of the dictionary. */ @@ -256,11 +282,12 @@ public class XmlDictInputOutput { final BigramHandler bigramHandler = new BigramHandler(); if (null != bigrams) parser.parse(bigrams, bigramHandler); - final ShortcutHandler shortcutHandler = new ShortcutHandler(); - if (null != shortcuts) parser.parse(shortcuts, shortcutHandler); + final ShortcutAndWhitelistHandler shortcutAndWhitelistHandler = + new ShortcutAndWhitelistHandler(); + if (null != shortcuts) parser.parse(shortcuts, shortcutAndWhitelistHandler); final UnigramHandler unigramHandler = - new UnigramHandler(shortcutHandler.getShortcutMap()); + new UnigramHandler(shortcutAndWhitelistHandler.getShortcutAndWhitelistMap()); parser.parse(unigrams, unigramHandler); final FusionDictionary dict = unigramHandler.getFinalDictionary(); final HashMap<String, ArrayList<WeightedString>> bigramMap = bigramHandler.getBigramMap(); @@ -280,7 +307,7 @@ public class XmlDictInputOutput { * * This method reads data from the parser and creates a new FusionDictionary with it. * The format parsed by this method is the format used before Ice Cream Sandwich, - * which has no support for bigrams or shortcuts. + * which has no support for bigrams or shortcuts/whitelist. * It is important to note that this method expects the parser to have already eaten * the first, all-encompassing tag. * @@ -291,7 +318,7 @@ public class XmlDictInputOutput { /** * Writes a dictionary to an XML file. * - * The output format is the "second" format, which supports bigrams and shortcuts. + * The output format is the "second" format, which supports bigrams and shortcuts/whitelist. * * @param destination a destination stream to write to. * @param dict the dictionary to write. |