diff options
Diffstat (limited to 'tools/dicttool/src')
12 files changed, 401 insertions, 408 deletions
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java index e571bc21d..3ef03f4bd 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java @@ -17,20 +17,22 @@ package com.android.inputmethod.latin.dicttool; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils; +import com.android.inputmethod.latin.makedict.BinaryDictIOUtils; import com.android.inputmethod.latin.makedict.DictDecoder; -import com.android.inputmethod.latin.makedict.FormatSpec; import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.UnsupportedFormatException; import org.xml.sax.SAXException; -import java.io.File; import java.io.BufferedInputStream; import java.io.BufferedOutputStream; +import java.io.BufferedReader; +import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; +import java.io.InputStreamReader; import java.io.OutputStream; import java.util.ArrayList; @@ -51,14 +53,17 @@ public final class BinaryDictOffdeviceUtils { public final static String ENCRYPTION = "encrypted"; private final static int MAX_DECODE_DEPTH = 8; + private final static int COPY_BUFFER_SIZE = 8192; public static class DecoderChainSpec { - ArrayList<String> mDecoderSpec = new ArrayList<String>(); + ArrayList<String> mDecoderSpec = new ArrayList<>(); File mFile; + public DecoderChainSpec addStep(final String stepDescription) { mDecoderSpec.add(stepDescription); return this; } + public String describeChain() { final StringBuilder s = new StringBuilder("raw"); for (final String step : mDecoderSpec) { @@ -70,13 +75,10 @@ public final class BinaryDictOffdeviceUtils { } public static void copy(final InputStream input, final OutputStream output) throws IOException { - final byte[] buffer = new byte[1000]; - final BufferedInputStream in = new BufferedInputStream(input); - final BufferedOutputStream out = new BufferedOutputStream(output); - for (int readBytes = in.read(buffer); readBytes >= 0; readBytes = in.read(buffer)) + final byte[] buffer = new byte[COPY_BUFFER_SIZE]; + for (int readBytes = input.read(buffer); readBytes >= 0; readBytes = input.read(buffer)) { output.write(buffer, 0, readBytes); - in.close(); - out.close(); + } } /** @@ -131,11 +133,15 @@ public final class BinaryDictOffdeviceUtils { try { final File dst = File.createTempFile(PREFIX, SUFFIX); dst.deleteOnExit(); - final FileOutputStream dstStream = new FileOutputStream(dst); - copy(Compress.getUncompressedStream(new BufferedInputStream(new FileInputStream(src))), - new BufferedOutputStream(dstStream)); // #copy() closes the streams - return dst; - } catch (IOException e) { + try ( + final InputStream input = Compress.getUncompressedStream( + new BufferedInputStream(new FileInputStream(src))); + final OutputStream output = new BufferedOutputStream(new FileOutputStream(dst)) + ) { + copy(input, output); + return dst; + } + } catch (final IOException e) { // Could not uncompress the file: presumably the file is simply not a compressed file return null; } @@ -150,20 +156,20 @@ public final class BinaryDictOffdeviceUtils { try { final File dst = File.createTempFile(PREFIX, SUFFIX); dst.deleteOnExit(); - final FileOutputStream dstStream = new FileOutputStream(dst); - copy(Crypt.getDecryptedStream(new BufferedInputStream(new FileInputStream(src))), - dstStream); // #copy() closes the streams - return dst; - } catch (IOException e) { + try ( + final InputStream input = Crypt.getDecryptedStream( + new BufferedInputStream(new FileInputStream(src))); + final OutputStream output = new BufferedOutputStream(new FileOutputStream(dst)) + ) { + copy(input, output); + return dst; + } + } catch (final IOException e) { // Could not decrypt the file: presumably the file is simply not a crypted file return null; } } - static void crash(final String filename, final Exception e) { - throw new RuntimeException("Can't read file " + filename, e); - } - static FusionDictionary getDictionary(final String filename, final boolean report) { final File file = new File(filename); if (report) { @@ -172,44 +178,40 @@ public final class BinaryDictOffdeviceUtils { } try { if (XmlDictInputOutput.isXmlUnigramDictionary(filename)) { - if (report) System.out.println("Format : XML unigram list"); + if (report) { + System.out.println("Format : XML unigram list"); + } return XmlDictInputOutput.readDictionaryXml( new BufferedInputStream(new FileInputStream(file)), null /* shortcuts */, null /* bigrams */); - } else { - final DecoderChainSpec decodedSpec = getRawDictionaryOrNull(file); - if (null == decodedSpec) { - crash(filename, new RuntimeException( - filename + " does not seem to be a dictionary file")); - } else if (CombinedInputOutput.isCombinedDictionary( - decodedSpec.mFile.getAbsolutePath())){ - if (report) { - System.out.println("Format : Combined format"); - System.out.println("Packaging : " + decodedSpec.describeChain()); - System.out.println("Uncompressed size : " + decodedSpec.mFile.length()); - } - return CombinedInputOutput.readDictionaryCombined( - new BufferedInputStream(new FileInputStream(decodedSpec.mFile))); - } else { - final DictDecoder dictDecoder = FormatSpec.getDictDecoder(decodedSpec.mFile, - DictDecoder.USE_BYTEARRAY); - if (report) { - System.out.println("Format : Binary dictionary format"); - System.out.println("Packaging : " + decodedSpec.describeChain()); - System.out.println("Uncompressed size : " + decodedSpec.mFile.length()); - } - return dictDecoder.readDictionaryBinary(null, false /* deleteDictIfBroken */); + } + final DecoderChainSpec decodedSpec = getRawDictionaryOrNull(file); + if (null == decodedSpec) { + throw new RuntimeException("Does not seem to be a dictionary file " + filename); + } + if (CombinedInputOutput.isCombinedDictionary(decodedSpec.mFile.getAbsolutePath())) { + if (report) { + System.out.println("Format : Combined format"); + System.out.println("Packaging : " + decodedSpec.describeChain()); + System.out.println("Uncompressed size : " + decodedSpec.mFile.length()); } + try (final BufferedReader reader = new BufferedReader( + new InputStreamReader(new FileInputStream(decodedSpec.mFile), "UTF-8"))) { + return CombinedInputOutput.readDictionaryCombined(reader); + } + } + final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder( + decodedSpec.mFile, 0, decodedSpec.mFile.length(), + DictDecoder.USE_BYTEARRAY); + if (report) { + System.out.println("Format : Binary dictionary format"); + System.out.println("Packaging : " + decodedSpec.describeChain()); + System.out.println("Uncompressed size : " + decodedSpec.mFile.length()); } - } catch (IOException e) { - crash(filename, e); - } catch (SAXException e) { - crash(filename, e); - } catch (ParserConfigurationException e) { - crash(filename, e); - } catch (UnsupportedFormatException e) { - crash(filename, e); + return dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */); + } catch (final IOException | SAXException | ParserConfigurationException | + UnsupportedFormatException e) { + throw new RuntimeException("Can't read file " + filename, e); } - return null; } } diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java index 4b6716936..23cbee81c 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java @@ -17,20 +17,18 @@ package com.android.inputmethod.latin.dicttool; import com.android.inputmethod.latin.makedict.FormatSpec; +import com.android.inputmethod.latin.makedict.FormatSpec.DictionaryOptions; import com.android.inputmethod.latin.makedict.FusionDictionary; -import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; -import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; -import com.android.inputmethod.latin.makedict.Word; +import com.android.inputmethod.latin.makedict.ProbabilityInfo; +import com.android.inputmethod.latin.makedict.WeightedString; +import com.android.inputmethod.latin.makedict.WordProperty; +import com.android.inputmethod.latin.utils.CombinedFormatUtils; import java.io.BufferedReader; -import java.io.File; -import java.io.FileNotFoundException; +import java.io.BufferedWriter; import java.io.FileReader; import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.Writer; import java.util.ArrayList; import java.util.HashMap; import java.util.TreeSet; @@ -41,18 +39,10 @@ import java.util.TreeSet; * All functions in this class are static. */ public class CombinedInputOutput { - - private static final String DICTIONARY_TAG = "dictionary"; - private static final String BIGRAM_TAG = "bigram"; - private static final String SHORTCUT_TAG = "shortcut"; - private static final String FREQUENCY_TAG = "f"; - private static final String WORD_TAG = "word"; - private static final String NOT_A_WORD_TAG = "not_a_word"; private static final String WHITELIST_TAG = "whitelist"; private static final String OPTIONS_TAG = "options"; - private static final String GERMAN_UMLAUT_PROCESSING_OPTION = "german_umlaut_processing"; - private static final String FRENCH_LIGATURE_PROCESSING_OPTION = "french_ligature_processing"; private static final String COMMENT_LINE_STARTER = "#"; + private static final int HISTORICAL_INFO_ELEMENT_COUNT = 3; /** * Basic test to find out whether the file is in the combined format or not. @@ -63,26 +53,15 @@ public class CombinedInputOutput { * @return true if the file is in the combined format, false otherwise */ public static boolean isCombinedDictionary(final String filename) { - BufferedReader reader = null; - try { - reader = new BufferedReader(new FileReader(new File(filename))); + try (final BufferedReader reader = new BufferedReader(new FileReader(filename))) { String firstLine = reader.readLine(); while (firstLine.startsWith(COMMENT_LINE_STARTER)) { firstLine = reader.readLine(); } - return firstLine.matches("^" + DICTIONARY_TAG + "=[^:]+(:[^=]+=[^:]+)*"); - } catch (FileNotFoundException e) { - return false; - } catch (IOException e) { + return firstLine.matches( + "^" + CombinedFormatUtils.DICTIONARY_TAG + "=[^:]+(:[^=]+=[^:]+)*"); + } catch (final IOException e) { return false; - } finally { - if (reader != null) { - try { - reader.close(); - } catch (IOException e) { - // do nothing - } - } } } @@ -92,18 +71,17 @@ public class CombinedInputOutput { * This is the public method that will read a combined file and return the corresponding memory * representation. * - * @param source the file to read the data from. + * @param reader the buffered reader to read the data from. * @return the in-memory representation of the dictionary. */ - public static FusionDictionary readDictionaryCombined(final InputStream source) + public static FusionDictionary readDictionaryCombined(final BufferedReader reader) throws IOException { - final BufferedReader reader = new BufferedReader(new InputStreamReader(source, "UTF-8")); String headerLine = reader.readLine(); while (headerLine.startsWith(COMMENT_LINE_STARTER)) { headerLine = reader.readLine(); } final String header[] = headerLine.split(","); - final HashMap<String, String> attributes = new HashMap<String, String>(); + final HashMap<String, String> attributes = new HashMap<>(); for (String item : header) { final String keyValue[] = item.split("="); if (2 != keyValue.length) { @@ -112,53 +90,62 @@ public class CombinedInputOutput { attributes.put(keyValue[0], keyValue[1]); } - final boolean processUmlauts = - GERMAN_UMLAUT_PROCESSING_OPTION.equals(attributes.get(OPTIONS_TAG)); - final boolean processLigatures = - FRENCH_LIGATURE_PROCESSING_OPTION.equals(attributes.get(OPTIONS_TAG)); attributes.remove(OPTIONS_TAG); - final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), new DictionaryOptions( - attributes, processUmlauts, processLigatures)); + final FusionDictionary dict = + new FusionDictionary(new PtNodeArray(), new DictionaryOptions(attributes)); String line; String word = null; - int freq = 0; + ProbabilityInfo probabilityInfo = new ProbabilityInfo(0); boolean isNotAWord = false; - ArrayList<WeightedString> bigrams = new ArrayList<WeightedString>(); - ArrayList<WeightedString> shortcuts = new ArrayList<WeightedString>(); + ArrayList<WeightedString> bigrams = new ArrayList<>(); + ArrayList<WeightedString> shortcuts = new ArrayList<>(); while (null != (line = reader.readLine())) { if (line.startsWith(COMMENT_LINE_STARTER)) continue; final String args[] = line.trim().split(","); - if (args[0].matches(WORD_TAG + "=.*")) { + if (args[0].matches(CombinedFormatUtils.WORD_TAG + "=.*")) { if (null != word) { - dict.add(word, freq, shortcuts.isEmpty() ? null : shortcuts, isNotAWord); + dict.add(word, probabilityInfo, shortcuts.isEmpty() ? null : shortcuts, + isNotAWord); for (WeightedString s : bigrams) { - dict.setBigram(word, s.mWord, s.mFrequency); + dict.setBigram(word, s.mWord, s.mProbabilityInfo); } } - if (!shortcuts.isEmpty()) shortcuts = new ArrayList<WeightedString>(); - if (!bigrams.isEmpty()) bigrams = new ArrayList<WeightedString>(); + if (!shortcuts.isEmpty()) shortcuts = new ArrayList<>(); + if (!bigrams.isEmpty()) bigrams = new ArrayList<>(); isNotAWord = false; for (String param : args) { final String params[] = param.split("=", 2); if (2 != params.length) throw new RuntimeException("Wrong format : " + line); - if (WORD_TAG.equals(params[0])) { + if (CombinedFormatUtils.WORD_TAG.equals(params[0])) { word = params[1]; - } else if (FREQUENCY_TAG.equals(params[0])) { - freq = Integer.parseInt(params[1]); - } else if (NOT_A_WORD_TAG.equals(params[0])) { + } else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) { + probabilityInfo = new ProbabilityInfo(Integer.parseInt(params[1]), + probabilityInfo.mTimestamp, probabilityInfo.mLevel, + probabilityInfo.mCount); + } else if (CombinedFormatUtils.HISTORICAL_INFO_TAG.equals(params[0])) { + final String[] historicalInfoParams = + params[1].split(CombinedFormatUtils.HISTORICAL_INFO_SEPARATOR); + if (historicalInfoParams.length != HISTORICAL_INFO_ELEMENT_COUNT) { + throw new RuntimeException("Wrong format (historical info) : " + line); + } + probabilityInfo = new ProbabilityInfo(probabilityInfo.mProbability, + Integer.parseInt(historicalInfoParams[0]), + Integer.parseInt(historicalInfoParams[1]), + Integer.parseInt(historicalInfoParams[2])); + } else if (CombinedFormatUtils.NOT_A_WORD_TAG.equals(params[0])) { isNotAWord = "true".equals(params[1]); } } - } else if (args[0].matches(SHORTCUT_TAG + "=.*")) { + } else if (args[0].matches(CombinedFormatUtils.SHORTCUT_TAG + "=.*")) { String shortcut = null; int shortcutFreq = 0; for (String param : args) { final String params[] = param.split("=", 2); if (2 != params.length) throw new RuntimeException("Wrong format : " + line); - if (SHORTCUT_TAG.equals(params[0])) { + if (CombinedFormatUtils.SHORTCUT_TAG.equals(params[0])) { shortcut = params[1]; - } else if (FREQUENCY_TAG.equals(params[0])) { + } else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) { shortcutFreq = WHITELIST_TAG.equals(params[1]) ? FormatSpec.SHORTCUT_WHITELIST_FREQUENCY : Integer.parseInt(params[1]); @@ -169,29 +156,42 @@ public class CombinedInputOutput { } else { throw new RuntimeException("Wrong format : " + line); } - } else if (args[0].matches(BIGRAM_TAG + "=.*")) { + } else if (args[0].matches(CombinedFormatUtils.BIGRAM_TAG + "=.*")) { String secondWordOfBigram = null; - int bigramFreq = 0; + ProbabilityInfo bigramProbabilityInfo = new ProbabilityInfo(0); for (String param : args) { final String params[] = param.split("=", 2); if (2 != params.length) throw new RuntimeException("Wrong format : " + line); - if (BIGRAM_TAG.equals(params[0])) { + if (CombinedFormatUtils.BIGRAM_TAG.equals(params[0])) { secondWordOfBigram = params[1]; - } else if (FREQUENCY_TAG.equals(params[0])) { - bigramFreq = Integer.parseInt(params[1]); + } else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) { + bigramProbabilityInfo = new ProbabilityInfo(Integer.parseInt(params[1]), + bigramProbabilityInfo.mTimestamp, bigramProbabilityInfo.mLevel, + bigramProbabilityInfo.mCount); + } else if (CombinedFormatUtils.HISTORICAL_INFO_TAG.equals(params[0])) { + final String[] historicalInfoParams = + params[1].split(CombinedFormatUtils.HISTORICAL_INFO_SEPARATOR); + if (historicalInfoParams.length != HISTORICAL_INFO_ELEMENT_COUNT) { + throw new RuntimeException("Wrong format (historical info) : " + line); + } + bigramProbabilityInfo = new ProbabilityInfo( + bigramProbabilityInfo.mProbability, + Integer.parseInt(historicalInfoParams[0]), + Integer.parseInt(historicalInfoParams[1]), + Integer.parseInt(historicalInfoParams[2])); } } if (null != secondWordOfBigram) { - bigrams.add(new WeightedString(secondWordOfBigram, bigramFreq)); + bigrams.add(new WeightedString(secondWordOfBigram, bigramProbabilityInfo)); } else { throw new RuntimeException("Wrong format : " + line); } } } if (null != word) { - dict.add(word, freq, shortcuts.isEmpty() ? null : shortcuts, isNotAWord); + dict.add(word, probabilityInfo, shortcuts.isEmpty() ? null : shortcuts, isNotAWord); for (WeightedString s : bigrams) { - dict.setBigram(word, s.mWord, s.mFrequency); + dict.setBigram(word, s.mWord, s.mProbabilityInfo); } } @@ -201,48 +201,19 @@ public class CombinedInputOutput { /** * Writes a dictionary to a combined file. * - * @param destination a destination stream to write to. + * @param destination a destination writer. * @param dict the dictionary to write. */ - public static void writeDictionaryCombined(Writer destination, FusionDictionary dict) - throws IOException { - final TreeSet<Word> set = new TreeSet<Word>(); - for (Word word : dict) { - set.add(word); // This for ordering by frequency, then by asciibetic order - } - final HashMap<String, String> options = dict.mOptions.mAttributes; - destination.write(DICTIONARY_TAG + "="); - if (options.containsKey(DICTIONARY_TAG)) { - destination.write(options.get(DICTIONARY_TAG)); - options.remove(DICTIONARY_TAG); + public static void writeDictionaryCombined(final BufferedWriter destination, + final FusionDictionary dict) throws IOException { + final TreeSet<WordProperty> wordPropertiesInDict = new TreeSet<>(); + for (final WordProperty wordProperty : dict) { + // This for ordering by frequency, then by asciibetic order + wordPropertiesInDict.add(wordProperty); } - if (dict.mOptions.mGermanUmlautProcessing) { - destination.write("," + OPTIONS_TAG + "=" + GERMAN_UMLAUT_PROCESSING_OPTION); - } else if (dict.mOptions.mFrenchLigatureProcessing) { - destination.write("," + OPTIONS_TAG + "=" + FRENCH_LIGATURE_PROCESSING_OPTION); - } - for (final String key : dict.mOptions.mAttributes.keySet()) { - final String value = dict.mOptions.mAttributes.get(key); - destination.write("," + key + "=" + value); - } - destination.write("\n"); - for (Word word : set) { - destination.write(" " + WORD_TAG + "=" + word.mWord + "," - + FREQUENCY_TAG + "=" + word.mFrequency - + (word.mIsNotAWord ? "," + NOT_A_WORD_TAG + "=true\n" : "\n")); - if (null != word.mShortcutTargets) { - for (WeightedString target : word.mShortcutTargets) { - destination.write(" " + SHORTCUT_TAG + "=" + target.mWord + "," - + FREQUENCY_TAG + "=" + target.mFrequency + "\n"); - } - } - if (null != word.mBigrams) { - for (WeightedString bigram : word.mBigrams) { - destination.write(" " + BIGRAM_TAG + "=" + bigram.mWord + "," - + FREQUENCY_TAG + "=" + bigram.mFrequency + "\n"); - } - } + destination.write(CombinedFormatUtils.formatAttributeMap(dict.mOptions.mAttributes)); + for (final WordProperty wordProperty : wordPropertiesInDict) { + destination.write(CombinedFormatUtils.formatWordProperty(wordProperty)); } - destination.close(); } } diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Compress.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Compress.java index b7f48b522..728a159a0 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Compress.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Compress.java @@ -16,11 +16,6 @@ package com.android.inputmethod.latin.dicttool; -import java.io.BufferedInputStream; -import java.io.BufferedOutputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -32,8 +27,7 @@ public class Compress { // This container class is not publicly instantiable. } - public static OutputStream getCompressedStream(final OutputStream out) - throws java.io.IOException { + public static OutputStream getCompressedStream(final OutputStream out) throws IOException { return new GZIPOutputStream(out); } @@ -43,7 +37,6 @@ public class Compress { static public class Compressor extends Dicttool.Command { public static final String COMMAND = "compress"; - public static final String STDIN_OR_STDOUT = "-"; public Compressor() { } @@ -61,17 +54,18 @@ public class Compress { } final String inFilename = mArgs.length >= 1 ? mArgs[0] : STDIN_OR_STDOUT; final String outFilename = mArgs.length >= 2 ? mArgs[1] : STDIN_OR_STDOUT; - final InputStream input = inFilename.equals(STDIN_OR_STDOUT) ? System.in - : new BufferedInputStream(new FileInputStream(new File(inFilename))); - final OutputStream output = outFilename.equals(STDIN_OR_STDOUT) ? System.out - : new BufferedOutputStream(new FileOutputStream(new File(outFilename))); - BinaryDictOffdeviceUtils.copy(input, new GZIPOutputStream(output)); + try ( + final InputStream input = getFileInputStreamOrStdIn(inFilename); + final OutputStream compressedOutput = getCompressedStream( + getFileOutputStreamOrStdOut(outFilename)) + ) { + BinaryDictOffdeviceUtils.copy(input, compressedOutput); + } } } static public class Uncompressor extends Dicttool.Command { public static final String COMMAND = "uncompress"; - public static final String STDIN_OR_STDOUT = "-"; public Uncompressor() { } @@ -89,11 +83,13 @@ public class Compress { } final String inFilename = mArgs.length >= 1 ? mArgs[0] : STDIN_OR_STDOUT; final String outFilename = mArgs.length >= 2 ? mArgs[1] : STDIN_OR_STDOUT; - final InputStream input = inFilename.equals(STDIN_OR_STDOUT) ? System.in - : new BufferedInputStream(new FileInputStream(new File(inFilename))); - final OutputStream output = outFilename.equals(STDIN_OR_STDOUT) ? System.out - : new BufferedOutputStream(new FileOutputStream(new File(outFilename))); - BinaryDictOffdeviceUtils.copy(new GZIPInputStream(input), output); + try ( + final InputStream uncompressedInput = getUncompressedStream( + getFileInputStreamOrStdIn(inFilename)); + final OutputStream output = getFileOutputStreamOrStdOut(outFilename) + ) { + BinaryDictOffdeviceUtils.copy(uncompressedInput, output); + } } } } diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java index 5c7e8b4f2..3d0557b5c 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java @@ -17,28 +17,33 @@ package com.android.inputmethod.latin.dicttool; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils; +import com.android.inputmethod.latin.makedict.BinaryDictIOUtils; import com.android.inputmethod.latin.makedict.DictDecoder; import com.android.inputmethod.latin.makedict.DictEncoder; import com.android.inputmethod.latin.makedict.FormatSpec; import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.MakedictLog; import com.android.inputmethod.latin.makedict.UnsupportedFormatException; -import com.android.inputmethod.latin.makedict.Ver3DictEncoder; +import com.android.inputmethod.latin.makedict.Ver2DictEncoder; import com.android.inputmethod.latin.makedict.Ver4DictEncoder; +import org.xml.sax.SAXException; + +import java.io.BufferedInputStream; +import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileWriter; import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; import java.util.Arrays; import java.util.LinkedList; import javax.xml.parsers.ParserConfigurationException; -import org.xml.sax.SAXException; - /** * Main class/method for DictionaryMaker. */ @@ -46,7 +51,6 @@ public class DictionaryMaker { static class Arguments { private static final String OPTION_VERSION_2 = "-2"; - private static final String OPTION_VERSION_3 = "-3"; private static final String OPTION_VERSION_4 = "-4"; private static final String OPTION_INPUT_SOURCE = "-s"; private static final String OPTION_INPUT_BIGRAM_XML = "-b"; @@ -138,7 +142,7 @@ public class DictionaryMaker { } public Arguments(String[] argsArray) throws IOException { - final LinkedList<String> args = new LinkedList<String>(Arrays.asList(argsArray)); + final LinkedList<String> args = new LinkedList<>(Arrays.asList(argsArray)); if (args.isEmpty()) { displayHelp(); } @@ -158,10 +162,8 @@ public class DictionaryMaker { if (arg.charAt(0) == '-') { if (OPTION_VERSION_2.equals(arg)) { // Do nothing, this is the default - } else if (OPTION_VERSION_3.equals(arg)) { - outputBinaryFormatVersion = 3; } else if (OPTION_VERSION_4.equals(arg)) { - outputBinaryFormatVersion = 4; + outputBinaryFormatVersion = FormatSpec.VERSION4; } else if (OPTION_HELP.equals(arg)) { displayHelp(); } else { @@ -267,8 +269,8 @@ public class DictionaryMaker { private static FusionDictionary readBinaryFile(final String binaryFilename) throws FileNotFoundException, IOException, UnsupportedFormatException { final File file = new File(binaryFilename); - final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file); - return dictDecoder.readDictionaryBinary(null, false /* deleteDictIfBroken */); + final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(file, 0, file.length()); + return dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */); } /** @@ -281,22 +283,21 @@ public class DictionaryMaker { */ private static FusionDictionary readCombinedFile(final String combinedFilename) throws FileNotFoundException, IOException { - FileInputStream inStream = null; - try { - final File file = new File(combinedFilename); - inStream = new FileInputStream(file); - return CombinedInputOutput.readDictionaryCombined(inStream); - } finally { - if (null != inStream) { - try { - inStream.close(); - } catch (IOException e) { - // do nothing - } - } + try (final BufferedReader reader = new BufferedReader(new InputStreamReader( + new FileInputStream(combinedFilename), "UTF-8")) + ) { + return CombinedInputOutput.readDictionaryCombined(reader); } } + private static BufferedInputStream getBufferedFileInputStream(final String filename) + throws FileNotFoundException { + if (filename == null) { + return null; + } + return new BufferedInputStream(new FileInputStream(filename)); + } + /** * Read a dictionary from a unigram XML file, and optionally a bigram XML file. * @@ -312,12 +313,13 @@ public class DictionaryMaker { private static FusionDictionary readXmlFile(final String unigramXmlFilename, final String shortcutXmlFilename, final String bigramXmlFilename) throws FileNotFoundException, SAXException, IOException, ParserConfigurationException { - final FileInputStream unigrams = new FileInputStream(new File(unigramXmlFilename)); - final FileInputStream shortcuts = null == shortcutXmlFilename ? null : - new FileInputStream(new File(shortcutXmlFilename)); - final FileInputStream bigrams = null == bigramXmlFilename ? null : - new FileInputStream(new File(bigramXmlFilename)); - return XmlDictInputOutput.readDictionaryXml(unigrams, shortcuts, bigrams); + try ( + final BufferedInputStream unigrams = getBufferedFileInputStream(unigramXmlFilename); + final BufferedInputStream shortcuts = getBufferedFileInputStream(shortcutXmlFilename); + final BufferedInputStream bigrams = getBufferedFileInputStream(bigramXmlFilename); + ) { + return XmlDictInputOutput.readDictionaryXml(unigrams, shortcuts, bigrams); + } } /** @@ -358,10 +360,10 @@ public class DictionaryMaker { final File outputFile = new File(outputFilename); final FormatSpec.FormatOptions formatOptions = new FormatSpec.FormatOptions(version); final DictEncoder dictEncoder; - if (version == 4) { + if (version == FormatSpec.VERSION4) { dictEncoder = new Ver4DictEncoder(outputFile); } else { - dictEncoder = new Ver3DictEncoder(outputFile); + dictEncoder = new Ver2DictEncoder(outputFile); } dictEncoder.writeDictionary(dict, formatOptions); } @@ -376,8 +378,9 @@ public class DictionaryMaker { */ private static void writeXmlDictionary(final String outputFilename, final FusionDictionary dict) throws FileNotFoundException, IOException { - XmlDictInputOutput.writeDictionaryXml(new BufferedWriter(new FileWriter(outputFilename)), - dict); + try (final BufferedWriter writer = new BufferedWriter(new FileWriter(outputFilename))) { + XmlDictInputOutput.writeDictionaryXml(writer, dict); + } } /** @@ -390,7 +393,8 @@ public class DictionaryMaker { */ private static void writeCombinedDictionary(final String outputFilename, final FusionDictionary dict) throws FileNotFoundException, IOException { - CombinedInputOutput.writeDictionaryCombined( - new BufferedWriter(new FileWriter(outputFilename)), dict); + try (final BufferedWriter writer = new BufferedWriter(new FileWriter(outputFilename))) { + CombinedInputOutput.writeDictionaryCombined(writer, dict); + } } } diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Dicttool.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Dicttool.java index cacee5268..e49b35084 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Dicttool.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Dicttool.java @@ -16,24 +16,63 @@ package com.android.inputmethod.latin.dicttool; +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.InputStream; +import java.io.OutputStream; import java.util.Arrays; import java.util.HashMap; public class Dicttool { public static abstract class Command { + public static final String STDIN_OR_STDOUT = "-"; protected String[] mArgs; + public void setArgs(String[] args) throws IllegalArgumentException { mArgs = args; } + + protected static InputStream getFileInputStreamOrStdIn(final String inFilename) + throws FileNotFoundException { + if (STDIN_OR_STDOUT.equals(inFilename)) { + return System.in; + } + return getFileInputStream(new File(inFilename)); + } + + protected static InputStream getFileInputStream(final File inFile) + throws FileNotFoundException { + return new BufferedInputStream(new FileInputStream(inFile)); + } + + protected static OutputStream getFileOutputStreamOrStdOut(final String outFilename) + throws FileNotFoundException { + if (STDIN_OR_STDOUT.equals(outFilename)) { + return System.out; + } + return getFileOutputStream(new File(outFilename)); + } + + protected static OutputStream getFileOutputStream(final File outFile) + throws FileNotFoundException { + return new BufferedOutputStream(new FileOutputStream(outFile)); + } + abstract public String getHelp(); abstract public void run() throws Exception; } - static HashMap<String, Class<? extends Command>> sCommands = - new HashMap<String, Class<? extends Command>>(); + + static HashMap<String, Class<? extends Command>> sCommands = new HashMap<>(); + static { CommandList.populate(); } + public static void addCommand(final String commandName, final Class<? extends Command> cls) { sCommands.put(commandName, cls); } @@ -61,7 +100,7 @@ public class Dicttool { return sCommands.containsKey(commandName); } - private Command getCommand(final String[] arguments) { + private static Command getCommand(final String[] arguments) { final String commandName = arguments[0]; if (!isCommand(commandName)) { throw new RuntimeException("Unknown command : " + commandName); @@ -77,7 +116,7 @@ public class Dicttool { * @param arguments the arguments passed to dicttool. * @return 0 for success, an error code otherwise (always 1 at the moment) */ - private int execute(final String[] arguments) { + private static int execute(final String[] arguments) { final Command command = getCommand(arguments); try { command.run(); @@ -96,6 +135,6 @@ public class Dicttool { return; } // Exit with the success/error code from #execute() as status. - System.exit(new Dicttool().execute(arguments)); + System.exit(execute(arguments)); } } diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java index 66fd084cd..94d1ae8bb 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java @@ -18,8 +18,8 @@ package com.android.inputmethod.latin.dicttool; import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; -import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; -import com.android.inputmethod.latin.makedict.Word; +import com.android.inputmethod.latin.makedict.WeightedString; +import com.android.inputmethod.latin.makedict.WordProperty; import java.util.Arrays; import java.util.ArrayList; @@ -85,20 +85,7 @@ public class Diff extends Dicttool.Command { private static void diffHeaders(final FusionDictionary dict0, final FusionDictionary dict1) { boolean hasDifferences = false; - if (dict0.mOptions.mFrenchLigatureProcessing != dict1.mOptions.mFrenchLigatureProcessing) { - System.out.println(" French ligature processing : " - + dict0.mOptions.mFrenchLigatureProcessing + " <=> " - + dict1.mOptions.mFrenchLigatureProcessing); - hasDifferences = true; - } - else if (dict0.mOptions.mGermanUmlautProcessing != dict1.mOptions.mGermanUmlautProcessing) { - System.out.println(" German umlaut processing : " - + dict0.mOptions.mGermanUmlautProcessing + " <=> " - + dict1.mOptions.mGermanUmlautProcessing); - hasDifferences = true; - } - final HashMap<String, String> options1 = - new HashMap<String, String>(dict1.mOptions.mAttributes); + final HashMap<String, String> options1 = new HashMap<>(dict1.mOptions.mAttributes); for (final String optionKey : dict0.mOptions.mAttributes.keySet()) { if (!dict0.mOptions.mAttributes.get(optionKey).equals( dict1.mOptions.mAttributes.get(optionKey))) { @@ -120,42 +107,47 @@ public class Diff extends Dicttool.Command { private static void diffWords(final FusionDictionary dict0, final FusionDictionary dict1) { boolean hasDifferences = false; - for (final Word word0 : dict0) { - final PtNode word1 = FusionDictionary.findWordInTree(dict1.mRootNodeArray, - word0.mWord); - if (null == word1) { + for (final WordProperty word0Property : dict0) { + final PtNode word1PtNode = FusionDictionary.findWordInTree(dict1.mRootNodeArray, + word0Property.mWord); + if (null == word1PtNode) { // This word is not in dict1 - System.out.println("Deleted: " + word0.mWord + " " + word0.mFrequency); + System.out.println("Deleted: " + word0Property.mWord + " " + + word0Property.getProbability()); hasDifferences = true; } else { // We found the word. Compare frequencies, shortcuts, bigrams - if (word0.mFrequency != word1.getFrequency()) { - System.out.println("Freq changed: " + word0.mWord + " " + word0.mFrequency - + " -> " + word1.getFrequency()); + if (word0Property.getProbability() != word1PtNode.getProbability()) { + System.out.println("Probability changed: " + word0Property.mWord + " " + + word0Property.getProbability() + " -> " + + word1PtNode.getProbability()); hasDifferences = true; } - if (word0.mIsNotAWord != word1.getIsNotAWord()) { - System.out.println("Not a word: " + word0.mWord + " " + word0.mIsNotAWord - + " -> " + word1.getIsNotAWord()); + if (word0Property.mIsNotAWord != word1PtNode.getIsNotAWord()) { + System.out.println("Not a word: " + word0Property.mWord + " " + + word0Property.mIsNotAWord + " -> " + word1PtNode.getIsNotAWord()); hasDifferences = true; } - if (word0.mIsBlacklistEntry != word1.getIsBlacklistEntry()) { - System.out.println("Blacklist: " + word0.mWord + " " + word0.mIsBlacklistEntry - + " -> " + word1.getIsBlacklistEntry()); + if (word0Property.mIsBlacklistEntry != word1PtNode.getIsBlacklistEntry()) { + System.out.println("Blacklist: " + word0Property.mWord + " " + + word0Property.mIsBlacklistEntry + " -> " + + word1PtNode.getIsBlacklistEntry()); hasDifferences = true; } - hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0.mWord, - "Bigram", word0.mBigrams, word1.getBigrams()); - hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0.mWord, - "Shortcut", word0.mShortcutTargets, word1.getShortcutTargets()); + hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord, + "Bigram", word0Property.mBigrams, word1PtNode.getBigrams()); + hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord, + "Shortcut", word0Property.mShortcutTargets, + word1PtNode.getShortcutTargets()); } } - for (final Word word1 : dict1) { - final PtNode word0 = FusionDictionary.findWordInTree(dict0.mRootNodeArray, - word1.mWord); - if (null == word0) { + for (final WordProperty word1Property : dict1) { + final PtNode word0PtNode = FusionDictionary.findWordInTree(dict0.mRootNodeArray, + word1Property.mWord); + if (null == word0PtNode) { // This word is not in dict0 - System.out.println("Added: " + word1.mWord + " " + word1.mFrequency); + System.out.println("Added: " + word1Property.mWord + " " + + word1Property.getProbability()); hasDifferences = true; } } @@ -171,7 +163,7 @@ public class Diff extends Dicttool.Command { if (null == list0) return false; for (final WeightedString attribute0 : list0) { System.out.println(type + " removed: " + word + " " + attribute0.mWord + " " - + attribute0.mFrequency); + + attribute0.getProbability()); } return true; } @@ -187,8 +179,8 @@ public class Diff extends Dicttool.Command { for (final WeightedString attribute1 : list1) { if (attribute0.mWord.equals(attribute1.mWord)) { System.out.println(type + " freq changed: " + word + " " - + attribute0.mWord + " " + attribute0.mFrequency + " -> " - + attribute1.mFrequency); + + attribute0.mWord + " " + attribute0.getProbability() + " -> " + + attribute1.getProbability()); list1.remove(attribute1); foundString = true; break; @@ -197,7 +189,7 @@ public class Diff extends Dicttool.Command { if (!foundString) { // We come here if we haven't found any matching string. System.out.println(type + " removed: " + word + " " + attribute0.mWord + " " - + attribute0.mFrequency); + + attribute0.getProbability()); } } else { list1.remove(attribute0); @@ -209,7 +201,7 @@ public class Diff extends Dicttool.Command { for (final WeightedString attribute1 : list1) { hasDifferences = true; System.out.println(type + " added: " + word + " " + attribute1.mWord + " " - + attribute1.mFrequency); + + attribute1.getProbability()); } return hasDifferences; } diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java index 350f42772..9b2567fd3 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java @@ -19,8 +19,8 @@ package com.android.inputmethod.latin.dicttool; import com.android.inputmethod.latin.makedict.FormatSpec; import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; -import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; -import com.android.inputmethod.latin.makedict.Word; +import com.android.inputmethod.latin.makedict.WeightedString; +import com.android.inputmethod.latin.makedict.WordProperty; import java.util.Arrays; import java.util.ArrayList; @@ -43,15 +43,16 @@ public class Info extends Dicttool.Command { int bigramCount = 0; int shortcutCount = 0; int whitelistCount = 0; - for (final Word w : dict) { + for (final WordProperty wordProperty : dict) { ++wordCount; - if (null != w.mBigrams) { - bigramCount += w.mBigrams.size(); + if (null != wordProperty.mBigrams) { + bigramCount += wordProperty.mBigrams.size(); } - if (null != w.mShortcutTargets) { - shortcutCount += w.mShortcutTargets.size(); - for (WeightedString shortcutTarget : w.mShortcutTargets) { - if (FormatSpec.SHORTCUT_WHITELIST_FREQUENCY == shortcutTarget.mFrequency) { + if (null != wordProperty.mShortcutTargets) { + shortcutCount += wordProperty.mShortcutTargets.size(); + for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) { + if (FormatSpec.SHORTCUT_WHITELIST_FREQUENCY + == shortcutTarget.getProbability()) { ++whitelistCount; } } @@ -71,7 +72,7 @@ public class Info extends Dicttool.Command { return; } System.out.println("Word: " + word); - System.out.println(" Freq: " + ptNode.getFrequency()); + System.out.println(" Freq: " + ptNode.getProbability()); if (ptNode.getIsNotAWord()) { System.out.println(" Is not a word"); } @@ -84,8 +85,9 @@ public class Info extends Dicttool.Command { } else { for (final WeightedString shortcutTarget : shortcutTargets) { System.out.println(" Shortcut target: " + shortcutTarget.mWord + " (" - + (FormatSpec.SHORTCUT_WHITELIST_FREQUENCY == shortcutTarget.mFrequency - ? "whitelist" : shortcutTarget.mFrequency) + ")"); + + (FormatSpec.SHORTCUT_WHITELIST_FREQUENCY + == shortcutTarget.getProbability() ? + "whitelist" : shortcutTarget.getProbability()) + ")"); } } final ArrayList<WeightedString> bigrams = ptNode.getBigrams(); @@ -93,7 +95,8 @@ public class Info extends Dicttool.Command { System.out.println(" No bigrams"); } else { for (final WeightedString bigram : bigrams) { - System.out.println(" Bigram: " + bigram.mWord + " (" + bigram.mFrequency + ")"); + System.out.println( + " Bigram: " + bigram.mWord + " (" + bigram.getProbability() + ")"); } } } diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java index dff3387be..1f6798269 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java @@ -21,8 +21,9 @@ import java.io.BufferedOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; -import java.io.FileOutputStream; import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; public class Package { private Package() { @@ -86,9 +87,13 @@ public class Package { } System.out.println("Packaging : " + decodedSpec.describeChain()); System.out.println("Uncompressed size : " + decodedSpec.mFile.length()); - final FileOutputStream dstStream = new FileOutputStream(new File(mArgs[1])); - BinaryDictOffdeviceUtils.copy(new BufferedInputStream( - new FileInputStream(decodedSpec.mFile)), new BufferedOutputStream(dstStream)); + try ( + final InputStream input = getFileInputStream(decodedSpec.mFile); + final OutputStream output = new BufferedOutputStream( + getFileOutputStreamOrStdOut(mArgs[1])) + ) { + BinaryDictOffdeviceUtils.copy(input, output); + } } } } diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java index 9174238da..b6383d788 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java @@ -18,31 +18,43 @@ package com.android.inputmethod.latin.dicttool; import com.android.inputmethod.latin.makedict.BinaryDictDecoderEncoderTests; import com.android.inputmethod.latin.makedict.BinaryDictEncoderFlattenTreeTests; -import com.android.inputmethod.latin.makedict.BinaryDictIOUtilsTests; import com.android.inputmethod.latin.makedict.FusionDictionaryTest; +import com.android.inputmethod.latin.utils.FileUtils; +import java.io.File; +import java.io.IOException; import java.lang.reflect.Constructor; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; +import java.nio.file.Files; import java.util.ArrayList; /** * Dicttool command implementing self-tests. */ public class Test extends Dicttool.Command { + private static final String getTmpDir() { + try { + return Files.createTempDirectory("dicttool").toString(); + } catch (IOException e) { + throw new RuntimeException("Can't get temporary directory", e); + } + } + private static final String TEST_TMP_DIR_BASE = getTmpDir(); + public static final File TEST_TMP_DIR = new File(TEST_TMP_DIR_BASE); public static final String COMMAND = "test"; + private static final int DEFAULT_MAX_UNIGRAMS = 1500; private long mSeed = System.currentTimeMillis(); - private int mMaxUnigrams = BinaryDictIOUtilsTests.DEFAULT_MAX_UNIGRAMS; + private int mMaxUnigrams = DEFAULT_MAX_UNIGRAMS; private static final Class<?>[] sClassesToTest = { BinaryDictOffdeviceUtilsTests.class, FusionDictionaryTest.class, BinaryDictDecoderEncoderTests.class, BinaryDictEncoderFlattenTreeTests.class, - BinaryDictIOUtilsTests.class }; - private ArrayList<Method> mAllTestMethods = new ArrayList<Method>(); - private ArrayList<String> mUsedTestMethods = new ArrayList<String>(); + private ArrayList<Method> mAllTestMethods = new ArrayList<>(); + private ArrayList<String> mUsedTestMethods = new ArrayList<>(); public Test() { for (final Class<?> c : sClassesToTest) { @@ -57,8 +69,12 @@ public class Test extends Dicttool.Command { @Override public String getHelp() { - final StringBuilder s = new StringBuilder("test [-s seed] [-m maxUnigrams] [testName...]\n" - + "If seed is not specified, the current time is used.\nTest list is:\n"); + final StringBuilder s = new StringBuilder( + "test [-s seed] [-m maxUnigrams] [-n] [testName...]\n" + + "If seed is not specified, the current time is used.\n" + + "If -n option is provided, do not delete temporary files in " + + TEST_TMP_DIR_BASE + "/*.\n" + + "Test list is:\n"); for (final Method m : mAllTestMethods) { s.append(" "); s.append(m.getName()); @@ -71,17 +87,26 @@ public class Test extends Dicttool.Command { public void run() throws IllegalAccessException, InstantiationException, InvocationTargetException { int i = 0; + boolean deleteTmpDir = true; while (i < mArgs.length) { final String arg = mArgs[i++]; if ("-s".equals(arg)) { mSeed = Long.parseLong(mArgs[i++]); } else if ("-m".equals(arg)) { mMaxUnigrams = Integer.parseInt(mArgs[i++]); + } else if ("-n".equals(arg)) { + deleteTmpDir = false; } else { mUsedTestMethods.add(arg); } } - runChosenTests(); + try { + runChosenTests(); + } finally { + if (deleteTmpDir) { + FileUtils.deleteRecursively(TEST_TMP_DIR); + } + } } private void runChosenTests() throws IllegalAccessException, InstantiationException, diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java index 4e99bf979..bdec44761 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java @@ -16,19 +16,23 @@ package com.android.inputmethod.latin.dicttool; +import com.android.inputmethod.latin.makedict.FormatSpec.DictionaryOptions; import com.android.inputmethod.latin.makedict.FusionDictionary; -import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; -import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; -import com.android.inputmethod.latin.makedict.Word; +import com.android.inputmethod.latin.makedict.ProbabilityInfo; +import com.android.inputmethod.latin.makedict.WeightedString; +import com.android.inputmethod.latin.makedict.WordProperty; +import org.xml.sax.Attributes; +import org.xml.sax.SAXException; +import org.xml.sax.helpers.DefaultHandler; + +import java.io.BufferedInputStream; import java.io.BufferedReader; -import java.io.File; -import java.io.FileNotFoundException; -import java.io.FileReader; +import java.io.BufferedWriter; +import java.io.FileInputStream; import java.io.IOException; -import java.io.InputStream; -import java.io.Writer; +import java.io.InputStreamReader; import java.util.ArrayList; import java.util.HashMap; import java.util.TreeSet; @@ -37,10 +41,6 @@ import javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; -import org.xml.sax.Attributes; -import org.xml.sax.SAXException; -import org.xml.sax.helpers.DefaultHandler; - /** * Reads and writes XML files for a FusionDictionary. * @@ -52,14 +52,10 @@ public class XmlDictInputOutput { private static final String WORD_TAG = "w"; private static final String BIGRAM_TAG = "bigram"; private static final String SHORTCUT_TAG = "shortcut"; - private static final String FREQUENCY_ATTR = "f"; + private static final String PROBABILITY_ATTR = "f"; private static final String WORD_ATTR = "word"; private static final String NOT_A_WORD_ATTR = "not_a_word"; - private static final String OPTIONS_KEY = "options"; - private static final String GERMAN_UMLAUT_PROCESSING_OPTION = "german_umlaut_processing"; - private static final String FRENCH_LIGATURE_PROCESSING_OPTION = "french_ligature_processing"; - /** * SAX handler for a unigram XML file. */ @@ -68,6 +64,7 @@ public class XmlDictInputOutput { private static final int START = 1; private static final int WORD = 2; private static final int UNKNOWN = 3; + private static final int SHORTCUT_ONLY_WORD_PROBABILITY = 1; FusionDictionary mDictionary; int mState; // the state of the parser @@ -92,7 +89,8 @@ public class XmlDictInputOutput { final FusionDictionary dict = mDictionary; for (final String shortcutOnly : mShortcutsMap.keySet()) { if (dict.hasWord(shortcutOnly)) continue; - dict.add(shortcutOnly, 1, mShortcutsMap.get(shortcutOnly), true /* isNotAWord */); + dict.add(shortcutOnly, new ProbabilityInfo(SHORTCUT_ONLY_WORD_PROBABILITY), + mShortcutsMap.get(shortcutOnly), true /* isNotAWord */); } mDictionary = null; mShortcutsMap.clear(); @@ -109,23 +107,18 @@ public class XmlDictInputOutput { mWord = ""; for (int attrIndex = 0; attrIndex < attrs.getLength(); ++attrIndex) { final String attrName = attrs.getLocalName(attrIndex); - if (FREQUENCY_ATTR.equals(attrName)) { + if (PROBABILITY_ATTR.equals(attrName)) { mFreq = Integer.parseInt(attrs.getValue(attrIndex)); } } } else if (ROOT_TAG.equals(localName)) { - final HashMap<String, String> attributes = new HashMap<String, String>(); + final HashMap<String, String> attributes = new HashMap<>(); for (int attrIndex = 0; attrIndex < attrs.getLength(); ++attrIndex) { final String attrName = attrs.getLocalName(attrIndex); attributes.put(attrName, attrs.getValue(attrIndex)); } - final String optionsString = attributes.get(OPTIONS_KEY); - final boolean processUmlauts = - GERMAN_UMLAUT_PROCESSING_OPTION.equals(optionsString); - final boolean processLigatures = - FRENCH_LIGATURE_PROCESSING_OPTION.equals(optionsString); mDictionary = new FusionDictionary(new PtNodeArray(), - new DictionaryOptions(attributes, processUmlauts, processLigatures)); + new DictionaryOptions(attributes)); } else { mState = UNKNOWN; } @@ -144,7 +137,8 @@ public class XmlDictInputOutput { @Override public void endElement(String uri, String localName, String qName) { if (WORD == mState) { - mDictionary.add(mWord, mFreq, mShortcutsMap.get(mWord), false /* isNotAWord */); + mDictionary.add(mWord, new ProbabilityInfo(mFreq), mShortcutsMap.get(mWord), + false /* isNotAWord */); mState = START; } } @@ -174,7 +168,7 @@ public class XmlDictInputOutput { DST_ATTRIBUTE = dstAttribute; DST_FREQ = dstFreq; mSrc = null; - mAssocMap = new HashMap<String, ArrayList<WeightedString>>(); + mAssocMap = new HashMap<>(); } @Override @@ -186,7 +180,7 @@ public class XmlDictInputOutput { int freq = getValueFromFreqString(attrs.getValue(uri, DST_FREQ)); WeightedString bigram = new WeightedString(dst, freq / XML_TO_MEMORY_RATIO); ArrayList<WeightedString> bigramList = mAssocMap.get(mSrc); - if (null == bigramList) bigramList = new ArrayList<WeightedString>(); + if (null == bigramList) bigramList = new ArrayList<>(); bigramList.add(bigram); mAssocMap.put(mSrc, bigramList); } @@ -246,14 +240,13 @@ public class XmlDictInputOutput { protected int getValueFromFreqString(final String freqString) { if (WHITELIST_MARKER.equals(freqString)) { return WHITELIST_FREQ_VALUE; - } else { - final int intValue = super.getValueFromFreqString(freqString); - if (intValue < MIN_FREQ || intValue > MAX_FREQ) { - throw new RuntimeException("Shortcut freq out of range. Accepted range is " - + MIN_FREQ + ".." + MAX_FREQ); - } - return intValue; } + final int intValue = super.getValueFromFreqString(freqString); + if (intValue < MIN_FREQ || intValue > MAX_FREQ) { + throw new RuntimeException("Shortcut freq out of range. Accepted range is " + + MIN_FREQ + ".." + MAX_FREQ); + } + return intValue; } // As per getAssocMap(), this never returns null. @@ -271,23 +264,12 @@ public class XmlDictInputOutput { * @return true if the file is in the unigram XML format, false otherwise */ public static boolean isXmlUnigramDictionary(final String filename) { - BufferedReader reader = null; - try { - reader = new BufferedReader(new FileReader(new File(filename))); + try (final BufferedReader reader = new BufferedReader( + new InputStreamReader(new FileInputStream(filename), "UTF-8"))) { final String firstLine = reader.readLine(); return firstLine.matches("^\\s*<wordlist .*>\\s*$"); - } catch (FileNotFoundException e) { + } catch (final IOException e) { return false; - } catch (IOException e) { - return false; - } finally { - if (reader != null) { - try { - reader.close(); - } catch (IOException e) { - // do nothing - } - } } } @@ -302,8 +284,8 @@ public class XmlDictInputOutput { * @param bigrams the file to read the bigrams from, or null. * @return the in-memory representation of the dictionary. */ - public static FusionDictionary readDictionaryXml(final InputStream unigrams, - final InputStream shortcuts, final InputStream bigrams) + public static FusionDictionary readDictionaryXml(final BufferedInputStream unigrams, + final BufferedInputStream shortcuts, final BufferedInputStream bigrams) throws SAXException, IOException, ParserConfigurationException { final SAXParserFactory factory = SAXParserFactory.newInstance(); factory.setNamespaceAware(true); @@ -325,7 +307,7 @@ public class XmlDictInputOutput { final ArrayList<WeightedString> bigramList = bigramMap.get(firstWord); for (final WeightedString bigram : bigramList) { if (!dict.hasWord(bigram.mWord)) continue; - dict.setBigram(firstWord, bigram.mWord, bigram.mFrequency); + dict.setBigram(firstWord, bigram.mWord, bigram.mProbabilityInfo); } } return dict; @@ -352,44 +334,40 @@ public class XmlDictInputOutput { * @param destination a destination stream to write to. * @param dict the dictionary to write. */ - public static void writeDictionaryXml(Writer destination, FusionDictionary dict) - throws IOException { - final TreeSet<Word> set = new TreeSet<Word>(); - for (Word word : dict) { - set.add(word); + public static void writeDictionaryXml(final BufferedWriter destination, + final FusionDictionary dict) throws IOException { + final TreeSet<WordProperty> wordPropertiesInDict = new TreeSet<>(); + for (WordProperty wordProperty : dict) { + wordPropertiesInDict.add(wordProperty); } // TODO: use an XMLSerializer if this gets big destination.write("<wordlist format=\"2\""); - final HashMap<String, String> options = dict.mOptions.mAttributes; - if (dict.mOptions.mGermanUmlautProcessing) { - destination.write(" " + OPTIONS_KEY + "=\"" + GERMAN_UMLAUT_PROCESSING_OPTION + "\""); - } else if (dict.mOptions.mFrenchLigatureProcessing) { - destination.write(" " + OPTIONS_KEY + "=\"" + FRENCH_LIGATURE_PROCESSING_OPTION + "\""); - } for (final String key : dict.mOptions.mAttributes.keySet()) { final String value = dict.mOptions.mAttributes.get(key); destination.write(" " + key + "=\"" + value + "\""); } destination.write(">\n"); destination.write("<!-- Warning: there is no code to read this format yet. -->\n"); - for (Word word : set) { - destination.write(" <" + WORD_TAG + " " + WORD_ATTR + "=\"" + word.mWord + "\" " - + FREQUENCY_ATTR + "=\"" + word.mFrequency - + (word.mIsNotAWord ? "\" " + NOT_A_WORD_ATTR + "=\"true" : "") + "\">"); - if (null != word.mShortcutTargets) { + for (WordProperty wordProperty : wordPropertiesInDict) { + destination.write(" <" + WORD_TAG + " " + WORD_ATTR + "=\"" + wordProperty.mWord + + "\" " + PROBABILITY_ATTR + "=\"" + wordProperty.getProbability() + + (wordProperty.mIsNotAWord ? "\" " + NOT_A_WORD_ATTR + "=\"true" : "") + + "\">"); + if (null != wordProperty.mShortcutTargets) { destination.write("\n"); - for (WeightedString target : word.mShortcutTargets) { - destination.write(" <" + SHORTCUT_TAG + " " + FREQUENCY_ATTR + "=\"" - + target.mFrequency + "\">" + target.mWord + "</" + SHORTCUT_TAG + for (WeightedString target : wordProperty.mShortcutTargets) { + destination.write(" <" + SHORTCUT_TAG + " " + PROBABILITY_ATTR + "=\"" + + target.getProbability() + "\">" + target.mWord + "</" + SHORTCUT_TAG + ">\n"); } destination.write(" "); } - if (null != word.mBigrams) { + if (null != wordProperty.mBigrams) { destination.write("\n"); - for (WeightedString bigram : word.mBigrams) { - destination.write(" <" + BIGRAM_TAG + " " + FREQUENCY_ATTR + "=\"" - + bigram.mFrequency + "\">" + bigram.mWord + "</" + BIGRAM_TAG + ">\n"); + for (WeightedString bigram : wordProperty.mBigrams) { + destination.write(" <" + BIGRAM_TAG + " " + PROBABILITY_ATTR + "=\"" + + bigram.getProbability() + "\">" + bigram.mWord + + "</" + BIGRAM_TAG + ">\n"); } destination.write(" "); } diff --git a/tools/dicttool/src/com/android/inputmethod/latin/makedict/MakedictLog.java b/tools/dicttool/src/com/android/inputmethod/latin/makedict/MakedictLog.java deleted file mode 100644 index 7eccff2b4..000000000 --- a/tools/dicttool/src/com/android/inputmethod/latin/makedict/MakedictLog.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (C) 2012 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ - -package com.android.inputmethod.latin.makedict; - -/** - * Wrapper to redirect log events to the right output medium. - */ -public class MakedictLog { - public static final boolean DBG = true; - - private static void print(String message) { - System.out.println(message); - } - - public static void d(String message) { - print(message); - } - - public static void i(String message) { - print(message); - } - - public static void w(String message) { - print(message); - } - - public static void e(String message) { - print(message); - } -} diff --git a/tools/dicttool/src/com/android/inputmethod/latin/personalization/PersonalizationHelper.java b/tools/dicttool/src/com/android/inputmethod/latin/personalization/PersonalizationHelper.java new file mode 100644 index 000000000..a4ad6b514 --- /dev/null +++ b/tools/dicttool/src/com/android/inputmethod/latin/personalization/PersonalizationHelper.java @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.inputmethod.latin.personalization; + +public class PersonalizationHelper { + public static void currentTimeChangedForTesting(final int currentTimestamp) { + } +} |