about | summary | refs | log | tree | commit | diff | stats
path: root/tools/dicttool/src
diff options
context:
space:
mode:
Diffstat (limited to 'tools/dicttool/src')
-rw-r--r-- tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java 114
-rw-r--r-- tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java 179
-rw-r--r-- tools/dicttool/src/com/android/inputmethod/latin/dicttool/Compress.java 34
-rw-r--r-- tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java 74
-rw-r--r-- tools/dicttool/src/com/android/inputmethod/latin/dicttool/Dicttool.java 49
-rw-r--r-- tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java 80
-rw-r--r-- tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java 29
-rw-r--r-- tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java 13
-rw-r--r-- tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java 41
-rw-r--r-- tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java 130
-rw-r--r-- tools/dicttool/src/com/android/inputmethod/latin/makedict/MakedictLog.java 44
-rw-r--r-- tools/dicttool/src/com/android/inputmethod/latin/personalization/PersonalizationHelper.java 22
12 files changed, 401 insertions, 408 deletions
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
index e571bc21d..3ef03f4bd 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
@@ -17,20 +17,22 @@
package com.android.inputmethod.latin.dicttool;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils;
+import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
import com.android.inputmethod.latin.makedict.DictDecoder;
-import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
import org.xml.sax.SAXException;
-import java.io.File;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
+import java.io.BufferedReader;
+import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.io.InputStreamReader;
import java.io.OutputStream;
import java.util.ArrayList;
@@ -51,14 +53,17 @@ public final class BinaryDictOffdeviceUtils {
public final static String ENCRYPTION = "encrypted";
private final static int MAX_DECODE_DEPTH = 8;
+ private final static int COPY_BUFFER_SIZE = 8192;
public static class DecoderChainSpec {
- ArrayList<String> mDecoderSpec = new ArrayList<String>();
+ ArrayList<String> mDecoderSpec = new ArrayList<>();
File mFile;
+
public DecoderChainSpec addStep(final String stepDescription) {
mDecoderSpec.add(stepDescription);
return this;
}
+
public String describeChain() {
final StringBuilder s = new StringBuilder("raw");
for (final String step : mDecoderSpec) {
@@ -70,13 +75,10 @@ public final class BinaryDictOffdeviceUtils {
}
public static void copy(final InputStream input, final OutputStream output) throws IOException {
- final byte[] buffer = new byte[1000];
- final BufferedInputStream in = new BufferedInputStream(input);
- final BufferedOutputStream out = new BufferedOutputStream(output);
- for (int readBytes = in.read(buffer); readBytes >= 0; readBytes = in.read(buffer))
+ final byte[] buffer = new byte[COPY_BUFFER_SIZE];
+ for (int readBytes = input.read(buffer); readBytes >= 0; readBytes = input.read(buffer)) {
output.write(buffer, 0, readBytes);
- in.close();
- out.close();
+ }
}
/**
@@ -131,11 +133,15 @@ public final class BinaryDictOffdeviceUtils {
try {
final File dst = File.createTempFile(PREFIX, SUFFIX);
dst.deleteOnExit();
- final FileOutputStream dstStream = new FileOutputStream(dst);
- copy(Compress.getUncompressedStream(new BufferedInputStream(new FileInputStream(src))),
- new BufferedOutputStream(dstStream)); // #copy() closes the streams
- return dst;
- } catch (IOException e) {
+ try (
+ final InputStream input = Compress.getUncompressedStream(
+ new BufferedInputStream(new FileInputStream(src)));
+ final OutputStream output = new BufferedOutputStream(new FileOutputStream(dst))
+ ) {
+ copy(input, output);
+ return dst;
+ }
+ } catch (final IOException e) {
// Could not uncompress the file: presumably the file is simply not a compressed file
return null;
}
@@ -150,20 +156,20 @@ public final class BinaryDictOffdeviceUtils {
try {
final File dst = File.createTempFile(PREFIX, SUFFIX);
dst.deleteOnExit();
- final FileOutputStream dstStream = new FileOutputStream(dst);
- copy(Crypt.getDecryptedStream(new BufferedInputStream(new FileInputStream(src))),
- dstStream); // #copy() closes the streams
- return dst;
- } catch (IOException e) {
+ try (
+ final InputStream input = Crypt.getDecryptedStream(
+ new BufferedInputStream(new FileInputStream(src)));
+ final OutputStream output = new BufferedOutputStream(new FileOutputStream(dst))
+ ) {
+ copy(input, output);
+ return dst;
+ }
+ } catch (final IOException e) {
// Could not decrypt the file: presumably the file is simply not a crypted file
return null;
}
}
- static void crash(final String filename, final Exception e) {
- throw new RuntimeException("Can't read file " + filename, e);
- }
-
static FusionDictionary getDictionary(final String filename, final boolean report) {
final File file = new File(filename);
if (report) {
@@ -172,44 +178,40 @@ public final class BinaryDictOffdeviceUtils {
}
try {
if (XmlDictInputOutput.isXmlUnigramDictionary(filename)) {
- if (report) System.out.println("Format : XML unigram list");
+ if (report) {
+ System.out.println("Format : XML unigram list");
+ }
return XmlDictInputOutput.readDictionaryXml(
new BufferedInputStream(new FileInputStream(file)),
null /* shortcuts */, null /* bigrams */);
- } else {
- final DecoderChainSpec decodedSpec = getRawDictionaryOrNull(file);
- if (null == decodedSpec) {
- crash(filename, new RuntimeException(
- filename + " does not seem to be a dictionary file"));
- } else if (CombinedInputOutput.isCombinedDictionary(
- decodedSpec.mFile.getAbsolutePath())){
- if (report) {
- System.out.println("Format : Combined format");
- System.out.println("Packaging : " + decodedSpec.describeChain());
- System.out.println("Uncompressed size : " + decodedSpec.mFile.length());
- }
- return CombinedInputOutput.readDictionaryCombined(
- new BufferedInputStream(new FileInputStream(decodedSpec.mFile)));
- } else {
- final DictDecoder dictDecoder = FormatSpec.getDictDecoder(decodedSpec.mFile,
- DictDecoder.USE_BYTEARRAY);
- if (report) {
- System.out.println("Format : Binary dictionary format");
- System.out.println("Packaging : " + decodedSpec.describeChain());
- System.out.println("Uncompressed size : " + decodedSpec.mFile.length());
- }
- return dictDecoder.readDictionaryBinary(null, false /* deleteDictIfBroken */);
+ }
+ final DecoderChainSpec decodedSpec = getRawDictionaryOrNull(file);
+ if (null == decodedSpec) {
+ throw new RuntimeException("Does not seem to be a dictionary file " + filename);
+ }
+ if (CombinedInputOutput.isCombinedDictionary(decodedSpec.mFile.getAbsolutePath())) {
+ if (report) {
+ System.out.println("Format : Combined format");
+ System.out.println("Packaging : " + decodedSpec.describeChain());
+ System.out.println("Uncompressed size : " + decodedSpec.mFile.length());
}
+ try (final BufferedReader reader = new BufferedReader(
+ new InputStreamReader(new FileInputStream(decodedSpec.mFile), "UTF-8"))) {
+ return CombinedInputOutput.readDictionaryCombined(reader);
+ }
+ }
+ final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(
+ decodedSpec.mFile, 0, decodedSpec.mFile.length(),
+ DictDecoder.USE_BYTEARRAY);
+ if (report) {
+ System.out.println("Format : Binary dictionary format");
+ System.out.println("Packaging : " + decodedSpec.describeChain());
+ System.out.println("Uncompressed size : " + decodedSpec.mFile.length());
}
- } catch (IOException e) {
- crash(filename, e);
- } catch (SAXException e) {
- crash(filename, e);
- } catch (ParserConfigurationException e) {
- crash(filename, e);
- } catch (UnsupportedFormatException e) {
- crash(filename, e);
+ return dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */);
+ } catch (final IOException | SAXException | ParserConfigurationException |
+ UnsupportedFormatException e) {
+ throw new RuntimeException("Can't read file " + filename, e);
}
- return null;
}
}
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java
index 4b6716936..23cbee81c 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java
@@ -17,20 +17,18 @@
package com.android.inputmethod.latin.dicttool;
import com.android.inputmethod.latin.makedict.FormatSpec;
+import com.android.inputmethod.latin.makedict.FormatSpec.DictionaryOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary;
-import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
-import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
-import com.android.inputmethod.latin.makedict.Word;
+import com.android.inputmethod.latin.makedict.ProbabilityInfo;
+import com.android.inputmethod.latin.makedict.WeightedString;
+import com.android.inputmethod.latin.makedict.WordProperty;
+import com.android.inputmethod.latin.utils.CombinedFormatUtils;
import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileNotFoundException;
+import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.Writer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.TreeSet;
@@ -41,18 +39,10 @@ import java.util.TreeSet;
* All functions in this class are static.
*/
public class CombinedInputOutput {
-
- private static final String DICTIONARY_TAG = "dictionary";
- private static final String BIGRAM_TAG = "bigram";
- private static final String SHORTCUT_TAG = "shortcut";
- private static final String FREQUENCY_TAG = "f";
- private static final String WORD_TAG = "word";
- private static final String NOT_A_WORD_TAG = "not_a_word";
private static final String WHITELIST_TAG = "whitelist";
private static final String OPTIONS_TAG = "options";
- private static final String GERMAN_UMLAUT_PROCESSING_OPTION = "german_umlaut_processing";
- private static final String FRENCH_LIGATURE_PROCESSING_OPTION = "french_ligature_processing";
private static final String COMMENT_LINE_STARTER = "#";
+ private static final int HISTORICAL_INFO_ELEMENT_COUNT = 3;
/**
* Basic test to find out whether the file is in the combined format or not.
@@ -63,26 +53,15 @@ public class CombinedInputOutput {
* @return true if the file is in the combined format, false otherwise
*/
public static boolean isCombinedDictionary(final String filename) {
- BufferedReader reader = null;
- try {
- reader = new BufferedReader(new FileReader(new File(filename)));
+ try (final BufferedReader reader = new BufferedReader(new FileReader(filename))) {
String firstLine = reader.readLine();
while (firstLine.startsWith(COMMENT_LINE_STARTER)) {
firstLine = reader.readLine();
}
- return firstLine.matches("^" + DICTIONARY_TAG + "=[^:]+(:[^=]+=[^:]+)*");
- } catch (FileNotFoundException e) {
- return false;
- } catch (IOException e) {
+ return firstLine.matches(
+ "^" + CombinedFormatUtils.DICTIONARY_TAG + "=[^:]+(:[^=]+=[^:]+)*");
+ } catch (final IOException e) {
return false;
- } finally {
- if (reader != null) {
- try {
- reader.close();
- } catch (IOException e) {
- // do nothing
- }
- }
}
}
@@ -92,18 +71,17 @@ public class CombinedInputOutput {
* This is the public method that will read a combined file and return the corresponding memory
* representation.
*
- * @param source the file to read the data from.
+ * @param reader the buffered reader to read the data from.
* @return the in-memory representation of the dictionary.
*/
- public static FusionDictionary readDictionaryCombined(final InputStream source)
+ public static FusionDictionary readDictionaryCombined(final BufferedReader reader)
throws IOException {
- final BufferedReader reader = new BufferedReader(new InputStreamReader(source, "UTF-8"));
String headerLine = reader.readLine();
while (headerLine.startsWith(COMMENT_LINE_STARTER)) {
headerLine = reader.readLine();
}
final String header[] = headerLine.split(",");
- final HashMap<String, String> attributes = new HashMap<String, String>();
+ final HashMap<String, String> attributes = new HashMap<>();
for (String item : header) {
final String keyValue[] = item.split("=");
if (2 != keyValue.length) {
@@ -112,53 +90,62 @@ public class CombinedInputOutput {
attributes.put(keyValue[0], keyValue[1]);
}
- final boolean processUmlauts =
- GERMAN_UMLAUT_PROCESSING_OPTION.equals(attributes.get(OPTIONS_TAG));
- final boolean processLigatures =
- FRENCH_LIGATURE_PROCESSING_OPTION.equals(attributes.get(OPTIONS_TAG));
attributes.remove(OPTIONS_TAG);
- final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), new DictionaryOptions(
- attributes, processUmlauts, processLigatures));
+ final FusionDictionary dict =
+ new FusionDictionary(new PtNodeArray(), new DictionaryOptions(attributes));
String line;
String word = null;
- int freq = 0;
+ ProbabilityInfo probabilityInfo = new ProbabilityInfo(0);
boolean isNotAWord = false;
- ArrayList<WeightedString> bigrams = new ArrayList<WeightedString>();
- ArrayList<WeightedString> shortcuts = new ArrayList<WeightedString>();
+ ArrayList<WeightedString> bigrams = new ArrayList<>();
+ ArrayList<WeightedString> shortcuts = new ArrayList<>();
while (null != (line = reader.readLine())) {
if (line.startsWith(COMMENT_LINE_STARTER)) continue;
final String args[] = line.trim().split(",");
- if (args[0].matches(WORD_TAG + "=.*")) {
+ if (args[0].matches(CombinedFormatUtils.WORD_TAG + "=.*")) {
if (null != word) {
- dict.add(word, freq, shortcuts.isEmpty() ? null : shortcuts, isNotAWord);
+ dict.add(word, probabilityInfo, shortcuts.isEmpty() ? null : shortcuts,
+ isNotAWord);
for (WeightedString s : bigrams) {
- dict.setBigram(word, s.mWord, s.mFrequency);
+ dict.setBigram(word, s.mWord, s.mProbabilityInfo);
}
}
- if (!shortcuts.isEmpty()) shortcuts = new ArrayList<WeightedString>();
- if (!bigrams.isEmpty()) bigrams = new ArrayList<WeightedString>();
+ if (!shortcuts.isEmpty()) shortcuts = new ArrayList<>();
+ if (!bigrams.isEmpty()) bigrams = new ArrayList<>();
isNotAWord = false;
for (String param : args) {
final String params[] = param.split("=", 2);
if (2 != params.length) throw new RuntimeException("Wrong format : " + line);
- if (WORD_TAG.equals(params[0])) {
+ if (CombinedFormatUtils.WORD_TAG.equals(params[0])) {
word = params[1];
- } else if (FREQUENCY_TAG.equals(params[0])) {
- freq = Integer.parseInt(params[1]);
- } else if (NOT_A_WORD_TAG.equals(params[0])) {
+ } else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) {
+ probabilityInfo = new ProbabilityInfo(Integer.parseInt(params[1]),
+ probabilityInfo.mTimestamp, probabilityInfo.mLevel,
+ probabilityInfo.mCount);
+ } else if (CombinedFormatUtils.HISTORICAL_INFO_TAG.equals(params[0])) {
+ final String[] historicalInfoParams =
+ params[1].split(CombinedFormatUtils.HISTORICAL_INFO_SEPARATOR);
+ if (historicalInfoParams.length != HISTORICAL_INFO_ELEMENT_COUNT) {
+ throw new RuntimeException("Wrong format (historical info) : " + line);
+ }
+ probabilityInfo = new ProbabilityInfo(probabilityInfo.mProbability,
+ Integer.parseInt(historicalInfoParams[0]),
+ Integer.parseInt(historicalInfoParams[1]),
+ Integer.parseInt(historicalInfoParams[2]));
+ } else if (CombinedFormatUtils.NOT_A_WORD_TAG.equals(params[0])) {
isNotAWord = "true".equals(params[1]);
}
}
- } else if (args[0].matches(SHORTCUT_TAG + "=.*")) {
+ } else if (args[0].matches(CombinedFormatUtils.SHORTCUT_TAG + "=.*")) {
String shortcut = null;
int shortcutFreq = 0;
for (String param : args) {
final String params[] = param.split("=", 2);
if (2 != params.length) throw new RuntimeException("Wrong format : " + line);
- if (SHORTCUT_TAG.equals(params[0])) {
+ if (CombinedFormatUtils.SHORTCUT_TAG.equals(params[0])) {
shortcut = params[1];
- } else if (FREQUENCY_TAG.equals(params[0])) {
+ } else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) {
shortcutFreq = WHITELIST_TAG.equals(params[1])
? FormatSpec.SHORTCUT_WHITELIST_FREQUENCY
: Integer.parseInt(params[1]);
@@ -169,29 +156,42 @@ public class CombinedInputOutput {
} else {
throw new RuntimeException("Wrong format : " + line);
}
- } else if (args[0].matches(BIGRAM_TAG + "=.*")) {
+ } else if (args[0].matches(CombinedFormatUtils.BIGRAM_TAG + "=.*")) {
String secondWordOfBigram = null;
- int bigramFreq = 0;
+ ProbabilityInfo bigramProbabilityInfo = new ProbabilityInfo(0);
for (String param : args) {
final String params[] = param.split("=", 2);
if (2 != params.length) throw new RuntimeException("Wrong format : " + line);
- if (BIGRAM_TAG.equals(params[0])) {
+ if (CombinedFormatUtils.BIGRAM_TAG.equals(params[0])) {
secondWordOfBigram = params[1];
- } else if (FREQUENCY_TAG.equals(params[0])) {
- bigramFreq = Integer.parseInt(params[1]);
+ } else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) {
+ bigramProbabilityInfo = new ProbabilityInfo(Integer.parseInt(params[1]),
+ bigramProbabilityInfo.mTimestamp, bigramProbabilityInfo.mLevel,
+ bigramProbabilityInfo.mCount);
+ } else if (CombinedFormatUtils.HISTORICAL_INFO_TAG.equals(params[0])) {
+ final String[] historicalInfoParams =
+ params[1].split(CombinedFormatUtils.HISTORICAL_INFO_SEPARATOR);
+ if (historicalInfoParams.length != HISTORICAL_INFO_ELEMENT_COUNT) {
+ throw new RuntimeException("Wrong format (historical info) : " + line);
+ }
+ bigramProbabilityInfo = new ProbabilityInfo(
+ bigramProbabilityInfo.mProbability,
+ Integer.parseInt(historicalInfoParams[0]),
+ Integer.parseInt(historicalInfoParams[1]),
+ Integer.parseInt(historicalInfoParams[2]));
}
}
if (null != secondWordOfBigram) {
- bigrams.add(new WeightedString(secondWordOfBigram, bigramFreq));
+ bigrams.add(new WeightedString(secondWordOfBigram, bigramProbabilityInfo));
} else {
throw new RuntimeException("Wrong format : " + line);
}
}
}
if (null != word) {
- dict.add(word, freq, shortcuts.isEmpty() ? null : shortcuts, isNotAWord);
+ dict.add(word, probabilityInfo, shortcuts.isEmpty() ? null : shortcuts, isNotAWord);
for (WeightedString s : bigrams) {
- dict.setBigram(word, s.mWord, s.mFrequency);
+ dict.setBigram(word, s.mWord, s.mProbabilityInfo);
}
}
@@ -201,48 +201,19 @@ public class CombinedInputOutput {
/**
* Writes a dictionary to a combined file.
*
- * @param destination a destination stream to write to.
+ * @param destination a destination writer.
* @param dict the dictionary to write.
*/
- public static void writeDictionaryCombined(Writer destination, FusionDictionary dict)
- throws IOException {
- final TreeSet<Word> set = new TreeSet<Word>();
- for (Word word : dict) {
- set.add(word); // This for ordering by frequency, then by asciibetic order
- }
- final HashMap<String, String> options = dict.mOptions.mAttributes;
- destination.write(DICTIONARY_TAG + "=");
- if (options.containsKey(DICTIONARY_TAG)) {
- destination.write(options.get(DICTIONARY_TAG));
- options.remove(DICTIONARY_TAG);
+ public static void writeDictionaryCombined(final BufferedWriter destination,
+ final FusionDictionary dict) throws IOException {
+ final TreeSet<WordProperty> wordPropertiesInDict = new TreeSet<>();
+ for (final WordProperty wordProperty : dict) {
+ // This for ordering by frequency, then by asciibetic order
+ wordPropertiesInDict.add(wordProperty);
}
- if (dict.mOptions.mGermanUmlautProcessing) {
- destination.write("," + OPTIONS_TAG + "=" + GERMAN_UMLAUT_PROCESSING_OPTION);
- } else if (dict.mOptions.mFrenchLigatureProcessing) {
- destination.write("," + OPTIONS_TAG + "=" + FRENCH_LIGATURE_PROCESSING_OPTION);
- }
- for (final String key : dict.mOptions.mAttributes.keySet()) {
- final String value = dict.mOptions.mAttributes.get(key);
- destination.write("," + key + "=" + value);
- }
- destination.write("\n");
- for (Word word : set) {
- destination.write(" " + WORD_TAG + "=" + word.mWord + ","
- + FREQUENCY_TAG + "=" + word.mFrequency
- + (word.mIsNotAWord ? "," + NOT_A_WORD_TAG + "=true\n" : "\n"));
- if (null != word.mShortcutTargets) {
- for (WeightedString target : word.mShortcutTargets) {
- destination.write(" " + SHORTCUT_TAG + "=" + target.mWord + ","
- + FREQUENCY_TAG + "=" + target.mFrequency + "\n");
- }
- }
- if (null != word.mBigrams) {
- for (WeightedString bigram : word.mBigrams) {
- destination.write(" " + BIGRAM_TAG + "=" + bigram.mWord + ","
- + FREQUENCY_TAG + "=" + bigram.mFrequency + "\n");
- }
- }
+ destination.write(CombinedFormatUtils.formatAttributeMap(dict.mOptions.mAttributes));
+ for (final WordProperty wordProperty : wordPropertiesInDict) {
+ destination.write(CombinedFormatUtils.formatWordProperty(wordProperty));
}
- destination.close();
}
}
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Compress.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Compress.java
index b7f48b522..728a159a0 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Compress.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Compress.java
@@ -16,11 +16,6 @@
package com.android.inputmethod.latin.dicttool;
-import java.io.BufferedInputStream;
-import java.io.BufferedOutputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
@@ -32,8 +27,7 @@ public class Compress {
// This container class is not publicly instantiable.
}
- public static OutputStream getCompressedStream(final OutputStream out)
- throws java.io.IOException {
+ public static OutputStream getCompressedStream(final OutputStream out) throws IOException {
return new GZIPOutputStream(out);
}
@@ -43,7 +37,6 @@ public class Compress {
static public class Compressor extends Dicttool.Command {
public static final String COMMAND = "compress";
- public static final String STDIN_OR_STDOUT = "-";
public Compressor() {
}
@@ -61,17 +54,18 @@ public class Compress {
}
final String inFilename = mArgs.length >= 1 ? mArgs[0] : STDIN_OR_STDOUT;
final String outFilename = mArgs.length >= 2 ? mArgs[1] : STDIN_OR_STDOUT;
- final InputStream input = inFilename.equals(STDIN_OR_STDOUT) ? System.in
- : new BufferedInputStream(new FileInputStream(new File(inFilename)));
- final OutputStream output = outFilename.equals(STDIN_OR_STDOUT) ? System.out
- : new BufferedOutputStream(new FileOutputStream(new File(outFilename)));
- BinaryDictOffdeviceUtils.copy(input, new GZIPOutputStream(output));
+ try (
+ final InputStream input = getFileInputStreamOrStdIn(inFilename);
+ final OutputStream compressedOutput = getCompressedStream(
+ getFileOutputStreamOrStdOut(outFilename))
+ ) {
+ BinaryDictOffdeviceUtils.copy(input, compressedOutput);
+ }
}
}
static public class Uncompressor extends Dicttool.Command {
public static final String COMMAND = "uncompress";
- public static final String STDIN_OR_STDOUT = "-";
public Uncompressor() {
}
@@ -89,11 +83,13 @@ public class Compress {
}
final String inFilename = mArgs.length >= 1 ? mArgs[0] : STDIN_OR_STDOUT;
final String outFilename = mArgs.length >= 2 ? mArgs[1] : STDIN_OR_STDOUT;
- final InputStream input = inFilename.equals(STDIN_OR_STDOUT) ? System.in
- : new BufferedInputStream(new FileInputStream(new File(inFilename)));
- final OutputStream output = outFilename.equals(STDIN_OR_STDOUT) ? System.out
- : new BufferedOutputStream(new FileOutputStream(new File(outFilename)));
- BinaryDictOffdeviceUtils.copy(new GZIPInputStream(input), output);
+ try (
+ final InputStream uncompressedInput = getUncompressedStream(
+ getFileInputStreamOrStdIn(inFilename));
+ final OutputStream output = getFileOutputStreamOrStdOut(outFilename)
+ ) {
+ BinaryDictOffdeviceUtils.copy(uncompressedInput, output);
+ }
}
}
}
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java
index 5c7e8b4f2..3d0557b5c 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java
@@ -17,28 +17,33 @@
package com.android.inputmethod.latin.dicttool;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils;
+import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
import com.android.inputmethod.latin.makedict.DictDecoder;
import com.android.inputmethod.latin.makedict.DictEncoder;
import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.MakedictLog;
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
-import com.android.inputmethod.latin.makedict.Ver3DictEncoder;
+import com.android.inputmethod.latin.makedict.Ver2DictEncoder;
import com.android.inputmethod.latin.makedict.Ver4DictEncoder;
+import org.xml.sax.SAXException;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
import java.util.Arrays;
import java.util.LinkedList;
import javax.xml.parsers.ParserConfigurationException;
-import org.xml.sax.SAXException;
-
/**
* Main class/method for DictionaryMaker.
*/
@@ -46,7 +51,6 @@ public class DictionaryMaker {
static class Arguments {
private static final String OPTION_VERSION_2 = "-2";
- private static final String OPTION_VERSION_3 = "-3";
private static final String OPTION_VERSION_4 = "-4";
private static final String OPTION_INPUT_SOURCE = "-s";
private static final String OPTION_INPUT_BIGRAM_XML = "-b";
@@ -138,7 +142,7 @@ public class DictionaryMaker {
}
public Arguments(String[] argsArray) throws IOException {
- final LinkedList<String> args = new LinkedList<String>(Arrays.asList(argsArray));
+ final LinkedList<String> args = new LinkedList<>(Arrays.asList(argsArray));
if (args.isEmpty()) {
displayHelp();
}
@@ -158,10 +162,8 @@ public class DictionaryMaker {
if (arg.charAt(0) == '-') {
if (OPTION_VERSION_2.equals(arg)) {
// Do nothing, this is the default
- } else if (OPTION_VERSION_3.equals(arg)) {
- outputBinaryFormatVersion = 3;
} else if (OPTION_VERSION_4.equals(arg)) {
- outputBinaryFormatVersion = 4;
+ outputBinaryFormatVersion = FormatSpec.VERSION4;
} else if (OPTION_HELP.equals(arg)) {
displayHelp();
} else {
@@ -267,8 +269,8 @@ public class DictionaryMaker {
private static FusionDictionary readBinaryFile(final String binaryFilename)
throws FileNotFoundException, IOException, UnsupportedFormatException {
final File file = new File(binaryFilename);
- final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file);
- return dictDecoder.readDictionaryBinary(null, false /* deleteDictIfBroken */);
+ final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(file, 0, file.length());
+ return dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */);
}
/**
@@ -281,22 +283,21 @@ public class DictionaryMaker {
*/
private static FusionDictionary readCombinedFile(final String combinedFilename)
throws FileNotFoundException, IOException {
- FileInputStream inStream = null;
- try {
- final File file = new File(combinedFilename);
- inStream = new FileInputStream(file);
- return CombinedInputOutput.readDictionaryCombined(inStream);
- } finally {
- if (null != inStream) {
- try {
- inStream.close();
- } catch (IOException e) {
- // do nothing
- }
- }
+ try (final BufferedReader reader = new BufferedReader(new InputStreamReader(
+ new FileInputStream(combinedFilename), "UTF-8"))
+ ) {
+ return CombinedInputOutput.readDictionaryCombined(reader);
}
}
+ private static BufferedInputStream getBufferedFileInputStream(final String filename)
+ throws FileNotFoundException {
+ if (filename == null) {
+ return null;
+ }
+ return new BufferedInputStream(new FileInputStream(filename));
+ }
+
/**
* Read a dictionary from a unigram XML file, and optionally a bigram XML file.
*
@@ -312,12 +313,13 @@ public class DictionaryMaker {
private static FusionDictionary readXmlFile(final String unigramXmlFilename,
final String shortcutXmlFilename, final String bigramXmlFilename)
throws FileNotFoundException, SAXException, IOException, ParserConfigurationException {
- final FileInputStream unigrams = new FileInputStream(new File(unigramXmlFilename));
- final FileInputStream shortcuts = null == shortcutXmlFilename ? null :
- new FileInputStream(new File(shortcutXmlFilename));
- final FileInputStream bigrams = null == bigramXmlFilename ? null :
- new FileInputStream(new File(bigramXmlFilename));
- return XmlDictInputOutput.readDictionaryXml(unigrams, shortcuts, bigrams);
+ try (
+ final BufferedInputStream unigrams = getBufferedFileInputStream(unigramXmlFilename);
+ final BufferedInputStream shortcuts = getBufferedFileInputStream(shortcutXmlFilename);
+ final BufferedInputStream bigrams = getBufferedFileInputStream(bigramXmlFilename);
+ ) {
+ return XmlDictInputOutput.readDictionaryXml(unigrams, shortcuts, bigrams);
+ }
}
/**
@@ -358,10 +360,10 @@ public class DictionaryMaker {
final File outputFile = new File(outputFilename);
final FormatSpec.FormatOptions formatOptions = new FormatSpec.FormatOptions(version);
final DictEncoder dictEncoder;
- if (version == 4) {
+ if (version == FormatSpec.VERSION4) {
dictEncoder = new Ver4DictEncoder(outputFile);
} else {
- dictEncoder = new Ver3DictEncoder(outputFile);
+ dictEncoder = new Ver2DictEncoder(outputFile);
}
dictEncoder.writeDictionary(dict, formatOptions);
}
@@ -376,8 +378,9 @@ public class DictionaryMaker {
*/
private static void writeXmlDictionary(final String outputFilename,
final FusionDictionary dict) throws FileNotFoundException, IOException {
- XmlDictInputOutput.writeDictionaryXml(new BufferedWriter(new FileWriter(outputFilename)),
- dict);
+ try (final BufferedWriter writer = new BufferedWriter(new FileWriter(outputFilename))) {
+ XmlDictInputOutput.writeDictionaryXml(writer, dict);
+ }
}
/**
@@ -390,7 +393,8 @@ public class DictionaryMaker {
*/
private static void writeCombinedDictionary(final String outputFilename,
final FusionDictionary dict) throws FileNotFoundException, IOException {
- CombinedInputOutput.writeDictionaryCombined(
- new BufferedWriter(new FileWriter(outputFilename)), dict);
+ try (final BufferedWriter writer = new BufferedWriter(new FileWriter(outputFilename))) {
+ CombinedInputOutput.writeDictionaryCombined(writer, dict);
+ }
}
}
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Dicttool.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Dicttool.java
index cacee5268..e49b35084 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Dicttool.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Dicttool.java
@@ -16,24 +16,63 @@
package com.android.inputmethod.latin.dicttool;
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.InputStream;
+import java.io.OutputStream;
import java.util.Arrays;
import java.util.HashMap;
public class Dicttool {
public static abstract class Command {
+ public static final String STDIN_OR_STDOUT = "-";
protected String[] mArgs;
+
public void setArgs(String[] args) throws IllegalArgumentException {
mArgs = args;
}
+
+ protected static InputStream getFileInputStreamOrStdIn(final String inFilename)
+ throws FileNotFoundException {
+ if (STDIN_OR_STDOUT.equals(inFilename)) {
+ return System.in;
+ }
+ return getFileInputStream(new File(inFilename));
+ }
+
+ protected static InputStream getFileInputStream(final File inFile)
+ throws FileNotFoundException {
+ return new BufferedInputStream(new FileInputStream(inFile));
+ }
+
+ protected static OutputStream getFileOutputStreamOrStdOut(final String outFilename)
+ throws FileNotFoundException {
+ if (STDIN_OR_STDOUT.equals(outFilename)) {
+ return System.out;
+ }
+ return getFileOutputStream(new File(outFilename));
+ }
+
+ protected static OutputStream getFileOutputStream(final File outFile)
+ throws FileNotFoundException {
+ return new BufferedOutputStream(new FileOutputStream(outFile));
+ }
+
abstract public String getHelp();
abstract public void run() throws Exception;
}
- static HashMap<String, Class<? extends Command>> sCommands =
- new HashMap<String, Class<? extends Command>>();
+
+ static HashMap<String, Class<? extends Command>> sCommands = new HashMap<>();
+
static {
CommandList.populate();
}
+
public static void addCommand(final String commandName, final Class<? extends Command> cls) {
sCommands.put(commandName, cls);
}
@@ -61,7 +100,7 @@ public class Dicttool {
return sCommands.containsKey(commandName);
}
- private Command getCommand(final String[] arguments) {
+ private static Command getCommand(final String[] arguments) {
final String commandName = arguments[0];
if (!isCommand(commandName)) {
throw new RuntimeException("Unknown command : " + commandName);
@@ -77,7 +116,7 @@ public class Dicttool {
* @param arguments the arguments passed to dicttool.
* @return 0 for success, an error code otherwise (always 1 at the moment)
*/
- private int execute(final String[] arguments) {
+ private static int execute(final String[] arguments) {
final Command command = getCommand(arguments);
try {
command.run();
@@ -96,6 +135,6 @@ public class Dicttool {
return;
}
// Exit with the success/error code from #execute() as status.
- System.exit(new Dicttool().execute(arguments));
+ System.exit(execute(arguments));
}
}
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java
index 66fd084cd..94d1ae8bb 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java
@@ -18,8 +18,8 @@ package com.android.inputmethod.latin.dicttool;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
-import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
-import com.android.inputmethod.latin.makedict.Word;
+import com.android.inputmethod.latin.makedict.WeightedString;
+import com.android.inputmethod.latin.makedict.WordProperty;
import java.util.Arrays;
import java.util.ArrayList;
@@ -85,20 +85,7 @@ public class Diff extends Dicttool.Command {
private static void diffHeaders(final FusionDictionary dict0, final FusionDictionary dict1) {
boolean hasDifferences = false;
- if (dict0.mOptions.mFrenchLigatureProcessing != dict1.mOptions.mFrenchLigatureProcessing) {
- System.out.println(" French ligature processing : "
- + dict0.mOptions.mFrenchLigatureProcessing + " <=> "
- + dict1.mOptions.mFrenchLigatureProcessing);
- hasDifferences = true;
- }
- else if (dict0.mOptions.mGermanUmlautProcessing != dict1.mOptions.mGermanUmlautProcessing) {
- System.out.println(" German umlaut processing : "
- + dict0.mOptions.mGermanUmlautProcessing + " <=> "
- + dict1.mOptions.mGermanUmlautProcessing);
- hasDifferences = true;
- }
- final HashMap<String, String> options1 =
- new HashMap<String, String>(dict1.mOptions.mAttributes);
+ final HashMap<String, String> options1 = new HashMap<>(dict1.mOptions.mAttributes);
for (final String optionKey : dict0.mOptions.mAttributes.keySet()) {
if (!dict0.mOptions.mAttributes.get(optionKey).equals(
dict1.mOptions.mAttributes.get(optionKey))) {
@@ -120,42 +107,47 @@ public class Diff extends Dicttool.Command {
private static void diffWords(final FusionDictionary dict0, final FusionDictionary dict1) {
boolean hasDifferences = false;
- for (final Word word0 : dict0) {
- final PtNode word1 = FusionDictionary.findWordInTree(dict1.mRootNodeArray,
- word0.mWord);
- if (null == word1) {
+ for (final WordProperty word0Property : dict0) {
+ final PtNode word1PtNode = FusionDictionary.findWordInTree(dict1.mRootNodeArray,
+ word0Property.mWord);
+ if (null == word1PtNode) {
// This word is not in dict1
- System.out.println("Deleted: " + word0.mWord + " " + word0.mFrequency);
+ System.out.println("Deleted: " + word0Property.mWord + " "
+ + word0Property.getProbability());
hasDifferences = true;
} else {
// We found the word. Compare frequencies, shortcuts, bigrams
- if (word0.mFrequency != word1.getFrequency()) {
- System.out.println("Freq changed: " + word0.mWord + " " + word0.mFrequency
- + " -> " + word1.getFrequency());
+ if (word0Property.getProbability() != word1PtNode.getProbability()) {
+ System.out.println("Probability changed: " + word0Property.mWord + " "
+ + word0Property.getProbability() + " -> "
+ + word1PtNode.getProbability());
hasDifferences = true;
}
- if (word0.mIsNotAWord != word1.getIsNotAWord()) {
- System.out.println("Not a word: " + word0.mWord + " " + word0.mIsNotAWord
- + " -> " + word1.getIsNotAWord());
+ if (word0Property.mIsNotAWord != word1PtNode.getIsNotAWord()) {
+ System.out.println("Not a word: " + word0Property.mWord + " "
+ + word0Property.mIsNotAWord + " -> " + word1PtNode.getIsNotAWord());
hasDifferences = true;
}
- if (word0.mIsBlacklistEntry != word1.getIsBlacklistEntry()) {
- System.out.println("Blacklist: " + word0.mWord + " " + word0.mIsBlacklistEntry
- + " -> " + word1.getIsBlacklistEntry());
+ if (word0Property.mIsBlacklistEntry != word1PtNode.getIsBlacklistEntry()) {
+ System.out.println("Blacklist: " + word0Property.mWord + " "
+ + word0Property.mIsBlacklistEntry + " -> "
+ + word1PtNode.getIsBlacklistEntry());
hasDifferences = true;
}
- hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0.mWord,
- "Bigram", word0.mBigrams, word1.getBigrams());
- hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0.mWord,
- "Shortcut", word0.mShortcutTargets, word1.getShortcutTargets());
+ hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord,
+ "Bigram", word0Property.mBigrams, word1PtNode.getBigrams());
+ hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord,
+ "Shortcut", word0Property.mShortcutTargets,
+ word1PtNode.getShortcutTargets());
}
}
- for (final Word word1 : dict1) {
- final PtNode word0 = FusionDictionary.findWordInTree(dict0.mRootNodeArray,
- word1.mWord);
- if (null == word0) {
+ for (final WordProperty word1Property : dict1) {
+ final PtNode word0PtNode = FusionDictionary.findWordInTree(dict0.mRootNodeArray,
+ word1Property.mWord);
+ if (null == word0PtNode) {
// This word is not in dict0
- System.out.println("Added: " + word1.mWord + " " + word1.mFrequency);
+ System.out.println("Added: " + word1Property.mWord + " "
+ + word1Property.getProbability());
hasDifferences = true;
}
}
@@ -171,7 +163,7 @@ public class Diff extends Dicttool.Command {
if (null == list0) return false;
for (final WeightedString attribute0 : list0) {
System.out.println(type + " removed: " + word + " " + attribute0.mWord + " "
- + attribute0.mFrequency);
+ + attribute0.getProbability());
}
return true;
}
@@ -187,8 +179,8 @@ public class Diff extends Dicttool.Command {
for (final WeightedString attribute1 : list1) {
if (attribute0.mWord.equals(attribute1.mWord)) {
System.out.println(type + " freq changed: " + word + " "
- + attribute0.mWord + " " + attribute0.mFrequency + " -> "
- + attribute1.mFrequency);
+ + attribute0.mWord + " " + attribute0.getProbability() + " -> "
+ + attribute1.getProbability());
list1.remove(attribute1);
foundString = true;
break;
@@ -197,7 +189,7 @@ public class Diff extends Dicttool.Command {
if (!foundString) {
// We come here if we haven't found any matching string.
System.out.println(type + " removed: " + word + " " + attribute0.mWord + " "
- + attribute0.mFrequency);
+ + attribute0.getProbability());
}
} else {
list1.remove(attribute0);
@@ -209,7 +201,7 @@ public class Diff extends Dicttool.Command {
for (final WeightedString attribute1 : list1) {
hasDifferences = true;
System.out.println(type + " added: " + word + " " + attribute1.mWord + " "
- + attribute1.mFrequency);
+ + attribute1.getProbability());
}
return hasDifferences;
}
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java
index 350f42772..9b2567fd3 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java
@@ -19,8 +19,8 @@ package com.android.inputmethod.latin.dicttool;
import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
-import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
-import com.android.inputmethod.latin.makedict.Word;
+import com.android.inputmethod.latin.makedict.WeightedString;
+import com.android.inputmethod.latin.makedict.WordProperty;
import java.util.Arrays;
import java.util.ArrayList;
@@ -43,15 +43,16 @@ public class Info extends Dicttool.Command {
int bigramCount = 0;
int shortcutCount = 0;
int whitelistCount = 0;
- for (final Word w : dict) {
+ for (final WordProperty wordProperty : dict) {
++wordCount;
- if (null != w.mBigrams) {
- bigramCount += w.mBigrams.size();
+ if (null != wordProperty.mBigrams) {
+ bigramCount += wordProperty.mBigrams.size();
}
- if (null != w.mShortcutTargets) {
- shortcutCount += w.mShortcutTargets.size();
- for (WeightedString shortcutTarget : w.mShortcutTargets) {
- if (FormatSpec.SHORTCUT_WHITELIST_FREQUENCY == shortcutTarget.mFrequency) {
+ if (null != wordProperty.mShortcutTargets) {
+ shortcutCount += wordProperty.mShortcutTargets.size();
+ for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
+ if (FormatSpec.SHORTCUT_WHITELIST_FREQUENCY
+ == shortcutTarget.getProbability()) {
++whitelistCount;
}
}
@@ -71,7 +72,7 @@ public class Info extends Dicttool.Command {
return;
}
System.out.println("Word: " + word);
- System.out.println(" Freq: " + ptNode.getFrequency());
+ System.out.println(" Freq: " + ptNode.getProbability());
if (ptNode.getIsNotAWord()) {
System.out.println(" Is not a word");
}
@@ -84,8 +85,9 @@ public class Info extends Dicttool.Command {
} else {
for (final WeightedString shortcutTarget : shortcutTargets) {
System.out.println(" Shortcut target: " + shortcutTarget.mWord + " ("
- + (FormatSpec.SHORTCUT_WHITELIST_FREQUENCY == shortcutTarget.mFrequency
- ? "whitelist" : shortcutTarget.mFrequency) + ")");
+ + (FormatSpec.SHORTCUT_WHITELIST_FREQUENCY
+ == shortcutTarget.getProbability() ?
+ "whitelist" : shortcutTarget.getProbability()) + ")");
}
}
final ArrayList<WeightedString> bigrams = ptNode.getBigrams();
@@ -93,7 +95,8 @@ public class Info extends Dicttool.Command {
System.out.println(" No bigrams");
} else {
for (final WeightedString bigram : bigrams) {
- System.out.println(" Bigram: " + bigram.mWord + " (" + bigram.mFrequency + ")");
+ System.out.println(
+ " Bigram: " + bigram.mWord + " (" + bigram.getProbability() + ")");
}
}
}
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java
index dff3387be..1f6798269 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java
@@ -21,8 +21,9 @@ import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
public class Package {
private Package() {
@@ -86,9 +87,13 @@ public class Package {
}
System.out.println("Packaging : " + decodedSpec.describeChain());
System.out.println("Uncompressed size : " + decodedSpec.mFile.length());
- final FileOutputStream dstStream = new FileOutputStream(new File(mArgs[1]));
- BinaryDictOffdeviceUtils.copy(new BufferedInputStream(
- new FileInputStream(decodedSpec.mFile)), new BufferedOutputStream(dstStream));
+ try (
+ final InputStream input = getFileInputStream(decodedSpec.mFile);
+ final OutputStream output = new BufferedOutputStream(
+ getFileOutputStreamOrStdOut(mArgs[1]))
+ ) {
+ BinaryDictOffdeviceUtils.copy(input, output);
+ }
}
}
}
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java
index 9174238da..b6383d788 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java
@@ -18,31 +18,43 @@ package com.android.inputmethod.latin.dicttool;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderEncoderTests;
import com.android.inputmethod.latin.makedict.BinaryDictEncoderFlattenTreeTests;
-import com.android.inputmethod.latin.makedict.BinaryDictIOUtilsTests;
import com.android.inputmethod.latin.makedict.FusionDictionaryTest;
+import com.android.inputmethod.latin.utils.FileUtils;
+import java.io.File;
+import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
+import java.nio.file.Files;
import java.util.ArrayList;
/**
* Dicttool command implementing self-tests.
*/
public class Test extends Dicttool.Command {
+ private static final String getTmpDir() {
+ try {
+ return Files.createTempDirectory("dicttool").toString();
+ } catch (IOException e) {
+ throw new RuntimeException("Can't get temporary directory", e);
+ }
+ }
+ private static final String TEST_TMP_DIR_BASE = getTmpDir();
+ public static final File TEST_TMP_DIR = new File(TEST_TMP_DIR_BASE);
public static final String COMMAND = "test";
+ private static final int DEFAULT_MAX_UNIGRAMS = 1500;
private long mSeed = System.currentTimeMillis();
- private int mMaxUnigrams = BinaryDictIOUtilsTests.DEFAULT_MAX_UNIGRAMS;
+ private int mMaxUnigrams = DEFAULT_MAX_UNIGRAMS;
private static final Class<?>[] sClassesToTest = {
BinaryDictOffdeviceUtilsTests.class,
FusionDictionaryTest.class,
BinaryDictDecoderEncoderTests.class,
BinaryDictEncoderFlattenTreeTests.class,
- BinaryDictIOUtilsTests.class
};
- private ArrayList<Method> mAllTestMethods = new ArrayList<Method>();
- private ArrayList<String> mUsedTestMethods = new ArrayList<String>();
+ private ArrayList<Method> mAllTestMethods = new ArrayList<>();
+ private ArrayList<String> mUsedTestMethods = new ArrayList<>();
public Test() {
for (final Class<?> c : sClassesToTest) {
@@ -57,8 +69,12 @@ public class Test extends Dicttool.Command {
@Override
public String getHelp() {
- final StringBuilder s = new StringBuilder("test [-s seed] [-m maxUnigrams] [testName...]\n"
- + "If seed is not specified, the current time is used.\nTest list is:\n");
+ final StringBuilder s = new StringBuilder(
+ "test [-s seed] [-m maxUnigrams] [-n] [testName...]\n"
+ + "If seed is not specified, the current time is used.\n"
+ + "If -n option is provided, do not delete temporary files in "
+ + TEST_TMP_DIR_BASE + "/*.\n"
+ + "Test list is:\n");
for (final Method m : mAllTestMethods) {
s.append(" ");
s.append(m.getName());
@@ -71,17 +87,26 @@ public class Test extends Dicttool.Command {
public void run() throws IllegalAccessException, InstantiationException,
InvocationTargetException {
int i = 0;
+ boolean deleteTmpDir = true;
while (i < mArgs.length) {
final String arg = mArgs[i++];
if ("-s".equals(arg)) {
mSeed = Long.parseLong(mArgs[i++]);
} else if ("-m".equals(arg)) {
mMaxUnigrams = Integer.parseInt(mArgs[i++]);
+ } else if ("-n".equals(arg)) {
+ deleteTmpDir = false;
} else {
mUsedTestMethods.add(arg);
}
}
- runChosenTests();
+ try {
+ runChosenTests();
+ } finally {
+ if (deleteTmpDir) {
+ FileUtils.deleteRecursively(TEST_TMP_DIR);
+ }
+ }
}
private void runChosenTests() throws IllegalAccessException, InstantiationException,
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java
index 4e99bf979..bdec44761 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java
@@ -16,19 +16,23 @@
package com.android.inputmethod.latin.dicttool;
+import com.android.inputmethod.latin.makedict.FormatSpec.DictionaryOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary;
-import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
-import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
-import com.android.inputmethod.latin.makedict.Word;
+import com.android.inputmethod.latin.makedict.ProbabilityInfo;
+import com.android.inputmethod.latin.makedict.WeightedString;
+import com.android.inputmethod.latin.makedict.WordProperty;
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+import java.io.BufferedInputStream;
import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.FileReader;
+import java.io.BufferedWriter;
+import java.io.FileInputStream;
import java.io.IOException;
-import java.io.InputStream;
-import java.io.Writer;
+import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.TreeSet;
@@ -37,10 +41,6 @@ import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
-import org.xml.sax.Attributes;
-import org.xml.sax.SAXException;
-import org.xml.sax.helpers.DefaultHandler;
-
/**
* Reads and writes XML files for a FusionDictionary.
*
@@ -52,14 +52,10 @@ public class XmlDictInputOutput {
private static final String WORD_TAG = "w";
private static final String BIGRAM_TAG = "bigram";
private static final String SHORTCUT_TAG = "shortcut";
- private static final String FREQUENCY_ATTR = "f";
+ private static final String PROBABILITY_ATTR = "f";
private static final String WORD_ATTR = "word";
private static final String NOT_A_WORD_ATTR = "not_a_word";
- private static final String OPTIONS_KEY = "options";
- private static final String GERMAN_UMLAUT_PROCESSING_OPTION = "german_umlaut_processing";
- private static final String FRENCH_LIGATURE_PROCESSING_OPTION = "french_ligature_processing";
-
/**
* SAX handler for a unigram XML file.
*/
@@ -68,6 +64,7 @@ public class XmlDictInputOutput {
private static final int START = 1;
private static final int WORD = 2;
private static final int UNKNOWN = 3;
+ private static final int SHORTCUT_ONLY_WORD_PROBABILITY = 1;
FusionDictionary mDictionary;
int mState; // the state of the parser
@@ -92,7 +89,8 @@ public class XmlDictInputOutput {
final FusionDictionary dict = mDictionary;
for (final String shortcutOnly : mShortcutsMap.keySet()) {
if (dict.hasWord(shortcutOnly)) continue;
- dict.add(shortcutOnly, 1, mShortcutsMap.get(shortcutOnly), true /* isNotAWord */);
+ dict.add(shortcutOnly, new ProbabilityInfo(SHORTCUT_ONLY_WORD_PROBABILITY),
+ mShortcutsMap.get(shortcutOnly), true /* isNotAWord */);
}
mDictionary = null;
mShortcutsMap.clear();
@@ -109,23 +107,18 @@ public class XmlDictInputOutput {
mWord = "";
for (int attrIndex = 0; attrIndex < attrs.getLength(); ++attrIndex) {
final String attrName = attrs.getLocalName(attrIndex);
- if (FREQUENCY_ATTR.equals(attrName)) {
+ if (PROBABILITY_ATTR.equals(attrName)) {
mFreq = Integer.parseInt(attrs.getValue(attrIndex));
}
}
} else if (ROOT_TAG.equals(localName)) {
- final HashMap<String, String> attributes = new HashMap<String, String>();
+ final HashMap<String, String> attributes = new HashMap<>();
for (int attrIndex = 0; attrIndex < attrs.getLength(); ++attrIndex) {
final String attrName = attrs.getLocalName(attrIndex);
attributes.put(attrName, attrs.getValue(attrIndex));
}
- final String optionsString = attributes.get(OPTIONS_KEY);
- final boolean processUmlauts =
- GERMAN_UMLAUT_PROCESSING_OPTION.equals(optionsString);
- final boolean processLigatures =
- FRENCH_LIGATURE_PROCESSING_OPTION.equals(optionsString);
mDictionary = new FusionDictionary(new PtNodeArray(),
- new DictionaryOptions(attributes, processUmlauts, processLigatures));
+ new DictionaryOptions(attributes));
} else {
mState = UNKNOWN;
}
@@ -144,7 +137,8 @@ public class XmlDictInputOutput {
@Override
public void endElement(String uri, String localName, String qName) {
if (WORD == mState) {
- mDictionary.add(mWord, mFreq, mShortcutsMap.get(mWord), false /* isNotAWord */);
+ mDictionary.add(mWord, new ProbabilityInfo(mFreq), mShortcutsMap.get(mWord),
+ false /* isNotAWord */);
mState = START;
}
}
@@ -174,7 +168,7 @@ public class XmlDictInputOutput {
DST_ATTRIBUTE = dstAttribute;
DST_FREQ = dstFreq;
mSrc = null;
- mAssocMap = new HashMap<String, ArrayList<WeightedString>>();
+ mAssocMap = new HashMap<>();
}
@Override
@@ -186,7 +180,7 @@ public class XmlDictInputOutput {
int freq = getValueFromFreqString(attrs.getValue(uri, DST_FREQ));
WeightedString bigram = new WeightedString(dst, freq / XML_TO_MEMORY_RATIO);
ArrayList<WeightedString> bigramList = mAssocMap.get(mSrc);
- if (null == bigramList) bigramList = new ArrayList<WeightedString>();
+ if (null == bigramList) bigramList = new ArrayList<>();
bigramList.add(bigram);
mAssocMap.put(mSrc, bigramList);
}
@@ -246,14 +240,13 @@ public class XmlDictInputOutput {
protected int getValueFromFreqString(final String freqString) {
if (WHITELIST_MARKER.equals(freqString)) {
return WHITELIST_FREQ_VALUE;
- } else {
- final int intValue = super.getValueFromFreqString(freqString);
- if (intValue < MIN_FREQ || intValue > MAX_FREQ) {
- throw new RuntimeException("Shortcut freq out of range. Accepted range is "
- + MIN_FREQ + ".." + MAX_FREQ);
- }
- return intValue;
}
+ final int intValue = super.getValueFromFreqString(freqString);
+ if (intValue < MIN_FREQ || intValue > MAX_FREQ) {
+ throw new RuntimeException("Shortcut freq out of range. Accepted range is "
+ + MIN_FREQ + ".." + MAX_FREQ);
+ }
+ return intValue;
}
// As per getAssocMap(), this never returns null.
@@ -271,23 +264,12 @@ public class XmlDictInputOutput {
* @return true if the file is in the unigram XML format, false otherwise
*/
public static boolean isXmlUnigramDictionary(final String filename) {
- BufferedReader reader = null;
- try {
- reader = new BufferedReader(new FileReader(new File(filename)));
+ try (final BufferedReader reader = new BufferedReader(
+ new InputStreamReader(new FileInputStream(filename), "UTF-8"))) {
final String firstLine = reader.readLine();
return firstLine.matches("^\\s*<wordlist .*>\\s*$");
- } catch (FileNotFoundException e) {
+ } catch (final IOException e) {
return false;
- } catch (IOException e) {
- return false;
- } finally {
- if (reader != null) {
- try {
- reader.close();
- } catch (IOException e) {
- // do nothing
- }
- }
}
}
@@ -302,8 +284,8 @@ public class XmlDictInputOutput {
* @param bigrams the file to read the bigrams from, or null.
* @return the in-memory representation of the dictionary.
*/
- public static FusionDictionary readDictionaryXml(final InputStream unigrams,
- final InputStream shortcuts, final InputStream bigrams)
+ public static FusionDictionary readDictionaryXml(final BufferedInputStream unigrams,
+ final BufferedInputStream shortcuts, final BufferedInputStream bigrams)
throws SAXException, IOException, ParserConfigurationException {
final SAXParserFactory factory = SAXParserFactory.newInstance();
factory.setNamespaceAware(true);
@@ -325,7 +307,7 @@ public class XmlDictInputOutput {
final ArrayList<WeightedString> bigramList = bigramMap.get(firstWord);
for (final WeightedString bigram : bigramList) {
if (!dict.hasWord(bigram.mWord)) continue;
- dict.setBigram(firstWord, bigram.mWord, bigram.mFrequency);
+ dict.setBigram(firstWord, bigram.mWord, bigram.mProbabilityInfo);
}
}
return dict;
@@ -352,44 +334,40 @@ public class XmlDictInputOutput {
* @param destination a destination stream to write to.
* @param dict the dictionary to write.
*/
- public static void writeDictionaryXml(Writer destination, FusionDictionary dict)
- throws IOException {
- final TreeSet<Word> set = new TreeSet<Word>();
- for (Word word : dict) {
- set.add(word);
+ public static void writeDictionaryXml(final BufferedWriter destination,
+ final FusionDictionary dict) throws IOException {
+ final TreeSet<WordProperty> wordPropertiesInDict = new TreeSet<>();
+ for (WordProperty wordProperty : dict) {
+ wordPropertiesInDict.add(wordProperty);
}
// TODO: use an XMLSerializer if this gets big
destination.write("<wordlist format=\"2\"");
- final HashMap<String, String> options = dict.mOptions.mAttributes;
- if (dict.mOptions.mGermanUmlautProcessing) {
- destination.write(" " + OPTIONS_KEY + "=\"" + GERMAN_UMLAUT_PROCESSING_OPTION + "\"");
- } else if (dict.mOptions.mFrenchLigatureProcessing) {
- destination.write(" " + OPTIONS_KEY + "=\"" + FRENCH_LIGATURE_PROCESSING_OPTION + "\"");
- }
for (final String key : dict.mOptions.mAttributes.keySet()) {
final String value = dict.mOptions.mAttributes.get(key);
destination.write(" " + key + "=\"" + value + "\"");
}
destination.write(">\n");
destination.write("<!-- Warning: there is no code to read this format yet. -->\n");
- for (Word word : set) {
- destination.write(" <" + WORD_TAG + " " + WORD_ATTR + "=\"" + word.mWord + "\" "
- + FREQUENCY_ATTR + "=\"" + word.mFrequency
- + (word.mIsNotAWord ? "\" " + NOT_A_WORD_ATTR + "=\"true" : "") + "\">");
- if (null != word.mShortcutTargets) {
+ for (WordProperty wordProperty : wordPropertiesInDict) {
+ destination.write(" <" + WORD_TAG + " " + WORD_ATTR + "=\"" + wordProperty.mWord
+ + "\" " + PROBABILITY_ATTR + "=\"" + wordProperty.getProbability()
+ + (wordProperty.mIsNotAWord ? "\" " + NOT_A_WORD_ATTR + "=\"true" : "")
+ + "\">");
+ if (null != wordProperty.mShortcutTargets) {
destination.write("\n");
- for (WeightedString target : word.mShortcutTargets) {
- destination.write(" <" + SHORTCUT_TAG + " " + FREQUENCY_ATTR + "=\""
- + target.mFrequency + "\">" + target.mWord + "</" + SHORTCUT_TAG
+ for (WeightedString target : wordProperty.mShortcutTargets) {
+ destination.write(" <" + SHORTCUT_TAG + " " + PROBABILITY_ATTR + "=\""
+ + target.getProbability() + "\">" + target.mWord + "</" + SHORTCUT_TAG
+ ">\n");
}
destination.write(" ");
}
- if (null != word.mBigrams) {
+ if (null != wordProperty.mBigrams) {
destination.write("\n");
- for (WeightedString bigram : word.mBigrams) {
- destination.write(" <" + BIGRAM_TAG + " " + FREQUENCY_ATTR + "=\""
- + bigram.mFrequency + "\">" + bigram.mWord + "</" + BIGRAM_TAG + ">\n");
+ for (WeightedString bigram : wordProperty.mBigrams) {
+ destination.write(" <" + BIGRAM_TAG + " " + PROBABILITY_ATTR + "=\""
+ + bigram.getProbability() + "\">" + bigram.mWord
+ + "</" + BIGRAM_TAG + ">\n");
}
destination.write(" ");
}
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/makedict/MakedictLog.java b/tools/dicttool/src/com/android/inputmethod/latin/makedict/MakedictLog.java
deleted file mode 100644
index 7eccff2b4..000000000
--- a/tools/dicttool/src/com/android/inputmethod/latin/makedict/MakedictLog.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.android.inputmethod.latin.makedict;
-
-/**
- * Wrapper to redirect log events to the right output medium.
- */
-public class MakedictLog {
- public static final boolean DBG = true;
-
- private static void print(String message) {
- System.out.println(message);
- }
-
- public static void d(String message) {
- print(message);
- }
-
- public static void i(String message) {
- print(message);
- }
-
- public static void w(String message) {
- print(message);
- }
-
- public static void e(String message) {
- print(message);
- }
-}
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/personalization/PersonalizationHelper.java b/tools/dicttool/src/com/android/inputmethod/latin/personalization/PersonalizationHelper.java
new file mode 100644
index 000000000..a4ad6b514
--- /dev/null
+++ b/tools/dicttool/src/com/android/inputmethod/latin/personalization/PersonalizationHelper.java
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin.personalization;
+
+public class PersonalizationHelper {
+ public static void currentTimeChangedForTesting(final int currentTimestamp) {
+ }
+}