diff options
Diffstat (limited to 'tools/dicttool')
5 files changed, 44 insertions, 25 deletions
diff --git a/tools/dicttool/Android.mk b/tools/dicttool/Android.mk index 3e3d419e6..7f34ccf20 100644 --- a/tools/dicttool/Android.mk +++ b/tools/dicttool/Android.mk @@ -50,7 +50,7 @@ LATINIME_SRC_FILES_FOR_DICTTOOL := \ latin/Dictionary.java \ latin/InputPointers.java \ latin/LastComposedWord.java \ - latin/PrevWordsInfo.java \ + latin/NgramContext.java \ latin/SuggestedWords.java \ latin/WordComposer.java \ latin/settings/NativeSuggestOptions.java \ diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java index 3d0557b5c..5dfb7bf11 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java @@ -59,6 +59,9 @@ public class DictionaryMaker { private static final String OPTION_OUTPUT_XML = "-x"; private static final String OPTION_OUTPUT_COMBINED = "-o"; private static final String OPTION_HELP = "-h"; + private static final String OPTION_CODE_POINT_TABLE = "-t"; + private static final String OPTION_CODE_POINT_TABLE_OFF = "off"; + private static final String OPTION_CODE_POINT_TABLE_ON = "on"; public final String mInputBinary; public final String mInputCombined; public final String mInputUnigramXml; @@ -68,6 +71,7 @@ public class DictionaryMaker { public final String mOutputXml; public final String mOutputCombined; public final int mOutputBinaryFormatVersion; + public final int mCodePointTableMode; private void checkIntegrity() throws IOException { checkHasExactlyOneInput(); @@ -131,7 +135,7 @@ public class DictionaryMaker { + "[-s <unigrams.xml> [-b <bigrams.xml>] [-c <shortcuts_and_whitelist.xml>] " + "| [-s <combined format input]" + "| [-s <binary input>] [-d <binary output>] [-x <xml output>] " - + " [-o <combined output>]" + + " [-o <combined output>] [-t <code point table switch: on/off/auto>]" + "[-2] [-3] [-4]\n" + "\n" + " Converts a source dictionary file to one or several outputs.\n" @@ -154,7 +158,9 @@ public class DictionaryMaker { String outputBinary = null; String outputXml = null; String outputCombined = null; - int outputBinaryFormatVersion = 2; // the default version is 2. + int outputBinaryFormatVersion = FormatSpec.VERSION201; // the default version is 201. + // Don't use code point table by default. + int codePointTableMode = Ver2DictEncoder.CODE_POINT_TABLE_OFF; while (!args.isEmpty()) { final String arg = args.get(0); @@ -172,29 +178,38 @@ public class DictionaryMaker { throw new IllegalArgumentException("Option " + arg + " is unknown or " + "requires an argument"); } - String filename = args.get(0); + String argValue = args.get(0); args.remove(0); if (OPTION_INPUT_SOURCE.equals(arg)) { - if (XmlDictInputOutput.isXmlUnigramDictionary(filename)) { - inputUnigramXml = filename; - } else if (CombinedInputOutput.isCombinedDictionary(filename)) { - inputCombined = filename; - } else if (BinaryDictDecoderUtils.isBinaryDictionary(filename)) { - inputBinary = filename; + if (XmlDictInputOutput.isXmlUnigramDictionary(argValue)) { + inputUnigramXml = argValue; + } else if (CombinedInputOutput.isCombinedDictionary(argValue)) { + inputCombined = argValue; + } else if (BinaryDictDecoderUtils.isBinaryDictionary(argValue)) { + inputBinary = argValue; } else { throw new IllegalArgumentException( - "Unknown format for file " + filename); + "Unknown format for file " + argValue); } } else if (OPTION_INPUT_SHORTCUT_XML.equals(arg)) { - inputShortcutXml = filename; + inputShortcutXml = argValue; } else if (OPTION_INPUT_BIGRAM_XML.equals(arg)) { - inputBigramXml = filename; + inputBigramXml = argValue; } else if (OPTION_OUTPUT_BINARY.equals(arg)) { - outputBinary = filename; + outputBinary = argValue; } else if (OPTION_OUTPUT_XML.equals(arg)) { - outputXml = filename; + outputXml = argValue; } else if (OPTION_OUTPUT_COMBINED.equals(arg)) { - outputCombined = filename; + outputCombined = argValue; + } else if (OPTION_CODE_POINT_TABLE.equals(arg)) { + if (OPTION_CODE_POINT_TABLE_OFF.equals(argValue)) { + codePointTableMode = Ver2DictEncoder.CODE_POINT_TABLE_OFF; + } else if (OPTION_CODE_POINT_TABLE_ON.equals(argValue)) { + codePointTableMode = Ver2DictEncoder.CODE_POINT_TABLE_ON; + } else { + throw new IllegalArgumentException( + "Unknown argument to -t option : " + argValue); + } } else { throw new IllegalArgumentException("Unknown option : " + arg); } @@ -225,6 +240,7 @@ public class DictionaryMaker { mOutputXml = outputXml; mOutputCombined = outputCombined; mOutputBinaryFormatVersion = outputBinaryFormatVersion; + mCodePointTableMode = codePointTableMode; checkIntegrity(); } } @@ -335,7 +351,8 @@ public class DictionaryMaker { throws FileNotFoundException, IOException, UnsupportedFormatException, IllegalArgumentException { if (null != args.mOutputBinary) { - writeBinaryDictionary(args.mOutputBinary, dict, args.mOutputBinaryFormatVersion); + writeBinaryDictionary(args.mOutputBinary, dict, args.mOutputBinaryFormatVersion, + args.mCodePointTableMode); } if (null != args.mOutputXml) { writeXmlDictionary(args.mOutputXml, dict); @@ -351,19 +368,21 @@ public class DictionaryMaker { * @param outputFilename the name of the file to write to. * @param dict the dictionary to write. * @param version the binary format version to use. + * @param codePointTableMode the value to decide how we treat the code point table. * @throws FileNotFoundException if the output file can't be created. * @throws IOException if the output file can't be written to. */ private static void writeBinaryDictionary(final String outputFilename, - final FusionDictionary dict, final int version) + final FusionDictionary dict, final int version, final int codePointTableMode) throws FileNotFoundException, IOException, UnsupportedFormatException { final File outputFile = new File(outputFilename); final FormatSpec.FormatOptions formatOptions = new FormatSpec.FormatOptions(version); final DictEncoder dictEncoder; if (version == FormatSpec.VERSION4) { + // VERSION4 doesn't use the code point table. dictEncoder = new Ver4DictEncoder(outputFile); } else { - dictEncoder = new Ver2DictEncoder(outputFile); + dictEncoder = new Ver2DictEncoder(outputFile, codePointTableMode); } dictEncoder.writeDictionary(dict, formatOptions); } diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java index 94d1ae8bb..c6818ce0c 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java @@ -135,7 +135,7 @@ public class Diff extends Dicttool.Command { hasDifferences = true; } hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord, - "Bigram", word0Property.mBigrams, word1PtNode.getBigrams()); + "Bigram", word0Property.getBigrams(), word1PtNode.getBigrams()); hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord, "Shortcut", word0Property.mShortcutTargets, word1PtNode.getShortcutTargets()); diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java index 9b2567fd3..2850e1ff6 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java @@ -45,8 +45,8 @@ public class Info extends Dicttool.Command { int whitelistCount = 0; for (final WordProperty wordProperty : dict) { ++wordCount; - if (null != wordProperty.mBigrams) { - bigramCount += wordProperty.mBigrams.size(); + if (wordProperty.mHasNgrams) { + bigramCount += wordProperty.mNgrams.size(); } if (null != wordProperty.mShortcutTargets) { shortcutCount += wordProperty.mShortcutTargets.size(); diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java index bdec44761..cd3ce70eb 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java @@ -353,7 +353,7 @@ public class XmlDictInputOutput { + "\" " + PROBABILITY_ATTR + "=\"" + wordProperty.getProbability() + (wordProperty.mIsNotAWord ? "\" " + NOT_A_WORD_ATTR + "=\"true" : "") + "\">"); - if (null != wordProperty.mShortcutTargets) { + if (wordProperty.mHasShortcuts) { destination.write("\n"); for (WeightedString target : wordProperty.mShortcutTargets) { destination.write(" <" + SHORTCUT_TAG + " " + PROBABILITY_ATTR + "=\"" @@ -362,9 +362,9 @@ public class XmlDictInputOutput { } destination.write(" "); } - if (null != wordProperty.mBigrams) { + if (wordProperty.mHasNgrams) { destination.write("\n"); - for (WeightedString bigram : wordProperty.mBigrams) { + for (WeightedString bigram : wordProperty.getBigrams()) { destination.write(" <" + BIGRAM_TAG + " " + PROBABILITY_ATTR + "=\"" + bigram.getProbability() + "\">" + bigram.mWord + "</" + BIGRAM_TAG + ">\n"); |