about | summary | refs | log | tree | commit | diff | stats
path: root/tools/dicttool/src
diff options
context:
space:
mode:
Diffstat (limited to 'tools/dicttool/src')
-rw-r--r-- tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java | 2
-rw-r--r-- tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java | 135
-rw-r--r-- tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java | 13
-rw-r--r-- tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java | 75
-rw-r--r-- tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java | 27
-rw-r--r-- tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java | 5
-rw-r--r-- tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java | 62
7 files changed, 147 insertions, 172 deletions
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
index e571bc21d..d1df81b52 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
@@ -198,7 +198,7 @@ public final class BinaryDictOffdeviceUtils {
System.out.println("Packaging : " + decodedSpec.describeChain());
System.out.println("Uncompressed size : " + decodedSpec.mFile.length());
}
- return dictDecoder.readDictionaryBinary(null, false /* deleteDictIfBroken */);
+ return dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */);
}
}
} catch (IOException e) {
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java
index 4b6716936..b6795ea6d 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java
@@ -21,7 +21,9 @@ import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
-import com.android.inputmethod.latin.makedict.Word;
+import com.android.inputmethod.latin.makedict.ProbabilityInfo;
+import com.android.inputmethod.latin.makedict.WordProperty;
+import com.android.inputmethod.latin.utils.CombinedFormatUtils;
import java.io.BufferedReader;
import java.io.File;
@@ -41,18 +43,10 @@ import java.util.TreeSet;
* All functions in this class are static.
*/
public class CombinedInputOutput {
-
- private static final String DICTIONARY_TAG = "dictionary";
- private static final String BIGRAM_TAG = "bigram";
- private static final String SHORTCUT_TAG = "shortcut";
- private static final String FREQUENCY_TAG = "f";
- private static final String WORD_TAG = "word";
- private static final String NOT_A_WORD_TAG = "not_a_word";
private static final String WHITELIST_TAG = "whitelist";
private static final String OPTIONS_TAG = "options";
- private static final String GERMAN_UMLAUT_PROCESSING_OPTION = "german_umlaut_processing";
- private static final String FRENCH_LIGATURE_PROCESSING_OPTION = "french_ligature_processing";
private static final String COMMENT_LINE_STARTER = "#";
+ private static final int HISTORICAL_INFO_ELEMENT_COUNT = 3;
/**
* Basic test to find out whether the file is in the combined format or not.
@@ -70,7 +64,8 @@ public class CombinedInputOutput {
while (firstLine.startsWith(COMMENT_LINE_STARTER)) {
firstLine = reader.readLine();
}
- return firstLine.matches("^" + DICTIONARY_TAG + "=[^:]+(:[^=]+=[^:]+)*");
+ return firstLine.matches(
+ "^" + CombinedFormatUtils.DICTIONARY_TAG + "=[^:]+(:[^=]+=[^:]+)*");
} catch (FileNotFoundException e) {
return false;
} catch (IOException e) {
@@ -112,28 +107,25 @@ public class CombinedInputOutput {
attributes.put(keyValue[0], keyValue[1]);
}
- final boolean processUmlauts =
- GERMAN_UMLAUT_PROCESSING_OPTION.equals(attributes.get(OPTIONS_TAG));
- final boolean processLigatures =
- FRENCH_LIGATURE_PROCESSING_OPTION.equals(attributes.get(OPTIONS_TAG));
attributes.remove(OPTIONS_TAG);
- final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), new DictionaryOptions(
- attributes, processUmlauts, processLigatures));
+ final FusionDictionary dict =
+ new FusionDictionary(new PtNodeArray(), new DictionaryOptions(attributes));
String line;
String word = null;
- int freq = 0;
+ ProbabilityInfo probabilityInfo = new ProbabilityInfo(0);
boolean isNotAWord = false;
ArrayList<WeightedString> bigrams = new ArrayList<WeightedString>();
ArrayList<WeightedString> shortcuts = new ArrayList<WeightedString>();
while (null != (line = reader.readLine())) {
if (line.startsWith(COMMENT_LINE_STARTER)) continue;
final String args[] = line.trim().split(",");
- if (args[0].matches(WORD_TAG + "=.*")) {
+ if (args[0].matches(CombinedFormatUtils.WORD_TAG + "=.*")) {
if (null != word) {
- dict.add(word, freq, shortcuts.isEmpty() ? null : shortcuts, isNotAWord);
+ dict.add(word, probabilityInfo, shortcuts.isEmpty() ? null : shortcuts,
+ isNotAWord);
for (WeightedString s : bigrams) {
- dict.setBigram(word, s.mWord, s.mFrequency);
+ dict.setBigram(word, s.mWord, s.mProbabilityInfo);
}
}
if (!shortcuts.isEmpty()) shortcuts = new ArrayList<WeightedString>();
@@ -142,23 +134,35 @@ public class CombinedInputOutput {
for (String param : args) {
final String params[] = param.split("=", 2);
if (2 != params.length) throw new RuntimeException("Wrong format : " + line);
- if (WORD_TAG.equals(params[0])) {
+ if (CombinedFormatUtils.WORD_TAG.equals(params[0])) {
word = params[1];
- } else if (FREQUENCY_TAG.equals(params[0])) {
- freq = Integer.parseInt(params[1]);
- } else if (NOT_A_WORD_TAG.equals(params[0])) {
+ } else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) {
+ probabilityInfo = new ProbabilityInfo(Integer.parseInt(params[1]),
+ probabilityInfo.mTimestamp, probabilityInfo.mLevel,
+ probabilityInfo.mCount);
+ } else if (CombinedFormatUtils.HISTORICAL_INFO_TAG.equals(params[0])) {
+ final String[] historicalInfoParams =
+ params[1].split(CombinedFormatUtils.HISTORICAL_INFO_SEPARATOR);
+ if (historicalInfoParams.length != HISTORICAL_INFO_ELEMENT_COUNT) {
+ throw new RuntimeException("Wrong format (historical info) : " + line);
+ }
+ probabilityInfo = new ProbabilityInfo(probabilityInfo.mProbability,
+ Integer.parseInt(historicalInfoParams[0]),
+ Integer.parseInt(historicalInfoParams[1]),
+ Integer.parseInt(historicalInfoParams[2]));
+ } else if (CombinedFormatUtils.NOT_A_WORD_TAG.equals(params[0])) {
isNotAWord = "true".equals(params[1]);
}
}
- } else if (args[0].matches(SHORTCUT_TAG + "=.*")) {
+ } else if (args[0].matches(CombinedFormatUtils.SHORTCUT_TAG + "=.*")) {
String shortcut = null;
int shortcutFreq = 0;
for (String param : args) {
final String params[] = param.split("=", 2);
if (2 != params.length) throw new RuntimeException("Wrong format : " + line);
- if (SHORTCUT_TAG.equals(params[0])) {
+ if (CombinedFormatUtils.SHORTCUT_TAG.equals(params[0])) {
shortcut = params[1];
- } else if (FREQUENCY_TAG.equals(params[0])) {
+ } else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) {
shortcutFreq = WHITELIST_TAG.equals(params[1])
? FormatSpec.SHORTCUT_WHITELIST_FREQUENCY
: Integer.parseInt(params[1]);
@@ -169,29 +173,42 @@ public class CombinedInputOutput {
} else {
throw new RuntimeException("Wrong format : " + line);
}
- } else if (args[0].matches(BIGRAM_TAG + "=.*")) {
+ } else if (args[0].matches(CombinedFormatUtils.BIGRAM_TAG + "=.*")) {
String secondWordOfBigram = null;
- int bigramFreq = 0;
+ ProbabilityInfo bigramProbabilityInfo = new ProbabilityInfo(0);
for (String param : args) {
final String params[] = param.split("=", 2);
if (2 != params.length) throw new RuntimeException("Wrong format : " + line);
- if (BIGRAM_TAG.equals(params[0])) {
+ if (CombinedFormatUtils.BIGRAM_TAG.equals(params[0])) {
secondWordOfBigram = params[1];
- } else if (FREQUENCY_TAG.equals(params[0])) {
- bigramFreq = Integer.parseInt(params[1]);
+ } else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) {
+ bigramProbabilityInfo = new ProbabilityInfo(Integer.parseInt(params[1]),
+ bigramProbabilityInfo.mTimestamp, bigramProbabilityInfo.mLevel,
+ bigramProbabilityInfo.mCount);
+ } else if (CombinedFormatUtils.HISTORICAL_INFO_TAG.equals(params[0])) {
+ final String[] historicalInfoParams =
+ params[1].split(CombinedFormatUtils.HISTORICAL_INFO_SEPARATOR);
+ if (historicalInfoParams.length != HISTORICAL_INFO_ELEMENT_COUNT) {
+ throw new RuntimeException("Wrong format (historical info) : " + line);
+ }
+ bigramProbabilityInfo = new ProbabilityInfo(
+ bigramProbabilityInfo.mProbability,
+ Integer.parseInt(historicalInfoParams[0]),
+ Integer.parseInt(historicalInfoParams[1]),
+ Integer.parseInt(historicalInfoParams[2]));
}
}
if (null != secondWordOfBigram) {
- bigrams.add(new WeightedString(secondWordOfBigram, bigramFreq));
+ bigrams.add(new WeightedString(secondWordOfBigram, bigramProbabilityInfo));
} else {
throw new RuntimeException("Wrong format : " + line);
}
}
}
if (null != word) {
- dict.add(word, freq, shortcuts.isEmpty() ? null : shortcuts, isNotAWord);
+ dict.add(word, probabilityInfo, shortcuts.isEmpty() ? null : shortcuts, isNotAWord);
for (WeightedString s : bigrams) {
- dict.setBigram(word, s.mWord, s.mFrequency);
+ dict.setBigram(word, s.mWord, s.mProbabilityInfo);
}
}
@@ -204,44 +221,16 @@ public class CombinedInputOutput {
* @param destination a destination stream to write to.
* @param dict the dictionary to write.
*/
- public static void writeDictionaryCombined(Writer destination, FusionDictionary dict)
- throws IOException {
- final TreeSet<Word> set = new TreeSet<Word>();
- for (Word word : dict) {
- set.add(word); // This for ordering by frequency, then by asciibetic order
- }
- final HashMap<String, String> options = dict.mOptions.mAttributes;
- destination.write(DICTIONARY_TAG + "=");
- if (options.containsKey(DICTIONARY_TAG)) {
- destination.write(options.get(DICTIONARY_TAG));
- options.remove(DICTIONARY_TAG);
- }
- if (dict.mOptions.mGermanUmlautProcessing) {
- destination.write("," + OPTIONS_TAG + "=" + GERMAN_UMLAUT_PROCESSING_OPTION);
- } else if (dict.mOptions.mFrenchLigatureProcessing) {
- destination.write("," + OPTIONS_TAG + "=" + FRENCH_LIGATURE_PROCESSING_OPTION);
- }
- for (final String key : dict.mOptions.mAttributes.keySet()) {
- final String value = dict.mOptions.mAttributes.get(key);
- destination.write("," + key + "=" + value);
+ public static void writeDictionaryCombined(
+ final Writer destination, final FusionDictionary dict) throws IOException {
+ final TreeSet<WordProperty> wordPropertiesInDict = new TreeSet<WordProperty>();
+ for (final WordProperty wordProperty : dict) {
+ // This for ordering by frequency, then by asciibetic order
+ wordPropertiesInDict.add(wordProperty);
}
- destination.write("\n");
- for (Word word : set) {
- destination.write(" " + WORD_TAG + "=" + word.mWord + ","
- + FREQUENCY_TAG + "=" + word.mFrequency
- + (word.mIsNotAWord ? "," + NOT_A_WORD_TAG + "=true\n" : "\n"));
- if (null != word.mShortcutTargets) {
- for (WeightedString target : word.mShortcutTargets) {
- destination.write(" " + SHORTCUT_TAG + "=" + target.mWord + ","
- + FREQUENCY_TAG + "=" + target.mFrequency + "\n");
- }
- }
- if (null != word.mBigrams) {
- for (WeightedString bigram : word.mBigrams) {
- destination.write(" " + BIGRAM_TAG + "=" + bigram.mWord + ","
- + FREQUENCY_TAG + "=" + bigram.mFrequency + "\n");
- }
- }
+ destination.write(CombinedFormatUtils.formatAttributeMap(dict.mOptions.mAttributes));
+ for (final WordProperty wordProperty : wordPropertiesInDict) {
+ destination.write(CombinedFormatUtils.formatWordProperty(wordProperty));
}
destination.close();
}
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java
index 5c7e8b4f2..80d71fc64 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java
@@ -23,7 +23,7 @@ import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.MakedictLog;
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
-import com.android.inputmethod.latin.makedict.Ver3DictEncoder;
+import com.android.inputmethod.latin.makedict.Ver2DictEncoder;
import com.android.inputmethod.latin.makedict.Ver4DictEncoder;
import java.io.BufferedWriter;
@@ -46,7 +46,6 @@ public class DictionaryMaker {
static class Arguments {
private static final String OPTION_VERSION_2 = "-2";
- private static final String OPTION_VERSION_3 = "-3";
private static final String OPTION_VERSION_4 = "-4";
private static final String OPTION_INPUT_SOURCE = "-s";
private static final String OPTION_INPUT_BIGRAM_XML = "-b";
@@ -158,10 +157,8 @@ public class DictionaryMaker {
if (arg.charAt(0) == '-') {
if (OPTION_VERSION_2.equals(arg)) {
// Do nothing, this is the default
- } else if (OPTION_VERSION_3.equals(arg)) {
- outputBinaryFormatVersion = 3;
} else if (OPTION_VERSION_4.equals(arg)) {
- outputBinaryFormatVersion = 4;
+ outputBinaryFormatVersion = FormatSpec.VERSION4;
} else if (OPTION_HELP.equals(arg)) {
displayHelp();
} else {
@@ -268,7 +265,7 @@ public class DictionaryMaker {
throws FileNotFoundException, IOException, UnsupportedFormatException {
final File file = new File(binaryFilename);
final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file);
- return dictDecoder.readDictionaryBinary(null, false /* deleteDictIfBroken */);
+ return dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */);
}
/**
@@ -358,10 +355,10 @@ public class DictionaryMaker {
final File outputFile = new File(outputFilename);
final FormatSpec.FormatOptions formatOptions = new FormatSpec.FormatOptions(version);
final DictEncoder dictEncoder;
- if (version == 4) {
+ if (version == FormatSpec.VERSION4) {
dictEncoder = new Ver4DictEncoder(outputFile);
} else {
- dictEncoder = new Ver3DictEncoder(outputFile);
+ dictEncoder = new Ver2DictEncoder(outputFile);
}
dictEncoder.writeDictionary(dict, formatOptions);
}
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java
index 66fd084cd..ce9b9f306 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java
@@ -19,7 +19,7 @@ package com.android.inputmethod.latin.dicttool;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
-import com.android.inputmethod.latin.makedict.Word;
+import com.android.inputmethod.latin.makedict.WordProperty;
import java.util.Arrays;
import java.util.ArrayList;
@@ -85,18 +85,6 @@ public class Diff extends Dicttool.Command {
private static void diffHeaders(final FusionDictionary dict0, final FusionDictionary dict1) {
boolean hasDifferences = false;
- if (dict0.mOptions.mFrenchLigatureProcessing != dict1.mOptions.mFrenchLigatureProcessing) {
- System.out.println(" French ligature processing : "
- + dict0.mOptions.mFrenchLigatureProcessing + " <=> "
- + dict1.mOptions.mFrenchLigatureProcessing);
- hasDifferences = true;
- }
- else if (dict0.mOptions.mGermanUmlautProcessing != dict1.mOptions.mGermanUmlautProcessing) {
- System.out.println(" German umlaut processing : "
- + dict0.mOptions.mGermanUmlautProcessing + " <=> "
- + dict1.mOptions.mGermanUmlautProcessing);
- hasDifferences = true;
- }
final HashMap<String, String> options1 =
new HashMap<String, String>(dict1.mOptions.mAttributes);
for (final String optionKey : dict0.mOptions.mAttributes.keySet()) {
@@ -120,42 +108,47 @@ public class Diff extends Dicttool.Command {
private static void diffWords(final FusionDictionary dict0, final FusionDictionary dict1) {
boolean hasDifferences = false;
- for (final Word word0 : dict0) {
- final PtNode word1 = FusionDictionary.findWordInTree(dict1.mRootNodeArray,
- word0.mWord);
- if (null == word1) {
+ for (final WordProperty word0Property : dict0) {
+ final PtNode word1PtNode = FusionDictionary.findWordInTree(dict1.mRootNodeArray,
+ word0Property.mWord);
+ if (null == word1PtNode) {
// This word is not in dict1
- System.out.println("Deleted: " + word0.mWord + " " + word0.mFrequency);
+ System.out.println("Deleted: " + word0Property.mWord + " "
+ + word0Property.getProbability());
hasDifferences = true;
} else {
// We found the word. Compare frequencies, shortcuts, bigrams
- if (word0.mFrequency != word1.getFrequency()) {
- System.out.println("Freq changed: " + word0.mWord + " " + word0.mFrequency
- + " -> " + word1.getFrequency());
+ if (word0Property.getProbability() != word1PtNode.getProbability()) {
+ System.out.println("Probability changed: " + word0Property.mWord + " "
+ + word0Property.getProbability() + " -> "
+ + word1PtNode.getProbability());
hasDifferences = true;
}
- if (word0.mIsNotAWord != word1.getIsNotAWord()) {
- System.out.println("Not a word: " + word0.mWord + " " + word0.mIsNotAWord
- + " -> " + word1.getIsNotAWord());
+ if (word0Property.mIsNotAWord != word1PtNode.getIsNotAWord()) {
+ System.out.println("Not a word: " + word0Property.mWord + " "
+ + word0Property.mIsNotAWord + " -> " + word1PtNode.getIsNotAWord());
hasDifferences = true;
}
- if (word0.mIsBlacklistEntry != word1.getIsBlacklistEntry()) {
- System.out.println("Blacklist: " + word0.mWord + " " + word0.mIsBlacklistEntry
- + " -> " + word1.getIsBlacklistEntry());
+ if (word0Property.mIsBlacklistEntry != word1PtNode.getIsBlacklistEntry()) {
+ System.out.println("Blacklist: " + word0Property.mWord + " "
+ + word0Property.mIsBlacklistEntry + " -> "
+ + word1PtNode.getIsBlacklistEntry());
hasDifferences = true;
}
- hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0.mWord,
- "Bigram", word0.mBigrams, word1.getBigrams());
- hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0.mWord,
- "Shortcut", word0.mShortcutTargets, word1.getShortcutTargets());
+ hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord,
+ "Bigram", word0Property.mBigrams, word1PtNode.getBigrams());
+ hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord,
+ "Shortcut", word0Property.mShortcutTargets,
+ word1PtNode.getShortcutTargets());
}
}
- for (final Word word1 : dict1) {
- final PtNode word0 = FusionDictionary.findWordInTree(dict0.mRootNodeArray,
- word1.mWord);
- if (null == word0) {
+ for (final WordProperty word1Property : dict1) {
+ final PtNode word0PtNode = FusionDictionary.findWordInTree(dict0.mRootNodeArray,
+ word1Property.mWord);
+ if (null == word0PtNode) {
// This word is not in dict0
- System.out.println("Added: " + word1.mWord + " " + word1.mFrequency);
+ System.out.println("Added: " + word1Property.mWord + " "
+ + word1Property.getProbability());
hasDifferences = true;
}
}
@@ -171,7 +164,7 @@ public class Diff extends Dicttool.Command {
if (null == list0) return false;
for (final WeightedString attribute0 : list0) {
System.out.println(type + " removed: " + word + " " + attribute0.mWord + " "
- + attribute0.mFrequency);
+ + attribute0.getProbability());
}
return true;
}
@@ -187,8 +180,8 @@ public class Diff extends Dicttool.Command {
for (final WeightedString attribute1 : list1) {
if (attribute0.mWord.equals(attribute1.mWord)) {
System.out.println(type + " freq changed: " + word + " "
- + attribute0.mWord + " " + attribute0.mFrequency + " -> "
- + attribute1.mFrequency);
+ + attribute0.mWord + " " + attribute0.getProbability() + " -> "
+ + attribute1.getProbability());
list1.remove(attribute1);
foundString = true;
break;
@@ -197,7 +190,7 @@ public class Diff extends Dicttool.Command {
if (!foundString) {
// We come here if we haven't found any matching string.
System.out.println(type + " removed: " + word + " " + attribute0.mWord + " "
- + attribute0.mFrequency);
+ + attribute0.getProbability());
}
} else {
list1.remove(attribute0);
@@ -209,7 +202,7 @@ public class Diff extends Dicttool.Command {
for (final WeightedString attribute1 : list1) {
hasDifferences = true;
System.out.println(type + " added: " + word + " " + attribute1.mWord + " "
- + attribute1.mFrequency);
+ + attribute1.getProbability());
}
return hasDifferences;
}
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java
index 350f42772..178df5cec 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java
@@ -20,7 +20,7 @@ import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
-import com.android.inputmethod.latin.makedict.Word;
+import com.android.inputmethod.latin.makedict.WordProperty;
import java.util.Arrays;
import java.util.ArrayList;
@@ -43,15 +43,16 @@ public class Info extends Dicttool.Command {
int bigramCount = 0;
int shortcutCount = 0;
int whitelistCount = 0;
- for (final Word w : dict) {
+ for (final WordProperty wordProperty : dict) {
++wordCount;
- if (null != w.mBigrams) {
- bigramCount += w.mBigrams.size();
+ if (null != wordProperty.mBigrams) {
+ bigramCount += wordProperty.mBigrams.size();
}
- if (null != w.mShortcutTargets) {
- shortcutCount += w.mShortcutTargets.size();
- for (WeightedString shortcutTarget : w.mShortcutTargets) {
- if (FormatSpec.SHORTCUT_WHITELIST_FREQUENCY == shortcutTarget.mFrequency) {
+ if (null != wordProperty.mShortcutTargets) {
+ shortcutCount += wordProperty.mShortcutTargets.size();
+ for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
+ if (FormatSpec.SHORTCUT_WHITELIST_FREQUENCY
+ == shortcutTarget.getProbability()) {
++whitelistCount;
}
}
@@ -71,7 +72,7 @@ public class Info extends Dicttool.Command {
return;
}
System.out.println("Word: " + word);
- System.out.println(" Freq: " + ptNode.getFrequency());
+ System.out.println(" Freq: " + ptNode.getProbability());
if (ptNode.getIsNotAWord()) {
System.out.println(" Is not a word");
}
@@ -84,8 +85,9 @@ public class Info extends Dicttool.Command {
} else {
for (final WeightedString shortcutTarget : shortcutTargets) {
System.out.println(" Shortcut target: " + shortcutTarget.mWord + " ("
- + (FormatSpec.SHORTCUT_WHITELIST_FREQUENCY == shortcutTarget.mFrequency
- ? "whitelist" : shortcutTarget.mFrequency) + ")");
+ + (FormatSpec.SHORTCUT_WHITELIST_FREQUENCY
+ == shortcutTarget.getProbability() ?
+ "whitelist" : shortcutTarget.getProbability()) + ")");
}
}
final ArrayList<WeightedString> bigrams = ptNode.getBigrams();
@@ -93,7 +95,8 @@ public class Info extends Dicttool.Command {
System.out.println(" No bigrams");
} else {
for (final WeightedString bigram : bigrams) {
- System.out.println(" Bigram: " + bigram.mWord + " (" + bigram.mFrequency + ")");
+ System.out.println(
+ " Bigram: " + bigram.mWord + " (" + bigram.getProbability() + ")");
}
}
}
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java
index 9174238da..48817b1b1 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java
@@ -18,7 +18,6 @@ package com.android.inputmethod.latin.dicttool;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderEncoderTests;
import com.android.inputmethod.latin.makedict.BinaryDictEncoderFlattenTreeTests;
-import com.android.inputmethod.latin.makedict.BinaryDictIOUtilsTests;
import com.android.inputmethod.latin.makedict.FusionDictionaryTest;
import java.lang.reflect.Constructor;
@@ -31,15 +30,15 @@ import java.util.ArrayList;
*/
public class Test extends Dicttool.Command {
public static final String COMMAND = "test";
+ private static final int DEFAULT_MAX_UNIGRAMS = 1500;
private long mSeed = System.currentTimeMillis();
- private int mMaxUnigrams = BinaryDictIOUtilsTests.DEFAULT_MAX_UNIGRAMS;
+ private int mMaxUnigrams = DEFAULT_MAX_UNIGRAMS;
private static final Class<?>[] sClassesToTest = {
BinaryDictOffdeviceUtilsTests.class,
FusionDictionaryTest.class,
BinaryDictDecoderEncoderTests.class,
BinaryDictEncoderFlattenTreeTests.class,
- BinaryDictIOUtilsTests.class
};
private ArrayList<Method> mAllTestMethods = new ArrayList<Method>();
private ArrayList<String> mUsedTestMethods = new ArrayList<String>();
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java
index 4e99bf979..2ac842a80 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java
@@ -20,7 +20,8 @@ import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
-import com.android.inputmethod.latin.makedict.Word;
+import com.android.inputmethod.latin.makedict.ProbabilityInfo;
+import com.android.inputmethod.latin.makedict.WordProperty;
import java.io.BufferedReader;
import java.io.File;
@@ -52,13 +53,11 @@ public class XmlDictInputOutput {
private static final String WORD_TAG = "w";
private static final String BIGRAM_TAG = "bigram";
private static final String SHORTCUT_TAG = "shortcut";
- private static final String FREQUENCY_ATTR = "f";
+ private static final String PROBABILITY_ATTR = "f";
private static final String WORD_ATTR = "word";
private static final String NOT_A_WORD_ATTR = "not_a_word";
private static final String OPTIONS_KEY = "options";
- private static final String GERMAN_UMLAUT_PROCESSING_OPTION = "german_umlaut_processing";
- private static final String FRENCH_LIGATURE_PROCESSING_OPTION = "french_ligature_processing";
/**
* SAX handler for a unigram XML file.
@@ -68,6 +67,7 @@ public class XmlDictInputOutput {
private static final int START = 1;
private static final int WORD = 2;
private static final int UNKNOWN = 3;
+ private static final int SHORTCUT_ONLY_WORD_PROBABILITY = 1;
FusionDictionary mDictionary;
int mState; // the state of the parser
@@ -92,7 +92,8 @@ public class XmlDictInputOutput {
final FusionDictionary dict = mDictionary;
for (final String shortcutOnly : mShortcutsMap.keySet()) {
if (dict.hasWord(shortcutOnly)) continue;
- dict.add(shortcutOnly, 1, mShortcutsMap.get(shortcutOnly), true /* isNotAWord */);
+ dict.add(shortcutOnly, new ProbabilityInfo(SHORTCUT_ONLY_WORD_PROBABILITY),
+ mShortcutsMap.get(shortcutOnly), true /* isNotAWord */);
}
mDictionary = null;
mShortcutsMap.clear();
@@ -109,7 +110,7 @@ public class XmlDictInputOutput {
mWord = "";
for (int attrIndex = 0; attrIndex < attrs.getLength(); ++attrIndex) {
final String attrName = attrs.getLocalName(attrIndex);
- if (FREQUENCY_ATTR.equals(attrName)) {
+ if (PROBABILITY_ATTR.equals(attrName)) {
mFreq = Integer.parseInt(attrs.getValue(attrIndex));
}
}
@@ -120,12 +121,8 @@ public class XmlDictInputOutput {
attributes.put(attrName, attrs.getValue(attrIndex));
}
final String optionsString = attributes.get(OPTIONS_KEY);
- final boolean processUmlauts =
- GERMAN_UMLAUT_PROCESSING_OPTION.equals(optionsString);
- final boolean processLigatures =
- FRENCH_LIGATURE_PROCESSING_OPTION.equals(optionsString);
mDictionary = new FusionDictionary(new PtNodeArray(),
- new DictionaryOptions(attributes, processUmlauts, processLigatures));
+ new DictionaryOptions(attributes));
} else {
mState = UNKNOWN;
}
@@ -144,7 +141,8 @@ public class XmlDictInputOutput {
@Override
public void endElement(String uri, String localName, String qName) {
if (WORD == mState) {
- mDictionary.add(mWord, mFreq, mShortcutsMap.get(mWord), false /* isNotAWord */);
+ mDictionary.add(mWord, new ProbabilityInfo(mFreq), mShortcutsMap.get(mWord),
+ false /* isNotAWord */);
mState = START;
}
}
@@ -325,7 +323,7 @@ public class XmlDictInputOutput {
final ArrayList<WeightedString> bigramList = bigramMap.get(firstWord);
for (final WeightedString bigram : bigramList) {
if (!dict.hasWord(bigram.mWord)) continue;
- dict.setBigram(firstWord, bigram.mWord, bigram.mFrequency);
+ dict.setBigram(firstWord, bigram.mWord, bigram.mProbabilityInfo);
}
}
return dict;
@@ -354,42 +352,38 @@ public class XmlDictInputOutput {
*/
public static void writeDictionaryXml(Writer destination, FusionDictionary dict)
throws IOException {
- final TreeSet<Word> set = new TreeSet<Word>();
- for (Word word : dict) {
- set.add(word);
+ final TreeSet<WordProperty> wordPropertiesInDict = new TreeSet<WordProperty>();
+ for (WordProperty wordProperty : dict) {
+ wordPropertiesInDict.add(wordProperty);
}
// TODO: use an XMLSerializer if this gets big
destination.write("<wordlist format=\"2\"");
- final HashMap<String, String> options = dict.mOptions.mAttributes;
- if (dict.mOptions.mGermanUmlautProcessing) {
- destination.write(" " + OPTIONS_KEY + "=\"" + GERMAN_UMLAUT_PROCESSING_OPTION + "\"");
- } else if (dict.mOptions.mFrenchLigatureProcessing) {
- destination.write(" " + OPTIONS_KEY + "=\"" + FRENCH_LIGATURE_PROCESSING_OPTION + "\"");
- }
for (final String key : dict.mOptions.mAttributes.keySet()) {
final String value = dict.mOptions.mAttributes.get(key);
destination.write(" " + key + "=\"" + value + "\"");
}
destination.write(">\n");
destination.write("<!-- Warning: there is no code to read this format yet. -->\n");
- for (Word word : set) {
- destination.write(" <" + WORD_TAG + " " + WORD_ATTR + "=\"" + word.mWord + "\" "
- + FREQUENCY_ATTR + "=\"" + word.mFrequency
- + (word.mIsNotAWord ? "\" " + NOT_A_WORD_ATTR + "=\"true" : "") + "\">");
- if (null != word.mShortcutTargets) {
+ for (WordProperty wordProperty : wordPropertiesInDict) {
+ destination.write(" <" + WORD_TAG + " " + WORD_ATTR + "=\"" + wordProperty.mWord
+ + "\" " + PROBABILITY_ATTR + "=\"" + wordProperty.getProbability()
+ + (wordProperty.mIsNotAWord ? "\" " + NOT_A_WORD_ATTR + "=\"true" : "")
+ + "\">");
+ if (null != wordProperty.mShortcutTargets) {
destination.write("\n");
- for (WeightedString target : word.mShortcutTargets) {
- destination.write(" <" + SHORTCUT_TAG + " " + FREQUENCY_ATTR + "=\""
- + target.mFrequency + "\">" + target.mWord + "</" + SHORTCUT_TAG
+ for (WeightedString target : wordProperty.mShortcutTargets) {
+ destination.write(" <" + SHORTCUT_TAG + " " + PROBABILITY_ATTR + "=\""
+ + target.getProbability() + "\">" + target.mWord + "</" + SHORTCUT_TAG
+ ">\n");
}
destination.write(" ");
}
- if (null != word.mBigrams) {
+ if (null != wordProperty.mBigrams) {
destination.write("\n");
- for (WeightedString bigram : word.mBigrams) {
- destination.write(" <" + BIGRAM_TAG + " " + FREQUENCY_ATTR + "=\""
- + bigram.mFrequency + "\">" + bigram.mWord + "</" + BIGRAM_TAG + ">\n");
+ for (WeightedString bigram : wordProperty.mBigrams) {
+ destination.write(" <" + BIGRAM_TAG + " " + PROBABILITY_ATTR + "=\""
+ + bigram.getProbability() + "\">" + bigram.mWord
+ + "</" + BIGRAM_TAG + ">\n");
}
destination.write(" ");
}