diff options
Diffstat (limited to 'tools')
6 files changed, 95 insertions, 103 deletions
diff --git a/tools/dicttool/Android.mk b/tools/dicttool/Android.mk index adfb920dd..b83ce57e3 100644 --- a/tools/dicttool/Android.mk +++ b/tools/dicttool/Android.mk @@ -43,13 +43,13 @@ USED_TARGETTED_UTILS := \ $(LATINIME_CORE_SOURCE_DIRECTORY)/settings/NativeSuggestOptions.java \ $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/ByteArrayDictBuffer.java \ $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/CollectionUtils.java \ + $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/CombinedFormatUtils.java \ $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/CoordinateUtils.java \ $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/FileUtils.java \ $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/JniUtils.java \ $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/LocaleUtils.java \ $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/ResizableIntArray.java \ - $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/StringUtils.java \ - $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/WordProperty.java + $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/StringUtils.java DICTTOOL_ONDEVICE_TESTS_DIRECTORY := \ $(LATINIME_LOCAL_DIR)/tests/src/com/android/inputmethod/latin/makedict/ diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java index b9840607a..8d2f5fbbf 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java @@ -21,7 +21,8 @@ import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; -import com.android.inputmethod.latin.makedict.Word; +import com.android.inputmethod.latin.makedict.WordProperty; +import com.android.inputmethod.latin.utils.CombinedFormatUtils; import java.io.BufferedReader; import java.io.File; @@ -41,16 +42,10 @@ import java.util.TreeSet; * All functions in this class are static. */ public class CombinedInputOutput { - - private static final String DICTIONARY_TAG = "dictionary"; - private static final String BIGRAM_TAG = "bigram"; - private static final String SHORTCUT_TAG = "shortcut"; - private static final String FREQUENCY_TAG = "f"; - private static final String WORD_TAG = "word"; - private static final String NOT_A_WORD_TAG = "not_a_word"; private static final String WHITELIST_TAG = "whitelist"; private static final String OPTIONS_TAG = "options"; private static final String COMMENT_LINE_STARTER = "#"; + private static final int HISTORICAL_INFO_ELEMENT_COUNT = 3; /** * Basic test to find out whether the file is in the combined format or not. @@ -68,7 +63,8 @@ public class CombinedInputOutput { while (firstLine.startsWith(COMMENT_LINE_STARTER)) { firstLine = reader.readLine(); } - return firstLine.matches("^" + DICTIONARY_TAG + "=[^:]+(:[^=]+=[^:]+)*"); + return firstLine.matches( + "^" + CombinedFormatUtils.DICTIONARY_TAG + "=[^:]+(:[^=]+=[^:]+)*"); } catch (FileNotFoundException e) { return false; } catch (IOException e) { @@ -123,7 +119,7 @@ public class CombinedInputOutput { while (null != (line = reader.readLine())) { if (line.startsWith(COMMENT_LINE_STARTER)) continue; final String args[] = line.trim().split(","); - if (args[0].matches(WORD_TAG + "=.*")) { + if (args[0].matches(CombinedFormatUtils.WORD_TAG + "=.*")) { if (null != word) { dict.add(word, freq, shortcuts.isEmpty() ? null : shortcuts, isNotAWord); for (WeightedString s : bigrams) { @@ -136,23 +132,30 @@ public class CombinedInputOutput { for (String param : args) { final String params[] = param.split("=", 2); if (2 != params.length) throw new RuntimeException("Wrong format : " + line); - if (WORD_TAG.equals(params[0])) { + if (CombinedFormatUtils.WORD_TAG.equals(params[0])) { word = params[1]; - } else if (FREQUENCY_TAG.equals(params[0])) { + } else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) { freq = Integer.parseInt(params[1]); - } else if (NOT_A_WORD_TAG.equals(params[0])) { + } else if (CombinedFormatUtils.HISTORICAL_INFO_TAG.equals(params[0])) { + final String[] historicalInfoParams = + params[1].split(CombinedFormatUtils.HISTORICAL_INFO_SEPARATOR); + if (historicalInfoParams.length != HISTORICAL_INFO_ELEMENT_COUNT) { + throw new RuntimeException("Wrong format (historical info) : " + line); + } + // TODO: Use parsed historical info. + } else if (CombinedFormatUtils.NOT_A_WORD_TAG.equals(params[0])) { isNotAWord = "true".equals(params[1]); } } - } else if (args[0].matches(SHORTCUT_TAG + "=.*")) { + } else if (args[0].matches(CombinedFormatUtils.SHORTCUT_TAG + "=.*")) { String shortcut = null; int shortcutFreq = 0; for (String param : args) { final String params[] = param.split("=", 2); if (2 != params.length) throw new RuntimeException("Wrong format : " + line); - if (SHORTCUT_TAG.equals(params[0])) { + if (CombinedFormatUtils.SHORTCUT_TAG.equals(params[0])) { shortcut = params[1]; - } else if (FREQUENCY_TAG.equals(params[0])) { + } else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) { shortcutFreq = WHITELIST_TAG.equals(params[1]) ? FormatSpec.SHORTCUT_WHITELIST_FREQUENCY : Integer.parseInt(params[1]); @@ -163,16 +166,23 @@ public class CombinedInputOutput { } else { throw new RuntimeException("Wrong format : " + line); } - } else if (args[0].matches(BIGRAM_TAG + "=.*")) { + } else if (args[0].matches(CombinedFormatUtils.BIGRAM_TAG + "=.*")) { String secondWordOfBigram = null; int bigramFreq = 0; for (String param : args) { final String params[] = param.split("=", 2); if (2 != params.length) throw new RuntimeException("Wrong format : " + line); - if (BIGRAM_TAG.equals(params[0])) { + if (CombinedFormatUtils.BIGRAM_TAG.equals(params[0])) { secondWordOfBigram = params[1]; - } else if (FREQUENCY_TAG.equals(params[0])) { + } else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) { bigramFreq = Integer.parseInt(params[1]); + } else if (CombinedFormatUtils.HISTORICAL_INFO_TAG.equals(params[0])) { + final String[] historicalInfoParams = + params[1].split(CombinedFormatUtils.HISTORICAL_INFO_SEPARATOR); + if (historicalInfoParams.length != HISTORICAL_INFO_ELEMENT_COUNT) { + throw new RuntimeException("Wrong format (historical info) : " + line); + } + // TODO: Use parsed historical info. } } if (null != secondWordOfBigram) { @@ -198,39 +208,16 @@ public class CombinedInputOutput { * @param destination a destination stream to write to. * @param dict the dictionary to write. */ - public static void writeDictionaryCombined(Writer destination, FusionDictionary dict) - throws IOException { - final TreeSet<Word> set = new TreeSet<Word>(); - for (Word word : dict) { - set.add(word); // This for ordering by frequency, then by asciibetic order - } - final HashMap<String, String> options = dict.mOptions.mAttributes; - destination.write(DICTIONARY_TAG + "="); - if (options.containsKey(DICTIONARY_TAG)) { - destination.write(options.get(DICTIONARY_TAG)); - options.remove(DICTIONARY_TAG); - } - for (final String key : dict.mOptions.mAttributes.keySet()) { - final String value = dict.mOptions.mAttributes.get(key); - destination.write("," + key + "=" + value); + public static void writeDictionaryCombined( + final Writer destination, final FusionDictionary dict) throws IOException { + final TreeSet<WordProperty> wordPropertiesInDict = new TreeSet<WordProperty>(); + for (final WordProperty wordProperty : dict) { + // This for ordering by frequency, then by asciibetic order + wordPropertiesInDict.add(wordProperty); } - destination.write("\n"); - for (Word word : set) { - destination.write(" " + WORD_TAG + "=" + word.mWord + "," - + FREQUENCY_TAG + "=" + word.mFrequency - + (word.mIsNotAWord ? "," + NOT_A_WORD_TAG + "=true\n" : "\n")); - if (null != word.mShortcutTargets) { - for (WeightedString target : word.mShortcutTargets) { - destination.write(" " + SHORTCUT_TAG + "=" + target.mWord + "," - + FREQUENCY_TAG + "=" + target.getProbability() + "\n"); - } - } - if (null != word.mBigrams) { - for (WeightedString bigram : word.mBigrams) { - destination.write(" " + BIGRAM_TAG + "=" + bigram.mWord + "," - + FREQUENCY_TAG + "=" + bigram.getProbability() + "\n"); - } - } + destination.write(CombinedFormatUtils.formatAttributeMap(dict.mOptions.mAttributes)); + for (final WordProperty wordProperty : wordPropertiesInDict) { + destination.write(CombinedFormatUtils.formatWordProperty(wordProperty)); } destination.close(); } diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java index c9f6bd508..9947608ea 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java @@ -19,7 +19,7 @@ package com.android.inputmethod.latin.dicttool; import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; -import com.android.inputmethod.latin.makedict.Word; +import com.android.inputmethod.latin.makedict.WordProperty; import java.util.Arrays; import java.util.ArrayList; @@ -108,42 +108,46 @@ public class Diff extends Dicttool.Command { private static void diffWords(final FusionDictionary dict0, final FusionDictionary dict1) { boolean hasDifferences = false; - for (final Word word0 : dict0) { - final PtNode word1 = FusionDictionary.findWordInTree(dict1.mRootNodeArray, - word0.mWord); - if (null == word1) { + for (final WordProperty word0Property : dict0) { + final PtNode word1PtNode = FusionDictionary.findWordInTree(dict1.mRootNodeArray, + word0Property.mWord); + if (null == word1PtNode) { // This word is not in dict1 - System.out.println("Deleted: " + word0.mWord + " " + word0.mFrequency); + System.out.println("Deleted: " + word0Property.mWord + " " + + word0Property.getProbability()); hasDifferences = true; } else { // We found the word. Compare frequencies, shortcuts, bigrams - if (word0.mFrequency != word1.getFrequency()) { - System.out.println("Freq changed: " + word0.mWord + " " + word0.mFrequency - + " -> " + word1.getFrequency()); + if (word0Property.getProbability() != word1PtNode.getFrequency()) { + System.out.println("Probability changed: " + word0Property.mWord + " " + + word0Property.getProbability() + " -> " + word1PtNode.getFrequency()); hasDifferences = true; } - if (word0.mIsNotAWord != word1.getIsNotAWord()) { - System.out.println("Not a word: " + word0.mWord + " " + word0.mIsNotAWord - + " -> " + word1.getIsNotAWord()); + if (word0Property.mIsNotAWord != word1PtNode.getIsNotAWord()) { + System.out.println("Not a word: " + word0Property.mWord + " " + + word0Property.mIsNotAWord + " -> " + word1PtNode.getIsNotAWord()); hasDifferences = true; } - if (word0.mIsBlacklistEntry != word1.getIsBlacklistEntry()) { - System.out.println("Blacklist: " + word0.mWord + " " + word0.mIsBlacklistEntry - + " -> " + word1.getIsBlacklistEntry()); + if (word0Property.mIsBlacklistEntry != word1PtNode.getIsBlacklistEntry()) { + System.out.println("Blacklist: " + word0Property.mWord + " " + + word0Property.mIsBlacklistEntry + " -> " + + word1PtNode.getIsBlacklistEntry()); hasDifferences = true; } - hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0.mWord, - "Bigram", word0.mBigrams, word1.getBigrams()); - hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0.mWord, - "Shortcut", word0.mShortcutTargets, word1.getShortcutTargets()); + hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord, + "Bigram", word0Property.mBigrams, word1PtNode.getBigrams()); + hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord, + "Shortcut", word0Property.mShortcutTargets, + word1PtNode.getShortcutTargets()); } } - for (final Word word1 : dict1) { - final PtNode word0 = FusionDictionary.findWordInTree(dict0.mRootNodeArray, - word1.mWord); - if (null == word0) { + for (final WordProperty word1Property : dict1) { + final PtNode word0PtNode = FusionDictionary.findWordInTree(dict0.mRootNodeArray, + word1Property.mWord); + if (null == word0PtNode) { // This word is not in dict0 - System.out.println("Added: " + word1.mWord + " " + word1.mFrequency); + System.out.println("Added: " + word1Property.mWord + " " + + word1Property.getProbability()); hasDifferences = true; } } diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java index 8f17fcd94..c1eb0f8e7 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java @@ -20,7 +20,7 @@ import com.android.inputmethod.latin.makedict.FormatSpec; import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; -import com.android.inputmethod.latin.makedict.Word; +import com.android.inputmethod.latin.makedict.WordProperty; import java.util.Arrays; import java.util.ArrayList; @@ -43,14 +43,14 @@ public class Info extends Dicttool.Command { int bigramCount = 0; int shortcutCount = 0; int whitelistCount = 0; - for (final Word w : dict) { + for (final WordProperty wordProperty : dict) { ++wordCount; - if (null != w.mBigrams) { - bigramCount += w.mBigrams.size(); + if (null != wordProperty.mBigrams) { + bigramCount += wordProperty.mBigrams.size(); } - if (null != w.mShortcutTargets) { - shortcutCount += w.mShortcutTargets.size(); - for (WeightedString shortcutTarget : w.mShortcutTargets) { + if (null != wordProperty.mShortcutTargets) { + shortcutCount += wordProperty.mShortcutTargets.size(); + for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) { if (FormatSpec.SHORTCUT_WHITELIST_FREQUENCY == shortcutTarget.getProbability()) { ++whitelistCount; diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java index cdc487b16..c6c60b8e2 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java @@ -20,7 +20,7 @@ import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; -import com.android.inputmethod.latin.makedict.Word; +import com.android.inputmethod.latin.makedict.WordProperty; import java.io.BufferedReader; import java.io.File; @@ -52,7 +52,7 @@ public class XmlDictInputOutput { private static final String WORD_TAG = "w"; private static final String BIGRAM_TAG = "bigram"; private static final String SHORTCUT_TAG = "shortcut"; - private static final String FREQUENCY_ATTR = "f"; + private static final String PROBABILITY_ATTR = "f"; private static final String WORD_ATTR = "word"; private static final String NOT_A_WORD_ATTR = "not_a_word"; @@ -107,7 +107,7 @@ public class XmlDictInputOutput { mWord = ""; for (int attrIndex = 0; attrIndex < attrs.getLength(); ++attrIndex) { final String attrName = attrs.getLocalName(attrIndex); - if (FREQUENCY_ATTR.equals(attrName)) { + if (PROBABILITY_ATTR.equals(attrName)) { mFreq = Integer.parseInt(attrs.getValue(attrIndex)); } } @@ -348,9 +348,9 @@ public class XmlDictInputOutput { */ public static void writeDictionaryXml(Writer destination, FusionDictionary dict) throws IOException { - final TreeSet<Word> set = new TreeSet<Word>(); - for (Word word : dict) { - set.add(word); + final TreeSet<WordProperty> wordPropertiesInDict = new TreeSet<WordProperty>(); + for (WordProperty wordProperty : dict) { + wordPropertiesInDict.add(wordProperty); } // TODO: use an XMLSerializer if this gets big destination.write("<wordlist format=\"2\""); @@ -361,23 +361,24 @@ public class XmlDictInputOutput { } destination.write(">\n"); destination.write("<!-- Warning: there is no code to read this format yet. -->\n"); - for (Word word : set) { - destination.write(" <" + WORD_TAG + " " + WORD_ATTR + "=\"" + word.mWord + "\" " - + FREQUENCY_ATTR + "=\"" + word.mFrequency - + (word.mIsNotAWord ? "\" " + NOT_A_WORD_ATTR + "=\"true" : "") + "\">"); - if (null != word.mShortcutTargets) { + for (WordProperty wordProperty : wordPropertiesInDict) { + destination.write(" <" + WORD_TAG + " " + WORD_ATTR + "=\"" + wordProperty.mWord + + "\" " + PROBABILITY_ATTR + "=\"" + wordProperty.getProbability() + + (wordProperty.mIsNotAWord ? "\" " + NOT_A_WORD_ATTR + "=\"true" : "") + + "\">"); + if (null != wordProperty.mShortcutTargets) { destination.write("\n"); - for (WeightedString target : word.mShortcutTargets) { - destination.write(" <" + SHORTCUT_TAG + " " + FREQUENCY_ATTR + "=\"" + for (WeightedString target : wordProperty.mShortcutTargets) { + destination.write(" <" + SHORTCUT_TAG + " " + PROBABILITY_ATTR + "=\"" + target.getProbability() + "\">" + target.mWord + "</" + SHORTCUT_TAG + ">\n"); } destination.write(" "); } - if (null != word.mBigrams) { + if (null != wordProperty.mBigrams) { destination.write("\n"); - for (WeightedString bigram : word.mBigrams) { - destination.write(" <" + BIGRAM_TAG + " " + FREQUENCY_ATTR + "=\"" + for (WeightedString bigram : wordProperty.mBigrams) { + destination.write(" <" + BIGRAM_TAG + " " + PROBABILITY_ATTR + "=\"" + bigram.getProbability() + "\">" + bigram.mWord + "</" + BIGRAM_TAG + ">\n"); } diff --git a/tools/dicttool/tests/com/android/inputmethod/latin/makedict/FusionDictionaryTest.java b/tools/dicttool/tests/com/android/inputmethod/latin/makedict/FusionDictionaryTest.java index 76dadc25c..191546433 100644 --- a/tools/dicttool/tests/com/android/inputmethod/latin/makedict/FusionDictionaryTest.java +++ b/tools/dicttool/tests/com/android/inputmethod/latin/makedict/FusionDictionaryTest.java @@ -20,7 +20,7 @@ import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; -import com.android.inputmethod.latin.makedict.Word; +import com.android.inputmethod.latin.makedict.WordProperty; import junit.framework.TestCase; @@ -87,8 +87,8 @@ public class FusionDictionaryTest extends TestCase { } private void dumpDict(final FusionDictionary dict) { - for (Word w : dict) { - System.out.println("Word " + dumpWord(w.mWord)); + for (WordProperty wordProperty : dict) { + System.out.println("Word " + dumpWord(wordProperty.mWord)); } } |