aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/dicttool/Android.mk4
-rw-r--r--tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java89
-rw-r--r--tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java52
-rw-r--r--tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java14
-rw-r--r--tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java33
-rw-r--r--tools/dicttool/tests/com/android/inputmethod/latin/makedict/FusionDictionaryTest.java6
6 files changed, 95 insertions, 103 deletions
diff --git a/tools/dicttool/Android.mk b/tools/dicttool/Android.mk
index adfb920dd..b83ce57e3 100644
--- a/tools/dicttool/Android.mk
+++ b/tools/dicttool/Android.mk
@@ -43,13 +43,13 @@ USED_TARGETTED_UTILS := \
$(LATINIME_CORE_SOURCE_DIRECTORY)/settings/NativeSuggestOptions.java \
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/ByteArrayDictBuffer.java \
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/CollectionUtils.java \
+ $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/CombinedFormatUtils.java \
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/CoordinateUtils.java \
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/FileUtils.java \
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/JniUtils.java \
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/LocaleUtils.java \
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/ResizableIntArray.java \
- $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/StringUtils.java \
- $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/WordProperty.java
+ $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/StringUtils.java
DICTTOOL_ONDEVICE_TESTS_DIRECTORY := \
$(LATINIME_LOCAL_DIR)/tests/src/com/android/inputmethod/latin/makedict/
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java
index b9840607a..8d2f5fbbf 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java
@@ -21,7 +21,8 @@ import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
-import com.android.inputmethod.latin.makedict.Word;
+import com.android.inputmethod.latin.makedict.WordProperty;
+import com.android.inputmethod.latin.utils.CombinedFormatUtils;
import java.io.BufferedReader;
import java.io.File;
@@ -41,16 +42,10 @@ import java.util.TreeSet;
* All functions in this class are static.
*/
public class CombinedInputOutput {
-
- private static final String DICTIONARY_TAG = "dictionary";
- private static final String BIGRAM_TAG = "bigram";
- private static final String SHORTCUT_TAG = "shortcut";
- private static final String FREQUENCY_TAG = "f";
- private static final String WORD_TAG = "word";
- private static final String NOT_A_WORD_TAG = "not_a_word";
private static final String WHITELIST_TAG = "whitelist";
private static final String OPTIONS_TAG = "options";
private static final String COMMENT_LINE_STARTER = "#";
+ private static final int HISTORICAL_INFO_ELEMENT_COUNT = 3;
/**
* Basic test to find out whether the file is in the combined format or not.
@@ -68,7 +63,8 @@ public class CombinedInputOutput {
while (firstLine.startsWith(COMMENT_LINE_STARTER)) {
firstLine = reader.readLine();
}
- return firstLine.matches("^" + DICTIONARY_TAG + "=[^:]+(:[^=]+=[^:]+)*");
+ return firstLine.matches(
+ "^" + CombinedFormatUtils.DICTIONARY_TAG + "=[^:]+(:[^=]+=[^:]+)*");
} catch (FileNotFoundException e) {
return false;
} catch (IOException e) {
@@ -123,7 +119,7 @@ public class CombinedInputOutput {
while (null != (line = reader.readLine())) {
if (line.startsWith(COMMENT_LINE_STARTER)) continue;
final String args[] = line.trim().split(",");
- if (args[0].matches(WORD_TAG + "=.*")) {
+ if (args[0].matches(CombinedFormatUtils.WORD_TAG + "=.*")) {
if (null != word) {
dict.add(word, freq, shortcuts.isEmpty() ? null : shortcuts, isNotAWord);
for (WeightedString s : bigrams) {
@@ -136,23 +132,30 @@ public class CombinedInputOutput {
for (String param : args) {
final String params[] = param.split("=", 2);
if (2 != params.length) throw new RuntimeException("Wrong format : " + line);
- if (WORD_TAG.equals(params[0])) {
+ if (CombinedFormatUtils.WORD_TAG.equals(params[0])) {
word = params[1];
- } else if (FREQUENCY_TAG.equals(params[0])) {
+ } else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) {
freq = Integer.parseInt(params[1]);
- } else if (NOT_A_WORD_TAG.equals(params[0])) {
+ } else if (CombinedFormatUtils.HISTORICAL_INFO_TAG.equals(params[0])) {
+ final String[] historicalInfoParams =
+ params[1].split(CombinedFormatUtils.HISTORICAL_INFO_SEPARATOR);
+ if (historicalInfoParams.length != HISTORICAL_INFO_ELEMENT_COUNT) {
+ throw new RuntimeException("Wrong format (historical info) : " + line);
+ }
+ // TODO: Use parsed historical info.
+ } else if (CombinedFormatUtils.NOT_A_WORD_TAG.equals(params[0])) {
isNotAWord = "true".equals(params[1]);
}
}
- } else if (args[0].matches(SHORTCUT_TAG + "=.*")) {
+ } else if (args[0].matches(CombinedFormatUtils.SHORTCUT_TAG + "=.*")) {
String shortcut = null;
int shortcutFreq = 0;
for (String param : args) {
final String params[] = param.split("=", 2);
if (2 != params.length) throw new RuntimeException("Wrong format : " + line);
- if (SHORTCUT_TAG.equals(params[0])) {
+ if (CombinedFormatUtils.SHORTCUT_TAG.equals(params[0])) {
shortcut = params[1];
- } else if (FREQUENCY_TAG.equals(params[0])) {
+ } else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) {
shortcutFreq = WHITELIST_TAG.equals(params[1])
? FormatSpec.SHORTCUT_WHITELIST_FREQUENCY
: Integer.parseInt(params[1]);
@@ -163,16 +166,23 @@ public class CombinedInputOutput {
} else {
throw new RuntimeException("Wrong format : " + line);
}
- } else if (args[0].matches(BIGRAM_TAG + "=.*")) {
+ } else if (args[0].matches(CombinedFormatUtils.BIGRAM_TAG + "=.*")) {
String secondWordOfBigram = null;
int bigramFreq = 0;
for (String param : args) {
final String params[] = param.split("=", 2);
if (2 != params.length) throw new RuntimeException("Wrong format : " + line);
- if (BIGRAM_TAG.equals(params[0])) {
+ if (CombinedFormatUtils.BIGRAM_TAG.equals(params[0])) {
secondWordOfBigram = params[1];
- } else if (FREQUENCY_TAG.equals(params[0])) {
+ } else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) {
bigramFreq = Integer.parseInt(params[1]);
+ } else if (CombinedFormatUtils.HISTORICAL_INFO_TAG.equals(params[0])) {
+ final String[] historicalInfoParams =
+ params[1].split(CombinedFormatUtils.HISTORICAL_INFO_SEPARATOR);
+ if (historicalInfoParams.length != HISTORICAL_INFO_ELEMENT_COUNT) {
+ throw new RuntimeException("Wrong format (historical info) : " + line);
+ }
+ // TODO: Use parsed historical info.
}
}
if (null != secondWordOfBigram) {
@@ -198,39 +208,16 @@ public class CombinedInputOutput {
* @param destination a destination stream to write to.
* @param dict the dictionary to write.
*/
- public static void writeDictionaryCombined(Writer destination, FusionDictionary dict)
- throws IOException {
- final TreeSet<Word> set = new TreeSet<Word>();
- for (Word word : dict) {
- set.add(word); // This for ordering by frequency, then by asciibetic order
- }
- final HashMap<String, String> options = dict.mOptions.mAttributes;
- destination.write(DICTIONARY_TAG + "=");
- if (options.containsKey(DICTIONARY_TAG)) {
- destination.write(options.get(DICTIONARY_TAG));
- options.remove(DICTIONARY_TAG);
- }
- for (final String key : dict.mOptions.mAttributes.keySet()) {
- final String value = dict.mOptions.mAttributes.get(key);
- destination.write("," + key + "=" + value);
+ public static void writeDictionaryCombined(
+ final Writer destination, final FusionDictionary dict) throws IOException {
+ final TreeSet<WordProperty> wordPropertiesInDict = new TreeSet<WordProperty>();
+ for (final WordProperty wordProperty : dict) {
+ // This for ordering by frequency, then by asciibetic order
+ wordPropertiesInDict.add(wordProperty);
}
- destination.write("\n");
- for (Word word : set) {
- destination.write(" " + WORD_TAG + "=" + word.mWord + ","
- + FREQUENCY_TAG + "=" + word.mFrequency
- + (word.mIsNotAWord ? "," + NOT_A_WORD_TAG + "=true\n" : "\n"));
- if (null != word.mShortcutTargets) {
- for (WeightedString target : word.mShortcutTargets) {
- destination.write(" " + SHORTCUT_TAG + "=" + target.mWord + ","
- + FREQUENCY_TAG + "=" + target.getProbability() + "\n");
- }
- }
- if (null != word.mBigrams) {
- for (WeightedString bigram : word.mBigrams) {
- destination.write(" " + BIGRAM_TAG + "=" + bigram.mWord + ","
- + FREQUENCY_TAG + "=" + bigram.getProbability() + "\n");
- }
- }
+ destination.write(CombinedFormatUtils.formatAttributeMap(dict.mOptions.mAttributes));
+ for (final WordProperty wordProperty : wordPropertiesInDict) {
+ destination.write(CombinedFormatUtils.formatWordProperty(wordProperty));
}
destination.close();
}
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java
index c9f6bd508..9947608ea 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java
@@ -19,7 +19,7 @@ package com.android.inputmethod.latin.dicttool;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
-import com.android.inputmethod.latin.makedict.Word;
+import com.android.inputmethod.latin.makedict.WordProperty;
import java.util.Arrays;
import java.util.ArrayList;
@@ -108,42 +108,46 @@ public class Diff extends Dicttool.Command {
private static void diffWords(final FusionDictionary dict0, final FusionDictionary dict1) {
boolean hasDifferences = false;
- for (final Word word0 : dict0) {
- final PtNode word1 = FusionDictionary.findWordInTree(dict1.mRootNodeArray,
- word0.mWord);
- if (null == word1) {
+ for (final WordProperty word0Property : dict0) {
+ final PtNode word1PtNode = FusionDictionary.findWordInTree(dict1.mRootNodeArray,
+ word0Property.mWord);
+ if (null == word1PtNode) {
// This word is not in dict1
- System.out.println("Deleted: " + word0.mWord + " " + word0.mFrequency);
+ System.out.println("Deleted: " + word0Property.mWord + " "
+ + word0Property.getProbability());
hasDifferences = true;
} else {
// We found the word. Compare frequencies, shortcuts, bigrams
- if (word0.mFrequency != word1.getFrequency()) {
- System.out.println("Freq changed: " + word0.mWord + " " + word0.mFrequency
- + " -> " + word1.getFrequency());
+ if (word0Property.getProbability() != word1PtNode.getFrequency()) {
+ System.out.println("Probability changed: " + word0Property.mWord + " "
+ + word0Property.getProbability() + " -> " + word1PtNode.getFrequency());
hasDifferences = true;
}
- if (word0.mIsNotAWord != word1.getIsNotAWord()) {
- System.out.println("Not a word: " + word0.mWord + " " + word0.mIsNotAWord
- + " -> " + word1.getIsNotAWord());
+ if (word0Property.mIsNotAWord != word1PtNode.getIsNotAWord()) {
+ System.out.println("Not a word: " + word0Property.mWord + " "
+ + word0Property.mIsNotAWord + " -> " + word1PtNode.getIsNotAWord());
hasDifferences = true;
}
- if (word0.mIsBlacklistEntry != word1.getIsBlacklistEntry()) {
- System.out.println("Blacklist: " + word0.mWord + " " + word0.mIsBlacklistEntry
- + " -> " + word1.getIsBlacklistEntry());
+ if (word0Property.mIsBlacklistEntry != word1PtNode.getIsBlacklistEntry()) {
+ System.out.println("Blacklist: " + word0Property.mWord + " "
+ + word0Property.mIsBlacklistEntry + " -> "
+ + word1PtNode.getIsBlacklistEntry());
hasDifferences = true;
}
- hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0.mWord,
- "Bigram", word0.mBigrams, word1.getBigrams());
- hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0.mWord,
- "Shortcut", word0.mShortcutTargets, word1.getShortcutTargets());
+ hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord,
+ "Bigram", word0Property.mBigrams, word1PtNode.getBigrams());
+ hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord,
+ "Shortcut", word0Property.mShortcutTargets,
+ word1PtNode.getShortcutTargets());
}
}
- for (final Word word1 : dict1) {
- final PtNode word0 = FusionDictionary.findWordInTree(dict0.mRootNodeArray,
- word1.mWord);
- if (null == word0) {
+ for (final WordProperty word1Property : dict1) {
+ final PtNode word0PtNode = FusionDictionary.findWordInTree(dict0.mRootNodeArray,
+ word1Property.mWord);
+ if (null == word0PtNode) {
// This word is not in dict0
- System.out.println("Added: " + word1.mWord + " " + word1.mFrequency);
+ System.out.println("Added: " + word1Property.mWord + " "
+ + word1Property.getProbability());
hasDifferences = true;
}
}
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java
index 8f17fcd94..c1eb0f8e7 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java
@@ -20,7 +20,7 @@ import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
-import com.android.inputmethod.latin.makedict.Word;
+import com.android.inputmethod.latin.makedict.WordProperty;
import java.util.Arrays;
import java.util.ArrayList;
@@ -43,14 +43,14 @@ public class Info extends Dicttool.Command {
int bigramCount = 0;
int shortcutCount = 0;
int whitelistCount = 0;
- for (final Word w : dict) {
+ for (final WordProperty wordProperty : dict) {
++wordCount;
- if (null != w.mBigrams) {
- bigramCount += w.mBigrams.size();
+ if (null != wordProperty.mBigrams) {
+ bigramCount += wordProperty.mBigrams.size();
}
- if (null != w.mShortcutTargets) {
- shortcutCount += w.mShortcutTargets.size();
- for (WeightedString shortcutTarget : w.mShortcutTargets) {
+ if (null != wordProperty.mShortcutTargets) {
+ shortcutCount += wordProperty.mShortcutTargets.size();
+ for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
if (FormatSpec.SHORTCUT_WHITELIST_FREQUENCY
== shortcutTarget.getProbability()) {
++whitelistCount;
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java
index cdc487b16..c6c60b8e2 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java
@@ -20,7 +20,7 @@ import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
-import com.android.inputmethod.latin.makedict.Word;
+import com.android.inputmethod.latin.makedict.WordProperty;
import java.io.BufferedReader;
import java.io.File;
@@ -52,7 +52,7 @@ public class XmlDictInputOutput {
private static final String WORD_TAG = "w";
private static final String BIGRAM_TAG = "bigram";
private static final String SHORTCUT_TAG = "shortcut";
- private static final String FREQUENCY_ATTR = "f";
+ private static final String PROBABILITY_ATTR = "f";
private static final String WORD_ATTR = "word";
private static final String NOT_A_WORD_ATTR = "not_a_word";
@@ -107,7 +107,7 @@ public class XmlDictInputOutput {
mWord = "";
for (int attrIndex = 0; attrIndex < attrs.getLength(); ++attrIndex) {
final String attrName = attrs.getLocalName(attrIndex);
- if (FREQUENCY_ATTR.equals(attrName)) {
+ if (PROBABILITY_ATTR.equals(attrName)) {
mFreq = Integer.parseInt(attrs.getValue(attrIndex));
}
}
@@ -348,9 +348,9 @@ public class XmlDictInputOutput {
*/
public static void writeDictionaryXml(Writer destination, FusionDictionary dict)
throws IOException {
- final TreeSet<Word> set = new TreeSet<Word>();
- for (Word word : dict) {
- set.add(word);
+ final TreeSet<WordProperty> wordPropertiesInDict = new TreeSet<WordProperty>();
+ for (WordProperty wordProperty : dict) {
+ wordPropertiesInDict.add(wordProperty);
}
// TODO: use an XMLSerializer if this gets big
destination.write("<wordlist format=\"2\"");
@@ -361,23 +361,24 @@ public class XmlDictInputOutput {
}
destination.write(">\n");
destination.write("<!-- Warning: there is no code to read this format yet. -->\n");
- for (Word word : set) {
- destination.write(" <" + WORD_TAG + " " + WORD_ATTR + "=\"" + word.mWord + "\" "
- + FREQUENCY_ATTR + "=\"" + word.mFrequency
- + (word.mIsNotAWord ? "\" " + NOT_A_WORD_ATTR + "=\"true" : "") + "\">");
- if (null != word.mShortcutTargets) {
+ for (WordProperty wordProperty : wordPropertiesInDict) {
+ destination.write(" <" + WORD_TAG + " " + WORD_ATTR + "=\"" + wordProperty.mWord
+ + "\" " + PROBABILITY_ATTR + "=\"" + wordProperty.getProbability()
+ + (wordProperty.mIsNotAWord ? "\" " + NOT_A_WORD_ATTR + "=\"true" : "")
+ + "\">");
+ if (null != wordProperty.mShortcutTargets) {
destination.write("\n");
- for (WeightedString target : word.mShortcutTargets) {
- destination.write(" <" + SHORTCUT_TAG + " " + FREQUENCY_ATTR + "=\""
+ for (WeightedString target : wordProperty.mShortcutTargets) {
+ destination.write(" <" + SHORTCUT_TAG + " " + PROBABILITY_ATTR + "=\""
+ target.getProbability() + "\">" + target.mWord + "</" + SHORTCUT_TAG
+ ">\n");
}
destination.write(" ");
}
- if (null != word.mBigrams) {
+ if (null != wordProperty.mBigrams) {
destination.write("\n");
- for (WeightedString bigram : word.mBigrams) {
- destination.write(" <" + BIGRAM_TAG + " " + FREQUENCY_ATTR + "=\""
+ for (WeightedString bigram : wordProperty.mBigrams) {
+ destination.write(" <" + BIGRAM_TAG + " " + PROBABILITY_ATTR + "=\""
+ bigram.getProbability() + "\">" + bigram.mWord
+ "</" + BIGRAM_TAG + ">\n");
}
diff --git a/tools/dicttool/tests/com/android/inputmethod/latin/makedict/FusionDictionaryTest.java b/tools/dicttool/tests/com/android/inputmethod/latin/makedict/FusionDictionaryTest.java
index 76dadc25c..191546433 100644
--- a/tools/dicttool/tests/com/android/inputmethod/latin/makedict/FusionDictionaryTest.java
+++ b/tools/dicttool/tests/com/android/inputmethod/latin/makedict/FusionDictionaryTest.java
@@ -20,7 +20,7 @@ import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
-import com.android.inputmethod.latin.makedict.Word;
+import com.android.inputmethod.latin.makedict.WordProperty;
import junit.framework.TestCase;
@@ -87,8 +87,8 @@ public class FusionDictionaryTest extends TestCase {
}
private void dumpDict(final FusionDictionary dict) {
- for (Word w : dict) {
- System.out.println("Word " + dumpWord(w.mWord));
+ for (WordProperty wordProperty : dict) {
+ System.out.println("Word " + dumpWord(wordProperty.mWord));
}
}