about | summary | refs | log | tree | commit | diff | stats
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r-- tools/makedict/src/com/android/inputmethod/latin/makedict/XmlDictInputOutput.java | 59
-rw-r--r-- tools/makedict/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java | 6
2 files changed, 43 insertions, 22 deletions
diff --git a/tools/makedict/src/com/android/inputmethod/latin/makedict/XmlDictInputOutput.java b/tools/makedict/src/com/android/inputmethod/latin/makedict/XmlDictInputOutput.java
index 1d45fd25f..d1d2a9ca4 100644
--- a/tools/makedict/src/com/android/inputmethod/latin/makedict/XmlDictInputOutput.java
+++ b/tools/makedict/src/com/android/inputmethod/latin/makedict/XmlDictInputOutput.java
@@ -16,6 +16,8 @@
package com.android.inputmethod.latin.makedict;
+import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
+import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import java.io.IOException;
@@ -46,10 +48,13 @@ public class XmlDictInputOutput {
private static final String SHORTCUT_TAG = "shortcut";
private static final String FREQUENCY_ATTR = "f";
private static final String WORD_ATTR = "word";
- private static final String SHORTCUT_ONLY_ATTR = "shortcutOnly";
private static final int SHORTCUT_ONLY_DEFAULT_FREQ = 1;
+ private static final String OPTIONS_KEY = "options";
+ private static final String GERMAN_UMLAUT_PROCESSING_OPTION = "german_umlaut_processing";
+ private static final String FRENCH_LIGATURE_PROCESSING_OPTION = "french_ligature_processing";
+
/**
* SAX handler for a unigram XML file.
*/
@@ -62,7 +67,7 @@ public class XmlDictInputOutput {
private static final int END = 5;
private static final int UNKNOWN = 6;
- final FusionDictionary mDictionary;
+ FusionDictionary mDictionary;
int mState; // the state of the parser
int mFreq; // the currently read freq
String mWord; // the current word
@@ -72,13 +77,12 @@ public class XmlDictInputOutput {
/**
* Create the handler.
*
- * @param dict the dictionary to construct.
+ * @param shortcuts the shortcuts as a map. This may be empty, but may not be null.
* @param bigrams the bigrams as a map. This may be empty, but may not be null.
*/
- public UnigramHandler(final FusionDictionary dict,
- final HashMap<String, ArrayList<WeightedString>> shortcuts,
+ public UnigramHandler(final HashMap<String, ArrayList<WeightedString>> shortcuts,
final HashMap<String, ArrayList<WeightedString>> bigrams) {
- mDictionary = dict;
+ mDictionary = null;
mShortcutsMap = shortcuts;
mBigramsMap = bigrams;
mWord = "";
@@ -86,6 +90,17 @@ public class XmlDictInputOutput {
mFreq = 0;
}
+ public FusionDictionary getFinalDictionary() { // Hands the dictionary built by this handler to the caller and resets the handler's fields.
+ final FusionDictionary dict = mDictionary; // Keep a reference for the return value; the field itself is cleared on the next line.
+ mDictionary = null; // Same value the constructor assigns; a new dictionary is only created when the root tag is handled in startElement.
+ mShortcutsMap.clear(); // NOTE(review): this is the very map object passed to the constructor, so the caller's shortcut map is emptied too.
+ mBigramsMap.clear(); // NOTE(review): likewise empties the bigram map instance the caller supplied.
+ mWord = ""; // Back to the constructor's initial value.
+ mState = START;
+ mFreq = 0; // Back to the constructor's initial value.
+ return dict; // NOTE(review): presumably null if the root tag was never parsed, since the field starts out null — confirm against the full file.
+ }
+
@Override
public void startElement(String uri, String localName, String qName, Attributes attrs) {
if (WORD_TAG.equals(localName)) {
@@ -98,10 +113,18 @@ public class XmlDictInputOutput {
}
}
} else if (ROOT_TAG.equals(localName)) {
+ final HashMap<String, String> attributes = new HashMap<String, String>();
for (int attrIndex = 0; attrIndex < attrs.getLength(); ++attrIndex) {
final String attrName = attrs.getLocalName(attrIndex);
- mDictionary.mOptions.mAttributes.put(attrName, attrs.getValue(attrIndex));
+ attributes.put(attrName, attrs.getValue(attrIndex));
}
+ final String optionsString = attributes.get(OPTIONS_KEY);
+ final boolean processUmlauts =
+ GERMAN_UMLAUT_PROCESSING_OPTION.equals(optionsString);
+ final boolean processLigatures =
+ FRENCH_LIGATURE_PROCESSING_OPTION.equals(optionsString);
+ mDictionary = new FusionDictionary(new Node(), new DictionaryOptions(attributes,
+ processUmlauts, processLigatures));
} else {
mState = UNKNOWN;
}
@@ -236,21 +259,11 @@ public class XmlDictInputOutput {
final ShortcutHandler shortcutHandler = new ShortcutHandler();
if (null != shortcuts) parser.parse(shortcuts, shortcutHandler);
- final FusionDictionary dict = new FusionDictionary();
final UnigramHandler unigramHandler =
- new UnigramHandler(dict, shortcutHandler.getShortcutMap(),
+ new UnigramHandler(shortcutHandler.getShortcutMap(),
bigramHandler.getBigramMap());
parser.parse(unigrams, unigramHandler);
-
- final HashMap<String, ArrayList<WeightedString>> shortcutMap =
- shortcutHandler.getShortcutMap();
- for (final String shortcut : shortcutMap.keySet()) {
- if (dict.hasWord(shortcut)) continue;
- // TODO: list a frequency in the shortcut file and use it here, instead of
- // a constant freq
- dict.addShortcutOnly(shortcut, SHORTCUT_ONLY_DEFAULT_FREQ, shortcutMap.get(shortcut));
- }
- return dict;
+ return unigramHandler.getFinalDictionary();
}
/**
@@ -283,6 +296,11 @@ public class XmlDictInputOutput {
// TODO: use an XMLSerializer if this gets big
destination.write("<wordlist format=\"2\"");
final HashMap<String, String> options = dict.mOptions.mAttributes;
+ if (dict.mOptions.mGermanUmlautProcessing) {
+ destination.write(" " + OPTIONS_KEY + "=\"" + GERMAN_UMLAUT_PROCESSING_OPTION + "\"");
+ } else if (dict.mOptions.mFrenchLigatureProcessing) {
+ destination.write(" " + OPTIONS_KEY + "=\"" + FRENCH_LIGATURE_PROCESSING_OPTION + "\"");
+ }
for (final String key : dict.mOptions.mAttributes.keySet()) {
final String value = dict.mOptions.mAttributes.get(key);
destination.write(" " + key + "=\"" + value + "\"");
@@ -291,8 +309,7 @@ public class XmlDictInputOutput {
destination.write("<!-- Warning: there is no code to read this format yet. -->\n");
for (Word word : set) {
destination.write(" <" + WORD_TAG + " " + WORD_ATTR + "=\"" + word.mWord + "\" "
- + FREQUENCY_ATTR + "=\"" + word.mFrequency + "\" " + SHORTCUT_ONLY_ATTR
- + "=\"" + word.mIsShortcutOnly + "\">");
+ + FREQUENCY_ATTR + "=\"" + word.mFrequency + "\">");
if (null != word.mShortcutTargets) {
destination.write("\n");
for (WeightedString target : word.mShortcutTargets) {
diff --git a/tools/makedict/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java b/tools/makedict/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java
index 9682c9b4e..191eb804d 100644
--- a/tools/makedict/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java
+++ b/tools/makedict/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java
@@ -16,9 +16,11 @@
package com.android.inputmethod.latin.makedict;
+import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
import java.util.ArrayList;
+import java.util.HashMap;
import junit.framework.TestCase;
@@ -38,7 +40,9 @@ public class BinaryDictInputOutputTest extends TestCase {
// Test the flattened array contains the expected number of nodes, and
// that it does not contain any duplicates.
public void testFlattenNodes() {
- final FusionDictionary dict = new FusionDictionary();
+ final FusionDictionary dict = new FusionDictionary(new Node(),
+ new DictionaryOptions(new HashMap<String, String>(),
+ false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */));
dict.add("foo", 1, null, null);
dict.add("fta", 1, null, null);
dict.add("ftb", 1, null, null);