diff options
Diffstat (limited to 'tools')
-rw-r--r-- | tools/makedict/src/com/android/inputmethod/latin/makedict/XmlDictInputOutput.java | 59 | ||||
-rw-r--r-- | tools/makedict/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java | 6 |
2 files changed, 43 insertions, 22 deletions
diff --git a/tools/makedict/src/com/android/inputmethod/latin/makedict/XmlDictInputOutput.java b/tools/makedict/src/com/android/inputmethod/latin/makedict/XmlDictInputOutput.java index 1d45fd25f..d1d2a9ca4 100644 --- a/tools/makedict/src/com/android/inputmethod/latin/makedict/XmlDictInputOutput.java +++ b/tools/makedict/src/com/android/inputmethod/latin/makedict/XmlDictInputOutput.java @@ -16,6 +16,8 @@ package com.android.inputmethod.latin.makedict; +import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; +import com.android.inputmethod.latin.makedict.FusionDictionary.Node; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import java.io.IOException; @@ -46,10 +48,13 @@ public class XmlDictInputOutput { private static final String SHORTCUT_TAG = "shortcut"; private static final String FREQUENCY_ATTR = "f"; private static final String WORD_ATTR = "word"; - private static final String SHORTCUT_ONLY_ATTR = "shortcutOnly"; private static final int SHORTCUT_ONLY_DEFAULT_FREQ = 1; + private static final String OPTIONS_KEY = "options"; + private static final String GERMAN_UMLAUT_PROCESSING_OPTION = "german_umlaut_processing"; + private static final String FRENCH_LIGATURE_PROCESSING_OPTION = "french_ligature_processing"; + /** * SAX handler for a unigram XML file. */ @@ -62,7 +67,7 @@ public class XmlDictInputOutput { private static final int END = 5; private static final int UNKNOWN = 6; - final FusionDictionary mDictionary; + FusionDictionary mDictionary; int mState; // the state of the parser int mFreq; // the currently read freq String mWord; // the current word @@ -72,13 +77,12 @@ public class XmlDictInputOutput { /** * Create the handler. * - * @param dict the dictionary to construct. + * @param shortcuts the shortcuts as a map. This may be empty, but may not be null. * @param bigrams the bigrams as a map. This may be empty, but may not be null. */ - public UnigramHandler(final FusionDictionary dict, - final HashMap<String, ArrayList<WeightedString>> shortcuts, + public UnigramHandler(final HashMap<String, ArrayList<WeightedString>> shortcuts, final HashMap<String, ArrayList<WeightedString>> bigrams) { - mDictionary = dict; + mDictionary = null; mShortcutsMap = shortcuts; mBigramsMap = bigrams; mWord = ""; @@ -86,6 +90,17 @@ public class XmlDictInputOutput { mFreq = 0; } + public FusionDictionary getFinalDictionary() { + final FusionDictionary dict = mDictionary; + mDictionary = null; + mShortcutsMap.clear(); + mBigramsMap.clear(); + mWord = ""; + mState = START; + mFreq = 0; + return dict; + } + @Override public void startElement(String uri, String localName, String qName, Attributes attrs) { if (WORD_TAG.equals(localName)) { @@ -98,10 +113,18 @@ public class XmlDictInputOutput { } } } else if (ROOT_TAG.equals(localName)) { + final HashMap<String, String> attributes = new HashMap<String, String>(); for (int attrIndex = 0; attrIndex < attrs.getLength(); ++attrIndex) { final String attrName = attrs.getLocalName(attrIndex); - mDictionary.mOptions.mAttributes.put(attrName, attrs.getValue(attrIndex)); + attributes.put(attrName, attrs.getValue(attrIndex)); } + final String optionsString = attributes.get(OPTIONS_KEY); + final boolean processUmlauts = + GERMAN_UMLAUT_PROCESSING_OPTION.equals(optionsString); + final boolean processLigatures = + FRENCH_LIGATURE_PROCESSING_OPTION.equals(optionsString); + mDictionary = new FusionDictionary(new Node(), new DictionaryOptions(attributes, + processUmlauts, processLigatures)); } else { mState = UNKNOWN; } @@ -236,21 +259,11 @@ public class XmlDictInputOutput { final ShortcutHandler shortcutHandler = new ShortcutHandler(); if (null != shortcuts) parser.parse(shortcuts, shortcutHandler); - final FusionDictionary dict = new FusionDictionary(); final UnigramHandler unigramHandler = - new UnigramHandler(dict, shortcutHandler.getShortcutMap(), + new UnigramHandler(shortcutHandler.getShortcutMap(), bigramHandler.getBigramMap()); parser.parse(unigrams, unigramHandler); - - final HashMap<String, ArrayList<WeightedString>> shortcutMap = - shortcutHandler.getShortcutMap(); - for (final String shortcut : shortcutMap.keySet()) { - if (dict.hasWord(shortcut)) continue; - // TODO: list a frequency in the shortcut file and use it here, instead of - // a constant freq - dict.addShortcutOnly(shortcut, SHORTCUT_ONLY_DEFAULT_FREQ, shortcutMap.get(shortcut)); - } - return dict; + return unigramHandler.getFinalDictionary(); } /** @@ -283,6 +296,11 @@ public class XmlDictInputOutput { // TODO: use an XMLSerializer if this gets big destination.write("<wordlist format=\"2\""); final HashMap<String, String> options = dict.mOptions.mAttributes; + if (dict.mOptions.mGermanUmlautProcessing) { + destination.write(" " + OPTIONS_KEY + "=\"" + GERMAN_UMLAUT_PROCESSING_OPTION + "\""); + } else if (dict.mOptions.mFrenchLigatureProcessing) { + destination.write(" " + OPTIONS_KEY + "=\"" + FRENCH_LIGATURE_PROCESSING_OPTION + "\""); + } for (final String key : dict.mOptions.mAttributes.keySet()) { final String value = dict.mOptions.mAttributes.get(key); destination.write(" " + key + "=\"" + value + "\""); @@ -291,8 +309,7 @@ public class XmlDictInputOutput { destination.write("<!-- Warning: there is no code to read this format yet. -->\n"); for (Word word : set) { destination.write(" <" + WORD_TAG + " " + WORD_ATTR + "=\"" + word.mWord + "\" " - + FREQUENCY_ATTR + "=\"" + word.mFrequency + "\" " + SHORTCUT_ONLY_ATTR - + "=\"" + word.mIsShortcutOnly + "\">"); + + FREQUENCY_ATTR + "=\"" + word.mFrequency + "\">"); if (null != word.mShortcutTargets) { destination.write("\n"); for (WeightedString target : word.mShortcutTargets) { diff --git a/tools/makedict/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java b/tools/makedict/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java index 9682c9b4e..191eb804d 100644 --- a/tools/makedict/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java +++ b/tools/makedict/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java @@ -16,9 +16,11 @@ package com.android.inputmethod.latin.makedict; +import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.Node; import java.util.ArrayList; +import java.util.HashMap; import junit.framework.TestCase; @@ -38,7 +40,9 @@ public class BinaryDictInputOutputTest extends TestCase { // Test the flattened array contains the expected number of nodes, and // that it does not contain any duplicates. public void testFlattenNodes() { - final FusionDictionary dict = new FusionDictionary(); + final FusionDictionary dict = new FusionDictionary(new Node(), + new DictionaryOptions(new HashMap<String, String>(), + false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */)); dict.add("foo", 1, null, null); dict.add("fta", 1, null, null); dict.add("ftb", 1, null, null); |