about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Jean Chalard <jchalard@google.com> 2012-08-10 11:26:40 +0900
committer Jean Chalard <jchalard@google.com> 2012-08-10 11:26:40 +0900
commit 31b90a368fa0c9bf22e65384d8a8f5e23d92bb29 (patch)
tree 11af5367e3bc54fb2e15ce2fbaa57a99258f3dfa
parent 5bb420627520412a621121f5c21da123f6a4cb1c (diff)
download latinime-31b90a368fa0c9bf22e65384d8a8f5e23d92bb29.tar.gz
latinime-31b90a368fa0c9bf22e65384d8a8f5e23d92bb29.tar.xz
latinime-31b90a368fa0c9bf22e65384d8a8f5e23d92bb29.zip
Support a syntax to add whitelist entries in shortcuts
Bug: 6906525
Change-Id: Ie33586c07973bf76e38390766175ba424d72d655
-rw-r--r-- tools/dicttool/src/android/inputmethod/latin/dicttool/DictionaryMaker.java | 4
-rw-r--r-- tools/dicttool/src/android/inputmethod/latin/dicttool/XmlDictInputOutput.java | 45
2 files changed, 36 insertions, 13 deletions
diff --git a/tools/dicttool/src/android/inputmethod/latin/dicttool/DictionaryMaker.java b/tools/dicttool/src/android/inputmethod/latin/dicttool/DictionaryMaker.java
index 9ebd3bbdd..25e1740cb 100644
--- a/tools/dicttool/src/android/inputmethod/latin/dicttool/DictionaryMaker.java
+++ b/tools/dicttool/src/android/inputmethod/latin/dicttool/DictionaryMaker.java
@@ -112,7 +112,7 @@ public class DictionaryMaker {
public static String getHelp() {
return "Usage: makedict "
- + "[-s <unigrams.xml> [-b <bigrams.xml>] [-c <shortcuts.xml>] "
+ + "[-s <unigrams.xml> [-b <bigrams.xml>] [-c <shortcuts_and_whitelist.xml>] "
+ "| -s <binary input>] [-d <binary output format version 2>] "
+ "[-d1 <binary output format version 1>] [-x <xml output>] [-2]\n"
+ "\n"
@@ -246,7 +246,7 @@ public class DictionaryMaker {
* Read a dictionary from a unigram XML file, and optionally a bigram XML file.
*
* @param unigramXmlFilename the name of the unigram XML file. May not be null.
- * @param shortcutXmlFilename the name of the shortcut XML file, or null if there is none.
+ * @param shortcutXmlFilename the name of the shortcut/whitelist XML file, or null if none.
* @param bigramXmlFilename the name of the bigram XML file. Pass null if there are no bigrams.
* @return the read dictionary.
* @throws FileNotFoundException if one of the files can't be found
diff --git a/tools/dicttool/src/android/inputmethod/latin/dicttool/XmlDictInputOutput.java b/tools/dicttool/src/android/inputmethod/latin/dicttool/XmlDictInputOutput.java
index 8e2e73505..9ab56d797 100644
--- a/tools/dicttool/src/android/inputmethod/latin/dicttool/XmlDictInputOutput.java
+++ b/tools/dicttool/src/android/inputmethod/latin/dicttool/XmlDictInputOutput.java
@@ -179,7 +179,7 @@ public class XmlDictInputOutput {
mSrc = attrs.getValue(uri, SRC_ATTRIBUTE);
} else if (DST_TAG.equals(localName)) {
String dst = attrs.getValue(uri, DST_ATTRIBUTE);
- int freq = Integer.parseInt(attrs.getValue(uri, DST_FREQ));
+ int freq = getValueFromFreqString(attrs.getValue(uri, DST_FREQ));
WeightedString bigram = new WeightedString(dst, freq / XML_TO_MEMORY_RATIO);
ArrayList<WeightedString> bigramList = mAssocMap.get(mSrc);
if (null == bigramList) bigramList = new ArrayList<WeightedString>();
@@ -188,6 +188,10 @@ public class XmlDictInputOutput {
}
}
+ protected int getValueFromFreqString(final String freqString) {
+ return Integer.parseInt(freqString);
+ }
+
// This may return an empty map, but will never return null.
public HashMap<String, ArrayList<WeightedString>> getAssocMap() {
return mAssocMap;
@@ -216,22 +220,40 @@ public class XmlDictInputOutput {
}
/**
- * SAX handler for a shortcut XML file.
+ * SAX handler for a shortcut & whitelist XML file.
*/
- static private class ShortcutHandler extends AssociativeListHandler {
+ static private class ShortcutAndWhitelistHandler extends AssociativeListHandler {
private final static String ENTRY_TAG = "entry";
private final static String ENTRY_ATTRIBUTE = "shortcut";
private final static String TARGET_TAG = "target";
private final static String REPLACEMENT_ATTRIBUTE = "replacement";
private final static String TARGET_PRIORITY_ATTRIBUTE = "priority";
+ private final static String WHITELIST_MARKER = "whitelist";
+ private final static int WHITELIST_FREQ_VALUE = 15;
+ private final static int MIN_FREQ = 0;
+ private final static int MAX_FREQ = 14;
- public ShortcutHandler() {
+ public ShortcutAndWhitelistHandler() {
super(ENTRY_TAG, ENTRY_ATTRIBUTE, TARGET_TAG, REPLACEMENT_ATTRIBUTE,
TARGET_PRIORITY_ATTRIBUTE);
}
+ @Override
+ protected int getValueFromFreqString(final String freqString) {
+ if (WHITELIST_MARKER.equals(freqString)) {
+ return WHITELIST_FREQ_VALUE;
+ } else {
+ final int intValue = super.getValueFromFreqString(freqString);
+ if (intValue < MIN_FREQ || intValue > MAX_FREQ) {
+ throw new RuntimeException("Shortcut freq out of range. Accepted range is "
+ + MIN_FREQ + ".." + MAX_FREQ);
+ }
+ return intValue;
+ }
+ }
+
// As per getAssocMap(), this never returns null.
- public HashMap<String, ArrayList<WeightedString>> getShortcutMap() {
+ public HashMap<String, ArrayList<WeightedString>> getShortcutAndWhitelistMap() {
return getAssocMap();
}
}
@@ -243,7 +265,7 @@ public class XmlDictInputOutput {
* representation.
*
* @param unigrams the file to read the data from.
- * @param shortcuts the file to read the shortcuts from, or null.
+ * @param shortcuts the file to read the shortcuts & whitelist from, or null.
* @param bigrams the file to read the bigrams from, or null.
* @return the in-memory representation of the dictionary.
*/
@@ -256,11 +278,12 @@ public class XmlDictInputOutput {
final BigramHandler bigramHandler = new BigramHandler();
if (null != bigrams) parser.parse(bigrams, bigramHandler);
- final ShortcutHandler shortcutHandler = new ShortcutHandler();
- if (null != shortcuts) parser.parse(shortcuts, shortcutHandler);
+ final ShortcutAndWhitelistHandler shortcutAndWhitelistHandler =
+ new ShortcutAndWhitelistHandler();
+ if (null != shortcuts) parser.parse(shortcuts, shortcutAndWhitelistHandler);
final UnigramHandler unigramHandler =
- new UnigramHandler(shortcutHandler.getShortcutMap());
+ new UnigramHandler(shortcutAndWhitelistHandler.getShortcutAndWhitelistMap());
parser.parse(unigrams, unigramHandler);
final FusionDictionary dict = unigramHandler.getFinalDictionary();
final HashMap<String, ArrayList<WeightedString>> bigramMap = bigramHandler.getBigramMap();
@@ -280,7 +303,7 @@ public class XmlDictInputOutput {
*
* This method reads data from the parser and creates a new FusionDictionary with it.
* The format parsed by this method is the format used before Ice Cream Sandwich,
- * which has no support for bigrams or shortcuts.
+ * which has no support for bigrams or shortcuts/whitelist.
* It is important to note that this method expects the parser to have already eaten
* the first, all-encompassing tag.
*
@@ -291,7 +314,7 @@ public class XmlDictInputOutput {
/**
* Writes a dictionary to an XML file.
*
- * The output format is the "second" format, which supports bigrams and shortcuts.
+ * The output format is the "second" format, which supports bigrams and shortcuts/whitelist.
*
* @param destination a destination stream to write to.
* @param dict the dictionary to write.