about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Jean Chalard <jchalard@google.com> 2012-01-16 02:25:13 -0800
committer Android Git Automerger <android-git-automerger@android.com> 2012-01-16 02:25:13 -0800
commit 99eabc62f1a6b08c78c9707e0dbe80e7c58e0008 (patch)
tree 58efd875ff43c4413c56482153de62c6279b45e8
parent c0bf277fe345ab74ac531bcda686e057a166f1a2 (diff)
parent c83999972d309a9a57aac0473e72654bb3ca5438 (diff)
download latinime-99eabc62f1a6b08c78c9707e0dbe80e7c58e0008.tar.gz
latinime-99eabc62f1a6b08c78c9707e0dbe80e7c58e0008.tar.xz
latinime-99eabc62f1a6b08c78c9707e0dbe80e7c58e0008.zip
am c8399997: Merge "Actually add shortcut-only entries."
* commit 'c83999972d309a9a57aac0473e72654bb3ca5438': Actually add shortcut-only entries.
-rw-r--r-- tools/makedict/src/com/android/inputmethod/latin/BinaryDictInputOutput.java 4
-rw-r--r-- tools/makedict/src/com/android/inputmethod/latin/FusionDictionary.java 72
-rw-r--r-- tools/makedict/src/com/android/inputmethod/latin/XmlDictInputOutput.java 11
3 files changed, 64 insertions, 23 deletions
diff --git a/tools/makedict/src/com/android/inputmethod/latin/BinaryDictInputOutput.java b/tools/makedict/src/com/android/inputmethod/latin/BinaryDictInputOutput.java
index 35e8c4818..7aadc677b 100644
--- a/tools/makedict/src/com/android/inputmethod/latin/BinaryDictInputOutput.java
+++ b/tools/makedict/src/com/android/inputmethod/latin/BinaryDictInputOutput.java
@@ -606,7 +606,9 @@ public class BinaryDictInputOutput {
}
flags |= FLAG_HAS_BIGRAMS;
}
- // TODO: fill in the FLAG_IS_SHORTCUT_ONLY
+ if (group.mIsShortcutOnly) {
+ flags |= FLAG_IS_SHORTCUT_ONLY;
+ }
return flags;
}
diff --git a/tools/makedict/src/com/android/inputmethod/latin/FusionDictionary.java b/tools/makedict/src/com/android/inputmethod/latin/FusionDictionary.java
index 8b94485e4..918b1ca4b 100644
--- a/tools/makedict/src/com/android/inputmethod/latin/FusionDictionary.java
+++ b/tools/makedict/src/com/android/inputmethod/latin/FusionDictionary.java
@@ -171,6 +171,24 @@ public class FusionDictionary implements Iterable<Word> {
}
/**
+ * Helper method to add all words in a list as 0-frequency entries.
+ *
+ * These words are added when shortcut targets or bigrams are not found in the dictionary
+ * yet. The same words may be added later with an actual frequency - this is handled by
+ * the private version of add().
+ *
+ * @param words the words to add when they are not found in the tree. A null list is ignored.
+ */
+ private void addNeutralWords(final ArrayList<WeightedString> words) {
+ if (null != words) {
+ for (WeightedString word : words) {
+ final CharGroup t = findWordInTree(mRoot, word.mWord);
+ if (null == t) {
+ add(getCodePoints(word.mWord), 0, null, null, false /* isShortcutOnly */);
+ }
+ }
+ }
+ }
+
+ /**
* Helper method to add a word as a string.
*
* This method adds a word to the dictionary with the given frequency. Optional
@@ -186,22 +204,12 @@ public class FusionDictionary implements Iterable<Word> {
final ArrayList<WeightedString> shortcutTargets,
final ArrayList<WeightedString> bigrams) {
if (null != shortcutTargets) {
- for (WeightedString target : shortcutTargets) {
- final CharGroup t = findWordInTree(mRoot, target.mWord);
- if (null == t) {
- add(getCodePoints(target.mWord), 0, null, null);
- }
- }
+ addNeutralWords(shortcutTargets);
}
if (null != bigrams) {
- for (WeightedString bigram : bigrams) {
- final CharGroup t = findWordInTree(mRoot, bigram.mWord);
- if (null == t) {
- add(getCodePoints(bigram.mWord), 0, null, null);
- }
- }
+ addNeutralWords(bigrams);
}
- add(getCodePoints(word), frequency, shortcutTargets, bigrams);
+ add(getCodePoints(word), frequency, shortcutTargets, bigrams, false /* isShortcutOnly */);
}
/**
@@ -223,6 +231,22 @@ public class FusionDictionary implements Iterable<Word> {
}
/**
+ * Helper method to add a shortcut that should not be a dictionary word.
+ *
+ * Shortcut targets that are not found in the dictionary yet are first added as 0-frequency
+ * entries. The word itself is then passed to the private version of add() with no bigrams
+ * and with isShortcutOnly set to true.
+ *
+ * @param word the word to add.
+ * @param frequency the frequency of the word, in the range [0..255].
+ * @param shortcutTargets a list of shortcut targets. May not be null.
+ * @throws RuntimeException if shortcutTargets is null.
+ */
+ public void addShortcutOnly(final String word, final int frequency,
+ final ArrayList<WeightedString> shortcutTargets) {
+ if (null == shortcutTargets) {
+ throw new RuntimeException("Can't add a shortcut without targets");
+ }
+ addNeutralWords(shortcutTargets);
+ add(getCodePoints(word), frequency, shortcutTargets, null, true /* isShortcutOnly */);
+ }
+
+ /**
* Add a word to this dictionary.
*
* The shortcuts and bigrams, if any, have to be in the dictionary already. If they aren't,
@@ -232,10 +256,12 @@ public class FusionDictionary implements Iterable<Word> {
* @param frequency the frequency of the word, in the range [0..255].
* @param shortcutTargets an optional list of shortcut targets for this word (null if none).
* @param bigrams an optional list of bigrams for this word (null if none).
+ * @param isShortcutOnly whether this should be a shortcut only.
*/
private void add(final int[] word, final int frequency,
final ArrayList<WeightedString> shortcutTargets,
- final ArrayList<WeightedString> bigrams) {
+ final ArrayList<WeightedString> bigrams,
+ final boolean isShortcutOnly) {
assert(frequency >= 0 && frequency <= 255);
Node currentNode = mRoot;
int charIndex = 0;
@@ -260,7 +286,7 @@ public class FusionDictionary implements Iterable<Word> {
final int insertionIndex = findInsertionIndex(currentNode, word[charIndex]);
final CharGroup newGroup = new CharGroup(
Arrays.copyOfRange(word, charIndex, word.length),
- shortcutTargets, bigrams, frequency, false /* isShortcutOnly */);
+ shortcutTargets, bigrams, frequency, isShortcutOnly);
currentNode.mData.add(insertionIndex, newGroup);
checkStack(currentNode);
} else {
@@ -275,7 +301,7 @@ public class FusionDictionary implements Iterable<Word> {
} else {
final CharGroup newNode = new CharGroup(currentGroup.mChars,
shortcutTargets, bigrams, frequency, currentGroup.mChildren,
- false /* isShortcutOnly */);
+ isShortcutOnly);
currentNode.mData.set(nodeIndex, newNode);
checkStack(currentNode);
}
@@ -284,8 +310,7 @@ public class FusionDictionary implements Iterable<Word> {
// We only have to create a new node and add it to the end of this.
final CharGroup newNode = new CharGroup(
Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length),
- shortcutTargets, bigrams, frequency,
- false /* isShortcutOnly */);
+ shortcutTargets, bigrams, frequency, isShortcutOnly);
currentGroup.mChildren = new Node();
currentGroup.mChildren.mData.add(newNode);
}
@@ -300,7 +325,8 @@ public class FusionDictionary implements Iterable<Word> {
}
final CharGroup newGroup = new CharGroup(word,
currentGroup.mShortcutTargets, currentGroup.mBigrams,
- frequency, currentGroup.mChildren, false /* isShortcutOnly */);
+ frequency, currentGroup.mChildren,
+ currentGroup.mIsShortcutOnly && isShortcutOnly);
currentNode.mData.set(nodeIndex, newGroup);
}
} else {
@@ -318,16 +344,18 @@ public class FusionDictionary implements Iterable<Word> {
if (charIndex + differentCharIndex >= word.length) {
newParent = new CharGroup(
Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex),
- shortcutTargets, bigrams, frequency, newChildren,
- false /* isShortcutOnly */);
+ shortcutTargets, bigrams, frequency, newChildren, isShortcutOnly);
} else {
+ // isShortcutOnly makes no sense for non-terminal nodes. The following node
+ // is non-terminal (frequency 0 in FusionDictionary representation) so we
+ // pass false for isShortcutOnly
newParent = new CharGroup(
Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex),
null, null, -1, newChildren, false /* isShortcutOnly */);
final CharGroup newWord = new CharGroup(
Arrays.copyOfRange(word, charIndex + differentCharIndex,
word.length), shortcutTargets, bigrams, frequency,
- false /* isShortcutOnly */);
+ isShortcutOnly);
final int addIndex = word[charIndex + differentCharIndex]
> currentGroup.mChars[differentCharIndex] ? 1 : 0;
newChildren.mData.add(addIndex, newWord);
diff --git a/tools/makedict/src/com/android/inputmethod/latin/XmlDictInputOutput.java b/tools/makedict/src/com/android/inputmethod/latin/XmlDictInputOutput.java
index a13a3b88f..77c536668 100644
--- a/tools/makedict/src/com/android/inputmethod/latin/XmlDictInputOutput.java
+++ b/tools/makedict/src/com/android/inputmethod/latin/XmlDictInputOutput.java
@@ -47,6 +47,8 @@ public class XmlDictInputOutput {
private static final String WORD_ATTR = "word";
private static final String SHORTCUT_ONLY_ATTR = "shortcutOnly";
+ private static final int SHORTCUT_ONLY_DEFAULT_FREQ = 1;
+
/**
* SAX handler for a unigram XML file.
*/
@@ -233,6 +235,15 @@ public class XmlDictInputOutput {
new UnigramHandler(dict, shortcutHandler.getShortcutMap(),
bigramHandler.getBigramMap());
parser.parse(unigrams, unigramHandler);
+
+ final HashMap<String, ArrayList<WeightedString>> shortcutMap =
+ shortcutHandler.getShortcutMap();
+ for (final String shortcut : shortcutMap.keySet()) {
+ if (dict.hasWord(shortcut)) continue;
+ // TODO: list a frequency in the shortcut file and use it here, instead of
+ // a constant freq
+ dict.addShortcutOnly(shortcut, SHORTCUT_ONLY_DEFAULT_FREQ, shortcutMap.get(shortcut));
+ }
return dict;
}