aboutsummaryrefslogtreecommitdiffstats
path: root/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
diff options
context:
space:
mode:
authorJean Chalard <jchalard@google.com>2012-08-31 15:24:39 +0900
committerJean Chalard <jchalard@google.com>2012-08-31 22:11:52 +0900
commit72b1c9394105b6fbc0d8c6ff00f3574ee37a9aaa (patch)
tree546b40591ede9a009afdab46dd7495aafc859fa0 /java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
parent49d8af8a4e900f3c68c333aba7fde0a11fd368b1 (diff)
downloadlatinime-72b1c9394105b6fbc0d8c6ff00f3574ee37a9aaa.tar.gz
latinime-72b1c9394105b6fbc0d8c6ff00f3574ee37a9aaa.tar.xz
latinime-72b1c9394105b6fbc0d8c6ff00f3574ee37a9aaa.zip
Reinstate the shortcut-only attribute
Also add the blacklist attribute Bug: 7005742 Bug: 2704000 Change-Id: Icbe60bdf25bfb098d9e3f20870be30d6aef07c9d
Diffstat (limited to 'java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java')
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java76
1 files changed, 58 insertions, 18 deletions
diff --git a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
index 7c15ba54d..f1abea9ec 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
@@ -101,26 +101,34 @@ public class FusionDictionary implements Iterable<Word> {
ArrayList<WeightedString> mBigrams;
int mFrequency; // NOT_A_TERMINAL == mFrequency indicates this is not a terminal.
Node mChildren;
+ boolean mIsNotAWord; // Only a shortcut
+ boolean mIsBlacklistEntry;
// The two following members to help with binary generation
int mCachedSize;
int mCachedAddress;
public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
- final ArrayList<WeightedString> bigrams, final int frequency) {
+ final ArrayList<WeightedString> bigrams, final int frequency,
+ final boolean isNotAWord, final boolean isBlacklistEntry) {
mChars = chars;
mFrequency = frequency;
mShortcutTargets = shortcutTargets;
mBigrams = bigrams;
mChildren = null;
+ mIsNotAWord = isNotAWord;
+ mIsBlacklistEntry = isBlacklistEntry;
}
public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
- final ArrayList<WeightedString> bigrams, final int frequency, final Node children) {
+ final ArrayList<WeightedString> bigrams, final int frequency,
+ final boolean isNotAWord, final boolean isBlacklistEntry, final Node children) {
mChars = chars;
mFrequency = frequency;
mShortcutTargets = shortcutTargets;
mBigrams = bigrams;
mChildren = children;
+ mIsNotAWord = isNotAWord;
+ mIsBlacklistEntry = isBlacklistEntry;
}
public void addChild(CharGroup n) {
@@ -197,8 +205,9 @@ public class FusionDictionary implements Iterable<Word> {
* the existing ones if any. Note: unigram, bigram, and shortcut frequencies are only
* updated if they are higher than the existing ones.
*/
- public void update(int frequency, ArrayList<WeightedString> shortcutTargets,
- ArrayList<WeightedString> bigrams) {
+ public void update(final int frequency, final ArrayList<WeightedString> shortcutTargets,
+ final ArrayList<WeightedString> bigrams,
+ final boolean isNotAWord, final boolean isBlacklistEntry) {
if (frequency > mFrequency) {
mFrequency = frequency;
}
@@ -234,6 +243,8 @@ public class FusionDictionary implements Iterable<Word> {
}
}
}
+ mIsNotAWord = isNotAWord;
+ mIsBlacklistEntry = isBlacklistEntry;
}
}
@@ -296,10 +307,24 @@ public class FusionDictionary implements Iterable<Word> {
* @param word the word to add.
* @param frequency the frequency of the word, in the range [0..255].
* @param shortcutTargets a list of shortcut targets for this word, or null.
+ * @param isNotAWord true if this should not be considered a word (e.g. shortcut only)
*/
public void add(final String word, final int frequency,
- final ArrayList<WeightedString> shortcutTargets) {
- add(getCodePoints(word), frequency, shortcutTargets);
+ final ArrayList<WeightedString> shortcutTargets, final boolean isNotAWord) {
+ add(getCodePoints(word), frequency, shortcutTargets, isNotAWord,
+ false /* isBlacklistEntry */);
+ }
+
+ /**
+ * Helper method to add a blacklist entry as a string.
+ *
+ * @param word the word to add as a blacklist entry.
+ * @param shortcutTargets a list of shortcut targets for this word, or null.
+ * @param isNotAWord true if this is not a word for spellcheking purposes (shortcut only or so)
+ */
+ public void addBlacklistEntry(final String word,
+ final ArrayList<WeightedString> shortcutTargets, final boolean isNotAWord) {
+ add(getCodePoints(word), 0, shortcutTargets, isNotAWord, true /* isBlacklistEntry */);
}
/**
@@ -332,7 +357,8 @@ public class FusionDictionary implements Iterable<Word> {
if (charGroup != null) {
final CharGroup charGroup2 = findWordInTree(mRoot, word2);
if (charGroup2 == null) {
- add(getCodePoints(word2), 0, null);
+ add(getCodePoints(word2), 0, null, false /* isNotAWord */,
+ false /* isBlacklistEntry */);
}
charGroup.addBigram(word2, frequency);
} else {
@@ -349,9 +375,12 @@ public class FusionDictionary implements Iterable<Word> {
* @param word the word, as an int array.
* @param frequency the frequency of the word, in the range [0..255].
* @param shortcutTargets an optional list of shortcut targets for this word (null if none).
+ * @param isNotAWord true if this is not a word for spellcheking purposes (shortcut only or so)
+ * @param isBlacklistEntry true if this is a blacklisted word, false otherwise
*/
private void add(final int[] word, final int frequency,
- final ArrayList<WeightedString> shortcutTargets) {
+ final ArrayList<WeightedString> shortcutTargets,
+ final boolean isNotAWord, final boolean isBlacklistEntry) {
assert(frequency >= 0 && frequency <= 255);
Node currentNode = mRoot;
int charIndex = 0;
@@ -376,7 +405,7 @@ public class FusionDictionary implements Iterable<Word> {
final int insertionIndex = findInsertionIndex(currentNode, word[charIndex]);
final CharGroup newGroup = new CharGroup(
Arrays.copyOfRange(word, charIndex, word.length),
- shortcutTargets, null /* bigrams */, frequency);
+ shortcutTargets, null /* bigrams */, frequency, isNotAWord, isBlacklistEntry);
currentNode.mData.add(insertionIndex, newGroup);
if (DBG) checkStack(currentNode);
} else {
@@ -386,13 +415,15 @@ public class FusionDictionary implements Iterable<Word> {
// The new word is a prefix of an existing word, but the node on which it
// should end already exists as is. Since the old CharNode was not a terminal,
// make it one by filling in its frequency and other attributes
- currentGroup.update(frequency, shortcutTargets, null);
+ currentGroup.update(frequency, shortcutTargets, null, isNotAWord,
+ isBlacklistEntry);
} else {
// The new word matches the full old word and extends past it.
// We only have to create a new node and add it to the end of this.
final CharGroup newNode = new CharGroup(
Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length),
- shortcutTargets, null /* bigrams */, frequency);
+ shortcutTargets, null /* bigrams */, frequency, isNotAWord,
+ isBlacklistEntry);
currentGroup.mChildren = new Node();
currentGroup.mChildren.mData.add(newNode);
}
@@ -400,7 +431,9 @@ public class FusionDictionary implements Iterable<Word> {
if (0 == differentCharIndex) {
// Exact same word. Update the frequency if higher. This will also add the
// new shortcuts to the existing shortcut list if it already exists.
- currentGroup.update(frequency, shortcutTargets, null);
+ currentGroup.update(frequency, shortcutTargets, null,
+ currentGroup.mIsNotAWord && isNotAWord,
+ currentGroup.mIsBlacklistEntry || isBlacklistEntry);
} else {
// Partial prefix match only. We have to replace the current node with a node
// containing the current prefix and create two new ones for the tails.
@@ -408,21 +441,26 @@ public class FusionDictionary implements Iterable<Word> {
final CharGroup newOldWord = new CharGroup(
Arrays.copyOfRange(currentGroup.mChars, differentCharIndex,
currentGroup.mChars.length), currentGroup.mShortcutTargets,
- currentGroup.mBigrams, currentGroup.mFrequency, currentGroup.mChildren);
+ currentGroup.mBigrams, currentGroup.mFrequency,
+ currentGroup.mIsNotAWord, currentGroup.mIsBlacklistEntry,
+ currentGroup.mChildren);
newChildren.mData.add(newOldWord);
final CharGroup newParent;
if (charIndex + differentCharIndex >= word.length) {
newParent = new CharGroup(
Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex),
- shortcutTargets, null /* bigrams */, frequency, newChildren);
+ shortcutTargets, null /* bigrams */, frequency,
+ isNotAWord, isBlacklistEntry, newChildren);
} else {
newParent = new CharGroup(
Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex),
- null /* shortcutTargets */, null /* bigrams */, -1, newChildren);
+ null /* shortcutTargets */, null /* bigrams */, -1,
+ false /* isNotAWord */, false /* isBlacklistEntry */, newChildren);
final CharGroup newWord = new CharGroup(Arrays.copyOfRange(word,
charIndex + differentCharIndex, word.length),
- shortcutTargets, null /* bigrams */, frequency);
+ shortcutTargets, null /* bigrams */, frequency,
+ isNotAWord, isBlacklistEntry);
final int addIndex = word[charIndex + differentCharIndex]
> currentGroup.mChars[differentCharIndex] ? 1 : 0;
newChildren.mData.add(addIndex, newWord);
@@ -483,7 +521,8 @@ public class FusionDictionary implements Iterable<Word> {
private static int findInsertionIndex(final Node node, int character) {
final ArrayList<CharGroup> data = node.mData;
final CharGroup reference = new CharGroup(new int[] { character },
- null /* shortcutTargets */, null /* bigrams */, 0);
+ null /* shortcutTargets */, null /* bigrams */, 0, false /* isNotAWord */,
+ false /* isBlacklistEntry */);
int result = Collections.binarySearch(data, reference, CHARGROUP_COMPARATOR);
return result >= 0 ? result : -result - 1;
}
@@ -748,7 +787,8 @@ public class FusionDictionary implements Iterable<Word> {
}
if (currentGroup.mFrequency >= 0)
return new Word(mCurrentString.toString(), currentGroup.mFrequency,
- currentGroup.mShortcutTargets, currentGroup.mBigrams);
+ currentGroup.mShortcutTargets, currentGroup.mBigrams,
+ currentGroup.mIsNotAWord, currentGroup.mIsBlacklistEntry);
} else {
mPositions.removeLast();
currentPos = mPositions.getLast();