diff options
author | 2012-08-31 15:24:39 +0900 | |
---|---|---|
committer | 2012-08-31 22:11:52 +0900 | |
commit | 72b1c9394105b6fbc0d8c6ff00f3574ee37a9aaa (patch) | |
tree | 546b40591ede9a009afdab46dd7495aafc859fa0 /java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java | |
parent | 49d8af8a4e900f3c68c333aba7fde0a11fd368b1 (diff) | |
download | latinime-72b1c9394105b6fbc0d8c6ff00f3574ee37a9aaa.tar.gz latinime-72b1c9394105b6fbc0d8c6ff00f3574ee37a9aaa.tar.xz latinime-72b1c9394105b6fbc0d8c6ff00f3574ee37a9aaa.zip |
Reinstate the shortcut-only attribute
Also add the blacklist attribute
Bug: 7005742
Bug: 2704000
Change-Id: Icbe60bdf25bfb098d9e3f20870be30d6aef07c9d
Diffstat (limited to 'java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java')
-rw-r--r-- | java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java | 76 |
1 files changed, 58 insertions, 18 deletions
diff --git a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java index 7c15ba54d..f1abea9ec 100644 --- a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java +++ b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java @@ -101,26 +101,34 @@ public class FusionDictionary implements Iterable<Word> { ArrayList<WeightedString> mBigrams; int mFrequency; // NOT_A_TERMINAL == mFrequency indicates this is not a terminal. Node mChildren; + boolean mIsNotAWord; // Only a shortcut + boolean mIsBlacklistEntry; // The two following members to help with binary generation int mCachedSize; int mCachedAddress; public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets, - final ArrayList<WeightedString> bigrams, final int frequency) { + final ArrayList<WeightedString> bigrams, final int frequency, + final boolean isNotAWord, final boolean isBlacklistEntry) { mChars = chars; mFrequency = frequency; mShortcutTargets = shortcutTargets; mBigrams = bigrams; mChildren = null; + mIsNotAWord = isNotAWord; + mIsBlacklistEntry = isBlacklistEntry; } public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets, - final ArrayList<WeightedString> bigrams, final int frequency, final Node children) { + final ArrayList<WeightedString> bigrams, final int frequency, + final boolean isNotAWord, final boolean isBlacklistEntry, final Node children) { mChars = chars; mFrequency = frequency; mShortcutTargets = shortcutTargets; mBigrams = bigrams; mChildren = children; + mIsNotAWord = isNotAWord; + mIsBlacklistEntry = isBlacklistEntry; } public void addChild(CharGroup n) { @@ -197,8 +205,9 @@ public class FusionDictionary implements Iterable<Word> { * the existing ones if any. Note: unigram, bigram, and shortcut frequencies are only * updated if they are higher than the existing ones. */ - public void update(int frequency, ArrayList<WeightedString> shortcutTargets, - ArrayList<WeightedString> bigrams) { + public void update(final int frequency, final ArrayList<WeightedString> shortcutTargets, + final ArrayList<WeightedString> bigrams, + final boolean isNotAWord, final boolean isBlacklistEntry) { if (frequency > mFrequency) { mFrequency = frequency; } @@ -234,6 +243,8 @@ public class FusionDictionary implements Iterable<Word> { } } } + mIsNotAWord = isNotAWord; + mIsBlacklistEntry = isBlacklistEntry; } } @@ -296,10 +307,24 @@ public class FusionDictionary implements Iterable<Word> { * @param word the word to add. * @param frequency the frequency of the word, in the range [0..255]. * @param shortcutTargets a list of shortcut targets for this word, or null. + * @param isNotAWord true if this should not be considered a word (e.g. shortcut only) */ public void add(final String word, final int frequency, - final ArrayList<WeightedString> shortcutTargets) { - add(getCodePoints(word), frequency, shortcutTargets); + final ArrayList<WeightedString> shortcutTargets, final boolean isNotAWord) { + add(getCodePoints(word), frequency, shortcutTargets, isNotAWord, + false /* isBlacklistEntry */); + } + + /** + * Helper method to add a blacklist entry as a string. + * + * @param word the word to add as a blacklist entry. + * @param shortcutTargets a list of shortcut targets for this word, or null. + * @param isNotAWord true if this is not a word for spellcheking purposes (shortcut only or so) + */ + public void addBlacklistEntry(final String word, + final ArrayList<WeightedString> shortcutTargets, final boolean isNotAWord) { + add(getCodePoints(word), 0, shortcutTargets, isNotAWord, true /* isBlacklistEntry */); } /** @@ -332,7 +357,8 @@ public class FusionDictionary implements Iterable<Word> { if (charGroup != null) { final CharGroup charGroup2 = findWordInTree(mRoot, word2); if (charGroup2 == null) { - add(getCodePoints(word2), 0, null); + add(getCodePoints(word2), 0, null, false /* isNotAWord */, + false /* isBlacklistEntry */); } charGroup.addBigram(word2, frequency); } else { @@ -349,9 +375,12 @@ public class FusionDictionary implements Iterable<Word> { * @param word the word, as an int array. * @param frequency the frequency of the word, in the range [0..255]. * @param shortcutTargets an optional list of shortcut targets for this word (null if none). + * @param isNotAWord true if this is not a word for spellcheking purposes (shortcut only or so) + * @param isBlacklistEntry true if this is a blacklisted word, false otherwise */ private void add(final int[] word, final int frequency, - final ArrayList<WeightedString> shortcutTargets) { + final ArrayList<WeightedString> shortcutTargets, + final boolean isNotAWord, final boolean isBlacklistEntry) { assert(frequency >= 0 && frequency <= 255); Node currentNode = mRoot; int charIndex = 0; @@ -376,7 +405,7 @@ public class FusionDictionary implements Iterable<Word> { final int insertionIndex = findInsertionIndex(currentNode, word[charIndex]); final CharGroup newGroup = new CharGroup( Arrays.copyOfRange(word, charIndex, word.length), - shortcutTargets, null /* bigrams */, frequency); + shortcutTargets, null /* bigrams */, frequency, isNotAWord, isBlacklistEntry); currentNode.mData.add(insertionIndex, newGroup); if (DBG) checkStack(currentNode); } else { @@ -386,13 +415,15 @@ public class FusionDictionary implements Iterable<Word> { // The new word is a prefix of an existing word, but the node on which it // should end already exists as is. Since the old CharNode was not a terminal, // make it one by filling in its frequency and other attributes - currentGroup.update(frequency, shortcutTargets, null); + currentGroup.update(frequency, shortcutTargets, null, isNotAWord, + isBlacklistEntry); } else { // The new word matches the full old word and extends past it. // We only have to create a new node and add it to the end of this. final CharGroup newNode = new CharGroup( Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length), - shortcutTargets, null /* bigrams */, frequency); + shortcutTargets, null /* bigrams */, frequency, isNotAWord, + isBlacklistEntry); currentGroup.mChildren = new Node(); currentGroup.mChildren.mData.add(newNode); } @@ -400,7 +431,9 @@ public class FusionDictionary implements Iterable<Word> { if (0 == differentCharIndex) { // Exact same word. Update the frequency if higher. This will also add the // new shortcuts to the existing shortcut list if it already exists. - currentGroup.update(frequency, shortcutTargets, null); + currentGroup.update(frequency, shortcutTargets, null, + currentGroup.mIsNotAWord && isNotAWord, + currentGroup.mIsBlacklistEntry || isBlacklistEntry); } else { // Partial prefix match only. We have to replace the current node with a node // containing the current prefix and create two new ones for the tails. @@ -408,21 +441,26 @@ public class FusionDictionary implements Iterable<Word> { final CharGroup newOldWord = new CharGroup( Arrays.copyOfRange(currentGroup.mChars, differentCharIndex, currentGroup.mChars.length), currentGroup.mShortcutTargets, - currentGroup.mBigrams, currentGroup.mFrequency, currentGroup.mChildren); + currentGroup.mBigrams, currentGroup.mFrequency, + currentGroup.mIsNotAWord, currentGroup.mIsBlacklistEntry, + currentGroup.mChildren); newChildren.mData.add(newOldWord); final CharGroup newParent; if (charIndex + differentCharIndex >= word.length) { newParent = new CharGroup( Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex), - shortcutTargets, null /* bigrams */, frequency, newChildren); + shortcutTargets, null /* bigrams */, frequency, + isNotAWord, isBlacklistEntry, newChildren); } else { newParent = new CharGroup( Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex), - null /* shortcutTargets */, null /* bigrams */, -1, newChildren); + null /* shortcutTargets */, null /* bigrams */, -1, + false /* isNotAWord */, false /* isBlacklistEntry */, newChildren); final CharGroup newWord = new CharGroup(Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length), - shortcutTargets, null /* bigrams */, frequency); + shortcutTargets, null /* bigrams */, frequency, + isNotAWord, isBlacklistEntry); final int addIndex = word[charIndex + differentCharIndex] > currentGroup.mChars[differentCharIndex] ? 1 : 0; newChildren.mData.add(addIndex, newWord); @@ -483,7 +521,8 @@ public class FusionDictionary implements Iterable<Word> { private static int findInsertionIndex(final Node node, int character) { final ArrayList<CharGroup> data = node.mData; final CharGroup reference = new CharGroup(new int[] { character }, - null /* shortcutTargets */, null /* bigrams */, 0); + null /* shortcutTargets */, null /* bigrams */, 0, false /* isNotAWord */, + false /* isBlacklistEntry */); int result = Collections.binarySearch(data, reference, CHARGROUP_COMPARATOR); return result >= 0 ? result : -result - 1; } @@ -748,7 +787,8 @@ public class FusionDictionary implements Iterable<Word> { } if (currentGroup.mFrequency >= 0) return new Word(mCurrentString.toString(), currentGroup.mFrequency, - currentGroup.mShortcutTargets, currentGroup.mBigrams); + currentGroup.mShortcutTargets, currentGroup.mBigrams, + currentGroup.mIsNotAWord, currentGroup.mIsBlacklistEntry); } else { mPositions.removeLast(); currentPos = mPositions.getLast(); |