aboutsummaryrefslogtreecommitdiffstats
path: root/java/src
diff options
context:
space:
mode:
authorJean Chalard <jchalard@google.com>2012-08-31 15:24:39 +0900
committerJean Chalard <jchalard@google.com>2012-08-31 22:11:52 +0900
commit72b1c9394105b6fbc0d8c6ff00f3574ee37a9aaa (patch)
tree546b40591ede9a009afdab46dd7495aafc859fa0 /java/src
parent49d8af8a4e900f3c68c333aba7fde0a11fd368b1 (diff)
downloadlatinime-72b1c9394105b6fbc0d8c6ff00f3574ee37a9aaa.tar.gz
latinime-72b1c9394105b6fbc0d8c6ff00f3574ee37a9aaa.tar.xz
latinime-72b1c9394105b6fbc0d8c6ff00f3574ee37a9aaa.zip
Reinstate the shortcut-only attribute
Also add the blacklist attribute Bug: 7005742 Bug: 2704000 Change-Id: Icbe60bdf25bfb098d9e3f20870be30d6aef07c9d
Diffstat (limited to 'java/src')
-rw-r--r--java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java4
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java26
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java76
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/Word.java15
4 files changed, 93 insertions, 28 deletions
diff --git a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
index cdf5247de..8a509be48 100644
--- a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
@@ -172,12 +172,12 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
// considering performance regression.
protected void addWord(final String word, final String shortcutTarget, final int frequency) {
if (shortcutTarget == null) {
- mFusionDictionary.add(word, frequency, null);
+ mFusionDictionary.add(word, frequency, null, false /* isNotAWord */);
} else {
// TODO: Do this in the subclass, with this class taking an arraylist.
final ArrayList<WeightedString> shortcutTargets = CollectionUtils.newArrayList();
shortcutTargets.add(new WeightedString(shortcutTarget, frequency));
- mFusionDictionary.add(word, frequency, shortcutTargets);
+ mFusionDictionary.add(word, frequency, shortcutTargets, false /* isNotAWord */);
}
}
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
index d4f7cab5c..0c6b9c319 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
@@ -55,6 +55,8 @@ public class BinaryDictInputOutput {
* s | has a terminal ? 1 bit, 1 = yes, 0 = no : FLAG_IS_TERMINAL
* | has shortcut targets ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_SHORTCUT_TARGETS
* | has bigrams ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_BIGRAMS
+ * | is not a word ? 1 bit, 1 = yes, 0 = no : FLAG_IS_NOT_A_WORD
+ * | is blacklisted ? 1 bit, 1 = yes, 0 = no : FLAG_IS_BLACKLISTED
*
* c | IF FLAG_HAS_MULTIPLE_CHARS
* h | char, char, char, char n * (1 or 3 bytes) : use CharGroupInfo for i/o helpers
@@ -153,6 +155,8 @@ public class BinaryDictInputOutput {
private static final int FLAG_IS_TERMINAL = 0x10;
private static final int FLAG_HAS_SHORTCUT_TARGETS = 0x08;
private static final int FLAG_HAS_BIGRAMS = 0x04;
+ private static final int FLAG_IS_NOT_A_WORD = 0x02;
+ private static final int FLAG_IS_BLACKLISTED = 0x01;
private static final int FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
private static final int FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
@@ -778,6 +782,12 @@ public class BinaryDictInputOutput {
}
flags |= FLAG_HAS_BIGRAMS;
}
+ if (group.mIsNotAWord) {
+ flags |= FLAG_IS_NOT_A_WORD;
+ }
+ if (group.mIsBlacklistEntry) {
+ flags |= FLAG_IS_BLACKLISTED;
+ }
return flags;
}
@@ -1352,12 +1362,14 @@ public class BinaryDictInputOutput {
buffer.position(currentPosition);
}
nodeContents.add(
- new CharGroup(info.mCharacters, shortcutTargets,
- bigrams, info.mFrequency, children));
+ new CharGroup(info.mCharacters, shortcutTargets, bigrams, info.mFrequency,
+ 0 != (info.mFlags & FLAG_IS_NOT_A_WORD),
+ 0 != (info.mFlags & FLAG_IS_BLACKLISTED), children));
} else {
nodeContents.add(
- new CharGroup(info.mCharacters, shortcutTargets,
- bigrams, info.mFrequency));
+ new CharGroup(info.mCharacters, shortcutTargets, bigrams, info.mFrequency,
+ 0 != (info.mFlags & FLAG_IS_NOT_A_WORD),
+ 0 != (info.mFlags & FLAG_IS_BLACKLISTED)));
}
groupOffset = info.mEndAddress;
}
@@ -1478,7 +1490,11 @@ public class BinaryDictInputOutput {
0 != (optionsFlags & FRENCH_LIGATURE_PROCESSING_FLAG)));
if (null != dict) {
for (final Word w : dict) {
- newDict.add(w.mWord, w.mFrequency, w.mShortcutTargets);
+ if (w.mIsBlacklistEntry) {
+ newDict.addBlacklistEntry(w.mWord, w.mShortcutTargets, w.mIsNotAWord);
+ } else {
+ newDict.add(w.mWord, w.mFrequency, w.mShortcutTargets, w.mIsNotAWord);
+ }
}
for (final Word w : dict) {
// By construction a binary dictionary may not have bigrams pointing to
diff --git a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
index 7c15ba54d..f1abea9ec 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
@@ -101,26 +101,34 @@ public class FusionDictionary implements Iterable<Word> {
ArrayList<WeightedString> mBigrams;
int mFrequency; // NOT_A_TERMINAL == mFrequency indicates this is not a terminal.
Node mChildren;
+ boolean mIsNotAWord; // Only a shortcut
+ boolean mIsBlacklistEntry;
// The two following members to help with binary generation
int mCachedSize;
int mCachedAddress;
public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
- final ArrayList<WeightedString> bigrams, final int frequency) {
+ final ArrayList<WeightedString> bigrams, final int frequency,
+ final boolean isNotAWord, final boolean isBlacklistEntry) {
mChars = chars;
mFrequency = frequency;
mShortcutTargets = shortcutTargets;
mBigrams = bigrams;
mChildren = null;
+ mIsNotAWord = isNotAWord;
+ mIsBlacklistEntry = isBlacklistEntry;
}
public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
- final ArrayList<WeightedString> bigrams, final int frequency, final Node children) {
+ final ArrayList<WeightedString> bigrams, final int frequency,
+ final boolean isNotAWord, final boolean isBlacklistEntry, final Node children) {
mChars = chars;
mFrequency = frequency;
mShortcutTargets = shortcutTargets;
mBigrams = bigrams;
mChildren = children;
+ mIsNotAWord = isNotAWord;
+ mIsBlacklistEntry = isBlacklistEntry;
}
public void addChild(CharGroup n) {
@@ -197,8 +205,9 @@ public class FusionDictionary implements Iterable<Word> {
* the existing ones if any. Note: unigram, bigram, and shortcut frequencies are only
* updated if they are higher than the existing ones.
*/
- public void update(int frequency, ArrayList<WeightedString> shortcutTargets,
- ArrayList<WeightedString> bigrams) {
+ public void update(final int frequency, final ArrayList<WeightedString> shortcutTargets,
+ final ArrayList<WeightedString> bigrams,
+ final boolean isNotAWord, final boolean isBlacklistEntry) {
if (frequency > mFrequency) {
mFrequency = frequency;
}
@@ -234,6 +243,8 @@ public class FusionDictionary implements Iterable<Word> {
}
}
}
+ mIsNotAWord = isNotAWord;
+ mIsBlacklistEntry = isBlacklistEntry;
}
}
@@ -296,10 +307,24 @@ public class FusionDictionary implements Iterable<Word> {
* @param word the word to add.
* @param frequency the frequency of the word, in the range [0..255].
* @param shortcutTargets a list of shortcut targets for this word, or null.
+ * @param isNotAWord true if this should not be considered a word (e.g. shortcut only)
*/
public void add(final String word, final int frequency,
- final ArrayList<WeightedString> shortcutTargets) {
- add(getCodePoints(word), frequency, shortcutTargets);
+ final ArrayList<WeightedString> shortcutTargets, final boolean isNotAWord) {
+ add(getCodePoints(word), frequency, shortcutTargets, isNotAWord,
+ false /* isBlacklistEntry */);
+ }
+
+ /**
+ * Helper method to add a blacklist entry as a string.
+ *
+ * @param word the word to add as a blacklist entry.
+ * @param shortcutTargets a list of shortcut targets for this word, or null.
+ * @param isNotAWord true if this is not a word for spellcheking purposes (shortcut only or so)
+ */
+ public void addBlacklistEntry(final String word,
+ final ArrayList<WeightedString> shortcutTargets, final boolean isNotAWord) {
+ add(getCodePoints(word), 0, shortcutTargets, isNotAWord, true /* isBlacklistEntry */);
}
/**
@@ -332,7 +357,8 @@ public class FusionDictionary implements Iterable<Word> {
if (charGroup != null) {
final CharGroup charGroup2 = findWordInTree(mRoot, word2);
if (charGroup2 == null) {
- add(getCodePoints(word2), 0, null);
+ add(getCodePoints(word2), 0, null, false /* isNotAWord */,
+ false /* isBlacklistEntry */);
}
charGroup.addBigram(word2, frequency);
} else {
@@ -349,9 +375,12 @@ public class FusionDictionary implements Iterable<Word> {
* @param word the word, as an int array.
* @param frequency the frequency of the word, in the range [0..255].
* @param shortcutTargets an optional list of shortcut targets for this word (null if none).
+ * @param isNotAWord true if this is not a word for spellcheking purposes (shortcut only or so)
+ * @param isBlacklistEntry true if this is a blacklisted word, false otherwise
*/
private void add(final int[] word, final int frequency,
- final ArrayList<WeightedString> shortcutTargets) {
+ final ArrayList<WeightedString> shortcutTargets,
+ final boolean isNotAWord, final boolean isBlacklistEntry) {
assert(frequency >= 0 && frequency <= 255);
Node currentNode = mRoot;
int charIndex = 0;
@@ -376,7 +405,7 @@ public class FusionDictionary implements Iterable<Word> {
final int insertionIndex = findInsertionIndex(currentNode, word[charIndex]);
final CharGroup newGroup = new CharGroup(
Arrays.copyOfRange(word, charIndex, word.length),
- shortcutTargets, null /* bigrams */, frequency);
+ shortcutTargets, null /* bigrams */, frequency, isNotAWord, isBlacklistEntry);
currentNode.mData.add(insertionIndex, newGroup);
if (DBG) checkStack(currentNode);
} else {
@@ -386,13 +415,15 @@ public class FusionDictionary implements Iterable<Word> {
// The new word is a prefix of an existing word, but the node on which it
// should end already exists as is. Since the old CharNode was not a terminal,
// make it one by filling in its frequency and other attributes
- currentGroup.update(frequency, shortcutTargets, null);
+ currentGroup.update(frequency, shortcutTargets, null, isNotAWord,
+ isBlacklistEntry);
} else {
// The new word matches the full old word and extends past it.
// We only have to create a new node and add it to the end of this.
final CharGroup newNode = new CharGroup(
Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length),
- shortcutTargets, null /* bigrams */, frequency);
+ shortcutTargets, null /* bigrams */, frequency, isNotAWord,
+ isBlacklistEntry);
currentGroup.mChildren = new Node();
currentGroup.mChildren.mData.add(newNode);
}
@@ -400,7 +431,9 @@ public class FusionDictionary implements Iterable<Word> {
if (0 == differentCharIndex) {
// Exact same word. Update the frequency if higher. This will also add the
// new shortcuts to the existing shortcut list if it already exists.
- currentGroup.update(frequency, shortcutTargets, null);
+ currentGroup.update(frequency, shortcutTargets, null,
+ currentGroup.mIsNotAWord && isNotAWord,
+ currentGroup.mIsBlacklistEntry || isBlacklistEntry);
} else {
// Partial prefix match only. We have to replace the current node with a node
// containing the current prefix and create two new ones for the tails.
@@ -408,21 +441,26 @@ public class FusionDictionary implements Iterable<Word> {
final CharGroup newOldWord = new CharGroup(
Arrays.copyOfRange(currentGroup.mChars, differentCharIndex,
currentGroup.mChars.length), currentGroup.mShortcutTargets,
- currentGroup.mBigrams, currentGroup.mFrequency, currentGroup.mChildren);
+ currentGroup.mBigrams, currentGroup.mFrequency,
+ currentGroup.mIsNotAWord, currentGroup.mIsBlacklistEntry,
+ currentGroup.mChildren);
newChildren.mData.add(newOldWord);
final CharGroup newParent;
if (charIndex + differentCharIndex >= word.length) {
newParent = new CharGroup(
Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex),
- shortcutTargets, null /* bigrams */, frequency, newChildren);
+ shortcutTargets, null /* bigrams */, frequency,
+ isNotAWord, isBlacklistEntry, newChildren);
} else {
newParent = new CharGroup(
Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex),
- null /* shortcutTargets */, null /* bigrams */, -1, newChildren);
+ null /* shortcutTargets */, null /* bigrams */, -1,
+ false /* isNotAWord */, false /* isBlacklistEntry */, newChildren);
final CharGroup newWord = new CharGroup(Arrays.copyOfRange(word,
charIndex + differentCharIndex, word.length),
- shortcutTargets, null /* bigrams */, frequency);
+ shortcutTargets, null /* bigrams */, frequency,
+ isNotAWord, isBlacklistEntry);
final int addIndex = word[charIndex + differentCharIndex]
> currentGroup.mChars[differentCharIndex] ? 1 : 0;
newChildren.mData.add(addIndex, newWord);
@@ -483,7 +521,8 @@ public class FusionDictionary implements Iterable<Word> {
private static int findInsertionIndex(final Node node, int character) {
final ArrayList<CharGroup> data = node.mData;
final CharGroup reference = new CharGroup(new int[] { character },
- null /* shortcutTargets */, null /* bigrams */, 0);
+ null /* shortcutTargets */, null /* bigrams */, 0, false /* isNotAWord */,
+ false /* isBlacklistEntry */);
int result = Collections.binarySearch(data, reference, CHARGROUP_COMPARATOR);
return result >= 0 ? result : -result - 1;
}
@@ -748,7 +787,8 @@ public class FusionDictionary implements Iterable<Word> {
}
if (currentGroup.mFrequency >= 0)
return new Word(mCurrentString.toString(), currentGroup.mFrequency,
- currentGroup.mShortcutTargets, currentGroup.mBigrams);
+ currentGroup.mShortcutTargets, currentGroup.mBigrams,
+ currentGroup.mIsNotAWord, currentGroup.mIsBlacklistEntry);
} else {
mPositions.removeLast();
currentPos = mPositions.getLast();
diff --git a/java/src/com/android/inputmethod/latin/makedict/Word.java b/java/src/com/android/inputmethod/latin/makedict/Word.java
index 65fc72c40..4683ef154 100644
--- a/java/src/com/android/inputmethod/latin/makedict/Word.java
+++ b/java/src/com/android/inputmethod/latin/makedict/Word.java
@@ -31,16 +31,21 @@ public class Word implements Comparable<Word> {
public final int mFrequency;
public final ArrayList<WeightedString> mShortcutTargets;
public final ArrayList<WeightedString> mBigrams;
+ public final boolean mIsNotAWord;
+ public final boolean mIsBlacklistEntry;
private int mHashCode = 0;
public Word(final String word, final int frequency,
final ArrayList<WeightedString> shortcutTargets,
- final ArrayList<WeightedString> bigrams) {
+ final ArrayList<WeightedString> bigrams,
+ final boolean isNotAWord, final boolean isBlacklistEntry) {
mWord = word;
mFrequency = frequency;
mShortcutTargets = shortcutTargets;
mBigrams = bigrams;
+ mIsNotAWord = isNotAWord;
+ mIsBlacklistEntry = isBlacklistEntry;
}
private static int computeHashCode(Word word) {
@@ -48,7 +53,9 @@ public class Word implements Comparable<Word> {
word.mWord,
word.mFrequency,
word.mShortcutTargets.hashCode(),
- word.mBigrams.hashCode()
+ word.mBigrams.hashCode(),
+ word.mIsNotAWord,
+ word.mIsBlacklistEntry
});
}
@@ -78,7 +85,9 @@ public class Word implements Comparable<Word> {
Word w = (Word)o;
return mFrequency == w.mFrequency && mWord.equals(w.mWord)
&& mShortcutTargets.equals(w.mShortcutTargets)
- && mBigrams.equals(w.mBigrams);
+ && mBigrams.equals(w.mBigrams)
+ && mIsNotAWord == w.mIsNotAWord
+ && mIsBlacklistEntry == w.mIsBlacklistEntry;
}
@Override