diff options
Diffstat (limited to 'java/src/com/android/inputmethod/latin')
6 files changed, 399 insertions, 45 deletions
diff --git a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java index cdf5247de..8a509be48 100644 --- a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java @@ -172,12 +172,12 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { // considering performance regression. protected void addWord(final String word, final String shortcutTarget, final int frequency) { if (shortcutTarget == null) { - mFusionDictionary.add(word, frequency, null); + mFusionDictionary.add(word, frequency, null, false /* isNotAWord */); } else { // TODO: Do this in the subclass, with this class taking an arraylist. final ArrayList<WeightedString> shortcutTargets = CollectionUtils.newArrayList(); shortcutTargets.add(new WeightedString(shortcutTarget, frequency)); - mFusionDictionary.add(word, frequency, shortcutTargets); + mFusionDictionary.add(word, frequency, shortcutTargets, false /* isNotAWord */); } } diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java index 76f495729..39c3a808f 100644 --- a/java/src/com/android/inputmethod/latin/LatinIME.java +++ b/java/src/com/android/inputmethod/latin/LatinIME.java @@ -1309,7 +1309,7 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen break; case Keyboard.CODE_RESEARCH: if (ProductionFlag.IS_EXPERIMENTAL) { - ResearchLogger.getInstance().presentResearchDialog(this); + ResearchLogger.getInstance().onResearchKeySelected(this); } break; default: diff --git a/java/src/com/android/inputmethod/latin/UserHistoryDictIOUtils.java b/java/src/com/android/inputmethod/latin/UserHistoryDictIOUtils.java new file mode 100644 index 000000000..942c82837 --- /dev/null +++ b/java/src/com/android/inputmethod/latin/UserHistoryDictIOUtils.java @@ -0,0 +1,193 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.android.inputmethod.latin; + +import android.util.Log; + +import com.android.inputmethod.latin.makedict.BinaryDictInputOutput; +import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface; +import com.android.inputmethod.latin.makedict.FusionDictionary; +import com.android.inputmethod.latin.makedict.FusionDictionary.Node; +import com.android.inputmethod.latin.makedict.PendingAttribute; +import com.android.inputmethod.latin.makedict.UnsupportedFormatException; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; + +/** + * Reads and writes Binary files for a UserHistoryDictionary. + * + * All the methods in this class are static. + */ +public class UserHistoryDictIOUtils { + private static final String TAG = UserHistoryDictIOUtils.class.getSimpleName(); + private static final boolean DEBUG = false; + + public interface OnAddWordListener { + public void setUnigram(final String word, final String shortcutTarget, final int frequency); + public void setBigram(final String word1, final String word2, final int frequency); + } + + public interface BigramDictionaryInterface { + public int getFrequency(final String word1, final String word2); + } + + public static final class ByteArrayWrapper implements FusionDictionaryBufferInterface { + private byte[] mBuffer; + private int mPosition; + + ByteArrayWrapper(final byte[] buffer) { + mBuffer = buffer; + mPosition = 0; + } + + @Override + public int readUnsignedByte() { + return ((int)mBuffer[mPosition++]) & 0xFF; + } + + @Override + public int readUnsignedShort() { + final int retval = readUnsignedByte(); + return (retval << 8) + readUnsignedByte(); + } + + @Override + public int readUnsignedInt24() { + final int retval = readUnsignedShort(); + return (retval << 8) + readUnsignedByte(); + } + + @Override + public int readInt() { + final int retval = readUnsignedShort(); + return (retval << 16) + readUnsignedShort(); + } + + @Override + public int position() { + return mPosition; + } + + @Override + public void position(int position) { + mPosition = position; + } + } + + /** + * Writes dictionary to file. + */ + public static void writeDictionaryBinary(final OutputStream destination, + final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams, + final int version) { + + final FusionDictionary fusionDict = constructFusionDictionary(dict, bigrams); + + try { + BinaryDictInputOutput.writeDictionaryBinary(destination, fusionDict, version); + } catch (IOException e) { + Log.e(TAG, "IO exception while writing file: " + e); + } catch (UnsupportedFormatException e) { + Log.e(TAG, "Unsupported fomat: " + e); + } + } + + /** + * Constructs a new FusionDictionary from BigramDictionaryInterface. + */ + /* packages for test */ static FusionDictionary constructFusionDictionary( + final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams) { + + final FusionDictionary fusionDict = new FusionDictionary(new Node(), + new FusionDictionary.DictionaryOptions( + new HashMap<String,String>(), false, false)); + + for (final String word1 : bigrams.keySet()) { + final HashMap<String, Byte> word1Bigrams = bigrams.getBigrams(word1); + for (final String word2 : word1Bigrams.keySet()) { + final int freq = dict.getFrequency(word1, word2); + + if (DEBUG) { + if (word1 == null) { + Log.d(TAG, "add unigram: " + word2 + "," + Integer.toString(freq)); + } else { + Log.d(TAG, "add bigram: " + word1 + + "," + word2 + "," + Integer.toString(freq)); + } + } + + if (word1 == null) { // unigram + fusionDict.add(word2, freq, null, false /* isNotAWord */); + } else { // bigram + fusionDict.setBigram(word1, word2, freq); + } + bigrams.updateBigram(word1, word2, (byte)freq); + } + } + + return fusionDict; + } + + /** + * Reads dictionary from file. + */ + public static void readDictionaryBinary(final FusionDictionaryBufferInterface buffer, + final OnAddWordListener dict) { + final Map<Integer, String> unigrams = CollectionUtils.newTreeMap(); + final Map<Integer, Integer> frequencies = CollectionUtils.newTreeMap(); + final Map<Integer, ArrayList<PendingAttribute>> bigrams = CollectionUtils.newTreeMap(); + + try { + BinaryDictInputOutput.readUnigramsAndBigramsBinary(buffer, unigrams, frequencies, + bigrams); + addWordsFromWordMap(unigrams, frequencies, bigrams, dict); + } catch (IOException e) { + Log.e(TAG, "IO exception while reading file: " + e); + } catch (UnsupportedFormatException e) { + Log.e(TAG, "Unsupported format: " + e); + } + } + + /** + * Adds all unigrams and bigrams in maps to OnAddWordListener. + */ + /* package for test */ static void addWordsFromWordMap(final Map<Integer, String> unigrams, + final Map<Integer, Integer> frequencies, + final Map<Integer, ArrayList<PendingAttribute>> bigrams, final OnAddWordListener to) { + + for (Map.Entry<Integer, String> entry : unigrams.entrySet()) { + final String word1 = entry.getValue(); + final int unigramFrequency = frequencies.get(entry.getKey()); + to.setUnigram(word1, null, unigramFrequency); + + final ArrayList<PendingAttribute> attrList = bigrams.get(entry.getKey()); + + if (attrList != null) { + for (final PendingAttribute attr : attrList) { + to.setBigram(word1, unigrams.get(attr.mAddress), + BinaryDictInputOutput.reconstructBigramFrequency(unigramFrequency, + attr.mFrequency)); + } + } + } + + } +}
\ No newline at end of file diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java index d4f7cab5c..abc39d923 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java @@ -34,6 +34,7 @@ import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.Map; +import java.util.Stack; import java.util.TreeMap; /** @@ -55,6 +56,8 @@ public class BinaryDictInputOutput { * s | has a terminal ? 1 bit, 1 = yes, 0 = no : FLAG_IS_TERMINAL * | has shortcut targets ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_SHORTCUT_TARGETS * | has bigrams ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_BIGRAMS + * | is not a word ? 1 bit, 1 = yes, 0 = no : FLAG_IS_NOT_A_WORD + * | is blacklisted ? 1 bit, 1 = yes, 0 = no : FLAG_IS_BLACKLISTED * * c | IF FLAG_HAS_MULTIPLE_CHARS * h | char, char, char, char n * (1 or 3 bytes) : use CharGroupInfo for i/o helpers @@ -153,6 +156,8 @@ public class BinaryDictInputOutput { private static final int FLAG_IS_TERMINAL = 0x10; private static final int FLAG_HAS_SHORTCUT_TARGETS = 0x08; private static final int FLAG_HAS_BIGRAMS = 0x04; + private static final int FLAG_IS_NOT_A_WORD = 0x02; + private static final int FLAG_IS_BLACKLISTED = 0x01; private static final int FLAG_ATTRIBUTE_HAS_NEXT = 0x80; private static final int FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40; @@ -188,7 +193,7 @@ public class BinaryDictInputOutput { // suspicion that a bug might be causing an infinite loop. private static final int MAX_PASSES = 24; - private interface FusionDictionaryBufferInterface { + public interface FusionDictionaryBufferInterface { public int readUnsignedByte(); public int readUnsignedShort(); public int readUnsignedInt24(); @@ -197,20 +202,21 @@ public class BinaryDictInputOutput { public void position(int newPosition); } - private static final class ByteBufferWrapper implements FusionDictionaryBufferInterface { - private ByteBuffer buffer; - ByteBufferWrapper(final ByteBuffer buffer) { - this.buffer = buffer; + public static final class ByteBufferWrapper implements FusionDictionaryBufferInterface { + private ByteBuffer mBuffer; + + public ByteBufferWrapper(final ByteBuffer buffer) { + mBuffer = buffer; } @Override public int readUnsignedByte() { - return ((int)buffer.get()) & 0xFF; + return ((int)mBuffer.get()) & 0xFF; } @Override public int readUnsignedShort() { - return ((int)buffer.getShort()) & 0xFFFF; + return ((int)mBuffer.getShort()) & 0xFFFF; } @Override @@ -221,18 +227,17 @@ public class BinaryDictInputOutput { @Override public int readInt() { - return buffer.getInt(); + return mBuffer.getInt(); } @Override public int position() { - return buffer.position(); + return mBuffer.position(); } @Override public void position(int newPos) { - buffer.position(newPos); - return; + mBuffer.position(newPos); } } @@ -778,6 +783,12 @@ public class BinaryDictInputOutput { } flags |= FLAG_HAS_BIGRAMS; } + if (group.mIsNotAWord) { + flags |= FLAG_IS_NOT_A_WORD; + } + if (group.mIsBlacklistEntry) { + flags |= FLAG_IS_BLACKLISTED; + } return flags; } @@ -1352,12 +1363,14 @@ public class BinaryDictInputOutput { buffer.position(currentPosition); } nodeContents.add( - new CharGroup(info.mCharacters, shortcutTargets, - bigrams, info.mFrequency, children)); + new CharGroup(info.mCharacters, shortcutTargets, bigrams, info.mFrequency, + 0 != (info.mFlags & FLAG_IS_NOT_A_WORD), + 0 != (info.mFlags & FLAG_IS_BLACKLISTED), children)); } else { nodeContents.add( - new CharGroup(info.mCharacters, shortcutTargets, - bigrams, info.mFrequency)); + new CharGroup(info.mCharacters, shortcutTargets, bigrams, info.mFrequency, + 0 != (info.mFlags & FLAG_IS_NOT_A_WORD), + 0 != (info.mFlags & FLAG_IS_BLACKLISTED))); } groupOffset = info.mEndAddress; } @@ -1367,6 +1380,105 @@ public class BinaryDictInputOutput { return node; } + // TODO: move these methods (readUnigramsAndBigramsBinary(|Inner)) and an inner class (Position) + // out of this class. + private static class Position { + public static final int NOT_READ_GROUPCOUNT = -1; + + public int mAddress; + public int mNumOfCharGroup; + public int mPosition; + public int mLength; + + public Position(int address, int length) { + mAddress = address; + mLength = length; + mNumOfCharGroup = NOT_READ_GROUPCOUNT; + } + } + + /** + * Tours all node without recursive call. + */ + private static void readUnigramsAndBigramsBinaryInner( + final FusionDictionaryBufferInterface buffer, final int headerSize, + final Map<Integer, String> words, final Map<Integer, Integer> frequencies, + final Map<Integer, ArrayList<PendingAttribute>> bigrams) { + int[] pushedChars = new int[MAX_WORD_LENGTH + 1]; + + Stack<Position> stack = new Stack<Position>(); + int index = 0; + + Position initPos = new Position(headerSize, 0); + stack.push(initPos); + + while (!stack.empty()) { + Position p = stack.peek(); + + if (DBG) { + MakedictLog.d("read: address=" + p.mAddress + ", numOfCharGroup=" + + p.mNumOfCharGroup + ", position=" + p.mPosition + ", length=" + p.mLength); + } + + if (buffer.position() != p.mAddress) buffer.position(p.mAddress); + if (index != p.mLength) index = p.mLength; + + if (p.mNumOfCharGroup == Position.NOT_READ_GROUPCOUNT) { + p.mNumOfCharGroup = readCharGroupCount(buffer); + p.mAddress += getGroupCountSize(p.mNumOfCharGroup); + p.mPosition = 0; + } + + CharGroupInfo info = readCharGroup(buffer, p.mAddress - headerSize); + for (int i = 0; i < info.mCharacters.length; ++i) { + pushedChars[index++] = info.mCharacters[i]; + } + p.mPosition++; + + if (info.mFrequency != FusionDictionary.CharGroup.NOT_A_TERMINAL) { // found word + words.put(info.mOriginalAddress, new String(pushedChars, 0, index)); + frequencies.put(info.mOriginalAddress, info.mFrequency); + if (info.mBigrams != null) bigrams.put(info.mOriginalAddress, info.mBigrams); + } + + if (p.mPosition == p.mNumOfCharGroup) { + stack.pop(); + } else { + // the node has more groups. + p.mAddress = buffer.position(); + } + + if (hasChildrenAddress(info.mChildrenAddress)) { + Position childrenPos = new Position(info.mChildrenAddress + headerSize, index); + stack.push(childrenPos); + } + } + } + + /** + * Reads unigrams and bigrams from the binary file. + * Doesn't make the memory representation of the dictionary. + * + * @param buffer the buffer to read. + * @param words the map to store the address as a key and the word as a value. + * @param frequencies the map to store the address as a key and the frequency as a value. + * @param bigrams the map to store the address as a key and the list of address as a value. + * @throws IOException + * @throws UnsupportedFormatException + */ + public static void readUnigramsAndBigramsBinary(final FusionDictionaryBufferInterface buffer, + final Map<Integer, String> words, final Map<Integer, Integer> frequencies, + final Map<Integer, ArrayList<PendingAttribute>> bigrams) throws IOException, + UnsupportedFormatException { + // Read header + final int version = checkFormatVersion(buffer); + final int optionsFlags = buffer.readUnsignedShort(); + final HashMap<String, String> options = new HashMap<String, String>(); + final int headerSize = readHeader(buffer, options, version); + + readUnigramsAndBigramsBinaryInner(buffer, headerSize, words, frequencies, bigrams); + } + /** * Helper function to get the binary format version from the header. * @throws IOException @@ -1402,10 +1514,8 @@ public class BinaryDictInputOutput { * @throws UnsupportedFormatException */ private static int readHeader(final FusionDictionaryBufferInterface buffer, - final HashMap<String, String> options, - final int version) + final HashMap<String, String> options, final int version) throws IOException, UnsupportedFormatException { - final int headerSize; if (version < FIRST_VERSION_WITH_HEADER_SIZE) { headerSize = buffer.position(); @@ -1418,7 +1528,6 @@ public class BinaryDictInputOutput { if (headerSize < 0) { throw new UnsupportedFormatException("header size can't be negative."); } - return headerSize; } @@ -1456,7 +1565,6 @@ public class BinaryDictInputOutput { public static FusionDictionary readDictionaryBinary( final FusionDictionaryBufferInterface buffer, final FusionDictionary dict) throws IOException, UnsupportedFormatException { - // clear cache wordCache.clear(); @@ -1478,7 +1586,11 @@ public class BinaryDictInputOutput { 0 != (optionsFlags & FRENCH_LIGATURE_PROCESSING_FLAG))); if (null != dict) { for (final Word w : dict) { - newDict.add(w.mWord, w.mFrequency, w.mShortcutTargets); + if (w.mIsBlacklistEntry) { + newDict.addBlacklistEntry(w.mWord, w.mShortcutTargets, w.mIsNotAWord); + } else { + newDict.add(w.mWord, w.mFrequency, w.mShortcutTargets, w.mIsNotAWord); + } } for (final Word w : dict) { // By construction a binary dictionary may not have bigrams pointing to diff --git a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java index 7c15ba54d..f1abea9ec 100644 --- a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java +++ b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java @@ -101,26 +101,34 @@ public class FusionDictionary implements Iterable<Word> { ArrayList<WeightedString> mBigrams; int mFrequency; // NOT_A_TERMINAL == mFrequency indicates this is not a terminal. Node mChildren; + boolean mIsNotAWord; // Only a shortcut + boolean mIsBlacklistEntry; // The two following members to help with binary generation int mCachedSize; int mCachedAddress; public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets, - final ArrayList<WeightedString> bigrams, final int frequency) { + final ArrayList<WeightedString> bigrams, final int frequency, + final boolean isNotAWord, final boolean isBlacklistEntry) { mChars = chars; mFrequency = frequency; mShortcutTargets = shortcutTargets; mBigrams = bigrams; mChildren = null; + mIsNotAWord = isNotAWord; + mIsBlacklistEntry = isBlacklistEntry; } public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets, - final ArrayList<WeightedString> bigrams, final int frequency, final Node children) { + final ArrayList<WeightedString> bigrams, final int frequency, + final boolean isNotAWord, final boolean isBlacklistEntry, final Node children) { mChars = chars; mFrequency = frequency; mShortcutTargets = shortcutTargets; mBigrams = bigrams; mChildren = children; + mIsNotAWord = isNotAWord; + mIsBlacklistEntry = isBlacklistEntry; } public void addChild(CharGroup n) { @@ -197,8 +205,9 @@ public class FusionDictionary implements Iterable<Word> { * the existing ones if any. Note: unigram, bigram, and shortcut frequencies are only * updated if they are higher than the existing ones. */ - public void update(int frequency, ArrayList<WeightedString> shortcutTargets, - ArrayList<WeightedString> bigrams) { + public void update(final int frequency, final ArrayList<WeightedString> shortcutTargets, + final ArrayList<WeightedString> bigrams, + final boolean isNotAWord, final boolean isBlacklistEntry) { if (frequency > mFrequency) { mFrequency = frequency; } @@ -234,6 +243,8 @@ public class FusionDictionary implements Iterable<Word> { } } } + mIsNotAWord = isNotAWord; + mIsBlacklistEntry = isBlacklistEntry; } } @@ -296,10 +307,24 @@ public class FusionDictionary implements Iterable<Word> { * @param word the word to add. * @param frequency the frequency of the word, in the range [0..255]. * @param shortcutTargets a list of shortcut targets for this word, or null. + * @param isNotAWord true if this should not be considered a word (e.g. shortcut only) */ public void add(final String word, final int frequency, - final ArrayList<WeightedString> shortcutTargets) { - add(getCodePoints(word), frequency, shortcutTargets); + final ArrayList<WeightedString> shortcutTargets, final boolean isNotAWord) { + add(getCodePoints(word), frequency, shortcutTargets, isNotAWord, + false /* isBlacklistEntry */); + } + + /** + * Helper method to add a blacklist entry as a string. + * + * @param word the word to add as a blacklist entry. + * @param shortcutTargets a list of shortcut targets for this word, or null. + * @param isNotAWord true if this is not a word for spellcheking purposes (shortcut only or so) + */ + public void addBlacklistEntry(final String word, + final ArrayList<WeightedString> shortcutTargets, final boolean isNotAWord) { + add(getCodePoints(word), 0, shortcutTargets, isNotAWord, true /* isBlacklistEntry */); } /** @@ -332,7 +357,8 @@ public class FusionDictionary implements Iterable<Word> { if (charGroup != null) { final CharGroup charGroup2 = findWordInTree(mRoot, word2); if (charGroup2 == null) { - add(getCodePoints(word2), 0, null); + add(getCodePoints(word2), 0, null, false /* isNotAWord */, + false /* isBlacklistEntry */); } charGroup.addBigram(word2, frequency); } else { @@ -349,9 +375,12 @@ public class FusionDictionary implements Iterable<Word> { * @param word the word, as an int array. * @param frequency the frequency of the word, in the range [0..255]. * @param shortcutTargets an optional list of shortcut targets for this word (null if none). + * @param isNotAWord true if this is not a word for spellcheking purposes (shortcut only or so) + * @param isBlacklistEntry true if this is a blacklisted word, false otherwise */ private void add(final int[] word, final int frequency, - final ArrayList<WeightedString> shortcutTargets) { + final ArrayList<WeightedString> shortcutTargets, + final boolean isNotAWord, final boolean isBlacklistEntry) { assert(frequency >= 0 && frequency <= 255); Node currentNode = mRoot; int charIndex = 0; @@ -376,7 +405,7 @@ public class FusionDictionary implements Iterable<Word> { final int insertionIndex = findInsertionIndex(currentNode, word[charIndex]); final CharGroup newGroup = new CharGroup( Arrays.copyOfRange(word, charIndex, word.length), - shortcutTargets, null /* bigrams */, frequency); + shortcutTargets, null /* bigrams */, frequency, isNotAWord, isBlacklistEntry); currentNode.mData.add(insertionIndex, newGroup); if (DBG) checkStack(currentNode); } else { @@ -386,13 +415,15 @@ public class FusionDictionary implements Iterable<Word> { // The new word is a prefix of an existing word, but the node on which it // should end already exists as is. Since the old CharNode was not a terminal, // make it one by filling in its frequency and other attributes - currentGroup.update(frequency, shortcutTargets, null); + currentGroup.update(frequency, shortcutTargets, null, isNotAWord, + isBlacklistEntry); } else { // The new word matches the full old word and extends past it. // We only have to create a new node and add it to the end of this. final CharGroup newNode = new CharGroup( Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length), - shortcutTargets, null /* bigrams */, frequency); + shortcutTargets, null /* bigrams */, frequency, isNotAWord, + isBlacklistEntry); currentGroup.mChildren = new Node(); currentGroup.mChildren.mData.add(newNode); } @@ -400,7 +431,9 @@ public class FusionDictionary implements Iterable<Word> { if (0 == differentCharIndex) { // Exact same word. Update the frequency if higher. This will also add the // new shortcuts to the existing shortcut list if it already exists. - currentGroup.update(frequency, shortcutTargets, null); + currentGroup.update(frequency, shortcutTargets, null, + currentGroup.mIsNotAWord && isNotAWord, + currentGroup.mIsBlacklistEntry || isBlacklistEntry); } else { // Partial prefix match only. We have to replace the current node with a node // containing the current prefix and create two new ones for the tails. @@ -408,21 +441,26 @@ public class FusionDictionary implements Iterable<Word> { final CharGroup newOldWord = new CharGroup( Arrays.copyOfRange(currentGroup.mChars, differentCharIndex, currentGroup.mChars.length), currentGroup.mShortcutTargets, - currentGroup.mBigrams, currentGroup.mFrequency, currentGroup.mChildren); + currentGroup.mBigrams, currentGroup.mFrequency, + currentGroup.mIsNotAWord, currentGroup.mIsBlacklistEntry, + currentGroup.mChildren); newChildren.mData.add(newOldWord); final CharGroup newParent; if (charIndex + differentCharIndex >= word.length) { newParent = new CharGroup( Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex), - shortcutTargets, null /* bigrams */, frequency, newChildren); + shortcutTargets, null /* bigrams */, frequency, + isNotAWord, isBlacklistEntry, newChildren); } else { newParent = new CharGroup( Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex), - null /* shortcutTargets */, null /* bigrams */, -1, newChildren); + null /* shortcutTargets */, null /* bigrams */, -1, + false /* isNotAWord */, false /* isBlacklistEntry */, newChildren); final CharGroup newWord = new CharGroup(Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length), - shortcutTargets, null /* bigrams */, frequency); + shortcutTargets, null /* bigrams */, frequency, + isNotAWord, isBlacklistEntry); final int addIndex = word[charIndex + differentCharIndex] > currentGroup.mChars[differentCharIndex] ? 1 : 0; newChildren.mData.add(addIndex, newWord); @@ -483,7 +521,8 @@ public class FusionDictionary implements Iterable<Word> { private static int findInsertionIndex(final Node node, int character) { final ArrayList<CharGroup> data = node.mData; final CharGroup reference = new CharGroup(new int[] { character }, - null /* shortcutTargets */, null /* bigrams */, 0); + null /* shortcutTargets */, null /* bigrams */, 0, false /* isNotAWord */, + false /* isBlacklistEntry */); int result = Collections.binarySearch(data, reference, CHARGROUP_COMPARATOR); return result >= 0 ? result : -result - 1; } @@ -748,7 +787,8 @@ public class FusionDictionary implements Iterable<Word> { } if (currentGroup.mFrequency >= 0) return new Word(mCurrentString.toString(), currentGroup.mFrequency, - currentGroup.mShortcutTargets, currentGroup.mBigrams); + currentGroup.mShortcutTargets, currentGroup.mBigrams, + currentGroup.mIsNotAWord, currentGroup.mIsBlacklistEntry); } else { mPositions.removeLast(); currentPos = mPositions.getLast(); diff --git a/java/src/com/android/inputmethod/latin/makedict/Word.java b/java/src/com/android/inputmethod/latin/makedict/Word.java index 65fc72c40..4683ef154 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Word.java +++ b/java/src/com/android/inputmethod/latin/makedict/Word.java @@ -31,16 +31,21 @@ public class Word implements Comparable<Word> { public final int mFrequency; public final ArrayList<WeightedString> mShortcutTargets; public final ArrayList<WeightedString> mBigrams; + public final boolean mIsNotAWord; + public final boolean mIsBlacklistEntry; private int mHashCode = 0; public Word(final String word, final int frequency, final ArrayList<WeightedString> shortcutTargets, - final ArrayList<WeightedString> bigrams) { + final ArrayList<WeightedString> bigrams, + final boolean isNotAWord, final boolean isBlacklistEntry) { mWord = word; mFrequency = frequency; mShortcutTargets = shortcutTargets; mBigrams = bigrams; + mIsNotAWord = isNotAWord; + mIsBlacklistEntry = isBlacklistEntry; } private static int computeHashCode(Word word) { @@ -48,7 +53,9 @@ public class Word implements Comparable<Word> { word.mWord, word.mFrequency, word.mShortcutTargets.hashCode(), - word.mBigrams.hashCode() + word.mBigrams.hashCode(), + word.mIsNotAWord, + word.mIsBlacklistEntry }); } @@ -78,7 +85,9 @@ public class Word implements Comparable<Word> { Word w = (Word)o; return mFrequency == w.mFrequency && mWord.equals(w.mWord) && mShortcutTargets.equals(w.mShortcutTargets) - && mBigrams.equals(w.mBigrams); + && mBigrams.equals(w.mBigrams) + && mIsNotAWord == w.mIsNotAWord + && mIsBlacklistEntry == w.mIsBlacklistEntry; } @Override |