aboutsummaryrefslogtreecommitdiffstats
path: root/java/src/com/android/inputmethod/latin
diff options
context:
space:
mode:
Diffstat (limited to 'java/src/com/android/inputmethod/latin')
-rw-r--r--java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java4
-rw-r--r--java/src/com/android/inputmethod/latin/LatinIME.java2
-rw-r--r--java/src/com/android/inputmethod/latin/UserHistoryDictIOUtils.java193
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java154
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java76
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/Word.java15
6 files changed, 399 insertions, 45 deletions
diff --git a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
index cdf5247de..8a509be48 100644
--- a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
@@ -172,12 +172,12 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
// considering performance regression.
protected void addWord(final String word, final String shortcutTarget, final int frequency) {
if (shortcutTarget == null) {
- mFusionDictionary.add(word, frequency, null);
+ mFusionDictionary.add(word, frequency, null, false /* isNotAWord */);
} else {
// TODO: Do this in the subclass, with this class taking an arraylist.
final ArrayList<WeightedString> shortcutTargets = CollectionUtils.newArrayList();
shortcutTargets.add(new WeightedString(shortcutTarget, frequency));
- mFusionDictionary.add(word, frequency, shortcutTargets);
+ mFusionDictionary.add(word, frequency, shortcutTargets, false /* isNotAWord */);
}
}
diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java
index 76f495729..39c3a808f 100644
--- a/java/src/com/android/inputmethod/latin/LatinIME.java
+++ b/java/src/com/android/inputmethod/latin/LatinIME.java
@@ -1309,7 +1309,7 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen
break;
case Keyboard.CODE_RESEARCH:
if (ProductionFlag.IS_EXPERIMENTAL) {
- ResearchLogger.getInstance().presentResearchDialog(this);
+ ResearchLogger.getInstance().onResearchKeySelected(this);
}
break;
default:
diff --git a/java/src/com/android/inputmethod/latin/UserHistoryDictIOUtils.java b/java/src/com/android/inputmethod/latin/UserHistoryDictIOUtils.java
new file mode 100644
index 000000000..942c82837
--- /dev/null
+++ b/java/src/com/android/inputmethod/latin/UserHistoryDictIOUtils.java
@@ -0,0 +1,193 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.android.inputmethod.latin;
+
+import android.util.Log;
+
+import com.android.inputmethod.latin.makedict.BinaryDictInputOutput;
+import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface;
+import com.android.inputmethod.latin.makedict.FusionDictionary;
+import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
+import com.android.inputmethod.latin.makedict.PendingAttribute;
+import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Reads and writes Binary files for a UserHistoryDictionary.
+ *
+ * All the methods in this class are static.
+ */
+public class UserHistoryDictIOUtils {
+ private static final String TAG = UserHistoryDictIOUtils.class.getSimpleName();
+ private static final boolean DEBUG = false;
+
+ public interface OnAddWordListener {
+ public void setUnigram(final String word, final String shortcutTarget, final int frequency);
+ public void setBigram(final String word1, final String word2, final int frequency);
+ }
+
+ public interface BigramDictionaryInterface {
+ public int getFrequency(final String word1, final String word2);
+ }
+
+ public static final class ByteArrayWrapper implements FusionDictionaryBufferInterface {
+ private byte[] mBuffer; // backing array; kept by reference, not copied
+ private int mPosition; // index of the next byte to be read
+
+ ByteArrayWrapper(final byte[] buffer) {
+ mBuffer = buffer;
+ mPosition = 0;
+ }
+
+ @Override
+ public int readUnsignedByte() {
+ return ((int)mBuffer[mPosition++]) & 0xFF; // mask drops the sign extension of the byte
+ }
+
+ @Override
+ public int readUnsignedShort() {
+ final int retval = readUnsignedByte();
+ return (retval << 8) + readUnsignedByte(); // big-endian: first byte is the high byte
+ }
+
+ @Override
+ public int readUnsignedInt24() {
+ final int retval = readUnsignedShort();
+ return (retval << 8) + readUnsignedByte(); // high 16 bits first, then the low byte
+ }
+
+ @Override
+ public int readInt() {
+ final int retval = readUnsignedShort();
+ return (retval << 16) + readUnsignedShort(); // may be negative when the top bit is set
+ }
+
+ @Override
+ public int position() {
+ return mPosition;
+ }
+
+ @Override
+ public void position(int position) {
+ mPosition = position; // stored as-is; no bounds check is done here
+ }
+ }
+
+ /**
+ * Builds a FusionDictionary from {@code dict} and {@code bigrams} and writes it to {@code destination}.
+ */
+ public static void writeDictionaryBinary(final OutputStream destination,
+ final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams,
+ final int version) {
+
+ final FusionDictionary fusionDict = constructFusionDictionary(dict, bigrams);
+
+ try {
+ BinaryDictInputOutput.writeDictionaryBinary(destination, fusionDict, version);
+ } catch (IOException e) { // failures are logged and not propagated to the caller
+ Log.e(TAG, "IO exception while writing file: " + e);
+ } catch (UnsupportedFormatException e) {
+ Log.e(TAG, "Unsupported format: " + e);
+ }
+ }
+
+ /**
+ * Constructs a new FusionDictionary from the pairs in {@code bigrams}, with frequencies from {@code dict}.
+ */
+ /* package for test */ static FusionDictionary constructFusionDictionary(
+ final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams) {
+
+ final FusionDictionary fusionDict = new FusionDictionary(new Node(),
+ new FusionDictionary.DictionaryOptions(
+ new HashMap<String,String>(), false, false));
+
+ for (final String word1 : bigrams.keySet()) {
+ final HashMap<String, Byte> word1Bigrams = bigrams.getBigrams(word1);
+ for (final String word2 : word1Bigrams.keySet()) {
+ final int freq = dict.getFrequency(word1, word2);
+
+ if (DEBUG) {
+ if (word1 == null) {
+ Log.d(TAG, "add unigram: " + word2 + "," + Integer.toString(freq));
+ } else {
+ Log.d(TAG, "add bigram: " + word1
+ + "," + word2 + "," + Integer.toString(freq));
+ }
+ }
+
+ if (word1 == null) { // a null first word marks a unigram entry
+ fusionDict.add(word2, freq, null, false /* isNotAWord */);
+ } else { // bigram
+ fusionDict.setBigram(word1, word2, freq);
+ }
+ bigrams.updateBigram(word1, word2, (byte)freq); // NOTE(review): narrowed to byte; confirm it fits
+ }
+ }
+
+ return fusionDict;
+ }
+
+ /**
+ * Reads unigrams and bigrams from {@code buffer} and reports each of them to {@code dict}.
+ */
+ public static void readDictionaryBinary(final FusionDictionaryBufferInterface buffer,
+ final OnAddWordListener dict) {
+ final Map<Integer, String> unigrams = CollectionUtils.newTreeMap(); // address -> word
+ final Map<Integer, Integer> frequencies = CollectionUtils.newTreeMap(); // address -> frequency
+ final Map<Integer, ArrayList<PendingAttribute>> bigrams = CollectionUtils.newTreeMap();
+
+ try {
+ BinaryDictInputOutput.readUnigramsAndBigramsBinary(buffer, unigrams, frequencies,
+ bigrams);
+ addWordsFromWordMap(unigrams, frequencies, bigrams, dict);
+ } catch (IOException e) { // logged only; in that case nothing has been reported to dict
+ Log.e(TAG, "IO exception while reading file: " + e);
+ } catch (UnsupportedFormatException e) {
+ Log.e(TAG, "Unsupported format: " + e);
+ }
+ }
+
+ /**
+ * Reports every unigram in {@code unigrams}, then each of its bigrams, to {@code to}.
+ */
+ /* package for test */ static void addWordsFromWordMap(final Map<Integer, String> unigrams,
+ final Map<Integer, Integer> frequencies,
+ final Map<Integer, ArrayList<PendingAttribute>> bigrams, final OnAddWordListener to) {
+
+ for (Map.Entry<Integer, String> entry : unigrams.entrySet()) {
+ final String word1 = entry.getValue();
+ final int unigramFrequency = frequencies.get(entry.getKey()); // unboxes; NPE if key is absent
+ to.setUnigram(word1, null, unigramFrequency);
+
+ final ArrayList<PendingAttribute> attrList = bigrams.get(entry.getKey());
+
+ if (attrList != null) {
+ for (final PendingAttribute attr : attrList) { // attr names its target word by address
+ to.setBigram(word1, unigrams.get(attr.mAddress), // null if that address is unknown
+ BinaryDictInputOutput.reconstructBigramFrequency(unigramFrequency,
+ attr.mFrequency));
+ }
+ }
+ }
+
+ }
+} \ No newline at end of file
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
index d4f7cab5c..abc39d923 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
@@ -34,6 +34,7 @@ import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
+import java.util.Stack;
import java.util.TreeMap;
/**
@@ -55,6 +56,8 @@ public class BinaryDictInputOutput {
* s | has a terminal ? 1 bit, 1 = yes, 0 = no : FLAG_IS_TERMINAL
* | has shortcut targets ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_SHORTCUT_TARGETS
* | has bigrams ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_BIGRAMS
+ * | is not a word ? 1 bit, 1 = yes, 0 = no : FLAG_IS_NOT_A_WORD
+ * | is blacklisted ? 1 bit, 1 = yes, 0 = no : FLAG_IS_BLACKLISTED
*
* c | IF FLAG_HAS_MULTIPLE_CHARS
* h | char, char, char, char n * (1 or 3 bytes) : use CharGroupInfo for i/o helpers
@@ -153,6 +156,8 @@ public class BinaryDictInputOutput {
private static final int FLAG_IS_TERMINAL = 0x10;
private static final int FLAG_HAS_SHORTCUT_TARGETS = 0x08;
private static final int FLAG_HAS_BIGRAMS = 0x04;
+ private static final int FLAG_IS_NOT_A_WORD = 0x02;
+ private static final int FLAG_IS_BLACKLISTED = 0x01;
private static final int FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
private static final int FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
@@ -188,7 +193,7 @@ public class BinaryDictInputOutput {
// suspicion that a bug might be causing an infinite loop.
private static final int MAX_PASSES = 24;
- private interface FusionDictionaryBufferInterface {
+ public interface FusionDictionaryBufferInterface {
public int readUnsignedByte();
public int readUnsignedShort();
public int readUnsignedInt24();
@@ -197,20 +202,21 @@ public class BinaryDictInputOutput {
public void position(int newPosition);
}
- private static final class ByteBufferWrapper implements FusionDictionaryBufferInterface {
- private ByteBuffer buffer;
- ByteBufferWrapper(final ByteBuffer buffer) {
- this.buffer = buffer;
+ public static final class ByteBufferWrapper implements FusionDictionaryBufferInterface {
+ private ByteBuffer mBuffer;
+
+ public ByteBufferWrapper(final ByteBuffer buffer) {
+ mBuffer = buffer;
}
@Override
public int readUnsignedByte() {
- return ((int)buffer.get()) & 0xFF;
+ return ((int)mBuffer.get()) & 0xFF;
}
@Override
public int readUnsignedShort() {
- return ((int)buffer.getShort()) & 0xFFFF;
+ return ((int)mBuffer.getShort()) & 0xFFFF;
}
@Override
@@ -221,18 +227,17 @@ public class BinaryDictInputOutput {
@Override
public int readInt() {
- return buffer.getInt();
+ return mBuffer.getInt();
}
@Override
public int position() {
- return buffer.position();
+ return mBuffer.position();
}
@Override
public void position(int newPos) {
- buffer.position(newPos);
- return;
+ mBuffer.position(newPos);
}
}
@@ -778,6 +783,12 @@ public class BinaryDictInputOutput {
}
flags |= FLAG_HAS_BIGRAMS;
}
+ if (group.mIsNotAWord) {
+ flags |= FLAG_IS_NOT_A_WORD;
+ }
+ if (group.mIsBlacklistEntry) {
+ flags |= FLAG_IS_BLACKLISTED;
+ }
return flags;
}
@@ -1352,12 +1363,14 @@ public class BinaryDictInputOutput {
buffer.position(currentPosition);
}
nodeContents.add(
- new CharGroup(info.mCharacters, shortcutTargets,
- bigrams, info.mFrequency, children));
+ new CharGroup(info.mCharacters, shortcutTargets, bigrams, info.mFrequency,
+ 0 != (info.mFlags & FLAG_IS_NOT_A_WORD),
+ 0 != (info.mFlags & FLAG_IS_BLACKLISTED), children));
} else {
nodeContents.add(
- new CharGroup(info.mCharacters, shortcutTargets,
- bigrams, info.mFrequency));
+ new CharGroup(info.mCharacters, shortcutTargets, bigrams, info.mFrequency,
+ 0 != (info.mFlags & FLAG_IS_NOT_A_WORD),
+ 0 != (info.mFlags & FLAG_IS_BLACKLISTED)));
}
groupOffset = info.mEndAddress;
}
@@ -1367,6 +1380,105 @@ public class BinaryDictInputOutput {
return node;
}
+ // TODO: move these methods (readUnigramsAndBigramsBinary(|Inner)) and an inner class (Position)
+ // out of this class.
+ private static class Position { // one pending node on the explicit traversal stack
+ public static final int NOT_READ_GROUPCOUNT = -1; // group count not read yet
+
+ public int mAddress; // buffer offset at which reading of this node resumes
+ public int mNumOfCharGroup; // groups in this node, or NOT_READ_GROUPCOUNT
+ public int mPosition; // number of groups of this node read so far
+ public int mLength; // code points accumulated before this node
+
+ public Position(int address, int length) {
+ mAddress = address;
+ mLength = length;
+ mNumOfCharGroup = NOT_READ_GROUPCOUNT;
+ }
+ }
+
+ /**
+ * Visits every node of the dictionary iteratively, using an explicit stack instead of recursion.
+ */
+ private static void readUnigramsAndBigramsBinaryInner(
+ final FusionDictionaryBufferInterface buffer, final int headerSize,
+ final Map<Integer, String> words, final Map<Integer, Integer> frequencies,
+ final Map<Integer, ArrayList<PendingAttribute>> bigrams) {
+ int[] pushedChars = new int[MAX_WORD_LENGTH + 1]; // code points of the word being built
+
+ Stack<Position> stack = new Stack<Position>();
+ int index = 0; // number of valid entries in pushedChars
+
+ Position initPos = new Position(headerSize, 0); // the first node starts right after the header
+ stack.push(initPos);
+
+ while (!stack.empty()) {
+ Position p = stack.peek();
+
+ if (DBG) {
+ MakedictLog.d("read: address=" + p.mAddress + ", numOfCharGroup=" +
+ p.mNumOfCharGroup + ", position=" + p.mPosition + ", length=" + p.mLength);
+ }
+
+ if (buffer.position() != p.mAddress) buffer.position(p.mAddress); // seek only if needed
+ if (index != p.mLength) index = p.mLength; // rewind the word to this node's prefix
+
+ if (p.mNumOfCharGroup == Position.NOT_READ_GROUPCOUNT) { // first visit to this node
+ p.mNumOfCharGroup = readCharGroupCount(buffer);
+ p.mAddress += getGroupCountSize(p.mNumOfCharGroup);
+ p.mPosition = 0;
+ }
+
+ CharGroupInfo info = readCharGroup(buffer, p.mAddress - headerSize);
+ for (int i = 0; i < info.mCharacters.length; ++i) {
+ pushedChars[index++] = info.mCharacters[i]; // NOTE(review): no bounds check on index
+ }
+ p.mPosition++;
+
+ if (info.mFrequency != FusionDictionary.CharGroup.NOT_A_TERMINAL) { // found word
+ words.put(info.mOriginalAddress, new String(pushedChars, 0, index));
+ frequencies.put(info.mOriginalAddress, info.mFrequency);
+ if (info.mBigrams != null) bigrams.put(info.mOriginalAddress, info.mBigrams);
+ }
+
+ if (p.mPosition == p.mNumOfCharGroup) { // every group of this node has been read
+ stack.pop();
+ } else {
+ // the node has more groups; remember where the next one starts.
+ p.mAddress = buffer.position();
+ }
+
+ if (hasChildrenAddress(info.mChildrenAddress)) { // children are visited before siblings
+ Position childrenPos = new Position(info.mChildrenAddress + headerSize, index);
+ stack.push(childrenPos);
+ }
+ }
+ }
+
+ /**
+ * Reads unigrams and bigrams from the binary file.
+ * Doesn't make the memory representation of the dictionary.
+ *
+ * @param buffer the buffer to read.
+ * @param words the map to store the address as a key and the word as a value.
+ * @param frequencies the map to store the address as a key and the frequency as a value.
+ * @param bigrams the map to store the address as a key and the list of bigram attributes as a value.
+ * @throws IOException propagated from the header-reading helpers.
+ * @throws UnsupportedFormatException propagated from the header-reading helpers.
+ */
+ public static void readUnigramsAndBigramsBinary(final FusionDictionaryBufferInterface buffer,
+ final Map<Integer, String> words, final Map<Integer, Integer> frequencies,
+ final Map<Integer, ArrayList<PendingAttribute>> bigrams) throws IOException,
+ UnsupportedFormatException {
+ // Read header
+ final int version = checkFormatVersion(buffer);
+ final int optionsFlags = buffer.readUnsignedShort(); // NOTE(review): unused; presumably read to skip it
+ final HashMap<String, String> options = new HashMap<String, String>(); // not read afterwards here
+ final int headerSize = readHeader(buffer, options, version);
+
+ readUnigramsAndBigramsBinaryInner(buffer, headerSize, words, frequencies, bigrams);
+ }
+
/**
* Helper function to get the binary format version from the header.
* @throws IOException
@@ -1402,10 +1514,8 @@ public class BinaryDictInputOutput {
* @throws UnsupportedFormatException
*/
private static int readHeader(final FusionDictionaryBufferInterface buffer,
- final HashMap<String, String> options,
- final int version)
+ final HashMap<String, String> options, final int version)
throws IOException, UnsupportedFormatException {
-
final int headerSize;
if (version < FIRST_VERSION_WITH_HEADER_SIZE) {
headerSize = buffer.position();
@@ -1418,7 +1528,6 @@ public class BinaryDictInputOutput {
if (headerSize < 0) {
throw new UnsupportedFormatException("header size can't be negative.");
}
-
return headerSize;
}
@@ -1456,7 +1565,6 @@ public class BinaryDictInputOutput {
public static FusionDictionary readDictionaryBinary(
final FusionDictionaryBufferInterface buffer, final FusionDictionary dict)
throws IOException, UnsupportedFormatException {
-
// clear cache
wordCache.clear();
@@ -1478,7 +1586,11 @@ public class BinaryDictInputOutput {
0 != (optionsFlags & FRENCH_LIGATURE_PROCESSING_FLAG)));
if (null != dict) {
for (final Word w : dict) {
- newDict.add(w.mWord, w.mFrequency, w.mShortcutTargets);
+ if (w.mIsBlacklistEntry) {
+ newDict.addBlacklistEntry(w.mWord, w.mShortcutTargets, w.mIsNotAWord);
+ } else {
+ newDict.add(w.mWord, w.mFrequency, w.mShortcutTargets, w.mIsNotAWord);
+ }
}
for (final Word w : dict) {
// By construction a binary dictionary may not have bigrams pointing to
diff --git a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
index 7c15ba54d..f1abea9ec 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
@@ -101,26 +101,34 @@ public class FusionDictionary implements Iterable<Word> {
ArrayList<WeightedString> mBigrams;
int mFrequency; // NOT_A_TERMINAL == mFrequency indicates this is not a terminal.
Node mChildren;
+ boolean mIsNotAWord; // Only a shortcut
+ boolean mIsBlacklistEntry;
// The two following members to help with binary generation
int mCachedSize;
int mCachedAddress;
public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
- final ArrayList<WeightedString> bigrams, final int frequency) {
+ final ArrayList<WeightedString> bigrams, final int frequency,
+ final boolean isNotAWord, final boolean isBlacklistEntry) {
mChars = chars;
mFrequency = frequency;
mShortcutTargets = shortcutTargets;
mBigrams = bigrams;
mChildren = null;
+ mIsNotAWord = isNotAWord;
+ mIsBlacklistEntry = isBlacklistEntry;
}
public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
- final ArrayList<WeightedString> bigrams, final int frequency, final Node children) {
+ final ArrayList<WeightedString> bigrams, final int frequency,
+ final boolean isNotAWord, final boolean isBlacklistEntry, final Node children) {
mChars = chars;
mFrequency = frequency;
mShortcutTargets = shortcutTargets;
mBigrams = bigrams;
mChildren = children;
+ mIsNotAWord = isNotAWord;
+ mIsBlacklistEntry = isBlacklistEntry;
}
public void addChild(CharGroup n) {
@@ -197,8 +205,9 @@ public class FusionDictionary implements Iterable<Word> {
* the existing ones if any. Note: unigram, bigram, and shortcut frequencies are only
* updated if they are higher than the existing ones.
*/
- public void update(int frequency, ArrayList<WeightedString> shortcutTargets,
- ArrayList<WeightedString> bigrams) {
+ public void update(final int frequency, final ArrayList<WeightedString> shortcutTargets,
+ final ArrayList<WeightedString> bigrams,
+ final boolean isNotAWord, final boolean isBlacklistEntry) {
if (frequency > mFrequency) {
mFrequency = frequency;
}
@@ -234,6 +243,8 @@ public class FusionDictionary implements Iterable<Word> {
}
}
}
+ mIsNotAWord = isNotAWord;
+ mIsBlacklistEntry = isBlacklistEntry;
}
}
@@ -296,10 +307,24 @@ public class FusionDictionary implements Iterable<Word> {
* @param word the word to add.
* @param frequency the frequency of the word, in the range [0..255].
* @param shortcutTargets a list of shortcut targets for this word, or null.
+ * @param isNotAWord true if this should not be considered a word (e.g. shortcut only)
*/
public void add(final String word, final int frequency,
- final ArrayList<WeightedString> shortcutTargets) {
- add(getCodePoints(word), frequency, shortcutTargets);
+ final ArrayList<WeightedString> shortcutTargets, final boolean isNotAWord) {
+ add(getCodePoints(word), frequency, shortcutTargets, isNotAWord,
+ false /* isBlacklistEntry */);
+ }
+
+ /**
+ * Helper method to add a blacklist entry as a string.
+ *
+ * @param word the word to add as a blacklist entry.
+ * @param shortcutTargets a list of shortcut targets for this word, or null.
+ * @param isNotAWord true if this is not a word for spellchecking purposes (shortcut only or so)
+ */
+ public void addBlacklistEntry(final String word,
+ final ArrayList<WeightedString> shortcutTargets, final boolean isNotAWord) {
+ add(getCodePoints(word), 0, shortcutTargets, isNotAWord, true /* isBlacklistEntry */);
}
/**
@@ -332,7 +357,8 @@ public class FusionDictionary implements Iterable<Word> {
if (charGroup != null) {
final CharGroup charGroup2 = findWordInTree(mRoot, word2);
if (charGroup2 == null) {
- add(getCodePoints(word2), 0, null);
+ add(getCodePoints(word2), 0, null, false /* isNotAWord */,
+ false /* isBlacklistEntry */);
}
charGroup.addBigram(word2, frequency);
} else {
@@ -349,9 +375,12 @@ public class FusionDictionary implements Iterable<Word> {
* @param word the word, as an int array.
* @param frequency the frequency of the word, in the range [0..255].
* @param shortcutTargets an optional list of shortcut targets for this word (null if none).
+ * @param isNotAWord true if this is not a word for spellchecking purposes (shortcut only or so)
+ * @param isBlacklistEntry true if this is a blacklisted word, false otherwise
*/
private void add(final int[] word, final int frequency,
- final ArrayList<WeightedString> shortcutTargets) {
+ final ArrayList<WeightedString> shortcutTargets,
+ final boolean isNotAWord, final boolean isBlacklistEntry) {
assert(frequency >= 0 && frequency <= 255);
Node currentNode = mRoot;
int charIndex = 0;
@@ -376,7 +405,7 @@ public class FusionDictionary implements Iterable<Word> {
final int insertionIndex = findInsertionIndex(currentNode, word[charIndex]);
final CharGroup newGroup = new CharGroup(
Arrays.copyOfRange(word, charIndex, word.length),
- shortcutTargets, null /* bigrams */, frequency);
+ shortcutTargets, null /* bigrams */, frequency, isNotAWord, isBlacklistEntry);
currentNode.mData.add(insertionIndex, newGroup);
if (DBG) checkStack(currentNode);
} else {
@@ -386,13 +415,15 @@ public class FusionDictionary implements Iterable<Word> {
// The new word is a prefix of an existing word, but the node on which it
// should end already exists as is. Since the old CharNode was not a terminal,
// make it one by filling in its frequency and other attributes
- currentGroup.update(frequency, shortcutTargets, null);
+ currentGroup.update(frequency, shortcutTargets, null, isNotAWord,
+ isBlacklistEntry);
} else {
// The new word matches the full old word and extends past it.
// We only have to create a new node and add it to the end of this.
final CharGroup newNode = new CharGroup(
Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length),
- shortcutTargets, null /* bigrams */, frequency);
+ shortcutTargets, null /* bigrams */, frequency, isNotAWord,
+ isBlacklistEntry);
currentGroup.mChildren = new Node();
currentGroup.mChildren.mData.add(newNode);
}
@@ -400,7 +431,9 @@ public class FusionDictionary implements Iterable<Word> {
if (0 == differentCharIndex) {
// Exact same word. Update the frequency if higher. This will also add the
// new shortcuts to the existing shortcut list if it already exists.
- currentGroup.update(frequency, shortcutTargets, null);
+ currentGroup.update(frequency, shortcutTargets, null,
+ currentGroup.mIsNotAWord && isNotAWord,
+ currentGroup.mIsBlacklistEntry || isBlacklistEntry);
} else {
// Partial prefix match only. We have to replace the current node with a node
// containing the current prefix and create two new ones for the tails.
@@ -408,21 +441,26 @@ public class FusionDictionary implements Iterable<Word> {
final CharGroup newOldWord = new CharGroup(
Arrays.copyOfRange(currentGroup.mChars, differentCharIndex,
currentGroup.mChars.length), currentGroup.mShortcutTargets,
- currentGroup.mBigrams, currentGroup.mFrequency, currentGroup.mChildren);
+ currentGroup.mBigrams, currentGroup.mFrequency,
+ currentGroup.mIsNotAWord, currentGroup.mIsBlacklistEntry,
+ currentGroup.mChildren);
newChildren.mData.add(newOldWord);
final CharGroup newParent;
if (charIndex + differentCharIndex >= word.length) {
newParent = new CharGroup(
Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex),
- shortcutTargets, null /* bigrams */, frequency, newChildren);
+ shortcutTargets, null /* bigrams */, frequency,
+ isNotAWord, isBlacklistEntry, newChildren);
} else {
newParent = new CharGroup(
Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex),
- null /* shortcutTargets */, null /* bigrams */, -1, newChildren);
+ null /* shortcutTargets */, null /* bigrams */, -1,
+ false /* isNotAWord */, false /* isBlacklistEntry */, newChildren);
final CharGroup newWord = new CharGroup(Arrays.copyOfRange(word,
charIndex + differentCharIndex, word.length),
- shortcutTargets, null /* bigrams */, frequency);
+ shortcutTargets, null /* bigrams */, frequency,
+ isNotAWord, isBlacklistEntry);
final int addIndex = word[charIndex + differentCharIndex]
> currentGroup.mChars[differentCharIndex] ? 1 : 0;
newChildren.mData.add(addIndex, newWord);
@@ -483,7 +521,8 @@ public class FusionDictionary implements Iterable<Word> {
private static int findInsertionIndex(final Node node, int character) {
final ArrayList<CharGroup> data = node.mData;
final CharGroup reference = new CharGroup(new int[] { character },
- null /* shortcutTargets */, null /* bigrams */, 0);
+ null /* shortcutTargets */, null /* bigrams */, 0, false /* isNotAWord */,
+ false /* isBlacklistEntry */);
int result = Collections.binarySearch(data, reference, CHARGROUP_COMPARATOR);
return result >= 0 ? result : -result - 1;
}
@@ -748,7 +787,8 @@ public class FusionDictionary implements Iterable<Word> {
}
if (currentGroup.mFrequency >= 0)
return new Word(mCurrentString.toString(), currentGroup.mFrequency,
- currentGroup.mShortcutTargets, currentGroup.mBigrams);
+ currentGroup.mShortcutTargets, currentGroup.mBigrams,
+ currentGroup.mIsNotAWord, currentGroup.mIsBlacklistEntry);
} else {
mPositions.removeLast();
currentPos = mPositions.getLast();
diff --git a/java/src/com/android/inputmethod/latin/makedict/Word.java b/java/src/com/android/inputmethod/latin/makedict/Word.java
index 65fc72c40..4683ef154 100644
--- a/java/src/com/android/inputmethod/latin/makedict/Word.java
+++ b/java/src/com/android/inputmethod/latin/makedict/Word.java
@@ -31,16 +31,21 @@ public class Word implements Comparable<Word> {
public final int mFrequency;
public final ArrayList<WeightedString> mShortcutTargets;
public final ArrayList<WeightedString> mBigrams;
+ public final boolean mIsNotAWord;
+ public final boolean mIsBlacklistEntry;
private int mHashCode = 0;
public Word(final String word, final int frequency,
final ArrayList<WeightedString> shortcutTargets,
- final ArrayList<WeightedString> bigrams) {
+ final ArrayList<WeightedString> bigrams,
+ final boolean isNotAWord, final boolean isBlacklistEntry) {
mWord = word;
mFrequency = frequency;
mShortcutTargets = shortcutTargets;
mBigrams = bigrams;
+ mIsNotAWord = isNotAWord;
+ mIsBlacklistEntry = isBlacklistEntry;
}
private static int computeHashCode(Word word) {
@@ -48,7 +53,9 @@ public class Word implements Comparable<Word> {
word.mWord,
word.mFrequency,
word.mShortcutTargets.hashCode(),
- word.mBigrams.hashCode()
+ word.mBigrams.hashCode(),
+ word.mIsNotAWord,
+ word.mIsBlacklistEntry
});
}
@@ -78,7 +85,9 @@ public class Word implements Comparable<Word> {
Word w = (Word)o;
return mFrequency == w.mFrequency && mWord.equals(w.mWord)
&& mShortcutTargets.equals(w.mShortcutTargets)
- && mBigrams.equals(w.mBigrams);
+ && mBigrams.equals(w.mBigrams)
+ && mIsNotAWord == w.mIsNotAWord
+ && mIsBlacklistEntry == w.mIsBlacklistEntry;
}
@Override