diff options
Diffstat (limited to 'java/src')
20 files changed, 391 insertions, 133 deletions
diff --git a/java/src/com/android/inputmethod/compat/SuggestionSpanUtils.java b/java/src/com/android/inputmethod/compat/SuggestionSpanUtils.java index 25afef1e6..a0f48d24c 100644 --- a/java/src/com/android/inputmethod/compat/SuggestionSpanUtils.java +++ b/java/src/com/android/inputmethod/compat/SuggestionSpanUtils.java @@ -108,6 +108,7 @@ public class SuggestionSpanUtils { if (!dictionaryAvailable || TextUtils.isEmpty(pickedWord) || CONSTRUCTOR_SuggestionSpan == null || suggestedWords == null || suggestedWords.size() == 0 + || suggestedWords.mIsPrediction || suggestedWords.mIsPunctuationSuggestions || OBJ_SUGGESTIONS_MAX_SIZE == null) { return pickedWord; } diff --git a/java/src/com/android/inputmethod/keyboard/Keyboard.java b/java/src/com/android/inputmethod/keyboard/Keyboard.java index 28855f561..bd3b0e114 100644 --- a/java/src/com/android/inputmethod/keyboard/Keyboard.java +++ b/java/src/com/android/inputmethod/keyboard/Keyboard.java @@ -511,7 +511,6 @@ public class Keyboard { // keyWidth enum constants private static final int KEYWIDTH_NOT_ENUM = 0; private static final int KEYWIDTH_FILL_RIGHT = -1; - private static final int KEYWIDTH_FILL_BOTH = -2; private final Params mParams; /** Default width of a key in this row. */ @@ -576,11 +575,6 @@ public class Keyboard { public float getKeyX(TypedArray keyAttr) { final int widthType = Builder.getEnumValue(keyAttr, R.styleable.Keyboard_Key_keyWidth, KEYWIDTH_NOT_ENUM); - if (widthType == KEYWIDTH_FILL_BOTH) { - // If keyWidth is fillBoth, the key width should start right after the nearest - // key on the left hand side. - return mCurrentX; - } final int keyboardRightEdge = mParams.mOccupiedWidth - mParams.mHorizontalEdgesPadding; @@ -610,14 +604,10 @@ public class Keyboard { R.styleable.Keyboard_Key_keyWidth, KEYWIDTH_NOT_ENUM); switch (widthType) { case KEYWIDTH_FILL_RIGHT: - case KEYWIDTH_FILL_BOTH: final int keyboardRightEdge = mParams.mOccupiedWidth - mParams.mHorizontalEdgesPadding; // If keyWidth is fillRight, the actual key width will be determined to fill // out the area up to the right edge of the keyboard. - // If keyWidth is fillBoth, the actual key width will be determined to fill out - // the area between the nearest key on the left hand side and the right edge of - // the keyboard. return keyboardRightEdge - keyXPos; default: // KEYWIDTH_NOT_ENUM return Builder.getDimensionOrFraction(keyAttr, diff --git a/java/src/com/android/inputmethod/latin/AdditionalSubtypeSettings.java b/java/src/com/android/inputmethod/latin/AdditionalSubtypeSettings.java index be807ab0c..0bde2c011 100644 --- a/java/src/com/android/inputmethod/latin/AdditionalSubtypeSettings.java +++ b/java/src/com/android/inputmethod/latin/AdditionalSubtypeSettings.java @@ -131,6 +131,7 @@ public class AdditionalSubtypeSettings extends PreferenceFragment { private interface SubtypeDialogProxy { public void onRemovePressed(SubtypePreference subtypePref); + public void onAddPressed(SubtypePreference subtypePref); public SubtypeLocaleAdapter getSubtypeLocaleAdapter(); public KeyboardLayoutSetAdapter getKeyboardLayoutSetAdapter(); } @@ -241,6 +242,7 @@ public class AdditionalSubtypeSettings extends PreferenceFragment { super.onClick(dialog, which); switch (which) { case DialogInterface.BUTTON_POSITIVE: + final boolean addPressed = isIncomplete(); final SubtypeLocaleItem locale = (SubtypeLocaleItem) mSubtypeLocaleSpinner.getSelectedItem(); final KeyboardLayoutSetItem layout = @@ -249,6 +251,9 @@ public class AdditionalSubtypeSettings extends PreferenceFragment { locale.first, layout.first, ASCII_CAPABLE); setSubtype(subtype); notifyChanged(); + if (addPressed) { + mProxy.onAddPressed(this); + } break; case DialogInterface.BUTTON_NEUTRAL: // Nothing to do @@ -391,6 +396,11 @@ public class AdditionalSubtypeSettings extends PreferenceFragment { } @Override + public void onAddPressed(SubtypePreference subtypePref) { + mIsAddingNewSubtype = false; + } + + @Override public SubtypeLocaleAdapter getSubtypeLocaleAdapter() { return mSubtypeLocaleAdapter; } diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java index a644ec0d9..cc20f4294 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java @@ -17,6 +17,7 @@ package com.android.inputmethod.latin; import android.content.Context; +import android.text.TextUtils; import com.android.inputmethod.keyboard.ProximityInfo; @@ -84,6 +85,7 @@ public class BinaryDictionary extends Dictionary { int typedLetterMultiplier, int fullWordMultiplier, int maxWordLength, int maxWords); private native void closeNative(long dict); private native boolean isValidWordNative(long dict, int[] word, int wordLength); + private native boolean isValidBigramNative(long dict, int[] word1, int[] word2); private native int getSuggestionsNative(long dict, long proximityInfo, int[] xCoordinates, int[] yCoordinates, int[] inputCodes, int codesSize, int[] prevWordForBigrams, boolean useFullEditDistance, char[] outputChars, int[] scores); @@ -204,6 +206,15 @@ public class BinaryDictionary extends Dictionary { return isValidWordNative(mNativeDict, chars, chars.length); } + // TODO: Add a batch process version (isValidBigramMultiple?) to avoid excessive numbers of jni + // calls when checking for changes in an entire dictionary. + public boolean isValidBigram(CharSequence word1, CharSequence word2) { + if (TextUtils.isEmpty(word1) || TextUtils.isEmpty(word2)) return false; + int[] chars1 = StringUtils.toCodePointArray(word1.toString()); + int[] chars2 = StringUtils.toCodePointArray(word2.toString()); + return isValidBigramNative(mNativeDict, chars1, chars2); + } + @Override public synchronized void close() { closeInternal(); diff --git a/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java index 65f97e987..22787c218 100644 --- a/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java @@ -18,6 +18,7 @@ import android.content.ContentResolver; import android.content.Context; import android.database.ContentObserver; import android.database.Cursor; +import android.os.SystemClock; import android.provider.BaseColumns; import android.provider.ContactsContract.Contacts; import android.text.TextUtils; @@ -30,18 +31,27 @@ import java.util.Locale; public class ContactsBinaryDictionary extends ExpandableBinaryDictionary { private static final String[] PROJECTION = {BaseColumns._ID, Contacts.DISPLAY_NAME,}; + private static final String[] PROJECTION_ID_ONLY = {BaseColumns._ID}; private static final String TAG = ContactsBinaryDictionary.class.getSimpleName(); private static final String NAME = "contacts"; + private static boolean DEBUG = false; + /** * Frequency for contacts information into the dictionary */ private static final int FREQUENCY_FOR_CONTACTS = 40; private static final int FREQUENCY_FOR_CONTACTS_BIGRAM = 90; + /** The maximum number of contacts that this dictionary supports. */ + private static final int MAX_CONTACT_COUNT = 10000; + private static final int INDEX_NAME = 1; + /** The number of contacts in the most recent dictionary rebuild. */ + static private int sContactCountAtLastRebuild = 0; + private ContentObserver mObserver; /** @@ -98,6 +108,7 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary { if (cursor != null) { try { if (cursor.moveToFirst()) { + sContactCountAtLastRebuild = getContactCount(); addWords(cursor); } } finally { @@ -125,15 +136,28 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary { private void addWords(Cursor cursor) { clearFusionDictionary(); - while (!cursor.isAfterLast()) { + int count = 0; + while (!cursor.isAfterLast() && count < MAX_CONTACT_COUNT) { String name = cursor.getString(INDEX_NAME); - if (name != null && -1 == name.indexOf('@')) { + if (isValidName(name)) { addName(name); + ++count; } cursor.moveToNext(); } } + private int getContactCount() { + // TODO: consider switching to a rawQuery("select count(*)...") on the database if + // performance is a bottleneck. + final Cursor cursor = mContext.getContentResolver().query( + Contacts.CONTENT_URI, PROJECTION_ID_ONLY, null, null, null); + if (cursor != null) { + return cursor.getCount(); + } + return 0; + } + /** * Adds the words in a name (e.g., firstname/lastname) to the binary dictionary along with their * bigrams depending on locale. @@ -144,16 +168,9 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary { // TODO: Better tokenization for non-Latin writing systems for (int i = 0; i < len; i++) { if (Character.isLetter(name.codePointAt(i))) { - int j; - for (j = i + 1; j < len; j++) { - final int codePoint = name.codePointAt(j); - if (!(codePoint == Keyboard.CODE_DASH || codePoint == Keyboard.CODE_SINGLE_QUOTE - || Character.isLetter(codePoint))) { - break; - } - } - String word = name.substring(i, j); - i = j - 1; + int end = getWordEndPosition(name, len, i); + String word = name.substring(i, end); + i = end - 1; // Don't add single letter words, possibly confuses // capitalization of i. final int wordLen = word.codePointCount(0, word.length()); @@ -169,4 +186,100 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary { } } } + + /** + * Returns the index of the last letter in the word, starting from position startIndex. + */ + private static int getWordEndPosition(String string, int len, int startIndex) { + int end; + int cp = 0; + for (end = startIndex + 1; end < len; end += Character.charCount(cp)) { + cp = string.codePointAt(end); + if (!(cp == Keyboard.CODE_DASH || cp == Keyboard.CODE_SINGLE_QUOTE + || Character.isLetter(cp))) { + break; + } + } + return end; + } + + @Override + protected boolean hasContentChanged() { + final long startTime = SystemClock.uptimeMillis(); + final int contactCount = getContactCount(); + if (contactCount > MAX_CONTACT_COUNT) { + // If there are too many contacts then return false. In this rare case it is impossible + // to include all of them anyways and the cost of rebuilding the dictionary is too high. + // TODO: Sort and check only the MAX_CONTACT_COUNT most recent contacts? + return false; + } + if (contactCount != sContactCountAtLastRebuild) { + return true; + } + // Check all contacts since it's not possible to find out which names have changed. + // This is needed because it's possible to receive extraneous onChange events even when no + // name has changed. + Cursor cursor = mContext.getContentResolver().query( + Contacts.CONTENT_URI, PROJECTION, null, null, null); + if (cursor != null) { + try { + if (cursor.moveToFirst()) { + while (!cursor.isAfterLast()) { + String name = cursor.getString(INDEX_NAME); + if (isValidName(name) && !isNameInDictionary(name)) { + if (DEBUG) { + Log.d(TAG, "Contact name missing: " + name + " (runtime = " + + (SystemClock.uptimeMillis() - startTime) + " ms)"); + } + return true; + } + cursor.moveToNext(); + } + } + } finally { + cursor.close(); + } + } + if (DEBUG) { + Log.d(TAG, "No contacts changed. (runtime = " + (SystemClock.uptimeMillis() - startTime) + + " ms)"); + } + return false; + } + + private static boolean isValidName(String name) { + if (name != null && -1 == name.indexOf('@')) { + return true; + } + return false; + } + + /** + * Checks if the words in a name are in the current binary dictionary. + */ + private boolean isNameInDictionary(String name) { + int len = name.codePointCount(0, name.length()); + String prevWord = null; + for (int i = 0; i < len; i++) { + if (Character.isLetter(name.codePointAt(i))) { + int end = getWordEndPosition(name, len, i); + String word = name.substring(i, end); + i = end - 1; + final int wordLen = word.codePointCount(0, word.length()); + if (wordLen < MAX_WORD_LENGTH && wordLen > 1) { + if (!TextUtils.isEmpty(prevWord) && mUseFirstLastBigrams) { + if (!super.isValidBigramLocked(prevWord, word)) { + return false; + } + } else { + if (!super.isValidWordLocked(word)) { + return false; + } + } + prevWord = word; + } + } + } + return true; + } } diff --git a/java/src/com/android/inputmethod/latin/ContactsDictionary.java b/java/src/com/android/inputmethod/latin/ContactsDictionary.java index 8a7dfb839..83bc9046b 100644 --- a/java/src/com/android/inputmethod/latin/ContactsDictionary.java +++ b/java/src/com/android/inputmethod/latin/ContactsDictionary.java @@ -149,7 +149,8 @@ public class ContactsDictionary extends ExpandableDictionary { // capitalization of i. final int wordLen = word.length(); if (wordLen < maxWordLength && wordLen > 1) { - super.addWord(word, FREQUENCY_FOR_CONTACTS); + super.addWord(word, null /* shortcut */, + FREQUENCY_FOR_CONTACTS); if (!TextUtils.isEmpty(prevWord)) { super.setBigram(prevWord, word, FREQUENCY_FOR_CONTACTS_BIGRAM); diff --git a/java/src/com/android/inputmethod/latin/Dictionary.java b/java/src/com/android/inputmethod/latin/Dictionary.java index a405aa409..1ec678f7f 100644 --- a/java/src/com/android/inputmethod/latin/Dictionary.java +++ b/java/src/com/android/inputmethod/latin/Dictionary.java @@ -24,11 +24,6 @@ import com.android.inputmethod.keyboard.ProximityInfo; */ public abstract class Dictionary { /** - * Whether or not to replicate the typed word in the suggested list, even if it's valid. - */ - protected static final boolean INCLUDE_TYPED_WORD_IF_VALID = false; - - /** * The weight to give to a word if it's length is the same as the number of typed characters. */ protected static final int FULL_WORD_SCORE_MULTIPLIER = 2; diff --git a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java index 3d89226c0..22d8f24f1 100644 --- a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java @@ -96,6 +96,13 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { protected abstract void loadDictionaryAsync(); /** + * Indicates that the source dictionary content has changed and a rebuild of the binary file is + * required. If it returns false, the next reload will only read the current binary dictionary + * from file. Note that the shared binary dictionary is locked when this is called. + */ + protected abstract boolean hasContentChanged(); + + /** * Gets the shared dictionary controller for the given filename. */ private static synchronized DictionaryController getSharedDictionaryController( @@ -148,8 +155,9 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { * the native side. */ public void clearFusionDictionary() { - mFusionDictionary = new FusionDictionary(new Node(), new FusionDictionary.DictionaryOptions( - new HashMap<String, String>(), false, false)); + mFusionDictionary = new FusionDictionary(new Node(), + new FusionDictionary.DictionaryOptions(new HashMap<String, String>(), false, + false)); } /** @@ -224,9 +232,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { protected boolean isValidWordInner(final CharSequence word) { if (mLocalDictionaryController.tryLock()) { try { - if (mBinaryDictionary != null) { - return mBinaryDictionary.isValidWord(word); - } + return isValidWordLocked(word); } finally { mLocalDictionaryController.unlock(); } @@ -234,6 +240,32 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { return false; } + protected boolean isValidWordLocked(final CharSequence word) { + if (mBinaryDictionary == null) return false; + return mBinaryDictionary.isValidWord(word); + } + + protected boolean isValidBigram(final CharSequence word1, final CharSequence word2) { + if (mBinaryDictionary == null) return false; + return mBinaryDictionary.isValidBigram(word1, word2); + } + + protected boolean isValidBigramInner(final CharSequence word1, final CharSequence word2) { + if (mLocalDictionaryController.tryLock()) { + try { + return isValidBigramLocked(word1, word2); + } finally { + mLocalDictionaryController.unlock(); + } + } + return false; + } + + protected boolean isValidBigramLocked(final CharSequence word1, final CharSequence word2) { + if (mBinaryDictionary == null) return false; + return mBinaryDictionary.isValidBigram(word1, word2); + } + /** * Load the current binary dictionary from internal storage in a background thread. If no binary * dictionary exists, this method will generate one. @@ -315,12 +347,16 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { } /** - * Sets whether or not the dictionary is out of date and requires a reload. + * Marks that the dictionary is out of date and requires a reload. + * + * @param requiresRebuild Indicates that the source dictionary content has changed and a rebuild + * of the binary file is required. If not true, the next reload process will only read + * the current binary dictionary from file. */ - protected void setRequiresReload(final boolean reload) { - final long time = reload ? SystemClock.uptimeMillis() : 0; - mSharedDictionaryController.mLastUpdateRequestTime = time; + protected void setRequiresReload(final boolean requiresRebuild) { + final long time = SystemClock.uptimeMillis(); mLocalDictionaryController.mLastUpdateRequestTime = time; + mSharedDictionaryController.mLastUpdateRequestTime = time; if (DEBUG) { Log.d(TAG, "Reload request: request=" + time + " update=" + mSharedDictionaryController.mLastUpdateTime); @@ -351,21 +387,30 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { if (mSharedDictionaryController.isOutOfDate() || !dictionaryFileExists()) { // If the shared dictionary file does not exist or is out of date, the first // instance that acquires the lock will generate a new one. - mSharedDictionaryController.mLastUpdateTime = time; - mLocalDictionaryController.mLastUpdateTime = time; - generateBinaryDictionary(); - loadBinaryDictionary(); - } else if (mLocalDictionaryController.isOutOfDate()) { - // Otherwise, if only the local dictionary for this instance is out of date, load - // the shared dictionary from file. - mLocalDictionaryController.mLastUpdateTime = time; + if (hasContentChanged()) { + // If the source content has changed, rebuild the binary dictionary. + mSharedDictionaryController.mLastUpdateTime = time; + generateBinaryDictionary(); + loadBinaryDictionary(); + } else { + // If not, the reload request was unnecessary so revert LastUpdateRequestTime + // to LastUpdateTime. + mSharedDictionaryController.mLastUpdateRequestTime = + mSharedDictionaryController.mLastUpdateTime; + } + } else if (mBinaryDictionary == null || mLocalDictionaryController.mLastUpdateTime + < mSharedDictionaryController.mLastUpdateTime) { + // Otherwise, if the local dictionary is older than the shared dictionary, load the + // shared dictionary. loadBinaryDictionary(); } + mLocalDictionaryController.mLastUpdateTime = time; } finally { mSharedDictionaryController.unlock(); } } + // TODO: cache the file's existence so that we avoid doing a disk access each time. private boolean dictionaryFileExists() { final File file = new File(mContext.getFilesDir(), mFilename); return file.exists(); diff --git a/java/src/com/android/inputmethod/latin/ExpandableDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableDictionary.java index fe21ebe87..7a740b3f1 100644 --- a/java/src/com/android/inputmethod/latin/ExpandableDictionary.java +++ b/java/src/com/android/inputmethod/latin/ExpandableDictionary.java @@ -22,6 +22,7 @@ import com.android.inputmethod.keyboard.KeyDetector; import com.android.inputmethod.keyboard.Keyboard; import com.android.inputmethod.keyboard.ProximityInfo; +import java.util.ArrayList; import java.util.LinkedList; /** @@ -53,6 +54,8 @@ public class ExpandableDictionary extends Dictionary { boolean mTerminal; Node mParent; NodeArray mChildren; + ArrayList<char[]> mShortcutTargets; + boolean mShortcutOnly; LinkedList<NextWord> mNGrams; // Supports ngram } @@ -150,15 +153,15 @@ public class ExpandableDictionary extends Dictionary { return BinaryDictionary.MAX_WORD_LENGTH; } - public void addWord(String word, int frequency) { + public void addWord(final String word, final String shortcutTarget, final int frequency) { if (word.length() >= BinaryDictionary.MAX_WORD_LENGTH) { return; } - addWordRec(mRoots, word, 0, frequency, null); + addWordRec(mRoots, word, 0, shortcutTarget, frequency, null); } private void addWordRec(NodeArray children, final String word, final int depth, - final int frequency, Node parentNode) { + final String shortcutTarget, final int frequency, Node parentNode) { final int wordLength = word.length(); if (wordLength <= depth) return; final char c = word.charAt(depth); @@ -172,15 +175,25 @@ public class ExpandableDictionary extends Dictionary { break; } } + final boolean isShortcutOnly = (null != shortcutTarget); if (childNode == null) { childNode = new Node(); childNode.mCode = c; childNode.mParent = parentNode; + childNode.mShortcutOnly = isShortcutOnly; children.add(childNode); } if (wordLength == depth + 1) { // Terminate this word childNode.mTerminal = true; + if (isShortcutOnly) { + if (null == childNode.mShortcutTargets) { + childNode.mShortcutTargets = new ArrayList<char[]>(); + } + childNode.mShortcutTargets.add(shortcutTarget.toCharArray()); + } else { + childNode.mShortcutOnly = false; + } childNode.mFrequency = Math.max(frequency, childNode.mFrequency); if (childNode.mFrequency > 255) childNode.mFrequency = 255; return; @@ -188,7 +201,7 @@ public class ExpandableDictionary extends Dictionary { if (childNode.mChildren == null) { childNode.mChildren = new NodeArray(); } - addWordRec(childNode.mChildren, word, depth + 1, frequency, childNode); + addWordRec(childNode.mChildren, word, depth + 1, shortcutTarget, frequency, childNode); } @Override @@ -239,7 +252,13 @@ public class ExpandableDictionary extends Dictionary { if (mRequiresReload) startDictionaryLoadingTaskLocked(); if (mUpdatingDictionary) return false; } - return getWordFrequency(word) > -1; + final Node node = searchNode(mRoots, word, 0, word.length()); + // If node is null, we didn't find the word, so it's not valid. + // If node.mShortcutOnly is true, then it exists as a shortcut but not as a word, + // so that means it's not a valid word. + // If node.mShortcutOnly is false, then it exists as a word (it may also exist as + // a shortcut, but this does not matter), so it's a valid word. + return (node == null) ? false : !node.mShortcutOnly; } /** @@ -247,7 +266,7 @@ public class ExpandableDictionary extends Dictionary { */ protected int getWordFrequency(CharSequence word) { // Case-sensitive search - Node node = searchNode(mRoots, word, 0, word.length()); + final Node node = searchNode(mRoots, word, 0, word.length()); return (node == null) ? -1 : node.mFrequency; } @@ -262,6 +281,35 @@ public class ExpandableDictionary extends Dictionary { } /** + * Helper method to add a word and its shortcuts. + * + * @param node the terminal node + * @param word the word to insert, as an array of code points + * @param depth the depth of the node in the tree + * @param finalFreq the frequency for this word + * @return whether there is still space for more words. {@see Dictionary.WordCallback#addWord}. + */ + private boolean addWordAndShortcutsFromNode(final Node node, final char[] word, final int depth, + final int finalFreq, final WordCallback callback) { + if (finalFreq > 0 && !node.mShortcutOnly) { + if (!callback.addWord(word, 0, depth + 1, finalFreq, mDicTypeId, Dictionary.UNIGRAM)) { + return false; + } + } + if (null != node.mShortcutTargets) { + final int length = node.mShortcutTargets.size(); + for (int shortcutIndex = 0; shortcutIndex < length; ++shortcutIndex) { + final char[] shortcut = node.mShortcutTargets.get(shortcutIndex); + if (!callback.addWord(shortcut, 0, shortcut.length, finalFreq, mDicTypeId, + Dictionary.UNIGRAM)) { + return false; + } + } + } + return true; + } + + /** * Recursively traverse the tree for words that match the input. Input consists of * a list of arrays. Each item in the list is one input character position. An input * character is actually an array of multiple possible candidates. This function is not @@ -313,8 +361,8 @@ public class ExpandableDictionary extends Dictionary { } else { finalFreq = computeSkippedWordFinalFreq(freq, snr, mInputLength); } - if (!callback.addWord(word, 0, depth + 1, finalFreq, mDicTypeId, - Dictionary.UNIGRAM)) { + if (!addWordAndShortcutsFromNode(node, word, depth, finalFreq, callback)) { + // No space left in the queue, bail out return; } } @@ -344,18 +392,18 @@ public class ExpandableDictionary extends Dictionary { if (codeSize == inputIndex + 1) { if (terminal) { - if (INCLUDE_TYPED_WORD_IF_VALID - || !same(word, depth + 1, codes.getTypedWord())) { - final int finalFreq; - if (skipPos < 0) { - finalFreq = freq * snr * addedAttenuation - * FULL_WORD_SCORE_MULTIPLIER; - } else { - finalFreq = computeSkippedWordFinalFreq(freq, - snr * addedAttenuation, mInputLength); - } - callback.addWord(word, 0, depth + 1, finalFreq, mDicTypeId, - Dictionary.UNIGRAM); + final int finalFreq; + if (skipPos < 0) { + finalFreq = freq * snr * addedAttenuation + * FULL_WORD_SCORE_MULTIPLIER; + } else { + finalFreq = computeSkippedWordFinalFreq(freq, + snr * addedAttenuation, mInputLength); + } + if (!addWordAndShortcutsFromNode(node, word, depth, finalFreq, + callback)) { + // No space left in the queue, bail out + return; } } if (children != null) { diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java index 011b512e8..b59e939b7 100644 --- a/java/src/com/android/inputmethod/latin/LatinIME.java +++ b/java/src/com/android/inputmethod/latin/LatinIME.java @@ -877,7 +877,8 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen false /* hasAutoCorrectionCandidate */, false /* allowsToBeAutoCorrected */, false /* isPunctuationSuggestions */, - false /* isObsoleteSuggestions */); + false /* isObsoleteSuggestions */, + false /* isPrediction */); // When in fullscreen mode, show completions generated by the application final boolean isAutoCorrection = false; setSuggestions(suggestedWords, isAutoCorrection); @@ -1120,7 +1121,7 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen @Override public boolean addWordToDictionary(String word) { - mUserDictionary.addWord(word, 128); + mUserDictionary.addWordToUserDictionary(word, 128); // Suggestion strip should be updated after the operation of adding word to the // user dictionary mHandler.postUpdateSuggestions(); @@ -1772,7 +1773,8 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen false /* hasAutoCorrectionCandidate */, false /* allowsToBeAutoCorrected */, false /* isPunctuationSuggestions */, - true /* isObsoleteSuggestions */); + true /* isObsoleteSuggestions */, + false /* isPrediction */); showSuggestions(obsoleteSuggestedWords, typedWord); } } diff --git a/java/src/com/android/inputmethod/latin/SettingsValues.java b/java/src/com/android/inputmethod/latin/SettingsValues.java index 5f9e1bc76..55b896f5a 100644 --- a/java/src/com/android/inputmethod/latin/SettingsValues.java +++ b/java/src/com/android/inputmethod/latin/SettingsValues.java @@ -166,7 +166,8 @@ public class SettingsValues { false /* hasAutoCorrectionCandidate */, false /* allowsToBeAutoCorrected */, true /* isPunctuationSuggestions */, - false /* isObsoleteSuggestions */); + false /* isObsoleteSuggestions */, + false /* isPrediction */); } private static String createWordSeparators(final String weakSpaceStrippers, diff --git a/java/src/com/android/inputmethod/latin/Suggest.java b/java/src/com/android/inputmethod/latin/Suggest.java index 112bde6a3..845df81f6 100644 --- a/java/src/com/android/inputmethod/latin/Suggest.java +++ b/java/src/com/android/inputmethod/latin/Suggest.java @@ -253,13 +253,12 @@ public class Suggest implements Dictionary.WordCallback { SuggestedWordInfo.removeDups(mSuggestions); return new SuggestedWords(mSuggestions, - // TODO: Just assuming the suggestions that came from the bigram prediction are - // valid now. Need to assign a correct value for typedWordValid. - true /* typedWordValid */, + false /* typedWordValid */, false /* hasAutoCorrectionCandidate */, false /* allowsToBeAutoCorrected */, false /* isPunctuationSuggestions */, - false /* isObsoleteSuggestions */); + false /* isObsoleteSuggestions */, + true /* isPrediction */); } // TODO: cleanup dictionaries looking up and suggestions building with SuggestedWords.Builder @@ -396,7 +395,8 @@ public class Suggest implements Dictionary.WordCallback { autoCorrectionAvailable /* hasAutoCorrectionCandidate */, allowsToBeAutoCorrected /* allowsToBeAutoCorrected */, false /* isPunctuationSuggestions */, - false /* isObsoleteSuggestions */); + false /* isObsoleteSuggestions */, + false /* isPrediction */); } /** diff --git a/java/src/com/android/inputmethod/latin/SuggestedWords.java b/java/src/com/android/inputmethod/latin/SuggestedWords.java index 91110d888..497fd3bfa 100644 --- a/java/src/com/android/inputmethod/latin/SuggestedWords.java +++ b/java/src/com/android/inputmethod/latin/SuggestedWords.java @@ -25,13 +25,14 @@ import java.util.HashSet; public class SuggestedWords { public static final SuggestedWords EMPTY = new SuggestedWords( - new ArrayList<SuggestedWordInfo>(0), false, false, false, false, false); + new ArrayList<SuggestedWordInfo>(0), false, false, false, false, false, false); public final boolean mTypedWordValid; public final boolean mHasAutoCorrectionCandidate; public final boolean mIsPunctuationSuggestions; public final boolean mAllowsToBeAutoCorrected; public final boolean mIsObsoleteSuggestions; + public final boolean mIsPrediction; private final ArrayList<SuggestedWordInfo> mSuggestedWordInfoList; public SuggestedWords(final ArrayList<SuggestedWordInfo> suggestedWordInfoList, @@ -39,13 +40,15 @@ public class SuggestedWords { final boolean hasAutoCorrectionCandidate, final boolean allowsToBeAutoCorrected, final boolean isPunctuationSuggestions, - final boolean isObsoleteSuggestions) { + final boolean isObsoleteSuggestions, + final boolean isPrediction) { mSuggestedWordInfoList = suggestedWordInfoList; mTypedWordValid = typedWordValid; mHasAutoCorrectionCandidate = hasAutoCorrectionCandidate; mAllowsToBeAutoCorrected = allowsToBeAutoCorrected; mIsPunctuationSuggestions = isPunctuationSuggestions; mIsObsoleteSuggestions = isObsoleteSuggestions; + mIsPrediction = isPrediction; } public int size() { diff --git a/java/src/com/android/inputmethod/latin/SynchronouslyLoadedUserDictionary.java b/java/src/com/android/inputmethod/latin/SynchronouslyLoadedUserDictionary.java index 50e8b249e..b78be89b8 100644 --- a/java/src/com/android/inputmethod/latin/SynchronouslyLoadedUserDictionary.java +++ b/java/src/com/android/inputmethod/latin/SynchronouslyLoadedUserDictionary.java @@ -42,6 +42,6 @@ public class SynchronouslyLoadedUserDictionary extends UserDictionary { @Override public synchronized boolean isValidWord(CharSequence word) { blockingReloadDictionaryIfRequired(); - return getWordFrequency(word) > -1; + return super.isValidWord(word); } } diff --git a/java/src/com/android/inputmethod/latin/UserDictionary.java b/java/src/com/android/inputmethod/latin/UserDictionary.java index 6beeaace9..218bac72a 100644 --- a/java/src/com/android/inputmethod/latin/UserDictionary.java +++ b/java/src/com/android/inputmethod/latin/UserDictionary.java @@ -31,8 +31,11 @@ import java.util.Arrays; public class UserDictionary extends ExpandableDictionary { + // TODO: use Words.SHORTCUT when it's public in the SDK + final static String SHORTCUT = "shortcut"; private static final String[] PROJECTION_QUERY = { Words.WORD, + SHORTCUT, Words.FREQUENCY, }; @@ -149,15 +152,18 @@ public class UserDictionary extends ExpandableDictionary { } /** - * Adds a word to the dictionary and makes it persistent. + * Adds a word to the user dictionary and makes it persistent. + * + * This will call upon the system interface to do the actual work through the intent + * readied by the system to this effect. + * * @param word the word to add. If the word is capitalized, then the dictionary will * recognize it as a capitalized word when searched. * @param frequency the frequency of occurrence of the word. A frequency of 255 is considered * the highest. * @TODO use a higher or float range for frequency */ - @Override - public synchronized void addWord(final String word, final int frequency) { + public synchronized void addWordToUserDictionary(final String word, final int frequency) { // Force load the dictionary here synchronously if (getRequiresReload()) loadDictionaryAsync(); // TODO: do something for the UI. With the following, any sufficiently long word will @@ -191,14 +197,19 @@ public class UserDictionary extends ExpandableDictionary { final int maxWordLength = getMaxWordLength(); if (cursor.moveToFirst()) { final int indexWord = cursor.getColumnIndex(Words.WORD); + final int indexShortcut = cursor.getColumnIndex(SHORTCUT); final int indexFrequency = cursor.getColumnIndex(Words.FREQUENCY); while (!cursor.isAfterLast()) { String word = cursor.getString(indexWord); + String shortcut = cursor.getString(indexShortcut); int frequency = cursor.getInt(indexFrequency); // Safeguard against adding really long words. Stack may overflow due // to recursion if (word.length() < maxWordLength) { - super.addWord(word, frequency); + super.addWord(word, null, frequency); + } + if (null != shortcut && shortcut.length() < maxWordLength) { + super.addWord(shortcut, word, frequency); } cursor.moveToNext(); } diff --git a/java/src/com/android/inputmethod/latin/UserHistoryDictionary.java b/java/src/com/android/inputmethod/latin/UserHistoryDictionary.java index 9191aa953..e13602e50 100644 --- a/java/src/com/android/inputmethod/latin/UserHistoryDictionary.java +++ b/java/src/com/android/inputmethod/latin/UserHistoryDictionary.java @@ -176,7 +176,7 @@ public class UserHistoryDictionary extends ExpandableDictionary { * The second word may not be null (a NullPointerException would be thrown). */ public int addToUserHistory(final String word1, String word2) { - super.addWord(word2, FREQUENCY_FOR_TYPED); + super.addWord(word2, null /* shortcut */, FREQUENCY_FOR_TYPED); // Do not insert a word as a bigram of itself if (word2.equals(word1)) { return 0; @@ -246,7 +246,7 @@ public class UserHistoryDictionary extends ExpandableDictionary { // Safeguard against adding really long words. Stack may overflow due // to recursive lookup if (null == word1) { - super.addWord(word2, frequency); + super.addWord(word2, null /* shortcut */, frequency); } else if (word1.length() < BinaryDictionary.MAX_WORD_LENGTH && word2.length() < BinaryDictionary.MAX_WORD_LENGTH) { super.setBigram(word1, word2, frequency); diff --git a/java/src/com/android/inputmethod/latin/WhitelistDictionary.java b/java/src/com/android/inputmethod/latin/WhitelistDictionary.java index bb3ba8651..a0de2f970 100644 --- a/java/src/com/android/inputmethod/latin/WhitelistDictionary.java +++ b/java/src/com/android/inputmethod/latin/WhitelistDictionary.java @@ -66,7 +66,7 @@ public class WhitelistDictionary extends ExpandableDictionary { if (before != null && after != null) { mWhitelistWords.put( before.toLowerCase(), new Pair<Integer, String>(score, after)); - addWord(after, score); + addWord(after, null /* shortcut */, score); } } } catch (NumberFormatException e) { diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java index 3c818cc56..563f8a99b 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java @@ -40,6 +40,8 @@ import java.util.TreeMap; */ public class BinaryDictInputOutput { + final static boolean DBG = MakedictLog.DBG; + /* Node layout is as follows: * | addressType xx : mask with MASK_GROUP_ADDRESS_TYPE * 2 bits, 00 = no children : FLAG_GROUP_ADDRESS_TYPE_NOADDRESS @@ -489,10 +491,17 @@ public class BinaryDictInputOutput { // Merging tails can only be done if there are no attributes. Searching for attributes // in LatinIME code depends on a total breadth-first ordering, which merging tails // breaks. If there are no attributes, it should be fine (and reduce the file size) - // to merge tails, and the following step would be necessary. - // If eventually the code runs on Android, searching through the whole array each time - // may be a performance concern. - list.remove(node); + // to merge tails, and removing the node from the list would be necessary. However, + // we don't merge tails because breaking the breadth-first ordering would result in + // extreme overhead at bigram lookup time (it would make the search function O(n) instead + // of the current O(log(n)), where n=number of nodes in the dictionary which is pretty + // high). + // If no nodes are ever merged, we can't have the same node twice in the list, hence + // searching for duplicates in unnecessary. It is also very performance consuming, + // since `list' is an ArrayList so it's an O(n) operation that runs on all nodes, making + // this simple list.remove operation O(n*n) overall. On Android this overhead is very + // high. + // For future reference, the code to remove duplicate is a simple : list.remove(node); list.add(node); final ArrayList<CharGroup> branches = node.mData; final int nodeSize = branches.size(); @@ -708,13 +717,13 @@ public class BinaryDictInputOutput { } } if (null != group.mShortcutTargets) { - if (0 == group.mShortcutTargets.size()) { + if (DBG && 0 == group.mShortcutTargets.size()) { throw new RuntimeException("0-sized shortcut list must be null"); } flags |= FLAG_HAS_SHORTCUT_TARGETS; } if (null != group.mBigrams) { - if (0 == group.mBigrams.size()) { + if (DBG && 0 == group.mBigrams.size()) { throw new RuntimeException("0-sized bigram list must be null"); } flags |= FLAG_HAS_BIGRAMS; @@ -756,14 +765,39 @@ public class BinaryDictInputOutput { bigramFrequency = unigramFrequency; } // We compute the difference between 255 (which means probability = 1) and the - // unigram score. We split this into discrete 16 steps, and this is the value - // we store into the 4 bits of the bigrams frequency. - final float bigramRatio = (float)(bigramFrequency - unigramFrequency) - / (MAX_TERMINAL_FREQUENCY - unigramFrequency); - // TODO: if the bigram freq is very close to the unigram frequency, we don't want - // to include the bigram in the binary dictionary at all. - final int discretizedFrequency = Math.round(bigramRatio * MAX_BIGRAM_FREQUENCY); - bigramFlags += discretizedFrequency & FLAG_ATTRIBUTE_FREQUENCY; + // unigram score. We split this into a number of discrete steps. + // Now, the steps are numbered 0~15; 0 represents an increase of 1 step while 15 + // represents an increase of 16 steps: a value of 15 will be interpreted as the median + // value of the 16th step. In all justice, if the bigram frequency is low enough to be + // rounded below the first step (which means it is less than half a step higher than the + // unigram frequency) then the unigram frequency itself is the best approximation of the + // bigram freq that we could possibly supply, hence we should *not* include this bigram + // in the file at all. + // until this is done, we'll write 0 and slightly overestimate this case. + // In other words, 0 means "between 0.5 step and 1.5 step", 1 means "between 1.5 step + // and 2.5 steps", and 15 means "between 15.5 steps and 16.5 steps". So we want to + // divide our range [unigramFreq..MAX_TERMINAL_FREQUENCY] in 16.5 steps to get the + // step size. Then we compute the start of the first step (the one where value 0 starts) + // by adding half-a-step to the unigramFrequency. From there, we compute the integer + // number of steps to the bigramFrequency. One last thing: we want our steps to include + // their lower bound and exclude their higher bound so we need to have the first step + // start at exactly 1 unit higher than floor(unigramFreq + half a step). + // Note : to reconstruct the score, the dictionary reader will need to divide + // MAX_TERMINAL_FREQUENCY - unigramFreq by 16.5 likewise, and add + // (discretizedFrequency + 0.5) times this value to get the median value of the step, + // which is the best approximation. This is how we get the most precise result with + // only four bits. + final double stepSize = + (double)(MAX_TERMINAL_FREQUENCY - unigramFrequency) / (1.5 + MAX_BIGRAM_FREQUENCY); + final double firstStepStart = 1 + unigramFrequency + (stepSize / 2.0); + final int discretizedFrequency = (int)((bigramFrequency - firstStepStart) / stepSize); + // If the bigram freq is less than half-a-step higher than the unigram freq, we get -1 + // here. The best approximation would be the unigram freq itself, so we should not + // include this bigram in the dictionary. For now, register as 0, and live with the + // small over-estimation that we get in this case. TODO: actually remove this bigram + // if discretizedFrequency < 0. + final int finalBigramFrequency = discretizedFrequency > 0 ? discretizedFrequency : 0; + bigramFlags += finalBigramFrequency & FLAG_ATTRIBUTE_FREQUENCY; return bigramFlags; } @@ -823,7 +857,7 @@ public class BinaryDictInputOutput { + index + " <> " + group.mCachedAddress); groupAddress += GROUP_FLAGS_SIZE + getGroupCharactersSize(group); // Sanity checks. - if (group.mFrequency > MAX_TERMINAL_FREQUENCY) { + if (DBG && group.mFrequency > MAX_TERMINAL_FREQUENCY) { throw new RuntimeException("A node has a frequency > " + MAX_TERMINAL_FREQUENCY + " : " + group.mFrequency); } @@ -1030,7 +1064,7 @@ public class BinaryDictInputOutput { MakedictLog.i("Computing addresses..."); computeAddresses(dict, flatNodes); MakedictLog.i("Checking array..."); - checkFlatNodeArray(flatNodes); + if (DBG) checkFlatNodeArray(flatNodes); // Create a buffer that matches the final dictionary size. final Node lastNode = flatNodes.get(flatNodes.size() - 1); @@ -1044,7 +1078,7 @@ public class BinaryDictInputOutput { dataEndOffset = writePlacedNode(dict, buffer, n); } - showStatistics(flatNodes); + if (DBG) showStatistics(flatNodes); destination.write(buffer, 0, dataEndOffset); diff --git a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java index b08702e47..c467ef7d4 100644 --- a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java +++ b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java @@ -28,6 +28,8 @@ import java.util.LinkedList; */ public class FusionDictionary implements Iterable<Word> { + private static final boolean DBG = MakedictLog.DBG; + /** * A node of the dictionary, containing several CharGroups. * @@ -159,6 +161,7 @@ public class FusionDictionary implements Iterable<Word> { * shortcut list. */ public WeightedString getShortcut(final String word) { + // TODO: Don't do a linear search if (mShortcutTargets != null) { final int size = mShortcutTargets.size(); for (int i = 0; i < size; ++i) { @@ -176,6 +179,7 @@ public class FusionDictionary implements Iterable<Word> { * Returns null if the word is not in the bigrams list. */ public WeightedString getBigram(final String word) { + // TODO: Don't do a linear search if (mBigrams != null) { final int size = mBigrams.size(); for (int i = 0; i < size; ++i) { @@ -265,31 +269,21 @@ public class FusionDictionary implements Iterable<Word> { /** * Helper method to convert a String to an int array. */ - static private int[] getCodePoints(String word) { - final int wordLength = word.length(); - int[] array = new int[word.codePointCount(0, wordLength)]; - for (int i = 0; i < wordLength; i = word.offsetByCodePoints(i, 1)) { - array[i] = word.codePointAt(i); - } - return array; - } - - /** - * Helper method to add all words in a list as 0-frequency entries - * - * These words are added when shortcuts targets or bigrams are not found in the dictionary - * yet. The same words may be added later with an actual frequency - this is handled by - * the private version of add(). - */ - private void addNeutralWords(final ArrayList<WeightedString> words) { - if (null != words) { - for (WeightedString word : words) { - final CharGroup t = findWordInTree(mRoot, word.mWord); - if (null == t) { - add(getCodePoints(word.mWord), 0, null); - } - } - } + static private int[] getCodePoints(final String word) { + // TODO: this is a copy-paste of the contents of StringUtils.toCodePointArray, + // which is not visible from the makedict package. Factor this code. + final char[] characters = word.toCharArray(); + final int length = characters.length; + final int[] codePoints = new int[Character.codePointCount(characters, 0, length)]; + int codePoint = Character.codePointAt(characters, 0); + int dsti = 0; + for (int srci = Character.charCount(codePoint); + srci < length; srci += Character.charCount(codePoint), ++dsti) { + codePoints[dsti] = codePoint; + codePoint = Character.codePointAt(characters, srci); + } + codePoints[dsti] = codePoint; + return codePoints; } /** @@ -339,7 +333,6 @@ public class FusionDictionary implements Iterable<Word> { if (charGroup != null) { final CharGroup charGroup2 = findWordInTree(mRoot, word2); if (charGroup2 == null) { - // TODO: refactor with the identical code in addNeutralWords add(getCodePoints(word2), 0, null); } charGroup.addBigram(word2, frequency); @@ -386,7 +379,7 @@ public class FusionDictionary implements Iterable<Word> { Arrays.copyOfRange(word, charIndex, word.length), shortcutTargets, null /* bigrams */, frequency); currentNode.mData.add(insertionIndex, newGroup); - checkStack(currentNode); + if (DBG) checkStack(currentNode); } else { // There is a word with a common prefix. if (differentCharIndex == currentGroup.mChars.length) { @@ -437,7 +430,7 @@ public class FusionDictionary implements Iterable<Word> { } currentNode.mData.set(nodeIndex, newParent); } - checkStack(currentNode); + if (DBG) checkStack(currentNode); } } } @@ -514,21 +507,21 @@ public class FusionDictionary implements Iterable<Word> { */ public static CharGroup findWordInTree(Node node, final String s) { int index = 0; - final StringBuilder checker = new StringBuilder(); + final StringBuilder checker = DBG ? new StringBuilder() : null; CharGroup currentGroup; do { int indexOfGroup = findIndexOfChar(node, s.codePointAt(index)); if (CHARACTER_NOT_FOUND == indexOfGroup) return null; currentGroup = node.mData.get(indexOfGroup); - checker.append(new String(currentGroup.mChars, 0, currentGroup.mChars.length)); + if (DBG) checker.append(new String(currentGroup.mChars, 0, currentGroup.mChars.length)); index += currentGroup.mChars.length; if (index < s.length()) { node = currentGroup.mChildren; } } while (null != node && index < s.length()); - if (!s.equals(checker.toString())) return null; + if (DBG && !s.equals(checker.toString())) return null; return currentGroup; } diff --git a/java/src/com/android/inputmethod/latin/makedict/MakedictLog.java b/java/src/com/android/inputmethod/latin/makedict/MakedictLog.java index 1281c7e3a..3f0cd0796 100644 --- a/java/src/com/android/inputmethod/latin/makedict/MakedictLog.java +++ b/java/src/com/android/inputmethod/latin/makedict/MakedictLog.java @@ -22,7 +22,7 @@ import android.util.Log; * Wrapper to redirect log events to the right output medium. */ public class MakedictLog { - private static final boolean DBG = false; + public static final boolean DBG = false; private static final String TAG = MakedictLog.class.getSimpleName(); public static void d(String message) { |