diff options
author | 2010-07-16 13:02:45 +0900 | |
---|---|---|
committer | 2010-07-16 13:02:45 +0900 | |
commit | b9c57e6540502ef3b2941235bbbede4dedfdcfb7 (patch) | |
tree | c8ccdc1455ab21eaef06c2b053ac7cada78de9f9 /java/src/com/android/inputmethod/latin | |
parent | b5a0d8ef42d9e0be4e56be04637c167074447744 (diff) | |
parent | 2a118d844e0b7dd3e01f25e937b02b05711768a6 (diff) | |
download | latinime-b9c57e6540502ef3b2941235bbbede4dedfdcfb7.tar.gz latinime-b9c57e6540502ef3b2941235bbbede4dedfdcfb7.tar.xz latinime-b9c57e6540502ef3b2941235bbbede4dedfdcfb7.zip |
Merge remote branch 'goog/master'
Conflicts:
java/res/xml/prefs.xml
java/src/com/android/inputmethod/latin/BinaryDictionary.java
java/src/com/android/inputmethod/latin/Dictionary.java
java/src/com/android/inputmethod/latin/ExpandableDictionary.java
java/src/com/android/inputmethod/latin/LatinIME.java
java/src/com/android/inputmethod/latin/Suggest.java
tests/src/com/android/inputmethod/latin/tests/SuggestTests.java
Diffstat (limited to 'java/src/com/android/inputmethod/latin')
7 files changed, 249 insertions, 61 deletions
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java index 9e7dfa3a1..fad56c5d9 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java @@ -32,9 +32,9 @@ import android.util.Log; public class BinaryDictionary extends Dictionary { private static final String TAG = "BinaryDictionary"; - public static final int MAX_WORD_LENGTH = 48; private static final int MAX_ALTERNATIVES = 16; private static final int MAX_WORDS = 16; + private static final int MAX_BIGRAMS = 255; // TODO Probably don't need all 255 private static final int TYPED_LETTER_MULTIPLIER = 2; private static final boolean ENABLE_MISSED_CHARACTERS = true; @@ -44,7 +44,9 @@ public class BinaryDictionary extends Dictionary { private int mDictLength; private int[] mInputCodes = new int[MAX_WORD_LENGTH * MAX_ALTERNATIVES]; private char[] mOutputChars = new char[MAX_WORD_LENGTH * MAX_WORDS]; + private char[] mOutputChars_bigrams = new char[MAX_WORD_LENGTH * MAX_BIGRAMS]; private int[] mFrequencies = new int[MAX_WORDS]; + private int[] mFrequencies_bigrams = new int[MAX_BIGRAMS]; // Keep a reference to the native dict direct buffer in Java to avoid // unexpected deallocation of the direct buffer. private ByteBuffer mNativeDictDirectBuffer; @@ -72,7 +74,7 @@ public class BinaryDictionary extends Dictionary { /** * Create a dictionary from a byte buffer. This is used for testing. 
* @param context application context for reading resources - * @param resId the resource containing the raw binary dictionary + * @param byteBuffer a ByteBuffer containing the binary dictionary */ public BinaryDictionary(Context context, ByteBuffer byteBuffer, int dicTypeId) { if (byteBuffer != null) { @@ -97,6 +99,8 @@ public class BinaryDictionary extends Dictionary { char[] outputChars, int[] frequencies, int maxWordLength, int maxWords, int maxAlternatives, int skipPos, int[] nextLettersFrequencies, int nextLettersSize); + private native int getBigramsNative(int nativeData, char[] prevWord, int prevWordLength, + char[] outputChars, int[] frequencies, int maxWordLength, int maxBigrams); private final void loadDictionary(Context context, int resId) { InputStream is = context.getResources().openRawResource(resId); @@ -124,6 +128,30 @@ public class BinaryDictionary extends Dictionary { } @Override + public void getBigrams(final WordComposer composer, final CharSequence previousWord, + final WordCallback callback, int[] nextLettersFrequencies) { + + char[] chars = previousWord.toString().toCharArray(); + Arrays.fill(mOutputChars_bigrams, (char) 0); + Arrays.fill(mFrequencies_bigrams, 0); + + int count = getBigramsNative(mNativeDict, chars, chars.length, mOutputChars_bigrams, + mFrequencies_bigrams, MAX_WORD_LENGTH, MAX_BIGRAMS); + for (int j = 0; j < count; j++) { + if (mFrequencies_bigrams[j] < 1) break; + int start = j * MAX_WORD_LENGTH; + int len = 0; + while (mOutputChars_bigrams[start + len] != 0) { + len++; + } + if (len > 0) { + callback.addWord(mOutputChars_bigrams, start, len, mFrequencies_bigrams[j], + DataType.BIGRAM); + } + } + } + + @Override public void getWords(final WordComposer codes, final WordCallback callback, int[] nextLettersFrequencies) { final int codesSize = codes.size(); @@ -168,7 +196,7 @@ public class BinaryDictionary extends Dictionary { len++; } if (len > 0) { - callback.addWord(mOutputChars, start, len, mFrequencies[j], mDicTypeId); + 
callback.addWord(mOutputChars, start, len, mFrequencies[j], mDicTypeId, DataType.UNIGRAM); } } } diff --git a/java/src/com/android/inputmethod/latin/Dictionary.java b/java/src/com/android/inputmethod/latin/Dictionary.java index e38a32fa1..a02edeee5 100644 --- a/java/src/com/android/inputmethod/latin/Dictionary.java +++ b/java/src/com/android/inputmethod/latin/Dictionary.java @@ -21,7 +21,9 @@ package com.android.inputmethod.latin; * strokes. */ abstract public class Dictionary { - + + protected static final int MAX_WORD_LENGTH = 48; + /** * Whether or not to replicate the typed word in the suggested list, even if it's valid. */ @@ -31,7 +33,11 @@ abstract public class Dictionary { * The weight to give to a word if its length is the same as the number of typed characters. */ protected static final int FULL_WORD_FREQ_MULTIPLIER = 2; - + + public static enum DataType { + UNIGRAM, BIGRAM + } + /** * Interface to be implemented by classes requesting words to be fetched from the dictionary. * @see #getWords(WordComposer, WordCallback) @@ -46,9 +52,11 @@ abstract public class Dictionary { * @param frequency the frequency of occurrence. This is normalized between 1 and 255, but * can exceed those limits * @param dicTypeId of the dictionary where word was from + * @param dataType tells type of this data * @return true if the word was added, false if no more words are required */ - boolean addWord(char[] word, int wordOffset, int wordLength, int frequency, int dicTypeId); + boolean addWord(char[] word, int wordOffset, int wordLength, int frequency, int dicTypeId, + DataType dataType); } /** @@ -66,6 +74,21 @@ abstract public class Dictionary { int[] nextLettersFrequencies); /** + * Searches for pairs in the bigram dictionary that matches the previous word and all the + * possible words following are added through the callback object. 
+ * @param composer the key sequence to match + * @param callback the callback object to send possible word following previous word + * @param nextLettersFrequencies array of frequencies of next letters that could follow the + * word so far. For instance, "bracke" can be followed by "t", so array['t'] will have + * a non-zero value on returning from this method. + * Pass in null if you don't want the dictionary to look up next letters. + */ + public void getBigrams(final WordComposer composer, final CharSequence previousWord, + final WordCallback callback, int[] nextLettersFrequencies) { + // empty base implementation + } + + /** * Checks if the given word occurs in the dictionary * @param word the word to search for. The search should be case-insensitive. * @return true if the word exists, false otherwise diff --git a/java/src/com/android/inputmethod/latin/EditingUtil.java b/java/src/com/android/inputmethod/latin/EditingUtil.java index 7571f1daf..5133c60ca 100644 --- a/java/src/com/android/inputmethod/latin/EditingUtil.java +++ b/java/src/com/android/inputmethod/latin/EditingUtil.java @@ -16,6 +16,8 @@ package com.android.inputmethod.latin; +import java.util.regex.Pattern; + import android.view.inputmethod.ExtractedText; import android.view.inputmethod.ExtractedTextRequest; import android.view.inputmethod.InputConnection; @@ -24,6 +26,11 @@ import android.view.inputmethod.InputConnection; * Utility methods to deal with editing text through an InputConnection. 
*/ public class EditingUtil { + /** + * Number of characters we want to look back in order to identify the previous word + */ + public static final int LOOKBACK_CHARACTER_NUM = 15; + private EditingUtil() {}; /** @@ -175,4 +182,13 @@ public class EditingUtil { private static boolean isWhitespace(int code, String whitespace) { return whitespace.contains(String.valueOf((char) code)); } + + private static final Pattern spaceRegex = Pattern.compile("\\s+"); + + public static CharSequence getPreviousWord(InputConnection connection) { + //TODO: Should fix this. This could be slow! + CharSequence prev = connection.getTextBeforeCursor(LOOKBACK_CHARACTER_NUM, 0); + String[] w = spaceRegex.split(prev); + return (w.length >= 2) ? w[w.length-2] : null; + } } diff --git a/java/src/com/android/inputmethod/latin/ExpandableDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableDictionary.java index e2a812796..d8a9547c1 100644 --- a/java/src/com/android/inputmethod/latin/ExpandableDictionary.java +++ b/java/src/com/android/inputmethod/latin/ExpandableDictionary.java @@ -266,7 +266,8 @@ public class ExpandableDictionary extends Dictionary { if (completion) { word[depth] = c; if (terminal) { - if (!callback.addWord(word, 0, depth + 1, freq * snr, mDicTypeId)) { + if (!callback.addWord(word, 0, depth + 1, freq * snr, mDicTypeId, + DataType.UNIGRAM)) { return; } // Add to frequency of next letters for predictive correction @@ -304,7 +305,8 @@ public class ExpandableDictionary extends Dictionary { || !same(word, depth + 1, codes.getTypedWord())) { int finalFreq = freq * snr * addedAttenuation; if (skipPos < 0) finalFreq *= FULL_WORD_FREQ_MULTIPLIER; - callback.addWord(word, 0, depth + 1, finalFreq, mDicTypeId); + callback.addWord(word, 0, depth + 1, finalFreq, mDicTypeId, + DataType.UNIGRAM); } } if (children != null) { diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java index 2cc92e133..2527c81fa 100644 
--- a/java/src/com/android/inputmethod/latin/LatinIME.java +++ b/java/src/com/android/inputmethod/latin/LatinIME.java @@ -48,8 +48,8 @@ import android.view.HapticFeedbackConstants; import android.view.KeyEvent; import android.view.LayoutInflater; import android.view.View; -import android.view.ViewParent; import android.view.ViewGroup; +import android.view.ViewParent; import android.view.Window; import android.view.WindowManager; import android.view.inputmethod.CompletionInfo; @@ -88,6 +88,7 @@ public class LatinIME extends InputMethodService private static final String PREF_QUICK_FIXES = "quick_fixes"; private static final String PREF_SHOW_SUGGESTIONS = "show_suggestions"; private static final String PREF_AUTO_COMPLETE = "auto_complete"; + private static final String PREF_BIGRAM_SUGGESTIONS = "bigram_suggestion"; private static final String PREF_VOICE_MODE = "voice_mode"; // Whether or not the user has used voice input before (and thus, whether to show the @@ -186,6 +187,7 @@ public class LatinIME extends InputMethodService private boolean mAutoSpace; private boolean mJustAddedAutoSpace; private boolean mAutoCorrectEnabled; + private boolean mBigramSuggestionEnabled; private boolean mAutoCorrectOn; private boolean mCapsLock; private boolean mPasswordText; @@ -713,12 +715,14 @@ public class LatinIME extends InputMethodService // TODO: Uncomment this block when we enable re-editing feature // If a word is selected - /*if ((candidatesStart == candidatesEnd || newSelStart != oldSelStart) + /*if (isPredictionOn() && mJustRevertedSeparator == null + && (candidatesStart == candidatesEnd || newSelStart != oldSelStart) && (newSelStart < newSelEnd - 1 || (!mPredicting)) && !mVoiceInputHighlighted) { - abortCorrection(false); if (isCursorTouchingWord() || mLastSelectionStart < mLastSelectionEnd) { postUpdateOldSuggestions(); + } else { + abortCorrection(false); } }*/ } @@ -1113,6 +1117,8 @@ public class LatinIME extends InputMethodService InputConnection ic = 
getCurrentInputConnection(); if (ic == null) return; + ic.beginBatchEdit(); + if (mAfterVoiceInput) { // Don't log delete if the user is pressing delete at // the beginning of the text box (hence not deleting anything) @@ -1145,6 +1151,7 @@ public class LatinIME extends InputMethodService TextEntryState.backspace(); if (TextEntryState.getState() == TextEntryState.STATE_UNDO_COMMIT) { revertLastWord(deleteChar); + ic.endBatchEdit(); return; } else if (mEnteredText != null && sameAsTextBeforeCursor(ic, mEnteredText)) { ic.deleteSurroundingText(mEnteredText.length(), 0); @@ -1155,6 +1162,7 @@ public class LatinIME extends InputMethodService } } mJustRevertedSeparator = null; + ic.endBatchEdit(); } private void handleShift() { @@ -1312,9 +1320,10 @@ public class LatinIME extends InputMethodService mWord.reset(); return; } - TypedWordAlternatives entry = new TypedWordAlternatives(result, mWord); - // Create a new WordComposer as the old one is being saved for later use - mWord = new WordComposer(mWord); + // Make a copy of the CharSequence, since it is/could be a mutable CharSequence + final String resultCopy = result.toString(); + TypedWordAlternatives entry = new TypedWordAlternatives(resultCopy, + new WordComposer(mWord)); mWordHistory.add(entry); } @@ -1569,8 +1578,7 @@ public class LatinIME extends InputMethodService } private List<CharSequence> getTypedSuggestions(WordComposer word) { - List<CharSequence> stringList = mSuggest.getSuggestions( - mKeyboardSwitcher.getInputView(), word, false); + List<CharSequence> stringList = mSuggest.getSuggestions(mKeyboardSwitcher.getInputView(), word, false, null); return stringList; } @@ -1581,8 +1589,14 @@ public class LatinIME extends InputMethodService } private void showSuggestions(WordComposer word) { - List<CharSequence> stringList = mSuggest.getSuggestions( - mKeyboardSwitcher.getInputView(), word, false); + //long startTime = System.currentTimeMillis(); // TIME MEASUREMENT! 
+ // TODO Maybe need better way of retrieving previous word + CharSequence prevWord = EditingUtil.getPreviousWord(getCurrentInputConnection()); + List<CharSequence> stringList = mSuggest.getSuggestions(mKeyboardSwitcher.getInputView(), word, false, + prevWord); + //long stopTime = System.currentTimeMillis(); // TIME MEASUREMENT! + //Log.d("LatinIME","Suggest Total Time - " + (stopTime - startTime)); + int[] nextLettersFrequencies = mSuggest.getNextLettersFrequencies(); ((LatinKeyboard) mKeyboardSwitcher.getInputView().getKeyboard()).setPreferredLetters( @@ -1699,7 +1713,8 @@ public class LatinIME extends InputMethodService // Fool the state watcher so that a subsequent backspace will not do a revert TextEntryState.typedCharacter((char) KEYCODE_SPACE, true); - if (index == 0 && mCorrectionMode > 0 && !mSuggest.isValidWord(suggestion)) { + if (index == 0 && mCorrectionMode > 0 && !mSuggest.isValidWord(suggestion) + && !mSuggest.isValidWord(suggestion.toString().toLowerCase())) { mCandidateView.showAddToDictionaryHint(suggestion); } if (ic != null) { @@ -1713,9 +1728,9 @@ public class LatinIME extends InputMethodService InputConnection ic = getCurrentInputConnection(); EditingUtil.Range range = new EditingUtil.Range(); String wordToBeReplaced = EditingUtil.getWordAtCursor(getCurrentInputConnection(), - mWordSeparators, range).trim(); + mWordSeparators, range); if (!mWordToSuggestions.containsKey(wordToBeReplaced)) { - wordToBeReplaced = wordToBeReplaced.toLowerCase(); + wordToBeReplaced = wordToBeReplaced.toLowerCase(); } if (mWordToSuggestions.containsKey(wordToBeReplaced)) { List<CharSequence> suggestions = mWordToSuggestions.get(wordToBeReplaced); @@ -1743,9 +1758,6 @@ public class LatinIME extends InputMethodService InputConnection ic = getCurrentInputConnection(); if (ic != null) { rememberReplacedWord(suggestion); - if (mSuggestionShouldReplaceCurrentWord) { - EditingUtil.deleteWordAtCursor(ic, getWordSeparators()); - } if 
(!VoiceInput.DELETE_SYMBOL.equals(suggestion)) { ic.commitText(suggestion, 1); } @@ -1772,9 +1784,8 @@ public class LatinIME extends InputMethodService } if (!mPredicting && isCursorTouchingWord()) { EditingUtil.Range range = new EditingUtil.Range(); - CharSequence touching = - EditingUtil.getWordAtCursor(getCurrentInputConnection(), mWordSeparators, - range); + CharSequence touching = EditingUtil.getWordAtCursor(getCurrentInputConnection(), + mWordSeparators, range); if (touching != null && touching.length() > 1) { if (mWordSeparators.indexOf(touching.charAt(touching.length() - 1)) > 0) { touching = touching.toString().substring(0, touching.length() - 1); @@ -1835,7 +1846,7 @@ public class LatinIME extends InputMethodService foundWord); showCorrections(alternatives); if (foundWord != null) { - mWord = foundWord; + mWord = new WordComposer(foundWord); } else { mWord.reset(); } @@ -1868,6 +1879,7 @@ public class LatinIME extends InputMethodService private void underlineWord(CharSequence word, int left, int right) { InputConnection ic = getCurrentInputConnection(); if (ic == null) return; + ic.finishComposingText(); ic.deleteSurroundingText(left, right); ic.setComposingText(word, 1); ic.setSelection(mLastSelectionStart, mLastSelectionStart); @@ -1912,7 +1924,6 @@ public class LatinIME extends InputMethodService if (!mPredicting && length > 0) { final InputConnection ic = getCurrentInputConnection(); mPredicting = true; - ic.beginBatchEdit(); mJustRevertedSeparator = ic.getTextBeforeCursor(1, 0); if (deleteChar) ic.deleteSurroundingText(1, 0); int toDelete = mCommittedLength; @@ -1924,7 +1935,6 @@ public class LatinIME extends InputMethodService ic.deleteSurroundingText(toDelete, 0); ic.setComposingText(mComposing, 1); TextEntryState.backspace(); - ic.endBatchEdit(); postUpdateSuggestions(); } else { sendDownUpKeyEvents(KeyEvent.KEYCODE_DEL); @@ -2139,6 +2149,8 @@ public class LatinIME extends InputMethodService mCorrectionMode = (mAutoCorrectOn && 
mAutoCorrectEnabled) ? Suggest.CORRECTION_FULL : (mAutoCorrectOn ? Suggest.CORRECTION_BASIC : Suggest.CORRECTION_NONE); + mCorrectionMode = (mBigramSuggestionEnabled && mAutoCorrectOn && mAutoCorrectEnabled) + ? Suggest.CORRECTION_FULL_BIGRAM : mCorrectionMode; if (mSuggest != null) { mSuggest.setCorrectionMode(mCorrectionMode); } @@ -2205,6 +2217,7 @@ public class LatinIME extends InputMethodService } mAutoCorrectEnabled = sp.getBoolean(PREF_AUTO_COMPLETE, mResources.getBoolean(R.bool.enable_autocorrect)) & mShowSuggestions; + mBigramSuggestionEnabled = sp.getBoolean(PREF_BIGRAM_SUGGESTIONS, true) & mShowSuggestions; updateCorrectionMode(); updateAutoTextEnabled(mResources.getConfiguration().locale); mLanguageSwitcher.loadLocales(sp); diff --git a/java/src/com/android/inputmethod/latin/Suggest.java b/java/src/com/android/inputmethod/latin/Suggest.java index e7b9e5d69..6705e9a36 100755 --- a/java/src/com/android/inputmethod/latin/Suggest.java +++ b/java/src/com/android/inputmethod/latin/Suggest.java @@ -37,6 +37,21 @@ public class Suggest implements Dictionary.WordCallback { public static final int CORRECTION_NONE = 0; public static final int CORRECTION_BASIC = 1; public static final int CORRECTION_FULL = 2; + public static final int CORRECTION_FULL_BIGRAM = 3; + + /** + * Words that appear in both bigram and unigram data get a multiplier ranging from + * BIGRAM_MULTIPLIER_MIN to BIGRAM_MULTIPLIER_MAX depending on the frequency score from + * bigram data. + */ + public static final double BIGRAM_MULTIPLIER_MIN = 1.2; + public static final double BIGRAM_MULTIPLIER_MAX = 1.5; + + /** + * Maximum possible bigram frequency. Will depend on how many bits are being used in data + * structure. Maximum bigram frequency will get the BIGRAM_MULTIPLIER_MAX as the multiplier. 
+ */ + public static final int MAXIMUM_BIGRAM_FREQUENCY = 127; public static final int DIC_USER_TYPED = 0; public static final int DIC_MAIN = 1; @@ -57,10 +72,13 @@ public class Suggest implements Dictionary.WordCallback { private Dictionary mContactsDictionary; private int mPrefMaxSuggestions = 12; + private int mPrefMaxBigrams = 255; private boolean mAutoTextEnabled; private int[] mPriorities = new int[mPrefMaxSuggestions]; + private int[] mBigramPriorities = new int[mPrefMaxBigrams]; + // Handle predictive correction for only the first 1280 characters for performance reasons // If we support scripts that need latin characters beyond that, we should probably use some // kind of a sparse array or language specific list with a mapping lookup table. @@ -68,6 +86,7 @@ public class Suggest implements Dictionary.WordCallback { // latin characters. private int[] mNextLettersFrequencies = new int[1280]; private ArrayList<CharSequence> mSuggestions = new ArrayList<CharSequence>(); + private ArrayList<CharSequence> mBigramSuggestions = new ArrayList<CharSequence>(); private ArrayList<CharSequence> mStringPool = new ArrayList<CharSequence>(); private boolean mHaveCorrection; private CharSequence mOriginalWord; @@ -88,7 +107,7 @@ public class Suggest implements Dictionary.WordCallback { private void initPool() { for (int i = 0; i < mPrefMaxSuggestions; i++) { - StringBuilder sb = new StringBuilder(32); + StringBuilder sb = new StringBuilder(Dictionary.MAX_WORD_LENGTH); mStringPool.add(sb); } } @@ -140,9 +159,10 @@ public class Suggest implements Dictionary.WordCallback { } mPrefMaxSuggestions = maxSuggestions; mPriorities = new int[mPrefMaxSuggestions]; - collectGarbage(); + mBigramPriorities = new int[mPrefMaxBigrams]; + collectGarbage(mSuggestions, mPrefMaxSuggestions); while (mStringPool.size() < mPrefMaxSuggestions) { - StringBuilder sb = new StringBuilder(32); + StringBuilder sb = new StringBuilder(Dictionary.MAX_WORD_LENGTH); mStringPool.add(sb); } } @@ -177,18 +197,17 
@@ public class Suggest implements Dictionary.WordCallback { /** * Returns a list of words that match the list of character codes passed in. * This list will be overwritten the next time this function is called. - * @param a view for retrieving the context for AutoText - * @param codes the list of codes. Each list item contains an array of character codes - * in order of probability where the character at index 0 in the array has the highest - * probability. + * @param view a view for retrieving the context for AutoText + * @param wordComposer contains what is currently being typed + * @param prevWordForBigram previous word (used only for bigram) * @return list of suggestions. */ public List<CharSequence> getSuggestions(View view, WordComposer wordComposer, - boolean includeTypedWordIfValid) { + boolean includeTypedWordIfValid, CharSequence prevWordForBigram) { LatinImeLogger.onStartSuggestion(); mHaveCorrection = false; mCapitalize = wordComposer.isCapitalized(); - collectGarbage(); + collectGarbage(mSuggestions, mPrefMaxSuggestions); Arrays.fill(mPriorities, 0); Arrays.fill(mNextLettersFrequencies, 0); @@ -203,7 +222,38 @@ public class Suggest implements Dictionary.WordCallback { } // Search the dictionary only if there are at least 2 characters - if (wordComposer.size() > 1) { + if (wordComposer.size() == 1 && (mCorrectionMode == CORRECTION_FULL_BIGRAM + || mCorrectionMode == CORRECTION_BASIC)) { + // At first character, just get the bigrams + Arrays.fill(mBigramPriorities, 0); + collectGarbage(mBigramSuggestions, mPrefMaxBigrams); + + if (!TextUtils.isEmpty(prevWordForBigram)) { + CharSequence lowerPrevWord = prevWordForBigram.toString().toLowerCase(); + if (mMainDict.isValidWord(lowerPrevWord)) { + prevWordForBigram = lowerPrevWord; + } + mMainDict.getBigrams(wordComposer, prevWordForBigram, this, + mNextLettersFrequencies); + char currentChar = wordComposer.getTypedWord().charAt(0); + int count = 0; + int bigramSuggestionSize = mBigramSuggestions.size(); + 
for (int i = 0; i < bigramSuggestionSize; i++) { + if (mBigramSuggestions.get(i).charAt(0) == currentChar) { + int poolSize = mStringPool.size(); + StringBuilder sb = poolSize > 0 ? + (StringBuilder) mStringPool.remove(poolSize - 1) + : new StringBuilder(Dictionary.MAX_WORD_LENGTH); + sb.setLength(0); + sb.append(mBigramSuggestions.get(i)); + mSuggestions.add(count++, sb); + if (count > mPrefMaxSuggestions) break; + } + } + } + + } else if (wordComposer.size() > 1) { + // Search the dictionary only if there are at least 2 characters if (mUserDictionary != null || mContactsDictionary != null) { if (mUserDictionary != null) { mUserDictionary.getWords(wordComposer, this, mNextLettersFrequencies); @@ -213,21 +263,26 @@ public class Suggest implements Dictionary.WordCallback { } if (mSuggestions.size() > 0 && isValidWord(mOriginalWord) - && mCorrectionMode == CORRECTION_FULL) { + && (mCorrectionMode == CORRECTION_FULL + || mCorrectionMode == CORRECTION_FULL_BIGRAM)) { mHaveCorrection = true; } } mMainDict.getWords(wordComposer, this, mNextLettersFrequencies); - if (mCorrectionMode == CORRECTION_FULL && mSuggestions.size() > 0) { + if ((mCorrectionMode == CORRECTION_FULL || mCorrectionMode == CORRECTION_FULL_BIGRAM) + && mSuggestions.size() > 0) { mHaveCorrection = true; } } + if (mOriginalWord != null) { mSuggestions.add(0, mOriginalWord.toString()); } - + // Check if the first suggestion has a minimum number of characters in common - if (mCorrectionMode == CORRECTION_FULL && mSuggestions.size() > 1) { + if (wordComposer.size() > 1 && mSuggestions.size() > 1 + && (mCorrectionMode == CORRECTION_FULL + || mCorrectionMode == CORRECTION_FULL_BIGRAM)) { if (!haveSufficientCommonality(mLowerOriginalWord, mSuggestions.get(1))) { mHaveCorrection = false; } @@ -258,7 +313,6 @@ public class Suggest implements Dictionary.WordCallback { i++; } } - removeDupes(); return mSuggestions; } @@ -312,21 +366,50 @@ public class Suggest implements Dictionary.WordCallback { return false; } - 
public boolean addWord(final char[] word, final int offset, final int length, - final int freq, final int dicTypeId) { + public boolean addWord(final char[] word, final int offset, final int length, int freq, + final int dicTypeId, final Dictionary.DataType dataType) { + ArrayList<CharSequence> suggestions; + int[] priorities; + int prefMaxSuggestions; + if(dataType == Dictionary.DataType.BIGRAM) { + suggestions = mBigramSuggestions; + priorities = mBigramPriorities; + prefMaxSuggestions = mPrefMaxBigrams; + } else { + suggestions = mSuggestions; + priorities = mPriorities; + prefMaxSuggestions = mPrefMaxSuggestions; + } + int pos = 0; - final int[] priorities = mPriorities; - final int prefMaxSuggestions = mPrefMaxSuggestions; + // Check if it's the same word, only caps are different if (compareCaseInsensitive(mLowerOriginalWord, word, offset, length)) { pos = 0; } else { + if (dataType == Dictionary.DataType.UNIGRAM) { + // Check if the word was already added before (by bigram data) + int bigramSuggestion = searchBigramSuggestion(word,offset,length); + if(bigramSuggestion >= 0) { + // turn freq from bigram into multiplier specified above + double multiplier = (((double) mBigramPriorities[bigramSuggestion]) + / MAXIMUM_BIGRAM_FREQUENCY) + * (BIGRAM_MULTIPLIER_MAX - BIGRAM_MULTIPLIER_MIN) + + BIGRAM_MULTIPLIER_MIN; + /* Log.d("Suggest","bigram num: " + bigramSuggestion + + " wordB: " + mBigramSuggestions.get(bigramSuggestion).toString() + + " currentPriority: " + freq + " bigramPriority: " + + mBigramPriorities[bigramSuggestion] + + " multiplier: " + multiplier); */ + freq = (int)Math.round((freq * multiplier)); + } + } + // Check the last one's priority and bail if (priorities[prefMaxSuggestions - 1] >= freq) return true; while (pos < prefMaxSuggestions) { if (priorities[pos] < freq - || (priorities[pos] == freq && length < mSuggestions - .get(pos).length())) { + || (priorities[pos] == freq && length < suggestions.get(pos).length())) { break; } pos++; @@ -336,12 
+419,13 @@ public class Suggest implements Dictionary.WordCallback { if (pos >= prefMaxSuggestions) { return true; } + System.arraycopy(priorities, pos, priorities, pos + 1, prefMaxSuggestions - pos - 1); priorities[pos] = freq; int poolSize = mStringPool.size(); StringBuilder sb = poolSize > 0 ? (StringBuilder) mStringPool.remove(poolSize - 1) - : new StringBuilder(32); + : new StringBuilder(Dictionary.MAX_WORD_LENGTH); sb.setLength(0); if (mCapitalize) { sb.append(Character.toUpperCase(word[offset])); @@ -351,9 +435,9 @@ public class Suggest implements Dictionary.WordCallback { } else { sb.append(word, offset, length); } - mSuggestions.add(pos, sb); - if (mSuggestions.size() > prefMaxSuggestions) { - CharSequence garbage = mSuggestions.remove(prefMaxSuggestions); + suggestions.add(pos, sb); + if (suggestions.size() > prefMaxSuggestions) { + CharSequence garbage = suggestions.remove(prefMaxSuggestions); if (garbage instanceof StringBuilder) { mStringPool.add(garbage); } @@ -363,6 +447,26 @@ public class Suggest implements Dictionary.WordCallback { return true; } + private int searchBigramSuggestion(final char[] word, final int offset, final int length) { + // TODO This is almost O(n^2). Might need fix. 
+ // search whether the word appeared in bigram data + int bigramSuggestSize = mBigramSuggestions.size(); + for(int i = 0; i < bigramSuggestSize; i++) { + if(mBigramSuggestions.get(i).length() == length) { + boolean chk = true; + for(int j = 0; j < length; j++) { + if(mBigramSuggestions.get(i).charAt(j) != word[offset+j]) { + chk = false; + break; + } + } + if(chk) return i; + } + } + + return -1; + } + public boolean isValidWord(final CharSequence word) { if (word == null || word.length() == 0) { return false; @@ -373,21 +477,21 @@ public class Suggest implements Dictionary.WordCallback { || (mContactsDictionary != null && mContactsDictionary.isValidWord(word)); } - private void collectGarbage() { + private void collectGarbage(ArrayList<CharSequence> suggestions, int prefMaxSuggestions) { int poolSize = mStringPool.size(); - int garbageSize = mSuggestions.size(); - while (poolSize < mPrefMaxSuggestions && garbageSize > 0) { - CharSequence garbage = mSuggestions.get(garbageSize - 1); + int garbageSize = suggestions.size(); + while (poolSize < prefMaxSuggestions && garbageSize > 0) { + CharSequence garbage = suggestions.get(garbageSize - 1); if (garbage != null && garbage instanceof StringBuilder) { mStringPool.add(garbage); poolSize++; } garbageSize--; } - if (poolSize == mPrefMaxSuggestions + 1) { + if (poolSize == prefMaxSuggestions + 1) { Log.w("Suggest", "String pool got too big: " + poolSize); } - mSuggestions.clear(); + suggestions.clear(); } public void close() { diff --git a/java/src/com/android/inputmethod/latin/WordComposer.java b/java/src/com/android/inputmethod/latin/WordComposer.java index e2573a0a9..1ea74847a 100644 --- a/java/src/com/android/inputmethod/latin/WordComposer.java +++ b/java/src/com/android/inputmethod/latin/WordComposer.java @@ -55,7 +55,9 @@ public class WordComposer { mTypedWord = new StringBuilder(copy.mTypedWord); mCapsCount = copy.mCapsCount; mAutoCapitalized = copy.mAutoCapitalized; + mIsCapitalized = copy.mIsCapitalized; } + /** 
* Clear out the keys registered so far. */ |