diff options
Diffstat (limited to 'java/src/com/android/inputmethod/latin/Suggest.java')
-rwxr-xr-x | java/src/com/android/inputmethod/latin/Suggest.java | 166 |
1 files changed, 135 insertions, 31 deletions
diff --git a/java/src/com/android/inputmethod/latin/Suggest.java b/java/src/com/android/inputmethod/latin/Suggest.java index e7b9e5d69..6705e9a36 100755 --- a/java/src/com/android/inputmethod/latin/Suggest.java +++ b/java/src/com/android/inputmethod/latin/Suggest.java @@ -37,6 +37,21 @@ public class Suggest implements Dictionary.WordCallback { public static final int CORRECTION_NONE = 0; public static final int CORRECTION_BASIC = 1; public static final int CORRECTION_FULL = 2; + public static final int CORRECTION_FULL_BIGRAM = 3; + + /** + * Words that appear in both bigram and unigram data gets multiplier ranging from + * BIGRAM_MULTIPLIER_MIN to BIGRAM_MULTIPLIER_MAX depending on the frequency score from + * bigram data. + */ + public static final double BIGRAM_MULTIPLIER_MIN = 1.2; + public static final double BIGRAM_MULTIPLIER_MAX = 1.5; + + /** + * Maximum possible bigram frequency. Will depend on how many bits are being used in data + * structure. Maximum bigram freqeuncy will get the BIGRAM_MULTIPLIER_MAX as the multiplier. + */ + public static final int MAXIMUM_BIGRAM_FREQUENCY = 127; public static final int DIC_USER_TYPED = 0; public static final int DIC_MAIN = 1; @@ -57,10 +72,13 @@ public class Suggest implements Dictionary.WordCallback { private Dictionary mContactsDictionary; private int mPrefMaxSuggestions = 12; + private int mPrefMaxBigrams = 255; private boolean mAutoTextEnabled; private int[] mPriorities = new int[mPrefMaxSuggestions]; + private int[] mBigramPriorities = new int[mPrefMaxBigrams]; + // Handle predictive correction for only the first 1280 characters for performance reasons // If we support scripts that need latin characters beyond that, we should probably use some // kind of a sparse array or language specific list with a mapping lookup table. @@ -68,6 +86,7 @@ public class Suggest implements Dictionary.WordCallback { // latin characters. private int[] mNextLettersFrequencies = new int[1280]; private ArrayList<CharSequence> mSuggestions = new ArrayList<CharSequence>(); + private ArrayList<CharSequence> mBigramSuggestions = new ArrayList<CharSequence>(); private ArrayList<CharSequence> mStringPool = new ArrayList<CharSequence>(); private boolean mHaveCorrection; private CharSequence mOriginalWord; @@ -88,7 +107,7 @@ public class Suggest implements Dictionary.WordCallback { private void initPool() { for (int i = 0; i < mPrefMaxSuggestions; i++) { - StringBuilder sb = new StringBuilder(32); + StringBuilder sb = new StringBuilder(Dictionary.MAX_WORD_LENGTH); mStringPool.add(sb); } } @@ -140,9 +159,10 @@ public class Suggest implements Dictionary.WordCallback { } mPrefMaxSuggestions = maxSuggestions; mPriorities = new int[mPrefMaxSuggestions]; - collectGarbage(); + mBigramPriorities = new int[mPrefMaxBigrams]; + collectGarbage(mSuggestions, mPrefMaxSuggestions); while (mStringPool.size() < mPrefMaxSuggestions) { - StringBuilder sb = new StringBuilder(32); + StringBuilder sb = new StringBuilder(Dictionary.MAX_WORD_LENGTH); mStringPool.add(sb); } } @@ -177,18 +197,17 @@ public class Suggest implements Dictionary.WordCallback { /** * Returns a list of words that match the list of character codes passed in. * This list will be overwritten the next time this function is called. - * @param a view for retrieving the context for AutoText - * @param codes the list of codes. Each list item contains an array of character codes - * in order of probability where the character at index 0 in the array has the highest - * probability. + * @param view a view for retrieving the context for AutoText + * @param wordComposer contains what is currently being typed + * @param prevWordForBigram previous word (used only for bigram) * @return list of suggestions. */ public List<CharSequence> getSuggestions(View view, WordComposer wordComposer, - boolean includeTypedWordIfValid) { + boolean includeTypedWordIfValid, CharSequence prevWordForBigram) { LatinImeLogger.onStartSuggestion(); mHaveCorrection = false; mCapitalize = wordComposer.isCapitalized(); - collectGarbage(); + collectGarbage(mSuggestions, mPrefMaxSuggestions); Arrays.fill(mPriorities, 0); Arrays.fill(mNextLettersFrequencies, 0); @@ -203,7 +222,38 @@ public class Suggest implements Dictionary.WordCallback { } // Search the dictionary only if there are at least 2 characters - if (wordComposer.size() > 1) { + if (wordComposer.size() == 1 && (mCorrectionMode == CORRECTION_FULL_BIGRAM + || mCorrectionMode == CORRECTION_BASIC)) { + // At first character, just get the bigrams + Arrays.fill(mBigramPriorities, 0); + collectGarbage(mBigramSuggestions, mPrefMaxBigrams); + + if (!TextUtils.isEmpty(prevWordForBigram)) { + CharSequence lowerPrevWord = prevWordForBigram.toString().toLowerCase(); + if (mMainDict.isValidWord(lowerPrevWord)) { + prevWordForBigram = lowerPrevWord; + } + mMainDict.getBigrams(wordComposer, prevWordForBigram, this, + mNextLettersFrequencies); + char currentChar = wordComposer.getTypedWord().charAt(0); + int count = 0; + int bigramSuggestionSize = mBigramSuggestions.size(); + for (int i = 0; i < bigramSuggestionSize; i++) { + if (mBigramSuggestions.get(i).charAt(0) == currentChar) { + int poolSize = mStringPool.size(); + StringBuilder sb = poolSize > 0 ? + (StringBuilder) mStringPool.remove(poolSize - 1) + : new StringBuilder(Dictionary.MAX_WORD_LENGTH); + sb.setLength(0); + sb.append(mBigramSuggestions.get(i)); + mSuggestions.add(count++, sb); + if (count > mPrefMaxSuggestions) break; + } + } + } + + } else if (wordComposer.size() > 1) { + // Search the dictionary only if there are at least 2 characters if (mUserDictionary != null || mContactsDictionary != null) { if (mUserDictionary != null) { mUserDictionary.getWords(wordComposer, this, mNextLettersFrequencies); @@ -213,21 +263,26 @@ public class Suggest implements Dictionary.WordCallback { } if (mSuggestions.size() > 0 && isValidWord(mOriginalWord) - && mCorrectionMode == CORRECTION_FULL) { + && (mCorrectionMode == CORRECTION_FULL + || mCorrectionMode == CORRECTION_FULL_BIGRAM)) { mHaveCorrection = true; } } mMainDict.getWords(wordComposer, this, mNextLettersFrequencies); - if (mCorrectionMode == CORRECTION_FULL && mSuggestions.size() > 0) { + if ((mCorrectionMode == CORRECTION_FULL || mCorrectionMode == CORRECTION_FULL_BIGRAM) + && mSuggestions.size() > 0) { mHaveCorrection = true; } } + if (mOriginalWord != null) { mSuggestions.add(0, mOriginalWord.toString()); } - + // Check if the first suggestion has a minimum number of characters in common - if (mCorrectionMode == CORRECTION_FULL && mSuggestions.size() > 1) { + if (wordComposer.size() > 1 && mSuggestions.size() > 1 + && (mCorrectionMode == CORRECTION_FULL + || mCorrectionMode == CORRECTION_FULL_BIGRAM)) { if (!haveSufficientCommonality(mLowerOriginalWord, mSuggestions.get(1))) { mHaveCorrection = false; } @@ -258,7 +313,6 @@ public class Suggest implements Dictionary.WordCallback { i++; } } - removeDupes(); return mSuggestions; } @@ -312,21 +366,50 @@ public class Suggest implements Dictionary.WordCallback { return false; } - public boolean addWord(final char[] word, final int offset, final int length, - final int freq, final int dicTypeId) { + public boolean addWord(final char[] word, final int offset, final int length, int freq, + final int dicTypeId, final Dictionary.DataType dataType) { + ArrayList<CharSequence> suggestions; + int[] priorities; + int prefMaxSuggestions; + if(dataType == Dictionary.DataType.BIGRAM) { + suggestions = mBigramSuggestions; + priorities = mBigramPriorities; + prefMaxSuggestions = mPrefMaxBigrams; + } else { + suggestions = mSuggestions; + priorities = mPriorities; + prefMaxSuggestions = mPrefMaxSuggestions; + } + int pos = 0; - final int[] priorities = mPriorities; - final int prefMaxSuggestions = mPrefMaxSuggestions; + // Check if it's the same word, only caps are different if (compareCaseInsensitive(mLowerOriginalWord, word, offset, length)) { pos = 0; } else { + if (dataType == Dictionary.DataType.UNIGRAM) { + // Check if the word was already added before (by bigram data) + int bigramSuggestion = searchBigramSuggestion(word,offset,length); + if(bigramSuggestion >= 0) { + // turn freq from bigram into multiplier specified above + double multiplier = (((double) mBigramPriorities[bigramSuggestion]) + / MAXIMUM_BIGRAM_FREQUENCY) + * (BIGRAM_MULTIPLIER_MAX - BIGRAM_MULTIPLIER_MIN) + + BIGRAM_MULTIPLIER_MIN; + /* Log.d("Suggest","bigram num: " + bigramSuggestion + + " wordB: " + mBigramSuggestions.get(bigramSuggestion).toString() + + " currentPriority: " + freq + " bigramPriority: " + + mBigramPriorities[bigramSuggestion] + + " multiplier: " + multiplier); */ + freq = (int)Math.round((freq * multiplier)); + } + } + // Check the last one's priority and bail if (priorities[prefMaxSuggestions - 1] >= freq) return true; while (pos < prefMaxSuggestions) { if (priorities[pos] < freq - || (priorities[pos] == freq && length < mSuggestions - .get(pos).length())) { + || (priorities[pos] == freq && length < suggestions.get(pos).length())) { break; } pos++; @@ -336,12 +419,13 @@ public class Suggest implements Dictionary.WordCallback { if (pos >= prefMaxSuggestions) { return true; } + System.arraycopy(priorities, pos, priorities, pos + 1, prefMaxSuggestions - pos - 1); priorities[pos] = freq; int poolSize = mStringPool.size(); StringBuilder sb = poolSize > 0 ? (StringBuilder) mStringPool.remove(poolSize - 1) - : new StringBuilder(32); + : new StringBuilder(Dictionary.MAX_WORD_LENGTH); sb.setLength(0); if (mCapitalize) { sb.append(Character.toUpperCase(word[offset])); @@ -351,9 +435,9 @@ public class Suggest implements Dictionary.WordCallback { } else { sb.append(word, offset, length); } - mSuggestions.add(pos, sb); - if (mSuggestions.size() > prefMaxSuggestions) { - CharSequence garbage = mSuggestions.remove(prefMaxSuggestions); + suggestions.add(pos, sb); + if (suggestions.size() > prefMaxSuggestions) { + CharSequence garbage = suggestions.remove(prefMaxSuggestions); if (garbage instanceof StringBuilder) { mStringPool.add(garbage); } @@ -363,6 +447,26 @@ public class Suggest implements Dictionary.WordCallback { return true; } + private int searchBigramSuggestion(final char[] word, final int offset, final int length) { + // TODO This is almost O(n^2). Might need fix. + // search whether the word appeared in bigram data + int bigramSuggestSize = mBigramSuggestions.size(); + for(int i = 0; i < bigramSuggestSize; i++) { + if(mBigramSuggestions.get(i).length() == length) { + boolean chk = true; + for(int j = 0; j < length; j++) { + if(mBigramSuggestions.get(i).charAt(j) != word[offset+j]) { + chk = false; + break; + } + } + if(chk) return i; + } + } + + return -1; + } + public boolean isValidWord(final CharSequence word) { if (word == null || word.length() == 0) { return false; @@ -373,21 +477,21 @@ public class Suggest implements Dictionary.WordCallback { || (mContactsDictionary != null && mContactsDictionary.isValidWord(word)); } - private void collectGarbage() { + private void collectGarbage(ArrayList<CharSequence> suggestions, int prefMaxSuggestions) { int poolSize = mStringPool.size(); - int garbageSize = mSuggestions.size(); - while (poolSize < mPrefMaxSuggestions && garbageSize > 0) { - CharSequence garbage = mSuggestions.get(garbageSize - 1); + int garbageSize = suggestions.size(); + while (poolSize < prefMaxSuggestions && garbageSize > 0) { + CharSequence garbage = suggestions.get(garbageSize - 1); if (garbage != null && garbage instanceof StringBuilder) { mStringPool.add(garbage); poolSize++; } garbageSize--; } - if (poolSize == mPrefMaxSuggestions + 1) { + if (poolSize == prefMaxSuggestions + 1) { Log.w("Suggest", "String pool got too big: " + poolSize); } - mSuggestions.clear(); + suggestions.clear(); } public void close() { |