diff options
-rw-r--r-- | java/src/com/android/inputmethod/latin/BinaryDictionary.java | 4 | ||||
-rw-r--r-- | java/src/com/android/inputmethod/latin/Dictionary.java | 11 | ||||
-rw-r--r-- | java/src/com/android/inputmethod/latin/ExpandableDictionary.java | 6 | ||||
-rw-r--r-- | java/src/com/android/inputmethod/latin/LatinIME.java | 12 | ||||
-rw-r--r-- | java/src/com/android/inputmethod/latin/LatinImeLogger.java | 3 | ||||
-rw-r--r-- | java/src/com/android/inputmethod/latin/Suggest.java | 12 | ||||
-rw-r--r-- | java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java | 3 | ||||
-rw-r--r-- | native/src/correction.cpp | 147 | ||||
-rw-r--r-- | native/src/correction.h | 51 | ||||
-rw-r--r-- | native/src/defines.h | 4 | ||||
-rw-r--r-- | native/src/unigram_dictionary.cpp | 202 | ||||
-rw-r--r-- | native/src/unigram_dictionary.h | 20 | ||||
-rw-r--r-- | native/src/words_priority_queue.h | 2 | ||||
-rw-r--r-- | native/src/words_priority_queue_pool.h | 26 | ||||
-rw-r--r-- | tests/src/com/android/inputmethod/latin/InputLogicTests.java | 15 |
15 files changed, 220 insertions, 298 deletions
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java index 3692ac179..b82400046 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java @@ -162,7 +162,7 @@ public class BinaryDictionary extends Dictionary { } if (len > 0) { callback.addWord(mOutputChars_bigrams, start, len, mBigramScores[j], - mDicTypeId, DataType.BIGRAM); + mDicTypeId, Dictionary.BIGRAM); } } } @@ -182,7 +182,7 @@ public class BinaryDictionary extends Dictionary { } if (len > 0) { callback.addWord(mOutputChars, start, len, mScores[j], mDicTypeId, - DataType.UNIGRAM); + Dictionary.UNIGRAM); } } } diff --git a/java/src/com/android/inputmethod/latin/Dictionary.java b/java/src/com/android/inputmethod/latin/Dictionary.java index c35b42877..79bf33850 100644 --- a/java/src/com/android/inputmethod/latin/Dictionary.java +++ b/java/src/com/android/inputmethod/latin/Dictionary.java @@ -33,9 +33,8 @@ public abstract class Dictionary { */ protected static final int FULL_WORD_SCORE_MULTIPLIER = 2; - public static enum DataType { - UNIGRAM, BIGRAM - } + public static final int UNIGRAM = 0; + public static final int BIGRAM = 1; /** * Interface to be implemented by classes requesting words to be fetched from the dictionary. @@ -51,11 +50,11 @@ public abstract class Dictionary { * @param score the score of occurrence. This is normalized between 1 and 255, but * can exceed those limits * @param dicTypeId of the dictionary where word was from - * @param dataType tells type of this data + * @param dataType tells type of this data, either UNIGRAM or BIGRAM * @return true if the word was added, false if no more words are required */ boolean addWord(char[] word, int wordOffset, int wordLength, int score, int dicTypeId, - DataType dataType); + int dataType); } /** @@ -64,7 +63,7 @@ public abstract class Dictionary { * @param composer the key sequence to match * @param callback the callback object to send matched words to as possible candidates * @param proximityInfo the object for key proximity. May be ignored by some implementations. - * @see WordCallback#addWord(char[], int, int, int, int, DataType) + * @see WordCallback#addWord(char[], int, int, int, int, int) */ abstract public void getWords(final WordComposer composer, final WordCallback callback, final ProximityInfo proximityInfo); diff --git a/java/src/com/android/inputmethod/latin/ExpandableDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableDictionary.java index 7eec8e2ca..8e8adc1c2 100644 --- a/java/src/com/android/inputmethod/latin/ExpandableDictionary.java +++ b/java/src/com/android/inputmethod/latin/ExpandableDictionary.java @@ -301,7 +301,7 @@ public class ExpandableDictionary extends Dictionary { finalFreq = computeSkippedWordFinalFreq(freq, snr, mInputLength); } if (!callback.addWord(word, 0, depth + 1, finalFreq, mDicTypeId, - DataType.UNIGRAM)) { + Dictionary.UNIGRAM)) { return; } } @@ -342,7 +342,7 @@ public class ExpandableDictionary extends Dictionary { snr * addedAttenuation, mInputLength); } callback.addWord(word, 0, depth + 1, finalFreq, mDicTypeId, - DataType.UNIGRAM); + Dictionary.UNIGRAM); } } if (children != null) { @@ -506,7 +506,7 @@ public class ExpandableDictionary extends Dictionary { } while (node != null); callback.addWord(mLookedUpString, index, MAX_WORD_LENGTH - index, freq, mDicTypeId, - DataType.BIGRAM); + Dictionary.BIGRAM); } } diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java index 2c7b40960..f2fa7880f 100644 --- a/java/src/com/android/inputmethod/latin/LatinIME.java +++ b/java/src/com/android/inputmethod/latin/LatinIME.java @@ -1632,7 +1632,7 @@ public class LatinIME extends InputMethodServiceCompatWrapper implements Keyboar private CharSequence getTextWithUnderline(final CharSequence text) { return mComposingStateManager.isAutoCorrectionIndicatorOn() ? SuggestionSpanUtils.getTextWithAutoCorrectionIndicatorUnderline(this, text) - : mWordComposer.getTypedWord(); + : text; } private void handleClose() { @@ -2239,10 +2239,14 @@ public class LatinIME extends InputMethodServiceCompatWrapper implements Keyboar final CharSequence textBeforeCursor = ic.getTextBeforeCursor(2, 0); // NOTE: This does not work with surrogate pairs. Hopefully when the keyboard is able to // enter surrogate pairs this code will have been removed. - if (Keyboard.CODE_SPACE != textBeforeCursor.charAt(1)) { - // We should not have come here if the text before the cursor is not a space. - throw new RuntimeException("Tried to revert a swap of punctuation but we didn't " + if (TextUtils.isEmpty(textBeforeCursor) + || (Keyboard.CODE_SPACE != textBeforeCursor.charAt(1))) { + // We may only come here if the application is changing the text while we are typing. + // This is quite a broken case, but not logically impossible, so we shouldn't crash, + // but some debugging log may be in order. + Log.d(TAG, "Tried to revert a swap of punctuation but we didn't " + "find a space just before the cursor."); + return false; } ic.beginBatchEdit(); ic.deleteSurroundingText(2, 0); diff --git a/java/src/com/android/inputmethod/latin/LatinImeLogger.java b/java/src/com/android/inputmethod/latin/LatinImeLogger.java index 6f1adfe71..e3dadf250 100644 --- a/java/src/com/android/inputmethod/latin/LatinImeLogger.java +++ b/java/src/com/android/inputmethod/latin/LatinImeLogger.java @@ -21,7 +21,6 @@ import android.content.SharedPreferences; import android.view.inputmethod.EditorInfo; import com.android.inputmethod.keyboard.Keyboard; -import com.android.inputmethod.latin.Dictionary.DataType; import java.util.List; @@ -75,7 +74,7 @@ public class LatinImeLogger implements SharedPreferences.OnSharedPreferenceChang public static void onStartSuggestion(CharSequence previousWords) { } - public static void onAddSuggestedWord(String word, int typeId, DataType dataType) { + public static void onAddSuggestedWord(String word, int typeId, int dataType) { } public static void onSetKeyboard(Keyboard kb) { diff --git a/java/src/com/android/inputmethod/latin/Suggest.java b/java/src/com/android/inputmethod/latin/Suggest.java index 8e0d031f4..f6e177aaf 100644 --- a/java/src/com/android/inputmethod/latin/Suggest.java +++ b/java/src/com/android/inputmethod/latin/Suggest.java @@ -298,7 +298,7 @@ public class Suggest implements Dictionary.WordCallback { if (typedWord != null) { // Treating USER_TYPED as UNIGRAM suggestion for logging now. LatinImeLogger.onAddSuggestedWord(typedWord, Suggest.DIC_USER_TYPED, - Dictionary.DataType.UNIGRAM); + Dictionary.UNIGRAM); } mConsideredWord = consideredWord; @@ -417,12 +417,12 @@ public class Suggest implements Dictionary.WordCallback { @Override public boolean addWord(final char[] word, final int offset, final int length, int score, - final int dicTypeId, final Dictionary.DataType dataType) { - Dictionary.DataType dataTypeForLog = dataType; + final int dicTypeId, final int dataType) { + int dataTypeForLog = dataType; final ArrayList<CharSequence> suggestions; final int[] sortedScores; final int prefMaxSuggestions; - if(dataType == Dictionary.DataType.BIGRAM) { + if (dataType == Dictionary.BIGRAM) { suggestions = mBigramSuggestions; sortedScores = mBigramScores; prefMaxSuggestions = PREF_MAX_BIGRAMS; @@ -450,11 +450,11 @@ public class Suggest implements Dictionary.WordCallback { } } } else { - if (dataType == Dictionary.DataType.UNIGRAM) { + if (dataType == Dictionary.UNIGRAM) { // Check if the word was already added before (by bigram data) int bigramSuggestion = searchBigramSuggestion(word,offset,length); if(bigramSuggestion >= 0) { - dataTypeForLog = Dictionary.DataType.BIGRAM; + dataTypeForLog = Dictionary.BIGRAM; // turn freq from bigram into multiplier specified above double multiplier = (((double) mBigramScores[bigramSuggestion]) / MAXIMUM_BIGRAM_FREQUENCY) diff --git a/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java b/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java index 39e47f661..88ac043ed 100644 --- a/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java +++ b/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java @@ -31,7 +31,6 @@ import com.android.inputmethod.compat.SuggestionsInfoCompatUtils; import com.android.inputmethod.keyboard.ProximityInfo; import com.android.inputmethod.latin.BinaryDictionary; import com.android.inputmethod.latin.Dictionary; -import com.android.inputmethod.latin.Dictionary.DataType; import com.android.inputmethod.latin.Dictionary.WordCallback; import com.android.inputmethod.latin.DictionaryCollection; import com.android.inputmethod.latin.DictionaryFactory; @@ -237,7 +236,7 @@ public class AndroidSpellCheckerService extends SpellCheckerService @Override synchronized public boolean addWord(char[] word, int wordOffset, int wordLength, int score, - int dicTypeId, DataType dataType) { + int dicTypeId, int dataType) { final int positionIndex = ArraysCompatUtils.binarySearch(mScores, 0, mLength, score); // binarySearch returns the index if the element exists, and -<insertion index> - 1 // if it doesn't. See documentation for binarySearch. diff --git a/native/src/correction.cpp b/native/src/correction.cpp index 63dd283c8..2458bca86 100644 --- a/native/src/correction.cpp +++ b/native/src/correction.cpp @@ -269,7 +269,7 @@ bool Correction::needsToPrune() const { // TODO: use edit distance here return mOutputIndex - 1 >= mMaxDepth || mProximityCount > mMaxEditDistance // Allow one char longer word for missing character - || (!mDoAutoCompletion && (mOutputIndex + 1 >= mInputLength)); + || (!mDoAutoCompletion && (mOutputIndex > mInputLength)); } void Correction::addCharToCurrentWord(const int32_t c) { @@ -555,55 +555,6 @@ Correction::CorrectionType Correction::processCharAndCalcState( Correction::~Correction() { } -///////////////////////// -// static inline utils // -///////////////////////// - -static const int TWO_31ST_DIV_255 = S_INT_MAX / 255; -static inline int capped255MultForFullMatchAccentsOrCapitalizationDifference(const int num) { - return (num < TWO_31ST_DIV_255 ? 255 * num : S_INT_MAX); -} - -static const int TWO_31ST_DIV_2 = S_INT_MAX / 2; -inline static void multiplyIntCapped(const int multiplier, int *base) { - const int temp = *base; - if (temp != S_INT_MAX) { - // Branch if multiplier == 2 for the optimization - if (multiplier == 2) { - *base = TWO_31ST_DIV_2 >= temp ? temp << 1 : S_INT_MAX; - } else { - // TODO: This overflow check gives a wrong answer when, for example, - // temp = 2^16 + 1 and multiplier = 2^17 + 1. - // Fix this behavior. - const int tempRetval = temp * multiplier; - *base = tempRetval >= temp ? tempRetval : S_INT_MAX; - } - } -} - -inline static int powerIntCapped(const int base, const int n) { - if (n <= 0) return 1; - if (base == 2) { - return n < 31 ? 1 << n : S_INT_MAX; - } else { - int ret = base; - for (int i = 1; i < n; ++i) multiplyIntCapped(base, &ret); - return ret; - } -} - -inline static void multiplyRate(const int rate, int *freq) { - if (*freq != S_INT_MAX) { - if (*freq > 1000000) { - *freq /= 100; - multiplyIntCapped(rate, freq); - } else { - multiplyIntCapped(rate, freq); - *freq /= 100; - } - } -} - inline static int getQuoteCount(const unsigned short* word, const int length) { int quoteCount = 0; for (int i = 0; i < length; ++i) { @@ -939,102 +890,12 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords( multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &totalFreq); } - multiplyRate(WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE, &totalFreq); - - if (capitalizedWordDemotion) { - multiplyRate(TWO_WORDS_CAPITALIZED_DEMOTION_RATE, &totalFreq); - } - - return totalFreq; -} - -/* static */ -int Correction::RankingAlgorithm::calcFreqForSplitTwoWordsOld( - const int firstFreq, const int secondFreq, const Correction* correction, - const unsigned short *word) { - const int spaceProximityPos = correction->mSpaceProximityPos; - const int missingSpacePos = correction->mMissingSpacePos; - if (DEBUG_DICT) { - int inputCount = 0; - if (spaceProximityPos >= 0) ++inputCount; - if (missingSpacePos >= 0) ++inputCount; - assert(inputCount <= 1); - } - const bool isSpaceProximity = spaceProximityPos >= 0; - const int inputLength = correction->mInputLength; - const int firstWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos; - const int secondWordLength = isSpaceProximity ? (inputLength - spaceProximityPos - 1) - : (inputLength - missingSpacePos); - const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER; - - bool firstCapitalizedWordDemotion = false; - if (firstWordLength >= 2) { - firstCapitalizedWordDemotion = isUpperCase(word[0]); - } - - bool secondCapitalizedWordDemotion = false; - if (secondWordLength >= 2) { - secondCapitalizedWordDemotion = isUpperCase(word[firstWordLength + 1]); - } - - const bool capitalizedWordDemotion = - firstCapitalizedWordDemotion ^ secondCapitalizedWordDemotion; - - if (DEBUG_DICT_FULL) { - AKLOGI("Two words: %c, %c, %d", - word[0], word[firstWordLength + 1], capitalizedWordDemotion); - } - - if (firstWordLength == 0 || secondWordLength == 0) { - return 0; - } - const int firstDemotionRate = 100 - 100 / (firstWordLength + 1); - int tempFirstFreq = firstFreq; - multiplyRate(firstDemotionRate, &tempFirstFreq); - - const int secondDemotionRate = 100 - 100 / (secondWordLength + 1); - int tempSecondFreq = secondFreq; - multiplyRate(secondDemotionRate, &tempSecondFreq); - - const int totalLength = firstWordLength + secondWordLength; - - // Promote pairFreq with multiplying by 2, because the word length is the same as the typed - // length. - int totalFreq = tempFirstFreq + tempSecondFreq; - - // This is a workaround to try offsetting the not-enough-demotion which will be done in - // calcNormalizedScore in Utils.java. - // In calcNormalizedScore the score will be demoted by (1 - 1 / length) - // but we demoted only (1 - 1 / (length + 1)) so we will additionally adjust freq by - // (1 - 1 / length) / (1 - 1 / (length + 1)) = (1 - 1 / (length * length)) - const int normalizedScoreNotEnoughDemotionAdjustment = 100 - 100 / (totalLength * totalLength); - multiplyRate(normalizedScoreNotEnoughDemotionAdjustment, &totalFreq); - - // At this moment, totalFreq is calculated by the following formula: - // (firstFreq * (1 - 1 / (firstWordLength + 1)) + secondFreq * (1 - 1 / (secondWordLength + 1))) - // * (1 - 1 / totalLength) / (1 - 1 / (totalLength + 1)) - - multiplyIntCapped(powerIntCapped(typedLetterMultiplier, totalLength), &totalFreq); - - // This is another workaround to offset the demotion which will be done in - // calcNormalizedScore in Utils.java. - // In calcNormalizedScore the score will be demoted by (1 - 1 / length) so we have to promote - // the same amount because we already have adjusted the synthetic freq of this "missing or - // mistyped space" suggestion candidate above in this method. - const int normalizedScoreDemotionRateOffset = (100 + 100 / totalLength); - multiplyRate(normalizedScoreDemotionRateOffset, &totalFreq); - if (isSpaceProximity) { - // A word pair with one space proximity correction - if (DEBUG_DICT) { - AKLOGI("Found a word pair with space proximity correction."); - } - multiplyIntCapped(typedLetterMultiplier, &totalFreq); - multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &totalFreq); + multiplyRate(WORDS_WITH_MISTYPED_SPACE_DEMOTION_RATE, &totalFreq); + } else { + multiplyRate(WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE, &totalFreq); } - multiplyRate(WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE, &totalFreq); - if (capitalizedWordDemotion) { multiplyRate(TWO_WORDS_CAPITALIZED_DEMOTION_RATE, &totalFreq); } diff --git a/native/src/correction.h b/native/src/correction.h index 0715551d0..aec7bbd73 100644 --- a/native/src/correction.h +++ b/native/src/correction.h @@ -36,6 +36,55 @@ class Correction { NOT_ON_TERMINAL } CorrectionType; + ///////////////////////// + // static inline utils // + ///////////////////////// + + static const int TWO_31ST_DIV_255 = S_INT_MAX / 255; + static inline int capped255MultForFullMatchAccentsOrCapitalizationDifference(const int num) { + return (num < TWO_31ST_DIV_255 ? 255 * num : S_INT_MAX); + } + + static const int TWO_31ST_DIV_2 = S_INT_MAX / 2; + inline static void multiplyIntCapped(const int multiplier, int *base) { + const int temp = *base; + if (temp != S_INT_MAX) { + // Branch if multiplier == 2 for the optimization + if (multiplier == 2) { + *base = TWO_31ST_DIV_2 >= temp ? temp << 1 : S_INT_MAX; + } else { + // TODO: This overflow check gives a wrong answer when, for example, + // temp = 2^16 + 1 and multiplier = 2^17 + 1. + // Fix this behavior. + const int tempRetval = temp * multiplier; + *base = tempRetval >= temp ? tempRetval : S_INT_MAX; + } + } + } + + inline static int powerIntCapped(const int base, const int n) { + if (n <= 0) return 1; + if (base == 2) { + return n < 31 ? 1 << n : S_INT_MAX; + } else { + int ret = base; + for (int i = 1; i < n; ++i) multiplyIntCapped(base, &ret); + return ret; + } + } + + inline static void multiplyRate(const int rate, int *freq) { + if (*freq != S_INT_MAX) { + if (*freq > 1000000) { + *freq /= 100; + multiplyIntCapped(rate, freq); + } else { + multiplyIntCapped(rate, freq); + *freq /= 100; + } + } + } + Correction(const int typedLetterMultiplier, const int fullWordMultiplier); void initCorrection( const ProximityInfo *pi, const int inputLength, const int maxWordLength); @@ -103,8 +152,6 @@ class Correction { const int inputLength); static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq, const Correction* correction, const unsigned short *word); - static int calcFreqForSplitTwoWordsOld(const int firstFreq, const int secondFreq, - const Correction* correction, const unsigned short *word); static double calcNormalizedScore(const unsigned short* before, const int beforeLength, const unsigned short* after, const int afterLength, const int score); static int editDistance(const unsigned short* before, diff --git a/native/src/defines.h b/native/src/defines.h index 096f1fbf6..9c2d08777 100644 --- a/native/src/defines.h +++ b/native/src/defines.h @@ -189,6 +189,7 @@ static void prof_out(void) { #define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 80 #define WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X 12 #define WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE 58 +#define WORDS_WITH_MISTYPED_SPACE_DEMOTION_RATE 50 #define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75 #define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75 #define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 70 @@ -222,6 +223,9 @@ static void prof_out(void) { #define MAX_DEPTH_MULTIPLIER 3 +#define FIRST_WORD_INDEX 1 +#define SECOND_WORD_INDEX 2 + // TODO: Reduce this constant if possible; check the maximum number of umlauts in the same German // word in the dictionary #define DEFAULT_MAX_UMLAUT_SEARCH_DEPTH 5 diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp index e998ee486..6a8973761 100644 --- a/native/src/unigram_dictionary.cpp +++ b/native/src/unigram_dictionary.cpp @@ -159,19 +159,26 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, } PROF_START(20); + if (DEBUG_DICT) { + double ns = queuePool->getMasterQueue()->getHighestNormalizedScore( + proximityInfo->getPrimaryInputWord(), codesSize, 0, 0, 0); + ns += 0; + AKLOGI("Max normalized score = %f", ns); + } const int suggestedWordsCount = queuePool->getMasterQueue()->outputSuggestions(frequencies, outWords); if (DEBUG_DICT) { + double ns = queuePool->getMasterQueue()->getHighestNormalizedScore( + proximityInfo->getPrimaryInputWord(), codesSize, 0, 0, 0); + ns += 0; AKLOGI("Returning %d words", suggestedWordsCount); /// Print the returned words for (int j = 0; j < suggestedWordsCount; ++j) { -#ifdef FLAG_DBG short unsigned int* w = outWords + j * MAX_WORD_LENGTH; char s[MAX_WORD_LENGTH]; for (int i = 0; i <= MAX_WORD_LENGTH; i++) s[i] = w[i]; AKLOGI("%s %i", s, frequencies[j]); -#endif } } PROF_END(20); @@ -205,6 +212,13 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, PROF_START(4); // Note: This line is intentionally left blank + bool hasAutoCorrectionCandidate = false; + WordsPriorityQueue* masterQueue = queuePool->getMasterQueue(); + if (masterQueue->size() > 0) { + double nsForMaster = masterQueue->getHighestNormalizedScore( + proximityInfo->getPrimaryInputWord(), inputLength, 0, 0, 0); + hasAutoCorrectionCandidate = (nsForMaster > START_TWO_WORDS_CORRECTION_THRESHOLD); + } PROF_END(4); PROF_START(5); @@ -216,7 +230,8 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, AKLOGI("--- Suggest missing space characters %d", i); } getMissingSpaceWords(proximityInfo, xcoordinates, ycoordinates, codes, - useFullEditDistance, inputLength, i, correction, queuePool); + useFullEditDistance, inputLength, i, correction, queuePool, + hasAutoCorrectionCandidate); } } PROF_END(5); @@ -236,7 +251,8 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, } if (proximityInfo->hasSpaceProximity(x, y)) { getMistypedSpaceWords(proximityInfo, xcoordinates, ycoordinates, codes, - useFullEditDistance, inputLength, i, correction, queuePool); + useFullEditDistance, inputLength, i, correction, queuePool, + hasAutoCorrectionCandidate); } } } @@ -281,12 +297,12 @@ void UnigramDictionary::getOneWordSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueuePool *queuePool) { initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction); getSuggestionCandidates(useFullEditDistance, inputLength, correction, queuePool, - true /* doAutoCompletion */, DEFAULT_MAX_ERRORS); + true /* doAutoCompletion */, DEFAULT_MAX_ERRORS, FIRST_WORD_INDEX); } void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance, const int inputLength, Correction *correction, WordsPriorityQueuePool *queuePool, - const bool doAutoCompletion, const int maxErrors) { + const bool doAutoCompletion, const int maxErrors, const int currentWordIndex) { // TODO: Remove setCorrectionParams correction->setCorrectionParams(0, 0, 0, -1 /* spaceProximityPos */, -1 /* missingSpacePos */, useFullEditDistance, @@ -305,7 +321,8 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance, int firstChildPos; const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, - correction, &childCount, &firstChildPos, &siblingPos, queuePool); + correction, &childCount, &firstChildPos, &siblingPos, queuePool, + currentWordIndex); // Update next sibling pos correction->setTreeSiblingPos(outputIndex, siblingPos); @@ -323,31 +340,32 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance, void UnigramDictionary::getMissingSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, const int missingSpacePos, Correction *correction, - WordsPriorityQueuePool* queuePool) { + WordsPriorityQueuePool* queuePool, const bool hasAutoCorrectionCandidate) { getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, useFullEditDistance, inputLength, missingSpacePos, -1/* spaceProximityPos */, - correction, queuePool); + correction, queuePool, hasAutoCorrectionCandidate); } void UnigramDictionary::getMistypedSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, const int spaceProximityPos, Correction *correction, - WordsPriorityQueuePool* queuePool) { + WordsPriorityQueuePool* queuePool, const bool hasAutoCorrectionCandidate) { getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, useFullEditDistance, inputLength, -1 /* missingSpacePos */, spaceProximityPos, - correction, queuePool); + correction, queuePool, hasAutoCorrectionCandidate); } inline void UnigramDictionary::onTerminal(const int freq, const TerminalAttributes& terminalAttributes, Correction *correction, - WordsPriorityQueuePool *queuePool, const bool addToMasterQueue) { + WordsPriorityQueuePool *queuePool, const bool addToMasterQueue, + const int currentWordIndex) { const int inputIndex = correction->getInputIndex(); const bool addToSubQueue = inputIndex < SUB_QUEUE_MAX_COUNT; int wordLength; unsigned short* wordPointer; - if (addToMasterQueue) { + if ((currentWordIndex == 1) && addToMasterQueue) { WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength); if (finalFreq != NOT_A_FREQUENCY) { @@ -376,9 +394,14 @@ inline void UnigramDictionary::onTerminal(const int freq, // We only allow two words + other error correction for words with SUB_QUEUE_MIN_WORD_LENGTH // or more length. if (inputIndex >= SUB_QUEUE_MIN_WORD_LENGTH && addToSubQueue) { - // TODO: Check the validity of "inputIndex == wordLength" - //if (addToSubQueue && inputIndex == wordLength) { - WordsPriorityQueue *subQueue = queuePool->getSubQueue1(inputIndex); + WordsPriorityQueue *subQueue; + if (currentWordIndex == 1) { + subQueue = queuePool->getSubQueue1(inputIndex); + } else if (currentWordIndex == 2) { + subQueue = queuePool->getSubQueue2(inputIndex); + } else { + return; + } const int finalFreq = correction->getFinalFreqForSubQueue(freq, &wordPointer, &wordLength, inputIndex); addWord(wordPointer, wordLength, finalFreq, subQueue); @@ -388,17 +411,21 @@ inline void UnigramDictionary::onTerminal(const int freq, void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, const int missingSpacePos, - const int spaceProximityPos, Correction *correction, WordsPriorityQueuePool* queuePool) { + const int spaceProximityPos, Correction *correction, WordsPriorityQueuePool* queuePool, + const bool hasAutoCorrectionCandidate) { if (inputLength >= MAX_WORD_LENGTH) return; if (DEBUG_DICT) { int inputCount = 0; if (spaceProximityPos >= 0) ++inputCount; if (missingSpacePos >= 0) ++inputCount; assert(inputCount <= 1); + // MAX_PROXIMITY_CHARS_SIZE in ProximityInfo.java should be 16 + assert(MAX_PROXIMITY_CHARS == 16); } + initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, + inputLength, correction); WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); - const bool isSpaceProximity = spaceProximityPos >= 0; // First word @@ -411,26 +438,22 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo if (firstFreq > 0) { firstOutputWordLength = firstInputWordLength; firstOutputWord = mWord; - } else { - if (masterQueue->size() > 0) { - double nsForMaster = masterQueue->getHighestNormalizedScore( - proximityInfo->getPrimaryInputWord(), inputLength, 0, 0, 0); - if (nsForMaster > START_TWO_WORDS_CORRECTION_THRESHOLD) { - // Do nothing if the highest suggestion exceeds the threshold. - return; - } - } + } else if (!hasAutoCorrectionCandidate) { WordsPriorityQueue* firstWordQueue = queuePool->getSubQueue1(firstInputWordLength); - if (firstWordQueue->size() < 1) { + if (!firstWordQueue || firstWordQueue->size() < 1) { return; } int score = 0; const double ns = firstWordQueue->getHighestNormalizedScore( proximityInfo->getPrimaryInputWord(), firstInputWordLength, &firstOutputWord, &score, &firstOutputWordLength); + if (DEBUG_DICT) { + AKLOGI("NS1 = %f, Score = %d", ns, score); + } // Two words correction won't be done if the score of the first word doesn't exceed the // threshold. - if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD) { + if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD + || firstOutputWordLength < SUB_QUEUE_MIN_WORD_LENGTH) { return; } firstFreq = score >> (firstOutputWordLength @@ -456,14 +479,6 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo outputWord[firstOutputWordLength] = SPACE; outputWordLength = firstOutputWordLength + 1; - //const int outputWordLength = firstOutputWordLength + secondWordLength + 1; - // Space proximity preparation - //WordsPriorityQueue *subQueue = queuePool->getSubQueue1(); - //initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, firstOutputWordLength, - //subQueue, correction); - //getSuggestionCandidates(useFullEditDistance, firstOutputWordLength, correction, subQueue, - //false, MAX_ERRORS_FOR_TWO_WORDS); - // Second word const int secondInputWordLength = isSpaceProximity ? (inputLength - spaceProximityPos - 1) @@ -478,9 +493,42 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo if (secondFreq > 0) { secondOutputWordLength = secondInputWordLength; secondOutputWord = mWord; + } else if (!hasAutoCorrectionCandidate) { + const int offset = secondInputWordStartPos; + initSuggestions(proximityInfo, &xcoordinates[offset], &ycoordinates[offset], + codes + offset * MAX_PROXIMITY_CHARS, secondInputWordLength, correction); + queuePool->clearSubQueue2(); + getSuggestionCandidates(useFullEditDistance, secondInputWordLength, correction, + queuePool, false, MAX_ERRORS_FOR_TWO_WORDS, SECOND_WORD_INDEX); + if (DEBUG_DICT) { + AKLOGI("Dump second word candidates %d", secondInputWordLength); + for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) { + queuePool->getSubQueue2(i)->dumpTopWord(); + } + } + WordsPriorityQueue* secondWordQueue = queuePool->getSubQueue2(secondInputWordLength); + if (!secondWordQueue || secondWordQueue->size() < 1) { + return; + } + int score = 0; + const double ns = secondWordQueue->getHighestNormalizedScore( + proximityInfo->getPrimaryInputWord(), secondInputWordLength, + &secondOutputWord, &score, &secondOutputWordLength); + if (DEBUG_DICT) { + AKLOGI("NS2 = %f, Score = %d", ns, score); + } + // Two words correction won't be done if the score of the first word doesn't exceed the + // threshold. + if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD + || secondOutputWordLength < SUB_QUEUE_MIN_WORD_LENGTH) { + return; + } + secondFreq = score >> (secondOutputWordLength + + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER); } if (DEBUG_DICT) { + DUMP_WORD(secondOutputWord, secondOutputWordLength); AKLOGI("Second freq: %d", secondFreq); } @@ -509,80 +557,6 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo return; } -void UnigramDictionary::getSplitTwoWordsSuggestionsOld(ProximityInfo *proximityInfo, - const int *xcoordinates, const int *ycoordinates, const int *codes, - const bool useFullEditDistance, const int inputLength, const int missingSpacePos, - const int spaceProximityPos, Correction *correction, WordsPriorityQueuePool* queuePool) { - WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); - - if (DEBUG_DICT) { - int inputCount = 0; - if (spaceProximityPos >= 0) ++inputCount; - if (missingSpacePos >= 0) ++inputCount; - assert(inputCount <= 1); - } - const bool isSpaceProximity = spaceProximityPos >= 0; - const int firstWordStartPos = 0; - const int secondWordStartPos = isSpaceProximity ? (spaceProximityPos + 1) : missingSpacePos; - const int firstWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos; - const int secondWordLength = isSpaceProximity - ? (inputLength - spaceProximityPos - 1) - : (inputLength - missingSpacePos); - - if (inputLength >= MAX_WORD_LENGTH) return; - if (0 >= firstWordLength || 0 >= secondWordLength || firstWordStartPos >= secondWordStartPos - || firstWordStartPos < 0 || secondWordStartPos + secondWordLength > inputLength) - return; - - const int newWordLength = firstWordLength + secondWordLength + 1; - - - // Space proximity preparation - //WordsPriorityQueue *subQueue = queuePool->getSubQueue1(); - //initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, firstWordLength, subQueue, - //correction); - //getSuggestionCandidates(useFullEditDistance, firstWordLength, correction, subQueue, false, - //MAX_ERRORS_FOR_TWO_WORDS); - - // Allocating variable length array on stack - unsigned short word[newWordLength]; - const int firstFreq = getMostFrequentWordLike( - firstWordStartPos, firstWordLength, proximityInfo, mWord); - if (DEBUG_DICT) { - AKLOGI("First freq: %d", firstFreq); - } - if (firstFreq <= 0) return; - - for (int i = 0; i < firstWordLength; ++i) { - word[i] = mWord[i]; - } - - const int secondFreq = getMostFrequentWordLike( - secondWordStartPos, secondWordLength, proximityInfo, mWord); - if (DEBUG_DICT) { - AKLOGI("Second freq: %d", secondFreq); - } - if (secondFreq <= 0) return; - - word[firstWordLength] = SPACE; - for (int i = (firstWordLength + 1); i < newWordLength; ++i) { - word[i] = mWord[i - firstWordLength - 1]; - } - - // TODO: Remove initSuggestions and correction->setCorrectionParams - initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction); - - correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */, - -1 /* transposedPos */, spaceProximityPos, missingSpacePos, - useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS); - const int pairFreq = correction->getFreqForSplitTwoWords(firstFreq, secondFreq, word); - if (DEBUG_DICT) { - AKLOGI("Split two words: %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength); - } - addWord(word, newWordLength, pairFreq, masterQueue); - return; -} - // Wrapper for getMostFrequentWordLikeInner, which matches it to the previous // interface. inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex, @@ -742,7 +716,8 @@ int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offs // given level, as output into newCount when traversing this level's parent. inline bool UnigramDictionary::processCurrentNode(const int initialPos, Correction *correction, int *newCount, - int *newChildrenPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool) { + int *newChildrenPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool, + const int currentWordIndex) { if (DEBUG_DICT) { correction->checkState(); } @@ -823,7 +798,8 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, const int childrenAddressPos = BinaryFormat::skipFrequency(flags, pos); const int attributesPos = BinaryFormat::skipChildrenPosition(flags, childrenAddressPos); TerminalAttributes terminalAttributes(DICT_ROOT, flags, attributesPos); - onTerminal(freq, terminalAttributes, correction, queuePool, needsToInvokeOnTerminal); + onTerminal(freq, terminalAttributes, correction, queuePool, needsToInvokeOnTerminal, + currentWordIndex); // If there are more chars in this node, then this virtual node has children. // If we are on the last char, this virtual node has children if this node has. diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h index b950971bb..0b8271954 100644 --- a/native/src/unigram_dictionary.h +++ b/native/src/unigram_dictionary.h @@ -99,30 +99,30 @@ class UnigramDictionary { const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool); void getSuggestionCandidates( const bool useFullEditDistance, const int inputLength, Correction *correction, - WordsPriorityQueuePool* queuePool, const bool doAutoCompletion, const int maxErrors); + WordsPriorityQueuePool* queuePool, const bool doAutoCompletion, const int maxErrors, + const int currentWordIndex); void getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, const int spaceProximityPos, - const int missingSpacePos, Correction *correction, WordsPriorityQueuePool* queuePool); - void getSplitTwoWordsSuggestionsOld(ProximityInfo *proximityInfo, - const int *xcoordinates, const int *ycoordinates, const int *codes, - const bool useFullEditDistance, const int inputLength, const int spaceProximityPos, - const int missingSpacePos, Correction *correction, WordsPriorityQueuePool* queuePool); + const int missingSpacePos, Correction *correction, WordsPriorityQueuePool* queuePool, + const bool hasAutoCorrectionCandidate); void getMissingSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, const int missingSpacePos, Correction *correction, - WordsPriorityQueuePool* queuePool); + WordsPriorityQueuePool* queuePool, const bool hasAutoCorrectionCandidate); void getMistypedSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, const int spaceProximityPos, Correction *correction, - WordsPriorityQueuePool* queuePool); + WordsPriorityQueuePool* queuePool, const bool hasAutoCorrectionCandidate); void onTerminal(const int freq, const TerminalAttributes& terminalAttributes, - Correction *correction, WordsPriorityQueuePool *queuePool, const bool addToMasterQueue); + Correction *correction, WordsPriorityQueuePool *queuePool, const bool addToMasterQueue, + const int currentWordIndex); bool needsToSkipCurrentNode(const unsigned short c, const int inputIndex, const int skipPos, const int depth); // Process a node by considering proximity, missing and excessive character bool processCurrentNode(const int initialPos, Correction *correction, int *newCount, - int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool); + int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool, + const int currentWordIndex); int getMostFrequentWordLike(const int startInputIndex, const int inputLength, ProximityInfo *proximityInfo, unsigned short *word); int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length, diff --git a/native/src/words_priority_queue.h b/native/src/words_priority_queue.h index c85f2b9b3..e8cd983b1 100644 --- a/native/src/words_priority_queue.h +++ b/native/src/words_priority_queue.h @@ -137,7 +137,7 @@ class WordsPriorityQueue { if (size() <= 0) { return; } - DUMP_WORD(mSuggestions.top()->mWord, mSuggestions.top()->mWordLength); + DUMP_WORD(mHighestSuggestedWord->mWord, mHighestSuggestedWord->mWordLength); } double getHighestNormalizedScore(const unsigned short* before, const int beforeLength, diff --git a/native/src/words_priority_queue_pool.h b/native/src/words_priority_queue_pool.h index 5fa254852..599b89711 100644 --- a/native/src/words_priority_queue_pool.h +++ b/native/src/words_priority_queue_pool.h @@ -45,15 +45,21 @@ class WordsPriorityQueuePool { // TODO: Come up with more generic pool WordsPriorityQueue* getSubQueue1(const int id) { - if (DEBUG_WORDS_PRIORITY_QUEUE) { - assert(id >= 0 && id < SUB_QUEUE_MAX_COUNT); + if (id < 0 || id >= SUB_QUEUE_MAX_COUNT) { + if (DEBUG_WORDS_PRIORITY_QUEUE) { + assert(false); + } + return 0; } return mSubQueues1[id]; } WordsPriorityQueue* getSubQueue2(const int id) { - if (DEBUG_WORDS_PRIORITY_QUEUE) { - assert(id >= 0 && id < SUB_QUEUE_MAX_COUNT); + if (id < 0 || id >= SUB_QUEUE_MAX_COUNT) { + if (DEBUG_WORDS_PRIORITY_QUEUE) { + assert(false); + } + return 0; } return mSubQueues2[id]; } @@ -66,6 +72,18 @@ class WordsPriorityQueuePool { } } + inline void clearSubQueue1() { + for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) { + mSubQueues1[i]->clear(); + } + } + + inline void clearSubQueue2() { + for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) { + mSubQueues2[i]->clear(); + } + } + void dumpSubQueue1TopSuggestions() { AKLOGI("DUMP SUBQUEUE1 TOP SUGGESTIONS"); for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) { diff --git a/tests/src/com/android/inputmethod/latin/InputLogicTests.java b/tests/src/com/android/inputmethod/latin/InputLogicTests.java index 59ca22df4..7b87be6d3 100644 --- a/tests/src/com/android/inputmethod/latin/InputLogicTests.java +++ b/tests/src/com/android/inputmethod/latin/InputLogicTests.java @@ -177,4 +177,19 @@ public class InputLogicTests extends ServiceTestCase<LatinIME> { type(STRING_TO_TYPE); assertEquals("simple auto-correct", EXPECTED_RESULT, mTextView.getText().toString()); } + + public void testDoubleSpace() { + final String STRING_TO_TYPE = "this "; + final String EXPECTED_RESULT = "this. "; + type(STRING_TO_TYPE); + assertEquals("double space make a period", EXPECTED_RESULT, mTextView.getText().toString()); + } + + public void testCancelDoubleSpace() { + final String STRING_TO_TYPE = "tgis "; + final String EXPECTED_RESULT = "this "; + type(STRING_TO_TYPE); + type(Keyboard.CODE_DELETE); + assertEquals("double space make a period", EXPECTED_RESULT, mTextView.getText().toString()); + } } |