diff options
-rw-r--r-- | java/src/com/android/inputmethod/latin/LastComposedWord.java | 74 | ||||
-rw-r--r-- | java/src/com/android/inputmethod/latin/LatinIME.java | 63 | ||||
-rw-r--r-- | java/src/com/android/inputmethod/latin/WordComposer.java | 73 | ||||
-rw-r--r-- | native/src/defines.h | 1 | ||||
-rw-r--r-- | native/src/unigram_dictionary.cpp | 205 | ||||
-rw-r--r-- | native/src/unigram_dictionary.h | 7 | ||||
-rw-r--r-- | native/src/words_priority_queue_pool.h | 37 |
7 files changed, 254 insertions, 206 deletions
diff --git a/java/src/com/android/inputmethod/latin/LastComposedWord.java b/java/src/com/android/inputmethod/latin/LastComposedWord.java new file mode 100644 index 000000000..767c3a7da --- /dev/null +++ b/java/src/com/android/inputmethod/latin/LastComposedWord.java @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.android.inputmethod.latin; + +import android.text.TextUtils; + +import java.util.ArrayList; + +/** + * This class encapsulates data about a word previously composed, but that has been + * committed already. This is used for resuming suggestion, and cancel auto-correction. + */ +public class LastComposedWord { + // COMMIT_TYPE_USER_TYPED_WORD is used when the word committed is the exact typed word, with + // no hinting from the IME. It happens when some external event happens (rotating the device, + // for example) or when auto-correction is off by settings or editor attributes. + public static final int COMMIT_TYPE_USER_TYPED_WORD = 0; + // COMMIT_TYPE_MANUAL_PICK is used when the user pressed a field in the suggestion strip. + public static final int COMMIT_TYPE_MANUAL_PICK = 1; + // COMMIT_TYPE_DECIDED_WORD is used when the IME commits the word it decided was best + // for the current user input. It may be different from what the user typed (true auto-correct) + // or it may be exactly what the user typed if it's in the dictionary or the IME does not have + // enough confidence in any suggestion to auto-correct (auto-correct to typed word). + public static final int COMMIT_TYPE_DECIDED_WORD = 2; + // COMMIT_TYPE_CANCEL_AUTO_CORRECT is used upon committing back the old word upon cancelling + // an auto-correction. + public static final int COMMIT_TYPE_CANCEL_AUTO_CORRECT = 3; + + public final int mType; + public final ArrayList<int[]> mCodes; + public final int[] mXCoordinates; + public final int[] mYCoordinates; + public final String mTypedWord; + public final String mAutoCorrection; + + private boolean mActive; + + public static final LastComposedWord NOT_A_COMPOSED_WORD = + new LastComposedWord(COMMIT_TYPE_USER_TYPED_WORD, null, null, null, "", ""); + + public LastComposedWord(final int type, final ArrayList<int[]> codes, final int[] xCoordinates, + final int[] yCoordinates, final String typedWord, final String autoCorrection) { + mType = type; + mCodes = codes; + mXCoordinates = xCoordinates; + mYCoordinates = yCoordinates; + mTypedWord = typedWord; + mAutoCorrection = autoCorrection; + mActive = true; + } + + public void deactivate() { + mActive = false; + } + + public boolean canCancelAutoCorrect() { + return mActive && !TextUtils.isEmpty(mAutoCorrection) + && !TextUtils.equals(mTypedWord, mAutoCorrection); + } +} diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java index f2fa7880f..2e7e82637 100644 --- a/java/src/com/android/inputmethod/latin/LatinIME.java +++ b/java/src/com/android/inputmethod/latin/LatinIME.java @@ -199,6 +199,7 @@ public class LatinIME extends InputMethodServiceCompatWrapper implements Keyboar private UserUnigramDictionary mUserUnigramDictionary; private boolean mIsUserDictionaryAvailable; + private LastComposedWord mLastComposedWord = LastComposedWord.NOT_A_COMPOSED_WORD; private WordComposer mWordComposer = new WordComposer(); private int mCorrectionMode; @@ -769,7 +770,7 @@ public class LatinIME extends InputMethodServiceCompatWrapper implements Keyboar inputView.closing(); mEnteredText = null; - mWordComposer.reset(); + resetComposingState(true /* alsoResetLastComposedWord */); mDeleteCount = 0; mSpaceState = SPACE_STATE_NONE; @@ -881,7 +882,7 @@ public class LatinIME extends InputMethodServiceCompatWrapper implements Keyboar if (((mWordComposer.isComposingWord()) || mVoiceProxy.isVoiceInputHighlighted()) && (selectionChanged || candidatesCleared)) { - mWordComposer.reset(); + resetComposingState(true /* alsoResetLastComposedWord */); updateSuggestions(); final InputConnection ic = getCurrentInputConnection(); if (ic != null) { @@ -890,7 +891,9 @@ public class LatinIME extends InputMethodServiceCompatWrapper implements Keyboar mComposingStateManager.onFinishComposingText(); mVoiceProxy.setVoiceInputHighlighted(false); } else if (!mWordComposer.isComposingWord()) { - mWordComposer.reset(); + // TODO: is the following reset still needed, given that we are not composing + // a word? + resetComposingState(true /* alsoResetLastComposedWord */); updateSuggestions(); } } @@ -974,7 +977,9 @@ public class LatinIME extends InputMethodServiceCompatWrapper implements Keyboar .setHasMinimalSuggestion(false); // When in fullscreen mode, show completions generated by the application setSuggestions(builder.build()); - mWordComposer.deleteAutoCorrection(); + // TODO: is this the right thing to do? What should we auto-correct to in + // this case? This says to keep whatever the user typed. + mWordComposer.setAutoCorrection(mWordComposer.getTypedWord()); setSuggestionStripShown(true); } } @@ -1093,10 +1098,16 @@ public class LatinIME extends InputMethodServiceCompatWrapper implements Keyboar return super.onKeyUp(keyCode, event); } + private void resetComposingState(final boolean alsoResetLastComposedWord) { + mWordComposer.reset(); + if (alsoResetLastComposedWord) + mLastComposedWord = LastComposedWord.NOT_A_COMPOSED_WORD; + } + public void commitTyped(final InputConnection ic) { if (!mWordComposer.isComposingWord()) return; final CharSequence typedWord = mWordComposer.getTypedWord(); - mWordComposer.onCommitWord(WordComposer.COMMIT_TYPE_USER_TYPED_WORD); + mLastComposedWord = mWordComposer.commitWord(LastComposedWord.COMMIT_TYPE_USER_TYPED_WORD); if (typedWord.length() > 0) { if (ic != null) { ic.commitText(typedWord, 1); @@ -1263,6 +1274,7 @@ public class LatinIME extends InputMethodServiceCompatWrapper implements Keyboar mHandler.cancelDoubleSpacesTimer(); } + boolean didAutoCorrect = false; switch (primaryCode) { case Keyboard.CODE_DELETE: mSpaceState = SPACE_STATE_NONE; @@ -1299,7 +1311,7 @@ public class LatinIME extends InputMethodServiceCompatWrapper implements Keyboar default: mSpaceState = SPACE_STATE_NONE; if (mSettingsValues.isWordSeparator(primaryCode)) { - handleSeparator(primaryCode, x, y, spaceState); + didAutoCorrect = handleSeparator(primaryCode, x, y, spaceState); } else { handleCharacter(primaryCode, keyCodes, x, y, spaceState); } @@ -1308,6 +1320,8 @@ public class LatinIME extends InputMethodServiceCompatWrapper implements Keyboar } switcher.onCodeInput(primaryCode); // Reset after any single keystroke + if (!didAutoCorrect) + mLastComposedWord.deactivate(); mEnteredText = null; } @@ -1325,7 +1339,7 @@ public class LatinIME extends InputMethodServiceCompatWrapper implements Keyboar mKeyboardSwitcher.onCodeInput(Keyboard.CODE_OUTPUT_TEXT); mSpaceState = SPACE_STATE_NONE; mEnteredText = text; - mWordComposer.reset(); + resetComposingState(true /* alsoResetLastComposedWord */); } @Override @@ -1383,7 +1397,7 @@ public class LatinIME extends InputMethodServiceCompatWrapper implements Keyboar // resuming here. The behavior needs to be different according to text field types, // and it would be much clearer to test for them explicitly here rather than // relying on implicit values like "whether the suggestion strip is displayed". - if (mWordComposer.didAutoCorrectToAnotherWord()) { + if (mLastComposedWord.canCancelAutoCorrect()) { Utils.Stats.onAutoCorrectionCancellation(); cancelAutoCorrect(ic); return; @@ -1495,7 +1509,11 @@ public class LatinIME extends InputMethodServiceCompatWrapper implements Keyboar // separator and it should be treated as a normal character, except in the first // position where it should not start composing a word. isComposingWord = (Keyboard.CODE_SINGLE_QUOTE != code); - mWordComposer.reset(); + // Here we don't need to reset the last composed word. It will be reset + // when we commit this one, if we ever do; if on the other hand we backspace + // it entirely and resume suggestions on the previous word, we'd like to still + // have touch coordinates for it. + resetComposingState(false /* alsoResetLastComposedWord */); clearSuggestions(); mComposingStateManager.onFinishComposingText(); } @@ -1547,7 +1565,8 @@ public class LatinIME extends InputMethodServiceCompatWrapper implements Keyboar } } - private void handleSeparator(final int primaryCode, final int x, final int y, + // Returns true if we did an autocorrection, false otherwise. + private boolean handleSeparator(final int primaryCode, final int x, final int y, final int spaceState) { mVoiceProxy.handleSeparator(); mComposingStateManager.onFinishComposingText(); @@ -1558,6 +1577,7 @@ public class LatinIME extends InputMethodServiceCompatWrapper implements Keyboar mHandler.postUpdateSuggestions(); } + boolean didAutoCorrect = false; // Handle separator final InputConnection ic = getCurrentInputConnection(); if (ic != null) { @@ -1572,6 +1592,7 @@ public class LatinIME extends InputMethodServiceCompatWrapper implements Keyboar && !mInputAttributes.mInputTypeNoAutoCorrect; if (shouldAutoCorrect && primaryCode != Keyboard.CODE_SINGLE_QUOTE) { commitCurrentAutoCorrection(primaryCode, ic); + didAutoCorrect = true; } else { commitTyped(ic); } @@ -1627,6 +1648,7 @@ public class LatinIME extends InputMethodServiceCompatWrapper implements Keyboar if (ic != null) { ic.endBatchEdit(); } + return didAutoCorrect; } private CharSequence getTextWithUnderline(final CharSequence text) { @@ -1847,7 +1869,7 @@ public class LatinIME extends InputMethodServiceCompatWrapper implements Keyboar } Utils.Stats.onAutoCorrection(typedWord, autoCorrection.toString(), separatorCodePoint); mExpectingUpdateSelection = true; - commitChosenWord(autoCorrection, WordComposer.COMMIT_TYPE_DECIDED_WORD); + commitChosenWord(autoCorrection, LastComposedWord.COMMIT_TYPE_DECIDED_WORD); // Add the word to the user unigram dictionary if it's not a known word addToUserUnigramAndBigramDictionaries(autoCorrection, UserUnigramDictionary.FREQUENCY_FOR_TYPED); @@ -1917,7 +1939,7 @@ public class LatinIME extends InputMethodServiceCompatWrapper implements Keyboar LatinImeLogger.logOnManualSuggestion(mWordComposer.getTypedWord().toString(), suggestion.toString(), index, suggestions.mWords); mExpectingUpdateSelection = true; - commitChosenWord(suggestion, WordComposer.COMMIT_TYPE_MANUAL_PICK); + commitChosenWord(suggestion, LastComposedWord.COMMIT_TYPE_MANUAL_PICK); // Add the word to the auto dictionary if it's not a known word if (index == 0) { addToUserUnigramAndBigramDictionaries(suggestion, @@ -1985,9 +2007,9 @@ public class LatinIME extends InputMethodServiceCompatWrapper implements Keyboar } // TODO: figure out here if this is an auto-correct or if the best word is actually // what user typed. Note: currently this is done much later in - // WordComposer#didAutoCorrectToAnotherWord by string equality of the remembered + // LastComposedWord#canCancelAutoCorrect by string equality of the remembered // strings. - mWordComposer.onCommitWord(commitType); + mLastComposedWord = mWordComposer.commitWord(commitType); } private static final WordComposer sEmptyWordComposer = new WordComposer(); @@ -2151,12 +2173,14 @@ public class LatinIME extends InputMethodServiceCompatWrapper implements Keyboar // "ic" must not be null private void cancelAutoCorrect(final InputConnection ic) { - mWordComposer.resumeSuggestionOnKeptWord(); - final String originallyTypedWord = mWordComposer.getTypedWord(); - final CharSequence autoCorrectedTo = mWordComposer.getAutoCorrectionOrNull(); + final String originallyTypedWord = mLastComposedWord.mTypedWord; + final CharSequence autoCorrectedTo = mLastComposedWord.mAutoCorrection; final int cancelLength = autoCorrectedTo.length(); final CharSequence separator = ic.getTextBeforeCursor(1, 0); if (DEBUG) { + if (mWordComposer.isComposingWord()) { + throw new RuntimeException("cancelAutoCorrect, but we are composing a word"); + } final String wordBeforeCursor = ic.getTextBeforeCursor(cancelLength + 1, 0).subSequence(0, cancelLength) .toString(); @@ -2175,8 +2199,7 @@ public class LatinIME extends InputMethodServiceCompatWrapper implements Keyboar ic.commitText(originallyTypedWord, 1); // Re-insert the separator ic.commitText(separator, 1); - mWordComposer.deleteAutoCorrection(); - mWordComposer.onCommitWord(WordComposer.COMMIT_TYPE_CANCEL_AUTO_CORRECT); + mLastComposedWord = LastComposedWord.NOT_A_COMPOSED_WORD; Utils.Stats.onSeparator(separator.charAt(0), WordComposer.NOT_A_COORDINATE, WordComposer.NOT_A_COORDINATE); mHandler.cancelUpdateBigramPredictions(); @@ -2190,7 +2213,7 @@ public class LatinIME extends InputMethodServiceCompatWrapper implements Keyboar // Note: in the interest of code simplicity, we may want to just call // restartSuggestionsOnWordBeforeCursorIfAtEndOfWord instead, but retrieving // the old WordComposer allows to reuse the actual typed coordinates. - mWordComposer.resumeSuggestionOnKeptWord(); + mWordComposer.resumeSuggestionOnLastComposedWord(mLastComposedWord); // We resume suggestion, and then we want to set the composing text to the content // of the word composer again. But since we just manually picked a word, there is // no composing text at the moment, so we have to delete the word before we set a diff --git a/java/src/com/android/inputmethod/latin/WordComposer.java b/java/src/com/android/inputmethod/latin/WordComposer.java index e95dcfdc9..bf132ed8c 100644 --- a/java/src/com/android/inputmethod/latin/WordComposer.java +++ b/java/src/com/android/inputmethod/latin/WordComposer.java @@ -33,23 +33,6 @@ public class WordComposer { public static final int NOT_A_CODE = KeyDetector.NOT_A_CODE; public static final int NOT_A_COORDINATE = -1; - // TODO: Straighten out commit behavior so that the flags here are more understandable, - // and possibly adjust their names. - // COMMIT_TYPE_USER_TYPED_WORD is used when the word committed is the exact typed word, with - // no hinting from the IME. It happens when some external event happens (rotating the device, - // for example) or when auto-correction is off by settings or editor attributes. - public static final int COMMIT_TYPE_USER_TYPED_WORD = 0; - // COMMIT_TYPE_MANUAL_PICK is used when the user pressed a field in the suggestion strip. - public static final int COMMIT_TYPE_MANUAL_PICK = 1; - // COMMIT_TYPE_DECIDED_WORD is used when the IME commits the word it decided was best - // for the current user input. It may be different from what the user typed (true auto-correct) - // or it may be exactly what the user typed if it's in the dictionary or the IME does not have - // enough confidence in any suggestion to auto-correct (auto-correct to typed word). - public static final int COMMIT_TYPE_DECIDED_WORD = 2; - // COMMIT_TYPE_CANCEL_AUTO_CORRECT is used upon committing back the old word upon cancelling - // an auto-correction. - public static final int COMMIT_TYPE_CANCEL_AUTO_CORRECT = 3; - // Storage for all the info about the current input. private static class CharacterStore { /** @@ -84,8 +67,6 @@ public class WordComposer { // The currently typing word. May not be null. private CharacterStore mCurrentWord; - // The information being kept for resuming suggestion. May be null if wiped. - private CharacterStore mCommittedWordSavedForSuggestionResuming; private int mCapsCount; @@ -100,7 +81,6 @@ public class WordComposer { public WordComposer() { mCurrentWord = new CharacterStore(); - mCommittedWordSavedForSuggestionResuming = null; mTrailingSingleQuotesCount = 0; } @@ -110,7 +90,6 @@ public class WordComposer { public void init(WordComposer source) { mCurrentWord = new CharacterStore(source.mCurrentWord); - mCommittedWordSavedForSuggestionResuming = source.mCommittedWordSavedForSuggestionResuming; mCapsCount = source.mCapsCount; mIsFirstCharCapitalized = source.mIsFirstCharCapitalized; mAutoCapitalized = source.mAutoCapitalized; @@ -122,7 +101,6 @@ public class WordComposer { */ public void reset() { mCurrentWord.reset(); - mCommittedWordSavedForSuggestionResuming = null; mCapsCount = 0; mIsFirstCharCapitalized = false; mTrailingSingleQuotesCount = 0; @@ -218,7 +196,6 @@ public class WordComposer { int codePoint = word.charAt(i); addKeyInfo(codePoint, keyboard, keyDetector); } - mCommittedWordSavedForSuggestionResuming = null; } /** @@ -333,22 +310,14 @@ public class WordComposer { } /** - * Remove any auto-correction that may have been set. - */ - public void deleteAutoCorrection() { - mCurrentWord.mAutoCorrection = null; - } - - /** * @return the auto-correction for this word, or null if none. */ public CharSequence getAutoCorrectionOrNull() { return mCurrentWord.mAutoCorrection; } - // `type' should be one of the COMMIT_TYPE_* constants above. - public void onCommitWord(final int type) { - mCommittedWordSavedForSuggestionResuming = mCurrentWord; + // `type' should be one of the LastComposedWord.COMMIT_TYPE_* constants above. + public LastComposedWord commitWord(final int type) { // Note: currently, we come here whenever we commit a word. If it's any *other* kind than // DECIDED_WORD, we should reset mAutoCorrection so that we don't attempt to cancel later. // If it's a DECIDED_WORD, it may be an actual auto-correction by the IME, or what the user @@ -356,30 +325,26 @@ public class WordComposer { // Ideally we would also null it when it was a DECIDED_WORD that was not an auto-correct. // As it happens these two cases should behave differently, because the former can be // canceled while the latter can't. Currently, we figure this out in - // #didAutoCorrectToAnotherWord with #equals(). It would be marginally cleaner to do it - // here, but it would be slower (since we would #equals() for each commit, instead of - // only on cancel), and ultimately we want to figure it out even earlier anyway. - if (type != COMMIT_TYPE_DECIDED_WORD) { - // Only ever revert an auto-correct. - mCommittedWordSavedForSuggestionResuming.mAutoCorrection = null; - } + // LastComposedWord#didAutoCorrectToAnotherWord with #equals(). It would be marginally + // cleaner to do it here, but it would be slower (since we would #equals() for each commit, + // instead of only on cancel), and ultimately we want to figure it out even earlier anyway. + final LastComposedWord lastComposedWord = new LastComposedWord(type, mCurrentWord.mCodes, + mCurrentWord.mXCoordinates, mCurrentWord.mYCoordinates, + mCurrentWord.mTypedWord.toString(), + (type != LastComposedWord.COMMIT_TYPE_DECIDED_WORD) + || (null == mCurrentWord.mAutoCorrection) + ? null : mCurrentWord.mAutoCorrection.toString()); // TODO: improve performance by swapping buffers instead of creating a new object. mCurrentWord = new CharacterStore(); + return lastComposedWord; } - public boolean hasWordKeptForSuggestionResuming() { - return null != mCommittedWordSavedForSuggestionResuming; - } - - public void resumeSuggestionOnKeptWord() { - mCurrentWord = mCommittedWordSavedForSuggestionResuming; - mCommittedWordSavedForSuggestionResuming = null; - } - - public boolean didAutoCorrectToAnotherWord() { - return null != mCommittedWordSavedForSuggestionResuming - && !TextUtils.isEmpty(mCommittedWordSavedForSuggestionResuming.mAutoCorrection) - && !TextUtils.equals(mCommittedWordSavedForSuggestionResuming.mTypedWord, - mCommittedWordSavedForSuggestionResuming.mAutoCorrection); + public void resumeSuggestionOnLastComposedWord(final LastComposedWord lastComposedWord) { + mCurrentWord.mCodes = lastComposedWord.mCodes; + mCurrentWord.mXCoordinates = lastComposedWord.mXCoordinates; + mCurrentWord.mYCoordinates = lastComposedWord.mYCoordinates; + mCurrentWord.mTypedWord.setLength(0); + mCurrentWord.mTypedWord.append(lastComposedWord.mTypedWord); + mCurrentWord.mAutoCorrection = lastComposedWord.mAutoCorrection; } } diff --git a/native/src/defines.h b/native/src/defines.h index 9c2d08777..7e171acfd 100644 --- a/native/src/defines.h +++ b/native/src/defines.h @@ -217,6 +217,7 @@ static void prof_out(void) { #define SUB_QUEUE_MAX_WORDS 1 #define SUB_QUEUE_MAX_COUNT 10 #define SUB_QUEUE_MIN_WORD_LENGTH 4 +#define SUB_QUEUE_MAX_WORD_INDEX 2 #define TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD 0.39 #define START_TWO_WORDS_CORRECTION_THRESHOLD 0.22 diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp index 6a8973761..fd6f14af8 100644 --- a/native/src/unigram_dictionary.cpp +++ b/native/src/unigram_dictionary.cpp @@ -260,7 +260,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, if (DEBUG_DICT) { queuePool->dumpSubQueue1TopSuggestions(); for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) { - WordsPriorityQueue* queue = queuePool->getSubQueue1(i); + WordsPriorityQueue* queue = queuePool->getSubQueue(FIRST_WORD_INDEX, i); if (queue->size() > 0) { WordsPriorityQueue::SuggestedWord* sw = queue->top(); const int score = sw->mScore; @@ -395,11 +395,8 @@ inline void UnigramDictionary::onTerminal(const int freq, // or more length. if (inputIndex >= SUB_QUEUE_MIN_WORD_LENGTH && addToSubQueue) { WordsPriorityQueue *subQueue; - if (currentWordIndex == 1) { - subQueue = queuePool->getSubQueue1(inputIndex); - } else if (currentWordIndex == 2) { - subQueue = queuePool->getSubQueue2(inputIndex); - } else { + subQueue = queuePool->getSubQueue(currentWordIndex, inputIndex); + if (!subQueue) { return; } const int finalFreq = correction->getFinalFreqForSubQueue(freq, &wordPointer, &wordLength, @@ -408,6 +405,78 @@ inline void UnigramDictionary::onTerminal(const int freq, } } +int UnigramDictionary::getSubStringSuggestion( + ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, + const int *codes, const bool useFullEditDistance, Correction *correction, + WordsPriorityQueuePool* queuePool, const int inputLength, + const bool hasAutoCorrectionCandidate, const int currentWordIndex, + const int inputWordStartPos, const int inputWordLength, + const int outputWordStartPos, unsigned short* outputWord, int *outputWordLength) { + unsigned short* tempOutputWord = 0; + int tempOutputWordLength = 0; + int freq = getMostFrequentWordLike( + inputWordStartPos, inputWordLength, proximityInfo, mWord); + if (freq > 0) { + tempOutputWordLength = inputWordLength; + tempOutputWord = mWord; + } else if (!hasAutoCorrectionCandidate) { + if (inputWordStartPos > 0) { + const int offset = inputWordStartPos; + initSuggestions(proximityInfo, &xcoordinates[offset], &ycoordinates[offset], + codes + offset * MAX_PROXIMITY_CHARS, inputWordLength, correction); + queuePool->clearSubQueue(currentWordIndex); + getSuggestionCandidates(useFullEditDistance, inputWordLength, correction, + queuePool, false, MAX_ERRORS_FOR_TWO_WORDS, currentWordIndex); + if (DEBUG_DICT) { + if (currentWordIndex <= SUB_QUEUE_MAX_WORD_INDEX) { + AKLOGI("Dump word candidates(%d) %d", currentWordIndex, inputWordLength); + for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) { + queuePool->getSubQueue(currentWordIndex, i)->dumpTopWord(); + } + } + } + } + WordsPriorityQueue* queue = queuePool->getSubQueue(currentWordIndex, inputWordLength); + if (!queue || queue->size() < 1) { + return 0; + } + int score = 0; + const double ns = queue->getHighestNormalizedScore( + proximityInfo->getPrimaryInputWord(), inputWordLength, + &tempOutputWord, &score, &tempOutputWordLength); + if (DEBUG_DICT) { + AKLOGI("NS(%d) = %f, Score = %d", currentWordIndex, ns, score); + } + // Two words correction won't be done if the score of the first word doesn't exceed the + // threshold. + if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD + || tempOutputWordLength < SUB_QUEUE_MIN_WORD_LENGTH) { + return 0; + } + freq = score >> (tempOutputWordLength + + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER); + } + if (DEBUG_DICT) { + AKLOGI("Freq(%d): %d", currentWordIndex, freq); + } + if (freq <= 0 || tempOutputWordLength <= 0 + || MAX_WORD_LENGTH <= (outputWordStartPos + tempOutputWordLength)) { + return 0; + } + for (int i = 0; i < tempOutputWordLength; ++i) { + outputWord[outputWordStartPos + i] = tempOutputWord[i]; + } + if ((inputWordStartPos + inputWordLength) < inputLength) { + if (outputWordStartPos + tempOutputWordLength >= MAX_WORD_LENGTH) { + return 0; + } + outputWord[outputWordStartPos + tempOutputWordLength] = SPACE; + ++tempOutputWordLength; + } + *outputWordLength = outputWordStartPos + tempOutputWordLength; + return freq; +} + void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, const int missingSpacePos, @@ -425,124 +494,36 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction); - WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); - const bool isSpaceProximity = spaceProximityPos >= 0; - - // First word - const int firstInputWordStartPos = 0; - const int firstInputWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos; - int firstFreq = getMostFrequentWordLike( - firstInputWordStartPos, firstInputWordLength, proximityInfo, mWord); - unsigned short* firstOutputWord = 0; - int firstOutputWordLength = 0; - if (firstFreq > 0) { - firstOutputWordLength = firstInputWordLength; - firstOutputWord = mWord; - } else if (!hasAutoCorrectionCandidate) { - WordsPriorityQueue* firstWordQueue = queuePool->getSubQueue1(firstInputWordLength); - if (!firstWordQueue || firstWordQueue->size() < 1) { - return; - } - int score = 0; - const double ns = firstWordQueue->getHighestNormalizedScore( - proximityInfo->getPrimaryInputWord(), firstInputWordLength, - &firstOutputWord, &score, &firstOutputWordLength); - if (DEBUG_DICT) { - AKLOGI("NS1 = %f, Score = %d", ns, score); - } - // Two words correction won't be done if the score of the first word doesn't exceed the - // threshold. - if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD - || firstOutputWordLength < SUB_QUEUE_MIN_WORD_LENGTH) { - return; - } - firstFreq = score >> (firstOutputWordLength - + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER); - } - - if (DEBUG_DICT) { - AKLOGI("First freq: %d", firstFreq); - } - - if (firstFreq <= 0 || firstOutputWordLength <= 0 || MAX_WORD_LENGTH <= firstOutputWordLength) { - return; - } // Allocating fixed length array on stack unsigned short outputWord[MAX_WORD_LENGTH]; int outputWordLength = 0; - for (int i = 0; i < firstOutputWordLength; ++i) { - outputWord[i] = firstOutputWord[i]; - } + WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); + const bool isSpaceProximity = spaceProximityPos >= 0; - outputWord[firstOutputWordLength] = SPACE; - outputWordLength = firstOutputWordLength + 1; + // First word + int inputWordStartPos = 0; + int inputWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos; + const int firstFreq = getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes, + useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate, + FIRST_WORD_INDEX, inputWordStartPos, inputWordLength, 0, outputWord, &outputWordLength); + if (firstFreq <= 0) { + return; + } // Second word - const int secondInputWordLength = isSpaceProximity - ? (inputLength - spaceProximityPos - 1) + inputWordStartPos = isSpaceProximity ? (spaceProximityPos + 1) : missingSpacePos; + inputWordLength = isSpaceProximity ? (inputLength - spaceProximityPos - 1) : (inputLength - missingSpacePos); - const int secondInputWordStartPos = - isSpaceProximity ? (spaceProximityPos + 1) : missingSpacePos; - int secondFreq = getMostFrequentWordLike( - secondInputWordStartPos, secondInputWordLength, proximityInfo, mWord); - unsigned short* secondOutputWord = 0; - int secondOutputWordLength = 0; - - if (secondFreq > 0) { - secondOutputWordLength = secondInputWordLength; - secondOutputWord = mWord; - } else if (!hasAutoCorrectionCandidate) { - const int offset = secondInputWordStartPos; - initSuggestions(proximityInfo, &xcoordinates[offset], &ycoordinates[offset], - codes + offset * MAX_PROXIMITY_CHARS, secondInputWordLength, correction); - queuePool->clearSubQueue2(); - getSuggestionCandidates(useFullEditDistance, secondInputWordLength, correction, - queuePool, false, MAX_ERRORS_FOR_TWO_WORDS, SECOND_WORD_INDEX); - if (DEBUG_DICT) { - AKLOGI("Dump second word candidates %d", secondInputWordLength); - for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) { - queuePool->getSubQueue2(i)->dumpTopWord(); - } - } - WordsPriorityQueue* secondWordQueue = queuePool->getSubQueue2(secondInputWordLength); - if (!secondWordQueue || secondWordQueue->size() < 1) { - return; - } - int score = 0; - const double ns = secondWordQueue->getHighestNormalizedScore( - proximityInfo->getPrimaryInputWord(), secondInputWordLength, - &secondOutputWord, &score, &secondOutputWordLength); - if (DEBUG_DICT) { - AKLOGI("NS2 = %f, Score = %d", ns, score); - } - // Two words correction won't be done if the score of the first word doesn't exceed the - // threshold. - if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD - || secondOutputWordLength < SUB_QUEUE_MIN_WORD_LENGTH) { - return; - } - secondFreq = score >> (secondOutputWordLength - + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER); - } - - if (DEBUG_DICT) { - DUMP_WORD(secondOutputWord, secondOutputWordLength); - AKLOGI("Second freq: %d", secondFreq); - } - - if (secondFreq <= 0 || secondOutputWordLength <= 0 - || MAX_WORD_LENGTH <= (firstOutputWordLength + 1 + secondOutputWordLength)) { + const int secondFreq = getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes, + useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate, + SECOND_WORD_INDEX, inputWordStartPos, inputWordLength, outputWordLength, outputWord, + &outputWordLength); + if (secondFreq <= 0) { return; } - for (int i = 0; i < secondOutputWordLength; ++i) { - outputWord[firstOutputWordLength + 1 + i] = secondOutputWord[i]; - } - - outputWordLength += secondOutputWordLength; - // TODO: Remove initSuggestions and correction->setCorrectionParams initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction); diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h index 0b8271954..0f50ccbd8 100644 --- a/native/src/unigram_dictionary.h +++ b/native/src/unigram_dictionary.h @@ -127,6 +127,13 @@ class UnigramDictionary { ProximityInfo *proximityInfo, unsigned short *word); int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length, short unsigned int *outWord); + int getSubStringSuggestion( + ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, + const int *codes, const bool useFullEditDistance, Correction *correction, + WordsPriorityQueuePool* queuePool, const int inputLength, + const bool hasAutoCorrectionCandidate, const int currentWordIndex, + const int inputWordStartPos, const int inputWordLength, + const int outputWordStartPos, unsigned short* outputWord, int *outputWordLength); const uint8_t* const DICT_ROOT; const int MAX_WORD_LENGTH; diff --git a/native/src/words_priority_queue_pool.h b/native/src/words_priority_queue_pool.h index 599b89711..a4aa8b6ca 100644 --- a/native/src/words_priority_queue_pool.h +++ b/native/src/words_priority_queue_pool.h @@ -43,25 +43,24 @@ class WordsPriorityQueuePool { return mMasterQueue; } - // TODO: Come up with more generic pool - WordsPriorityQueue* getSubQueue1(const int id) { - if (id < 0 || id >= SUB_QUEUE_MAX_COUNT) { - if (DEBUG_WORDS_PRIORITY_QUEUE) { - assert(false); - } + WordsPriorityQueue* getSubQueue(const int wordIndex, const int inputWordLength) { + if (wordIndex > SUB_QUEUE_MAX_WORD_INDEX) { return 0; } - return mSubQueues1[id]; - } - - WordsPriorityQueue* getSubQueue2(const int id) { - if (id < 0 || id >= SUB_QUEUE_MAX_COUNT) { + if (inputWordLength < 0 || inputWordLength >= SUB_QUEUE_MAX_COUNT) { if (DEBUG_WORDS_PRIORITY_QUEUE) { assert(false); } return 0; } - return mSubQueues2[id]; + // TODO: Come up with more generic pool + if (wordIndex == 1) { + return mSubQueues1[inputWordLength]; + } else if (wordIndex == 2) { + return mSubQueues2[inputWordLength]; + } else { + return 0; + } } inline void clearAll() { @@ -72,15 +71,13 @@ class WordsPriorityQueuePool { } } - inline void clearSubQueue1() { + inline void clearSubQueue(const int wordIndex) { for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) { - mSubQueues1[i]->clear(); - } - } - - inline void clearSubQueue2() { - for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) { - mSubQueues2[i]->clear(); + if (wordIndex == 1) { + mSubQueues1[i]->clear(); + } else if (wordIndex == 2) { + mSubQueues2[i]->clear(); + } } } |