diff options
-rw-r--r-- | java/src/com/android/inputmethod/latin/CandidateView.java | 357 | ||||
-rw-r--r-- | java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java | 141 | ||||
-rw-r--r-- | java/src/com/android/inputmethod/latin/spellcheck/SpellChecker.java | 116 | ||||
-rw-r--r-- | native/src/correction_state.cpp | 131 | ||||
-rw-r--r-- | native/src/correction_state.h | 63 | ||||
-rw-r--r-- | native/src/unigram_dictionary.cpp | 221 | ||||
-rw-r--r-- | native/src/unigram_dictionary.h | 12 |
7 files changed, 572 insertions, 469 deletions
diff --git a/java/src/com/android/inputmethod/latin/CandidateView.java b/java/src/com/android/inputmethod/latin/CandidateView.java index 9b39e36a0..915e73ccb 100644 --- a/java/src/com/android/inputmethod/latin/CandidateView.java +++ b/java/src/com/android/inputmethod/latin/CandidateView.java @@ -76,8 +76,6 @@ public class CandidateView extends LinearLayout implements OnClickListener { private final ArrayList<TextView> mInfos = new ArrayList<TextView>(); private final ArrayList<View> mDividers = new ArrayList<View>(); - private final int mCandidateStripHeight; - private final PopupWindow mPreviewPopup; private final TextView mPreviewText; @@ -149,19 +147,113 @@ public class CandidateView extends LinearLayout implements OnClickListener { public final int mDividerWidth; public final int mDividerHeight; public final int mControlWidth; + public final int mCandidateStripHeight; + + protected final List<TextView> mWords; + protected final List<View> mDividers; + protected final List<TextView> mInfos; - protected CandidateViewParams(TextView word, View divider, View control) { + protected CandidateViewParams(List<TextView> words, List<View> dividers, + List<TextView> infos, View control) { + mWords = words; + mDividers = dividers; + mInfos = infos; + + final TextView word = words.get(0); + final View divider = dividers.get(0); mPadding = word.getCompoundPaddingLeft() + word.getCompoundPaddingRight(); divider.measure(WRAP_CONTENT, MATCH_PARENT); mDividerWidth = divider.getMeasuredWidth(); mDividerHeight = divider.getMeasuredHeight(); mControlWidth = control.getMeasuredWidth(); + + final Resources res = word.getResources(); + mCandidateStripHeight = res.getDimensionPixelOffset(R.dimen.candidate_strip_height); } } private static class SuggestionsPaneParams extends CandidateViewParams { - public SuggestionsPaneParams(List<TextView> words, List<View> dividers, View control) { - super(words.get(0), dividers.get(0), control); + public SuggestionsPaneParams(List<TextView> words, List<View> dividers, + List<TextView> infos, View control) { + super(words, dividers, infos, control); + } + + public int layout(SuggestedWords suggestions, ViewGroup paneView, int from, int textColor, + int paneWidth) { + final int count = Math.min(mWords.size(), suggestions.size()); + View centeringFrom = null, lastView = null; + int x = 0, y = 0; + for (int index = from; index < count; index++) { + final int pos = index; + final TextView word = mWords.get(pos); + final View divider = mDividers.get(pos); + final TextPaint paint = word.getPaint(); + word.setTextColor(textColor); + final CharSequence styled = suggestions.getWord(pos); + + final TextView info; + if (DBG) { + final CharSequence debugInfo = getDebugInfo(suggestions, index); + if (debugInfo != null) { + info = mInfos.get(index); + info.setText(debugInfo); + } else { + info = null; + } + } else { + info = null; + } + + final CharSequence text; + final float scaleX; + paint.setTextScaleX(1.0f); + final int textWidth = getTextWidth(styled, paint); + int available = paneWidth - x - mPadding; + if (textWidth >= available) { + // Needs new row, centering previous row. + centeringCandidates(paneView, centeringFrom, lastView, x, paneWidth); + x = 0; + y += mCandidateStripHeight; + } + if (x != 0) { + // Add divider if this isn't the left most suggestion in current row. + paneView.addView(divider); + FrameLayoutCompatUtils.placeViewAt(divider, x, y + + (mCandidateStripHeight - mDividerHeight) / 2, mDividerWidth, + mDividerHeight); + x += mDividerWidth; + } + available = paneWidth - x - mPadding; + text = getEllipsizedText(styled, available, paint); + scaleX = paint.getTextScaleX(); + word.setText(text); + word.setTextScaleX(scaleX); + paneView.addView(word); + lastView = word; + if (x == 0) + centeringFrom = word; + word.measure(WRAP_CONTENT, + MeasureSpec.makeMeasureSpec(mCandidateStripHeight, MeasureSpec.EXACTLY)); + final int width = word.getMeasuredWidth(); + final int height = word.getMeasuredHeight(); + FrameLayoutCompatUtils.placeViewAt(word, x, y + (mCandidateStripHeight - height) + / 2, width, height); + x += width; + if (info != null) { + paneView.addView(info); + lastView = info; + info.measure(WRAP_CONTENT, WRAP_CONTENT); + final int infoWidth = info.getMeasuredWidth(); + FrameLayoutCompatUtils.placeViewAt(info, x - infoWidth, y, infoWidth, + info.getMeasuredHeight()); + } + } + if (x != 0) { + // Centering last candidates row. + centeringCandidates(paneView, centeringFrom, lastView, x, paneWidth); + } + + return count - from; } } @@ -182,25 +274,25 @@ public class CandidateView extends LinearLayout implements OnClickListener { private static final int AUTO_CORRECT_UNDERLINE = 0x02; private static final int AUTO_CORRECT_INVERT = 0x04; - public final TextPaint mPaint; + private final TextPaint mPaint; private final int mAutoCorrectHighlight; private final ArrayList<CharSequence> mTexts = new ArrayList<CharSequence>(); private SuggestedWords mSuggestedWords; - public int mCountInStrip; + private int mCountInStrip; // True if the mCountInStrip suggestions can fit in suggestion strip in equally divided // width without squeezing the text. - public boolean mCanUseFixedWidthColumns; - public int mMaxWidth; - public int mAvailableWidthForWords; - public int mConstantWidthForPaddings; - public int mVariableWidthForWords; - public float mScaleX; + private boolean mCanUseFixedWidthColumns; + private int mMaxWidth; + private int mAvailableWidthForWords; + private int mConstantWidthForPaddings; + private int mVariableWidthForWords; + private float mScaleX; public SuggestionsStripParams(Context context, AttributeSet attrs, int defStyle, - List<TextView> words, List<View> dividers, View control) { - super(words.get(0), dividers.get(0), control); + List<TextView> words, List<View> dividers, List<TextView> infos, View control) { + super(words, dividers, infos, control); final TypedArray a = context.obtainStyledAttributes( attrs, R.styleable.CandidateView, defStyle, R.style.CandidateViewStyle); mAutoCorrectHighlight = a.getInt(R.styleable.CandidateView_autoCorrectHighlight, 0); @@ -220,24 +312,11 @@ public class CandidateView extends LinearLayout implements OnClickListener { mPaint.setTextSize(textSize); } - public CharSequence getWord(int pos) { - return mTexts.get(pos); + public int getTextColor() { + return mColorTypedWord; } - public CharSequence getDebugInfo(int pos) { - if (DBG) { - final SuggestedWordInfo wordInfo = mSuggestedWords.getInfo(pos); - if (wordInfo != null) { - final CharSequence debugInfo = wordInfo.getDebugString(); - if (!TextUtils.isEmpty(debugInfo)) { - return debugInfo; - } - } - } - return null; - } - - public CharSequence getStyledCandidateWord(CharSequence word, boolean isAutoCorrect) { + private CharSequence getStyledCandidateWord(CharSequence word, boolean isAutoCorrect) { if (!isAutoCorrect) return word; final int len = word.length(); @@ -249,7 +328,7 @@ public class CandidateView extends LinearLayout implements OnClickListener { return spannedWord; } - public int getWordPosition(int index) { + private int getWordPosition(int index) { if (index >= 2) { return index; } @@ -258,7 +337,7 @@ public class CandidateView extends LinearLayout implements OnClickListener { return willAutoCorrect ? 1 - index : index; } - public int getCandidateTextColor(int pos) { + private int getCandidateTextColor(int pos) { final SuggestedWords suggestions = mSuggestedWords; final boolean isAutoCorrect = suggestions.mHasMinimalSuggestion && ((pos == 1 && !suggestions.mTypedWordValid) @@ -300,7 +379,8 @@ public class CandidateView extends LinearLayout implements OnClickListener { return word; } - public void layoutStrip(SuggestedWords suggestions, int maxWidth) { + public int layout(SuggestedWords suggestions, ViewGroup stripView, ViewGroup paneView, + int stripWidth) { mSuggestedWords = suggestions; final int maxCount = suggestions.isPunctuationSuggestions() ? PUNCTUATIONS_IN_STRIP : mCandidateCountInStrip; @@ -308,7 +388,68 @@ public class CandidateView extends LinearLayout implements OnClickListener { setupTexts(suggestions, size); mCountInStrip = Math.min(maxCount, size); mScaleX = 1.0f; + calculateParameters(size, stripWidth); + + int infoX = 0; + for (int index = 0; index < mCountInStrip; index++) { + final int pos = getWordPosition(index); + final TextView word = mWords.get(pos); + final View divider = mDividers.get(pos); + final TextPaint paint = word.getPaint(); + // TODO: Reorder candidates in strip as appropriate. The center candidate should + // hold the word when space is typed (valid typed word or auto corrected word). + word.setTextColor(getCandidateTextColor(pos)); + final CharSequence styled = mTexts.get(pos); + + final TextView info; + if (DBG) { + final CharSequence debugInfo = getDebugInfo(mSuggestedWords, index); + if (debugInfo != null) { + info = mInfos.get(index); + info.setText(debugInfo); + } else { + info = null; + } + } else { + info = null; + } + final CharSequence text; + final float scaleX; + if (index == 0 && mCountInStrip == 1) { + text = getEllipsizedText(styled, mMaxWidth, paint); + scaleX = paint.getTextScaleX(); + } else { + text = styled; + scaleX = mScaleX; + } + word.setText(text); + word.setTextScaleX(scaleX); + if (index != 0) { + // Add divider if this isn't the left most suggestion in candidate strip. + stripView.addView(divider); + } + stripView.addView(word); + if (mCanUseFixedWidthColumns) { + setLayoutWeight(word, 1.0f, mCandidateStripHeight); + } else { + final int width = getTextWidth(text, paint) + mPadding; + setLayoutWeight(word, width, mCandidateStripHeight); + } + if (info != null) { + paneView.addView(info); + info.measure(WRAP_CONTENT, WRAP_CONTENT); + final int width = info.getMeasuredWidth(); + final int y = info.getMeasuredHeight(); + FrameLayoutCompatUtils.placeViewAt(info, infoX, 0, width, y); + infoX += width * 2; + } + } + + return mCountInStrip; + } + + private void calculateParameters(int size, int maxWidth) { do { mMaxWidth = maxWidth; if (size > mCountInStrip) { @@ -334,7 +475,7 @@ public class CandidateView extends LinearLayout implements OnClickListener { } while (mCountInStrip > 1); } - public void tryLayout() { + private void tryLayout() { final int maxCount = mCountInStrip; final int dividers = mDividerWidth * (maxCount - 1); mConstantWidthForPaddings = dividers + mPadding * maxCount; @@ -396,8 +537,7 @@ public class CandidateView extends LinearLayout implements OnClickListener { setBackgroundDrawable(LinearLayoutCompatUtils.getBackgroundDrawable( context, attrs, defStyle, R.style.CandidateViewStyle)); - Resources res = context.getResources(); - LayoutInflater inflater = LayoutInflater.from(context); + final LayoutInflater inflater = LayoutInflater.from(context); inflater.inflate(R.layout.candidates_strip, this); mPreviewPopup = new PopupWindow(context); @@ -408,7 +548,6 @@ public class CandidateView extends LinearLayout implements OnClickListener { mPreviewPopup.setBackgroundDrawable(null); mCandidatesStrip = (ViewGroup)findViewById(R.id.candidates_strip); - mCandidateStripHeight = res.getDimensionPixelOffset(R.dimen.candidate_strip_height); for (int i = 0; i < MAX_SUGGESTIONS; i++) { final TextView word = (TextView)inflater.inflate(R.layout.candidate_word, null); word.setTag(i); @@ -457,8 +596,9 @@ public class CandidateView extends LinearLayout implements OnClickListener { mCandidatesPaneControl.measure(WRAP_CONTENT, WRAP_CONTENT); mStripParams = new SuggestionsStripParams(context, attrs, defStyle, - mWords, mDividers, mCandidatesPaneControl); - mPaneParams = new SuggestionsPaneParams(mWords, mDividers, mCandidatesPaneControl); + mWords, mDividers, mInfos, mCandidatesPaneControl); + mPaneParams = new SuggestionsPaneParams( + mWords, mDividers, mInfos, mCandidatesPaneControl); } /** @@ -490,128 +630,35 @@ public class CandidateView extends LinearLayout implements OnClickListener { private void updateSuggestions() { clear(); closeCandidatesPane(); - final SuggestedWords suggestions = mSuggestions; - if (suggestions.size() == 0) + if (mSuggestions.size() == 0) return; - final int paneWidth = getWidth(); - final SuggestionsStripParams stripParams = mStripParams; - final SuggestionsPaneParams paneParams = mPaneParams; - stripParams.layoutStrip(suggestions, paneWidth); + final int width = getWidth(); + final int countInStrip = mStripParams.layout( + mSuggestions, mCandidatesStrip, mCandidatesPane, width); + final int countInPane = mPaneParams.layout( + mSuggestions, mCandidatesPane, countInStrip, mStripParams.getTextColor(), width); - final int count = Math.min(mWords.size(), suggestions.size()); - if (count <= stripParams.mCountInStrip && !DBG) { + if (countInPane <= 0 && !DBG) { mCandidatesPaneControl.setVisibility(GONE); } else { mCandidatesPaneControl.setVisibility(VISIBLE); mExpandCandidatesPane.setVisibility(VISIBLE); mExpandCandidatesPane.setEnabled(true); } + } - final int countInStrip = stripParams.mCountInStrip; - View centeringFrom = null, lastView = null; - int x = 0, y = 0, infoX = 0; - for (int index = 0; index < count; index++) { - final int pos = stripParams.getWordPosition(index); - final TextView word = mWords.get(pos); - final View divider = mDividers.get(pos); - final TextPaint paint = word.getPaint(); - // TODO: Reorder candidates in strip as appropriate. The center candidate should hold - // the word when space is typed (valid typed word or auto corrected word). - word.setTextColor(stripParams.getCandidateTextColor(pos)); - final CharSequence styled = stripParams.getWord(pos); - - final TextView info; - if (DBG) { - final CharSequence debugInfo = stripParams.getDebugInfo(index); - if (debugInfo != null) { - info = mInfos.get(index); - info.setText(debugInfo); - } else { - info = null; - } - } else { - info = null; - } - - final CharSequence text; - final float scaleX; - if (index < countInStrip) { - if (index == 0 && stripParams.mCountInStrip == 1) { - text = getEllipsizedText(styled, stripParams.mMaxWidth, paint); - scaleX = paint.getTextScaleX(); - } else { - text = styled; - scaleX = stripParams.mScaleX; - } - word.setText(text); - word.setTextScaleX(scaleX); - if (index != 0) { - // Add divider if this isn't the left most suggestion in candidate strip. - mCandidatesStrip.addView(divider); - } - mCandidatesStrip.addView(word); - if (stripParams.mCanUseFixedWidthColumns) { - setLayoutWeight(word, 1.0f, mCandidateStripHeight); - } else { - final int width = getTextWidth(text, paint) + stripParams.mPadding; - setLayoutWeight(word, width, mCandidateStripHeight); - } - if (info != null) { - mCandidatesPane.addView(info); - info.measure(WRAP_CONTENT, WRAP_CONTENT); - final int width = info.getMeasuredWidth(); - y = info.getMeasuredHeight(); - FrameLayoutCompatUtils.placeViewAt(info, infoX, 0, width, y); - infoX += width * 2; - } - } else { - paint.setTextScaleX(1.0f); - final int textWidth = getTextWidth(styled, paint); - int available = paneWidth - x - paneParams.mPadding; - if (textWidth >= available) { - // Needs new row, centering previous row. - centeringCandidates(centeringFrom, lastView, x, paneWidth); - x = 0; - y += mCandidateStripHeight; - } - if (x != 0) { - // Add divider if this isn't the left most suggestion in current row. - mCandidatesPane.addView(divider); - FrameLayoutCompatUtils.placeViewAt( - divider, x, y + (mCandidateStripHeight - paneParams.mDividerHeight) / 2, - paneParams.mDividerWidth, paneParams.mDividerHeight); - x += paneParams.mDividerWidth; - } - available = paneWidth - x - paneParams.mPadding; - text = getEllipsizedText(styled, available, paint); - scaleX = paint.getTextScaleX(); - word.setText(text); - word.setTextScaleX(scaleX); - mCandidatesPane.addView(word); - lastView = word; - if (x == 0) centeringFrom = word; - word.measure(WRAP_CONTENT, - MeasureSpec.makeMeasureSpec(mCandidateStripHeight, MeasureSpec.EXACTLY)); - final int width = word.getMeasuredWidth(); - final int height = word.getMeasuredHeight(); - FrameLayoutCompatUtils.placeViewAt( - word, x, y + (mCandidateStripHeight - height) / 2, width, height); - x += width; - if (info != null) { - mCandidatesPane.addView(info); - lastView = info; - info.measure(WRAP_CONTENT, WRAP_CONTENT); - final int infoWidth = info.getMeasuredWidth(); - FrameLayoutCompatUtils.placeViewAt( - info, x - infoWidth, y, infoWidth, info.getMeasuredHeight()); + private static CharSequence getDebugInfo(SuggestedWords suggestions, int pos) { + if (DBG) { + final SuggestedWordInfo wordInfo = suggestions.getInfo(pos); + if (wordInfo != null) { + final CharSequence debugInfo = wordInfo.getDebugString(); + if (!TextUtils.isEmpty(debugInfo)) { + return debugInfo; } } } - if (x != 0) { - // Centering last candidates row. - centeringCandidates(centeringFrom, lastView, x, paneWidth); - } + return null; } private static void setLayoutWeight(View v, float weight, int height) { @@ -624,13 +671,13 @@ public class CandidateView extends LinearLayout implements OnClickListener { } } - private void centeringCandidates(View from, View to, int width, int paneWidth) { - final ViewGroup pane = mCandidatesPane; - final int fromIndex = pane.indexOfChild(from); - final int toIndex = pane.indexOfChild(to); - final int offset = (paneWidth - width) / 2; + private static void centeringCandidates(ViewGroup parent, View from, View to, int width, + int parentWidth) { + final int fromIndex = parent.indexOfChild(from); + final int toIndex = parent.indexOfChild(to); + final int offset = (parentWidth - width) / 2; for (int index = fromIndex; index <= toIndex; index++) { - offsetMargin(pane.getChildAt(index), offset, 0); + offsetMargin(parent.getChildAt(index), offset, 0); } } diff --git a/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java b/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java index 4a822d7b0..7c92bc82a 100644 --- a/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java +++ b/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java @@ -16,35 +16,144 @@ package com.android.inputmethod.latin.spellcheck; +import android.content.res.Resources; import android.service.textservice.SpellCheckerService; +import android.service.textservice.SpellCheckerService.Session; import android.util.Log; import android.view.textservice.SuggestionsInfo; import android.view.textservice.TextInfo; +import com.android.inputmethod.compat.ArraysCompatUtils; +import com.android.inputmethod.keyboard.Key; +import com.android.inputmethod.keyboard.ProximityInfo; +import com.android.inputmethod.latin.Dictionary; +import com.android.inputmethod.latin.Dictionary.DataType; +import com.android.inputmethod.latin.Dictionary.WordCallback; +import com.android.inputmethod.latin.DictionaryFactory; +import com.android.inputmethod.latin.Utils; +import com.android.inputmethod.latin.WordComposer; + +import java.util.Collections; +import java.util.List; +import java.util.LinkedList; +import java.util.Locale; +import java.util.Map; +import java.util.TreeMap; + /** * Service for spell checking, using LatinIME's dictionaries and mechanisms. */ public class AndroidSpellCheckerService extends SpellCheckerService { private static final String TAG = AndroidSpellCheckerService.class.getSimpleName(); private static final boolean DBG = true; + + private final static String[] emptyArray = new String[0]; + private final ProximityInfo mProximityInfo = ProximityInfo.getDummyProximityInfo(); + private final Map<String, Dictionary> mDictionaries = + Collections.synchronizedMap(new TreeMap<String, Dictionary>()); + @Override - public SuggestionsInfo getSuggestions(TextInfo textInfo, int suggestionsLimit, - String locale) { - // TODO: implement this - final String text = textInfo.getText(); - if (DBG) { - Log.w(TAG, "getSuggestions: " + text); + public Session createSession() { + return new AndroidSpellCheckerSession(); + } + + private static class SuggestionsGatherer implements WordCallback { + private final int DEFAULT_SUGGESTION_LENGTH = 16; + private final String[] mSuggestions; + private final int[] mScores; + private final int mMaxLength; + private int mLength = 0; + + SuggestionsGatherer(final int maxLength) { + mMaxLength = maxLength; + mSuggestions = new String[mMaxLength]; + mScores = new int[mMaxLength]; + } + + @Override + synchronized public boolean addWord(char[] word, int wordOffset, int wordLength, int score, + int dicTypeId, DataType dataType) { + final int positionIndex = ArraysCompatUtils.binarySearch(mScores, 0, mLength, score); + // binarySearch returns the index if the element exists, and -<insertion index> - 1 + // if it doesn't. See documentation for binarySearch. + final int insertIndex = positionIndex >= 0 ? positionIndex : -positionIndex - 1; + + if (mLength < mMaxLength) { + final int copyLen = mLength - insertIndex; + ++mLength; + System.arraycopy(mScores, insertIndex, mScores, insertIndex + 1, copyLen); + System.arraycopy(mSuggestions, insertIndex, mSuggestions, insertIndex + 1, copyLen); + } else { + if (insertIndex == 0) return true; + System.arraycopy(mScores, 1, mScores, 0, insertIndex); + System.arraycopy(mSuggestions, 1, mSuggestions, 0, insertIndex); + } + mScores[insertIndex] = score; + mSuggestions[insertIndex] = new String(word, wordOffset, wordLength); + + return true; } - String[] candidates0 = new String[] {text, "candidate1", "candidate2", "candidate3"}; - String[] candidates1 = new String[] {text, "candidateA", "candidateB"}; - final int textLength = textInfo.getText().length() % 3; - if (textLength % 3 == 0) { - return new SuggestionsInfo(2 - | SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY, candidates0); - } else if (textLength % 3 == 1) { - return new SuggestionsInfo(SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY, candidates1); - } else { - return new SuggestionsInfo(0, null); + + public String[] getGatheredSuggestions() { + if (0 == mLength) return null; + + final String[] results = new String[mLength]; + for (int i = mLength - 1; i >= 0; --i) { + results[mLength - i - 1] = mSuggestions[i]; + } + return results; + } + } + + private Dictionary getDictionary(final String locale) { + Dictionary dictionary = mDictionaries.get(locale); + if (null == dictionary) { + final Resources resources = getResources(); + final int fallbackResourceId = Utils.getMainDictionaryResourceId(resources); + final Locale localeObject = Utils.constructLocaleFromString(locale); + dictionary = DictionaryFactory.createDictionaryFromManager(this, localeObject, + fallbackResourceId); + mDictionaries.put(locale, dictionary); + } + return dictionary; + } + + private class AndroidSpellCheckerSession extends Session { + @Override + public void onCreate() { + } + + // Note : this must be reentrant + /** + * Gets a list of suggestions for a specific string. This returns a list of possible + * corrections for the text passed as an arguments. It may split or group words, and + * even perform grammatical analysis. + */ + @Override + public SuggestionsInfo onGetSuggestions(final TextInfo textInfo, + final int suggestionsLimit) { + final String locale = getLocale(); + final Dictionary dictionary = getDictionary(locale); + final String text = textInfo.getText(); + + final SuggestionsGatherer suggestionsGatherer = + new SuggestionsGatherer(suggestionsLimit); + final WordComposer composer = new WordComposer(); + final int length = text.length(); + for (int i = 0; i < length; ++i) { + int character = text.codePointAt(i); + composer.add(character, new int[] { character }, + WordComposer.NOT_A_COORDINATE, WordComposer.NOT_A_COORDINATE); + } + dictionary.getWords(composer, suggestionsGatherer, mProximityInfo); + final boolean isInDict = dictionary.isValidWord(text); + final String[] suggestions = suggestionsGatherer.getGatheredSuggestions(); + + final int flags = + (isInDict ? SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY : 0) + | (null != suggestions + ? SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO : 0); + return new SuggestionsInfo(flags, suggestions); } } } diff --git a/java/src/com/android/inputmethod/latin/spellcheck/SpellChecker.java b/java/src/com/android/inputmethod/latin/spellcheck/SpellChecker.java deleted file mode 100644 index d7283515b..000000000 --- a/java/src/com/android/inputmethod/latin/spellcheck/SpellChecker.java +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (C) 2011 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ - -package com.android.inputmethod.latin.spellcheck; - -import android.content.Context; -import android.content.res.Resources; - -import com.android.inputmethod.compat.ArraysCompatUtils; -import com.android.inputmethod.keyboard.ProximityInfo; -import com.android.inputmethod.latin.Dictionary; -import com.android.inputmethod.latin.Dictionary.DataType; -import com.android.inputmethod.latin.Dictionary.WordCallback; -import com.android.inputmethod.latin.DictionaryFactory; -import com.android.inputmethod.latin.Utils; -import com.android.inputmethod.latin.WordComposer; - -import java.util.LinkedList; -import java.util.List; -import java.util.Locale; - -/** - * Implements spell checking methods. - */ -public class SpellChecker { - - public final Dictionary mDictionary; - - public SpellChecker(final Context context, final Locale locale) { - final Resources resources = context.getResources(); - final int fallbackResourceId = Utils.getMainDictionaryResourceId(resources); - mDictionary = DictionaryFactory.createDictionaryFromManager(context, locale, - fallbackResourceId); - } - - // Note : this must be reentrant - /** - * Finds out whether a word is in the dictionary or not. - * - * @param text the sequence containing the word to check for. - * @param start the index of the first character of the word in text. - * @param end the index of the next-to-last character in text. - * @return true if the word is in the dictionary, false otherwise. - */ - public boolean isCorrect(final CharSequence text, final int start, final int end) { - return mDictionary.isValidWord(text.subSequence(start, end)); - } - - private static class SuggestionsGatherer implements WordCallback { - private final int DEFAULT_SUGGESTION_LENGTH = 16; - private final List<String> mSuggestions = new LinkedList<String>(); - private int[] mScores = new int[DEFAULT_SUGGESTION_LENGTH]; - private int mLength = 0; - - @Override - synchronized public boolean addWord(char[] word, int wordOffset, int wordLength, int score, - int dicTypeId, DataType dataType) { - if (mLength >= mScores.length) { - final int newLength = mScores.length * 2; - mScores = new int[newLength]; - } - final int positionIndex = ArraysCompatUtils.binarySearch(mScores, 0, mLength, score); - // binarySearch returns the index if the element exists, and -<insertion index> - 1 - // if it doesn't. See documentation for binarySearch. - final int insertionIndex = positionIndex >= 0 ? positionIndex : -positionIndex - 1; - System.arraycopy(mScores, insertionIndex, mScores, insertionIndex + 1, - mLength - insertionIndex); - mLength += 1; - mScores[insertionIndex] = score; - mSuggestions.add(insertionIndex, new String(word, wordOffset, wordLength)); - return true; - } - - public List<String> getGatheredSuggestions() { - return mSuggestions; - } - } - - // Note : this must be reentrant - /** - * Gets a list of suggestions for a specific string. - * - * This returns a list of possible corrections for the text passed as an - * arguments. It may split or group words, and even perform grammatical - * analysis. - * - * @param text the sequence containing the word to check for. - * @param start the index of the first character of the word in text. - * @param end the index of the next-to-last character in text. - * @return a list of possible suggestions to replace the text. - */ - public List<String> getSuggestions(final CharSequence text, final int start, final int end) { - final SuggestionsGatherer suggestionsGatherer = new SuggestionsGatherer(); - final WordComposer composer = new WordComposer(); - for (int i = start; i < end; ++i) { - int character = text.charAt(i); - composer.add(character, new int[] { character }, - WordComposer.NOT_A_COORDINATE, WordComposer.NOT_A_COORDINATE); - } - mDictionary.getWords(composer, suggestionsGatherer, ProximityInfo.getDummyProximityInfo()); - return suggestionsGatherer.getGatheredSuggestions(); - } -} diff --git a/native/src/correction_state.cpp b/native/src/correction_state.cpp index b2c77b00d..9000e9e9c 100644 --- a/native/src/correction_state.cpp +++ b/native/src/correction_state.cpp @@ -25,13 +25,31 @@ namespace latinime { +////////////////////// +// inline functions // +////////////////////// +static const char QUOTE = '\''; + +inline bool CorrectionState::needsToSkipCurrentNode(const unsigned short c) { + const unsigned short userTypedChar = mProximityInfo->getPrimaryCharAt(mInputIndex); + // Skip the ' or other letter and continue deeper + return (c == QUOTE && userTypedChar != QUOTE) || mSkipPos == mOutputIndex; +} + +///////////////////// +// CorrectionState // +///////////////////// + CorrectionState::CorrectionState(const int typedLetterMultiplier, const int fullWordMultiplier) : TYPED_LETTER_MULTIPLIER(typedLetterMultiplier), FULL_WORD_MULTIPLIER(fullWordMultiplier) { } -void CorrectionState::initCorrectionState(const ProximityInfo *pi, const int inputLength) { +void CorrectionState::initCorrectionState(const ProximityInfo *pi, const int inputLength, + const int maxDepth) { mProximityInfo = pi; mInputLength = inputLength; + mMaxDepth = maxDepth; + mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2; } void CorrectionState::setCorrectionParams(const int skipPos, const int excessivePos, @@ -58,27 +76,37 @@ int CorrectionState::getFreqForSplitTwoWords(const int firstFreq, const int seco return CorrectionState::RankingAlgorithm::calcFreqForSplitTwoWords(firstFreq, secondFreq, this); } -int CorrectionState::getFinalFreq(const unsigned short *word, const int freq) { - if (mProximityInfo->sameAsTyped(word, mOutputIndex + 1) || mOutputIndex < MIN_SUGGEST_DEPTH) { +int CorrectionState::getFinalFreq(const int freq, unsigned short **word, int *wordLength) { + const int outputIndex = mOutputIndex - 1; + const int inputIndex = (mCurrentStateType == TRAVERSE_ALL_ON_TERMINAL + || mCurrentStateType == TRAVERSE_ALL_NOT_ON_TERMINAL) ? mInputIndex : mInputIndex - 1; + *wordLength = outputIndex + 1; + if (mProximityInfo->sameAsTyped(mWord, outputIndex + 1) || outputIndex < MIN_SUGGEST_DEPTH) { return -1; } - const bool sameLength = (mExcessivePos == mInputLength - 1) ? (mInputLength == mInputIndex + 2) - : (mInputLength == mInputIndex + 1); + *word = mWord; + const bool sameLength = (mExcessivePos == mInputLength - 1) ? (mInputLength == inputIndex + 2) + : (mInputLength == inputIndex + 1); return CorrectionState::RankingAlgorithm::calculateFinalFreq( - mInputIndex, mOutputIndex, mMatchedCharCount, freq, sameLength, this); + inputIndex, outputIndex, mMatchedCharCount, freq, sameLength, this); } -void CorrectionState::initProcessState( - const int matchCount, const int inputIndex, const int outputIndex) { +void CorrectionState::initProcessState(const int matchCount, const int inputIndex, + const int outputIndex, const bool traverseAllNodes, const int diffs) { mMatchedCharCount = matchCount; mInputIndex = inputIndex; mOutputIndex = outputIndex; + mTraverseAllNodes = traverseAllNodes; + mDiffs = diffs; } -void CorrectionState::getProcessState(int *matchedCount, int *inputIndex, int *outputIndex) { +void CorrectionState::getProcessState(int *matchedCount, int *inputIndex, int *outputIndex, + bool *traverseAllNodes, int *diffs) { *matchedCount = mMatchedCharCount; *inputIndex = mInputIndex; *outputIndex = mOutputIndex; + *traverseAllNodes = mTraverseAllNodes; + *diffs = mDiffs; } void CorrectionState::charMatched() { @@ -95,6 +123,11 @@ int CorrectionState::getInputIndex() { return mInputIndex; } +// TODO: remove +bool CorrectionState::needsToTraverseAll() { + return mTraverseAllNodes; +} + void CorrectionState::incrementInputIndex() { ++mInputIndex; } @@ -103,6 +136,86 @@ void CorrectionState::incrementOutputIndex() { ++mOutputIndex; } +void CorrectionState::startTraverseAll() { + mTraverseAllNodes = true; +} + +bool CorrectionState::needsToPrune() const { + return (mOutputIndex - 1 >= (mTransposedPos >= 0 ? mInputLength - 1 : mMaxDepth) + || mDiffs > mMaxEditDistance); +} + +CorrectionState::CorrectionStateType CorrectionState::processCharAndCalcState( + const int32_t c, const bool isTerminal) { + mCurrentStateType = NOT_ON_TERMINAL; + // This has to be done for each virtual char (this forwards the "inputIndex" which + // is the index in the user-inputted chars, as read by proximity chars. + if (mExcessivePos == mOutputIndex && mInputIndex < mInputLength - 1) { + incrementInputIndex(); + } + + if (mTraverseAllNodes || needsToSkipCurrentNode(c)) { + mWord[mOutputIndex] = c; + if (needsToTraverseAll() && isTerminal) { + mCurrentStateType = TRAVERSE_ALL_ON_TERMINAL; + } else { + mCurrentStateType = TRAVERSE_ALL_NOT_ON_TERMINAL; + } + } else { + int inputIndexForProximity = mInputIndex; + + if (mTransposedPos >= 0) { + if (mInputIndex == mTransposedPos) { + ++inputIndexForProximity; + } + if (mInputIndex == (mTransposedPos + 1)) { + --inputIndexForProximity; + } + } + + int matchedProximityCharId = mProximityInfo->getMatchedProximityId( + inputIndexForProximity, c, this); + if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId) { + mCurrentStateType = UNRELATED; + return mCurrentStateType; + } + mWord[mOutputIndex] = c; + // If inputIndex is greater than mInputLength, that means there is no + // proximity chars. So, we don't need to check proximity. + if (ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) { + charMatched(); + } + + if (ProximityInfo::NEAR_PROXIMITY_CHAR == matchedProximityCharId) { + incrementDiffs(); + } + + const bool isSameAsUserTypedLength = mInputLength + == getInputIndex() + 1 + || (mExcessivePos == mInputLength - 1 + && getInputIndex() == mInputLength - 2); + if (isSameAsUserTypedLength && isTerminal) { + mCurrentStateType = ON_TERMINAL; + } + // Start traversing all nodes after the index exceeds the user typed length + if (isSameAsUserTypedLength) { + startTraverseAll(); + } + + // Finally, we are ready to go to the next character, the next "virtual node". + // We should advance the input index. + // We do this in this branch of the 'if traverseAllNodes' because we are still matching + // characters to input; the other branch is not matching them but searching for + // completions, this is why it does not have to do it. + incrementInputIndex(); + } + + // Also, the next char is one "virtual node" depth more than this char. + incrementOutputIndex(); + + return mCurrentStateType; +} + CorrectionState::~CorrectionState() { } diff --git a/native/src/correction_state.h b/native/src/correction_state.h index cc3c3e669..a548bcb68 100644 --- a/native/src/correction_state.h +++ b/native/src/correction_state.h @@ -29,49 +29,76 @@ class CorrectionState { public: typedef enum { - ALLOW_ALL, + TRAVERSE_ALL_ON_TERMINAL, + TRAVERSE_ALL_NOT_ON_TERMINAL, UNRELATED, - RELATED + ON_TERMINAL, + NOT_ON_TERMINAL } CorrectionStateType; CorrectionState(const int typedLetterMultiplier, const int fullWordMultiplier); - void initCorrectionState(const ProximityInfo *pi, const int inputLength); + void initCorrectionState( + const ProximityInfo *pi, const int inputLength, const int maxWordLength); void setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos, const int spaceProximityPos, const int missingSpacePos); void checkState(); - void initProcessState(const int matchCount, const int inputIndex, const int outputIndex); - void getProcessState(int *matchedCount, int *inputIndex, int *outputIndex); - void charMatched(); - void incrementInputIndex(); - void incrementOutputIndex(); + void initProcessState(const int matchCount, const int inputIndex, const int outputIndex, + const bool traverseAllNodes, const int diffs); + void getProcessState(int *matchedCount, int *inputIndex, int *outputIndex, + bool *traverseAllNodes, int *diffs); int getOutputIndex(); int getInputIndex(); + bool needsToTraverseAll(); virtual ~CorrectionState(); + int getSpaceProximityPos() const { + return mSpaceProximityPos; + } + int getMissingSpacePos() const { + return mMissingSpacePos; + } + int getSkipPos() const { return mSkipPos; } + int getExcessivePos() const { return mExcessivePos; } + int getTransposedPos() const { return mTransposedPos; } - int getSpaceProximityPos() const { - return mSpaceProximityPos; - } - int getMissingSpacePos() const { - return mMissingSpacePos; - } + + bool needsToPrune() const; + int getFreqForSplitTwoWords(const int firstFreq, const int secondFreq); - int getFinalFreq(const unsigned short *word, const int freq); + int getFinalFreq(const int freq, unsigned short **word, int* wordLength); + + CorrectionStateType processCharAndCalcState(const int32_t c, const bool isTerminal); + int getDiffs() const { + return mDiffs; + } private: + void charMatched(); + void incrementInputIndex(); + void incrementOutputIndex(); + void startTraverseAll(); + + // TODO: remove + + void incrementDiffs() { + ++mDiffs; + } const int TYPED_LETTER_MULTIPLIER; const int FULL_WORD_MULTIPLIER; const ProximityInfo *mProximityInfo; + + int mMaxEditDistance; + int mMaxDepth; int mInputLength; int mSkipPos; int mExcessivePos; @@ -82,6 +109,12 @@ private: int mMatchedCharCount; int mInputIndex; int mOutputIndex; + int mDiffs; + bool mTraverseAllNodes; + CorrectionStateType mCurrentStateType; + unsigned short mWord[MAX_WORD_LENGTH_INTERNAL]; + + inline bool needsToSkipCurrentNode(const unsigned short c); class RankingAlgorithm { public: diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp index b95da99a3..93d2b8418 100644 --- a/native/src/unigram_dictionary.cpp +++ b/native/src/unigram_dictionary.cpp @@ -181,14 +181,14 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, PROF_START(0); initSuggestions( proximityInfo, xcoordinates, ycoordinates, codes, codesSize, outWords, frequencies); - mCorrectionState->initCorrectionState(mProximityInfo, mInputLength); if (DEBUG_DICT) assert(codesSize == mInputLength); - const int MAX_DEPTH = min(mInputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH); + const int maxDepth = min(mInputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH); + mCorrectionState->initCorrectionState(mProximityInfo, mInputLength, maxDepth); PROF_END(0); PROF_START(1); - getSuggestionCandidates(-1, -1, -1, MAX_DEPTH); + getSuggestionCandidates(-1, -1, -1); PROF_END(1); PROF_START(2); @@ -198,7 +198,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, if (DEBUG_DICT) { LOGI("--- Suggest missing characters %d", i); } - getSuggestionCandidates(i, -1, -1, MAX_DEPTH); + getSuggestionCandidates(i, -1, -1); } } PROF_END(2); @@ -211,7 +211,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, if (DEBUG_DICT) { LOGI("--- Suggest excessive characters %d", i); } - getSuggestionCandidates(-1, i, -1, MAX_DEPTH); + getSuggestionCandidates(-1, i, -1); } } PROF_END(3); @@ -224,7 +224,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, if (DEBUG_DICT) { LOGI("--- Suggest transposed characters %d", i); } - getSuggestionCandidates(-1, -1, i, mInputLength - 1); + getSuggestionCandidates(-1, -1, i); } } PROF_END(4); @@ -272,7 +272,6 @@ void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int mFrequencies = frequencies; mOutputChars = outWords; mInputLength = codesSize; - mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2; proximityInfo->setInputParams(codes, codesSize); mProximityInfo = proximityInfo; } @@ -342,9 +341,8 @@ static const char QUOTE = '\''; static const char SPACE = ' '; void UnigramDictionary::getSuggestionCandidates(const int skipPos, - const int excessivePos, const int transposedPos, const int maxDepth) { + const int excessivePos, const int transposedPos) { if (DEBUG_DICT) { - LOGI("getSuggestionCandidates %d", maxDepth); assert(transposedPos + 1 < mInputLength); assert(excessivePos < mInputLength); assert(missingPos < mInputLength); @@ -368,32 +366,26 @@ void UnigramDictionary::getSuggestionCandidates(const int skipPos, while (depth >= 0) { if (mStackChildCount[depth] > 0) { --mStackChildCount[depth]; - bool traverseAllNodes = mStackTraverseAll[depth]; - int diffs = mStackDiffs[depth]; int siblingPos = mStackSiblingPos[depth]; int firstChildPos; mCorrectionState->initProcessState( - mStackMatchedCount[depth], mStackInputIndex[depth], mStackOutputIndex[depth]); + mStackMatchedCount[depth], mStackInputIndex[depth], mStackOutputIndex[depth], + mStackTraverseAll[depth], mStackDiffs[depth]); - // depth will never be greater than maxDepth because in that case, // needsToTraverseChildrenNodes should be false const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, - maxDepth, traverseAllNodes, diffs, - mCorrectionState, &childCount, - &firstChildPos, &traverseAllNodes, &diffs, - &siblingPos); + mCorrectionState, &childCount, &firstChildPos, &siblingPos); // Update next sibling pos mStackSiblingPos[depth] = siblingPos; if (needsToTraverseChildrenNodes) { // Goes to child node ++depth; mStackChildCount[depth] = childCount; - mStackTraverseAll[depth] = traverseAllNodes; - mStackDiffs[depth] = diffs; mStackSiblingPos[depth] = firstChildPos; mCorrectionState->getProcessState(&mStackMatchedCount[depth], - &mStackInputIndex[depth], &mStackOutputIndex[depth]); + &mStackInputIndex[depth], &mStackOutputIndex[depth], + &mStackTraverseAll[depth], &mStackDiffs[depth]); } } else { // Goes to parent sibling node @@ -437,12 +429,12 @@ inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c, return (c == QUOTE && userTypedChar != QUOTE) || skipPos == depth; } - -inline void UnigramDictionary::onTerminal( - unsigned short int* word, const int freq, CorrectionState *correctionState) { - const int finalFreq = correctionState->getFinalFreq(word, freq); +inline void UnigramDictionary::onTerminal(const int freq, CorrectionState *correctionState) { + int wordLength; + unsigned short* wordPointer; + const int finalFreq = correctionState->getFinalFreq(freq, &wordPointer, &wordLength); if (finalFreq >= 0) { - addWord(word, correctionState->getOutputIndex() + 1, finalFreq); + addWord(wordPointer, wordLength, finalFreq); } } @@ -657,20 +649,13 @@ int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offs // there aren't any more nodes at this level, it merely returns the address of the first byte after // the current node in nextSiblingPosition. Thus, the caller must keep count of the nodes at any // given level, as output into newCount when traversing this level's parent. -inline bool UnigramDictionary::processCurrentNode(const int initialPos, const int maxDepth, - const bool initialTraverseAllNodes, const int initialDiffs, - CorrectionState *correctionState, int *newCount, int *newChildrenPosition, - bool *newTraverseAllNodes, int *newDiffs, int *nextSiblingPosition) { - const int skipPos = correctionState->getSkipPos(); - const int excessivePos = correctionState->getExcessivePos(); - const int transposedPos = correctionState->getTransposedPos(); +inline bool UnigramDictionary::processCurrentNode(const int initialPos, + CorrectionState *correctionState, int *newCount, + int *newChildrenPosition, int *nextSiblingPosition) { if (DEBUG_DICT) { correctionState->checkState(); } int pos = initialPos; - int traverseAllNodes = initialTraverseAllNodes; - int diffs = initialDiffs; - const int initialInputIndex = correctionState->getInputIndex(); // Flags contain the following information: // - Address type (MASK_GROUP_ADDRESS_TYPE) on two bits: @@ -682,6 +667,9 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, const in // - FLAG_HAS_BIGRAMS: whether this node has bigrams or not const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(DICT_ROOT, &pos); const bool hasMultipleChars = (0 != (FLAG_HAS_MULTIPLE_CHARS & flags)); + const bool isTerminalNode = (0 != (FLAG_IS_TERMINAL & flags)); + + bool needsToInvokeOnTerminal = false; // This gets only ONE character from the stream. Next there will be: // if FLAG_HAS_MULTIPLE CHARS: the other characters of the same node @@ -707,111 +695,21 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, const in const bool isLastChar = (NOT_A_CHARACTER == nextc); // If there are more chars in this nodes, then this virtual node is not a terminal. // If we are on the last char, this virtual node is a terminal if this node is. - const bool isTerminal = isLastChar && (0 != (FLAG_IS_TERMINAL & flags)); - // If there are more chars in this node, then this virtual node has children. - // If we are on the last char, this virtual node has children if this node has. - const bool hasChildren = (!isLastChar) || BinaryFormat::hasChildrenInFlags(flags); - - // This has to be done for each virtual char (this forwards the "inputIndex" which - // is the index in the user-inputted chars, as read by proximity chars. - if (excessivePos == correctionState->getOutputIndex() - && correctionState->getInputIndex() < mInputLength - 1) { - correctionState->incrementInputIndex(); - } - if (traverseAllNodes || needsToSkipCurrentNode( - c, correctionState->getInputIndex(), skipPos, correctionState->getOutputIndex())) { - mWord[correctionState->getOutputIndex()] = c; - if (traverseAllNodes && isTerminal) { - // The frequency should be here, because we come here only if this is actually - // a terminal node, and we are on its last char. - const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos); - onTerminal(mWord, freq, mCorrectionState); - } - if (!hasChildren) { - // If we don't have children here, that means we finished processing all - // characters of this node (we are on the last virtual node), AND we are in - // traverseAllNodes mode, which means we are searching for *completions*. We - // should skip the frequency if we have a terminal, and report the position - // of the next sibling. We don't have to return other values because we are - // returning false, as in "don't traverse children". - if (isTerminal) pos = BinaryFormat::skipFrequency(flags, pos); - *nextSiblingPosition = - BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos); - return false; - } - } else { - int inputIndexForProximity = correctionState->getInputIndex(); - - if (transposedPos >= 0) { - if (correctionState->getInputIndex() == transposedPos) { - ++inputIndexForProximity; - } - if (correctionState->getInputIndex() == (transposedPos + 1)) { - --inputIndexForProximity; - } - } - - int matchedProximityCharId = mProximityInfo->getMatchedProximityId( - inputIndexForProximity, c, mCorrectionState); - if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId) { - // We found that this is an unrelated character, so we should give up traversing - // this node and its children entirely. - // However we may not be on the last virtual node yet so we skip the remaining - // characters in this node, the frequency if it's there, read the next sibling - // position to output it, then return false. - // We don't have to output other values because we return false, as in - // "don't traverse children". - if (!isLastChar) { - pos = BinaryFormat::skipOtherCharacters(DICT_ROOT, pos); - } - pos = BinaryFormat::skipFrequency(flags, pos); - *nextSiblingPosition = - BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos); - return false; - } - mWord[correctionState->getOutputIndex()] = c; - // If inputIndex is greater than mInputLength, that means there is no - // proximity chars. So, we don't need to check proximity. - if (ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) { - correctionState->charMatched(); - } - const bool isSameAsUserTypedLength = mInputLength - == correctionState->getInputIndex() + 1 - || (excessivePos == mInputLength - 1 - && correctionState->getInputIndex() == mInputLength - 2); - if (isSameAsUserTypedLength && isTerminal) { - const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos); - onTerminal(mWord, freq, mCorrectionState); - } - // Start traversing all nodes after the index exceeds the user typed length - traverseAllNodes = isSameAsUserTypedLength; - diffs = diffs - + ((ProximityInfo::NEAR_PROXIMITY_CHAR == matchedProximityCharId) ? 1 : 0); - // Finally, we are ready to go to the next character, the next "virtual node". - // We should advance the input index. - // We do this in this branch of the 'if traverseAllNodes' because we are still matching - // characters to input; the other branch is not matching them but searching for - // completions, this is why it does not have to do it. - correctionState->incrementInputIndex(); - - // This character matched the typed character (enough to traverse the node at least) - // so we just evaluated it. Now we should evaluate this virtual node's children - that - // is, if it has any. If it has no children, we're done here - so we skip the end of - // the node, output the siblings position, and return false "don't traverse children". - // Note that !hasChildren implies isLastChar, so we know we don't have to skip any - // remaining char in this group for there can't be any. - if (!hasChildren) { - pos = BinaryFormat::skipFrequency(flags, pos); - *nextSiblingPosition = - BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos); - return false; - } - } - // Optimization: Prune out words that are too long compared to how much was typed. - if (isTerminal - && (correctionState->getOutputIndex() >= maxDepth || diffs > mMaxEditDistance)) { - // We are giving up parsing this node and its children. Skip the rest of the node, - // output the sibling position, and return that we don't want to traverse children. + const bool isTerminal = isLastChar && isTerminalNode; + + CorrectionState::CorrectionStateType stateType = correctionState->processCharAndCalcState( + c, isTerminal); + if (stateType == CorrectionState::TRAVERSE_ALL_ON_TERMINAL + || stateType == CorrectionState::ON_TERMINAL) { + needsToInvokeOnTerminal = true; + } else if (stateType == CorrectionState::UNRELATED) { + // We found that this is an unrelated character, so we should give up traversing + // this node and its children entirely. + // However we may not be on the last virtual node yet so we skip the remaining + // characters in this node, the frequency if it's there, read the next sibling + // position to output it, then return false. + // We don't have to output other values because we return false, as in + // "don't traverse children". if (!isLastChar) { pos = BinaryFormat::skipOtherCharacters(DICT_ROOT, pos); } @@ -820,8 +718,6 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, const in BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos); return false; } - // Also, the next char is one "virtual node" depth more than this char. - correctionState->incrementOutputIndex(); // Prepare for the next character. Promote the prefetched char to current char - the loop // will take care of prefetching the next. If we finally found our last char, nextc will @@ -829,16 +725,39 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, const in c = nextc; } while (NOT_A_CHARACTER != c); - // If inputIndex is greater than mInputLength, that means there are no proximity chars. - // Here, that's all we are interested in so we don't need to check for isSameAsUserTypedLength. - if (mInputLength <= initialInputIndex) { - traverseAllNodes = true; - } + if (isTerminalNode) { + if (needsToInvokeOnTerminal) { + // The frequency should be here, because we come here only if this is actually + // a terminal node, and we are on its last char. + const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos); + onTerminal(freq, mCorrectionState); + } + + // If there are more chars in this node, then this virtual node has children. + // If we are on the last char, this virtual node has children if this node has. + const bool hasChildren = BinaryFormat::hasChildrenInFlags(flags); + + // This character matched the typed character (enough to traverse the node at least) + // so we just evaluated it. Now we should evaluate this virtual node's children - that + // is, if it has any. If it has no children, we're done here - so we skip the end of + // the node, output the siblings position, and return false "don't traverse children". + // Note that !hasChildren implies isLastChar, so we know we don't have to skip any + // remaining char in this group for there can't be any. + if (!hasChildren) { + pos = BinaryFormat::skipFrequency(flags, pos); + *nextSiblingPosition = + BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos); + return false; + } - // All the output values that are purely computation by this function are held in local - // variables. Output them to the caller. - *newTraverseAllNodes = traverseAllNodes; - *newDiffs = diffs; + // Optimization: Prune out words that are too long compared to how much was typed. + if (correctionState->needsToPrune()) { + pos = BinaryFormat::skipFrequency(flags, pos); + *nextSiblingPosition = + BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos); + return false; + } + } // Now we finished processing this node, and we want to traverse children. If there are no // children, we can't come here. diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h index cb86da41c..a45df24fb 100644 --- a/native/src/unigram_dictionary.h +++ b/native/src/unigram_dictionary.h @@ -87,21 +87,20 @@ private: const int *ycoordinates, const int *codes, const int codesSize, unsigned short *outWords, int *frequencies); void getSuggestionCandidates(const int skipPos, const int excessivePos, - const int transposedPos, const int maxDepth); + const int transposedPos); bool addWord(unsigned short *word, int length, int frequency); void getSplitTwoWordsSuggestion(const int inputLength, CorrectionState *correctionState); void getMissingSpaceWords( const int inputLength, const int missingSpacePos, CorrectionState *correctionState); void getMistypedSpaceWords( const int inputLength, const int spaceProximityPos, CorrectionState *correctionState); - void onTerminal(unsigned short int* word, const int freq, CorrectionState *correctionState); + void onTerminal(const int freq, CorrectionState *correctionState); bool needsToSkipCurrentNode(const unsigned short c, const int inputIndex, const int skipPos, const int depth); // Process a node by considering proximity, missing and excessive character - bool processCurrentNode(const int initialPos, const int maxDepth, - const bool initialTraverseAllNodes, const int initialDiffs, - CorrectionState *correctionState, int *newCount, int *newChildPosition, - bool *newTraverseAllNodes, int *newDiffs, int *nextSiblingPosition); + bool processCurrentNode(const int initialPos, + CorrectionState *correctionState, int *newCount, + int *newChildPosition, int *nextSiblingPosition); int getMostFrequentWordLike(const int startInputIndex, const int inputLength, unsigned short *word); int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length, @@ -134,7 +133,6 @@ private: int mInputLength; // MAX_WORD_LENGTH_INTERNAL must be bigger than MAX_WORD_LENGTH unsigned short mWord[MAX_WORD_LENGTH_INTERNAL]; - int mMaxEditDistance; int mStackMatchedCount[MAX_WORD_LENGTH_INTERNAL]; int mStackChildCount[MAX_WORD_LENGTH_INTERNAL]; |