diff options
author | 2012-07-10 15:33:18 +0900 | |
---|---|---|
committer | 2012-07-10 15:34:40 +0900 | |
commit | 84ed0966417d93b07c4da2b295244b160d223ce9 (patch) | |
tree | 7eca078a3ca8e217c0edd6f96e7284bba080fc63 /java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerSession.java | |
parent | 7389c601e34dc96807aa1c3c0fef30b77198ca58 (diff) | |
download | latinime-84ed0966417d93b07c4da2b295244b160d223ce9.tar.gz latinime-84ed0966417d93b07c4da2b295244b160d223ce9.tar.xz latinime-84ed0966417d93b07c4da2b295244b160d223ce9.zip |
Separate SpellCheckerSession from SpellCheckerService
Bug: 6789576
Change-Id: I7c55d36afad7ef6046353b3c9e849a54a6dc83ae
Diffstat (limited to 'java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerSession.java')
-rw-r--r-- | java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerSession.java | 420 |
1 files changed, 420 insertions, 0 deletions
diff --git a/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerSession.java b/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerSession.java new file mode 100644 index 000000000..e0f340879 --- /dev/null +++ b/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerSession.java @@ -0,0 +1,420 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.android.inputmethod.latin.spellcheck; + + +import android.service.textservice.SpellCheckerService.Session; +import android.text.TextUtils; +import android.util.Log; +import android.util.LruCache; +import android.view.textservice.SentenceSuggestionsInfo; +import android.view.textservice.SuggestionsInfo; +import android.view.textservice.TextInfo; + +import com.android.inputmethod.compat.SuggestionsInfoCompatUtils; +import com.android.inputmethod.latin.LocaleUtils; +import com.android.inputmethod.latin.WordComposer; +import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; +import com.android.inputmethod.latin.spellcheck.AndroidSpellCheckerService.SuggestionsGatherer; + +import java.util.ArrayList; +import java.util.Locale; + +public class AndroidSpellCheckerSession extends Session { + private static final String TAG = AndroidSpellCheckerSession.class.getSimpleName(); + private static final boolean DBG = false; + private final static String[] EMPTY_STRING_ARRAY = new String[0]; + + // Immutable, but need the locale which is not available in the constructor yet + private DictionaryPool mDictionaryPool; + // Likewise + private Locale mLocale; + // Cache this for performance + private int mScript; // One of SCRIPT_LATIN or SCRIPT_CYRILLIC for now. + private final AndroidSpellCheckerService mService; + private final SuggestionsCache mSuggestionsCache = new SuggestionsCache(); + + private static class SuggestionsParams { + public final String[] mSuggestions; + public final int mFlags; + public SuggestionsParams(String[] suggestions, int flags) { + mSuggestions = suggestions; + mFlags = flags; + } + } + + private static class SuggestionsCache { + private static final char CHAR_DELIMITER = '\uFFFC'; + private static final int MAX_CACHE_SIZE = 50; + private final LruCache<String, SuggestionsParams> mUnigramSuggestionsInfoCache = + new LruCache<String, SuggestionsParams>(MAX_CACHE_SIZE); + + // TODO: Support n-gram input + private static String generateKey(String query, String prevWord) { + if (TextUtils.isEmpty(query) || TextUtils.isEmpty(prevWord)) { + return query; + } + return query + CHAR_DELIMITER + prevWord; + } + + // TODO: Support n-gram input + public SuggestionsParams getSuggestionsFromCache(String query, String prevWord) { + return mUnigramSuggestionsInfoCache.get(generateKey(query, prevWord)); + } + + // TODO: Support n-gram input + public void putSuggestionsToCache( + String query, String prevWord, String[] suggestions, int flags) { + if (suggestions == null || TextUtils.isEmpty(query)) { + return; + } + mUnigramSuggestionsInfoCache.put( + generateKey(query, prevWord), new SuggestionsParams(suggestions, flags)); + } + } + + AndroidSpellCheckerSession(final AndroidSpellCheckerService service) { + mService = service; + } + + @Override + public void onCreate() { + final String localeString = getLocale(); + mDictionaryPool = mService.getDictionaryPool(localeString); + mLocale = LocaleUtils.constructLocaleFromString(localeString); + mScript = AndroidSpellCheckerService.getScriptFromLocale(mLocale); + } + + /* + * Returns whether the code point is a letter that makes sense for the specified + * locale for this spell checker. + * The dictionaries supported by Latin IME are described in res/xml/spellchecker.xml + * and is limited to EFIGS languages and Russian. + * Hence at the moment this explicitly tests for Cyrillic characters or Latin characters + * as appropriate, and explicitly excludes CJK, Arabic and Hebrew characters. + */ + private static boolean isLetterCheckableByLanguage(final int codePoint, + final int script) { + switch (script) { + case AndroidSpellCheckerService.SCRIPT_LATIN: + // Our supported latin script dictionaries (EFIGS) at the moment only include + // characters in the C0, C1, Latin Extended A and B, IPA extensions unicode + // blocks. As it happens, those are back-to-back in the code range 0x40 to 0x2AF, + // so the below is a very efficient way to test for it. As for the 0-0x3F, it's + // excluded from isLetter anyway. + return codePoint <= 0x2AF && Character.isLetter(codePoint); + case AndroidSpellCheckerService.SCRIPT_CYRILLIC: + // All Cyrillic characters are in the 400~52F block. There are some in the upper + // Unicode range, but they are archaic characters that are not used in modern + // russian and are not used by our dictionary. + return codePoint >= 0x400 && codePoint <= 0x52F && Character.isLetter(codePoint); + default: + // Should never come here + throw new RuntimeException("Impossible value of script: " + script); + } + } + + /** + * Finds out whether a particular string should be filtered out of spell checking. + * + * This will loosely match URLs, numbers, symbols. To avoid always underlining words that + * we know we will never recognize, this accepts a script identifier that should be one + * of the SCRIPT_* constants defined above, to rule out quickly characters from very + * different languages. + * + * @param text the string to evaluate. + * @param script the identifier for the script this spell checker recognizes + * @return true if we should filter this text out, false otherwise + */ + private static boolean shouldFilterOut(final String text, final int script) { + if (TextUtils.isEmpty(text) || text.length() <= 1) return true; + + // TODO: check if an equivalent processing can't be done more quickly with a + // compiled regexp. + // Filter by first letter + final int firstCodePoint = text.codePointAt(0); + // Filter out words that don't start with a letter or an apostrophe + if (!isLetterCheckableByLanguage(firstCodePoint, script) + && '\'' != firstCodePoint) return true; + + // Filter contents + final int length = text.length(); + int letterCount = 0; + for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) { + final int codePoint = text.codePointAt(i); + // Any word containing a '@' is probably an e-mail address + // Any word containing a '/' is probably either an ad-hoc combination of two + // words or a URI - in either case we don't want to spell check that + if ('@' == codePoint || '/' == codePoint) return true; + if (isLetterCheckableByLanguage(codePoint, script)) ++letterCount; + } + // Guestimate heuristic: perform spell checking if at least 3/4 of the characters + // in this word are letters + return (letterCount * 4 < length * 3); + } + + private SentenceSuggestionsInfo fixWronglyInvalidatedWordWithSingleQuote( + TextInfo ti, SentenceSuggestionsInfo ssi) { + final String typedText = ti.getText(); + if (!typedText.contains(AndroidSpellCheckerService.SINGLE_QUOTE)) { + return null; + } + final int N = ssi.getSuggestionsCount(); + final ArrayList<Integer> additionalOffsets = new ArrayList<Integer>(); + final ArrayList<Integer> additionalLengths = new ArrayList<Integer>(); + final ArrayList<SuggestionsInfo> additionalSuggestionsInfos = + new ArrayList<SuggestionsInfo>(); + String currentWord = null; + for (int i = 0; i < N; ++i) { + final SuggestionsInfo si = ssi.getSuggestionsInfoAt(i); + final int flags = si.getSuggestionsAttributes(); + if ((flags & SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY) == 0) { + continue; + } + final int offset = ssi.getOffsetAt(i); + final int length = ssi.getLengthAt(i); + final String subText = typedText.substring(offset, offset + length); + final String prevWord = currentWord; + currentWord = subText; + if (!subText.contains(AndroidSpellCheckerService.SINGLE_QUOTE)) { + continue; + } + final String[] splitTexts = subText.split(AndroidSpellCheckerService.SINGLE_QUOTE, -1); + if (splitTexts == null || splitTexts.length <= 1) { + continue; + } + final int splitNum = splitTexts.length; + for (int j = 0; j < splitNum; ++j) { + final String splitText = splitTexts[j]; + if (TextUtils.isEmpty(splitText)) { + continue; + } + if (mSuggestionsCache.getSuggestionsFromCache( + splitText, prevWord) == null) { + continue; + } + final int newLength = splitText.length(); + // Neither RESULT_ATTR_IN_THE_DICTIONARY nor RESULT_ATTR_LOOKS_LIKE_TYPO + final int newFlags = 0; + final SuggestionsInfo newSi = new SuggestionsInfo(newFlags, EMPTY_STRING_ARRAY); + newSi.setCookieAndSequence(si.getCookie(), si.getSequence()); + if (DBG) { + Log.d(TAG, "Override and remove old span over: " + + splitText + ", " + offset + "," + newLength); + } + additionalOffsets.add(offset); + additionalLengths.add(newLength); + additionalSuggestionsInfos.add(newSi); + } + } + final int additionalSize = additionalOffsets.size(); + if (additionalSize <= 0) { + return null; + } + final int suggestionsSize = N + additionalSize; + final int[] newOffsets = new int[suggestionsSize]; + final int[] newLengths = new int[suggestionsSize]; + final SuggestionsInfo[] newSuggestionsInfos = new SuggestionsInfo[suggestionsSize]; + int i; + for (i = 0; i < N; ++i) { + newOffsets[i] = ssi.getOffsetAt(i); + newLengths[i] = ssi.getLengthAt(i); + newSuggestionsInfos[i] = ssi.getSuggestionsInfoAt(i); + } + for (; i < suggestionsSize; ++i) { + newOffsets[i] = additionalOffsets.get(i - N); + newLengths[i] = additionalLengths.get(i - N); + newSuggestionsInfos[i] = additionalSuggestionsInfos.get(i - N); + } + return new SentenceSuggestionsInfo(newSuggestionsInfos, newOffsets, newLengths); + } + + @Override + public SentenceSuggestionsInfo[] onGetSentenceSuggestionsMultiple( + TextInfo[] textInfos, int suggestionsLimit) { + final SentenceSuggestionsInfo[] retval = super.onGetSentenceSuggestionsMultiple( + textInfos, suggestionsLimit); + if (retval == null || retval.length != textInfos.length) { + return retval; + } + for (int i = 0; i < retval.length; ++i) { + final SentenceSuggestionsInfo tempSsi = + fixWronglyInvalidatedWordWithSingleQuote(textInfos[i], retval[i]); + if (tempSsi != null) { + retval[i] = tempSsi; + } + } + return retval; + } + + @Override + public SuggestionsInfo[] onGetSuggestionsMultiple(TextInfo[] textInfos, + int suggestionsLimit, boolean sequentialWords) { + final int length = textInfos.length; + final SuggestionsInfo[] retval = new SuggestionsInfo[length]; + for (int i = 0; i < length; ++i) { + final String prevWord; + if (sequentialWords && i > 0) { + final String prevWordCandidate = textInfos[i - 1].getText(); + // Note that an empty string would be used to indicate the initial word + // in the future. + prevWord = TextUtils.isEmpty(prevWordCandidate) ? null : prevWordCandidate; + } else { + prevWord = null; + } + retval[i] = onGetSuggestions(textInfos[i], prevWord, suggestionsLimit); + retval[i].setCookieAndSequence( + textInfos[i].getCookie(), textInfos[i].getSequence()); + } + return retval; + } + + // Note : this must be reentrant + /** + * Gets a list of suggestions for a specific string. This returns a list of possible + * corrections for the text passed as an argument. It may split or group words, and + * even perform grammatical analysis. + */ + @Override + public SuggestionsInfo onGetSuggestions(final TextInfo textInfo, + final int suggestionsLimit) { + return onGetSuggestions(textInfo, null, suggestionsLimit); + } + + private SuggestionsInfo onGetSuggestions( + final TextInfo textInfo, final String prevWord, final int suggestionsLimit) { + try { + final String inText = textInfo.getText(); + final SuggestionsParams cachedSuggestionsParams = + mSuggestionsCache.getSuggestionsFromCache(inText, prevWord); + if (cachedSuggestionsParams != null) { + if (DBG) { + Log.d(TAG, "Cache hit: " + inText + ", " + cachedSuggestionsParams.mFlags); + } + return new SuggestionsInfo( + cachedSuggestionsParams.mFlags, cachedSuggestionsParams.mSuggestions); + } + + if (shouldFilterOut(inText, mScript)) { + DictAndProximity dictInfo = null; + try { + dictInfo = mDictionaryPool.takeOrGetNull(); + if (null == dictInfo) { + return AndroidSpellCheckerService.getNotInDictEmptySuggestions(); + } + return dictInfo.mDictionary.isValidWord(inText) + ? AndroidSpellCheckerService.getInDictEmptySuggestions() + : AndroidSpellCheckerService.getNotInDictEmptySuggestions(); + } finally { + if (null != dictInfo) { + if (!mDictionaryPool.offer(dictInfo)) { + Log.e(TAG, "Can't re-insert a dictionary into its pool"); + } + } + } + } + final String text = inText.replaceAll( + AndroidSpellCheckerService.APOSTROPHE, AndroidSpellCheckerService.SINGLE_QUOTE); + + // TODO: Don't gather suggestions if the limit is <= 0 unless necessary + //final SuggestionsGatherer suggestionsGatherer = new SuggestionsGatherer(text, + //mService.mSuggestionThreshold, mService.mRecommendedThreshold, + //suggestionsLimit); + final SuggestionsGatherer suggestionsGatherer = mService.newSuggestionsGatherer( + text, suggestionsLimit); + final WordComposer composer = new WordComposer(); + final int length = text.length(); + for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) { + final int codePoint = text.codePointAt(i); + // The getXYForCodePointAndScript method returns (Y << 16) + X + final int xy = SpellCheckerProximityInfo.getXYForCodePointAndScript( + codePoint, mScript); + if (SpellCheckerProximityInfo.NOT_A_COORDINATE_PAIR == xy) { + composer.add(codePoint, WordComposer.NOT_A_COORDINATE, + WordComposer.NOT_A_COORDINATE); + } else { + composer.add(codePoint, xy & 0xFFFF, xy >> 16); + } + } + + final int capitalizeType = AndroidSpellCheckerService.getCapitalizationType(text); + boolean isInDict = true; + DictAndProximity dictInfo = null; + try { + dictInfo = mDictionaryPool.takeOrGetNull(); + if (null == dictInfo) { + return AndroidSpellCheckerService.getNotInDictEmptySuggestions(); + } + final ArrayList<SuggestedWordInfo> suggestions = dictInfo.mDictionary.getWords( + composer, prevWord, dictInfo.mProximityInfo); + for (final SuggestedWordInfo suggestion : suggestions) { + final String suggestionStr = suggestion.mWord.toString(); + suggestionsGatherer.addWord(suggestionStr.toCharArray(), null, 0, + suggestionStr.length(), suggestion.mScore); + } + isInDict = dictInfo.mDictionary.isValidWord(text); + if (!isInDict && AndroidSpellCheckerService.CAPITALIZE_NONE != capitalizeType) { + // We want to test the word again if it's all caps or first caps only. + // If it's fully down, we already tested it, if it's mixed case, we don't + // want to test a lowercase version of it. + isInDict = dictInfo.mDictionary.isValidWord(text.toLowerCase(mLocale)); + } + } finally { + if (null != dictInfo) { + if (!mDictionaryPool.offer(dictInfo)) { + Log.e(TAG, "Can't re-insert a dictionary into its pool"); + } + } + } + + final SuggestionsGatherer.Result result = suggestionsGatherer.getResults( + capitalizeType, mLocale); + + if (DBG) { + Log.i(TAG, "Spell checking results for " + text + " with suggestion limit " + + suggestionsLimit); + Log.i(TAG, "IsInDict = " + isInDict); + Log.i(TAG, "LooksLikeTypo = " + (!isInDict)); + Log.i(TAG, "HasRecommendedSuggestions = " + result.mHasRecommendedSuggestions); + if (null != result.mSuggestions) { + for (String suggestion : result.mSuggestions) { + Log.i(TAG, suggestion); + } + } + } + + final int flags = + (isInDict ? SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY + : SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO) + | (result.mHasRecommendedSuggestions + ? SuggestionsInfoCompatUtils + .getValueOf_RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS() + : 0); + final SuggestionsInfo retval = new SuggestionsInfo(flags, result.mSuggestions); + mSuggestionsCache.putSuggestionsToCache(text, prevWord, result.mSuggestions, flags); + return retval; + } catch (RuntimeException e) { + // Don't kill the keyboard if there is a bug in the spell checker + if (DBG) { + throw e; + } else { + Log.e(TAG, "Exception while spellcheking: " + e); + return AndroidSpellCheckerService.getNotInDictEmptySuggestions(); + } + } + } +} |