diff options
Diffstat (limited to 'java/src/com/android/inputmethod/latin/spellcheck')
-rw-r--r-- | java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java | 254 | ||||
-rw-r--r-- | java/src/com/android/inputmethod/latin/spellcheck/SpellCheckerProximityInfo.java | 198 |
2 files changed, 343 insertions, 109 deletions
diff --git a/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java b/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java index 095c2c51c..8ac82ee5b 100644 --- a/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java +++ b/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java @@ -17,7 +17,9 @@ package com.android.inputmethod.latin.spellcheck; import android.content.Intent; +import android.content.SharedPreferences; import android.content.res.Resources; +import android.preference.PreferenceManager; import android.service.textservice.SpellCheckerService; import android.text.TextUtils; import android.util.Log; @@ -25,10 +27,10 @@ import android.view.textservice.SuggestionsInfo; import android.view.textservice.TextInfo; import com.android.inputmethod.compat.ArraysCompatUtils; +import com.android.inputmethod.compat.SuggestionsInfoCompatUtils; import com.android.inputmethod.keyboard.ProximityInfo; import com.android.inputmethod.latin.BinaryDictionary; import com.android.inputmethod.latin.Dictionary; -import com.android.inputmethod.latin.Dictionary.DataType; import com.android.inputmethod.latin.Dictionary.WordCallback; import com.android.inputmethod.latin.DictionaryCollection; import com.android.inputmethod.latin.DictionaryFactory; @@ -41,21 +43,27 @@ import com.android.inputmethod.latin.Utils; import com.android.inputmethod.latin.WhitelistDictionary; import com.android.inputmethod.latin.WordComposer; +import java.lang.ref.WeakReference; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.Iterator; import java.util.Locale; import java.util.Map; import java.util.TreeMap; +import java.util.HashSet; /** * Service for spell checking, using LatinIME's dictionaries and mechanisms. */ -public class AndroidSpellCheckerService extends SpellCheckerService { +public class AndroidSpellCheckerService extends SpellCheckerService + implements SharedPreferences.OnSharedPreferenceChangeListener { private static final String TAG = AndroidSpellCheckerService.class.getSimpleName(); private static final boolean DBG = false; private static final int POOL_SIZE = 2; + public static final String PREF_USE_CONTACTS_KEY = "pref_spellcheck_use_contacts"; + private static final int CAPITALIZE_NONE = 0; // No caps, or mixed case private static final int CAPITALIZE_FIRST = 1; // First only private static final int CAPITALIZE_ALL = 2; // All caps @@ -82,15 +90,100 @@ public class AndroidSpellCheckerService extends SpellCheckerService { // The threshold for a candidate to be offered as a suggestion. private double mSuggestionThreshold; - // The threshold for a suggestion to be considered "likely". - private double mLikelyThreshold; + // The threshold for a suggestion to be considered "recommended". + private double mRecommendedThreshold; + // Whether to use the contacts dictionary + private boolean mUseContactsDictionary; + private final Object mUseContactsLock = new Object(); + + private final HashSet<WeakReference<DictionaryCollection>> mDictionaryCollectionsList = + new HashSet<WeakReference<DictionaryCollection>>(); + + public static final int SCRIPT_LATIN = 0; + public static final int SCRIPT_CYRILLIC = 1; + private static final TreeMap<String, Integer> mLanguageToScript; + static { + // List of the supported languages and their associated script. We won't check + // words written in another script than the selected script, because we know we + // don't have those in our dictionary so we will underline everything and we + // will never have any suggestions, so it makes no sense checking them. + mLanguageToScript = new TreeMap<String, Integer>(); + mLanguageToScript.put("en", SCRIPT_LATIN); + mLanguageToScript.put("fr", SCRIPT_LATIN); + mLanguageToScript.put("de", SCRIPT_LATIN); + mLanguageToScript.put("nl", SCRIPT_LATIN); + mLanguageToScript.put("cs", SCRIPT_LATIN); + mLanguageToScript.put("es", SCRIPT_LATIN); + mLanguageToScript.put("it", SCRIPT_LATIN); + mLanguageToScript.put("ru", SCRIPT_CYRILLIC); + } @Override public void onCreate() { super.onCreate(); mSuggestionThreshold = Double.parseDouble(getString(R.string.spellchecker_suggestion_threshold_value)); - mLikelyThreshold = - Double.parseDouble(getString(R.string.spellchecker_likely_threshold_value)); + mRecommendedThreshold = + Double.parseDouble(getString(R.string.spellchecker_recommended_threshold_value)); + final SharedPreferences prefs = PreferenceManager.getDefaultSharedPreferences(this); + prefs.registerOnSharedPreferenceChangeListener(this); + onSharedPreferenceChanged(prefs, PREF_USE_CONTACTS_KEY); + } + + private static int getScriptFromLocale(final Locale locale) { + final Integer script = mLanguageToScript.get(locale.getLanguage()); + if (null == script) { + throw new RuntimeException("We have been called with an unsupported language: \"" + + locale.getLanguage() + "\". Framework bug?"); + } + return script; + } + + @Override + public void onSharedPreferenceChanged(final SharedPreferences prefs, final String key) { + if (!PREF_USE_CONTACTS_KEY.equals(key)) return; + synchronized(mUseContactsLock) { + mUseContactsDictionary = prefs.getBoolean(PREF_USE_CONTACTS_KEY, true); + if (mUseContactsDictionary) { + startUsingContactsDictionaryLocked(); + } else { + stopUsingContactsDictionaryLocked(); + } + } + } + + private void startUsingContactsDictionaryLocked() { + if (null == mContactsDictionary) { + mContactsDictionary = new SynchronouslyLoadedContactsDictionary(this); + } + final Iterator<WeakReference<DictionaryCollection>> iterator = + mDictionaryCollectionsList.iterator(); + while (iterator.hasNext()) { + final WeakReference<DictionaryCollection> dictRef = iterator.next(); + final DictionaryCollection dict = dictRef.get(); + if (null == dict) { + iterator.remove(); + } else { + dict.addDictionary(mContactsDictionary); + } + } + } + + private void stopUsingContactsDictionaryLocked() { + if (null == mContactsDictionary) return; + final SynchronouslyLoadedContactsDictionary contactsDict = mContactsDictionary; + mContactsDictionary = null; + final Iterator<WeakReference<DictionaryCollection>> iterator = + mDictionaryCollectionsList.iterator(); + while (iterator.hasNext()) { + final WeakReference<DictionaryCollection> dictRef = iterator.next(); + final DictionaryCollection dict = dictRef.get(); + if (null == dict) { + iterator.remove(); + } else { + dict.removeDictionary(contactsDict); + } + } + contactsDict.close(); } @Override @@ -110,10 +203,11 @@ public class AndroidSpellCheckerService extends SpellCheckerService { private static class SuggestionsGatherer implements WordCallback { public static class Result { public final String[] mSuggestions; - public final boolean mHasLikelySuggestions; - public Result(final String[] gatheredSuggestions, final boolean hasLikelySuggestions) { + public final boolean mHasRecommendedSuggestions; + public Result(final String[] gatheredSuggestions, + final boolean hasRecommendedSuggestions) { mSuggestions = gatheredSuggestions; - mHasLikelySuggestions = hasLikelySuggestions; + mHasRecommendedSuggestions = hasRecommendedSuggestions; } } @@ -121,7 +215,7 @@ public class AndroidSpellCheckerService extends SpellCheckerService { private final int[] mScores; private final String mOriginalText; private final double mSuggestionThreshold; - private final double mLikelyThreshold; + private final double mRecommendedThreshold; private final int mMaxLength; private int mLength = 0; @@ -131,10 +225,10 @@ public class AndroidSpellCheckerService extends SpellCheckerService { private int mBestScore = Integer.MIN_VALUE; // As small as possible SuggestionsGatherer(final String originalText, final double suggestionThreshold, - final double likelyThreshold, final int maxLength) { + final double recommendedThreshold, final int maxLength) { mOriginalText = originalText; mSuggestionThreshold = suggestionThreshold; - mLikelyThreshold = likelyThreshold; + mRecommendedThreshold = recommendedThreshold; mMaxLength = maxLength; mSuggestions = new ArrayList<CharSequence>(maxLength + 1); mScores = new int[mMaxLength]; @@ -142,7 +236,7 @@ public class AndroidSpellCheckerService extends SpellCheckerService { @Override synchronized public boolean addWord(char[] word, int wordOffset, int wordLength, int score, - int dicTypeId, DataType dataType) { + int dicTypeId, int dataType) { final int positionIndex = ArraysCompatUtils.binarySearch(mScores, 0, mLength, score); // binarySearch returns the index if the element exists, and -<insertion index> - 1 // if it doesn't. See documentation for binarySearch. @@ -175,7 +269,7 @@ public class AndroidSpellCheckerService extends SpellCheckerService { // make the threshold. final String wordString = new String(word, wordOffset, wordLength); final double normalizedScore = - Utils.calcNormalizedScore(mOriginalText, wordString, score); + BinaryDictionary.calcNormalizedScore(mOriginalText, wordString, score); if (normalizedScore < mSuggestionThreshold) { if (DBG) Log.i(TAG, wordString + " does not make the score threshold"); return true; @@ -198,19 +292,19 @@ public class AndroidSpellCheckerService extends SpellCheckerService { public Result getResults(final int capitalizeType, final Locale locale) { final String[] gatheredSuggestions; - final boolean hasLikelySuggestions; + final boolean hasRecommendedSuggestions; if (0 == mLength) { // Either we found no suggestions, or we found some BUT the max length was 0. // If we found some mBestSuggestion will not be null. If it is null, then // we found none, regardless of the max length. if (null == mBestSuggestion) { gatheredSuggestions = null; - hasLikelySuggestions = false; + hasRecommendedSuggestions = false; } else { gatheredSuggestions = EMPTY_STRING_ARRAY; - final double normalizedScore = - Utils.calcNormalizedScore(mOriginalText, mBestSuggestion, mBestScore); - hasLikelySuggestions = (normalizedScore > mLikelyThreshold); + final double normalizedScore = BinaryDictionary.calcNormalizedScore( + mOriginalText, mBestSuggestion, mBestScore); + hasRecommendedSuggestions = (normalizedScore > mRecommendedThreshold); } } else { if (DBG) { @@ -243,16 +337,17 @@ public class AndroidSpellCheckerService extends SpellCheckerService { final int bestScore = mScores[mLength - 1]; final CharSequence bestSuggestion = mSuggestions.get(0); final double normalizedScore = - Utils.calcNormalizedScore(mOriginalText, bestSuggestion, bestScore); - hasLikelySuggestions = (normalizedScore > mLikelyThreshold); + BinaryDictionary.calcNormalizedScore( + mOriginalText, bestSuggestion.toString(), bestScore); + hasRecommendedSuggestions = (normalizedScore > mRecommendedThreshold); if (DBG) { Log.i(TAG, "Best suggestion : " + bestSuggestion + ", score " + bestScore); Log.i(TAG, "Normalized score = " + normalizedScore - + " (threshold " + mLikelyThreshold - + ") => hasLikelySuggestions = " + hasLikelySuggestions); + + " (threshold " + mRecommendedThreshold + + ") => hasRecommendedSuggestions = " + hasRecommendedSuggestions); } } - return new Result(gatheredSuggestions, hasLikelySuggestions); + return new Result(gatheredSuggestions, hasRecommendedSuggestions); } } @@ -273,13 +368,15 @@ public class AndroidSpellCheckerService extends SpellCheckerService { for (Dictionary dict : oldWhitelistDictionaries.values()) { dict.close(); } - if (null != mContactsDictionary) { - // The synchronously loaded contacts dictionary should have been in one - // or several pools, but it is shielded against multiple closing and it's - // safe to call it several times. - final SynchronouslyLoadedContactsDictionary dictToClose = mContactsDictionary; - mContactsDictionary = null; - dictToClose.close(); + synchronized(mUseContactsLock) { + if (null != mContactsDictionary) { + // The synchronously loaded contacts dictionary should have been in one + // or several pools, but it is shielded against multiple closing and it's + // safe to call it several times. + final SynchronouslyLoadedContactsDictionary dictToClose = mContactsDictionary; + mContactsDictionary = null; + dictToClose.close(); + } } return false; } @@ -295,7 +392,9 @@ public class AndroidSpellCheckerService extends SpellCheckerService { } public DictAndProximity createDictAndProximity(final Locale locale) { - final ProximityInfo proximityInfo = ProximityInfo.createSpellCheckerProximityInfo(); + final int script = getScriptFromLocale(locale); + final ProximityInfo proximityInfo = ProximityInfo.createSpellCheckerProximityInfo( + SpellCheckerProximityInfo.getProximityForScript(script)); final Resources resources = getResources(); final int fallbackResourceId = Utils.getMainDictionaryResourceId(resources); final DictionaryCollection dictionaryCollection = @@ -314,11 +413,16 @@ public class AndroidSpellCheckerService extends SpellCheckerService { mWhitelistDictionaries.put(localeStr, whitelistDictionary); } dictionaryCollection.addDictionary(whitelistDictionary); - if (null == mContactsDictionary) { - mContactsDictionary = new SynchronouslyLoadedContactsDictionary(this); + synchronized(mUseContactsLock) { + if (mUseContactsDictionary) { + if (null == mContactsDictionary) { + mContactsDictionary = new SynchronouslyLoadedContactsDictionary(this); + } + } + dictionaryCollection.addDictionary(mContactsDictionary); + mDictionaryCollectionsList.add( + new WeakReference<DictionaryCollection>(dictionaryCollection)); } - // TODO: add a setting to use or not contacts when checking spelling - dictionaryCollection.addDictionary(mContactsDictionary); return new DictAndProximity(dictionaryCollection, proximityInfo); } @@ -327,9 +431,9 @@ public class AndroidSpellCheckerService extends SpellCheckerService { // If the first char is not uppercase, then the word is either all lower case, // and in either case we return CAPITALIZE_NONE. if (!Character.isUpperCase(text.codePointAt(0))) return CAPITALIZE_NONE; - final int len = text.codePointCount(0, text.length()); + final int len = text.length(); int capsCount = 1; - for (int i = 1; i < len; ++i) { + for (int i = 1; i < len; i = text.offsetByCodePoints(i, 1)) { if (1 != capsCount && i != capsCount) break; if (Character.isUpperCase(text.codePointAt(i))) ++capsCount; } @@ -346,6 +450,8 @@ public class AndroidSpellCheckerService extends SpellCheckerService { private DictionaryPool mDictionaryPool; // Likewise private Locale mLocale; + // Cache this for performance + private int mScript; // One of SCRIPT_LATIN or SCRIPT_CYRILLIC for now. private final AndroidSpellCheckerService mService; @@ -358,17 +464,51 @@ public class AndroidSpellCheckerService extends SpellCheckerService { final String localeString = getLocale(); mDictionaryPool = mService.getDictionaryPool(localeString); mLocale = LocaleUtils.constructLocaleFromString(localeString); + mScript = getScriptFromLocale(mLocale); + } + + /* + * Returns whether the code point is a letter that makes sense for the specified + * locale for this spell checker. + * The dictionaries supported by Latin IME are described in res/xml/spellchecker.xml + * and is limited to EFIGS languages and Russian. + * Hence at the moment this explicitly tests for Cyrillic characters or Latin characters + * as appropriate, and explicitly excludes CJK, Arabic and Hebrew characters. + */ + private static boolean isLetterCheckableByLanguage(final int codePoint, + final int script) { + switch (script) { + case SCRIPT_LATIN: + // Our supported latin script dictionaries (EFIGS) at the moment only include + // characters in the C0, C1, Latin Extended A and B, IPA extensions unicode + // blocks. As it happens, those are back-to-back in the code range 0x40 to 0x2AF, + // so the below is a very efficient way to test for it. As for the 0-0x3F, it's + // excluded from isLetter anyway. + return codePoint <= 0x2AF && Character.isLetter(codePoint); + case SCRIPT_CYRILLIC: + // All Cyrillic characters are in the 400~52F block. There are some in the upper + // Unicode range, but they are archaic characters that are not used in modern + // russian and are not used by our dictionary. + return codePoint >= 0x400 && codePoint <= 0x52F && Character.isLetter(codePoint); + default: + // Should never come here + throw new RuntimeException("Impossible value of script: " + script); + } } /** * Finds out whether a particular string should be filtered out of spell checking. * - * This will loosely match URLs, numbers, symbols. + * This will loosely match URLs, numbers, symbols. To avoid always underlining words that + * we know we will never recognize, this accepts a script identifier that should be one + * of the SCRIPT_* constants defined above, to rule out quickly characters from very + * different languages. * * @param text the string to evaluate. + * @param script the identifier for the script this spell checker recognizes * @return true if we should filter this text out, false otherwise */ - private boolean shouldFilterOut(final String text) { + private static boolean shouldFilterOut(final String text, final int script) { if (TextUtils.isEmpty(text) || text.length() <= 1) return true; // TODO: check if an equivalent processing can't be done more quickly with a @@ -376,20 +516,19 @@ public class AndroidSpellCheckerService extends SpellCheckerService { // Filter by first letter final int firstCodePoint = text.codePointAt(0); // Filter out words that don't start with a letter or an apostrophe - if (!Character.isLetter(firstCodePoint) + if (!isLetterCheckableByLanguage(firstCodePoint, script) && '\'' != firstCodePoint) return true; // Filter contents final int length = text.length(); int letterCount = 0; - for (int i = 0; i < length; ++i) { + for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) { final int codePoint = text.codePointAt(i); // Any word containing a '@' is probably an e-mail address // Any word containing a '/' is probably either an ad-hoc combination of two // words or a URI - in either case we don't want to spell check that - if ('@' == codePoint - || '/' == codePoint) return true; - if (Character.isLetter(codePoint)) ++letterCount; + if ('@' == codePoint || '/' == codePoint) return true; + if (isLetterCheckableByLanguage(codePoint, script)) ++letterCount; } // Guestimate heuristic: perform spell checking if at least 3/4 of the characters // in this word are letters @@ -408,7 +547,7 @@ public class AndroidSpellCheckerService extends SpellCheckerService { try { final String text = textInfo.getText(); - if (shouldFilterOut(text)) { + if (shouldFilterOut(text, mScript)) { DictAndProximity dictInfo = null; try { dictInfo = mDictionaryPool.takeOrGetNull(); @@ -426,17 +565,23 @@ public class AndroidSpellCheckerService extends SpellCheckerService { // TODO: Don't gather suggestions if the limit is <= 0 unless necessary final SuggestionsGatherer suggestionsGatherer = new SuggestionsGatherer(text, - mService.mSuggestionThreshold, mService.mLikelyThreshold, suggestionsLimit); + mService.mSuggestionThreshold, mService.mRecommendedThreshold, + suggestionsLimit); final WordComposer composer = new WordComposer(); final int length = text.length(); - for (int i = 0; i < length; ++i) { + for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) { final int character = text.codePointAt(i); - final int proximityIndex = SpellCheckerProximityInfo.getIndexOf(character); + final int proximityIndex = + SpellCheckerProximityInfo.getIndexOfCodeForScript(character, mScript); final int[] proximities; if (-1 == proximityIndex) { proximities = new int[] { character }; } else { - proximities = Arrays.copyOfRange(SpellCheckerProximityInfo.PROXIMITY, + // TODO: an initial examination seems to reveal this is actually used + // read-only. It should be possible to compute the arrays statically once + // and skip doing a copy each time here. + proximities = Arrays.copyOfRange( + SpellCheckerProximityInfo.getProximityForScript(mScript), proximityIndex, proximityIndex + SpellCheckerProximityInfo.ROW_SIZE); } @@ -475,7 +620,7 @@ public class AndroidSpellCheckerService extends SpellCheckerService { + suggestionsLimit); Log.i(TAG, "IsInDict = " + isInDict); Log.i(TAG, "LooksLikeTypo = " + (!isInDict)); - Log.i(TAG, "HasLikelySuggestions = " + result.mHasLikelySuggestions); + Log.i(TAG, "HasRecommendedSuggestions = " + result.mHasRecommendedSuggestions); if (null != result.mSuggestions) { for (String suggestion : result.mSuggestions) { Log.i(TAG, suggestion); @@ -483,10 +628,13 @@ public class AndroidSpellCheckerService extends SpellCheckerService { } } - // TODO: actually use result.mHasLikelySuggestions final int flags = (isInDict ? SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY - : SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO); + : SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO) + | (result.mHasRecommendedSuggestions + ? SuggestionsInfoCompatUtils + .getValueOf_RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS() + : 0); return new SuggestionsInfo(flags, result.mSuggestions); } catch (RuntimeException e) { // Don't kill the keyboard if there is a bug in the spell checker diff --git a/java/src/com/android/inputmethod/latin/spellcheck/SpellCheckerProximityInfo.java b/java/src/com/android/inputmethod/latin/spellcheck/SpellCheckerProximityInfo.java index d5b04b27c..db3544987 100644 --- a/java/src/com/android/inputmethod/latin/spellcheck/SpellCheckerProximityInfo.java +++ b/java/src/com/android/inputmethod/latin/spellcheck/SpellCheckerProximityInfo.java @@ -22,72 +22,158 @@ import com.android.inputmethod.keyboard.ProximityInfo; import java.util.TreeMap; public class SpellCheckerProximityInfo { - final private static int NUL = KeyDetector.NOT_A_CODE; + /* public for test */ + final public static int NUL = KeyDetector.NOT_A_CODE; // This must be the same as MAX_PROXIMITY_CHARS_SIZE else it will not work inside // native code - this value is passed at creation of the binary object and reused // as the size of the passed array afterwards so they can't be different. final public static int ROW_SIZE = ProximityInfo.MAX_PROXIMITY_CHARS_SIZE; - // This is a map from the code point to the index in the PROXIMITY array. - // At the time the native code to read the binary dictionary needs the proximity info be passed - // as a flat array spaced by MAX_PROXIMITY_CHARS_SIZE columns, one for each input character. - // Since we need to build such an array, we want to be able to search in our big proximity data - // quickly by character, and a map is probably the best way to do this. - final private static TreeMap<Integer, Integer> INDICES = new TreeMap<Integer, Integer>(); + // Helper methods + final protected static void buildProximityIndices(final int[] proximity, + final TreeMap<Integer, Integer> indices) { + for (int i = 0; i < proximity.length; i += ROW_SIZE) { + if (NUL != proximity[i]) indices.put(proximity[i], i); + } + } + final protected static int computeIndex(final int characterCode, + final TreeMap<Integer, Integer> indices) { + final Integer result = indices.get(characterCode); + if (null == result) return -1; + return result; + } - // The proximity here is the union of - // - the proximity for a QWERTY keyboard. - // - the proximity for an AZERTY keyboard. - // - the proximity for a QWERTZ keyboard. - // ...plus, add all characters in the ('a', 'e', 'i', 'o', 'u') set to each other. - // - // The reasoning behind this construction is, almost any alphabetic text we may want - // to spell check has been entered with one of the keyboards above. Also, specifically - // to English, many spelling errors consist of the last vowel of the word being wrong - // because in English vowels tend to merge with each other in pronunciation. - final public static int[] PROXIMITY = { - 'q', 'w', 's', 'a', 'z', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, - 'w', 'q', 'a', 's', 'd', 'e', 'x', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, - 'e', 'w', 's', 'd', 'f', 'r', 'a', 'i', 'o', 'u', NUL, NUL, NUL, NUL, NUL, NUL, - 'r', 'e', 'd', 'f', 'g', 't', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, - 't', 'r', 'f', 'g', 'h', 'y', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, - 'y', 't', 'g', 'h', 'j', 'u', 'a', 's', 'd', 'x', NUL, NUL, NUL, NUL, NUL, NUL, - 'u', 'y', 'h', 'j', 'k', 'i', 'a', 'e', 'o', NUL, NUL, NUL, NUL, NUL, NUL, NUL, - 'i', 'u', 'j', 'k', 'l', 'o', 'a', 'e', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, - 'o', 'i', 'k', 'l', 'p', 'a', 'e', 'u', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, - 'p', 'o', 'l', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + private static class Latin { + // This is a map from the code point to the index in the PROXIMITY array. + // At the time the native code to read the binary dictionary needs the proximity info be + // passed as a flat array spaced by MAX_PROXIMITY_CHARS_SIZE columns, one for each input + // character. + // Since we need to build such an array, we want to be able to search in our big proximity + // data quickly by character, and a map is probably the best way to do this. + final private static TreeMap<Integer, Integer> INDICES = new TreeMap<Integer, Integer>(); - 'a', 'z', 'x', 's', 'w', 'q', 'e', 'i', 'o', 'u', NUL, NUL, NUL, NUL, NUL, NUL, - 's', 'q', 'a', 'z', 'x', 'c', 'd', 'e', 'w', NUL, NUL, NUL, NUL, NUL, NUL, NUL, - 'd', 'w', 's', 'x', 'c', 'v', 'f', 'r', 'e', 'w', NUL, NUL, NUL, NUL, NUL, NUL, - 'f', 'e', 'd', 'c', 'v', 'b', 'g', 't', 'r', NUL, NUL, NUL, NUL, NUL, NUL, NUL, - 'g', 'r', 'f', 'v', 'b', 'n', 'h', 'y', 't', NUL, NUL, NUL, NUL, NUL, NUL, NUL, - 'h', 't', 'g', 'b', 'n', 'm', 'j', 'u', 'y', NUL, NUL, NUL, NUL, NUL, NUL, NUL, - 'j', 'y', 'h', 'n', 'm', 'k', 'i', 'u', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, - 'k', 'u', 'j', 'm', 'l', 'o', 'i', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, - 'l', 'i', 'k', 'p', 'o', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, - NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + // The proximity here is the union of + // - the proximity for a QWERTY keyboard. + // - the proximity for an AZERTY keyboard. + // - the proximity for a QWERTZ keyboard. + // ...plus, add all characters in the ('a', 'e', 'i', 'o', 'u') set to each other. + // + // The reasoning behind this construction is, almost any alphabetic text we may want + // to spell check has been entered with one of the keyboards above. Also, specifically + // to English, many spelling errors consist of the last vowel of the word being wrong + // because in English vowels tend to merge with each other in pronunciation. + final static int[] PROXIMITY = { + 'q', 'w', 's', 'a', 'z', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'w', 'q', 'a', 's', 'd', 'e', 'x', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'e', 'w', 's', 'd', 'f', 'r', 'a', 'i', 'o', 'u', NUL, NUL, NUL, NUL, NUL, NUL, + 'r', 'e', 'd', 'f', 'g', 't', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 't', 'r', 'f', 'g', 'h', 'y', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'y', 't', 'g', 'h', 'j', 'u', 'a', 's', 'd', 'x', NUL, NUL, NUL, NUL, NUL, NUL, + 'u', 'y', 'h', 'j', 'k', 'i', 'a', 'e', 'o', NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'i', 'u', 'j', 'k', 'l', 'o', 'a', 'e', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'o', 'i', 'k', 'l', 'p', 'a', 'e', 'u', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'p', 'o', 'l', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, - 'z', 'a', 's', 'd', 'x', 't', 'g', 'h', 'j', 'u', 'q', 'e', NUL, NUL, NUL, NUL, - 'x', 'z', 'a', 's', 'd', 'c', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, - 'c', 'x', 's', 'd', 'f', 'v', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, - 'v', 'c', 'd', 'f', 'g', 'b', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, - 'b', 'v', 'f', 'g', 'h', 'n', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, - 'n', 'b', 'g', 'h', 'j', 'm', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, - 'm', 'n', 'h', 'j', 'k', 'l', 'o', 'p', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, - NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, - NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, - NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, - }; - static { - for (int i = 0; i < PROXIMITY.length; i += ROW_SIZE) { - if (NUL != PROXIMITY[i]) INDICES.put(PROXIMITY[i], i); + 'a', 'z', 'x', 's', 'w', 'q', 'e', 'i', 'o', 'u', NUL, NUL, NUL, NUL, NUL, NUL, + 's', 'q', 'a', 'z', 'x', 'c', 'd', 'e', 'w', NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'd', 'w', 's', 'x', 'c', 'v', 'f', 'r', 'e', 'w', NUL, NUL, NUL, NUL, NUL, NUL, + 'f', 'e', 'd', 'c', 'v', 'b', 'g', 't', 'r', NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'g', 'r', 'f', 'v', 'b', 'n', 'h', 'y', 't', NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'h', 't', 'g', 'b', 'n', 'm', 'j', 'u', 'y', NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'j', 'y', 'h', 'n', 'm', 'k', 'i', 'u', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'k', 'u', 'j', 'm', 'l', 'o', 'i', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'l', 'i', 'k', 'p', 'o', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + + 'z', 'a', 's', 'd', 'x', 't', 'g', 'h', 'j', 'u', 'q', 'e', NUL, NUL, NUL, NUL, + 'x', 'z', 'a', 's', 'd', 'c', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'c', 'x', 's', 'd', 'f', 'v', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'v', 'c', 'd', 'f', 'g', 'b', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'b', 'v', 'f', 'g', 'h', 'n', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'n', 'b', 'g', 'h', 'j', 'm', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'm', 'n', 'h', 'j', 'k', 'l', 'o', 'p', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + }; + static { + buildProximityIndices(PROXIMITY, INDICES); + } + static int getIndexOf(int characterCode) { + return computeIndex(characterCode, INDICES); } } - public static int getIndexOf(int characterCode) { - final Integer result = INDICES.get(characterCode); - if (null == result) return -1; - return result; + + private static class Cyrillic { + final private static TreeMap<Integer, Integer> INDICES = new TreeMap<Integer, Integer>(); + final static int[] PROXIMITY = { + // TODO: This table is solely based on the keyboard layout. Consult with Russian + // speakers on commonly misspelled words/letters. + 'й', 'ц', 'ф', 'ы', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'ц', 'й', 'ф', 'ы', 'в', 'у', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'у', 'ц', 'ы', 'в', 'а', 'к', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'к', 'у', 'в', 'а', 'п', 'е', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'е', 'к', 'а', 'п', 'р', 'н', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'н', 'е', 'п', 'р', 'о', 'г', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'г', 'н', 'р', 'о', 'л', 'ш', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'ш', 'г', 'о', 'л', 'д', 'щ', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'щ', 'ш', 'л', 'д', 'ж', 'з', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'з', 'щ', 'д', 'ж', 'э', 'х', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'х', 'з', 'ж', 'э', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + + 'ф', 'й', 'ц', 'ы', 'я', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'ы', 'й', 'ц', 'у', 'ф', 'в', 'я', 'ч', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'в', 'ц', 'у', 'к', 'ы', 'а', 'я', 'ч', 'с', NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'а', 'у', 'к', 'е', 'в', 'п', 'ч', 'с', 'м', NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'п', 'к', 'е', 'н', 'а', 'р', 'с', 'м', 'и', NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'р', 'е', 'н', 'г', 'п', 'о', 'м', 'и', 'т', NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'о', 'н', 'г', 'ш', 'р', 'л', 'и', 'т', 'ь', NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'л', 'г', 'ш', 'щ', 'о', 'д', 'т', 'ь', 'б', NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'д', 'ш', 'щ', 'з', 'л', 'ж', 'ь', 'б', 'ю', NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'ж', 'щ', 'з', 'х', 'д', 'э', 'б', 'ю', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'э', 'з', 'х', 'ю', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + + 'я', 'ф', 'ы', 'в', 'ч', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'ч', 'ы', 'в', 'а', 'я', 'с', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'с', 'в', 'а', 'п', 'ч', 'м', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'м', 'а', 'п', 'р', 'с', 'и', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'и', 'п', 'р', 'о', 'м', 'т', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'т', 'р', 'о', 'л', 'и', 'ь', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'ь', 'о', 'л', 'д', 'т', 'б', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'б', 'л', 'д', 'ж', 'ь', 'ю', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + 'ю', 'д', 'ж', 'э', 'б', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, + }; + static { + buildProximityIndices(PROXIMITY, INDICES); + } + static int getIndexOf(int characterCode) { + return computeIndex(characterCode, INDICES); + } + } + + public static int[] getProximityForScript(final int script) { + switch (script) { + case AndroidSpellCheckerService.SCRIPT_LATIN: + return Latin.PROXIMITY; + case AndroidSpellCheckerService.SCRIPT_CYRILLIC: + return Cyrillic.PROXIMITY; + default: + throw new RuntimeException("Wrong script supplied: " + script); + } + } + public static int getIndexOfCodeForScript(final int characterCode, final int script) { + switch (script) { + case AndroidSpellCheckerService.SCRIPT_LATIN: + return Latin.getIndexOf(characterCode); + case AndroidSpellCheckerService.SCRIPT_CYRILLIC: + return Cyrillic.getIndexOf(characterCode); + default: + throw new RuntimeException("Wrong script supplied: " + script); + } } } |