aboutsummaryrefslogtreecommitdiffstats
path: root/java/src/com/android/inputmethod/latin/spellcheck
diff options
context:
space:
mode:
Diffstat (limited to 'java/src/com/android/inputmethod/latin/spellcheck')
-rw-r--r--java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java254
-rw-r--r--java/src/com/android/inputmethod/latin/spellcheck/SpellCheckerProximityInfo.java198
2 files changed, 343 insertions, 109 deletions
diff --git a/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java b/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java
index 095c2c51c..8ac82ee5b 100644
--- a/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java
+++ b/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java
@@ -17,7 +17,9 @@
package com.android.inputmethod.latin.spellcheck;
import android.content.Intent;
+import android.content.SharedPreferences;
import android.content.res.Resources;
+import android.preference.PreferenceManager;
import android.service.textservice.SpellCheckerService;
import android.text.TextUtils;
import android.util.Log;
@@ -25,10 +27,10 @@ import android.view.textservice.SuggestionsInfo;
import android.view.textservice.TextInfo;
import com.android.inputmethod.compat.ArraysCompatUtils;
+import com.android.inputmethod.compat.SuggestionsInfoCompatUtils;
import com.android.inputmethod.keyboard.ProximityInfo;
import com.android.inputmethod.latin.BinaryDictionary;
import com.android.inputmethod.latin.Dictionary;
-import com.android.inputmethod.latin.Dictionary.DataType;
import com.android.inputmethod.latin.Dictionary.WordCallback;
import com.android.inputmethod.latin.DictionaryCollection;
import com.android.inputmethod.latin.DictionaryFactory;
@@ -41,21 +43,27 @@ import com.android.inputmethod.latin.Utils;
import com.android.inputmethod.latin.WhitelistDictionary;
import com.android.inputmethod.latin.WordComposer;
+import java.lang.ref.WeakReference;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
+import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;
+import java.util.HashSet;
/**
* Service for spell checking, using LatinIME's dictionaries and mechanisms.
*/
-public class AndroidSpellCheckerService extends SpellCheckerService {
+public class AndroidSpellCheckerService extends SpellCheckerService
+ implements SharedPreferences.OnSharedPreferenceChangeListener {
private static final String TAG = AndroidSpellCheckerService.class.getSimpleName();
private static final boolean DBG = false;
private static final int POOL_SIZE = 2;
+ public static final String PREF_USE_CONTACTS_KEY = "pref_spellcheck_use_contacts";
+
private static final int CAPITALIZE_NONE = 0; // No caps, or mixed case
private static final int CAPITALIZE_FIRST = 1; // First only
private static final int CAPITALIZE_ALL = 2; // All caps
@@ -82,15 +90,100 @@ public class AndroidSpellCheckerService extends SpellCheckerService {
// The threshold for a candidate to be offered as a suggestion.
private double mSuggestionThreshold;
- // The threshold for a suggestion to be considered "likely".
- private double mLikelyThreshold;
+ // The threshold for a suggestion to be considered "recommended".
+ private double mRecommendedThreshold;
+ // Whether to use the contacts dictionary
+ private boolean mUseContactsDictionary;
+ private final Object mUseContactsLock = new Object();
+
+ private final HashSet<WeakReference<DictionaryCollection>> mDictionaryCollectionsList =
+ new HashSet<WeakReference<DictionaryCollection>>();
+
+ public static final int SCRIPT_LATIN = 0;
+ public static final int SCRIPT_CYRILLIC = 1;
+ private static final TreeMap<String, Integer> mLanguageToScript;
+ static {
+ // List of the supported languages and their associated script. We won't check
+ // words written in another script than the selected script, because we know we
+ // don't have those in our dictionary so we will underline everything and we
+ // will never have any suggestions, so it makes no sense checking them.
+ mLanguageToScript = new TreeMap<String, Integer>();
+ mLanguageToScript.put("en", SCRIPT_LATIN);
+ mLanguageToScript.put("fr", SCRIPT_LATIN);
+ mLanguageToScript.put("de", SCRIPT_LATIN);
+ mLanguageToScript.put("nl", SCRIPT_LATIN);
+ mLanguageToScript.put("cs", SCRIPT_LATIN);
+ mLanguageToScript.put("es", SCRIPT_LATIN);
+ mLanguageToScript.put("it", SCRIPT_LATIN);
+ mLanguageToScript.put("ru", SCRIPT_CYRILLIC);
+ }
@Override public void onCreate() {
super.onCreate();
mSuggestionThreshold =
Double.parseDouble(getString(R.string.spellchecker_suggestion_threshold_value));
- mLikelyThreshold =
- Double.parseDouble(getString(R.string.spellchecker_likely_threshold_value));
+ mRecommendedThreshold =
+ Double.parseDouble(getString(R.string.spellchecker_recommended_threshold_value));
+ final SharedPreferences prefs = PreferenceManager.getDefaultSharedPreferences(this);
+ prefs.registerOnSharedPreferenceChangeListener(this);
+ onSharedPreferenceChanged(prefs, PREF_USE_CONTACTS_KEY);
+ }
+
+ private static int getScriptFromLocale(final Locale locale) {
+ final Integer script = mLanguageToScript.get(locale.getLanguage());
+ if (null == script) {
+ throw new RuntimeException("We have been called with an unsupported language: \""
+ + locale.getLanguage() + "\". Framework bug?");
+ }
+ return script;
+ }
+
+ @Override
+ public void onSharedPreferenceChanged(final SharedPreferences prefs, final String key) {
+ if (!PREF_USE_CONTACTS_KEY.equals(key)) return;
+ synchronized(mUseContactsLock) {
+ mUseContactsDictionary = prefs.getBoolean(PREF_USE_CONTACTS_KEY, true);
+ if (mUseContactsDictionary) {
+ startUsingContactsDictionaryLocked();
+ } else {
+ stopUsingContactsDictionaryLocked();
+ }
+ }
+ }
+
+ private void startUsingContactsDictionaryLocked() {
+ if (null == mContactsDictionary) {
+ mContactsDictionary = new SynchronouslyLoadedContactsDictionary(this);
+ }
+ final Iterator<WeakReference<DictionaryCollection>> iterator =
+ mDictionaryCollectionsList.iterator();
+ while (iterator.hasNext()) {
+ final WeakReference<DictionaryCollection> dictRef = iterator.next();
+ final DictionaryCollection dict = dictRef.get();
+ if (null == dict) {
+ iterator.remove();
+ } else {
+ dict.addDictionary(mContactsDictionary);
+ }
+ }
+ }
+
+ private void stopUsingContactsDictionaryLocked() {
+ if (null == mContactsDictionary) return;
+ final SynchronouslyLoadedContactsDictionary contactsDict = mContactsDictionary;
+ mContactsDictionary = null;
+ final Iterator<WeakReference<DictionaryCollection>> iterator =
+ mDictionaryCollectionsList.iterator();
+ while (iterator.hasNext()) {
+ final WeakReference<DictionaryCollection> dictRef = iterator.next();
+ final DictionaryCollection dict = dictRef.get();
+ if (null == dict) {
+ iterator.remove();
+ } else {
+ dict.removeDictionary(contactsDict);
+ }
+ }
+ contactsDict.close();
}
@Override
@@ -110,10 +203,11 @@ public class AndroidSpellCheckerService extends SpellCheckerService {
private static class SuggestionsGatherer implements WordCallback {
public static class Result {
public final String[] mSuggestions;
- public final boolean mHasLikelySuggestions;
- public Result(final String[] gatheredSuggestions, final boolean hasLikelySuggestions) {
+ public final boolean mHasRecommendedSuggestions;
+ public Result(final String[] gatheredSuggestions,
+ final boolean hasRecommendedSuggestions) {
mSuggestions = gatheredSuggestions;
- mHasLikelySuggestions = hasLikelySuggestions;
+ mHasRecommendedSuggestions = hasRecommendedSuggestions;
}
}
@@ -121,7 +215,7 @@ public class AndroidSpellCheckerService extends SpellCheckerService {
private final int[] mScores;
private final String mOriginalText;
private final double mSuggestionThreshold;
- private final double mLikelyThreshold;
+ private final double mRecommendedThreshold;
private final int mMaxLength;
private int mLength = 0;
@@ -131,10 +225,10 @@ public class AndroidSpellCheckerService extends SpellCheckerService {
private int mBestScore = Integer.MIN_VALUE; // As small as possible
SuggestionsGatherer(final String originalText, final double suggestionThreshold,
- final double likelyThreshold, final int maxLength) {
+ final double recommendedThreshold, final int maxLength) {
mOriginalText = originalText;
mSuggestionThreshold = suggestionThreshold;
- mLikelyThreshold = likelyThreshold;
+ mRecommendedThreshold = recommendedThreshold;
mMaxLength = maxLength;
mSuggestions = new ArrayList<CharSequence>(maxLength + 1);
mScores = new int[mMaxLength];
@@ -142,7 +236,7 @@ public class AndroidSpellCheckerService extends SpellCheckerService {
@Override
synchronized public boolean addWord(char[] word, int wordOffset, int wordLength, int score,
- int dicTypeId, DataType dataType) {
+ int dicTypeId, int dataType) {
final int positionIndex = ArraysCompatUtils.binarySearch(mScores, 0, mLength, score);
// binarySearch returns the index if the element exists, and -<insertion index> - 1
// if it doesn't. See documentation for binarySearch.
@@ -175,7 +269,7 @@ public class AndroidSpellCheckerService extends SpellCheckerService {
// make the threshold.
final String wordString = new String(word, wordOffset, wordLength);
final double normalizedScore =
- Utils.calcNormalizedScore(mOriginalText, wordString, score);
+ BinaryDictionary.calcNormalizedScore(mOriginalText, wordString, score);
if (normalizedScore < mSuggestionThreshold) {
if (DBG) Log.i(TAG, wordString + " does not make the score threshold");
return true;
@@ -198,19 +292,19 @@ public class AndroidSpellCheckerService extends SpellCheckerService {
public Result getResults(final int capitalizeType, final Locale locale) {
final String[] gatheredSuggestions;
- final boolean hasLikelySuggestions;
+ final boolean hasRecommendedSuggestions;
if (0 == mLength) {
// Either we found no suggestions, or we found some BUT the max length was 0.
// If we found some mBestSuggestion will not be null. If it is null, then
// we found none, regardless of the max length.
if (null == mBestSuggestion) {
gatheredSuggestions = null;
- hasLikelySuggestions = false;
+ hasRecommendedSuggestions = false;
} else {
gatheredSuggestions = EMPTY_STRING_ARRAY;
- final double normalizedScore =
- Utils.calcNormalizedScore(mOriginalText, mBestSuggestion, mBestScore);
- hasLikelySuggestions = (normalizedScore > mLikelyThreshold);
+ final double normalizedScore = BinaryDictionary.calcNormalizedScore(
+ mOriginalText, mBestSuggestion, mBestScore);
+ hasRecommendedSuggestions = (normalizedScore > mRecommendedThreshold);
}
} else {
if (DBG) {
@@ -243,16 +337,17 @@ public class AndroidSpellCheckerService extends SpellCheckerService {
final int bestScore = mScores[mLength - 1];
final CharSequence bestSuggestion = mSuggestions.get(0);
final double normalizedScore =
- Utils.calcNormalizedScore(mOriginalText, bestSuggestion, bestScore);
- hasLikelySuggestions = (normalizedScore > mLikelyThreshold);
+ BinaryDictionary.calcNormalizedScore(
+ mOriginalText, bestSuggestion.toString(), bestScore);
+ hasRecommendedSuggestions = (normalizedScore > mRecommendedThreshold);
if (DBG) {
Log.i(TAG, "Best suggestion : " + bestSuggestion + ", score " + bestScore);
Log.i(TAG, "Normalized score = " + normalizedScore
- + " (threshold " + mLikelyThreshold
- + ") => hasLikelySuggestions = " + hasLikelySuggestions);
+ + " (threshold " + mRecommendedThreshold
+ + ") => hasRecommendedSuggestions = " + hasRecommendedSuggestions);
}
}
- return new Result(gatheredSuggestions, hasLikelySuggestions);
+ return new Result(gatheredSuggestions, hasRecommendedSuggestions);
}
}
@@ -273,13 +368,15 @@ public class AndroidSpellCheckerService extends SpellCheckerService {
for (Dictionary dict : oldWhitelistDictionaries.values()) {
dict.close();
}
- if (null != mContactsDictionary) {
- // The synchronously loaded contacts dictionary should have been in one
- // or several pools, but it is shielded against multiple closing and it's
- // safe to call it several times.
- final SynchronouslyLoadedContactsDictionary dictToClose = mContactsDictionary;
- mContactsDictionary = null;
- dictToClose.close();
+ synchronized(mUseContactsLock) {
+ if (null != mContactsDictionary) {
+ // The synchronously loaded contacts dictionary should have been in one
+ // or several pools, but it is shielded against multiple closing and it's
+ // safe to call it several times.
+ final SynchronouslyLoadedContactsDictionary dictToClose = mContactsDictionary;
+ mContactsDictionary = null;
+ dictToClose.close();
+ }
}
return false;
}
@@ -295,7 +392,9 @@ public class AndroidSpellCheckerService extends SpellCheckerService {
}
public DictAndProximity createDictAndProximity(final Locale locale) {
- final ProximityInfo proximityInfo = ProximityInfo.createSpellCheckerProximityInfo();
+ final int script = getScriptFromLocale(locale);
+ final ProximityInfo proximityInfo = ProximityInfo.createSpellCheckerProximityInfo(
+ SpellCheckerProximityInfo.getProximityForScript(script));
final Resources resources = getResources();
final int fallbackResourceId = Utils.getMainDictionaryResourceId(resources);
final DictionaryCollection dictionaryCollection =
@@ -314,11 +413,16 @@ public class AndroidSpellCheckerService extends SpellCheckerService {
mWhitelistDictionaries.put(localeStr, whitelistDictionary);
}
dictionaryCollection.addDictionary(whitelistDictionary);
- if (null == mContactsDictionary) {
- mContactsDictionary = new SynchronouslyLoadedContactsDictionary(this);
+ synchronized(mUseContactsLock) {
+ if (mUseContactsDictionary) {
+ if (null == mContactsDictionary) {
+ mContactsDictionary = new SynchronouslyLoadedContactsDictionary(this);
+ }
+ }
+ dictionaryCollection.addDictionary(mContactsDictionary);
+ mDictionaryCollectionsList.add(
+ new WeakReference<DictionaryCollection>(dictionaryCollection));
}
- // TODO: add a setting to use or not contacts when checking spelling
- dictionaryCollection.addDictionary(mContactsDictionary);
return new DictAndProximity(dictionaryCollection, proximityInfo);
}
@@ -327,9 +431,9 @@ public class AndroidSpellCheckerService extends SpellCheckerService {
// If the first char is not uppercase, then the word is either all lower case,
// and in either case we return CAPITALIZE_NONE.
if (!Character.isUpperCase(text.codePointAt(0))) return CAPITALIZE_NONE;
- final int len = text.codePointCount(0, text.length());
+ final int len = text.length();
int capsCount = 1;
- for (int i = 1; i < len; ++i) {
+ for (int i = 1; i < len; i = text.offsetByCodePoints(i, 1)) {
if (1 != capsCount && i != capsCount) break;
if (Character.isUpperCase(text.codePointAt(i))) ++capsCount;
}
@@ -346,6 +450,8 @@ public class AndroidSpellCheckerService extends SpellCheckerService {
private DictionaryPool mDictionaryPool;
// Likewise
private Locale mLocale;
+ // Cache this for performance
+ private int mScript; // One of SCRIPT_LATIN or SCRIPT_CYRILLIC for now.
private final AndroidSpellCheckerService mService;
@@ -358,17 +464,51 @@ public class AndroidSpellCheckerService extends SpellCheckerService {
final String localeString = getLocale();
mDictionaryPool = mService.getDictionaryPool(localeString);
mLocale = LocaleUtils.constructLocaleFromString(localeString);
+ mScript = getScriptFromLocale(mLocale);
+ }
+
+ /*
+ * Returns whether the code point is a letter that makes sense for the specified
+ * locale for this spell checker.
+ * The dictionaries supported by Latin IME are described in res/xml/spellchecker.xml
+ * and is limited to EFIGS languages and Russian.
+ * Hence at the moment this explicitly tests for Cyrillic characters or Latin characters
+ * as appropriate, and explicitly excludes CJK, Arabic and Hebrew characters.
+ */
+ private static boolean isLetterCheckableByLanguage(final int codePoint,
+ final int script) {
+ switch (script) {
+ case SCRIPT_LATIN:
+ // Our supported latin script dictionaries (EFIGS) at the moment only include
+ // characters in the C0, C1, Latin Extended A and B, IPA extensions unicode
+ // blocks. As it happens, those are back-to-back in the code range 0x40 to 0x2AF,
+ // so the below is a very efficient way to test for it. As for the 0-0x3F, it's
+ // excluded from isLetter anyway.
+ return codePoint <= 0x2AF && Character.isLetter(codePoint);
+ case SCRIPT_CYRILLIC:
+ // All Cyrillic characters are in the 400~52F block. There are some in the upper
+ // Unicode range, but they are archaic characters that are not used in modern
+ // russian and are not used by our dictionary.
+ return codePoint >= 0x400 && codePoint <= 0x52F && Character.isLetter(codePoint);
+ default:
+ // Should never come here
+ throw new RuntimeException("Impossible value of script: " + script);
+ }
}
/**
* Finds out whether a particular string should be filtered out of spell checking.
*
- * This will loosely match URLs, numbers, symbols.
+ * This will loosely match URLs, numbers, symbols. To avoid always underlining words that
+ * we know we will never recognize, this accepts a script identifier that should be one
+ * of the SCRIPT_* constants defined above, to rule out quickly characters from very
+ * different languages.
*
* @param text the string to evaluate.
+ * @param script the identifier for the script this spell checker recognizes
* @return true if we should filter this text out, false otherwise
*/
- private boolean shouldFilterOut(final String text) {
+ private static boolean shouldFilterOut(final String text, final int script) {
if (TextUtils.isEmpty(text) || text.length() <= 1) return true;
// TODO: check if an equivalent processing can't be done more quickly with a
@@ -376,20 +516,19 @@ public class AndroidSpellCheckerService extends SpellCheckerService {
// Filter by first letter
final int firstCodePoint = text.codePointAt(0);
// Filter out words that don't start with a letter or an apostrophe
- if (!Character.isLetter(firstCodePoint)
+ if (!isLetterCheckableByLanguage(firstCodePoint, script)
&& '\'' != firstCodePoint) return true;
// Filter contents
final int length = text.length();
int letterCount = 0;
- for (int i = 0; i < length; ++i) {
+ for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) {
final int codePoint = text.codePointAt(i);
// Any word containing a '@' is probably an e-mail address
// Any word containing a '/' is probably either an ad-hoc combination of two
// words or a URI - in either case we don't want to spell check that
- if ('@' == codePoint
- || '/' == codePoint) return true;
- if (Character.isLetter(codePoint)) ++letterCount;
+ if ('@' == codePoint || '/' == codePoint) return true;
+ if (isLetterCheckableByLanguage(codePoint, script)) ++letterCount;
}
// Guestimate heuristic: perform spell checking if at least 3/4 of the characters
// in this word are letters
@@ -408,7 +547,7 @@ public class AndroidSpellCheckerService extends SpellCheckerService {
try {
final String text = textInfo.getText();
- if (shouldFilterOut(text)) {
+ if (shouldFilterOut(text, mScript)) {
DictAndProximity dictInfo = null;
try {
dictInfo = mDictionaryPool.takeOrGetNull();
@@ -426,17 +565,23 @@ public class AndroidSpellCheckerService extends SpellCheckerService {
// TODO: Don't gather suggestions if the limit is <= 0 unless necessary
final SuggestionsGatherer suggestionsGatherer = new SuggestionsGatherer(text,
- mService.mSuggestionThreshold, mService.mLikelyThreshold, suggestionsLimit);
+ mService.mSuggestionThreshold, mService.mRecommendedThreshold,
+ suggestionsLimit);
final WordComposer composer = new WordComposer();
final int length = text.length();
- for (int i = 0; i < length; ++i) {
+ for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) {
final int character = text.codePointAt(i);
- final int proximityIndex = SpellCheckerProximityInfo.getIndexOf(character);
+ final int proximityIndex =
+ SpellCheckerProximityInfo.getIndexOfCodeForScript(character, mScript);
final int[] proximities;
if (-1 == proximityIndex) {
proximities = new int[] { character };
} else {
- proximities = Arrays.copyOfRange(SpellCheckerProximityInfo.PROXIMITY,
+ // TODO: an initial examination seems to reveal this is actually used
+ // read-only. It should be possible to compute the arrays statically once
+ // and skip doing a copy each time here.
+ proximities = Arrays.copyOfRange(
+ SpellCheckerProximityInfo.getProximityForScript(mScript),
proximityIndex,
proximityIndex + SpellCheckerProximityInfo.ROW_SIZE);
}
@@ -475,7 +620,7 @@ public class AndroidSpellCheckerService extends SpellCheckerService {
+ suggestionsLimit);
Log.i(TAG, "IsInDict = " + isInDict);
Log.i(TAG, "LooksLikeTypo = " + (!isInDict));
- Log.i(TAG, "HasLikelySuggestions = " + result.mHasLikelySuggestions);
+ Log.i(TAG, "HasRecommendedSuggestions = " + result.mHasRecommendedSuggestions);
if (null != result.mSuggestions) {
for (String suggestion : result.mSuggestions) {
Log.i(TAG, suggestion);
@@ -483,10 +628,13 @@ public class AndroidSpellCheckerService extends SpellCheckerService {
}
}
- // TODO: actually use result.mHasLikelySuggestions
final int flags =
(isInDict ? SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY
- : SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO);
+ : SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO)
+ | (result.mHasRecommendedSuggestions
+ ? SuggestionsInfoCompatUtils
+ .getValueOf_RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS()
+ : 0);
return new SuggestionsInfo(flags, result.mSuggestions);
} catch (RuntimeException e) {
// Don't kill the keyboard if there is a bug in the spell checker
diff --git a/java/src/com/android/inputmethod/latin/spellcheck/SpellCheckerProximityInfo.java b/java/src/com/android/inputmethod/latin/spellcheck/SpellCheckerProximityInfo.java
index d5b04b27c..db3544987 100644
--- a/java/src/com/android/inputmethod/latin/spellcheck/SpellCheckerProximityInfo.java
+++ b/java/src/com/android/inputmethod/latin/spellcheck/SpellCheckerProximityInfo.java
@@ -22,72 +22,158 @@ import com.android.inputmethod.keyboard.ProximityInfo;
import java.util.TreeMap;
public class SpellCheckerProximityInfo {
- final private static int NUL = KeyDetector.NOT_A_CODE;
+ /* public for test */
+ final public static int NUL = KeyDetector.NOT_A_CODE;
// This must be the same as MAX_PROXIMITY_CHARS_SIZE else it will not work inside
// native code - this value is passed at creation of the binary object and reused
// as the size of the passed array afterwards so they can't be different.
final public static int ROW_SIZE = ProximityInfo.MAX_PROXIMITY_CHARS_SIZE;
- // This is a map from the code point to the index in the PROXIMITY array.
- // At the time the native code to read the binary dictionary needs the proximity info be passed
- // as a flat array spaced by MAX_PROXIMITY_CHARS_SIZE columns, one for each input character.
- // Since we need to build such an array, we want to be able to search in our big proximity data
- // quickly by character, and a map is probably the best way to do this.
- final private static TreeMap<Integer, Integer> INDICES = new TreeMap<Integer, Integer>();
+ // Helper methods
+ final protected static void buildProximityIndices(final int[] proximity,
+ final TreeMap<Integer, Integer> indices) {
+ for (int i = 0; i < proximity.length; i += ROW_SIZE) {
+ if (NUL != proximity[i]) indices.put(proximity[i], i);
+ }
+ }
+ final protected static int computeIndex(final int characterCode,
+ final TreeMap<Integer, Integer> indices) {
+ final Integer result = indices.get(characterCode);
+ if (null == result) return -1;
+ return result;
+ }
- // The proximity here is the union of
- // - the proximity for a QWERTY keyboard.
- // - the proximity for an AZERTY keyboard.
- // - the proximity for a QWERTZ keyboard.
- // ...plus, add all characters in the ('a', 'e', 'i', 'o', 'u') set to each other.
- //
- // The reasoning behind this construction is, almost any alphabetic text we may want
- // to spell check has been entered with one of the keyboards above. Also, specifically
- // to English, many spelling errors consist of the last vowel of the word being wrong
- // because in English vowels tend to merge with each other in pronunciation.
- final public static int[] PROXIMITY = {
- 'q', 'w', 's', 'a', 'z', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
- 'w', 'q', 'a', 's', 'd', 'e', 'x', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
- 'e', 'w', 's', 'd', 'f', 'r', 'a', 'i', 'o', 'u', NUL, NUL, NUL, NUL, NUL, NUL,
- 'r', 'e', 'd', 'f', 'g', 't', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
- 't', 'r', 'f', 'g', 'h', 'y', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
- 'y', 't', 'g', 'h', 'j', 'u', 'a', 's', 'd', 'x', NUL, NUL, NUL, NUL, NUL, NUL,
- 'u', 'y', 'h', 'j', 'k', 'i', 'a', 'e', 'o', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
- 'i', 'u', 'j', 'k', 'l', 'o', 'a', 'e', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
- 'o', 'i', 'k', 'l', 'p', 'a', 'e', 'u', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
- 'p', 'o', 'l', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ private static class Latin {
+ // This is a map from the code point to the index in the PROXIMITY array.
+ // At the time the native code to read the binary dictionary needs the proximity info be
+ // passed as a flat array spaced by MAX_PROXIMITY_CHARS_SIZE columns, one for each input
+ // character.
+ // Since we need to build such an array, we want to be able to search in our big proximity
+ // data quickly by character, and a map is probably the best way to do this.
+ final private static TreeMap<Integer, Integer> INDICES = new TreeMap<Integer, Integer>();
- 'a', 'z', 'x', 's', 'w', 'q', 'e', 'i', 'o', 'u', NUL, NUL, NUL, NUL, NUL, NUL,
- 's', 'q', 'a', 'z', 'x', 'c', 'd', 'e', 'w', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
- 'd', 'w', 's', 'x', 'c', 'v', 'f', 'r', 'e', 'w', NUL, NUL, NUL, NUL, NUL, NUL,
- 'f', 'e', 'd', 'c', 'v', 'b', 'g', 't', 'r', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
- 'g', 'r', 'f', 'v', 'b', 'n', 'h', 'y', 't', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
- 'h', 't', 'g', 'b', 'n', 'm', 'j', 'u', 'y', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
- 'j', 'y', 'h', 'n', 'm', 'k', 'i', 'u', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
- 'k', 'u', 'j', 'm', 'l', 'o', 'i', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
- 'l', 'i', 'k', 'p', 'o', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
- NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ // The proximity here is the union of
+ // - the proximity for a QWERTY keyboard.
+ // - the proximity for an AZERTY keyboard.
+ // - the proximity for a QWERTZ keyboard.
+ // ...plus, add all characters in the ('a', 'e', 'i', 'o', 'u') set to each other.
+ //
+ // The reasoning behind this construction is, almost any alphabetic text we may want
+ // to spell check has been entered with one of the keyboards above. Also, specifically
+ // to English, many spelling errors consist of the last vowel of the word being wrong
+ // because in English vowels tend to merge with each other in pronunciation.
+ final static int[] PROXIMITY = {
+ 'q', 'w', 's', 'a', 'z', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'w', 'q', 'a', 's', 'd', 'e', 'x', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'e', 'w', 's', 'd', 'f', 'r', 'a', 'i', 'o', 'u', NUL, NUL, NUL, NUL, NUL, NUL,
+ 'r', 'e', 'd', 'f', 'g', 't', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 't', 'r', 'f', 'g', 'h', 'y', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'y', 't', 'g', 'h', 'j', 'u', 'a', 's', 'd', 'x', NUL, NUL, NUL, NUL, NUL, NUL,
+ 'u', 'y', 'h', 'j', 'k', 'i', 'a', 'e', 'o', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'i', 'u', 'j', 'k', 'l', 'o', 'a', 'e', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'o', 'i', 'k', 'l', 'p', 'a', 'e', 'u', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'p', 'o', 'l', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
- 'z', 'a', 's', 'd', 'x', 't', 'g', 'h', 'j', 'u', 'q', 'e', NUL, NUL, NUL, NUL,
- 'x', 'z', 'a', 's', 'd', 'c', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
- 'c', 'x', 's', 'd', 'f', 'v', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
- 'v', 'c', 'd', 'f', 'g', 'b', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
- 'b', 'v', 'f', 'g', 'h', 'n', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
- 'n', 'b', 'g', 'h', 'j', 'm', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
- 'm', 'n', 'h', 'j', 'k', 'l', 'o', 'p', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
- NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
- NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
- NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
- };
- static {
- for (int i = 0; i < PROXIMITY.length; i += ROW_SIZE) {
- if (NUL != PROXIMITY[i]) INDICES.put(PROXIMITY[i], i);
+ 'a', 'z', 'x', 's', 'w', 'q', 'e', 'i', 'o', 'u', NUL, NUL, NUL, NUL, NUL, NUL,
+ 's', 'q', 'a', 'z', 'x', 'c', 'd', 'e', 'w', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'd', 'w', 's', 'x', 'c', 'v', 'f', 'r', 'e', 'w', NUL, NUL, NUL, NUL, NUL, NUL,
+ 'f', 'e', 'd', 'c', 'v', 'b', 'g', 't', 'r', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'g', 'r', 'f', 'v', 'b', 'n', 'h', 'y', 't', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'h', 't', 'g', 'b', 'n', 'm', 'j', 'u', 'y', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'j', 'y', 'h', 'n', 'm', 'k', 'i', 'u', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'k', 'u', 'j', 'm', 'l', 'o', 'i', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'l', 'i', 'k', 'p', 'o', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+
+ 'z', 'a', 's', 'd', 'x', 't', 'g', 'h', 'j', 'u', 'q', 'e', NUL, NUL, NUL, NUL,
+ 'x', 'z', 'a', 's', 'd', 'c', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'c', 'x', 's', 'd', 'f', 'v', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'v', 'c', 'd', 'f', 'g', 'b', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'b', 'v', 'f', 'g', 'h', 'n', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'n', 'b', 'g', 'h', 'j', 'm', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'm', 'n', 'h', 'j', 'k', 'l', 'o', 'p', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ };
+ static {
+ buildProximityIndices(PROXIMITY, INDICES);
+ }
+ static int getIndexOf(int characterCode) {
+ return computeIndex(characterCode, INDICES);
}
}
- public static int getIndexOf(int characterCode) {
- final Integer result = INDICES.get(characterCode);
- if (null == result) return -1;
- return result;
+
+ private static class Cyrillic {
+ final private static TreeMap<Integer, Integer> INDICES = new TreeMap<Integer, Integer>();
+ final static int[] PROXIMITY = {
+ // TODO: This table is solely based on the keyboard layout. Consult with Russian
+ // speakers on commonly misspelled words/letters.
+ 'й', 'ц', 'ф', 'ы', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'ц', 'й', 'ф', 'ы', 'в', 'у', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'у', 'ц', 'ы', 'в', 'а', 'к', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'к', 'у', 'в', 'а', 'п', 'е', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'е', 'к', 'а', 'п', 'р', 'н', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'н', 'е', 'п', 'р', 'о', 'г', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'г', 'н', 'р', 'о', 'л', 'ш', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'ш', 'г', 'о', 'л', 'д', 'щ', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'щ', 'ш', 'л', 'д', 'ж', 'з', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'з', 'щ', 'д', 'ж', 'э', 'х', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'х', 'з', 'ж', 'э', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+
+ 'ф', 'й', 'ц', 'ы', 'я', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'ы', 'й', 'ц', 'у', 'ф', 'в', 'я', 'ч', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'в', 'ц', 'у', 'к', 'ы', 'а', 'я', 'ч', 'с', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'а', 'у', 'к', 'е', 'в', 'п', 'ч', 'с', 'м', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'п', 'к', 'е', 'н', 'а', 'р', 'с', 'м', 'и', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'р', 'е', 'н', 'г', 'п', 'о', 'м', 'и', 'т', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'о', 'н', 'г', 'ш', 'р', 'л', 'и', 'т', 'ь', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'л', 'г', 'ш', 'щ', 'о', 'д', 'т', 'ь', 'б', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'д', 'ш', 'щ', 'з', 'л', 'ж', 'ь', 'б', 'ю', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'ж', 'щ', 'з', 'х', 'д', 'э', 'б', 'ю', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'э', 'з', 'х', 'ю', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+
+ 'я', 'ф', 'ы', 'в', 'ч', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'ч', 'ы', 'в', 'а', 'я', 'с', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'с', 'в', 'а', 'п', 'ч', 'м', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'м', 'а', 'п', 'р', 'с', 'и', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'и', 'п', 'р', 'о', 'м', 'т', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'т', 'р', 'о', 'л', 'и', 'ь', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'ь', 'о', 'л', 'д', 'т', 'б', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'б', 'л', 'д', 'ж', 'ь', 'ю', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ 'ю', 'д', 'ж', 'э', 'б', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
+ };
+ static {
+ buildProximityIndices(PROXIMITY, INDICES);
+ }
+ static int getIndexOf(int characterCode) {
+ return computeIndex(characterCode, INDICES);
+ }
+ }
+
+ public static int[] getProximityForScript(final int script) {
+ switch (script) {
+ case AndroidSpellCheckerService.SCRIPT_LATIN:
+ return Latin.PROXIMITY;
+ case AndroidSpellCheckerService.SCRIPT_CYRILLIC:
+ return Cyrillic.PROXIMITY;
+ default:
+ throw new RuntimeException("Wrong script supplied: " + script);
+ }
+ }
+ public static int getIndexOfCodeForScript(final int characterCode, final int script) {
+ switch (script) {
+ case AndroidSpellCheckerService.SCRIPT_LATIN:
+ return Latin.getIndexOf(characterCode);
+ case AndroidSpellCheckerService.SCRIPT_CYRILLIC:
+ return Cyrillic.getIndexOf(characterCode);
+ default:
+ throw new RuntimeException("Wrong script supplied: " + script);
+ }
}
}