diff options
author | 2011-12-05 17:35:37 +0900 | |
---|---|---|
committer | 2011-12-05 18:57:03 +0900 | |
commit | bb2b30fc7ff31182d314e4db9baf1913bf08522d (patch) | |
tree | 86be6ecd70ede02ac8975d5c54c8d0095ce76512 | |
parent | c410e243e983e95413dc2fc855a25fc0ede4bf3c (diff) | |
download | latinime-bb2b30fc7ff31182d314e4db9baf1913bf08522d.tar.gz latinime-bb2b30fc7ff31182d314e4db9baf1913bf08522d.tar.xz latinime-bb2b30fc7ff31182d314e4db9baf1913bf08522d.zip |
Add support for Russian spell checking.
Note that this is not enough: we still need to create a
reasonable proximity table for Cyrillic characters, or we
won't be able to show any suggestions.
Bug: 5701241
Change-Id: Idb141f7a230a6e1a46094308c26f43c01ab3b97a
-rw-r--r-- | java/res/xml/spellchecker.xml | 12 | ||||
-rw-r--r-- | java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java | 75 |
2 files changed, 69 insertions, 18 deletions
diff --git a/java/res/xml/spellchecker.xml b/java/res/xml/spellchecker.xml index d1269d878..b48dc52cd 100644 --- a/java/res/xml/spellchecker.xml +++ b/java/res/xml/spellchecker.xml @@ -43,4 +43,16 @@ android:label="@string/subtype_generic" android:subtypeLocale="es" /> + <subtype + android:label="@string/subtype_generic" + android:subtypeLocale="ru" + /> + <subtype + android:label="@string/subtype_generic" + android:subtypeLocale="cs" + /> + <subtype + android:label="@string/subtype_generic" + android:subtypeLocale="nl" + /> </spell-checker> diff --git a/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java b/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java index 9dfbe7a54..a89ef001e 100644 --- a/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java +++ b/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java @@ -415,10 +415,31 @@ public class AndroidSpellCheckerService extends SpellCheckerService } private static class AndroidSpellCheckerSession extends Session { + private static final int SCRIPT_LATIN = 0; + private static final int SCRIPT_CYRILLIC = 1; + private static final TreeMap<String, Integer> mLanguageToScript; + static { + // List of the supported languages and their associated script. We won't check + // words written in another script than the selected script, because we know we + // don't have those in our dictionary so we will underline everything and we + // will never have any suggestions, so it makes no sense checking them. 
+ mLanguageToScript = new TreeMap<String, Integer>(); + mLanguageToScript.put("en", SCRIPT_LATIN); + mLanguageToScript.put("fr", SCRIPT_LATIN); + mLanguageToScript.put("de", SCRIPT_LATIN); + mLanguageToScript.put("nl", SCRIPT_LATIN); + mLanguageToScript.put("cs", SCRIPT_LATIN); + mLanguageToScript.put("es", SCRIPT_LATIN); + mLanguageToScript.put("it", SCRIPT_LATIN); + mLanguageToScript.put("ru", SCRIPT_CYRILLIC); + } + // Immutable, but need the locale which is not available in the constructor yet private DictionaryPool mDictionaryPool; // Likewise private Locale mLocale; + // Cache this for performance + private int mScript; // One of SCRIPT_LATIN or SCRIPT_CYRILLIC for now. private final AndroidSpellCheckerService mService; @@ -431,38 +452,56 @@ public class AndroidSpellCheckerService extends SpellCheckerService final String localeString = getLocale(); mDictionaryPool = mService.getDictionaryPool(localeString); mLocale = LocaleUtils.constructLocaleFromString(localeString); + final Integer script = mLanguageToScript.get(mLocale.getLanguage()); + if (null == script) { + throw new RuntimeException("We have been called with an unsupported language: \"" + + mLocale.getLanguage() + "\". Framework bug?"); + } + mScript = script; } /* * Returns whether the code point is a letter that makes sense for the specified * locale for this spell checker. * The dictionaries supported by Latin IME are described in res/xml/spellchecker.xml - * and is limited to EFIGS language. - * Hence at the moment this explicitly excludes non-Latin scripts, including CJK - * characters, but also Cyrillic, Arabic or Hebrew characters. - * The locale should be used to rule out inappropriate characters when we support - * spellchecking other languages like Russian. + * and is limited to EFIGS languages and Russian. + * Hence at the moment this explicitly tests for Cyrillic characters or Latin characters + * as appropriate, and explicitly excludes CJK, Arabic and Hebrew characters. 
*/ private static boolean isLetterCheckableByLanguage(final int codePoint, - final Locale locale) { - // Our supported dictionaries (EFIGS) at the moment only includes characters - // in the C0, C1, Latin Extended A and B, IPA extensions unicode blocks. - // As it happens, those are back-to-back in the code range 0x40 to 0x2AF, so - // the below is a very efficient way to test for it. As for the 0-0x3F, it's - // excluded from isLetter anyway. - // TODO: change this to use locale when we support other scripts - return codePoint <= 0x2AF && Character.isLetter(codePoint); + final int script) { + switch (script) { + case SCRIPT_LATIN: + // Our supported latin script dictionaries (EFIGS) at the moment only include + // characters in the C0, C1, Latin Extended A and B, IPA extensions unicode + // blocks. As it happens, those are back-to-back in the code range 0x40 to 0x2AF, + // so the below is a very efficient way to test for it. As for the 0-0x3F, it's + // excluded from isLetter anyway. + return codePoint <= 0x2AF && Character.isLetter(codePoint); + case SCRIPT_CYRILLIC: + // All Cyrillic characters are in the 400~52F block. There are some in the upper + // Unicode range, but they are archaic characters that are not used in modern + // russian and are not used by our dictionary. + return codePoint >= 0x400 && codePoint <= 0x52F && Character.isLetter(codePoint); + default: + // Should never come here + throw new RuntimeException("Impossible value of script: " + script); + } } /** * Finds out whether a particular string should be filtered out of spell checking. * - * This will loosely match URLs, numbers, symbols. + * This will loosely match URLs, numbers, symbols. To avoid always underlining words that + * we know we will never recognize, this accepts a script identifier that should be one + * of the SCRIPT_* constants defined above, to rule out quickly characters from very + * different languages. * * @param text the string to evaluate. 
+ * @param script the identifier for the script this spell checker recognizes * @return true if we should filter this text out, false otherwise */ - private static boolean shouldFilterOut(final String text, final Locale locale) { + private static boolean shouldFilterOut(final String text, final int script) { if (TextUtils.isEmpty(text) || text.length() <= 1) return true; // TODO: check if an equivalent processing can't be done more quickly with a @@ -470,7 +509,7 @@ public class AndroidSpellCheckerService extends SpellCheckerService // Filter by first letter final int firstCodePoint = text.codePointAt(0); // Filter out words that don't start with a letter or an apostrophe - if (!isLetterCheckableByLanguage(firstCodePoint, locale) + if (!isLetterCheckableByLanguage(firstCodePoint, script) && '\'' != firstCodePoint) return true; // Filter contents @@ -483,7 +522,7 @@ public class AndroidSpellCheckerService extends SpellCheckerService // words or a URI - in either case we don't want to spell check that if ('@' == codePoint || '/' == codePoint) return true; - if (isLetterCheckableByLanguage(codePoint, locale)) ++letterCount; + if (isLetterCheckableByLanguage(codePoint, script)) ++letterCount; } // Guestimate heuristic: perform spell checking if at least 3/4 of the characters // in this word are letters @@ -502,7 +541,7 @@ public class AndroidSpellCheckerService extends SpellCheckerService try { final String text = textInfo.getText(); - if (shouldFilterOut(text, mLocale)) { + if (shouldFilterOut(text, mScript)) { DictAndProximity dictInfo = null; try { dictInfo = mDictionaryPool.takeOrGetNull(); |