aboutsummaryrefslogtreecommitdiffstats
path: root/java/src
diff options
context:
space:
mode:
Diffstat (limited to 'java/src')
-rw-r--r--java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java50
-rw-r--r--java/src/com/android/inputmethod/latin/spellcheck/AndroidWordLevelSpellCheckerSession.java45
-rw-r--r--java/src/com/android/inputmethod/latin/utils/ScriptUtils.java103
3 files changed, 112 insertions, 86 deletions
diff --git a/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java b/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java
index 8d495646d..d0316242b 100644
--- a/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java
+++ b/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java
@@ -37,6 +37,7 @@ import com.android.inputmethod.latin.utils.AdditionalSubtypeUtils;
import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
import com.android.inputmethod.latin.utils.CollectionUtils;
import com.android.inputmethod.latin.utils.LocaleUtils;
+import com.android.inputmethod.latin.utils.ScriptUtils;
import com.android.inputmethod.latin.utils.StringUtils;
import java.lang.ref.WeakReference;
@@ -78,40 +79,8 @@ public final class AndroidSpellCheckerService extends SpellCheckerService
private final HashSet<WeakReference<DictionaryCollection>> mDictionaryCollectionsList =
new HashSet<>();
- public static final int SCRIPT_LATIN = 0;
- public static final int SCRIPT_CYRILLIC = 1;
- public static final int SCRIPT_GREEK = 2;
public static final String SINGLE_QUOTE = "\u0027";
public static final String APOSTROPHE = "\u2019";
- private static final TreeMap<String, Integer> mLanguageToScript;
- static {
- // List of the supported languages and their associated script. We won't check
- // words written in another script than the selected script, because we know we
- // don't have those in our dictionary so we will underline everything and we
- // will never have any suggestions, so it makes no sense checking them, and this
- // is done in {@link #shouldFilterOut}. Also, the script is used to choose which
- // proximity to pass to the dictionary descent algorithm.
- // IMPORTANT: this only contains languages - do not write countries in there.
- // Only the language is searched from the map.
- mLanguageToScript = new TreeMap<>();
- mLanguageToScript.put("cs", SCRIPT_LATIN);
- mLanguageToScript.put("da", SCRIPT_LATIN);
- mLanguageToScript.put("de", SCRIPT_LATIN);
- mLanguageToScript.put("el", SCRIPT_GREEK);
- mLanguageToScript.put("en", SCRIPT_LATIN);
- mLanguageToScript.put("es", SCRIPT_LATIN);
- mLanguageToScript.put("fi", SCRIPT_LATIN);
- mLanguageToScript.put("fr", SCRIPT_LATIN);
- mLanguageToScript.put("hr", SCRIPT_LATIN);
- mLanguageToScript.put("it", SCRIPT_LATIN);
- mLanguageToScript.put("lt", SCRIPT_LATIN);
- mLanguageToScript.put("lv", SCRIPT_LATIN);
- mLanguageToScript.put("nb", SCRIPT_LATIN);
- mLanguageToScript.put("nl", SCRIPT_LATIN);
- mLanguageToScript.put("pt", SCRIPT_LATIN);
- mLanguageToScript.put("sl", SCRIPT_LATIN);
- mLanguageToScript.put("ru", SCRIPT_CYRILLIC);
- }
@Override public void onCreate() {
super.onCreate();
@@ -122,22 +91,13 @@ public final class AndroidSpellCheckerService extends SpellCheckerService
onSharedPreferenceChanged(prefs, PREF_USE_CONTACTS_KEY);
}
- public static int getScriptFromLocale(final Locale locale) {
- final Integer script = mLanguageToScript.get(locale.getLanguage());
- if (null == script) {
- throw new RuntimeException("We have been called with an unsupported language: \""
- + locale.getLanguage() + "\". Framework bug?");
- }
- return script;
- }
-
private static String getKeyboardLayoutNameForScript(final int script) {
switch (script) {
- case AndroidSpellCheckerService.SCRIPT_LATIN:
+ case ScriptUtils.SCRIPT_LATIN:
return "qwerty";
- case AndroidSpellCheckerService.SCRIPT_CYRILLIC:
+ case ScriptUtils.SCRIPT_CYRILLIC:
return "east_slavic";
- case AndroidSpellCheckerService.SCRIPT_GREEK:
+ case ScriptUtils.SCRIPT_GREEK:
return "greek";
default:
throw new RuntimeException("Wrong script supplied: " + script);
@@ -413,7 +373,7 @@ public final class AndroidSpellCheckerService extends SpellCheckerService
}
public DictAndKeyboard createDictAndKeyboard(final Locale locale) {
- final int script = getScriptFromLocale(locale);
+ final int script = ScriptUtils.getScriptFromLocale(locale);
final String keyboardLayoutName = getKeyboardLayoutNameForScript(script);
final InputMethodSubtype subtype = AdditionalSubtypeUtils.createAdditionalSubtype(
locale.toString(), keyboardLayoutName, null);
diff --git a/java/src/com/android/inputmethod/latin/spellcheck/AndroidWordLevelSpellCheckerSession.java b/java/src/com/android/inputmethod/latin/spellcheck/AndroidWordLevelSpellCheckerSession.java
index 54eebe399..f1c3468a3 100644
--- a/java/src/com/android/inputmethod/latin/spellcheck/AndroidWordLevelSpellCheckerSession.java
+++ b/java/src/com/android/inputmethod/latin/spellcheck/AndroidWordLevelSpellCheckerSession.java
@@ -36,6 +36,7 @@ import com.android.inputmethod.latin.WordComposer;
import com.android.inputmethod.latin.spellcheck.AndroidSpellCheckerService.SuggestionsGatherer;
import com.android.inputmethod.latin.utils.CoordinateUtils;
import com.android.inputmethod.latin.utils.LocaleUtils;
+import com.android.inputmethod.latin.utils.ScriptUtils;
import com.android.inputmethod.latin.utils.StringUtils;
import java.util.ArrayList;
@@ -116,7 +117,7 @@ public abstract class AndroidWordLevelSpellCheckerSession extends Session {
final String localeString = getLocale();
mDictionaryPool = mService.getDictionaryPool(localeString);
mLocale = LocaleUtils.constructLocaleFromString(localeString);
- mScript = AndroidSpellCheckerService.getScriptFromLocale(mLocale);
+ mScript = ScriptUtils.getScriptFromLocale(mLocale);
}
@Override
@@ -125,44 +126,6 @@ public abstract class AndroidWordLevelSpellCheckerSession extends Session {
cres.unregisterContentObserver(mObserver);
}
- /*
- * Returns whether the code point is a letter that makes sense for the specified
- * locale for this spell checker.
- * The dictionaries supported by Latin IME are described in res/xml/spellchecker.xml
- * and is limited to EFIGS languages and Russian.
- * Hence at the moment this explicitly tests for Cyrillic characters or Latin characters
- * as appropriate, and explicitly excludes CJK, Arabic and Hebrew characters.
- */
- private static boolean isLetterCheckableByLanguage(final int codePoint,
- final int script) {
- switch (script) {
- case AndroidSpellCheckerService.SCRIPT_LATIN:
- // Our supported latin script dictionaries (EFIGS) at the moment only include
- // characters in the C0, C1, Latin Extended A and B, IPA extensions unicode
- // blocks. As it happens, those are back-to-back in the code range 0x40 to 0x2AF,
- // so the below is a very efficient way to test for it. As for the 0-0x3F, it's
- // excluded from isLetter anyway.
- return codePoint <= 0x2AF && Character.isLetter(codePoint);
- case AndroidSpellCheckerService.SCRIPT_CYRILLIC:
- // All Cyrillic characters are in the 400~52F block. There are some in the upper
- // Unicode range, but they are archaic characters that are not used in modern
- // Russian and are not used by our dictionary.
- return codePoint >= 0x400 && codePoint <= 0x52F && Character.isLetter(codePoint);
- case AndroidSpellCheckerService.SCRIPT_GREEK:
- // Greek letters are either in the 370~3FF range (Greek & Coptic), or in the
- // 1F00~1FFF range (Greek extended). Our dictionary contains both sort of characters.
- // Our dictionary also contains a few words with 0xF2; it would be best to check
- // if that's correct, but a web search does return results for these words so
- // they are probably okay.
- return (codePoint >= 0x370 && codePoint <= 0x3FF)
- || (codePoint >= 0x1F00 && codePoint <= 0x1FFF)
- || codePoint == 0xF2;
- default:
- // Should never come here
- throw new RuntimeException("Impossible value of script: " + script);
- }
- }
-
private static final int CHECKABILITY_CHECKABLE = 0;
private static final int CHECKABILITY_TOO_MANY_NON_LETTERS = 1;
private static final int CHECKABILITY_CONTAINS_PERIOD = 2;
@@ -189,7 +152,7 @@ public abstract class AndroidWordLevelSpellCheckerSession extends Session {
// Filter by first letter
final int firstCodePoint = text.codePointAt(0);
// Filter out words that don't start with a letter or an apostrophe
- if (!isLetterCheckableByLanguage(firstCodePoint, script)
+ if (!ScriptUtils.isLetterCheckableByScript(firstCodePoint, script)
&& '\'' != firstCodePoint) return CHECKABILITY_FIRST_LETTER_UNCHECKABLE;
// Filter contents
@@ -210,7 +173,7 @@ public abstract class AndroidWordLevelSpellCheckerSession extends Session {
if (Constants.CODE_PERIOD == codePoint) {
return CHECKABILITY_CONTAINS_PERIOD;
}
- if (isLetterCheckableByLanguage(codePoint, script)) ++letterCount;
+ if (ScriptUtils.isLetterCheckableByScript(codePoint, script)) ++letterCount;
}
// Guestimate heuristic: perform spell checking if at least 3/4 of the characters
// in this word are letters
diff --git a/java/src/com/android/inputmethod/latin/utils/ScriptUtils.java b/java/src/com/android/inputmethod/latin/utils/ScriptUtils.java
new file mode 100644
index 000000000..4dfb38d80
--- /dev/null
+++ b/java/src/com/android/inputmethod/latin/utils/ScriptUtils.java
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin.utils;
+
+import java.util.Locale;
+import java.util.TreeMap;
+
+/**
+ * A class to help with handling different writing scripts.
+ */
+public class ScriptUtils {
+ public static final int SCRIPT_LATIN = 0;
+ public static final int SCRIPT_CYRILLIC = 1;
+ public static final int SCRIPT_GREEK = 2;
+ public static final TreeMap<String, Integer> mLanguageToScript;
+ static {
+ // List of the supported languages and their associated script. We won't check
+ // words written in another script than the selected script, because we know we
+ // don't have those in our dictionary so we will underline everything and we
+ // will never have any suggestions, so it makes no sense checking them, and this
+ // is done in {@link #shouldFilterOut}. Also, the script is used to choose which
+ // proximity to pass to the dictionary descent algorithm.
+ // IMPORTANT: this only contains languages - do not write countries in there.
+ // Only the language is searched from the map.
+ mLanguageToScript = new TreeMap<>();
+ mLanguageToScript.put("cs", SCRIPT_LATIN);
+ mLanguageToScript.put("da", SCRIPT_LATIN);
+ mLanguageToScript.put("de", SCRIPT_LATIN);
+ mLanguageToScript.put("el", SCRIPT_GREEK);
+ mLanguageToScript.put("en", SCRIPT_LATIN);
+ mLanguageToScript.put("es", SCRIPT_LATIN);
+ mLanguageToScript.put("fi", SCRIPT_LATIN);
+ mLanguageToScript.put("fr", SCRIPT_LATIN);
+ mLanguageToScript.put("hr", SCRIPT_LATIN);
+ mLanguageToScript.put("it", SCRIPT_LATIN);
+ mLanguageToScript.put("lt", SCRIPT_LATIN);
+ mLanguageToScript.put("lv", SCRIPT_LATIN);
+ mLanguageToScript.put("nb", SCRIPT_LATIN);
+ mLanguageToScript.put("nl", SCRIPT_LATIN);
+ mLanguageToScript.put("pt", SCRIPT_LATIN);
+ mLanguageToScript.put("sl", SCRIPT_LATIN);
+ mLanguageToScript.put("ru", SCRIPT_CYRILLIC);
+ }
+ /*
+ * Returns whether the code point is a letter that makes sense for the specified
+ * locale for this spell checker.
+ * The dictionaries supported by Latin IME are described in res/xml/spellchecker.xml
+ * and is limited to EFIGS languages and Russian.
+ * Hence at the moment this explicitly tests for Cyrillic characters or Latin characters
+ * as appropriate, and explicitly excludes CJK, Arabic and Hebrew characters.
+ */
+ public static boolean isLetterCheckableByScript(final int codePoint, final int script) {
+ switch (script) {
+ case SCRIPT_LATIN:
+ // Our supported latin script dictionaries (EFIGS) at the moment only include
+ // characters in the C0, C1, Latin Extended A and B, IPA extensions unicode
+ // blocks. As it happens, those are back-to-back in the code range 0x40 to 0x2AF,
+ // so the below is a very efficient way to test for it. As for the 0-0x3F, it's
+ // excluded from isLetter anyway.
+ return codePoint <= 0x2AF && Character.isLetter(codePoint);
+ case SCRIPT_CYRILLIC:
+ // All Cyrillic characters are in the 400~52F block. There are some in the upper
+ // Unicode range, but they are archaic characters that are not used in modern
+ // Russian and are not used by our dictionary.
+ return codePoint >= 0x400 && codePoint <= 0x52F && Character.isLetter(codePoint);
+ case SCRIPT_GREEK:
+ // Greek letters are either in the 370~3FF range (Greek & Coptic), or in the
+ // 1F00~1FFF range (Greek extended). Our dictionary contains both sort of characters.
+ // Our dictionary also contains a few words with 0xF2; it would be best to check
+ // if that's correct, but a web search does return results for these words so
+ // they are probably okay.
+ return (codePoint >= 0x370 && codePoint <= 0x3FF)
+ || (codePoint >= 0x1F00 && codePoint <= 0x1FFF)
+ || codePoint == 0xF2;
+ default:
+ // Should never come here
+ throw new RuntimeException("Impossible value of script: " + script);
+ }
+ }
+
+ public static int getScriptFromLocale(final Locale locale) {
+ final Integer script = mLanguageToScript.get(locale.getLanguage());
+ if (null == script) {
+ throw new RuntimeException("We have been called with an unsupported language: \""
+ + locale.getLanguage() + "\". Framework bug?");
+ }
+ return script;
+ }
+}