diff options
Diffstat (limited to 'java/src/com/android/inputmethod/latin/spellcheck/SpellCheckerProximityInfo.java')
-rw-r--r-- | java/src/com/android/inputmethod/latin/spellcheck/SpellCheckerProximityInfo.java | 214 |
1 files changed, 214 insertions, 0 deletions
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package com.android.inputmethod.latin.spellcheck;

import com.android.inputmethod.keyboard.KeyDetector;
import com.android.inputmethod.keyboard.ProximityInfo;

import java.util.TreeMap;

/**
 * Static proximity tables used by the spell checker, one per supported script.
 *
 * <p>Each table is a flat {@code int[]} laid out as a grid of
 * {@link #PROXIMITY_GRID_HEIGHT} rows by {@link #PROXIMITY_GRID_WIDTH} key slots, where every
 * key slot occupies exactly {@link #ROW_SIZE} consecutive entries: the code of the key itself
 * followed by the codes considered close to it, padded with {@link #NUL}.
 */
public class SpellCheckerProximityInfo {
    /* public for test */
    public static final int NUL = KeyDetector.NOT_A_CODE;

    // This must be the same as MAX_PROXIMITY_CHARS_SIZE else it will not work inside
    // native code - this value is passed at creation of the binary object and reused
    // as the size of the passed array afterwards so they can't be different.
    public static final int ROW_SIZE = ProximityInfo.MAX_PROXIMITY_CHARS_SIZE;

    // The number of keys in a row of the grid used by the spell checker.
    public static final int PROXIMITY_GRID_WIDTH = 11;
    // The number of rows in the grid used by the spell checker.
    public static final int PROXIMITY_GRID_HEIGHT = 3;

    private static final int NOT_AN_INDEX = -1;
    public static final int NOT_A_COORDINATE_PAIR = -1;

    // Helper methods

    /**
     * Fills {@code indices} with a mapping from each key's code to its slot number in
     * {@code proximity}.
     *
     * <p>Only the first entry of every {@link #ROW_SIZE}-sized slot is examined; slots whose
     * first entry is {@link #NUL} are padding and are skipped.
     *
     * @param proximity the flat proximity table to index
     * @param indices the map to populate; existing entries for the same code are overwritten
     */
    protected static final void buildProximityIndices(final int[] proximity,
            final TreeMap<Integer, Integer> indices) {
        for (int i = 0; i < proximity.length; i += ROW_SIZE) {
            if (NUL != proximity[i]) {
                indices.put(proximity[i], i / ROW_SIZE);
            }
        }
    }

    /**
     * Looks up the slot number of a character in a map built by
     * {@link #buildProximityIndices(int[], TreeMap)}.
     *
     * @param characterCode the code to look up
     * @param indices the code-to-slot map
     * @return the slot number, or {@code NOT_AN_INDEX} (-1) if the code is not in the map
     */
    protected static final int computeIndex(final int characterCode,
            final TreeMap<Integer, Integer> indices) {
        final Integer result = indices.get(characterCode);
        if (null == result) {
            return NOT_AN_INDEX;
        }
        return result;
    }

    private static class Latin {
        // This is a map from the code point to the index in the PROXIMITY array.
        // At the time the native code to read the binary dictionary needs the proximity info be
        // passed as a flat array spaced by MAX_PROXIMITY_CHARS_SIZE columns, one for each input
        // character.
        // Since we need to build such an array, we want to be able to search in our big proximity
        // data quickly by character, and a map is probably the best way to do this.
        private static final TreeMap<Integer, Integer> INDICES = new TreeMap<Integer, Integer>();

        // The proximity here is the union of
        // - the proximity for a QWERTY keyboard.
        // - the proximity for an AZERTY keyboard.
        // - the proximity for a QWERTZ keyboard.
        // ...plus, add all characters in the ('a', 'e', 'i', 'o', 'u') set to each other.
        //
        // The reasoning behind this construction is, almost any alphabetic text we may want
        // to spell check has been entered with one of the keyboards above. Also, specifically
        // to English, many spelling errors consist of the last vowel of the word being wrong
        // because in English vowels tend to merge with each other in pronunciation.
        static final int[] PROXIMITY = {
            // Proximity for row 1. This must have exactly ROW_SIZE entries for each letter,
            // and exactly PROXIMITY_GRID_WIDTH letters for a row. Pad with NUL's.
            // The number of rows must be exactly PROXIMITY_GRID_HEIGHT.
            'q', 'w', 's', 'a', 'z', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'w', 'q', 'a', 's', 'd', 'e', 'x', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'e', 'w', 's', 'd', 'f', 'r', 'a', 'i', 'o', 'u', NUL, NUL, NUL, NUL, NUL, NUL,
            'r', 'e', 'd', 'f', 'g', 't', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            't', 'r', 'f', 'g', 'h', 'y', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'y', 't', 'g', 'h', 'j', 'u', 'a', 's', 'd', 'x', NUL, NUL, NUL, NUL, NUL, NUL,
            'u', 'y', 'h', 'j', 'k', 'i', 'a', 'e', 'o', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'i', 'u', 'j', 'k', 'l', 'o', 'a', 'e', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'o', 'i', 'k', 'l', 'p', 'a', 'e', 'u', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'p', 'o', 'l', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,

            // Proximity for row 2. See comment above about size.
            'a', 'z', 'x', 's', 'w', 'q', 'e', 'i', 'o', 'u', NUL, NUL, NUL, NUL, NUL, NUL,
            's', 'q', 'a', 'z', 'x', 'c', 'd', 'e', 'w', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'd', 'w', 's', 'x', 'c', 'v', 'f', 'r', 'e', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'f', 'e', 'd', 'c', 'v', 'b', 'g', 't', 'r', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'g', 'r', 'f', 'v', 'b', 'n', 'h', 'y', 't', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'h', 't', 'g', 'b', 'n', 'm', 'j', 'u', 'y', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'j', 'y', 'h', 'n', 'm', 'k', 'i', 'u', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'k', 'u', 'j', 'm', 'l', 'o', 'i', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'l', 'i', 'k', 'p', 'o', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,

            // Proximity for row 3. See comment above about size.
            'z', 'a', 's', 'd', 'x', 't', 'g', 'h', 'j', 'u', 'q', 'e', NUL, NUL, NUL, NUL,
            'x', 'z', 'a', 's', 'd', 'c', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'c', 'x', 's', 'd', 'f', 'v', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'v', 'c', 'd', 'f', 'g', 'b', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'b', 'v', 'f', 'g', 'h', 'n', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'n', 'b', 'g', 'h', 'j', 'm', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'm', 'n', 'h', 'j', 'k', 'l', 'o', 'p', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            // Fourth padding slot: 7 letters + 4 NUL slots = PROXIMITY_GRID_WIDTH, so that this
            // row is as wide as the others and the table holds exactly
            // PROXIMITY_GRID_WIDTH * PROXIMITY_GRID_HEIGHT slots of ROW_SIZE entries.
            NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
        };
        static {
            buildProximityIndices(PROXIMITY, INDICES);
        }

        /** Returns the slot number of {@code characterCode} in {@link #PROXIMITY}, or -1. */
        static int getIndexOf(int characterCode) {
            return computeIndex(characterCode, INDICES);
        }
    }

    private static class Cyrillic {
        // Map from the code point to the slot number in the PROXIMITY array below.
        private static final TreeMap<Integer, Integer> INDICES = new TreeMap<Integer, Integer>();
        // TODO: The following table is solely based on the keyboard layout. Consult with Russian
        // speakers on commonly misspelled words/letters.
        static final int[] PROXIMITY = {
            // Proximity for row 1. This must have exactly ROW_SIZE entries for each letter,
            // and exactly PROXIMITY_GRID_WIDTH letters for a row. Pad with NUL's.
            // The number of rows must be exactly PROXIMITY_GRID_HEIGHT.
            'й', 'ц', 'ф', 'ы', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'ц', 'й', 'ф', 'ы', 'в', 'у', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'у', 'ц', 'ы', 'в', 'а', 'к', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'к', 'у', 'в', 'а', 'п', 'е', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'е', 'к', 'а', 'п', 'р', 'н', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'н', 'е', 'п', 'р', 'о', 'г', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'г', 'н', 'р', 'о', 'л', 'ш', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'ш', 'г', 'о', 'л', 'д', 'щ', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'щ', 'ш', 'л', 'д', 'ж', 'з', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'з', 'щ', 'д', 'ж', 'э', 'х', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'х', 'з', 'ж', 'э', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,

            // Proximity for row 2. See comment above about size.
            'ф', 'й', 'ц', 'ы', 'я', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'ы', 'й', 'ц', 'у', 'ф', 'в', 'я', 'ч', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'в', 'ц', 'у', 'к', 'ы', 'а', 'я', 'ч', 'с', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'а', 'у', 'к', 'е', 'в', 'п', 'ч', 'с', 'м', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'п', 'к', 'е', 'н', 'а', 'р', 'с', 'м', 'и', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'р', 'е', 'н', 'г', 'п', 'о', 'м', 'и', 'т', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'о', 'н', 'г', 'ш', 'р', 'л', 'и', 'т', 'ь', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'л', 'г', 'ш', 'щ', 'о', 'д', 'т', 'ь', 'б', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'д', 'ш', 'щ', 'з', 'л', 'ж', 'ь', 'б', 'ю', NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'ж', 'щ', 'з', 'х', 'д', 'э', 'б', 'ю', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'э', 'з', 'х', 'ю', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,

            // Proximity for row 3. See comment above about size.
            'я', 'ф', 'ы', 'в', 'ч', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'ч', 'ы', 'в', 'а', 'я', 'с', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'с', 'в', 'а', 'п', 'ч', 'м', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'м', 'а', 'п', 'р', 'с', 'и', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'и', 'п', 'р', 'о', 'м', 'т', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'т', 'р', 'о', 'л', 'и', 'ь', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'ь', 'о', 'л', 'д', 'т', 'б', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'б', 'л', 'д', 'ж', 'ь', 'ю', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            'ю', 'д', 'ж', 'э', 'б', NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
            NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL, NUL,
        };
        static {
            buildProximityIndices(PROXIMITY, INDICES);
        }

        /** Returns the slot number of {@code characterCode} in {@link #PROXIMITY}, or -1. */
        static int getIndexOf(int characterCode) {
            return computeIndex(characterCode, INDICES);
        }
    }

    /**
     * Returns the flat proximity table for the given script.
     *
     * <p>The returned array is the shared static table itself, not a copy.
     *
     * @param script one of the {@code AndroidSpellCheckerService.SCRIPT_*} constants handled below
     * @return the proximity table for that script
     * @throws RuntimeException if {@code script} is neither Latin nor Cyrillic
     */
    public static int[] getProximityForScript(final int script) {
        switch (script) {
            case AndroidSpellCheckerService.SCRIPT_LATIN:
                return Latin.PROXIMITY;
            case AndroidSpellCheckerService.SCRIPT_CYRILLIC:
                return Cyrillic.PROXIMITY;
            default:
                throw new RuntimeException("Wrong script supplied: " + script);
        }
    }

    /**
     * Returns the slot number of {@code codePoint} in the table of {@code script}, or
     * {@code NOT_AN_INDEX} if the code point is not a key of that table.
     *
     * @throws RuntimeException if {@code script} is neither Latin nor Cyrillic
     */
    private static int getIndexOfCodeForScript(final int codePoint, final int script) {
        switch (script) {
            case AndroidSpellCheckerService.SCRIPT_LATIN:
                return Latin.getIndexOf(codePoint);
            case AndroidSpellCheckerService.SCRIPT_CYRILLIC:
                return Cyrillic.getIndexOf(codePoint);
            default:
                throw new RuntimeException("Wrong script supplied: " + script);
        }
    }

    /**
     * Returns the grid coordinates of a code point, packed into a single int.
     *
     * <p>The result is {@code (Y << 16) + X} to avoid creating a temporary object. This is okay
     * because X and Y are limited to {@link #PROXIMITY_GRID_WIDTH} resp.
     * {@link #PROXIMITY_GRID_HEIGHT}, which are far smaller than {@code 1 << 16}.
     *
     * @param codePoint the code point to locate
     * @param script one of the {@code AndroidSpellCheckerService.SCRIPT_*} constants handled by
     *        {@link #getProximityForScript(int)}
     * @return the packed coordinates, or {@link #NOT_A_COORDINATE_PAIR} if the key is not on
     *         the grid
     * @throws RuntimeException if the script is not supported, or if the computed row falls
     *         outside the grid
     */
    public static int getXYForCodePointAndScript(final int codePoint, final int script) {
        final int index = getIndexOfCodeForScript(codePoint, script);
        if (NOT_AN_INDEX == index) {
            return NOT_A_COORDINATE_PAIR;
        }
        final int y = index / PROXIMITY_GRID_WIDTH;
        final int x = index % PROXIMITY_GRID_WIDTH;
        // Rows are zero-based, so the valid values are 0 .. PROXIMITY_GRID_HEIGHT - 1 and
        // y == PROXIMITY_GRID_HEIGHT is already off the grid.
        if (y >= PROXIMITY_GRID_HEIGHT) {
            // Safety check, should be entirely useless
            throw new RuntimeException("Wrong y coordinate in spell checker proximity");
        }
        return (y << 16) + x;
    }
}