diff options
38 files changed, 1070 insertions, 280 deletions
diff --git a/java/res/values/strings.xml b/java/res/values/strings.xml index fb341acc3..201fc7030 100644 --- a/java/res/values/strings.xml +++ b/java/res/values/strings.xml @@ -350,10 +350,10 @@ <!-- Inform the user that a particular language has an available dictionary --> <string name="has_dictionary">Dictionary available</string> - <!-- Preferences item for enabling to send user statistics to Google --> + <!-- Preferences item for enabling to send user statistics for development only diagnostics --> <string name="prefs_enable_log">Enable user feedback</string> - <!-- Description for enabling to send user statistics to Google --> - <string name="prefs_description_log">Help improve this input method editor by automatically sending usage statistics and crash reports to Google.</string> + <!-- Description for enabling to send user statistics for development only diagnostics --> + <string name="prefs_description_log">Help improve this input method editor by automatically sending usage statistics and crash reports</string> <!-- Title of the item to change the keyboard theme [CHAR LIMIT=20]--> <string name="keyboard_layout">Keyboard theme</string> @@ -422,12 +422,12 @@ <!-- Title of an option for usability study mode --> <string name="prefs_usability_study_mode">Usability study mode</string> - <!-- Title of the settings for key long press delay --> - <string name="prefs_key_longpress_timeout_settings">Key long press delay settings</string> - <!-- Title of the settings for keypress vibration duration --> - <string name="prefs_keypress_vibration_duration_settings">Keypress vibration duration settings</string> - <!-- Title of the settings for keypress sound volume --> - <string name="prefs_keypress_sound_volume_settings">Keypress sound volume settings</string> + <!-- Title of the settings for key long press delay [CHAR LIMIT=30] --> + <string name="prefs_key_longpress_timeout_settings">Key long press delay</string> + <!-- Title of the settings for keypress vibration duration [CHAR LIMIT=30] --> + <string name="prefs_keypress_vibration_duration_settings">Keypress vibration duration</string> + <!-- Title of the settings for keypress sound volume [CHAR LIMIT=30] --> + <string name="prefs_keypress_sound_volume_settings">Keypress sound volume</string> <!-- Title of the settings for reading an external dictionary file --> <string name="prefs_read_external_dictionary">Read external dictionary file</string> <!-- Message to show when there are no files to install as an external dictionary [CHAR LIMIT=100] --> @@ -498,9 +498,9 @@ <!-- Message about some dictionary indicating the file is installed, but the dictionary is disabled --> <string name="dictionary_disabled">Installed, disabled</string> - <!-- Message to display in the dictionaries setting screen when some error prevented us to list installed dictionaries [CHAR LIMIT=50] --> + <!-- Message to display in the dictionaries setting screen when some error prevented us to list installed dictionaries [CHAR LIMIT=20] --> <string name="cannot_connect_to_dict_service">Problem connecting to dictionary service</string> - <!-- Message to display in the dictionaries setting screen when we found that no dictionaries are available [CHAR LIMIT=50]--> + <!-- Message to display in the dictionaries setting screen when we found that no dictionaries are available [CHAR LIMIT=20]--> <string name="no_dictionaries_available">No dictionaries available</string> <!-- Title of the options to press to refresh the list (as in, check for updates now) [CHAR_LIMIT=50] --> diff --git a/java/src/com/android/inputmethod/latin/StringUtils.java b/java/src/com/android/inputmethod/latin/StringUtils.java index 90c3fcdd2..dcb514a5e 100644 --- a/java/src/com/android/inputmethod/latin/StringUtils.java +++ b/java/src/com/android/inputmethod/latin/StringUtils.java @@ -22,6 +22,10 @@ import java.util.ArrayList; import java.util.Locale; public final class StringUtils { + public static final int CAPITALIZE_NONE = 0; // No caps, or mixed case + public static final int CAPITALIZE_FIRST = 1; // First only + public static final int CAPITALIZE_ALL = 2; // All caps + private StringUtils() { // This utility class is not publicly instantiable. } @@ -171,4 +175,31 @@ public final class StringUtils { } return list.toArray(new String[list.size()]); } + + // This method assumes the text is not empty or null. + public static int getCapitalizationType(final String text) { + // If the first char is not uppercase, then the word is either all lower case or + // camel case, and in either case we return CAPITALIZE_NONE. + if (!Character.isUpperCase(text.codePointAt(0))) return CAPITALIZE_NONE; + final int len = text.length(); + int capsCount = 1; + int letterCount = 1; + for (int i = 1; i < len; i = text.offsetByCodePoints(i, 1)) { + if (1 != capsCount && letterCount != capsCount) break; + final int codePoint = text.codePointAt(i); + if (Character.isUpperCase(codePoint)) { + ++capsCount; + ++letterCount; + } else if (Character.isLetter(codePoint)) { + // We need to discount non-letters since they may not be upper-case, but may + // still be part of a word (e.g. single quote or dash, as in "IT'S" or "FULL-TIME") + ++letterCount; + } + } + // We know the first char is upper case. So we want to test if either every letter other + // than the first is lower case, or if they are all upper case. If the string is exactly + // one char long, then we will arrive here with letterCount 1, and this is correct, too. + if (1 == capsCount) return CAPITALIZE_FIRST; + return (letterCount == capsCount ? CAPITALIZE_ALL : CAPITALIZE_NONE); + } } diff --git a/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java b/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java index 38a26486d..8d3b062ff 100644 --- a/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java +++ b/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java @@ -58,10 +58,6 @@ public final class AndroidSpellCheckerService extends SpellCheckerService public static final String PREF_USE_CONTACTS_KEY = "pref_spellcheck_use_contacts"; - public static final int CAPITALIZE_NONE = 0; // No caps, or mixed case - public static final int CAPITALIZE_FIRST = 1; // First only - public static final int CAPITALIZE_ALL = 2; // All caps - private final static String[] EMPTY_STRING_ARRAY = new String[0]; private Map<String, DictionaryPool> mDictionaryPools = CollectionUtils.newSynchronizedTreeMap(); private Map<String, UserBinaryDictionary> mUserDictionaries = @@ -325,13 +321,13 @@ public final class AndroidSpellCheckerService extends SpellCheckerService } Collections.reverse(mSuggestions); StringUtils.removeDupes(mSuggestions); - if (CAPITALIZE_ALL == capitalizeType) { + if (StringUtils.CAPITALIZE_ALL == capitalizeType) { for (int i = 0; i < mSuggestions.size(); ++i) { // get(i) returns a CharSequence which is actually a String so .toString() // should return the same object. mSuggestions.set(i, mSuggestions.get(i).toString().toUpperCase(locale)); } - } else if (CAPITALIZE_FIRST == capitalizeType) { + } else if (StringUtils.CAPITALIZE_FIRST == capitalizeType) { for (int i = 0; i < mSuggestions.size(); ++i) { // Likewise mSuggestions.set(i, StringUtils.toTitleCase( @@ -438,31 +434,4 @@ public final class AndroidSpellCheckerService extends SpellCheckerService } return new DictAndProximity(dictionaryCollection, proximityInfo); } - - // This method assumes the text is not empty or null. - public static int getCapitalizationType(String text) { - // If the first char is not uppercase, then the word is either all lower case, - // and in either case we return CAPITALIZE_NONE. - if (!Character.isUpperCase(text.codePointAt(0))) return CAPITALIZE_NONE; - final int len = text.length(); - int capsCount = 1; - int letterCount = 1; - for (int i = 1; i < len; i = text.offsetByCodePoints(i, 1)) { - if (1 != capsCount && letterCount != capsCount) break; - final int codePoint = text.codePointAt(i); - if (Character.isUpperCase(codePoint)) { - ++capsCount; - ++letterCount; - } else if (Character.isLetter(codePoint)) { - // We need to discount non-letters since they may not be upper-case, but may - // still be part of a word (e.g. single quote or dash, as in "IT'S" or "FULL-TIME") - ++letterCount; - } - } - // We know the first char is upper case. So we want to test if either every letter other - // than the first is lower case, or if they are all upper case. If the string is exactly - // one char long, then we will arrive here with letterCount 1, and this is correct, too. - if (1 == capsCount) return CAPITALIZE_FIRST; - return (letterCount == capsCount ? CAPITALIZE_ALL : CAPITALIZE_NONE); - } } diff --git a/java/src/com/android/inputmethod/latin/spellcheck/AndroidWordLevelSpellCheckerSession.java b/java/src/com/android/inputmethod/latin/spellcheck/AndroidWordLevelSpellCheckerSession.java index 4f86a3175..b15063235 100644 --- a/java/src/com/android/inputmethod/latin/spellcheck/AndroidWordLevelSpellCheckerSession.java +++ b/java/src/com/android/inputmethod/latin/spellcheck/AndroidWordLevelSpellCheckerSession.java @@ -150,7 +150,7 @@ public abstract class AndroidWordLevelSpellCheckerSession extends Session { // Greek letters are either in the 370~3FF range (Greek & Coptic), or in the // 1F00~1FFF range (Greek extended). Our dictionary contains both sort of characters. // Our dictionary also contains a few words with 0xF2; it would be best to check - // if that's correct, but a Google search does return results for these words so + // if that's correct, but a web search does return results for these words so // they are probably okay. return (codePoint >= 0x370 && codePoint <= 0x3FF) || (codePoint >= 0x1F00 && codePoint <= 0x1FFF) @@ -214,14 +214,14 @@ public abstract class AndroidWordLevelSpellCheckerSession extends Session { // If the word is in there as is, then it's in the dictionary. If not, we'll test lower // case versions, but only if the word is not already all-lower case or mixed case. if (dict.isValidWord(text)) return true; - if (AndroidSpellCheckerService.CAPITALIZE_NONE == capitalizeType) return false; + if (StringUtils.CAPITALIZE_NONE == capitalizeType) return false; // If we come here, we have a capitalized word (either First- or All-). // Downcase the word and look it up again. If the word is only capitalized, we // tested all possibilities, so if it's still negative we can return false. final String lowerCaseText = text.toLowerCase(mLocale); if (dict.isValidWord(lowerCaseText)) return true; - if (AndroidSpellCheckerService.CAPITALIZE_FIRST == capitalizeType) return false; + if (StringUtils.CAPITALIZE_FIRST == capitalizeType) return false; // If the lower case version is not in the dictionary, it's still possible // that we have an all-caps version of a word that needs to be capitalized @@ -296,7 +296,7 @@ public abstract class AndroidWordLevelSpellCheckerSession extends Session { } } - final int capitalizeType = AndroidSpellCheckerService.getCapitalizationType(text); + final int capitalizeType = StringUtils.getCapitalizationType(text); boolean isInDict = true; DictAndProximity dictInfo = null; try { diff --git a/native/jni/Android.mk b/native/jni/Android.mk index 01a8c3392..cbe9515fe 100644 --- a/native/jni/Android.mk +++ b/native/jni/Android.mk @@ -24,15 +24,8 @@ FLAG_DO_PROFILE ?= false include $(CLEAR_VARS) LATIN_IME_SRC_DIR := src -LATIN_IME_SRC_FULLPATH_DIR := $(LOCAL_PATH)/$(LATIN_IME_SRC_DIR) -LOCAL_C_INCLUDES += \ - $(LATIN_IME_SRC_FULLPATH_DIR) \ - $(LATIN_IME_SRC_FULLPATH_DIR)/suggest \ - $(LATIN_IME_SRC_FULLPATH_DIR)/suggest/core/dicnode \ - $(LATIN_IME_SRC_FULLPATH_DIR)/suggest/core/policy \ - $(LATIN_IME_SRC_FULLPATH_DIR)/suggest/core/session \ - $(LATIN_IME_SRC_FULLPATH_DIR)/suggest/policyimpl/typing +LOCAL_C_INCLUDES += $(LOCAL_PATH)/$(LATIN_IME_SRC_DIR) LOCAL_CFLAGS += -Werror -Wall -Wextra -Weffc++ -Wformat=2 -Wcast-qual -Wcast-align \ -Wwrite-strings -Wfloat-equal -Wpointer-arith -Winit-self -Wredundant-decls -Wno-system-headers @@ -59,23 +52,27 @@ LATIN_IME_CORE_SRC_FILES := \ correction.cpp \ dictionary.cpp \ dic_traverse_wrapper.cpp \ + digraph_utils.cpp \ proximity_info.cpp \ proximity_info_params.cpp \ proximity_info_state.cpp \ proximity_info_state_utils.cpp \ unigram_dictionary.cpp \ words_priority_queue.cpp \ - suggest/core/dicnode/dic_node.cpp \ - suggest/core/dicnode/dic_nodes_cache.cpp \ - suggest/core/dicnode/dic_node_utils.cpp \ + suggest/core/suggest.cpp \ + $(addprefix suggest/core/dicnode/, \ + dic_node.cpp \ + dic_node_utils.cpp \ + dic_nodes_cache.cpp) \ suggest/core/policy/weighting.cpp \ suggest/core/session/dic_traverse_session.cpp \ - suggest/policyimpl/typing/scoring_params.cpp \ - suggest/policyimpl/typing/typing_scoring.cpp \ - suggest/policyimpl/typing/typing_traversal.cpp \ - suggest/policyimpl/typing/typing_weighting.cpp \ - suggest/gesture_suggest.cpp \ - suggest/typing_suggest.cpp + suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp \ + $(addprefix suggest/policyimpl/typing/, \ + scoring_params.cpp \ + typing_scoring.cpp \ + typing_suggest_policy.cpp \ + typing_traversal.cpp \ + typing_weighting.cpp) LOCAL_SRC_FILES := \ $(LATIN_IME_JNI_SRC_FILES) \ @@ -130,6 +127,4 @@ include $(BUILD_SHARED_LIBRARY) #################### Clean up the tmp vars LATIN_IME_CORE_SRC_FILES := LATIN_IME_JNI_SRC_FILES := -LATIN_IME_GESTURE_IMPL_SRC_FILES := LATIN_IME_SRC_DIR := -LATIN_IME_SRC_FULLPATH_DIR := diff --git a/native/jni/jni_common.cpp b/native/jni/jni_common.cpp index 1ea204102..8e5c50880 100644 --- a/native/jni/jni_common.cpp +++ b/native/jni/jni_common.cpp @@ -16,12 +16,12 @@ #define LOG_TAG "LatinIME: jni" +#include "jni_common.h" + #include "com_android_inputmethod_keyboard_ProximityInfo.h" #include "com_android_inputmethod_latin_BinaryDictionary.h" #include "com_android_inputmethod_latin_DicTraverseSession.h" #include "defines.h" -#include "jni.h" -#include "jni_common.h" /* * Returns the JNI version on success, -1 on failure. diff --git a/native/jni/src/bigram_dictionary.cpp b/native/jni/src/bigram_dictionary.cpp index 43e59a262..92890383a 100644 --- a/native/jni/src/bigram_dictionary.cpp +++ b/native/jni/src/bigram_dictionary.cpp @@ -39,7 +39,7 @@ BigramDictionary::~BigramDictionary() { void BigramDictionary::addWordBigram(int *word, int length, int probability, int *bigramProbability, int *bigramCodePoints, int *outputTypes) const { word[length] = 0; - if (DEBUG_DICT) { + if (DEBUG_DICT_FULL) { #ifdef FLAG_DBG char s[length + 1]; for (int i = 0; i <= length; i++) s[i] = static_cast<char>(word[i]); @@ -57,7 +57,7 @@ void BigramDictionary::addWordBigram(int *word, int length, int probability, int } insertAt++; } - if (DEBUG_DICT) { + if (DEBUG_DICT_FULL) { AKLOGI("Bigram: InsertAt -> %d MAX_RESULTS: %d", insertAt, MAX_RESULTS); } if (insertAt >= MAX_RESULTS) { @@ -76,7 +76,7 @@ void BigramDictionary::addWordBigram(int *word, int length, int probability, int *dest++ = *word++; } *dest = 0; // NULL terminate - if (DEBUG_DICT) { + if (DEBUG_DICT_FULL) { AKLOGI("Bigram: Added word at %d", insertAt); } } diff --git a/native/jni/src/correction.cpp b/native/jni/src/correction.cpp index 671507ee0..76234f840 100644 --- a/native/jni/src/correction.cpp +++ b/native/jni/src/correction.cpp @@ -954,7 +954,13 @@ inline static int editDistanceInternal(int *editDistanceTable, const int *before // In dictionary.cpp, getSuggestion() method, -// suggestion scores are computed using the below formula. +// When USE_SUGGEST_INTERFACE_FOR_TYPING is true: +// SUGGEST_INTERFACE_OUTPUT_SCALE was multiplied to the original suggestion scores to convert +// them to integers. +// score = (int)((original score) * SUGGEST_INTERFACE_OUTPUT_SCALE) +// Undo the scaling here to recover the original score. +// normalizedScore = ((float)score) / SUGGEST_INTERFACE_OUTPUT_SCALE +// Otherwise: suggestion scores are computed using the below formula. // original score // := powf(mTypedLetterMultiplier (this is defined 2), // (the number of matched characters between typed word and suggested word)) @@ -991,16 +997,20 @@ inline static int editDistanceInternal(int *editDistanceTable, const int *before return 0.0f; } + // add a weight based on edit distance. + // distance <= max(afterLength, beforeLength) == afterLength, + // so, 0 <= distance / afterLength <= 1 + const float weight = 1.0f - static_cast<float>(distance) / static_cast<float>(afterLength); + + if (USE_SUGGEST_INTERFACE_FOR_TYPING) { + return (static_cast<float>(score) / SUGGEST_INTERFACE_OUTPUT_SCALE) * weight; + } const float maxScore = score >= S_INT_MAX ? static_cast<float>(S_INT_MAX) : static_cast<float>(MAX_INITIAL_SCORE) * powf(static_cast<float>(TYPED_LETTER_MULTIPLIER), static_cast<float>(min(beforeLength, afterLength - spaceCount))) * static_cast<float>(FULL_WORD_MULTIPLIER); - // add a weight based on edit distance. - // distance <= max(afterLength, beforeLength) == afterLength, - // so, 0 <= distance / afterLength <= 1 - const float weight = 1.0f - static_cast<float>(distance) / static_cast<float>(afterLength); return (static_cast<float>(score) / maxScore) * weight; } } // namespace latinime diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h index 6e098157d..a45691261 100644 --- a/native/jni/src/defines.h +++ b/native/jni/src/defines.h @@ -287,6 +287,7 @@ static inline void prof_out(void) { #define CALIBRATE_SCORE_BY_TOUCH_COORDINATES true #define SUGGEST_MULTIPLE_WORDS true +#define USE_SUGGEST_INTERFACE_FOR_TYPING true #define SUGGEST_INTERFACE_OUTPUT_SCALE 1000000.0f // The following "rate"s are used as a multiplier before dividing by 100, so they are in percent. diff --git a/native/jni/src/dictionary.cpp b/native/jni/src/dictionary.cpp index 6deab36b6..ed6ddb517 100644 --- a/native/jni/src/dictionary.cpp +++ b/native/jni/src/dictionary.cpp @@ -16,14 +16,18 @@ #define LOG_TAG "LatinIME: dictionary.cpp" +#include "dictionary.h" + +#include <map> // TODO: remove #include <stdint.h> #include "bigram_dictionary.h" #include "binary_format.h" #include "defines.h" -#include "dictionary.h" #include "dic_traverse_wrapper.h" -#include "gesture_suggest.h" +#include "suggest/core/suggest.h" +#include "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h" +#include "suggest/policyimpl/typing/typing_suggest_policy_factory.h" #include "unigram_dictionary.h" namespace latinime { @@ -34,13 +38,15 @@ Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust) mDictSize(dictSize), mMmapFd(mmapFd), mDictBufAdjust(dictBufAdjust), mUnigramDictionary(new UnigramDictionary(mOffsetDict, BinaryFormat::getFlags(mDict))), mBigramDictionary(new BigramDictionary(mOffsetDict)), - mGestureSuggest(new GestureSuggest()) { + mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())), + mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) { } Dictionary::~Dictionary() { delete mUnigramDictionary; delete mBigramDictionary; delete mGestureSuggest; + delete mTypingSuggest; } int Dictionary::getSuggestions(ProximityInfo *proximityInfo, void *traverseSession, @@ -60,14 +66,26 @@ int Dictionary::getSuggestions(ProximityInfo *proximityInfo, void *traverseSessi } return result; } else { - std::map<int, int> bigramMap; - uint8_t bigramFilter[BIGRAM_FILTER_BYTE_SIZE]; - mBigramDictionary->fillBigramAddressToProbabilityMapAndFilter(prevWordCodePoints, - prevWordLength, &bigramMap, bigramFilter); - result = mUnigramDictionary->getSuggestions(proximityInfo, xcoordinates, ycoordinates, - inputCodePoints, inputSize, &bigramMap, bigramFilter, useFullEditDistance, outWords, - frequencies, outputTypes); - return result; + if (USE_SUGGEST_INTERFACE_FOR_TYPING) { + DicTraverseWrapper::initDicTraverseSession( + traverseSession, this, prevWordCodePoints, prevWordLength); + result = mTypingSuggest->getSuggestions(proximityInfo, traverseSession, xcoordinates, + ycoordinates, times, pointerIds, inputCodePoints, inputSize, commitPoint, + outWords, frequencies, spaceIndices, outputTypes); + if (DEBUG_DICT) { + DUMP_RESULT(outWords, frequencies); + } + return result; + } else { + std::map<int, int> bigramMap; + uint8_t bigramFilter[BIGRAM_FILTER_BYTE_SIZE]; + mBigramDictionary->fillBigramAddressToProbabilityMapAndFilter(prevWordCodePoints, + prevWordLength, &bigramMap, bigramFilter); + result = mUnigramDictionary->getSuggestions(proximityInfo, xcoordinates, ycoordinates, + inputCodePoints, inputSize, &bigramMap, bigramFilter, useFullEditDistance, + outWords, frequencies, outputTypes); + return result; + } } } diff --git a/native/jni/src/dictionary.h b/native/jni/src/dictionary.h index 449b95ab6..8c6a7de52 100644 --- a/native/jni/src/dictionary.h +++ b/native/jni/src/dictionary.h @@ -79,6 +79,7 @@ class Dictionary { const UnigramDictionary *mUnigramDictionary; const BigramDictionary *mBigramDictionary; SuggestInterface *mGestureSuggest; + SuggestInterface *mTypingSuggest; }; } // namespace latinime #endif // LATINIME_DICTIONARY_H diff --git a/native/jni/src/digraph_utils.cpp b/native/jni/src/digraph_utils.cpp new file mode 100644 index 000000000..8781c5077 --- /dev/null +++ b/native/jni/src/digraph_utils.cpp @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "binary_format.h" +#include "defines.h" +#include "digraph_utils.h" + +namespace latinime { + +const DigraphUtils::digraph_t DigraphUtils::GERMAN_UMLAUT_DIGRAPHS[] = + { { 'a', 'e', 0x00E4 }, // U+00E4 : LATIN SMALL LETTER A WITH DIAERESIS + { 'o', 'e', 0x00F6 }, // U+00F6 : LATIN SMALL LETTER O WITH DIAERESIS + { 'u', 'e', 0x00FC } }; // U+00FC : LATIN SMALL LETTER U WITH DIAERESIS +const DigraphUtils::digraph_t DigraphUtils::FRENCH_LIGATURES_DIGRAPHS[] = + { { 'a', 'e', 0x00E6 }, // U+00E6 : LATIN SMALL LETTER AE + { 'o', 'e', 0x0153 } }; // U+0153 : LATIN SMALL LIGATURE OE + +/* static */ bool DigraphUtils::hasDigraphForCodePoint( + const int dictFlags, const int compositeGlyphCodePoint) { + if (DigraphUtils::getDigraphForCodePoint(dictFlags, compositeGlyphCodePoint)) { + return true; + } + return false; +} + +// Retrieves the set of all digraphs associated with the given dictionary. +// Returns the size of the digraph array, or 0 if none exist. +/* static */ int DigraphUtils::getAllDigraphsForDictionaryAndReturnSize( + const int dictFlags, const DigraphUtils::digraph_t **digraphs) { + if (BinaryFormat::REQUIRES_GERMAN_UMLAUT_PROCESSING & dictFlags) { + *digraphs = DigraphUtils::GERMAN_UMLAUT_DIGRAPHS; + return NELEMS(DigraphUtils::GERMAN_UMLAUT_DIGRAPHS); + } + if (BinaryFormat::REQUIRES_FRENCH_LIGATURES_PROCESSING & dictFlags) { + *digraphs = DigraphUtils::FRENCH_LIGATURES_DIGRAPHS; + return NELEMS(DigraphUtils::FRENCH_LIGATURES_DIGRAPHS); + } + return 0; +} + +// Returns the digraph codepoint for the given composite glyph codepoint and digraph codepoint index +// (which specifies the first or second codepoint in the digraph). +/* static */ int DigraphUtils::getDigraphCodePointForIndex(const int dictFlags, + const int compositeGlyphCodePoint, const DigraphCodePointIndex digraphCodePointIndex) { + if (digraphCodePointIndex == NOT_A_DIGRAPH_INDEX) { + return NOT_A_CODE_POINT; + } + const DigraphUtils::digraph_t *digraph = + DigraphUtils::getDigraphForCodePoint(dictFlags, compositeGlyphCodePoint); + if (!digraph) { + return NOT_A_CODE_POINT; + } + if (digraphCodePointIndex == FIRST_DIGRAPH_CODEPOINT) { + return digraph->first; + } else if (digraphCodePointIndex == SECOND_DIGRAPH_CODEPOINT) { + return digraph->second; + } + ASSERT(false); + return NOT_A_CODE_POINT; +} + +/** + * Returns the digraph for the input composite glyph codepoint, or 0 if none exists. + * dictFlags: the dictionary flags needed to determine which digraphs are supported. + * compositeGlyphCodePoint: the method returns the digraph corresponding to this codepoint. + */ +/* static */ const DigraphUtils::digraph_t *DigraphUtils::getDigraphForCodePoint( + const int dictFlags, const int compositeGlyphCodePoint) { + const DigraphUtils::digraph_t *digraphs = 0; + const int digraphsSize = + DigraphUtils::getAllDigraphsForDictionaryAndReturnSize(dictFlags, &digraphs); + for (int i = 0; i < digraphsSize; i++) { + if (digraphs[i].compositeGlyph == compositeGlyphCodePoint) { + return &digraphs[i]; + } + } + return 0; +} + +} // namespace latinime diff --git a/native/jni/src/digraph_utils.h b/native/jni/src/digraph_utils.h new file mode 100644 index 000000000..6e364b67a --- /dev/null +++ b/native/jni/src/digraph_utils.h @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DIGRAPH_UTILS_H +#define DIGRAPH_UTILS_H + +namespace latinime { + +class DigraphUtils { + public: + typedef enum { + NOT_A_DIGRAPH_INDEX, + FIRST_DIGRAPH_CODEPOINT, + SECOND_DIGRAPH_CODEPOINT + } DigraphCodePointIndex; + + typedef struct { int first; int second; int compositeGlyph; } digraph_t; + + static bool hasDigraphForCodePoint(const int dictFlags, const int compositeGlyphCodePoint); + static int getAllDigraphsForDictionaryAndReturnSize( + const int dictFlags, const digraph_t **digraphs); + static int getDigraphCodePointForIndex(const int dictFlags, const int compositeGlyphCodePoint, + const DigraphCodePointIndex digraphCodePointIndex); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(DigraphUtils); + static const digraph_t *getDigraphForCodePoint( + const int dictFlags, const int compositeGlyphCodePoint); + + static const digraph_t GERMAN_UMLAUT_DIGRAPHS[]; + static const digraph_t FRENCH_LIGATURES_DIGRAPHS[]; +}; +} // namespace latinime +#endif // DIGRAPH_UTILS_H diff --git a/native/jni/src/suggest/core/dictionary/shortcut_utils.h b/native/jni/src/suggest/core/dictionary/shortcut_utils.h new file mode 100644 index 000000000..c411408ec --- /dev/null +++ b/native/jni/src/suggest/core/dictionary/shortcut_utils.h @@ -0,0 +1,65 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_SHORTCUT_UTILS +#define LATINIME_SHORTCUT_UTILS + +#include "defines.h" +#include "suggest/core/dicnode/dic_node_utils.h" +#include "terminal_attributes.h" + +namespace latinime { + +class ShortcutUtils { + public: + static int outputShortcuts(const TerminalAttributes *const terminalAttributes, + int outputWordIndex, const int finalScore, int *const outputCodePoints, + int *const frequencies, int *const outputTypes, const bool sameAsTyped) { + TerminalAttributes::ShortcutIterator iterator = terminalAttributes->getShortcutIterator(); + while (iterator.hasNextShortcutTarget() && outputWordIndex < MAX_RESULTS) { + int shortcutTarget[MAX_WORD_LENGTH]; + int shortcutProbability; + const int shortcutTargetStringLength = iterator.getNextShortcutTarget( + MAX_WORD_LENGTH, shortcutTarget, &shortcutProbability); + int shortcutScore; + int kind; + if (shortcutProbability == BinaryFormat::WHITELIST_SHORTCUT_PROBABILITY + && sameAsTyped) { + shortcutScore = S_INT_MAX; + kind = Dictionary::KIND_WHITELIST; + } else { + // shortcut entry's score == its base entry's score - 1 + shortcutScore = finalScore; + // Protection against int underflow + shortcutScore = max(S_INT_MIN + 1, shortcutScore) - 1; + kind = Dictionary::KIND_CORRECTION; + } + outputTypes[outputWordIndex] = kind; + frequencies[outputWordIndex] = shortcutScore; + frequencies[outputWordIndex] = max(S_INT_MIN + 1, shortcutScore) - 1; + const int startIndex2 = outputWordIndex * MAX_WORD_LENGTH; + DicNodeUtils::appendTwoWords(0, 0, shortcutTarget, shortcutTargetStringLength, + &outputCodePoints[startIndex2]); + ++outputWordIndex; + } + return outputWordIndex; + } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(ShortcutUtils); +}; +} // namespace latinime +#endif // LATINIME_SHORTCUT_UTILS diff --git a/native/jni/src/suggest/core/policy/traversal.h b/native/jni/src/suggest/core/policy/traversal.h index 1d5082ff8..02c358aec 100644 --- a/native/jni/src/suggest/core/policy/traversal.h +++ b/native/jni/src/suggest/core/policy/traversal.h @@ -20,6 +20,9 @@ #include "defines.h" namespace latinime { + +class DicTraverseSession; + class Traversal { public: virtual int getMaxPointerCount() const = 0; diff --git a/native/jni/src/suggest/core/policy/weighting.cpp b/native/jni/src/suggest/core/policy/weighting.cpp index 4d08fa0fa..e62b70423 100644 --- a/native/jni/src/suggest/core/policy/weighting.cpp +++ b/native/jni/src/suggest/core/policy/weighting.cpp @@ -14,14 +14,15 @@ * limitations under the License. */ +#include "suggest/core/policy/weighting.h" + #include "char_utils.h" #include "defines.h" -#include "dic_node.h" -#include "dic_node_profiler.h" -#include "dic_node_utils.h" -#include "dic_traverse_session.h" #include "hash_map_compat.h" -#include "weighting.h" +#include "suggest/core/dicnode/dic_node.h" +#include "suggest/core/dicnode/dic_node_profiler.h" +#include "suggest/core/dicnode/dic_node_utils.h" +#include "suggest/core/session/dic_traverse_session.h" namespace latinime { diff --git a/native/jni/src/suggest/core/policy/weighting.h b/native/jni/src/suggest/core/policy/weighting.h index 83a0f4b45..b92dbe278 100644 --- a/native/jni/src/suggest/core/policy/weighting.h +++ b/native/jni/src/suggest/core/policy/weighting.h @@ -18,6 +18,7 @@ #define LATINIME_WEIGHTING_H #include "defines.h" +#include "hash_map_compat.h" namespace latinime { diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.cpp b/native/jni/src/suggest/core/session/dic_traverse_session.cpp index 1f781dd43..ef6616e40 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.cpp +++ b/native/jni/src/suggest/core/session/dic_traverse_session.cpp @@ -14,12 +14,13 @@ * limitations under the License. */ +#include "suggest/core/session/dic_traverse_session.h" + #include "defines.h" #include "dictionary.h" -#include "dic_node_utils.h" -#include "dic_traverse_session.h" #include "dic_traverse_wrapper.h" #include "jni.h" +#include "suggest/core/dicnode/dic_node_utils.h" namespace latinime { diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.h b/native/jni/src/suggest/core/session/dic_traverse_session.h index af036f82b..62e1d1ab9 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.h +++ b/native/jni/src/suggest/core/session/dic_traverse_session.h @@ -21,10 +21,10 @@ #include <vector> #include "defines.h" -#include "dic_nodes_cache.h" #include "hash_map_compat.h" #include "jni.h" #include "proximity_info_state.h" +#include "suggest/core/dicnode/dic_nodes_cache.h" namespace latinime { diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp new file mode 100644 index 000000000..1e97a9176 --- /dev/null +++ b/native/jni/src/suggest/core/suggest.cpp @@ -0,0 +1,520 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/core/suggest.h" + +#include "char_utils.h" +#include "dictionary.h" +#include "proximity_info.h" +#include "suggest/core/dicnode/dic_node.h" +#include "suggest/core/dicnode/dic_node_priority_queue.h" +#include "suggest/core/dicnode/dic_node_vector.h" +#include "suggest/core/dictionary/shortcut_utils.h" +#include "suggest/core/policy/scoring.h" +#include "suggest/core/policy/traversal.h" +#include "suggest/core/policy/weighting.h" +#include "suggest/core/session/dic_traverse_session.h" +#include "terminal_attributes.h" + +namespace latinime { + +// Initialization of class constants. +const int Suggest::LOOKAHEAD_DIC_NODES_CACHE_SIZE = 25; +const int Suggest::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16; +const int Suggest::MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE = 2; +const float Suggest::AUTOCORRECT_CLASSIFICATION_THRESHOLD = 0.33f; +const float Suggest::AUTOCORRECT_LANGUAGE_FEATURE_THRESHOLD = 0.6f; + +const bool Suggest::CORRECT_SPACE_OMISSION = true; +const bool Suggest::CORRECT_TRANSPOSITION = true; +const bool Suggest::CORRECT_INSERTION = true; +const bool Suggest::CORRECT_OMISSION_G = true; + +/** + * Returns a set of suggestions for the given input touch points. The commitPoint argument indicates + * whether to prematurely commit the suggested words up to the given point for sentence-level + * suggestion. + * + * Note: Currently does not support concurrent calls across threads. Continuous suggestion is + * automatically activated for sequential calls that share the same starting input. + * TODO: Stop detecting continuous suggestion. Start using traverseSession instead. + */ +int Suggest::getSuggestions(ProximityInfo *pInfo, void *traverseSession, + int *inputXs, int *inputYs, int *times, int *pointerIds, int *inputCodePoints, + int inputSize, int commitPoint, int *outWords, int *frequencies, int *outputIndices, + int *outputTypes) const { + PROF_OPEN; + PROF_START(0); + const float maxSpatialDistance = TRAVERSAL->getMaxSpatialDistance(); + DicTraverseSession *tSession = static_cast<DicTraverseSession *>(traverseSession); + tSession->setupForGetSuggestions(pInfo, inputCodePoints, inputSize, inputXs, inputYs, times, + pointerIds, maxSpatialDistance, TRAVERSAL->getMaxPointerCount()); + // TODO: Add the way to evaluate cache + + initializeSearch(tSession, commitPoint); + PROF_END(0); + PROF_START(1); + + // keep expanding search dicNodes until all have terminated. + while (tSession->getDicTraverseCache()->activeSize() > 0) { + expandCurrentDicNodes(tSession); + tSession->getDicTraverseCache()->advanceActiveDicNodes(); + tSession->getDicTraverseCache()->advanceInputIndex(inputSize); + } + PROF_END(1); + PROF_START(2); + const int size = outputSuggestions(tSession, frequencies, outWords, outputIndices, outputTypes); + PROF_END(2); + PROF_CLOSE; + return size; +} + +/** + * Initializes the search at the root of the lexicon trie. Note that when possible the search will + * continue suggestion from where it left off during the last call. + */ +void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPoint) const { + if (!traverseSession->getProximityInfoState(0)->isUsed()) { + return; + } + if (TRAVERSAL->allowPartialCommit()) { + commitPoint = 0; + } + + if (traverseSession->getInputSize() > MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE + && traverseSession->isContinuousSuggestionPossible()) { + if (commitPoint == 0) { + // Continue suggestion + traverseSession->getDicTraverseCache()->continueSearch(); + } else { + // Continue suggestion after partial commit. + DicNode *topDicNode = + traverseSession->getDicTraverseCache()->setCommitPoint(commitPoint); + traverseSession->setPrevWordPos(topDicNode->getPrevWordNodePos()); + traverseSession->getDicTraverseCache()->continueSearch(); + traverseSession->setPartiallyCommited(); + } + } else { + // Restart recognition at the root. + traverseSession->resetCache(TRAVERSAL->getMaxCacheSize(), MAX_RESULTS); + // Create a new dic node here + DicNode rootNode; + DicNodeUtils::initAsRoot(traverseSession->getDicRootPos(), + traverseSession->getOffsetDict(), traverseSession->getPrevWordPos(), &rootNode); + traverseSession->getDicTraverseCache()->copyPushActive(&rootNode); + } +} + +/** + * Outputs the final list of suggestions (i.e., terminal nodes). + */ +int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequencies, + int *outputCodePoints, int *spaceIndices, int *outputTypes) const { + const int terminalSize = min(MAX_RESULTS, + static_cast<int>(traverseSession->getDicTraverseCache()->terminalSize())); + DicNode terminals[MAX_RESULTS]; // Avoiding non-POD variable length array + + for (int index = terminalSize - 1; index >= 0; --index) { + traverseSession->getDicTraverseCache()->popTerminal(&terminals[index]); + } + + const float languageWeight = SCORING->getAdjustedLanguageWeight( + traverseSession, terminals, terminalSize); + + int outputWordIndex = 0; + // Insert most probable word at index == 0 as long as there is one terminal at least + const bool hasMostProbableString = + SCORING->getMostProbableString(traverseSession, terminalSize, languageWeight, + &outputCodePoints[0], &outputTypes[0], &frequencies[0]); + if (hasMostProbableString) { + ++outputWordIndex; + } + + // Initial value of the loop index for terminal nodes (words) + int doubleLetterTerminalIndex = -1; + DoubleLetterLevel doubleLetterLevel = NOT_A_DOUBLE_LETTER; + SCORING->searchWordWithDoubleLetter(terminals, terminalSize, + &doubleLetterTerminalIndex, &doubleLetterLevel); + + int maxScore = S_INT_MIN; + // Output suggestion results here + for (int terminalIndex = 0; terminalIndex < terminalSize && outputWordIndex < MAX_RESULTS; + ++terminalIndex) { + DicNode *terminalDicNode = &terminals[terminalIndex]; + if (DEBUG_GEO_FULL) { + terminalDicNode->dump("OUT:"); + } + const float doubleLetterCost = SCORING->getDoubleLetterDemotionDistanceCost( + terminalIndex, doubleLetterTerminalIndex, doubleLetterLevel); + const float compoundDistance = terminalDicNode->getCompoundDistance(languageWeight) + + doubleLetterCost; + const TerminalAttributes terminalAttributes(traverseSession->getOffsetDict(), + terminalDicNode->getFlags(), terminalDicNode->getAttributesPos()); + const int originalTerminalProbability = terminalDicNode->getProbability(); + + // Do not suggest words with a 0 probability, or entries that are blacklisted or do not + // represent a word. However, we should still submit their shortcuts if any. + const bool isValidWord = + originalTerminalProbability > 0 && !terminalAttributes.isBlacklistedOrNotAWord(); + // Increase output score of top typing suggestion to ensure autocorrection. + // TODO: Better integration with java side autocorrection logic. + // Force autocorrection for obvious long multi-word suggestions. + const bool isForceCommitMultiWords = TRAVERSAL->allowPartialCommit() + && (traverseSession->isPartiallyCommited() + || (traverseSession->getInputSize() >= MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT + && terminalDicNode->hasMultipleWords())); + + const int finalScore = SCORING->calculateFinalScore( + compoundDistance, traverseSession->getInputSize(), + isForceCommitMultiWords || (isValidWord && SCORING->doesAutoCorrectValidWord())); + + maxScore = max(maxScore, finalScore); + + if (TRAVERSAL->allowPartialCommit()) { + // Index for top typing suggestion should be 0. + if (isValidWord && outputWordIndex == 0) { + terminalDicNode->outputSpacePositionsResult(spaceIndices); + } + } + + // Do not suggest words with a 0 probability, or entries that are blacklisted or do not + // represent a word. However, we should still submit their shortcuts if any. + if (isValidWord) { + outputTypes[outputWordIndex] = Dictionary::KIND_CORRECTION; + frequencies[outputWordIndex] = finalScore; + // Populate the outputChars array with the suggested word. + const int startIndex = outputWordIndex * MAX_WORD_LENGTH; + terminalDicNode->outputResult(&outputCodePoints[startIndex]); + ++outputWordIndex; + } + + const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode); + outputWordIndex = ShortcutUtils::outputShortcuts(&terminalAttributes, outputWordIndex, + finalScore, outputCodePoints, frequencies, outputTypes, sameAsTyped); + DicNode::managedDelete(terminalDicNode); + } + + if (hasMostProbableString) { + SCORING->safetyNetForMostProbableString(terminalSize, maxScore, + &outputCodePoints[0], &frequencies[0]); + } + return outputWordIndex; +} + +/** + * Expands the dicNodes in the current search priority queue by advancing to the possible child + * nodes based on the next touch point(s) (or no touch points for lookahead) + */ +void Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const { + const int inputSize = traverseSession->getInputSize(); + DicNodeVector childDicNodes(TRAVERSAL->getDefaultExpandDicNodeSize()); + DicNode omissionDicNode; + + // TODO: Find more efficient caching + const bool shouldDepthLevelCache = TRAVERSAL->shouldDepthLevelCache(traverseSession); + if (shouldDepthLevelCache) { + traverseSession->getDicTraverseCache()->updateLastCachedInputIndex(); + } + if (DEBUG_CACHE) { + AKLOGI("expandCurrentDicNodes depth level cache = %d, inputSize = %d", + shouldDepthLevelCache, inputSize); + } + while (traverseSession->getDicTraverseCache()->activeSize() > 0) { + DicNode dicNode; + traverseSession->getDicTraverseCache()->popActive(&dicNode); + if (dicNode.isTotalInputSizeExceedingLimit()) { + return; + } + childDicNodes.clear(); + const int point0Index = dicNode.getInputIndex(0); + const bool canDoLookAheadCorrection = + TRAVERSAL->canDoLookAheadCorrection(traverseSession, &dicNode); + const bool isLookAheadCorrection = canDoLookAheadCorrection + && traverseSession->getDicTraverseCache()-> + isLookAheadCorrectionInputIndex(static_cast<int>(point0Index)); + const bool isCompletion = dicNode.isCompletion(inputSize); + + const bool shouldNodeLevelCache = + TRAVERSAL->shouldNodeLevelCache(traverseSession, &dicNode); + if (shouldDepthLevelCache || shouldNodeLevelCache) { + if (DEBUG_CACHE) { + dicNode.dump("PUSH_CACHE"); + } + traverseSession->getDicTraverseCache()->copyPushContinue(&dicNode); + dicNode.setCached(); + } + + if (isLookAheadCorrection) { + // The algorithm maintains a small set of "deferred" nodes that have not consumed the + // latest touch point yet. These are needed to apply look-ahead correction operations + // that require special handling of the latest touch point. For example, with insertions + // (e.g., "thiis" -> "this") the latest touch point should not be consumed at all. + if (CORRECT_TRANSPOSITION) { + processDicNodeAsTransposition(traverseSession, &dicNode); + } + if (CORRECT_INSERTION) { + processDicNodeAsInsertion(traverseSession, &dicNode); + } + } else { // !isLookAheadCorrection + // Only consider typing error corrections if the normalized compound distance is + // below a spatial distance threshold. + // NOTE: the threshold may need to be updated if scoring model changes. + // TODO: Remove. Do not prune node here. + const bool allowsErrorCorrections = TRAVERSAL->allowsErrorCorrections(&dicNode); + // Process for handling space substitution (e.g., hevis => he is) + if (allowsErrorCorrections + && TRAVERSAL->isSpaceSubstitutionTerminal(traverseSession, &dicNode)) { + createNextWordDicNode(traverseSession, &dicNode, true /* spaceSubstitution */); + } + + DicNodeUtils::getAllChildDicNodes( + &dicNode, traverseSession->getOffsetDict(), &childDicNodes); + + const int childDicNodesSize = childDicNodes.getSizeAndLock(); + for (int i = 0; i < childDicNodesSize; ++i) { + DicNode *const childDicNode = childDicNodes[i]; + if (isCompletion) { + // Handle forward lookahead when the lexicon letter exceeds the input size. + processDicNodeAsMatch(traverseSession, childDicNode); + continue; + } + if (allowsErrorCorrections + && TRAVERSAL->isOmission(traverseSession, &dicNode, childDicNode)) { + // TODO: (Gesture) Change weight between omission and substitution errors + // TODO: (Gesture) Terminal node should not be handled as omission + omissionDicNode.initByCopy(childDicNode); + processDicNodeAsOmission(traverseSession, &omissionDicNode); + } + const ProximityType proximityType = TRAVERSAL->getProximityType( + traverseSession, &dicNode, childDicNode); + switch (proximityType) { + // TODO: Consider the difference of proximityType here + case MATCH_CHAR: + case PROXIMITY_CHAR: + processDicNodeAsMatch(traverseSession, childDicNode); + break; + case ADDITIONAL_PROXIMITY_CHAR: + if (allowsErrorCorrections) { + processDicNodeAsAdditionalProximityChar(traverseSession, &dicNode, + childDicNode); + } + break; + case SUBSTITUTION_CHAR: + if (allowsErrorCorrections) { + processDicNodeAsSubstitution(traverseSession, &dicNode, childDicNode); + } + break; + case UNRELATED_CHAR: + // Just drop this node and do nothing. + break; + default: + // Just drop this node and do nothing. + break; + } + } + + // Push the node for look-ahead correction + if (allowsErrorCorrections && canDoLookAheadCorrection) { + traverseSession->getDicTraverseCache()->copyPushNextActive(&dicNode); + } + } + } +} + +void Suggest::processTerminalDicNode( + DicTraverseSession *traverseSession, DicNode *dicNode) const { + if (dicNode->getCompoundDistance() >= static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) { + return; + } + if (!dicNode->isTerminalWordNode()) { + return; + } + if (TRAVERSAL->needsToTraverseAllUserInput() + && dicNode->getInputIndex(0) < traverseSession->getInputSize()) { + return; + } + + if (dicNode->shouldBeFilterdBySafetyNetForBigram()) { + return; + } + // Create a non-cached node here. + DicNode terminalDicNode; + DicNodeUtils::initByCopy(dicNode, &terminalDicNode); + Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_TERMINAL, traverseSession, 0, + &terminalDicNode, traverseSession->getBigramCacheMap()); + traverseSession->getDicTraverseCache()->copyPushTerminal(&terminalDicNode); +} + +/** + * Adds the expanded dicNode to the next search priority queue. Also creates an additional next word + * (by the space omission error correction) search path if input dicNode is on a terminal node. + */ +void Suggest::processExpandedDicNode( + DicTraverseSession *traverseSession, DicNode *dicNode) const { + processTerminalDicNode(traverseSession, dicNode); + if (dicNode->getCompoundDistance() < static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) { + if (TRAVERSAL->isSpaceOmissionTerminal(traverseSession, dicNode)) { + createNextWordDicNode(traverseSession, dicNode, false /* spaceSubstitution */); + } + const int allowsLookAhead = !(dicNode->hasMultipleWords() + && dicNode->isCompletion(traverseSession->getInputSize())); + if (dicNode->hasChildren() && allowsLookAhead) { + traverseSession->getDicTraverseCache()->copyPushNextActive(dicNode); + } + } + DicNode::managedDelete(dicNode); +} + +void Suggest::processDicNodeAsMatch(DicTraverseSession *traverseSession, + DicNode *childDicNode) const { + weightChildNode(traverseSession, childDicNode); + processExpandedDicNode(traverseSession, childDicNode); +} + +void Suggest::processDicNodeAsAdditionalProximityChar(DicTraverseSession *traverseSession, + DicNode *dicNode, DicNode *childDicNode) const { + Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_ADDITIONAL_PROXIMITY, + traverseSession, dicNode, childDicNode, 0 /* bigramCacheMap */); + weightChildNode(traverseSession, childDicNode); + processExpandedDicNode(traverseSession, childDicNode); +} + +void Suggest::processDicNodeAsSubstitution(DicTraverseSession *traverseSession, + DicNode *dicNode, DicNode *childDicNode) const { + Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_SUBSTITUTION, traverseSession, + dicNode, childDicNode, 0 /* bigramCacheMap */); + weightChildNode(traverseSession, childDicNode); + processExpandedDicNode(traverseSession, childDicNode); +} + +/** + * Handle the dicNode as an omission error (e.g., ths => this). Skip the current letter and consider + * matches for all possible next letters. Note that just skipping the current letter without any + * other conditions tends to flood the search dic nodes cache with omission nodes. Instead, check + * the possible *next* letters after the omission to better limit search to plausible omissions. + * Note that apostrophes are handled as omissions. + */ +void Suggest::processDicNodeAsOmission( + DicTraverseSession *traverseSession, DicNode *dicNode) const { + // If the omission is surely intentional that it should incur zero cost. + const bool isZeroCostOmission = dicNode->isZeroCostOmission(); + DicNodeVector childDicNodes; + + DicNodeUtils::getAllChildDicNodes(dicNode, traverseSession->getOffsetDict(), &childDicNodes); + + const int size = childDicNodes.getSizeAndLock(); + for (int i = 0; i < size; i++) { + DicNode *const childDicNode = childDicNodes[i]; + if (!isZeroCostOmission) { + // Treat this word as omission + Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_OMISSION, traverseSession, + dicNode, childDicNode, 0 /* bigramCacheMap */); + } + weightChildNode(traverseSession, childDicNode); + + if (!TRAVERSAL->isPossibleOmissionChildNode(traverseSession, dicNode, childDicNode)) { + DicNode::managedDelete(childDicNode); + continue; + } + processExpandedDicNode(traverseSession, childDicNode); + } +} + +/** + * Handle the dicNode as an insertion error (e.g., thiis => this). Skip the current touch point and + * consider matches for the next touch point. + */ +void Suggest::processDicNodeAsInsertion(DicTraverseSession *traverseSession, + DicNode *dicNode) const { + const int16_t pointIndex = dicNode->getInputIndex(0); + DicNodeVector childDicNodes; + DicNodeUtils::getProximityChildDicNodes(dicNode, traverseSession->getOffsetDict(), + traverseSession->getProximityInfoState(0), pointIndex + 1, true, &childDicNodes); + const int size = childDicNodes.getSizeAndLock(); + for (int i = 0; i < size; i++) { + DicNode *const childDicNode = childDicNodes[i]; + Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_INSERTION, traverseSession, + dicNode, childDicNode, 0 /* bigramCacheMap */); + processExpandedDicNode(traverseSession, childDicNode); + } +} + +/** + * Handle the dicNode as a transposition error (e.g., thsi => this). Swap the next two touch points. + */ +void Suggest::processDicNodeAsTransposition(DicTraverseSession *traverseSession, + DicNode *dicNode) const { + const int16_t pointIndex = dicNode->getInputIndex(0); + DicNodeVector childDicNodes1; + DicNodeUtils::getProximityChildDicNodes(dicNode, traverseSession->getOffsetDict(), + traverseSession->getProximityInfoState(0), pointIndex + 1, false, &childDicNodes1); + const int childSize1 = childDicNodes1.getSizeAndLock(); + for (int i = 0; i < childSize1; i++) { + if (childDicNodes1[i]->hasChildren()) { + DicNodeVector childDicNodes2; + DicNodeUtils::getProximityChildDicNodes( + childDicNodes1[i], traverseSession->getOffsetDict(), + traverseSession->getProximityInfoState(0), pointIndex, false, &childDicNodes2); + const int childSize2 = childDicNodes2.getSizeAndLock(); + for (int j = 0; j < childSize2; j++) { + DicNode *const childDicNode2 = childDicNodes2[j]; + Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_TRANSPOSITION, + traverseSession, childDicNodes1[i], childDicNode2, 0 /* bigramCacheMap */); + processExpandedDicNode(traverseSession, childDicNode2); + } + } + DicNode::managedDelete(childDicNodes1[i]); + } +} + +/** + * Weight child node by aligning it to the key + */ +void Suggest::weightChildNode(DicTraverseSession *traverseSession, DicNode *dicNode) const { + const int inputSize = traverseSession->getInputSize(); + if (dicNode->isCompletion(inputSize)) { + Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_COMPLETION, traverseSession, + 0 /* parentDicNode */, dicNode, 0 /* bigramCacheMap */); + } else { // completion + Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_MATCH, traverseSession, + 0 /* parentDicNode */, dicNode, 0 /* bigramCacheMap */); + } +} + +/** + * Creates a new dicNode that represents a space insertion at the end of the input dicNode. Also + * incorporates the unigram / bigram score for the ending word into the new dicNode. + */ +void Suggest::createNextWordDicNode(DicTraverseSession *traverseSession, DicNode *dicNode, + const bool spaceSubstitution) const { + if (!TRAVERSAL->isGoodToTraverseNextWord(dicNode)) { + return; + } + + // Create a non-cached node here. + DicNode newDicNode; + DicNodeUtils::initAsRootWithPreviousWord(traverseSession->getDicRootPos(), + traverseSession->getOffsetDict(), dicNode, &newDicNode); + Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_NEW_WORD, traverseSession, dicNode, + &newDicNode, traverseSession->getBigramCacheMap()); + if (spaceSubstitution) { + // Merge this with CT_NEW_WORD + Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_SPACE_SUBSTITUTION, + traverseSession, 0, &newDicNode, 0 /* bigramCacheMap */); + } + traverseSession->getDicTraverseCache()->copyPushNextActive(&newDicNode); +} +} // namespace latinime diff --git a/native/jni/src/suggest/core/suggest.h b/native/jni/src/suggest/core/suggest.h new file mode 100644 index 000000000..a1e7e7a94 --- /dev/null +++ b/native/jni/src/suggest/core/suggest.h @@ -0,0 +1,95 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_SUGGEST_IMPL_H +#define LATINIME_SUGGEST_IMPL_H + +#include "defines.h" +#include "suggest/core/suggest_interface.h" +#include "suggest/core/policy/suggest_policy.h" + +namespace latinime { + +class DicNode; +class DicTraverseSession; +class ProximityInfo; +class Scoring; +class Traversal; +class Weighting; + +class Suggest : public SuggestInterface { + public: + AK_FORCE_INLINE Suggest(const SuggestPolicy *const suggestPolicy) + : TRAVERSAL(suggestPolicy->getTraversal()), + SCORING(suggestPolicy->getScoring()), WEIGHTING(suggestPolicy->getWeighting()) {} + AK_FORCE_INLINE virtual ~Suggest() {} + int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs, + int *times, int *pointerIds, int *inputCodePoints, int inputSize, int commitPoint, + int *outWords, int *frequencies, int *outputIndices, int *outputTypes) const; + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(Suggest); + void createNextWordDicNode(DicTraverseSession *traverseSession, DicNode *dicNode, + const bool spaceSubstitution) const; + int outputSuggestions(DicTraverseSession *traverseSession, int *frequencies, + int *outputCodePoints, int *outputIndices, int *outputTypes) const; + void initializeSearch(DicTraverseSession *traverseSession, int commitPoint) const; + void expandCurrentDicNodes(DicTraverseSession *traverseSession) const; + void processTerminalDicNode(DicTraverseSession *traverseSession, DicNode *dicNode) const; + void processExpandedDicNode(DicTraverseSession *traverseSession, DicNode *dicNode) const; + void weightChildNode(DicTraverseSession *traverseSession, DicNode *dicNode) const; + float getAutocorrectScore(DicTraverseSession *traverseSession, DicNode *dicNode) const; + void generateFeatures( + DicTraverseSession *traverseSession, DicNode *dicNode, float *features) const; + void processDicNodeAsOmission(DicTraverseSession *traverseSession, DicNode *dicNode) const; + void processDicNodeAsTransposition(DicTraverseSession *traverseSession, + DicNode *dicNode) const; + void processDicNodeAsInsertion(DicTraverseSession *traverseSession, DicNode *dicNode) const; + void processDicNodeAsAdditionalProximityChar(DicTraverseSession *traverseSession, + DicNode *dicNode, DicNode *childDicNode) const; + void processDicNodeAsSubstitution(DicTraverseSession *traverseSession, DicNode *dicNode, + DicNode *childDicNode) const; + void processDicNodeAsMatch(DicTraverseSession *traverseSession, + DicNode *childDicNode) const; + + // Dic nodes cache size for lookahead (autocompletion) + static const int LOOKAHEAD_DIC_NODES_CACHE_SIZE; + // Max characters to lookahead + static const int MAX_LOOKAHEAD; + // Inputs longer than this will autocorrect if the suggestion is multi-word + static const int MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT; + static const int MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE; + // Base value for converting costs into scores (low so will not autocorrect without classifier) + static const float BASE_OUTPUT_SCORE; + + // Threshold for autocorrection classifier + static const float AUTOCORRECT_CLASSIFICATION_THRESHOLD; + // Threshold for computing the language model feature for autocorrect classification + static const float AUTOCORRECT_LANGUAGE_FEATURE_THRESHOLD; + + // Typing error correction settings + static const bool CORRECT_SPACE_OMISSION; + static const bool CORRECT_TRANSPOSITION; + static const bool CORRECT_INSERTION; + + const Traversal *const TRAVERSAL; + const Scoring *const SCORING; + const Weighting *const WEIGHTING; + + static const bool CORRECT_OMISSION_G; +}; +} // namespace latinime +#endif // LATINIME_SUGGEST_IMPL_H diff --git a/native/jni/src/suggest/suggest_interface.h b/native/jni/src/suggest/core/suggest_interface.h index 0bb85d7e5..0bb85d7e5 100644 --- a/native/jni/src/suggest/suggest_interface.h +++ b/native/jni/src/suggest/core/suggest_interface.h diff --git a/native/jni/src/suggest/gesture_suggest.h b/native/jni/src/suggest/gesture_suggest.h deleted file mode 100644 index 82c3a69ad..000000000 --- a/native/jni/src/suggest/gesture_suggest.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (C) 2012 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LATINIME_GESTURE_SUGGEST_H -#define LATINIME_GESTURE_SUGGEST_H - -#include "defines.h" -#include "suggest_interface.h" - -namespace latinime { - -class ProximityInfo; - -class GestureSuggest : public SuggestInterface { - public: - GestureSuggest() : mSuggestInterface(getGestureSuggestInstance()) {} - - virtual ~GestureSuggest(); - - int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs, - int *times, int *pointerIds, int *inputCodePoints, int inputSize, int commitPoint, - int *outWords, int *frequencies, int *outputIndices, int *outputTypes) const { - if (!mSuggestInterface) { - return 0; - } - return mSuggestInterface->getSuggestions(pInfo, traverseSession, inputXs, inputYs, times, - pointerIds, inputCodePoints, inputSize, commitPoint, outWords, frequencies, - outputIndices, outputTypes); - } - - static void setGestureSuggestFactoryMethod(SuggestInterface *(*factoryMethod)()) { - sGestureSuggestFactoryMethod = factoryMethod; - } - - private: - DISALLOW_COPY_AND_ASSIGN(GestureSuggest); - static SuggestInterface *getGestureSuggestInstance() { - if (!sGestureSuggestFactoryMethod) { - return 0; - } - return sGestureSuggestFactoryMethod(); - } - - static SuggestInterface *(*sGestureSuggestFactoryMethod)(); - SuggestInterface *mSuggestInterface; -}; -} // namespace latinime -#endif // LATINIME_GESTURE_SUGGEST_H diff --git a/native/jni/src/suggest/typing_suggest.cpp b/native/jni/src/suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp index 56bd5b69a..6d3173937 100644 --- a/native/jni/src/suggest/typing_suggest.cpp +++ b/native/jni/src/suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp @@ -14,12 +14,8 @@ * limitations under the License. */ -#include "typing_suggest.h" +#include "gesture_suggest_policy_factory.h" namespace latinime { - SuggestInterface *(*TypingSuggest::sTypingSuggestFactoryMethod)() = 0; - - TypingSuggest::~TypingSuggest() { - delete mSuggestInterface; - } + const SuggestPolicy *(*GestureSuggestPolicyFactory::sGestureSuggestFactoryMethod)() = 0; } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/gesture/gesture_suggest_policy_factory.h b/native/jni/src/suggest/policyimpl/gesture/gesture_suggest_policy_factory.h new file mode 100644 index 000000000..509b01fc0 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/gesture/gesture_suggest_policy_factory.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_GESTURE_SUGGEST_POLICY_FACTORY_H +#define LATINIME_GESTURE_SUGGEST_POLICY_FACTORY_H + +#include "defines.h" + +namespace latinime { + +class SuggestPolicy; + +class GestureSuggestPolicyFactory { + public: + static void setGestureSuggestPolicyFactoryMethod(const SuggestPolicy *(*factoryMethod)()) { + sGestureSuggestFactoryMethod = factoryMethod; + } + + static const SuggestPolicy *getGestureSuggestPolicy() { + if (!sGestureSuggestFactoryMethod) { + return 0; + } + return sGestureSuggestFactoryMethod(); + } + + private: + DISALLOW_COPY_AND_ASSIGN(GestureSuggestPolicyFactory); + static const SuggestPolicy *(*sGestureSuggestFactoryMethod)(); +}; +} // namespace latinime +#endif // LATINIME_GESTURE_SUGGEST_POLICY_FACTORY_H diff --git a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp index 90985d0fe..0fa684f01 100644 --- a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp +++ b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "scoring_params.h" +#include "suggest/policyimpl/typing/scoring_params.h" namespace latinime { // TODO: RENAME all diff --git a/native/jni/src/suggest/policyimpl/typing/typing_scoring.cpp b/native/jni/src/suggest/policyimpl/typing/typing_scoring.cpp index 53f68f20f..d8c6175e2 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_scoring.cpp +++ b/native/jni/src/suggest/policyimpl/typing/typing_scoring.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "typing_scoring.h" +#include "suggest/policyimpl/typing/typing_scoring.h" namespace latinime { const TypingScoring TypingScoring::sInstance; diff --git a/native/jni/src/suggest/policyimpl/typing/typing_scoring.h b/native/jni/src/suggest/policyimpl/typing/typing_scoring.h index ed941f0ae..90e2133e7 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_scoring.h +++ b/native/jni/src/suggest/policyimpl/typing/typing_scoring.h @@ -18,8 +18,8 @@ #define LATINIME_TYPING_SCORING_H #include "defines.h" -#include "scoring.h" -#include "scoring_params.h" +#include "suggest/core/policy/scoring.h" +#include "suggest/policyimpl/typing/scoring_params.h" namespace latinime { diff --git a/native/jni/src/suggest/gesture_suggest.cpp b/native/jni/src/suggest/policyimpl/typing/typing_suggest_policy.cpp index fce5621d5..0c2763967 100644 --- a/native/jni/src/suggest/gesture_suggest.cpp +++ b/native/jni/src/suggest/policyimpl/typing/typing_suggest_policy.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012 The Android Open Source Project + * Copyright (C) 2013 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,12 +14,8 @@ * limitations under the License. */ -#include "gesture_suggest.h" +#include "suggest/policyimpl/typing/typing_suggest_policy.h" namespace latinime { - SuggestInterface *(*GestureSuggest::sGestureSuggestFactoryMethod)() = 0; - - GestureSuggest::~GestureSuggest() { - delete mSuggestInterface; - } +const TypingSuggestPolicy TypingSuggestPolicy::sInstance; } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/typing/typing_suggest_policy.h b/native/jni/src/suggest/policyimpl/typing/typing_suggest_policy.h index 1f659c679..35f48097c 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_suggest_policy.h +++ b/native/jni/src/suggest/policyimpl/typing/typing_suggest_policy.h @@ -18,10 +18,10 @@ #define LATINIME_TYPING_SUGGEST_POLICY_H #include "defines.h" -#include "suggest_policy.h" -#include "typing_scoring.h" -#include "typing_traversal.h" -#include "typing_weighting.h" +#include "suggest/core/policy/suggest_policy.h" +#include "suggest/policyimpl/typing/typing_scoring.h" +#include "suggest/policyimpl/typing/typing_traversal.h" +#include "suggest/policyimpl/typing/typing_weighting.h" namespace latinime { @@ -31,6 +31,8 @@ class Weighting; class TypingSuggestPolicy : public SuggestPolicy { public: + static const TypingSuggestPolicy *getInstance() { return &sInstance; } + TypingSuggestPolicy() {} virtual ~TypingSuggestPolicy() {} AK_FORCE_INLINE const Traversal *getTraversal() const { @@ -47,6 +49,7 @@ class TypingSuggestPolicy : public SuggestPolicy { private: DISALLOW_COPY_AND_ASSIGN(TypingSuggestPolicy); + static const TypingSuggestPolicy sInstance; }; } // namespace latinime #endif // LATINIME_TYPING_SUGGEST_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/typing/typing_suggest_policy_factory.h b/native/jni/src/suggest/policyimpl/typing/typing_suggest_policy_factory.h new file mode 100644 index 000000000..a67b45b1b --- /dev/null +++ b/native/jni/src/suggest/policyimpl/typing/typing_suggest_policy_factory.h @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_TYPING_SUGGEST_POLICY_FACTORY_H +#define LATINIME_TYPING_SUGGEST_POLICY_FACTORY_H + +#include "defines.h" +#include "typing_suggest_policy.h" + +namespace latinime { + +class SuggestPolicy; + +class TypingSuggestPolicyFactory { + public: + static const SuggestPolicy *getTypingSuggestPolicy() { + return TypingSuggestPolicy::getInstance(); + } + + private: + DISALLOW_COPY_AND_ASSIGN(TypingSuggestPolicyFactory); +}; +} // namespace latinime +#endif // LATINIME_TYPING_SUGGEST_POLICY_FACTORY_H diff --git a/native/jni/src/suggest/policyimpl/typing/typing_traversal.cpp b/native/jni/src/suggest/policyimpl/typing/typing_traversal.cpp index 68c614e77..66f8ba9fa 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_traversal.cpp +++ b/native/jni/src/suggest/policyimpl/typing/typing_traversal.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "typing_traversal.h" +#include "suggest/policyimpl/typing/typing_traversal.h" namespace latinime { const bool TypingTraversal::CORRECT_OMISSION = true; diff --git a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h index 16153f8bb..f22029a2c 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h +++ b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h @@ -21,12 +21,12 @@ #include "char_utils.h" #include "defines.h" -#include "dic_node.h" -#include "dic_node_vector.h" -#include "dic_traverse_session.h" #include "proximity_info_state.h" -#include "scoring_params.h" -#include "traversal.h" +#include "suggest/core/dicnode/dic_node.h" +#include "suggest/core/dicnode/dic_node_vector.h" +#include "suggest/core/policy/traversal.h" +#include "suggest/core/session/dic_traverse_session.h" +#include "suggest/policyimpl/typing/scoring_params.h" namespace latinime { class TypingTraversal : public Traversal { diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp b/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp index 6e4b2fb6a..1500341bd 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp +++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp @@ -14,9 +14,10 @@ * limitations under the License. */ -#include "dic_node.h" -#include "scoring_params.h" -#include "typing_weighting.h" +#include "suggest/policyimpl/typing/typing_weighting.h" + +#include "suggest/core/dicnode/dic_node.h" +#include "suggest/policyimpl/typing/scoring_params.h" namespace latinime { const TypingWeighting TypingWeighting::sInstance; diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h index e8075f41a..52d54eb0f 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h +++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h @@ -18,9 +18,10 @@ #define LATINIME_TYPING_WEIGHTING_H #include "defines.h" -#include "dic_node_utils.h" -#include "dic_traverse_session.h" -#include "weighting.h" +#include "suggest/core/dicnode/dic_node_utils.h" +#include "suggest/core/policy/weighting.h" +#include "suggest/core/session/dic_traverse_session.h" +#include "suggest/policyimpl/typing/scoring_params.h" namespace latinime { diff --git a/native/jni/src/suggest/typing_suggest.h b/native/jni/src/suggest/typing_suggest.h deleted file mode 100644 index 678037aa2..000000000 --- a/native/jni/src/suggest/typing_suggest.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (C) 2012 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LATINIME_TYPING_SUGGEST_H -#define LATINIME_TYPING_SUGGEST_H - -#include "defines.h" -#include "suggest_interface.h" - -namespace latinime { - -class ProximityInfo; - -class TypingSuggest : public SuggestInterface { - public: - TypingSuggest() : mSuggestInterface(getTypingSuggestInstance()) {} - - virtual ~TypingSuggest(); - - int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs, - int *times, int *pointerIds, int *inputCodePoints, int inputSize, int commitPoint, - int *outWords, int *frequencies, int *outputIndices, int *outputTypes) const { - if (!mSuggestInterface) { - return 0; - } - return mSuggestInterface->getSuggestions(pInfo, traverseSession, inputXs, inputYs, times, - pointerIds, inputCodePoints, inputSize, commitPoint, outWords, frequencies, - outputIndices, outputTypes); - } - - static void setTypingSuggestFactoryMethod(SuggestInterface *(*factoryMethod)()) { - sTypingSuggestFactoryMethod = factoryMethod; - } - - private: - DISALLOW_COPY_AND_ASSIGN(TypingSuggest); - static SuggestInterface *getTypingSuggestInstance() { - if (!sTypingSuggestFactoryMethod) { - return 0; - } - return sTypingSuggestFactoryMethod(); - } - - static SuggestInterface *(*sTypingSuggestFactoryMethod)(); - SuggestInterface *mSuggestInterface; -}; -} // namespace latinime -#endif // LATINIME_TYPING_SUGGEST_H diff --git a/native/jni/src/unigram_dictionary.cpp b/native/jni/src/unigram_dictionary.cpp index 80ba412a3..33795cade 100644 --- a/native/jni/src/unigram_dictionary.cpp +++ b/native/jni/src/unigram_dictionary.cpp @@ -22,6 +22,7 @@ #include "char_utils.h" #include "defines.h" #include "dictionary.h" +#include "digraph_utils.h" #include "proximity_info.h" #include "terminal_attributes.h" #include "unigram_dictionary.h" @@ -30,15 +31,6 @@ namespace latinime { -const UnigramDictionary::digraph_t UnigramDictionary::GERMAN_UMLAUT_DIGRAPHS[] = - { { 'a', 'e', 0x00E4 }, // U+00E4 : LATIN SMALL LETTER A WITH DIAERESIS - { 'o', 'e', 0x00F6 }, // U+00F6 : LATIN SMALL LETTER O WITH DIAERESIS - { 'u', 'e', 0x00FC } }; // U+00FC : LATIN SMALL LETTER U WITH DIAERESIS - -const UnigramDictionary::digraph_t UnigramDictionary::FRENCH_LIGATURES_DIGRAPHS[] = - { { 'a', 'e', 0x00E6 }, // U+00E6 : LATIN SMALL LETTER AE - { 'o', 'e', 0x0153 } }; // U+0153 : LATIN SMALL LIGATURE OE - // TODO: check the header UnigramDictionary::UnigramDictionary(const uint8_t *const streamStart, const unsigned int flags) : DICT_ROOT(streamStart), ROOT_POS(0), @@ -58,7 +50,7 @@ static void addWord(int *word, int length, int probability, WordsPriorityQueue * // Return the replacement code point for a digraph, or 0 if none. int UnigramDictionary::getDigraphReplacement(const int *codes, const int i, const int inputSize, - const digraph_t *const digraphs, const unsigned int digraphsSize) const { + const DigraphUtils::digraph_t *const digraphs, const unsigned int digraphsSize) const { // There can't be a digraph if we don't have at least 2 characters to examine if (i + 2 > inputSize) return false; @@ -74,7 +66,7 @@ int UnigramDictionary::getDigraphReplacement(const int *codes, const int i, cons // It's an interesting digraph if the second char matches too. if (digraphs[lastDigraphIndex].second == codes[i + 1]) { - return digraphs[lastDigraphIndex].replacement; + return digraphs[lastDigraphIndex].compositeGlyph; } else { return 0; } @@ -93,7 +85,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit const bool useFullEditDistance, const int *codesSrc, const int codesRemain, const int currentDepth, int *codesDest, Correction *correction, WordsPriorityQueuePool *queuePool, - const digraph_t *const digraphs, const unsigned int digraphsSize) const { + const DigraphUtils::digraph_t *const digraphs, const unsigned int digraphsSize) const { ASSERT(sizeof(codesDest[0]) == sizeof(codesSrc[0])); ASSERT(sizeof(xCoordinatesBuffer[0]) == sizeof(xcoordinates[0])); ASSERT(sizeof(yCoordinatesBuffer[0]) == sizeof(ycoordinates[0])); @@ -169,7 +161,10 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x queuePool.clearAll(); Correction masterCorrection; masterCorrection.resetCorrection(); - if (BinaryFormat::REQUIRES_GERMAN_UMLAUT_PROCESSING & FLAGS) + const DigraphUtils::digraph_t *digraphs = 0; + const int digraphsSize = + DigraphUtils::getAllDigraphsForDictionaryAndReturnSize(FLAGS, &digraphs); + if (digraphsSize > 0) { // Incrementally tune the word and try all possibilities int codesBuffer[sizeof(*inputCodePoints) * inputSize]; int xCoordinatesBuffer[inputSize]; @@ -177,15 +172,7 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, xCoordinatesBuffer, yCoordinatesBuffer, inputSize, bigramMap, bigramFilter, useFullEditDistance, inputCodePoints, inputSize, 0, codesBuffer, &masterCorrection, - &queuePool, GERMAN_UMLAUT_DIGRAPHS, NELEMS(GERMAN_UMLAUT_DIGRAPHS)); - } else if (BinaryFormat::REQUIRES_FRENCH_LIGATURES_PROCESSING & FLAGS) { - int codesBuffer[sizeof(*inputCodePoints) * inputSize]; - int xCoordinatesBuffer[inputSize]; - int yCoordinatesBuffer[inputSize]; - getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, - xCoordinatesBuffer, yCoordinatesBuffer, inputSize, bigramMap, bigramFilter, - useFullEditDistance, inputCodePoints, inputSize, 0, codesBuffer, &masterCorrection, - &queuePool, FRENCH_LIGATURES_DIGRAPHS, NELEMS(FRENCH_LIGATURES_DIGRAPHS)); + &queuePool, digraphs, digraphsSize); } else { // Normal processing getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, inputCodePoints, inputSize, bigramMap, bigramFilter, useFullEditDistance, &masterCorrection, &queuePool); diff --git a/native/jni/src/unigram_dictionary.h b/native/jni/src/unigram_dictionary.h index c1955e8bb..1a01758fe 100644 --- a/native/jni/src/unigram_dictionary.h +++ b/native/jni/src/unigram_dictionary.h @@ -20,6 +20,7 @@ #include <map> #include <stdint.h> #include "defines.h" +#include "digraph_utils.h" namespace latinime { @@ -29,8 +30,6 @@ class TerminalAttributes; class WordsPriorityQueuePool; class UnigramDictionary { - typedef struct { int first; int second; int replacement; } digraph_t; - public: // Error tolerances static const int DEFAULT_MAX_ERRORS = 2; @@ -57,13 +56,13 @@ class UnigramDictionary { const bool useFullEditDistance, Correction *correction, WordsPriorityQueuePool *queuePool) const; int getDigraphReplacement(const int *codes, const int i, const int inputSize, - const digraph_t *const digraphs, const unsigned int digraphsSize) const; + const DigraphUtils::digraph_t *const digraphs, const unsigned int digraphsSize) const; void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codesBuffer, int *xCoordinatesBuffer, int *yCoordinatesBuffer, const int codesBufferSize, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, const bool useFullEditDistance, const int *codesSrc, const int codesRemain, const int currentDepth, int *codesDest, Correction *correction, - WordsPriorityQueuePool *queuePool, const digraph_t *const digraphs, + WordsPriorityQueuePool *queuePool, const DigraphUtils::digraph_t *const digraphs, const unsigned int digraphsSize) const; void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int inputSize, @@ -111,9 +110,6 @@ class UnigramDictionary { const int ROOT_POS; const int MAX_DIGRAPH_SEARCH_DEPTH; const int FLAGS; - - static const digraph_t GERMAN_UMLAUT_DIGRAPHS[]; - static const digraph_t FRENCH_LIGATURES_DIGRAPHS[]; }; } // namespace latinime #endif // LATINIME_UNIGRAM_DICTIONARY_H |