diff options
13 files changed, 117 insertions, 25 deletions
diff --git a/java/src/com/android/inputmethod/dictionarypack/MetadataDbHelper.java b/java/src/com/android/inputmethod/dictionarypack/MetadataDbHelper.java index 668eb925b..743bc8037 100644 --- a/java/src/com/android/inputmethod/dictionarypack/MetadataDbHelper.java +++ b/java/src/com/android/inputmethod/dictionarypack/MetadataDbHelper.java @@ -47,7 +47,7 @@ public class MetadataDbHelper extends SQLiteOpenHelper { // used to identify the versions for upgrades. This should never change going forward. private static final int METADATA_DATABASE_VERSION_WITH_CLIENTID = 6; // The current database version. - private static final int CURRENT_METADATA_DATABASE_VERSION = 8; + private static final int CURRENT_METADATA_DATABASE_VERSION = 9; private final static long NOT_A_DOWNLOAD_ID = -1; diff --git a/java/src/com/android/inputmethod/latin/Constants.java b/java/src/com/android/inputmethod/latin/Constants.java index 67ca59540..efc5a618b 100644 --- a/java/src/com/android/inputmethod/latin/Constants.java +++ b/java/src/com/android/inputmethod/latin/Constants.java @@ -192,7 +192,6 @@ public final class Constants { public static final int CODE_SPACE = ' '; public static final int CODE_PERIOD = '.'; public static final int CODE_COMMA = ','; - public static final int CODE_ARMENIAN_PERIOD = 0x0589; public static final int CODE_DASH = '-'; public static final int CODE_SINGLE_QUOTE = '\''; public static final int CODE_DOUBLE_QUOTE = '"'; @@ -208,6 +207,8 @@ public final class Constants { public static final int CODE_CLOSING_SQUARE_BRACKET = ']'; public static final int CODE_CLOSING_CURLY_BRACKET = '}'; public static final int CODE_CLOSING_ANGLE_BRACKET = '>'; + public static final int CODE_INVERTED_QUESTION_MARK = 0xBF; // ¿ + public static final int CODE_INVERTED_EXCLAMATION_MARK = 0xA1; // ¡ /** * Special keys code. Must be negative. diff --git a/java/src/com/android/inputmethod/latin/utils/CapsModeUtils.java b/java/src/com/android/inputmethod/latin/utils/CapsModeUtils.java index 702688f93..936219332 100644 --- a/java/src/com/android/inputmethod/latin/utils/CapsModeUtils.java +++ b/java/src/com/android/inputmethod/latin/utils/CapsModeUtils.java @@ -62,6 +62,22 @@ public final class CapsModeUtils { } /** + * Helper method to find out if a code point is starting punctuation. + * + * This include the Unicode START_PUNCTUATION category, but also some other symbols that are + * starting, like the inverted question mark or the double quote. + * + * @param codePoint the code point + * @return true if it's starting punctuation, false otherwise. + */ + private static boolean isStartPunctuation(final int codePoint) { + return (codePoint == Constants.CODE_DOUBLE_QUOTE || codePoint == Constants.CODE_SINGLE_QUOTE + || codePoint == Constants.CODE_INVERTED_QUESTION_MARK + || codePoint == Constants.CODE_INVERTED_EXCLAMATION_MARK + || Character.getType(codePoint) == Character.START_PUNCTUATION); + } + + /** * Determine what caps mode should be in effect at the current offset in * the text. Only the mode bits set in <var>reqModes</var> will be * checked. Note that the caps mode flags here are explicitly defined @@ -115,8 +131,7 @@ public final class CapsModeUtils { } else { for (i = cs.length(); i > 0; i--) { final char c = cs.charAt(i - 1); - if (c != Constants.CODE_DOUBLE_QUOTE && c != Constants.CODE_SINGLE_QUOTE - && Character.getType(c) != Character.START_PUNCTUATION) { + if (!isStartPunctuation(c)) { break; } } @@ -210,11 +225,14 @@ public final class CapsModeUtils { // We found out that we have a period. We need to determine if this is a full stop or // otherwise sentence-ending period, or an abbreviation like "e.g.". An abbreviation - // looks like (\w\.){2,} + // looks like (\w\.){2,}. Moreover, in German, you put periods after digits for dates + // and some other things, and in German specifically we need to not go into autocaps after + // a whitespace-digits-period sequence. // To find out, we will have a simple state machine with the following states : - // START, WORD, PERIOD, ABBREVIATION + // START, WORD, PERIOD, ABBREVIATION, NUMBER // On START : (just before the first period) // letter => WORD + // digit => NUMBER if German; end with caps otherwise // whitespace => end with no caps (it was a stand-alone period) // otherwise => end with caps (several periods/symbols in a row) // On WORD : (within the word just before the first period) @@ -228,6 +246,11 @@ public final class CapsModeUtils { // letter => LETTER // period => PERIOD // otherwise => end with no caps (it was an abbreviation) + // On NUMBER : (period immediately preceded by one or more digits) + // digit => NUMBER + // letter => LETTER (promote to word) + // otherwise => end with no caps (it was a whitespace-digits-period sequence, + // or a punctuation-digits-period sequence like "11.11.") // "Not an abbreviation" in the above chart essentially covers cases like "...yes.". This // should capitalize. @@ -235,6 +258,7 @@ public final class CapsModeUtils { final int WORD = 1; final int PERIOD = 2; final int LETTER = 3; + final int NUMBER = 4; final int caps = (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS | TextUtils.CAP_MODE_SENTENCES) & reqModes; final int noCaps = (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS) & reqModes; @@ -247,6 +271,8 @@ public final class CapsModeUtils { state = WORD; } else if (Character.isWhitespace(c)) { return noCaps; + } else if (Character.isDigit(c) && spacingAndPunctuations.mUsesGermanRules) { + state = NUMBER; } else { return caps; } @@ -275,6 +301,15 @@ public final class CapsModeUtils { } else { return noCaps; } + break; + case NUMBER: + if (Character.isLetter(c)) { + state = WORD; + } else if (Character.isDigit(c)) { + state = NUMBER; + } else { + return noCaps; + } } } // Here we arrived at the start of the line. This should behave exactly like whitespace. diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index 6223f86f4..5ad211441 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -341,8 +341,8 @@ static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz, shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability); } // Use 1 for count to indicate the word has inputted. - const UnigramProperty unigramProperty(isNotAWord, isBlacklisted, - probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts); + const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord, + isBlacklisted, probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts); dictionary->addUnigramEntry(codePoints, codePointCount, &unigramProperty); } @@ -450,8 +450,9 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability); } // Use 1 for count to indicate the word has inputted. - const UnigramProperty unigramProperty(isNotAWord, isBlacklisted, - unigramProbability, timestamp, 0 /* level */, 1 /* count */, &shortcuts); + const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord, + isBlacklisted, unigramProbability, timestamp, 0 /* level */, 1 /* count */, + &shortcuts); dictionary->addUnigramEntry(word1CodePoints, word1Length, &unigramProperty); if (word0) { jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId); diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index fe3167a61..bcf7d5905 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -82,6 +82,12 @@ int Dictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo, c void Dictionary::addUnigramEntry(const int *const word, const int length, const UnigramProperty *const unigramProperty) { + if (unigramProperty->representsBeginningOfSentence() + && !mDictionaryStructureWithBufferPolicy->getHeaderStructurePolicy() + ->supportsBeginningOfSentence()) { + AKLOGE("The dictionary doesn't support Beginning-of-Sentence."); + return; + } TimeKeeper::setCurrentTime(); mDictionaryStructureWithBufferPolicy->addUnigramEntry(word, length, unigramProperty); } diff --git a/native/jni/src/suggest/core/dictionary/property/unigram_property.h b/native/jni/src/suggest/core/dictionary/property/unigram_property.h index d2551057b..902eb000f 100644 --- a/native/jni/src/suggest/core/dictionary/property/unigram_property.h +++ b/native/jni/src/suggest/core/dictionary/property/unigram_property.h @@ -48,15 +48,21 @@ class UnigramProperty { }; UnigramProperty() - : mIsNotAWord(false), mIsBlacklisted(false), mProbability(NOT_A_PROBABILITY), - mTimestamp(NOT_A_TIMESTAMP), mLevel(0), mCount(0), mShortcuts() {} - - UnigramProperty(const bool isNotAWord, const bool isBlacklisted, const int probability, - const int timestamp, const int level, const int count, - const std::vector<ShortcutProperty> *const shortcuts) - : mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability), + : mRepresentsBeginningOfSentence(false), mIsNotAWord(false), mIsBlacklisted(false), + mProbability(NOT_A_PROBABILITY), mTimestamp(NOT_A_TIMESTAMP), mLevel(0), mCount(0), + mShortcuts() {} + + UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord, + const bool isBlacklisted, const int probability, const int timestamp, const int level, + const int count, const std::vector<ShortcutProperty> *const shortcuts) + : mRepresentsBeginningOfSentence(representsBeginningOfSentence), + mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability), mTimestamp(timestamp), mLevel(level), mCount(count), mShortcuts(*shortcuts) {} + bool representsBeginningOfSentence() const { + return mRepresentsBeginningOfSentence; + } + bool isNotAWord() const { return mIsNotAWord; } @@ -94,6 +100,7 @@ class UnigramProperty { DISALLOW_ASSIGNMENT_OPERATOR(UnigramProperty); // TODO: Make members const. + bool mRepresentsBeginningOfSentence; bool mIsNotAWord; bool mIsBlacklisted; int mProbability; diff --git a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h index 845e629e6..a61227626 100644 --- a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h @@ -51,6 +51,8 @@ class DictionaryHeaderStructurePolicy { virtual const std::vector<int> *getLocale() const = 0; + virtual bool supportsBeginningOfSentence() const = 0; + protected: DictionaryHeaderStructurePolicy() {} diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h index 479d15164..281c5a818 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h @@ -246,6 +246,10 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { return &mLocale; } + bool supportsBeginningOfSentence() const { + return mDictFormatVersion == FormatUtils::VERSION_4_DEV; + } + private: DISALLOW_COPY_AND_ASSIGN(HeaderPolicy); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp index 97e1120a3..557a0b4c8 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp @@ -432,8 +432,8 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code shortcuts.emplace_back(&target, shortcutProbability); } } - const UnigramProperty unigramProperty(ptNodeParams.isNotAWord(), - ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(), + const UnigramProperty unigramProperty(false /* representsBeginningOfSentence */, + ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(), historicalInfo->getTimeStamp(), historicalInfo->getLevel(), historicalInfo->getCount(), &shortcuts); return WordProperty(&codePointVector, &unigramProperty, &bigrams); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h index 5704c2e90..b2e60a837 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h @@ -160,7 +160,12 @@ class PtNodeParams { } AK_FORCE_INLINE bool representsNonWordInfo() const { - return getCodePointCount() > 0 && CharUtils::isInUnicodeSpace(getCodePoints()[0]) + return getCodePointCount() > 0 && !CharUtils::isInUnicodeSpace(getCodePoints()[0]) + && isNotAWord(); + } + + AK_FORCE_INLINE int representsBeginningOfSentence() const { + return getCodePointCount() > 0 && getCodePoints()[0] == CODE_POINT_BEGINNING_OF_SENTENCE && isNotAWord(); } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp index 30dcfba37..a6a470c4e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp @@ -383,8 +383,8 @@ const WordProperty PatriciaTriePolicy::getWordProperty(const int *const codePoin shortcuts.emplace_back(&shortcutTarget, shortcutProbability); } } - const UnigramProperty unigramProperty(ptNodeParams.isNotAWord(), - ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(), + const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(), + ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(), NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts); return WordProperty(&codePointVector, &unigramProperty, &bigrams); } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index 439e90e44..185844139 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -61,7 +61,7 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d isTerminal = ptNodeParams.getProbability() != NOT_A_PROBABILITY; } readingHelper.readNextSiblingNode(ptNodeParams); - if (!ptNodeParams.representsNonWordInfo()) { + if (ptNodeParams.representsNonWordInfo()) { // Skip PtNodes that represent non-word information. continue; } @@ -430,8 +430,8 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code shortcuts.emplace_back(&target, shortcutProbability); } } - const UnigramProperty unigramProperty(ptNodeParams.isNotAWord(), - ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(), + const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(), + ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(), historicalInfo->getTimeStamp(), historicalInfo->getLevel(), historicalInfo->getCount(), &shortcuts); return WordProperty(&codePointVector, &unigramProperty, &bigrams); diff --git a/tests/src/com/android/inputmethod/latin/ShiftModeTests.java b/tests/src/com/android/inputmethod/latin/ShiftModeTests.java index 6fc9df793..de5538ed6 100644 --- a/tests/src/com/android/inputmethod/latin/ShiftModeTests.java +++ b/tests/src/com/android/inputmethod/latin/ShiftModeTests.java @@ -78,4 +78,35 @@ public class ShiftModeTests extends InputTestsBase { runMessages(); assertTrue("Caps after a while after repeating Backspace a lot", isCapsModeAutoShifted()); } + + public void testAutoCapsAfterDigitsPeriod() { + changeLanguage("en"); + type("On 22.11."); + assertFalse("(English) Auto caps after digits-period", isCapsModeAutoShifted()); + type(" "); + assertTrue("(English) Auto caps after digits-period-whitespace", isCapsModeAutoShifted()); + mEditText.setText(""); + changeLanguage("fr"); + type("Le 22."); + assertFalse("(French) Auto caps after digits-period", isCapsModeAutoShifted()); + type(" "); + assertTrue("(French) Auto caps after digits-period-whitespace", isCapsModeAutoShifted()); + mEditText.setText(""); + changeLanguage("de"); + type("Am 22."); + assertFalse("(German) Auto caps after digits-period", isCapsModeAutoShifted()); + type(" "); + // For German, no auto-caps in this case + assertFalse("(German) Auto caps after digits-period-whitespace", isCapsModeAutoShifted()); + } + + public void testAutoCapsAfterInvertedMarks() { + changeLanguage("es"); + assertTrue("(Spanish) Auto caps at start", isCapsModeAutoShifted()); + type("Hey. ¿"); + assertTrue("(Spanish) Auto caps after inverted what", isCapsModeAutoShifted()); + mEditText.setText(""); + type("¡"); + assertTrue("(Spanish) Auto caps after inverted bang", isCapsModeAutoShifted()); + } } |