aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--java/src/com/android/inputmethod/dictionarypack/MetadataDbHelper.java2
-rw-r--r--java/src/com/android/inputmethod/latin/Constants.java3
-rw-r--r--java/src/com/android/inputmethod/latin/utils/CapsModeUtils.java43
-rw-r--r--native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp9
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.cpp6
-rw-r--r--native/jni/src/suggest/core/dictionary/property/unigram_property.h21
-rw-r--r--native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h7
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp6
-rw-r--r--tests/src/com/android/inputmethod/latin/ShiftModeTests.java31
13 files changed, 117 insertions, 25 deletions
diff --git a/java/src/com/android/inputmethod/dictionarypack/MetadataDbHelper.java b/java/src/com/android/inputmethod/dictionarypack/MetadataDbHelper.java
index 668eb925b..743bc8037 100644
--- a/java/src/com/android/inputmethod/dictionarypack/MetadataDbHelper.java
+++ b/java/src/com/android/inputmethod/dictionarypack/MetadataDbHelper.java
@@ -47,7 +47,7 @@ public class MetadataDbHelper extends SQLiteOpenHelper {
// used to identify the versions for upgrades. This should never change going forward.
private static final int METADATA_DATABASE_VERSION_WITH_CLIENTID = 6;
// The current database version.
- private static final int CURRENT_METADATA_DATABASE_VERSION = 8;
+ private static final int CURRENT_METADATA_DATABASE_VERSION = 9;
private final static long NOT_A_DOWNLOAD_ID = -1;
diff --git a/java/src/com/android/inputmethod/latin/Constants.java b/java/src/com/android/inputmethod/latin/Constants.java
index 67ca59540..efc5a618b 100644
--- a/java/src/com/android/inputmethod/latin/Constants.java
+++ b/java/src/com/android/inputmethod/latin/Constants.java
@@ -192,7 +192,6 @@ public final class Constants {
public static final int CODE_SPACE = ' ';
public static final int CODE_PERIOD = '.';
public static final int CODE_COMMA = ',';
- public static final int CODE_ARMENIAN_PERIOD = 0x0589;
public static final int CODE_DASH = '-';
public static final int CODE_SINGLE_QUOTE = '\'';
public static final int CODE_DOUBLE_QUOTE = '"';
@@ -208,6 +207,8 @@ public final class Constants {
public static final int CODE_CLOSING_SQUARE_BRACKET = ']';
public static final int CODE_CLOSING_CURLY_BRACKET = '}';
public static final int CODE_CLOSING_ANGLE_BRACKET = '>';
+ public static final int CODE_INVERTED_QUESTION_MARK = 0xBF; // ¿
+ public static final int CODE_INVERTED_EXCLAMATION_MARK = 0xA1; // ¡
/**
* Special keys code. Must be negative.
diff --git a/java/src/com/android/inputmethod/latin/utils/CapsModeUtils.java b/java/src/com/android/inputmethod/latin/utils/CapsModeUtils.java
index 702688f93..936219332 100644
--- a/java/src/com/android/inputmethod/latin/utils/CapsModeUtils.java
+++ b/java/src/com/android/inputmethod/latin/utils/CapsModeUtils.java
@@ -62,6 +62,22 @@ public final class CapsModeUtils {
}
/**
+ * Helper method to find out if a code point is starting punctuation.
+ *
+ * This include the Unicode START_PUNCTUATION category, but also some other symbols that are
+ * starting, like the inverted question mark or the double quote.
+ *
+ * @param codePoint the code point
+ * @return true if it's starting punctuation, false otherwise.
+ */
+ private static boolean isStartPunctuation(final int codePoint) {
+ return (codePoint == Constants.CODE_DOUBLE_QUOTE || codePoint == Constants.CODE_SINGLE_QUOTE
+ || codePoint == Constants.CODE_INVERTED_QUESTION_MARK
+ || codePoint == Constants.CODE_INVERTED_EXCLAMATION_MARK
+ || Character.getType(codePoint) == Character.START_PUNCTUATION);
+ }
+
+ /**
* Determine what caps mode should be in effect at the current offset in
* the text. Only the mode bits set in <var>reqModes</var> will be
* checked. Note that the caps mode flags here are explicitly defined
@@ -115,8 +131,7 @@ public final class CapsModeUtils {
} else {
for (i = cs.length(); i > 0; i--) {
final char c = cs.charAt(i - 1);
- if (c != Constants.CODE_DOUBLE_QUOTE && c != Constants.CODE_SINGLE_QUOTE
- && Character.getType(c) != Character.START_PUNCTUATION) {
+ if (!isStartPunctuation(c)) {
break;
}
}
@@ -210,11 +225,14 @@ public final class CapsModeUtils {
// We found out that we have a period. We need to determine if this is a full stop or
// otherwise sentence-ending period, or an abbreviation like "e.g.". An abbreviation
- // looks like (\w\.){2,}
+ // looks like (\w\.){2,}. Moreover, in German, you put periods after digits for dates
+ // and some other things, and in German specifically we need to not go into autocaps after
+ // a whitespace-digits-period sequence.
// To find out, we will have a simple state machine with the following states :
- // START, WORD, PERIOD, ABBREVIATION
+ // START, WORD, PERIOD, ABBREVIATION, NUMBER
// On START : (just before the first period)
// letter => WORD
+ // digit => NUMBER if German; end with caps otherwise
// whitespace => end with no caps (it was a stand-alone period)
// otherwise => end with caps (several periods/symbols in a row)
// On WORD : (within the word just before the first period)
@@ -228,6 +246,11 @@ public final class CapsModeUtils {
// letter => LETTER
// period => PERIOD
// otherwise => end with no caps (it was an abbreviation)
+ // On NUMBER : (period immediately preceded by one or more digits)
+ // digit => NUMBER
+ // letter => LETTER (promote to word)
+ // otherwise => end with no caps (it was a whitespace-digits-period sequence,
+ // or a punctuation-digits-period sequence like "11.11.")
// "Not an abbreviation" in the above chart essentially covers cases like "...yes.". This
// should capitalize.
@@ -235,6 +258,7 @@ public final class CapsModeUtils {
final int WORD = 1;
final int PERIOD = 2;
final int LETTER = 3;
+ final int NUMBER = 4;
final int caps = (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS
| TextUtils.CAP_MODE_SENTENCES) & reqModes;
final int noCaps = (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS) & reqModes;
@@ -247,6 +271,8 @@ public final class CapsModeUtils {
state = WORD;
} else if (Character.isWhitespace(c)) {
return noCaps;
+ } else if (Character.isDigit(c) && spacingAndPunctuations.mUsesGermanRules) {
+ state = NUMBER;
} else {
return caps;
}
@@ -275,6 +301,15 @@ public final class CapsModeUtils {
} else {
return noCaps;
}
+ break;
+ case NUMBER:
+ if (Character.isLetter(c)) {
+ state = WORD;
+ } else if (Character.isDigit(c)) {
+ state = NUMBER;
+ } else {
+ return noCaps;
+ }
}
}
// Here we arrived at the start of the line. This should behave exactly like whitespace.
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index 6223f86f4..5ad211441 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -341,8 +341,8 @@ static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz,
shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
}
// Use 1 for count to indicate the word has inputted.
- const UnigramProperty unigramProperty(isNotAWord, isBlacklisted,
- probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
+ const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord,
+ isBlacklisted, probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
dictionary->addUnigramEntry(codePoints, codePointCount, &unigramProperty);
}
@@ -450,8 +450,9 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
}
// Use 1 for count to indicate the word has inputted.
- const UnigramProperty unigramProperty(isNotAWord, isBlacklisted,
- unigramProbability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
+ const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord,
+ isBlacklisted, unigramProbability, timestamp, 0 /* level */, 1 /* count */,
+ &shortcuts);
dictionary->addUnigramEntry(word1CodePoints, word1Length, &unigramProperty);
if (word0) {
jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index fe3167a61..bcf7d5905 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -82,6 +82,12 @@ int Dictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo, c
void Dictionary::addUnigramEntry(const int *const word, const int length,
const UnigramProperty *const unigramProperty) {
+ if (unigramProperty->representsBeginningOfSentence()
+ && !mDictionaryStructureWithBufferPolicy->getHeaderStructurePolicy()
+ ->supportsBeginningOfSentence()) {
+ AKLOGE("The dictionary doesn't support Beginning-of-Sentence.");
+ return;
+ }
TimeKeeper::setCurrentTime();
mDictionaryStructureWithBufferPolicy->addUnigramEntry(word, length, unigramProperty);
}
diff --git a/native/jni/src/suggest/core/dictionary/property/unigram_property.h b/native/jni/src/suggest/core/dictionary/property/unigram_property.h
index d2551057b..902eb000f 100644
--- a/native/jni/src/suggest/core/dictionary/property/unigram_property.h
+++ b/native/jni/src/suggest/core/dictionary/property/unigram_property.h
@@ -48,15 +48,21 @@ class UnigramProperty {
};
UnigramProperty()
- : mIsNotAWord(false), mIsBlacklisted(false), mProbability(NOT_A_PROBABILITY),
- mTimestamp(NOT_A_TIMESTAMP), mLevel(0), mCount(0), mShortcuts() {}
-
- UnigramProperty(const bool isNotAWord, const bool isBlacklisted, const int probability,
- const int timestamp, const int level, const int count,
- const std::vector<ShortcutProperty> *const shortcuts)
- : mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability),
+ : mRepresentsBeginningOfSentence(false), mIsNotAWord(false), mIsBlacklisted(false),
+ mProbability(NOT_A_PROBABILITY), mTimestamp(NOT_A_TIMESTAMP), mLevel(0), mCount(0),
+ mShortcuts() {}
+
+ UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord,
+ const bool isBlacklisted, const int probability, const int timestamp, const int level,
+ const int count, const std::vector<ShortcutProperty> *const shortcuts)
+ : mRepresentsBeginningOfSentence(representsBeginningOfSentence),
+ mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability),
mTimestamp(timestamp), mLevel(level), mCount(count), mShortcuts(*shortcuts) {}
+ bool representsBeginningOfSentence() const {
+ return mRepresentsBeginningOfSentence;
+ }
+
bool isNotAWord() const {
return mIsNotAWord;
}
@@ -94,6 +100,7 @@ class UnigramProperty {
DISALLOW_ASSIGNMENT_OPERATOR(UnigramProperty);
// TODO: Make members const.
+ bool mRepresentsBeginningOfSentence;
bool mIsNotAWord;
bool mIsBlacklisted;
int mProbability;
diff --git a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
index 845e629e6..a61227626 100644
--- a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
@@ -51,6 +51,8 @@ class DictionaryHeaderStructurePolicy {
virtual const std::vector<int> *getLocale() const = 0;
+ virtual bool supportsBeginningOfSentence() const = 0;
+
protected:
DictionaryHeaderStructurePolicy() {}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
index 479d15164..281c5a818 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
@@ -246,6 +246,10 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
return &mLocale;
}
+ bool supportsBeginningOfSentence() const {
+ return mDictFormatVersion == FormatUtils::VERSION_4_DEV;
+ }
+
private:
DISALLOW_COPY_AND_ASSIGN(HeaderPolicy);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp
index 97e1120a3..557a0b4c8 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp
@@ -432,8 +432,8 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
shortcuts.emplace_back(&target, shortcutProbability);
}
}
- const UnigramProperty unigramProperty(ptNodeParams.isNotAWord(),
- ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
+ const UnigramProperty unigramProperty(false /* representsBeginningOfSentence */,
+ ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
historicalInfo->getCount(), &shortcuts);
return WordProperty(&codePointVector, &unigramProperty, &bigrams);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
index 5704c2e90..b2e60a837 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
@@ -160,7 +160,12 @@ class PtNodeParams {
}
AK_FORCE_INLINE bool representsNonWordInfo() const {
- return getCodePointCount() > 0 && CharUtils::isInUnicodeSpace(getCodePoints()[0])
+ return getCodePointCount() > 0 && !CharUtils::isInUnicodeSpace(getCodePoints()[0])
+ && isNotAWord();
+ }
+
+ AK_FORCE_INLINE int representsBeginningOfSentence() const {
+ return getCodePointCount() > 0 && getCodePoints()[0] == CODE_POINT_BEGINNING_OF_SENTENCE
&& isNotAWord();
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
index 30dcfba37..a6a470c4e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
@@ -383,8 +383,8 @@ const WordProperty PatriciaTriePolicy::getWordProperty(const int *const codePoin
shortcuts.emplace_back(&shortcutTarget, shortcutProbability);
}
}
- const UnigramProperty unigramProperty(ptNodeParams.isNotAWord(),
- ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
+ const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
+ ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
return WordProperty(&codePointVector, &unigramProperty, &bigrams);
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index 439e90e44..185844139 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -61,7 +61,7 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d
isTerminal = ptNodeParams.getProbability() != NOT_A_PROBABILITY;
}
readingHelper.readNextSiblingNode(ptNodeParams);
- if (!ptNodeParams.representsNonWordInfo()) {
+ if (ptNodeParams.representsNonWordInfo()) {
// Skip PtNodes that represent non-word information.
continue;
}
@@ -430,8 +430,8 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
shortcuts.emplace_back(&target, shortcutProbability);
}
}
- const UnigramProperty unigramProperty(ptNodeParams.isNotAWord(),
- ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
+ const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
+ ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
historicalInfo->getCount(), &shortcuts);
return WordProperty(&codePointVector, &unigramProperty, &bigrams);
diff --git a/tests/src/com/android/inputmethod/latin/ShiftModeTests.java b/tests/src/com/android/inputmethod/latin/ShiftModeTests.java
index 6fc9df793..de5538ed6 100644
--- a/tests/src/com/android/inputmethod/latin/ShiftModeTests.java
+++ b/tests/src/com/android/inputmethod/latin/ShiftModeTests.java
@@ -78,4 +78,35 @@ public class ShiftModeTests extends InputTestsBase {
runMessages();
assertTrue("Caps after a while after repeating Backspace a lot", isCapsModeAutoShifted());
}
+
+ public void testAutoCapsAfterDigitsPeriod() {
+ changeLanguage("en");
+ type("On 22.11.");
+ assertFalse("(English) Auto caps after digits-period", isCapsModeAutoShifted());
+ type(" ");
+ assertTrue("(English) Auto caps after digits-period-whitespace", isCapsModeAutoShifted());
+ mEditText.setText("");
+ changeLanguage("fr");
+ type("Le 22.");
+ assertFalse("(French) Auto caps after digits-period", isCapsModeAutoShifted());
+ type(" ");
+ assertTrue("(French) Auto caps after digits-period-whitespace", isCapsModeAutoShifted());
+ mEditText.setText("");
+ changeLanguage("de");
+ type("Am 22.");
+ assertFalse("(German) Auto caps after digits-period", isCapsModeAutoShifted());
+ type(" ");
+ // For German, no auto-caps in this case
+ assertFalse("(German) Auto caps after digits-period-whitespace", isCapsModeAutoShifted());
+ }
+
+ public void testAutoCapsAfterInvertedMarks() {
+ changeLanguage("es");
+ assertTrue("(Spanish) Auto caps at start", isCapsModeAutoShifted());
+ type("Hey. ¿");
+ assertTrue("(Spanish) Auto caps after inverted what", isCapsModeAutoShifted());
+ mEditText.setText("");
+ type("¡");
+ assertTrue("(Spanish) Auto caps after inverted bang", isCapsModeAutoShifted());
+ }
}