aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--java/src/com/android/inputmethod/latin/LatinIME.java27
-rw-r--r--native/src/correction.cpp96
-rw-r--r--native/src/correction.h2
-rw-r--r--native/src/defines.h6
-rw-r--r--native/src/unigram_dictionary.cpp92
-rw-r--r--native/src/unigram_dictionary.h4
-rw-r--r--native/src/words_priority_queue.h5
7 files changed, 221 insertions, 11 deletions
diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java
index cd58df6bb..b0d0da43b 100644
--- a/java/src/com/android/inputmethod/latin/LatinIME.java
+++ b/java/src/com/android/inputmethod/latin/LatinIME.java
@@ -1406,7 +1406,9 @@ public class LatinIME extends InputMethodServiceCompatWrapper implements Keyboar
if (mDeleteCount > DELETE_ACCELERATE_AT) {
ic.deleteSurroundingText(1, 0);
}
- restartSuggestionsOnWordBeforeCursorIfAtEndOfWord(ic);
+ if (isSuggestionsRequested()) {
+ restartSuggestionsOnWordBeforeCursorIfAtEndOfWord(ic);
+ }
}
}
}
@@ -1698,6 +1700,10 @@ public class LatinIME extends InputMethodServiceCompatWrapper implements Keyboar
// Check if we have a suggestion engine attached.
if ((mSuggest == null || !isSuggestionsRequested())
&& !mVoiceProxy.isVoiceInputHighlighted()) {
+ if (mWordComposer.isComposingWord()) {
+ Log.w(TAG, "Called updateSuggestions but suggestions were not requested!");
+ mWordComposer.setAutoCorrection(mWordComposer.getTypedWord());
+ }
return;
}
@@ -2085,10 +2091,23 @@ public class LatinIME extends InputMethodServiceCompatWrapper implements Keyboar
&& !mSettingsValues.isWordSeparator(textAfterCursor.charAt(0))) return;
// Bail out if word before cursor is 0-length or a single non letter (like an apostrophe)
- // Example: " '|" gets rejected here but "I'|" and "I|" are okay
- final CharSequence word = EditingUtils.getWordAtCursor(ic, mSettingsValues.mWordSeparators);
+ // Example: " -|" gets rejected here but "e-|" and "e|" are okay
+ CharSequence word = EditingUtils.getWordAtCursor(ic, mSettingsValues.mWordSeparators);
+ // We don't suggest on leading single quotes, so we have to remove them from the word if
+ // it starts with single quotes.
+ while (!TextUtils.isEmpty(word) && Keyboard.CODE_SINGLE_QUOTE == word.charAt(0)) {
+ word = word.subSequence(1, word.length());
+ }
if (TextUtils.isEmpty(word)) return;
- if (word.length() == 1 && !Character.isLetter(word.charAt(0))) return;
+ final char firstChar = word.charAt(0); // we just tested that word is not empty
+ if (word.length() == 1 && !Character.isLetter(firstChar)) return;
+
+ // We only suggest on words that start with a letter or a symbol that is excluded from
+ // word separators (see #handleCharacterWhileInBatchEdit).
+ if (!(isAlphabet(firstChar)
+ || mSettingsValues.isSymbolExcludedFromWordSeparators(firstChar))) {
+ return;
+ }
// Okay, we are at the end of a word. Restart suggestions.
restartSuggestionsOnWordBeforeCursor(ic, word);
diff --git a/native/src/correction.cpp b/native/src/correction.cpp
index 5dc6f8737..6a129d4e3 100644
--- a/native/src/correction.cpp
+++ b/native/src/correction.cpp
@@ -83,7 +83,7 @@ inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigne
inline static int getCurrentEditDistance(
int *editDistanceTable, const int inputLength, const int outputLength) {
- if (DEBUG_DICT) {
+ if (DEBUG_EDIT_DISTANCE) {
AKLOGI("getCurrentEditDistance %d, %d", inputLength, outputLength);
}
return editDistanceTable[(inputLength + 1) * (outputLength + 1) - 1];
@@ -935,6 +935,100 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
return totalFreq;
}
+/* static */
+int Correction::RankingAlgorithm::calcFreqForSplitTwoWordsOld(
+ const int firstFreq, const int secondFreq, const Correction* correction,
+ const unsigned short *word) {
+ const int spaceProximityPos = correction->mSpaceProximityPos;
+ const int missingSpacePos = correction->mMissingSpacePos;
+ if (DEBUG_DICT) {
+ int inputCount = 0;
+ if (spaceProximityPos >= 0) ++inputCount;
+ if (missingSpacePos >= 0) ++inputCount;
+ assert(inputCount <= 1);
+ }
+ const bool isSpaceProximity = spaceProximityPos >= 0;
+ const int inputLength = correction->mInputLength;
+ const int firstWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos;
+ const int secondWordLength = isSpaceProximity ? (inputLength - spaceProximityPos - 1)
+ : (inputLength - missingSpacePos);
+ const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
+
+ bool firstCapitalizedWordDemotion = false;
+ if (firstWordLength >= 2) {
+ firstCapitalizedWordDemotion = isUpperCase(word[0]);
+ }
+
+ bool secondCapitalizedWordDemotion = false;
+ if (secondWordLength >= 2) {
+ secondCapitalizedWordDemotion = isUpperCase(word[firstWordLength + 1]);
+ }
+
+ const bool capitalizedWordDemotion =
+ firstCapitalizedWordDemotion ^ secondCapitalizedWordDemotion;
+
+ if (DEBUG_DICT_FULL) {
+ AKLOGI("Two words: %c, %c, %d",
+ word[0], word[firstWordLength + 1], capitalizedWordDemotion);
+ }
+
+ if (firstWordLength == 0 || secondWordLength == 0) {
+ return 0;
+ }
+ const int firstDemotionRate = 100 - 100 / (firstWordLength + 1);
+ int tempFirstFreq = firstFreq;
+ multiplyRate(firstDemotionRate, &tempFirstFreq);
+
+ const int secondDemotionRate = 100 - 100 / (secondWordLength + 1);
+ int tempSecondFreq = secondFreq;
+ multiplyRate(secondDemotionRate, &tempSecondFreq);
+
+ const int totalLength = firstWordLength + secondWordLength;
+
+ // Promote pairFreq with multiplying by 2, because the word length is the same as the typed
+ // length.
+ int totalFreq = tempFirstFreq + tempSecondFreq;
+
+ // This is a workaround to try offsetting the not-enough-demotion which will be done in
+ // calcNormalizedScore in Utils.java.
+ // In calcNormalizedScore the score will be demoted by (1 - 1 / length)
+ // but we demoted only (1 - 1 / (length + 1)) so we will additionally adjust freq by
+ // (1 - 1 / length) / (1 - 1 / (length + 1)) = (1 - 1 / (length * length))
+ const int normalizedScoreNotEnoughDemotionAdjustment = 100 - 100 / (totalLength * totalLength);
+ multiplyRate(normalizedScoreNotEnoughDemotionAdjustment, &totalFreq);
+
+ // At this moment, totalFreq is calculated by the following formula:
+ // (firstFreq * (1 - 1 / (firstWordLength + 1)) + secondFreq * (1 - 1 / (secondWordLength + 1)))
+ // * (1 - 1 / totalLength) / (1 - 1 / (totalLength + 1))
+
+ multiplyIntCapped(powerIntCapped(typedLetterMultiplier, totalLength), &totalFreq);
+
+ // This is another workaround to offset the demotion which will be done in
+ // calcNormalizedScore in Utils.java.
+ // In calcNormalizedScore the score will be demoted by (1 - 1 / length) so we have to promote
+ // the same amount because we already have adjusted the synthetic freq of this "missing or
+ // mistyped space" suggestion candidate above in this method.
+ const int normalizedScoreDemotionRateOffset = (100 + 100 / totalLength);
+ multiplyRate(normalizedScoreDemotionRateOffset, &totalFreq);
+
+ if (isSpaceProximity) {
+ // A word pair with one space proximity correction
+ if (DEBUG_DICT) {
+ AKLOGI("Found a word pair with space proximity correction.");
+ }
+ multiplyIntCapped(typedLetterMultiplier, &totalFreq);
+ multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &totalFreq);
+ }
+
+ multiplyRate(WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE, &totalFreq);
+
+ if (capitalizedWordDemotion) {
+ multiplyRate(TWO_WORDS_CAPITALIZED_DEMOTION_RATE, &totalFreq);
+ }
+
+ return totalFreq;
+}
+
/* Damerau-Levenshtein distance */
inline static int editDistanceInternal(
int* editDistanceTable, const unsigned short* before,
diff --git a/native/src/correction.h b/native/src/correction.h
index a0fd55fd9..22a424f5c 100644
--- a/native/src/correction.h
+++ b/native/src/correction.h
@@ -100,6 +100,8 @@ class Correction {
const int freq, int *editDistanceTable, const Correction* correction);
static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
const Correction* correction, const unsigned short *word);
+ static int calcFreqForSplitTwoWordsOld(const int firstFreq, const int secondFreq,
+ const Correction* correction, const unsigned short *word);
static double calcNormalizedScore(const unsigned short* before, const int beforeLength,
const unsigned short* after, const int afterLength, const int score);
static int editDistance(const unsigned short* before,
diff --git a/native/src/defines.h b/native/src/defines.h
index 31175c369..d739043a4 100644
--- a/native/src/defines.h
+++ b/native/src/defines.h
@@ -117,8 +117,8 @@ static void prof_out(void) {
#define DEBUG_TRACE DEBUG_DICT_FULL
#define DEBUG_PROXIMITY_INFO false
#define DEBUG_CORRECTION false
-#define DEBUG_CORRECTION_FREQ true
-#define DEBUG_WORDS_PRIORITY_QUEUE true
+#define DEBUG_CORRECTION_FREQ false
+#define DEBUG_WORDS_PRIORITY_QUEUE false
#else // FLAG_DBG
@@ -213,6 +213,8 @@ static void prof_out(void) {
#define SUB_QUEUE_MAX_WORDS 1
#define SUB_QUEUE_MAX_COUNT 10
+#define TWO_WORDS_CORRECTION_THRESHOLD 0.22f
+
#define MAX_DEPTH_MULTIPLIER 3
// TODO: Reduce this constant if possible; check the maximum number of umlauts in the same German
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index 69e3200fc..8be95bc40 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@@ -241,8 +241,24 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
}
}
PROF_END(6);
- if (DEBUG_WORDS_PRIORITY_QUEUE) {
+ if (DEBUG_DICT) {
queuePool->dumpSubQueue1TopSuggestions();
+ for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
+ WordsPriorityQueue* queue = queuePool->getSubQueue1(i);
+ if (queue->size() > 0) {
+ WordsPriorityQueue::SuggestedWord* sw = queue->top();
+ const int score = sw->mScore;
+ const unsigned short* word = sw->mWord;
+ const int wordLength = sw->mWordLength;
+ double ns = Correction::RankingAlgorithm::calcNormalizedScore(
+ proximityInfo->getPrimaryInputWord(), i, word, wordLength, score);
+ ns += 0;
+ AKLOGI("--- TOP SUB WORDS for %d --- %d %f [%d]", i, score, ns,
+ (ns > TWO_WORDS_CORRECTION_THRESHOLD));
+ DUMP_WORD(proximityInfo->getPrimaryInputWord(), i);
+ DUMP_WORD(word, wordLength);
+ }
+ }
}
}
@@ -441,6 +457,80 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo
return;
}
+void UnigramDictionary::getSplitTwoWordsSuggestionsOld(ProximityInfo *proximityInfo,
+ const int *xcoordinates, const int *ycoordinates, const int *codes,
+ const bool useFullEditDistance, const int inputLength, const int missingSpacePos,
+ const int spaceProximityPos, Correction *correction, WordsPriorityQueuePool* queuePool) {
+ WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
+
+ if (DEBUG_DICT) {
+ int inputCount = 0;
+ if (spaceProximityPos >= 0) ++inputCount;
+ if (missingSpacePos >= 0) ++inputCount;
+ assert(inputCount <= 1);
+ }
+ const bool isSpaceProximity = spaceProximityPos >= 0;
+ const int firstWordStartPos = 0;
+ const int secondWordStartPos = isSpaceProximity ? (spaceProximityPos + 1) : missingSpacePos;
+ const int firstWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos;
+ const int secondWordLength = isSpaceProximity
+ ? (inputLength - spaceProximityPos - 1)
+ : (inputLength - missingSpacePos);
+
+ if (inputLength >= MAX_WORD_LENGTH) return;
+ if (0 >= firstWordLength || 0 >= secondWordLength || firstWordStartPos >= secondWordStartPos
+ || firstWordStartPos < 0 || secondWordStartPos + secondWordLength > inputLength)
+ return;
+
+ const int newWordLength = firstWordLength + secondWordLength + 1;
+
+
+ // Space proximity preparation
+ //WordsPriorityQueue *subQueue = queuePool->getSubQueue1();
+ //initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, firstWordLength, subQueue,
+ //correction);
+ //getSuggestionCandidates(useFullEditDistance, firstWordLength, correction, subQueue, false,
+ //MAX_ERRORS_FOR_TWO_WORDS);
+
+ // Allocating variable length array on stack
+ unsigned short word[newWordLength];
+ const int firstFreq = getMostFrequentWordLike(
+ firstWordStartPos, firstWordLength, proximityInfo, mWord);
+ if (DEBUG_DICT) {
+ AKLOGI("First freq: %d", firstFreq);
+ }
+ if (firstFreq <= 0) return;
+
+ for (int i = 0; i < firstWordLength; ++i) {
+ word[i] = mWord[i];
+ }
+
+ const int secondFreq = getMostFrequentWordLike(
+ secondWordStartPos, secondWordLength, proximityInfo, mWord);
+ if (DEBUG_DICT) {
+ AKLOGI("Second freq: %d", secondFreq);
+ }
+ if (secondFreq <= 0) return;
+
+ word[firstWordLength] = SPACE;
+ for (int i = (firstWordLength + 1); i < newWordLength; ++i) {
+ word[i] = mWord[i - firstWordLength - 1];
+ }
+
+ // TODO: Remove initSuggestions and correction->setCorrectionParams
+ initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction);
+
+ correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
+ -1 /* transposedPos */, spaceProximityPos, missingSpacePos,
+ useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS);
+ const int pairFreq = correction->getFreqForSplitTwoWords(firstFreq, secondFreq, word);
+ if (DEBUG_DICT) {
+ AKLOGI("Split two words: %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength);
+ }
+ addWord(word, newWordLength, pairFreq, masterQueue);
+ return;
+}
+
// Wrapper for getMostFrequentWordLikeInner, which matches it to the previous
// interface.
inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex,
diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h
index 5e7a7580f..b950971bb 100644
--- a/native/src/unigram_dictionary.h
+++ b/native/src/unigram_dictionary.h
@@ -104,6 +104,10 @@ class UnigramDictionary {
const int *xcoordinates, const int *ycoordinates, const int *codes,
const bool useFullEditDistance, const int inputLength, const int spaceProximityPos,
const int missingSpacePos, Correction *correction, WordsPriorityQueuePool* queuePool);
+ void getSplitTwoWordsSuggestionsOld(ProximityInfo *proximityInfo,
+ const int *xcoordinates, const int *ycoordinates, const int *codes,
+ const bool useFullEditDistance, const int inputLength, const int spaceProximityPos,
+ const int missingSpacePos, Correction *correction, WordsPriorityQueuePool* queuePool);
void getMissingSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const bool useFullEditDistance,
const int inputLength, const int missingSpacePos, Correction *correction,
diff --git a/native/src/words_priority_queue.h b/native/src/words_priority_queue.h
index 54bf27a59..6262439b5 100644
--- a/native/src/words_priority_queue.h
+++ b/native/src/words_priority_queue.h
@@ -81,10 +81,9 @@ class WordsPriorityQueue {
mSuggestions.push(sw);
}
- SuggestedWord* topAndPop() {
+ SuggestedWord* top() {
if (mSuggestions.empty()) return 0;
SuggestedWord* sw = mSuggestions.top();
- mSuggestions.pop();
return sw;
}
@@ -112,7 +111,7 @@ class WordsPriorityQueue {
return size;
}
- int size() {
+ int size() const {
return mSuggestions.size();
}