about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Jean Chalard <jchalard@google.com> 2013-10-09 19:32:05 -0700
committer Android Git Automerger <android-git-automerger@android.com> 2013-10-09 19:32:05 -0700
commit e8c011679ae2bcc880902d86bc106f37f7d9136f (patch)
tree 9e983848fc561be46ab99231af2a25ede1889aeb
parent e2598657c3b773ff23283b7f194a49a7d1482a70 (diff)
parent e398d09a50486d1b91ce109f4bd1a7529560c634 (diff)
download latinime-e8c011679ae2bcc880902d86bc106f37f7d9136f.tar.gz
latinime-e8c011679ae2bcc880902d86bc106f37f7d9136f.tar.xz
latinime-e8c011679ae2bcc880902d86bc106f37f7d9136f.zip
am e398d09a: am af75e48e: am 74577bed: Merge "Implement the heuristic for auto-commit."
* commit 'e398d09a50486d1b91ce109f4bd1a7529560c634': Implement the heuristic for auto-commit.
-rw-r--r-- java/src/com/android/inputmethod/latin/BinaryDictionary.java 19
-rw-r--r-- native/jni/src/defines.h 12
-rw-r--r-- native/jni/src/suggest/core/dicnode/dic_node.h 10
-rw-r--r-- native/jni/src/suggest/core/suggest.cpp 58
-rw-r--r-- native/jni/src/suggest/core/suggest.h 2
-rw-r--r-- native/jni/src/utils/char_utils.h 10
6 files changed, 90 insertions, 21 deletions
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
index d9bad7e57..541e69788 100644
--- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
@@ -44,9 +44,9 @@ public final class BinaryDictionary extends Dictionary {
private static final int MAX_WORD_LENGTH = Constants.DICTIONARY_MAX_WORD_LENGTH;
// Must be equal to MAX_RESULTS in native/jni/src/defines.h
private static final int MAX_RESULTS = 18;
- // Required space count for auto commit.
- // TODO: Remove this heuristic.
- private static final int SPACE_COUNT_FOR_AUTO_COMMIT = 3;
+ // The cutoff returned by native for auto-commit confidence.
+ // Must be equal to CONFIDENCE_FOR_AUTO_COMMIT in native/jni/src/defines.h
+ private static final int CONFIDENCE_TO_AUTO_COMMIT = 1000000;
@UsedForTesting
public static final String UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
@@ -343,18 +343,7 @@ public final class BinaryDictionary extends Dictionary {
@Override
public boolean shouldAutoCommit(final SuggestedWordInfo candidate) {
- // TODO: actually use the confidence rather than use this completely broken heuristic
- final String word = candidate.mWord;
- final int length = word.length();
- int remainingSpaces = SPACE_COUNT_FOR_AUTO_COMMIT;
- for (int i = 0; i < length; ++i) {
- // This is okay because no low-surrogate and no high-surrogate can ever match the
- // space character, so we don't need to take care of iterating on code points.
- if (Constants.CODE_SPACE == word.charAt(i)) {
- if (0 >= --remainingSpaces) return true;
- }
- }
- return false;
+ return candidate.mAutoCommitFirstWordConfidence > CONFIDENCE_TO_AUTO_COMMIT;
}
@Override
diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h
index c920f64b4..742e388e4 100644
--- a/native/jni/src/defines.h
+++ b/native/jni/src/defines.h
@@ -298,9 +298,19 @@ static inline void prof_out(void) {
#define NOT_AN_INDEX (-1)
#define NOT_A_PROBABILITY (-1)
#define NOT_A_DICT_POS (S_INT_MIN)
+
// A special value to mean the first word confidence makes no sense in this case,
// e.g. this is not a multi-word suggestion.
-#define NOT_A_FIRST_WORD_CONFIDENCE (S_INT_MIN)
+#define NOT_A_FIRST_WORD_CONFIDENCE (S_INT_MAX)
+// How high the confidence needs to be for us to auto-commit. Arbitrary.
+// This needs to be the same as CONFIDENCE_TO_AUTO_COMMIT in BinaryDictionary.java
+#define CONFIDENCE_FOR_AUTO_COMMIT (1000000)
+// 80% of the full confidence
+#define DISTANCE_WEIGHT_FOR_AUTO_COMMIT (80 * CONFIDENCE_FOR_AUTO_COMMIT / 100)
+// 100% of the full confidence
+#define LENGTH_WEIGHT_FOR_AUTO_COMMIT (CONFIDENCE_FOR_AUTO_COMMIT)
+// 80% of the full confidence
+#define SPACE_COUNT_WEIGHT_FOR_AUTO_COMMIT (80 * CONFIDENCE_FOR_AUTO_COMMIT / 100)
#define KEYCODE_SPACE ' '
#define KEYCODE_SINGLE_QUOTE '\''
diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h
index bbb30c9fe..49cfdecac 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node.h
@@ -321,6 +321,16 @@ class DicNode {
DUMP_WORD_AND_SCORE("OUTPUT");
}
+ // "Total" in this context (and other methods in this class) means the whole suggestion. When
+ // this represents a multi-word suggestion, the referenced PtNode (in mDicNodeState) is only
+ // the one that corresponds to the last word of the suggestion, and all the previous words
+ // are concatenated together in mPrevWord - which contains a space at the end.
+ int getTotalNodeSpaceCount() const {
+ if (isFirstWord()) return 0;
+ return CharUtils::getSpaceCount(mDicNodeState.mDicNodeStatePrevWord.mPrevWord,
+ mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength());
+ }
+
int getSecondWordFirstInputIndex(const ProximityInfoState *const pInfoState) const {
const int inputIndex = mDicNodeState.mDicNodeStatePrevWord.getSecondWordFirstInputIndex();
if (inputIndex == NOT_AN_INDEX) {
diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp
index 079dc8b26..73ccebc88 100644
--- a/native/jni/src/suggest/core/suggest.cpp
+++ b/native/jni/src/suggest/core/suggest.cpp
@@ -166,7 +166,11 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
// TODO: have partial commit work even with multiple pointers.
const bool outputSecondWordFirstLetterInputIndex =
traverseSession->isOnlyOnePointerUsed(0 /* pointerId */);
- outputAutoCommitFirstWordConfidence[0] = computeFirstWordConfidence();
+ if (terminalSize > 0) {
+ // If we have no suggestions, don't write this
+ outputAutoCommitFirstWordConfidence[0] =
+ computeFirstWordConfidence(&terminals[0]);
+ }
// Output suggestion results here
for (int terminalIndex = 0; terminalIndex < terminalSize && outputWordIndex < MAX_RESULTS;
@@ -255,9 +259,55 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
return outputWordIndex;
}
-int Suggest::computeFirstWordConfidence() const {
- // TODO: implement this.
- return NOT_A_FIRST_WORD_CONFIDENCE;
+int Suggest::computeFirstWordConfidence(const DicNode *const terminalDicNode) const {
+ // Get the number of spaces in the first suggestion
+ const int spaceCount = terminalDicNode->getTotalNodeSpaceCount();
+ // Get the number of characters in the first suggestion
+ const int length = terminalDicNode->getTotalNodeCodePointCount();
+ // Get the distance for the first word of the suggestion
+ const float distance = terminalDicNode->getNormalizedCompoundDistanceAfterFirstWord();
+
+ // Arbitrarily, we give a score whose useful values range from 0 to 1,000,000.
+ // 1,000,000 will be the cutoff to auto-commit. It's fine if the number is under 0 or
+ // above 1,000,000 : under 0 just means it's very bad to commit, and above 1,000,000 means
+ // we are very confident.
+ // Expected space count is 1 ~ 5
+ static const int MIN_EXPECTED_SPACE_COUNT = 1;
+ static const int MAX_EXPECTED_SPACE_COUNT = 5;
+ // Expected length is about 4 ~ 30
+ static const int MIN_EXPECTED_LENGTH = 4;
+ static const int MAX_EXPECTED_LENGTH = 30;
+ // Expected distance is about 0.2 ~ 2.0, but consider 0.0 ~ 2.0
+ static const float MIN_EXPECTED_DISTANCE = 0.0;
+ static const float MAX_EXPECTED_DISTANCE = 2.0;
+ // This is not strict: it's where most stuff will be falling, but it's still fine if it's
+ // outside these values. We want to output a value that reflects all of these. Each factor
+ // contributes a bit.
+
+ // We need at least a space.
+ if (spaceCount < 1) return NOT_A_FIRST_WORD_CONFIDENCE;
+
+ // The smaller the edit distance, the higher the contribution. MIN_EXPECTED_DISTANCE means
+ // full contribution according to the weight of the distance, while MAX_EXPECTED_DISTANCE
+ // means 0 contribution. Clamp to avoid overflows.
+ const float clampedDistance = distance < MIN_EXPECTED_DISTANCE ? MIN_EXPECTED_DISTANCE
+ : distance > MAX_EXPECTED_DISTANCE ? MAX_EXPECTED_DISTANCE : distance;
+ const int distanceContribution = DISTANCE_WEIGHT_FOR_AUTO_COMMIT
+ * (MAX_EXPECTED_DISTANCE - clampedDistance)
+ / (MAX_EXPECTED_DISTANCE - MIN_EXPECTED_DISTANCE);
+ // The larger the suggestion length, the larger the contribution. MIN_EXPECTED_LENGTH is no
+ // contribution, MAX_EXPECTED_LENGTH is full contribution according to the weight of the
+ // length. Length is guaranteed to be between 1 and 48, so we don't need to clamp.
+ const int lengthContribution = LENGTH_WEIGHT_FOR_AUTO_COMMIT
+ * (length - MIN_EXPECTED_LENGTH) / (MAX_EXPECTED_LENGTH - MIN_EXPECTED_LENGTH);
+ // The more spaces, the larger the contribution. MIN_EXPECTED_SPACE_COUNT space is no
+ // contribution, MAX_EXPECTED_SPACE_COUNT spaces is full contribution according to the
+ // weight of the space count.
+ const int spaceContribution = SPACE_COUNT_WEIGHT_FOR_AUTO_COMMIT
+ * (spaceCount - MIN_EXPECTED_SPACE_COUNT)
+ / (MAX_EXPECTED_SPACE_COUNT - MIN_EXPECTED_SPACE_COUNT);
+
+ return distanceContribution + lengthContribution + spaceContribution;
}
/**
diff --git a/native/jni/src/suggest/core/suggest.h b/native/jni/src/suggest/core/suggest.h
index 0e8bd1195..b20343d29 100644
--- a/native/jni/src/suggest/core/suggest.h
+++ b/native/jni/src/suggest/core/suggest.h
@@ -58,7 +58,7 @@ class Suggest : public SuggestInterface {
int outputSuggestions(DicTraverseSession *traverseSession, int *frequencies,
int *outputCodePoints, int *outputIndicesToPartialCommit, int *outputTypes,
int *outputAutoCommitFirstWordConfidence) const;
- int computeFirstWordConfidence() const;
+ int computeFirstWordConfidence(const DicNode *const terminalDicNode) const;
void initializeSearch(DicTraverseSession *traverseSession, int commitPoint) const;
void expandCurrentDicNodes(DicTraverseSession *traverseSession) const;
void processTerminalDicNode(DicTraverseSession *traverseSession, DicNode *dicNode) const;
diff --git a/native/jni/src/utils/char_utils.h b/native/jni/src/utils/char_utils.h
index 2e735a81c..41663c81a 100644
--- a/native/jni/src/utils/char_utils.h
+++ b/native/jni/src/utils/char_utils.h
@@ -75,6 +75,16 @@ class CharUtils {
return c;
}
+ static AK_FORCE_INLINE int getSpaceCount(const int *const codePointBuffer, const int length) {
+ int spaceCount = 0;
+ for (int i = 0; i < length; ++i) {
+ if (codePointBuffer[i] == KEYCODE_SPACE) {
+ ++spaceCount;
+ }
+ }
+ return spaceCount;
+ }
+
static unsigned short latin_tolower(const unsigned short c);
private: