aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--java/src/com/android/inputmethod/latin/AutoCorrection.java5
-rw-r--r--java/src/com/android/inputmethod/latin/BinaryDictionary.java14
-rw-r--r--java/src/com/android/inputmethod/latin/Utils.java95
-rw-r--r--java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java9
-rw-r--r--native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp29
-rw-r--r--native/src/correction.cpp82
-rw-r--r--native/src/correction.h24
-rw-r--r--tests/src/com/android/inputmethod/latin/EditDistanceTests.java18
8 files changed, 148 insertions, 128 deletions
diff --git a/java/src/com/android/inputmethod/latin/AutoCorrection.java b/java/src/com/android/inputmethod/latin/AutoCorrection.java
index cd066a3d1..e5eb15938 100644
--- a/java/src/com/android/inputmethod/latin/AutoCorrection.java
+++ b/java/src/com/android/inputmethod/latin/AutoCorrection.java
@@ -118,8 +118,9 @@ public class AutoCorrection {
final int autoCorrectionSuggestionScore = sortedScores[0];
// TODO: when the normalized score of the first suggestion is nearly equals to
// the normalized score of the second suggestion, behave less aggressive.
- mNormalizedScore = Utils.calcNormalizedScore(
- typedWord,autoCorrectionSuggestion, autoCorrectionSuggestionScore);
+ mNormalizedScore = BinaryDictionary.calcNormalizedScore(
+ typedWord.toString(), autoCorrectionSuggestion.toString(),
+ autoCorrectionSuggestionScore);
if (DBG) {
Log.d(TAG, "Normalized " + typedWord + "," + autoCorrectionSuggestion + ","
+ autoCorrectionSuggestionScore + ", " + mNormalizedScore
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
index f0e56d346..3692ac179 100644
--- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
@@ -118,6 +118,10 @@ public class BinaryDictionary extends Dictionary {
private native int getBigramsNative(long dict, char[] prevWord, int prevWordLength,
int[] inputCodes, int inputCodesLength, char[] outputChars, int[] scores,
int maxWordLength, int maxBigrams, int maxAlternatives);
+ private static native double calcNormalizedScoreNative(
+ char[] before, int beforeLength, char[] after, int afterLength, int score);
+ private static native int editDistanceNative(
+ char[] before, int beforeLength, char[] after, int afterLength);
private final void loadDictionary(String path, long startOffset, long length) {
mNativeDict = openNative(path, startOffset, length,
@@ -211,6 +215,16 @@ public class BinaryDictionary extends Dictionary {
mFlags, outputChars, scores);
}
+ public static double calcNormalizedScore(String before, String after, int score) {
+ return calcNormalizedScoreNative(before.toCharArray(), before.length(),
+ after.toCharArray(), after.length(), score);
+ }
+
+ public static int editDistance(String before, String after) {
+ return editDistanceNative(
+ before.toCharArray(), before.length(), after.toCharArray(), after.length());
+ }
+
@Override
public boolean isValidWord(CharSequence word) {
if (word == null) return false;
diff --git a/java/src/com/android/inputmethod/latin/Utils.java b/java/src/com/android/inputmethod/latin/Utils.java
index bfa51897e..38148438b 100644
--- a/java/src/com/android/inputmethod/latin/Utils.java
+++ b/java/src/com/android/inputmethod/latin/Utils.java
@@ -191,7 +191,8 @@ public class Utils {
final int typedWordLength = typedWord.length();
final int maxEditDistanceOfNativeDictionary =
(typedWordLength < 5 ? 2 : typedWordLength / 2) + 1;
- final int distance = Utils.editDistance(typedWord, suggestionWord);
+ final int distance = BinaryDictionary.editDistance(
+ typedWord.toString(), suggestionWord.toString());
if (DBG) {
Log.d(TAG, "Autocorrected edit distance = " + distance
+ ", " + maxEditDistanceOfNativeDictionary);
@@ -323,49 +324,6 @@ public class Utils {
}
}
-
- /* Damerau-Levenshtein distance */
- public static int editDistance(CharSequence s, CharSequence t) {
- if (s == null || t == null) {
- throw new IllegalArgumentException("editDistance: Arguments should not be null.");
- }
- final int sl = s.length();
- final int tl = t.length();
- int[][] dp = new int [sl + 1][tl + 1];
- for (int i = 0; i <= sl; i++) {
- dp[i][0] = i;
- }
- for (int j = 0; j <= tl; j++) {
- dp[0][j] = j;
- }
- for (int i = 0; i < sl; ++i) {
- for (int j = 0; j < tl; ++j) {
- final char sc = Character.toLowerCase(s.charAt(i));
- final char tc = Character.toLowerCase(t.charAt(j));
- final int cost = sc == tc ? 0 : 1;
- dp[i + 1][j + 1] = Math.min(
- dp[i][j + 1] + 1, Math.min(dp[i + 1][j] + 1, dp[i][j] + cost));
- // Overwrite for transposition cases
- if (i > 0 && j > 0
- && sc == Character.toLowerCase(t.charAt(j - 1))
- && tc == Character.toLowerCase(s.charAt(i - 1))) {
- dp[i + 1][j + 1] = Math.min(dp[i + 1][j + 1], dp[i - 1][j - 1] + cost);
- }
- }
- }
- if (DBG_EDIT_DISTANCE) {
- Log.d(TAG, "editDistance:" + s + "," + t);
- for (int i = 0; i < dp.length; ++i) {
- StringBuffer sb = new StringBuffer();
- for (int j = 0; j < dp[i].length; ++j) {
- sb.append(dp[i][j]).append(',');
- }
- Log.d(TAG, i + ":" + sb.toString());
- }
- }
- return dp[sl][tl];
- }
-
// Get the current stack trace
public static String getStackTrace() {
StringBuilder sb = new StringBuilder();
@@ -379,55 +337,6 @@ public class Utils {
return sb.toString();
}
- // In dictionary.cpp, getSuggestion() method,
- // suggestion scores are computed using the below formula.
- // original score
- // := pow(mTypedLetterMultiplier (this is defined 2),
- // (the number of matched characters between typed word and suggested word))
- // * (individual word's score which defined in the unigram dictionary,
- // and this score is defined in range [0, 255].)
- // Then, the following processing is applied.
- // - If the dictionary word is matched up to the point of the user entry
- // (full match up to min(before.length(), after.length())
- // => Then multiply by FULL_MATCHED_WORDS_PROMOTION_RATE (this is defined 1.2)
- // - If the word is a true full match except for differences in accents or
- // capitalization, then treat it as if the score was 255.
- // - If before.length() == after.length()
- // => multiply by mFullWordMultiplier (this is defined 2))
- // So, maximum original score is pow(2, min(before.length(), after.length())) * 255 * 2 * 1.2
- // For historical reasons we ignore the 1.2 modifier (because the measure for a good
- // autocorrection threshold was done at a time when it didn't exist). This doesn't change
- // the result.
- // So, we can normalize original score by dividing pow(2, min(b.l(),a.l())) * 255 * 2.
- private static final int MAX_INITIAL_SCORE = 255;
- private static final int TYPED_LETTER_MULTIPLIER = 2;
- private static final int FULL_WORD_MULTIPLIER = 2;
- private static final int S_INT_MAX = 2147483647;
- public static double calcNormalizedScore(CharSequence before, CharSequence after, int score) {
- final int beforeLength = before.length();
- final int afterLength = after.length();
- if (beforeLength == 0 || afterLength == 0) return 0;
- final int distance = editDistance(before, after);
- // If afterLength < beforeLength, the algorithm is suggesting a word by excessive character
- // correction.
- int spaceCount = 0;
- for (int i = 0; i < afterLength; ++i) {
- if (after.charAt(i) == Keyboard.CODE_SPACE) {
- ++spaceCount;
- }
- }
- if (spaceCount == afterLength) return 0;
- final double maximumScore = score == S_INT_MAX ? S_INT_MAX : MAX_INITIAL_SCORE
- * Math.pow(
- TYPED_LETTER_MULTIPLIER, Math.min(beforeLength, afterLength - spaceCount))
- * FULL_WORD_MULTIPLIER;
- // add a weight based on edit distance.
- // distance <= max(afterLength, beforeLength) == afterLength,
- // so, 0 <= distance / afterLength <= 1
- final double weight = 1.0 - (double) distance / afterLength;
- return (score / maximumScore) * weight;
- }
-
public static class UsabilityStudyLogUtils {
private static final String USABILITY_TAG = UsabilityStudyLogUtils.class.getSimpleName();
private static final String FILENAME = "log.txt";
diff --git a/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java b/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java
index 1ffee0de0..39e47f661 100644
--- a/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java
+++ b/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java
@@ -270,7 +270,7 @@ public class AndroidSpellCheckerService extends SpellCheckerService
// make the threshold.
final String wordString = new String(word, wordOffset, wordLength);
final double normalizedScore =
- Utils.calcNormalizedScore(mOriginalText, wordString, score);
+ BinaryDictionary.calcNormalizedScore(mOriginalText, wordString, score);
if (normalizedScore < mSuggestionThreshold) {
if (DBG) Log.i(TAG, wordString + " does not make the score threshold");
return true;
@@ -303,8 +303,8 @@ public class AndroidSpellCheckerService extends SpellCheckerService
hasRecommendedSuggestions = false;
} else {
gatheredSuggestions = EMPTY_STRING_ARRAY;
- final double normalizedScore =
- Utils.calcNormalizedScore(mOriginalText, mBestSuggestion, mBestScore);
+ final double normalizedScore = BinaryDictionary.calcNormalizedScore(
+ mOriginalText, mBestSuggestion, mBestScore);
hasRecommendedSuggestions = (normalizedScore > mRecommendedThreshold);
}
} else {
@@ -338,7 +338,8 @@ public class AndroidSpellCheckerService extends SpellCheckerService
final int bestScore = mScores[mLength - 1];
final CharSequence bestSuggestion = mSuggestions.get(0);
final double normalizedScore =
- Utils.calcNormalizedScore(mOriginalText, bestSuggestion, bestScore);
+ BinaryDictionary.calcNormalizedScore(
+ mOriginalText, bestSuggestion.toString(), bestScore);
hasRecommendedSuggestions = (normalizedScore > mRecommendedThreshold);
if (DBG) {
Log.i(TAG, "Best suggestion : " + bestSuggestion + ", score " + bestScore);
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index 42d0e3207..71a893ca7 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -18,6 +18,7 @@
#define LOG_TAG "LatinIME: jni: BinaryDictionary"
#include "binary_format.h"
+#include "correction.h"
#include "com_android_inputmethod_latin_BinaryDictionary.h"
#include "dictionary.h"
#include "jni.h"
@@ -188,6 +189,29 @@ static jboolean latinime_BinaryDictionary_isValidWord(JNIEnv *env, jobject objec
return result;
}
+static jdouble latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jobject object,
+ jcharArray before, jint beforeLength, jcharArray after, jint afterLength, jint score) {
+ jchar *beforeChars = env->GetCharArrayElements(before, 0);
+ jchar *afterChars = env->GetCharArrayElements(after, 0);
+ jdouble result = Correction::RankingAlgorithm::calcNormalizedScore(
+ (unsigned short*)beforeChars, beforeLength, (unsigned short*)afterChars, afterLength,
+ score);
+ env->ReleaseCharArrayElements(before, beforeChars, JNI_ABORT);
+ env->ReleaseCharArrayElements(after, afterChars, JNI_ABORT);
+ return result;
+}
+
+static jint latinime_BinaryDictionary_editDistance(JNIEnv *env, jobject object,
+ jcharArray before, jint beforeLength, jcharArray after, jint afterLength) {
+ jchar *beforeChars = env->GetCharArrayElements(before, 0);
+ jchar *afterChars = env->GetCharArrayElements(after, 0);
+ jint result = Correction::RankingAlgorithm::editDistance(
+ (unsigned short*)beforeChars, beforeLength, (unsigned short*)afterChars, afterLength);
+ env->ReleaseCharArrayElements(before, beforeChars, JNI_ABORT);
+ env->ReleaseCharArrayElements(after, afterChars, JNI_ABORT);
+ return result;
+}
+
static void latinime_BinaryDictionary_close(JNIEnv *env, jobject object, jlong dict) {
Dictionary *dictionary = (Dictionary*)dict;
if (!dictionary) return;
@@ -222,7 +246,10 @@ static JNINativeMethod sMethods[] = {
{"closeNative", "(J)V", (void*)latinime_BinaryDictionary_close},
{"getSuggestionsNative", "(JJ[I[I[III[C[I)I", (void*)latinime_BinaryDictionary_getSuggestions},
{"isValidWordNative", "(J[CI)Z", (void*)latinime_BinaryDictionary_isValidWord},
- {"getBigramsNative", "(J[CI[II[C[IIII)I", (void*)latinime_BinaryDictionary_getBigrams}
+ {"getBigramsNative", "(J[CI[II[C[IIII)I", (void*)latinime_BinaryDictionary_getBigrams},
+ {"calcNormalizedScoreNative", "([CI[CII)D",
+ (void*)latinime_BinaryDictionary_calcNormalizedScore},
+ {"editDistanceNative", "([CI[CI)I", (void*)latinime_BinaryDictionary_editDistance}
};
int register_BinaryDictionary(JNIEnv *env) {
diff --git a/native/src/correction.cpp b/native/src/correction.cpp
index 503fb7c53..dc31bfae7 100644
--- a/native/src/correction.cpp
+++ b/native/src/correction.cpp
@@ -16,6 +16,7 @@
#include <assert.h>
#include <ctype.h>
+#include <math.h>
#include <stdio.h>
#include <string.h>
@@ -933,14 +934,14 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
return totalFreq;
}
-#if 0 /* no longer used. keep just for reference */
-inline static int editDistance(
- int* editDistanceTable, const unsigned short* input,
- const int inputLength, const unsigned short* output, const int outputLength) {
+/* Damerau-Levenshtein distance */
+inline static int editDistanceInternal(
+ int* editDistanceTable, const unsigned short* before,
+ const int beforeLength, const unsigned short* after, const int afterLength) {
// dp[li][lo] dp[a][b] = dp[ a * lo + b]
int* dp = editDistanceTable;
- const int li = inputLength + 1;
- const int lo = outputLength + 1;
+ const int li = beforeLength + 1;
+ const int lo = afterLength + 1;
for (int i = 0; i < li; ++i) {
dp[lo * i] = i;
}
@@ -950,13 +951,13 @@ inline static int editDistance(
for (int i = 0; i < li - 1; ++i) {
for (int j = 0; j < lo - 1; ++j) {
- const uint32_t ci = toBaseLowerCase(input[i]);
- const uint32_t co = toBaseLowerCase(output[j]);
+ const uint32_t ci = toBaseLowerCase(before[i]);
+ const uint32_t co = toBaseLowerCase(after[j]);
const uint16_t cost = (ci == co) ? 0 : 1;
dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1,
min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost));
- if (i > 0 && j > 0 && ci == toBaseLowerCase(output[j - 1])
- && co == toBaseLowerCase(input[i - 1])) {
+ if (i > 0 && j > 0 && ci == toBaseLowerCase(after[j - 1])
+ && co == toBaseLowerCase(before[i - 1])) {
dp[(i + 1) * lo + (j + 1)] = min(
dp[(i + 1) * lo + (j + 1)], dp[(i - 1) * lo + (j - 1)] + cost);
}
@@ -964,7 +965,7 @@ inline static int editDistance(
}
if (DEBUG_EDIT_DISTANCE) {
- LOGI("IN = %d, OUT = %d", inputLength, outputLength);
+ LOGI("IN = %d, OUT = %d", beforeLength, afterLength);
for (int i = 0; i < li; ++i) {
for (int j = 0; j < lo; ++j) {
LOGI("EDIT[%d][%d], %d", i, j, dp[i * lo + j]);
@@ -973,6 +974,63 @@ inline static int editDistance(
}
return dp[li * lo - 1];
}
-#endif
+
+int Correction::RankingAlgorithm::editDistance(const unsigned short* before,
+ const int beforeLength, const unsigned short* after, const int afterLength) {
+ int table[(beforeLength + 1) * (afterLength + 1)];
+ return editDistanceInternal(table, before, beforeLength, after, afterLength);
+}
+
+
+// In dictionary.cpp, getSuggestion() method,
+// suggestion scores are computed using the below formula.
+// original score
+// := pow(mTypedLetterMultiplier (this is defined 2),
+// (the number of matched characters between typed word and suggested word))
+// * (individual word's score which defined in the unigram dictionary,
+// and this score is defined in range [0, 255].)
+// Then, the following processing is applied.
+// - If the dictionary word is matched up to the point of the user entry
+// (full match up to min(before.length(), after.length())
+// => Then multiply by FULL_MATCHED_WORDS_PROMOTION_RATE (this is defined 1.2)
+// - If the word is a true full match except for differences in accents or
+// capitalization, then treat it as if the score was 255.
+// - If before.length() == after.length()
+// => multiply by mFullWordMultiplier (this is defined 2))
+// So, maximum original score is pow(2, min(before.length(), after.length())) * 255 * 2 * 1.2
+// For historical reasons we ignore the 1.2 modifier (because the measure for a good
+// autocorrection threshold was done at a time when it didn't exist). This doesn't change
+// the result.
+// So, we can normalize original score by dividing pow(2, min(b.l(),a.l())) * 255 * 2.
+
+/* static */
+double Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short* before,
+ const int beforeLength, const unsigned short* after, const int afterLength,
+ const int score) {
+ if (0 == beforeLength || 0 == afterLength) {
+ return 0;
+ }
+ const int distance = editDistance(before, beforeLength, after, afterLength);
+ int spaceCount = 0;
+ for (int i = 0; i < afterLength; ++i) {
+ if (after[i] == CODE_SPACE) {
+ ++spaceCount;
+ }
+ }
+
+ if (spaceCount == afterLength) {
+ return 0;
+ }
+
+ const double maxScore = score >= S_INT_MAX ? S_INT_MAX : MAX_INITIAL_SCORE
+ * pow((double)TYPED_LETTER_MULTIPLIER,
+ (double)min(beforeLength, afterLength - spaceCount)) * FULL_WORD_MULTIPLIER;
+
+ // add a weight based on edit distance.
+ // distance <= max(afterLength, beforeLength) == afterLength,
+ // so, 0 <= distance / afterLength <= 1
+ const double weight = 1.0 - (double) distance / afterLength;
+ return (score / maxScore) * weight;
+}
} // namespace latinime
diff --git a/native/src/correction.h b/native/src/correction.h
index 9ba472955..4012e7e82 100644
--- a/native/src/correction.h
+++ b/native/src/correction.h
@@ -95,6 +95,23 @@ class Correction {
return mCorrectionStates[index].mParentIndex;
}
+ class RankingAlgorithm {
+ public:
+ static int calculateFinalFreq(const int inputIndex, const int depth,
+ const int freq, int *editDistanceTable, const Correction* correction);
+ static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
+ const Correction* correction, const unsigned short *word);
+ static double calcNormalizedScore(const unsigned short* before, const int beforeLength,
+ const unsigned short* after, const int afterLength, const int score);
+ static int editDistance(const unsigned short* before,
+ const int beforeLength, const unsigned short* after, const int afterLength);
+ private:
+ static const int CODE_SPACE = ' ';
+ static const int MAX_INITIAL_SCORE = 255;
+ static const int TYPED_LETTER_MULTIPLIER = 2;
+ static const int FULL_WORD_MULTIPLIER = 2;
+ };
+
private:
inline void incrementInputIndex();
inline void incrementOutputIndex();
@@ -153,13 +170,6 @@ class Correction {
bool mTransposing;
bool mSkipping;
- class RankingAlgorithm {
- public:
- static int calculateFinalFreq(const int inputIndex, const int depth,
- const int freq, int *editDistanceTable, const Correction* correction);
- static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
- const Correction* correction, const unsigned short *word);
- };
};
} // namespace latinime
#endif // LATINIME_CORRECTION_H
diff --git a/tests/src/com/android/inputmethod/latin/EditDistanceTests.java b/tests/src/com/android/inputmethod/latin/EditDistanceTests.java
index 75bd04938..c053a49e5 100644
--- a/tests/src/com/android/inputmethod/latin/EditDistanceTests.java
+++ b/tests/src/com/android/inputmethod/latin/EditDistanceTests.java
@@ -37,7 +37,7 @@ public class EditDistanceTests extends AndroidTestCase {
* sitting
*/
public void testExample1() {
- final int dist = Utils.editDistance("kitten", "sitting");
+ final int dist = BinaryDictionary.editDistance("kitten", "sitting");
assertEquals("edit distance between 'kitten' and 'sitting' is 3",
3, dist);
}
@@ -50,26 +50,26 @@ public class EditDistanceTests extends AndroidTestCase {
* S--unday
*/
public void testExample2() {
- final int dist = Utils.editDistance("Saturday", "Sunday");
+ final int dist = BinaryDictionary.editDistance("Saturday", "Sunday");
assertEquals("edit distance between 'Saturday' and 'Sunday' is 3",
3, dist);
}
public void testBothEmpty() {
- final int dist = Utils.editDistance("", "");
+ final int dist = BinaryDictionary.editDistance("", "");
assertEquals("when both string are empty, no edits are needed",
0, dist);
}
public void testFirstArgIsEmpty() {
- final int dist = Utils.editDistance("", "aaaa");
+ final int dist = BinaryDictionary.editDistance("", "aaaa");
assertEquals("when only one string of the arguments is empty,"
+ " the edit distance is the length of the other.",
4, dist);
}
public void testSecoondArgIsEmpty() {
- final int dist = Utils.editDistance("aaaa", "");
+ final int dist = BinaryDictionary.editDistance("aaaa", "");
assertEquals("when only one string of the arguments is empty,"
+ " the edit distance is the length of the other.",
4, dist);
@@ -78,27 +78,27 @@ public class EditDistanceTests extends AndroidTestCase {
public void testSameStrings() {
final String arg1 = "The quick brown fox jumps over the lazy dog.";
final String arg2 = "The quick brown fox jumps over the lazy dog.";
- final int dist = Utils.editDistance(arg1, arg2);
+ final int dist = BinaryDictionary.editDistance(arg1, arg2);
assertEquals("when same strings are passed, distance equals 0.",
0, dist);
}
public void testSameReference() {
final String arg = "The quick brown fox jumps over the lazy dog.";
- final int dist = Utils.editDistance(arg, arg);
+ final int dist = BinaryDictionary.editDistance(arg, arg);
assertEquals("when same string references are passed, the distance equals 0.",
0, dist);
}
public void testNullArg() {
try {
- Utils.editDistance(null, "aaa");
+ BinaryDictionary.editDistance(null, "aaa");
fail("IllegalArgumentException should be thrown.");
} catch (Exception e) {
assertTrue(e instanceof IllegalArgumentException);
}
try {
- Utils.editDistance("aaa", null);
+ BinaryDictionary.editDistance("aaa", null);
fail("IllegalArgumentException should be thrown.");
} catch (Exception e) {
assertTrue(e instanceof IllegalArgumentException);