aboutsummaryrefslogtreecommitdiffstats
path: root/java/src/com/android/inputmethod/latin/Utils.java
diff options
context:
space:
mode:
authorsatok <satok@google.com>2012-01-12 02:58:35 -0800
committerAndroid (Google) Code Review <android-gerrit@google.com>2012-01-12 02:58:35 -0800
commit21814c56f0859a2d83d1ecc29f4a74210c4497a1 (patch)
treef6236e3c6040f91d381738db515d1a1bb0df422f /java/src/com/android/inputmethod/latin/Utils.java
parentab34a4a7f53e1426a5cc4cd7a7fefde38a82e499 (diff)
parentbe0cf72253f15bff6abdeaa79f60a56f06ab7b86 (diff)
downloadlatinime-21814c56f0859a2d83d1ecc29f4a74210c4497a1.tar.gz
latinime-21814c56f0859a2d83d1ecc29f4a74210c4497a1.tar.xz
latinime-21814c56f0859a2d83d1ecc29f4a74210c4497a1.zip
Merge "Move auto correction thresthold to the native code"
Diffstat (limited to 'java/src/com/android/inputmethod/latin/Utils.java')
-rw-r--r--java/src/com/android/inputmethod/latin/Utils.java95
1 files changed, 2 insertions, 93 deletions
diff --git a/java/src/com/android/inputmethod/latin/Utils.java b/java/src/com/android/inputmethod/latin/Utils.java
index bfa51897e..38148438b 100644
--- a/java/src/com/android/inputmethod/latin/Utils.java
+++ b/java/src/com/android/inputmethod/latin/Utils.java
@@ -191,7 +191,8 @@ public class Utils {
final int typedWordLength = typedWord.length();
final int maxEditDistanceOfNativeDictionary =
(typedWordLength < 5 ? 2 : typedWordLength / 2) + 1;
- final int distance = Utils.editDistance(typedWord, suggestionWord);
+ final int distance = BinaryDictionary.editDistance(
+ typedWord.toString(), suggestionWord.toString());
if (DBG) {
Log.d(TAG, "Autocorrected edit distance = " + distance
+ ", " + maxEditDistanceOfNativeDictionary);
@@ -323,49 +324,6 @@ public class Utils {
}
}
-
- /* Damerau-Levenshtein distance */
- public static int editDistance(CharSequence s, CharSequence t) {
- if (s == null || t == null) {
- throw new IllegalArgumentException("editDistance: Arguments should not be null.");
- }
- final int sl = s.length();
- final int tl = t.length();
- int[][] dp = new int [sl + 1][tl + 1];
- for (int i = 0; i <= sl; i++) {
- dp[i][0] = i;
- }
- for (int j = 0; j <= tl; j++) {
- dp[0][j] = j;
- }
- for (int i = 0; i < sl; ++i) {
- for (int j = 0; j < tl; ++j) {
- final char sc = Character.toLowerCase(s.charAt(i));
- final char tc = Character.toLowerCase(t.charAt(j));
- final int cost = sc == tc ? 0 : 1;
- dp[i + 1][j + 1] = Math.min(
- dp[i][j + 1] + 1, Math.min(dp[i + 1][j] + 1, dp[i][j] + cost));
- // Overwrite for transposition cases
- if (i > 0 && j > 0
- && sc == Character.toLowerCase(t.charAt(j - 1))
- && tc == Character.toLowerCase(s.charAt(i - 1))) {
- dp[i + 1][j + 1] = Math.min(dp[i + 1][j + 1], dp[i - 1][j - 1] + cost);
- }
- }
- }
- if (DBG_EDIT_DISTANCE) {
- Log.d(TAG, "editDistance:" + s + "," + t);
- for (int i = 0; i < dp.length; ++i) {
- StringBuffer sb = new StringBuffer();
- for (int j = 0; j < dp[i].length; ++j) {
- sb.append(dp[i][j]).append(',');
- }
- Log.d(TAG, i + ":" + sb.toString());
- }
- }
- return dp[sl][tl];
- }
-
// Get the current stack trace
public static String getStackTrace() {
StringBuilder sb = new StringBuilder();
@@ -379,55 +337,6 @@ public class Utils {
return sb.toString();
}
- // In dictionary.cpp, getSuggestion() method,
- // suggestion scores are computed using the below formula.
- // original score
- // := pow(mTypedLetterMultiplier (this is defined 2),
- // (the number of matched characters between typed word and suggested word))
- // * (individual word's score which defined in the unigram dictionary,
- // and this score is defined in range [0, 255].)
- // Then, the following processing is applied.
- // - If the dictionary word is matched up to the point of the user entry
- // (full match up to min(before.length(), after.length())
- // => Then multiply by FULL_MATCHED_WORDS_PROMOTION_RATE (this is defined 1.2)
- // - If the word is a true full match except for differences in accents or
- // capitalization, then treat it as if the score was 255.
- // - If before.length() == after.length()
- // => multiply by mFullWordMultiplier (this is defined 2))
- // So, maximum original score is pow(2, min(before.length(), after.length())) * 255 * 2 * 1.2
- // For historical reasons we ignore the 1.2 modifier (because the measure for a good
- // autocorrection threshold was done at a time when it didn't exist). This doesn't change
- // the result.
- // So, we can normalize original score by dividing pow(2, min(b.l(),a.l())) * 255 * 2.
- private static final int MAX_INITIAL_SCORE = 255;
- private static final int TYPED_LETTER_MULTIPLIER = 2;
- private static final int FULL_WORD_MULTIPLIER = 2;
- private static final int S_INT_MAX = 2147483647;
- public static double calcNormalizedScore(CharSequence before, CharSequence after, int score) {
- final int beforeLength = before.length();
- final int afterLength = after.length();
- if (beforeLength == 0 || afterLength == 0) return 0;
- final int distance = editDistance(before, after);
- // If afterLength < beforeLength, the algorithm is suggesting a word by excessive character
- // correction.
- int spaceCount = 0;
- for (int i = 0; i < afterLength; ++i) {
- if (after.charAt(i) == Keyboard.CODE_SPACE) {
- ++spaceCount;
- }
- }
- if (spaceCount == afterLength) return 0;
- final double maximumScore = score == S_INT_MAX ? S_INT_MAX : MAX_INITIAL_SCORE
- * Math.pow(
- TYPED_LETTER_MULTIPLIER, Math.min(beforeLength, afterLength - spaceCount))
- * FULL_WORD_MULTIPLIER;
- // add a weight based on edit distance.
- // distance <= max(afterLength, beforeLength) == afterLength,
- // so, 0 <= distance / afterLength <= 1
- final double weight = 1.0 - (double) distance / afterLength;
- return (score / maximumScore) * weight;
- }
-
public static class UsabilityStudyLogUtils {
private static final String USABILITY_TAG = UsabilityStudyLogUtils.class.getSimpleName();
private static final String FILENAME = "log.txt";