From e18ba4275f7fedfc2f1be13799a8c73d22b53e02 Mon Sep 17 00:00:00 2001 From: satok Date: Wed, 20 Apr 2011 14:52:11 +0900 Subject: Tweak edit distance - The edit distance of transposed characters will be changed to 1 from 2 Change-Id: I369e2f94278b3b290957eef43d23cb7048166a99 --- java/src/com/android/inputmethod/latin/Utils.java | 27 ++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) (limited to 'java/src/com/android/inputmethod/latin/Utils.java') diff --git a/java/src/com/android/inputmethod/latin/Utils.java b/java/src/com/android/inputmethod/latin/Utils.java index 7b334ac0a..f01a81d6e 100644 --- a/java/src/com/android/inputmethod/latin/Utils.java +++ b/java/src/com/android/inputmethod/latin/Utils.java @@ -258,6 +258,8 @@ public class Utils { } } + + /* Damerau-Levenshtein distance */ public static int editDistance(CharSequence s, CharSequence t) { if (s == null || t == null) { throw new IllegalArgumentException("editDistance: Arguments should not be null."); @@ -273,12 +275,27 @@ public class Utils { } for (int i = 0; i < sl; ++i) { for (int j = 0; j < tl; ++j) { - if (Character.toLowerCase(s.charAt(i)) == Character.toLowerCase(t.charAt(j))) { - dp[i + 1][j + 1] = dp[i][j]; - } else { - dp[i + 1][j + 1] = 1 + Math.min(dp[i][j], - Math.min(dp[i + 1][j], dp[i][j + 1])); + final char sc = Character.toLowerCase(s.charAt(i)); + final char tc = Character.toLowerCase(t.charAt(j)); + final int cost = sc == tc ? 0 : 1; + dp[i + 1][j + 1] = Math.min( + dp[i][j + 1] + 1, Math.min(dp[i + 1][j] + 1, dp[i][j] + cost)); + // Overwrite for transposition cases + if (i > 0 && j > 0 + && sc == Character.toLowerCase(t.charAt(j - 1)) + && tc == Character.toLowerCase(s.charAt(i - 1))) { + dp[i + 1][j + 1] = Math.min(dp[i + 1][j + 1], dp[i - 1][j - 1] + cost); + } + } + } + if (LatinImeLogger.sDBG) { + Log.d(TAG, "editDistance:" + s + "," + t); + for (int i = 0; i < dp.length; ++i) { + StringBuffer sb = new StringBuffer(); + for (int j = 0; j < dp[i].length; ++j) { + sb.append(dp[i][j]).append(','); } + Log.d(TAG, i + ":" + sb.toString()); } } return dp[sl][tl]; -- cgit v1.2.3-83-g751a From 22104663fc3ed9228c6a6ac35a893b23e50c8b33 Mon Sep 17 00:00:00 2001 From: satok Date: Wed, 20 Apr 2011 17:33:47 +0900 Subject: Tweak the normalized score for the words with spaces Change-Id: Icc0c4fd0474ed5094fb274a6ea745ca583ba524d --- java/src/com/android/inputmethod/latin/Utils.java | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'java/src/com/android/inputmethod/latin/Utils.java') diff --git a/java/src/com/android/inputmethod/latin/Utils.java b/java/src/com/android/inputmethod/latin/Utils.java index f01a81d6e..f8b23cb65 100644 --- a/java/src/com/android/inputmethod/latin/Utils.java +++ b/java/src/com/android/inputmethod/latin/Utils.java @@ -19,6 +19,7 @@ package com.android.inputmethod.latin; import com.android.inputmethod.compat.InputMethodInfoCompatWrapper; import com.android.inputmethod.compat.InputMethodManagerCompatWrapper; import com.android.inputmethod.compat.InputTypeCompatUtils; +import com.android.inputmethod.keyboard.Keyboard; import com.android.inputmethod.keyboard.KeyboardId; import android.content.Context; @@ -344,8 +345,16 @@ public class Utils { final int distance = editDistance(before, after); // If afterLength < beforeLength, the algorithm is suggesting a word by excessive character // correction. + int spaceCount = 0; + for (int i = 0; i < afterLength; ++i) { + if (after.charAt(i) == Keyboard.CODE_SPACE) { + ++spaceCount; + } + } + if (spaceCount == afterLength) return 0; final double maximumScore = MAX_INITIAL_SCORE - * Math.pow(TYPED_LETTER_MULTIPLIER, Math.min(beforeLength, afterLength)) + * Math.pow( + TYPED_LETTER_MULTIPLIER, Math.min(beforeLength, afterLength - spaceCount)) * FULL_WORD_MULTIPLIER; // add a weight based on edit distance. // distance <= max(afterLength, beforeLength) == afterLength, -- cgit v1.2.3-83-g751a