aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorsatok <satok@google.com>2011-04-26 23:55:13 -0700
committerAndroid (Google) Code Review <android-gerrit@google.com>2011-04-26 23:55:13 -0700
commitbc475dc297ab657cf8c859547a581e6c8e166cf3 (patch)
tree2421eda884f19c5b9c0d28783252df73a9f76878
parentb880ccc3bd9e74524f4d27c6768e757718ff2c21 (diff)
parentb2e5e5937ca96a448081466a9f43e937787f0c24 (diff)
downloadlatinime-bc475dc297ab657cf8c859547a581e6c8e166cf3.tar.gz
latinime-bc475dc297ab657cf8c859547a581e6c8e166cf3.tar.xz
latinime-bc475dc297ab657cf8c859547a581e6c8e166cf3.zip
Merge "Handle overflow properly in multiplyRate"
-rw-r--r--java/src/com/android/inputmethod/latin/Utils.java6
-rw-r--r--native/src/unigram_dictionary.cpp61
2 files changed, 48 insertions, 19 deletions
diff --git a/java/src/com/android/inputmethod/latin/Utils.java b/java/src/com/android/inputmethod/latin/Utils.java
index b5afb5079..9ce305f32 100644
--- a/java/src/com/android/inputmethod/latin/Utils.java
+++ b/java/src/com/android/inputmethod/latin/Utils.java
@@ -48,6 +48,7 @@ public class Utils {
private static final String TAG = Utils.class.getSimpleName();
private static final int MINIMUM_SAFETY_NET_CHAR_LENGTH = 4;
private static boolean DBG = LatinImeLogger.sDBG;
+ private static boolean DBG_EDIT_DISTANCE = false;
private Utils() {
// Intentional empty constructor for utility class.
@@ -289,7 +290,7 @@ public class Utils {
}
}
}
- if (LatinImeLogger.sDBG) {
+ if (DBG_EDIT_DISTANCE) {
Log.d(TAG, "editDistance:" + s + "," + t);
for (int i = 0; i < dp.length; ++i) {
StringBuffer sb = new StringBuffer();
@@ -338,6 +339,7 @@ public class Utils {
private static final int MAX_INITIAL_SCORE = 255;
private static final int TYPED_LETTER_MULTIPLIER = 2;
private static final int FULL_WORD_MULTIPLIER = 2;
+ private static final int S_INT_MAX = 2147483647;
public static double calcNormalizedScore(CharSequence before, CharSequence after, int score) {
final int beforeLength = before.length();
final int afterLength = after.length();
@@ -352,7 +354,7 @@ public class Utils {
}
}
if (spaceCount == afterLength) return 0;
- final double maximumScore = MAX_INITIAL_SCORE
+ final double maximumScore = score == S_INT_MAX ? S_INT_MAX : MAX_INITIAL_SCORE
* Math.pow(
TYPED_LETTER_MULTIPLIER, Math.min(beforeLength, afterLength - spaceCount))
* FULL_WORD_MULTIPLIER;
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index b9f4b961d..cd2f141cf 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@@ -300,7 +300,7 @@ bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency)
if (DEBUG_DICT) {
char s[length + 1];
for (int i = 0; i <= length; i++) s[i] = word[i];
- LOGI("Added word = %s, freq = %d", s, frequency);
+ LOGI("Added word = %s, freq = %d, %d", s, frequency, S_INT_MAX);
}
memmove((char*) mFrequencies + (insertAt + 1) * sizeof(mFrequencies[0]),
(char*) mFrequencies + insertAt * sizeof(mFrequencies[0]),
@@ -409,11 +409,44 @@ void UnigramDictionary::getSuggestionCandidates(const int skipPos,
}
}
-inline static void multiplyRate(const int rate, int *freq) {
- if (rate > 1000000) {
- *freq = (*freq / 100) * rate;
+static const int TWO_31ST_DIV_255 = S_INT_MAX / 255;
+static inline int capped255MultForFullMatchAccentsOrCapitalizationDifference(const int num) {
+ return (num < TWO_31ST_DIV_255 ? 255 * num : S_INT_MAX);
+}
+
+static const int TWO_31ST_DIV_2 = S_INT_MAX / 2;
+inline static void multiplyIntCapped(const int multiplier, int *base) {
+ const int temp = *base;
+ if (temp != S_INT_MAX) {
+ // Branch if multiplier == 2 for the optimization
+ if (multiplier == 2) {
+ *base = TWO_31ST_DIV_2 >= temp ? temp << 1 : S_INT_MAX;
+ } else {
+ const int tempRetval = temp * multiplier;
+ *base = tempRetval >= temp ? tempRetval : S_INT_MAX;
+ }
+ }
+}
+
+inline static int powerIntCapped(const int base, const int n) {
+ if (false && base == 2) {
+ return n < 31 ? 1 << n : S_INT_MAX;
} else {
- *freq = *freq * rate / 100;
+ int ret = base;
+ for (int i = 1; i < n; ++i) multiplyIntCapped(base, &ret);
+ return ret;
+ }
+}
+
+inline static void multiplyRate(const int rate, int *freq) {
+ if (*freq != S_INT_MAX) {
+ if (*freq > 1000000) {
+ *freq /= 100;
+ multiplyIntCapped(rate, freq);
+ } else {
+ multiplyIntCapped(rate, freq);
+ *freq /= 100;
+ }
}
}
@@ -449,9 +482,7 @@ inline static int calcFreqForSplitTwoWords(
// (firstFreq * (1 - 1 / (firstWordLength + 1)) + secondFreq * (1 - 1 / (secondWordLength + 1)))
// * (1 - 1 / totalLength) / (1 - 1 / (totalLength + 1))
- for (int i = 0; i < totalLength; ++i) {
- totalFreq *= typedLetterMultiplier;
- }
+ multiplyIntCapped(powerIntCapped(typedLetterMultiplier, totalLength), &totalFreq);
// This is another workaround to offset the demotion which will be done in
// calcNormalizedScore in Utils.java.
@@ -499,7 +530,7 @@ bool UnigramDictionary::getSplitTwoWordsSuggestion(const int inputLength,
int pairFreq = calcFreqForSplitTwoWords(
TYPED_LETTER_MULTIPLIER, firstWordLength, secondWordLength, firstFreq, secondFreq);
if (DEBUG_DICT) {
- LOGI("Missing space: %d, %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength,
+ LOGI("Split two words: %d, %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength,
TYPED_LETTER_MULTIPLIER);
}
addWord(word, newWordLength, pairFreq);
@@ -559,10 +590,6 @@ void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, cons
}
}
-static const int TWO_31ST_DIV_255 = S_INT_MAX / 255;
-static inline int capped255MultForFullMatchAccentsOrCapitalizationDifference(const int num) {
- return (num < TWO_31ST_DIV_255 ? 255 * num : S_INT_MAX);
-}
inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int depth,
const int matchWeight, const int skipPos, const int excessivePos, const int transposedPos,
const int freq, const bool sameLength) const {
@@ -591,7 +618,7 @@ inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int
}
}
int lengthFreq = TYPED_LETTER_MULTIPLIER;
- for (int i = 0; i < depth; ++i) lengthFreq *= TYPED_LETTER_MULTIPLIER;
+ multiplyIntCapped(powerIntCapped(TYPED_LETTER_MULTIPLIER, depth), &lengthFreq);
if (lengthFreq == matchWeight) {
// Full exact match
if (depth > 1) {
@@ -608,13 +635,13 @@ inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int
if (DEBUG_DICT) {
LOGI("Found one proximity correction.");
}
- finalFreq *= 2;
+ multiplyIntCapped(TYPED_LETTER_MULTIPLIER, &finalFreq);
multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);
}
if (DEBUG_DICT) {
LOGI("calc: %d, %d", depth, sameLength);
}
- if (sameLength) finalFreq *= FULL_WORD_MULTIPLIER;
+ if (sameLength) multiplyIntCapped(FULL_WORD_MULTIPLIER, &finalFreq);
return finalFreq;
}
@@ -767,7 +794,7 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
// If inputIndex is greater than mInputLength, that means there is no
// proximity chars. So, we don't need to check proximity.
if (SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) {
- matchWeight = matchWeight * TYPED_LETTER_MULTIPLIER;
+ multiplyIntCapped(TYPED_LETTER_MULTIPLIER, &matchWeight);
}
bool isSameAsUserTypedLength = mInputLength == inputIndex + 1
|| (excessivePos == mInputLength - 1 && inputIndex == mInputLength - 2);