aboutsummaryrefslogtreecommitdiffstats
path: root/native/src
diff options
context:
space:
mode:
author satok <satok@google.com> 2011-04-13 03:18:57 -0700
committer Android (Google) Code Review <android-gerrit@google.com> 2011-04-13 03:18:57 -0700
commit 0851abf9990cac21dad5eaa70eefe33142c0e949 (patch)
tree 376ce2f1d979533d2c46a5a79f49e8e526ea4873 /native/src
parent dc3d4e8c06c099c3170a9503cf3a73c7f5569ed6 (diff)
parent 72bc17ec9ff200a2fe6f737f3bc5a8fff15c426f (diff)
download latinime-0851abf9990cac21dad5eaa70eefe33142c0e949.tar.gz
latinime-0851abf9990cac21dad5eaa70eefe33142c0e949.tar.xz
latinime-0851abf9990cac21dad5eaa70eefe33142c0e949.zip
Merge "Promote a word with only one proximity character."
Diffstat (limited to 'native/src')
-rw-r--r-- native/src/defines.h 3
-rw-r--r-- native/src/unigram_dictionary.cpp 11
2 files changed, 13 insertions, 1 deletions
diff --git a/native/src/defines.h b/native/src/defines.h
index ff195f40c..2a06e9485 100644
--- a/native/src/defines.h
+++ b/native/src/defines.h
@@ -138,13 +138,14 @@ static void prof_out(void) {
#define SUGGEST_WORDS_WITH_SPACE_PROXIMITY true
// The following "rate"s are used as a multiplier before dividing by 100, so they are in percent.
-#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 90
+#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 80
#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X 12
#define WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE 80
#define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75
#define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75
#define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 60
#define FULL_MATCHED_WORDS_PROMOTION_RATE 120
+#define WORDS_WITH_JUST_ONE_PROXIMITY_CHARACTER_PROMOTION_RATE 110
// This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java
// This is only used for the size of array. Not to be used in c functions.
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index 2ae2bca92..a1efc9573 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@@ -523,6 +523,9 @@ inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int
* (10 * mInputLength - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X)
/ (10 * mInputLength
- WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X + 10);
+ if (DEBUG_DICT) {
+ LOGI("Demotion rate for missing character is %d.", demotionRate);
+ }
multiplyRate(demotionRate, &finalFreq);
} else {
finalFreq = 0;
@@ -539,6 +542,7 @@ inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int
int lengthFreq = TYPED_LETTER_MULTIPLIER;
for (int i = 0; i < depth; ++i) lengthFreq *= TYPED_LETTER_MULTIPLIER;
if (lengthFreq == matchWeight) {
+ // Full exact match
if (depth > 1) {
if (DEBUG_DICT) {
LOGI("Found full matched word.");
@@ -548,6 +552,13 @@ inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int
if (sameLength && transposedPos < 0 && skipPos < 0 && excessivePos < 0) {
finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq);
}
+ } else if (lengthFreq / 2 == matchWeight && transposedPos < 0 && skipPos < 0
+ && excessivePos < 0 && depth > 1) {
+ // Full match except only one proximity correction
+ if (DEBUG_DICT) {
+ LOGI("Found one proximity correction.");
+ }
+ multiplyRate(WORDS_WITH_JUST_ONE_PROXIMITY_CHARACTER_PROMOTION_RATE, &finalFreq);
}
if (sameLength) finalFreq *= FULL_WORD_MULTIPLIER;
return finalFreq;