Merge "Promote a word with only one proximity character."

author: satok <satok@google.com> 2011-04-13 03:18:57 -0700
committer: Android (Google) Code Review <android-gerrit@google.com> 2011-04-13 03:18:57 -0700
commit: 0851abf9990cac21dad5eaa70eefe33142c0e949 (patch)
tree: 376ce2f1d979533d2c46a5a79f49e8e526ea4873 /native/src
parent: dc3d4e8c06c099c3170a9503cf3a73c7f5569ed6 (diff)
parent: 72bc17ec9ff200a2fe6f737f3bc5a8fff15c426f (diff)
download: latinime-0851abf9990cac21dad5eaa70eefe33142c0e949.tar.gz
latinime-0851abf9990cac21dad5eaa70eefe33142c0e949.tar.xz
latinime-0851abf9990cac21dad5eaa70eefe33142c0e949.zip
2 files changed, 13 insertions, 1 deletion
diff --git a/native/src/defines.h b/native/src/defines.h
index ff195f40c..2a06e9485 100644
--- a/native/src/defines.h
+++ b/native/src/defines.h
@@ -138,13 +138,14 @@ static void prof_out(void) {
 #define SUGGEST_WORDS_WITH_SPACE_PROXIMITY true
 
 // The following "rate"s are used as a multiplier before dividing by 100, so they are in percent.
-#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 90
+#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 80
 #define WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X 12
 #define WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE 80
 #define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75
 #define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75
 #define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 60
 #define FULL_MATCHED_WORDS_PROMOTION_RATE 120
+#define WORDS_WITH_JUST_ONE_PROXIMITY_CHARACTER_PROMOTION_RATE 110
 
 // This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java
 // This is only used for the size of array. Not to be used in c functions.
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index 2ae2bca92..a1efc9573 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@@ -523,6 +523,9 @@ inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int
                     * (10 * mInputLength - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X)
                     / (10 * mInputLength
                             - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X + 10);
+            if (DEBUG_DICT) {
+                LOGI("Demotion rate for missing character is %d.", demotionRate);
+            }
             multiplyRate(demotionRate, &finalFreq);
         } else {
             finalFreq = 0;
@@ -539,6 +542,7 @@ inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int
     int lengthFreq = TYPED_LETTER_MULTIPLIER;
     for (int i = 0; i < depth; ++i) lengthFreq *= TYPED_LETTER_MULTIPLIER;
     if (lengthFreq == matchWeight) {
+        // Full exact match
         if (depth > 1) {
             if (DEBUG_DICT) {
                 LOGI("Found full matched word.");
@@ -548,6 +552,13 @@ inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int
         if (sameLength && transposedPos < 0 && skipPos < 0 && excessivePos < 0) {
             finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq);
         }
+    } else if (lengthFreq / 2 == matchWeight && transposedPos < 0 && skipPos < 0
+            && excessivePos < 0 && depth > 1) {
+        // Full match except only one proximity correction
+        if (DEBUG_DICT) {
+            LOGI("Found one proximity correction.");
+        }
+        multiplyRate(WORDS_WITH_JUST_ONE_PROXIMITY_CHARACTER_PROMOTION_RATE, &finalFreq);
     }
     if (sameLength) finalFreq *= FULL_WORD_MULTIPLIER;
     return finalFreq;
author	satok <satok@google.com>	2011-04-13 03:18:57 -0700
committer	Android (Google) Code Review <android-gerrit@google.com>	2011-04-13 03:18:57 -0700
commit	0851abf9990cac21dad5eaa70eefe33142c0e949 (patch)
tree	376ce2f1d979533d2c46a5a79f49e8e526ea4873 /native/src
parent	dc3d4e8c06c099c3170a9503cf3a73c7f5569ed6 (diff)
parent	72bc17ec9ff200a2fe6f737f3bc5a8fff15c426f (diff)
download	latinime-0851abf9990cac21dad5eaa70eefe33142c0e949.tar.gz latinime-0851abf9990cac21dad5eaa70eefe33142c0e949.tar.xz latinime-0851abf9990cac21dad5eaa70eefe33142c0e949.zip