diff options
author | 2011-01-27 03:23:39 +0900 | |
---|---|---|
committer | 2011-01-27 12:53:13 +0900 | |
commit | 58c49b91322847dc453742cb34c2899da9b44479 (patch) | |
tree | 645f163d8a75a9e422946eb908c50c6efcecd8fa /native/src | |
parent | 5c35e4109fc5a035605605b62bf5e5a5888b0f6b (diff) | |
download | latinime-58c49b91322847dc453742cb34c2899da9b44479.tar.gz latinime-58c49b91322847dc453742cb34c2899da9b44479.tar.xz latinime-58c49b91322847dc453742cb34c2899da9b44479.zip |
Fix auto-correction threshold and promote full matched words
Bug: 3374359
Bug: 3278422
"zbe" will be auto-corrected to "be" by fixing s-line
"teh" will be auto-corrected to "the" by promotion of full matched words
Change-Id: I314c632820e4e0b1501edeca60ada205d291451f
Diffstat (limited to 'native/src')
-rw-r--r-- | native/src/defines.h | 1 | ||||
-rw-r--r-- | native/src/unigram_dictionary.cpp | 18 | ||||
-rw-r--r-- | native/src/unigram_dictionary.h | 2 |
3 files changed, 14 insertions, 7 deletions
diff --git a/native/src/defines.h b/native/src/defines.h index 71aaf28ae..7374526ca 100644 --- a/native/src/defines.h +++ b/native/src/defines.h @@ -134,6 +134,7 @@ static void prof_out(void) { #define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75 #define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75 #define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 60 +#define FULL_MATCHED_WORDS_PROMOTION_RATE 120 // This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java // This is only used for the size of array. Not to be used in c functions. diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp index 3f9bcd758..06dd39aaa 100644 --- a/native/src/unigram_dictionary.cpp +++ b/native/src/unigram_dictionary.cpp @@ -347,9 +347,9 @@ void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, cons } } -inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int snr, - const int skipPos, const int excessivePos, const int transposedPos, const int freq, - const bool sameLength) { +inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int depth, + const int snr, const int skipPos, const int excessivePos, const int transposedPos, + const int freq, const bool sameLength) { // TODO: Demote by edit distance int finalFreq = freq * snr; if (skipPos >= 0) multiplyRate(WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE, &finalFreq); @@ -361,6 +361,12 @@ inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE, &finalFreq); } } + int lengthFreq = TYPED_LETTER_MULTIPLIER; + for (int i = 0; i < depth; ++i) lengthFreq *= TYPED_LETTER_MULTIPLIER; + if (depth > 1 && lengthFreq == snr) { + if (DEBUG_DICT) LOGI("Found full matched word."); + multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq); + } if (sameLength && skipPos < 0) finalFreq *= FULL_WORD_MULTIPLIER; 
return finalFreq; } @@ -369,8 +375,8 @@ inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsGreaterThanInputLe unsigned short *word, const int inputIndex, const int depth, const int snr, int *nextLetters, const int nextLettersSize, const int skipPos, const int excessivePos, const int transposedPos, const int freq) { - const int finalFreq = calculateFinalFreq(inputIndex, snr, skipPos, excessivePos, transposedPos, - freq, false); + const int finalFreq = calculateFinalFreq(inputIndex, depth, snr, skipPos, excessivePos, + transposedPos, freq, false); if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq); if (depth >= mInputLength && skipPos < 0) { registerNextLetter(mWord[mInputLength], nextLetters, nextLettersSize); @@ -382,7 +388,7 @@ inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsSameAsInputLength( const int skipPos, const int excessivePos, const int transposedPos, const int freq, const int addedWeight) { if (sameAsTyped(word, depth + 1)) return; - const int finalFreq = calculateFinalFreq(inputIndex, snr * addedWeight, skipPos, + const int finalFreq = calculateFinalFreq(inputIndex, depth, snr * addedWeight, skipPos, excessivePos, transposedPos, freq, true); // Proximity collection will promote a word of the same length as what user typed. 
if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq); diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h index 7f7b7bd21..95f965586 100644 --- a/native/src/unigram_dictionary.h +++ b/native/src/unigram_dictionary.h @@ -52,7 +52,7 @@ private: const int excessivePos, const int transposedPos, int *nextLetters, const int nextLettersSize); void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize); - int calculateFinalFreq(const int inputIndex, const int snr, const int skipPos, + int calculateFinalFreq(const int inputIndex, const int depth, const int snr, const int skipPos, const int excessivePos, const int transposedPos, const int freq, const bool sameLength); void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word, const int inputIndex, const int depth, const int snr, int *nextLetters, |