diff options
author | 2011-03-04 22:43:16 -0800 | |
---|---|---|
committer | 2011-03-04 22:43:16 -0800 | |
commit | eaecb56f948a4979e72346f6c5c64b56f7bc7bbf (patch) | |
tree | d6a39b91831f6c8d58f1f381e54c52788acbe16f /native/src | |
parent | f9a5bfa147b07f135e8da6f9b7305c31181fa5eb (diff) | |
parent | 07a8406bc184a354ea47fb6352e48df39e35310e (diff) | |
download | latinime-eaecb56f948a4979e72346f6c5c64b56f7bc7bbf.tar.gz latinime-eaecb56f948a4979e72346f6c5c64b56f7bc7bbf.tar.xz latinime-eaecb56f948a4979e72346f6c5c64b56f7bc7bbf.zip |
Merge "Demote skipped characters matched words with respect to length." into honeycomb-mr1
Diffstat (limited to 'native/src')
-rw-r--r-- | native/src/defines.h | 2 | ||||
-rw-r--r-- | native/src/unigram_dictionary.cpp | 13 | ||||
-rw-r--r-- | native/src/unigram_dictionary.h | 7 |
3 files changed, 15 insertions, 7 deletions
diff --git a/native/src/defines.h b/native/src/defines.h index 9534f8a87..16927e5bb 100644 --- a/native/src/defines.h +++ b/native/src/defines.h @@ -138,7 +138,7 @@ static void prof_out(void) { #define SUGGEST_WORDS_WITH_SPACE_PROXIMITY true // The following "rate"s are used as a multiplier before dividing by 100, so they are in percent. -#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 75 +#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 100 #define WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE 80 #define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75 #define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75 diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp index 274e1f6d3..3487d4f11 100644 --- a/native/src/unigram_dictionary.cpp +++ b/native/src/unigram_dictionary.cpp @@ -493,10 +493,17 @@ static inline int capped255MultForFullMatchAccentsOrCapitalizationDifference(con } inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int depth, const int matchWeight, const int skipPos, const int excessivePos, const int transposedPos, - const int freq, const bool sameLength) { + const int freq, const bool sameLength) const { // TODO: Demote by edit distance int finalFreq = freq * matchWeight; - if (skipPos >= 0) multiplyRate(WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE, &finalFreq); + if (skipPos >= 0) { + if (mInputLength >= 3) { + multiplyRate(WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE * + (mInputLength - 2) / (mInputLength - 1), &finalFreq); + } else { + finalFreq = 0; + } + } if (transposedPos >= 0) multiplyRate( WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE, &finalFreq); if (excessivePos >= 0) { @@ -550,7 +557,7 @@ inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c, } inline bool UnigramDictionary::existsAdjacentProximityChars(const int inputIndex, - const int inputLength) { + const int inputLength) const { if (inputIndex < 0 || inputIndex >= inputLength) return false; const int currentChar = *getInputCharsAt(inputIndex); const int leftIndex = inputIndex - 1; diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h index 7359481a8..ef820cba5 100644 --- a/native/src/unigram_dictionary.h +++ b/native/src/unigram_dictionary.h @@ -75,7 +75,8 @@ private: const int nextLettersSize); void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize); int calculateFinalFreq(const int inputIndex, const int depth, const int snr, const int skipPos, - const int excessivePos, const int transposedPos, const int freq, const bool sameLength); + const int excessivePos, const int transposedPos, const int freq, + const bool sameLength) const; void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word, const int inputIndex, const int depth, const int snr, int *nextLetters, const int nextLettersSize, const int skipPos, const int excessivePos, @@ -99,8 +100,8 @@ private: bool processCurrentNodeForExactMatch(const int firstChildPos, const int startInputIndex, const int depth, unsigned short *word, int *newChildPosition, int *newCount, bool *newTerminal, int *newFreq, int *siblingPos); - bool existsAdjacentProximityChars(const int inputIndex, const int inputLength); - inline const int* getInputCharsAt(const int index) { + bool existsAdjacentProximityChars(const int inputIndex, const int inputLength) const; + inline const int* getInputCharsAt(const int index) const { return mInputCodes + (index * MAX_PROXIMITY_CHARS); } const unsigned char *DICT; |