aboutsummaryrefslogtreecommitdiffstats
path: root/native/src/unigram_dictionary.cpp
diff options
context:
space:
mode:
authorJean Chalard <jchalard@google.com>2011-03-03 10:22:10 +0900
committerJean Chalard <jchalard@google.com>2011-03-05 13:20:19 +0900
commit07a8406bc184a354ea47fb6352e48df39e35310e (patch)
tree7aa88b26858323b36087ec2c5ef96f7574b235f3 /native/src/unigram_dictionary.cpp
parent50a4cb403f57c0981df584256ae3a88cbd53a31e (diff)
downloadlatinime-07a8406bc184a354ea47fb6352e48df39e35310e.tar.gz
latinime-07a8406bc184a354ea47fb6352e48df39e35310e.tar.xz
latinime-07a8406bc184a354ea47fb6352e48df39e35310e.zip
Demote words matched with skipped characters with respect to length.
Words that matched user input with skipped characters used to be demoted by a constant factor in BinaryDictionary, and not at all in the dictionaries implemented in Java code. To reflect the fact that the impact of a skipped character gets larger as the word gets shorter, this change implements a demotion that grows as the typed word gets shorter. The demotion rate is (n - 2) / (n - 1), where n is the length of the typed word, for n >= 2. This is implemented for both BinaryDictionary and the Java dictionaries. Bug: 3340731 Change-Id: I3a18be80a9708981d56a950dc25fe08f018b5b89
Diffstat (limited to 'native/src/unigram_dictionary.cpp')
-rw-r--r--native/src/unigram_dictionary.cpp13
1 files changed, 10 insertions, 3 deletions
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index 9aa36b064..17a87a708 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@@ -457,10 +457,17 @@ static inline int capped255MultForFullMatchAccentsOrCapitalizationDifference(con
}
inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int depth,
const int matchWeight, const int skipPos, const int excessivePos, const int transposedPos,
- const int freq, const bool sameLength) {
+ const int freq, const bool sameLength) const {
// TODO: Demote by edit distance
int finalFreq = freq * matchWeight;
- if (skipPos >= 0) multiplyRate(WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE, &finalFreq);
+ if (skipPos >= 0) {
+ if (mInputLength >= 3) {
+ multiplyRate(WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE *
+ (mInputLength - 2) / (mInputLength - 1), &finalFreq);
+ } else {
+ finalFreq = 0;
+ }
+ }
if (transposedPos >= 0) multiplyRate(
WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE, &finalFreq);
if (excessivePos >= 0) {
@@ -514,7 +521,7 @@ inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
}
inline bool UnigramDictionary::existsAdjacentProximityChars(const int inputIndex,
- const int inputLength) {
+ const int inputLength) const {
if (inputIndex < 0 || inputIndex >= inputLength) return false;
const int currentChar = *getInputCharsAt(inputIndex);
const int leftIndex = inputIndex - 1;