about | summary | refs | log | tree | commit | diff | stats
path: root/native/src
diff options
context:
space:
mode:
author Jean Chalard <jchalard@google.com> 2011-03-04 22:43:16 -0800
committer Android (Google) Code Review <android-gerrit@google.com> 2011-03-04 22:43:16 -0800
commit eaecb56f948a4979e72346f6c5c64b56f7bc7bbf (patch)
tree d6a39b91831f6c8d58f1f381e54c52788acbe16f /native/src
parent f9a5bfa147b07f135e8da6f9b7305c31181fa5eb (diff)
parent 07a8406bc184a354ea47fb6352e48df39e35310e (diff)
download latinime-eaecb56f948a4979e72346f6c5c64b56f7bc7bbf.tar.gz
latinime-eaecb56f948a4979e72346f6c5c64b56f7bc7bbf.tar.xz
latinime-eaecb56f948a4979e72346f6c5c64b56f7bc7bbf.zip
Merge "Demote skipped characters matched words with respect to length." into honeycomb-mr1
Diffstat (limited to 'native/src')
-rw-r--r-- native/src/defines.h 2
-rw-r--r-- native/src/unigram_dictionary.cpp 13
-rw-r--r-- native/src/unigram_dictionary.h 7
3 files changed, 15 insertions, 7 deletions
diff --git a/native/src/defines.h b/native/src/defines.h
index 9534f8a87..16927e5bb 100644
--- a/native/src/defines.h
+++ b/native/src/defines.h
@@ -138,7 +138,7 @@ static void prof_out(void) {
#define SUGGEST_WORDS_WITH_SPACE_PROXIMITY true
// The following "rate"s are used as a multiplier before dividing by 100, so they are in percent.
-#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 75
+#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 100
#define WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE 80
#define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75
#define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index 274e1f6d3..3487d4f11 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@@ -493,10 +493,17 @@ static inline int capped255MultForFullMatchAccentsOrCapitalizationDifference(con
}
inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int depth,
const int matchWeight, const int skipPos, const int excessivePos, const int transposedPos,
- const int freq, const bool sameLength) {
+ const int freq, const bool sameLength) const {
// TODO: Demote by edit distance
int finalFreq = freq * matchWeight;
- if (skipPos >= 0) multiplyRate(WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE, &finalFreq);
+ if (skipPos >= 0) {
+ if (mInputLength >= 3) {
+ multiplyRate(WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE *
+ (mInputLength - 2) / (mInputLength - 1), &finalFreq);
+ } else {
+ finalFreq = 0;
+ }
+ }
if (transposedPos >= 0) multiplyRate(
WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE, &finalFreq);
if (excessivePos >= 0) {
@@ -550,7 +557,7 @@ inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
}
inline bool UnigramDictionary::existsAdjacentProximityChars(const int inputIndex,
- const int inputLength) {
+ const int inputLength) const {
if (inputIndex < 0 || inputIndex >= inputLength) return false;
const int currentChar = *getInputCharsAt(inputIndex);
const int leftIndex = inputIndex - 1;
diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h
index 7359481a8..ef820cba5 100644
--- a/native/src/unigram_dictionary.h
+++ b/native/src/unigram_dictionary.h
@@ -75,7 +75,8 @@ private:
const int nextLettersSize);
void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
int calculateFinalFreq(const int inputIndex, const int depth, const int snr, const int skipPos,
- const int excessivePos, const int transposedPos, const int freq, const bool sameLength);
+ const int excessivePos, const int transposedPos, const int freq,
+ const bool sameLength) const;
void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word,
const int inputIndex, const int depth, const int snr, int *nextLetters,
const int nextLettersSize, const int skipPos, const int excessivePos,
@@ -99,8 +100,8 @@ private:
bool processCurrentNodeForExactMatch(const int firstChildPos,
const int startInputIndex, const int depth, unsigned short *word,
int *newChildPosition, int *newCount, bool *newTerminal, int *newFreq, int *siblingPos);
- bool existsAdjacentProximityChars(const int inputIndex, const int inputLength);
- inline const int* getInputCharsAt(const int index) {
+ bool existsAdjacentProximityChars(const int inputIndex, const int inputLength) const;
+ inline const int* getInputCharsAt(const int index) const {
return mInputCodes + (index * MAX_PROXIMITY_CHARS);
}
const unsigned char *DICT;