about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author satok <satok@google.com> 2011-05-18 15:31:04 +0900
committer satok <satok@google.com> 2011-05-18 18:36:54 +0900
commit d8db9f86d02c9d4adad6047a9daac0742d756400 (patch)
tree e9663d80e2d426577021abcf90da16e96c0fd1c9
parent d2d21ce0beeadfbcc477fcae32694846cb89dfe1 (diff)
download latinime-d8db9f86d02c9d4adad6047a9daac0742d756400.tar.gz
latinime-d8db9f86d02c9d4adad6047a9daac0742d756400.tar.xz
latinime-d8db9f86d02c9d4adad6047a9daac0742d756400.zip
Fix a bug on the calculation of the freq on the mistyped space error correction
Bug: 4402942 Change-Id: I0b611e3d0e8c25ca528ef7408c3949200e5cad85
-rw-r--r-- native/src/unigram_dictionary.cpp 23
-rw-r--r-- native/src/unigram_dictionary.h 2
2 files changed, 17 insertions, 8 deletions
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index 20a185219..0930b8e4c 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@@ -451,8 +451,8 @@ inline static void multiplyRate(const int rate, int *freq) {
}
inline static int calcFreqForSplitTwoWords(
- const int typedLetterMultiplier, const int firstWordLength,
- const int secondWordLength, const int firstFreq, const int secondFreq) {
+ const int typedLetterMultiplier, const int firstWordLength, const int secondWordLength,
+ const int firstFreq, const int secondFreq, const bool isSpaceProximity) {
if (firstWordLength == 0 || secondWordLength == 0) {
return 0;
}
@@ -492,13 +492,22 @@ inline static int calcFreqForSplitTwoWords(
const int normalizedScoreDemotionRateOffset = (100 + 100 / totalLength);
multiplyRate(normalizedScoreDemotionRateOffset, &totalFreq);
+ if (isSpaceProximity) {
+ // A word pair with one space proximity correction
+ if (DEBUG_DICT) {
+ LOGI("Found a word pair with space proximity correction.");
+ }
+ multiplyIntCapped(typedLetterMultiplier, &totalFreq);
+ multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &totalFreq);
+ }
+
multiplyRate(WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE, &totalFreq);
return totalFreq;
}
bool UnigramDictionary::getSplitTwoWordsSuggestion(const int inputLength,
const int firstWordStartPos, const int firstWordLength, const int secondWordStartPos,
- const int secondWordLength) {
+ const int secondWordLength, const bool isSpaceProximity) {
if (inputLength >= MAX_WORD_LENGTH) return false;
if (0 >= firstWordLength || 0 >= secondWordLength || firstWordStartPos >= secondWordStartPos
|| firstWordStartPos < 0 || secondWordStartPos + secondWordLength > inputLength)
@@ -527,8 +536,8 @@ bool UnigramDictionary::getSplitTwoWordsSuggestion(const int inputLength,
word[i] = mWord[i - firstWordLength - 1];
}
- int pairFreq = calcFreqForSplitTwoWords(
- TYPED_LETTER_MULTIPLIER, firstWordLength, secondWordLength, firstFreq, secondFreq);
+ int pairFreq = calcFreqForSplitTwoWords(TYPED_LETTER_MULTIPLIER, firstWordLength,
+ secondWordLength, firstFreq, secondFreq, isSpaceProximity);
if (DEBUG_DICT) {
LOGI("Split two words: %d, %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength,
TYPED_LETTER_MULTIPLIER);
@@ -539,13 +548,13 @@ bool UnigramDictionary::getSplitTwoWordsSuggestion(const int inputLength,
bool UnigramDictionary::getMissingSpaceWords(const int inputLength, const int missingSpacePos) {
return getSplitTwoWordsSuggestion(
- inputLength, 0, missingSpacePos, missingSpacePos, inputLength - missingSpacePos);
+ inputLength, 0, missingSpacePos, missingSpacePos, inputLength - missingSpacePos, false);
}
bool UnigramDictionary::getMistypedSpaceWords(const int inputLength, const int spaceProximityPos) {
return getSplitTwoWordsSuggestion(
inputLength, 0, spaceProximityPos, spaceProximityPos + 1,
- inputLength - spaceProximityPos - 1);
+ inputLength - spaceProximityPos - 1, true);
}
// Keep this for comparing spec to new getWords
diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h
index 3d3007ce0..dd1b89042 100644
--- a/native/src/unigram_dictionary.h
+++ b/native/src/unigram_dictionary.h
@@ -66,7 +66,7 @@ private:
const int nextLettersSize);
bool getSplitTwoWordsSuggestion(const int inputLength,
const int firstWordStartPos, const int firstWordLength,
- const int secondWordStartPos, const int secondWordLength);
+ const int secondWordStartPos, const int secondWordLength, const bool isSpaceProximity);
bool getMissingSpaceWords(const int inputLength, const int missingSpacePos);
bool getMistypedSpaceWords(const int inputLength, const int spaceProximityPos);
// Keep getWordsOld for comparing performance between getWords and getWordsOld