diff options
author | 2011-10-03 19:21:13 +0900 | |
---|---|---|
committer | 2011-10-03 20:11:06 +0900 | |
commit | eb050fc2dc97a7e6ddcaf254c110dc16279dfd0d (patch) | |
tree | d223a70e3dbff9ec722406b1b4c17d04e4f7a6f8 /native/src/correction.cpp | |
parent | 904baab25a4c6ec5d9c4bf7e562154e3f544d296 (diff) | |
download | latinime-eb050fc2dc97a7e6ddcaf254c110dc16279dfd0d.tar.gz latinime-eb050fc2dc97a7e6ddcaf254c110dc16279dfd0d.tar.xz latinime-eb050fc2dc97a7e6ddcaf254c110dc16279dfd0d.zip |
Demote words with a capitalized char
Bug: 5371514
+1 4
-1 2
+2 0
-2 0
+3 0
-3 0
+4 1
-4 3
+5 0
-5 12
+6 3
-6 3
+7 12
-7 0
Change-Id: I6b46e43f9059f1e8a1cc02a626ea6eb8f1f9924f
Diffstat (limited to 'native/src/correction.cpp')
-rw-r--r-- | native/src/correction.cpp | 45 |
1 file changed, 40 insertions, 5 deletions
```diff
diff --git a/native/src/correction.cpp b/native/src/correction.cpp
index 5128c2e5c..9e75ffc3e 100644
--- a/native/src/correction.cpp
+++ b/native/src/correction.cpp
@@ -15,6 +15,7 @@
  */

 #include <assert.h>
+#include <ctype.h>
 #include <stdio.h>
 #include <string.h>

@@ -89,8 +90,10 @@ void Correction::checkState() {
     }
 }

-int Correction::getFreqForSplitTwoWords(const int firstFreq, const int secondFreq) {
-    return Correction::RankingAlgorithm::calcFreqForSplitTwoWords(firstFreq, secondFreq, this);
+int Correction::getFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
+        const unsigned short *word) {
+    return Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
+            firstFreq, secondFreq, this, word);
 }

 int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLength) {
@@ -498,6 +501,16 @@ inline static int getQuoteCount(const unsigned short* word, const int length) {
     return quoteCount;
 }

+inline static bool isUpperCase(unsigned short c) {
+    if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {
+        c = BASE_CHARS[c];
+    }
+    if (isupper(c)) {
+        return true;
+    }
+    return false;
+}
+
 /* static */
 inline static int editDistance(
         int* editDistanceTable, const unsigned short* input,
@@ -749,7 +762,8 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const

 /* static */
 int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
-        const int firstFreq, const int secondFreq, const Correction* correction) {
+        const int firstFreq, const int secondFreq, const Correction* correction,
+        const unsigned short *word) {
     const int spaceProximityPos = correction->mSpaceProximityPos;
     const int missingSpacePos = correction->mMissingSpacePos;
     if (DEBUG_DICT) {
@@ -761,11 +775,27 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
     const bool isSpaceProximity = spaceProximityPos >= 0;
     const int inputLength = correction->mInputLength;
     const int firstWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos;
-    const int secondWordLength = isSpaceProximity
-            ? (inputLength - spaceProximityPos - 1)
+    const int secondWordLength = isSpaceProximity ? (inputLength - spaceProximityPos - 1)
             : (inputLength - missingSpacePos);
     const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;

+    bool firstCapitalizedWordDemotion = false;
+    if (firstWordLength >= 2) {
+        firstCapitalizedWordDemotion = isUpperCase(word[0]);
+    }
+
+    bool secondCapitalizedWordDemotion = false;
+    if (secondWordLength >= 2) {
+        secondCapitalizedWordDemotion = isUpperCase(word[firstWordLength + 1]);
+    }
+
+    const bool capitalizedWordDemotion =
+            firstCapitalizedWordDemotion ^ secondCapitalizedWordDemotion;
+
+    if (DEBUG_DICT_FULL) {
+        LOGI("Two words: %c, %c, %d", word[0], word[firstWordLength + 1], capitalizedWordDemotion);
+    }
+
     if (firstWordLength == 0 || secondWordLength == 0) {
         return 0;
     }
@@ -815,6 +845,11 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
     }

     multiplyRate(WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE, &totalFreq);
+
+    if (capitalizedWordDemotion) {
+        multiplyRate(TWO_WORDS_CAPITALIZED_DEMOTION_RATE, &totalFreq);
+    }
+
     return totalFreq;
 }

```