aboutsummaryrefslogtreecommitdiffstats
path: root/native/src/correction.cpp
diff options
context:
space:
mode:
authorsatok <satok@google.com>2011-10-03 19:21:13 +0900
committersatok <satok@google.com>2011-10-03 20:11:06 +0900
commiteb050fc2dc97a7e6ddcaf254c110dc16279dfd0d (patch)
treed223a70e3dbff9ec722406b1b4c17d04e4f7a6f8 /native/src/correction.cpp
parent904baab25a4c6ec5d9c4bf7e562154e3f544d296 (diff)
downloadlatinime-eb050fc2dc97a7e6ddcaf254c110dc16279dfd0d.tar.gz
latinime-eb050fc2dc97a7e6ddcaf254c110dc16279dfd0d.tar.xz
latinime-eb050fc2dc97a7e6ddcaf254c110dc16279dfd0d.zip
Demote words with a capitalized char
Bug: 5371514
+1 4 -1 2 +2 0 -2 0 +3 0 -3 0 +4 1 -4 3 +5 0 -5 12 +6 3 -6 3 +7 12 -7 0
Change-Id: I6b46e43f9059f1e8a1cc02a626ea6eb8f1f9924f
Diffstat (limited to 'native/src/correction.cpp')
-rw-r--r--native/src/correction.cpp45
1 files changed, 40 insertions, 5 deletions
diff --git a/native/src/correction.cpp b/native/src/correction.cpp
index 5128c2e5c..9e75ffc3e 100644
--- a/native/src/correction.cpp
+++ b/native/src/correction.cpp
@@ -15,6 +15,7 @@
*/
#include <assert.h>
+#include <ctype.h>
#include <stdio.h>
#include <string.h>
@@ -89,8 +90,10 @@ void Correction::checkState() {
}
}
-int Correction::getFreqForSplitTwoWords(const int firstFreq, const int secondFreq) {
- return Correction::RankingAlgorithm::calcFreqForSplitTwoWords(firstFreq, secondFreq, this);
+int Correction::getFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
+ const unsigned short *word) {
+ return Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
+ firstFreq, secondFreq, this, word);
}
int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLength) {
@@ -498,6 +501,16 @@ inline static int getQuoteCount(const unsigned short* word, const int length) {
return quoteCount;
}
+// Returns true when c is an ASCII upper-case letter ('A'..'Z') after being
+// mapped through BASE_CHARS; code units beyond the end of the BASE_CHARS
+// table are tested unmapped.
+//
+// The range is compared directly instead of calling isupper(): the <ctype.h>
+// classification functions have undefined behavior for arguments that are
+// neither EOF nor representable as unsigned char, and c is an unsigned short
+// that can hold values above 0xFF.
+inline static bool isUpperCase(unsigned short c) {
+    if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {
+        c = BASE_CHARS[c];
+    }
+    return c >= 'A' && c <= 'Z';
+}
+
/* static */
inline static int editDistance(
int* editDistanceTable, const unsigned short* input,
@@ -749,7 +762,8 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
/* static */
int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
- const int firstFreq, const int secondFreq, const Correction* correction) {
+ const int firstFreq, const int secondFreq, const Correction* correction,
+ const unsigned short *word) {
const int spaceProximityPos = correction->mSpaceProximityPos;
const int missingSpacePos = correction->mMissingSpacePos;
if (DEBUG_DICT) {
@@ -761,11 +775,27 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
const bool isSpaceProximity = spaceProximityPos >= 0;
const int inputLength = correction->mInputLength;
const int firstWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos;
- const int secondWordLength = isSpaceProximity
- ? (inputLength - spaceProximityPos - 1)
+ const int secondWordLength = isSpaceProximity ? (inputLength - spaceProximityPos - 1)
: (inputLength - missingSpacePos);
const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
+ bool firstCapitalizedWordDemotion = false;
+ if (firstWordLength >= 2) {
+ firstCapitalizedWordDemotion = isUpperCase(word[0]);
+ }
+
+ bool secondCapitalizedWordDemotion = false;
+ if (secondWordLength >= 2) {
+ secondCapitalizedWordDemotion = isUpperCase(word[firstWordLength + 1]);
+ }
+
+ const bool capitalizedWordDemotion =
+ firstCapitalizedWordDemotion ^ secondCapitalizedWordDemotion;
+
+ if (DEBUG_DICT_FULL) {
+ LOGI("Two words: %c, %c, %d", word[0], word[firstWordLength + 1], capitalizedWordDemotion);
+ }
+
if (firstWordLength == 0 || secondWordLength == 0) {
return 0;
}
@@ -815,6 +845,11 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
}
multiplyRate(WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE, &totalFreq);
+
+ if (capitalizedWordDemotion) {
+ multiplyRate(TWO_WORDS_CAPITALIZED_DEMOTION_RATE, &totalFreq);
+ }
+
return totalFreq;
}