aboutsummaryrefslogtreecommitdiffstats
path: root/native/src/correction.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'native/src/correction.cpp')
-rw-r--r--native/src/correction.cpp122
1 files changed, 109 insertions, 13 deletions
diff --git a/native/src/correction.cpp b/native/src/correction.cpp
index 75831b69d..6a129d4e3 100644
--- a/native/src/correction.cpp
+++ b/native/src/correction.cpp
@@ -83,7 +83,7 @@ inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigne
inline static int getCurrentEditDistance(
int *editDistanceTable, const int inputLength, const int outputLength) {
- if (DEBUG_DICT) {
+ if (DEBUG_EDIT_DISTANCE) {
AKLOGI("getCurrentEditDistance %d, %d", inputLength, outputLength);
}
return editDistanceTable[(inputLength + 1) * (outputLength + 1) - 1];
@@ -653,9 +653,10 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
int finalFreq = freq;
// TODO: Optimize this.
- // TODO: Ignoring edit distance for transposed char, for now
- if (transposedCount == 0 && (proximityMatchedCount > 0 || skipped || excessiveCount > 0)) {
- ed = getCurrentEditDistance(editDistanceTable, inputLength, outputIndex + 1);
+ if (transposedCount > 0 || proximityMatchedCount > 0 || skipped || excessiveCount > 0) {
+ ed = getCurrentEditDistance(editDistanceTable, inputLength, outputIndex + 1)
+ - transposedCount;
+
const int matchWeight = powerIntCapped(typedLetterMultiplier,
max(inputLength, outputIndex + 1) - ed);
multiplyIntCapped(matchWeight, &finalFreq);
@@ -667,21 +668,22 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
ed = max(0, ed - quoteDiffCount);
- if (ed == 1 && (inputLength == outputIndex || inputLength == outputIndex + 2)) {
- // Promote a word with just one skipped or excessive char
- if (sameLength) {
- multiplyRate(WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE, &finalFreq);
- } else {
+ if (transposedCount < 1) {
+ if (ed == 1 && (inputLength == outputIndex || inputLength == outputIndex + 2)) {
+ // Promote a word with just one skipped or excessive char
+ if (sameLength) {
+ multiplyRate(WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE, &finalFreq);
+ } else {
+ multiplyIntCapped(typedLetterMultiplier, &finalFreq);
+ }
+ } else if (ed == 0) {
multiplyIntCapped(typedLetterMultiplier, &finalFreq);
+ sameLength = true;
}
- } else if (ed == 0) {
- multiplyIntCapped(typedLetterMultiplier, &finalFreq);
- sameLength = true;
}
adjustedProximityMatchedCount = min(max(0, ed - (outputIndex + 1 - inputLength)),
proximityMatchedCount);
} else {
- // TODO: Calculate the edit distance for transposed char
const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount);
multiplyIntCapped(matchWeight, &finalFreq);
}
@@ -933,6 +935,100 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
return totalFreq;
}
+/* static */
+int Correction::RankingAlgorithm::calcFreqForSplitTwoWordsOld(
+ const int firstFreq, const int secondFreq, const Correction* correction,
+ const unsigned short *word) {
+ const int spaceProximityPos = correction->mSpaceProximityPos;
+ const int missingSpacePos = correction->mMissingSpacePos;
+ if (DEBUG_DICT) {
+ int inputCount = 0;
+ if (spaceProximityPos >= 0) ++inputCount;
+ if (missingSpacePos >= 0) ++inputCount;
+ assert(inputCount <= 1);
+ }
+ const bool isSpaceProximity = spaceProximityPos >= 0;
+ const int inputLength = correction->mInputLength;
+ const int firstWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos;
+ const int secondWordLength = isSpaceProximity ? (inputLength - spaceProximityPos - 1)
+ : (inputLength - missingSpacePos);
+ const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
+
+ bool firstCapitalizedWordDemotion = false;
+ if (firstWordLength >= 2) {
+ firstCapitalizedWordDemotion = isUpperCase(word[0]);
+ }
+
+ bool secondCapitalizedWordDemotion = false;
+ if (secondWordLength >= 2) {
+ secondCapitalizedWordDemotion = isUpperCase(word[firstWordLength + 1]);
+ }
+
+ const bool capitalizedWordDemotion =
+ firstCapitalizedWordDemotion ^ secondCapitalizedWordDemotion;
+
+ if (DEBUG_DICT_FULL) {
+ AKLOGI("Two words: %c, %c, %d",
+ word[0], word[firstWordLength + 1], capitalizedWordDemotion);
+ }
+
+ if (firstWordLength == 0 || secondWordLength == 0) {
+ return 0;
+ }
+ const int firstDemotionRate = 100 - 100 / (firstWordLength + 1);
+ int tempFirstFreq = firstFreq;
+ multiplyRate(firstDemotionRate, &tempFirstFreq);
+
+ const int secondDemotionRate = 100 - 100 / (secondWordLength + 1);
+ int tempSecondFreq = secondFreq;
+ multiplyRate(secondDemotionRate, &tempSecondFreq);
+
+ const int totalLength = firstWordLength + secondWordLength;
+
+ // Promote pairFreq with multiplying by 2, because the word length is the same as the typed
+ // length.
+ int totalFreq = tempFirstFreq + tempSecondFreq;
+
+ // This is a workaround to try offsetting the not-enough-demotion which will be done in
+ // calcNormalizedScore in Utils.java.
+ // In calcNormalizedScore the score will be demoted by (1 - 1 / length)
+ // but we demoted only (1 - 1 / (length + 1)) so we will additionally adjust freq by
+ // (1 - 1 / length) / (1 - 1 / (length + 1)) = (1 - 1 / (length * length))
+ const int normalizedScoreNotEnoughDemotionAdjustment = 100 - 100 / (totalLength * totalLength);
+ multiplyRate(normalizedScoreNotEnoughDemotionAdjustment, &totalFreq);
+
+ // At this moment, totalFreq is calculated by the following formula:
+ // (firstFreq * (1 - 1 / (firstWordLength + 1)) + secondFreq * (1 - 1 / (secondWordLength + 1)))
+ // * (1 - 1 / totalLength) / (1 - 1 / (totalLength + 1))
+
+ multiplyIntCapped(powerIntCapped(typedLetterMultiplier, totalLength), &totalFreq);
+
+ // This is another workaround to offset the demotion which will be done in
+ // calcNormalizedScore in Utils.java.
+ // In calcNormalizedScore the score will be demoted by (1 - 1 / length) so we have to promote
+ // the same amount because we already have adjusted the synthetic freq of this "missing or
+ // mistyped space" suggestion candidate above in this method.
+ const int normalizedScoreDemotionRateOffset = (100 + 100 / totalLength);
+ multiplyRate(normalizedScoreDemotionRateOffset, &totalFreq);
+
+ if (isSpaceProximity) {
+ // A word pair with one space proximity correction
+ if (DEBUG_DICT) {
+ AKLOGI("Found a word pair with space proximity correction.");
+ }
+ multiplyIntCapped(typedLetterMultiplier, &totalFreq);
+ multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &totalFreq);
+ }
+
+ multiplyRate(WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE, &totalFreq);
+
+ if (capitalizedWordDemotion) {
+ multiplyRate(TWO_WORDS_CAPITALIZED_DEMOTION_RATE, &totalFreq);
+ }
+
+ return totalFreq;
+}
+
/* Damerau-Levenshtein distance */
inline static int editDistanceInternal(
int* editDistanceTable, const unsigned short* before,