diff options
Diffstat (limited to 'native/src/correction.cpp')
-rw-r--r-- | native/src/correction.cpp | 96 |
1 files changed, 95 insertions, 1 deletions
diff --git a/native/src/correction.cpp b/native/src/correction.cpp index 5dc6f8737..6a129d4e3 100644 --- a/native/src/correction.cpp +++ b/native/src/correction.cpp @@ -83,7 +83,7 @@ inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigne inline static int getCurrentEditDistance( int *editDistanceTable, const int inputLength, const int outputLength) { - if (DEBUG_DICT) { + if (DEBUG_EDIT_DISTANCE) { AKLOGI("getCurrentEditDistance %d, %d", inputLength, outputLength); } return editDistanceTable[(inputLength + 1) * (outputLength + 1) - 1]; @@ -935,6 +935,100 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords( return totalFreq; } +/* static */ +int Correction::RankingAlgorithm::calcFreqForSplitTwoWordsOld( + const int firstFreq, const int secondFreq, const Correction* correction, + const unsigned short *word) { + const int spaceProximityPos = correction->mSpaceProximityPos; + const int missingSpacePos = correction->mMissingSpacePos; + if (DEBUG_DICT) { + int inputCount = 0; + if (spaceProximityPos >= 0) ++inputCount; + if (missingSpacePos >= 0) ++inputCount; + assert(inputCount <= 1); + } + const bool isSpaceProximity = spaceProximityPos >= 0; + const int inputLength = correction->mInputLength; + const int firstWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos; + const int secondWordLength = isSpaceProximity ? (inputLength - spaceProximityPos - 1) + : (inputLength - missingSpacePos); + const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER; + + bool firstCapitalizedWordDemotion = false; + if (firstWordLength >= 2) { + firstCapitalizedWordDemotion = isUpperCase(word[0]); + } + + bool secondCapitalizedWordDemotion = false; + if (secondWordLength >= 2) { + secondCapitalizedWordDemotion = isUpperCase(word[firstWordLength + 1]); + } + + const bool capitalizedWordDemotion = + firstCapitalizedWordDemotion ^ secondCapitalizedWordDemotion; + + if (DEBUG_DICT_FULL) { + AKLOGI("Two words: %c, %c, %d", + word[0], word[firstWordLength + 1], capitalizedWordDemotion); + } + + if (firstWordLength == 0 || secondWordLength == 0) { + return 0; + } + const int firstDemotionRate = 100 - 100 / (firstWordLength + 1); + int tempFirstFreq = firstFreq; + multiplyRate(firstDemotionRate, &tempFirstFreq); + + const int secondDemotionRate = 100 - 100 / (secondWordLength + 1); + int tempSecondFreq = secondFreq; + multiplyRate(secondDemotionRate, &tempSecondFreq); + + const int totalLength = firstWordLength + secondWordLength; + + // Promote pairFreq with multiplying by 2, because the word length is the same as the typed + // length. + int totalFreq = tempFirstFreq + tempSecondFreq; + + // This is a workaround to try offsetting the not-enough-demotion which will be done in + // calcNormalizedScore in Utils.java. + // In calcNormalizedScore the score will be demoted by (1 - 1 / length) + // but we demoted only (1 - 1 / (length + 1)) so we will additionally adjust freq by + // (1 - 1 / length) / (1 - 1 / (length + 1)) = (1 - 1 / (length * length)) + const int normalizedScoreNotEnoughDemotionAdjustment = 100 - 100 / (totalLength * totalLength); + multiplyRate(normalizedScoreNotEnoughDemotionAdjustment, &totalFreq); + + // At this moment, totalFreq is calculated by the following formula: + // (firstFreq * (1 - 1 / (firstWordLength + 1)) + secondFreq * (1 - 1 / (secondWordLength + 1))) + // * (1 - 1 / totalLength) / (1 - 1 / (totalLength + 1)) + + multiplyIntCapped(powerIntCapped(typedLetterMultiplier, totalLength), &totalFreq); + + // This is another workaround to offset the demotion which will be done in + // calcNormalizedScore in Utils.java. + // In calcNormalizedScore the score will be demoted by (1 - 1 / length) so we have to promote + // the same amount because we already have adjusted the synthetic freq of this "missing or + // mistyped space" suggestion candidate above in this method. + const int normalizedScoreDemotionRateOffset = (100 + 100 / totalLength); + multiplyRate(normalizedScoreDemotionRateOffset, &totalFreq); + + if (isSpaceProximity) { + // A word pair with one space proximity correction + if (DEBUG_DICT) { + AKLOGI("Found a word pair with space proximity correction."); + } + multiplyIntCapped(typedLetterMultiplier, &totalFreq); + multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &totalFreq); + } + + multiplyRate(WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE, &totalFreq); + + if (capitalizedWordDemotion) { + multiplyRate(TWO_WORDS_CAPITALIZED_DEMOTION_RATE, &totalFreq); + } + + return totalFreq; +} + /* Damerau-Levenshtein distance */ inline static int editDistanceInternal( int* editDistanceTable, const unsigned short* before, |