diff options
Diffstat (limited to 'native/src/correction.cpp')
-rw-r--r-- | native/src/correction.cpp | 60 |
1 files changed, 49 insertions, 11 deletions
diff --git a/native/src/correction.cpp b/native/src/correction.cpp index 8275c5d7e..5f11452ae 100644 --- a/native/src/correction.cpp +++ b/native/src/correction.cpp @@ -24,6 +24,7 @@ #include "char_utils.h" #include "correction.h" +#include "defines.h" #include "dictionary.h" #include "proximity_info.h" @@ -210,6 +211,7 @@ bool Correction::initProcessState(const int outputIndex) { mMatching = false; mProximityMatching = false; + mAdditionalProximityMatching = false; mTransposing = false; mExceeding = false; mSkipping = false; @@ -256,6 +258,7 @@ void Correction::incrementOutputIndex() { mCorrectionStates[mOutputIndex].mMatching = mMatching; mCorrectionStates[mOutputIndex].mProximityMatching = mProximityMatching; + mCorrectionStates[mOutputIndex].mAdditionalProximityMatching = mAdditionalProximityMatching; mCorrectionStates[mOutputIndex].mTransposing = mTransposing; mCorrectionStates[mOutputIndex].mExceeding = mExceeding; mCorrectionStates[mOutputIndex].mSkipping = mSkipping; @@ -304,6 +307,11 @@ inline bool isEquivalentChar(ProximityInfo::ProximityType type) { return type == ProximityInfo::EQUIVALENT_CHAR; } +inline bool isProximityCharOrEquivalentChar(ProximityInfo::ProximityType type) { + return type == ProximityInfo::EQUIVALENT_CHAR + || type == ProximityInfo::NEAR_PROXIMITY_CHAR; +} + Correction::CorrectionType Correction::processCharAndCalcState( const int32_t c, const bool isTerminal) { const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount); @@ -438,6 +446,9 @@ Correction::CorrectionType Correction::processCharAndCalcState( if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId || ProximityInfo::ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) { + if (ProximityInfo::ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) { + mAdditionalProximityMatching = true; + } // TODO: Optimize // As the current char turned out to be an unrelated char, // we will try other correction-types. Please note that mCorrectionStates[mOutputIndex] @@ -479,6 +490,18 @@ Correction::CorrectionType Correction::processCharAndCalcState( ++mSkippedCount; --mProximityCount; return processSkipChar(c, isTerminal, false); + } else if (mInputIndex - 1 < mInputLength + && mSkippedCount > 0 + && mCorrectionStates[mOutputIndex].mSkipping + && mCorrectionStates[mOutputIndex].mAdditionalProximityMatching + && isProximityCharOrEquivalentChar( + mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false))) { + // Conversion s->a + incrementInputIndex(); + --mSkippedCount; + mProximityMatching = true; + ++mProximityCount; + mDistances[mOutputIndex] = ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO; } else if ((mExceeding || mTransposing) && mInputIndex - 1 < mInputLength && isEquivalentChar( mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false))) { @@ -618,7 +641,7 @@ inline static int getQuoteCount(const unsigned short* word, const int length) { } inline static bool isUpperCase(unsigned short c) { - return isAsciiUpper(toBaseChar(c)); + return isAsciiUpper(toBaseChar(c)); } ////////////////////// @@ -666,6 +689,10 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const int finalFreq = freq; + if (DEBUG_CORRECTION_FREQ + && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == inputLength)) { + AKLOGI("FinalFreq0: %d", finalFreq); + } // TODO: Optimize this. if (transposedCount > 0 || proximityMatchedCount > 0 || skipped || excessiveCount > 0) { ed = getCurrentEditDistance(editDistanceTable, correction->mInputLength, outputLength, @@ -681,12 +708,15 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const } ed = max(0, ed - quoteDiffCount); - + adjustedProximityMatchedCount = min(max(0, ed - (outputLength - inputLength)), + proximityMatchedCount); if (transposedCount < 1) { if (ed == 1 && (inputLength == outputLength - 1 || inputLength == outputLength + 1)) { // Promote a word with just one skipped or excessive char if (sameLength) { - multiplyRate(WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE, &finalFreq); + multiplyRate(WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE + + WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_MULTIPLIER * outputLength, + &finalFreq); } else { multiplyIntCapped(typedLetterMultiplier, &finalFreq); } @@ -695,8 +725,6 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const sameLength = true; } } - adjustedProximityMatchedCount = min(max(0, ed - (outputLength - inputLength)), - proximityMatchedCount); } else { const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount); multiplyIntCapped(matchWeight, &finalFreq); @@ -744,6 +772,7 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const && skippedCount == 0 && excessiveCount == 0 && transposedCount == 0; // Score calibration by touch coordinates is being done only for pure-fat finger typing error // cases. + int additionalProximityCount = 0; // TODO: Remove this constraint. if (performTouchPositionCorrection) { for (int i = 0; i < outputLength; ++i) { @@ -776,12 +805,12 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const } else if (squaredDistance == PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO) { multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq); } else if (squaredDistance == ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO) { + ++additionalProximityCount; multiplyRate(WORDS_WITH_ADDITIONAL_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq); } } } else { // Demote additional proximity characters - int additionalProximityCount = 0; for (int i = 0; i < outputLength; ++i) { const int squaredDistance = correction->mDistances[i]; if (squaredDistance == ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO) { @@ -803,6 +832,13 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const } } + // If the user types too many(three or more) proximity characters with additional proximity + // character,do not treat as the same length word. + if (sameLength && additionalProximityCount > 0 && (adjustedProximityMatchedCount >= 3 + || transposedCount > 0 || skipped || excessiveCount > 0)) { + sameLength = false; + } + const int errorCount = adjustedProximityMatchedCount > 0 ? adjustedProximityMatchedCount : (proximityMatchedCount + transposedCount); @@ -813,13 +849,14 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const if (ed == 0) { // Full exact match if (sameLength && transposedCount == 0 && !skipped && excessiveCount == 0 - && quoteDiffCount == 0) { + && quoteDiffCount == 0 && additionalProximityCount == 0) { finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq); } } // Promote a word with no correction - if (proximityMatchedCount == 0 && transposedCount == 0 && !skipped && excessiveCount == 0) { + if (proximityMatchedCount == 0 && transposedCount == 0 && !skipped && excessiveCount == 0 + && additionalProximityCount == 0) { multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq); } @@ -863,10 +900,11 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const if (DEBUG_CORRECTION_FREQ && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == inputLength)) { + DUMP_WORD(proximityInfo->getPrimaryInputWord(), inputLength); DUMP_WORD(correction->mWord, outputLength); - AKLOGI("FinalFreq: [P%d, S%d, T%d, E%d] %d, %d, %d, %d, %d, %d", proximityMatchedCount, - skippedCount, transposedCount, excessiveCount, outputLength, lastCharExceeded, - sameLength, quoteDiffCount, ed, finalFreq); + AKLOGI("FinalFreq: [P%d, S%d, T%d, E%d, A%d] %d, %d, %d, %d, %d, %d", proximityMatchedCount, + skippedCount, transposedCount, excessiveCount, additionalProximityCount, + outputLength, lastCharExceeded, sameLength, quoteDiffCount, ed, finalFreq); } return finalFreq; |