diff options
Diffstat (limited to 'native/src/correction.cpp')
-rw-r--r-- | native/src/correction.cpp | 85 |
1 files changed, 63 insertions, 22 deletions
diff --git a/native/src/correction.cpp b/native/src/correction.cpp index 9a7e5f35d..5128c2e5c 100644 --- a/native/src/correction.cpp +++ b/native/src/correction.cpp @@ -115,6 +115,9 @@ bool Correction::initProcessState(const int outputIndex) { mInputIndex = mCorrectionStates[outputIndex].mInputIndex; mNeedsToTraverseAllNodes = mCorrectionStates[outputIndex].mNeedsToTraverseAllNodes; + mEquivalentCharStrongCount = mCorrectionStates[outputIndex].mEquivalentCharStrongCount; + mEquivalentCharNormalCount = mCorrectionStates[outputIndex].mEquivalentCharNormalCount; + mEquivalentCharWeakCount = mCorrectionStates[outputIndex].mEquivalentCharWeakCount; mProximityCount = mCorrectionStates[outputIndex].mProximityCount; mTransposedCount = mCorrectionStates[outputIndex].mTransposedCount; mExcessiveCount = mCorrectionStates[outputIndex].mExcessiveCount; @@ -169,6 +172,9 @@ void Correction::incrementOutputIndex() { mCorrectionStates[mOutputIndex].mInputIndex = mInputIndex; mCorrectionStates[mOutputIndex].mNeedsToTraverseAllNodes = mNeedsToTraverseAllNodes; + mCorrectionStates[mOutputIndex].mEquivalentCharStrongCount = mEquivalentCharStrongCount; + mCorrectionStates[mOutputIndex].mEquivalentCharNormalCount = mEquivalentCharNormalCount; + mCorrectionStates[mOutputIndex].mEquivalentCharWeakCount = mEquivalentCharWeakCount; mCorrectionStates[mOutputIndex].mProximityCount = mProximityCount; mCorrectionStates[mOutputIndex].mTransposedCount = mTransposedCount; mCorrectionStates[mOutputIndex].mExcessiveCount = mExcessiveCount; @@ -210,6 +216,12 @@ Correction::CorrectionType Correction::processSkipChar( } } +inline bool isEquivalentChar(ProximityInfo::ProximityType type) { + // 'type <= ProximityInfo::EQUIVALENT_CHAR_WEAK' means that + // type == ..._WEAK or type == ..._NORMAL or type == ..._STRONG. 
+ return type <= ProximityInfo::EQUIVALENT_CHAR_WEAK; +} + Correction::CorrectionType Correction::processCharAndCalcState( const int32_t c, const bool isTerminal) { const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount); @@ -221,8 +233,9 @@ Correction::CorrectionType Correction::processCharAndCalcState( bool incremented = false; if (mLastCharExceeded && mInputIndex == mInputLength - 1) { // TODO: Do not check the proximity if EditDistance exceeds the threshold - const int matchId = mProximityInfo->getMatchedProximityId(mInputIndex, c, true); - if (matchId == ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR) { + const ProximityInfo::ProximityType matchId = + mProximityInfo->getMatchedProximityId(mInputIndex, c, true); + if (isEquivalentChar(matchId)) { mLastCharExceeded = false; --mExcessiveCount; } else if (matchId == ProximityInfo::NEAR_PROXIMITY_CHAR) { @@ -266,8 +279,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( bool secondTransposing = false; if (mTransposedCount % 2 == 1) { - if (mProximityInfo->getMatchedProximityId(mInputIndex - 1, c, false) - == ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR) { + if (isEquivalentChar(mProximityInfo->getMatchedProximityId(mInputIndex - 1, c, false))) { ++mTransposedCount; secondTransposing = true; } else if (mCorrectionStates[mOutputIndex].mExceeding) { @@ -288,8 +300,8 @@ Correction::CorrectionType Correction::processCharAndCalcState( // TODO: Change the limit if we'll allow two or more proximity chars with corrections const bool checkProximityChars = noCorrectionsHappenedSoFar || mProximityCount == 0; - const int matchedProximityCharId = secondTransposing - ? ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR + const ProximityInfo::ProximityType matchedProximityCharId = secondTransposing + ? 
ProximityInfo::EQUIVALENT_CHAR_NORMAL : mProximityInfo->getMatchedProximityId(mInputIndex, c, checkProximityChars); if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId) { @@ -299,19 +311,18 @@ Correction::CorrectionType Correction::processCharAndCalcState( // here refers to the previous state. if (canTryCorrection && mCorrectionStates[mOutputIndex].mProximityMatching && mCorrectionStates[mOutputIndex].mExceeding - && mProximityInfo->getMatchedProximityId(mInputIndex, mWord[mOutputIndex], false) - == ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR) { + && isEquivalentChar(mProximityInfo->getMatchedProximityId( + mInputIndex, mWord[mOutputIndex], false))) { // Conversion p->e ++mExcessiveCount; --mProximityCount; } else if (mInputIndex < mInputLength - 1 && mOutputIndex > 0 && mTransposedCount > 0 && !mCorrectionStates[mOutputIndex].mTransposing && mCorrectionStates[mOutputIndex - 1].mTransposing - && mProximityInfo->getMatchedProximityId( - mInputIndex, mWord[mOutputIndex - 1], false) - == ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR - && mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false) - == ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR) { + && isEquivalentChar(mProximityInfo->getMatchedProximityId( + mInputIndex, mWord[mOutputIndex - 1], false)) + && isEquivalentChar( + mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false))) { // Conversion t->e // Example: // occaisional -> occa sional @@ -322,8 +333,8 @@ Correction::CorrectionType Correction::processCharAndCalcState( } else if (mOutputIndex > 0 && mInputIndex > 0 && mTransposedCount > 0 && !mCorrectionStates[mOutputIndex].mTransposing && mCorrectionStates[mOutputIndex - 1].mTransposing - && mProximityInfo->getMatchedProximityId(mInputIndex - 1, c, false) - == ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR) { + && isEquivalentChar( + mProximityInfo->getMatchedProximityId(mInputIndex - 1, c, false))) { // Conversion t->s // Example: // chcolate -> chocolate 
@@ -334,8 +345,8 @@ Correction::CorrectionType Correction::processCharAndCalcState( } else if (canTryCorrection && mInputIndex > 0 && mCorrectionStates[mOutputIndex].mProximityMatching && mCorrectionStates[mOutputIndex].mSkipping - && mProximityInfo->getMatchedProximityId(mInputIndex - 1, c, false) - == ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR) { + && isEquivalentChar( + mProximityInfo->getMatchedProximityId(mInputIndex - 1, c, false))) { // Conversion p->s // Note: This logic tries saving cases like contrst --> contrast -- "a" is one of // proximity chars of "s", but it should rather be handled as a skipped char. @@ -343,8 +354,8 @@ Correction::CorrectionType Correction::processCharAndCalcState( --mProximityCount; return processSkipChar(c, isTerminal, false); } else if ((mExceeding || mTransposing) && mInputIndex - 1 < mInputLength - && mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false) - == ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR) { + && isEquivalentChar( + mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false))) { // 1.2. Excessive or transpose correction if (mTransposing) { ++mTransposedCount; @@ -364,14 +375,28 @@ Correction::CorrectionType Correction::processCharAndCalcState( } return UNRELATED; } - } else if (secondTransposing - || ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) { + } else if (secondTransposing) { // If inputIndex is greater than mInputLength, that means there is no // proximity chars. So, we don't need to check proximity. 
mMatching = true; + } else if (isEquivalentChar(matchedProximityCharId)) { + mMatching = true; + switch (matchedProximityCharId) { + case ProximityInfo::EQUIVALENT_CHAR_STRONG: + ++mEquivalentCharStrongCount; + break; + case ProximityInfo::EQUIVALENT_CHAR_NORMAL: + ++mEquivalentCharNormalCount; + break; + case ProximityInfo::EQUIVALENT_CHAR_WEAK: + ++mEquivalentCharWeakCount; + break; + default: + assert(false); + } } else if (ProximityInfo::NEAR_PROXIMITY_CHAR == matchedProximityCharId) { mProximityMatching = true; - incrementProximityCount(); + ++mProximityCount; } mWord[mOutputIndex] = c; @@ -530,6 +555,8 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const const int transposedCount = correction->mTransposedCount / 2; const int excessiveCount = correction->mExcessiveCount + correction->mTransposedCount % 2; const int proximityMatchedCount = correction->mProximityCount; + const int equivalentCharStrongCount = correction->mEquivalentCharStrongCount; + const int equivalentCharWeakCount = correction->mEquivalentCharWeakCount; const bool lastCharExceeded = correction->mLastCharExceeded; const bool useFullEditDistance = correction->mUseFullEditDistance; const int outputLength = outputIndex + 1; @@ -639,6 +666,20 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq); } + for (int i = 0; i < equivalentCharStrongCount; ++i) { + if (DEBUG_DICT_FULL) { + LOGI("equivalent char strong"); + } + multiplyRate(WORDS_WITH_EQUIVALENT_CHAR_STRONG_PROMOTION_RATE, &finalFreq); + } + + for (int i = 0; i < equivalentCharWeakCount; ++i) { + if (DEBUG_DICT_FULL) { + LOGI("equivalent char weak"); + } + multiplyRate(WORDS_WITH_EQUIVALENT_CHAR_WEAK_DEMOTION_RATE, &finalFreq); + } + const int errorCount = adjustedProximityMatchedCount > 0 ? adjustedProximityMatchedCount : (proximityMatchedCount + transposedCount); |