aboutsummaryrefslogtreecommitdiffstats
path: root/native/src/correction.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'native/src/correction.cpp')
-rw-r--r--native/src/correction.cpp85
1 files changed, 63 insertions, 22 deletions
diff --git a/native/src/correction.cpp b/native/src/correction.cpp
index 9a7e5f35d..5128c2e5c 100644
--- a/native/src/correction.cpp
+++ b/native/src/correction.cpp
@@ -115,6 +115,9 @@ bool Correction::initProcessState(const int outputIndex) {
mInputIndex = mCorrectionStates[outputIndex].mInputIndex;
mNeedsToTraverseAllNodes = mCorrectionStates[outputIndex].mNeedsToTraverseAllNodes;
+ mEquivalentCharStrongCount = mCorrectionStates[outputIndex].mEquivalentCharStrongCount;
+ mEquivalentCharNormalCount = mCorrectionStates[outputIndex].mEquivalentCharNormalCount;
+ mEquivalentCharWeakCount = mCorrectionStates[outputIndex].mEquivalentCharWeakCount;
mProximityCount = mCorrectionStates[outputIndex].mProximityCount;
mTransposedCount = mCorrectionStates[outputIndex].mTransposedCount;
mExcessiveCount = mCorrectionStates[outputIndex].mExcessiveCount;
@@ -169,6 +172,9 @@ void Correction::incrementOutputIndex() {
mCorrectionStates[mOutputIndex].mInputIndex = mInputIndex;
mCorrectionStates[mOutputIndex].mNeedsToTraverseAllNodes = mNeedsToTraverseAllNodes;
+ mCorrectionStates[mOutputIndex].mEquivalentCharStrongCount = mEquivalentCharStrongCount;
+ mCorrectionStates[mOutputIndex].mEquivalentCharNormalCount = mEquivalentCharNormalCount;
+ mCorrectionStates[mOutputIndex].mEquivalentCharWeakCount = mEquivalentCharWeakCount;
mCorrectionStates[mOutputIndex].mProximityCount = mProximityCount;
mCorrectionStates[mOutputIndex].mTransposedCount = mTransposedCount;
mCorrectionStates[mOutputIndex].mExcessiveCount = mExcessiveCount;
@@ -210,6 +216,12 @@ Correction::CorrectionType Correction::processSkipChar(
}
}
+inline bool isEquivalentChar(ProximityInfo::ProximityType type) {
+ // 'type ProximityInfo::EQUIVALENT_CHAR_WEAK' means that
+ // type == ..._WEAK or type == ..._NORMAL or type == ..._STRONG.
+ return type <= ProximityInfo::EQUIVALENT_CHAR_WEAK;
+}
+
Correction::CorrectionType Correction::processCharAndCalcState(
const int32_t c, const bool isTerminal) {
const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount);
@@ -221,8 +233,9 @@ Correction::CorrectionType Correction::processCharAndCalcState(
bool incremented = false;
if (mLastCharExceeded && mInputIndex == mInputLength - 1) {
// TODO: Do not check the proximity if EditDistance exceeds the threshold
- const int matchId = mProximityInfo->getMatchedProximityId(mInputIndex, c, true);
- if (matchId == ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR) {
+ const ProximityInfo::ProximityType matchId =
+ mProximityInfo->getMatchedProximityId(mInputIndex, c, true);
+ if (isEquivalentChar(matchId)) {
mLastCharExceeded = false;
--mExcessiveCount;
} else if (matchId == ProximityInfo::NEAR_PROXIMITY_CHAR) {
@@ -266,8 +279,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
bool secondTransposing = false;
if (mTransposedCount % 2 == 1) {
- if (mProximityInfo->getMatchedProximityId(mInputIndex - 1, c, false)
- == ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR) {
+ if (isEquivalentChar(mProximityInfo->getMatchedProximityId(mInputIndex - 1, c, false))) {
++mTransposedCount;
secondTransposing = true;
} else if (mCorrectionStates[mOutputIndex].mExceeding) {
@@ -288,8 +300,8 @@ Correction::CorrectionType Correction::processCharAndCalcState(
// TODO: Change the limit if we'll allow two or more proximity chars with corrections
const bool checkProximityChars = noCorrectionsHappenedSoFar || mProximityCount == 0;
- const int matchedProximityCharId = secondTransposing
- ? ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR
+ const ProximityInfo::ProximityType matchedProximityCharId = secondTransposing
+ ? ProximityInfo::EQUIVALENT_CHAR_NORMAL
: mProximityInfo->getMatchedProximityId(mInputIndex, c, checkProximityChars);
if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId) {
@@ -299,19 +311,18 @@ Correction::CorrectionType Correction::processCharAndCalcState(
// here refers to the previous state.
if (canTryCorrection && mCorrectionStates[mOutputIndex].mProximityMatching
&& mCorrectionStates[mOutputIndex].mExceeding
- && mProximityInfo->getMatchedProximityId(mInputIndex, mWord[mOutputIndex], false)
- == ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR) {
+ && isEquivalentChar(mProximityInfo->getMatchedProximityId(
+ mInputIndex, mWord[mOutputIndex], false))) {
// Conversion p->e
++mExcessiveCount;
--mProximityCount;
} else if (mInputIndex < mInputLength - 1 && mOutputIndex > 0 && mTransposedCount > 0
&& !mCorrectionStates[mOutputIndex].mTransposing
&& mCorrectionStates[mOutputIndex - 1].mTransposing
- && mProximityInfo->getMatchedProximityId(
- mInputIndex, mWord[mOutputIndex - 1], false)
- == ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR
- && mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false)
- == ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR) {
+ && isEquivalentChar(mProximityInfo->getMatchedProximityId(
+ mInputIndex, mWord[mOutputIndex - 1], false))
+ && isEquivalentChar(
+ mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false))) {
// Conversion t->e
// Example:
// occaisional -> occa sional
@@ -322,8 +333,8 @@ Correction::CorrectionType Correction::processCharAndCalcState(
} else if (mOutputIndex > 0 && mInputIndex > 0 && mTransposedCount > 0
&& !mCorrectionStates[mOutputIndex].mTransposing
&& mCorrectionStates[mOutputIndex - 1].mTransposing
- && mProximityInfo->getMatchedProximityId(mInputIndex - 1, c, false)
- == ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR) {
+ && isEquivalentChar(
+ mProximityInfo->getMatchedProximityId(mInputIndex - 1, c, false))) {
// Conversion t->s
// Example:
// chcolate -> chocolate
@@ -334,8 +345,8 @@ Correction::CorrectionType Correction::processCharAndCalcState(
} else if (canTryCorrection && mInputIndex > 0
&& mCorrectionStates[mOutputIndex].mProximityMatching
&& mCorrectionStates[mOutputIndex].mSkipping
- && mProximityInfo->getMatchedProximityId(mInputIndex - 1, c, false)
- == ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR) {
+ && isEquivalentChar(
+ mProximityInfo->getMatchedProximityId(mInputIndex - 1, c, false))) {
// Conversion p->s
// Note: This logic tries saving cases like contrst --> contrast -- "a" is one of
// proximity chars of "s", but it should rather be handled as a skipped char.
@@ -343,8 +354,8 @@ Correction::CorrectionType Correction::processCharAndCalcState(
--mProximityCount;
return processSkipChar(c, isTerminal, false);
} else if ((mExceeding || mTransposing) && mInputIndex - 1 < mInputLength
- && mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false)
- == ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR) {
+ && isEquivalentChar(
+ mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false))) {
// 1.2. Excessive or transpose correction
if (mTransposing) {
++mTransposedCount;
@@ -364,14 +375,28 @@ Correction::CorrectionType Correction::processCharAndCalcState(
}
return UNRELATED;
}
- } else if (secondTransposing
- || ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) {
+ } else if (secondTransposing) {
// If inputIndex is greater than mInputLength, that means there is no
// proximity chars. So, we don't need to check proximity.
mMatching = true;
+ } else if (isEquivalentChar(matchedProximityCharId)) {
+ mMatching = true;
+ switch (matchedProximityCharId) {
+ case ProximityInfo::EQUIVALENT_CHAR_STRONG:
+ ++mEquivalentCharStrongCount;
+ break;
+ case ProximityInfo::EQUIVALENT_CHAR_NORMAL:
+ ++mEquivalentCharNormalCount;
+ break;
+ case ProximityInfo::EQUIVALENT_CHAR_WEAK:
+ ++mEquivalentCharWeakCount;
+ break;
+ default:
+ assert(false);
+ }
} else if (ProximityInfo::NEAR_PROXIMITY_CHAR == matchedProximityCharId) {
mProximityMatching = true;
- incrementProximityCount();
+ ++mProximityCount;
}
mWord[mOutputIndex] = c;
@@ -530,6 +555,8 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
const int transposedCount = correction->mTransposedCount / 2;
const int excessiveCount = correction->mExcessiveCount + correction->mTransposedCount % 2;
const int proximityMatchedCount = correction->mProximityCount;
+ const int equivalentCharStrongCount = correction->mEquivalentCharStrongCount;
+ const int equivalentCharWeakCount = correction->mEquivalentCharWeakCount;
const bool lastCharExceeded = correction->mLastCharExceeded;
const bool useFullEditDistance = correction->mUseFullEditDistance;
const int outputLength = outputIndex + 1;
@@ -639,6 +666,20 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);
}
+ for (int i = 0; i < equivalentCharStrongCount; ++i) {
+ if (DEBUG_DICT_FULL) {
+ LOGI("equivalent char strong");
+ }
+ multiplyRate(WORDS_WITH_EQUIVALENT_CHAR_STRONG_PROMOTION_RATE, &finalFreq);
+ }
+
+ for (int i = 0; i < equivalentCharWeakCount; ++i) {
+ if (DEBUG_DICT_FULL) {
+ LOGI("equivalent char weak");
+ }
+ multiplyRate(WORDS_WITH_EQUIVALENT_CHAR_WEAK_DEMOTION_RATE, &finalFreq);
+ }
+
const int errorCount = adjustedProximityMatchedCount > 0
? adjustedProximityMatchedCount
: (proximityMatchedCount + transposedCount);