aboutsummaryrefslogtreecommitdiffstats
path: root/native/src/correction.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'native/src/correction.cpp')
-rw-r--r--native/src/correction.cpp191
1 files changed, 143 insertions, 48 deletions
diff --git a/native/src/correction.cpp b/native/src/correction.cpp
index dc31bfae7..6a129d4e3 100644
--- a/native/src/correction.cpp
+++ b/native/src/correction.cpp
@@ -42,7 +42,7 @@ inline static void initEditDistance(int *editDistanceTable) {
inline static void dumpEditDistance10ForDebug(int *editDistanceTable, const int inputLength,
const int outputLength) {
if (DEBUG_DICT) {
- LOGI("EditDistanceTable");
+ AKLOGI("EditDistanceTable");
for (int i = 0; i <= 10; ++i) {
int c[11];
for (int j = 0; j <= 10; ++j) {
@@ -52,7 +52,7 @@ inline static void dumpEditDistance10ForDebug(int *editDistanceTable, const int
c[j] = -1;
}
}
- LOGI("[ %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d ]",
+ AKLOGI("[ %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d ]",
c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7], c[8], c[9], c[10]);
}
}
@@ -83,8 +83,8 @@ inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigne
inline static int getCurrentEditDistance(
int *editDistanceTable, const int inputLength, const int outputLength) {
- if (DEBUG_DICT) {
- LOGI("getCurrentEditDistance %d, %d", inputLength, outputLength);
+ if (DEBUG_EDIT_DISTANCE) {
+ AKLOGI("getCurrentEditDistance %d, %d", inputLength, outputLength);
}
return editDistanceTable[(inputLength + 1) * (outputLength + 1) - 1];
}
@@ -168,8 +168,8 @@ int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLen
const int outputIndex = mTerminalOutputIndex;
const int inputIndex = mTerminalInputIndex;
*wordLength = outputIndex + 1;
- if (mProximityInfo->sameAsTyped(mWord, outputIndex + 1) || outputIndex < MIN_SUGGEST_DEPTH) {
- return -1;
+ if (outputIndex < MIN_SUGGEST_DEPTH) {
+ return NOT_A_FREQUENCY;
}
*word = mWord;
@@ -215,20 +215,10 @@ int Correction::goDownTree(
}
// TODO: remove
-int Correction::getOutputIndex() {
- return mOutputIndex;
-}
-
-// TODO: remove
int Correction::getInputIndex() {
return mInputIndex;
}
-// TODO: remove
-bool Correction::needsToTraverseAllNodes() {
- return mNeedsToTraverseAllNodes;
-}
-
void Correction::incrementInputIndex() {
++mInputIndex;
}
@@ -278,13 +268,12 @@ void Correction::addCharToCurrentWord(const int32_t c) {
mWord, mOutputIndex + 1);
}
-// TODO: inline?
Correction::CorrectionType Correction::processSkipChar(
const int32_t c, const bool isTerminal, const bool inputIndexIncremented) {
addCharToCurrentWord(c);
- if (needsToTraverseAllNodes() && isTerminal) {
- mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0);
- mTerminalOutputIndex = mOutputIndex;
+ mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0);
+ mTerminalOutputIndex = mOutputIndex;
+ if (mNeedsToTraverseAllNodes && isTerminal) {
incrementOutputIndex();
return TRAVERSE_ALL_ON_TERMINAL;
} else {
@@ -293,6 +282,13 @@ Correction::CorrectionType Correction::processSkipChar(
}
}
+Correction::CorrectionType Correction::processUnrelatedCorrectionType() {
+ // Needs to set mTerminalInputIndex and mTerminalOutputIndex before returning any CorrectionType
+ mTerminalInputIndex = mInputIndex;
+ mTerminalOutputIndex = mOutputIndex;
+ return UNRELATED;
+}
+
inline bool isEquivalentChar(ProximityInfo::ProximityType type) {
return type == ProximityInfo::EQUIVALENT_CHAR;
}
@@ -301,7 +297,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
const int32_t c, const bool isTerminal) {
const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount);
if (correctionCount > mMaxErrors) {
- return UNRELATED;
+ return processUnrelatedCorrectionType();
}
// TODO: Change the limit if we'll allow two or more corrections
@@ -378,10 +374,10 @@ Correction::CorrectionType Correction::processCharAndCalcState(
--mTransposedCount;
if (DEBUG_CORRECTION) {
DUMP_WORD(mWord, mOutputIndex);
- LOGI("UNRELATED(0): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
+ AKLOGI("UNRELATED(0): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
mTransposedCount, mExcessiveCount, c);
}
- return UNRELATED;
+ return processUnrelatedCorrectionType();
}
}
@@ -404,7 +400,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
&& isEquivalentChar(mProximityInfo->getMatchedProximityId(
mInputIndex, mWord[mOutputIndex - 1], false))) {
if (DEBUG_CORRECTION) {
- LOGI("CONVERSION p->e %c", mWord[mOutputIndex - 1]);
+ AKLOGI("CONVERSION p->e %c", mWord[mOutputIndex - 1]);
}
// Conversion p->e
// Example:
@@ -481,10 +477,10 @@ Correction::CorrectionType Correction::processCharAndCalcState(
} else {
if (DEBUG_CORRECTION) {
DUMP_WORD(mWord, mOutputIndex);
- LOGI("UNRELATED(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
+ AKLOGI("UNRELATED(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
mTransposedCount, mExcessiveCount, c);
}
- return UNRELATED;
+ return processUnrelatedCorrectionType();
}
} else if (secondTransposing) {
// If inputIndex is greater than mInputLength, that means there is no
@@ -534,11 +530,13 @@ Correction::CorrectionType Correction::processCharAndCalcState(
mTerminalOutputIndex = mOutputIndex - 1;
if (DEBUG_CORRECTION) {
DUMP_WORD(mWord, mOutputIndex);
- LOGI("ONTERMINAL(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
+ AKLOGI("ONTERMINAL(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
mTransposedCount, mExcessiveCount, c);
}
return ON_TERMINAL;
} else {
+ mTerminalInputIndex = mInputIndex - 1;
+ mTerminalOutputIndex = mOutputIndex - 1;
return NOT_ON_TERMINAL;
}
}
@@ -655,9 +653,10 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
int finalFreq = freq;
// TODO: Optimize this.
- // TODO: Ignoring edit distance for transposed char, for now
- if (transposedCount == 0 && (proximityMatchedCount > 0 || skipped || excessiveCount > 0)) {
- ed = getCurrentEditDistance(editDistanceTable, inputLength, outputIndex + 1);
+ if (transposedCount > 0 || proximityMatchedCount > 0 || skipped || excessiveCount > 0) {
+ ed = getCurrentEditDistance(editDistanceTable, inputLength, outputIndex + 1)
+ - transposedCount;
+
const int matchWeight = powerIntCapped(typedLetterMultiplier,
max(inputLength, outputIndex + 1) - ed);
multiplyIntCapped(matchWeight, &finalFreq);
@@ -669,21 +668,22 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
ed = max(0, ed - quoteDiffCount);
- if (ed == 1 && (inputLength == outputIndex || inputLength == outputIndex + 2)) {
- // Promote a word with just one skipped or excessive char
- if (sameLength) {
- multiplyRate(WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE, &finalFreq);
- } else {
+ if (transposedCount < 1) {
+ if (ed == 1 && (inputLength == outputIndex || inputLength == outputIndex + 2)) {
+ // Promote a word with just one skipped or excessive char
+ if (sameLength) {
+ multiplyRate(WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE, &finalFreq);
+ } else {
+ multiplyIntCapped(typedLetterMultiplier, &finalFreq);
+ }
+ } else if (ed == 0) {
multiplyIntCapped(typedLetterMultiplier, &finalFreq);
+ sameLength = true;
}
- } else if (ed == 0) {
- multiplyIntCapped(typedLetterMultiplier, &finalFreq);
- sameLength = true;
}
adjustedProximityMatchedCount = min(max(0, ed - (outputIndex + 1 - inputLength)),
proximityMatchedCount);
} else {
- // TODO: Calculate the edit distance for transposed char
const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount);
multiplyIntCapped(matchWeight, &finalFreq);
}
@@ -703,7 +703,7 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
/ (10 * inputLength
- WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X + 10);
if (DEBUG_DICT_FULL) {
- LOGI("Demotion rate for missing character is %d.", demotionRate);
+ AKLOGI("Demotion rate for missing character is %d.", demotionRate);
}
multiplyRate(demotionRate, &finalFreq);
}
@@ -717,7 +717,7 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE, &finalFreq);
if (!lastCharExceeded && !proximityInfo->existsAdjacentProximityChars(excessivePos)) {
if (DEBUG_CORRECTION_FREQ) {
- LOGI("Double excessive demotion");
+ AKLOGI("Double excessive demotion");
}
// If an excessive character is not adjacent to the left char or the right char,
// we will demote this word.
@@ -767,7 +767,7 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
for (int i = 0; i < adjustedProximityMatchedCount; ++i) {
// A word with proximity corrections
if (DEBUG_DICT_FULL) {
- LOGI("Found a proximity correction.");
+ AKLOGI("Found a proximity correction.");
}
multiplyIntCapped(typedLetterMultiplier, &finalFreq);
multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);
@@ -828,12 +828,12 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
}
if (DEBUG_DICT_FULL) {
- LOGI("calc: %d, %d", outputIndex, sameLength);
+ AKLOGI("calc: %d, %d", outputIndex, sameLength);
}
if (DEBUG_CORRECTION_FREQ) {
DUMP_WORD(correction->mWord, outputIndex + 1);
- LOGI("FinalFreq: [P%d, S%d, T%d, E%d] %d, %d, %d, %d, %d", proximityMatchedCount,
+ AKLOGI("FinalFreq: [P%d, S%d, T%d, E%d] %d, %d, %d, %d, %d", proximityMatchedCount,
skippedCount, transposedCount, excessiveCount, lastCharExceeded, sameLength,
quoteDiffCount, ed, finalFreq);
}
@@ -874,7 +874,102 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
firstCapitalizedWordDemotion ^ secondCapitalizedWordDemotion;
if (DEBUG_DICT_FULL) {
- LOGI("Two words: %c, %c, %d", word[0], word[firstWordLength + 1], capitalizedWordDemotion);
+ AKLOGI("Two words: %c, %c, %d",
+ word[0], word[firstWordLength + 1], capitalizedWordDemotion);
+ }
+
+ if (firstWordLength == 0 || secondWordLength == 0) {
+ return 0;
+ }
+ const int firstDemotionRate = 100 - 100 / (firstWordLength + 1);
+ int tempFirstFreq = firstFreq;
+ multiplyRate(firstDemotionRate, &tempFirstFreq);
+
+ const int secondDemotionRate = 100 - 100 / (secondWordLength + 1);
+ int tempSecondFreq = secondFreq;
+ multiplyRate(secondDemotionRate, &tempSecondFreq);
+
+ const int totalLength = firstWordLength + secondWordLength;
+
+ // Promote pairFreq with multiplying by 2, because the word length is the same as the typed
+ // length.
+ int totalFreq = tempFirstFreq + tempSecondFreq;
+
+ // This is a workaround to try offsetting the not-enough-demotion which will be done in
+ // calcNormalizedScore in Utils.java.
+ // In calcNormalizedScore the score will be demoted by (1 - 1 / length)
+ // but we demoted only (1 - 1 / (length + 1)) so we will additionally adjust freq by
+ // (1 - 1 / length) / (1 - 1 / (length + 1)) = (1 - 1 / (length * length))
+ const int normalizedScoreNotEnoughDemotionAdjustment = 100 - 100 / (totalLength * totalLength);
+ multiplyRate(normalizedScoreNotEnoughDemotionAdjustment, &totalFreq);
+
+ // At this moment, totalFreq is calculated by the following formula:
+ // (firstFreq * (1 - 1 / (firstWordLength + 1)) + secondFreq * (1 - 1 / (secondWordLength + 1)))
+ // * (1 - 1 / totalLength) / (1 - 1 / (totalLength + 1))
+
+ multiplyIntCapped(powerIntCapped(typedLetterMultiplier, totalLength), &totalFreq);
+
+ // This is another workaround to offset the demotion which will be done in
+ // calcNormalizedScore in Utils.java.
+ // In calcNormalizedScore the score will be demoted by (1 - 1 / length) so we have to promote
+ // the same amount because we already have adjusted the synthetic freq of this "missing or
+ // mistyped space" suggestion candidate above in this method.
+ const int normalizedScoreDemotionRateOffset = (100 + 100 / totalLength);
+ multiplyRate(normalizedScoreDemotionRateOffset, &totalFreq);
+
+ if (isSpaceProximity) {
+ // A word pair with one space proximity correction
+ if (DEBUG_DICT) {
+ AKLOGI("Found a word pair with space proximity correction.");
+ }
+ multiplyIntCapped(typedLetterMultiplier, &totalFreq);
+ multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &totalFreq);
+ }
+
+ multiplyRate(WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE, &totalFreq);
+
+ if (capitalizedWordDemotion) {
+ multiplyRate(TWO_WORDS_CAPITALIZED_DEMOTION_RATE, &totalFreq);
+ }
+
+ return totalFreq;
+}
+
+/* static */
+int Correction::RankingAlgorithm::calcFreqForSplitTwoWordsOld(
+ const int firstFreq, const int secondFreq, const Correction* correction,
+ const unsigned short *word) {
+ const int spaceProximityPos = correction->mSpaceProximityPos;
+ const int missingSpacePos = correction->mMissingSpacePos;
+ if (DEBUG_DICT) {
+ int inputCount = 0;
+ if (spaceProximityPos >= 0) ++inputCount;
+ if (missingSpacePos >= 0) ++inputCount;
+ assert(inputCount <= 1);
+ }
+ const bool isSpaceProximity = spaceProximityPos >= 0;
+ const int inputLength = correction->mInputLength;
+ const int firstWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos;
+ const int secondWordLength = isSpaceProximity ? (inputLength - spaceProximityPos - 1)
+ : (inputLength - missingSpacePos);
+ const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
+
+ bool firstCapitalizedWordDemotion = false;
+ if (firstWordLength >= 2) {
+ firstCapitalizedWordDemotion = isUpperCase(word[0]);
+ }
+
+ bool secondCapitalizedWordDemotion = false;
+ if (secondWordLength >= 2) {
+ secondCapitalizedWordDemotion = isUpperCase(word[firstWordLength + 1]);
+ }
+
+ const bool capitalizedWordDemotion =
+ firstCapitalizedWordDemotion ^ secondCapitalizedWordDemotion;
+
+ if (DEBUG_DICT_FULL) {
+ AKLOGI("Two words: %c, %c, %d",
+ word[0], word[firstWordLength + 1], capitalizedWordDemotion);
}
if (firstWordLength == 0 || secondWordLength == 0) {
@@ -919,7 +1014,7 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
if (isSpaceProximity) {
// A word pair with one space proximity correction
if (DEBUG_DICT) {
- LOGI("Found a word pair with space proximity correction.");
+ AKLOGI("Found a word pair with space proximity correction.");
}
multiplyIntCapped(typedLetterMultiplier, &totalFreq);
multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &totalFreq);
@@ -965,10 +1060,10 @@ inline static int editDistanceInternal(
}
if (DEBUG_EDIT_DISTANCE) {
- LOGI("IN = %d, OUT = %d", beforeLength, afterLength);
+ AKLOGI("IN = %d, OUT = %d", beforeLength, afterLength);
for (int i = 0; i < li; ++i) {
for (int j = 0; j < lo; ++j) {
- LOGI("EDIT[%d][%d], %d", i, j, dp[i * lo + j]);
+ AKLOGI("EDIT[%d][%d], %d", i, j, dp[i * lo + j]);
}
}
}