diff options
Diffstat (limited to 'native/jni/src/correction.cpp')
-rw-r--r-- | native/jni/src/correction.cpp | 165 |
1 files changed, 79 insertions, 86 deletions
diff --git a/native/jni/src/correction.cpp b/native/jni/src/correction.cpp index 99f5b92c1..71f8a4fc8 100644 --- a/native/jni/src/correction.cpp +++ b/native/jni/src/correction.cpp @@ -14,22 +14,22 @@ * limitations under the License. */ -#include <assert.h> -#include <ctype.h> -#include <math.h> -#include <stdio.h> -#include <string.h> +#include <cassert> +#include <cctype> +#include <cmath> +#include <cstring> #define LOG_TAG "LatinIME: correction.cpp" #include "char_utils.h" #include "correction.h" #include "defines.h" -#include "dictionary.h" -#include "proximity_info.h" +#include "proximity_info_state.h" namespace latinime { +class ProximityInfo; + ///////////////////////////// // edit distance funcitons // ///////////////////////////// @@ -94,22 +94,17 @@ inline static int getCurrentEditDistance(int *editDistanceTable, const int editD ////////////////////// // inline functions // ////////////////////// -static const char QUOTE = '\''; +static const char SINGLE_QUOTE = '\''; -inline bool Correction::isQuote(const unsigned short c) { - const unsigned short userTypedChar = mProximityInfo->getPrimaryCharAt(mInputIndex); - return (c == QUOTE && userTypedChar != QUOTE); +inline bool Correction::isSingleQuote(const unsigned short c) { + const unsigned short userTypedChar = mProximityInfoState.getPrimaryCharAt(mInputIndex); + return (c == SINGLE_QUOTE && userTypedChar != SINGLE_QUOTE); } //////////////// // Correction // //////////////// -Correction::Correction(const int typedLetterMultiplier, const int fullWordMultiplier) - : TYPED_LETTER_MULTIPLIER(typedLetterMultiplier), FULL_WORD_MULTIPLIER(fullWordMultiplier) { - initEditDistance(mEditDistanceTable); -} - void Correction::resetCorrection() { mTotalTraverseCount = 0; } @@ -159,11 +154,13 @@ void Correction::checkState() { if (mSkipPos >= 0) ++inputCount; if (mExcessivePos >= 0) ++inputCount; if (mTransposedPos >= 0) ++inputCount; - // TODO: remove this assert - assert(inputCount <= 1); } } +bool Correction::sameAsTyped() { + return mProximityInfoState.sameAsTyped(mWord, mOutputIndex); +} + int Correction::getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray, const int wordCount, const bool isSpaceProximity, const unsigned short *word) { return Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(freqArray, wordLengthArray, @@ -184,10 +181,6 @@ int Correction::getFinalProbabilityInternal(const int probability, unsigned shor const int outputIndex = mTerminalOutputIndex; const int inputIndex = mTerminalInputIndex; *wordLength = outputIndex + 1; - if (outputIndex < MIN_SUGGEST_DEPTH) { - return NOT_A_PROBABILITY; - } - *word = mWord; int finalProbability= Correction::RankingAlgorithm::calculateFinalProbability( inputIndex, outputIndex, probability, mEditDistanceTable, this, inputLength); @@ -233,7 +226,7 @@ int Correction::goDownTree( } // TODO: remove -int Correction::getInputIndex() { +int Correction::getInputIndex() const { return mInputIndex; } @@ -282,7 +275,7 @@ bool Correction::needsToPrune() const { void Correction::addCharToCurrentWord(const int32_t c) { mWord[mOutputIndex] = c; - const unsigned short *primaryInputWord = mProximityInfo->getPrimaryInputWord(); + const unsigned short *primaryInputWord = mProximityInfoState.getPrimaryInputWord(); calcEditDistanceOneStep(mEditDistanceTable, primaryInputWord, mInputLength, mWord, mOutputIndex + 1); } @@ -308,13 +301,12 @@ Correction::CorrectionType Correction::processUnrelatedCorrectionType() { return UNRELATED; } -inline bool isEquivalentChar(ProximityInfo::ProximityType type) { - return type == ProximityInfo::EQUIVALENT_CHAR; +inline bool isEquivalentChar(ProximityType type) { + return type == EQUIVALENT_CHAR; } -inline bool isProximityCharOrEquivalentChar(ProximityInfo::ProximityType type) { - return type == ProximityInfo::EQUIVALENT_CHAR - || type == ProximityInfo::NEAR_PROXIMITY_CHAR; +inline bool isProximityCharOrEquivalentChar(ProximityType type) { + return type == EQUIVALENT_CHAR || type == NEAR_PROXIMITY_CHAR; } Correction::CorrectionType Correction::processCharAndCalcState( @@ -331,25 +323,25 @@ Correction::CorrectionType Correction::processCharAndCalcState( mDistances[mOutputIndex] = NOT_A_DISTANCE; // Skip checking this node - if (mNeedsToTraverseAllNodes || isQuote(c)) { + if (mNeedsToTraverseAllNodes || isSingleQuote(c)) { bool incremented = false; if (mLastCharExceeded && mInputIndex == mInputLength - 1) { // TODO: Do not check the proximity if EditDistance exceeds the threshold - const ProximityInfo::ProximityType matchId = - mProximityInfo->getMatchedProximityId(mInputIndex, c, true, &proximityIndex); + const ProximityType matchId = mProximityInfoState.getMatchedProximityId( + mInputIndex, c, true, &proximityIndex); if (isEquivalentChar(matchId)) { mLastCharExceeded = false; --mExcessiveCount; mDistances[mOutputIndex] = - mProximityInfo->getNormalizedSquaredDistance(mInputIndex, 0); - } else if (matchId == ProximityInfo::NEAR_PROXIMITY_CHAR) { + mProximityInfoState.getNormalizedSquaredDistance(mInputIndex, 0); + } else if (matchId == NEAR_PROXIMITY_CHAR) { mLastCharExceeded = false; --mExcessiveCount; ++mProximityCount; - mDistances[mOutputIndex] = - mProximityInfo->getNormalizedSquaredDistance(mInputIndex, proximityIndex); + mDistances[mOutputIndex] = mProximityInfoState.getNormalizedSquaredDistance( + mInputIndex, proximityIndex); } - if (!isQuote(c)) { + if (!isSingleQuote(c)) { incrementInputIndex(); incremented = true; } @@ -388,7 +380,8 @@ Correction::CorrectionType Correction::processCharAndCalcState( bool secondTransposing = false; if (mTransposedCount % 2 == 1) { - if (isEquivalentChar(mProximityInfo->getMatchedProximityId(mInputIndex - 1, c, false))) { + if (isEquivalentChar(mProximityInfoState.getMatchedProximityId( + mInputIndex - 1, c, false))) { ++mTransposedCount; secondTransposing = true; } else if (mCorrectionStates[mOutputIndex].mExceeding) { @@ -417,17 +410,17 @@ Correction::CorrectionType Correction::processCharAndCalcState( ? (noCorrectionsHappenedSoFar || mProximityCount == 0) : (noCorrectionsHappenedSoFar && mProximityCount == 0); - ProximityInfo::ProximityType matchedProximityCharId = secondTransposing - ? ProximityInfo::EQUIVALENT_CHAR - : mProximityInfo->getMatchedProximityId( + ProximityType matchedProximityCharId = secondTransposing + ? EQUIVALENT_CHAR + : mProximityInfoState.getMatchedProximityId( mInputIndex, c, checkProximityChars, &proximityIndex); - if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId - || ProximityInfo::ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) { + if (UNRELATED_CHAR == matchedProximityCharId + || ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) { if (canTryCorrection && mOutputIndex > 0 && mCorrectionStates[mOutputIndex].mProximityMatching && mCorrectionStates[mOutputIndex].mExceeding - && isEquivalentChar(mProximityInfo->getMatchedProximityId( + && isEquivalentChar(mProximityInfoState.getMatchedProximityId( mInputIndex, mWord[mOutputIndex - 1], false))) { if (DEBUG_CORRECTION && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength) @@ -446,14 +439,14 @@ Correction::CorrectionType Correction::processCharAndCalcState( // Here, we are doing something equivalent to matchedProximityCharId, // but we already know that "excessive char correction" just happened // so that we just need to check "mProximityCount == 0". - matchedProximityCharId = mProximityInfo->getMatchedProximityId( + matchedProximityCharId = mProximityInfoState.getMatchedProximityId( mInputIndex, c, mProximityCount == 0, &proximityIndex); } } - if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId - || ProximityInfo::ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) { - if (ProximityInfo::ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) { + if (UNRELATED_CHAR == matchedProximityCharId + || ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) { + if (ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) { mAdditionalProximityMatching = true; } // TODO: Optimize @@ -463,10 +456,10 @@ Correction::CorrectionType Correction::processCharAndCalcState( if (mInputIndex < mInputLength - 1 && mOutputIndex > 0 && mTransposedCount > 0 && !mCorrectionStates[mOutputIndex].mTransposing && mCorrectionStates[mOutputIndex - 1].mTransposing - && isEquivalentChar(mProximityInfo->getMatchedProximityId( + && isEquivalentChar(mProximityInfoState.getMatchedProximityId( mInputIndex, mWord[mOutputIndex - 1], false)) && isEquivalentChar( - mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false))) { + mProximityInfoState.getMatchedProximityId(mInputIndex + 1, c, false))) { // Conversion t->e // Example: // occaisional -> occa sional @@ -478,7 +471,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( && !mCorrectionStates[mOutputIndex].mTransposing && mCorrectionStates[mOutputIndex - 1].mTransposing && isEquivalentChar( - mProximityInfo->getMatchedProximityId(mInputIndex - 1, c, false))) { + mProximityInfoState.getMatchedProximityId(mInputIndex - 1, c, false))) { // Conversion t->s // Example: // chcolate -> chocolate @@ -490,7 +483,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( && mCorrectionStates[mOutputIndex].mProximityMatching && mCorrectionStates[mOutputIndex].mSkipping && isEquivalentChar( - mProximityInfo->getMatchedProximityId(mInputIndex - 1, c, false))) { + mProximityInfoState.getMatchedProximityId(mInputIndex - 1, c, false))) { // Conversion p->s // Note: This logic tries saving cases like contrst --> contrast -- "a" is one of // proximity chars of "s", but it should rather be handled as a skipped char. @@ -502,7 +495,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( && mCorrectionStates[mOutputIndex].mSkipping && mCorrectionStates[mOutputIndex].mAdditionalProximityMatching && isProximityCharOrEquivalentChar( - mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false))) { + mProximityInfoState.getMatchedProximityId(mInputIndex + 1, c, false))) { // Conversion s->a incrementInputIndex(); --mSkippedCount; @@ -511,7 +504,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( mDistances[mOutputIndex] = ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO; } else if ((mExceeding || mTransposing) && mInputIndex - 1 < mInputLength && isEquivalentChar( - mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false))) { + mProximityInfoState.getMatchedProximityId(mInputIndex + 1, c, false))) { // 1.2. Excessive or transpose correction if (mTransposing) { ++mTransposedCount; @@ -543,7 +536,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( mTransposedCount, mExcessiveCount, c); } return processSkipChar(c, isTerminal, false); - } else if (ProximityInfo::ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) { + } else if (ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) { // As a last resort, use additional proximity characters mProximityMatching = true; ++mProximityCount; @@ -573,12 +566,12 @@ Correction::CorrectionType Correction::processCharAndCalcState( } else if (isEquivalentChar(matchedProximityCharId)) { mMatching = true; ++mEquivalentCharCount; - mDistances[mOutputIndex] = mProximityInfo->getNormalizedSquaredDistance(mInputIndex, 0); - } else if (ProximityInfo::NEAR_PROXIMITY_CHAR == matchedProximityCharId) { + mDistances[mOutputIndex] = mProximityInfoState.getNormalizedSquaredDistance(mInputIndex, 0); + } else if (NEAR_PROXIMITY_CHAR == matchedProximityCharId) { mProximityMatching = true; ++mProximityCount; mDistances[mOutputIndex] = - mProximityInfo->getNormalizedSquaredDistance(mInputIndex, proximityIndex); + mProximityInfoState.getNormalizedSquaredDistance(mInputIndex, proximityIndex); if (DEBUG_CORRECTION && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength) && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 @@ -637,7 +630,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( Correction::~Correction() { } -inline static int getQuoteCount(const unsigned short* word, const int length) { +inline static int getQuoteCount(const unsigned short *word, const int length) { int quoteCount = 0; for (int i = 0; i < length; ++i) { if(word[i] == '\'') { @@ -657,12 +650,12 @@ inline static bool isUpperCase(unsigned short c) { /* static */ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex, - const int outputIndex, const int freq, int* editDistanceTable, const Correction* correction, + const int outputIndex, const int freq, int *editDistanceTable, const Correction *correction, const int inputLength) { const int excessivePos = correction->getExcessivePos(); const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER; const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER; - const ProximityInfo *proximityInfo = correction->mProximityInfo; + const ProximityInfoState *proximityInfoState = &correction->mProximityInfoState; const int skippedCount = correction->mSkippedCount; const int transposedCount = correction->mTransposedCount / 2; const int excessiveCount = correction->mExcessiveCount + correction->mTransposedCount % 2; @@ -681,11 +674,11 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex // TODO: use mExcessiveCount const int matchCount = inputLength - correction->mProximityCount - excessiveCount; - const unsigned short* word = correction->mWord; + const unsigned short *word = correction->mWord; const bool skipped = skippedCount > 0; const int quoteDiffCount = max(0, getQuoteCount(word, outputLength) - - getQuoteCount(proximityInfo->getPrimaryInputWord(), inputLength)); + - getQuoteCount(proximityInfoState->getPrimaryInputWord(), inputLength)); // TODO: Calculate edit distance for transposed and excessive int ed = 0; @@ -737,8 +730,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex multiplyIntCapped(matchWeight, &finalFreq); } - if (proximityInfo->getMatchedProximityId(0, word[0], true) - == ProximityInfo::UNRELATED_CHAR) { + if (proximityInfoState->getMatchedProximityId(0, word[0], true) == UNRELATED_CHAR) { multiplyRate(FIRST_CHAR_DIFFERENT_DEMOTION_RATE, &finalFreq); } @@ -764,7 +756,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex // Demotion for a word with excessive character if (excessiveCount > 0) { multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE, &finalFreq); - if (!lastCharExceeded && !proximityInfo->existsAdjacentProximityChars(excessivePos)) { + if (!lastCharExceeded && !proximityInfoState->existsAdjacentProximityChars(excessivePos)) { if (DEBUG_DICT_FULL) { AKLOGI("Double excessive demotion"); } @@ -775,8 +767,9 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex } const bool performTouchPositionCorrection = - CALIBRATE_SCORE_BY_TOUCH_COORDINATES && proximityInfo->touchPositionCorrectionEnabled() - && skippedCount == 0 && excessiveCount == 0 && transposedCount == 0; + CALIBRATE_SCORE_BY_TOUCH_COORDINATES + && proximityInfoState->touchPositionCorrectionEnabled() + && skippedCount == 0 && excessiveCount == 0 && transposedCount == 0; // Score calibration by touch coordinates is being done only for pure-fat finger typing error // cases. int additionalProximityCount = 0; @@ -795,8 +788,8 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex static const float MIN = 0.3f; static const float R1 = NEUTRAL_SCORE_SQUARED_RADIUS; static const float R2 = HALF_SCORE_SQUARED_RADIUS; - const float x = (float)squaredDistance - / ProximityInfo::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR; + const float x = static_cast<float>(squaredDistance) + / ProximityInfoState::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR; const float factor = max((x < R1) ? (A * (R1 - x) + B * x) / R1 : (B * (R2 - x) + C * (x - R1)) / (R2 - R1), MIN); @@ -907,7 +900,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex if (DEBUG_CORRECTION_FREQ && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == inputLength)) { - DUMP_WORD(proximityInfo->getPrimaryInputWord(), inputLength); + DUMP_WORD(correction->getPrimaryInputWord(), inputLength); DUMP_WORD(correction->mWord, outputLength); AKLOGI("FinalFreq: [P%d, S%d, T%d, E%d, A%d] %d, %d, %d, %d, %d, %d", proximityMatchedCount, skippedCount, transposedCount, excessiveCount, additionalProximityCount, @@ -920,7 +913,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex /* static */ int Correction::RankingAlgorithm::calcFreqForSplitMultipleWords( const int *freqArray, const int *wordLengthArray, const int wordCount, - const Correction* correction, const bool isSpaceProximity, const unsigned short *word) { + const Correction *correction, const bool isSpaceProximity, const unsigned short *word) { const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER; bool firstCapitalizedWordDemotion = false; @@ -1050,10 +1043,10 @@ int Correction::RankingAlgorithm::calcFreqForSplitMultipleWords( /* Damerau-Levenshtein distance */ inline static int editDistanceInternal( - int* editDistanceTable, const unsigned short* before, - const int beforeLength, const unsigned short* after, const int afterLength) { + int *editDistanceTable, const unsigned short *before, + const int beforeLength, const unsigned short *after, const int afterLength) { // dp[li][lo] dp[a][b] = dp[ a * lo + b] - int* dp = editDistanceTable; + int *dp = editDistanceTable; const int li = beforeLength + 1; const int lo = afterLength + 1; for (int i = 0; i < li; ++i) { @@ -1089,8 +1082,8 @@ inline static int editDistanceInternal( return dp[li * lo - 1]; } -int Correction::RankingAlgorithm::editDistance(const unsigned short* before, - const int beforeLength, const unsigned short* after, const int afterLength) { +int Correction::RankingAlgorithm::editDistance(const unsigned short *before, + const int beforeLength, const unsigned short *after, const int afterLength) { int table[(beforeLength + 1) * (afterLength + 1)]; return editDistanceInternal(table, before, beforeLength, after, afterLength); } @@ -1099,7 +1092,7 @@ int Correction::RankingAlgorithm::editDistance(const unsigned short* before, // In dictionary.cpp, getSuggestion() method, // suggestion scores are computed using the below formula. // original score -// := pow(mTypedLetterMultiplier (this is defined 2), +// := powf(mTypedLetterMultiplier (this is defined 2), // (the number of matched characters between typed word and suggested word)) // * (individual word's score which defined in the unigram dictionary, // and this score is defined in range [0, 255].) @@ -1111,15 +1104,15 @@ int Correction::RankingAlgorithm::editDistance(const unsigned short* before, // capitalization, then treat it as if the score was 255. // - If before.length() == after.length() // => multiply by mFullWordMultiplier (this is defined 2)) -// So, maximum original score is pow(2, min(before.length(), after.length())) * 255 * 2 * 1.2 +// So, maximum original score is powf(2, min(before.length(), after.length())) * 255 * 2 * 1.2 // For historical reasons we ignore the 1.2 modifier (because the measure for a good // autocorrection threshold was done at a time when it didn't exist). This doesn't change // the result. -// So, we can normalize original score by dividing pow(2, min(b.l(),a.l())) * 255 * 2. +// So, we can normalize original score by dividing powf(2, min(b.l(),a.l())) * 255 * 2. /* static */ -float Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short* before, - const int beforeLength, const unsigned short* after, const int afterLength, +float Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short *before, + const int beforeLength, const unsigned short *after, const int afterLength, const int score) { if (0 == beforeLength || 0 == afterLength) { return 0; @@ -1137,14 +1130,14 @@ float Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short* be } const float maxScore = score >= S_INT_MAX ? S_INT_MAX : MAX_INITIAL_SCORE - * pow((float)TYPED_LETTER_MULTIPLIER, - (float)min(beforeLength, afterLength - spaceCount)) * FULL_WORD_MULTIPLIER; + * powf(static_cast<float>(TYPED_LETTER_MULTIPLIER), + static_cast<float>(min(beforeLength, afterLength - spaceCount))) + * FULL_WORD_MULTIPLIER; // add a weight based on edit distance. // distance <= max(afterLength, beforeLength) == afterLength, // so, 0 <= distance / afterLength <= 1 - const float weight = 1.0 - (float) distance / afterLength; + const float weight = 1.0f - static_cast<float>(distance) / static_cast<float>(afterLength); return (score / maxScore) * weight; } - } // namespace latinime |