diff options
Diffstat (limited to 'native/src/correction.cpp')
-rw-r--r-- | native/src/correction.cpp | 68 |
1 files changed, 51 insertions, 17 deletions
diff --git a/native/src/correction.cpp b/native/src/correction.cpp index a931a61fb..654d4715f 100644 --- a/native/src/correction.cpp +++ b/native/src/correction.cpp @@ -49,12 +49,11 @@ void Correction::initCorrection(const ProximityInfo *pi, const int inputLength, mInputLength = inputLength; mMaxDepth = maxDepth; mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2; - mSkippedOutputIndex = -1; } void Correction::initCorrectionState( const int rootPos, const int childCount, const bool traverseAll) { - mCorrectionStates[0].init(rootPos, childCount, traverseAll); + latinime::initCorrectionState(mCorrectionStates, rootPos, childCount, traverseAll); } void Correction::setCorrectionParams(const int skipPos, const int excessivePos, @@ -88,6 +87,12 @@ int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLen if (mProximityInfo->sameAsTyped(mWord, outputIndex + 1) || outputIndex < MIN_SUGGEST_DEPTH) { return -1; } + + // TODO: Remove this + if (mSkipPos >= 0 && mSkippedCount <= 0) { + return -1; + } + *word = mWord; const bool sameLength = (mExcessivePos == mInputLength - 1) ? (mInputLength == inputIndex + 2) : (mInputLength == inputIndex + 1); @@ -103,8 +108,11 @@ bool Correction::initProcessState(const int outputIndex) { --(mCorrectionStates[outputIndex].mChildCount); mMatchedCharCount = mCorrectionStates[outputIndex].mMatchedCount; mInputIndex = mCorrectionStates[outputIndex].mInputIndex; - mTraverseAllNodes = mCorrectionStates[outputIndex].mTraverseAll; + mNeedsToTraverseAllNodes = mCorrectionStates[outputIndex].mNeedsToTraverseAllNodes; mDiffs = mCorrectionStates[outputIndex].mDiffs; + mSkippedCount = mCorrectionStates[outputIndex].mSkippedCount; + mSkipping = false; + mMatching = false; return true; } @@ -131,8 +139,8 @@ int Correction::getInputIndex() { } // TODO: remove -bool Correction::needsToTraverseAll() { - return mTraverseAllNodes; +bool Correction::needsToTraverseAllNodes() { + return mNeedsToTraverseAllNodes; } void Correction::incrementInputIndex() { @@ -146,12 +154,15 @@ void Correction::incrementOutputIndex() { mCorrectionStates[mOutputIndex].mSiblingPos = mCorrectionStates[mOutputIndex - 1].mSiblingPos; mCorrectionStates[mOutputIndex].mMatchedCount = mMatchedCharCount; mCorrectionStates[mOutputIndex].mInputIndex = mInputIndex; - mCorrectionStates[mOutputIndex].mTraverseAll = mTraverseAllNodes; + mCorrectionStates[mOutputIndex].mNeedsToTraverseAllNodes = mNeedsToTraverseAllNodes; mCorrectionStates[mOutputIndex].mDiffs = mDiffs; + mCorrectionStates[mOutputIndex].mSkippedCount = mSkippedCount; + mCorrectionStates[mOutputIndex].mSkipping = mSkipping; + mCorrectionStates[mOutputIndex].mMatching = mMatching; } -void Correction::startTraverseAll() { - mTraverseAllNodes = true; +void Correction::startToTraverseAllNodes() { + mNeedsToTraverseAllNodes = true; } bool Correction::needsToPrune() const { @@ -162,7 +173,7 @@ bool Correction::needsToPrune() const { Correction::CorrectionType Correction::processSkipChar( const int32_t c, const bool isTerminal) { mWord[mOutputIndex] = c; - if (needsToTraverseAll() && isTerminal) { + if (needsToTraverseAllNodes() && isTerminal) { mTerminalInputIndex = mInputIndex; mTerminalOutputIndex = mOutputIndex; incrementOutputIndex(); @@ -185,9 +196,10 @@ Correction::CorrectionType Correction::processCharAndCalcState( bool skip = false; if (mSkipPos >= 0) { skip = mSkipPos == mOutputIndex; + mSkipping = true; } - if (mTraverseAllNodes || isQuote(c)) { + if (mNeedsToTraverseAllNodes || isQuote(c)) { return processSkipChar(c, isTerminal); } else { int inputIndexForProximity = mInputIndex; @@ -210,25 +222,23 @@ Correction::CorrectionType Correction::processCharAndCalcState( if (unrelated) { if (skip) { // Skip this letter and continue deeper - mSkippedOutputIndex = mOutputIndex; + ++mSkippedCount; return processSkipChar(c, isTerminal); } else { return UNRELATED; } } - // No need to skip. Finish traversing and increment skipPos. - // TODO: Remove this? + // TODO: remove after allowing combination errors if (skip) { - mWord[mOutputIndex] = c; - incrementOutputIndex(); - return TRAVERSE_ALL_NOT_ON_TERMINAL; + return UNRELATED; } mWord[mOutputIndex] = c; // If inputIndex is greater than mInputLength, that means there is no // proximity chars. So, we don't need to check proximity. if (ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) { + mMatching = true; charMatched(); } @@ -247,7 +257,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( } // Start traversing all nodes after the index exceeds the user typed length if (isSameAsUserTypedLength) { - startTraverseAll(); + startToTraverseAllNodes(); } // Finally, we are ready to go to the next character, the next "virtual node". @@ -317,6 +327,7 @@ inline static void multiplyRate(const int rate, int *freq) { // RankingAlgorithm // ////////////////////// +/* static */ int Correction::RankingAlgorithm::calculateFinalFreq( const int inputIndex, const int outputIndex, const int matchCount, const int freq, const bool sameLength, @@ -329,6 +340,8 @@ int Correction::RankingAlgorithm::calculateFinalFreq( const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER; const ProximityInfo *proximityInfo = correction->mProximityInfo; const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount); + const unsigned short* word = correction->mWord; + const int skippedCount = correction->mSkippedCount; // TODO: Demote by edit distance int finalFreq = freq * matchWeight; @@ -382,9 +395,30 @@ int Correction::RankingAlgorithm::calculateFinalFreq( LOGI("calc: %d, %d", outputIndex, sameLength); } if (sameLength) multiplyIntCapped(fullWordMultiplier, &finalFreq); + + // TODO: check excessive count and transposed count + /* + If the last character of the user input word is the same as the next character + of the output word, and also all of characters of the user input are matched + to the output word, we'll promote that word a bit because + that word can be considered the combination of skipped and matched characters. + This means that the 'sm' pattern wins over the 'ma' pattern. + e.g.) + shel -> shell [mmmma] or [mmmsm] + hel -> hello [mmmaa] or [mmsma] + m ... matching + s ... skipping + a ... traversing all + */ + if (matchCount == inputLength && matchCount >= 2 && skippedCount == 0 + && word[matchCount] == word[matchCount - 1]) { + multiplyRate(WORDS_WITH_MATCH_SKIP_PROMOTION_RATE, &finalFreq); + } + return finalFreq; } +/* static */ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords( const int firstFreq, const int secondFreq, const Correction* correction) { const int spaceProximityPos = correction->mSpaceProximityPos; |