diff options
Diffstat (limited to 'native/jni/src/correction.cpp')
-rw-r--r-- | native/jni/src/correction.cpp | 1145 |
1 files changed, 1145 insertions, 0 deletions
diff --git a/native/jni/src/correction.cpp b/native/jni/src/correction.cpp new file mode 100644 index 000000000..827067b9f --- /dev/null +++ b/native/jni/src/correction.cpp @@ -0,0 +1,1145 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <assert.h> +#include <ctype.h> +#include <math.h> +#include <stdio.h> +#include <string.h> + +#define LOG_TAG "LatinIME: correction.cpp" + +#include "char_utils.h" +#include "correction.h" +#include "defines.h" +#include "dictionary.h" +#include "proximity_info.h" +#include "proximity_info_state.h" + +namespace latinime { + +///////////////////////////// +// edit distance funcitons // +///////////////////////////// + +inline static void initEditDistance(int *editDistanceTable) { + for (int i = 0; i <= MAX_WORD_LENGTH_INTERNAL; ++i) { + editDistanceTable[i] = i; + } +} + +inline static void dumpEditDistance10ForDebug(int *editDistanceTable, + const int editDistanceTableWidth, const int outputLength) { + if (DEBUG_DICT) { + AKLOGI("EditDistanceTable"); + for (int i = 0; i <= 10; ++i) { + int c[11]; + for (int j = 0; j <= 10; ++j) { + if (j < editDistanceTableWidth + 1 && i < outputLength + 1) { + c[j] = (editDistanceTable + i * (editDistanceTableWidth + 1))[j]; + } else { + c[j] = -1; + } + } + AKLOGI("[ %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d ]", + c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7], c[8], c[9], c[10]); + (void)c; + } + } +} + +inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigned short *input, + const int inputLength, const unsigned short *output, const int outputLength) { + // TODO: Make sure that editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL] is not touched. + // Let dp[i][j] be editDistanceTable[i * (inputLength + 1) + j]. + // Assuming that dp[0][0] ... dp[outputLength - 1][inputLength] are already calculated, + // and calculate dp[ouputLength][0] ... dp[outputLength][inputLength]. + int *const current = editDistanceTable + outputLength * (inputLength + 1); + const int *const prev = editDistanceTable + (outputLength - 1) * (inputLength + 1); + const int *const prevprev = + outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputLength + 1) : 0; + current[0] = outputLength; + const uint32_t co = toBaseLowerCase(output[outputLength - 1]); + const uint32_t prevCO = outputLength >= 2 ? toBaseLowerCase(output[outputLength - 2]) : 0; + for (int i = 1; i <= inputLength; ++i) { + const uint32_t ci = toBaseLowerCase(input[i - 1]); + const uint16_t cost = (ci == co) ? 0 : 1; + current[i] = min(current[i - 1] + 1, min(prev[i] + 1, prev[i - 1] + cost)); + if (i >= 2 && prevprev && ci == prevCO && co == toBaseLowerCase(input[i - 2])) { + current[i] = min(current[i], prevprev[i - 2] + 1); + } + } +} + +inline static int getCurrentEditDistance(int *editDistanceTable, const int editDistanceTableWidth, + const int outputLength, const int inputLength) { + if (DEBUG_EDIT_DISTANCE) { + AKLOGI("getCurrentEditDistance %d, %d", inputLength, outputLength); + } + return editDistanceTable[(editDistanceTableWidth + 1) * (outputLength) + inputLength]; +} + +////////////////////// +// inline functions // +////////////////////// +static const char QUOTE = '\''; + +inline bool Correction::isQuote(const unsigned short c) { + const unsigned short userTypedChar = mProximityInfoState.getPrimaryCharAt(mInputIndex); + return (c == QUOTE && userTypedChar != QUOTE); +} + +//////////////// +// Correction // +//////////////// + +void Correction::resetCorrection() { + mTotalTraverseCount = 0; +} + +void Correction::initCorrection(const ProximityInfo *pi, const int inputLength, + const int maxDepth) { + mProximityInfo = pi; + mInputLength = inputLength; + mMaxDepth = maxDepth; + mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2; + // TODO: This is not supposed to be required. Check what's going wrong with + // editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL] + initEditDistance(mEditDistanceTable); +} + +void Correction::initCorrectionState( + const int rootPos, const int childCount, const bool traverseAll) { + latinime::initCorrectionState(mCorrectionStates, rootPos, childCount, traverseAll); + // TODO: remove + mCorrectionStates[0].mTransposedPos = mTransposedPos; + mCorrectionStates[0].mExcessivePos = mExcessivePos; + mCorrectionStates[0].mSkipPos = mSkipPos; +} + +void Correction::setCorrectionParams(const int skipPos, const int excessivePos, + const int transposedPos, const int spaceProximityPos, const int missingSpacePos, + const bool useFullEditDistance, const bool doAutoCompletion, const int maxErrors) { + // TODO: remove + mTransposedPos = transposedPos; + mExcessivePos = excessivePos; + mSkipPos = skipPos; + // TODO: remove + mCorrectionStates[0].mTransposedPos = transposedPos; + mCorrectionStates[0].mExcessivePos = excessivePos; + mCorrectionStates[0].mSkipPos = skipPos; + + mSpaceProximityPos = spaceProximityPos; + mMissingSpacePos = missingSpacePos; + mUseFullEditDistance = useFullEditDistance; + mDoAutoCompletion = doAutoCompletion; + mMaxErrors = maxErrors; +} + +void Correction::checkState() { + if (DEBUG_DICT) { + int inputCount = 0; + if (mSkipPos >= 0) ++inputCount; + if (mExcessivePos >= 0) ++inputCount; + if (mTransposedPos >= 0) ++inputCount; + // TODO: remove this assert + assert(inputCount <= 1); + } +} + +int Correction::getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray, + const int wordCount, const bool isSpaceProximity, const unsigned short *word) { + return Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(freqArray, wordLengthArray, + wordCount, this, isSpaceProximity, word); +} + +int Correction::getFinalProbability(const int probability, unsigned short **word, int *wordLength) { + return getFinalProbabilityInternal(probability, word, wordLength, mInputLength); +} + +int Correction::getFinalProbabilityForSubQueue(const int probability, unsigned short **word, + int *wordLength, const int inputLength) { + return getFinalProbabilityInternal(probability, word, wordLength, inputLength); +} + +int Correction::getFinalProbabilityInternal(const int probability, unsigned short **word, + int *wordLength, const int inputLength) { + const int outputIndex = mTerminalOutputIndex; + const int inputIndex = mTerminalInputIndex; + *wordLength = outputIndex + 1; + if (outputIndex < MIN_SUGGEST_DEPTH) { + return NOT_A_PROBABILITY; + } + + *word = mWord; + int finalProbability= Correction::RankingAlgorithm::calculateFinalProbability( + inputIndex, outputIndex, probability, mEditDistanceTable, this, inputLength); + return finalProbability; +} + +bool Correction::initProcessState(const int outputIndex) { + if (mCorrectionStates[outputIndex].mChildCount <= 0) { + return false; + } + mOutputIndex = outputIndex; + --(mCorrectionStates[outputIndex].mChildCount); + mInputIndex = mCorrectionStates[outputIndex].mInputIndex; + mNeedsToTraverseAllNodes = mCorrectionStates[outputIndex].mNeedsToTraverseAllNodes; + + mEquivalentCharCount = mCorrectionStates[outputIndex].mEquivalentCharCount; + mProximityCount = mCorrectionStates[outputIndex].mProximityCount; + mTransposedCount = mCorrectionStates[outputIndex].mTransposedCount; + mExcessiveCount = mCorrectionStates[outputIndex].mExcessiveCount; + mSkippedCount = mCorrectionStates[outputIndex].mSkippedCount; + mLastCharExceeded = mCorrectionStates[outputIndex].mLastCharExceeded; + + mTransposedPos = mCorrectionStates[outputIndex].mTransposedPos; + mExcessivePos = mCorrectionStates[outputIndex].mExcessivePos; + mSkipPos = mCorrectionStates[outputIndex].mSkipPos; + + mMatching = false; + mProximityMatching = false; + mAdditionalProximityMatching = false; + mTransposing = false; + mExceeding = false; + mSkipping = false; + + return true; +} + +int Correction::goDownTree( + const int parentIndex, const int childCount, const int firstChildPos) { + mCorrectionStates[mOutputIndex].mParentIndex = parentIndex; + mCorrectionStates[mOutputIndex].mChildCount = childCount; + mCorrectionStates[mOutputIndex].mSiblingPos = firstChildPos; + return mOutputIndex; +} + +// TODO: remove +int Correction::getInputIndex() { + return mInputIndex; +} + +void Correction::incrementInputIndex() { + ++mInputIndex; +} + +void Correction::incrementOutputIndex() { + ++mOutputIndex; + mCorrectionStates[mOutputIndex].mParentIndex = mCorrectionStates[mOutputIndex - 1].mParentIndex; + mCorrectionStates[mOutputIndex].mChildCount = mCorrectionStates[mOutputIndex - 1].mChildCount; + mCorrectionStates[mOutputIndex].mSiblingPos = mCorrectionStates[mOutputIndex - 1].mSiblingPos; + mCorrectionStates[mOutputIndex].mInputIndex = mInputIndex; + mCorrectionStates[mOutputIndex].mNeedsToTraverseAllNodes = mNeedsToTraverseAllNodes; + + mCorrectionStates[mOutputIndex].mEquivalentCharCount = mEquivalentCharCount; + mCorrectionStates[mOutputIndex].mProximityCount = mProximityCount; + mCorrectionStates[mOutputIndex].mTransposedCount = mTransposedCount; + mCorrectionStates[mOutputIndex].mExcessiveCount = mExcessiveCount; + mCorrectionStates[mOutputIndex].mSkippedCount = mSkippedCount; + + mCorrectionStates[mOutputIndex].mSkipPos = mSkipPos; + mCorrectionStates[mOutputIndex].mTransposedPos = mTransposedPos; + mCorrectionStates[mOutputIndex].mExcessivePos = mExcessivePos; + + mCorrectionStates[mOutputIndex].mLastCharExceeded = mLastCharExceeded; + + mCorrectionStates[mOutputIndex].mMatching = mMatching; + mCorrectionStates[mOutputIndex].mProximityMatching = mProximityMatching; + mCorrectionStates[mOutputIndex].mAdditionalProximityMatching = mAdditionalProximityMatching; + mCorrectionStates[mOutputIndex].mTransposing = mTransposing; + mCorrectionStates[mOutputIndex].mExceeding = mExceeding; + mCorrectionStates[mOutputIndex].mSkipping = mSkipping; +} + +void Correction::startToTraverseAllNodes() { + mNeedsToTraverseAllNodes = true; +} + +bool Correction::needsToPrune() const { + // TODO: use edit distance here + return mOutputIndex - 1 >= mMaxDepth || mProximityCount > mMaxEditDistance + // Allow one char longer word for missing character + || (!mDoAutoCompletion && (mOutputIndex > mInputLength)); +} + +void Correction::addCharToCurrentWord(const int32_t c) { + mWord[mOutputIndex] = c; + const unsigned short *primaryInputWord = mProximityInfoState.getPrimaryInputWord(); + calcEditDistanceOneStep(mEditDistanceTable, primaryInputWord, mInputLength, + mWord, mOutputIndex + 1); +} + +Correction::CorrectionType Correction::processSkipChar( + const int32_t c, const bool isTerminal, const bool inputIndexIncremented) { + addCharToCurrentWord(c); + mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0); + mTerminalOutputIndex = mOutputIndex; + if (mNeedsToTraverseAllNodes && isTerminal) { + incrementOutputIndex(); + return TRAVERSE_ALL_ON_TERMINAL; + } else { + incrementOutputIndex(); + return TRAVERSE_ALL_NOT_ON_TERMINAL; + } +} + +Correction::CorrectionType Correction::processUnrelatedCorrectionType() { + // Needs to set mTerminalInputIndex and mTerminalOutputIndex before returning any CorrectionType + mTerminalInputIndex = mInputIndex; + mTerminalOutputIndex = mOutputIndex; + return UNRELATED; +} + +inline bool isEquivalentChar(ProximityType type) { + return type == EQUIVALENT_CHAR; +} + +inline bool isProximityCharOrEquivalentChar(ProximityType type) { + return type == EQUIVALENT_CHAR || type == NEAR_PROXIMITY_CHAR; +} + +Correction::CorrectionType Correction::processCharAndCalcState( + const int32_t c, const bool isTerminal) { + const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount); + if (correctionCount > mMaxErrors) { + return processUnrelatedCorrectionType(); + } + + // TODO: Change the limit if we'll allow two or more corrections + const bool noCorrectionsHappenedSoFar = correctionCount == 0; + const bool canTryCorrection = noCorrectionsHappenedSoFar; + int proximityIndex = 0; + mDistances[mOutputIndex] = NOT_A_DISTANCE; + + // Skip checking this node + if (mNeedsToTraverseAllNodes || isQuote(c)) { + bool incremented = false; + if (mLastCharExceeded && mInputIndex == mInputLength - 1) { + // TODO: Do not check the proximity if EditDistance exceeds the threshold + const ProximityType matchId = mProximityInfoState.getMatchedProximityId( + mInputIndex, c, true, &proximityIndex); + if (isEquivalentChar(matchId)) { + mLastCharExceeded = false; + --mExcessiveCount; + mDistances[mOutputIndex] = + mProximityInfoState.getNormalizedSquaredDistance(mInputIndex, 0); + } else if (matchId == NEAR_PROXIMITY_CHAR) { + mLastCharExceeded = false; + --mExcessiveCount; + ++mProximityCount; + mDistances[mOutputIndex] = mProximityInfoState.getNormalizedSquaredDistance( + mInputIndex, proximityIndex); + } + if (!isQuote(c)) { + incrementInputIndex(); + incremented = true; + } + } + return processSkipChar(c, isTerminal, incremented); + } + + // Check possible corrections. + if (mExcessivePos >= 0) { + if (mExcessiveCount == 0 && mExcessivePos < mOutputIndex) { + mExcessivePos = mOutputIndex; + } + if (mExcessivePos < mInputLength - 1) { + mExceeding = mExcessivePos == mInputIndex && canTryCorrection; + } + } + + if (mSkipPos >= 0) { + if (mSkippedCount == 0 && mSkipPos < mOutputIndex) { + if (DEBUG_DICT) { + assert(mSkipPos == mOutputIndex - 1); + } + mSkipPos = mOutputIndex; + } + mSkipping = mSkipPos == mOutputIndex && canTryCorrection; + } + + if (mTransposedPos >= 0) { + if (mTransposedCount == 0 && mTransposedPos < mOutputIndex) { + mTransposedPos = mOutputIndex; + } + if (mTransposedPos < mInputLength - 1) { + mTransposing = mInputIndex == mTransposedPos && canTryCorrection; + } + } + + bool secondTransposing = false; + if (mTransposedCount % 2 == 1) { + if (isEquivalentChar(mProximityInfoState.getMatchedProximityId( + mInputIndex - 1, c, false))) { + ++mTransposedCount; + secondTransposing = true; + } else if (mCorrectionStates[mOutputIndex].mExceeding) { + --mTransposedCount; + ++mExcessiveCount; + --mExcessivePos; + incrementInputIndex(); + } else { + --mTransposedCount; + if (DEBUG_CORRECTION + && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength) + && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 + || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) { + DUMP_WORD(mWord, mOutputIndex); + AKLOGI("UNRELATED(0): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, + mTransposedCount, mExcessiveCount, c); + } + return processUnrelatedCorrectionType(); + } + } + + // TODO: Change the limit if we'll allow two or more proximity chars with corrections + // Work around: When the mMaxErrors is 1, we only allow just one error + // including proximity correction. + const bool checkProximityChars = (mMaxErrors > 1) + ? (noCorrectionsHappenedSoFar || mProximityCount == 0) + : (noCorrectionsHappenedSoFar && mProximityCount == 0); + + ProximityType matchedProximityCharId = secondTransposing + ? EQUIVALENT_CHAR + : mProximityInfoState.getMatchedProximityId( + mInputIndex, c, checkProximityChars, &proximityIndex); + + if (UNRELATED_CHAR == matchedProximityCharId + || ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) { + if (canTryCorrection && mOutputIndex > 0 + && mCorrectionStates[mOutputIndex].mProximityMatching + && mCorrectionStates[mOutputIndex].mExceeding + && isEquivalentChar(mProximityInfoState.getMatchedProximityId( + mInputIndex, mWord[mOutputIndex - 1], false))) { + if (DEBUG_CORRECTION + && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength) + && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 + || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) { + AKLOGI("CONVERSION p->e %c", mWord[mOutputIndex - 1]); + } + // Conversion p->e + // Example: + // wearth -> earth + // px -> (E)mmmmm + ++mExcessiveCount; + --mProximityCount; + mExcessivePos = mOutputIndex - 1; + ++mInputIndex; + // Here, we are doing something equivalent to matchedProximityCharId, + // but we already know that "excessive char correction" just happened + // so that we just need to check "mProximityCount == 0". + matchedProximityCharId = mProximityInfoState.getMatchedProximityId( + mInputIndex, c, mProximityCount == 0, &proximityIndex); + } + } + + if (UNRELATED_CHAR == matchedProximityCharId + || ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) { + if (ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) { + mAdditionalProximityMatching = true; + } + // TODO: Optimize + // As the current char turned out to be an unrelated char, + // we will try other correction-types. Please note that mCorrectionStates[mOutputIndex] + // here refers to the previous state. + if (mInputIndex < mInputLength - 1 && mOutputIndex > 0 && mTransposedCount > 0 + && !mCorrectionStates[mOutputIndex].mTransposing + && mCorrectionStates[mOutputIndex - 1].mTransposing + && isEquivalentChar(mProximityInfoState.getMatchedProximityId( + mInputIndex, mWord[mOutputIndex - 1], false)) + && isEquivalentChar( + mProximityInfoState.getMatchedProximityId(mInputIndex + 1, c, false))) { + // Conversion t->e + // Example: + // occaisional -> occa sional + // mmmmttx -> mmmm(E)mmmmmm + mTransposedCount -= 2; + ++mExcessiveCount; + ++mInputIndex; + } else if (mOutputIndex > 0 && mInputIndex > 0 && mTransposedCount > 0 + && !mCorrectionStates[mOutputIndex].mTransposing + && mCorrectionStates[mOutputIndex - 1].mTransposing + && isEquivalentChar( + mProximityInfoState.getMatchedProximityId(mInputIndex - 1, c, false))) { + // Conversion t->s + // Example: + // chcolate -> chocolate + // mmttx -> mmsmmmmmm + mTransposedCount -= 2; + ++mSkippedCount; + --mInputIndex; + } else if (canTryCorrection && mInputIndex > 0 + && mCorrectionStates[mOutputIndex].mProximityMatching + && mCorrectionStates[mOutputIndex].mSkipping + && isEquivalentChar( + mProximityInfoState.getMatchedProximityId(mInputIndex - 1, c, false))) { + // Conversion p->s + // Note: This logic tries saving cases like contrst --> contrast -- "a" is one of + // proximity chars of "s", but it should rather be handled as a skipped char. + ++mSkippedCount; + --mProximityCount; + return processSkipChar(c, isTerminal, false); + } else if (mInputIndex - 1 < mInputLength + && mSkippedCount > 0 + && mCorrectionStates[mOutputIndex].mSkipping + && mCorrectionStates[mOutputIndex].mAdditionalProximityMatching + && isProximityCharOrEquivalentChar( + mProximityInfoState.getMatchedProximityId(mInputIndex + 1, c, false))) { + // Conversion s->a + incrementInputIndex(); + --mSkippedCount; + mProximityMatching = true; + ++mProximityCount; + mDistances[mOutputIndex] = ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO; + } else if ((mExceeding || mTransposing) && mInputIndex - 1 < mInputLength + && isEquivalentChar( + mProximityInfoState.getMatchedProximityId(mInputIndex + 1, c, false))) { + // 1.2. Excessive or transpose correction + if (mTransposing) { + ++mTransposedCount; + } else { + ++mExcessiveCount; + incrementInputIndex(); + } + if (DEBUG_CORRECTION + && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength) + && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 + || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) { + DUMP_WORD(mWord, mOutputIndex); + if (mTransposing) { + AKLOGI("TRANSPOSE: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, + mTransposedCount, mExcessiveCount, c); + } else { + AKLOGI("EXCEED: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, + mTransposedCount, mExcessiveCount, c); + } + } + } else if (mSkipping) { + // 3. Skip correction + ++mSkippedCount; + if (DEBUG_CORRECTION + && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength) + && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 + || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) { + AKLOGI("SKIP: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, + mTransposedCount, mExcessiveCount, c); + } + return processSkipChar(c, isTerminal, false); + } else if (ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) { + // As a last resort, use additional proximity characters + mProximityMatching = true; + ++mProximityCount; + mDistances[mOutputIndex] = ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO; + if (DEBUG_CORRECTION + && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength) + && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 + || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) { + AKLOGI("ADDITIONALPROX: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, + mTransposedCount, mExcessiveCount, c); + } + } else { + if (DEBUG_CORRECTION + && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength) + && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 + || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) { + DUMP_WORD(mWord, mOutputIndex); + AKLOGI("UNRELATED(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, + mTransposedCount, mExcessiveCount, c); + } + return processUnrelatedCorrectionType(); + } + } else if (secondTransposing) { + // If inputIndex is greater than mInputLength, that means there is no + // proximity chars. So, we don't need to check proximity. + mMatching = true; + } else if (isEquivalentChar(matchedProximityCharId)) { + mMatching = true; + ++mEquivalentCharCount; + mDistances[mOutputIndex] = mProximityInfoState.getNormalizedSquaredDistance(mInputIndex, 0); + } else if (NEAR_PROXIMITY_CHAR == matchedProximityCharId) { + mProximityMatching = true; + ++mProximityCount; + mDistances[mOutputIndex] = + mProximityInfoState.getNormalizedSquaredDistance(mInputIndex, proximityIndex); + if (DEBUG_CORRECTION + && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength) + && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 + || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) { + AKLOGI("PROX: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, + mTransposedCount, mExcessiveCount, c); + } + } + + addCharToCurrentWord(c); + + // 4. Last char excessive correction + mLastCharExceeded = mExcessiveCount == 0 && mSkippedCount == 0 && mTransposedCount == 0 + && mProximityCount == 0 && (mInputIndex == mInputLength - 2); + const bool isSameAsUserTypedLength = (mInputLength == mInputIndex + 1) || mLastCharExceeded; + if (mLastCharExceeded) { + ++mExcessiveCount; + } + + // Start traversing all nodes after the index exceeds the user typed length + if (isSameAsUserTypedLength) { + startToTraverseAllNodes(); + } + + const bool needsToTryOnTerminalForTheLastPossibleExcessiveChar = + mExceeding && mInputIndex == mInputLength - 2; + + // Finally, we are ready to go to the next character, the next "virtual node". + // We should advance the input index. + // We do this in this branch of the 'if traverseAllNodes' because we are still matching + // characters to input; the other branch is not matching them but searching for + // completions, this is why it does not have to do it. + incrementInputIndex(); + // Also, the next char is one "virtual node" depth more than this char. + incrementOutputIndex(); + + if ((needsToTryOnTerminalForTheLastPossibleExcessiveChar + || isSameAsUserTypedLength) && isTerminal) { + mTerminalInputIndex = mInputIndex - 1; + mTerminalOutputIndex = mOutputIndex - 1; + if (DEBUG_CORRECTION + && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength) + && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) { + DUMP_WORD(mWord, mOutputIndex); + AKLOGI("ONTERMINAL(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, + mTransposedCount, mExcessiveCount, c); + } + return ON_TERMINAL; + } else { + mTerminalInputIndex = mInputIndex - 1; + mTerminalOutputIndex = mOutputIndex - 1; + return NOT_ON_TERMINAL; + } +} + +Correction::~Correction() { +} + +inline static int getQuoteCount(const unsigned short* word, const int length) { + int quoteCount = 0; + for (int i = 0; i < length; ++i) { + if(word[i] == '\'') { + ++quoteCount; + } + } + return quoteCount; +} + +inline static bool isUpperCase(unsigned short c) { + return isAsciiUpper(toBaseChar(c)); +} + +////////////////////// +// RankingAlgorithm // +////////////////////// + +/* static */ +int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex, + const int outputIndex, const int freq, int* editDistanceTable, const Correction* correction, + const int inputLength) { + const int excessivePos = correction->getExcessivePos(); + const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER; + const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER; + const ProximityInfoState *proximityInfoState = &correction->mProximityInfoState; + const int skippedCount = correction->mSkippedCount; + const int transposedCount = correction->mTransposedCount / 2; + const int excessiveCount = correction->mExcessiveCount + correction->mTransposedCount % 2; + const int proximityMatchedCount = correction->mProximityCount; + const bool lastCharExceeded = correction->mLastCharExceeded; + const bool useFullEditDistance = correction->mUseFullEditDistance; + const int outputLength = outputIndex + 1; + if (skippedCount >= inputLength || inputLength == 0) { + return -1; + } + + // TODO: find more robust way + bool sameLength = lastCharExceeded ? (inputLength == inputIndex + 2) + : (inputLength == inputIndex + 1); + + // TODO: use mExcessiveCount + const int matchCount = inputLength - correction->mProximityCount - excessiveCount; + + const unsigned short* word = correction->mWord; + const bool skipped = skippedCount > 0; + + const int quoteDiffCount = max(0, getQuoteCount(word, outputLength) + - getQuoteCount(proximityInfoState->getPrimaryInputWord(), inputLength)); + + // TODO: Calculate edit distance for transposed and excessive + int ed = 0; + if (DEBUG_DICT_FULL) { + dumpEditDistance10ForDebug(editDistanceTable, correction->mInputLength, outputLength); + } + int adjustedProximityMatchedCount = proximityMatchedCount; + + int finalFreq = freq; + + if (DEBUG_CORRECTION_FREQ + && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == inputLength)) { + AKLOGI("FinalFreq0: %d", finalFreq); + } + // TODO: Optimize this. + if (transposedCount > 0 || proximityMatchedCount > 0 || skipped || excessiveCount > 0) { + ed = getCurrentEditDistance(editDistanceTable, correction->mInputLength, outputLength, + inputLength) - transposedCount; + + const int matchWeight = powerIntCapped(typedLetterMultiplier, + max(inputLength, outputLength) - ed); + multiplyIntCapped(matchWeight, &finalFreq); + + // TODO: Demote further if there are two or more excessive chars with longer user input? + if (inputLength > outputLength) { + multiplyRate(INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE, &finalFreq); + } + + ed = max(0, ed - quoteDiffCount); + adjustedProximityMatchedCount = min(max(0, ed - (outputLength - inputLength)), + proximityMatchedCount); + if (transposedCount <= 0) { + if (ed == 1 && (inputLength == outputLength - 1 || inputLength == outputLength + 1)) { + // Promote a word with just one skipped or excessive char + if (sameLength) { + multiplyRate(WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE + + WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_MULTIPLIER * outputLength, + &finalFreq); + } else { + multiplyIntCapped(typedLetterMultiplier, &finalFreq); + } + } else if (ed == 0) { + multiplyIntCapped(typedLetterMultiplier, &finalFreq); + sameLength = true; + } + } + } else { + const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount); + multiplyIntCapped(matchWeight, &finalFreq); + } + + if (proximityInfoState->getMatchedProximityId(0, word[0], true) == UNRELATED_CHAR) { + multiplyRate(FIRST_CHAR_DIFFERENT_DEMOTION_RATE, &finalFreq); + } + + /////////////////////////////////////////////// + // Promotion and Demotion for each correction + + // Demotion for a word with missing character + if (skipped) { + const int demotionRate = WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE + * (10 * inputLength - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X) + / (10 * inputLength + - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X + 10); + if (DEBUG_DICT_FULL) { + AKLOGI("Demotion rate for missing character is %d.", demotionRate); + } + multiplyRate(demotionRate, &finalFreq); + } + + // Demotion for a word with transposed character + if (transposedCount > 0) multiplyRate( + WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE, &finalFreq); + + // Demotion for a word with excessive character + if (excessiveCount > 0) { + multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE, &finalFreq); + if (!lastCharExceeded && !proximityInfoState->existsAdjacentProximityChars(excessivePos)) { + if (DEBUG_DICT_FULL) { + AKLOGI("Double excessive demotion"); + } + // If an excessive character is not adjacent to the left char or the right char, + // we will demote this word. + multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE, &finalFreq); + } + } + + const bool performTouchPositionCorrection = + CALIBRATE_SCORE_BY_TOUCH_COORDINATES + && proximityInfoState->touchPositionCorrectionEnabled() + && skippedCount == 0 && excessiveCount == 0 && transposedCount == 0; + // Score calibration by touch coordinates is being done only for pure-fat finger typing error + // cases. + int additionalProximityCount = 0; + // TODO: Remove this constraint. + if (performTouchPositionCorrection) { + for (int i = 0; i < outputLength; ++i) { + const int squaredDistance = correction->mDistances[i]; + if (i < adjustedProximityMatchedCount) { + multiplyIntCapped(typedLetterMultiplier, &finalFreq); + } + if (squaredDistance >= 0) { + // Promote or demote the score according to the distance from the sweet spot + static const float A = ZERO_DISTANCE_PROMOTION_RATE / 100.0f; + static const float B = 1.0f; + static const float C = 0.5f; + static const float MIN = 0.3f; + static const float R1 = NEUTRAL_SCORE_SQUARED_RADIUS; + static const float R2 = HALF_SCORE_SQUARED_RADIUS; + const float x = (float)squaredDistance + / ProximityInfoState::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR; + const float factor = max((x < R1) + ? (A * (R1 - x) + B * x) / R1 + : (B * (R2 - x) + C * (x - R1)) / (R2 - R1), MIN); + // factor is piecewise linear function like: + // A -_ . + // ^-_ . + // B \ . + // \_ . + // C ------------. + // . + // 0 R1 R2 . + multiplyRate((int)(factor * 100), &finalFreq); + } else if (squaredDistance == PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO) { + multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq); + } else if (squaredDistance == ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO) { + ++additionalProximityCount; + multiplyRate(WORDS_WITH_ADDITIONAL_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq); + } + } + } else { + // Demote additional proximity characters + for (int i = 0; i < outputLength; ++i) { + const int squaredDistance = correction->mDistances[i]; + if (squaredDistance == ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO) { + ++additionalProximityCount; + } + } + // Promotion for a word with proximity characters + for (int i = 0; i < adjustedProximityMatchedCount; ++i) { + // A word with proximity corrections + if (DEBUG_DICT_FULL) { + AKLOGI("Found a proximity correction."); + } + multiplyIntCapped(typedLetterMultiplier, &finalFreq); + if (i < additionalProximityCount) { + multiplyRate(WORDS_WITH_ADDITIONAL_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq); + } else { + multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq); + } + } + } + + // If the user types too many(three or more) proximity characters with additional proximity + // character,do not treat as the same length word. + if (sameLength && additionalProximityCount > 0 && (adjustedProximityMatchedCount >= 3 + || transposedCount > 0 || skipped || excessiveCount > 0)) { + sameLength = false; + } + + const int errorCount = adjustedProximityMatchedCount > 0 + ? adjustedProximityMatchedCount + : (proximityMatchedCount + transposedCount); + multiplyRate( + 100 - CORRECTION_COUNT_RATE_DEMOTION_RATE_BASE * errorCount / inputLength, &finalFreq); + + // Promotion for an exactly matched word + if (ed == 0) { + // Full exact match + if (sameLength && transposedCount == 0 && !skipped && excessiveCount == 0 + && quoteDiffCount == 0 && additionalProximityCount == 0) { + finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq); + } + } + + // Promote a word with no correction + if (proximityMatchedCount == 0 && transposedCount == 0 && !skipped && excessiveCount == 0 + && additionalProximityCount == 0) { + multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq); + } + + // TODO: Check excessive count and transposed count + // TODO: Remove this if possible + /* + If the last character of the user input word is the same as the next character + of the output word, and also all of characters of the user input are matched + to the output word, we'll promote that word a bit because + that word can be considered the combination of skipped and matched characters. + This means that the 'sm' pattern wins over the 'ma' pattern. + e.g.) + shel -> shell [mmmma] or [mmmsm] + hel -> hello [mmmaa] or [mmsma] + m ... matching + s ... skipping + a ... traversing all + t ... transposing + e ... exceeding + p ... proximity matching + */ + if (matchCount == inputLength && matchCount >= 2 && !skipped + && word[matchCount] == word[matchCount - 1]) { + multiplyRate(WORDS_WITH_MATCH_SKIP_PROMOTION_RATE, &finalFreq); + } + + // TODO: Do not use sameLength? + if (sameLength) { + multiplyIntCapped(fullWordMultiplier, &finalFreq); + } + + if (useFullEditDistance && outputLength > inputLength + 1) { + const int diff = outputLength - inputLength - 1; + const int divider = diff < 31 ? 1 << diff : S_INT_MAX; + finalFreq = divider > finalFreq ? 1 : finalFreq / divider; + } + + if (DEBUG_DICT_FULL) { + AKLOGI("calc: %d, %d", outputLength, sameLength); + } + + if (DEBUG_CORRECTION_FREQ + && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == inputLength)) { + DUMP_WORD(correction->getPrimaryInputWord(), inputLength); + DUMP_WORD(correction->mWord, outputLength); + AKLOGI("FinalFreq: [P%d, S%d, T%d, E%d, A%d] %d, %d, %d, %d, %d, %d", proximityMatchedCount, + skippedCount, transposedCount, excessiveCount, additionalProximityCount, + outputLength, lastCharExceeded, sameLength, quoteDiffCount, ed, finalFreq); + } + + return finalFreq; +} + +/* static */ +int Correction::RankingAlgorithm::calcFreqForSplitMultipleWords( + const int *freqArray, const int *wordLengthArray, const int wordCount, + const Correction* correction, const bool isSpaceProximity, const unsigned short *word) { + const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER; + + bool firstCapitalizedWordDemotion = false; + bool secondCapitalizedWordDemotion = false; + + { + // TODO: Handle multiple capitalized word demotion properly + const int firstWordLength = wordLengthArray[0]; + const int secondWordLength = wordLengthArray[1]; + if (firstWordLength >= 2) { + firstCapitalizedWordDemotion = isUpperCase(word[0]); + } + + if (secondWordLength >= 2) { + // FIXME: word[firstWordLength + 1] is incorrect. + secondCapitalizedWordDemotion = isUpperCase(word[firstWordLength + 1]); + } + } + + + const bool capitalizedWordDemotion = + firstCapitalizedWordDemotion ^ secondCapitalizedWordDemotion; + + int totalLength = 0; + int totalFreq = 0; + for (int i = 0; i < wordCount; ++i){ + const int wordLength = wordLengthArray[i]; + if (wordLength <= 0) { + return 0; + } + totalLength += wordLength; + const int demotionRate = 100 - TWO_WORDS_CORRECTION_DEMOTION_BASE / (wordLength + 1); + int tempFirstFreq = freqArray[i]; + multiplyRate(demotionRate, &tempFirstFreq); + totalFreq += tempFirstFreq; + } + + if (totalLength <= 0 || totalFreq <= 0) { + return 0; + } + + // TODO: Currently totalFreq is adjusted to two word metrix. + // Promote pairFreq with multiplying by 2, because the word length is the same as the typed + // length. + totalFreq = totalFreq * 2 / wordCount; + if (wordCount > 2) { + // Safety net for 3+ words -- Caveats: many heuristics and workarounds here. + int oneLengthCounter = 0; + int twoLengthCounter = 0; + for (int i = 0; i < wordCount; ++i) { + const int wordLength = wordLengthArray[i]; + // TODO: Use bigram instead of this safety net + if (i < wordCount - 1) { + const int nextWordLength = wordLengthArray[i + 1]; + if (wordLength == 1 && nextWordLength == 2) { + // Safety net to filter 1 length and 2 length sequential words + return 0; + } + } + const int freq = freqArray[i]; + // Demote too short weak words + if (wordLength <= 4 && freq <= SUPPRESS_SHORT_MULTIPLE_WORDS_THRESHOLD_FREQ) { + multiplyRate(100 * freq / MAX_FREQ, &totalFreq); + } + if (wordLength == 1) { + ++oneLengthCounter; + } else if (wordLength == 2) { + ++twoLengthCounter; + } + if (oneLengthCounter >= 2 || (oneLengthCounter + twoLengthCounter) >= 4) { + // Safety net to filter too many short words + return 0; + } + } + multiplyRate(MULTIPLE_WORDS_DEMOTION_RATE, &totalFreq); + } + + // This is a workaround to try offsetting the not-enough-demotion which will be done in + // calcNormalizedScore in Utils.java. + // In calcNormalizedScore the score will be demoted by (1 - 1 / length) + // but we demoted only (1 - 1 / (length + 1)) so we will additionally adjust freq by + // (1 - 1 / length) / (1 - 1 / (length + 1)) = (1 - 1 / (length * length)) + const int normalizedScoreNotEnoughDemotionAdjustment = 100 - 100 / (totalLength * totalLength); + multiplyRate(normalizedScoreNotEnoughDemotionAdjustment, &totalFreq); + + // At this moment, totalFreq is calculated by the following formula: + // (firstFreq * (1 - 1 / (firstWordLength + 1)) + secondFreq * (1 - 1 / (secondWordLength + 1))) + // * (1 - 1 / totalLength) / (1 - 1 / (totalLength + 1)) + + multiplyIntCapped(powerIntCapped(typedLetterMultiplier, totalLength), &totalFreq); + + // This is another workaround to offset the demotion which will be done in + // calcNormalizedScore in Utils.java. + // In calcNormalizedScore the score will be demoted by (1 - 1 / length) so we have to promote + // the same amount because we already have adjusted the synthetic freq of this "missing or + // mistyped space" suggestion candidate above in this method. + const int normalizedScoreDemotionRateOffset = (100 + 100 / totalLength); + multiplyRate(normalizedScoreDemotionRateOffset, &totalFreq); + + if (isSpaceProximity) { + // A word pair with one space proximity correction + if (DEBUG_DICT) { + AKLOGI("Found a word pair with space proximity correction."); + } + multiplyIntCapped(typedLetterMultiplier, &totalFreq); + multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &totalFreq); + } + + if (isSpaceProximity) { + multiplyRate(WORDS_WITH_MISTYPED_SPACE_DEMOTION_RATE, &totalFreq); + } else { + multiplyRate(WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE, &totalFreq); + } + + if (capitalizedWordDemotion) { + multiplyRate(TWO_WORDS_CAPITALIZED_DEMOTION_RATE, &totalFreq); + } + + if (DEBUG_CORRECTION_FREQ) { + AKLOGI("Multiple words (%d, %d) (%d, %d) %d, %d", freqArray[0], freqArray[1], + wordLengthArray[0], wordLengthArray[1], capitalizedWordDemotion, totalFreq); + DUMP_WORD(word, wordLengthArray[0]); + } + + return totalFreq; +} + +/* Damerau-Levenshtein distance */ +inline static int editDistanceInternal( + int* editDistanceTable, const unsigned short* before, + const int beforeLength, const unsigned short* after, const int afterLength) { + // dp[li][lo] dp[a][b] = dp[ a * lo + b] + int* dp = editDistanceTable; + const int li = beforeLength + 1; + const int lo = afterLength + 1; + for (int i = 0; i < li; ++i) { + dp[lo * i] = i; + } + for (int i = 0; i < lo; ++i) { + dp[i] = i; + } + + for (int i = 0; i < li - 1; ++i) { + for (int j = 0; j < lo - 1; ++j) { + const uint32_t ci = toBaseLowerCase(before[i]); + const uint32_t co = toBaseLowerCase(after[j]); + const uint16_t cost = (ci == co) ? 0 : 1; + dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1, + min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost)); + if (i > 0 && j > 0 && ci == toBaseLowerCase(after[j - 1]) + && co == toBaseLowerCase(before[i - 1])) { + dp[(i + 1) * lo + (j + 1)] = min( + dp[(i + 1) * lo + (j + 1)], dp[(i - 1) * lo + (j - 1)] + cost); + } + } + } + + if (DEBUG_EDIT_DISTANCE) { + AKLOGI("IN = %d, OUT = %d", beforeLength, afterLength); + for (int i = 0; i < li; ++i) { + for (int j = 0; j < lo; ++j) { + AKLOGI("EDIT[%d][%d], %d", i, j, dp[i * lo + j]); + } + } + } + return dp[li * lo - 1]; +} + +int Correction::RankingAlgorithm::editDistance(const unsigned short* before, + const int beforeLength, const unsigned short* after, const int afterLength) { + int table[(beforeLength + 1) * (afterLength + 1)]; + return editDistanceInternal(table, before, beforeLength, after, afterLength); +} + + +// In dictionary.cpp, getSuggestion() method, +// suggestion scores are computed using the below formula. +// original score +// := pow(mTypedLetterMultiplier (this is defined 2), +// (the number of matched characters between typed word and suggested word)) +// * (individual word's score which defined in the unigram dictionary, +// and this score is defined in range [0, 255].) +// Then, the following processing is applied. +// - If the dictionary word is matched up to the point of the user entry +// (full match up to min(before.length(), after.length()) +// => Then multiply by FULL_MATCHED_WORDS_PROMOTION_RATE (this is defined 1.2) +// - If the word is a true full match except for differences in accents or +// capitalization, then treat it as if the score was 255. +// - If before.length() == after.length() +// => multiply by mFullWordMultiplier (this is defined 2)) +// So, maximum original score is pow(2, min(before.length(), after.length())) * 255 * 2 * 1.2 +// For historical reasons we ignore the 1.2 modifier (because the measure for a good +// autocorrection threshold was done at a time when it didn't exist). This doesn't change +// the result. +// So, we can normalize original score by dividing pow(2, min(b.l(),a.l())) * 255 * 2. + +/* static */ +float Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short* before, + const int beforeLength, const unsigned short* after, const int afterLength, + const int score) { + if (0 == beforeLength || 0 == afterLength) { + return 0; + } + const int distance = editDistance(before, beforeLength, after, afterLength); + int spaceCount = 0; + for (int i = 0; i < afterLength; ++i) { + if (after[i] == CODE_SPACE) { + ++spaceCount; + } + } + + if (spaceCount == afterLength) { + return 0; + } + + const float maxScore = score >= S_INT_MAX ? S_INT_MAX : MAX_INITIAL_SCORE + * pow((float)TYPED_LETTER_MULTIPLIER, + (float)min(beforeLength, afterLength - spaceCount)) * FULL_WORD_MULTIPLIER; + + // add a weight based on edit distance. + // distance <= max(afterLength, beforeLength) == afterLength, + // so, 0 <= distance / afterLength <= 1 + const float weight = 1.0 - (float) distance / afterLength; + return (score / maxScore) * weight; +} +} // namespace latinime |