aboutsummaryrefslogtreecommitdiffstats
path: root/native/src/correction.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'native/src/correction.cpp')
-rw-r--r--native/src/correction.cpp650
1 files changed, 426 insertions, 224 deletions
diff --git a/native/src/correction.cpp b/native/src/correction.cpp
index 27dc40745..087219ed4 100644
--- a/native/src/correction.cpp
+++ b/native/src/correction.cpp
@@ -16,12 +16,15 @@
#include <assert.h>
#include <ctype.h>
+#include <math.h>
#include <stdio.h>
#include <string.h>
#define LOG_TAG "LatinIME: correction.cpp"
+#include "char_utils.h"
#include "correction.h"
+#include "defines.h"
#include "dictionary.h"
#include "proximity_info.h"
@@ -31,81 +34,60 @@ namespace latinime {
// edit distance funcitons //
/////////////////////////////
-#if 0 /* no longer used */
-inline static int editDistance(
- int* editDistanceTable, const unsigned short* input,
- const int inputLength, const unsigned short* output, const int outputLength) {
- // dp[li][lo] dp[a][b] = dp[ a * lo + b]
- int* dp = editDistanceTable;
- const int li = inputLength + 1;
- const int lo = outputLength + 1;
- for (int i = 0; i < li; ++i) {
- dp[lo * i] = i;
- }
- for (int i = 0; i < lo; ++i) {
- dp[i] = i;
- }
-
- for (int i = 0; i < li - 1; ++i) {
- for (int j = 0; j < lo - 1; ++j) {
- const uint32_t ci = Dictionary::toBaseLowerCase(input[i]);
- const uint32_t co = Dictionary::toBaseLowerCase(output[j]);
- const uint16_t cost = (ci == co) ? 0 : 1;
- dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1,
- min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost));
- if (i > 0 && j > 0 && ci == Dictionary::toBaseLowerCase(output[j - 1])
- && co == Dictionary::toBaseLowerCase(input[i - 1])) {
- dp[(i + 1) * lo + (j + 1)] = min(
- dp[(i + 1) * lo + (j + 1)], dp[(i - 1) * lo + (j - 1)] + cost);
- }
- }
+inline static void initEditDistance(int *editDistanceTable) {
+ for (int i = 0; i <= MAX_WORD_LENGTH_INTERNAL; ++i) {
+ editDistanceTable[i] = i;
}
+}
- if (DEBUG_EDIT_DISTANCE) {
- LOGI("IN = %d, OUT = %d", inputLength, outputLength);
- for (int i = 0; i < li; ++i) {
- for (int j = 0; j < lo; ++j) {
- LOGI("EDIT[%d][%d], %d", i, j, dp[i * lo + j]);
+inline static void dumpEditDistance10ForDebug(int *editDistanceTable,
+ const int editDistanceTableWidth, const int outputLength) {
+ if (DEBUG_DICT) {
+ AKLOGI("EditDistanceTable");
+ for (int i = 0; i <= 10; ++i) {
+ int c[11];
+ for (int j = 0; j <= 10; ++j) {
+ if (j < editDistanceTableWidth + 1 && i < outputLength + 1) {
+ c[j] = (editDistanceTable + i * (editDistanceTableWidth + 1))[j];
+ } else {
+ c[j] = -1;
+ }
}
+ AKLOGI("[ %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d ]",
+ c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7], c[8], c[9], c[10]);
}
}
- return dp[li * lo - 1];
-}
-#endif
-
-inline static void initEditDistance(int *editDistanceTable) {
- for (int i = 0; i <= MAX_WORD_LENGTH_INTERNAL; ++i) {
- editDistanceTable[i] = i;
- }
}
inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigned short *input,
const int inputLength, const unsigned short *output, const int outputLength) {
+ // TODO: Make sure that editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL] is not touched.
// Let dp[i][j] be editDistanceTable[i * (inputLength + 1) + j].
// Assuming that dp[0][0] ... dp[outputLength - 1][inputLength] are already calculated,
// and calculate dp[ouputLength][0] ... dp[outputLength][inputLength].
int *const current = editDistanceTable + outputLength * (inputLength + 1);
const int *const prev = editDistanceTable + (outputLength - 1) * (inputLength + 1);
const int *const prevprev =
- outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputLength + 1) : NULL;
+ outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputLength + 1) : 0;
current[0] = outputLength;
- const uint32_t co = Dictionary::toBaseLowerCase(output[outputLength - 1]);
- const uint32_t prevCO =
- outputLength >= 2 ? Dictionary::toBaseLowerCase(output[outputLength - 2]) : 0;
+ const uint32_t co = toBaseLowerCase(output[outputLength - 1]);
+ const uint32_t prevCO = outputLength >= 2 ? toBaseLowerCase(output[outputLength - 2]) : 0;
for (int i = 1; i <= inputLength; ++i) {
- const uint32_t ci = Dictionary::toBaseLowerCase(input[i - 1]);
+ const uint32_t ci = toBaseLowerCase(input[i - 1]);
const uint16_t cost = (ci == co) ? 0 : 1;
current[i] = min(current[i - 1] + 1, min(prev[i] + 1, prev[i - 1] + cost));
- if (i >= 2 && prevprev && ci == prevCO
- && co == Dictionary::toBaseLowerCase(input[i - 2])) {
+ if (i >= 2 && prevprev && ci == prevCO && co == toBaseLowerCase(input[i - 2])) {
current[i] = min(current[i], prevprev[i - 2] + 1);
}
}
}
-inline static int getCurrentEditDistance(
- int *editDistanceTable, const int inputLength, const int outputLength) {
- return editDistanceTable[(inputLength + 1) * (outputLength + 1) - 1];
+inline static int getCurrentEditDistance(int *editDistanceTable, const int editDistanceTableWidth,
+ const int outputLength, const int inputLength) {
+ if (DEBUG_EDIT_DISTANCE) {
+ AKLOGI("getCurrentEditDistance %d, %d", inputLength, outputLength);
+ }
+ return editDistanceTable[(editDistanceTableWidth + 1) * (outputLength) + inputLength];
}
//////////////////////
@@ -133,6 +115,9 @@ void Correction::initCorrection(const ProximityInfo *pi, const int inputLength,
mInputLength = inputLength;
mMaxDepth = maxDepth;
mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;
+ // TODO: This is not supposed to be required. Check what's going wrong with
+ // editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL]
+ initEditDistance(mEditDistanceTable);
}
void Correction::initCorrectionState(
@@ -146,7 +131,7 @@ void Correction::initCorrectionState(
void Correction::setCorrectionParams(const int skipPos, const int excessivePos,
const int transposedPos, const int spaceProximityPos, const int missingSpacePos,
- const bool useFullEditDistance) {
+ const bool useFullEditDistance, const bool doAutoCompletion, const int maxErrors) {
// TODO: remove
mTransposedPos = transposedPos;
mExcessivePos = excessivePos;
@@ -159,6 +144,8 @@ void Correction::setCorrectionParams(const int skipPos, const int excessivePos,
mSpaceProximityPos = spaceProximityPos;
mMissingSpacePos = missingSpacePos;
mUseFullEditDistance = useFullEditDistance;
+ mDoAutoCompletion = doAutoCompletion;
+ mMaxErrors = maxErrors;
}
void Correction::checkState() {
@@ -172,23 +159,34 @@ void Correction::checkState() {
}
}
-int Correction::getFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
- const unsigned short *word) {
- return Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
- firstFreq, secondFreq, this, word);
+int Correction::getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray,
+ const int wordCount, const bool isSpaceProximity, const unsigned short *word) {
+ return Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(freqArray, wordLengthArray,
+ wordCount, this, isSpaceProximity, word);
}
int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLength) {
+ return getFinalFreqInternal(freq, word, wordLength, mInputLength);
+}
+
+int Correction::getFinalFreqForSubQueue(const int freq, unsigned short **word, int *wordLength,
+ const int inputLength) {
+ return getFinalFreqInternal(freq, word, wordLength, inputLength);
+}
+
+int Correction::getFinalFreqInternal(const int freq, unsigned short **word, int *wordLength,
+ const int inputLength) {
const int outputIndex = mTerminalOutputIndex;
const int inputIndex = mTerminalInputIndex;
*wordLength = outputIndex + 1;
- if (mProximityInfo->sameAsTyped(mWord, outputIndex + 1) || outputIndex < MIN_SUGGEST_DEPTH) {
- return -1;
+ if (outputIndex < MIN_SUGGEST_DEPTH) {
+ return NOT_A_FREQUENCY;
}
*word = mWord;
- return Correction::RankingAlgorithm::calculateFinalFreq(
- inputIndex, outputIndex, freq, mEditDistanceTable, this);
+ int finalFreq = Correction::RankingAlgorithm::calculateFinalFreq(
+ inputIndex, outputIndex, freq, mEditDistanceTable, this, inputLength);
+ return finalFreq;
}
bool Correction::initProcessState(const int outputIndex) {
@@ -213,6 +211,7 @@ bool Correction::initProcessState(const int outputIndex) {
mMatching = false;
mProximityMatching = false;
+ mAdditionalProximityMatching = false;
mTransposing = false;
mExceeding = false;
mSkipping = false;
@@ -229,20 +228,10 @@ int Correction::goDownTree(
}
// TODO: remove
-int Correction::getOutputIndex() {
- return mOutputIndex;
-}
-
-// TODO: remove
int Correction::getInputIndex() {
return mInputIndex;
}
-// TODO: remove
-bool Correction::needsToTraverseAllNodes() {
- return mNeedsToTraverseAllNodes;
-}
-
void Correction::incrementInputIndex() {
++mInputIndex;
}
@@ -269,6 +258,7 @@ void Correction::incrementOutputIndex() {
mCorrectionStates[mOutputIndex].mMatching = mMatching;
mCorrectionStates[mOutputIndex].mProximityMatching = mProximityMatching;
+ mCorrectionStates[mOutputIndex].mAdditionalProximityMatching = mAdditionalProximityMatching;
mCorrectionStates[mOutputIndex].mTransposing = mTransposing;
mCorrectionStates[mOutputIndex].mExceeding = mExceeding;
mCorrectionStates[mOutputIndex].mSkipping = mSkipping;
@@ -280,7 +270,9 @@ void Correction::startToTraverseAllNodes() {
bool Correction::needsToPrune() const {
// TODO: use edit distance here
- return mOutputIndex - 1 >= mMaxDepth || mProximityCount > mMaxEditDistance;
+ return mOutputIndex - 1 >= mMaxDepth || mProximityCount > mMaxEditDistance
+ // Allow one char longer word for missing character
+ || (!mDoAutoCompletion && (mOutputIndex > mInputLength));
}
void Correction::addCharToCurrentWord(const int32_t c) {
@@ -290,13 +282,12 @@ void Correction::addCharToCurrentWord(const int32_t c) {
mWord, mOutputIndex + 1);
}
-// TODO: inline?
Correction::CorrectionType Correction::processSkipChar(
const int32_t c, const bool isTerminal, const bool inputIndexIncremented) {
addCharToCurrentWord(c);
- if (needsToTraverseAllNodes() && isTerminal) {
- mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0);
- mTerminalOutputIndex = mOutputIndex;
+ mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0);
+ mTerminalOutputIndex = mOutputIndex;
+ if (mNeedsToTraverseAllNodes && isTerminal) {
incrementOutputIndex();
return TRAVERSE_ALL_ON_TERMINAL;
} else {
@@ -305,19 +296,36 @@ Correction::CorrectionType Correction::processSkipChar(
}
}
+Correction::CorrectionType Correction::processUnrelatedCorrectionType() {
+ // Needs to set mTerminalInputIndex and mTerminalOutputIndex before returning any CorrectionType
+ mTerminalInputIndex = mInputIndex;
+ mTerminalOutputIndex = mOutputIndex;
+ return UNRELATED;
+}
+
inline bool isEquivalentChar(ProximityInfo::ProximityType type) {
return type == ProximityInfo::EQUIVALENT_CHAR;
}
+inline bool isProximityCharOrEquivalentChar(ProximityInfo::ProximityType type) {
+ return type == ProximityInfo::EQUIVALENT_CHAR
+ || type == ProximityInfo::NEAR_PROXIMITY_CHAR;
+}
+
Correction::CorrectionType Correction::processCharAndCalcState(
const int32_t c, const bool isTerminal) {
const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount);
+ if (correctionCount > mMaxErrors) {
+ return processUnrelatedCorrectionType();
+ }
+
// TODO: Change the limit if we'll allow two or more corrections
const bool noCorrectionsHappenedSoFar = correctionCount == 0;
const bool canTryCorrection = noCorrectionsHappenedSoFar;
int proximityIndex = 0;
mDistances[mOutputIndex] = NOT_A_DISTANCE;
+ // Skip checking this node
if (mNeedsToTraverseAllNodes || isQuote(c)) {
bool incremented = false;
if (mLastCharExceeded && mInputIndex == mInputLength - 1) {
@@ -342,6 +350,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
return processSkipChar(c, isTerminal, incremented);
}
+ // Check possible corrections.
if (mExcessivePos >= 0) {
if (mExcessiveCount == 0 && mExcessivePos < mOutputIndex) {
mExcessivePos = mOutputIndex;
@@ -382,30 +391,42 @@ Correction::CorrectionType Correction::processCharAndCalcState(
incrementInputIndex();
} else {
--mTransposedCount;
- if (DEBUG_CORRECTION) {
+ if (DEBUG_CORRECTION
+ && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength)
+ && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
+ || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
DUMP_WORD(mWord, mOutputIndex);
- LOGI("UNRELATED(0): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
+ AKLOGI("UNRELATED(0): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
mTransposedCount, mExcessiveCount, c);
}
- return UNRELATED;
+ return processUnrelatedCorrectionType();
}
}
// TODO: Change the limit if we'll allow two or more proximity chars with corrections
- const bool checkProximityChars = noCorrectionsHappenedSoFar || mProximityCount == 0;
+ // Work around: When the mMaxErrors is 1, we only allow just one error
+ // including proximity correction.
+ const bool checkProximityChars = (mMaxErrors > 1)
+ ? (noCorrectionsHappenedSoFar || mProximityCount == 0)
+ : (noCorrectionsHappenedSoFar && mProximityCount == 0);
+
ProximityInfo::ProximityType matchedProximityCharId = secondTransposing
? ProximityInfo::EQUIVALENT_CHAR
: mProximityInfo->getMatchedProximityId(
mInputIndex, c, checkProximityChars, &proximityIndex);
- if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId) {
+ if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId
+ || ProximityInfo::ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
if (canTryCorrection && mOutputIndex > 0
&& mCorrectionStates[mOutputIndex].mProximityMatching
&& mCorrectionStates[mOutputIndex].mExceeding
&& isEquivalentChar(mProximityInfo->getMatchedProximityId(
mInputIndex, mWord[mOutputIndex - 1], false))) {
- if (DEBUG_CORRECTION) {
- LOGI("CONVERSION p->e %c", mWord[mOutputIndex - 1]);
+ if (DEBUG_CORRECTION
+ && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength)
+ && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
+ || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
+ AKLOGI("CONVERSION p->e %c", mWord[mOutputIndex - 1]);
}
// Conversion p->e
// Example:
@@ -423,7 +444,11 @@ Correction::CorrectionType Correction::processCharAndCalcState(
}
}
- if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId) {
+ if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId
+ || ProximityInfo::ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
+ if (ProximityInfo::ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
+ mAdditionalProximityMatching = true;
+ }
// TODO: Optimize
// As the current char turned out to be an unrelated char,
// we will try other correction-types. Please note that mCorrectionStates[mOutputIndex]
@@ -465,6 +490,18 @@ Correction::CorrectionType Correction::processCharAndCalcState(
++mSkippedCount;
--mProximityCount;
return processSkipChar(c, isTerminal, false);
+ } else if (mInputIndex - 1 < mInputLength
+ && mSkippedCount > 0
+ && mCorrectionStates[mOutputIndex].mSkipping
+ && mCorrectionStates[mOutputIndex].mAdditionalProximityMatching
+ && isProximityCharOrEquivalentChar(
+ mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false))) {
+ // Conversion s->a
+ incrementInputIndex();
+ --mSkippedCount;
+ mProximityMatching = true;
+ ++mProximityCount;
+ mDistances[mOutputIndex] = ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO;
} else if ((mExceeding || mTransposing) && mInputIndex - 1 < mInputLength
&& isEquivalentChar(
mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false))) {
@@ -475,17 +512,52 @@ Correction::CorrectionType Correction::processCharAndCalcState(
++mExcessiveCount;
incrementInputIndex();
}
+ if (DEBUG_CORRECTION
+ && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength)
+ && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
+ || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
+ DUMP_WORD(mWord, mOutputIndex);
+ if (mTransposing) {
+ AKLOGI("TRANSPOSE: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
+ mTransposedCount, mExcessiveCount, c);
+ } else {
+ AKLOGI("EXCEED: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
+ mTransposedCount, mExcessiveCount, c);
+ }
+ }
} else if (mSkipping) {
// 3. Skip correction
++mSkippedCount;
+ if (DEBUG_CORRECTION
+ && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength)
+ && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
+ || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
+ AKLOGI("SKIP: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
+ mTransposedCount, mExcessiveCount, c);
+ }
return processSkipChar(c, isTerminal, false);
+ } else if (ProximityInfo::ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
+ // As a last resort, use additional proximity characters
+ mProximityMatching = true;
+ ++mProximityCount;
+ mDistances[mOutputIndex] = ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO;
+ if (DEBUG_CORRECTION
+ && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength)
+ && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
+ || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
+ AKLOGI("ADDITIONALPROX: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
+ mTransposedCount, mExcessiveCount, c);
+ }
} else {
- if (DEBUG_CORRECTION) {
+ if (DEBUG_CORRECTION
+ && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength)
+ && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
+ || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
DUMP_WORD(mWord, mOutputIndex);
- LOGI("UNRELATED(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
+ AKLOGI("UNRELATED(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
mTransposedCount, mExcessiveCount, c);
}
- return UNRELATED;
+ return processUnrelatedCorrectionType();
}
} else if (secondTransposing) {
// If inputIndex is greater than mInputLength, that means there is no
@@ -500,6 +572,13 @@ Correction::CorrectionType Correction::processCharAndCalcState(
++mProximityCount;
mDistances[mOutputIndex] =
mProximityInfo->getNormalizedSquaredDistance(mInputIndex, proximityIndex);
+ if (DEBUG_CORRECTION
+ && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength)
+ && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
+ || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
+ AKLOGI("PROX: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
+ mTransposedCount, mExcessiveCount, c);
+ }
}
addCharToCurrentWord(c);
@@ -533,13 +612,17 @@ Correction::CorrectionType Correction::processCharAndCalcState(
|| isSameAsUserTypedLength) && isTerminal) {
mTerminalInputIndex = mInputIndex - 1;
mTerminalOutputIndex = mOutputIndex - 1;
- if (DEBUG_CORRECTION) {
+ if (DEBUG_CORRECTION
+ && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength)
+ && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
DUMP_WORD(mWord, mOutputIndex);
- LOGI("ONTERMINAL(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
+ AKLOGI("ONTERMINAL(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
mTransposedCount, mExcessiveCount, c);
}
return ON_TERMINAL;
} else {
+ mTerminalInputIndex = mInputIndex - 1;
+ mTerminalOutputIndex = mOutputIndex - 1;
return NOT_ON_TERMINAL;
}
}
@@ -547,55 +630,6 @@ Correction::CorrectionType Correction::processCharAndCalcState(
Correction::~Correction() {
}
-/////////////////////////
-// static inline utils //
-/////////////////////////
-
-static const int TWO_31ST_DIV_255 = S_INT_MAX / 255;
-static inline int capped255MultForFullMatchAccentsOrCapitalizationDifference(const int num) {
- return (num < TWO_31ST_DIV_255 ? 255 * num : S_INT_MAX);
-}
-
-static const int TWO_31ST_DIV_2 = S_INT_MAX / 2;
-inline static void multiplyIntCapped(const int multiplier, int *base) {
- const int temp = *base;
- if (temp != S_INT_MAX) {
- // Branch if multiplier == 2 for the optimization
- if (multiplier == 2) {
- *base = TWO_31ST_DIV_2 >= temp ? temp << 1 : S_INT_MAX;
- } else {
- // TODO: This overflow check gives a wrong answer when, for example,
- // temp = 2^16 + 1 and multiplier = 2^17 + 1.
- // Fix this behavior.
- const int tempRetval = temp * multiplier;
- *base = tempRetval >= temp ? tempRetval : S_INT_MAX;
- }
- }
-}
-
-inline static int powerIntCapped(const int base, const int n) {
- if (n <= 0) return 1;
- if (base == 2) {
- return n < 31 ? 1 << n : S_INT_MAX;
- } else {
- int ret = base;
- for (int i = 1; i < n; ++i) multiplyIntCapped(base, &ret);
- return ret;
- }
-}
-
-inline static void multiplyRate(const int rate, int *freq) {
- if (*freq != S_INT_MAX) {
- if (*freq > 1000000) {
- *freq /= 100;
- multiplyIntCapped(rate, freq);
- } else {
- multiplyIntCapped(rate, freq);
- *freq /= 100;
- }
- }
-}
-
inline static int getQuoteCount(const unsigned short* word, const int length) {
int quoteCount = 0;
for (int i = 0; i < length; ++i) {
@@ -607,13 +641,7 @@ inline static int getQuoteCount(const unsigned short* word, const int length) {
}
inline static bool isUpperCase(unsigned short c) {
- if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {
- c = BASE_CHARS[c];
- }
- if (isupper(c)) {
- return true;
- }
- return false;
+ return isAsciiUpper(toBaseChar(c));
}
//////////////////////
@@ -622,9 +650,9 @@ inline static bool isUpperCase(unsigned short c) {
/* static */
int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const int outputIndex,
- const int freq, int* editDistanceTable, const Correction* correction) {
+ const int freq, int* editDistanceTable, const Correction* correction,
+ const int inputLength) {
const int excessivePos = correction->getExcessivePos();
- const int inputLength = correction->mInputLength;
const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER;
const ProximityInfo *proximityInfo = correction->mProximityInfo;
@@ -649,45 +677,55 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
const unsigned short* word = correction->mWord;
const bool skipped = skippedCount > 0;
- const int quoteDiffCount = max(0, getQuoteCount(word, outputIndex + 1)
+ const int quoteDiffCount = max(0, getQuoteCount(word, outputLength)
- getQuoteCount(proximityInfo->getPrimaryInputWord(), inputLength));
// TODO: Calculate edit distance for transposed and excessive
int ed = 0;
+ if (DEBUG_DICT_FULL) {
+ dumpEditDistance10ForDebug(editDistanceTable, correction->mInputLength, outputLength);
+ }
int adjustedProximityMatchedCount = proximityMatchedCount;
int finalFreq = freq;
+ if (DEBUG_CORRECTION_FREQ
+ && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == inputLength)) {
+ AKLOGI("FinalFreq0: %d", finalFreq);
+ }
// TODO: Optimize this.
- // TODO: Ignoring edit distance for transposed char, for now
- if (transposedCount == 0 && (proximityMatchedCount > 0 || skipped || excessiveCount > 0)) {
- ed = getCurrentEditDistance(editDistanceTable, inputLength, outputIndex + 1);
+ if (transposedCount > 0 || proximityMatchedCount > 0 || skipped || excessiveCount > 0) {
+ ed = getCurrentEditDistance(editDistanceTable, correction->mInputLength, outputLength,
+ inputLength) - transposedCount;
+
const int matchWeight = powerIntCapped(typedLetterMultiplier,
- max(inputLength, outputIndex + 1) - ed);
+ max(inputLength, outputLength) - ed);
multiplyIntCapped(matchWeight, &finalFreq);
// TODO: Demote further if there are two or more excessive chars with longer user input?
- if (inputLength > outputIndex + 1) {
+ if (inputLength > outputLength) {
multiplyRate(INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE, &finalFreq);
}
ed = max(0, ed - quoteDiffCount);
-
- if (ed == 1 && (inputLength == outputIndex || inputLength == outputIndex + 2)) {
- // Promote a word with just one skipped or excessive char
- if (sameLength) {
- multiplyRate(WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE, &finalFreq);
- } else {
+ adjustedProximityMatchedCount = min(max(0, ed - (outputLength - inputLength)),
+ proximityMatchedCount);
+ if (transposedCount < 1) {
+ if (ed == 1 && (inputLength == outputLength - 1 || inputLength == outputLength + 1)) {
+ // Promote a word with just one skipped or excessive char
+ if (sameLength) {
+ multiplyRate(WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE
+ + WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_MULTIPLIER * outputLength,
+ &finalFreq);
+ } else {
+ multiplyIntCapped(typedLetterMultiplier, &finalFreq);
+ }
+ } else if (ed == 0) {
multiplyIntCapped(typedLetterMultiplier, &finalFreq);
+ sameLength = true;
}
- } else if (ed == 0) {
- multiplyIntCapped(typedLetterMultiplier, &finalFreq);
- sameLength = true;
}
- adjustedProximityMatchedCount = min(max(0, ed - (outputIndex + 1 - inputLength)),
- proximityMatchedCount);
} else {
- // TODO: Calculate the edit distance for transposed char
const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount);
multiplyIntCapped(matchWeight, &finalFreq);
}
@@ -707,7 +745,7 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
/ (10 * inputLength
- WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X + 10);
if (DEBUG_DICT_FULL) {
- LOGI("Demotion rate for missing character is %d.", demotionRate);
+ AKLOGI("Demotion rate for missing character is %d.", demotionRate);
}
multiplyRate(demotionRate, &finalFreq);
}
@@ -720,8 +758,8 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
if (excessiveCount > 0) {
multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE, &finalFreq);
if (!lastCharExceeded && !proximityInfo->existsAdjacentProximityChars(excessivePos)) {
- if (DEBUG_CORRECTION_FREQ) {
- LOGI("Double excessive demotion");
+ if (DEBUG_DICT_FULL) {
+ AKLOGI("Double excessive demotion");
}
// If an excessive character is not adjacent to the left char or the right char,
// we will demote this word.
@@ -729,11 +767,14 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
}
}
+ const bool performTouchPositionCorrection =
+ CALIBRATE_SCORE_BY_TOUCH_COORDINATES && proximityInfo->touchPositionCorrectionEnabled()
+ && skippedCount == 0 && excessiveCount == 0 && transposedCount == 0;
// Score calibration by touch coordinates is being done only for pure-fat finger typing error
// cases.
+ int additionalProximityCount = 0;
// TODO: Remove this constraint.
- if (CALIBRATE_SCORE_BY_TOUCH_COORDINATES && proximityInfo->touchPositionCorrectionEnabled()
- && skippedCount == 0 && excessiveCount == 0 && transposedCount == 0) {
+ if (performTouchPositionCorrection) {
for (int i = 0; i < outputLength; ++i) {
const int squaredDistance = correction->mDistances[i];
if (i < adjustedProximityMatchedCount) {
@@ -744,40 +785,60 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
static const float A = ZERO_DISTANCE_PROMOTION_RATE / 100.0f;
static const float B = 1.0f;
static const float C = 0.5f;
+ static const float MIN = 0.3f;
static const float R1 = NEUTRAL_SCORE_SQUARED_RADIUS;
static const float R2 = HALF_SCORE_SQUARED_RADIUS;
const float x = (float)squaredDistance
/ ProximityInfo::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR;
- const float factor = (x < R1)
+ const float factor = max((x < R1)
? (A * (R1 - x) + B * x) / R1
- : (B * (R2 - x) + C * (x - R1)) / (R2 - R1);
+ : (B * (R2 - x) + C * (x - R1)) / (R2 - R1), MIN);
// factor is piecewise linear function like:
// A -_ .
// ^-_ .
// B \ .
- // \ .
- // C \ .
- // 0 R1 R2
- if (factor <= 0) {
- return -1;
- }
+ // \_ .
+ // C ------------.
+ // .
+ // 0 R1 R2 .
multiplyRate((int)(factor * 100), &finalFreq);
} else if (squaredDistance == PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO) {
multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);
+ } else if (squaredDistance == ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO) {
+ ++additionalProximityCount;
+ multiplyRate(WORDS_WITH_ADDITIONAL_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);
}
}
} else {
+ // Demote additional proximity characters
+ for (int i = 0; i < outputLength; ++i) {
+ const int squaredDistance = correction->mDistances[i];
+ if (squaredDistance == ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO) {
+ ++additionalProximityCount;
+ }
+ }
// Promotion for a word with proximity characters
for (int i = 0; i < adjustedProximityMatchedCount; ++i) {
// A word with proximity corrections
if (DEBUG_DICT_FULL) {
- LOGI("Found a proximity correction.");
+ AKLOGI("Found a proximity correction.");
}
multiplyIntCapped(typedLetterMultiplier, &finalFreq);
- multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);
+ if (i < additionalProximityCount) {
+ multiplyRate(WORDS_WITH_ADDITIONAL_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);
+ } else {
+ multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);
+ }
}
}
+ // If the user types too many(three or more) proximity characters with additional proximity
+ // character,do not treat as the same length word.
+ if (sameLength && additionalProximityCount > 0 && (adjustedProximityMatchedCount >= 3
+ || transposedCount > 0 || skipped || excessiveCount > 0)) {
+ sameLength = false;
+ }
+
const int errorCount = adjustedProximityMatchedCount > 0
? adjustedProximityMatchedCount
: (proximityMatchedCount + transposedCount);
@@ -787,13 +848,15 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
// Promotion for an exactly matched word
if (ed == 0) {
// Full exact match
- if (sameLength && transposedCount == 0 && !skipped && excessiveCount == 0) {
+ if (sameLength && transposedCount == 0 && !skipped && excessiveCount == 0
+ && quoteDiffCount == 0 && additionalProximityCount == 0) {
finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq);
}
}
// Promote a word with no correction
- if (proximityMatchedCount == 0 && transposedCount == 0 && !skipped && excessiveCount == 0) {
+ if (proximityMatchedCount == 0 && transposedCount == 0 && !skipped && excessiveCount == 0
+ && additionalProximityCount == 0) {
multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq);
}
@@ -832,71 +895,101 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
}
if (DEBUG_DICT_FULL) {
- LOGI("calc: %d, %d", outputIndex, sameLength);
+ AKLOGI("calc: %d, %d", outputLength, sameLength);
}
- if (DEBUG_CORRECTION_FREQ) {
- DUMP_WORD(correction->mWord, outputIndex + 1);
- LOGI("FinalFreq: [P%d, S%d, T%d, E%d] %d, %d, %d, %d, %d", proximityMatchedCount,
- skippedCount, transposedCount, excessiveCount, lastCharExceeded, sameLength,
- quoteDiffCount, ed, finalFreq);
+ if (DEBUG_CORRECTION_FREQ
+ && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == inputLength)) {
+ DUMP_WORD(proximityInfo->getPrimaryInputWord(), inputLength);
+ DUMP_WORD(correction->mWord, outputLength);
+ AKLOGI("FinalFreq: [P%d, S%d, T%d, E%d, A%d] %d, %d, %d, %d, %d, %d", proximityMatchedCount,
+ skippedCount, transposedCount, excessiveCount, additionalProximityCount,
+ outputLength, lastCharExceeded, sameLength, quoteDiffCount, ed, finalFreq);
}
return finalFreq;
}
/* static */
-int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
- const int firstFreq, const int secondFreq, const Correction* correction,
- const unsigned short *word) {
- const int spaceProximityPos = correction->mSpaceProximityPos;
- const int missingSpacePos = correction->mMissingSpacePos;
- if (DEBUG_DICT) {
- int inputCount = 0;
- if (spaceProximityPos >= 0) ++inputCount;
- if (missingSpacePos >= 0) ++inputCount;
- assert(inputCount <= 1);
- }
- const bool isSpaceProximity = spaceProximityPos >= 0;
- const int inputLength = correction->mInputLength;
- const int firstWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos;
- const int secondWordLength = isSpaceProximity ? (inputLength - spaceProximityPos - 1)
- : (inputLength - missingSpacePos);
+int Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(
+ const int *freqArray, const int *wordLengthArray, const int wordCount,
+ const Correction* correction, const bool isSpaceProximity, const unsigned short *word) {
const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
bool firstCapitalizedWordDemotion = false;
- if (firstWordLength >= 2) {
- firstCapitalizedWordDemotion = isUpperCase(word[0]);
- }
-
bool secondCapitalizedWordDemotion = false;
- if (secondWordLength >= 2) {
- secondCapitalizedWordDemotion = isUpperCase(word[firstWordLength + 1]);
+
+ {
+ // TODO: Handle multiple capitalized word demotion properly
+ const int firstWordLength = wordLengthArray[0];
+ const int secondWordLength = wordLengthArray[1];
+ if (firstWordLength >= 2) {
+ firstCapitalizedWordDemotion = isUpperCase(word[0]);
+ }
+
+ if (secondWordLength >= 2) {
+ // FIXME: word[firstWordLength + 1] is incorrect.
+ secondCapitalizedWordDemotion = isUpperCase(word[firstWordLength + 1]);
+ }
}
+
const bool capitalizedWordDemotion =
firstCapitalizedWordDemotion ^ secondCapitalizedWordDemotion;
- if (DEBUG_DICT_FULL) {
- LOGI("Two words: %c, %c, %d", word[0], word[firstWordLength + 1], capitalizedWordDemotion);
+ int totalLength = 0;
+ int totalFreq = 0;
+ for (int i = 0; i < wordCount; ++i){
+ const int wordLength = wordLengthArray[i];
+ if (wordLength <= 0) {
+ return 0;
+ }
+ totalLength += wordLength;
+ const int demotionRate = 100 - TWO_WORDS_CORRECTION_DEMOTION_BASE / (wordLength + 1);
+ int tempFirstFreq = freqArray[i];
+ multiplyRate(demotionRate, &tempFirstFreq);
+ totalFreq += tempFirstFreq;
}
- if (firstWordLength == 0 || secondWordLength == 0) {
+ if (totalLength <= 0 || totalFreq <= 0) {
return 0;
}
- const int firstDemotionRate = 100 - 100 / (firstWordLength + 1);
- int tempFirstFreq = firstFreq;
- multiplyRate(firstDemotionRate, &tempFirstFreq);
-
- const int secondDemotionRate = 100 - 100 / (secondWordLength + 1);
- int tempSecondFreq = secondFreq;
- multiplyRate(secondDemotionRate, &tempSecondFreq);
-
- const int totalLength = firstWordLength + secondWordLength;
+ // TODO: Currently totalFreq is adjusted to two word metrix.
// Promote pairFreq with multiplying by 2, because the word length is the same as the typed
// length.
- int totalFreq = tempFirstFreq + tempSecondFreq;
+ totalFreq = totalFreq * 2 / wordCount;
+ if (wordCount > 2) {
+ // Safety net for 3+ words -- Caveats: many heuristics and workarounds here.
+ int oneLengthCounter = 0;
+ int twoLengthCounter = 0;
+ for (int i = 0; i < wordCount; ++i) {
+ const int wordLength = wordLengthArray[i];
+ // TODO: Use bigram instead of this safety net
+ if (i < wordCount - 1) {
+ const int nextWordLength = wordLengthArray[i + 1];
+ if (wordLength == 1 && nextWordLength == 2) {
+ // Safety net to filter 1 length and 2 length sequential words
+ return 0;
+ }
+ }
+ const int freq = freqArray[i];
+ // Demote too short weak words
+ if (wordLength <= 4 && freq <= MAX_FREQ * 2 / 3 /* heuristic... */) {
+ multiplyRate(100 * freq / MAX_FREQ, &totalFreq);
+ }
+ if (wordLength == 1) {
+ ++oneLengthCounter;
+ } else if (wordLength == 2) {
+ ++twoLengthCounter;
+ }
+ if (oneLengthCounter >= 2 || (oneLengthCounter + twoLengthCounter) >= 4) {
+ // Safety net to filter too many short words
+ return 0;
+ }
+ }
+ multiplyRate(MULTIPLE_WORDS_DEMOTION_RATE, &totalFreq);
+ }
// This is a workaround to try offsetting the not-enough-demotion which will be done in
// calcNormalizedScore in Utils.java.
@@ -923,19 +1016,128 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
if (isSpaceProximity) {
// A word pair with one space proximity correction
if (DEBUG_DICT) {
- LOGI("Found a word pair with space proximity correction.");
+ AKLOGI("Found a word pair with space proximity correction.");
}
multiplyIntCapped(typedLetterMultiplier, &totalFreq);
multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &totalFreq);
}
- multiplyRate(WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE, &totalFreq);
+ if (isSpaceProximity) {
+ multiplyRate(WORDS_WITH_MISTYPED_SPACE_DEMOTION_RATE, &totalFreq);
+ } else {
+ multiplyRate(WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE, &totalFreq);
+ }
if (capitalizedWordDemotion) {
multiplyRate(TWO_WORDS_CAPITALIZED_DEMOTION_RATE, &totalFreq);
}
+ if (DEBUG_CORRECTION_FREQ) {
+ AKLOGI("Multiple words (%d, %d) (%d, %d) %d, %d", freqArray[0], freqArray[1],
+ wordLengthArray[0], wordLengthArray[1], capitalizedWordDemotion, totalFreq);
+ DUMP_WORD(word, wordLengthArray[0]);
+ }
+
return totalFreq;
}
+/* Damerau-Levenshtein distance */
+inline static int editDistanceInternal(
+ int* editDistanceTable, const unsigned short* before,
+ const int beforeLength, const unsigned short* after, const int afterLength) {
+ // dp[li][lo] dp[a][b] = dp[ a * lo + b]
+ int* dp = editDistanceTable;
+ const int li = beforeLength + 1;
+ const int lo = afterLength + 1;
+ for (int i = 0; i < li; ++i) {
+ dp[lo * i] = i;
+ }
+ for (int i = 0; i < lo; ++i) {
+ dp[i] = i;
+ }
+
+ for (int i = 0; i < li - 1; ++i) {
+ for (int j = 0; j < lo - 1; ++j) {
+ const uint32_t ci = toBaseLowerCase(before[i]);
+ const uint32_t co = toBaseLowerCase(after[j]);
+ const uint16_t cost = (ci == co) ? 0 : 1;
+ dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1,
+ min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost));
+ if (i > 0 && j > 0 && ci == toBaseLowerCase(after[j - 1])
+ && co == toBaseLowerCase(before[i - 1])) {
+ dp[(i + 1) * lo + (j + 1)] = min(
+ dp[(i + 1) * lo + (j + 1)], dp[(i - 1) * lo + (j - 1)] + cost);
+ }
+ }
+ }
+
+ if (DEBUG_EDIT_DISTANCE) {
+ AKLOGI("IN = %d, OUT = %d", beforeLength, afterLength);
+ for (int i = 0; i < li; ++i) {
+ for (int j = 0; j < lo; ++j) {
+ AKLOGI("EDIT[%d][%d], %d", i, j, dp[i * lo + j]);
+ }
+ }
+ }
+ return dp[li * lo - 1];
+}
+
+int Correction::RankingAlgorithm::editDistance(const unsigned short* before,
+ const int beforeLength, const unsigned short* after, const int afterLength) {
+ int table[(beforeLength + 1) * (afterLength + 1)];
+ return editDistanceInternal(table, before, beforeLength, after, afterLength);
+}
+
+
+// In dictionary.cpp, getSuggestion() method,
+// suggestion scores are computed using the below formula.
+// original score
+// := pow(mTypedLetterMultiplier (this is defined 2),
+// (the number of matched characters between typed word and suggested word))
+// * (individual word's score which defined in the unigram dictionary,
+// and this score is defined in range [0, 255].)
+// Then, the following processing is applied.
+// - If the dictionary word is matched up to the point of the user entry
+// (full match up to min(before.length(), after.length())
+// => Then multiply by FULL_MATCHED_WORDS_PROMOTION_RATE (this is defined 1.2)
+// - If the word is a true full match except for differences in accents or
+// capitalization, then treat it as if the score was 255.
+// - If before.length() == after.length()
+// => multiply by mFullWordMultiplier (this is defined 2))
+// So, maximum original score is pow(2, min(before.length(), after.length())) * 255 * 2 * 1.2
+// For historical reasons we ignore the 1.2 modifier (because the measure for a good
+// autocorrection threshold was done at a time when it didn't exist). This doesn't change
+// the result.
+// So, we can normalize original score by dividing pow(2, min(b.l(),a.l())) * 255 * 2.
+
+/* static */
+double Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short* before,
+ const int beforeLength, const unsigned short* after, const int afterLength,
+ const int score) {
+ if (0 == beforeLength || 0 == afterLength) {
+ return 0;
+ }
+ const int distance = editDistance(before, beforeLength, after, afterLength);
+ int spaceCount = 0;
+ for (int i = 0; i < afterLength; ++i) {
+ if (after[i] == CODE_SPACE) {
+ ++spaceCount;
+ }
+ }
+
+ if (spaceCount == afterLength) {
+ return 0;
+ }
+
+ const double maxScore = score >= S_INT_MAX ? S_INT_MAX : MAX_INITIAL_SCORE
+ * pow((double)TYPED_LETTER_MULTIPLIER,
+ (double)min(beforeLength, afterLength - spaceCount)) * FULL_WORD_MULTIPLIER;
+
+ // add a weight based on edit distance.
+ // distance <= max(afterLength, beforeLength) == afterLength,
+ // so, 0 <= distance / afterLength <= 1
+ const double weight = 1.0 - (double) distance / afterLength;
+ return (score / maxScore) * weight;
+}
+
} // namespace latinime