about | summary | refs | log | tree | commit | diff | stats
path: root/native/jni/src/correction.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni/src/correction.cpp')
-rw-r--r-- native/jni/src/correction.cpp | 300
1 files changed, 146 insertions, 154 deletions
diff --git a/native/jni/src/correction.cpp b/native/jni/src/correction.cpp
index 99f5b92c1..49e3e3c8c 100644
--- a/native/jni/src/correction.cpp
+++ b/native/jni/src/correction.cpp
@@ -14,22 +14,22 @@
* limitations under the License.
*/
-#include <assert.h>
-#include <ctype.h>
-#include <math.h>
-#include <stdio.h>
-#include <string.h>
+#include <cassert>
+#include <cctype>
+#include <cmath>
+#include <cstring>
#define LOG_TAG "LatinIME: correction.cpp"
#include "char_utils.h"
#include "correction.h"
#include "defines.h"
-#include "dictionary.h"
-#include "proximity_info.h"
+#include "proximity_info_state.h"
namespace latinime {
+class ProximityInfo;
+
/////////////////////////////
// edit distance funcitons //
/////////////////////////////
@@ -55,25 +55,25 @@ inline static void dumpEditDistance10ForDebug(int *editDistanceTable,
}
AKLOGI("[ %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d ]",
c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7], c[8], c[9], c[10]);
- (void)c;
+ (void)c; // To suppress compiler warning
}
}
}
inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigned short *input,
- const int inputLength, const unsigned short *output, const int outputLength) {
+ const int inputSize, const unsigned short *output, const int outputLength) {
// TODO: Make sure that editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL] is not touched.
- // Let dp[i][j] be editDistanceTable[i * (inputLength + 1) + j].
- // Assuming that dp[0][0] ... dp[outputLength - 1][inputLength] are already calculated,
- // and calculate dp[ouputLength][0] ... dp[outputLength][inputLength].
- int *const current = editDistanceTable + outputLength * (inputLength + 1);
- const int *const prev = editDistanceTable + (outputLength - 1) * (inputLength + 1);
+ // Let dp[i][j] be editDistanceTable[i * (inputSize + 1) + j].
+ // Assuming that dp[0][0] ... dp[outputLength - 1][inputSize] are already calculated,
+ // this calculates dp[outputLength][0] ... dp[outputLength][inputSize].
+ int *const current = editDistanceTable + outputLength * (inputSize + 1);
+ const int *const prev = editDistanceTable + (outputLength - 1) * (inputSize + 1);
const int *const prevprev =
- outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputLength + 1) : 0;
+ outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputSize + 1) : 0;
current[0] = outputLength;
const uint32_t co = toBaseLowerCase(output[outputLength - 1]);
const uint32_t prevCO = outputLength >= 2 ? toBaseLowerCase(output[outputLength - 2]) : 0;
- for (int i = 1; i <= inputLength; ++i) {
+ for (int i = 1; i <= inputSize; ++i) {
const uint32_t ci = toBaseLowerCase(input[i - 1]);
const uint16_t cost = (ci == co) ? 0 : 1;
current[i] = min(current[i - 1] + 1, min(prev[i] + 1, prev[i - 1] + cost));
@@ -84,42 +84,37 @@ inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigne
}
inline static int getCurrentEditDistance(int *editDistanceTable, const int editDistanceTableWidth,
- const int outputLength, const int inputLength) {
+ const int outputLength, const int inputSize) {
if (DEBUG_EDIT_DISTANCE) {
- AKLOGI("getCurrentEditDistance %d, %d", inputLength, outputLength);
+ AKLOGI("getCurrentEditDistance %d, %d", inputSize, outputLength);
}
- return editDistanceTable[(editDistanceTableWidth + 1) * (outputLength) + inputLength];
+ return editDistanceTable[(editDistanceTableWidth + 1) * (outputLength) + inputSize];
}
//////////////////////
// inline functions //
//////////////////////
-static const char QUOTE = '\'';
+static const char SINGLE_QUOTE = '\'';
-inline bool Correction::isQuote(const unsigned short c) {
- const unsigned short userTypedChar = mProximityInfo->getPrimaryCharAt(mInputIndex);
- return (c == QUOTE && userTypedChar != QUOTE);
+inline bool Correction::isSingleQuote(const unsigned short c) {
+ const unsigned short userTypedChar = mProximityInfoState.getPrimaryCharAt(mInputIndex);
+ return (c == SINGLE_QUOTE && userTypedChar != SINGLE_QUOTE);
}
////////////////
// Correction //
////////////////
-Correction::Correction(const int typedLetterMultiplier, const int fullWordMultiplier)
- : TYPED_LETTER_MULTIPLIER(typedLetterMultiplier), FULL_WORD_MULTIPLIER(fullWordMultiplier) {
- initEditDistance(mEditDistanceTable);
-}
-
void Correction::resetCorrection() {
mTotalTraverseCount = 0;
}
-void Correction::initCorrection(const ProximityInfo *pi, const int inputLength,
+void Correction::initCorrection(const ProximityInfo *pi, const int inputSize,
const int maxDepth) {
mProximityInfo = pi;
- mInputLength = inputLength;
+ mInputSize = inputSize;
mMaxDepth = maxDepth;
- mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;
+ mMaxEditDistance = mInputSize < 5 ? 2 : mInputSize / 2;
// TODO: This is not supposed to be required. Check what's going wrong with
// editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL]
initEditDistance(mEditDistanceTable);
@@ -159,11 +154,13 @@ void Correction::checkState() {
if (mSkipPos >= 0) ++inputCount;
if (mExcessivePos >= 0) ++inputCount;
if (mTransposedPos >= 0) ++inputCount;
- // TODO: remove this assert
- assert(inputCount <= 1);
}
}
+bool Correction::sameAsTyped() {
+ return mProximityInfoState.sameAsTyped(mWord, mOutputIndex);
+}
+
int Correction::getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray,
const int wordCount, const bool isSpaceProximity, const unsigned short *word) {
return Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(freqArray, wordLengthArray,
@@ -171,26 +168,22 @@ int Correction::getFreqForSplitMultipleWords(const int *freqArray, const int *wo
}
int Correction::getFinalProbability(const int probability, unsigned short **word, int *wordLength) {
- return getFinalProbabilityInternal(probability, word, wordLength, mInputLength);
+ return getFinalProbabilityInternal(probability, word, wordLength, mInputSize);
}
int Correction::getFinalProbabilityForSubQueue(const int probability, unsigned short **word,
- int *wordLength, const int inputLength) {
- return getFinalProbabilityInternal(probability, word, wordLength, inputLength);
+ int *wordLength, const int inputSize) {
+ return getFinalProbabilityInternal(probability, word, wordLength, inputSize);
}
int Correction::getFinalProbabilityInternal(const int probability, unsigned short **word,
- int *wordLength, const int inputLength) {
+ int *wordLength, const int inputSize) {
const int outputIndex = mTerminalOutputIndex;
const int inputIndex = mTerminalInputIndex;
*wordLength = outputIndex + 1;
- if (outputIndex < MIN_SUGGEST_DEPTH) {
- return NOT_A_PROBABILITY;
- }
-
*word = mWord;
int finalProbability= Correction::RankingAlgorithm::calculateFinalProbability(
- inputIndex, outputIndex, probability, mEditDistanceTable, this, inputLength);
+ inputIndex, outputIndex, probability, mEditDistanceTable, this, inputSize);
return finalProbability;
}
@@ -233,7 +226,7 @@ int Correction::goDownTree(
}
// TODO: remove
-int Correction::getInputIndex() {
+int Correction::getInputIndex() const {
return mInputIndex;
}
@@ -277,13 +270,13 @@ bool Correction::needsToPrune() const {
// TODO: use edit distance here
return mOutputIndex - 1 >= mMaxDepth || mProximityCount > mMaxEditDistance
// Allow one char longer word for missing character
- || (!mDoAutoCompletion && (mOutputIndex > mInputLength));
+ || (!mDoAutoCompletion && (mOutputIndex > mInputSize));
}
void Correction::addCharToCurrentWord(const int32_t c) {
mWord[mOutputIndex] = c;
- const unsigned short *primaryInputWord = mProximityInfo->getPrimaryInputWord();
- calcEditDistanceOneStep(mEditDistanceTable, primaryInputWord, mInputLength,
+ const unsigned short *primaryInputWord = mProximityInfoState.getPrimaryInputWord();
+ calcEditDistanceOneStep(mEditDistanceTable, primaryInputWord, mInputSize,
mWord, mOutputIndex + 1);
}
@@ -308,13 +301,12 @@ Correction::CorrectionType Correction::processUnrelatedCorrectionType() {
return UNRELATED;
}
-inline bool isEquivalentChar(ProximityInfo::ProximityType type) {
- return type == ProximityInfo::EQUIVALENT_CHAR;
+inline bool isEquivalentChar(ProximityType type) {
+ return type == EQUIVALENT_CHAR;
}
-inline bool isProximityCharOrEquivalentChar(ProximityInfo::ProximityType type) {
- return type == ProximityInfo::EQUIVALENT_CHAR
- || type == ProximityInfo::NEAR_PROXIMITY_CHAR;
+inline bool isProximityCharOrEquivalentChar(ProximityType type) {
+ return type == EQUIVALENT_CHAR || type == NEAR_PROXIMITY_CHAR;
}
Correction::CorrectionType Correction::processCharAndCalcState(
@@ -331,25 +323,25 @@ Correction::CorrectionType Correction::processCharAndCalcState(
mDistances[mOutputIndex] = NOT_A_DISTANCE;
// Skip checking this node
- if (mNeedsToTraverseAllNodes || isQuote(c)) {
+ if (mNeedsToTraverseAllNodes || isSingleQuote(c)) {
bool incremented = false;
- if (mLastCharExceeded && mInputIndex == mInputLength - 1) {
+ if (mLastCharExceeded && mInputIndex == mInputSize - 1) {
// TODO: Do not check the proximity if EditDistance exceeds the threshold
- const ProximityInfo::ProximityType matchId =
- mProximityInfo->getMatchedProximityId(mInputIndex, c, true, &proximityIndex);
+ const ProximityType matchId = mProximityInfoState.getMatchedProximityId(
+ mInputIndex, c, true, &proximityIndex);
if (isEquivalentChar(matchId)) {
mLastCharExceeded = false;
--mExcessiveCount;
mDistances[mOutputIndex] =
- mProximityInfo->getNormalizedSquaredDistance(mInputIndex, 0);
- } else if (matchId == ProximityInfo::NEAR_PROXIMITY_CHAR) {
+ mProximityInfoState.getNormalizedSquaredDistance(mInputIndex, 0);
+ } else if (matchId == NEAR_PROXIMITY_CHAR) {
mLastCharExceeded = false;
--mExcessiveCount;
++mProximityCount;
- mDistances[mOutputIndex] =
- mProximityInfo->getNormalizedSquaredDistance(mInputIndex, proximityIndex);
+ mDistances[mOutputIndex] = mProximityInfoState.getNormalizedSquaredDistance(
+ mInputIndex, proximityIndex);
}
- if (!isQuote(c)) {
+ if (!isSingleQuote(c)) {
incrementInputIndex();
incremented = true;
}
@@ -362,7 +354,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
if (mExcessiveCount == 0 && mExcessivePos < mOutputIndex) {
mExcessivePos = mOutputIndex;
}
- if (mExcessivePos < mInputLength - 1) {
+ if (mExcessivePos < mInputSize - 1) {
mExceeding = mExcessivePos == mInputIndex && canTryCorrection;
}
}
@@ -370,7 +362,8 @@ Correction::CorrectionType Correction::processCharAndCalcState(
if (mSkipPos >= 0) {
if (mSkippedCount == 0 && mSkipPos < mOutputIndex) {
if (DEBUG_DICT) {
- assert(mSkipPos == mOutputIndex - 1);
+ // TODO: Enable this assertion.
+ //assert(mSkipPos == mOutputIndex - 1);
}
mSkipPos = mOutputIndex;
}
@@ -381,14 +374,15 @@ Correction::CorrectionType Correction::processCharAndCalcState(
if (mTransposedCount == 0 && mTransposedPos < mOutputIndex) {
mTransposedPos = mOutputIndex;
}
- if (mTransposedPos < mInputLength - 1) {
+ if (mTransposedPos < mInputSize - 1) {
mTransposing = mInputIndex == mTransposedPos && canTryCorrection;
}
}
bool secondTransposing = false;
if (mTransposedCount % 2 == 1) {
- if (isEquivalentChar(mProximityInfo->getMatchedProximityId(mInputIndex - 1, c, false))) {
+ if (isEquivalentChar(mProximityInfoState.getMatchedProximityId(
+ mInputIndex - 1, c, false))) {
++mTransposedCount;
secondTransposing = true;
} else if (mCorrectionStates[mOutputIndex].mExceeding) {
@@ -399,7 +393,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
} else {
--mTransposedCount;
if (DEBUG_CORRECTION
- && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength)
+ && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize)
&& (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
|| MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
DUMP_WORD(mWord, mOutputIndex);
@@ -417,20 +411,20 @@ Correction::CorrectionType Correction::processCharAndCalcState(
? (noCorrectionsHappenedSoFar || mProximityCount == 0)
: (noCorrectionsHappenedSoFar && mProximityCount == 0);
- ProximityInfo::ProximityType matchedProximityCharId = secondTransposing
- ? ProximityInfo::EQUIVALENT_CHAR
- : mProximityInfo->getMatchedProximityId(
+ ProximityType matchedProximityCharId = secondTransposing
+ ? EQUIVALENT_CHAR
+ : mProximityInfoState.getMatchedProximityId(
mInputIndex, c, checkProximityChars, &proximityIndex);
- if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId
- || ProximityInfo::ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
+ if (UNRELATED_CHAR == matchedProximityCharId
+ || ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
if (canTryCorrection && mOutputIndex > 0
&& mCorrectionStates[mOutputIndex].mProximityMatching
&& mCorrectionStates[mOutputIndex].mExceeding
- && isEquivalentChar(mProximityInfo->getMatchedProximityId(
+ && isEquivalentChar(mProximityInfoState.getMatchedProximityId(
mInputIndex, mWord[mOutputIndex - 1], false))) {
if (DEBUG_CORRECTION
- && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength)
+ && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize)
&& (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
|| MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
AKLOGI("CONVERSION p->e %c", mWord[mOutputIndex - 1]);
@@ -446,27 +440,27 @@ Correction::CorrectionType Correction::processCharAndCalcState(
// Here, we are doing something equivalent to matchedProximityCharId,
// but we already know that "excessive char correction" just happened
// so that we just need to check "mProximityCount == 0".
- matchedProximityCharId = mProximityInfo->getMatchedProximityId(
+ matchedProximityCharId = mProximityInfoState.getMatchedProximityId(
mInputIndex, c, mProximityCount == 0, &proximityIndex);
}
}
- if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId
- || ProximityInfo::ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
- if (ProximityInfo::ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
+ if (UNRELATED_CHAR == matchedProximityCharId
+ || ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
+ if (ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
mAdditionalProximityMatching = true;
}
// TODO: Optimize
// As the current char turned out to be an unrelated char,
// we will try other correction-types. Please note that mCorrectionStates[mOutputIndex]
// here refers to the previous state.
- if (mInputIndex < mInputLength - 1 && mOutputIndex > 0 && mTransposedCount > 0
+ if (mInputIndex < mInputSize - 1 && mOutputIndex > 0 && mTransposedCount > 0
&& !mCorrectionStates[mOutputIndex].mTransposing
&& mCorrectionStates[mOutputIndex - 1].mTransposing
- && isEquivalentChar(mProximityInfo->getMatchedProximityId(
+ && isEquivalentChar(mProximityInfoState.getMatchedProximityId(
mInputIndex, mWord[mOutputIndex - 1], false))
&& isEquivalentChar(
- mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false))) {
+ mProximityInfoState.getMatchedProximityId(mInputIndex + 1, c, false))) {
// Conversion t->e
// Example:
// occaisional -> occa sional
@@ -478,7 +472,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
&& !mCorrectionStates[mOutputIndex].mTransposing
&& mCorrectionStates[mOutputIndex - 1].mTransposing
&& isEquivalentChar(
- mProximityInfo->getMatchedProximityId(mInputIndex - 1, c, false))) {
+ mProximityInfoState.getMatchedProximityId(mInputIndex - 1, c, false))) {
// Conversion t->s
// Example:
// chcolate -> chocolate
@@ -490,28 +484,28 @@ Correction::CorrectionType Correction::processCharAndCalcState(
&& mCorrectionStates[mOutputIndex].mProximityMatching
&& mCorrectionStates[mOutputIndex].mSkipping
&& isEquivalentChar(
- mProximityInfo->getMatchedProximityId(mInputIndex - 1, c, false))) {
+ mProximityInfoState.getMatchedProximityId(mInputIndex - 1, c, false))) {
// Conversion p->s
// Note: This logic tries saving cases like contrst --> contrast -- "a" is one of
// proximity chars of "s", but it should rather be handled as a skipped char.
++mSkippedCount;
--mProximityCount;
return processSkipChar(c, isTerminal, false);
- } else if (mInputIndex - 1 < mInputLength
+ } else if (mInputIndex - 1 < mInputSize
&& mSkippedCount > 0
&& mCorrectionStates[mOutputIndex].mSkipping
&& mCorrectionStates[mOutputIndex].mAdditionalProximityMatching
&& isProximityCharOrEquivalentChar(
- mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false))) {
+ mProximityInfoState.getMatchedProximityId(mInputIndex + 1, c, false))) {
// Conversion s->a
incrementInputIndex();
--mSkippedCount;
mProximityMatching = true;
++mProximityCount;
mDistances[mOutputIndex] = ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO;
- } else if ((mExceeding || mTransposing) && mInputIndex - 1 < mInputLength
+ } else if ((mExceeding || mTransposing) && mInputIndex - 1 < mInputSize
&& isEquivalentChar(
- mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false))) {
+ mProximityInfoState.getMatchedProximityId(mInputIndex + 1, c, false))) {
// 1.2. Excessive or transpose correction
if (mTransposing) {
++mTransposedCount;
@@ -520,7 +514,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
incrementInputIndex();
}
if (DEBUG_CORRECTION
- && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength)
+ && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize)
&& (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
|| MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
DUMP_WORD(mWord, mOutputIndex);
@@ -536,20 +530,20 @@ Correction::CorrectionType Correction::processCharAndCalcState(
// 3. Skip correction
++mSkippedCount;
if (DEBUG_CORRECTION
- && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength)
+ && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize)
&& (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
|| MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
AKLOGI("SKIP: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
mTransposedCount, mExcessiveCount, c);
}
return processSkipChar(c, isTerminal, false);
- } else if (ProximityInfo::ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
+ } else if (ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
// As a last resort, use additional proximity characters
mProximityMatching = true;
++mProximityCount;
mDistances[mOutputIndex] = ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO;
if (DEBUG_CORRECTION
- && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength)
+ && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize)
&& (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
|| MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
AKLOGI("ADDITIONALPROX: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
@@ -557,7 +551,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
}
} else {
if (DEBUG_CORRECTION
- && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength)
+ && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize)
&& (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
|| MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
DUMP_WORD(mWord, mOutputIndex);
@@ -567,20 +561,20 @@ Correction::CorrectionType Correction::processCharAndCalcState(
return processUnrelatedCorrectionType();
}
} else if (secondTransposing) {
- // If inputIndex is greater than mInputLength, that means there is no
+ // If inputIndex is greater than mInputSize, that means there are no
// proximity chars. So, we don't need to check proximity.
mMatching = true;
} else if (isEquivalentChar(matchedProximityCharId)) {
mMatching = true;
++mEquivalentCharCount;
- mDistances[mOutputIndex] = mProximityInfo->getNormalizedSquaredDistance(mInputIndex, 0);
- } else if (ProximityInfo::NEAR_PROXIMITY_CHAR == matchedProximityCharId) {
+ mDistances[mOutputIndex] = mProximityInfoState.getNormalizedSquaredDistance(mInputIndex, 0);
+ } else if (NEAR_PROXIMITY_CHAR == matchedProximityCharId) {
mProximityMatching = true;
++mProximityCount;
mDistances[mOutputIndex] =
- mProximityInfo->getNormalizedSquaredDistance(mInputIndex, proximityIndex);
+ mProximityInfoState.getNormalizedSquaredDistance(mInputIndex, proximityIndex);
if (DEBUG_CORRECTION
- && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength)
+ && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize)
&& (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
|| MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
AKLOGI("PROX: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
@@ -592,8 +586,8 @@ Correction::CorrectionType Correction::processCharAndCalcState(
// 4. Last char excessive correction
mLastCharExceeded = mExcessiveCount == 0 && mSkippedCount == 0 && mTransposedCount == 0
- && mProximityCount == 0 && (mInputIndex == mInputLength - 2);
- const bool isSameAsUserTypedLength = (mInputLength == mInputIndex + 1) || mLastCharExceeded;
+ && mProximityCount == 0 && (mInputIndex == mInputSize - 2);
+ const bool isSameAsUserTypedLength = (mInputSize == mInputIndex + 1) || mLastCharExceeded;
if (mLastCharExceeded) {
++mExcessiveCount;
}
@@ -604,7 +598,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
}
const bool needsToTryOnTerminalForTheLastPossibleExcessiveChar =
- mExceeding && mInputIndex == mInputLength - 2;
+ mExceeding && mInputIndex == mInputSize - 2;
// Finally, we are ready to go to the next character, the next "virtual node".
// We should advance the input index.
@@ -620,7 +614,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
mTerminalInputIndex = mInputIndex - 1;
mTerminalOutputIndex = mOutputIndex - 1;
if (DEBUG_CORRECTION
- && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength)
+ && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize)
&& (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
DUMP_WORD(mWord, mOutputIndex);
AKLOGI("ONTERMINAL(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
@@ -634,13 +628,10 @@ Correction::CorrectionType Correction::processCharAndCalcState(
}
}
-Correction::~Correction() {
-}
-
-inline static int getQuoteCount(const unsigned short* word, const int length) {
+inline static int getQuoteCount(const unsigned short *word, const int length) {
int quoteCount = 0;
for (int i = 0; i < length; ++i) {
- if(word[i] == '\'') {
+ if (word[i] == SINGLE_QUOTE) {
++quoteCount;
}
}
@@ -657,12 +648,12 @@ inline static bool isUpperCase(unsigned short c) {
/* static */
int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex,
- const int outputIndex, const int freq, int* editDistanceTable, const Correction* correction,
- const int inputLength) {
+ const int outputIndex, const int freq, int *editDistanceTable, const Correction *correction,
+ const int inputSize) {
const int excessivePos = correction->getExcessivePos();
const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER;
- const ProximityInfo *proximityInfo = correction->mProximityInfo;
+ const ProximityInfoState *proximityInfoState = &correction->mProximityInfoState;
const int skippedCount = correction->mSkippedCount;
const int transposedCount = correction->mTransposedCount / 2;
const int excessiveCount = correction->mExcessiveCount + correction->mTransposedCount % 2;
@@ -670,55 +661,55 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
const bool lastCharExceeded = correction->mLastCharExceeded;
const bool useFullEditDistance = correction->mUseFullEditDistance;
const int outputLength = outputIndex + 1;
- if (skippedCount >= inputLength || inputLength == 0) {
+ if (skippedCount >= inputSize || inputSize == 0) {
return -1;
}
// TODO: find more robust way
- bool sameLength = lastCharExceeded ? (inputLength == inputIndex + 2)
- : (inputLength == inputIndex + 1);
+ bool sameLength = lastCharExceeded ? (inputSize == inputIndex + 2)
+ : (inputSize == inputIndex + 1);
// TODO: use mExcessiveCount
- const int matchCount = inputLength - correction->mProximityCount - excessiveCount;
+ const int matchCount = inputSize - correction->mProximityCount - excessiveCount;
- const unsigned short* word = correction->mWord;
+ const unsigned short *word = correction->mWord;
const bool skipped = skippedCount > 0;
const int quoteDiffCount = max(0, getQuoteCount(word, outputLength)
- - getQuoteCount(proximityInfo->getPrimaryInputWord(), inputLength));
+ - getQuoteCount(proximityInfoState->getPrimaryInputWord(), inputSize));
// TODO: Calculate edit distance for transposed and excessive
int ed = 0;
if (DEBUG_DICT_FULL) {
- dumpEditDistance10ForDebug(editDistanceTable, correction->mInputLength, outputLength);
+ dumpEditDistance10ForDebug(editDistanceTable, correction->mInputSize, outputLength);
}
int adjustedProximityMatchedCount = proximityMatchedCount;
int finalFreq = freq;
if (DEBUG_CORRECTION_FREQ
- && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == inputLength)) {
+ && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == inputSize)) {
AKLOGI("FinalFreq0: %d", finalFreq);
}
// TODO: Optimize this.
if (transposedCount > 0 || proximityMatchedCount > 0 || skipped || excessiveCount > 0) {
- ed = getCurrentEditDistance(editDistanceTable, correction->mInputLength, outputLength,
- inputLength) - transposedCount;
+ ed = getCurrentEditDistance(editDistanceTable, correction->mInputSize, outputLength,
+ inputSize) - transposedCount;
const int matchWeight = powerIntCapped(typedLetterMultiplier,
- max(inputLength, outputLength) - ed);
+ max(inputSize, outputLength) - ed);
multiplyIntCapped(matchWeight, &finalFreq);
// TODO: Demote further if there are two or more excessive chars with longer user input?
- if (inputLength > outputLength) {
+ if (inputSize > outputLength) {
multiplyRate(INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE, &finalFreq);
}
ed = max(0, ed - quoteDiffCount);
- adjustedProximityMatchedCount = min(max(0, ed - (outputLength - inputLength)),
+ adjustedProximityMatchedCount = min(max(0, ed - (outputLength - inputSize)),
proximityMatchedCount);
if (transposedCount <= 0) {
- if (ed == 1 && (inputLength == outputLength - 1 || inputLength == outputLength + 1)) {
+ if (ed == 1 && (inputSize == outputLength - 1 || inputSize == outputLength + 1)) {
// Promote a word with just one skipped or excessive char
if (sameLength) {
multiplyRate(WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE
@@ -737,8 +728,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
multiplyIntCapped(matchWeight, &finalFreq);
}
- if (proximityInfo->getMatchedProximityId(0, word[0], true)
- == ProximityInfo::UNRELATED_CHAR) {
+ if (proximityInfoState->getMatchedProximityId(0, word[0], true) == UNRELATED_CHAR) {
multiplyRate(FIRST_CHAR_DIFFERENT_DEMOTION_RATE, &finalFreq);
}
@@ -748,8 +738,8 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
// Demotion for a word with missing character
if (skipped) {
const int demotionRate = WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE
- * (10 * inputLength - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X)
- / (10 * inputLength
+ * (10 * inputSize - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X)
+ / (10 * inputSize
- WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X + 10);
if (DEBUG_DICT_FULL) {
AKLOGI("Demotion rate for missing character is %d.", demotionRate);
@@ -764,7 +754,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
// Demotion for a word with excessive character
if (excessiveCount > 0) {
multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE, &finalFreq);
- if (!lastCharExceeded && !proximityInfo->existsAdjacentProximityChars(excessivePos)) {
+ if (!lastCharExceeded && !proximityInfoState->existsAdjacentProximityChars(excessivePos)) {
if (DEBUG_DICT_FULL) {
AKLOGI("Double excessive demotion");
}
@@ -775,8 +765,9 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
}
const bool performTouchPositionCorrection =
- CALIBRATE_SCORE_BY_TOUCH_COORDINATES && proximityInfo->touchPositionCorrectionEnabled()
- && skippedCount == 0 && excessiveCount == 0 && transposedCount == 0;
+ CALIBRATE_SCORE_BY_TOUCH_COORDINATES
+ && proximityInfoState->touchPositionCorrectionEnabled()
+ && skippedCount == 0 && excessiveCount == 0 && transposedCount == 0;
// Score calibration by touch coordinates is being done only for pure-fat finger typing error
// cases.
int additionalProximityCount = 0;
@@ -795,8 +786,8 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
static const float MIN = 0.3f;
static const float R1 = NEUTRAL_SCORE_SQUARED_RADIUS;
static const float R2 = HALF_SCORE_SQUARED_RADIUS;
- const float x = (float)squaredDistance
- / ProximityInfo::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR;
+ const float x = static_cast<float>(squaredDistance)
+ / ProximityInfoState::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR;
const float factor = max((x < R1)
? (A * (R1 - x) + B * x) / R1
: (B * (R2 - x) + C * (x - R1)) / (R2 - R1), MIN);
@@ -850,7 +841,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
? adjustedProximityMatchedCount
: (proximityMatchedCount + transposedCount);
multiplyRate(
- 100 - CORRECTION_COUNT_RATE_DEMOTION_RATE_BASE * errorCount / inputLength, &finalFreq);
+ 100 - CORRECTION_COUNT_RATE_DEMOTION_RATE_BASE * errorCount / inputSize, &finalFreq);
// Promotion for an exactly matched word
if (ed == 0) {
@@ -885,7 +876,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
e ... exceeding
p ... proximity matching
*/
- if (matchCount == inputLength && matchCount >= 2 && !skipped
+ if (matchCount == inputSize && matchCount >= 2 && !skipped
&& word[matchCount] == word[matchCount - 1]) {
multiplyRate(WORDS_WITH_MATCH_SKIP_PROMOTION_RATE, &finalFreq);
}
@@ -895,8 +886,8 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
multiplyIntCapped(fullWordMultiplier, &finalFreq);
}
- if (useFullEditDistance && outputLength > inputLength + 1) {
- const int diff = outputLength - inputLength - 1;
+ if (useFullEditDistance && outputLength > inputSize + 1) {
+ const int diff = outputLength - inputSize - 1;
const int divider = diff < 31 ? 1 << diff : S_INT_MAX;
finalFreq = divider > finalFreq ? 1 : finalFreq / divider;
}
@@ -906,8 +897,8 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
}
if (DEBUG_CORRECTION_FREQ
- && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == inputLength)) {
- DUMP_WORD(proximityInfo->getPrimaryInputWord(), inputLength);
+ && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == inputSize)) {
+ DUMP_WORD(correction->getPrimaryInputWord(), inputSize);
DUMP_WORD(correction->mWord, outputLength);
AKLOGI("FinalFreq: [P%d, S%d, T%d, E%d, A%d] %d, %d, %d, %d, %d, %d", proximityMatchedCount,
skippedCount, transposedCount, excessiveCount, additionalProximityCount,
@@ -920,7 +911,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
/* static */
int Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(
const int *freqArray, const int *wordLengthArray, const int wordCount,
- const Correction* correction, const bool isSpaceProximity, const unsigned short *word) {
+ const Correction *correction, const bool isSpaceProximity, const unsigned short *word) {
const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
bool firstCapitalizedWordDemotion = false;
@@ -946,7 +937,7 @@ int Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(
int totalLength = 0;
int totalFreq = 0;
- for (int i = 0; i < wordCount; ++i){
+ for (int i = 0; i < wordCount; ++i) {
const int wordLength = wordLengthArray[i];
if (wordLength <= 0) {
return 0;
@@ -1050,10 +1041,10 @@ int Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(
/* Damerau-Levenshtein distance */
inline static int editDistanceInternal(
- int* editDistanceTable, const unsigned short* before,
- const int beforeLength, const unsigned short* after, const int afterLength) {
+ int *editDistanceTable, const unsigned short *before,
+ const int beforeLength, const unsigned short *after, const int afterLength) {
// dp[li][lo] dp[a][b] = dp[ a * lo + b]
- int* dp = editDistanceTable;
+ int *dp = editDistanceTable;
const int li = beforeLength + 1;
const int lo = afterLength + 1;
for (int i = 0; i < li; ++i) {
@@ -1089,8 +1080,8 @@ inline static int editDistanceInternal(
return dp[li * lo - 1];
}
-int Correction::RankingAlgorithm::editDistance(const unsigned short* before,
- const int beforeLength, const unsigned short* after, const int afterLength) {
+int Correction::RankingAlgorithm::editDistance(const unsigned short *before,
+ const int beforeLength, const unsigned short *after, const int afterLength) {
int table[(beforeLength + 1) * (afterLength + 1)];
return editDistanceInternal(table, before, beforeLength, after, afterLength);
}
@@ -1099,7 +1090,7 @@ int Correction::RankingAlgorithm::editDistance(const unsigned short* before,
// In dictionary.cpp, getSuggestion() method,
// suggestion scores are computed using the below formula.
// original score
-// := pow(mTypedLetterMultiplier (this is defined 2),
+// := powf(mTypedLetterMultiplier (this is defined 2),
// (the number of matched characters between typed word and suggested word))
// * (individual word's score which defined in the unigram dictionary,
// and this score is defined in range [0, 255].)
@@ -1111,15 +1102,15 @@ int Correction::RankingAlgorithm::editDistance(const unsigned short* before,
// capitalization, then treat it as if the score was 255.
// - If before.length() == after.length()
// => multiply by mFullWordMultiplier (this is defined 2))
-// So, maximum original score is pow(2, min(before.length(), after.length())) * 255 * 2 * 1.2
+// So, maximum original score is powf(2, min(before.length(), after.length())) * 255 * 2 * 1.2
// For historical reasons we ignore the 1.2 modifier (because the measure for a good
// autocorrection threshold was done at a time when it didn't exist). This doesn't change
// the result.
-// So, we can normalize original score by dividing pow(2, min(b.l(),a.l())) * 255 * 2.
+// So, we can normalize original score by dividing powf(2, min(b.l(),a.l())) * 255 * 2.
/* static */
-float Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short* before,
- const int beforeLength, const unsigned short* after, const int afterLength,
+float Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short *before,
+ const int beforeLength, const unsigned short *after, const int afterLength,
const int score) {
if (0 == beforeLength || 0 == afterLength) {
return 0;
@@ -1136,15 +1127,16 @@ float Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short* be
return 0;
}
- const float maxScore = score >= S_INT_MAX ? S_INT_MAX : MAX_INITIAL_SCORE
- * pow((float)TYPED_LETTER_MULTIPLIER,
- (float)min(beforeLength, afterLength - spaceCount)) * FULL_WORD_MULTIPLIER;
+ const float maxScore = score >= S_INT_MAX ? static_cast<float>(S_INT_MAX)
+ : static_cast<float>(MAX_INITIAL_SCORE)
+ * powf(static_cast<float>(TYPED_LETTER_MULTIPLIER),
+ static_cast<float>(min(beforeLength, afterLength - spaceCount)))
+ * static_cast<float>(FULL_WORD_MULTIPLIER);
// add a weight based on edit distance.
// distance <= max(afterLength, beforeLength) == afterLength,
// so, 0 <= distance / afterLength <= 1
- const float weight = 1.0 - (float) distance / afterLength;
- return (score / maxScore) * weight;
+ const float weight = 1.0f - static_cast<float>(distance) / static_cast<float>(afterLength);
+ return (static_cast<float>(score) / maxScore) * weight;
}
-
} // namespace latinime