diff options
Diffstat (limited to 'native/src')
-rw-r--r-- | native/src/correction.cpp | 146 | ||||
-rw-r--r-- | native/src/correction.h | 2 | ||||
-rw-r--r-- | native/src/defines.h | 9 | ||||
-rw-r--r-- | native/src/proximity_info.cpp | 18 | ||||
-rw-r--r-- | native/src/proximity_info.h | 4 |
5 files changed, 127 insertions, 52 deletions
diff --git a/native/src/correction.cpp b/native/src/correction.cpp index 7323747d7..dafc0fd7f 100644 --- a/native/src/correction.cpp +++ b/native/src/correction.cpp @@ -383,7 +383,10 @@ Correction::CorrectionType Correction::processCharAndCalcState( incrementInputIndex(); } else { --mTransposedCount; - if (DEBUG_CORRECTION) { + if (DEBUG_CORRECTION + && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength) + && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 + || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) { DUMP_WORD(mWord, mOutputIndex); AKLOGI("UNRELATED(0): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, mTransposedCount, mExcessiveCount, c); @@ -404,13 +407,17 @@ Correction::CorrectionType Correction::processCharAndCalcState( : mProximityInfo->getMatchedProximityId( mInputIndex, c, checkProximityChars, &proximityIndex); - if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId) { + if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId + || ProximityInfo::ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) { if (canTryCorrection && mOutputIndex > 0 && mCorrectionStates[mOutputIndex].mProximityMatching && mCorrectionStates[mOutputIndex].mExceeding && isEquivalentChar(mProximityInfo->getMatchedProximityId( mInputIndex, mWord[mOutputIndex - 1], false))) { - if (DEBUG_CORRECTION) { + if (DEBUG_CORRECTION + && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength) + && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 + || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) { AKLOGI("CONVERSION p->e %c", mWord[mOutputIndex - 1]); } // Conversion p->e @@ -429,7 +436,8 @@ Correction::CorrectionType Correction::processCharAndCalcState( } } - if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId) { + if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId + || ProximityInfo::ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) { // TODO: Optimize // As the current char turned out to be an unrelated char, // we will try other correction-types. Please note that mCorrectionStates[mOutputIndex] @@ -481,12 +489,47 @@ Correction::CorrectionType Correction::processCharAndCalcState( ++mExcessiveCount; incrementInputIndex(); } + if (DEBUG_CORRECTION + && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength) + && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 + || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) { + DUMP_WORD(mWord, mOutputIndex); + if (mTransposing) { + AKLOGI("TRANSPOSE: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, + mTransposedCount, mExcessiveCount, c); + } else { + AKLOGI("EXCEED: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, + mTransposedCount, mExcessiveCount, c); + } + } } else if (mSkipping) { // 3. Skip correction ++mSkippedCount; + if (DEBUG_CORRECTION + && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength) + && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 + || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) { + AKLOGI("SKIP: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, + mTransposedCount, mExcessiveCount, c); + } return processSkipChar(c, isTerminal, false); + } else if (ProximityInfo::ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) { + // As a last resort, use additional proximity characters + mProximityMatching = true; + ++mProximityCount; + mDistances[mOutputIndex] = ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO; + if (DEBUG_CORRECTION + && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength) + && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 + || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) { + AKLOGI("ADDITIONALPROX: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, + mTransposedCount, mExcessiveCount, c); + } } else { - if (DEBUG_CORRECTION) { + if (DEBUG_CORRECTION + && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength) + && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 + || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) { DUMP_WORD(mWord, mOutputIndex); AKLOGI("UNRELATED(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, mTransposedCount, mExcessiveCount, c); @@ -506,6 +549,13 @@ Correction::CorrectionType Correction::processCharAndCalcState( ++mProximityCount; mDistances[mOutputIndex] = mProximityInfo->getNormalizedSquaredDistance(mInputIndex, proximityIndex); + if (DEBUG_CORRECTION + && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength) + && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 + || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) { + AKLOGI("PROX: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, + mTransposedCount, mExcessiveCount, c); + } } addCharToCurrentWord(c); @@ -539,7 +589,9 @@ Correction::CorrectionType Correction::processCharAndCalcState( || isSameAsUserTypedLength) && isTerminal) { mTerminalInputIndex = mInputIndex - 1; mTerminalOutputIndex = mOutputIndex - 1; - if (DEBUG_CORRECTION) { + if (DEBUG_CORRECTION + && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength) + && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) { DUMP_WORD(mWord, mOutputIndex); AKLOGI("ONTERMINAL(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, mTransposedCount, mExcessiveCount, c); @@ -678,7 +730,7 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const if (excessiveCount > 0) { multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE, &finalFreq); if (!lastCharExceeded && !proximityInfo->existsAdjacentProximityChars(excessivePos)) { - if (DEBUG_CORRECTION_FREQ) { + if (DEBUG_DICT_FULL) { AKLOGI("Double excessive demotion"); } // If an excessive character is not adjacent to the left char or the right char, @@ -687,51 +739,46 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const } } + const bool performTouchPositionCorrection = + CALIBRATE_SCORE_BY_TOUCH_COORDINATES && proximityInfo->touchPositionCorrectionEnabled() + && skippedCount == 0 && excessiveCount == 0 && transposedCount == 0; // Score calibration by touch coordinates is being done only for pure-fat finger typing error // cases. // TODO: Remove this constraint. - if (CALIBRATE_SCORE_BY_TOUCH_COORDINATES && proximityInfo->touchPositionCorrectionEnabled() - && skippedCount == 0 && excessiveCount == 0 && transposedCount == 0) { - for (int i = 0; i < outputLength; ++i) { - const int squaredDistance = correction->mDistances[i]; - if (i < adjustedProximityMatchedCount) { - multiplyIntCapped(typedLetterMultiplier, &finalFreq); - } - if (squaredDistance >= 0) { - // Promote or demote the score according to the distance from the sweet spot - static const float A = ZERO_DISTANCE_PROMOTION_RATE / 100.0f; - static const float B = 1.0f; - static const float C = 0.5f; - static const float R1 = NEUTRAL_SCORE_SQUARED_RADIUS; - static const float R2 = HALF_SCORE_SQUARED_RADIUS; - const float x = (float)squaredDistance - / ProximityInfo::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR; - const float factor = (x < R1) - ? (A * (R1 - x) + B * x) / R1 - : (B * (R2 - x) + C * (x - R1)) / (R2 - R1); - // factor is piecewise linear function like: - // A -_ . - // ^-_ . - // B \ . - // \ . - // C \ . - // 0 R1 R2 - if (factor <= 0) { - return -1; - } - multiplyRate((int)(factor * 100), &finalFreq); - } else if (squaredDistance == PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO) { - multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq); - } - } - } else { - // Promotion for a word with proximity characters - for (int i = 0; i < adjustedProximityMatchedCount; ++i) { - // A word with proximity corrections - if (DEBUG_DICT_FULL) { - AKLOGI("Found a proximity correction."); - } + for (int i = 0; i < outputLength; ++i) { + const int squaredDistance = correction->mDistances[i]; + if (i < adjustedProximityMatchedCount) { multiplyIntCapped(typedLetterMultiplier, &finalFreq); + } + + if (performTouchPositionCorrection && squaredDistance >= 0) { + // Promote or demote the score according to the distance from the sweet spot + static const float A = ZERO_DISTANCE_PROMOTION_RATE / 100.0f; + static const float B = 1.0f; + static const float C = 0.5f; + static const float MIN = 0.3f; + static const float R1 = NEUTRAL_SCORE_SQUARED_RADIUS; + static const float R2 = HALF_SCORE_SQUARED_RADIUS; + const float x = (float)squaredDistance + / ProximityInfo::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR; + const float factor = max((x < R1) + ? (A * (R1 - x) + B * x) / R1 + : (B * (R2 - x) + C * (x - R1)) / (R2 - R1), MIN); + // factor is piecewise linear function like: + // A -_ . + // ^-_ . + // B \ . + // \_ . + // C ------------. + // . + // 0 R1 R2 . + multiplyRate((int)(factor * 100), &finalFreq); + } else if (performTouchPositionCorrection + && squaredDistance == PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO) { + multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq); + } else if (squaredDistance == ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO) { + multiplyRate(WORDS_WITH_ADDITIONAL_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq); + } else if (i < adjustedProximityMatchedCount) { multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq); } } @@ -794,7 +841,8 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const AKLOGI("calc: %d, %d", outputLength, sameLength); } - if (DEBUG_CORRECTION_FREQ) { + if (DEBUG_CORRECTION_FREQ + && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == inputLength)) { DUMP_WORD(correction->mWord, outputLength); AKLOGI("FinalFreq: [P%d, S%d, T%d, E%d] %d, %d, %d, %d, %d, %d", proximityMatchedCount, skippedCount, transposedCount, excessiveCount, outputLength, lastCharExceeded, diff --git a/native/src/correction.h b/native/src/correction.h index b246070fe..a711c994d 100644 --- a/native/src/correction.h +++ b/native/src/correction.h @@ -85,7 +85,7 @@ class Correction { } } - Correction(const int typedLetterMultiplier, const int fullWordMultiplier); + Correction(const int typedLetterMultiplier, const int fullWordMultiplier); void initCorrection( const ProximityInfo *pi, const int inputLength, const int maxWordLength); void initCorrectionState(const int rootPos, const int childCount, const bool traverseAll); diff --git a/native/src/defines.h b/native/src/defines.h index 3f3f5ba5c..02c1fe0a2 100644 --- a/native/src/defines.h +++ b/native/src/defines.h @@ -172,6 +172,7 @@ static void prof_out(void) { #define NOT_A_COORDINATE -1 #define EQUIVALENT_CHAR_WITHOUT_DISTANCE_INFO -2 #define PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO -3 +#define ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO -4 #define NOT_A_INDEX -1 #define NOT_A_FREQUENCY -1 @@ -194,6 +195,7 @@ static void prof_out(void) { #define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 70 #define FULL_MATCHED_WORDS_PROMOTION_RATE 120 #define WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE 90 +#define WORDS_WITH_ADDITIONAL_PROXIMITY_CHARACTER_DEMOTION_RATE 30 #define WORDS_WITH_MATCH_SKIP_PROMOTION_RATE 105 #define WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE 160 #define CORRECTION_COUNT_RATE_DEMOTION_RATE_BASE 45 @@ -210,6 +212,9 @@ static void prof_out(void) { // This is only used for the size of array. Not to be used in c functions. #define MAX_WORD_LENGTH_INTERNAL 48 +// This must be equal to ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE in KeyDetector.java +#define ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE 2 + // Word limit for sub queues used in WordsPriorityQueuePool. Sub queues are temporary queues used // for better performance. // Holds up to 1 candidate for each word @@ -241,4 +246,8 @@ static void prof_out(void) { // The ratio of neutral area radius to sweet spot radius. #define NEUTRAL_AREA_RADIUS_RATIO 1.3f +// DEBUG +#define INPUTLENGTH_FOR_DEBUG -1 +#define MIN_OUTPUT_INDEX_FOR_DEBUG -1 + #endif // LATINIME_DEFINES_H diff --git a/native/src/proximity_info.cpp b/native/src/proximity_info.cpp index e0e938099..b6bab2274 100644 --- a/native/src/proximity_info.cpp +++ b/native/src/proximity_info.cpp @@ -261,7 +261,8 @@ ProximityInfo::ProximityType ProximityInfo::getMatchedProximityId(const int inde // Not an exact nor an accent-alike match: search the list of close keys int j = 1; - while (j < MAX_PROXIMITY_CHARS_SIZE && currentChars[j] > 0) { + while (j < MAX_PROXIMITY_CHARS_SIZE + && currentChars[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) { const bool matched = (currentChars[j] == baseLowerC || currentChars[j] == c); if (matched) { if (proximityIndex) { @@ -271,6 +272,21 @@ ProximityInfo::ProximityType ProximityInfo::getMatchedProximityId(const int inde } ++j; } + if (j < MAX_PROXIMITY_CHARS_SIZE + && currentChars[j] == ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) { + ++j; + while (j < MAX_PROXIMITY_CHARS_SIZE + && currentChars[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) { + const bool matched = (currentChars[j] == baseLowerC || currentChars[j] == c); + if (matched) { + if (proximityIndex) { + *proximityIndex = j; + } + return ADDITIONAL_PROXIMITY_CHAR; + } + ++j; + } + } // Was not included, signal this as an unrelated character. return UNRELATED_CHAR; diff --git a/native/src/proximity_info.h b/native/src/proximity_info.h index 9ca5505a7..b77c1bb0a 100644 --- a/native/src/proximity_info.h +++ b/native/src/proximity_info.h @@ -38,7 +38,9 @@ class ProximityInfo { // It is a char located nearby on the keyboard NEAR_PROXIMITY_CHAR, // It is an unrelated char - UNRELATED_CHAR + UNRELATED_CHAR, + // Additional proximity char which can differ by language. + ADDITIONAL_PROXIMITY_CHAR } ProximityType; ProximityInfo(const int maxProximityCharsSize, const int keyboardWidth, |