diff options
Diffstat (limited to 'native/jni/src')
-rw-r--r-- | native/jni/src/correction.cpp | 24 | ||||
-rw-r--r-- | native/jni/src/correction.h | 2 | ||||
-rw-r--r-- | native/jni/src/defines.h | 19 | ||||
-rw-r--r-- | native/jni/src/proximity_info.cpp | 2 | ||||
-rw-r--r-- | native/jni/src/proximity_info.h | 8 | ||||
-rw-r--r-- | native/jni/src/proximity_info_state.cpp | 33 | ||||
-rw-r--r-- | native/jni/src/proximity_info_state.h | 4 | ||||
-rw-r--r-- | native/jni/src/proximity_info_state_utils.cpp | 6 | ||||
-rw-r--r-- | native/jni/src/proximity_info_utils.h | 2 |
9 files changed, 69 insertions, 31 deletions
diff --git a/native/jni/src/correction.cpp b/native/jni/src/correction.cpp index d4bd4aa00..b8690eb94 100644 --- a/native/jni/src/correction.cpp +++ b/native/jni/src/correction.cpp @@ -214,7 +214,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(const int c, cons bool incremented = false; if (mLastCharExceeded && mInputIndex == mInputSize - 1) { // TODO: Do not check the proximity if EditDistance exceeds the threshold - const ProximityType matchId = mProximityInfoState.getMatchedProximityId( + const ProximityType matchId = mProximityInfoState.getProximityType( mInputIndex, c, true, &proximityIndex); if (isEquivalentChar(matchId)) { mLastCharExceeded = false; @@ -268,7 +268,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(const int c, cons bool secondTransposing = false; if (mTransposedCount % 2 == 1) { - if (isEquivalentChar(mProximityInfoState.getMatchedProximityId( + if (isEquivalentChar(mProximityInfoState.getProximityType( mInputIndex - 1, c, false))) { ++mTransposedCount; secondTransposing = true; @@ -300,7 +300,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(const int c, cons ProximityType matchedProximityCharId = secondTransposing ? EQUIVALENT_CHAR - : mProximityInfoState.getMatchedProximityId( + : mProximityInfoState.getProximityType( mInputIndex, c, checkProximityChars, &proximityIndex); if (UNRELATED_CHAR == matchedProximityCharId @@ -308,7 +308,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(const int c, cons if (canTryCorrection && mOutputIndex > 0 && mCorrectionStates[mOutputIndex].mProximityMatching && mCorrectionStates[mOutputIndex].mExceeding - && isEquivalentChar(mProximityInfoState.getMatchedProximityId( + && isEquivalentChar(mProximityInfoState.getProximityType( mInputIndex, mWord[mOutputIndex - 1], false))) { if (DEBUG_CORRECTION && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize) @@ -327,7 +327,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(const int c, cons // Here, we are doing something equivalent to matchedProximityCharId, // but we already know that "excessive char correction" just happened // so that we just need to check "mProximityCount == 0". - matchedProximityCharId = mProximityInfoState.getMatchedProximityId( + matchedProximityCharId = mProximityInfoState.getProximityType( mInputIndex, c, mProximityCount == 0, &proximityIndex); } } @@ -344,10 +344,10 @@ Correction::CorrectionType Correction::processCharAndCalcState(const int c, cons if (mInputIndex < mInputSize - 1 && mOutputIndex > 0 && mTransposedCount > 0 && !mCorrectionStates[mOutputIndex].mTransposing && mCorrectionStates[mOutputIndex - 1].mTransposing - && isEquivalentChar(mProximityInfoState.getMatchedProximityId( + && isEquivalentChar(mProximityInfoState.getProximityType( mInputIndex, mWord[mOutputIndex - 1], false)) && isEquivalentChar( - mProximityInfoState.getMatchedProximityId(mInputIndex + 1, c, false))) { + mProximityInfoState.getProximityType(mInputIndex + 1, c, false))) { // Conversion t->e // Example: // occaisional -> occa sional @@ -359,7 +359,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(const int c, cons && !mCorrectionStates[mOutputIndex].mTransposing && mCorrectionStates[mOutputIndex - 1].mTransposing && isEquivalentChar( - mProximityInfoState.getMatchedProximityId(mInputIndex - 1, c, false))) { + mProximityInfoState.getProximityType(mInputIndex - 1, c, false))) { // Conversion t->s // Example: // chcolate -> chocolate @@ -371,7 +371,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(const int c, cons && mCorrectionStates[mOutputIndex].mProximityMatching && mCorrectionStates[mOutputIndex].mSkipping && isEquivalentChar( - mProximityInfoState.getMatchedProximityId(mInputIndex - 1, c, false))) { + mProximityInfoState.getProximityType(mInputIndex - 1, c, false))) { // Conversion p->s // Note: This logic tries saving cases like contrst --> contrast -- "a" is one of // proximity chars of "s", but it should rather be handled as a skipped char. @@ -383,7 +383,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(const int c, cons && mCorrectionStates[mOutputIndex].mSkipping && mCorrectionStates[mOutputIndex].mAdditionalProximityMatching && isProximityCharOrEquivalentChar( - mProximityInfoState.getMatchedProximityId(mInputIndex + 1, c, false))) { + mProximityInfoState.getProximityType(mInputIndex + 1, c, false))) { // Conversion s->a incrementInputIndex(); --mSkippedCount; @@ -392,7 +392,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(const int c, cons mDistances[mOutputIndex] = ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO; } else if ((mExceeding || mTransposing) && mInputIndex - 1 < mInputSize && isEquivalentChar( - mProximityInfoState.getMatchedProximityId(mInputIndex + 1, c, false))) { + mProximityInfoState.getProximityType(mInputIndex + 1, c, false))) { // 1.2. Excessive or transpose correction if (mTransposing) { ++mTransposedCount; @@ -614,7 +614,7 @@ inline static bool isUpperCase(unsigned short c) { multiplyIntCapped(matchWeight, &finalFreq); } - if (proximityInfoState->getMatchedProximityId(0, word[0], true) == UNRELATED_CHAR) { + if (proximityInfoState->getProximityType(0, word[0], true) == UNRELATED_CHAR) { multiplyRate(FIRST_CHAR_DIFFERENT_DEMOTION_RATE, &finalFreq); } diff --git a/native/jni/src/correction.h b/native/jni/src/correction.h index 34f794d84..f0d62102f 100644 --- a/native/jni/src/correction.h +++ b/native/jni/src/correction.h @@ -119,7 +119,7 @@ class Correction { // proximity info state void initInputParams(const ProximityInfo *proximityInfo, const int *inputCodes, const int inputSize, const int *xCoordinates, const int *yCoordinates) { - mProximityInfoState.initInputParams(0, MAX_POINT_TO_KEY_LENGTH, + mProximityInfoState.initInputParams(0, static_cast<float>(MAX_VALUE_FOR_WEIGHTING), proximityInfo, inputCodes, inputSize, xCoordinates, yCoordinates, 0, 0, false); } diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h index a84dfddec..11dc3d20e 100644 --- a/native/jni/src/defines.h +++ b/native/jni/src/defines.h @@ -341,8 +341,9 @@ static inline void prof_out(void) { #define MAX_DEPTH_MULTIPLIER 3 #define FIRST_WORD_INDEX 0 -// Max Distance between point to key -#define MAX_POINT_TO_KEY_LENGTH 10000000 +// Max value for length, distance and probability which are used in weighting +// TODO: Remove +#define MAX_VALUE_FOR_WEIGHTING 10000000 // The max number of the keys in one keyboard layout #define MAX_KEY_COUNT_IN_A_KEYBOARD 64 @@ -411,4 +412,18 @@ typedef enum { A_DOUBLE_LETTER, A_STRONG_DOUBLE_LETTER } DoubleLetterLevel; + +typedef enum { + CT_MATCH, + CT_OMISSION, + CT_INSERTION, + CT_TRANSPOSITION, + CT_SUBSTITUTION, + CT_SPACE_SUBSTITUTION, + CT_SPACE_OMISSION, + CT_COMPLETION, + CT_TERMINAL, + CT_NEW_WORD, + CT_NEW_WORD_BIGRAM, +} CorrectionType; #endif // LATINIME_DEFINES_H diff --git a/native/jni/src/proximity_info.cpp b/native/jni/src/proximity_info.cpp index 81eb0b37d..74b5e0131 100644 --- a/native/jni/src/proximity_info.cpp +++ b/native/jni/src/proximity_info.cpp @@ -204,6 +204,6 @@ int ProximityInfo::getKeyKeyDistanceG(const int keyId0, const int keyId1) const if (keyId0 >= 0 && keyId1 >= 0) { return mKeyKeyDistancesG[keyId0][keyId1]; } - return MAX_POINT_TO_KEY_LENGTH; + return MAX_VALUE_FOR_WEIGHTING; } } // namespace latinime diff --git a/native/jni/src/proximity_info.h b/native/jni/src/proximity_info.h index 22bbdf165..57a175d2c 100644 --- a/native/jni/src/proximity_info.h +++ b/native/jni/src/proximity_info.h @@ -70,7 +70,7 @@ class ProximityInfo { int getKeyCenterYOfKeyIdG(int keyId) const; int getKeyKeyDistanceG(int keyId0, int keyId1) const; - void AK_FORCE_INLINE initializeProximities(const int *const inputCodes, + AK_FORCE_INLINE void initializeProximities(const int *const inputCodes, const int *const inputXCoordinates, const int *const inputYCoordinates, const int inputSize, int *allInputCodes) const { ProximityInfoUtils::initializeProximities(inputCodes, inputXCoordinates, inputYCoordinates, @@ -79,10 +79,14 @@ class ProximityInfo { KEY_COUNT, mLocaleStr, &mCodeToKeyMap, allInputCodes); } - int AK_FORCE_INLINE getKeyIndexOf(const int c) const { + AK_FORCE_INLINE int getKeyIndexOf(const int c) const { return ProximityInfoUtils::getKeyIndexOf(KEY_COUNT, c, &mCodeToKeyMap); } + AK_FORCE_INLINE bool isCodePointOnKeyboard(const int codePoint) const { + return getKeyIndexOf(codePoint) != NOT_AN_INDEX; + } + private: DISALLOW_IMPLICIT_CONSTRUCTORS(ProximityInfo); diff --git a/native/jni/src/proximity_info_state.cpp b/native/jni/src/proximity_info_state.cpp index d13248caf..00e7ffc6c 100644 --- a/native/jni/src/proximity_info_state.cpp +++ b/native/jni/src/proximity_info_state.cpp @@ -170,7 +170,7 @@ float ProximityInfoState::getPointToKeyLength( return 0.0f; } // If the char is not a key on the keyboard then return the max length. - return MAX_POINT_TO_KEY_LENGTH; + return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); } float ProximityInfoState::getPointToKeyLength_G(const int inputIndex, const int codePoint) const { @@ -193,20 +193,20 @@ float ProximityInfoState::getPointToKeyByIdLength(const int inputIndex, const in // the same position. We want to see if c is in it: if so, then the word contains at that position // a character close to what the user typed. // What the user typed is actually the first character of the array. -// proximityIndex is a pointer to the variable where getMatchedProximityId returns the index of c +// proximityIndex is a pointer to the variable where getProximityType returns the index of c // in the proximity chars of the input index. // Notice : accented characters do not have a proximity list, so they are alone in their list. The // non-accented version of the character should be considered "close", but not the other keys close // to the non-accented version. -ProximityType ProximityInfoState::getMatchedProximityId(const int index, const int c, +ProximityType ProximityInfoState::getProximityType(const int index, const int codePoint, const bool checkProximityChars, int *proximityIndex) const { const int *currentCodePoints = getProximityCodePointsAt(index); const int firstCodePoint = currentCodePoints[0]; - const int baseLowerC = toBaseLowerCase(c); + const int baseLowerC = toBaseLowerCase(codePoint); // The first char in the array is what user typed. If it matches right away, that means the // user typed that same char for this pos. - if (firstCodePoint == baseLowerC || firstCodePoint == c) { + if (firstCodePoint == baseLowerC || firstCodePoint == codePoint) { return EQUIVALENT_CHAR; } @@ -222,7 +222,8 @@ ProximityType ProximityInfoState::getMatchedProximityId(const int index, const i int j = 1; while (j < MAX_PROXIMITY_CHARS_SIZE && currentCodePoints[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) { - const bool matched = (currentCodePoints[j] == baseLowerC || currentCodePoints[j] == c); + const bool matched = (currentCodePoints[j] == baseLowerC + || currentCodePoints[j] == codePoint); if (matched) { if (proximityIndex) { *proximityIndex = j; @@ -236,7 +237,8 @@ ProximityType ProximityInfoState::getMatchedProximityId(const int index, const i ++j; while (j < MAX_PROXIMITY_CHARS_SIZE && currentCodePoints[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) { - const bool matched = (currentCodePoints[j] == baseLowerC || currentCodePoints[j] == c); + const bool matched = (currentCodePoints[j] == baseLowerC + || currentCodePoints[j] == codePoint); if (matched) { if (proximityIndex) { *proximityIndex = j; @@ -250,6 +252,21 @@ ProximityType ProximityInfoState::getMatchedProximityId(const int index, const i return UNRELATED_CHAR; } +ProximityType ProximityInfoState::getProximityTypeG(const int index, const int codePoint) const { + if (!isUsed()) { + return UNRELATED_NOR_SUBSTITUTION_CHAR; + } + const int lowerCodePoint = toLowerCase(codePoint); + const int baseLowerCodePoint = toBaseCodePoint(lowerCodePoint); + for (int i = 0; i < static_cast<int>(mSampledSearchKeyVectors[index].size()); ++i) { + if (mSampledSearchKeyVectors[index][i] == lowerCodePoint + || mSampledSearchKeyVectors[index][i] == baseLowerCodePoint) { + return EQUIVALENT_CHAR; + } + } + return UNRELATED_NOR_SUBSTITUTION_CHAR; +} + bool ProximityInfoState::isKeyInSerchKeysAfterIndex(const int index, const int keyId) const { ASSERT(keyId >= 0 && index >= 0 && index < mSampledInputSize); return mSampledSearchKeySets[index].test(keyId); @@ -297,6 +314,6 @@ float ProximityInfoState::getProbability(const int index, const int keyIndex) co if (it != mCharProbabilities[index].end()) { return it->second; } - return static_cast<float>(MAX_POINT_TO_KEY_LENGTH); + return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); } } // namespace latinime diff --git a/native/jni/src/proximity_info_state.h b/native/jni/src/proximity_info_state.h index c4cbd582d..dd1eb764f 100644 --- a/native/jni/src/proximity_info_state.h +++ b/native/jni/src/proximity_info_state.h @@ -152,9 +152,11 @@ class ProximityInfoState { float getPointToKeyLength(const int inputIndex, const int codePoint, const float scale) const; float getPointToKeyLength_G(const int inputIndex, const int codePoint) const; - ProximityType getMatchedProximityId(const int index, const int c, + ProximityType getProximityType(const int index, const int codePoint, const bool checkProximityChars, int *proximityIndex = 0) const; + ProximityType getProximityTypeG(const int index, const int codePoint) const; + const std::vector<int> *getSearchKeyVector(const int index) const { return &mSampledSearchKeyVectors[index]; } diff --git a/native/jni/src/proximity_info_state_utils.cpp b/native/jni/src/proximity_info_state_utils.cpp index 2bf327fcc..2ca39f9e3 100644 --- a/native/jni/src/proximity_info_state_utils.cpp +++ b/native/jni/src/proximity_info_state_utils.cpp @@ -645,7 +645,7 @@ namespace latinime { return min((*SampledDistanceCache_G)[index] * scale, maxPointToKeyLength); } // If the char is not a key on the keyboard then return the max length. - return static_cast<float>(MAX_POINT_TO_KEY_LENGTH); + return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); } /* static */ float ProximityInfoStateUtils::getPointToKeyByIdLength(const float maxPointToKeyLength, @@ -678,7 +678,7 @@ namespace latinime { const float currentAngle = getPointAngle(sampledInputXs, sampledInputYs, i); const float speedRate = (*sampledSpeedRates)[i]; - float nearestKeyDistance = static_cast<float>(MAX_POINT_TO_KEY_LENGTH); + float nearestKeyDistance = static_cast<float>(MAX_VALUE_FOR_WEIGHTING); for (int j = 0; j < keyCount; ++j) { if ((*SampledNearKeySets)[i].test(j)) { const float distance = getPointToKeyByIdLength( @@ -1016,7 +1016,7 @@ namespace latinime { float sumLogProbability = 0.0f; // TODO: Current implementation is greedy algorithm. DP would be efficient for many cases. for (int i = 0; i < sampledInputSize && index < MAX_WORD_LENGTH - 1; ++i) { - float minLogProbability = static_cast<float>(MAX_POINT_TO_KEY_LENGTH); + float minLogProbability = static_cast<float>(MAX_VALUE_FOR_WEIGHTING); int character = NOT_AN_INDEX; for (hash_map_compat<int, float>::const_iterator it = (*charProbabilities)[i].begin(); it != (*charProbabilities)[i].end(); ++it) { diff --git a/native/jni/src/proximity_info_utils.h b/native/jni/src/proximity_info_utils.h index 51cafba2c..71c97e325 100644 --- a/native/jni/src/proximity_info_utils.h +++ b/native/jni/src/proximity_info_utils.h @@ -226,7 +226,7 @@ class ProximityInfoUtils { const int *const keyYCoordinates, const int *const keyWidths, const int *keyHeights, const int keyId, const int x, const int y) { // NOT_A_ID is -1, but return whenever < 0 just in case - if (keyId < 0) return MAX_POINT_TO_KEY_LENGTH; + if (keyId < 0) return MAX_VALUE_FOR_WEIGHTING; const int left = keyXCoordinates[keyId]; const int top = keyYCoordinates[keyId]; const int right = left + keyWidths[keyId]; |