diff options
Diffstat (limited to 'native/jni/src')
26 files changed, 812 insertions, 402 deletions
diff --git a/native/jni/src/bigram_dictionary.h b/native/jni/src/bigram_dictionary.h index d676cca63..5f11ae822 100644 --- a/native/jni/src/bigram_dictionary.h +++ b/native/jni/src/bigram_dictionary.h @@ -29,8 +29,6 @@ class BigramDictionary { BigramDictionary(const unsigned char *dict, int maxWordLength, int maxPredictions); int getBigrams(const int32_t *word, int length, int *inputCodes, int codesSize, unsigned short *outWords, int *frequencies, int *outputTypes) const; - int getBigramListPositionForWord(const int32_t *prevWord, const int prevWordLength, - const bool forceLowerCaseSearch) const; void fillBigramAddressToFrequencyMapAndFilter(const int32_t *prevWord, const int prevWordLength, std::map<int, int> *map, uint8_t *filter) const; bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2) const; @@ -45,6 +43,8 @@ class BigramDictionary { bool getFirstBitOfByte(int *pos) { return (DICT[*pos] & 0x80) > 0; } bool getSecondBitOfByte(int *pos) { return (DICT[*pos] & 0x40) > 0; } bool checkFirstCharacter(unsigned short *word, int *inputCodes) const; + int getBigramListPositionForWord(const int32_t *prevWord, const int prevWordLength, + const bool forceLowerCaseSearch) const; const unsigned char *DICT; const int MAX_WORD_LENGTH; diff --git a/native/jni/src/binary_format.h b/native/jni/src/binary_format.h index 4cabc8404..25d504bfb 100644 --- a/native/jni/src/binary_format.h +++ b/native/jni/src/binary_format.h @@ -43,6 +43,10 @@ class BinaryFormat { static const int FLAG_HAS_SHORTCUT_TARGETS = 0x08; // Flag for bigram presence static const int FLAG_HAS_BIGRAMS = 0x04; + // Flag for non-words (typically, shortcut only entries) + static const int FLAG_IS_NOT_A_WORD = 0x02; + // Flag for blacklist + static const int FLAG_IS_BLACKLISTED = 0x01; // Attribute (bigram/shortcut) related flags: // Flag for presence of more attributes @@ -61,13 +65,6 @@ class BinaryFormat { static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20; static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30; - private: - DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryFormat); - const static int32_t MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20; - const static int32_t CHARACTER_ARRAY_TERMINATOR = 0x1F; - const static int MULTIPLE_BYTE_CHARACTER_ADDITIONAL_SIZE = 2; - - public: const static int UNKNOWN_FORMAT = -1; // Originally, format version 1 had a 16-bit magic number, then the version number `01' // then options that must be 0. Hence the first 32-bits of the format are always as follow @@ -94,7 +91,6 @@ class BinaryFormat { static int skipFrequency(const uint8_t flags, const int pos); static int skipShortcuts(const uint8_t *const dict, const uint8_t flags, const int pos); static int skipBigrams(const uint8_t *const dict, const uint8_t flags, const int pos); - static int skipAllAttributes(const uint8_t *const dict, const uint8_t flags, const int pos); static int skipChildrenPosAndAttributes(const uint8_t *const dict, const uint8_t flags, const int pos); static int readChildrenPosition(const uint8_t *const dict, const uint8_t flags, const int pos); @@ -118,6 +114,13 @@ class BinaryFormat { REQUIRES_FRENCH_LIGATURES_PROCESSING = 0x4 }; const static unsigned int NO_FLAGS = 0; + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryFormat); + const static int32_t MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20; + const static int32_t CHARACTER_ARRAY_TERMINATOR = 0x1F; + const static int MULTIPLE_BYTE_CHARACTER_ADDITIONAL_SIZE = 2; + static int skipAllAttributes(const uint8_t *const dict, const uint8_t flags, const int pos); }; inline int BinaryFormat::detectFormat(const uint8_t *const dict) { diff --git a/native/jni/src/char_utils.cpp b/native/jni/src/char_utils.cpp index 223291f60..9d886da31 100644 --- a/native/jni/src/char_utils.cpp +++ b/native/jni/src/char_utils.cpp @@ -889,7 +889,7 @@ static int compare_pair_capital(const void *a, const void *b) { - static_cast<int>((static_cast<const struct LatinCapitalSmallPair *>(b))->capital); } -unsigned short latin_tolower(unsigned short c) { +unsigned short latin_tolower(const unsigned short c) { struct LatinCapitalSmallPair *p = static_cast<struct LatinCapitalSmallPair *>(bsearch(&c, SORTED_CHAR_MAP, sizeof(SORTED_CHAR_MAP) / sizeof(SORTED_CHAR_MAP[0]), diff --git a/native/jni/src/char_utils.h b/native/jni/src/char_utils.h index edd96bbb0..b30677fa7 100644 --- a/native/jni/src/char_utils.h +++ b/native/jni/src/char_utils.h @@ -17,21 +17,23 @@ #ifndef LATINIME_CHAR_UTILS_H #define LATINIME_CHAR_UTILS_H +#include <cctype> + namespace latinime { -inline static int isAsciiUpper(unsigned short c) { - return c >= 'A' && c <= 'Z'; +inline static bool isAsciiUpper(unsigned short c) { + return isupper(static_cast<int>(c)) != 0; } inline static unsigned short toAsciiLower(unsigned short c) { return c - 'A' + 'a'; } -inline static int isAscii(unsigned short c) { - return c <= 127; +inline static bool isAscii(unsigned short c) { + return isascii(static_cast<int>(c)) != 0; } -unsigned short latin_tolower(unsigned short c); +unsigned short latin_tolower(const unsigned short c); /** * Table mapping most combined Latin, Greek, and Cyrillic characters diff --git a/native/jni/src/correction.cpp b/native/jni/src/correction.cpp index e55da0113..9ad65b09d 100644 --- a/native/jni/src/correction.cpp +++ b/native/jni/src/correction.cpp @@ -61,19 +61,19 @@ inline static void dumpEditDistance10ForDebug(int *editDistanceTable, } inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigned short *input, - const int inputLength, const unsigned short *output, const int outputLength) { + const int inputSize, const unsigned short *output, const int outputLength) { // TODO: Make sure that editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL] is not touched. - // Let dp[i][j] be editDistanceTable[i * (inputLength + 1) + j]. - // Assuming that dp[0][0] ... dp[outputLength - 1][inputLength] are already calculated, - // and calculate dp[ouputLength][0] ... dp[outputLength][inputLength]. - int *const current = editDistanceTable + outputLength * (inputLength + 1); - const int *const prev = editDistanceTable + (outputLength - 1) * (inputLength + 1); + // Let dp[i][j] be editDistanceTable[i * (inputSize + 1) + j]. + // Assuming that dp[0][0] ... dp[outputLength - 1][inputSize] are already calculated, + // and calculate dp[ouputLength][0] ... dp[outputLength][inputSize]. + int *const current = editDistanceTable + outputLength * (inputSize + 1); + const int *const prev = editDistanceTable + (outputLength - 1) * (inputSize + 1); const int *const prevprev = - outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputLength + 1) : 0; + outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputSize + 1) : 0; current[0] = outputLength; const uint32_t co = toBaseLowerCase(output[outputLength - 1]); const uint32_t prevCO = outputLength >= 2 ? toBaseLowerCase(output[outputLength - 2]) : 0; - for (int i = 1; i <= inputLength; ++i) { + for (int i = 1; i <= inputSize; ++i) { const uint32_t ci = toBaseLowerCase(input[i - 1]); const uint16_t cost = (ci == co) ? 0 : 1; current[i] = min(current[i - 1] + 1, min(prev[i] + 1, prev[i - 1] + cost)); @@ -84,11 +84,11 @@ inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigne } inline static int getCurrentEditDistance(int *editDistanceTable, const int editDistanceTableWidth, - const int outputLength, const int inputLength) { + const int outputLength, const int inputSize) { if (DEBUG_EDIT_DISTANCE) { - AKLOGI("getCurrentEditDistance %d, %d", inputLength, outputLength); + AKLOGI("getCurrentEditDistance %d, %d", inputSize, outputLength); } - return editDistanceTable[(editDistanceTableWidth + 1) * (outputLength) + inputLength]; + return editDistanceTable[(editDistanceTableWidth + 1) * (outputLength) + inputSize]; } ////////////////////// @@ -109,12 +109,12 @@ void Correction::resetCorrection() { mTotalTraverseCount = 0; } -void Correction::initCorrection(const ProximityInfo *pi, const int inputLength, +void Correction::initCorrection(const ProximityInfo *pi, const int inputSize, const int maxDepth) { mProximityInfo = pi; - mInputLength = inputLength; + mInputSize = inputSize; mMaxDepth = maxDepth; - mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2; + mMaxEditDistance = mInputSize < 5 ? 2 : mInputSize / 2; // TODO: This is not supposed to be required. Check what's going wrong with // editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL] initEditDistance(mEditDistanceTable); @@ -168,26 +168,22 @@ int Correction::getFreqForSplitMultipleWords(const int *freqArray, const int *wo } int Correction::getFinalProbability(const int probability, unsigned short **word, int *wordLength) { - return getFinalProbabilityInternal(probability, word, wordLength, mInputLength); + return getFinalProbabilityInternal(probability, word, wordLength, mInputSize); } int Correction::getFinalProbabilityForSubQueue(const int probability, unsigned short **word, - int *wordLength, const int inputLength) { - return getFinalProbabilityInternal(probability, word, wordLength, inputLength); + int *wordLength, const int inputSize) { + return getFinalProbabilityInternal(probability, word, wordLength, inputSize); } int Correction::getFinalProbabilityInternal(const int probability, unsigned short **word, - int *wordLength, const int inputLength) { + int *wordLength, const int inputSize) { const int outputIndex = mTerminalOutputIndex; const int inputIndex = mTerminalInputIndex; *wordLength = outputIndex + 1; - if (outputIndex < MIN_SUGGEST_DEPTH) { - return NOT_A_PROBABILITY; - } - *word = mWord; int finalProbability= Correction::RankingAlgorithm::calculateFinalProbability( - inputIndex, outputIndex, probability, mEditDistanceTable, this, inputLength); + inputIndex, outputIndex, probability, mEditDistanceTable, this, inputSize); return finalProbability; } @@ -230,7 +226,7 @@ int Correction::goDownTree( } // TODO: remove -int Correction::getInputIndex() { +int Correction::getInputIndex() const { return mInputIndex; } @@ -274,13 +270,13 @@ bool Correction::needsToPrune() const { // TODO: use edit distance here return mOutputIndex - 1 >= mMaxDepth || mProximityCount > mMaxEditDistance // Allow one char longer word for missing character - || (!mDoAutoCompletion && (mOutputIndex > mInputLength)); + || (!mDoAutoCompletion && (mOutputIndex > mInputSize)); } void Correction::addCharToCurrentWord(const int32_t c) { mWord[mOutputIndex] = c; const unsigned short *primaryInputWord = mProximityInfoState.getPrimaryInputWord(); - calcEditDistanceOneStep(mEditDistanceTable, primaryInputWord, mInputLength, + calcEditDistanceOneStep(mEditDistanceTable, primaryInputWord, mInputSize, mWord, mOutputIndex + 1); } @@ -329,7 +325,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( // Skip checking this node if (mNeedsToTraverseAllNodes || isSingleQuote(c)) { bool incremented = false; - if (mLastCharExceeded && mInputIndex == mInputLength - 1) { + if (mLastCharExceeded && mInputIndex == mInputSize - 1) { // TODO: Do not check the proximity if EditDistance exceeds the threshold const ProximityType matchId = mProximityInfoState.getMatchedProximityId( mInputIndex, c, true, &proximityIndex); @@ -358,7 +354,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( if (mExcessiveCount == 0 && mExcessivePos < mOutputIndex) { mExcessivePos = mOutputIndex; } - if (mExcessivePos < mInputLength - 1) { + if (mExcessivePos < mInputSize - 1) { mExceeding = mExcessivePos == mInputIndex && canTryCorrection; } } @@ -377,7 +373,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( if (mTransposedCount == 0 && mTransposedPos < mOutputIndex) { mTransposedPos = mOutputIndex; } - if (mTransposedPos < mInputLength - 1) { + if (mTransposedPos < mInputSize - 1) { mTransposing = mInputIndex == mTransposedPos && canTryCorrection; } } @@ -396,7 +392,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( } else { --mTransposedCount; if (DEBUG_CORRECTION - && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength) + && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize) && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) { DUMP_WORD(mWord, mOutputIndex); @@ -427,7 +423,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( && isEquivalentChar(mProximityInfoState.getMatchedProximityId( mInputIndex, mWord[mOutputIndex - 1], false))) { if (DEBUG_CORRECTION - && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength) + && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize) && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) { AKLOGI("CONVERSION p->e %c", mWord[mOutputIndex - 1]); @@ -457,7 +453,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( // As the current char turned out to be an unrelated char, // we will try other correction-types. Please note that mCorrectionStates[mOutputIndex] // here refers to the previous state. - if (mInputIndex < mInputLength - 1 && mOutputIndex > 0 && mTransposedCount > 0 + if (mInputIndex < mInputSize - 1 && mOutputIndex > 0 && mTransposedCount > 0 && !mCorrectionStates[mOutputIndex].mTransposing && mCorrectionStates[mOutputIndex - 1].mTransposing && isEquivalentChar(mProximityInfoState.getMatchedProximityId( @@ -494,7 +490,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( ++mSkippedCount; --mProximityCount; return processSkipChar(c, isTerminal, false); - } else if (mInputIndex - 1 < mInputLength + } else if (mInputIndex - 1 < mInputSize && mSkippedCount > 0 && mCorrectionStates[mOutputIndex].mSkipping && mCorrectionStates[mOutputIndex].mAdditionalProximityMatching @@ -506,7 +502,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( mProximityMatching = true; ++mProximityCount; mDistances[mOutputIndex] = ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO; - } else if ((mExceeding || mTransposing) && mInputIndex - 1 < mInputLength + } else if ((mExceeding || mTransposing) && mInputIndex - 1 < mInputSize && isEquivalentChar( mProximityInfoState.getMatchedProximityId(mInputIndex + 1, c, false))) { // 1.2. Excessive or transpose correction @@ -517,7 +513,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( incrementInputIndex(); } if (DEBUG_CORRECTION - && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength) + && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize) && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) { DUMP_WORD(mWord, mOutputIndex); @@ -533,7 +529,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( // 3. Skip correction ++mSkippedCount; if (DEBUG_CORRECTION - && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength) + && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize) && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) { AKLOGI("SKIP: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, @@ -546,7 +542,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( ++mProximityCount; mDistances[mOutputIndex] = ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO; if (DEBUG_CORRECTION - && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength) + && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize) && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) { AKLOGI("ADDITIONALPROX: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, @@ -554,7 +550,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( } } else { if (DEBUG_CORRECTION - && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength) + && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize) && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) { DUMP_WORD(mWord, mOutputIndex); @@ -564,7 +560,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( return processUnrelatedCorrectionType(); } } else if (secondTransposing) { - // If inputIndex is greater than mInputLength, that means there is no + // If inputIndex is greater than mInputSize, that means there is no // proximity chars. So, we don't need to check proximity. mMatching = true; } else if (isEquivalentChar(matchedProximityCharId)) { @@ -577,7 +573,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( mDistances[mOutputIndex] = mProximityInfoState.getNormalizedSquaredDistance(mInputIndex, proximityIndex); if (DEBUG_CORRECTION - && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength) + && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize) && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) { AKLOGI("PROX: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, @@ -589,8 +585,8 @@ Correction::CorrectionType Correction::processCharAndCalcState( // 4. Last char excessive correction mLastCharExceeded = mExcessiveCount == 0 && mSkippedCount == 0 && mTransposedCount == 0 - && mProximityCount == 0 && (mInputIndex == mInputLength - 2); - const bool isSameAsUserTypedLength = (mInputLength == mInputIndex + 1) || mLastCharExceeded; + && mProximityCount == 0 && (mInputIndex == mInputSize - 2); + const bool isSameAsUserTypedLength = (mInputSize == mInputIndex + 1) || mLastCharExceeded; if (mLastCharExceeded) { ++mExcessiveCount; } @@ -601,7 +597,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( } const bool needsToTryOnTerminalForTheLastPossibleExcessiveChar = - mExceeding && mInputIndex == mInputLength - 2; + mExceeding && mInputIndex == mInputSize - 2; // Finally, we are ready to go to the next character, the next "virtual node". // We should advance the input index. @@ -617,7 +613,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( mTerminalInputIndex = mInputIndex - 1; mTerminalOutputIndex = mOutputIndex - 1; if (DEBUG_CORRECTION - && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength) + && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize) && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) { DUMP_WORD(mWord, mOutputIndex); AKLOGI("ONTERMINAL(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, @@ -631,9 +627,6 @@ Correction::CorrectionType Correction::processCharAndCalcState( } } -Correction::~Correction() { -} - inline static int getQuoteCount(const unsigned short *word, const int length) { int quoteCount = 0; for (int i = 0; i < length; ++i) { @@ -655,7 +648,7 @@ inline static bool isUpperCase(unsigned short c) { /* static */ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex, const int outputIndex, const int freq, int *editDistanceTable, const Correction *correction, - const int inputLength) { + const int inputSize) { const int excessivePos = correction->getExcessivePos(); const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER; const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER; @@ -667,55 +660,55 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex const bool lastCharExceeded = correction->mLastCharExceeded; const bool useFullEditDistance = correction->mUseFullEditDistance; const int outputLength = outputIndex + 1; - if (skippedCount >= inputLength || inputLength == 0) { + if (skippedCount >= inputSize || inputSize == 0) { return -1; } // TODO: find more robust way - bool sameLength = lastCharExceeded ? (inputLength == inputIndex + 2) - : (inputLength == inputIndex + 1); + bool sameLength = lastCharExceeded ? (inputSize == inputIndex + 2) + : (inputSize == inputIndex + 1); // TODO: use mExcessiveCount - const int matchCount = inputLength - correction->mProximityCount - excessiveCount; + const int matchCount = inputSize - correction->mProximityCount - excessiveCount; const unsigned short *word = correction->mWord; const bool skipped = skippedCount > 0; const int quoteDiffCount = max(0, getQuoteCount(word, outputLength) - - getQuoteCount(proximityInfoState->getPrimaryInputWord(), inputLength)); + - getQuoteCount(proximityInfoState->getPrimaryInputWord(), inputSize)); // TODO: Calculate edit distance for transposed and excessive int ed = 0; if (DEBUG_DICT_FULL) { - dumpEditDistance10ForDebug(editDistanceTable, correction->mInputLength, outputLength); + dumpEditDistance10ForDebug(editDistanceTable, correction->mInputSize, outputLength); } int adjustedProximityMatchedCount = proximityMatchedCount; int finalFreq = freq; if (DEBUG_CORRECTION_FREQ - && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == inputLength)) { + && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == inputSize)) { AKLOGI("FinalFreq0: %d", finalFreq); } // TODO: Optimize this. if (transposedCount > 0 || proximityMatchedCount > 0 || skipped || excessiveCount > 0) { - ed = getCurrentEditDistance(editDistanceTable, correction->mInputLength, outputLength, - inputLength) - transposedCount; + ed = getCurrentEditDistance(editDistanceTable, correction->mInputSize, outputLength, + inputSize) - transposedCount; const int matchWeight = powerIntCapped(typedLetterMultiplier, - max(inputLength, outputLength) - ed); + max(inputSize, outputLength) - ed); multiplyIntCapped(matchWeight, &finalFreq); // TODO: Demote further if there are two or more excessive chars with longer user input? - if (inputLength > outputLength) { + if (inputSize > outputLength) { multiplyRate(INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE, &finalFreq); } ed = max(0, ed - quoteDiffCount); - adjustedProximityMatchedCount = min(max(0, ed - (outputLength - inputLength)), + adjustedProximityMatchedCount = min(max(0, ed - (outputLength - inputSize)), proximityMatchedCount); if (transposedCount <= 0) { - if (ed == 1 && (inputLength == outputLength - 1 || inputLength == outputLength + 1)) { + if (ed == 1 && (inputSize == outputLength - 1 || inputSize == outputLength + 1)) { // Promote a word with just one skipped or excessive char if (sameLength) { multiplyRate(WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE @@ -744,8 +737,8 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex // Demotion for a word with missing character if (skipped) { const int demotionRate = WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE - * (10 * inputLength - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X) - / (10 * inputLength + * (10 * inputSize - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X) + / (10 * inputSize - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X + 10); if (DEBUG_DICT_FULL) { AKLOGI("Demotion rate for missing character is %d.", demotionRate); @@ -847,7 +840,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex ? adjustedProximityMatchedCount : (proximityMatchedCount + transposedCount); multiplyRate( - 100 - CORRECTION_COUNT_RATE_DEMOTION_RATE_BASE * errorCount / inputLength, &finalFreq); + 100 - CORRECTION_COUNT_RATE_DEMOTION_RATE_BASE * errorCount / inputSize, &finalFreq); // Promotion for an exactly matched word if (ed == 0) { @@ -882,7 +875,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex e ... exceeding p ... proximity matching */ - if (matchCount == inputLength && matchCount >= 2 && !skipped + if (matchCount == inputSize && matchCount >= 2 && !skipped && word[matchCount] == word[matchCount - 1]) { multiplyRate(WORDS_WITH_MATCH_SKIP_PROMOTION_RATE, &finalFreq); } @@ -892,8 +885,8 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex multiplyIntCapped(fullWordMultiplier, &finalFreq); } - if (useFullEditDistance && outputLength > inputLength + 1) { - const int diff = outputLength - inputLength - 1; + if (useFullEditDistance && outputLength > inputSize + 1) { + const int diff = outputLength - inputSize - 1; const int divider = diff < 31 ? 1 << diff : S_INT_MAX; finalFreq = divider > finalFreq ? 1 : finalFreq / divider; } @@ -903,8 +896,8 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex } if (DEBUG_CORRECTION_FREQ - && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == inputLength)) { - DUMP_WORD(correction->getPrimaryInputWord(), inputLength); + && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == inputSize)) { + DUMP_WORD(correction->getPrimaryInputWord(), inputSize); DUMP_WORD(correction->mWord, outputLength); AKLOGI("FinalFreq: [P%d, S%d, T%d, E%d, A%d] %d, %d, %d, %d, %d, %d", proximityMatchedCount, skippedCount, transposedCount, excessiveCount, additionalProximityCount, diff --git a/native/jni/src/correction.h b/native/jni/src/correction.h index 57e7b7189..f016d5453 100644 --- a/native/jni/src/correction.h +++ b/native/jni/src/correction.h @@ -18,6 +18,7 @@ #define LATINIME_CORRECTION_H #include <cassert> +#include <cstring> // for memset() #include <stdint.h> #include "correction_state.h" @@ -38,66 +39,27 @@ class Correction { NOT_ON_TERMINAL } CorrectionType; - ///////////////////////// - // static inline utils // - ///////////////////////// - - static const int TWO_31ST_DIV_255 = S_INT_MAX / 255; - static inline int capped255MultForFullMatchAccentsOrCapitalizationDifference(const int num) { - return (num < TWO_31ST_DIV_255 ? 255 * num : S_INT_MAX); + Correction() + : mProximityInfo(0), mUseFullEditDistance(false), mDoAutoCompletion(false), + mMaxEditDistance(0), mMaxDepth(0), mInputSize(0), mSpaceProximityPos(0), + mMissingSpacePos(0), mTerminalInputIndex(0), mTerminalOutputIndex(0), mMaxErrors(0), + mTotalTraverseCount(0), mNeedsToTraverseAllNodes(false), mOutputIndex(0), + mInputIndex(0), mEquivalentCharCount(0), mProximityCount(0), mExcessiveCount(0), + mTransposedCount(0), mSkippedCount(0), mTransposedPos(0), mExcessivePos(0), + mSkipPos(0), mLastCharExceeded(false), mMatching(false), mProximityMatching(false), + mAdditionalProximityMatching(false), mExceeding(false), mTransposing(false), + mSkipping(false), mProximityInfoState() { + memset(mWord, 0, sizeof(mWord)); + memset(mDistances, 0, sizeof(mDistances)); + memset(mEditDistanceTable, 0, sizeof(mEditDistanceTable)); + // NOTE: mCorrectionStates is an array of instances. + // No need to initialize it explicitly here. } - static const int TWO_31ST_DIV_2 = S_INT_MAX / 2; - inline static void multiplyIntCapped(const int multiplier, int *base) { - const int temp = *base; - if (temp != S_INT_MAX) { - // Branch if multiplier == 2 for the optimization - if (multiplier < 0) { - if (DEBUG_DICT) { - assert(false); - } - AKLOGI("--- Invalid multiplier: %d", multiplier); - } else if (multiplier == 0) { - *base = 0; - } else if (multiplier == 2) { - *base = TWO_31ST_DIV_2 >= temp ? temp << 1 : S_INT_MAX; - } else { - // TODO: This overflow check gives a wrong answer when, for example, - // temp = 2^16 + 1 and multiplier = 2^17 + 1. - // Fix this behavior. - const int tempRetval = temp * multiplier; - *base = tempRetval >= temp ? tempRetval : S_INT_MAX; - } - } - } - - inline static int powerIntCapped(const int base, const int n) { - if (n <= 0) return 1; - if (base == 2) { - return n < 31 ? 1 << n : S_INT_MAX; - } else { - int ret = base; - for (int i = 1; i < n; ++i) multiplyIntCapped(base, &ret); - return ret; - } - } - - inline static void multiplyRate(const int rate, int *freq) { - if (*freq != S_INT_MAX) { - if (*freq > 1000000) { - *freq /= 100; - multiplyIntCapped(rate, freq); - } else { - multiplyIntCapped(rate, freq); - *freq /= 100; - } - } - } - - Correction() {}; + virtual ~Correction() {} void resetCorrection(); void initCorrection( - const ProximityInfo *pi, const int inputLength, const int maxWordLength); + const ProximityInfo *pi, const int inputSize, const int maxWordLength); void initCorrectionState(const int rootPos, const int childCount, const bool traverseAll); // TODO: remove @@ -108,27 +70,7 @@ class Correction { bool sameAsTyped(); bool initProcessState(const int index); - int getInputIndex(); - - virtual ~Correction(); - int getSpaceProximityPos() const { - return mSpaceProximityPos; - } - int getMissingSpacePos() const { - return mMissingSpacePos; - } - - int getSkipPos() const { - return mSkipPos; - } - - int getExcessivePos() const { - return mExcessivePos; - } - - int getTransposedPos() const { - return mTransposedPos; - } + int getInputIndex() const; bool needsToPrune() const; @@ -141,7 +83,7 @@ class Correction { const bool isSpaceProximity, const unsigned short *word); int getFinalProbability(const int probability, unsigned short **word, int *wordLength); int getFinalProbabilityForSubQueue(const int probability, unsigned short **word, - int *wordLength, const int inputLength); + int *wordLength, const int inputSize); CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal); @@ -165,7 +107,7 @@ class Correction { public: static int calculateFinalProbability(const int inputIndex, const int depth, const int probability, int *editDistanceTable, const Correction *correction, - const int inputLength); + const int inputSize); static int calcFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray, const int wordCount, const Correction *correction, const bool isSpaceProximity, const unsigned short *word); @@ -180,9 +122,9 @@ class Correction { // proximity info state void initInputParams(const ProximityInfo *proximityInfo, const int32_t *inputCodes, - const int inputLength, const int *xCoordinates, const int *yCoordinates) { - mProximityInfoState.initInputParams( - proximityInfo, inputCodes, inputLength, xCoordinates, yCoordinates); + const int inputSize, const int *xCoordinates, const int *yCoordinates) { + mProximityInfoState.initInputParams(0, MAX_POINT_TO_KEY_LENGTH, + proximityInfo, inputCodes, inputSize, xCoordinates, yCoordinates, 0, 0, false); } const unsigned short *getPrimaryInputWord() const { @@ -195,6 +137,81 @@ class Correction { private: DISALLOW_COPY_AND_ASSIGN(Correction); + + ///////////////////////// + // static inline utils // + ///////////////////////// + static const int TWO_31ST_DIV_255 = S_INT_MAX / 255; + static inline int capped255MultForFullMatchAccentsOrCapitalizationDifference(const int num) { + return (num < TWO_31ST_DIV_255 ? 255 * num : S_INT_MAX); + } + + static const int TWO_31ST_DIV_2 = S_INT_MAX / 2; + inline static void multiplyIntCapped(const int multiplier, int *base) { + const int temp = *base; + if (temp != S_INT_MAX) { + // Branch if multiplier == 2 for the optimization + if (multiplier < 0) { + if (DEBUG_DICT) { + assert(false); + } + AKLOGI("--- Invalid multiplier: %d", multiplier); + } else if (multiplier == 0) { + *base = 0; + } else if (multiplier == 2) { + *base = TWO_31ST_DIV_2 >= temp ? temp << 1 : S_INT_MAX; + } else { + // TODO: This overflow check gives a wrong answer when, for example, + // temp = 2^16 + 1 and multiplier = 2^17 + 1. + // Fix this behavior. + const int tempRetval = temp * multiplier; + *base = tempRetval >= temp ? tempRetval : S_INT_MAX; + } + } + } + + inline static int powerIntCapped(const int base, const int n) { + if (n <= 0) return 1; + if (base == 2) { + return n < 31 ? 1 << n : S_INT_MAX; + } else { + int ret = base; + for (int i = 1; i < n; ++i) multiplyIntCapped(base, &ret); + return ret; + } + } + + inline static void multiplyRate(const int rate, int *freq) { + if (*freq != S_INT_MAX) { + if (*freq > 1000000) { + *freq /= 100; + multiplyIntCapped(rate, freq); + } else { + multiplyIntCapped(rate, freq); + *freq /= 100; + } + } + } + + inline int getSpaceProximityPos() const { + return mSpaceProximityPos; + } + inline int getMissingSpacePos() const { + return mMissingSpacePos; + } + + inline int getSkipPos() const { + return mSkipPos; + } + + inline int getExcessivePos() const { + return mExcessivePos; + } + + inline int getTransposedPos() const { + return mTransposedPos; + } + inline void incrementInputIndex(); inline void incrementOutputIndex(); inline void startToTraverseAllNodes(); @@ -204,7 +221,7 @@ class Correction { inline CorrectionType processUnrelatedCorrectionType(); inline void addCharToCurrentWord(const int32_t c); inline int getFinalProbabilityInternal(const int probability, unsigned short **word, - int *wordLength, const int inputLength); + int *wordLength, const int inputSize); static const int TYPED_LETTER_MULTIPLIER = 2; static const int FULL_WORD_MULTIPLIER = 2; @@ -214,7 +231,7 @@ class Correction { bool mDoAutoCompletion; int mMaxEditDistance; int mMaxDepth; - int mInputLength; + int mInputSize; int mSpaceProximityPos; int mMissingSpacePos; int mTerminalInputIndex; diff --git a/native/jni/src/debug.h b/native/jni/src/debug.h index 4e2164014..8f6b69d77 100644 --- a/native/jni/src/debug.h +++ b/native/jni/src/debug.h @@ -22,7 +22,7 @@ static inline unsigned char *convertToUnibyteString(unsigned short *input, unsigned char *output, const unsigned int length) { unsigned int i = 0; - for (; i <= length && input[i] != 0; ++i) + for (; i < length && input[i] != 0; ++i) output[i] = input[i] & 0xFF; output[i] = 0; return output; @@ -31,7 +31,7 @@ static inline unsigned char *convertToUnibyteString(unsigned short *input, unsig static inline unsigned char *convertToUnibyteStringAndReplaceLastChar(unsigned short *input, unsigned char *output, const unsigned int length, unsigned char c) { unsigned int i = 0; - for (; i <= length && input[i] != 0; ++i) + for (; i < length && input[i] != 0; ++i) output[i] = input[i] & 0xFF; if (i > 0) output[i-1] = c; output[i] = 0; diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h index 484fc6bde..28661ab20 100644 --- a/native/jni/src/defines.h +++ b/native/jni/src/defines.h @@ -83,12 +83,38 @@ static inline void dumpWordInt(const int *word, const int length) { AKLOGI("i[ %s ]", charBuf); } +#ifndef __ANDROID__ +#define ASSERT(success) do { if(!success) { showStackTrace(); assert(success);};} while (0) +#define SHOW_STACK_TRACE do { showStackTrace(); } while (0) + +#include <execinfo.h> +#include <stdlib.h> +static inline void showStackTrace() { + void *callstack[128]; + int i, frames = backtrace(callstack, 128); + char **strs = backtrace_symbols(callstack, frames); + for (i = 0; i < frames; ++i) { + if (i == 0) { + AKLOGI("=== Trace ==="); + continue; + } + AKLOGI("%s", strs[i]); + } + free(strs); +} +#else +#define ASSERT(success) +#define SHOW_STACK_TRACE +#endif + #else #define AKLOGE(fmt, ...) #define AKLOGI(fmt, ...) #define DUMP_RESULT(words, frequencies, maxWordCount, maxWordLength) #define DUMP_WORD(word, length) #define DUMP_WORD_INT(word, length) +#define ASSERT(success) +#define SHOW_STACK_TRACE #endif #ifdef FLAG_DO_PROFILE @@ -294,12 +320,13 @@ static inline void prof_out(void) { #define MAX_SPACES_INTERNAL 16 +// Max Distance between point to key +#define MAX_POINT_TO_KEY_LENGTH 10000000 + // TODO: Reduce this constant if possible; check the maximum number of digraphs in the same // word in the dictionary for languages with digraphs, like German and French #define DEFAULT_MAX_DIGRAPH_SEARCH_DEPTH 5 -// Minimum suggest depth for one word for all cases except for missing space suggestions. -#define MIN_SUGGEST_DEPTH 1 #define MIN_USER_TYPED_LENGTH_FOR_MULTIPLE_WORD_SUGGESTION 3 #define MIN_USER_TYPED_LENGTH_FOR_EXCESSIVE_CHARACTER_SUGGESTION 3 diff --git a/native/jni/src/dic_traverse_wrapper.cpp b/native/jni/src/dic_traverse_wrapper.cpp index 1f7dcbfb2..88ca9fa0d 100644 --- a/native/jni/src/dic_traverse_wrapper.cpp +++ b/native/jni/src/dic_traverse_wrapper.cpp @@ -19,8 +19,8 @@ #include "dic_traverse_wrapper.h" namespace latinime { -void *(*DicTraverseWrapper::sDicTraverseSessionFactoryMethod)(JNIEnv *env, jstring locale) = 0; +void *(*DicTraverseWrapper::sDicTraverseSessionFactoryMethod)(JNIEnv *, jstring) = 0; void (*DicTraverseWrapper::sDicTraverseSessionReleaseMethod)(void *) = 0; void (*DicTraverseWrapper::sDicTraverseSessionInitMethod)( - void *, Dictionary *, const int *, const int) = 0; + void *, const Dictionary *const, const int *, const int) = 0; } // namespace latinime diff --git a/native/jni/src/dic_traverse_wrapper.h b/native/jni/src/dic_traverse_wrapper.h index 8396d0027..292382487 100644 --- a/native/jni/src/dic_traverse_wrapper.h +++ b/native/jni/src/dic_traverse_wrapper.h @@ -34,7 +34,7 @@ class DicTraverseWrapper { return 0; } static void initDicTraverseSession(void *traverseSession, - Dictionary *dictionary, const int *prevWord, const int prevWordLength) { + const Dictionary *const dictionary, const int *prevWord, const int prevWordLength) { if (sDicTraverseSessionInitMethod) { sDicTraverseSessionInitMethod(traverseSession, dictionary, prevWord, prevWordLength); } @@ -45,11 +45,11 @@ class DicTraverseWrapper { } } static void setTraverseSessionFactoryMethod( - void *(*factoryMethod)(JNIEnv *env, jstring locale)) { + void *(*factoryMethod)(JNIEnv *, jstring)) { sDicTraverseSessionFactoryMethod = factoryMethod; } static void setTraverseSessionInitMethod( - void (*initMethod)(void *, Dictionary *, const int *, const int)) { + void (*initMethod)(void *, const Dictionary *const, const int *, const int)) { sDicTraverseSessionInitMethod = initMethod; } static void setTraverseSessionReleaseMethod(void (*releaseMethod)(void *)) { @@ -58,7 +58,8 @@ class DicTraverseWrapper { private: DISALLOW_IMPLICIT_CONSTRUCTORS(DicTraverseWrapper); static void *(*sDicTraverseSessionFactoryMethod)(JNIEnv *, jstring); - static void (*sDicTraverseSessionInitMethod)(void *, Dictionary *, const int *, const int); + static void (*sDicTraverseSessionInitMethod)( + void *, const Dictionary *const, const int *, const int); static void (*sDicTraverseSessionReleaseMethod)(void *); }; int register_DicTraverseSession(JNIEnv *env); diff --git a/native/jni/src/dictionary.cpp b/native/jni/src/dictionary.cpp index 158c3fb22..2fbe83e86 100644 --- a/native/jni/src/dictionary.cpp +++ b/native/jni/src/dictionary.cpp @@ -30,11 +30,15 @@ namespace latinime { // TODO: Change the type of all keyCodes to uint32_t Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust, - int typedLetterMultiplier, int fullWordMultiplier, - int maxWordLength, int maxWords, int maxPredictions) - : mDict(static_cast<unsigned char *>(dict)), - mOffsetDict((static_cast<unsigned char *>(dict)) + BinaryFormat::getHeaderSize(mDict)), - mDictSize(dictSize), mMmapFd(mmapFd), mDictBufAdjust(dictBufAdjust) { + int typedLetterMultiplier, int fullWordMultiplier, int maxWordLength, int maxWords, + int maxPredictions) + : mDict(static_cast<unsigned char *>(dict)), + mOffsetDict((static_cast<unsigned char *>(dict)) + BinaryFormat::getHeaderSize(mDict)), + mDictSize(dictSize), mMmapFd(mmapFd), mDictBufAdjust(dictBufAdjust), + mUnigramDictionary(new UnigramDictionary(mOffsetDict, typedLetterMultiplier, + fullWordMultiplier, maxWordLength, maxWords, BinaryFormat::getFlags(mDict))), + mBigramDictionary(new BigramDictionary(mOffsetDict, maxWordLength, maxPredictions)), + mGestureDecoder(new GestureDecoderWrapper(maxWordLength, maxWords)) { if (DEBUG_DICT) { if (MAX_WORD_LENGTH_INTERNAL < maxWordLength) { AKLOGI("Max word length (%d) is greater than %d", @@ -42,11 +46,6 @@ Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust, AKLOGI("IN NATIVE SUGGEST Version: %d", (mDict[0] & 0xFF)); } } - const unsigned int options = BinaryFormat::getFlags(mDict); - mUnigramDictionary = new UnigramDictionary(mOffsetDict, typedLetterMultiplier, - fullWordMultiplier, maxWordLength, maxWords, options); - mBigramDictionary = new BigramDictionary(mOffsetDict, maxWordLength, maxPredictions); - mGestureDecoder = new GestureDecoderWrapper(maxWordLength, maxWords); } Dictionary::~Dictionary() { @@ -60,7 +59,7 @@ int Dictionary::getSuggestions(ProximityInfo *proximityInfo, void *traverseSessi int *codes, int codesSize, int *prevWordChars, int prevWordLength, int commitPoint, bool isGesture, bool useFullEditDistance, unsigned short *outWords, - int *frequencies, int *spaceIndices, int *outputTypes) { + int *frequencies, int *spaceIndices, int *outputTypes) const { int result = 0; if (isGesture) { DicTraverseWrapper::initDicTraverseSession( diff --git a/native/jni/src/dictionary.h b/native/jni/src/dictionary.h index fd9e77011..e9a03ce55 100644 --- a/native/jni/src/dictionary.h +++ b/native/jni/src/dictionary.h @@ -48,7 +48,7 @@ class Dictionary { int *ycoordinates, int *times, int *pointerIds, int *codes, int codesSize, int *prevWordChars, int prevWordLength, int commitPoint, bool isGesture, bool useFullEditDistance, unsigned short *outWords, - int *frequencies, int *spaceIndices, int *outputTypes); + int *frequencies, int *spaceIndices, int *outputTypes) const; int getBigrams(const int32_t *word, int length, int *codes, int codesSize, unsigned short *outWords, int *frequencies, int *outputTypes) const; diff --git a/native/jni/src/geometry_utils.h b/native/jni/src/geometry_utils.h index 146eb8055..f30e9fcc0 100644 --- a/native/jni/src/geometry_utils.h +++ b/native/jni/src/geometry_utils.h @@ -19,7 +19,6 @@ #include <cmath> -#define MAX_DISTANCE 10000000 #define MAX_PATHS 2 #define DEBUG_DECODER false diff --git a/native/jni/src/gesture/gesture_decoder_wrapper.h b/native/jni/src/gesture/gesture_decoder_wrapper.h index f8bfe7c79..92e1ded49 100644 --- a/native/jni/src/gesture/gesture_decoder_wrapper.h +++ b/native/jni/src/gesture/gesture_decoder_wrapper.h @@ -29,8 +29,8 @@ class ProximityInfo; class GestureDecoderWrapper : public IncrementalDecoderInterface { public: - GestureDecoderWrapper(const int maxWordLength, const int maxWords) { - mIncrementalDecoderInterface = getGestureDecoderInstance(maxWordLength, maxWords); + GestureDecoderWrapper(const int maxWordLength, const int maxWords) + : mIncrementalDecoderInterface(getGestureDecoderInstance(maxWordLength, maxWords)) { } virtual ~GestureDecoderWrapper() { @@ -39,7 +39,8 @@ class GestureDecoderWrapper : public IncrementalDecoderInterface { int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs, int *times, int *pointerIds, int *codes, int inputSize, int commitPoint, - unsigned short *outWords, int *frequencies, int *outputIndices, int *outputTypes) { + unsigned short *outWords, int *frequencies, int *outputIndices, + int *outputTypes) const { if (!mIncrementalDecoderInterface) { return 0; } diff --git a/native/jni/src/gesture/incremental_decoder_interface.h b/native/jni/src/gesture/incremental_decoder_interface.h index 04f0095e0..d1395aab9 100644 --- a/native/jni/src/gesture/incremental_decoder_interface.h +++ b/native/jni/src/gesture/incremental_decoder_interface.h @@ -31,7 +31,7 @@ class IncrementalDecoderInterface { virtual int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs, int *times, int *pointerIds, int *codes, int inputSize, int commitPoint, unsigned short *outWords, int *frequencies, - int *outputIndices, int *outputTypes) = 0; + int *outputIndices, int *outputTypes) const = 0; IncrementalDecoderInterface() { }; virtual ~IncrementalDecoderInterface() { }; private: diff --git a/native/jni/src/gesture/incremental_decoder_wrapper.h b/native/jni/src/gesture/incremental_decoder_wrapper.h index 5cb2ee368..da7afdb8a 100644 --- a/native/jni/src/gesture/incremental_decoder_wrapper.h +++ b/native/jni/src/gesture/incremental_decoder_wrapper.h @@ -29,8 +29,8 @@ class ProximityInfo; class IncrementalDecoderWrapper : public IncrementalDecoderInterface { public: - IncrementalDecoderWrapper(const int maxWordLength, const int maxWords) { - mIncrementalDecoderInterface = getIncrementalDecoderInstance(maxWordLength, maxWords); + IncrementalDecoderWrapper(const int maxWordLength, const int maxWords) + : mIncrementalDecoderInterface(getIncrementalDecoderInstance(maxWordLength, maxWords)) { } virtual ~IncrementalDecoderWrapper() { @@ -39,7 +39,8 @@ class IncrementalDecoderWrapper : public IncrementalDecoderInterface { int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs, int *times, int *pointerIds, int *codes, int inputSize, int commitPoint, - unsigned short *outWords, int *frequencies, int *outputIndices, int *outputTypes) { + unsigned short *outWords, int *frequencies, int *outputIndices, + int *outputTypes) const { if (!mIncrementalDecoderInterface) { return 0; } diff --git a/native/jni/src/hash_map_compat.h b/native/jni/src/hash_map_compat.h new file mode 100644 index 000000000..116359a73 --- /dev/null +++ b/native/jni/src/hash_map_compat.h @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2012, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_HASH_MAP_COMPAT_H +#define LATINIME_HASH_MAP_COMPAT_H + +// TODO: Use std::unordered_map that has been standardized in C++11 + +#ifdef __APPLE__ +#include <ext/hash_map> +#else // __APPLE__ +#include <hash_map> +#endif // __APPLE__ + +#ifdef __SGI_STL_PORT +#define hash_map_compat stlport::hash_map +#else // __SGI_STL_PORT +#define hash_map_compat __gnu_cxx::hash_map +#endif // __SGI_STL_PORT + +#endif // LATINIME_HASH_MAP_COMPAT_H diff --git a/native/jni/src/proximity_info.cpp b/native/jni/src/proximity_info.cpp index 1b9bac0f0..765632e46 100644 --- a/native/jni/src/proximity_info.cpp +++ b/native/jni/src/proximity_info.cpp @@ -29,6 +29,9 @@ namespace latinime { +/* static */ const int ProximityInfo::NOT_A_CODE = -1; +/* static */ const float ProximityInfo::NOT_A_DISTANCE_FLOAT = -1.0f; + static inline void safeGetOrFillZeroIntArrayRegion(JNIEnv *env, jintArray jArray, jsize len, jint *buffer) { if (jArray && buffer) { @@ -54,16 +57,17 @@ ProximityInfo::ProximityInfo(JNIEnv *env, const jstring localeJStr, const int ma const jintArray keyWidths, const jintArray keyHeights, const jintArray keyCharCodes, const jfloatArray sweetSpotCenterXs, const jfloatArray sweetSpotCenterYs, const jfloatArray sweetSpotRadii) - : MAX_PROXIMITY_CHARS_SIZE(maxProximityCharsSize), KEYBOARD_WIDTH(keyboardWidth), - KEYBOARD_HEIGHT(keyboardHeight), GRID_WIDTH(gridWidth), GRID_HEIGHT(gridHeight), - MOST_COMMON_KEY_WIDTH(mostCommonKeyWidth), + : MAX_PROXIMITY_CHARS_SIZE(maxProximityCharsSize), GRID_WIDTH(gridWidth), + GRID_HEIGHT(gridHeight), MOST_COMMON_KEY_WIDTH(mostCommonKeyWidth), MOST_COMMON_KEY_WIDTH_SQUARE(mostCommonKeyWidth * mostCommonKeyWidth), CELL_WIDTH((keyboardWidth + gridWidth - 1) / gridWidth), CELL_HEIGHT((keyboardHeight + gridHeight - 1) / gridHeight), KEY_COUNT(min(keyCount, MAX_KEY_COUNT_IN_A_KEYBOARD)), HAS_TOUCH_POSITION_CORRECTION_DATA(keyCount > 0 && keyXCoordinates && keyYCoordinates && keyWidths && keyHeights && keyCharCodes && sweetSpotCenterXs - && sweetSpotCenterYs && sweetSpotRadii) { + && sweetSpotCenterYs && sweetSpotRadii), + mProximityCharsArray(new int32_t[GRID_WIDTH * GRID_HEIGHT * MAX_PROXIMITY_CHARS_SIZE + /* proximityGridLength */]) { const int proximityGridLength = GRID_WIDTH * GRID_HEIGHT * MAX_PROXIMITY_CHARS_SIZE; if (DEBUG_PROXIMITY_INFO) { AKLOGI("Create proximity info array %d", proximityGridLength); @@ -75,7 +79,6 @@ ProximityInfo::ProximityInfo(JNIEnv *env, const jstring localeJStr, const int ma } memset(mLocaleStr, 0, sizeof(mLocaleStr)); env->GetStringUTFRegion(localeJStr, 0, env->GetStringLength(localeJStr), mLocaleStr); - mProximityCharsArray = new int32_t[proximityGridLength]; safeGetOrFillZeroIntArrayRegion(env, proximityChars, proximityGridLength, mProximityCharsArray); safeGetOrFillZeroIntArrayRegion(env, keyXCoordinates, KEY_COUNT, mKeyXCoordinates); safeGetOrFillZeroIntArrayRegion(env, keyYCoordinates, KEY_COUNT, mKeyYCoordinates); @@ -299,6 +302,6 @@ int ProximityInfo::getKeyKeyDistanceG(int key0, int key1) const { if (keyId0 >= 0 && keyId1 >= 0) { return mKeyKeyDistancesG[keyId0][keyId1]; } - return 0; + return MAX_POINT_TO_KEY_LENGTH; } } // namespace latinime diff --git a/native/jni/src/proximity_info.h b/native/jni/src/proximity_info.h index 8a407e71a..822909b7a 100644 --- a/native/jni/src/proximity_info.h +++ b/native/jni/src/proximity_info.h @@ -41,21 +41,12 @@ class ProximityInfo { float getNormalizedSquaredDistanceFromCenterFloat( const int keyId, const int x, const int y) const; bool sameAsTyped(const unsigned short *word, int length) const; - int squaredDistanceToEdge(const int keyId, const int x, const int y) const; - bool isOnKey(const int keyId, const int x, const int y) const { - if (keyId < 0) return true; // NOT_A_ID is -1, but return whenever < 0 just in case - const int left = mKeyXCoordinates[keyId]; - const int top = mKeyYCoordinates[keyId]; - const int right = left + mKeyWidths[keyId] + 1; - const int bottom = top + mKeyHeights[keyId]; - return left < right && top < bottom && x >= left && x < right && y >= top && y < bottom; - } int getKeyIndex(const int c) const; int getKeyCode(const int keyIndex) const; bool hasSweetSpotData(const int keyIndex) const { // When there are no calibration data for a key, // the radius of the key is assigned to zero. - return mSweetSpotRadii[keyIndex] > 0.0; + return mSweetSpotRadii[keyIndex] > 0.0f; } float getSweetSpotRadiiAt(int keyIndex) const { return mSweetSpotRadii[keyIndex]; @@ -111,18 +102,14 @@ class ProximityInfo { float getKeyCenterYOfIdG(int keyId) const; int getKeyKeyDistanceG(int key0, int key1) const; - // Returns the keyboard key-center information. - void getCenters(int *centersX, int *centersY, int *codeToKeyIndex, int *keyToCodeIndex, - int *keyCount, int *keyWidth) const; - private: DISALLOW_IMPLICIT_CONSTRUCTORS(ProximityInfo); // The max number of the keys in one keyboard layout static const int MAX_KEY_COUNT_IN_A_KEYBOARD = 64; // The upper limit of the char code in mCodeToKeyIndex static const int MAX_CHAR_CODE = 127; - static const float NOT_A_DISTANCE_FLOAT = -1.0f; - static const int NOT_A_CODE = -1; + static const int NOT_A_CODE; + static const float NOT_A_DISTANCE_FLOAT; int getStartIndexFromCoordinates(const int x, const int y) const; void initializeCodeToKeyIndex(); @@ -131,10 +118,17 @@ class ProximityInfo { float calculateSquaredDistanceFromSweetSpotCenter( const int keyIndex, const int inputIndex) const; bool hasInputCoordinates() const; + int squaredDistanceToEdge(const int keyId, const int x, const int y) const; + bool isOnKey(const int keyId, const int x, const int y) const { + if (keyId < 0) return true; // NOT_A_ID is -1, but return whenever < 0 just in case + const int left = mKeyXCoordinates[keyId]; + const int top = mKeyYCoordinates[keyId]; + const int right = left + mKeyWidths[keyId] + 1; + const int bottom = top + mKeyHeights[keyId]; + return left < right && top < bottom && x >= left && x < right && y >= top && y < bottom; + } const int MAX_PROXIMITY_CHARS_SIZE; - const int KEYBOARD_WIDTH; - const int KEYBOARD_HEIGHT; const int GRID_WIDTH; const int GRID_HEIGHT; const int MOST_COMMON_KEY_WIDTH; diff --git a/native/jni/src/proximity_info_state.cpp b/native/jni/src/proximity_info_state.cpp index 86c8a697a..e13d4e664 100644 --- a/native/jni/src/proximity_info_state.cpp +++ b/native/jni/src/proximity_info_state.cpp @@ -20,13 +20,15 @@ #define LOG_TAG "LatinIME: proximity_info_state.cpp" #include "defines.h" +#include "geometry_utils.h" #include "proximity_info.h" #include "proximity_info_state.h" namespace latinime { -void ProximityInfoState::initInputParams( - const ProximityInfo *proximityInfo, const int32_t *inputCodes, const int inputLength, - const int *xCoordinates, const int *yCoordinates) { +void ProximityInfoState::initInputParams(const int pointerId, const float maxPointToKeyLength, + const ProximityInfo *proximityInfo, const int32_t *const inputCodes, const int inputSize, + const int *const xCoordinates, const int *const yCoordinates, const int *const times, + const int *const pointerIds, const bool isGeometric) { mProximityInfo = proximityInfo; mHasTouchPositionCorrectionData = proximityInfo->hasTouchPositionCorrectionData(); mMostCommonKeyWidthSquare = proximityInfo->getMostCommonKeyWidthSquare(); @@ -36,78 +38,312 @@ void ProximityInfoState::initInputParams( mCellWidth = proximityInfo->getCellWidth(); mGridHeight = proximityInfo->getGridWidth(); mGridWidth = proximityInfo->getGridHeight(); - const int normalizedSquaredDistancesLength = - MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH_INTERNAL; - for (int i = 0; i < normalizedSquaredDistancesLength; ++i) { - mNormalizedSquaredDistances[i] = NOT_A_DISTANCE; - } - - memset(mInputCodes, 0, - MAX_WORD_LENGTH_INTERNAL * MAX_PROXIMITY_CHARS_SIZE_INTERNAL * sizeof(mInputCodes[0])); - - for (int i = 0; i < inputLength; ++i) { - const int32_t primaryKey = inputCodes[i]; - const int x = xCoordinates[i]; - const int y = yCoordinates[i]; - int *proximities = &mInputCodes[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL]; - mProximityInfo->calculateNearbyKeyCodes(x, y, primaryKey, proximities); - } - - if (DEBUG_PROXIMITY_CHARS) { - for (int i = 0; i < inputLength; ++i) { - AKLOGI("---"); - for (int j = 0; j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL; ++j) { - int icc = mInputCodes[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + j]; - int icfjc = inputCodes[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + j]; - icc += 0; - icfjc += 0; - AKLOGI("--- (%d)%c,%c", i, icc, icfjc); AKLOGI("--- A<%d>,B<%d>", icc, icfjc); + + memset(mInputCodes, 0, sizeof(mInputCodes)); + + if (!isGeometric && pointerId == 0) { + // Initialize + // - mInputCodes + // - mNormalizedSquaredDistances + // TODO: Merge + for (int i = 0; i < inputSize; ++i) { + const int32_t primaryKey = inputCodes[i]; + const int x = xCoordinates[i]; + const int y = yCoordinates[i]; + int *proximities = &mInputCodes[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL]; + mProximityInfo->calculateNearbyKeyCodes(x, y, primaryKey, proximities); + } + + if (DEBUG_PROXIMITY_CHARS) { + for (int i = 0; i < inputSize; ++i) { + AKLOGI("---"); + for (int j = 0; j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL; ++j) { + int icc = mInputCodes[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + j]; + int icfjc = inputCodes[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + j]; + icc += 0; + icfjc += 0; + AKLOGI("--- (%d)%c,%c", i, icc, icfjc); AKLOGI("--- A<%d>,B<%d>", icc, icfjc); + } } } } - mInputXCoordinates = xCoordinates; - mInputYCoordinates = yCoordinates; - mTouchPositionCorrectionEnabled = - mHasTouchPositionCorrectionData && xCoordinates && yCoordinates; - mInputLength = inputLength; - for (int i = 0; i < inputLength; ++i) { - mPrimaryInputWord[i] = getPrimaryCharAt(i); - } - mPrimaryInputWord[inputLength] = 0; - if (DEBUG_PROXIMITY_CHARS) { - AKLOGI("--- initInputParams"); - } - for (int i = 0; i < mInputLength; ++i) { - const int *proximityChars = getProximityCharsAt(i); - const int primaryKey = proximityChars[0]; - const int x = xCoordinates[i]; - const int y = yCoordinates[i]; - if (DEBUG_PROXIMITY_CHARS) { - int a = x + y + primaryKey; - a += 0; - AKLOGI("--- Primary = %c, x = %d, y = %d", primaryKey, x, y); + + /////////////////////// + // Setup touch points + mMaxPointToKeyLength = maxPointToKeyLength; + mInputXs.clear(); + mInputYs.clear(); + mTimes.clear(); + mLengthCache.clear(); + mDistanceCache.clear(); + mInputSize = 0; + + if (xCoordinates && yCoordinates) { + const bool proximityOnly = !isGeometric && (xCoordinates[0] < 0 || yCoordinates[0] < 0); + int lastInputIndex = 0; + for (int i = 0; i < inputSize; ++i) { + const int pid = pointerIds ? pointerIds[i] : 0; + if (pointerId == pid) { + lastInputIndex = i; + } } - for (int j = 0; j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL && proximityChars[j] > 0; ++j) { - const int currentChar = proximityChars[j]; - const float squaredDistance = - hasInputCoordinates() ? calculateNormalizedSquaredDistance( - mProximityInfo->getKeyIndex(currentChar), i) : - NOT_A_DISTANCE_FLOAT; - if (squaredDistance >= 0.0f) { - mNormalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + j] = - (int) (squaredDistance * NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR); - } else { - mNormalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + j] = - (j == 0) ? EQUIVALENT_CHAR_WITHOUT_DISTANCE_INFO : - PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO; + // Working space to save near keys distances for current, prev and prevprev input point. + NearKeysDistanceMap nearKeysDistances[3]; + // These pointers are swapped for each inputs points. + NearKeysDistanceMap *currentNearKeysDistances = &nearKeysDistances[0]; + NearKeysDistanceMap *prevNearKeysDistances = &nearKeysDistances[1]; + NearKeysDistanceMap *prevPrevNearKeysDistances = &nearKeysDistances[2]; + + for (int i = 0; i < inputSize; ++i) { + // Assuming pointerId == 0 if pointerIds is null. + const int pid = pointerIds ? pointerIds[i] : 0; + if (pointerId == pid) { + const int c = isGeometric ? NOT_A_COORDINATE : getPrimaryCharAt(i); + const int x = proximityOnly ? NOT_A_COORDINATE : xCoordinates[i]; + const int y = proximityOnly ? NOT_A_COORDINATE : yCoordinates[i]; + const int time = times ? times[i] : -1; + if (pushTouchPoint(c, x, y, time, isGeometric, i == lastInputIndex, + currentNearKeysDistances, prevNearKeysDistances, + prevPrevNearKeysDistances)) { + // Previous point information was popped. + NearKeysDistanceMap *tmp = prevNearKeysDistances; + prevNearKeysDistances = currentNearKeysDistances; + currentNearKeysDistances = tmp; + } else { + NearKeysDistanceMap *tmp = prevPrevNearKeysDistances; + prevPrevNearKeysDistances = prevNearKeysDistances; + prevNearKeysDistances = currentNearKeysDistances; + currentNearKeysDistances = tmp; + } + } + } + mInputSize = mInputXs.size(); + } + + if (mInputSize > 0) { + const int keyCount = mProximityInfo->getKeyCount(); + mDistanceCache.resize(mInputSize * keyCount); + for (int i = 0; i < mInputSize; ++i) { + for (int k = 0; k < keyCount; ++k) { + const int index = i * keyCount + k; + const int x = mInputXs[i]; + const int y = mInputYs[i]; + mDistanceCache[index] = + mProximityInfo->getNormalizedSquaredDistanceFromCenterFloat(k, x, y); } + } + } + + // end + /////////////////////// + + memset(mNormalizedSquaredDistances, NOT_A_DISTANCE, sizeof(mNormalizedSquaredDistances)); + memset(mPrimaryInputWord, 0, sizeof(mPrimaryInputWord)); + mTouchPositionCorrectionEnabled = mInputSize > 0 && mHasTouchPositionCorrectionData + && xCoordinates && yCoordinates && !isGeometric; + if (!isGeometric && pointerId == 0) { + for (int i = 0; i < inputSize; ++i) { + mPrimaryInputWord[i] = getPrimaryCharAt(i); + } + + for (int i = 0; i < mInputSize && mTouchPositionCorrectionEnabled; ++i) { + const int *proximityChars = getProximityCharsAt(i); + const int primaryKey = proximityChars[0]; + const int x = xCoordinates[i]; + const int y = yCoordinates[i]; if (DEBUG_PROXIMITY_CHARS) { - AKLOGI("--- Proximity (%d) = %c", j, currentChar); + int a = x + y + primaryKey; + a += 0; + AKLOGI("--- Primary = %c, x = %d, y = %d", primaryKey, x, y); + } + for (int j = 0; j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL && proximityChars[j] > 0; ++j) { + const int currentChar = proximityChars[j]; + const float squaredDistance = + hasInputCoordinates() ? calculateNormalizedSquaredDistance( + mProximityInfo->getKeyIndex(currentChar), i) : + NOT_A_DISTANCE_FLOAT; + if (squaredDistance >= 0.0f) { + mNormalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + j] = + (int) (squaredDistance * NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR); + } else { + mNormalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + j] = + (j == 0) ? EQUIVALENT_CHAR_WITHOUT_DISTANCE_INFO : + PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO; + } + if (DEBUG_PROXIMITY_CHARS) { + AKLOGI("--- Proximity (%d) = %c", j, currentChar); + } } } } } +// Calculating point to key distance for all near keys and returning the distance between +// the given point and the nearest key position. +float ProximityInfoState::updateNearKeysDistances(const int x, const int y, + NearKeysDistanceMap *const currentNearKeysDistances) { + static const float NEAR_KEY_THRESHOLD = 10.0f; + + currentNearKeysDistances->clear(); + const int keyCount = mProximityInfo->getKeyCount(); + float nearestKeyDistance = mMaxPointToKeyLength; + for (int k = 0; k < keyCount; ++k) { + const float dist = mProximityInfo->getNormalizedSquaredDistanceFromCenterFloat(k, x, y); + if (dist < NEAR_KEY_THRESHOLD) { + currentNearKeysDistances->insert(std::pair<int, float>(k, dist)); + } + if (nearestKeyDistance > dist) { + nearestKeyDistance = dist; + } + } + return nearestKeyDistance; +} + +// Check if previous point is at local minimum position to near keys. +bool ProximityInfoState::isPrevLocalMin(const NearKeysDistanceMap *const currentNearKeysDistances, + const NearKeysDistanceMap *const prevNearKeysDistances, + const NearKeysDistanceMap *const prevPrevNearKeysDistances) const { + static const float MARGIN = 0.5f; + + for (NearKeysDistanceMap::const_iterator it = prevNearKeysDistances->begin(); + it != prevNearKeysDistances->end(); ++it) { + NearKeysDistanceMap::const_iterator itPP = prevPrevNearKeysDistances->find(it->first); + NearKeysDistanceMap::const_iterator itC = currentNearKeysDistances->find(it->first); + if ((itPP == prevPrevNearKeysDistances->end() || itPP->second > it->second + MARGIN) + && (itC == currentNearKeysDistances->end() || itC->second > it->second + MARGIN)) { + return true; + } + } + return false; +} + +// Calculating a point score that indicates usefulness of the point. +float ProximityInfoState::getPointScore( + const int x, const int y, const int time, const bool lastPoint, const float nearest, + const NearKeysDistanceMap *const currentNearKeysDistances, + const NearKeysDistanceMap *const prevNearKeysDistances, + const NearKeysDistanceMap *const prevPrevNearKeysDistances) const { + static const float BASE_SAMPLE_RATE_SCALE = 0.1f; + static const float SAVE_DISTANCE_SCALE = 12.0f; + static const float SAVE_DISTANCE_SCORE = 2.0f; + static const float SKIP_DISTANCE_SCALE = 1.5f; + static const float SKIP_DISTANCE_SCORE = -1.0f; + static const float CHECK_LOCALMIN_DISTANCE_THRESHOLD_SCALE = 2.5f; + static const float CHECK_LOCALMIN_DISTANCE_SCORE = -1.0f; + static const float STRAIGHT_ANGLE_THRESHOLD = M_PI_F / 32.0f; + static const float STRAIGHT_SKIP_DISTANCE_THRESHOLD_SCALE = 4.0f; + static const float STRAIGHT_SKIP_NEAREST_DISTANCE_THRESHOLD = 0.5f; + static const float STRAIGHT_SKIP_SCORE = -1.0f; + + const std::size_t size = mInputXs.size(); + if (size <= 1) { + return 0; + } + const float baseSampleRate = mProximityInfo->getMostCommonKeyWidth() * BASE_SAMPLE_RATE_SCALE; + const float distNext = getDistanceFloat(x, y, mInputXs.back(), mInputYs.back()); + const float distPrev = getDistanceFloat(mInputXs.back(), mInputYs.back(), + mInputXs[size - 2], mInputYs[size - 2]); + float score = 0.0f; + + // Sum of distances + if (distPrev + distNext > baseSampleRate * SAVE_DISTANCE_SCALE) { + score += SAVE_DISTANCE_SCORE; + } + // Distance + if (distPrev < baseSampleRate * SKIP_DISTANCE_SCALE) { + score += SKIP_DISTANCE_SCORE; + } + // Location + if (!isPrevLocalMin(currentNearKeysDistances, currentNearKeysDistances, + prevPrevNearKeysDistances)) { + if (distPrev < baseSampleRate * CHECK_LOCALMIN_DISTANCE_THRESHOLD_SCALE) { + score += CHECK_LOCALMIN_DISTANCE_SCORE; + } + } + // Angle + const float angle1 = getAngle(x, y, mInputXs.back(), mInputYs.back()); + const float angle2 = getAngle(mInputXs.back(), mInputYs.back(), + mInputXs[size - 2], mInputYs[size - 2]); + if (getAngleDiff(angle1, angle2) < STRAIGHT_ANGLE_THRESHOLD) { + if (nearest > STRAIGHT_SKIP_NEAREST_DISTANCE_THRESHOLD + && distPrev < baseSampleRate * STRAIGHT_SKIP_DISTANCE_THRESHOLD_SCALE) { + score += STRAIGHT_SKIP_SCORE; + } + } + return score; +} + +// Sampling touch point and pushing information to vectors. +// Returning if previous point is popped or not. +bool ProximityInfoState::pushTouchPoint(const int nodeChar, int x, int y, const int time, + const bool sample, const bool isLastPoint, + NearKeysDistanceMap *const currentNearKeysDistances, + const NearKeysDistanceMap *const prevNearKeysDistances, + const NearKeysDistanceMap *const prevPrevNearKeysDistances) { + static const float LAST_POINT_SKIP_DISTANCE_SCALE = 0.25f; + + uint32_t size = mInputXs.size(); + bool popped = false; + if (nodeChar < 0 && sample) { + const float nearest = updateNearKeysDistances(x, y, currentNearKeysDistances); + const float score = getPointScore(x, y, time, isLastPoint, nearest, + currentNearKeysDistances, prevNearKeysDistances, prevPrevNearKeysDistances); + if (score < 0) { + // Pop previous point because it would be useless. + mInputXs.pop_back(); + mInputYs.pop_back(); + mTimes.pop_back(); + mLengthCache.pop_back(); + size = mInputXs.size(); + popped = true; + } else { + popped = false; + } + // Check if the last point should be skipped. + if (isLastPoint) { + if (size > 0 && getDistanceFloat(x, y, mInputXs.back(), mInputYs.back()) + < mProximityInfo->getMostCommonKeyWidth() * LAST_POINT_SKIP_DISTANCE_SCALE) { + return popped; + } else if (size > 1) { + int minChar = 0; + float minDist = mMaxPointToKeyLength; + for (NearKeysDistanceMap::const_iterator it = currentNearKeysDistances->begin(); + it != currentNearKeysDistances->end(); ++it) { + if(minDist > it->second){ + minChar = it->first; + minDist = it->second; + } + } + NearKeysDistanceMap::const_iterator itPP = + prevNearKeysDistances->find(minChar); + if (itPP != prevNearKeysDistances->end() && minDist > itPP->second) { + return popped; + } + } + } + } + + if (nodeChar >= 0 && (x < 0 || y < 0)) { + const int keyId = mProximityInfo->getKeyIndex(nodeChar); + if (keyId >= 0) { + x = mProximityInfo->getKeyCenterXOfIdG(keyId); + y = mProximityInfo->getKeyCenterYOfIdG(keyId); + } + } + + // Pushing point information. + if (size > 0) { + mLengthCache.push_back( + mLengthCache.back() + getDistanceInt(x, y, mInputXs.back(), mInputYs.back())); + } else { + mLengthCache.push_back(0); + } + mInputXs.push_back(x); + mInputYs.push_back(y); + mTimes.push_back(time); + return popped; +} + float ProximityInfoState::calculateNormalizedSquaredDistance( const int keyIndex, const int inputIndex) const { if (keyIndex == NOT_AN_INDEX) { @@ -116,7 +352,7 @@ float ProximityInfoState::calculateNormalizedSquaredDistance( if (!mProximityInfo->hasSweetSpotData(keyIndex)) { return NOT_A_DISTANCE_FLOAT; } - if (NOT_A_COORDINATE == mInputXCoordinates[inputIndex]) { + if (NOT_A_COORDINATE == mInputXs[inputIndex]) { return NOT_A_DISTANCE_FLOAT; } const float squaredDistance = calculateSquaredDistanceFromSweetSpotCenter( @@ -125,12 +361,37 @@ float ProximityInfoState::calculateNormalizedSquaredDistance( return squaredDistance / squaredRadius; } +int ProximityInfoState::getDuration(const int index) const { + if (mInputSize > 0 && index > 0 && index < static_cast<int>(mInputSize) - 1) { + return mTimes[index + 1] - mTimes[index - 1]; + } + return 0; +} + +float ProximityInfoState::getPointToKeyLength(int inputIndex, int charCode, float scale) { + const int keyId = mProximityInfo->getKeyIndex(charCode); + if (keyId >= 0) { + const int index = inputIndex * mProximityInfo->getKeyCount() + keyId; + return min(mDistanceCache[index] * scale, mMaxPointToKeyLength); + } + return 0; +} + +int ProximityInfoState::getKeyKeyDistance(int key0, int key1) { + return mProximityInfo->getKeyKeyDistanceG(key0, key1); +} + +int ProximityInfoState::getSpaceY() { + const int keyId = mProximityInfo->getKeyIndex(' '); + return mProximityInfo->getKeyCenterYOfIdG(keyId); +} + float ProximityInfoState::calculateSquaredDistanceFromSweetSpotCenter( const int keyIndex, const int inputIndex) const { const float sweetSpotCenterX = mProximityInfo->getSweetSpotCenterXAt(keyIndex); const float sweetSpotCenterY = mProximityInfo->getSweetSpotCenterYAt(keyIndex); - const float inputX = static_cast<float>(mInputXCoordinates[inputIndex]); - const float inputY = static_cast<float>(mInputYCoordinates[inputIndex]); + const float inputX = static_cast<float>(mInputXs[inputIndex]); + const float inputY = static_cast<float>(mInputYs[inputIndex]); return square(inputX - sweetSpotCenterX) + square(inputY - sweetSpotCenterY); } } // namespace latinime diff --git a/native/jni/src/proximity_info_state.h b/native/jni/src/proximity_info_state.h index 474c40757..746b9c968 100644 --- a/native/jni/src/proximity_info_state.h +++ b/native/jni/src/proximity_info_state.h @@ -17,11 +17,14 @@ #ifndef LATINIME_PROXIMITY_INFO_STATE_H #define LATINIME_PROXIMITY_INFO_STATE_H +#include <cstring> // for memset() #include <stdint.h> #include <string> +#include <vector> #include "char_utils.h" #include "defines.h" +#include "hash_map_compat.h" namespace latinime { @@ -40,18 +43,27 @@ class ProximityInfoState { ///////////////////////////////////////// // Defined in proximity_info_state.cpp // ///////////////////////////////////////// - void initInputParams( - const ProximityInfo *proximityInfo, const int32_t *inputCodes, const int inputLength, - const int *xCoordinates, const int *yCoordinates); + void initInputParams(const int pointerId, const float maxPointToKeyLength, + const ProximityInfo *proximityInfo, const int32_t *const inputCodes, + const int inputSize, const int *xCoordinates, const int *yCoordinates, + const int *const times, const int *const pointerIds, const bool isGeometric); ///////////////////////////////////////// // Defined here // ///////////////////////////////////////// - ProximityInfoState() {}; - inline const int *getProximityCharsAt(const int index) const { - return mInputCodes + (index * MAX_PROXIMITY_CHARS_SIZE_INTERNAL); + ProximityInfoState() + : mProximityInfo(0), mMaxPointToKeyLength(0), + mHasTouchPositionCorrectionData(false), mMostCommonKeyWidthSquare(0), mLocaleStr(), + mKeyCount(0), mCellHeight(0), mCellWidth(0), mGridHeight(0), mGridWidth(0), + mInputXs(), mInputYs(), mTimes(), mDistanceCache(), mLengthCache(), + mTouchPositionCorrectionEnabled(false), mInputSize(0) { + memset(mInputCodes, 0, sizeof(mInputCodes)); + memset(mNormalizedSquaredDistances, 0, sizeof(mNormalizedSquaredDistances)); + memset(mPrimaryInputWord, 0, sizeof(mPrimaryInputWord)); } + virtual ~ProximityInfoState() {} + inline unsigned short getPrimaryCharAt(const int index) const { return getProximityCharsAt(index)[0]; } @@ -68,14 +80,14 @@ class ProximityInfoState { } inline bool existsAdjacentProximityChars(const int index) const { - if (index < 0 || index >= mInputLength) return false; + if (index < 0 || index >= mInputSize) return false; const int currentChar = getPrimaryCharAt(index); const int leftIndex = index - 1; if (leftIndex >= 0 && existsCharInProximityAt(leftIndex, currentChar)) { return true; } const int rightIndex = index + 1; - if (rightIndex < mInputLength && existsCharInProximityAt(rightIndex, currentChar)) { + if (rightIndex < mInputSize && existsCharInProximityAt(rightIndex, currentChar)) { return true; } return false; @@ -161,7 +173,7 @@ class ProximityInfoState { } inline bool sameAsTyped(const unsigned short *word, int length) const { - if (length != mInputLength) { + if (length != mInputSize) { return false; } const int *inputCodes = mInputCodes; @@ -175,8 +187,37 @@ class ProximityInfoState { return true; } + int getDuration(const int index) const; + + bool isUsed() const { + return mInputSize > 0; + } + + uint32_t size() const { + return mInputSize; + } + + int getInputX(int index) const { + return mInputXs[index]; + } + + int getInputY(int index) const { + return mInputYs[index]; + } + + int getLengthCache(int index) const { + return mLengthCache[index]; + } + + float getPointToKeyLength(int inputIndex, int charCode, float scale); + + int getKeyKeyDistance(int key0, int key1); + + int getSpaceY(); + private: DISALLOW_COPY_AND_ASSIGN(ProximityInfoState); + typedef hash_map_compat<int, float> NearKeysDistanceMap; ///////////////////////////////////////// // Defined in proximity_info_state.cpp // ///////////////////////////////////////// @@ -185,17 +226,38 @@ class ProximityInfoState { float calculateSquaredDistanceFromSweetSpotCenter( const int keyIndex, const int inputIndex) const; + bool pushTouchPoint(const int nodeChar, int x, int y, const int time, + const bool sample, const bool isLastPoint, + NearKeysDistanceMap *const currentNearKeysDistances, + const NearKeysDistanceMap *const prevNearKeysDistances, + const NearKeysDistanceMap *const prevPrevNearKeysDistances); ///////////////////////////////////////// // Defined here // ///////////////////////////////////////// inline float square(const float x) const { return x * x; } bool hasInputCoordinates() const { - return mInputXCoordinates && mInputYCoordinates; + return mInputXs.size() > 0 && mInputYs.size() > 0; } + inline const int *getProximityCharsAt(const int index) const { + return mInputCodes + (index * MAX_PROXIMITY_CHARS_SIZE_INTERNAL); + } + + float updateNearKeysDistances(const int x, const int y, + NearKeysDistanceMap *const currentNearKeysDistances); + bool isPrevLocalMin(const NearKeysDistanceMap *const currentNearKeysDistances, + const NearKeysDistanceMap *const prevNearKeysDistances, + const NearKeysDistanceMap *const prevPrevNearKeysDistances) const; + float getPointScore( + const int x, const int y, const int time, const bool last, const float nearest, + const NearKeysDistanceMap *const currentNearKeysDistances, + const NearKeysDistanceMap *const prevNearKeysDistances, + const NearKeysDistanceMap *const prevPrevNearKeysDistances) const; + // const const ProximityInfo *mProximityInfo; + float mMaxPointToKeyLength; bool mHasTouchPositionCorrectionData; int mMostCommonKeyWidthSquare; std::string mLocaleStr; @@ -205,12 +267,15 @@ class ProximityInfoState { int mGridHeight; int mGridWidth; - const int *mInputXCoordinates; - const int *mInputYCoordinates; + std::vector<int> mInputXs; + std::vector<int> mInputYs; + std::vector<int> mTimes; + std::vector<float> mDistanceCache; + std::vector<int> mLengthCache; bool mTouchPositionCorrectionEnabled; int32_t mInputCodes[MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH_INTERNAL]; int mNormalizedSquaredDistances[MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH_INTERNAL]; - int mInputLength; + int mInputSize; unsigned short mPrimaryInputWord[MAX_WORD_LENGTH_INTERNAL]; }; } // namespace latinime diff --git a/native/jni/src/terminal_attributes.h b/native/jni/src/terminal_attributes.h index 1ae9c7cbb..9ff2772b1 100644 --- a/native/jni/src/terminal_attributes.h +++ b/native/jni/src/terminal_attributes.h @@ -30,13 +30,13 @@ class TerminalAttributes { public: class ShortcutIterator { const uint8_t *const mDict; - bool mHasNextShortcutTarget; int mPos; + bool mHasNextShortcutTarget; public: - ShortcutIterator(const uint8_t *dict, const int pos, const uint8_t flags) : mDict(dict), - mPos(pos) { - mHasNextShortcutTarget = (0 != (flags & BinaryFormat::FLAG_HAS_SHORTCUT_TARGETS)); + ShortcutIterator(const uint8_t *dict, const int pos, const uint8_t flags) + : mDict(dict), mPos(pos), + mHasNextShortcutTarget(0 != (flags & BinaryFormat::FLAG_HAS_SHORTCUT_TARGETS)) { } inline bool hasNextShortcutTarget() const { @@ -62,13 +62,6 @@ class TerminalAttributes { } }; - private: - DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalAttributes); - const uint8_t *const mDict; - const uint8_t mFlags; - const int mStartPos; - - public: TerminalAttributes(const uint8_t *const dict, const uint8_t flags, const int pos) : mDict(dict), mFlags(flags), mStartPos(pos) { } @@ -78,6 +71,16 @@ class TerminalAttributes { // skipped quickly, so we ignore it. return ShortcutIterator(mDict, mStartPos + BinaryFormat::SHORTCUT_LIST_SIZE_SIZE, mFlags); } + + bool isBlacklistedOrNotAWord() const { + return mFlags & (BinaryFormat::FLAG_IS_BLACKLISTED | BinaryFormat::FLAG_IS_NOT_A_WORD); + } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalAttributes); + const uint8_t *const mDict; + const uint8_t mFlags; + const int mStartPos; }; } // namespace latinime #endif // LATINIME_TERMINAL_ATTRIBUTES_H diff --git a/native/jni/src/unigram_dictionary.cpp b/native/jni/src/unigram_dictionary.cpp index cc6d39a29..d4c51df63 100644 --- a/native/jni/src/unigram_dictionary.cpp +++ b/native/jni/src/unigram_dictionary.cpp @@ -237,7 +237,7 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, - const int inputLength, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, + const int inputSize, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, const bool useFullEditDistance, Correction *correction, WordsPriorityQueuePool *queuePool) const { @@ -247,7 +247,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, PROF_START(1); getOneWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, bigramMap, bigramFilter, - useFullEditDistance, inputLength, correction, queuePool); + useFullEditDistance, inputSize, correction, queuePool); PROF_END(1); PROF_START(2); @@ -263,7 +263,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); if (masterQueue->size() > 0) { float nsForMaster = masterQueue->getHighestNormalizedScore( - correction->getPrimaryInputWord(), inputLength, 0, 0, 0); + correction->getPrimaryInputWord(), inputSize, 0, 0, 0); hasAutoCorrectionCandidate = (nsForMaster > START_TWO_WORDS_CORRECTION_THRESHOLD); } PROF_END(4); @@ -271,9 +271,9 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, PROF_START(5); // Multiple word suggestions if (SUGGEST_MULTIPLE_WORDS - && inputLength >= MIN_USER_TYPED_LENGTH_FOR_MULTIPLE_WORD_SUGGESTION) { + && inputSize >= MIN_USER_TYPED_LENGTH_FOR_MULTIPLE_WORD_SUGGESTION) { getSplitMultipleWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, - useFullEditDistance, inputLength, correction, queuePool, + useFullEditDistance, inputSize, correction, queuePool, hasAutoCorrectionCandidate); } PROF_END(5); @@ -304,15 +304,15 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, } void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xCoordinates, - const int *yCoordinates, const int *codes, const int inputLength, + const int *yCoordinates, const int *codes, const int inputSize, Correction *correction) const { if (DEBUG_DICT) { AKLOGI("initSuggest"); - DUMP_WORD_INT(codes, inputLength); + DUMP_WORD_INT(codes, inputSize); } - correction->initInputParams(proximityInfo, codes, inputLength, xCoordinates, yCoordinates); - const int maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH); - correction->initCorrection(proximityInfo, inputLength, maxDepth); + correction->initInputParams(proximityInfo, codes, inputSize, xCoordinates, yCoordinates); + const int maxDepth = min(inputSize * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH); + correction->initCorrection(proximityInfo, inputSize, maxDepth); } static const char QUOTE = '\''; @@ -321,15 +321,15 @@ static const char SPACE = ' '; void UnigramDictionary::getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, - const bool useFullEditDistance, const int inputLength, + const bool useFullEditDistance, const int inputSize, Correction *correction, WordsPriorityQueuePool *queuePool) const { - initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction); - getSuggestionCandidates(useFullEditDistance, inputLength, bigramMap, bigramFilter, correction, + initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputSize, correction); + getSuggestionCandidates(useFullEditDistance, inputSize, bigramMap, bigramFilter, correction, queuePool, true /* doAutoCompletion */, DEFAULT_MAX_ERRORS, FIRST_WORD_INDEX); } void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance, - const int inputLength, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, + const int inputSize, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, Correction *correction, WordsPriorityQueuePool *queuePool, const bool doAutoCompletion, const int maxErrors, const int currentWordIndex) const { uint8_t totalTraverseCount = correction->pushAndGetTotalTraverseCount(); @@ -351,7 +351,7 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance, int childCount = BinaryFormat::getGroupCountAndForwardPointer(DICT_ROOT, &rootPosition); int outputIndex = 0; - correction->initCorrectionState(rootPosition, childCount, (inputLength <= 0)); + correction->initCorrectionState(rootPosition, childCount, (inputSize <= 0)); // Depth first search while (outputIndex >= 0) { @@ -390,43 +390,44 @@ inline void UnigramDictionary::onTerminal(const int probability, WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); const int finalProbability = correction->getFinalProbability(probability, &wordPointer, &wordLength); - if (finalProbability != NOT_A_PROBABILITY) { - if (0 != finalProbability) { - // If the probability is 0, we don't want to add this word. However we still - // want to add its shortcuts (including a possible whitelist entry) if any. - addWord(wordPointer, wordLength, finalProbability, masterQueue, - Dictionary::KIND_CORRECTION); - } - const int shortcutProbability = finalProbability > 0 ? finalProbability - 1 : 0; - // Please note that the shortcut candidates will be added to the master queue only. - TerminalAttributes::ShortcutIterator iterator = - terminalAttributes.getShortcutIterator(); - while (iterator.hasNextShortcutTarget()) { - // TODO: addWord only supports weak ordering, meaning we have no means - // to control the order of the shortcuts relative to one another or to the word. - // We need to either modulate the probability of each shortcut according - // to its own shortcut probability or to make the queue - // so that the insert order is protected inside the queue for words - // with the same score. For the moment we use -1 to make sure the shortcut will - // never be in front of the word. - uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL]; - int shortcutFrequency; - const int shortcutTargetStringLength = iterator.getNextShortcutTarget( - MAX_WORD_LENGTH_INTERNAL, shortcutTarget, &shortcutFrequency); - int shortcutScore; - int kind; - if (shortcutFrequency == BinaryFormat::WHITELIST_SHORTCUT_FREQUENCY - && correction->sameAsTyped()) { - shortcutScore = S_INT_MAX; - kind = Dictionary::KIND_WHITELIST; - } else { - shortcutScore = shortcutProbability; - kind = Dictionary::KIND_CORRECTION; - } - addWord(shortcutTarget, shortcutTargetStringLength, shortcutScore, - masterQueue, kind); + if (0 != finalProbability && !terminalAttributes.isBlacklistedOrNotAWord()) { + // If the probability is 0, we don't want to add this word. However we still + // want to add its shortcuts (including a possible whitelist entry) if any. + // Furthermore, if this is not a word (shortcut only for example) or a blacklisted + // entry then we never want to suggest this. + addWord(wordPointer, wordLength, finalProbability, masterQueue, + Dictionary::KIND_CORRECTION); + } + + const int shortcutProbability = finalProbability > 0 ? finalProbability - 1 : 0; + // Please note that the shortcut candidates will be added to the master queue only. + TerminalAttributes::ShortcutIterator iterator = + terminalAttributes.getShortcutIterator(); + while (iterator.hasNextShortcutTarget()) { + // TODO: addWord only supports weak ordering, meaning we have no means + // to control the order of the shortcuts relative to one another or to the word. + // We need to either modulate the probability of each shortcut according + // to its own shortcut probability or to make the queue + // so that the insert order is protected inside the queue for words + // with the same score. For the moment we use -1 to make sure the shortcut will + // never be in front of the word. + uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL]; + int shortcutFrequency; + const int shortcutTargetStringLength = iterator.getNextShortcutTarget( + MAX_WORD_LENGTH_INTERNAL, shortcutTarget, &shortcutFrequency); + int shortcutScore; + int kind; + if (shortcutFrequency == BinaryFormat::WHITELIST_SHORTCUT_FREQUENCY + && correction->sameAsTyped()) { + shortcutScore = S_INT_MAX; + kind = Dictionary::KIND_WHITELIST; + } else { + shortcutScore = shortcutProbability; + kind = Dictionary::KIND_CORRECTION; } + addWord(shortcutTarget, shortcutTargetStringLength, shortcutScore, + masterQueue, kind); } } @@ -447,7 +448,7 @@ inline void UnigramDictionary::onTerminal(const int probability, int UnigramDictionary::getSubStringSuggestion( ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, Correction *correction, - WordsPriorityQueuePool *queuePool, const int inputLength, + WordsPriorityQueuePool *queuePool, const int inputSize, const bool hasAutoCorrectionCandidate, const int currentWordIndex, const int inputWordStartPos, const int inputWordLength, const int outputWordStartPos, const bool isSpaceProximity, int *freqArray, @@ -498,7 +499,7 @@ int UnigramDictionary::getSubStringSuggestion( int nextWordLength = 0; // TODO: Optimize init suggestion initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, - inputLength, correction); + inputSize, correction); unsigned short word[MAX_WORD_LENGTH_INTERNAL]; int freq = getMostFrequentWordLike( @@ -567,7 +568,7 @@ int UnigramDictionary::getSubStringSuggestion( *outputWordLength = tempOutputWordLength; } - if ((inputWordStartPos + inputWordLength) < inputLength) { + if ((inputWordStartPos + inputWordLength) < inputSize) { if (outputWordStartPos + nextWordLength >= MAX_WORD_LENGTH) { return FLAG_MULTIPLE_SUGGEST_SKIP; } @@ -586,7 +587,7 @@ int UnigramDictionary::getSubStringSuggestion( freqArray[i], wordLengthArray[i]); } AKLOGI("Split two words: freq = %d, length = %d, %d, isSpace ? %d", pairFreq, - inputLength, tempOutputWordLength, isSpaceProximity); + inputSize, tempOutputWordLength, isSpaceProximity); } addWord(outputWord, tempOutputWordLength, pairFreq, queuePool->getMasterQueue(), Dictionary::KIND_CORRECTION); @@ -596,7 +597,7 @@ int UnigramDictionary::getSubStringSuggestion( void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, - const bool useFullEditDistance, const int inputLength, + const bool useFullEditDistance, const int inputSize, Correction *correction, WordsPriorityQueuePool *queuePool, const bool hasAutoCorrectionCandidate, const int startInputPos, const int startWordIndex, const int outputWordLength, int *freqArray, int *wordLengthArray, @@ -607,11 +608,11 @@ void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, } if (startWordIndex >= 1 && (hasAutoCorrectionCandidate - || inputLength < MIN_INPUT_LENGTH_FOR_THREE_OR_MORE_WORDS_CORRECTION)) { + || inputSize < MIN_INPUT_LENGTH_FOR_THREE_OR_MORE_WORDS_CORRECTION)) { // Do not suggest 3+ words if already has auto correction candidate return; } - for (int i = startInputPos + 1; i < inputLength; ++i) { + for (int i = startInputPos + 1; i < inputSize; ++i) { if (DEBUG_CORRECTION_FREQ) { AKLOGI("Multi words(%d), start in %d sep %d start out %d", startWordIndex, startInputPos, i, outputWordLength); @@ -622,7 +623,7 @@ void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, int inputWordStartPos = startInputPos; int inputWordLength = i - startInputPos; const int suggestionFlag = getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, - codes, useFullEditDistance, correction, queuePool, inputLength, + codes, useFullEditDistance, correction, queuePool, inputSize, hasAutoCorrectionCandidate, startWordIndex, inputWordStartPos, inputWordLength, outputWordLength, true /* not used */, freqArray, wordLengthArray, outputWord, &tempOutputWordLength); @@ -639,14 +640,14 @@ void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, // Next word // Missing space inputWordStartPos = i; - inputWordLength = inputLength - i; + inputWordLength = inputSize - i; if(getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes, - useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate, + useFullEditDistance, correction, queuePool, inputSize, hasAutoCorrectionCandidate, startWordIndex + 1, inputWordStartPos, inputWordLength, tempOutputWordLength, false /* missing space */, freqArray, wordLengthArray, outputWord, 0) != FLAG_MULTIPLE_SUGGEST_CONTINUE) { getMultiWordsSuggestionRec(proximityInfo, xcoordinates, ycoordinates, codes, - useFullEditDistance, inputLength, correction, queuePool, + useFullEditDistance, inputSize, correction, queuePool, hasAutoCorrectionCandidate, inputWordStartPos, startWordIndex + 1, tempOutputWordLength, freqArray, wordLengthArray, outputWord); } @@ -669,7 +670,7 @@ void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, AKLOGI("Do mistyped space correction"); } getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes, - useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate, + useFullEditDistance, correction, queuePool, inputSize, hasAutoCorrectionCandidate, startWordIndex + 1, inputWordStartPos, inputWordLength, tempOutputWordLength, true /* mistyped space */, freqArray, wordLengthArray, outputWord, 0); } @@ -677,10 +678,10 @@ void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, void UnigramDictionary::getSplitMultipleWordsSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, - const bool useFullEditDistance, const int inputLength, + const bool useFullEditDistance, const int inputSize, Correction *correction, WordsPriorityQueuePool *queuePool, const bool hasAutoCorrectionCandidate) const { - if (inputLength >= MAX_WORD_LENGTH) return; + if (inputSize >= MAX_WORD_LENGTH) return; if (DEBUG_DICT) { AKLOGI("--- Suggest multiple words"); } @@ -693,7 +694,7 @@ void UnigramDictionary::getSplitMultipleWordsSuggestions(ProximityInfo *proximit const int startInputPos = 0; const int startWordIndex = 0; getMultiWordsSuggestionRec(proximityInfo, xcoordinates, ycoordinates, codes, - useFullEditDistance, inputLength, correction, queuePool, hasAutoCorrectionCandidate, + useFullEditDistance, inputSize, correction, queuePool, hasAutoCorrectionCandidate, startInputPos, startWordIndex, outputWordLength, freqArray, wordLengthArray, outputWord); } @@ -701,13 +702,13 @@ void UnigramDictionary::getSplitMultipleWordsSuggestions(ProximityInfo *proximit // Wrapper for getMostFrequentWordLikeInner, which matches it to the previous // interface. inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex, - const int inputLength, Correction *correction, unsigned short *word) const { - uint16_t inWord[inputLength]; + const int inputSize, Correction *correction, unsigned short *word) const { + uint16_t inWord[inputSize]; - for (int i = 0; i < inputLength; ++i) { + for (int i = 0; i < inputSize; ++i) { inWord[i] = (uint16_t)correction->getPrimaryCharAt(startInputIndex + i); } - return getMostFrequentWordLikeInner(inWord, inputLength, word); + return getMostFrequentWordLikeInner(inWord, inputSize, word); } // This function will take the position of a character array within a CharGroup, @@ -842,6 +843,12 @@ int UnigramDictionary::getFrequency(const int32_t *const inWord, const int lengt return NOT_A_PROBABILITY; } const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); + if (flags & (BinaryFormat::FLAG_IS_BLACKLISTED | BinaryFormat::FLAG_IS_NOT_A_WORD)) { + // If this is not a word, or if it's a blacklisted entry, it should behave as + // having no frequency outside of the suggestion process (where it should be used + // for shortcuts). + return NOT_A_PROBABILITY; + } const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags)); if (hasMultipleChars) { pos = BinaryFormat::skipOtherCharacters(root, pos); diff --git a/native/jni/src/unigram_dictionary.h b/native/jni/src/unigram_dictionary.h index 6083f0175..2c6622210 100644 --- a/native/jni/src/unigram_dictionary.h +++ b/native/jni/src/unigram_dictionary.h @@ -53,7 +53,7 @@ class UnigramDictionary { private: DISALLOW_IMPLICIT_CONSTRUCTORS(UnigramDictionary); void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, - const int *ycoordinates, const int *codes, const int inputLength, + const int *ycoordinates, const int *codes, const int inputSize, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, const bool useFullEditDistance, Correction *correction, WordsPriorityQueuePool *queuePool) const; @@ -72,16 +72,16 @@ class UnigramDictionary { Correction *correction) const; void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const std::map<int, int> *bigramMap, - const uint8_t *bigramFilter, const bool useFullEditDistance, const int inputLength, + const uint8_t *bigramFilter, const bool useFullEditDistance, const int inputSize, Correction *correction, WordsPriorityQueuePool *queuePool) const; void getSuggestionCandidates( - const bool useFullEditDistance, const int inputLength, + const bool useFullEditDistance, const int inputSize, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, Correction *correction, WordsPriorityQueuePool *queuePool, const bool doAutoCompletion, const int maxErrors, const int currentWordIndex) const; void getSplitMultipleWordsSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, - const bool useFullEditDistance, const int inputLength, + const bool useFullEditDistance, const int inputSize, Correction *correction, WordsPriorityQueuePool *queuePool, const bool hasAutoCorrectionCandidate) const; void onTerminal(const int freq, const TerminalAttributes& terminalAttributes, @@ -92,21 +92,21 @@ class UnigramDictionary { const uint8_t *bigramFilter, Correction *correction, int *newCount, int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool, const int currentWordIndex) const; - int getMostFrequentWordLike(const int startInputIndex, const int inputLength, + int getMostFrequentWordLike(const int startInputIndex, const int inputSize, Correction *correction, unsigned short *word) const; int getMostFrequentWordLikeInner(const uint16_t *const inWord, const int length, short unsigned int *outWord) const; int getSubStringSuggestion( ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, Correction *correction, - WordsPriorityQueuePool *queuePool, const int inputLength, + WordsPriorityQueuePool *queuePool, const int inputSize, const bool hasAutoCorrectionCandidate, const int currentWordIndex, const int inputWordStartPos, const int inputWordLength, const int outputWordStartPos, const bool isSpaceProximity, int *freqArray, int *wordLengthArray, unsigned short *outputWord, int *outputWordLength) const; void getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, - const bool useFullEditDistance, const int inputLength, + const bool useFullEditDistance, const int inputSize, Correction *correction, WordsPriorityQueuePool *queuePool, const bool hasAutoCorrectionCandidate, const int startPos, const int startWordIndex, const int outputWordLength, int *freqArray, int *wordLengthArray, diff --git a/native/jni/src/words_priority_queue.h b/native/jni/src/words_priority_queue.h index 1e4e00a23..19efa5da3 100644 --- a/native/jni/src/words_priority_queue.h +++ b/native/jni/src/words_priority_queue.h @@ -44,17 +44,16 @@ class WordsPriorityQueue { } }; - WordsPriorityQueue(int maxWords, int maxWordLength) : - MAX_WORDS((unsigned int) maxWords), MAX_WORD_LENGTH( - (unsigned int) maxWordLength) { - mSuggestedWords = new SuggestedWord[maxWordLength]; + WordsPriorityQueue(int maxWords, int maxWordLength) + : mSuggestions(), MAX_WORDS(static_cast<unsigned int>(maxWords)), + MAX_WORD_LENGTH(static_cast<unsigned int>(maxWordLength)), + mSuggestedWords(new SuggestedWord[maxWordLength]), mHighestSuggestedWord(0) { for (int i = 0; i < maxWordLength; ++i) { mSuggestedWords[i].mUsed = false; } - mHighestSuggestedWord = 0; } - ~WordsPriorityQueue() { + virtual ~WordsPriorityQueue() { delete[] mSuggestedWords; } diff --git a/native/jni/src/words_priority_queue_pool.h b/native/jni/src/words_priority_queue_pool.h index 38887291e..c5de9797f 100644 --- a/native/jni/src/words_priority_queue_pool.h +++ b/native/jni/src/words_priority_queue_pool.h @@ -24,9 +24,10 @@ namespace latinime { class WordsPriorityQueuePool { public: - WordsPriorityQueuePool(int mainQueueMaxWords, int subQueueMaxWords, int maxWordLength) { - // Note: using placement new() requires the caller to call the destructor explicitly. - mMasterQueue = new(mMasterQueueBuf) WordsPriorityQueue(mainQueueMaxWords, maxWordLength); + WordsPriorityQueuePool(int mainQueueMaxWords, int subQueueMaxWords, int maxWordLength) + // Note: using placement new() requires the caller to call the destructor explicitly. + : mMasterQueue(new(mMasterQueueBuf) WordsPriorityQueue( + mainQueueMaxWords, maxWordLength)) { for (int i = 0, subQueueBufOffset = 0; i < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS * SUB_QUEUE_MAX_COUNT; ++i, subQueueBufOffset += sizeof(WordsPriorityQueue)) { @@ -85,11 +86,11 @@ class WordsPriorityQueuePool { private: DISALLOW_IMPLICIT_CONSTRUCTORS(WordsPriorityQueuePool); + char mMasterQueueBuf[sizeof(WordsPriorityQueue)]; + char mSubQueueBuf[SUB_QUEUE_MAX_COUNT * MULTIPLE_WORDS_SUGGESTION_MAX_WORDS + * sizeof(WordsPriorityQueue)]; WordsPriorityQueue *mMasterQueue; WordsPriorityQueue *mSubQueues[SUB_QUEUE_MAX_COUNT * MULTIPLE_WORDS_SUGGESTION_MAX_WORDS]; - char mMasterQueueBuf[sizeof(WordsPriorityQueue)]; - char mSubQueueBuf[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS - * SUB_QUEUE_MAX_COUNT * sizeof(WordsPriorityQueue)]; }; } // namespace latinime #endif // LATINIME_WORDS_PRIORITY_QUEUE_POOL_H |