diff options
Diffstat (limited to 'native/jni/src')
22 files changed, 254 insertions, 260 deletions
diff --git a/native/jni/src/bigram_dictionary.cpp b/native/jni/src/bigram_dictionary.cpp index e62ae6fd9..44dc75e9c 100644 --- a/native/jni/src/bigram_dictionary.cpp +++ b/native/jni/src/bigram_dictionary.cpp @@ -26,8 +26,7 @@ namespace latinime { -BigramDictionary::BigramDictionary(const unsigned char *dict, int maxWordLength, int maxPredictions) - : DICT(dict), MAX_WORD_LENGTH(maxWordLength), MAX_PREDICTIONS(maxPredictions) { +BigramDictionary::BigramDictionary(const uint8_t *const streamStart) : DICT_ROOT(streamStart) { if (DEBUG_DICT) { AKLOGI("BigramDictionary - constructor"); } @@ -36,7 +35,7 @@ BigramDictionary::BigramDictionary(const unsigned char *dict, int maxWordLength, BigramDictionary::~BigramDictionary() { } -bool BigramDictionary::addWordBigram(int *word, int length, int frequency, int *bigramFreq, +void BigramDictionary::addWordBigram(int *word, int length, int frequency, int *bigramFreq, int *bigramCodePoints, int *outputTypes) const { word[length] = 0; if (DEBUG_DICT) { @@ -49,7 +48,7 @@ bool BigramDictionary::addWordBigram(int *word, int length, int frequency, int * // Find the right insertion point int insertAt = 0; - while (insertAt < MAX_PREDICTIONS) { + while (insertAt < MAX_RESULTS) { if (frequency > bigramFreq[insertAt] || (bigramFreq[insertAt] == frequency && length < Dictionary::wideStrLen( bigramCodePoints + insertAt * MAX_WORD_LENGTH))) { @@ -58,35 +57,34 @@ bool BigramDictionary::addWordBigram(int *word, int length, int frequency, int * insertAt++; } if (DEBUG_DICT) { - AKLOGI("Bigram: InsertAt -> %d MAX_PREDICTIONS: %d", insertAt, MAX_PREDICTIONS); + AKLOGI("Bigram: InsertAt -> %d MAX_RESULTS: %d", insertAt, MAX_RESULTS); } - if (insertAt < MAX_PREDICTIONS) { - memmove(bigramFreq + (insertAt + 1), - bigramFreq + insertAt, - (MAX_PREDICTIONS - insertAt - 1) * sizeof(bigramFreq[0])); - bigramFreq[insertAt] = frequency; - outputTypes[insertAt] = Dictionary::KIND_PREDICTION; - memmove(bigramCodePoints + (insertAt + 1) * MAX_WORD_LENGTH, - bigramCodePoints + insertAt * MAX_WORD_LENGTH, - (MAX_PREDICTIONS - insertAt - 1) * sizeof(bigramCodePoints[0]) * MAX_WORD_LENGTH); - int *dest = bigramCodePoints + insertAt * MAX_WORD_LENGTH; - while (length--) { - *dest++ = *word++; - } - *dest = 0; // NULL terminate - if (DEBUG_DICT) { - AKLOGI("Bigram: Added word at %d", insertAt); - } - return true; + if (insertAt >= MAX_RESULTS) { + return; + } + memmove(bigramFreq + (insertAt + 1), + bigramFreq + insertAt, + (MAX_RESULTS - insertAt - 1) * sizeof(bigramFreq[0])); + bigramFreq[insertAt] = frequency; + outputTypes[insertAt] = Dictionary::KIND_PREDICTION; + memmove(bigramCodePoints + (insertAt + 1) * MAX_WORD_LENGTH, + bigramCodePoints + insertAt * MAX_WORD_LENGTH, + (MAX_RESULTS - insertAt - 1) * sizeof(bigramCodePoints[0]) * MAX_WORD_LENGTH); + int *dest = bigramCodePoints + insertAt * MAX_WORD_LENGTH; + while (length--) { + *dest++ = *word++; + } + *dest = 0; // NULL terminate + if (DEBUG_DICT) { + AKLOGI("Bigram: Added word at %d", insertAt); } - return false; } /* Parameters : * prevWord: the word before, the one for which we need to look up bigrams. * prevWordLength: its length. - * inputCodes: what user typed, in the same format as for UnigramDictionary::getSuggestions. - * codesSize: the size of the codes array. + * inputCodePoints: what user typed, in the same format as for UnigramDictionary::getSuggestions. + * inputSize: the size of the codes array. * bigramCodePoints: an array for output, at the same format as outwords for getSuggestions. * bigramFreq: an array to output frequencies. * outputTypes: an array to output types. @@ -98,12 +96,12 @@ bool BigramDictionary::addWordBigram(int *word, int length, int frequency, int * * and the bigrams are used to boost unigram result scores, it makes little sense to * reduce their scope to the ones that match the first letter. */ -int BigramDictionary::getBigrams(const int *prevWord, int prevWordLength, int *inputCodes, - int codesSize, int *bigramCodePoints, int *bigramFreq, int *outputTypes) const { +int BigramDictionary::getBigrams(const int *prevWord, int prevWordLength, int *inputCodePoints, + int inputSize, int *bigramCodePoints, int *bigramFreq, int *outputTypes) const { // TODO: remove unused arguments, and refrain from storing stuff in members of this class // TODO: have "in" arguments before "out" ones, and make out args explicit in the name - const uint8_t *const root = DICT; + const uint8_t *const root = DICT_ROOT; int pos = getBigramListPositionForWord(prevWord, prevWordLength, false /* forceLowerCaseSearch */); // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams @@ -125,8 +123,8 @@ int BigramDictionary::getBigrams(const int *prevWord, int prevWordLength, int *i const int length = BinaryFormat::getWordAtAddress(root, bigramPos, MAX_WORD_LENGTH, bigramBuffer, &unigramFreq); - // codesSize == 0 means we are trying to find bigram predictions. - if (codesSize < 1 || checkFirstCharacter(bigramBuffer, inputCodes)) { + // inputSize == 0 means we are trying to find bigram predictions. + if (inputSize < 1 || checkFirstCharacter(bigramBuffer, inputCodePoints)) { const int bigramFreqTemp = BinaryFormat::MASK_ATTRIBUTE_FREQUENCY & bigramFlags; // Due to space constraints, the frequency for bigrams is approximate - the lower the // unigram frequency, the worse the precision. The theoritical maximum error in @@ -135,13 +133,12 @@ int BigramDictionary::getBigrams(const int *prevWord, int prevWordLength, int *i // here, but it can't get too bad. const int frequency = BinaryFormat::computeFrequencyForBigram(unigramFreq, bigramFreqTemp); - if (addWordBigram(bigramBuffer, length, frequency, bigramFreq, bigramCodePoints, - outputTypes)) { - ++bigramCount; - } + addWordBigram(bigramBuffer, length, frequency, bigramFreq, bigramCodePoints, + outputTypes); + ++bigramCount; } } while (BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags); - return bigramCount; + return min(bigramCount, MAX_RESULTS); } // Returns a pointer to the start of the bigram list. @@ -149,7 +146,7 @@ int BigramDictionary::getBigrams(const int *prevWord, int prevWordLength, int *i int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength, const bool forceLowerCaseSearch) const { if (0 >= prevWordLength) return 0; - const uint8_t *const root = DICT; + const uint8_t *const root = DICT_ROOT; int pos = BinaryFormat::getTerminalPosition(root, prevWord, prevWordLength, forceLowerCaseSearch); @@ -170,7 +167,7 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in void BigramDictionary::fillBigramAddressToFrequencyMapAndFilter(const int *prevWord, const int prevWordLength, std::map<int, int> *map, uint8_t *filter) const { memset(filter, 0, BIGRAM_FILTER_BYTE_SIZE); - const uint8_t *const root = DICT; + const uint8_t *const root = DICT_ROOT; int pos = getBigramListPositionForWord(prevWord, prevWordLength, false /* forceLowerCaseSearch */); if (0 == pos) { @@ -191,17 +188,17 @@ void BigramDictionary::fillBigramAddressToFrequencyMapAndFilter(const int *prevW } while (0 != (BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags)); } -bool BigramDictionary::checkFirstCharacter(int *word, int *inputCodes) const { +bool BigramDictionary::checkFirstCharacter(int *word, int *inputCodePoints) const { // Checks whether this word starts with same character or neighboring characters of // what user typed. int maxAlt = MAX_ALTERNATIVES; - const int firstBaseChar = toBaseLowerCase(*word); + const int firstBaseLowerCodePoint = toBaseLowerCase(*word); while (maxAlt > 0) { - if (toBaseLowerCase(*inputCodes) == firstBaseChar) { + if (toBaseLowerCase(*inputCodePoints) == firstBaseLowerCodePoint) { return true; } - inputCodes++; + inputCodePoints++; maxAlt--; } return false; @@ -209,7 +206,7 @@ bool BigramDictionary::checkFirstCharacter(int *word, int *inputCodes) const { bool BigramDictionary::isValidBigram(const int *word1, int length1, const int *word2, int length2) const { - const uint8_t *const root = DICT; + const uint8_t *const root = DICT_ROOT; int pos = getBigramListPositionForWord(word1, length1, false /* forceLowerCaseSearch */); // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams if (0 == pos) return false; diff --git a/native/jni/src/bigram_dictionary.h b/native/jni/src/bigram_dictionary.h index 150192de2..2ce6c1d0d 100644 --- a/native/jni/src/bigram_dictionary.h +++ b/native/jni/src/bigram_dictionary.h @@ -26,8 +26,8 @@ namespace latinime { class BigramDictionary { public: - BigramDictionary(const unsigned char *dict, int maxWordLength, int maxPredictions); - int getBigrams(const int *word, int length, int *inputCodes, int codesSize, int *outWords, + BigramDictionary(const uint8_t *const streamStart); + int getBigrams(const int *word, int length, int *inputCodePoints, int inputSize, int *outWords, int *frequencies, int *outputTypes) const; void fillBigramAddressToFrequencyMapAndFilter(const int *prevWord, const int prevWordLength, std::map<int, int> *map, uint8_t *filter) const; @@ -35,20 +35,13 @@ class BigramDictionary { ~BigramDictionary(); private: DISALLOW_IMPLICIT_CONSTRUCTORS(BigramDictionary); - bool addWordBigram(int *word, int length, int frequency, int *bigramFreq, int *bigramCodePoints, + void addWordBigram(int *word, int length, int frequency, int *bigramFreq, int *bigramCodePoints, int *outputTypes) const; - int getBigramAddress(int *pos, bool advance); - int getBigramFreq(int *pos); - void searchForTerminalNode(int addressLookingFor, int frequency); - bool getFirstBitOfByte(int *pos) { return (DICT[*pos] & 0x80) > 0; } - bool getSecondBitOfByte(int *pos) { return (DICT[*pos] & 0x40) > 0; } - bool checkFirstCharacter(int *word, int *inputCodes) const; + bool checkFirstCharacter(int *word, int *inputCodePoints) const; int getBigramListPositionForWord(const int *prevWord, const int prevWordLength, const bool forceLowerCaseSearch) const; - const unsigned char *DICT; - const int MAX_WORD_LENGTH; - const int MAX_PREDICTIONS; + const uint8_t *const DICT_ROOT; // TODO: Re-implement proximity correction for bigram correction static const int MAX_ALTERNATIVES = 1; }; diff --git a/native/jni/src/correction.cpp b/native/jni/src/correction.cpp index a0256ee40..e892c8591 100644 --- a/native/jni/src/correction.cpp +++ b/native/jni/src/correction.cpp @@ -16,10 +16,13 @@ #define LOG_TAG "LatinIME: correction.cpp" +#include <cmath> + #include "char_utils.h" #include "correction.h" #include "defines.h" #include "proximity_info_state.h" +#include "suggest_utils.h" namespace latinime { @@ -30,7 +33,7 @@ class ProximityInfo; ///////////////////////////// inline static void initEditDistance(int *editDistanceTable) { - for (int i = 0; i <= MAX_WORD_LENGTH_INTERNAL; ++i) { + for (int i = 0; i <= MAX_WORD_LENGTH; ++i) { editDistanceTable[i] = i; } } @@ -77,7 +80,7 @@ void Correction::initCorrection(const ProximityInfo *pi, const int inputSize, co mMaxDepth = maxDepth; mMaxEditDistance = mInputSize < 5 ? 2 : mInputSize / 2; // TODO: This is not supposed to be required. Check what's going wrong with - // editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL] + // editDistance[0 ~ MAX_WORD_LENGTH] initEditDistance(mEditDistanceTable); } @@ -671,27 +674,9 @@ inline static bool isUpperCase(unsigned short c) { if (i < adjustedProximityMatchedCount) { multiplyIntCapped(typedLetterMultiplier, &finalFreq); } - if (squaredDistance >= 0) { - // Promote or demote the score according to the distance from the sweet spot - static const float A = ZERO_DISTANCE_PROMOTION_RATE / 100.0f; - static const float B = 1.0f; - static const float C = 0.5f; - static const float MIN = 0.3f; - static const float R1 = NEUTRAL_SCORE_SQUARED_RADIUS; - static const float R2 = HALF_SCORE_SQUARED_RADIUS; - const float x = static_cast<float>(squaredDistance) - / ProximityInfoState::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR; - const float factor = max((x < R1) - ? (A * (R1 - x) + B * x) / R1 - : (B * (R2 - x) + C * (x - R1)) / (R2 - R1), MIN); - // factor is a piecewise linear function like: - // A -_ . - // ^-_ . - // B \ . - // \_ . - // C ------------. - // . - // 0 R1 R2 . + const float factor = + SuggestUtils::getDistanceScalingFactor(static_cast<float>(squaredDistance)); + if (factor > 0.0f) { multiplyRate((int)(factor * 100.0f), &finalFreq); } else if (squaredDistance == PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO) { multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq); diff --git a/native/jni/src/correction.h b/native/jni/src/correction.h index 8c477716a..89e300d75 100644 --- a/native/jni/src/correction.h +++ b/native/jni/src/correction.h @@ -57,7 +57,7 @@ class Correction { // Non virtual inline destructor -- never inherit this class ~Correction() {} void resetCorrection(); - void initCorrection(const ProximityInfo *pi, const int inputSize, const int maxWordLength); + void initCorrection(const ProximityInfo *pi, const int inputSize, const int maxDepth); void initCorrectionState(const int rootPos, const int childCount, const bool traverseAll); // TODO: remove @@ -237,14 +237,14 @@ class Correction { int mTotalTraverseCount; // The following arrays are state buffer. - int mWord[MAX_WORD_LENGTH_INTERNAL]; - int mDistances[MAX_WORD_LENGTH_INTERNAL]; + int mWord[MAX_WORD_LENGTH]; + int mDistances[MAX_WORD_LENGTH]; // Edit distance calculation requires a buffer with (N+1)^2 length for the input length N. // Caveat: Do not create multiple tables per thread as this table eats up RAM a lot. - int mEditDistanceTable[(MAX_WORD_LENGTH_INTERNAL + 1) * (MAX_WORD_LENGTH_INTERNAL + 1)]; + int mEditDistanceTable[(MAX_WORD_LENGTH + 1) * (MAX_WORD_LENGTH + 1)]; - CorrectionState mCorrectionStates[MAX_WORD_LENGTH_INTERNAL]; + CorrectionState mCorrectionStates[MAX_WORD_LENGTH]; // The following member variables are being used as cache values of the correction state. bool mNeedsToTraverseAllNodes; @@ -336,7 +336,7 @@ inline Correction::CorrectionType Correction::processUnrelatedCorrectionType() { AK_FORCE_INLINE static void calcEditDistanceOneStep(int *editDistanceTable, const int *input, const int inputSize, const int *output, const int outputLength) { - // TODO: Make sure that editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL] is not touched. + // TODO: Make sure that editDistance[0 ~ MAX_WORD_LENGTH] is not touched. // Let dp[i][j] be editDistanceTable[i * (inputSize + 1) + j]. // Assuming that dp[0][0] ... dp[outputLength - 1][inputSize] are already calculated, // and calculate dp[ouputLength][0] ... dp[outputLength][inputSize]. diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h index 96abfe8d4..4d5a2b261 100644 --- a/native/jni/src/defines.h +++ b/native/jni/src/defines.h @@ -23,19 +23,25 @@ #define AK_FORCE_INLINE inline #endif // __GNUC__ -// This must be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java -#define MAX_WORD_LENGTH_INTERNAL 48 +#if defined(FLAG_DO_PROFILE) || defined(FLAG_DBG) +#undef AK_FORCE_INLINE +#define AK_FORCE_INLINE inline +#endif // defined(FLAG_DO_PROFILE) || defined(FLAG_DBG) + +// Must be identical to Constants.Dictionary.MAX_WORD_LENGTH in Java +#define MAX_WORD_LENGTH 48 +// Must be identical to BinaryDictionary.MAX_RESULTS in Java +#define MAX_RESULTS 18 #if defined(FLAG_DO_PROFILE) || defined(FLAG_DBG) #include <android/log.h> #ifndef LOG_TAG #define LOG_TAG "LatinIME: " -#endif +#endif // LOG_TAG #define AKLOGE(fmt, ...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, fmt, ##__VA_ARGS__) #define AKLOGI(fmt, ...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, fmt, ##__VA_ARGS__) -#define DUMP_RESULT(words, frequencies, maxWordCount, maxWordLength) do { \ - dumpResult(words, frequencies, maxWordCount, maxWordLength); } while (0) +#define DUMP_RESULT(words, frequencies) do { dumpResult(words, frequencies); } while (0) #define DUMP_WORD(word, length) do { dumpWord(word, length); } while (0) #define INTS_TO_CHARS(input, length, output) do { \ intArrayToCharArray(input, length, output); } while (0) @@ -45,7 +51,7 @@ AK_FORCE_INLINE static int intArrayToCharArray(const int *source, const int sour char *dest) { int si = 0; int di = 0; - while (si < sourceSize && di < MAX_WORD_LENGTH_INTERNAL - 1 && 0 != source[si]) { + while (si < sourceSize && di < MAX_WORD_LENGTH - 1 && 0 != source[si]) { const int codePoint = source[si++]; if (codePoint < 0x7F) { dest[di++] = codePoint; @@ -71,11 +77,10 @@ static inline void dumpWordInfo(const int *word, const int length, const int ran } } -static inline void dumpResult(const int *outWords, const int *frequencies, const int maxWordCounts, - const int maxWordLength) { +static inline void dumpResult(const int *outWords, const int *frequencies) { AKLOGI("--- DUMP RESULT ---------"); - for (int i = 0; i < maxWordCounts; ++i) { - dumpWordInfo(&outWords[i * maxWordLength], maxWordLength, i, frequencies[i]); + for (int i = 0; i < MAX_RESULTS; ++i) { + dumpWordInfo(&outWords[i * MAX_WORD_LENGTH], MAX_WORD_LENGTH, i, frequencies[i]); } AKLOGI("-------------------------"); } @@ -110,23 +115,23 @@ static inline void showStackTrace() { } free(strs); } -#else +#else // __ANDROID__ #include <cassert> #define DO_ASSERT_TEST #define ASSERT(success) assert(success) #define SHOW_STACK_TRACE -#endif +#endif // __ANDROID__ -#else +#else // defined(FLAG_DO_PROFILE) || defined(FLAG_DBG) #define AKLOGE(fmt, ...) #define AKLOGI(fmt, ...) -#define DUMP_RESULT(words, frequencies, maxWordCount, maxWordLength) +#define DUMP_RESULT(words, frequencies) #define DUMP_WORD(word, length) #undef DO_ASSERT_TEST #define ASSERT(success) #define SHOW_STACK_TRACE #define INTS_TO_CHARS(input, length, output) -#endif +#endif // defined(FLAG_DO_PROFILE) || defined(FLAG_DBG) #ifdef FLAG_DO_PROFILE // Profiler @@ -311,7 +316,7 @@ static inline void prof_out(void) { #define TWO_WORDS_CAPITALIZED_DEMOTION_RATE 50 #define TWO_WORDS_CORRECTION_DEMOTION_BASE 80 #define TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER 1 -#define ZERO_DISTANCE_PROMOTION_RATE 110 +#define ZERO_DISTANCE_PROMOTION_RATE 110.0f #define NEUTRAL_SCORE_SQUARED_RADIUS 8.0f #define HALF_SCORE_SQUARED_RADIUS 32.0f #define MAX_FREQ 255 @@ -347,14 +352,8 @@ static inline void prof_out(void) { #define SUPPRESS_SHORT_MULTIPLE_WORDS_THRESHOLD_FREQ (MAX_FREQ * 58 / 100) #define MAX_DEPTH_MULTIPLIER 3 - #define FIRST_WORD_INDEX 0 -#define MAX_SPACES_INTERNAL 16 - -// TODO: Change this to MAX_WORDS, remove MAX_WORDS in Java, and stop getting it from Java -#define MAX_WORDS_INTERNAL 18 - // Max Distance between point to key #define MAX_POINT_TO_KEY_LENGTH 10000000 diff --git a/native/jni/src/dictionary.cpp b/native/jni/src/dictionary.cpp index 167b36f11..2be1f4f39 100644 --- a/native/jni/src/dictionary.cpp +++ b/native/jni/src/dictionary.cpp @@ -28,22 +28,13 @@ namespace latinime { -Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust, int maxWordLength, - int maxWords, int maxPredictions) +Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust) : mDict(static_cast<unsigned char *>(dict)), mOffsetDict((static_cast<unsigned char *>(dict)) + BinaryFormat::getHeaderSize(mDict)), mDictSize(dictSize), mMmapFd(mmapFd), mDictBufAdjust(dictBufAdjust), - mUnigramDictionary(new UnigramDictionary(mOffsetDict, maxWordLength, maxWords, - BinaryFormat::getFlags(mDict))), - mBigramDictionary(new BigramDictionary(mOffsetDict, maxWordLength, maxPredictions)), - mGestureSuggest(new GestureSuggest(maxWordLength, maxWords)) { - if (DEBUG_DICT) { - if (MAX_WORD_LENGTH_INTERNAL < maxWordLength) { - AKLOGI("Max word length (%d) is greater than %d", - maxWordLength, MAX_WORD_LENGTH_INTERNAL); - AKLOGI("IN NATIVE SUGGEST Version: %d", (mDict[0] & 0xFF)); - } - } + mUnigramDictionary(new UnigramDictionary(mOffsetDict, BinaryFormat::getFlags(mDict))), + mBigramDictionary(new BigramDictionary(mOffsetDict)), + mGestureSuggest(new GestureSuggest()) { } Dictionary::~Dictionary() { @@ -53,38 +44,38 @@ Dictionary::~Dictionary() { } int Dictionary::getSuggestions(ProximityInfo *proximityInfo, void *traverseSession, - int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *codes, - int codesSize, int *prevWordChars, int prevWordLength, int commitPoint, bool isGesture, + int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints, + int inputSize, int *prevWordCodePoints, int prevWordLength, int commitPoint, bool isGesture, bool useFullEditDistance, int *outWords, int *frequencies, int *spaceIndices, int *outputTypes) const { int result = 0; if (isGesture) { DicTraverseWrapper::initDicTraverseSession( - traverseSession, this, prevWordChars, prevWordLength); - result = mGestureSuggest->getSuggestions(proximityInfo, traverseSession, - xcoordinates, ycoordinates, times, pointerIds, codes, codesSize, commitPoint, - outWords, frequencies, spaceIndices, outputTypes); + traverseSession, this, prevWordCodePoints, prevWordLength); + result = mGestureSuggest->getSuggestions(proximityInfo, traverseSession, xcoordinates, + ycoordinates, times, pointerIds, inputCodePoints, inputSize, commitPoint, outWords, + frequencies, spaceIndices, outputTypes); if (DEBUG_DICT) { - DUMP_RESULT(outWords, frequencies, 18 /* MAX_WORDS */, MAX_WORD_LENGTH_INTERNAL); + DUMP_RESULT(outWords, frequencies); } return result; } else { std::map<int, int> bigramMap; uint8_t bigramFilter[BIGRAM_FILTER_BYTE_SIZE]; - mBigramDictionary->fillBigramAddressToFrequencyMapAndFilter(prevWordChars, + mBigramDictionary->fillBigramAddressToFrequencyMapAndFilter(prevWordCodePoints, prevWordLength, &bigramMap, bigramFilter); - result = mUnigramDictionary->getSuggestions(proximityInfo, xcoordinates, - ycoordinates, codes, codesSize, &bigramMap, bigramFilter, - useFullEditDistance, outWords, frequencies, outputTypes); + result = mUnigramDictionary->getSuggestions(proximityInfo, xcoordinates, ycoordinates, + inputCodePoints, inputSize, &bigramMap, bigramFilter, useFullEditDistance, outWords, + frequencies, outputTypes); return result; } } -int Dictionary::getBigrams(const int *word, int length, int *codes, int codesSize, +int Dictionary::getBigrams(const int *word, int length, int *inputCodePoints, int inputSize, int *outWords, int *frequencies, int *outputTypes) const { if (length <= 0) return 0; - return mBigramDictionary->getBigrams(word, length, codes, codesSize, outWords, frequencies, - outputTypes); + return mBigramDictionary->getBigrams(word, length, inputCodePoints, inputSize, outWords, + frequencies, outputTypes); } int Dictionary::getFrequency(const int *word, int length) const { diff --git a/native/jni/src/dictionary.h b/native/jni/src/dictionary.h index 26edc4f2f..121cf058d 100644 --- a/native/jni/src/dictionary.h +++ b/native/jni/src/dictionary.h @@ -41,16 +41,15 @@ class Dictionary { const static int KIND_SHORTCUT = 7; // A shortcut const static int KIND_PREDICTION = 8; // A prediction (== a suggestion with no input) - Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust, int maxWordLength, - int maxWords, int maxPredictions); + Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust); int getSuggestions(ProximityInfo *proximityInfo, void *traverseSession, int *xcoordinates, - int *ycoordinates, int *times, int *pointerIds, int *codes, int codesSize, - int *prevWordChars, int prevWordLength, int commitPoint, bool isGesture, + int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints, int inputSize, + int *prevWordCodePoints, int prevWordLength, int commitPoint, bool isGesture, bool useFullEditDistance, int *outWords, int *frequencies, int *spaceIndices, int *outputTypes) const; - int getBigrams(const int *word, int length, int *codes, int codesSize, int *outWords, + int getBigrams(const int *word, int length, int *inputCodePoints, int inputSize, int *outWords, int *frequencies, int *outputTypes) const; int getFrequency(const int *word, int length) const; diff --git a/native/jni/src/geometry_utils.h b/native/jni/src/geometry_utils.h index 4060a7bd3..4bff80f15 100644 --- a/native/jni/src/geometry_utils.h +++ b/native/jni/src/geometry_utils.h @@ -21,8 +21,6 @@ #include "defines.h" -#define DEBUG_DECODER false - #define M_PI_F 3.14159265f #define ROUND_FLOAT_10000(f) ((f) < 1000.0f && (f) > 0.001f) \ ? (floorf((f) * 10000.0f) / 10000.0f) : (f) @@ -36,19 +34,8 @@ static inline float getSquaredDistanceFloat(const float x1, const float y1, cons return SQUARE_FLOAT(x1 - x2) + SQUARE_FLOAT(y1 - y2); } -static inline float getNormalizedSquaredDistanceFloat(const float x1, const float y1, - const float x2, const float y2, const float scale) { - return getSquaredDistanceFloat(x1, y1, x2, y2) / SQUARE_FLOAT(scale); -} - -static inline float getDistanceFloat(const float x1, const float y1, const float x2, - const float y2) { - return hypotf(x1 - x2, y1 - y2); -} - static AK_FORCE_INLINE int getDistanceInt(const int x1, const int y1, const int x2, const int y2) { - return static_cast<int>(getDistanceFloat(static_cast<float>(x1), static_cast<float>(y1), - static_cast<float>(x2), static_cast<float>(y2))); + return static_cast<int>(hypotf(static_cast<float>(x1 - x2), static_cast<float>(y1 - y2))); } static AK_FORCE_INLINE float getAngle(const int x1, const int y1, const int x2, const int y2) { diff --git a/native/jni/src/proximity_info.cpp b/native/jni/src/proximity_info.cpp index 8ad9c77dc..9b99554d6 100644 --- a/native/jni/src/proximity_info.cpp +++ b/native/jni/src/proximity_info.cpp @@ -144,7 +144,7 @@ float ProximityInfo::getNormalizedSquaredDistanceFromCenterFloatG( const float touchX = static_cast<float>(x); const float touchY = static_cast<float>(y); const float keyWidth = static_cast<float>(getMostCommonKeyWidth()); - return getNormalizedSquaredDistanceFloat(centerX, centerY, touchX, touchY, keyWidth); + return getSquaredDistanceFloat(centerX, centerY, touchX, touchY) / SQUARE_FLOAT(keyWidth); } int ProximityInfo::squaredDistanceToEdge(const int keyId, const int x, const int y) const { diff --git a/native/jni/src/proximity_info_state.cpp b/native/jni/src/proximity_info_state.cpp index 5362d69f0..aa029297e 100644 --- a/native/jni/src/proximity_info_state.cpp +++ b/native/jni/src/proximity_info_state.cpp @@ -20,6 +20,7 @@ #define LOG_TAG "LatinIME: proximity_info_state.cpp" #include "defines.h" +#include "geometry_utils.h" #include "proximity_info.h" #include "proximity_info_state.h" @@ -100,7 +101,7 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi mTimes.clear(); mInputIndice.clear(); mLengthCache.clear(); - mDistanceCache.clear(); + mDistanceCache_G.clear(); mNearKeysVector.clear(); mSearchKeysVector.clear(); mSpeedRates.clear(); @@ -209,7 +210,7 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi const int keyCount = mProximityInfo->getKeyCount(); mNearKeysVector.resize(mSampledInputSize); mSearchKeysVector.resize(mSampledInputSize); - mDistanceCache.resize(mSampledInputSize * keyCount); + mDistanceCache_G.resize(mSampledInputSize * keyCount); for (int i = lastSavedInputSize; i < mSampledInputSize; ++i) { mNearKeysVector[i].reset(); mSearchKeysVector[i].reset(); @@ -220,7 +221,7 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi const int y = mSampledInputYs[i]; const float normalizedSquaredDistance = mProximityInfo->getNormalizedSquaredDistanceFromCenterFloatG(k, x, y); - mDistanceCache[index] = normalizedSquaredDistance; + mDistanceCache_G[index] = normalizedSquaredDistance; if (normalizedSquaredDistance < NEAR_KEY_NORMALIZED_SQUARED_THRESHOLD) { mNearKeysVector[i][k] = true; } @@ -486,7 +487,7 @@ bool ProximityInfoState::checkAndReturnIsContinuationPossible(const int inputSiz // Assuming the cache is invalid if the previous input size is larger than the new one. return false; } - for (int i = 0; i < mSampledInputSize && i < MAX_WORD_LENGTH_INTERNAL; ++i) { + for (int i = 0; i < mSampledInputSize && i < MAX_WORD_LENGTH; ++i) { if (xCoordinates[i] != mSampledInputXs[i] || yCoordinates[i] != mSampledInputYs[i]) { return false; @@ -685,7 +686,7 @@ float ProximityInfoState::getPointToKeyLength( const int keyId = mProximityInfo->getKeyIndexOf(codePoint); if (keyId != NOT_AN_INDEX) { const int index = inputIndex * mProximityInfo->getKeyCount() + keyId; - return min(mDistanceCache[index] * scale, mMaxPointToKeyLength); + return min(mDistanceCache_G[index] * scale, mMaxPointToKeyLength); } if (isSkippableCodePoint(codePoint)) { return 0.0f; @@ -694,7 +695,7 @@ float ProximityInfoState::getPointToKeyLength( return MAX_POINT_TO_KEY_LENGTH; } -float ProximityInfoState::getPointToKeyLength(const int inputIndex, const int codePoint) const { +float ProximityInfoState::getPointToKeyLength_G(const int inputIndex, const int codePoint) const { return getPointToKeyLength(inputIndex, codePoint, 1.0f); } @@ -705,7 +706,7 @@ float ProximityInfoState::getPointToKeyByIdLength( const int inputIndex, const int keyId, const float scale) const { if (keyId != NOT_AN_INDEX) { const int index = inputIndex * mProximityInfo->getKeyCount() + keyId; - return min(mDistanceCache[index] * scale, mMaxPointToKeyLength); + return min(mDistanceCache_G[index] * scale, mMaxPointToKeyLength); } // If the char is not a key on the keyboard then return the max length. return static_cast<float>(MAX_POINT_TO_KEY_LENGTH); @@ -1184,7 +1185,7 @@ float ProximityInfoState::getMostProbableString(int *const codePointBuf) const { int index = 0; float sumLogProbability = 0.0f; // TODO: Current implementation is greedy algorithm. DP would be efficient for many cases. - for (int i = 0; i < mSampledInputSize && index < MAX_WORD_LENGTH_INTERNAL - 1; ++i) { + for (int i = 0; i < mSampledInputSize && index < MAX_WORD_LENGTH - 1; ++i) { float minLogProbability = static_cast<float>(MAX_POINT_TO_KEY_LENGTH); int character = NOT_AN_INDEX; for (hash_map_compat<int, float>::const_iterator it = mCharProbabilities[i].begin(); diff --git a/native/jni/src/proximity_info_state.h b/native/jni/src/proximity_info_state.h index 10e74a0a3..d747bae2a 100644 --- a/native/jni/src/proximity_info_state.h +++ b/native/jni/src/proximity_info_state.h @@ -24,7 +24,6 @@ #include "char_utils.h" #include "defines.h" -#include "geometry_utils.h" #include "hash_map_compat.h" namespace latinime { @@ -59,7 +58,7 @@ class ProximityInfoState { mHasTouchPositionCorrectionData(false), mMostCommonKeyWidthSquare(0), mLocaleStr(), mKeyCount(0), mCellHeight(0), mCellWidth(0), mGridHeight(0), mGridWidth(0), mIsContinuationPossible(false), mSampledInputXs(), mSampledInputYs(), mTimes(), - mInputIndice(), mLengthCache(), mBeelineSpeedPercentiles(), mDistanceCache(), + mInputIndice(), mLengthCache(), mBeelineSpeedPercentiles(), mDistanceCache_G(), mSpeedRates(), mDirections(), mCharProbabilities(), mNearKeysVector(), mSearchKeysVector(), mTouchPositionCorrectionEnabled(false), mSampledInputSize(0) { memset(mInputCodes, 0, sizeof(mInputCodes)); @@ -158,7 +157,7 @@ class ProximityInfoState { float getPointToKeyByIdLength(const int inputIndex, const int keyId, const float scale) const; float getPointToKeyByIdLength(const int inputIndex, const int keyId) const; float getPointToKeyLength(const int inputIndex, const int codePoint, const float scale) const; - float getPointToKeyLength(const int inputIndex, const int codePoint) const; + float getPointToKeyLength_G(const int inputIndex, const int codePoint) const; ProximityType getMatchedProximityId(const int index, const int c, const bool checkProximityChars, int *proximityIndex = 0) const; @@ -275,7 +274,7 @@ class ProximityInfoState { std::vector<int> mInputIndice; std::vector<int> mLengthCache; std::vector<int> mBeelineSpeedPercentiles; - std::vector<float> mDistanceCache; + std::vector<float> mDistanceCache_G; std::vector<float> mSpeedRates; std::vector<float> mDirections; // probabilities of skipping or mapping to a key for each point. @@ -290,10 +289,10 @@ class ProximityInfoState { // inputs including the current input point. std::vector<NearKeycodesSet> mSearchKeysVector; bool mTouchPositionCorrectionEnabled; - int mInputCodes[MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH_INTERNAL]; - int mNormalizedSquaredDistances[MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH_INTERNAL]; + int mInputCodes[MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH]; + int mNormalizedSquaredDistances[MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH]; int mSampledInputSize; - int mPrimaryInputWord[MAX_WORD_LENGTH_INTERNAL]; + int mPrimaryInputWord[MAX_WORD_LENGTH]; }; } // namespace latinime #endif // LATINIME_PROXIMITY_INFO_STATE_H diff --git a/native/jni/src/suggest/gesture_suggest.cpp b/native/jni/src/suggest/gesture_suggest.cpp index 2a604b8ab..fce5621d5 100644 --- a/native/jni/src/suggest/gesture_suggest.cpp +++ b/native/jni/src/suggest/gesture_suggest.cpp @@ -17,7 +17,7 @@ #include "gesture_suggest.h" namespace latinime { - SuggestInterface *(*GestureSuggest::sGestureSuggestFactoryMethod)(int, int) = 0; + SuggestInterface *(*GestureSuggest::sGestureSuggestFactoryMethod)() = 0; GestureSuggest::~GestureSuggest() { delete mSuggestInterface; diff --git a/native/jni/src/suggest/gesture_suggest.h b/native/jni/src/suggest/gesture_suggest.h index e4af03fb8..82c3a69ad 100644 --- a/native/jni/src/suggest/gesture_suggest.h +++ b/native/jni/src/suggest/gesture_suggest.h @@ -26,37 +26,35 @@ class ProximityInfo; class GestureSuggest : public SuggestInterface { public: - GestureSuggest(const int maxWordLength, const int maxWords) - : mSuggestInterface(getGestureSuggestInstance(maxWordLength, maxWords)) { - } + GestureSuggest() : mSuggestInterface(getGestureSuggestInstance()) {} virtual ~GestureSuggest(); int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs, - int *times, int *pointerIds, int *codes, int inputSize, int commitPoint, int *outWords, - int *frequencies, int *outputIndices, int *outputTypes) const { + int *times, int *pointerIds, int *inputCodePoints, int inputSize, int commitPoint, + int *outWords, int *frequencies, int *outputIndices, int *outputTypes) const { if (!mSuggestInterface) { return 0; } return mSuggestInterface->getSuggestions(pInfo, traverseSession, inputXs, inputYs, times, - pointerIds, codes, inputSize, commitPoint, outWords, frequencies, outputIndices, - outputTypes); + pointerIds, inputCodePoints, inputSize, commitPoint, outWords, frequencies, + outputIndices, outputTypes); } - static void setGestureSuggestFactoryMethod(SuggestInterface *(*factoryMethod)(int, int)) { + static void setGestureSuggestFactoryMethod(SuggestInterface *(*factoryMethod)()) { sGestureSuggestFactoryMethod = factoryMethod; } private: - DISALLOW_IMPLICIT_CONSTRUCTORS(GestureSuggest); - static SuggestInterface *getGestureSuggestInstance(int maxWordLength, int maxWords) { + DISALLOW_COPY_AND_ASSIGN(GestureSuggest); + static SuggestInterface *getGestureSuggestInstance() { if (!sGestureSuggestFactoryMethod) { return 0; } - return sGestureSuggestFactoryMethod(maxWordLength, maxWords); + return sGestureSuggestFactoryMethod(); } - static SuggestInterface *(*sGestureSuggestFactoryMethod)(int, int); + static SuggestInterface *(*sGestureSuggestFactoryMethod)(); SuggestInterface *mSuggestInterface; }; } // namespace latinime diff --git a/native/jni/src/suggest/suggest_interface.h b/native/jni/src/suggest/suggest_interface.h index 0fb54266c..0bb85d7e5 100644 --- a/native/jni/src/suggest/suggest_interface.h +++ b/native/jni/src/suggest/suggest_interface.h @@ -26,8 +26,9 @@ class ProximityInfo; class SuggestInterface { public: virtual int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, - int *inputYs, int *times, int *pointerIds, int *codes, int inputSize, int commitPoint, - int *outWords, int *frequencies, int *outputIndices, int *outputTypes) const = 0; + int *inputYs, int *times, int *pointerIds, int *inputCodePoints, int inputSize, + int commitPoint, int *outWords, int *frequencies, int *outputIndices, + int *outputTypes) const = 0; SuggestInterface() {} virtual ~SuggestInterface() {} private: diff --git a/native/jni/src/suggest/typing_suggest.cpp b/native/jni/src/suggest/typing_suggest.cpp index 40d4a98b0..56bd5b69a 100644 --- a/native/jni/src/suggest/typing_suggest.cpp +++ b/native/jni/src/suggest/typing_suggest.cpp @@ -17,7 +17,7 @@ #include "typing_suggest.h" namespace latinime { - SuggestInterface *(*TypingSuggest::sTypingSuggestFactoryMethod)(int, int) = 0; + SuggestInterface *(*TypingSuggest::sTypingSuggestFactoryMethod)() = 0; TypingSuggest::~TypingSuggest() { delete mSuggestInterface; diff --git a/native/jni/src/suggest/typing_suggest.h b/native/jni/src/suggest/typing_suggest.h index 9de4158f5..678037aa2 100644 --- a/native/jni/src/suggest/typing_suggest.h +++ b/native/jni/src/suggest/typing_suggest.h @@ -26,37 +26,35 @@ class ProximityInfo; class TypingSuggest : public SuggestInterface { public: - TypingSuggest(const int maxWordLength, const int maxWords) - : mSuggestInterface(getTypingSuggestInstance(maxWordLength, maxWords)) { - } + TypingSuggest() : mSuggestInterface(getTypingSuggestInstance()) {} virtual ~TypingSuggest(); int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs, - int *times, int *pointerIds, int *codes, int inputSize, int commitPoint, int *outWords, - int *frequencies, int *outputIndices, int *outputTypes) const { + int *times, int *pointerIds, int *inputCodePoints, int inputSize, int commitPoint, + int *outWords, int *frequencies, int *outputIndices, int *outputTypes) const { if (!mSuggestInterface) { return 0; } return mSuggestInterface->getSuggestions(pInfo, traverseSession, inputXs, inputYs, times, - pointerIds, codes, inputSize, commitPoint, outWords, frequencies, outputIndices, - outputTypes); + pointerIds, inputCodePoints, inputSize, commitPoint, outWords, frequencies, + outputIndices, outputTypes); } - static void setTypingSuggestFactoryMethod(SuggestInterface *(*factoryMethod)(int, int)) { + static void setTypingSuggestFactoryMethod(SuggestInterface *(*factoryMethod)()) { sTypingSuggestFactoryMethod = factoryMethod; } private: - DISALLOW_IMPLICIT_CONSTRUCTORS(TypingSuggest); - static SuggestInterface *getTypingSuggestInstance(int maxWordLength, int maxWords) { + DISALLOW_COPY_AND_ASSIGN(TypingSuggest); + static SuggestInterface *getTypingSuggestInstance() { if (!sTypingSuggestFactoryMethod) { return 0; } - return sTypingSuggestFactoryMethod(maxWordLength, maxWords); + return sTypingSuggestFactoryMethod(); } - static SuggestInterface *(*sTypingSuggestFactoryMethod)(int, int); + static SuggestInterface *(*sTypingSuggestFactoryMethod)(); SuggestInterface *mSuggestInterface; }; } // namespace latinime diff --git a/native/jni/src/suggest_utils.h b/native/jni/src/suggest_utils.h new file mode 100644 index 000000000..42cc5dea0 --- /dev/null +++ b/native/jni/src/suggest_utils.h @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_SUGGEST_UTILS_H +#define LATINIME_SUGGEST_UTILS_H + +#include "defines.h" +#include "proximity_info_state.h" + +namespace latinime { +class SuggestUtils { + public: + static float getDistanceScalingFactor(const float normalizedSquaredDistance) { + if (normalizedSquaredDistance < 0.0f) { + return -1.0f; + } + // Promote or demote the score according to the distance from the sweet spot + static const float A = ZERO_DISTANCE_PROMOTION_RATE / 100.0f; + static const float B = 1.0f; + static const float C = 0.5f; + static const float MIN = 0.3f; + static const float R1 = NEUTRAL_SCORE_SQUARED_RADIUS; + static const float R2 = HALF_SCORE_SQUARED_RADIUS; + const float x = normalizedSquaredDistance / static_cast<float>( + ProximityInfoState::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR); + const float factor = max((x < R1) + ? (A * (R1 - x) + B * x) / R1 + : (B * (R2 - x) + C * (x - R1)) / (R2 - R1), MIN); + // factor is a piecewise linear function like: + // A -_ . + // ^-_ . + // B \ . + // \_ . + // C ------------. + // . + // 0 R1 R2 . + return factor; + } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(SuggestUtils); +}; +} // namespace latinime +#endif // LATINIME_SUGGEST_UTILS_H diff --git a/native/jni/src/terminal_attributes.h b/native/jni/src/terminal_attributes.h index 6c2e0dce1..a8cc03b8d 100644 --- a/native/jni/src/terminal_attributes.h +++ b/native/jni/src/terminal_attributes.h @@ -30,10 +30,6 @@ namespace latinime { class TerminalAttributes { public: class ShortcutIterator { - const uint8_t *const mDict; - int mPos; - bool mHasNextShortcutTarget; - public: ShortcutIterator(const uint8_t *dict, const int pos, const uint8_t flags) : mDict(dict), mPos(pos), @@ -50,7 +46,7 @@ class TerminalAttributes { const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer(mDict, &mPos); mHasNextShortcutTarget = 0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT); unsigned int i; - for (i = 0; i < MAX_WORD_LENGTH_INTERNAL; ++i) { + for (i = 0; i < MAX_WORD_LENGTH; ++i) { const int codePoint = BinaryFormat::getCodePointAndForwardPointer(mDict, &mPos); if (NOT_A_CODE_POINT == codePoint) break; outWord[i] = codePoint; @@ -58,6 +54,11 @@ class TerminalAttributes { *outFreq = BinaryFormat::getAttributeFrequencyFromFlags(shortcutFlags); return i; } + + private: + const uint8_t *const mDict; + int mPos; + bool mHasNextShortcutTarget; }; TerminalAttributes(const uint8_t *const dict, const uint8_t flags, const int pos) diff --git a/native/jni/src/unigram_dictionary.cpp b/native/jni/src/unigram_dictionary.cpp index 0a144253a..0b18e78a3 100644 --- a/native/jni/src/unigram_dictionary.cpp +++ b/native/jni/src/unigram_dictionary.cpp @@ -40,10 +40,9 @@ const UnigramDictionary::digraph_t UnigramDictionary::FRENCH_LIGATURES_DIGRAPHS[ { 'o', 'e', 0x0153 } }; // U+0153 : LATIN SMALL LIGATURE OE // TODO: check the header -UnigramDictionary::UnigramDictionary(const uint8_t *const streamStart, int maxWordLength, - int maxWords, const unsigned int flags) - : DICT_ROOT(streamStart), MAX_WORD_LENGTH(maxWordLength), MAX_WORDS(maxWords), - ROOT_POS(0), MAX_DIGRAPH_SEARCH_DEPTH(DEFAULT_MAX_DIGRAPH_SEARCH_DEPTH), FLAGS(flags) { +UnigramDictionary::UnigramDictionary(const uint8_t *const streamStart, const unsigned int flags) + : DICT_ROOT(streamStart), ROOT_POS(0), + MAX_DIGRAPH_SEARCH_DEPTH(DEFAULT_MAX_DIGRAPH_SEARCH_DEPTH), FLAGS(flags) { if (DEBUG_DICT) { AKLOGI("UnigramDictionary - constructor"); } @@ -52,21 +51,17 @@ UnigramDictionary::UnigramDictionary(const uint8_t *const streamStart, int maxWo UnigramDictionary::~UnigramDictionary() { } -static inline int getCodesBufferSize(const int *codes, const int codesSize) { - return sizeof(*codes) * codesSize; -} - // TODO: This needs to take a const int* and not tinker with its contents static void addWord(int *word, int length, int frequency, WordsPriorityQueue *queue, int type) { queue->push(frequency, word, length, type); } // Return the replacement code point for a digraph, or 0 if none. -int UnigramDictionary::getDigraphReplacement(const int *codes, const int i, const int codesSize, +int UnigramDictionary::getDigraphReplacement(const int *codes, const int i, const int inputSize, const digraph_t *const digraphs, const unsigned int digraphsSize) const { // There can't be a digraph if we don't have at least 2 characters to examine - if (i + 2 > codesSize) return false; + if (i + 2 > inputSize) return false; // Search for the first char of some digraph int lastDigraphIndex = -1; @@ -87,7 +82,7 @@ int UnigramDictionary::getDigraphReplacement(const int *codes, const int i, cons // Mostly the same arguments as the non-recursive version, except: // codes is the original value. It points to the start of the work buffer, and gets passed as is. -// codesSize is the size of the user input (thus, it is the size of codesSrc). +// inputSize is the size of the user input (thus, it is the size of codesSrc). // codesDest is the current point in the work buffer. // codesSrc is the current point in the user-input, original, content-unmodified buffer. // codesRemain is the remaining size in codesSrc. @@ -167,49 +162,49 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit // bigramFilter is a bloom filter for fast rejection: see functions setInFilter and isInFilter // in bigram_dictionary.cpp int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, - const int *ycoordinates, const int *codes, const int codesSize, + const int *ycoordinates, const int *inputCodePoints, const int inputSize, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, const bool useFullEditDistance, int *outWords, int *frequencies, int *outputTypes) const { - WordsPriorityQueuePool queuePool(MAX_WORDS, SUB_QUEUE_MAX_WORDS, MAX_WORD_LENGTH); + WordsPriorityQueuePool queuePool(MAX_RESULTS, SUB_QUEUE_MAX_WORDS); queuePool.clearAll(); Correction masterCorrection; masterCorrection.resetCorrection(); if (BinaryFormat::REQUIRES_GERMAN_UMLAUT_PROCESSING & FLAGS) { // Incrementally tune the word and try all possibilities - int codesBuffer[getCodesBufferSize(codes, codesSize)]; - int xCoordinatesBuffer[codesSize]; - int yCoordinatesBuffer[codesSize]; + int codesBuffer[sizeof(*inputCodePoints) * inputSize]; + int xCoordinatesBuffer[inputSize]; + int yCoordinatesBuffer[inputSize]; getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, - xCoordinatesBuffer, yCoordinatesBuffer, codesSize, bigramMap, bigramFilter, - useFullEditDistance, codes, codesSize, 0, codesBuffer, &masterCorrection, + xCoordinatesBuffer, yCoordinatesBuffer, inputSize, bigramMap, bigramFilter, + useFullEditDistance, inputCodePoints, inputSize, 0, codesBuffer, &masterCorrection, &queuePool, GERMAN_UMLAUT_DIGRAPHS, NELEMS(GERMAN_UMLAUT_DIGRAPHS)); } else if (BinaryFormat::REQUIRES_FRENCH_LIGATURES_PROCESSING & FLAGS) { - int codesBuffer[getCodesBufferSize(codes, codesSize)]; - int xCoordinatesBuffer[codesSize]; - int yCoordinatesBuffer[codesSize]; + int codesBuffer[sizeof(*inputCodePoints) * inputSize]; + int xCoordinatesBuffer[inputSize]; + int yCoordinatesBuffer[inputSize]; getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, - xCoordinatesBuffer, yCoordinatesBuffer, codesSize, bigramMap, bigramFilter, - useFullEditDistance, codes, codesSize, 0, codesBuffer, &masterCorrection, + xCoordinatesBuffer, yCoordinatesBuffer, inputSize, bigramMap, bigramFilter, + useFullEditDistance, inputCodePoints, inputSize, 0, codesBuffer, &masterCorrection, &queuePool, FRENCH_LIGATURES_DIGRAPHS, NELEMS(FRENCH_LIGATURES_DIGRAPHS)); } else { // Normal processing - getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, codesSize, + getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, inputCodePoints, inputSize, bigramMap, bigramFilter, useFullEditDistance, &masterCorrection, &queuePool); } PROF_START(20); if (DEBUG_DICT) { float ns = queuePool.getMasterQueue()->getHighestNormalizedScore( - masterCorrection.getPrimaryInputWord(), codesSize, 0, 0, 0); + masterCorrection.getPrimaryInputWord(), inputSize, 0, 0, 0); ns += 0; AKLOGI("Max normalized score = %f", ns); } const int suggestedWordsCount = queuePool.getMasterQueue()->outputSuggestions(masterCorrection.getPrimaryInputWord(), - codesSize, frequencies, outWords, outputTypes); + inputSize, frequencies, outWords, outputTypes); if (DEBUG_DICT) { float ns = queuePool.getMasterQueue()->getHighestNormalizedScore( - masterCorrection.getPrimaryInputWord(), codesSize, 0, 0, 0); + masterCorrection.getPrimaryInputWord(), inputSize, 0, 0, 0); ns += 0; AKLOGI("Returning %d words", suggestedWordsCount); /// Print the returned words @@ -227,7 +222,7 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x } void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, - const int *ycoordinates, const int *codes, const int inputSize, + const int *ycoordinates, const int *inputCodePoints, const int inputSize, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, const bool useFullEditDistance, Correction *correction, WordsPriorityQueuePool *queuePool) const { @@ -236,8 +231,8 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, const i PROF_END(0); PROF_START(1); - getOneWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, bigramMap, bigramFilter, - useFullEditDistance, inputSize, correction, queuePool); + getOneWordSuggestions(proximityInfo, xcoordinates, ycoordinates, inputCodePoints, bigramMap, + bigramFilter, useFullEditDistance, inputSize, correction, queuePool); PROF_END(1); PROF_START(2); @@ -262,7 +257,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, const i // Multiple word suggestions if (SUGGEST_MULTIPLE_WORDS && inputSize >= MIN_USER_TYPED_LENGTH_FOR_MULTIPLE_WORD_SUGGESTION) { - getSplitMultipleWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, + getSplitMultipleWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, inputCodePoints, useFullEditDistance, inputSize, correction, queuePool, hasAutoCorrectionCandidate); } @@ -398,10 +393,10 @@ void UnigramDictionary::onTerminal(const int probability, // so that the insert order is protected inside the queue for words // with the same score. For the moment we use -1 to make sure the shortcut will // never be in front of the word. - int shortcutTarget[MAX_WORD_LENGTH_INTERNAL]; + int shortcutTarget[MAX_WORD_LENGTH]; int shortcutFrequency; const int shortcutTargetStringLength = iterator.getNextShortcutTarget( - MAX_WORD_LENGTH_INTERNAL, shortcutTarget, &shortcutFrequency); + MAX_WORD_LENGTH, shortcutTarget, &shortcutFrequency); int shortcutScore; int kind; if (shortcutFrequency == BinaryFormat::WHITELIST_SHORTCUT_FREQUENCY @@ -487,7 +482,7 @@ int UnigramDictionary::getSubStringSuggestion( initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputSize, correction); - int word[MAX_WORD_LENGTH_INTERNAL]; + int word[MAX_WORD_LENGTH]; int freq = getMostFrequentWordLike( inputWordStartPos, inputWordLength, correction, word); if (freq > 0) { @@ -761,13 +756,13 @@ static inline void onTerminalWordLike(const int freq, int *newWord, const int le // that is, everything that only differs by case/accents. int UnigramDictionary::getMostFrequentWordLikeInner(const int *const inWord, const int inputSize, int *outWord) const { - int newWord[MAX_WORD_LENGTH_INTERNAL]; + int newWord[MAX_WORD_LENGTH]; int depth = 0; int maxFreq = -1; const uint8_t *const root = DICT_ROOT; - int stackChildCount[MAX_WORD_LENGTH_INTERNAL]; - int stackInputIndex[MAX_WORD_LENGTH_INTERNAL]; - int stackSiblingPos[MAX_WORD_LENGTH_INTERNAL]; + int stackChildCount[MAX_WORD_LENGTH]; + int stackInputIndex[MAX_WORD_LENGTH]; + int stackSiblingPos[MAX_WORD_LENGTH]; int startPos = 0; stackChildCount[0] = BinaryFormat::getGroupCountAndForwardPointer(root, &startPos); diff --git a/native/jni/src/unigram_dictionary.h b/native/jni/src/unigram_dictionary.h index f5850b4f4..502bf4790 100644 --- a/native/jni/src/unigram_dictionary.h +++ b/native/jni/src/unigram_dictionary.h @@ -39,12 +39,11 @@ class UnigramDictionary { static const int FLAG_MULTIPLE_SUGGEST_ABORT = 0; static const int FLAG_MULTIPLE_SUGGEST_SKIP = 1; static const int FLAG_MULTIPLE_SUGGEST_CONTINUE = 2; - UnigramDictionary(const uint8_t *const streamStart, int maxWordLength, int maxWords, - const unsigned int flags); + UnigramDictionary(const uint8_t *const streamStart, const unsigned int flags); int getFrequency(const int *const inWord, const int length) const; int getBigramPosition(int pos, int *word, int offset, int length) const; int getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, - const int *ycoordinates, const int *codes, const int codesSize, + const int *ycoordinates, const int *inputCodePoints, const int inputSize, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, const bool useFullEditDistance, int *outWords, int *frequencies, int *outputTypes) const; @@ -53,11 +52,11 @@ class UnigramDictionary { private: DISALLOW_IMPLICIT_CONSTRUCTORS(UnigramDictionary); void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, - const int *ycoordinates, const int *codes, const int inputSize, + const int *ycoordinates, const int *inputCodePoints, const int inputSize, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, const bool useFullEditDistance, Correction *correction, WordsPriorityQueuePool *queuePool) const; - int getDigraphReplacement(const int *codes, const int i, const int codesSize, + int getDigraphReplacement(const int *codes, const int i, const int inputSize, const digraph_t *const digraphs, const unsigned int digraphsSize) const; void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codesBuffer, int *xCoordinatesBuffer, @@ -67,7 +66,7 @@ class UnigramDictionary { WordsPriorityQueuePool *queuePool, const digraph_t *const digraphs, const unsigned int digraphsSize) const; void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, - const int *ycoordinates, const int *codes, const int codesSize, + const int *ycoordinates, const int *codes, const int inputSize, Correction *correction) const; void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const std::map<int, int> *bigramMap, @@ -109,8 +108,6 @@ class UnigramDictionary { int *outputWord) const; const uint8_t *const DICT_ROOT; - const int MAX_WORD_LENGTH; - const int MAX_WORDS; const int ROOT_POS; const int MAX_DIGRAPH_SEARCH_DEPTH; const int FLAGS; diff --git a/native/jni/src/words_priority_queue.h b/native/jni/src/words_priority_queue.h index 84b4b484f..7aab1e083 100644 --- a/native/jni/src/words_priority_queue.h +++ b/native/jni/src/words_priority_queue.h @@ -27,10 +27,9 @@ namespace latinime { class WordsPriorityQueue { public: - class SuggestedWord { - public: + struct SuggestedWord { int mScore; - int mWord[MAX_WORD_LENGTH_INTERNAL]; + int mWord[MAX_WORD_LENGTH]; int mWordLength; bool mUsed; int mType; @@ -44,11 +43,10 @@ class WordsPriorityQueue { } }; - WordsPriorityQueue(int maxWords, int maxWordLength) - : mSuggestions(), MAX_WORDS(static_cast<unsigned int>(maxWords)), - MAX_WORD_LENGTH(static_cast<unsigned int>(maxWordLength)), - mSuggestedWords(new SuggestedWord[maxWordLength]), mHighestSuggestedWord(0) { - for (int i = 0; i < maxWordLength; ++i) { + WordsPriorityQueue(int maxWords) + : mSuggestions(), MAX_WORDS(maxWords), + mSuggestedWords(new SuggestedWord[MAX_WORD_LENGTH]), mHighestSuggestedWord(0) { + for (int i = 0; i < MAX_WORD_LENGTH; ++i) { mSuggestedWords[i].mUsed = false; } } @@ -171,7 +169,6 @@ class WordsPriorityQueue { wordComparator> Suggestions; Suggestions mSuggestions; const int MAX_WORDS; - const int MAX_WORD_LENGTH; SuggestedWord *mSuggestedWords; SuggestedWord *mHighestSuggestedWord; }; diff --git a/native/jni/src/words_priority_queue_pool.h b/native/jni/src/words_priority_queue_pool.h index f7c08fb52..cfe7ede63 100644 --- a/native/jni/src/words_priority_queue_pool.h +++ b/native/jni/src/words_priority_queue_pool.h @@ -24,15 +24,14 @@ namespace latinime { class WordsPriorityQueuePool { public: - WordsPriorityQueuePool(int mainQueueMaxWords, int subQueueMaxWords, int maxWordLength) + WordsPriorityQueuePool(int mainQueueMaxWords, int subQueueMaxWords) // Note: using placement new() requires the caller to call the destructor explicitly. - : mMasterQueue(new(mMasterQueueBuf) WordsPriorityQueue( - mainQueueMaxWords, maxWordLength)) { + : mMasterQueue(new(mMasterQueueBuf) WordsPriorityQueue(mainQueueMaxWords)) { for (int i = 0, subQueueBufOffset = 0; i < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS * SUB_QUEUE_MAX_COUNT; ++i, subQueueBufOffset += static_cast<int>(sizeof(WordsPriorityQueue))) { mSubQueues[i] = new(mSubQueueBuf + subQueueBufOffset) - WordsPriorityQueue(subQueueMaxWords, maxWordLength); + WordsPriorityQueue(subQueueMaxWords); } } |