diff options
Diffstat (limited to 'native/src')
-rw-r--r-- | native/src/basechars.cpp (renamed from native/src/basechars.h) | 12 | ||||
-rw-r--r-- | native/src/bigram_dictionary.h | 4 | ||||
-rw-r--r-- | native/src/binary_format.h | 18 | ||||
-rw-r--r-- | native/src/char_utils.h | 39 | ||||
-rw-r--r-- | native/src/correction.cpp | 211 | ||||
-rw-r--r-- | native/src/correction.h | 35 | ||||
-rw-r--r-- | native/src/defines.h | 10 | ||||
-rw-r--r-- | native/src/dictionary.cpp | 5 | ||||
-rw-r--r-- | native/src/dictionary.h | 27 | ||||
-rw-r--r-- | native/src/proximity_info.cpp | 18 | ||||
-rw-r--r-- | native/src/proximity_info.h | 6 | ||||
-rw-r--r-- | native/src/terminal_attributes.h | 78 | ||||
-rw-r--r-- | native/src/unigram_dictionary.cpp | 300 | ||||
-rw-r--r-- | native/src/unigram_dictionary.h | 87 | ||||
-rw-r--r-- | native/src/words_priority_queue.h | 157 | ||||
-rw-r--r-- | native/src/words_priority_queue_pool.h | 54 |
16 files changed, 755 insertions, 306 deletions
diff --git a/native/src/basechars.h b/native/src/basechars.cpp index 3843e11c5..31f1e18a8 100644 --- a/native/src/basechars.h +++ b/native/src/basechars.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2009 The Android Open Source Project + * Copyright (C) 2011 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,8 +14,9 @@ * limitations under the License. */ -#ifndef LATINIME_BASECHARS_H -#define LATINIME_BASECHARS_H +#include "char_utils.h" + +namespace latinime { /** * Table mapping most combined Latin, Greek, and Cyrillic characters @@ -23,7 +24,7 @@ * if c is not a combined character, or the base character if it * is combined. */ -static unsigned short BASE_CHARS[] = { +const unsigned short BASE_CHARS[BASE_CHARS_SIZE] = { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, @@ -189,4 +190,5 @@ static unsigned short BASE_CHARS[] = { // generated with: // cat UnicodeData.txt | perl -e 'while (<>) { @foo = split(/;/); $foo[5] =~ s/<.*> //; $base[hex($foo[0])] = hex($foo[5]);} for ($i = 0; $i < 0x500; $i += 8) { for ($j = $i; $j < $i + 8; $j++) { printf("0x%04x, ", $base[$j] ? $base[$j] : $j)}; print "\n"; }' -#endif // LATINIME_BASECHARS_H + +} // namespace latinime diff --git a/native/src/bigram_dictionary.h b/native/src/bigram_dictionary.h index c07458a38..585a1866a 100644 --- a/native/src/bigram_dictionary.h +++ b/native/src/bigram_dictionary.h @@ -21,14 +21,14 @@ namespace latinime { class Dictionary; class BigramDictionary { -public: + public: BigramDictionary(const unsigned char *dict, int maxWordLength, int maxAlternatives, const bool isLatestDictVersion, const bool hasBigram, Dictionary *parentDictionary); int getBigrams(unsigned short *word, int length, int *codes, int codesSize, unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams, int maxAlternatives); ~BigramDictionary(); -private: + private: bool addWordBigram(unsigned short *word, int length, int frequency); int getBigramAddress(int *pos, bool advance); int getBigramFreq(int *pos); diff --git a/native/src/binary_format.h b/native/src/binary_format.h index 6f65088db..9944fa2bd 100644 --- a/native/src/binary_format.h +++ b/native/src/binary_format.h @@ -22,12 +22,12 @@ namespace latinime { class BinaryFormat { -private: + private: const static int32_t MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20; const static int32_t CHARACTER_ARRAY_TERMINATOR = 0x1F; const static int MULTIPLE_BYTE_CHARACTER_ADDITIONAL_SIZE = 2; -public: + public: const static int UNKNOWN_FORMAT = -1; const static int FORMAT_VERSION_1 = 1; const static uint16_t FORMAT_VERSION_1_MAGIC_NUMBER = 0x78B1; @@ -145,15 +145,15 @@ inline int BinaryFormat::skipFrequency(const uint8_t flags, const int pos) { inline int BinaryFormat::skipAllAttributes(const uint8_t* const dict, const uint8_t flags, const int pos) { - // This function skips all attributes. The format makes provision for future extension - // with other attributes (notably shortcuts) but for the time being, bigrams are the - // only attributes that may be found in a character group, so we only look at bigrams - // in this version. + // This function skips all attributes: shortcuts and bigrams. + int newPos = pos; + if (UnigramDictionary::FLAG_HAS_SHORTCUT_TARGETS & flags) { + newPos = skipAttributes(dict, newPos); + } if (UnigramDictionary::FLAG_HAS_BIGRAMS & flags) { - return skipAttributes(dict, pos); - } else { - return pos; + newPos = skipAttributes(dict, newPos); } + return newPos; } inline int BinaryFormat::skipChildrenPosAndAttributes(const uint8_t* const dict, diff --git a/native/src/char_utils.h b/native/src/char_utils.h index a69a35e7a..607dc5195 100644 --- a/native/src/char_utils.h +++ b/native/src/char_utils.h @@ -19,8 +19,47 @@ namespace latinime { +inline static int isAsciiUpper(unsigned short c) { + return c >= 'A' && c <= 'Z'; +} + +inline static unsigned short toAsciiLower(unsigned short c) { + return c - 'A' + 'a'; +} + +inline static int isAscii(unsigned short c) { + return c <= 127; +} + unsigned short latin_tolower(unsigned short c); +/** + * Table mapping most combined Latin, Greek, and Cyrillic characters + * to their base characters. If c is in range, BASE_CHARS[c] == c + * if c is not a combined character, or the base character if it + * is combined. + */ + +static const int BASE_CHARS_SIZE = 0x0500; +extern const unsigned short BASE_CHARS[BASE_CHARS_SIZE]; + +inline static unsigned short toBaseChar(unsigned short c) { + if (c < BASE_CHARS_SIZE) { + return BASE_CHARS[c]; + } + return c; +} + +inline static unsigned short toBaseLowerCase(unsigned short c) { + c = toBaseChar(c); + if (isAsciiUpper(c)) { + return toAsciiLower(c); + } else if (isAscii(c)) { + return c; + } + return latin_tolower(c); +} + } // namespace latinime #endif // LATINIME_CHAR_UTILS_H diff --git a/native/src/correction.cpp b/native/src/correction.cpp index 27dc40745..dc31bfae7 100644 --- a/native/src/correction.cpp +++ b/native/src/correction.cpp @@ -16,11 +16,13 @@ #include <assert.h> #include <ctype.h> +#include <math.h> #include <stdio.h> #include <string.h> #define LOG_TAG "LatinIME: correction.cpp" +#include "char_utils.h" #include "correction.h" #include "dictionary.h" #include "proximity_info.h" @@ -31,73 +33,49 @@ namespace latinime { // edit distance funcitons // ///////////////////////////// -#if 0 /* no longer used */ -inline static int editDistance( - int* editDistanceTable, const unsigned short* input, - const int inputLength, const unsigned short* output, const int outputLength) { - // dp[li][lo] dp[a][b] = dp[ a * lo + b] - int* dp = editDistanceTable; - const int li = inputLength + 1; - const int lo = outputLength + 1; - for (int i = 0; i < li; ++i) { - dp[lo * i] = i; - } - for (int i = 0; i < lo; ++i) { - dp[i] = i; - } - - for (int i = 0; i < li - 1; ++i) { - for (int j = 0; j < lo - 1; ++j) { - const uint32_t ci = Dictionary::toBaseLowerCase(input[i]); - const uint32_t co = Dictionary::toBaseLowerCase(output[j]); - const uint16_t cost = (ci == co) ? 0 : 1; - dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1, - min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost)); - if (i > 0 && j > 0 && ci == Dictionary::toBaseLowerCase(output[j - 1]) - && co == Dictionary::toBaseLowerCase(input[i - 1])) { - dp[(i + 1) * lo + (j + 1)] = min( - dp[(i + 1) * lo + (j + 1)], dp[(i - 1) * lo + (j - 1)] + cost); - } - } +inline static void initEditDistance(int *editDistanceTable) { + for (int i = 0; i <= MAX_WORD_LENGTH_INTERNAL; ++i) { + editDistanceTable[i] = i; } +} - if (DEBUG_EDIT_DISTANCE) { - LOGI("IN = %d, OUT = %d", inputLength, outputLength); - for (int i = 0; i < li; ++i) { - for (int j = 0; j < lo; ++j) { - LOGI("EDIT[%d][%d], %d", i, j, dp[i * lo + j]); +inline static void dumpEditDistance10ForDebug(int *editDistanceTable, const int inputLength, + const int outputLength) { + if (DEBUG_DICT) { + LOGI("EditDistanceTable"); + for (int i = 0; i <= 10; ++i) { + int c[11]; + for (int j = 0; j <= 10; ++j) { + if (j < inputLength + 1 && i < outputLength + 1) { + c[j] = (editDistanceTable + i * (inputLength + 1))[j]; + } else { + c[j] = -1; + } } + LOGI("[ %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d ]", + c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7], c[8], c[9], c[10]); } } - return dp[li * lo - 1]; -} -#endif - -inline static void initEditDistance(int *editDistanceTable) { - for (int i = 0; i <= MAX_WORD_LENGTH_INTERNAL; ++i) { - editDistanceTable[i] = i; - } } inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigned short *input, const int inputLength, const unsigned short *output, const int outputLength) { + // TODO: Make sure that editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL] is not touched. // Let dp[i][j] be editDistanceTable[i * (inputLength + 1) + j]. // Assuming that dp[0][0] ... dp[outputLength - 1][inputLength] are already calculated, // and calculate dp[ouputLength][0] ... dp[outputLength][inputLength]. int *const current = editDistanceTable + outputLength * (inputLength + 1); const int *const prev = editDistanceTable + (outputLength - 1) * (inputLength + 1); const int *const prevprev = - outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputLength + 1) : NULL; + outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputLength + 1) : 0; current[0] = outputLength; - const uint32_t co = Dictionary::toBaseLowerCase(output[outputLength - 1]); - const uint32_t prevCO = - outputLength >= 2 ? Dictionary::toBaseLowerCase(output[outputLength - 2]) : 0; + const uint32_t co = toBaseLowerCase(output[outputLength - 1]); + const uint32_t prevCO = outputLength >= 2 ? toBaseLowerCase(output[outputLength - 2]) : 0; for (int i = 1; i <= inputLength; ++i) { - const uint32_t ci = Dictionary::toBaseLowerCase(input[i - 1]); + const uint32_t ci = toBaseLowerCase(input[i - 1]); const uint16_t cost = (ci == co) ? 0 : 1; current[i] = min(current[i - 1] + 1, min(prev[i] + 1, prev[i - 1] + cost)); - if (i >= 2 && prevprev && ci == prevCO - && co == Dictionary::toBaseLowerCase(input[i - 2])) { + if (i >= 2 && prevprev && ci == prevCO && co == toBaseLowerCase(input[i - 2])) { current[i] = min(current[i], prevprev[i - 2] + 1); } } @@ -105,6 +83,9 @@ inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigne inline static int getCurrentEditDistance( int *editDistanceTable, const int inputLength, const int outputLength) { + if (DEBUG_DICT) { + LOGI("getCurrentEditDistance %d, %d", inputLength, outputLength); + } return editDistanceTable[(inputLength + 1) * (outputLength + 1) - 1]; } @@ -133,6 +114,9 @@ void Correction::initCorrection(const ProximityInfo *pi, const int inputLength, mInputLength = inputLength; mMaxDepth = maxDepth; mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2; + // TODO: This is not supposed to be required. Check what's going wrong with + // editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL] + initEditDistance(mEditDistanceTable); } void Correction::initCorrectionState( @@ -146,7 +130,7 @@ void Correction::initCorrectionState( void Correction::setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos, const int spaceProximityPos, const int missingSpacePos, - const bool useFullEditDistance) { + const bool useFullEditDistance, const bool doAutoCompletion, const int maxErrors) { // TODO: remove mTransposedPos = transposedPos; mExcessivePos = excessivePos; @@ -159,6 +143,8 @@ void Correction::setCorrectionParams(const int skipPos, const int excessivePos, mSpaceProximityPos = spaceProximityPos; mMissingSpacePos = missingSpacePos; mUseFullEditDistance = useFullEditDistance; + mDoAutoCompletion = doAutoCompletion; + mMaxErrors = maxErrors; } void Correction::checkState() { @@ -280,7 +266,9 @@ void Correction::startToTraverseAllNodes() { bool Correction::needsToPrune() const { // TODO: use edit distance here - return mOutputIndex - 1 >= mMaxDepth || mProximityCount > mMaxEditDistance; + return mOutputIndex - 1 >= mMaxDepth || mProximityCount > mMaxEditDistance + // Allow one char longer word for missing character + || (!mDoAutoCompletion && (mOutputIndex + 1 >= mInputLength)); } void Correction::addCharToCurrentWord(const int32_t c) { @@ -312,12 +300,17 @@ inline bool isEquivalentChar(ProximityInfo::ProximityType type) { Correction::CorrectionType Correction::processCharAndCalcState( const int32_t c, const bool isTerminal) { const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount); + if (correctionCount > mMaxErrors) { + return UNRELATED; + } + // TODO: Change the limit if we'll allow two or more corrections const bool noCorrectionsHappenedSoFar = correctionCount == 0; const bool canTryCorrection = noCorrectionsHappenedSoFar; int proximityIndex = 0; mDistances[mOutputIndex] = NOT_A_DISTANCE; + // Skip checking this node if (mNeedsToTraverseAllNodes || isQuote(c)) { bool incremented = false; if (mLastCharExceeded && mInputIndex == mInputLength - 1) { @@ -342,6 +335,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( return processSkipChar(c, isTerminal, incremented); } + // Check possible corrections. if (mExcessivePos >= 0) { if (mExcessiveCount == 0 && mExcessivePos < mOutputIndex) { mExcessivePos = mOutputIndex; @@ -392,7 +386,12 @@ Correction::CorrectionType Correction::processCharAndCalcState( } // TODO: Change the limit if we'll allow two or more proximity chars with corrections - const bool checkProximityChars = noCorrectionsHappenedSoFar || mProximityCount == 0; + // Work around: When the mMaxErrors is 1, we only allow just one error + // including proximity correction. + const bool checkProximityChars = (mMaxErrors > 1) + ? (noCorrectionsHappenedSoFar || mProximityCount == 0) + : (noCorrectionsHappenedSoFar && mProximityCount == 0); + ProximityInfo::ProximityType matchedProximityCharId = secondTransposing ? ProximityInfo::EQUIVALENT_CHAR : mProximityInfo->getMatchedProximityId( @@ -607,13 +606,7 @@ inline static int getQuoteCount(const unsigned short* word, const int length) { } inline static bool isUpperCase(unsigned short c) { - if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) { - c = BASE_CHARS[c]; - } - if (isupper(c)) { - return true; - } - return false; + return isAsciiUpper(toBaseChar(c)); } ////////////////////// @@ -654,6 +647,9 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const // TODO: Calculate edit distance for transposed and excessive int ed = 0; + if (DEBUG_DICT_FULL) { + dumpEditDistance10ForDebug(editDistanceTable, inputLength, outputIndex + 1); + } int adjustedProximityMatchedCount = proximityMatchedCount; int finalFreq = freq; @@ -938,4 +934,103 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords( return totalFreq; } +/* Damerau-Levenshtein distance */ +inline static int editDistanceInternal( + int* editDistanceTable, const unsigned short* before, + const int beforeLength, const unsigned short* after, const int afterLength) { + // dp[li][lo] dp[a][b] = dp[ a * lo + b] + int* dp = editDistanceTable; + const int li = beforeLength + 1; + const int lo = afterLength + 1; + for (int i = 0; i < li; ++i) { + dp[lo * i] = i; + } + for (int i = 0; i < lo; ++i) { + dp[i] = i; + } + + for (int i = 0; i < li - 1; ++i) { + for (int j = 0; j < lo - 1; ++j) { + const uint32_t ci = toBaseLowerCase(before[i]); + const uint32_t co = toBaseLowerCase(after[j]); + const uint16_t cost = (ci == co) ? 0 : 1; + dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1, + min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost)); + if (i > 0 && j > 0 && ci == toBaseLowerCase(after[j - 1]) + && co == toBaseLowerCase(before[i - 1])) { + dp[(i + 1) * lo + (j + 1)] = min( + dp[(i + 1) * lo + (j + 1)], dp[(i - 1) * lo + (j - 1)] + cost); + } + } + } + + if (DEBUG_EDIT_DISTANCE) { + LOGI("IN = %d, OUT = %d", beforeLength, afterLength); + for (int i = 0; i < li; ++i) { + for (int j = 0; j < lo; ++j) { + LOGI("EDIT[%d][%d], %d", i, j, dp[i * lo + j]); + } + } + } + return dp[li * lo - 1]; +} + +int Correction::RankingAlgorithm::editDistance(const unsigned short* before, + const int beforeLength, const unsigned short* after, const int afterLength) { + int table[(beforeLength + 1) * (afterLength + 1)]; + return editDistanceInternal(table, before, beforeLength, after, afterLength); +} + + +// In dictionary.cpp, getSuggestion() method, +// suggestion scores are computed using the below formula. +// original score +// := pow(mTypedLetterMultiplier (this is defined 2), +// (the number of matched characters between typed word and suggested word)) +// * (individual word's score which defined in the unigram dictionary, +// and this score is defined in range [0, 255].) +// Then, the following processing is applied. +// - If the dictionary word is matched up to the point of the user entry +// (full match up to min(before.length(), after.length()) +// => Then multiply by FULL_MATCHED_WORDS_PROMOTION_RATE (this is defined 1.2) +// - If the word is a true full match except for differences in accents or +// capitalization, then treat it as if the score was 255. +// - If before.length() == after.length() +// => multiply by mFullWordMultiplier (this is defined 2)) +// So, maximum original score is pow(2, min(before.length(), after.length())) * 255 * 2 * 1.2 +// For historical reasons we ignore the 1.2 modifier (because the measure for a good +// autocorrection threshold was done at a time when it didn't exist). This doesn't change +// the result. +// So, we can normalize original score by dividing pow(2, min(b.l(),a.l())) * 255 * 2. + +/* static */ +double Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short* before, + const int beforeLength, const unsigned short* after, const int afterLength, + const int score) { + if (0 == beforeLength || 0 == afterLength) { + return 0; + } + const int distance = editDistance(before, beforeLength, after, afterLength); + int spaceCount = 0; + for (int i = 0; i < afterLength; ++i) { + if (after[i] == CODE_SPACE) { + ++spaceCount; + } + } + + if (spaceCount == afterLength) { + return 0; + } + + const double maxScore = score >= S_INT_MAX ? S_INT_MAX : MAX_INITIAL_SCORE + * pow((double)TYPED_LETTER_MULTIPLIER, + (double)min(beforeLength, afterLength - spaceCount)) * FULL_WORD_MULTIPLIER; + + // add a weight based on edit distance. + // distance <= max(afterLength, beforeLength) == afterLength, + // so, 0 <= distance / afterLength <= 1 + const double weight = 1.0 - (double) distance / afterLength; + return (score / maxScore) * weight; +} + } // namespace latinime diff --git a/native/src/correction.h b/native/src/correction.h index d4e99f0ce..4012e7e82 100644 --- a/native/src/correction.h +++ b/native/src/correction.h @@ -27,8 +27,7 @@ namespace latinime { class ProximityInfo; class Correction { - -public: + public: typedef enum { TRAVERSE_ALL_ON_TERMINAL, TRAVERSE_ALL_NOT_ON_TERMINAL, @@ -44,7 +43,8 @@ public: // TODO: remove void setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos, - const int spaceProximityPos, const int missingSpacePos, const bool useFullEditDistance); + const int spaceProximityPos, const int missingSpacePos, const bool useFullEditDistance, + const bool doAutoCompletion, const int maxErrors); void checkState(); bool initProcessState(const int index); @@ -94,7 +94,25 @@ public: inline int getTreeParentIndex(const int index) const { return mCorrectionStates[index].mParentIndex; } -private: + + class RankingAlgorithm { + public: + static int calculateFinalFreq(const int inputIndex, const int depth, + const int freq, int *editDistanceTable, const Correction* correction); + static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq, + const Correction* correction, const unsigned short *word); + static double calcNormalizedScore(const unsigned short* before, const int beforeLength, + const unsigned short* after, const int afterLength, const int score); + static int editDistance(const unsigned short* before, + const int beforeLength, const unsigned short* after, const int afterLength); + private: + static const int CODE_SPACE = ' '; + static const int MAX_INITIAL_SCORE = 255; + static const int TYPED_LETTER_MULTIPLIER = 2; + static const int FULL_WORD_MULTIPLIER = 2; + }; + + private: inline void incrementInputIndex(); inline void incrementOutputIndex(); inline bool needsToTraverseAllNodes(); @@ -109,6 +127,7 @@ private: const ProximityInfo *mProximityInfo; bool mUseFullEditDistance; + bool mDoAutoCompletion; int mMaxEditDistance; int mMaxDepth; int mInputLength; @@ -116,6 +135,7 @@ private: int mMissingSpacePos; int mTerminalInputIndex; int mTerminalOutputIndex; + int mMaxErrors; // The following arrays are state buffer. unsigned short mWord[MAX_WORD_LENGTH_INTERNAL]; @@ -150,13 +170,6 @@ private: bool mTransposing; bool mSkipping; - class RankingAlgorithm { - public: - static int calculateFinalFreq(const int inputIndex, const int depth, - const int freq, int *editDistanceTable, const Correction* correction); - static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq, - const Correction* correction, const unsigned short *word); - }; }; } // namespace latinime #endif // LATINIME_CORRECTION_H diff --git a/native/src/defines.h b/native/src/defines.h index ef1beb92f..d636e5197 100644 --- a/native/src/defines.h +++ b/native/src/defines.h @@ -98,9 +98,10 @@ static void prof_out(void) { #define DEBUG_SHOW_FOUND_WORD false #define DEBUG_NODE DEBUG_DICT_FULL #define DEBUG_TRACE DEBUG_DICT_FULL -#define DEBUG_PROXIMITY_INFO true +#define DEBUG_PROXIMITY_INFO false #define DEBUG_CORRECTION false #define DEBUG_CORRECTION_FREQ true +#define DEBUG_WORDS_PRIORITY_QUEUE true #define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0) @@ -125,6 +126,7 @@ static void dumpWord(const unsigned short* word, const int length) { #define DEBUG_PROXIMITY_INFO false #define DEBUG_CORRECTION false #define DEBUG_CORRECTION_FREQ false +#define DEBUG_WORDS_PRIORITY_QUEUE false #define DUMP_WORD(word, length) @@ -196,10 +198,14 @@ static void dumpWord(const unsigned short* word, const int length) { #define NEUTRAL_SCORE_SQUARED_RADIUS 8.0f #define HALF_SCORE_SQUARED_RADIUS 32.0f -// This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java +// This must be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java // This is only used for the size of array. Not to be used in c functions. #define MAX_WORD_LENGTH_INTERNAL 48 +// Word limit for sub queues used in WordsPriorityQueuePool. Sub queues are temporary queues used +// for better performance. +#define SUB_QUEUE_MAX_WORDS 5 + #define MAX_DEPTH_MULTIPLIER 3 // TODO: Reduce this constant if possible; check the maximum number of umlauts in the same German diff --git a/native/src/dictionary.cpp b/native/src/dictionary.cpp index a49769bdb..e3673d425 100644 --- a/native/src/dictionary.cpp +++ b/native/src/dictionary.cpp @@ -38,6 +38,9 @@ Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust, LOGI("IN NATIVE SUGGEST Version: %d", (mDict[0] & 0xFF)); } } + mCorrection = new Correction(typedLetterMultiplier, fullWordMultiplier); + mWordsPriorityQueuePool = new WordsPriorityQueuePool( + maxWords, SUB_QUEUE_MAX_WORDS, maxWordLength); mUnigramDictionary = new UnigramDictionary(mDict, typedLetterMultiplier, fullWordMultiplier, maxWordLength, maxWords, maxAlternatives, IS_LATEST_DICT_VERSION); mBigramDictionary = new BigramDictionary(mDict, maxWordLength, maxAlternatives, @@ -45,6 +48,8 @@ Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust, } Dictionary::~Dictionary() { + delete mCorrection; + delete mWordsPriorityQueuePool; delete mUnigramDictionary; delete mBigramDictionary; } diff --git a/native/src/dictionary.h b/native/src/dictionary.h index d5de0083a..79d377a4f 100644 --- a/native/src/dictionary.h +++ b/native/src/dictionary.h @@ -17,22 +17,25 @@ #ifndef LATINIME_DICTIONARY_H #define LATINIME_DICTIONARY_H -#include "basechars.h" #include "bigram_dictionary.h" #include "char_utils.h" +#include "correction.h" #include "defines.h" #include "proximity_info.h" #include "unigram_dictionary.h" +#include "words_priority_queue_pool.h" namespace latinime { class Dictionary { -public: + public: Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust, int typedLetterMultipler, int fullWordMultiplier, int maxWordLength, int maxWords, int maxAlternatives); + int getSuggestions(ProximityInfo *proximityInfo, int *xcoordinates, int *ycoordinates, int *codes, int codesSize, int flags, unsigned short *outWords, int *frequencies) { - return mUnigramDictionary->getSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, + return mUnigramDictionary->getSuggestions(proximityInfo, mWordsPriorityQueuePool, + mCorrection, xcoordinates, ycoordinates, codes, codesSize, flags, outWords, frequencies); } @@ -63,9 +66,8 @@ public: static int setDictionaryValues(const unsigned char *dict, const bool isLatestDictVersion, const int pos, unsigned short *c, int *childrenPosition, bool *terminal, int *freq); - static inline unsigned short toBaseLowerCase(unsigned short c); -private: + private: bool hasBigram(); const unsigned char *mDict; @@ -79,6 +81,8 @@ private: const bool IS_LATEST_DICT_VERSION; UnigramDictionary *mUnigramDictionary; BigramDictionary *mBigramDictionary; + WordsPriorityQueuePool *mWordsPriorityQueuePool; + Correction *mCorrection; }; // public static utility methods @@ -156,19 +160,6 @@ inline int Dictionary::setDictionaryValues(const unsigned char *dict, return position; } - -inline unsigned short Dictionary::toBaseLowerCase(unsigned short c) { - if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) { - c = BASE_CHARS[c]; - } - if (c >='A' && c <= 'Z') { - c |= 32; - } else if (c > 127) { - c = latin_tolower(c); - } - return c; -} - } // namespace latinime #endif // LATINIME_DICTIONARY_H diff --git a/native/src/proximity_info.cpp b/native/src/proximity_info.cpp index 763a3a174..95e35263b 100644 --- a/native/src/proximity_info.cpp +++ b/native/src/proximity_info.cpp @@ -47,7 +47,7 @@ ProximityInfo::ProximityInfo(const int maxProximityCharsSize, const int keyboard HAS_TOUCH_POSITION_CORRECTION_DATA(keyCount > 0 && keyXCoordinates && keyYCoordinates && keyWidths && keyHeights && keyCharCodes && sweetSpotCenterXs && sweetSpotCenterYs && sweetSpotRadii), - mInputXCoordinates(NULL), mInputYCoordinates(NULL), + mInputXCoordinates(0), mInputYCoordinates(0), mTouchPositionCorrectionEnabled(false) { const int proximityGridLength = GRID_WIDTH * GRID_HEIGHT * MAX_PROXIMITY_CHARS_SIZE; mProximityCharsArray = new uint32_t[proximityGridLength]; @@ -100,9 +100,17 @@ inline int ProximityInfo::getStartIndexFromCoordinates(const int x, const int y) } bool ProximityInfo::hasSpaceProximity(const int x, const int y) const { + if (x < 0 || y < 0) { + if (DEBUG_DICT) { + LOGI("HasSpaceProximity: Illegal coordinates (%d, %d)", x, y); + assert(false); + } + return false; + } + const int startIndex = getStartIndexFromCoordinates(x, y); if (DEBUG_PROXIMITY_INFO) { - LOGI("hasSpaceProximity: index %d", startIndex); + LOGI("hasSpaceProximity: index %d, %d, %d", startIndex, x, y); } for (int i = 0; i < MAX_PROXIMITY_CHARS_SIZE; ++i) { if (DEBUG_PROXIMITY_INFO) { @@ -167,7 +175,7 @@ int ProximityInfo::getKeyIndex(const int c) const { // We do not have the coordinate data return NOT_A_INDEX; } - const unsigned short baseLowerC = Dictionary::toBaseLowerCase(c); + const unsigned short baseLowerC = toBaseLowerCase(c); if (baseLowerC > MAX_CHAR_CODE) { return NOT_A_INDEX; } @@ -232,7 +240,7 @@ ProximityInfo::ProximityType ProximityInfo::getMatchedProximityId(const int inde const unsigned short c, const bool checkProximityChars, int *proximityIndex) const { const int *currentChars = getProximityCharsAt(index); const int firstChar = currentChars[0]; - const unsigned short baseLowerC = Dictionary::toBaseLowerCase(c); + const unsigned short baseLowerC = toBaseLowerCase(c); // The first char in the array is what user typed. If it matches right away, // that means the user typed that same char for this pos. @@ -245,7 +253,7 @@ ProximityInfo::ProximityType ProximityInfo::getMatchedProximityId(const int inde // If the non-accented, lowercased version of that first character matches c, // then we have a non-accented version of the accented character the user // typed. Treat it as a close char. - if (Dictionary::toBaseLowerCase(firstChar) == baseLowerC) + if (toBaseLowerCase(firstChar) == baseLowerC) return NEAR_PROXIMITY_CHAR; // Not an exact nor an accent-alike match: search the list of close keys diff --git a/native/src/proximity_info.h b/native/src/proximity_info.h index 35e354c6e..9ca5505a7 100644 --- a/native/src/proximity_info.h +++ b/native/src/proximity_info.h @@ -26,7 +26,7 @@ namespace latinime { class Correction; class ProximityInfo { -public: + public: static const int NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR_LOG_2 = 10; static const int NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR = 1 << NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR_LOG_2; @@ -56,7 +56,7 @@ public: bool existsCharInProximityAt(const int index, const int c) const; bool existsAdjacentProximityChars(const int index) const; ProximityType getMatchedProximityId(const int index, const unsigned short c, - const bool checkProximityChars, int *proximityIndex = NULL) const; + const bool checkProximityChars, int *proximityIndex = 0) const; int getNormalizedSquaredDistance(const int inputIndex, const int proximityIndex) const { return mNormalizedSquaredDistances[inputIndex * MAX_PROXIMITY_CHARS_SIZE + proximityIndex]; } @@ -68,7 +68,7 @@ public: return mTouchPositionCorrectionEnabled; } -private: + private: // The max number of the keys in one keyboard layout static const int MAX_KEY_COUNT_IN_A_KEYBOARD = 64; // The upper limit of the char code in mCodeToKeyIndex diff --git a/native/src/terminal_attributes.h b/native/src/terminal_attributes.h new file mode 100644 index 000000000..1f9815936 --- /dev/null +++ b/native/src/terminal_attributes.h @@ -0,0 +1,78 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_TERMINAL_ATTRIBUTES_H +#define LATINIME_TERMINAL_ATTRIBUTES_H + +#include "unigram_dictionary.h" + +namespace latinime { + +/** + * This class encapsulates information about a terminal that allows to + * retrieve local node attributes like the list of shortcuts without + * exposing the format structure to the client. + */ +class TerminalAttributes { + public: + class ShortcutIterator { + const uint8_t* const mDict; + bool mHasNextShortcutTarget; + int mPos; + + public: + ShortcutIterator(const uint8_t* dict, const int pos, const uint8_t flags) : mDict(dict), + mPos(pos) { + mHasNextShortcutTarget = (0 != (flags & UnigramDictionary::FLAG_HAS_SHORTCUT_TARGETS)); + } + + inline bool hasNextShortcutTarget() const { + return mHasNextShortcutTarget; + } + + // Gets the shortcut target itself as a uint16_t string. For parameters and return value + // see BinaryFormat::getWordAtAddress. + inline int getNextShortcutTarget(const int maxDepth, uint16_t* outWord) { + const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer(mDict, &mPos); + mHasNextShortcutTarget = + 0 != (shortcutFlags & UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT); + int shortcutAddress = + BinaryFormat::getAttributeAddressAndForwardPointer(mDict, shortcutFlags, &mPos); + return BinaryFormat::getWordAtAddress(mDict, shortcutAddress, maxDepth, outWord); + } + }; + + private: + const uint8_t* const mDict; + const uint8_t mFlags; + const int mStartPos; + + public: + TerminalAttributes(const uint8_t* const dict, const uint8_t flags, const int pos) : + mDict(dict), mFlags(flags), mStartPos(pos) { + } + + inline bool isShortcutOnly() const { + return 0 != (mFlags & UnigramDictionary::FLAG_IS_SHORTCUT_ONLY); + } + + inline ShortcutIterator getShortcutIterator() const { + return ShortcutIterator(mDict, mStartPos, mFlags); + } +}; +} // namespace latinime + +#endif // LATINIME_TERMINAL_ATTRIBUTES_H diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp index 8eb5a9700..e95e03ce5 100644 --- a/native/src/unigram_dictionary.cpp +++ b/native/src/unigram_dictionary.cpp @@ -25,6 +25,7 @@ #include "unigram_dictionary.h" #include "binary_format.h" +#include "terminal_attributes.h" namespace latinime { @@ -48,19 +49,23 @@ UnigramDictionary::UnigramDictionary(const uint8_t* const streamStart, int typed if (DEBUG_DICT) { LOGI("UnigramDictionary - constructor"); } - mCorrection = new Correction(typedLetterMultiplier, fullWordMultiplier); } UnigramDictionary::~UnigramDictionary() { - delete mCorrection; } -static inline unsigned int getCodesBufferSize(const int* codes, const int codesSize, +static inline unsigned int getCodesBufferSize(const int *codes, const int codesSize, const int MAX_PROXIMITY_CHARS) { return sizeof(*codes) * MAX_PROXIMITY_CHARS * codesSize; } -bool UnigramDictionary::isDigraph(const int* codes, const int i, const int codesSize) const { +// TODO: This needs to take an const unsigned short* and not tinker with its contents +static inline void addWord( + unsigned short *word, int length, int frequency, WordsPriorityQueue *queue) { + queue->push(frequency, word, length); +} + +bool UnigramDictionary::isDigraph(const int *codes, const int i, const int codesSize) const { // There can't be a digraph if we don't have at least 2 characters to examine if (i + 2 > codesSize) return false; @@ -86,9 +91,10 @@ bool UnigramDictionary::isDigraph(const int* codes, const int i, const int codes // codesSrc is the current point in the user-input, original, content-unmodified buffer. // codesRemain is the remaining size in codesSrc. void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo, - const int *xcoordinates, const int* ycoordinates, const int *codesBuffer, - const int codesBufferSize, const int flags, const int* codesSrc, const int codesRemain, - const int currentDepth, int* codesDest, unsigned short* outWords, int* frequencies) { + const int *xcoordinates, const int *ycoordinates, const int *codesBuffer, + const int codesBufferSize, const int flags, const int *codesSrc, + const int codesRemain, const int currentDepth, int *codesDest, Correction *correction, + WordsPriorityQueuePool *queuePool) { if (currentDepth < MAX_UMLAUT_SEARCH_DEPTH) { for (int i = 0; i < codesRemain; ++i) { @@ -105,8 +111,8 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, codesBufferSize, flags, codesSrc + (i + 1) * MAX_PROXIMITY_CHARS, codesRemain - i - 1, - currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS, outWords, - frequencies); + currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS, correction, + queuePool); // Copy the second char of the digraph in place, then continue processing on // the remaining part of the word. @@ -114,9 +120,9 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit memcpy(codesDest + i * MAX_PROXIMITY_CHARS, codesSrc + i * MAX_PROXIMITY_CHARS, BYTES_IN_ONE_CHAR); getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, - codesBuffer, codesBufferSize, flags, codesSrc + i * MAX_PROXIMITY_CHARS, - codesRemain - i, currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS, - outWords, frequencies); + codesBuffer, codesBufferSize, flags, + codesSrc + i * MAX_PROXIMITY_CHARS, codesRemain - i, currentDepth + 1, + codesDest + i * MAX_PROXIMITY_CHARS, correction, queuePool); return; } } @@ -132,40 +138,39 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit memcpy(codesDest, codesSrc, remainingBytes); getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codesBuffer, - (codesDest - codesBuffer) / MAX_PROXIMITY_CHARS + codesRemain, outWords, frequencies, - flags); + (codesDest - codesBuffer) / MAX_PROXIMITY_CHARS + codesRemain, flags, correction, + queuePool); } -int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, +int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, + WordsPriorityQueuePool *queuePool, Correction *correction, const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, const int flags, unsigned short *outWords, int *frequencies) { + Correction* masterCorrection = correction; if (REQUIRES_GERMAN_UMLAUT_PROCESSING & flags) { // Incrementally tune the word and try all possibilities int codesBuffer[getCodesBufferSize(codes, codesSize, MAX_PROXIMITY_CHARS)]; getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, - codesSize, flags, codes, codesSize, 0, codesBuffer, outWords, frequencies); + codesSize, flags, codes, codesSize, 0, codesBuffer, masterCorrection, queuePool); } else { // Normal processing - getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, codesSize, - outWords, frequencies, flags); + getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, codesSize, flags, + masterCorrection, queuePool); } PROF_START(20); - // Get the word count - int suggestedWordsCount = 0; - while (suggestedWordsCount < MAX_WORDS && mFrequencies[suggestedWordsCount] > 0) { - suggestedWordsCount++; - } + const int suggestedWordsCount = + queuePool->getMasterQueue()->outputSuggestions(frequencies, outWords); if (DEBUG_DICT) { LOGI("Returning %d words", suggestedWordsCount); /// Print the returned words for (int j = 0; j < suggestedWordsCount; ++j) { #ifdef FLAG_DBG - short unsigned int* w = mOutputChars + j * MAX_WORD_LENGTH; + short unsigned int* w = outWords + j * MAX_WORD_LENGTH; char s[MAX_WORD_LENGTH]; for (int i = 0; i <= MAX_WORD_LENGTH; i++) s[i] = w[i]; - LOGI("%s %i", s, mFrequencies[j]); + LOGI("%s %i", s, frequencies[j]); #endif } } @@ -175,23 +180,19 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x } void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, - const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, - unsigned short *outWords, int *frequencies, const int flags) { + const int *xcoordinates, const int *ycoordinates, const int *codes, + const int inputLength, const int flags, Correction *correction, + WordsPriorityQueuePool *queuePool) { PROF_OPEN; PROF_START(0); - initSuggestions( - proximityInfo, xcoordinates, ycoordinates, codes, codesSize, outWords, frequencies); - if (DEBUG_DICT) assert(codesSize == mInputLength); - - const int maxDepth = min(mInputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH); - mCorrection->initCorrection(mProximityInfo, mInputLength, maxDepth); + // Note: This line is intentionally left blank PROF_END(0); - const bool useFullEditDistance = USE_FULL_EDIT_DISTANCE & flags; - // TODO: remove PROF_START(1); - getSuggestionCandidates(useFullEditDistance); + const bool useFullEditDistance = USE_FULL_EDIT_DISTANCE & flags; + getOneWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, useFullEditDistance, + inputLength, correction, queuePool); PROF_END(1); PROF_START(2); @@ -209,12 +210,13 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, PROF_START(5); // Suggestions with missing space if (SUGGEST_WORDS_WITH_MISSING_SPACE_CHARACTER - && mInputLength >= MIN_USER_TYPED_LENGTH_FOR_MISSING_SPACE_SUGGESTION) { - for (int i = 1; i < codesSize; ++i) { + && inputLength >= MIN_USER_TYPED_LENGTH_FOR_MISSING_SPACE_SUGGESTION) { + for (int i = 1; i < inputLength; ++i) { if (DEBUG_DICT) { LOGI("--- Suggest missing space characters %d", i); } - getMissingSpaceWords(mInputLength, i, mCorrection, useFullEditDistance); + getMissingSpaceWords(proximityInfo, xcoordinates, ycoordinates, codes, + useFullEditDistance, inputLength, i, correction, queuePool); } } PROF_END(5); @@ -222,7 +224,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, PROF_START(6); if (SUGGEST_WORDS_WITH_SPACE_PROXIMITY && proximityInfo) { // The first and last "mistyped spaces" are taken care of by excessive character handling - for (int i = 1; i < codesSize - 1; ++i) { + for (int i = 1; i < inputLength - 1; ++i) { if (DEBUG_DICT) { LOGI("--- Suggest words with proximity space %d", i); } @@ -233,7 +235,8 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, i, x, y, proximityInfo->hasSpaceProximity(x, y)); } if (proximityInfo->hasSpaceProximity(x, y)) { - getMistypedSpaceWords(mInputLength, i, mCorrection, useFullEditDistance); + getMistypedSpaceWords(proximityInfo, xcoordinates, ycoordinates, codes, + useFullEditDistance, inputLength, i, correction, queuePool); } } } @@ -241,153 +244,118 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, } void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xCoordinates, - const int *yCoordinates, const int *codes, const int codesSize, - unsigned short *outWords, int *frequencies) { + const int *yCoordinates, const int *codes, const int inputLength, + WordsPriorityQueue *queue, Correction *correction) { if (DEBUG_DICT) { LOGI("initSuggest"); } - mFrequencies = frequencies; - mOutputChars = outWords; - mInputLength = codesSize; - proximityInfo->setInputParams(codes, codesSize, xCoordinates, yCoordinates); - mProximityInfo = proximityInfo; -} - -static inline void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize) { - if (c < nextLettersSize) { - nextLetters[c]++; - } -} - -// TODO: We need to optimize addWord by using STL or something -// TODO: This needs to take an const unsigned short* and not tinker with its contents -bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency) { - word[length] = 0; - if (DEBUG_DICT && DEBUG_SHOW_FOUND_WORD) { -#ifdef FLAG_DBG - char s[length + 1]; - for (int i = 0; i <= length; i++) s[i] = word[i]; - LOGI("Found word = %s, freq = %d", s, frequency); -#endif - } - if (length > MAX_WORD_LENGTH) { - if (DEBUG_DICT) { - LOGI("Exceeded max word length."); - } - return false; - } - - // Find the right insertion point - int insertAt = 0; - while (insertAt < MAX_WORDS) { - // TODO: How should we sort words with the same frequency? - if (frequency > mFrequencies[insertAt]) { - break; - } - insertAt++; - } - if (insertAt < MAX_WORDS) { - if (DEBUG_DICT) { -#ifdef FLAG_DBG - char s[length + 1]; - for (int i = 0; i <= length; i++) s[i] = word[i]; - LOGI("Added word = %s, freq = %d, %d", s, frequency, S_INT_MAX); -#endif - } - memmove((char*) mFrequencies + (insertAt + 1) * sizeof(mFrequencies[0]), - (char*) mFrequencies + insertAt * sizeof(mFrequencies[0]), - (MAX_WORDS - insertAt - 1) * sizeof(mFrequencies[0])); - mFrequencies[insertAt] = frequency; - memmove((char*) mOutputChars + (insertAt + 1) * MAX_WORD_LENGTH * sizeof(short), - (char*) mOutputChars + insertAt * MAX_WORD_LENGTH * sizeof(short), - (MAX_WORDS - insertAt - 1) * sizeof(short) * MAX_WORD_LENGTH); - unsigned short *dest = mOutputChars + insertAt * MAX_WORD_LENGTH; - while (length--) { - *dest++ = *word++; - } - *dest = 0; // NULL terminate - if (DEBUG_DICT) { - LOGI("Added word at %d", insertAt); - } - return true; + proximityInfo->setInputParams(codes, inputLength, xCoordinates, yCoordinates); + if (queue) { + queue->clear(); } - return false; + const int maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH); + correction->initCorrection(proximityInfo, inputLength, maxDepth); } static const char QUOTE = '\''; static const char SPACE = ' '; -void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance) { +void UnigramDictionary::getOneWordSuggestions(ProximityInfo *proximityInfo, + const int *xcoordinates, const int *ycoordinates, const int *codes, + const bool useFullEditDistance, const int inputLength, Correction *correction, + WordsPriorityQueuePool *queuePool) { + WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); + initSuggestions( + proximityInfo, xcoordinates, ycoordinates, codes, inputLength, masterQueue, correction); + getSuggestionCandidates(useFullEditDistance, inputLength, correction, masterQueue, + true /* doAutoCompletion */, DEFAULT_MAX_ERRORS); +} + +void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance, + const int inputLength, Correction *correction, WordsPriorityQueue *queue, + const bool doAutoCompletion, const int maxErrors) { // TODO: Remove setCorrectionParams - mCorrection->setCorrectionParams(0, 0, 0, - -1 /* spaceProximityPos */, -1 /* missingSpacePos */, useFullEditDistance); + correction->setCorrectionParams(0, 0, 0, + -1 /* spaceProximityPos */, -1 /* missingSpacePos */, useFullEditDistance, + doAutoCompletion, maxErrors); int rootPosition = ROOT_POS; // Get the number of children of root, then increment the position int childCount = Dictionary::getCount(DICT_ROOT, &rootPosition); int outputIndex = 0; - mCorrection->initCorrectionState(rootPosition, childCount, (mInputLength <= 0)); + correction->initCorrectionState(rootPosition, childCount, (inputLength <= 0)); // Depth first search while (outputIndex >= 0) { - if (mCorrection->initProcessState(outputIndex)) { - int siblingPos = mCorrection->getTreeSiblingPos(outputIndex); + if (correction->initProcessState(outputIndex)) { + int siblingPos = correction->getTreeSiblingPos(outputIndex); int firstChildPos; const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, - mCorrection, &childCount, &firstChildPos, &siblingPos); + correction, &childCount, &firstChildPos, &siblingPos, queue); // Update next sibling pos - mCorrection->setTreeSiblingPos(outputIndex, siblingPos); + correction->setTreeSiblingPos(outputIndex, siblingPos); if (needsToTraverseChildrenNodes) { // Goes to child node - outputIndex = mCorrection->goDownTree(outputIndex, childCount, firstChildPos); + outputIndex = correction->goDownTree(outputIndex, childCount, firstChildPos); } } else { // Goes to parent sibling node - outputIndex = mCorrection->getTreeParentIndex(outputIndex); + outputIndex = correction->getTreeParentIndex(outputIndex); } } } -void UnigramDictionary::getMissingSpaceWords( +void UnigramDictionary::getMissingSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates, + const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, const int missingSpacePos, Correction *correction, - const bool useFullEditDistance) { - correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */, - -1 /* transposedPos */, -1 /* spaceProximityPos */, missingSpacePos, - useFullEditDistance); - getSplitTwoWordsSuggestion(inputLength, correction); + WordsPriorityQueuePool* queuePool) { + getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, + useFullEditDistance, inputLength, missingSpacePos, -1/* spaceProximityPos */, + correction, queuePool); } -void UnigramDictionary::getMistypedSpaceWords( +void UnigramDictionary::getMistypedSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates, + const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, const int spaceProximityPos, Correction *correction, - const bool useFullEditDistance) { - correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */, - -1 /* transposedPos */, spaceProximityPos, -1 /* missingSpacePos */, - useFullEditDistance); - getSplitTwoWordsSuggestion(inputLength, correction); -} - -inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c, - const int inputIndex, const int skipPos, const int depth) { - const unsigned short userTypedChar = mProximityInfo->getPrimaryCharAt(inputIndex); - // Skip the ' or other letter and continue deeper - return (c == QUOTE && userTypedChar != QUOTE) || skipPos == depth; + WordsPriorityQueuePool* queuePool) { + getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, + useFullEditDistance, inputLength, -1 /* missingSpacePos */, spaceProximityPos, + correction, queuePool); } -inline void UnigramDictionary::onTerminal(const int freq, Correction *correction) { +inline void UnigramDictionary::onTerminal(const int freq, + const TerminalAttributes& terminalAttributes, Correction *correction, + WordsPriorityQueue *queue) { int wordLength; unsigned short* wordPointer; const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength); if (finalFreq >= 0) { - addWord(wordPointer, wordLength, finalFreq); + if (!terminalAttributes.isShortcutOnly()) { + addWord(wordPointer, wordLength, finalFreq, queue); + } + TerminalAttributes::ShortcutIterator iterator = terminalAttributes.getShortcutIterator(); + while (iterator.hasNextShortcutTarget()) { + // TODO: addWord only supports weak ordering, meaning we have no means to control the + // order of the shortcuts relative to one another or to the word. We need to either + // modulate the frequency of each shortcut according to its own shortcut frequency or + // to make the queue so that the insert order is protected inside the queue for words + // with the same score. + uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL]; + const int shortcutTargetStringLength = iterator.getNextShortcutTarget( + MAX_WORD_LENGTH_INTERNAL, shortcutTarget); + addWord(shortcutTarget, shortcutTargetStringLength, finalFreq, queue); + } } } -void UnigramDictionary::getSplitTwoWordsSuggestion( - const int inputLength, Correction* correction) { - const int spaceProximityPos = correction->getSpaceProximityPos(); - const int missingSpacePos = correction->getMissingSpacePos(); +void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo, + const int *xcoordinates, const int *ycoordinates, const int *codes, + const bool useFullEditDistance, const int inputLength, const int missingSpacePos, + const int spaceProximityPos, Correction *correction, WordsPriorityQueuePool* queuePool) { + WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); + if (DEBUG_DICT) { int inputCount = 0; if (spaceProximityPos >= 0) ++inputCount; @@ -408,9 +376,19 @@ void UnigramDictionary::getSplitTwoWordsSuggestion( return; const int newWordLength = firstWordLength + secondWordLength + 1; + + + // Space proximity preparation + //WordsPriorityQueue *subQueue = queuePool->getSubQueue1(); + //initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, firstWordLength, subQueue, + //correction); + //getSuggestionCandidates(useFullEditDistance, firstWordLength, correction, subQueue, false, + //MAX_ERRORS_FOR_TWO_WORDS); + // Allocating variable length array on stack unsigned short word[newWordLength]; - const int firstFreq = getMostFrequentWordLike(firstWordStartPos, firstWordLength, mWord); + const int firstFreq = getMostFrequentWordLike( + firstWordStartPos, firstWordLength, proximityInfo, mWord); if (DEBUG_DICT) { LOGI("First freq: %d", firstFreq); } @@ -420,7 +398,8 @@ void UnigramDictionary::getSplitTwoWordsSuggestion( word[i] = mWord[i]; } - const int secondFreq = getMostFrequentWordLike(secondWordStartPos, secondWordLength, mWord); + const int secondFreq = getMostFrequentWordLike( + secondWordStartPos, secondWordLength, proximityInfo, mWord); if (DEBUG_DICT) { LOGI("Second freq: %d", secondFreq); } @@ -431,22 +410,29 @@ void UnigramDictionary::getSplitTwoWordsSuggestion( word[i] = mWord[i - firstWordLength - 1]; } - const int pairFreq = mCorrection->getFreqForSplitTwoWords(firstFreq, secondFreq, word); + // TODO: Remove initSuggestions and correction->setCorrectionParams + initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, + 0 /* do not clear queue */, correction); + + correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */, + -1 /* transposedPos */, spaceProximityPos, missingSpacePos, + useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS); + const int pairFreq = correction->getFreqForSplitTwoWords(firstFreq, secondFreq, word); if (DEBUG_DICT) { LOGI("Split two words: %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength); } - addWord(word, newWordLength, pairFreq); + addWord(word, newWordLength, pairFreq, masterQueue); return; } // Wrapper for getMostFrequentWordLikeInner, which matches it to the previous // interface. inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex, - const int inputLength, unsigned short *word) { + const int inputLength, ProximityInfo *proximityInfo, unsigned short *word) { uint16_t inWord[inputLength]; for (int i = 0; i < inputLength; ++i) { - inWord[i] = (uint16_t)mProximityInfo->getPrimaryCharAt(startInputIndex + i); + inWord[i] = (uint16_t)proximityInfo->getPrimaryCharAt(startInputIndex + i); } return getMostFrequentWordLikeInner(inWord, inputLength, word); } @@ -470,8 +456,8 @@ static inline bool testCharGroupForContinuedLikeness(const uint8_t flags, const bool hasMultipleChars = (0 != (UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS & flags)); int pos = startPos; int32_t character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos); - int32_t baseChar = Dictionary::toBaseLowerCase(character); - const uint16_t wChar = Dictionary::toBaseLowerCase(inWord[startInputIndex]); + int32_t baseChar = toBaseLowerCase(character); + const uint16_t wChar = toBaseLowerCase(inWord[startInputIndex]); if (baseChar != wChar) { *outPos = hasMultipleChars ? BinaryFormat::skipOtherCharacters(root, pos) : pos; @@ -483,8 +469,8 @@ static inline bool testCharGroupForContinuedLikeness(const uint8_t flags, if (hasMultipleChars) { character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos); while (NOT_A_CHARACTER != character) { - baseChar = Dictionary::toBaseLowerCase(character); - if (Dictionary::toBaseLowerCase(inWord[++inputIndex]) != baseChar) { + baseChar = toBaseLowerCase(character); + if (toBaseLowerCase(inWord[++inputIndex]) != baseChar) { *outPos = BinaryFormat::skipOtherCharacters(root, pos); *outInputIndex = startInputIndex; return false; @@ -597,7 +583,7 @@ int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offs // given level, as output into newCount when traversing this level's parent. inline bool UnigramDictionary::processCurrentNode(const int initialPos, Correction *correction, int *newCount, - int *newChildrenPosition, int *nextSiblingPosition) { + int *newChildrenPosition, int *nextSiblingPosition, WordsPriorityQueue *queue) { if (DEBUG_DICT) { correction->checkState(); } @@ -648,7 +634,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, if (stateType == Correction::TRAVERSE_ALL_ON_TERMINAL || stateType == Correction::ON_TERMINAL) { needsToInvokeOnTerminal = true; - } else if (stateType == Correction::UNRELATED) { + } else if (stateType == Correction::UNRELATED || correction->needsToPrune()) { // We found that this is an unrelated character, so we should give up traversing // this node and its children entirely. // However we may not be on the last virtual node yet so we skip the remaining @@ -676,7 +662,9 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, // The frequency should be here, because we come here only if this is actually // a terminal node, and we are on its last char. const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos); - onTerminal(freq, mCorrection); + TerminalAttributes terminalAttributes(DICT_ROOT, flags, + BinaryFormat::skipFrequency(flags, pos)); + onTerminal(freq, terminalAttributes, correction, queue); } // If there are more chars in this node, then this virtual node has children. diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h index ef9709a89..23581425a 100644 --- a/native/src/unigram_dictionary.h +++ b/native/src/unigram_dictionary.h @@ -22,17 +22,14 @@ #include "correction_state.h" #include "defines.h" #include "proximity_info.h" - -#ifndef NULL -#define NULL 0 -#endif +#include "words_priority_queue.h" +#include "words_priority_queue_pool.h" namespace latinime { +class TerminalAttributes; class UnigramDictionary { - -public: - + public: // Mask and flags for children address type selection. static const int MASK_GROUP_ADDRESS_TYPE = 0xC0; static const int FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00; @@ -46,8 +43,14 @@ public: // Flag for terminal groups static const int FLAG_IS_TERMINAL = 0x10; + // Flag for shortcut targets presence + static const int FLAG_HAS_SHORTCUT_TARGETS = 0x08; // Flag for bigram presence static const int FLAG_HAS_BIGRAMS = 0x04; + // Flag for shortcut-only words. Some words are shortcut-only, which means they match when + // the user types them but they don't pop in the suggestion strip, only the words they are + // shortcuts for do. + static const int FLAG_IS_SHORTCUT_ONLY = 0x02; // Attribute (bigram/shortcut) related flags: // Flag for presence of more attributes @@ -64,47 +67,63 @@ public: static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20; static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30; + // Error tolerances + static const int DEFAULT_MAX_ERRORS = 2; + static const int MAX_ERRORS_FOR_TWO_WORDS = 1; + UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultipler, int fullWordMultiplier, int maxWordLength, int maxWords, int maxProximityChars, const bool isLatestDictVersion); bool isValidWord(const uint16_t* const inWord, const int length) const; int getBigramPosition(int pos, unsigned short *word, int offset, int length) const; - int getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, + int getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueuePool *queuePool, + Correction *correction, const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, const int flags, unsigned short *outWords, int *frequencies); virtual ~UnigramDictionary(); -private: - + private: void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, - const int *ycoordinates, const int *codes, const int codesSize, - unsigned short *outWords, int *frequencies, const int flags); - bool isDigraph(const int* codes, const int i, const int codesSize) const; + const int *ycoordinates, const int *codes, const int inputLength, + const int flags, Correction *correction, WordsPriorityQueuePool *queuePool); + bool isDigraph(const int *codes, const int i, const int codesSize) const; void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo, const int *xcoordinates, const int* ycoordinates, const int *codesBuffer, - const int codesBufferSize, const int flags, const int* codesSrc, const int codesRemain, - const int currentDepth, int* codesDest, unsigned short* outWords, int* frequencies); + const int codesBufferSize, const int flags, const int* codesSrc, + const int codesRemain, const int currentDepth, int* codesDest, Correction *correction, + WordsPriorityQueuePool* queuePool); void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, - unsigned short *outWords, int *frequencies); - void getSuggestionCandidates(const bool useFullEditDistance); - bool addWord(unsigned short *word, int length, int frequency); - void getSplitTwoWordsSuggestion(const int inputLength, Correction *correction); - void getMissingSpaceWords(const int inputLength, const int missingSpacePos, - Correction *correction, const bool useFullEditDistance); - void getMistypedSpaceWords(const int inputLength, const int spaceProximityPos, - Correction *correction, const bool useFullEditDistance); - void onTerminal(const int freq, Correction *correction); + WordsPriorityQueue *queue, Correction *correction); + void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, + const int *ycoordinates, const int *codes, const bool useFullEditDistance, + const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool); + void getSuggestionCandidates( + const bool useFullEditDistance, const int inputLength, Correction *correction, + WordsPriorityQueue* queue, const bool doAutoCompletion, const int maxErrors); + void getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo, + const int *xcoordinates, const int *ycoordinates, const int *codes, + const bool useFullEditDistance, const int inputLength, const int spaceProximityPos, + const int missingSpacePos, Correction *correction, WordsPriorityQueuePool* queuePool); + void getMissingSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates, + const int *ycoordinates, const int *codes, const bool useFullEditDistance, + const int inputLength, const int missingSpacePos, Correction *correction, + WordsPriorityQueuePool* queuePool); + void getMistypedSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates, + const int *ycoordinates, const int *codes, const bool useFullEditDistance, + const int inputLength, const int spaceProximityPos, Correction *correction, + WordsPriorityQueuePool* queuePool); + void onTerminal(const int freq, const TerminalAttributes& terminalAttributes, + Correction *correction, WordsPriorityQueue *queue); bool needsToSkipCurrentNode(const unsigned short c, const int inputIndex, const int skipPos, const int depth); // Process a node by considering proximity, missing and excessive character - bool processCurrentNode(const int initialPos, - Correction *correction, int *newCount, - int *newChildPosition, int *nextSiblingPosition); + bool processCurrentNode(const int initialPos, Correction *correction, int *newCount, + int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueue *queue); int getMostFrequentWordLike(const int startInputIndex, const int inputLength, - unsigned short *word); + ProximityInfo *proximityInfo, unsigned short *word); int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length, - short unsigned int* outWord); + short unsigned int *outWord); const uint8_t* const DICT_ROOT; const int MAX_WORD_LENGTH; @@ -127,14 +146,8 @@ private: }; static const struct digraph_t { int first; int second; } GERMAN_UMLAUT_DIGRAPHS[]; - int *mFrequencies; - unsigned short *mOutputChars; - ProximityInfo *mProximityInfo; - Correction *mCorrection; - int mInputLength; - // MAX_WORD_LENGTH_INTERNAL must be bigger than MAX_WORD_LENGTH - unsigned short mWord[MAX_WORD_LENGTH_INTERNAL]; - + // Still bundled members + unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];// TODO: remove int mStackChildCount[MAX_WORD_LENGTH_INTERNAL];// TODO: remove int mStackInputIndex[MAX_WORD_LENGTH_INTERNAL];// TODO: remove int mStackSiblingPos[MAX_WORD_LENGTH_INTERNAL];// TODO: remove diff --git a/native/src/words_priority_queue.h b/native/src/words_priority_queue.h new file mode 100644 index 000000000..84f2523c2 --- /dev/null +++ b/native/src/words_priority_queue.h @@ -0,0 +1,157 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_WORDS_PRIORITY_QUEUE_H +#define LATINIME_WORDS_PRIORITY_QUEUE_H + +#include <iostream> +#include <queue> +#include "defines.h" + +namespace latinime { + +class WordsPriorityQueue { + public: + class SuggestedWord { + public: + int mScore; + unsigned short mWord[MAX_WORD_LENGTH_INTERNAL]; + int mWordLength; + bool mUsed; + + void setParams(int score, unsigned short* word, int wordLength) { + mScore = score; + mWordLength = wordLength; + memcpy(mWord, word, sizeof(unsigned short) * wordLength); + mUsed = true; + } + }; + + WordsPriorityQueue(int maxWords, int maxWordLength) : + MAX_WORDS((unsigned int) maxWords), MAX_WORD_LENGTH( + (unsigned int) maxWordLength) { + mSuggestedWords = new SuggestedWord[maxWordLength]; + for (int i = 0; i < maxWordLength; ++i) { + mSuggestedWords[i].mUsed = false; + } + } + ~WordsPriorityQueue() { + delete[] mSuggestedWords; + } + + void push(int score, unsigned short* word, int wordLength) { + SuggestedWord* sw = 0; + if (mSuggestions.size() >= MAX_WORDS) { + sw = mSuggestions.top(); + const int minScore = sw->mScore; + if (minScore >= score) { + return; + } else { + sw->mUsed = false; + mSuggestions.pop(); + } + } + if (sw == 0) { + sw = getFreeSuggestedWord(score, word, wordLength); + } else { + sw->setParams(score, word, wordLength); + } + if (sw == 0) { + LOGE("SuggestedWord is accidentally null."); + return; + } + if (DEBUG_WORDS_PRIORITY_QUEUE) { + LOGI("Push word. %d, %d", score, wordLength); + DUMP_WORD(word, wordLength); + } + mSuggestions.push(sw); + } + + SuggestedWord* topAndPop() { + if (mSuggestions.empty()) return 0; + SuggestedWord* sw = mSuggestions.top(); + mSuggestions.pop(); + return sw; + } + + int outputSuggestions(int *frequencies, unsigned short *outputChars) { + const unsigned int size = min(MAX_WORDS, mSuggestions.size()); + int index = size - 1; + while (!mSuggestions.empty() && index >= 0) { + SuggestedWord* sw = mSuggestions.top(); + if (DEBUG_WORDS_PRIORITY_QUEUE) { + LOGI("dump word. %d", sw->mScore); + DUMP_WORD(sw->mWord, sw->mWordLength); + } + const unsigned int wordLength = sw->mWordLength; + char* targetAdr = (char*) outputChars + + (index) * MAX_WORD_LENGTH * sizeof(short); + frequencies[index] = sw->mScore; + memcpy(targetAdr, sw->mWord, (wordLength) * sizeof(short)); + if (wordLength < MAX_WORD_LENGTH) { + ((unsigned short*) targetAdr)[wordLength] = 0; + } + sw->mUsed = false; + mSuggestions.pop(); + --index; + } + return size; + } + + int size() { + return mSuggestions.size(); + } + + void clear() { + while (!mSuggestions.empty()) { + SuggestedWord* sw = mSuggestions.top(); + if (DEBUG_WORDS_PRIORITY_QUEUE) { + LOGI("Clear word. %d", sw->mScore); + DUMP_WORD(sw->mWord, sw->mWordLength); + } + sw->mUsed = false; + mSuggestions.pop(); + } + } + + private: + struct wordComparator { + bool operator ()(SuggestedWord * left, SuggestedWord * right) { + return left->mScore > right->mScore; + } + }; + + SuggestedWord* getFreeSuggestedWord(int score, unsigned short* word, + int wordLength) { + for (unsigned int i = 0; i < MAX_WORD_LENGTH; ++i) { + if (!mSuggestedWords[i].mUsed) { + mSuggestedWords[i].setParams(score, word, wordLength); + return &mSuggestedWords[i]; + } + } + return 0; + } + + typedef std::priority_queue<SuggestedWord*, std::vector<SuggestedWord*>, + wordComparator> Suggestions; + Suggestions mSuggestions; + const unsigned int MAX_WORDS; + const unsigned int MAX_WORD_LENGTH; + SuggestedWord* mSuggestedWords; +}; +} + +#endif // LATINIME_WORDS_PRIORITY_QUEUE_H diff --git a/native/src/words_priority_queue_pool.h b/native/src/words_priority_queue_pool.h new file mode 100644 index 000000000..386297650 --- /dev/null +++ b/native/src/words_priority_queue_pool.h @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_WORDS_PRIORITY_QUEUE_POOL_H +#define LATINIME_WORDS_PRIORITY_QUEUE_POOL_H + +#include "words_priority_queue.h" + +namespace latinime { + +class WordsPriorityQueuePool { + public: + WordsPriorityQueuePool(int mainQueueMaxWords, int subQueueMaxWords, int maxWordLength) { + mMasterQueue = new WordsPriorityQueue(mainQueueMaxWords, maxWordLength); + mSubQueue1 = new WordsPriorityQueue(subQueueMaxWords, maxWordLength); + mSubQueue2 = new WordsPriorityQueue(subQueueMaxWords, maxWordLength); + } + + ~WordsPriorityQueuePool() { + delete mMasterQueue; + } + + WordsPriorityQueue* getMasterQueue() { + return mMasterQueue; + } + // TODO: Come up with more generic pool + WordsPriorityQueue* getSubQueue1() { + return mSubQueue1; + } + WordsPriorityQueue* getSubQueue2() { + return mSubQueue2; + } + + private: + WordsPriorityQueue *mMasterQueue; + WordsPriorityQueue *mSubQueue1; + WordsPriorityQueue *mSubQueue2; +}; +} + +#endif // LATINIME_WORDS_PRIORITY_QUEUE_POOL_H |