diff options
Diffstat (limited to 'native')
-rw-r--r-- | native/src/correction.cpp | 105 | ||||
-rw-r--r-- | native/src/correction.h | 5 | ||||
-rw-r--r-- | native/src/defines.h | 4 | ||||
-rw-r--r-- | native/src/dictionary.cpp | 5 | ||||
-rw-r--r-- | native/src/dictionary.h | 6 | ||||
-rw-r--r-- | native/src/unigram_dictionary.cpp | 61 | ||||
-rw-r--r-- | native/src/unigram_dictionary.h | 17 | ||||
-rw-r--r-- | native/src/words_priority_queue.h | 58 | ||||
-rw-r--r-- | native/src/words_priority_queue_pool.h | 53 |
9 files changed, 204 insertions, 110 deletions
diff --git a/native/src/correction.cpp b/native/src/correction.cpp index 22ee75a24..2da82dc3d 100644 --- a/native/src/correction.cpp +++ b/native/src/correction.cpp @@ -32,48 +32,6 @@ namespace latinime { // edit distance funcitons // ///////////////////////////// -#if 0 /* no longer used */ -inline static int editDistance( - int* editDistanceTable, const unsigned short* input, - const int inputLength, const unsigned short* output, const int outputLength) { - // dp[li][lo] dp[a][b] = dp[ a * lo + b] - int* dp = editDistanceTable; - const int li = inputLength + 1; - const int lo = outputLength + 1; - for (int i = 0; i < li; ++i) { - dp[lo * i] = i; - } - for (int i = 0; i < lo; ++i) { - dp[i] = i; - } - - for (int i = 0; i < li - 1; ++i) { - for (int j = 0; j < lo - 1; ++j) { - const uint32_t ci = toBaseLowerCase(input[i]); - const uint32_t co = toBaseLowerCase(output[j]); - const uint16_t cost = (ci == co) ? 0 : 1; - dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1, - min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost)); - if (i > 0 && j > 0 && ci == toBaseLowerCase(output[j - 1]) - && co == toBaseLowerCase(input[i - 1])) { - dp[(i + 1) * lo + (j + 1)] = min( - dp[(i + 1) * lo + (j + 1)], dp[(i - 1) * lo + (j - 1)] + cost); - } - } - } - - if (DEBUG_EDIT_DISTANCE) { - LOGI("IN = %d, OUT = %d", inputLength, outputLength); - for (int i = 0; i < li; ++i) { - for (int j = 0; j < lo; ++j) { - LOGI("EDIT[%d][%d], %d", i, j, dp[i * lo + j]); - } - } - } - return dp[li * lo - 1]; -} -#endif - inline static void initEditDistance(int *editDistanceTable) { for (int i = 0; i <= MAX_WORD_LENGTH_INTERNAL; ++i) { editDistanceTable[i] = i; @@ -145,7 +103,7 @@ void Correction::initCorrectionState( void Correction::setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos, const int spaceProximityPos, const int missingSpacePos, - const bool useFullEditDistance) { + const bool useFullEditDistance, const bool doAutoCompletion, const int maxErrors) { // TODO: remove mTransposedPos = transposedPos; mExcessivePos = excessivePos; @@ -158,6 +116,8 @@ void Correction::setCorrectionParams(const int skipPos, const int excessivePos, mSpaceProximityPos = spaceProximityPos; mMissingSpacePos = missingSpacePos; mUseFullEditDistance = useFullEditDistance; + mDoAutoCompletion = doAutoCompletion; + mMaxErrors = maxErrors; } void Correction::checkState() { @@ -279,7 +239,9 @@ void Correction::startToTraverseAllNodes() { bool Correction::needsToPrune() const { // TODO: use edit distance here - return mOutputIndex - 1 >= mMaxDepth || mProximityCount > mMaxEditDistance; + return mOutputIndex - 1 >= mMaxDepth || mProximityCount > mMaxEditDistance + // Allow one char longer word for missing character + || (!mDoAutoCompletion && (mOutputIndex + 1 >= mInputLength)); } void Correction::addCharToCurrentWord(const int32_t c) { @@ -311,12 +273,17 @@ inline bool isEquivalentChar(ProximityInfo::ProximityType type) { Correction::CorrectionType Correction::processCharAndCalcState( const int32_t c, const bool isTerminal) { const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount); + if (correctionCount > mMaxErrors) { + return UNRELATED; + } + // TODO: Change the limit if we'll allow two or more corrections const bool noCorrectionsHappenedSoFar = correctionCount == 0; const bool canTryCorrection = noCorrectionsHappenedSoFar; int proximityIndex = 0; mDistances[mOutputIndex] = NOT_A_DISTANCE; + // Skip checking this node if (mNeedsToTraverseAllNodes || isQuote(c)) { bool incremented = false; if (mLastCharExceeded && mInputIndex == mInputLength - 1) { @@ -341,6 +308,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( return processSkipChar(c, isTerminal, incremented); } + // Check possible corrections. if (mExcessivePos >= 0) { if (mExcessiveCount == 0 && mExcessivePos < mOutputIndex) { mExcessivePos = mOutputIndex; @@ -391,7 +359,12 @@ Correction::CorrectionType Correction::processCharAndCalcState( } // TODO: Change the limit if we'll allow two or more proximity chars with corrections - const bool checkProximityChars = noCorrectionsHappenedSoFar || mProximityCount == 0; + // Work around: When the mMaxErrors is 1, we only allow just one error + // including proximity correction. + const bool checkProximityChars = (mMaxErrors > 1) + ? (noCorrectionsHappenedSoFar || mProximityCount == 0) + : (noCorrectionsHappenedSoFar && mProximityCount == 0); + ProximityInfo::ProximityType matchedProximityCharId = secondTransposing ? ProximityInfo::EQUIVALENT_CHAR : mProximityInfo->getMatchedProximityId( @@ -931,4 +904,46 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords( return totalFreq; } +#if 0 /* no longer used. keep just for reference */ +inline static int editDistance( + int* editDistanceTable, const unsigned short* input, + const int inputLength, const unsigned short* output, const int outputLength) { + // dp[li][lo] dp[a][b] = dp[ a * lo + b] + int* dp = editDistanceTable; + const int li = inputLength + 1; + const int lo = outputLength + 1; + for (int i = 0; i < li; ++i) { + dp[lo * i] = i; + } + for (int i = 0; i < lo; ++i) { + dp[i] = i; + } + + for (int i = 0; i < li - 1; ++i) { + for (int j = 0; j < lo - 1; ++j) { + const uint32_t ci = toBaseLowerCase(input[i]); + const uint32_t co = toBaseLowerCase(output[j]); + const uint16_t cost = (ci == co) ? 0 : 1; + dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1, + min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost)); + if (i > 0 && j > 0 && ci == toBaseLowerCase(output[j - 1]) + && co == toBaseLowerCase(input[i - 1])) { + dp[(i + 1) * lo + (j + 1)] = min( + dp[(i + 1) * lo + (j + 1)], dp[(i - 1) * lo + (j - 1)] + cost); + } + } + } + + if (DEBUG_EDIT_DISTANCE) { + LOGI("IN = %d, OUT = %d", inputLength, outputLength); + for (int i = 0; i < li; ++i) { + for (int j = 0; j < lo; ++j) { + LOGI("EDIT[%d][%d], %d", i, j, dp[i * lo + j]); + } + } + } + return dp[li * lo - 1]; +} +#endif + } // namespace latinime diff --git a/native/src/correction.h b/native/src/correction.h index d4e99f0ce..e55be8dd6 100644 --- a/native/src/correction.h +++ b/native/src/correction.h @@ -44,7 +44,8 @@ public: // TODO: remove void setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos, - const int spaceProximityPos, const int missingSpacePos, const bool useFullEditDistance); + const int spaceProximityPos, const int missingSpacePos, const bool useFullEditDistance, + const bool doAutoCompletion, const int maxErrors); void checkState(); bool initProcessState(const int index); @@ -109,6 +110,7 @@ private: const ProximityInfo *mProximityInfo; bool mUseFullEditDistance; + bool mDoAutoCompletion; int mMaxEditDistance; int mMaxDepth; int mInputLength; @@ -116,6 +118,7 @@ private: int mMissingSpacePos; int mTerminalInputIndex; int mTerminalOutputIndex; + int mMaxErrors; // The following arrays are state buffer. unsigned short mWord[MAX_WORD_LENGTH_INTERNAL]; diff --git a/native/src/defines.h b/native/src/defines.h index ea643bf07..e1d2ca351 100644 --- a/native/src/defines.h +++ b/native/src/defines.h @@ -202,6 +202,10 @@ static void dumpWord(const unsigned short* word, const int length) { // This is only used for the size of array. Not to be used in c functions. #define MAX_WORD_LENGTH_INTERNAL 48 +// Word limit for sub queues used in WordsPriorityQueuePool. Sub queues are temporary queues used +// for better performance. +#define SUB_QUEUE_MAX_WORDS 5 + #define MAX_DEPTH_MULTIPLIER 3 // TODO: Reduce this constant if possible; check the maximum number of umlauts in the same German diff --git a/native/src/dictionary.cpp b/native/src/dictionary.cpp index 55358ec81..e3673d425 100644 --- a/native/src/dictionary.cpp +++ b/native/src/dictionary.cpp @@ -39,7 +39,8 @@ Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust, } } mCorrection = new Correction(typedLetterMultiplier, fullWordMultiplier); - mWordsPriorityQueue = new WordsPriorityQueue(maxWords, maxWordLength); + mWordsPriorityQueuePool = new WordsPriorityQueuePool( + maxWords, SUB_QUEUE_MAX_WORDS, maxWordLength); mUnigramDictionary = new UnigramDictionary(mDict, typedLetterMultiplier, fullWordMultiplier, maxWordLength, maxWords, maxAlternatives, IS_LATEST_DICT_VERSION); mBigramDictionary = new BigramDictionary(mDict, maxWordLength, maxAlternatives, @@ -48,7 +49,7 @@ Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust, Dictionary::~Dictionary() { delete mCorrection; - delete mWordsPriorityQueue; + delete mWordsPriorityQueuePool; delete mUnigramDictionary; delete mBigramDictionary; } diff --git a/native/src/dictionary.h b/native/src/dictionary.h index 2a6e4e0d5..52048ecca 100644 --- a/native/src/dictionary.h +++ b/native/src/dictionary.h @@ -23,7 +23,7 @@ #include "defines.h" #include "proximity_info.h" #include "unigram_dictionary.h" -#include "words_priority_queue.h" +#include "words_priority_queue_pool.h" namespace latinime { @@ -34,7 +34,7 @@ public: int getSuggestions(ProximityInfo *proximityInfo, int *xcoordinates, int *ycoordinates, int *codes, int codesSize, int flags, unsigned short *outWords, int *frequencies) { - return mUnigramDictionary->getSuggestions(proximityInfo, mWordsPriorityQueue, + return mUnigramDictionary->getSuggestions(proximityInfo, mWordsPriorityQueuePool, mCorrection, xcoordinates, ycoordinates, codes, codesSize, flags, outWords, frequencies); } @@ -81,7 +81,7 @@ private: const bool IS_LATEST_DICT_VERSION; UnigramDictionary *mUnigramDictionary; BigramDictionary *mBigramDictionary; - WordsPriorityQueue *mWordsPriorityQueue; + WordsPriorityQueuePool *mWordsPriorityQueuePool; Correction *mCorrection; }; diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp index a2c1f72a1..0db33e7d8 100644 --- a/native/src/unigram_dictionary.cpp +++ b/native/src/unigram_dictionary.cpp @@ -93,7 +93,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit const int *xcoordinates, const int *ycoordinates, const int *codesBuffer, const int codesBufferSize, const int flags, const int *codesSrc, const int codesRemain, const int currentDepth, int *codesDest, Correction *correction, - WordsPriorityQueue *queue) { + WordsPriorityQueuePool *queuePool) { if (currentDepth < MAX_UMLAUT_SEARCH_DEPTH) { for (int i = 0; i < codesRemain; ++i) { @@ -110,7 +110,8 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, codesBufferSize, flags, codesSrc + (i + 1) * MAX_PROXIMITY_CHARS, codesRemain - i - 1, - currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS, correction, queue); + currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS, correction, + queuePool); // Copy the second char of the digraph in place, then continue processing on // the remaining part of the word. @@ -120,7 +121,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, codesBufferSize, flags, codesSrc + i * MAX_PROXIMITY_CHARS, codesRemain - i, currentDepth + 1, - codesDest + i * MAX_PROXIMITY_CHARS, correction, queue); + codesDest + i * MAX_PROXIMITY_CHARS, correction, queuePool); return; } } @@ -137,27 +138,28 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codesBuffer, (codesDest - codesBuffer) / MAX_PROXIMITY_CHARS + codesRemain, flags, correction, - queue); + queuePool); } -int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueue *queue, - Correction *correction, const int *xcoordinates, const int *ycoordinates, const int *codes, - const int codesSize, const int flags, unsigned short *outWords, int *frequencies) { +int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, + WordsPriorityQueuePool *queuePool, Correction *correction, const int *xcoordinates, + const int *ycoordinates, const int *codes, const int codesSize, const int flags, + unsigned short *outWords, int *frequencies) { - WordsPriorityQueue* masterQueue = queue; Correction* masterCorrection = correction; if (REQUIRES_GERMAN_UMLAUT_PROCESSING & flags) { // Incrementally tune the word and try all possibilities int codesBuffer[getCodesBufferSize(codes, codesSize, MAX_PROXIMITY_CHARS)]; getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, - codesSize, flags, codes, codesSize, 0, codesBuffer, masterCorrection, masterQueue); + codesSize, flags, codes, codesSize, 0, codesBuffer, masterCorrection, queuePool); } else { // Normal processing getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, codesSize, flags, - masterCorrection, masterQueue); + masterCorrection, queuePool); } PROF_START(20); - const int suggestedWordsCount = masterQueue->outputSuggestions(frequencies, outWords); + const int suggestedWordsCount = + queuePool->getMasterQueue()->outputSuggestions(frequencies, outWords); if (DEBUG_DICT) { LOGI("Returning %d words", suggestedWordsCount); @@ -178,11 +180,13 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, WordsPriorit void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, - const int inputLength, const int flags, Correction *correction, WordsPriorityQueue *queue) { + const int inputLength, const int flags, Correction *correction, + WordsPriorityQueuePool *queuePool) { + WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); PROF_OPEN; PROF_START(0); - initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, queue); + initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, masterQueue); if (DEBUG_DICT) assert(codesSize == inputLength); const int maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH); @@ -192,7 +196,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, const bool useFullEditDistance = USE_FULL_EDIT_DISTANCE & flags; // TODO: remove PROF_START(1); - getSuggestionCandidates(useFullEditDistance, inputLength, correction, queue); + getSuggestionCandidates(useFullEditDistance, inputLength, correction, masterQueue); PROF_END(1); PROF_START(2); @@ -216,7 +220,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, LOGI("--- Suggest missing space characters %d", i); } getMissingSpaceWords( - inputLength, i, proximityInfo, correction, useFullEditDistance, queue); + inputLength, i, proximityInfo, correction, useFullEditDistance, queuePool); } } PROF_END(5); @@ -235,8 +239,8 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, i, x, y, proximityInfo->hasSpaceProximity(x, y)); } if (proximityInfo->hasSpaceProximity(x, y)) { - getMistypedSpaceWords( - inputLength, i, proximityInfo, correction, useFullEditDistance, queue); + getMistypedSpaceWords(inputLength, i, proximityInfo, correction, + useFullEditDistance, queuePool); } } } @@ -260,7 +264,8 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance, const int inputLength, Correction *correction, WordsPriorityQueue *queue) { // TODO: Remove setCorrectionParams correction->setCorrectionParams(0, 0, 0, - -1 /* spaceProximityPos */, -1 /* missingSpacePos */, useFullEditDistance); + -1 /* spaceProximityPos */, -1 /* missingSpacePos */, useFullEditDistance, + true /* doAutoCompletion */, DEFAULT_MAX_ERRORS); int rootPosition = ROOT_POS; // Get the number of children of root, then increment the position int childCount = Dictionary::getCount(DICT_ROOT, &rootPosition); @@ -292,20 +297,20 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance, void UnigramDictionary::getMissingSpaceWords( const int inputLength, const int missingSpacePos, ProximityInfo *proximityInfo, - Correction *correction, const bool useFullEditDistance, WordsPriorityQueue *queue) { + Correction *correction, const bool useFullEditDistance, WordsPriorityQueuePool *queuePool) { correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */, -1 /* transposedPos */, -1 /* spaceProximityPos */, missingSpacePos, - useFullEditDistance); - getSplitTwoWordsSuggestion(inputLength, proximityInfo, correction, queue); + useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS); + getSplitTwoWordsSuggestion(inputLength, proximityInfo, correction, queuePool); } void UnigramDictionary::getMistypedSpaceWords( const int inputLength, const int spaceProximityPos, ProximityInfo *proximityInfo, - Correction *correction, const bool useFullEditDistance, WordsPriorityQueue *queue) { + Correction *correction, const bool useFullEditDistance, WordsPriorityQueuePool *queuePool) { correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */, -1 /* transposedPos */, spaceProximityPos, -1 /* missingSpacePos */, - useFullEditDistance); - getSplitTwoWordsSuggestion(inputLength, proximityInfo, correction, queue); + useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS); + getSplitTwoWordsSuggestion(inputLength, proximityInfo, correction, queuePool); } inline void UnigramDictionary::onTerminal( @@ -320,7 +325,9 @@ inline void UnigramDictionary::onTerminal( void UnigramDictionary::getSplitTwoWordsSuggestion( const int inputLength, ProximityInfo *proximityInfo, Correction *correction, - WordsPriorityQueue *queue) { + WordsPriorityQueuePool *queuePool) { + WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); + const int spaceProximityPos = correction->getSpaceProximityPos(); const int missingSpacePos = correction->getMissingSpacePos(); if (DEBUG_DICT) { @@ -372,7 +379,7 @@ void UnigramDictionary::getSplitTwoWordsSuggestion( if (DEBUG_DICT) { LOGI("Split two words: %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength); } - addWord(word, newWordLength, pairFreq, queue); + addWord(word, newWordLength, pairFreq, masterQueue); return; } @@ -585,7 +592,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, if (stateType == Correction::TRAVERSE_ALL_ON_TERMINAL || stateType == Correction::ON_TERMINAL) { needsToInvokeOnTerminal = true; - } else if (stateType == Correction::UNRELATED) { + } else if (stateType == Correction::UNRELATED || correction->needsToPrune()) { // We found that this is an unrelated character, so we should give up traversing // this node and its children entirely. // However we may not be on the last virtual node yet so we skip the remaining diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h index 0b0126524..ce15cdd8f 100644 --- a/native/src/unigram_dictionary.h +++ b/native/src/unigram_dictionary.h @@ -23,6 +23,7 @@ #include "defines.h" #include "proximity_info.h" #include "words_priority_queue.h" +#include "words_priority_queue_pool.h" namespace latinime { @@ -61,12 +62,16 @@ public: static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20; static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30; + // Error tolerances + static const int DEFAULT_MAX_ERRORS = 2; + static const int MAX_ERRORS_FOR_TWO_WORDS = 1; + UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultipler, int fullWordMultiplier, int maxWordLength, int maxWords, int maxProximityChars, const bool isLatestDictVersion); bool isValidWord(const uint16_t* const inWord, const int length) const; int getBigramPosition(int pos, unsigned short *word, int offset, int length) const; - int getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueue *queue, + int getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueuePool *queuePool, Correction *correction, const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, const int flags, unsigned short *outWords, int *frequencies); @@ -76,13 +81,13 @@ private: void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int inputLength, - const int flags, Correction *correction, WordsPriorityQueue *queue); + const int flags, Correction *correction, WordsPriorityQueuePool *queuePool); bool isDigraph(const int *codes, const int i, const int codesSize) const; void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo, const int *xcoordinates, const int* ycoordinates, const int *codesBuffer, const int codesBufferSize, const int flags, const int* codesSrc, const int codesRemain, const int currentDepth, int* codesDest, Correction *correction, - WordsPriorityQueue* queue); + WordsPriorityQueuePool* queuePool); void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, WordsPriorityQueue *queue); @@ -90,13 +95,13 @@ private: const bool useFullEditDistance, const int inputLength, Correction *correction, WordsPriorityQueue* queue); void getSplitTwoWordsSuggestion(const int inputLength, ProximityInfo *proximityInfo, - Correction *correction, WordsPriorityQueue *queue); + Correction *correction, WordsPriorityQueuePool *queuePool); void getMissingSpaceWords(const int inputLength, const int missingSpacePos, ProximityInfo *proximityInfo, Correction *correction, - const bool useFullEditDistance, WordsPriorityQueue *queue); + const bool useFullEditDistance, WordsPriorityQueuePool *queuePool); void getMistypedSpaceWords(const int inputLength, const int spaceProximityPos, ProximityInfo *proximityInfo, Correction *correction, - const bool useFullEditDistance, WordsPriorityQueue *queue); + const bool useFullEditDistance, WordsPriorityQueuePool *queuePool); void onTerminal(const int freq, Correction *correction, WordsPriorityQueue *queue); bool needsToSkipCurrentNode(const unsigned short c, const int inputIndex, const int skipPos, const int depth); diff --git a/native/src/words_priority_queue.h b/native/src/words_priority_queue.h index 366b1b67a..a4175d3e0 100644 --- a/native/src/words_priority_queue.h +++ b/native/src/words_priority_queue.h @@ -24,7 +24,7 @@ namespace latinime { class WordsPriorityQueue { -private: +public: class SuggestedWord { public: int mScore; @@ -40,31 +40,6 @@ private: } }; - struct wordComparator { - bool operator ()(SuggestedWord * left, SuggestedWord * right) { - return left->mScore > right->mScore; - } - }; - - SuggestedWord* getFreeSuggestedWord(int score, unsigned short* word, - int wordLength) { - for (unsigned int i = 0; i < MAX_WORD_LENGTH; ++i) { - if (!mSuggestedWords[i].mUsed) { - mSuggestedWords[i].setParams(score, word, wordLength); - return &mSuggestedWords[i]; - } - } - return 0; - } - - typedef std::priority_queue<SuggestedWord*, std::vector<SuggestedWord*>, - wordComparator> Suggestions; - Suggestions mSuggestions; - const unsigned int MAX_WORDS; - const unsigned int MAX_WORD_LENGTH; - SuggestedWord* mSuggestedWords; - -public: WordsPriorityQueue(int maxWords, int maxWordLength) : MAX_WORDS((unsigned int) maxWords), MAX_WORD_LENGTH( (unsigned int) maxWordLength) { @@ -105,6 +80,13 @@ public: mSuggestions.push(sw); } + SuggestedWord* topAndPop() { + if (mSuggestions.empty()) return 0; + SuggestedWord* sw = mSuggestions.top(); + mSuggestions.pop(); + return sw; + } + int outputSuggestions(int *frequencies, unsigned short *outputChars) { const unsigned int size = min(MAX_WORDS, mSuggestions.size()); int index = size - 1; @@ -140,6 +122,30 @@ public: mSuggestions.pop(); } } +private: + struct wordComparator { + bool operator ()(SuggestedWord * left, SuggestedWord * right) { + return left->mScore > right->mScore; + } + }; + + SuggestedWord* getFreeSuggestedWord(int score, unsigned short* word, + int wordLength) { + for (unsigned int i = 0; i < MAX_WORD_LENGTH; ++i) { + if (!mSuggestedWords[i].mUsed) { + mSuggestedWords[i].setParams(score, word, wordLength); + return &mSuggestedWords[i]; + } + } + return 0; + } + + typedef std::priority_queue<SuggestedWord*, std::vector<SuggestedWord*>, + wordComparator> Suggestions; + Suggestions mSuggestions; + const unsigned int MAX_WORDS; + const unsigned int MAX_WORD_LENGTH; + SuggestedWord* mSuggestedWords; }; } diff --git a/native/src/words_priority_queue_pool.h b/native/src/words_priority_queue_pool.h new file mode 100644 index 000000000..d964bfc3b --- /dev/null +++ b/native/src/words_priority_queue_pool.h @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_WORDS_PRIORITY_QUEUE_POOL_H +#define LATINIME_WORDS_PRIORITY_QUEUE_POOL_H + +#include "words_priority_queue.h" + +namespace latinime { + +class WordsPriorityQueuePool { +public: + WordsPriorityQueuePool(int mainQueueMaxWords, int subQueueMaxWords, int maxWordLength) { + mMasterQueue = new WordsPriorityQueue(mainQueueMaxWords, maxWordLength); + mSubQueue1 = new WordsPriorityQueue(subQueueMaxWords, maxWordLength); + mSubQueue2 = new WordsPriorityQueue(subQueueMaxWords, maxWordLength); + } + + ~WordsPriorityQueuePool() { + delete mMasterQueue; + } + + WordsPriorityQueue* getMasterQueue() { + return mMasterQueue; + } + // TODO: Come up with more generic pool + WordsPriorityQueue* getSubQueue1() { + return mSubQueue1; + } + WordsPriorityQueue* getSubQueue2() { + return mSubQueue2; + } +private: + WordsPriorityQueue *mMasterQueue; + WordsPriorityQueue *mSubQueue1; + WordsPriorityQueue *mSubQueue2; +}; +} + +#endif // LATINIME_WORDS_PRIORITY_QUEUE_POOL_H |