diff options
Diffstat (limited to 'native/src')
-rw-r--r-- | native/src/defines.h | 1 | ||||
-rw-r--r-- | native/src/unigram_dictionary.cpp | 205 | ||||
-rw-r--r-- | native/src/unigram_dictionary.h | 7 | ||||
-rw-r--r-- | native/src/words_priority_queue_pool.h | 37 |
4 files changed, 118 insertions, 132 deletions
diff --git a/native/src/defines.h b/native/src/defines.h index 9c2d08777..7e171acfd 100644 --- a/native/src/defines.h +++ b/native/src/defines.h @@ -217,6 +217,7 @@ static void prof_out(void) { #define SUB_QUEUE_MAX_WORDS 1 #define SUB_QUEUE_MAX_COUNT 10 #define SUB_QUEUE_MIN_WORD_LENGTH 4 +#define SUB_QUEUE_MAX_WORD_INDEX 2 #define TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD 0.39 #define START_TWO_WORDS_CORRECTION_THRESHOLD 0.22 diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp index 6a8973761..fd6f14af8 100644 --- a/native/src/unigram_dictionary.cpp +++ b/native/src/unigram_dictionary.cpp @@ -260,7 +260,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, if (DEBUG_DICT) { queuePool->dumpSubQueue1TopSuggestions(); for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) { - WordsPriorityQueue* queue = queuePool->getSubQueue1(i); + WordsPriorityQueue* queue = queuePool->getSubQueue(FIRST_WORD_INDEX, i); if (queue->size() > 0) { WordsPriorityQueue::SuggestedWord* sw = queue->top(); const int score = sw->mScore; @@ -395,11 +395,8 @@ inline void UnigramDictionary::onTerminal(const int freq, // or more length. if (inputIndex >= SUB_QUEUE_MIN_WORD_LENGTH && addToSubQueue) { WordsPriorityQueue *subQueue; - if (currentWordIndex == 1) { - subQueue = queuePool->getSubQueue1(inputIndex); - } else if (currentWordIndex == 2) { - subQueue = queuePool->getSubQueue2(inputIndex); - } else { + subQueue = queuePool->getSubQueue(currentWordIndex, inputIndex); + if (!subQueue) { return; } const int finalFreq = correction->getFinalFreqForSubQueue(freq, &wordPointer, &wordLength, @@ -408,6 +405,78 @@ inline void UnigramDictionary::onTerminal(const int freq, } } +int UnigramDictionary::getSubStringSuggestion( + ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, + const int *codes, const bool useFullEditDistance, Correction *correction, + WordsPriorityQueuePool* queuePool, const int inputLength, + const bool hasAutoCorrectionCandidate, const int currentWordIndex, + const int inputWordStartPos, const int inputWordLength, + const int outputWordStartPos, unsigned short* outputWord, int *outputWordLength) { + unsigned short* tempOutputWord = 0; + int tempOutputWordLength = 0; + int freq = getMostFrequentWordLike( + inputWordStartPos, inputWordLength, proximityInfo, mWord); + if (freq > 0) { + tempOutputWordLength = inputWordLength; + tempOutputWord = mWord; + } else if (!hasAutoCorrectionCandidate) { + if (inputWordStartPos > 0) { + const int offset = inputWordStartPos; + initSuggestions(proximityInfo, &xcoordinates[offset], &ycoordinates[offset], + codes + offset * MAX_PROXIMITY_CHARS, inputWordLength, correction); + queuePool->clearSubQueue(currentWordIndex); + getSuggestionCandidates(useFullEditDistance, inputWordLength, correction, + queuePool, false, MAX_ERRORS_FOR_TWO_WORDS, currentWordIndex); + if (DEBUG_DICT) { + if (currentWordIndex <= SUB_QUEUE_MAX_WORD_INDEX) { + AKLOGI("Dump word candidates(%d) %d", currentWordIndex, inputWordLength); + for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) { + queuePool->getSubQueue(currentWordIndex, i)->dumpTopWord(); + } + } + } + } + WordsPriorityQueue* queue = queuePool->getSubQueue(currentWordIndex, inputWordLength); + if (!queue || queue->size() < 1) { + return 0; + } + int score = 0; + const double ns = queue->getHighestNormalizedScore( + proximityInfo->getPrimaryInputWord(), inputWordLength, + &tempOutputWord, &score, &tempOutputWordLength); + if (DEBUG_DICT) { + AKLOGI("NS(%d) = %f, Score = %d", currentWordIndex, ns, score); + } + // Two words correction won't be done if the score of the first word doesn't exceed the + // threshold. + if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD + || tempOutputWordLength < SUB_QUEUE_MIN_WORD_LENGTH) { + return 0; + } + freq = score >> (tempOutputWordLength + + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER); + } + if (DEBUG_DICT) { + AKLOGI("Freq(%d): %d", currentWordIndex, freq); + } + if (freq <= 0 || tempOutputWordLength <= 0 + || MAX_WORD_LENGTH <= (outputWordStartPos + tempOutputWordLength)) { + return 0; + } + for (int i = 0; i < tempOutputWordLength; ++i) { + outputWord[outputWordStartPos + i] = tempOutputWord[i]; + } + if ((inputWordStartPos + inputWordLength) < inputLength) { + if (outputWordStartPos + tempOutputWordLength >= MAX_WORD_LENGTH) { + return 0; + } + outputWord[outputWordStartPos + tempOutputWordLength] = SPACE; + ++tempOutputWordLength; + } + *outputWordLength = outputWordStartPos + tempOutputWordLength; + return freq; +} + void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, const int missingSpacePos, @@ -425,124 +494,36 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction); - WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); - const bool isSpaceProximity = spaceProximityPos >= 0; - - // First word - const int firstInputWordStartPos = 0; - const int firstInputWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos; - int firstFreq = getMostFrequentWordLike( - firstInputWordStartPos, firstInputWordLength, proximityInfo, mWord); - unsigned short* firstOutputWord = 0; - int firstOutputWordLength = 0; - if (firstFreq > 0) { - firstOutputWordLength = firstInputWordLength; - firstOutputWord = mWord; - } else if (!hasAutoCorrectionCandidate) { - WordsPriorityQueue* firstWordQueue = queuePool->getSubQueue1(firstInputWordLength); - if (!firstWordQueue || firstWordQueue->size() < 1) { - return; - } - int score = 0; - const double ns = firstWordQueue->getHighestNormalizedScore( - proximityInfo->getPrimaryInputWord(), firstInputWordLength, - &firstOutputWord, &score, &firstOutputWordLength); - if (DEBUG_DICT) { - AKLOGI("NS1 = %f, Score = %d", ns, score); - } - // Two words correction won't be done if the score of the first word doesn't exceed the - // threshold. - if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD - || firstOutputWordLength < SUB_QUEUE_MIN_WORD_LENGTH) { - return; - } - firstFreq = score >> (firstOutputWordLength - + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER); - } - - if (DEBUG_DICT) { - AKLOGI("First freq: %d", firstFreq); - } - - if (firstFreq <= 0 || firstOutputWordLength <= 0 || MAX_WORD_LENGTH <= firstOutputWordLength) { - return; - } // Allocating fixed length array on stack unsigned short outputWord[MAX_WORD_LENGTH]; int outputWordLength = 0; - for (int i = 0; i < firstOutputWordLength; ++i) { - outputWord[i] = firstOutputWord[i]; - } + WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); + const bool isSpaceProximity = spaceProximityPos >= 0; - outputWord[firstOutputWordLength] = SPACE; - outputWordLength = firstOutputWordLength + 1; + // First word + int inputWordStartPos = 0; + int inputWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos; + const int firstFreq = getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes, + useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate, + FIRST_WORD_INDEX, inputWordStartPos, inputWordLength, 0, outputWord, &outputWordLength); + if (firstFreq <= 0) { + return; + } // Second word - const int secondInputWordLength = isSpaceProximity - ? (inputLength - spaceProximityPos - 1) + inputWordStartPos = isSpaceProximity ? (spaceProximityPos + 1) : missingSpacePos; + inputWordLength = isSpaceProximity ? (inputLength - spaceProximityPos - 1) : (inputLength - missingSpacePos); - const int secondInputWordStartPos = - isSpaceProximity ? (spaceProximityPos + 1) : missingSpacePos; - int secondFreq = getMostFrequentWordLike( - secondInputWordStartPos, secondInputWordLength, proximityInfo, mWord); - unsigned short* secondOutputWord = 0; - int secondOutputWordLength = 0; - - if (secondFreq > 0) { - secondOutputWordLength = secondInputWordLength; - secondOutputWord = mWord; - } else if (!hasAutoCorrectionCandidate) { - const int offset = secondInputWordStartPos; - initSuggestions(proximityInfo, &xcoordinates[offset], &ycoordinates[offset], - codes + offset * MAX_PROXIMITY_CHARS, secondInputWordLength, correction); - queuePool->clearSubQueue2(); - getSuggestionCandidates(useFullEditDistance, secondInputWordLength, correction, - queuePool, false, MAX_ERRORS_FOR_TWO_WORDS, SECOND_WORD_INDEX); - if (DEBUG_DICT) { - AKLOGI("Dump second word candidates %d", secondInputWordLength); - for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) { - queuePool->getSubQueue2(i)->dumpTopWord(); - } - } - WordsPriorityQueue* secondWordQueue = queuePool->getSubQueue2(secondInputWordLength); - if (!secondWordQueue || secondWordQueue->size() < 1) { - return; - } - int score = 0; - const double ns = secondWordQueue->getHighestNormalizedScore( - proximityInfo->getPrimaryInputWord(), secondInputWordLength, - &secondOutputWord, &score, &secondOutputWordLength); - if (DEBUG_DICT) { - AKLOGI("NS2 = %f, Score = %d", ns, score); - } - // Two words correction won't be done if the score of the first word doesn't exceed the - // threshold. - if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD - || secondOutputWordLength < SUB_QUEUE_MIN_WORD_LENGTH) { - return; - } - secondFreq = score >> (secondOutputWordLength - + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER); - } - - if (DEBUG_DICT) { - DUMP_WORD(secondOutputWord, secondOutputWordLength); - AKLOGI("Second freq: %d", secondFreq); - } - - if (secondFreq <= 0 || secondOutputWordLength <= 0 - || MAX_WORD_LENGTH <= (firstOutputWordLength + 1 + secondOutputWordLength)) { + const int secondFreq = getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes, + useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate, + SECOND_WORD_INDEX, inputWordStartPos, inputWordLength, outputWordLength, outputWord, + &outputWordLength); + if (secondFreq <= 0) { return; } - for (int i = 0; i < secondOutputWordLength; ++i) { - outputWord[firstOutputWordLength + 1 + i] = secondOutputWord[i]; - } - - outputWordLength += secondOutputWordLength; - // TODO: Remove initSuggestions and correction->setCorrectionParams initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction); diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h index 0b8271954..0f50ccbd8 100644 --- a/native/src/unigram_dictionary.h +++ b/native/src/unigram_dictionary.h @@ -127,6 +127,13 @@ class UnigramDictionary { ProximityInfo *proximityInfo, unsigned short *word); int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length, short unsigned int *outWord); + int getSubStringSuggestion( + ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, + const int *codes, const bool useFullEditDistance, Correction *correction, + WordsPriorityQueuePool* queuePool, const int inputLength, + const bool hasAutoCorrectionCandidate, const int currentWordIndex, + const int inputWordStartPos, const int inputWordLength, + const int outputWordStartPos, unsigned short* outputWord, int *outputWordLength); const uint8_t* const DICT_ROOT; const int MAX_WORD_LENGTH; diff --git a/native/src/words_priority_queue_pool.h b/native/src/words_priority_queue_pool.h index 599b89711..a4aa8b6ca 100644 --- a/native/src/words_priority_queue_pool.h +++ b/native/src/words_priority_queue_pool.h @@ -43,25 +43,24 @@ class WordsPriorityQueuePool { return mMasterQueue; } - // TODO: Come up with more generic pool - WordsPriorityQueue* getSubQueue1(const int id) { - if (id < 0 || id >= SUB_QUEUE_MAX_COUNT) { - if (DEBUG_WORDS_PRIORITY_QUEUE) { - assert(false); - } + WordsPriorityQueue* getSubQueue(const int wordIndex, const int inputWordLength) { + if (wordIndex > SUB_QUEUE_MAX_WORD_INDEX) { return 0; } - return mSubQueues1[id]; - } - - WordsPriorityQueue* getSubQueue2(const int id) { - if (id < 0 || id >= SUB_QUEUE_MAX_COUNT) { + if (inputWordLength < 0 || inputWordLength >= SUB_QUEUE_MAX_COUNT) { if (DEBUG_WORDS_PRIORITY_QUEUE) { assert(false); } return 0; } - return mSubQueues2[id]; + // TODO: Come up with more generic pool + if (wordIndex == 1) { + return mSubQueues1[inputWordLength]; + } else if (wordIndex == 2) { + return mSubQueues2[inputWordLength]; + } else { + return 0; + } } inline void clearAll() { @@ -72,15 +71,13 @@ class WordsPriorityQueuePool { } } - inline void clearSubQueue1() { + inline void clearSubQueue(const int wordIndex) { for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) { - mSubQueues1[i]->clear(); - } - } - - inline void clearSubQueue2() { - for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) { - mSubQueues2[i]->clear(); + if (wordIndex == 1) { + mSubQueues1[i]->clear(); + } else if (wordIndex == 2) { + mSubQueues2[i]->clear(); + } } } |