diff options
Diffstat (limited to 'native/src/unigram_dictionary.cpp')
-rw-r--r-- | native/src/unigram_dictionary.cpp | 269 |
1 files changed, 88 insertions, 181 deletions
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp index a7eb4e10d..fd6f14af8 100644 --- a/native/src/unigram_dictionary.cpp +++ b/native/src/unigram_dictionary.cpp @@ -260,7 +260,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, if (DEBUG_DICT) { queuePool->dumpSubQueue1TopSuggestions(); for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) { - WordsPriorityQueue* queue = queuePool->getSubQueue1(i); + WordsPriorityQueue* queue = queuePool->getSubQueue(FIRST_WORD_INDEX, i); if (queue->size() > 0) { WordsPriorityQueue::SuggestedWord* sw = queue->top(); const int score = sw->mScore; @@ -395,11 +395,8 @@ inline void UnigramDictionary::onTerminal(const int freq, // or more length. if (inputIndex >= SUB_QUEUE_MIN_WORD_LENGTH && addToSubQueue) { WordsPriorityQueue *subQueue; - if (currentWordIndex == 1) { - subQueue = queuePool->getSubQueue1(inputIndex); - } else if (currentWordIndex == 2) { - subQueue = queuePool->getSubQueue2(inputIndex); - } else { + subQueue = queuePool->getSubQueue(currentWordIndex, inputIndex); + if (!subQueue) { return; } const int finalFreq = correction->getFinalFreqForSubQueue(freq, &wordPointer, &wordLength, @@ -408,213 +405,123 @@ inline void UnigramDictionary::onTerminal(const int freq, } } -void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo, - const int *xcoordinates, const int *ycoordinates, const int *codes, - const bool useFullEditDistance, const int inputLength, const int missingSpacePos, - const int spaceProximityPos, Correction *correction, WordsPriorityQueuePool* queuePool, - const bool hasAutoCorrectionCandidate) { - if (inputLength >= MAX_WORD_LENGTH) return; - if (DEBUG_DICT) { - int inputCount = 0; - if (spaceProximityPos >= 0) ++inputCount; - if (missingSpacePos >= 0) ++inputCount; - assert(inputCount <= 1); - // MAX_PROXIMITY_CHARS_SIZE in ProximityInfo.java should be 16 - assert(MAX_PROXIMITY_CHARS == 16); - } - - initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, - inputLength, correction); - WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); - const bool isSpaceProximity = spaceProximityPos >= 0; - - // First word - const int firstInputWordStartPos = 0; - const int firstInputWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos; - int firstFreq = getMostFrequentWordLike( - firstInputWordStartPos, firstInputWordLength, proximityInfo, mWord); - unsigned short* firstOutputWord = 0; - int firstOutputWordLength = 0; - if (firstFreq > 0) { - firstOutputWordLength = firstInputWordLength; - firstOutputWord = mWord; +int UnigramDictionary::getSubStringSuggestion( + ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, + const int *codes, const bool useFullEditDistance, Correction *correction, + WordsPriorityQueuePool* queuePool, const int inputLength, + const bool hasAutoCorrectionCandidate, const int currentWordIndex, + const int inputWordStartPos, const int inputWordLength, + const int outputWordStartPos, unsigned short* outputWord, int *outputWordLength) { + unsigned short* tempOutputWord = 0; + int tempOutputWordLength = 0; + int freq = getMostFrequentWordLike( + inputWordStartPos, inputWordLength, proximityInfo, mWord); + if (freq > 0) { + tempOutputWordLength = inputWordLength; + tempOutputWord = mWord; } else if (!hasAutoCorrectionCandidate) { - WordsPriorityQueue* firstWordQueue = queuePool->getSubQueue1(firstInputWordLength); - if (!firstWordQueue || firstWordQueue->size() < 1) { - return; - } - int score = 0; - const double ns = firstWordQueue->getHighestNormalizedScore( - proximityInfo->getPrimaryInputWord(), firstInputWordLength, - &firstOutputWord, &score, &firstOutputWordLength); - if (DEBUG_DICT) { - AKLOGI("NS1 = %f, Score = %d", ns, score); - } - // Two words correction won't be done if the score of the first word doesn't exceed the - // threshold. - if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD - || firstOutputWordLength < SUB_QUEUE_MIN_WORD_LENGTH) { - return; - } - firstFreq = score >> (firstOutputWordLength - + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER); - } - - if (DEBUG_DICT) { - AKLOGI("First freq: %d", firstFreq); - } - - if (firstFreq <= 0 || firstOutputWordLength <= 0 || MAX_WORD_LENGTH <= firstOutputWordLength) { - return; - } - - // Allocating fixed length array on stack - unsigned short outputWord[MAX_WORD_LENGTH]; - int outputWordLength = 0; - - for (int i = 0; i < firstOutputWordLength; ++i) { - outputWord[i] = firstOutputWord[i]; - } - - outputWord[firstOutputWordLength] = SPACE; - outputWordLength = firstOutputWordLength + 1; - - // Second word - const int secondInputWordLength = isSpaceProximity - ? (inputLength - spaceProximityPos - 1) - : (inputLength - missingSpacePos); - const int secondInputWordStartPos = - isSpaceProximity ? (spaceProximityPos + 1) : missingSpacePos; - int secondFreq = getMostFrequentWordLike( - secondInputWordStartPos, secondInputWordLength, proximityInfo, mWord); - unsigned short* secondOutputWord = 0; - int secondOutputWordLength = 0; - - if (secondFreq > 0) { - secondOutputWordLength = secondInputWordLength; - secondOutputWord = mWord; - } else if (!hasAutoCorrectionCandidate) { - const int offset = secondInputWordStartPos; - initSuggestions(proximityInfo, &xcoordinates[offset], &ycoordinates[offset], - codes + offset * MAX_PROXIMITY_CHARS, secondInputWordLength, correction); - queuePool->clearSubQueue2(); - getSuggestionCandidates(useFullEditDistance, secondInputWordLength, correction, - queuePool, false, MAX_ERRORS_FOR_TWO_WORDS, SECOND_WORD_INDEX); - if (DEBUG_DICT) { - AKLOGI("Dump second word candidates %d", secondInputWordLength); - for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) { - queuePool->getSubQueue2(i)->dumpTopWord(); + if (inputWordStartPos > 0) { + const int offset = inputWordStartPos; + initSuggestions(proximityInfo, &xcoordinates[offset], &ycoordinates[offset], + codes + offset * MAX_PROXIMITY_CHARS, inputWordLength, correction); + queuePool->clearSubQueue(currentWordIndex); + getSuggestionCandidates(useFullEditDistance, inputWordLength, correction, + queuePool, false, MAX_ERRORS_FOR_TWO_WORDS, currentWordIndex); + if (DEBUG_DICT) { + if (currentWordIndex <= SUB_QUEUE_MAX_WORD_INDEX) { + AKLOGI("Dump word candidates(%d) %d", currentWordIndex, inputWordLength); + for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) { + queuePool->getSubQueue(currentWordIndex, i)->dumpTopWord(); + } + } } } - WordsPriorityQueue* secondWordQueue = queuePool->getSubQueue2(secondInputWordLength); - if (!secondWordQueue || secondWordQueue->size() < 1) { - return; + WordsPriorityQueue* queue = queuePool->getSubQueue(currentWordIndex, inputWordLength); + if (!queue || queue->size() < 1) { + return 0; } int score = 0; - const double ns = secondWordQueue->getHighestNormalizedScore( - proximityInfo->getPrimaryInputWord(), secondInputWordLength, - &secondOutputWord, &score, &secondOutputWordLength); + const double ns = queue->getHighestNormalizedScore( + proximityInfo->getPrimaryInputWord(), inputWordLength, + &tempOutputWord, &score, &tempOutputWordLength); if (DEBUG_DICT) { - AKLOGI("NS2 = %f, Score = %d", ns, score); + AKLOGI("NS(%d) = %f, Score = %d", currentWordIndex, ns, score); } // Two words correction won't be done if the score of the first word doesn't exceed the // threshold. if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD - || secondOutputWordLength < SUB_QUEUE_MIN_WORD_LENGTH) { - return; + || tempOutputWordLength < SUB_QUEUE_MIN_WORD_LENGTH) { + return 0; } - secondFreq = score >> (secondOutputWordLength + freq = score >> (tempOutputWordLength + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER); } - if (DEBUG_DICT) { - DUMP_WORD(secondOutputWord, secondOutputWordLength); - AKLOGI("Second freq: %d", secondFreq); + AKLOGI("Freq(%d): %d", currentWordIndex, freq); } - - if (secondFreq <= 0 || secondOutputWordLength <= 0 - || MAX_WORD_LENGTH <= (firstOutputWordLength + 1 + secondOutputWordLength)) { - return; + if (freq <= 0 || tempOutputWordLength <= 0 + || MAX_WORD_LENGTH <= (outputWordStartPos + tempOutputWordLength)) { + return 0; } - - for (int i = 0; i < secondOutputWordLength; ++i) { - outputWord[firstOutputWordLength + 1 + i] = secondOutputWord[i]; + for (int i = 0; i < tempOutputWordLength; ++i) { + outputWord[outputWordStartPos + i] = tempOutputWord[i]; } - - outputWordLength += secondOutputWordLength; - - // TODO: Remove initSuggestions and correction->setCorrectionParams - initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction); - - correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */, - -1 /* transposedPos */, spaceProximityPos, missingSpacePos, - useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS); - const int pairFreq = correction->getFreqForSplitTwoWords(firstFreq, secondFreq, outputWord); - if (DEBUG_DICT) { - AKLOGI("Split two words: %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength); + if ((inputWordStartPos + inputWordLength) < inputLength) { + if (outputWordStartPos + tempOutputWordLength >= MAX_WORD_LENGTH) { + return 0; + } + outputWord[outputWordStartPos + tempOutputWordLength] = SPACE; + ++tempOutputWordLength; } - addWord(outputWord, outputWordLength, pairFreq, masterQueue); - return; + *outputWordLength = outputWordStartPos + tempOutputWordLength; + return freq; } -void UnigramDictionary::getSplitTwoWordsSuggestionsOld(ProximityInfo *proximityInfo, +void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, const int missingSpacePos, - const int spaceProximityPos, Correction *correction, WordsPriorityQueuePool* queuePool) { - WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); - + const int spaceProximityPos, Correction *correction, WordsPriorityQueuePool* queuePool, + const bool hasAutoCorrectionCandidate) { + if (inputLength >= MAX_WORD_LENGTH) return; if (DEBUG_DICT) { int inputCount = 0; if (spaceProximityPos >= 0) ++inputCount; if (missingSpacePos >= 0) ++inputCount; assert(inputCount <= 1); + // MAX_PROXIMITY_CHARS_SIZE in ProximityInfo.java should be 16 + assert(MAX_PROXIMITY_CHARS == 16); } - const bool isSpaceProximity = spaceProximityPos >= 0; - const int firstWordStartPos = 0; - const int secondWordStartPos = isSpaceProximity ? (spaceProximityPos + 1) : missingSpacePos; - const int firstWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos; - const int secondWordLength = isSpaceProximity - ? (inputLength - spaceProximityPos - 1) - : (inputLength - missingSpacePos); - - if (inputLength >= MAX_WORD_LENGTH) return; - if (0 >= firstWordLength || 0 >= secondWordLength || firstWordStartPos >= secondWordStartPos - || firstWordStartPos < 0 || secondWordStartPos + secondWordLength > inputLength) - return; - - const int newWordLength = firstWordLength + secondWordLength + 1; + initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, + inputLength, correction); - // Space proximity preparation - //WordsPriorityQueue *subQueue = queuePool->getSubQueue1(); - //initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, firstWordLength, subQueue, - //correction); - //getSuggestionCandidates(useFullEditDistance, firstWordLength, correction, subQueue, false, - //MAX_ERRORS_FOR_TWO_WORDS); - - // Allocating variable length array on stack - unsigned short word[newWordLength]; - const int firstFreq = getMostFrequentWordLike( - firstWordStartPos, firstWordLength, proximityInfo, mWord); - if (DEBUG_DICT) { - AKLOGI("First freq: %d", firstFreq); - } - if (firstFreq <= 0) return; + // Allocating fixed length array on stack + unsigned short outputWord[MAX_WORD_LENGTH]; + int outputWordLength = 0; - for (int i = 0; i < firstWordLength; ++i) { - word[i] = mWord[i]; - } + WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); + const bool isSpaceProximity = spaceProximityPos >= 0; - const int secondFreq = getMostFrequentWordLike( - secondWordStartPos, secondWordLength, proximityInfo, mWord); - if (DEBUG_DICT) { - AKLOGI("Second freq: %d", secondFreq); + // First word + int inputWordStartPos = 0; + int inputWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos; + const int firstFreq = getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes, + useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate, + FIRST_WORD_INDEX, inputWordStartPos, inputWordLength, 0, outputWord, &outputWordLength); + if (firstFreq <= 0) { + return; } - if (secondFreq <= 0) return; - word[firstWordLength] = SPACE; - for (int i = (firstWordLength + 1); i < newWordLength; ++i) { - word[i] = mWord[i - firstWordLength - 1]; + // Second word + inputWordStartPos = isSpaceProximity ? (spaceProximityPos + 1) : missingSpacePos; + inputWordLength = isSpaceProximity ? (inputLength - spaceProximityPos - 1) + : (inputLength - missingSpacePos); + const int secondFreq = getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes, + useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate, + SECOND_WORD_INDEX, inputWordStartPos, inputWordLength, outputWordLength, outputWord, + &outputWordLength); + if (secondFreq <= 0) { + return; } // TODO: Remove initSuggestions and correction->setCorrectionParams @@ -623,11 +530,11 @@ void UnigramDictionary::getSplitTwoWordsSuggestionsOld(ProximityInfo *proximityI correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */, -1 /* transposedPos */, spaceProximityPos, missingSpacePos, useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS); - const int pairFreq = correction->getFreqForSplitTwoWords(firstFreq, secondFreq, word); + const int pairFreq = correction->getFreqForSplitTwoWords(firstFreq, secondFreq, outputWord); if (DEBUG_DICT) { AKLOGI("Split two words: %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength); } - addWord(word, newWordLength, pairFreq, masterQueue); + addWord(outputWord, outputWordLength, pairFreq, masterQueue); return; } |