about summary refs log tree commit diff stats
path: root/native/src/unigram_dictionary.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'native/src/unigram_dictionary.cpp')
-rw-r--r-- native/src/unigram_dictionary.cpp 269
1 files changed, 88 insertions, 181 deletions
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index a7eb4e10d..fd6f14af8 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@@ -260,7 +260,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
if (DEBUG_DICT) {
queuePool->dumpSubQueue1TopSuggestions();
for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
- WordsPriorityQueue* queue = queuePool->getSubQueue1(i);
+ WordsPriorityQueue* queue = queuePool->getSubQueue(FIRST_WORD_INDEX, i);
if (queue->size() > 0) {
WordsPriorityQueue::SuggestedWord* sw = queue->top();
const int score = sw->mScore;
@@ -395,11 +395,8 @@ inline void UnigramDictionary::onTerminal(const int freq,
// or more length.
if (inputIndex >= SUB_QUEUE_MIN_WORD_LENGTH && addToSubQueue) {
WordsPriorityQueue *subQueue;
- if (currentWordIndex == 1) {
- subQueue = queuePool->getSubQueue1(inputIndex);
- } else if (currentWordIndex == 2) {
- subQueue = queuePool->getSubQueue2(inputIndex);
- } else {
+ subQueue = queuePool->getSubQueue(currentWordIndex, inputIndex);
+ if (!subQueue) {
return;
}
const int finalFreq = correction->getFinalFreqForSubQueue(freq, &wordPointer, &wordLength,
@@ -408,213 +405,123 @@ inline void UnigramDictionary::onTerminal(const int freq,
}
}
-void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
- const int *xcoordinates, const int *ycoordinates, const int *codes,
- const bool useFullEditDistance, const int inputLength, const int missingSpacePos,
- const int spaceProximityPos, Correction *correction, WordsPriorityQueuePool* queuePool,
- const bool hasAutoCorrectionCandidate) {
- if (inputLength >= MAX_WORD_LENGTH) return;
- if (DEBUG_DICT) {
- int inputCount = 0;
- if (spaceProximityPos >= 0) ++inputCount;
- if (missingSpacePos >= 0) ++inputCount;
- assert(inputCount <= 1);
- // MAX_PROXIMITY_CHARS_SIZE in ProximityInfo.java should be 16
- assert(MAX_PROXIMITY_CHARS == 16);
- }
-
- initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
- inputLength, correction);
- WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
- const bool isSpaceProximity = spaceProximityPos >= 0;
-
- // First word
- const int firstInputWordStartPos = 0;
- const int firstInputWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos;
- int firstFreq = getMostFrequentWordLike(
- firstInputWordStartPos, firstInputWordLength, proximityInfo, mWord);
- unsigned short* firstOutputWord = 0;
- int firstOutputWordLength = 0;
- if (firstFreq > 0) {
- firstOutputWordLength = firstInputWordLength;
- firstOutputWord = mWord;
+int UnigramDictionary::getSubStringSuggestion(
+ ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates,
+ const int *codes, const bool useFullEditDistance, Correction *correction,
+ WordsPriorityQueuePool* queuePool, const int inputLength,
+ const bool hasAutoCorrectionCandidate, const int currentWordIndex,
+ const int inputWordStartPos, const int inputWordLength,
+ const int outputWordStartPos, unsigned short* outputWord, int *outputWordLength) {
+ unsigned short* tempOutputWord = 0;
+ int tempOutputWordLength = 0;
+ int freq = getMostFrequentWordLike(
+ inputWordStartPos, inputWordLength, proximityInfo, mWord);
+ if (freq > 0) {
+ tempOutputWordLength = inputWordLength;
+ tempOutputWord = mWord;
} else if (!hasAutoCorrectionCandidate) {
- WordsPriorityQueue* firstWordQueue = queuePool->getSubQueue1(firstInputWordLength);
- if (!firstWordQueue || firstWordQueue->size() < 1) {
- return;
- }
- int score = 0;
- const double ns = firstWordQueue->getHighestNormalizedScore(
- proximityInfo->getPrimaryInputWord(), firstInputWordLength,
- &firstOutputWord, &score, &firstOutputWordLength);
- if (DEBUG_DICT) {
- AKLOGI("NS1 = %f, Score = %d", ns, score);
- }
- // Two words correction won't be done if the score of the first word doesn't exceed the
- // threshold.
- if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD
- || firstOutputWordLength < SUB_QUEUE_MIN_WORD_LENGTH) {
- return;
- }
- firstFreq = score >> (firstOutputWordLength
- + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER);
- }
-
- if (DEBUG_DICT) {
- AKLOGI("First freq: %d", firstFreq);
- }
-
- if (firstFreq <= 0 || firstOutputWordLength <= 0 || MAX_WORD_LENGTH <= firstOutputWordLength) {
- return;
- }
-
- // Allocating fixed length array on stack
- unsigned short outputWord[MAX_WORD_LENGTH];
- int outputWordLength = 0;
-
- for (int i = 0; i < firstOutputWordLength; ++i) {
- outputWord[i] = firstOutputWord[i];
- }
-
- outputWord[firstOutputWordLength] = SPACE;
- outputWordLength = firstOutputWordLength + 1;
-
- // Second word
- const int secondInputWordLength = isSpaceProximity
- ? (inputLength - spaceProximityPos - 1)
- : (inputLength - missingSpacePos);
- const int secondInputWordStartPos =
- isSpaceProximity ? (spaceProximityPos + 1) : missingSpacePos;
- int secondFreq = getMostFrequentWordLike(
- secondInputWordStartPos, secondInputWordLength, proximityInfo, mWord);
- unsigned short* secondOutputWord = 0;
- int secondOutputWordLength = 0;
-
- if (secondFreq > 0) {
- secondOutputWordLength = secondInputWordLength;
- secondOutputWord = mWord;
- } else if (!hasAutoCorrectionCandidate) {
- const int offset = secondInputWordStartPos;
- initSuggestions(proximityInfo, &xcoordinates[offset], &ycoordinates[offset],
- codes + offset * MAX_PROXIMITY_CHARS, secondInputWordLength, correction);
- queuePool->clearSubQueue2();
- getSuggestionCandidates(useFullEditDistance, secondInputWordLength, correction,
- queuePool, false, MAX_ERRORS_FOR_TWO_WORDS, SECOND_WORD_INDEX);
- if (DEBUG_DICT) {
- AKLOGI("Dump second word candidates %d", secondInputWordLength);
- for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
- queuePool->getSubQueue2(i)->dumpTopWord();
+ if (inputWordStartPos > 0) {
+ const int offset = inputWordStartPos;
+ initSuggestions(proximityInfo, &xcoordinates[offset], &ycoordinates[offset],
+ codes + offset * MAX_PROXIMITY_CHARS, inputWordLength, correction);
+ queuePool->clearSubQueue(currentWordIndex);
+ getSuggestionCandidates(useFullEditDistance, inputWordLength, correction,
+ queuePool, false, MAX_ERRORS_FOR_TWO_WORDS, currentWordIndex);
+ if (DEBUG_DICT) {
+ if (currentWordIndex <= SUB_QUEUE_MAX_WORD_INDEX) {
+ AKLOGI("Dump word candidates(%d) %d", currentWordIndex, inputWordLength);
+ for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
+ queuePool->getSubQueue(currentWordIndex, i)->dumpTopWord();
+ }
+ }
}
}
- WordsPriorityQueue* secondWordQueue = queuePool->getSubQueue2(secondInputWordLength);
- if (!secondWordQueue || secondWordQueue->size() < 1) {
- return;
+ WordsPriorityQueue* queue = queuePool->getSubQueue(currentWordIndex, inputWordLength);
+ if (!queue || queue->size() < 1) {
+ return 0;
}
int score = 0;
- const double ns = secondWordQueue->getHighestNormalizedScore(
- proximityInfo->getPrimaryInputWord(), secondInputWordLength,
- &secondOutputWord, &score, &secondOutputWordLength);
+ const double ns = queue->getHighestNormalizedScore(
+ proximityInfo->getPrimaryInputWord(), inputWordLength,
+ &tempOutputWord, &score, &tempOutputWordLength);
if (DEBUG_DICT) {
- AKLOGI("NS2 = %f, Score = %d", ns, score);
+ AKLOGI("NS(%d) = %f, Score = %d", currentWordIndex, ns, score);
}
// Two words correction won't be done if the score of the first word doesn't exceed the
// threshold.
if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD
- || secondOutputWordLength < SUB_QUEUE_MIN_WORD_LENGTH) {
- return;
+ || tempOutputWordLength < SUB_QUEUE_MIN_WORD_LENGTH) {
+ return 0;
}
- secondFreq = score >> (secondOutputWordLength
+ freq = score >> (tempOutputWordLength
+ TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER);
}
-
if (DEBUG_DICT) {
- DUMP_WORD(secondOutputWord, secondOutputWordLength);
- AKLOGI("Second freq: %d", secondFreq);
+ AKLOGI("Freq(%d): %d", currentWordIndex, freq);
}
-
- if (secondFreq <= 0 || secondOutputWordLength <= 0
- || MAX_WORD_LENGTH <= (firstOutputWordLength + 1 + secondOutputWordLength)) {
- return;
+ if (freq <= 0 || tempOutputWordLength <= 0
+ || MAX_WORD_LENGTH <= (outputWordStartPos + tempOutputWordLength)) {
+ return 0;
}
-
- for (int i = 0; i < secondOutputWordLength; ++i) {
- outputWord[firstOutputWordLength + 1 + i] = secondOutputWord[i];
+ for (int i = 0; i < tempOutputWordLength; ++i) {
+ outputWord[outputWordStartPos + i] = tempOutputWord[i];
}
-
- outputWordLength += secondOutputWordLength;
-
- // TODO: Remove initSuggestions and correction->setCorrectionParams
- initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction);
-
- correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
- -1 /* transposedPos */, spaceProximityPos, missingSpacePos,
- useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS);
- const int pairFreq = correction->getFreqForSplitTwoWords(firstFreq, secondFreq, outputWord);
- if (DEBUG_DICT) {
- AKLOGI("Split two words: %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength);
+ if ((inputWordStartPos + inputWordLength) < inputLength) {
+ if (outputWordStartPos + tempOutputWordLength >= MAX_WORD_LENGTH) {
+ return 0;
+ }
+ outputWord[outputWordStartPos + tempOutputWordLength] = SPACE;
+ ++tempOutputWordLength;
}
- addWord(outputWord, outputWordLength, pairFreq, masterQueue);
- return;
+ *outputWordLength = outputWordStartPos + tempOutputWordLength;
+ return freq;
}
-void UnigramDictionary::getSplitTwoWordsSuggestionsOld(ProximityInfo *proximityInfo,
+void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
const int *xcoordinates, const int *ycoordinates, const int *codes,
const bool useFullEditDistance, const int inputLength, const int missingSpacePos,
- const int spaceProximityPos, Correction *correction, WordsPriorityQueuePool* queuePool) {
- WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
-
+ const int spaceProximityPos, Correction *correction, WordsPriorityQueuePool* queuePool,
+ const bool hasAutoCorrectionCandidate) {
+ if (inputLength >= MAX_WORD_LENGTH) return;
if (DEBUG_DICT) {
int inputCount = 0;
if (spaceProximityPos >= 0) ++inputCount;
if (missingSpacePos >= 0) ++inputCount;
assert(inputCount <= 1);
+ // MAX_PROXIMITY_CHARS_SIZE in ProximityInfo.java should be 16
+ assert(MAX_PROXIMITY_CHARS == 16);
}
- const bool isSpaceProximity = spaceProximityPos >= 0;
- const int firstWordStartPos = 0;
- const int secondWordStartPos = isSpaceProximity ? (spaceProximityPos + 1) : missingSpacePos;
- const int firstWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos;
- const int secondWordLength = isSpaceProximity
- ? (inputLength - spaceProximityPos - 1)
- : (inputLength - missingSpacePos);
-
- if (inputLength >= MAX_WORD_LENGTH) return;
- if (0 >= firstWordLength || 0 >= secondWordLength || firstWordStartPos >= secondWordStartPos
- || firstWordStartPos < 0 || secondWordStartPos + secondWordLength > inputLength)
- return;
-
- const int newWordLength = firstWordLength + secondWordLength + 1;
+ initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
+ inputLength, correction);
- // Space proximity preparation
- //WordsPriorityQueue *subQueue = queuePool->getSubQueue1();
- //initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, firstWordLength, subQueue,
- //correction);
- //getSuggestionCandidates(useFullEditDistance, firstWordLength, correction, subQueue, false,
- //MAX_ERRORS_FOR_TWO_WORDS);
-
- // Allocating variable length array on stack
- unsigned short word[newWordLength];
- const int firstFreq = getMostFrequentWordLike(
- firstWordStartPos, firstWordLength, proximityInfo, mWord);
- if (DEBUG_DICT) {
- AKLOGI("First freq: %d", firstFreq);
- }
- if (firstFreq <= 0) return;
+ // Allocating fixed length array on stack
+ unsigned short outputWord[MAX_WORD_LENGTH];
+ int outputWordLength = 0;
- for (int i = 0; i < firstWordLength; ++i) {
- word[i] = mWord[i];
- }
+ WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
+ const bool isSpaceProximity = spaceProximityPos >= 0;
- const int secondFreq = getMostFrequentWordLike(
- secondWordStartPos, secondWordLength, proximityInfo, mWord);
- if (DEBUG_DICT) {
- AKLOGI("Second freq: %d", secondFreq);
+ // First word
+ int inputWordStartPos = 0;
+ int inputWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos;
+ const int firstFreq = getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
+ useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
+ FIRST_WORD_INDEX, inputWordStartPos, inputWordLength, 0, outputWord, &outputWordLength);
+ if (firstFreq <= 0) {
+ return;
}
- if (secondFreq <= 0) return;
- word[firstWordLength] = SPACE;
- for (int i = (firstWordLength + 1); i < newWordLength; ++i) {
- word[i] = mWord[i - firstWordLength - 1];
+ // Second word
+ inputWordStartPos = isSpaceProximity ? (spaceProximityPos + 1) : missingSpacePos;
+ inputWordLength = isSpaceProximity ? (inputLength - spaceProximityPos - 1)
+ : (inputLength - missingSpacePos);
+ const int secondFreq = getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
+ useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
+ SECOND_WORD_INDEX, inputWordStartPos, inputWordLength, outputWordLength, outputWord,
+ &outputWordLength);
+ if (secondFreq <= 0) {
+ return;
}
// TODO: Remove initSuggestions and correction->setCorrectionParams
@@ -623,11 +530,11 @@ void UnigramDictionary::getSplitTwoWordsSuggestionsOld(ProximityInfo *proximityI
correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
-1 /* transposedPos */, spaceProximityPos, missingSpacePos,
useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS);
- const int pairFreq = correction->getFreqForSplitTwoWords(firstFreq, secondFreq, word);
+ const int pairFreq = correction->getFreqForSplitTwoWords(firstFreq, secondFreq, outputWord);
if (DEBUG_DICT) {
AKLOGI("Split two words: %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength);
}
- addWord(word, newWordLength, pairFreq, masterQueue);
+ addWord(outputWord, outputWordLength, pairFreq, masterQueue);
return;
}