aboutsummaryrefslogtreecommitdiffstats
path: root/native/src/unigram_dictionary.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'native/src/unigram_dictionary.cpp')
-rw-r--r--native/src/unigram_dictionary.cpp177
1 files changed, 101 insertions, 76 deletions
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index 8b1a25d90..597e5c821 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@@ -224,14 +224,9 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
// Multiple word suggestions
if (SUGGEST_MULTIPLE_WORDS
&& inputLength >= MIN_USER_TYPED_LENGTH_FOR_MULTIPLE_WORD_SUGGESTION) {
- for (int i = 1; i < inputLength; ++i) {
- if (DEBUG_DICT) {
- AKLOGI("--- Suggest multiple words %d", i);
- }
- getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
- useFullEditDistance, inputLength, i, correction, queuePool,
- hasAutoCorrectionCandidate);
- }
+ getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
+ useFullEditDistance, inputLength, correction, queuePool,
+ hasAutoCorrectionCandidate);
}
PROF_END(5);
@@ -329,7 +324,7 @@ inline void UnigramDictionary::onTerminal(const int freq,
int wordLength;
unsigned short* wordPointer;
- if ((currentWordIndex == 1) && addToMasterQueue) {
+ if ((currentWordIndex == FIRST_WORD_INDEX) && addToMasterQueue) {
WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength);
if (finalFreq != NOT_A_FREQUENCY) {
@@ -377,11 +372,8 @@ bool UnigramDictionary::getSubStringSuggestion(
const int inputWordStartPos, const int inputWordLength,
const int outputWordStartPos, const bool isSpaceProximity, int *freqArray,
int*wordLengthArray, unsigned short* outputWord, int *outputWordLength) {
- if (DEBUG_DICT) {
- assert(currentWordIndex >= 1);
- }
unsigned short* tempOutputWord = 0;
- int tempOutputWordLength = 0;
+ int nextWordLength = 0;
// TODO: Optimize init suggestion
initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
inputLength, correction);
@@ -389,7 +381,7 @@ bool UnigramDictionary::getSubStringSuggestion(
int freq = getMostFrequentWordLike(
inputWordStartPos, inputWordLength, proximityInfo, mWord);
if (freq > 0) {
- tempOutputWordLength = inputWordLength;
+ nextWordLength = inputWordLength;
tempOutputWord = mWord;
} else if (!hasAutoCorrectionCandidate) {
if (inputWordStartPos > 0) {
@@ -400,7 +392,7 @@ bool UnigramDictionary::getSubStringSuggestion(
getSuggestionCandidates(useFullEditDistance, inputWordLength, correction,
queuePool, false, MAX_ERRORS_FOR_TWO_WORDS, currentWordIndex);
if (DEBUG_DICT) {
- if (currentWordIndex <= SUB_QUEUE_MAX_WORD_INDEX) {
+ if (currentWordIndex < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS) {
AKLOGI("Dump word candidates(%d) %d", currentWordIndex, inputWordLength);
for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
queuePool->getSubQueue(currentWordIndex, i)->dumpTopWord();
@@ -415,59 +407,122 @@ bool UnigramDictionary::getSubStringSuggestion(
int score = 0;
const double ns = queue->getHighestNormalizedScore(
proximityInfo->getPrimaryInputWord(), inputWordLength,
- &tempOutputWord, &score, &tempOutputWordLength);
+ &tempOutputWord, &score, &nextWordLength);
if (DEBUG_DICT) {
AKLOGI("NS(%d) = %f, Score = %d", currentWordIndex, ns, score);
}
// Two words correction won't be done if the score of the first word doesn't exceed the
// threshold.
if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD
- || tempOutputWordLength < SUB_QUEUE_MIN_WORD_LENGTH) {
+ || nextWordLength < SUB_QUEUE_MIN_WORD_LENGTH) {
return false;
}
- freq = score >> (tempOutputWordLength
- + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER);
+ freq = score >> (nextWordLength + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER);
}
if (DEBUG_DICT) {
- AKLOGI("Freq(%d): %d, length: %d, input length: %d, input start: %d"
- , currentWordIndex, freq, tempOutputWordLength, inputWordLength, inputWordStartPos);
+ AKLOGI("Freq(%d): %d, length: %d, input length: %d, input start: %d (%d)"
+ , currentWordIndex, freq, nextWordLength, inputWordLength, inputWordStartPos,
+ wordLengthArray[0]);
}
- if (freq <= 0 || tempOutputWordLength <= 0
- || MAX_WORD_LENGTH <= (outputWordStartPos + tempOutputWordLength)) {
+ if (freq <= 0 || nextWordLength <= 0
+ || MAX_WORD_LENGTH <= (outputWordStartPos + nextWordLength)) {
return false;
}
- for (int i = 0; i < tempOutputWordLength; ++i) {
+ for (int i = 0; i < nextWordLength; ++i) {
outputWord[outputWordStartPos + i] = tempOutputWord[i];
}
// Put output values
- freqArray[currentWordIndex - 1] = freq;
+ freqArray[currentWordIndex] = freq;
// TODO: put output length instead of input length
- wordLengthArray[currentWordIndex - 1] = inputWordLength;
- *outputWordLength = outputWordStartPos + tempOutputWordLength;
+ wordLengthArray[currentWordIndex] = inputWordLength;
+ const int tempOutputWordLength = outputWordStartPos + nextWordLength;
+ if (outputWordLength) {
+ *outputWordLength = tempOutputWordLength;
+ }
if ((inputWordStartPos + inputWordLength) < inputLength) {
- if (outputWordStartPos + tempOutputWordLength >= MAX_WORD_LENGTH) {
+ if (outputWordStartPos + nextWordLength >= MAX_WORD_LENGTH) {
return false;
}
outputWord[outputWordStartPos + tempOutputWordLength] = SPACE;
- ++*outputWordLength;
- } else if (currentWordIndex >= 2) {
+ if (outputWordLength) {
+ ++*outputWordLength;
+ }
+ } else if (currentWordIndex >= 1) {
// TODO: Handle 3 or more words
const int pairFreq = correction->getFreqForSplitTwoWords(
freqArray, wordLengthArray, isSpaceProximity, outputWord);
if (DEBUG_DICT) {
- AKLOGI("Split two words: %d, %d, %d, %d", freqArray[0], freqArray[1], pairFreq,
- inputLength);
+ AKLOGI("Split two words: %d, %d, %d, %d, (%d)", freqArray[0], freqArray[1], pairFreq,
+ inputLength, wordLengthArray[0]);
}
- addWord(outputWord, *outputWordLength, pairFreq, queuePool->getMasterQueue());
+ addWord(outputWord, tempOutputWordLength, pairFreq, queuePool->getMasterQueue());
}
return true;
}
+void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
+ const int *xcoordinates, const int *ycoordinates, const int *codes,
+ const bool useFullEditDistance, const int inputLength,
+ Correction *correction, WordsPriorityQueuePool* queuePool,
+ const bool hasAutoCorrectionCandidate, const int startInputPos, const int startWordIndex,
+ const int outputWordLength, int *freqArray, int* wordLengthArray,
+ unsigned short* outputWord) {
+ if (startWordIndex >= (MULTIPLE_WORDS_SUGGESTION_MAX_WORDS - 1)) {
+ // Return if the last word index
+ return;
+ }
+ for (int i = 1; i < inputLength; ++i) {
+ int tempOutputWordLength = 0;
+ // First word
+ int inputWordStartPos = 0;
+ int inputWordLength = i;
+ if (DEBUG_CORRECTION_FREQ) {
+ AKLOGI("Two words, %d", inputWordLength);
+ }
+ if (!getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
+ useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
+ FIRST_WORD_INDEX, inputWordStartPos, inputWordLength, 0, true /* not used */,
+ freqArray, wordLengthArray, outputWord, &tempOutputWordLength)) {
+ continue;
+ }
+
+ // Second word
+ // Missing space
+ inputWordStartPos = i;
+ inputWordLength = inputLength - i;
+ getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
+ useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
+ SECOND_WORD_INDEX, inputWordStartPos, inputWordLength, tempOutputWordLength,
+ false /* missing space */, freqArray, wordLengthArray, outputWord,
+ 0);
+
+ // Mistyped space
+ ++inputWordStartPos;
+ --inputWordLength;
+
+ if (inputWordLength <= 0) {
+ continue;
+ }
+
+ const int x = xcoordinates[inputWordStartPos - 1];
+ const int y = ycoordinates[inputWordStartPos - 1];
+ if (!proximityInfo->hasSpaceProximity(x, y)) {
+ continue;
+ }
+
+ getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
+ useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
+ SECOND_WORD_INDEX, inputWordStartPos, inputWordLength, tempOutputWordLength,
+ true /* mistyped space */, freqArray, wordLengthArray, outputWord,
+ 0);
+ }
+}
+
void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
const int *xcoordinates, const int *ycoordinates, const int *codes,
- const bool useFullEditDistance, const int inputLength, const int wordDivideIndex,
+ const bool useFullEditDistance, const int inputLength,
Correction *correction, WordsPriorityQueuePool* queuePool,
const bool hasAutoCorrectionCandidate) {
if (inputLength >= MAX_WORD_LENGTH) return;
@@ -475,51 +530,21 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo
// MAX_PROXIMITY_CHARS_SIZE in ProximityInfo.java should be 16
assert(MAX_PROXIMITY_CHARS == 16);
}
+ if (DEBUG_DICT) {
+ AKLOGI("--- Suggest multiple words");
+ }
// Allocating fixed length array on stack
unsigned short outputWord[MAX_WORD_LENGTH];
- int freqArray[SUB_QUEUE_MAX_WORD_INDEX];
- int wordLengthArray[SUB_QUEUE_MAX_WORD_INDEX];
- int outputWordLength = 0;
-
- // First word
- int inputWordStartPos = 0;
- int inputWordLength = wordDivideIndex;
- if (!getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
- useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
- FIRST_WORD_INDEX, inputWordStartPos, inputWordLength, 0, true /* not used */,
- freqArray, wordLengthArray, outputWord, &outputWordLength)) {
- return;
- }
-
- const int tempOutputWordLength = outputWordLength;
- // Second word
- // Missing space
- inputWordStartPos = wordDivideIndex;
- inputWordLength = inputLength - wordDivideIndex;
- getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
- useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
- SECOND_WORD_INDEX, inputWordStartPos, inputWordLength, tempOutputWordLength,
- false /* missing space */, freqArray, wordLengthArray, outputWord, &outputWordLength);
-
- // Mistyped space
- ++inputWordStartPos;
- --inputWordLength;
-
- if (inputWordLength <= 0) {
- return;
- }
-
- const int x = xcoordinates[inputWordStartPos - 1];
- const int y = ycoordinates[inputWordStartPos - 1];
- if (!proximityInfo->hasSpaceProximity(x, y)) {
- return;
- }
-
- getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
- useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
- SECOND_WORD_INDEX, inputWordStartPos, inputWordLength, tempOutputWordLength,
- true /* mistyped space */, freqArray, wordLengthArray, outputWord, &outputWordLength);
+ int freqArray[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS];
+ int wordLengthArray[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS];
+ const int outputWordLength = 0;
+ const int startInputPos = 0;
+ const int startWordIndex = 0;
+ getMultiWordsSuggestionRec(proximityInfo, xcoordinates, ycoordinates, codes,
+ useFullEditDistance, inputLength, correction, queuePool, hasAutoCorrectionCandidate,
+ startInputPos, startWordIndex, outputWordLength, freqArray, wordLengthArray,
+ outputWord);
}
// Wrapper for getMostFrequentWordLikeInner, which matches it to the previous