aboutsummaryrefslogtreecommitdiffstats
path: root/native/src/unigram_dictionary.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'native/src/unigram_dictionary.cpp')
-rw-r--r--native/src/unigram_dictionary.cpp158
1 files changed, 71 insertions, 87 deletions
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index fd6f14af8..8b1a25d90 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@@ -211,7 +211,6 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
PROF_END(3);
PROF_START(4);
- // Note: This line is intentionally left blank
bool hasAutoCorrectionCandidate = false;
WordsPriorityQueue* masterQueue = queuePool->getMasterQueue();
if (masterQueue->size() > 0) {
@@ -222,14 +221,14 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
PROF_END(4);
PROF_START(5);
- // Suggestions with missing space
- if (SUGGEST_WORDS_WITH_MISSING_SPACE_CHARACTER
- && inputLength >= MIN_USER_TYPED_LENGTH_FOR_MISSING_SPACE_SUGGESTION) {
+ // Multiple word suggestions
+ if (SUGGEST_MULTIPLE_WORDS
+ && inputLength >= MIN_USER_TYPED_LENGTH_FOR_MULTIPLE_WORD_SUGGESTION) {
for (int i = 1; i < inputLength; ++i) {
if (DEBUG_DICT) {
- AKLOGI("--- Suggest missing space characters %d", i);
+ AKLOGI("--- Suggest multiple words %d", i);
}
- getMissingSpaceWords(proximityInfo, xcoordinates, ycoordinates, codes,
+ getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
useFullEditDistance, inputLength, i, correction, queuePool,
hasAutoCorrectionCandidate);
}
@@ -237,26 +236,9 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
PROF_END(5);
PROF_START(6);
- if (SUGGEST_WORDS_WITH_SPACE_PROXIMITY && proximityInfo) {
- // The first and last "mistyped spaces" are taken care of by excessive character handling
- for (int i = 1; i < inputLength - 1; ++i) {
- if (DEBUG_DICT) {
- AKLOGI("--- Suggest words with proximity space %d", i);
- }
- const int x = xcoordinates[i];
- const int y = ycoordinates[i];
- if (DEBUG_PROXIMITY_INFO) {
- AKLOGI("Input[%d] x = %d, y = %d, has space proximity = %d",
- i, x, y, proximityInfo->hasSpaceProximity(x, y));
- }
- if (proximityInfo->hasSpaceProximity(x, y)) {
- getMistypedSpaceWords(proximityInfo, xcoordinates, ycoordinates, codes,
- useFullEditDistance, inputLength, i, correction, queuePool,
- hasAutoCorrectionCandidate);
- }
- }
- }
+ // Note: This line is intentionally left blank
PROF_END(6);
+
if (DEBUG_DICT) {
queuePool->dumpSubQueue1TopSuggestions();
for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
@@ -337,24 +319,6 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
}
}
-void UnigramDictionary::getMissingSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates,
- const int *ycoordinates, const int *codes, const bool useFullEditDistance,
- const int inputLength, const int missingSpacePos, Correction *correction,
- WordsPriorityQueuePool* queuePool, const bool hasAutoCorrectionCandidate) {
- getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
- useFullEditDistance, inputLength, missingSpacePos, -1/* spaceProximityPos */,
- correction, queuePool, hasAutoCorrectionCandidate);
-}
-
-void UnigramDictionary::getMistypedSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates,
- const int *ycoordinates, const int *codes, const bool useFullEditDistance,
- const int inputLength, const int spaceProximityPos, Correction *correction,
- WordsPriorityQueuePool* queuePool, const bool hasAutoCorrectionCandidate) {
- getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
- useFullEditDistance, inputLength, -1 /* missingSpacePos */, spaceProximityPos,
- correction, queuePool, hasAutoCorrectionCandidate);
-}
-
inline void UnigramDictionary::onTerminal(const int freq,
const TerminalAttributes& terminalAttributes, Correction *correction,
WordsPriorityQueuePool *queuePool, const bool addToMasterQueue,
@@ -405,15 +369,23 @@ inline void UnigramDictionary::onTerminal(const int freq,
}
}
-int UnigramDictionary::getSubStringSuggestion(
+bool UnigramDictionary::getSubStringSuggestion(
ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates,
const int *codes, const bool useFullEditDistance, Correction *correction,
WordsPriorityQueuePool* queuePool, const int inputLength,
const bool hasAutoCorrectionCandidate, const int currentWordIndex,
const int inputWordStartPos, const int inputWordLength,
- const int outputWordStartPos, unsigned short* outputWord, int *outputWordLength) {
+ const int outputWordStartPos, const bool isSpaceProximity, int *freqArray,
+ int*wordLengthArray, unsigned short* outputWord, int *outputWordLength) {
+ if (DEBUG_DICT) {
+ assert(currentWordIndex >= 1);
+ }
unsigned short* tempOutputWord = 0;
int tempOutputWordLength = 0;
+ // TODO: Optimize init suggestion
+ initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
+ inputLength, correction);
+
int freq = getMostFrequentWordLike(
inputWordStartPos, inputWordLength, proximityInfo, mWord);
if (freq > 0) {
@@ -438,7 +410,7 @@ int UnigramDictionary::getSubStringSuggestion(
}
WordsPriorityQueue* queue = queuePool->getSubQueue(currentWordIndex, inputWordLength);
if (!queue || queue->size() < 1) {
- return 0;
+ return false;
}
int score = 0;
const double ns = queue->getHighestNormalizedScore(
@@ -451,91 +423,103 @@ int UnigramDictionary::getSubStringSuggestion(
// threshold.
if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD
|| tempOutputWordLength < SUB_QUEUE_MIN_WORD_LENGTH) {
- return 0;
+ return false;
}
freq = score >> (tempOutputWordLength
+ TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER);
}
if (DEBUG_DICT) {
- AKLOGI("Freq(%d): %d", currentWordIndex, freq);
+ AKLOGI("Freq(%d): %d, length: %d, input length: %d, input start: %d"
+ , currentWordIndex, freq, tempOutputWordLength, inputWordLength, inputWordStartPos);
}
if (freq <= 0 || tempOutputWordLength <= 0
|| MAX_WORD_LENGTH <= (outputWordStartPos + tempOutputWordLength)) {
- return 0;
+ return false;
}
for (int i = 0; i < tempOutputWordLength; ++i) {
outputWord[outputWordStartPos + i] = tempOutputWord[i];
}
+
+ // Put output values
+ freqArray[currentWordIndex - 1] = freq;
+ // TODO: put output length instead of input length
+ wordLengthArray[currentWordIndex - 1] = inputWordLength;
+ *outputWordLength = outputWordStartPos + tempOutputWordLength;
+
if ((inputWordStartPos + inputWordLength) < inputLength) {
if (outputWordStartPos + tempOutputWordLength >= MAX_WORD_LENGTH) {
- return 0;
+ return false;
}
outputWord[outputWordStartPos + tempOutputWordLength] = SPACE;
- ++tempOutputWordLength;
+ ++*outputWordLength;
+ } else if (currentWordIndex >= 2) {
+ // TODO: Handle 3 or more words
+ const int pairFreq = correction->getFreqForSplitTwoWords(
+ freqArray, wordLengthArray, isSpaceProximity, outputWord);
+ if (DEBUG_DICT) {
+ AKLOGI("Split two words: %d, %d, %d, %d", freqArray[0], freqArray[1], pairFreq,
+ inputLength);
+ }
+ addWord(outputWord, *outputWordLength, pairFreq, queuePool->getMasterQueue());
}
- *outputWordLength = outputWordStartPos + tempOutputWordLength;
- return freq;
+ return true;
}
void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
const int *xcoordinates, const int *ycoordinates, const int *codes,
- const bool useFullEditDistance, const int inputLength, const int missingSpacePos,
- const int spaceProximityPos, Correction *correction, WordsPriorityQueuePool* queuePool,
+ const bool useFullEditDistance, const int inputLength, const int wordDivideIndex,
+ Correction *correction, WordsPriorityQueuePool* queuePool,
const bool hasAutoCorrectionCandidate) {
if (inputLength >= MAX_WORD_LENGTH) return;
if (DEBUG_DICT) {
- int inputCount = 0;
- if (spaceProximityPos >= 0) ++inputCount;
- if (missingSpacePos >= 0) ++inputCount;
- assert(inputCount <= 1);
// MAX_PROXIMITY_CHARS_SIZE in ProximityInfo.java should be 16
assert(MAX_PROXIMITY_CHARS == 16);
}
- initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
- inputLength, correction);
-
// Allocating fixed length array on stack
unsigned short outputWord[MAX_WORD_LENGTH];
+ int freqArray[SUB_QUEUE_MAX_WORD_INDEX];
+ int wordLengthArray[SUB_QUEUE_MAX_WORD_INDEX];
int outputWordLength = 0;
- WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
- const bool isSpaceProximity = spaceProximityPos >= 0;
-
// First word
int inputWordStartPos = 0;
- int inputWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos;
- const int firstFreq = getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
+ int inputWordLength = wordDivideIndex;
+ if (!getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
- FIRST_WORD_INDEX, inputWordStartPos, inputWordLength, 0, outputWord, &outputWordLength);
- if (firstFreq <= 0) {
+ FIRST_WORD_INDEX, inputWordStartPos, inputWordLength, 0, true /* not used */,
+ freqArray, wordLengthArray, outputWord, &outputWordLength)) {
return;
}
+ const int tempOutputWordLength = outputWordLength;
// Second word
- inputWordStartPos = isSpaceProximity ? (spaceProximityPos + 1) : missingSpacePos;
- inputWordLength = isSpaceProximity ? (inputLength - spaceProximityPos - 1)
- : (inputLength - missingSpacePos);
- const int secondFreq = getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
+ // Missing space
+ inputWordStartPos = wordDivideIndex;
+ inputWordLength = inputLength - wordDivideIndex;
+ getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
- SECOND_WORD_INDEX, inputWordStartPos, inputWordLength, outputWordLength, outputWord,
- &outputWordLength);
- if (secondFreq <= 0) {
+ SECOND_WORD_INDEX, inputWordStartPos, inputWordLength, tempOutputWordLength,
+ false /* missing space */, freqArray, wordLengthArray, outputWord, &outputWordLength);
+
+ // Mistyped space
+ ++inputWordStartPos;
+ --inputWordLength;
+
+ if (inputWordLength <= 0) {
return;
}
- // TODO: Remove initSuggestions and correction->setCorrectionParams
- initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction);
-
- correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
- -1 /* transposedPos */, spaceProximityPos, missingSpacePos,
- useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS);
- const int pairFreq = correction->getFreqForSplitTwoWords(firstFreq, secondFreq, outputWord);
- if (DEBUG_DICT) {
- AKLOGI("Split two words: %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength);
+ const int x = xcoordinates[inputWordStartPos - 1];
+ const int y = ycoordinates[inputWordStartPos - 1];
+ if (!proximityInfo->hasSpaceProximity(x, y)) {
+ return;
}
- addWord(outputWord, outputWordLength, pairFreq, masterQueue);
- return;
+
+ getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
+ useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
+ SECOND_WORD_INDEX, inputWordStartPos, inputWordLength, tempOutputWordLength,
+ true /* mistyped space */, freqArray, wordLengthArray, outputWord, &outputWordLength);
}
// Wrapper for getMostFrequentWordLikeInner, which matches it to the previous