about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- native/src/correction.cpp 11
-rw-r--r-- native/src/defines.h 6
-rw-r--r-- native/src/unigram_dictionary.cpp 177
-rw-r--r-- native/src/unigram_dictionary.h 9
-rw-r--r-- native/src/words_priority_queue_pool.h 39
5 files changed, 132 insertions, 110 deletions
diff --git a/native/src/correction.cpp b/native/src/correction.cpp
index ee5023532..7323747d7 100644
--- a/native/src/correction.cpp
+++ b/native/src/correction.cpp
@@ -827,11 +827,6 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
const bool capitalizedWordDemotion =
firstCapitalizedWordDemotion ^ secondCapitalizedWordDemotion;
- if (DEBUG_DICT_FULL) {
- AKLOGI("Two words: %c, %c, %d",
- word[0], word[firstWordLength + 1], capitalizedWordDemotion);
- }
-
if (firstWordLength == 0 || secondWordLength == 0) {
return 0;
}
@@ -891,6 +886,12 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
multiplyRate(TWO_WORDS_CAPITALIZED_DEMOTION_RATE, &totalFreq);
}
+ if (DEBUG_CORRECTION_FREQ) {
+ AKLOGI("Two words (%d, %d) (%d, %d) %d, %d", firstFreq, secondFreq, firstWordLength,
+ secondWordLength, capitalizedWordDemotion, totalFreq);
+ DUMP_WORD(word, firstWordLength);
+ }
+
return totalFreq;
}
diff --git a/native/src/defines.h b/native/src/defines.h
index c25f963e0..3f3f5ba5c 100644
--- a/native/src/defines.h
+++ b/native/src/defines.h
@@ -216,15 +216,15 @@ static void prof_out(void) {
#define SUB_QUEUE_MAX_WORDS 1
#define SUB_QUEUE_MAX_COUNT 10
#define SUB_QUEUE_MIN_WORD_LENGTH 4
-#define SUB_QUEUE_MAX_WORD_INDEX 2
+#define MULTIPLE_WORDS_SUGGESTION_MAX_WORDS 2
#define TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD 0.39
#define START_TWO_WORDS_CORRECTION_THRESHOLD 0.22
#define MAX_DEPTH_MULTIPLIER 3
-#define FIRST_WORD_INDEX 1
-#define SECOND_WORD_INDEX 2
+#define FIRST_WORD_INDEX 0
+#define SECOND_WORD_INDEX 1
// TODO: Reduce this constant if possible; check the maximum number of umlauts in the same German
// word in the dictionary
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index 8b1a25d90..597e5c821 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@@ -224,14 +224,9 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
// Multiple word suggestions
if (SUGGEST_MULTIPLE_WORDS
&& inputLength >= MIN_USER_TYPED_LENGTH_FOR_MULTIPLE_WORD_SUGGESTION) {
- for (int i = 1; i < inputLength; ++i) {
- if (DEBUG_DICT) {
- AKLOGI("--- Suggest multiple words %d", i);
- }
- getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
- useFullEditDistance, inputLength, i, correction, queuePool,
- hasAutoCorrectionCandidate);
- }
+ getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
+ useFullEditDistance, inputLength, correction, queuePool,
+ hasAutoCorrectionCandidate);
}
PROF_END(5);
@@ -329,7 +324,7 @@ inline void UnigramDictionary::onTerminal(const int freq,
int wordLength;
unsigned short* wordPointer;
- if ((currentWordIndex == 1) && addToMasterQueue) {
+ if ((currentWordIndex == FIRST_WORD_INDEX) && addToMasterQueue) {
WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength);
if (finalFreq != NOT_A_FREQUENCY) {
@@ -377,11 +372,8 @@ bool UnigramDictionary::getSubStringSuggestion(
const int inputWordStartPos, const int inputWordLength,
const int outputWordStartPos, const bool isSpaceProximity, int *freqArray,
int*wordLengthArray, unsigned short* outputWord, int *outputWordLength) {
- if (DEBUG_DICT) {
- assert(currentWordIndex >= 1);
- }
unsigned short* tempOutputWord = 0;
- int tempOutputWordLength = 0;
+ int nextWordLength = 0;
// TODO: Optimize init suggestion
initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
inputLength, correction);
@@ -389,7 +381,7 @@ bool UnigramDictionary::getSubStringSuggestion(
int freq = getMostFrequentWordLike(
inputWordStartPos, inputWordLength, proximityInfo, mWord);
if (freq > 0) {
- tempOutputWordLength = inputWordLength;
+ nextWordLength = inputWordLength;
tempOutputWord = mWord;
} else if (!hasAutoCorrectionCandidate) {
if (inputWordStartPos > 0) {
@@ -400,7 +392,7 @@ bool UnigramDictionary::getSubStringSuggestion(
getSuggestionCandidates(useFullEditDistance, inputWordLength, correction,
queuePool, false, MAX_ERRORS_FOR_TWO_WORDS, currentWordIndex);
if (DEBUG_DICT) {
- if (currentWordIndex <= SUB_QUEUE_MAX_WORD_INDEX) {
+ if (currentWordIndex < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS) {
AKLOGI("Dump word candidates(%d) %d", currentWordIndex, inputWordLength);
for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
queuePool->getSubQueue(currentWordIndex, i)->dumpTopWord();
@@ -415,59 +407,122 @@ bool UnigramDictionary::getSubStringSuggestion(
int score = 0;
const double ns = queue->getHighestNormalizedScore(
proximityInfo->getPrimaryInputWord(), inputWordLength,
- &tempOutputWord, &score, &tempOutputWordLength);
+ &tempOutputWord, &score, &nextWordLength);
if (DEBUG_DICT) {
AKLOGI("NS(%d) = %f, Score = %d", currentWordIndex, ns, score);
}
// Two words correction won't be done if the score of the first word doesn't exceed the
// threshold.
if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD
- || tempOutputWordLength < SUB_QUEUE_MIN_WORD_LENGTH) {
+ || nextWordLength < SUB_QUEUE_MIN_WORD_LENGTH) {
return false;
}
- freq = score >> (tempOutputWordLength
- + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER);
+ freq = score >> (nextWordLength + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER);
}
if (DEBUG_DICT) {
- AKLOGI("Freq(%d): %d, length: %d, input length: %d, input start: %d"
- , currentWordIndex, freq, tempOutputWordLength, inputWordLength, inputWordStartPos);
+ AKLOGI("Freq(%d): %d, length: %d, input length: %d, input start: %d (%d)"
+ , currentWordIndex, freq, nextWordLength, inputWordLength, inputWordStartPos,
+ wordLengthArray[0]);
}
- if (freq <= 0 || tempOutputWordLength <= 0
- || MAX_WORD_LENGTH <= (outputWordStartPos + tempOutputWordLength)) {
+ if (freq <= 0 || nextWordLength <= 0
+ || MAX_WORD_LENGTH <= (outputWordStartPos + nextWordLength)) {
return false;
}
- for (int i = 0; i < tempOutputWordLength; ++i) {
+ for (int i = 0; i < nextWordLength; ++i) {
outputWord[outputWordStartPos + i] = tempOutputWord[i];
}
// Put output values
- freqArray[currentWordIndex - 1] = freq;
+ freqArray[currentWordIndex] = freq;
// TODO: put output length instead of input length
- wordLengthArray[currentWordIndex - 1] = inputWordLength;
- *outputWordLength = outputWordStartPos + tempOutputWordLength;
+ wordLengthArray[currentWordIndex] = inputWordLength;
+ const int tempOutputWordLength = outputWordStartPos + nextWordLength;
+ if (outputWordLength) {
+ *outputWordLength = tempOutputWordLength;
+ }
if ((inputWordStartPos + inputWordLength) < inputLength) {
- if (outputWordStartPos + tempOutputWordLength >= MAX_WORD_LENGTH) {
+ if (outputWordStartPos + nextWordLength >= MAX_WORD_LENGTH) {
return false;
}
outputWord[outputWordStartPos + tempOutputWordLength] = SPACE;
- ++*outputWordLength;
- } else if (currentWordIndex >= 2) {
+ if (outputWordLength) {
+ ++*outputWordLength;
+ }
+ } else if (currentWordIndex >= 1) {
// TODO: Handle 3 or more words
const int pairFreq = correction->getFreqForSplitTwoWords(
freqArray, wordLengthArray, isSpaceProximity, outputWord);
if (DEBUG_DICT) {
- AKLOGI("Split two words: %d, %d, %d, %d", freqArray[0], freqArray[1], pairFreq,
- inputLength);
+ AKLOGI("Split two words: %d, %d, %d, %d, (%d)", freqArray[0], freqArray[1], pairFreq,
+ inputLength, wordLengthArray[0]);
}
- addWord(outputWord, *outputWordLength, pairFreq, queuePool->getMasterQueue());
+ addWord(outputWord, tempOutputWordLength, pairFreq, queuePool->getMasterQueue());
}
return true;
}
+void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
+ const int *xcoordinates, const int *ycoordinates, const int *codes,
+ const bool useFullEditDistance, const int inputLength,
+ Correction *correction, WordsPriorityQueuePool* queuePool,
+ const bool hasAutoCorrectionCandidate, const int startInputPos, const int startWordIndex,
+ const int outputWordLength, int *freqArray, int* wordLengthArray,
+ unsigned short* outputWord) {
+ if (startWordIndex >= (MULTIPLE_WORDS_SUGGESTION_MAX_WORDS - 1)) {
+ // Return if the last word index
+ return;
+ }
+ for (int i = 1; i < inputLength; ++i) {
+ int tempOutputWordLength = 0;
+ // First word
+ int inputWordStartPos = 0;
+ int inputWordLength = i;
+ if (DEBUG_CORRECTION_FREQ) {
+ AKLOGI("Two words, %d", inputWordLength);
+ }
+ if (!getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
+ useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
+ FIRST_WORD_INDEX, inputWordStartPos, inputWordLength, 0, true /* not used */,
+ freqArray, wordLengthArray, outputWord, &tempOutputWordLength)) {
+ continue;
+ }
+
+ // Second word
+ // Missing space
+ inputWordStartPos = i;
+ inputWordLength = inputLength - i;
+ getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
+ useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
+ SECOND_WORD_INDEX, inputWordStartPos, inputWordLength, tempOutputWordLength,
+ false /* missing space */, freqArray, wordLengthArray, outputWord,
+ 0);
+
+ // Mistyped space
+ ++inputWordStartPos;
+ --inputWordLength;
+
+ if (inputWordLength <= 0) {
+ continue;
+ }
+
+ const int x = xcoordinates[inputWordStartPos - 1];
+ const int y = ycoordinates[inputWordStartPos - 1];
+ if (!proximityInfo->hasSpaceProximity(x, y)) {
+ continue;
+ }
+
+ getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
+ useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
+ SECOND_WORD_INDEX, inputWordStartPos, inputWordLength, tempOutputWordLength,
+ true /* mistyped space */, freqArray, wordLengthArray, outputWord,
+ 0);
+ }
+}
+
void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
const int *xcoordinates, const int *ycoordinates, const int *codes,
- const bool useFullEditDistance, const int inputLength, const int wordDivideIndex,
+ const bool useFullEditDistance, const int inputLength,
Correction *correction, WordsPriorityQueuePool* queuePool,
const bool hasAutoCorrectionCandidate) {
if (inputLength >= MAX_WORD_LENGTH) return;
@@ -475,51 +530,21 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo
// MAX_PROXIMITY_CHARS_SIZE in ProximityInfo.java should be 16
assert(MAX_PROXIMITY_CHARS == 16);
}
+ if (DEBUG_DICT) {
+ AKLOGI("--- Suggest multiple words");
+ }
// Allocating fixed length array on stack
unsigned short outputWord[MAX_WORD_LENGTH];
- int freqArray[SUB_QUEUE_MAX_WORD_INDEX];
- int wordLengthArray[SUB_QUEUE_MAX_WORD_INDEX];
- int outputWordLength = 0;
-
- // First word
- int inputWordStartPos = 0;
- int inputWordLength = wordDivideIndex;
- if (!getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
- useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
- FIRST_WORD_INDEX, inputWordStartPos, inputWordLength, 0, true /* not used */,
- freqArray, wordLengthArray, outputWord, &outputWordLength)) {
- return;
- }
-
- const int tempOutputWordLength = outputWordLength;
- // Second word
- // Missing space
- inputWordStartPos = wordDivideIndex;
- inputWordLength = inputLength - wordDivideIndex;
- getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
- useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
- SECOND_WORD_INDEX, inputWordStartPos, inputWordLength, tempOutputWordLength,
- false /* missing space */, freqArray, wordLengthArray, outputWord, &outputWordLength);
-
- // Mistyped space
- ++inputWordStartPos;
- --inputWordLength;
-
- if (inputWordLength <= 0) {
- return;
- }
-
- const int x = xcoordinates[inputWordStartPos - 1];
- const int y = ycoordinates[inputWordStartPos - 1];
- if (!proximityInfo->hasSpaceProximity(x, y)) {
- return;
- }
-
- getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
- useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
- SECOND_WORD_INDEX, inputWordStartPos, inputWordLength, tempOutputWordLength,
- true /* mistyped space */, freqArray, wordLengthArray, outputWord, &outputWordLength);
+ int freqArray[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS];
+ int wordLengthArray[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS];
+ const int outputWordLength = 0;
+ const int startInputPos = 0;
+ const int startWordIndex = 0;
+ getMultiWordsSuggestionRec(proximityInfo, xcoordinates, ycoordinates, codes,
+ useFullEditDistance, inputLength, correction, queuePool, hasAutoCorrectionCandidate,
+ startInputPos, startWordIndex, outputWordLength, freqArray, wordLengthArray,
+ outputWord);
}
// Wrapper for getMostFrequentWordLikeInner, which matches it to the previous
diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h
index 79793d676..2d5d076b1 100644
--- a/native/src/unigram_dictionary.h
+++ b/native/src/unigram_dictionary.h
@@ -103,7 +103,7 @@ class UnigramDictionary {
const int currentWordIndex);
void getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
const int *xcoordinates, const int *ycoordinates, const int *codes,
- const bool useFullEditDistance, const int inputLength, const int wordDivideIndex,
+ const bool useFullEditDistance, const int inputLength,
Correction *correction, WordsPriorityQueuePool* queuePool,
const bool hasAutoCorrectionCandidate);
void onTerminal(const int freq, const TerminalAttributes& terminalAttributes,
@@ -127,6 +127,13 @@ class UnigramDictionary {
const int inputWordStartPos, const int inputWordLength,
const int outputWordStartPos, const bool isSpaceProximity, int *freqArray,
int *wordLengthArray, unsigned short* outputWord, int *outputWordLength);
+ void getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
+ const int *xcoordinates, const int *ycoordinates, const int *codes,
+ const bool useFullEditDistance, const int inputLength,
+ Correction *correction, WordsPriorityQueuePool* queuePool,
+ const bool hasAutoCorrectionCandidate, const int startPos, const int startWordIndex,
+ const int outputWordLength, int *freqArray, int* wordLengthArray,
+ unsigned short* outputWord);
const uint8_t* const DICT_ROOT;
const int MAX_WORD_LENGTH;
diff --git a/native/src/words_priority_queue_pool.h b/native/src/words_priority_queue_pool.h
index a4aa8b6ca..5b50e8f4f 100644
--- a/native/src/words_priority_queue_pool.h
+++ b/native/src/words_priority_queue_pool.h
@@ -27,11 +27,10 @@ class WordsPriorityQueuePool {
public:
WordsPriorityQueuePool(int mainQueueMaxWords, int subQueueMaxWords, int maxWordLength) {
mMasterQueue = new(mMasterQueueBuf) WordsPriorityQueue(mainQueueMaxWords, maxWordLength);
- for (int i = 0, subQueueBufOffset = 0; i < SUB_QUEUE_MAX_COUNT;
+ for (int i = 0, subQueueBufOffset = 0;
+ i < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS * SUB_QUEUE_MAX_COUNT;
++i, subQueueBufOffset += sizeof(WordsPriorityQueue)) {
- mSubQueues1[i] = new(mSubQueueBuf1 + subQueueBufOffset)
- WordsPriorityQueue(subQueueMaxWords, maxWordLength);
- mSubQueues2[i] = new(mSubQueueBuf2 + subQueueBufOffset)
+ mSubQueues[i] = new(mSubQueueBuf + subQueueBufOffset)
WordsPriorityQueue(subQueueMaxWords, maxWordLength);
}
}
@@ -44,7 +43,7 @@ class WordsPriorityQueuePool {
}
WordsPriorityQueue* getSubQueue(const int wordIndex, const int inputWordLength) {
- if (wordIndex > SUB_QUEUE_MAX_WORD_INDEX) {
+ if (wordIndex >= MULTIPLE_WORDS_SUGGESTION_MAX_WORDS) {
return 0;
}
if (inputWordLength < 0 || inputWordLength >= SUB_QUEUE_MAX_COUNT) {
@@ -53,30 +52,21 @@ class WordsPriorityQueuePool {
}
return 0;
}
- // TODO: Come up with more generic pool
- if (wordIndex == 1) {
- return mSubQueues1[inputWordLength];
- } else if (wordIndex == 2) {
- return mSubQueues2[inputWordLength];
- } else {
- return 0;
- }
+ return mSubQueues[wordIndex * SUB_QUEUE_MAX_COUNT + inputWordLength];
}
inline void clearAll() {
mMasterQueue->clear();
- for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
- mSubQueues1[i]->clear();
- mSubQueues2[i]->clear();
+ for (int i = 0; i < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS; ++i) {
+ clearSubQueue(i);
}
}
inline void clearSubQueue(const int wordIndex) {
for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
- if (wordIndex == 1) {
- mSubQueues1[i]->clear();
- } else if (wordIndex == 2) {
- mSubQueues2[i]->clear();
+ WordsPriorityQueue* queue = getSubQueue(wordIndex, i);
+ if (queue) {
+ queue->clear();
}
}
}
@@ -84,17 +74,16 @@ class WordsPriorityQueuePool {
void dumpSubQueue1TopSuggestions() {
AKLOGI("DUMP SUBQUEUE1 TOP SUGGESTIONS");
for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
- mSubQueues1[i]->dumpTopWord();
+ getSubQueue(0, i)->dumpTopWord();
}
}
private:
WordsPriorityQueue* mMasterQueue;
- WordsPriorityQueue* mSubQueues1[SUB_QUEUE_MAX_COUNT];
- WordsPriorityQueue* mSubQueues2[SUB_QUEUE_MAX_COUNT];
+ WordsPriorityQueue* mSubQueues[SUB_QUEUE_MAX_COUNT * MULTIPLE_WORDS_SUGGESTION_MAX_WORDS];
char mMasterQueueBuf[sizeof(WordsPriorityQueue)];
- char mSubQueueBuf1[SUB_QUEUE_MAX_COUNT * sizeof(WordsPriorityQueue)];
- char mSubQueueBuf2[SUB_QUEUE_MAX_COUNT * sizeof(WordsPriorityQueue)];
+ char mSubQueueBuf[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS
+ * SUB_QUEUE_MAX_COUNT * sizeof(WordsPriorityQueue)];
};
}