aboutsummaryrefslogtreecommitdiffstats
path: root/native/src/unigram_dictionary.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'native/src/unigram_dictionary.cpp')
-rw-r--r--native/src/unigram_dictionary.cpp110
1 files changed, 73 insertions, 37 deletions
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index 8be95bc40..2c5b9402a 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@@ -254,7 +254,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
proximityInfo->getPrimaryInputWord(), i, word, wordLength, score);
ns += 0;
AKLOGI("--- TOP SUB WORDS for %d --- %d %f [%d]", i, score, ns,
- (ns > TWO_WORDS_CORRECTION_THRESHOLD));
+ (ns > TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD));
DUMP_WORD(proximityInfo->getPrimaryInputWord(), i);
DUMP_WORD(word, wordLength);
}
@@ -343,43 +343,45 @@ inline void UnigramDictionary::onTerminal(const int freq,
WordsPriorityQueuePool *queuePool, const bool addToMasterQueue) {
const int inputIndex = correction->getInputIndex();
const bool addToSubQueue = inputIndex < SUB_QUEUE_MAX_COUNT;
- if (!addToMasterQueue && !addToSubQueue) {
- return;
- }
- WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
- WordsPriorityQueue *subQueue = queuePool->getSubQueue1(inputIndex);
+
int wordLength;
unsigned short* wordPointer;
- const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength);
- if (finalFreq != NOT_A_FREQUENCY) {
- if (!terminalAttributes.isShortcutOnly()) {
- if (addToMasterQueue) {
+
+ if (addToMasterQueue) {
+ WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
+ const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength);
+ if (finalFreq != NOT_A_FREQUENCY) {
+ if (!terminalAttributes.isShortcutOnly()) {
addWord(wordPointer, wordLength, finalFreq, masterQueue);
}
- // TODO: Check the validity of "inputIndex == wordLength"
- //if (addToSubQueue && inputIndex == wordLength) {
- if (addToSubQueue) {
- addWord(wordPointer, wordLength, finalFreq, subQueue);
+
+ // Please note that the shortcut candidates will be added to the master queue only.
+ TerminalAttributes::ShortcutIterator iterator =
+ terminalAttributes.getShortcutIterator();
+ while (iterator.hasNextShortcutTarget()) {
+ // TODO: addWord only supports weak ordering, meaning we have no means
+ // to control the order of the shortcuts relative to one another or to the word.
+ // We need to either modulate the frequency of each shortcut according
+ // to its own shortcut frequency or to make the queue
+ // so that the insert order is protected inside the queue for words
+ // with the same score.
+ uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL];
+ const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
+ MAX_WORD_LENGTH_INTERNAL, shortcutTarget);
+ addWord(shortcutTarget, shortcutTargetStringLength, finalFreq, masterQueue);
}
}
- // Please note that the shortcut candidates will be added to the master queue only.
- if (!addToMasterQueue) {
- return;
- }
+ }
- // From here, below is the code to add shortcut candidates.
- TerminalAttributes::ShortcutIterator iterator = terminalAttributes.getShortcutIterator();
- while (iterator.hasNextShortcutTarget()) {
- // TODO: addWord only supports weak ordering, meaning we have no means to control the
- // order of the shortcuts relative to one another or to the word. We need to either
- // modulate the frequency of each shortcut according to its own shortcut frequency or
- // to make the queue so that the insert order is protected inside the queue for words
- // with the same score.
- uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL];
- const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
- MAX_WORD_LENGTH_INTERNAL, shortcutTarget);
- addWord(shortcutTarget, shortcutTargetStringLength, finalFreq, masterQueue);
- }
+ // We only allow two words + other error correction for words with SUB_QUEUE_MIN_WORD_LENGTH
+ // or more length.
+ if (inputIndex >= SUB_QUEUE_MIN_WORD_LENGTH && addToSubQueue) {
+ // TODO: Check the validity of "inputIndex == wordLength"
+ //if (addToSubQueue && inputIndex == wordLength) {
+ WordsPriorityQueue *subQueue = queuePool->getSubQueue1(inputIndex);
+ const int finalFreq = correction->getFinalFreqForSubQueue(freq, &wordPointer, &wordLength,
+ inputIndex);
+ addWord(wordPointer, wordLength, finalFreq, subQueue);
}
}
@@ -397,20 +399,57 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo
}
const bool isSpaceProximity = spaceProximityPos >= 0;
const int firstWordStartPos = 0;
+
+ const int firstTypedWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos;
+ int firstFreq = getMostFrequentWordLike(0, firstTypedWordLength, proximityInfo, mWord);
+ unsigned short* firstWord = 0;
+ int firstWordLength = 0;
+ if (firstFreq > 0) {
+ firstWordLength = firstTypedWordLength;
+ firstWord = mWord;
+ } else {
+ if (masterQueue->size() > 0) {
+ double nsForMaster = masterQueue->getHighestNormalizedScore(
+ proximityInfo->getPrimaryInputWord(), inputLength, 0, 0, 0);
+ if (nsForMaster > START_TWO_WORDS_CORRECTION_THRESHOLD) {
+ // Do nothing if the highest suggestion exceeds the threshold.
+ return;
+ }
+ }
+ WordsPriorityQueue* firstWordQueue = queuePool->getSubQueue1(firstTypedWordLength);
+ if (firstWordQueue->size() < 1) {
+ return;
+ }
+ int score = 0;
+ const double ns = firstWordQueue->getHighestNormalizedScore(
+ proximityInfo->getPrimaryInputWord(), firstTypedWordLength, &firstWord, &score,
+ &firstWordLength);
+ // Two words correction won't be done if the score of the first word doesn't exceed the
+ // threshold.
+ if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD) {
+ return;
+ }
+ firstFreq = score >> (firstWordLength
+ + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER);
+ }
+
+ if (firstFreq <= 0) {
+ return;
+ }
+
const int secondWordStartPos = isSpaceProximity ? (spaceProximityPos + 1) : missingSpacePos;
- const int firstWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos;
const int secondWordLength = isSpaceProximity
? (inputLength - spaceProximityPos - 1)
: (inputLength - missingSpacePos);
if (inputLength >= MAX_WORD_LENGTH) return;
+
if (0 >= firstWordLength || 0 >= secondWordLength || firstWordStartPos >= secondWordStartPos
|| firstWordStartPos < 0 || secondWordStartPos + secondWordLength > inputLength)
return;
const int newWordLength = firstWordLength + secondWordLength + 1;
-
// Space proximity preparation
//WordsPriorityQueue *subQueue = queuePool->getSubQueue1();
//initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, firstWordLength, subQueue,
@@ -420,15 +459,12 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo
// Allocating variable length array on stack
unsigned short word[newWordLength];
- const int firstFreq = getMostFrequentWordLike(
- firstWordStartPos, firstWordLength, proximityInfo, mWord);
if (DEBUG_DICT) {
AKLOGI("First freq: %d", firstFreq);
}
- if (firstFreq <= 0) return;
for (int i = 0; i < firstWordLength; ++i) {
- word[i] = mWord[i];
+ word[i] = firstWord[i];
}
const int secondFreq = getMostFrequentWordLike(