aboutsummaryrefslogtreecommitdiffstats
path: root/native/src/unigram_dictionary.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'native/src/unigram_dictionary.cpp')
-rw-r--r--native/src/unigram_dictionary.cpp26
1 files changed, 22 insertions, 4 deletions
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index 7c3c35e40..e95e03ce5 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@@ -25,6 +25,7 @@
#include "unigram_dictionary.h"
#include "binary_format.h"
+#include "terminal_attributes.h"
namespace latinime {
@@ -324,13 +325,28 @@ void UnigramDictionary::getMistypedSpaceWords(ProximityInfo *proximityInfo, cons
correction, queuePool);
}
-inline void UnigramDictionary::onTerminal(
- const int freq, Correction *correction, WordsPriorityQueue *queue) {
+inline void UnigramDictionary::onTerminal(const int freq,
+ const TerminalAttributes& terminalAttributes, Correction *correction,
+ WordsPriorityQueue *queue) {
int wordLength;
unsigned short* wordPointer;
const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength);
if (finalFreq >= 0) {
- addWord(wordPointer, wordLength, finalFreq, queue);
+ if (!terminalAttributes.isShortcutOnly()) { // Skip the word itself if it is shortcut-only.
+ addWord(wordPointer, wordLength, finalFreq, queue);
+ }
+ TerminalAttributes::ShortcutIterator iterator = terminalAttributes.getShortcutIterator();
+ while (iterator.hasNextShortcutTarget()) {
+ // Each shortcut target is queued with the same finalFreq as the word itself.
+ // TODO: addWord only supports weak ordering, so we cannot control the order of the
+ // shortcuts relative to one another or to the word. Either modulate each shortcut's
+ // score by its own shortcut frequency, or make the queue preserve insertion order
+ // among words with the same score.
+ uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL];
+ const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
+ MAX_WORD_LENGTH_INTERNAL, shortcutTarget);
+ addWord(shortcutTarget, shortcutTargetStringLength, finalFreq, queue);
+ }
}
}
@@ -646,7 +662,9 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
// The frequency should be here, because we come here only if this is actually
// a terminal node, and we are on its last char.
const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos);
- onTerminal(freq, correction, queue);
+ TerminalAttributes terminalAttributes(DICT_ROOT, flags,
+ BinaryFormat::skipFrequency(flags, pos));
+ onTerminal(freq, terminalAttributes, correction, queue);
}
// If there are more chars in this node, then this virtual node has children.