diff options
author | 2012-03-27 19:56:23 +0900 | |
---|---|---|
committer | 2012-04-06 16:22:08 +0900 | |
commit | 9a933a742d2a3ffdfb955705ad086035bc27db60 (patch) | |
tree | 991c505bb2c4a3dff0f3704e36837d2f63628293 /native/jni/src/unigram_dictionary.cpp | |
parent | 7540fd009d47d7210f1bbbbae75582698be6f313 (diff) | |
download | latinime-9a933a742d2a3ffdfb955705ad086035bc27db60.tar.gz latinime-9a933a742d2a3ffdfb955705ad086035bc27db60.tar.xz latinime-9a933a742d2a3ffdfb955705ad086035bc27db60.zip |
Read shortcuts as strings in the dictionary.
This has no impact on performance.
Before:
(0) 9.61 (0.01%)
(1) 57514.58 (56.70%)
(2) 10.55 (0.01%)
(3) 10.79 (0.01%)
(4) 133.20 (0.13%)
(5) 43553.87 (42.94%)
(6) 10.03 (0.01%)
(20) 47.20 (0.05%)
Total 101431.47 (sum of others 101289.84)
After:
(0) 10.52 (0.01%)
(1) 56311.16 (56.66%)
(2) 13.40 (0.01%)
(3) 10.98 (0.01%)
(4) 136.72 (0.14%)
(5) 42707.92 (42.97%)
(6) 9.79 (0.01%)
(20) 51.35 (0.05%)
Total 99390.76 (sum of others 99251.84)
The difference is not significant given the imprecision of the measurement.
Change-Id: I2e4f1ef7a5e99082e67dd27f56cf4fc432bb48fa
Diffstat (limited to 'native/jni/src/unigram_dictionary.cpp')
-rw-r--r-- | native/jni/src/unigram_dictionary.cpp | 10 |
1 files changed, 5 insertions, 5 deletions
diff --git a/native/jni/src/unigram_dictionary.cpp b/native/jni/src/unigram_dictionary.cpp index ed4c066f3..50805ad87 100644 --- a/native/jni/src/unigram_dictionary.cpp +++ b/native/jni/src/unigram_dictionary.cpp @@ -366,10 +366,9 @@ inline void UnigramDictionary::onTerminal(const int freq, WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength); if (finalFreq != NOT_A_FREQUENCY) { - if (!terminalAttributes.isShortcutOnly()) { - addWord(wordPointer, wordLength, finalFreq, masterQueue); - } + addWord(wordPointer, wordLength, finalFreq, masterQueue); + const int shortcutFreq = finalFreq > 0 ? finalFreq - 1 : 0; // Please note that the shortcut candidates will be added to the master queue only. TerminalAttributes::ShortcutIterator iterator = terminalAttributes.getShortcutIterator(); @@ -379,11 +378,12 @@ inline void UnigramDictionary::onTerminal(const int freq, // We need to either modulate the frequency of each shortcut according // to its own shortcut frequency or to make the queue // so that the insert order is protected inside the queue for words - // with the same score. + // with the same score. For the moment we use -1 to make sure the shortcut will + // never be in front of the word. uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL]; const int shortcutTargetStringLength = iterator.getNextShortcutTarget( MAX_WORD_LENGTH_INTERNAL, shortcutTarget); - addWord(shortcutTarget, shortcutTargetStringLength, finalFreq, masterQueue); + addWord(shortcutTarget, shortcutTargetStringLength, shortcutFreq, masterQueue); } } } |