diff options
Diffstat (limited to 'native/src/unigram_dictionary.cpp')
-rw-r--r-- | native/src/unigram_dictionary.cpp | 88 |
1 files changed, 55 insertions, 33 deletions
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp index d0c903e81..e4edc5ab6 100644 --- a/native/src/unigram_dictionary.cpp +++ b/native/src/unigram_dictionary.cpp @@ -102,7 +102,8 @@ int UnigramDictionary::getSuggestionCandidates(int inputLength, int skipPos, return suggestedWordsCount; } -void UnigramDictionary::registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize) { +void UnigramDictionary::registerNextLetter( + unsigned short c, int *nextLetters, int nextLettersSize) { if (c < nextLettersSize) { nextLetters[c]++; } @@ -121,9 +122,8 @@ UnigramDictionary::addWord(unsigned short *word, int length, int frequency) // Find the right insertion point int insertAt = 0; while (insertAt < MAX_WORDS) { - if (frequency > mFrequencies[insertAt] - || (mFrequencies[insertAt] == frequency - && length < Dictionary::wideStrLen(mOutputChars + insertAt * MAX_WORD_LENGTH))) { + if (frequency > mFrequencies[insertAt] || (mFrequencies[insertAt] == frequency + && length < Dictionary::wideStrLen(mOutputChars + insertAt * MAX_WORD_LENGTH))) { break; } insertAt++; @@ -134,9 +134,9 @@ UnigramDictionary::addWord(unsigned short *word, int length, int frequency) (MAX_WORDS - insertAt - 1) * sizeof(mFrequencies[0])); mFrequencies[insertAt] = frequency; memmove((char*) mOutputChars + (insertAt + 1) * MAX_WORD_LENGTH * sizeof(short), - (char*) mOutputChars + (insertAt ) * MAX_WORD_LENGTH * sizeof(short), + (char*) mOutputChars + insertAt * MAX_WORD_LENGTH * sizeof(short), (MAX_WORDS - insertAt - 1) * sizeof(short) * MAX_WORD_LENGTH); - unsigned short *dest = mOutputChars + (insertAt ) * MAX_WORD_LENGTH; + unsigned short *dest = mOutputChars + insertAt * MAX_WORD_LENGTH; while (length--) { *dest++ = *word++; } @@ -177,8 +177,9 @@ UnigramDictionary::sameAsTyped(unsigned short *word, int length) return true; } -static char QUOTE = '\''; +static const char QUOTE = '\''; +// snr : frequency? void UnigramDictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion, int snr, int inputIndex, int diffs, int skipPos, int *nextLetters, int nextLettersSize) @@ -190,8 +191,10 @@ UnigramDictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion if (diffs > mMaxEditDistance) { return; } + // get the count of nodes and increment pos. int count = Dictionary::getCount(DICT, &pos); int *currentChars = NULL; + // If inputIndex is greater than mInputLength, that means there are no proximity chars. if (mInputLength <= inputIndex) { completion = true; } else { @@ -205,8 +208,10 @@ UnigramDictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion unsigned short lowerC = toLowerCase(c); bool terminal = Dictionary::getTerminal(DICT, &pos); int childrenAddress = Dictionary::getAddress(DICT, &pos); + const bool needsToContinue = childrenAddress != 0; // -- after address or flag int freq = 1; + // If terminal, increment pos if (terminal) freq = Dictionary::getFreq(DICT, IS_LATEST_DICT_VERSION, &pos); // -- after add or freq @@ -214,53 +219,70 @@ UnigramDictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion if (completion) { mWord[depth] = c; if (terminal) { - addWord(mWord, depth + 1, freq * snr); - if (depth >= mInputLength && skipPos < 0) { - registerNextLetter(mWord[mInputLength], nextLetters, nextLettersSize); - } + onTerminalWhenUserTypedLengthIsGreaterThanInputLength(mWord, mInputLength, depth, + snr, nextLetters, nextLettersSize, skipPos, freq); } - if (childrenAddress != 0) { - getWordsRec(childrenAddress, depth + 1, maxDepth, completion, snr, inputIndex, + if (needsToContinue) { + // No need to do proximity suggest any more. + getWordsRec(childrenAddress, depth + 1, maxDepth, true, snr, inputIndex, diffs, skipPos, nextLetters, nextLettersSize); } } else if ((c == QUOTE && currentChars[0] != QUOTE) || skipPos == depth) { // Skip the ' or other letter and continue deeper mWord[depth] = c; - if (childrenAddress != 0) { - getWordsRec(childrenAddress, depth + 1, maxDepth, false, snr, inputIndex, diffs, - skipPos, nextLetters, nextLettersSize); + if (needsToContinue) { + getWordsRec(childrenAddress, depth + 1, maxDepth, false, snr, inputIndex, + diffs, skipPos, nextLetters, nextLettersSize); } } else { int j = 0; while (currentChars[j] > 0) { + // Move to child node if (currentChars[j] == lowerC || currentChars[j] == c) { - int addedWeight = j == 0 ? TYPED_LETTER_MULTIPLIER : 1; mWord[depth] = c; - if (mInputLength == inputIndex + 1) { + const int addedWeight = j == 0 ? TYPED_LETTER_MULTIPLIER : 1; + const bool isSameAsUserTypedLength = mInputLength == inputIndex + 1; + // If inputIndex is greater than mInputLength, that means there is no + // proximity chars. So, we don't need to check proximity. + if (isSameAsUserTypedLength) { if (terminal) { - if (//INCLUDE_TYPED_WORD_IF_VALID || - !sameAsTyped(mWord, depth + 1)) { - int finalFreq = freq * snr * addedWeight; - if (skipPos < 0) finalFreq *= FULL_WORD_MULTIPLIER; - addWord(mWord, depth + 1, finalFreq); - } - } - if (childrenAddress != 0) { - getWordsRec(childrenAddress, depth + 1, - maxDepth, true, snr * addedWeight, inputIndex + 1, - diffs + (j > 0), skipPos, nextLetters, nextLettersSize); + onTerminalWhenUserTypedLengthIsSameAsInputLength(mWord, depth, snr, + skipPos, freq, addedWeight); } - } else if (childrenAddress != 0) { + } + if (needsToContinue) { getWordsRec(childrenAddress, depth + 1, maxDepth, - false, snr * addedWeight, inputIndex + 1, diffs + (j > 0), - skipPos, nextLetters, nextLettersSize); + isSameAsUserTypedLength, snr * addedWeight, inputIndex + 1, + diffs + (j > 0), skipPos, nextLetters, nextLettersSize); } } - j++; + ++j; + // If skipPos is defined, not to search proximity collections. + // First char is what user typed. if (skipPos >= 0) break; } } } } +inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsGreaterThanInputLength( + unsigned short *word, const int inputLength, const int depth, const int snr, + int *nextLetters, const int nextLettersSize, const int skipPos, const int freq) { + addWord(word, depth + 1, freq * snr); + if (depth >= inputLength && skipPos < 0) { + registerNextLetter(mWord[mInputLength], nextLetters, nextLettersSize); + } +} + +inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsSameAsInputLength( + unsigned short *word, const int depth, const int snr, const int skipPos, const int freq, + const int addedWeight) { + if (!sameAsTyped(word, depth + 1)) { + int finalFreq = freq * snr * addedWeight; + // Proximity collection will promote a word of the same length as + // what user typed. + if (skipPos < 0) finalFreq *= FULL_WORD_MULTIPLIER; + addWord(word, depth + 1, finalFreq); + } +} } // namespace latinime |