diff options
Diffstat (limited to 'native/src/unigram_dictionary.cpp')
-rw-r--r-- | native/src/unigram_dictionary.cpp | 71 |
1 files changed, 42 insertions, 29 deletions
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp index aa159b533..ef52ceb7c 100644 --- a/native/src/unigram_dictionary.cpp +++ b/native/src/unigram_dictionary.cpp @@ -272,6 +272,7 @@ static inline void registerNextLetter(unsigned short c, int *nextLetters, int ne } // TODO: We need to optimize addWord by using STL or something +// TODO: This needs to take an const unsigned short* and not tinker with its contents bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency) { word[length] = 0; if (DEBUG_DICT && DEBUG_SHOW_FOUND_WORD) { @@ -321,6 +322,16 @@ bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency) return false; } +inline void UnigramDictionary::addWordAlternatesSpellings(const uint8_t* const root, int pos, + int depth, int finalFreq) { + // TODO: actually add alternates when the format supports it. +} + +static inline bool hasAlternateSpellings(uint8_t flags) { + // TODO: when the format supports it, return the actual value. + return false; +} + static inline unsigned short toBaseLowerCase(unsigned short c) { if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) { c = BASE_CHARS[c]; @@ -333,7 +344,7 @@ static inline unsigned short toBaseLowerCase(unsigned short c) { return c; } -bool UnigramDictionary::sameAsTyped(unsigned short *word, int length) { +bool UnigramDictionary::sameAsTyped(const unsigned short *word, int length) const { if (length != mInputLength) { return false; } @@ -656,28 +667,6 @@ inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int return finalFreq; } -inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsGreaterThanInputLength( - unsigned short *word, const int inputIndex, const int depth, const int matchWeight, - int *nextLetters, const int nextLettersSize, const int skipPos, const int excessivePos, - const int transposedPos, const int freq) { - const int finalFreq = calculateFinalFreq(inputIndex, depth, matchWeight, skipPos, excessivePos, - transposedPos, freq, false); - if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq); - if (depth >= mInputLength && skipPos < 0) { - registerNextLetter(mWord[mInputLength], nextLetters, nextLettersSize); - } -} - -inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsSameAsInputLength( - unsigned short *word, const int inputIndex, const int depth, const int matchWeight, - const int skipPos, const int excessivePos, const int transposedPos, const int freq) { - if (sameAsTyped(word, depth + 1)) return; - const int finalFreq = calculateFinalFreq(inputIndex, depth, matchWeight, skipPos, - excessivePos, transposedPos, freq, true); - // Proximity collection will promote a word of the same length as what user typed. - if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq); -} - inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c, const int inputIndex, const int skipPos, const int depth) { const unsigned short userTypedChar = getInputCharsAt(inputIndex)[0]; @@ -708,7 +697,6 @@ inline bool UnigramDictionary::existsAdjacentProximityChars(const int inputIndex return false; } - // In the following function, c is the current character of the dictionary word // currently examined. // currentChars is an array containing the keys close to the character the @@ -751,6 +739,30 @@ inline UnigramDictionary::ProximityType UnigramDictionary::getMatchedProximityId return UNRELATED_CHAR; } +inline void UnigramDictionary::onTerminal(unsigned short int* word, const int depth, + const uint8_t* const root, const uint8_t flags, int pos, + const int inputIndex, const int matchWeight, const int skipPos, + const int excessivePos, const int transposedPos, const int freq, const bool sameLength, + int* nextLetters, const int nextLettersSize) { + + const bool isSameAsTyped = sameLength ? sameAsTyped(word, depth + 1) : false; + const bool hasAlternates = hasAlternateSpellings(flags); + if (isSameAsTyped && !hasAlternates) return; + + if (depth >= MIN_SUGGEST_DEPTH) { + const int finalFreq = calculateFinalFreq(inputIndex, depth, matchWeight, skipPos, + excessivePos, transposedPos, freq, sameLength); + if (!isSameAsTyped) + addWord(word, depth + 1, finalFreq); + if (hasAlternates) + addWordAlternatesSpellings(DICT_ROOT, pos, flags, finalFreq); + } + + if (sameLength && depth >= mInputLength && skipPos < 0) { + registerNextLetter(word[mInputLength], nextLetters, nextLettersSize); + } +} + inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth, const int maxDepth, const bool traverseAllNodes, int matchWeight, int inputIndex, const int diffs, const int skipPos, const int excessivePos, const int transposedPos, @@ -770,6 +782,8 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth int freq; bool isSameAsUserTypedLength = false; + const uint8_t flags = 0; // No flags for now + if (excessivePos == depth && inputIndex < mInputLength - 1) ++inputIndex; *nextSiblingPosition = Dictionary::setDictionaryValues(DICT_ROOT, IS_LATEST_DICT_VERSION, pos, @@ -782,9 +796,8 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth if (traverseAllNodes || needsToSkipCurrentNode(c, inputIndex, skipPos, depth)) { mWord[depth] = c; if (traverseAllNodes && terminal) { - onTerminalWhenUserTypedLengthIsGreaterThanInputLength(mWord, inputIndex, depth, - matchWeight, nextLetters, nextLettersSize, skipPos, excessivePos, transposedPos, - freq); + onTerminal(mWord, depth, DICT_ROOT, flags, pos, inputIndex, matchWeight, skipPos, + excessivePos, transposedPos, freq, false, nextLetters, nextLettersSize); } if (!needsToTraverseChildrenNodes) return false; *newTraverseAllNodes = traverseAllNodes; @@ -811,8 +824,8 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth bool isSameAsUserTypedLength = mInputLength == inputIndex + 1 || (excessivePos == mInputLength - 1 && inputIndex == mInputLength - 2); if (isSameAsUserTypedLength && terminal) { - onTerminalWhenUserTypedLengthIsSameAsInputLength(mWord, inputIndex, depth, matchWeight, - skipPos, excessivePos, transposedPos, freq); + onTerminal(mWord, depth, DICT_ROOT, flags, pos, inputIndex, matchWeight, skipPos, + excessivePos, transposedPos, freq, true, nextLetters, nextLettersSize); } if (!needsToTraverseChildrenNodes) return false; // Start traversing all nodes after the index exceeds the user typed length |