diff options
Diffstat (limited to 'native/src')
-rw-r--r-- | native/src/bigram_dictionary.cpp | 3 | ||||
-rw-r--r-- | native/src/bigram_dictionary.h | 5 | ||||
-rw-r--r-- | native/src/char_utils.h | 2 | ||||
-rw-r--r-- | native/src/debug.h | 3 | ||||
-rw-r--r-- | native/src/defines.h | 2 | ||||
-rw-r--r-- | native/src/dictionary.cpp | 41 | ||||
-rw-r--r-- | native/src/dictionary.h | 9 | ||||
-rw-r--r-- | native/src/proximity_info.cpp | 4 | ||||
-rw-r--r-- | native/src/proximity_info.h | 4 | ||||
-rw-r--r-- | native/src/unigram_dictionary.cpp | 617 | ||||
-rw-r--r-- | native/src/unigram_dictionary.h | 42 |
11 files changed, 462 insertions, 270 deletions
diff --git a/native/src/bigram_dictionary.cpp b/native/src/bigram_dictionary.cpp index 36761b88d..11e6dc250 100644 --- a/native/src/bigram_dictionary.cpp +++ b/native/src/bigram_dictionary.cpp @@ -111,8 +111,7 @@ int BigramDictionary::getBigrams(unsigned short *prevWord, int prevWordLength, i mMaxBigrams = maxBigrams; if (HAS_BIGRAM && IS_LATEST_DICT_VERSION) { - int pos = mParentDictionary->isValidWordRec( - DICTIONARY_HEADER_SIZE, prevWord, 0, prevWordLength); + int pos = mParentDictionary->getBigramPosition(prevWord, prevWordLength); if (DEBUG_DICT) { LOGI("Pos -> %d", pos); } diff --git a/native/src/bigram_dictionary.h b/native/src/bigram_dictionary.h index d658b93e6..c07458a38 100644 --- a/native/src/bigram_dictionary.h +++ b/native/src/bigram_dictionary.h @@ -50,6 +50,7 @@ private: int *mInputCodes; int mInputLength; }; -// ---------------------------------------------------------------------------- -}; // namespace latinime + +} // namespace latinime + #endif // LATINIME_BIGRAM_DICTIONARY_H diff --git a/native/src/char_utils.h b/native/src/char_utils.h index 921ecb4a5..a69a35e7a 100644 --- a/native/src/char_utils.h +++ b/native/src/char_utils.h @@ -21,6 +21,6 @@ namespace latinime { unsigned short latin_tolower(unsigned short c); -}; // namespace latinime +} // namespace latinime #endif // LATINIME_CHAR_UTILS_H diff --git a/native/src/debug.h b/native/src/debug.h index ae629b222..38b2f107a 100644 --- a/native/src/debug.h +++ b/native/src/debug.h @@ -28,6 +28,7 @@ static inline unsigned char* convertToUnibyteString(unsigned short* input, unsig output[i] = 0; return output; } + static inline unsigned char* convertToUnibyteStringAndReplaceLastChar(unsigned short* input, unsigned char* output, const unsigned int length, unsigned char c) { int i = 0; @@ -37,6 +38,7 @@ static inline unsigned char* convertToUnibyteStringAndReplaceLastChar(unsigned s output[i] = 0; return output; } + static inline void LOGI_S16(unsigned short* string, const unsigned int length) { unsigned char tmp_buffer[length]; convertToUnibyteString(string, tmp_buffer, length); @@ -46,6 +48,7 @@ static inline void LOGI_S16(unsigned short* string, const unsigned int length) { // TODO : refactor this in a blocking log or something. // usleep(10); } + static inline void LOGI_S16_PLUS(unsigned short* string, const unsigned int length, unsigned char c) { unsigned char tmp_buffer[length+1]; diff --git a/native/src/defines.h b/native/src/defines.h index bdab19ff7..0a3240507 100644 --- a/native/src/defines.h +++ b/native/src/defines.h @@ -140,7 +140,7 @@ static void prof_out(void) { // The following "rate"s are used as a multiplier before dividing by 100, so they are in percent. #define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 80 #define WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X 12 -#define WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE 80 +#define WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE 67 #define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75 #define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75 #define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 60 diff --git a/native/src/dictionary.cpp b/native/src/dictionary.cpp index d69cb2a53..9e32ee80f 100644 --- a/native/src/dictionary.cpp +++ b/native/src/dictionary.cpp @@ -53,45 +53,16 @@ bool Dictionary::hasBigram() { return ((mDict[1] & 0xFF) == 1); } -// TODO: use uint16_t instead of unsigned short bool Dictionary::isValidWord(unsigned short *word, int length) { + return mUnigramDictionary->isValidWord(word, length); +} + +int Dictionary::getBigramPosition(unsigned short *word, int length) { if (IS_LATEST_DICT_VERSION) { - return (isValidWordRec(DICTIONARY_HEADER_SIZE, word, 0, length) != NOT_VALID_WORD); + return mUnigramDictionary->getBigramPosition(DICTIONARY_HEADER_SIZE, word, 0, length); } else { - return (isValidWordRec(0, word, 0, length) != NOT_VALID_WORD); + return mUnigramDictionary->getBigramPosition(0, word, 0, length); } } -int Dictionary::isValidWordRec(int pos, unsigned short *word, int offset, int length) { - // returns address of bigram data of that word - // return -99 if not found - - int count = Dictionary::getCount(mDict, &pos); - unsigned short currentChar = (unsigned short) word[offset]; - for (int j = 0; j < count; j++) { - unsigned short c = Dictionary::getChar(mDict, &pos); - int terminal = Dictionary::getTerminal(mDict, &pos); - int childPos = Dictionary::getAddress(mDict, &pos); - if (c == currentChar) { - if (offset == length - 1) { - if (terminal) { - return (pos+1); - } - } else { - if (childPos != 0) { - int t = isValidWordRec(childPos, word, offset + 1, length); - if (t > 0) { - return t; - } - } - } - } - if (terminal) { - Dictionary::getFreq(mDict, IS_LATEST_DICT_VERSION, &pos); - } - // There could be two instances of each alphabet - upper and lower case. So continue - // looking ... - } - return NOT_VALID_WORD; -} } // namespace latinime diff --git a/native/src/dictionary.h b/native/src/dictionary.h index 13b2a2816..3dc577a56 100644 --- a/native/src/dictionary.h +++ b/native/src/dictionary.h @@ -43,7 +43,6 @@ public: } bool isValidWord(unsigned short *word, int length); - int isValidWordRec(int pos, unsigned short *word, int offset, int length); void *getDict() { return (void *)mDict; } int getDictSize() { return mDictSize; } int getMmapFd() { return mMmapFd; } @@ -63,6 +62,9 @@ public: const int pos, unsigned short *c, int *childrenPosition, bool *terminal, int *freq); + // TODO: delete this + int getBigramPosition(unsigned short *word, int length); + private: bool hasBigram(); @@ -79,7 +81,6 @@ private: BigramDictionary *mBigramDictionary; }; -// ---------------------------------------------------------------------------- // public static utility methods // static inline methods should be defined in the header file inline unsigned short Dictionary::getChar(const unsigned char *dict, int *pos) { @@ -132,7 +133,6 @@ inline int Dictionary::getFreq(const unsigned char *dict, return freq; } - inline int Dictionary::wideStrLen(unsigned short *str) { if (!str) return 0; unsigned short *end = str; @@ -156,5 +156,6 @@ inline int Dictionary::setDictionaryValues(const unsigned char *dict, return position; } -}; // namespace latinime +} // namespace latinime + #endif // LATINIME_DICTIONARY_H diff --git a/native/src/proximity_info.cpp b/native/src/proximity_info.cpp index 102123c3c..209c31e6e 100644 --- a/native/src/proximity_info.cpp +++ b/native/src/proximity_info.cpp @@ -22,6 +22,7 @@ #include "proximity_info.h" namespace latinime { + ProximityInfo::ProximityInfo(const int maxProximityCharsSize, const int keyboardWidth, const int keyboardHeight, const int gridWidth, const int gridHeight, const uint32_t *proximityCharsArray) @@ -61,4 +62,5 @@ bool ProximityInfo::hasSpaceProximity(const int x, const int y) const { } return false; } -} // namespace latinime + +} // namespace latinime diff --git a/native/src/proximity_info.h b/native/src/proximity_info.h index c2062e8c5..327cd0940 100644 --- a/native/src/proximity_info.h +++ b/native/src/proximity_info.h @@ -41,5 +41,7 @@ private: const int CELL_HEIGHT; uint32_t *mProximityCharsArray; }; -}; // namespace latinime + +} // namespace latinime + #endif // LATINIME_PROXIMITY_INFO_H diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp index 20a185219..e3296f12a 100644 --- a/native/src/unigram_dictionary.cpp +++ b/native/src/unigram_dictionary.cpp @@ -16,8 +16,6 @@ */ #include <assert.h> -#include <fcntl.h> -#include <stdio.h> #include <string.h> #define LOG_TAG "LatinIME: unigram_dictionary.cpp" @@ -34,10 +32,12 @@ const UnigramDictionary::digraph_t UnigramDictionary::GERMAN_UMLAUT_DIGRAPHS[] = { 'o', 'e' }, { 'u', 'e' } }; -UnigramDictionary::UnigramDictionary(const unsigned char *dict, int typedLetterMultiplier, +// TODO: check the header +UnigramDictionary::UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultiplier, int fullWordMultiplier, int maxWordLength, int maxWords, int maxProximityChars, const bool isLatestDictVersion) - : DICT(dict), MAX_WORD_LENGTH(maxWordLength), MAX_WORDS(maxWords), + : DICT_ROOT(streamStart), + MAX_WORD_LENGTH(maxWordLength), MAX_WORDS(maxWords), MAX_PROXIMITY_CHARS(maxProximityChars), IS_LATEST_DICT_VERSION(isLatestDictVersion), TYPED_LETTER_MULTIPLIER(typedLetterMultiplier), FULL_WORD_MULTIPLIER(fullWordMultiplier), ROOT_POS(isLatestDictVersion ? DICTIONARY_HEADER_SIZE : 0), @@ -233,7 +233,7 @@ void UnigramDictionary::getWordSuggestions(const ProximityInfo *proximityInfo, PROF_END(5); PROF_START(6); - if (SUGGEST_WORDS_WITH_SPACE_PROXIMITY) { + if (SUGGEST_WORDS_WITH_SPACE_PROXIMITY && proximityInfo) { // The first and last "mistyped spaces" are taken care of by excessive character handling for (int i = 1; i < codesSize - 1; ++i) { if (DEBUG_DICT) { @@ -265,14 +265,14 @@ void UnigramDictionary::initSuggestions(const int *codes, const int codesSize, mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2; } -void UnigramDictionary::registerNextLetter( - unsigned short c, int *nextLetters, int nextLettersSize) { +static inline void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize) { if (c < nextLettersSize) { nextLetters[c]++; } } // TODO: We need to optimize addWord by using STL or something +// TODO: This needs to take an const unsigned short* and not tinker with its contents bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency) { word[length] = 0; if (DEBUG_DICT && DEBUG_SHOW_FOUND_WORD) { @@ -290,8 +290,8 @@ bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency) // Find the right insertion point int insertAt = 0; while (insertAt < MAX_WORDS) { - if (frequency > mFrequencies[insertAt] || (mFrequencies[insertAt] == frequency - && length < Dictionary::wideStrLen(mOutputChars + insertAt * MAX_WORD_LENGTH))) { + // TODO: How should we sort words with the same frequency? + if (frequency > mFrequencies[insertAt]) { break; } insertAt++; @@ -322,7 +322,17 @@ bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency) return false; } -unsigned short UnigramDictionary::toBaseLowerCase(unsigned short c) { +inline void UnigramDictionary::addWordAlternatesSpellings(const uint8_t* const root, int pos, + int depth, int finalFreq) { + // TODO: actually add alternates when the format supports it. +} + +static inline bool hasAlternateSpellings(uint8_t flags) { + // TODO: when the format supports it, return the actual value. + return false; +} + +static inline unsigned short toBaseLowerCase(unsigned short c) { if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) { c = BASE_CHARS[c]; } @@ -334,7 +344,7 @@ unsigned short UnigramDictionary::toBaseLowerCase(unsigned short c) { return c; } -bool UnigramDictionary::sameAsTyped(unsigned short *word, int length) { +bool UnigramDictionary::sameAsTyped(const unsigned short *word, int length) const { if (length != mInputLength) { return false; } @@ -363,7 +373,7 @@ void UnigramDictionary::getSuggestionCandidates(const int skipPos, } int rootPosition = ROOT_POS; // Get the number of child of root, then increment the position - int childCount = Dictionary::getCount(DICT, &rootPosition); + int childCount = Dictionary::getCount(DICT_ROOT, &rootPosition); int depth = 0; mStackChildCount[0] = childCount; @@ -372,6 +382,7 @@ void UnigramDictionary::getSuggestionCandidates(const int skipPos, mStackInputIndex[0] = 0; mStackDiffs[0] = 0; mStackSiblingPos[0] = rootPosition; + mStackOutputIndex[0] = 0; // Depth first search while (depth >= 0) { @@ -382,14 +393,15 @@ void UnigramDictionary::getSuggestionCandidates(const int skipPos, int inputIndex = mStackInputIndex[depth]; int diffs = mStackDiffs[depth]; int siblingPos = mStackSiblingPos[depth]; + int outputIndex = mStackOutputIndex[depth]; int firstChildPos; // depth will never be greater than maxDepth because in that case, // needsToTraverseChildrenNodes should be false - const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, depth, + const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, outputIndex, maxDepth, traverseAllNodes, matchWeight, inputIndex, diffs, skipPos, excessivePos, transposedPos, nextLetters, nextLettersSize, &childCount, &firstChildPos, &traverseAllNodes, &matchWeight, &inputIndex, &diffs, - &siblingPos); + &siblingPos, &outputIndex); // Update next sibling pos mStackSiblingPos[depth] = siblingPos; if (needsToTraverseChildrenNodes) { @@ -401,6 +413,7 @@ void UnigramDictionary::getSuggestionCandidates(const int skipPos, mStackInputIndex[depth] = inputIndex; mStackDiffs[depth] = diffs; mStackSiblingPos[depth] = firstChildPos; + mStackOutputIndex[depth] = outputIndex; } } else { // Goes to parent sibling node @@ -451,8 +464,8 @@ inline static void multiplyRate(const int rate, int *freq) { } inline static int calcFreqForSplitTwoWords( - const int typedLetterMultiplier, const int firstWordLength, - const int secondWordLength, const int firstFreq, const int secondFreq) { + const int typedLetterMultiplier, const int firstWordLength, const int secondWordLength, + const int firstFreq, const int secondFreq, const bool isSpaceProximity) { if (firstWordLength == 0 || secondWordLength == 0) { return 0; } @@ -492,102 +505,28 @@ inline static int calcFreqForSplitTwoWords( const int normalizedScoreDemotionRateOffset = (100 + 100 / totalLength); multiplyRate(normalizedScoreDemotionRateOffset, &totalFreq); - multiplyRate(WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE, &totalFreq); - return totalFreq; -} - -bool UnigramDictionary::getSplitTwoWordsSuggestion(const int inputLength, - const int firstWordStartPos, const int firstWordLength, const int secondWordStartPos, - const int secondWordLength) { - if (inputLength >= MAX_WORD_LENGTH) return false; - if (0 >= firstWordLength || 0 >= secondWordLength || firstWordStartPos >= secondWordStartPos - || firstWordStartPos < 0 || secondWordStartPos + secondWordLength > inputLength) - return false; - const int newWordLength = firstWordLength + secondWordLength + 1; - // Allocating variable length array on stack - unsigned short word[newWordLength]; - const int firstFreq = getBestWordFreq(firstWordStartPos, firstWordLength, mWord); - if (DEBUG_DICT) { - LOGI("First freq: %d", firstFreq); - } - if (firstFreq <= 0) return false; - - for (int i = 0; i < firstWordLength; ++i) { - word[i] = mWord[i]; - } - - const int secondFreq = getBestWordFreq(secondWordStartPos, secondWordLength, mWord); - if (DEBUG_DICT) { - LOGI("Second freq: %d", secondFreq); - } - if (secondFreq <= 0) return false; - - word[firstWordLength] = SPACE; - for (int i = (firstWordLength + 1); i < newWordLength; ++i) { - word[i] = mWord[i - firstWordLength - 1]; + if (isSpaceProximity) { + // A word pair with one space proximity correction + if (DEBUG_DICT) { + LOGI("Found a word pair with space proximity correction."); + } + multiplyIntCapped(typedLetterMultiplier, &totalFreq); + multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &totalFreq); } - int pairFreq = calcFreqForSplitTwoWords( - TYPED_LETTER_MULTIPLIER, firstWordLength, secondWordLength, firstFreq, secondFreq); - if (DEBUG_DICT) { - LOGI("Split two words: %d, %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength, - TYPED_LETTER_MULTIPLIER); - } - addWord(word, newWordLength, pairFreq); - return true; + multiplyRate(WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE, &totalFreq); + return totalFreq; } bool UnigramDictionary::getMissingSpaceWords(const int inputLength, const int missingSpacePos) { return getSplitTwoWordsSuggestion( - inputLength, 0, missingSpacePos, missingSpacePos, inputLength - missingSpacePos); + inputLength, 0, missingSpacePos, missingSpacePos, inputLength - missingSpacePos, false); } bool UnigramDictionary::getMistypedSpaceWords(const int inputLength, const int spaceProximityPos) { return getSplitTwoWordsSuggestion( inputLength, 0, spaceProximityPos, spaceProximityPos + 1, - inputLength - spaceProximityPos - 1); -} - -// Keep this for comparing spec to new getWords -void UnigramDictionary::getWordsOld(const int initialPos, const int inputLength, const int skipPos, - const int excessivePos, const int transposedPos,int *nextLetters, - const int nextLettersSize) { - int initialPosition = initialPos; - const int count = Dictionary::getCount(DICT, &initialPosition); - getWordsRec(count, initialPosition, 0, - min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH), - mInputLength <= 0, 1, 0, 0, skipPos, excessivePos, transposedPos, nextLetters, - nextLettersSize); -} - -void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, const int depth, - const int maxDepth, const bool traverseAllNodes, const int matchWeight, - const int inputIndex, const int diffs, const int skipPos, const int excessivePos, - const int transposedPos, int *nextLetters, const int nextLettersSize) { - int siblingPos = pos; - for (int i = 0; i < childrenCount; ++i) { - int newCount; - int newChildPosition; - const int newDepth = depth + 1; - bool newTraverseAllNodes; - int newMatchRate; - int newInputIndex; - int newDiffs; - int newSiblingPos; - const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, depth, maxDepth, - traverseAllNodes, matchWeight, inputIndex, diffs, - skipPos, excessivePos, transposedPos, - nextLetters, nextLettersSize, - &newCount, &newChildPosition, &newTraverseAllNodes, &newMatchRate, - &newInputIndex, &newDiffs, &newSiblingPos); - siblingPos = newSiblingPos; - - if (needsToTraverseChildrenNodes) { - getWordsRec(newCount, newChildPosition, newDepth, maxDepth, newTraverseAllNodes, - newMatchRate, newInputIndex, newDiffs, skipPos, excessivePos, transposedPos, - nextLetters, nextLettersSize); - } - } + inputLength - spaceProximityPos - 1, true); } inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int depth, @@ -645,28 +584,6 @@ inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int return finalFreq; } -inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsGreaterThanInputLength( - unsigned short *word, const int inputIndex, const int depth, const int matchWeight, - int *nextLetters, const int nextLettersSize, const int skipPos, const int excessivePos, - const int transposedPos, const int freq) { - const int finalFreq = calculateFinalFreq(inputIndex, depth, matchWeight, skipPos, excessivePos, - transposedPos, freq, false); - if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq); - if (depth >= mInputLength && skipPos < 0) { - registerNextLetter(mWord[mInputLength], nextLetters, nextLettersSize); - } -} - -inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsSameAsInputLength( - unsigned short *word, const int inputIndex, const int depth, const int matchWeight, - const int skipPos, const int excessivePos, const int transposedPos, const int freq) { - if (sameAsTyped(word, depth + 1)) return; - const int finalFreq = calculateFinalFreq(inputIndex, depth, matchWeight, skipPos, - excessivePos, transposedPos, freq, true); - // Proximity collection will promote a word of the same length as what user typed. - if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq); -} - inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c, const int inputIndex, const int skipPos, const int depth) { const unsigned short userTypedChar = getInputCharsAt(inputIndex)[0]; @@ -697,7 +614,6 @@ inline bool UnigramDictionary::existsAdjacentProximityChars(const int inputIndex return false; } - // In the following function, c is the current character of the dictionary word // currently examined. // currentChars is an array containing the keys close to the character the @@ -740,96 +656,79 @@ inline UnigramDictionary::ProximityType UnigramDictionary::getMatchedProximityId return UNRELATED_CHAR; } -inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth, - const int maxDepth, const bool traverseAllNodes, int matchWeight, int inputIndex, - const int diffs, const int skipPos, const int excessivePos, const int transposedPos, - int *nextLetters, const int nextLettersSize, int *newCount, int *newChildPosition, - bool *newTraverseAllNodes, int *newMatchRate, int *newInputIndex, int *newDiffs, - int *nextSiblingPosition) { - if (DEBUG_DICT) { - int inputCount = 0; - if (skipPos >= 0) ++inputCount; - if (excessivePos >= 0) ++inputCount; - if (transposedPos >= 0) ++inputCount; - assert(inputCount <= 1); +inline void UnigramDictionary::onTerminal(unsigned short int* word, const int depth, + const uint8_t* const root, const uint8_t flags, int pos, + const int inputIndex, const int matchWeight, const int skipPos, + const int excessivePos, const int transposedPos, const int freq, const bool sameLength, + int* nextLetters, const int nextLettersSize) { + + const bool isSameAsTyped = sameLength ? sameAsTyped(word, depth + 1) : false; + const bool hasAlternates = hasAlternateSpellings(flags); + if (isSameAsTyped && !hasAlternates) return; + + if (depth >= MIN_SUGGEST_DEPTH) { + const int finalFreq = calculateFinalFreq(inputIndex, depth, matchWeight, skipPos, + excessivePos, transposedPos, freq, sameLength); + if (!isSameAsTyped) + addWord(word, depth + 1, finalFreq); + if (hasAlternates) + addWordAlternatesSpellings(DICT_ROOT, pos, flags, finalFreq); } - unsigned short c; - int childPosition; - bool terminal; - int freq; - bool isSameAsUserTypedLength = false; - if (excessivePos == depth && inputIndex < mInputLength - 1) ++inputIndex; + if (sameLength && depth >= mInputLength && skipPos < 0) { + registerNextLetter(word[mInputLength], nextLetters, nextLettersSize); + } +} - *nextSiblingPosition = Dictionary::setDictionaryValues(DICT, IS_LATEST_DICT_VERSION, pos, &c, - &childPosition, &terminal, &freq); +#ifndef NEW_DICTIONARY_FORMAT +// TODO: Don't forget to bring inline functions back to over where they are used. - const bool needsToTraverseChildrenNodes = childPosition != 0; - - // If we are only doing traverseAllNodes, no need to look at the typed characters. - if (traverseAllNodes || needsToSkipCurrentNode(c, inputIndex, skipPos, depth)) { - mWord[depth] = c; - if (traverseAllNodes && terminal) { - onTerminalWhenUserTypedLengthIsGreaterThanInputLength(mWord, inputIndex, depth, - matchWeight, nextLetters, nextLettersSize, skipPos, excessivePos, transposedPos, - freq); - } - if (!needsToTraverseChildrenNodes) return false; - *newTraverseAllNodes = traverseAllNodes; - *newMatchRate = matchWeight; - *newDiffs = diffs; - *newInputIndex = inputIndex; - } else { - const int *currentChars = getInputCharsAt(inputIndex); +// The following functions will be entirely replaced with new implementations. +void UnigramDictionary::getWordsOld(const int initialPos, const int inputLength, const int skipPos, + const int excessivePos, const int transposedPos,int *nextLetters, + const int nextLettersSize) { + int initialPosition = initialPos; + const int count = Dictionary::getCount(DICT_ROOT, &initialPosition); + getWordsRec(count, initialPosition, 0, + min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH), + mInputLength <= 0, 1, 0, 0, skipPos, excessivePos, transposedPos, nextLetters, + nextLettersSize); +} - if (transposedPos >= 0) { - if (inputIndex == transposedPos) currentChars += MAX_PROXIMITY_CHARS; - if (inputIndex == (transposedPos + 1)) currentChars -= MAX_PROXIMITY_CHARS; - } +void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, const int depth, + const int maxDepth, const bool traverseAllNodes, const int matchWeight, + const int inputIndex, const int diffs, const int skipPos, const int excessivePos, + const int transposedPos, int *nextLetters, const int nextLettersSize) { + int siblingPos = pos; + for (int i = 0; i < childrenCount; ++i) { + int newCount; + int newChildPosition; + bool newTraverseAllNodes; + int newMatchRate; + int newInputIndex; + int newDiffs; + int newSiblingPos; + int newOutputIndex; + const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, depth, maxDepth, + traverseAllNodes, matchWeight, inputIndex, diffs, + skipPos, excessivePos, transposedPos, + nextLetters, nextLettersSize, + &newCount, &newChildPosition, &newTraverseAllNodes, &newMatchRate, + &newInputIndex, &newDiffs, &newSiblingPos, &newOutputIndex); + siblingPos = newSiblingPos; - int matchedProximityCharId = getMatchedProximityId(currentChars, c, skipPos, excessivePos, - transposedPos); - if (UNRELATED_CHAR == matchedProximityCharId) return false; - mWord[depth] = c; - // If inputIndex is greater than mInputLength, that means there is no - // proximity chars. So, we don't need to check proximity. - if (SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) { - multiplyIntCapped(TYPED_LETTER_MULTIPLIER, &matchWeight); - } - bool isSameAsUserTypedLength = mInputLength == inputIndex + 1 - || (excessivePos == mInputLength - 1 && inputIndex == mInputLength - 2); - if (isSameAsUserTypedLength && terminal) { - onTerminalWhenUserTypedLengthIsSameAsInputLength(mWord, inputIndex, depth, matchWeight, - skipPos, excessivePos, transposedPos, freq); + if (needsToTraverseChildrenNodes) { + getWordsRec(newCount, newChildPosition, newOutputIndex, maxDepth, newTraverseAllNodes, + newMatchRate, newInputIndex, newDiffs, skipPos, excessivePos, transposedPos, + nextLetters, nextLettersSize); } - if (!needsToTraverseChildrenNodes) return false; - // Start traversing all nodes after the index exceeds the user typed length - *newTraverseAllNodes = isSameAsUserTypedLength; - *newMatchRate = matchWeight; - *newDiffs = diffs + ((NEAR_PROXIMITY_CHAR == matchedProximityCharId) ? 1 : 0); - *newInputIndex = inputIndex + 1; - } - // Optimization: Prune out words that are too long compared to how much was typed. - if (depth >= maxDepth || *newDiffs > mMaxEditDistance) { - return false; - } - - // If inputIndex is greater than mInputLength, that means there are no proximity chars. - // TODO: Check if this can be isSameAsUserTypedLength only. - if (isSameAsUserTypedLength || mInputLength <= *newInputIndex) { - *newTraverseAllNodes = true; } - // get the count of nodes and increment childAddress. - *newCount = Dictionary::getCount(DICT, &childPosition); - *newChildPosition = childPosition; - if (DEBUG_DICT) assert(needsToTraverseChildrenNodes); - return needsToTraverseChildrenNodes; } inline int UnigramDictionary::getBestWordFreq(const int startInputIndex, const int inputLength, unsigned short *word) { int pos = ROOT_POS; - int count = Dictionary::getCount(DICT, &pos); + int count = Dictionary::getCount(DICT_ROOT, &pos); int maxFreq = 0; int depth = 0; unsigned short newWord[MAX_WORD_LENGTH_INTERNAL]; @@ -885,8 +784,8 @@ inline bool UnigramDictionary::processCurrentNodeForExactMatch(const int firstCh const int inputIndex = startInputIndex + depth; const int *currentChars = getInputCharsAt(inputIndex); unsigned short c; - *siblingPos = Dictionary::setDictionaryValues(DICT, IS_LATEST_DICT_VERSION, firstChildPos, &c, - newChildPosition, newTerminal, newFreq); + *siblingPos = Dictionary::setDictionaryValues(DICT_ROOT, IS_LATEST_DICT_VERSION, firstChildPos, + &c, newChildPosition, newTerminal, newFreq); const unsigned int inputC = currentChars[0]; if (DEBUG_DICT) { assert(inputC <= U_SHORT_MAX); @@ -903,7 +802,7 @@ inline bool UnigramDictionary::processCurrentNodeForExactMatch(const int firstCh } } if (hasChild) { - *newCount = Dictionary::getCount(DICT, newChildPosition); + *newCount = Dictionary::getCount(DICT_ROOT, newChildPosition); return true; } else { return false; @@ -915,4 +814,314 @@ inline bool UnigramDictionary::processCurrentNodeForExactMatch(const int firstCh return false; } } + +// TODO: use uint32_t instead of unsigned short +bool UnigramDictionary::isValidWord(unsigned short *word, int length) { + if (IS_LATEST_DICT_VERSION) { + return (getBigramPosition(DICTIONARY_HEADER_SIZE, word, 0, length) != NOT_VALID_WORD); + } else { + return (getBigramPosition(0, word, 0, length) != NOT_VALID_WORD); + } +} + + +// Require strict exact match. +int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offset, + int length) const { + // returns address of bigram data of that word + // return -99 if not found + + int count = Dictionary::getCount(DICT_ROOT, &pos); + unsigned short currentChar = (unsigned short) word[offset]; + for (int j = 0; j < count; j++) { + unsigned short c = Dictionary::getChar(DICT_ROOT, &pos); + int terminal = Dictionary::getTerminal(DICT_ROOT, &pos); + int childPos = Dictionary::getAddress(DICT_ROOT, &pos); + if (c == currentChar) { + if (offset == length - 1) { + if (terminal) { + return (pos+1); + } + } else { + if (childPos != 0) { + int t = getBigramPosition(childPos, word, offset + 1, length); + if (t > 0) { + return t; + } + } + } + } + if (terminal) { + Dictionary::getFreq(DICT_ROOT, IS_LATEST_DICT_VERSION, &pos); + } + // There could be two instances of each alphabet - upper and lower case. So continue + // looking ... + } + return NOT_VALID_WORD; +} + + +// The following functions will be modified. +bool UnigramDictionary::getSplitTwoWordsSuggestion(const int inputLength, + const int firstWordStartPos, const int firstWordLength, const int secondWordStartPos, + const int secondWordLength, const bool isSpaceProximity) { + if (inputLength >= MAX_WORD_LENGTH) return false; + if (0 >= firstWordLength || 0 >= secondWordLength || firstWordStartPos >= secondWordStartPos + || firstWordStartPos < 0 || secondWordStartPos + secondWordLength > inputLength) + return false; + const int newWordLength = firstWordLength + secondWordLength + 1; + // Allocating variable length array on stack + unsigned short word[newWordLength]; + const int firstFreq = getBestWordFreq(firstWordStartPos, firstWordLength, mWord); + if (DEBUG_DICT) { + LOGI("First freq: %d", firstFreq); + } + if (firstFreq <= 0) return false; + + for (int i = 0; i < firstWordLength; ++i) { + word[i] = mWord[i]; + } + + const int secondFreq = getBestWordFreq(secondWordStartPos, secondWordLength, mWord); + if (DEBUG_DICT) { + LOGI("Second freq: %d", secondFreq); + } + if (secondFreq <= 0) return false; + + word[firstWordLength] = SPACE; + for (int i = (firstWordLength + 1); i < newWordLength; ++i) { + word[i] = mWord[i - firstWordLength - 1]; + } + + int pairFreq = calcFreqForSplitTwoWords(TYPED_LETTER_MULTIPLIER, firstWordLength, + secondWordLength, firstFreq, secondFreq, isSpaceProximity); + if (DEBUG_DICT) { + LOGI("Split two words: %d, %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength, + TYPED_LETTER_MULTIPLIER); + } + addWord(word, newWordLength, pairFreq); + return true; +} + +inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth, + const int maxDepth, const bool traverseAllNodes, int matchWeight, int inputIndex, + const int diffs, const int skipPos, const int excessivePos, const int transposedPos, + int *nextLetters, const int nextLettersSize, int *newCount, int *newChildPosition, + bool *newTraverseAllNodes, int *newMatchRate, int *newInputIndex, int *newDiffs, + int *nextSiblingPosition, int *nextOutputIndex) { + if (DEBUG_DICT) { + int inputCount = 0; + if (skipPos >= 0) ++inputCount; + if (excessivePos >= 0) ++inputCount; + if (transposedPos >= 0) ++inputCount; + assert(inputCount <= 1); + } + unsigned short c; + int childPosition; + bool terminal; + int freq; + bool isSameAsUserTypedLength = false; + + const uint8_t flags = 0; // No flags for now + + if (excessivePos == depth && inputIndex < mInputLength - 1) ++inputIndex; + + *nextSiblingPosition = Dictionary::setDictionaryValues(DICT_ROOT, IS_LATEST_DICT_VERSION, pos, + &c, &childPosition, &terminal, &freq); + *nextOutputIndex = depth + 1; + + const bool needsToTraverseChildrenNodes = childPosition != 0; + + // If we are only doing traverseAllNodes, no need to look at the typed characters. + if (traverseAllNodes || needsToSkipCurrentNode(c, inputIndex, skipPos, depth)) { + mWord[depth] = c; + if (traverseAllNodes && terminal) { + onTerminal(mWord, depth, DICT_ROOT, flags, pos, inputIndex, matchWeight, skipPos, + excessivePos, transposedPos, freq, false, nextLetters, nextLettersSize); + } + if (!needsToTraverseChildrenNodes) return false; + *newTraverseAllNodes = traverseAllNodes; + *newMatchRate = matchWeight; + *newDiffs = diffs; + *newInputIndex = inputIndex; + } else { + const int *currentChars = getInputCharsAt(inputIndex); + + if (transposedPos >= 0) { + if (inputIndex == transposedPos) currentChars += MAX_PROXIMITY_CHARS; + if (inputIndex == (transposedPos + 1)) currentChars -= MAX_PROXIMITY_CHARS; + } + + int matchedProximityCharId = getMatchedProximityId(currentChars, c, skipPos, excessivePos, + transposedPos); + if (UNRELATED_CHAR == matchedProximityCharId) return false; + mWord[depth] = c; + // If inputIndex is greater than mInputLength, that means there is no + // proximity chars. So, we don't need to check proximity. + if (SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) { + multiplyIntCapped(TYPED_LETTER_MULTIPLIER, &matchWeight); + } + bool isSameAsUserTypedLength = mInputLength == inputIndex + 1 + || (excessivePos == mInputLength - 1 && inputIndex == mInputLength - 2); + if (isSameAsUserTypedLength && terminal) { + onTerminal(mWord, depth, DICT_ROOT, flags, pos, inputIndex, matchWeight, skipPos, + excessivePos, transposedPos, freq, true, nextLetters, nextLettersSize); + } + if (!needsToTraverseChildrenNodes) return false; + // Start traversing all nodes after the index exceeds the user typed length + *newTraverseAllNodes = isSameAsUserTypedLength; + *newMatchRate = matchWeight; + *newDiffs = diffs + ((NEAR_PROXIMITY_CHAR == matchedProximityCharId) ? 1 : 0); + *newInputIndex = inputIndex + 1; + } + // Optimization: Prune out words that are too long compared to how much was typed. + if (depth >= maxDepth || *newDiffs > mMaxEditDistance) { + return false; + } + + // If inputIndex is greater than mInputLength, that means there are no proximity chars. + // TODO: Check if this can be isSameAsUserTypedLength only. + if (isSameAsUserTypedLength || mInputLength <= *newInputIndex) { + *newTraverseAllNodes = true; + } + // get the count of nodes and increment childAddress. + *newCount = Dictionary::getCount(DICT_ROOT, &childPosition); + *newChildPosition = childPosition; + if (DEBUG_DICT) assert(needsToTraverseChildrenNodes); + return needsToTraverseChildrenNodes; +} + +#else // NEW_DICTIONARY_FORMAT + +bool UnigramDictionary::getSplitTwoWordsSuggestion(const int inputLength, + const int firstWordStartPos, const int firstWordLength, const int secondWordStartPos, + const int secondWordLength, const bool isSpaceProximity) { + if (inputLength >= MAX_WORD_LENGTH) return false; + if (0 >= firstWordLength || 0 >= secondWordLength || firstWordStartPos >= secondWordStartPos + || firstWordStartPos < 0 || secondWordStartPos + secondWordLength > inputLength) + return false; + const int newWordLength = firstWordLength + secondWordLength + 1; + // Allocating variable length array on stack + unsigned short word[newWordLength]; + const int firstFreq = getBestWordFreq(firstWordStartPos, firstWordLength, mWord); + if (DEBUG_DICT) { + LOGI("First freq: %d", firstFreq); + } + if (firstFreq <= 0) return false; + + for (int i = 0; i < firstWordLength; ++i) { + word[i] = mWord[i]; + } + + const int secondFreq = getBestWordFreq(secondWordStartPos, secondWordLength, mWord); + if (DEBUG_DICT) { + LOGI("Second freq: %d", secondFreq); + } + if (secondFreq <= 0) return false; + + word[firstWordLength] = SPACE; + for (int i = (firstWordLength + 1); i < newWordLength; ++i) { + word[i] = mWord[i - firstWordLength - 1]; + } + + int pairFreq = calcFreqForSplitTwoWords(TYPED_LETTER_MULTIPLIER, firstWordLength, + secondWordLength, firstFreq, secondFreq, isSpaceProximity); + if (DEBUG_DICT) { + LOGI("Split two words: %d, %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength, + TYPED_LETTER_MULTIPLIER); + } + addWord(word, newWordLength, pairFreq); + return true; +} + +inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth, + const int maxDepth, const bool traverseAllNodes, int matchWeight, int inputIndex, + const int diffs, const int skipPos, const int excessivePos, const int transposedPos, + int *nextLetters, const int nextLettersSize, int *newCount, int *newChildPosition, + bool *newTraverseAllNodes, int *newMatchRate, int *newInputIndex, int *newDiffs, + int *nextSiblingPosition, int *nextOutputIndex) { + if (DEBUG_DICT) { + int inputCount = 0; + if (skipPos >= 0) ++inputCount; + if (excessivePos >= 0) ++inputCount; + if (transposedPos >= 0) ++inputCount; + assert(inputCount <= 1); + } + unsigned short c; + int childPosition; + bool terminal; + int freq; + bool isSameAsUserTypedLength = false; + + const uint8_t flags = 0; // No flags for now + + if (excessivePos == depth && inputIndex < mInputLength - 1) ++inputIndex; + + *nextSiblingPosition = Dictionary::setDictionaryValues(DICT_ROOT, IS_LATEST_DICT_VERSION, pos, + &c, &childPosition, &terminal, &freq); + *nextOutputIndex = depth + 1; + + const bool needsToTraverseChildrenNodes = childPosition != 0; + + // If we are only doing traverseAllNodes, no need to look at the typed characters. + if (traverseAllNodes || needsToSkipCurrentNode(c, inputIndex, skipPos, depth)) { + mWord[depth] = c; + if (traverseAllNodes && terminal) { + onTerminal(mWord, depth, DICT_ROOT, flags, pos, inputIndex, matchWeight, skipPos, + excessivePos, transposedPos, freq, false, nextLetters, nextLettersSize); + } + if (!needsToTraverseChildrenNodes) return false; + *newTraverseAllNodes = traverseAllNodes; + *newMatchRate = matchWeight; + *newDiffs = diffs; + *newInputIndex = inputIndex; + } else { + const int *currentChars = getInputCharsAt(inputIndex); + + if (transposedPos >= 0) { + if (inputIndex == transposedPos) currentChars += MAX_PROXIMITY_CHARS; + if (inputIndex == (transposedPos + 1)) currentChars -= MAX_PROXIMITY_CHARS; + } + + int matchedProximityCharId = getMatchedProximityId(currentChars, c, skipPos, excessivePos, + transposedPos); + if (UNRELATED_CHAR == matchedProximityCharId) return false; + mWord[depth] = c; + // If inputIndex is greater than mInputLength, that means there is no + // proximity chars. So, we don't need to check proximity. + if (SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) { + multiplyIntCapped(TYPED_LETTER_MULTIPLIER, &matchWeight); + } + bool isSameAsUserTypedLength = mInputLength == inputIndex + 1 + || (excessivePos == mInputLength - 1 && inputIndex == mInputLength - 2); + if (isSameAsUserTypedLength && terminal) { + onTerminal(mWord, depth, DICT_ROOT, flags, pos, inputIndex, matchWeight, skipPos, + excessivePos, transposedPos, freq, true, nextLetters, nextLettersSize); + } + if (!needsToTraverseChildrenNodes) return false; + // Start traversing all nodes after the index exceeds the user typed length + *newTraverseAllNodes = isSameAsUserTypedLength; + *newMatchRate = matchWeight; + *newDiffs = diffs + ((NEAR_PROXIMITY_CHAR == matchedProximityCharId) ? 1 : 0); + *newInputIndex = inputIndex + 1; + } + // Optimization: Prune out words that are too long compared to how much was typed. + if (depth >= maxDepth || *newDiffs > mMaxEditDistance) { + return false; + } + + // If inputIndex is greater than mInputLength, that means there are no proximity chars. + // TODO: Check if this can be isSameAsUserTypedLength only. + if (isSameAsUserTypedLength || mInputLength <= *newInputIndex) { + *newTraverseAllNodes = true; + } + // get the count of nodes and increment childAddress. + *newCount = Dictionary::getCount(DICT_ROOT, &childPosition); + *newChildPosition = childPosition; + if (DEBUG_DICT) assert(needsToTraverseChildrenNodes); + return needsToTraverseChildrenNodes; +} + +#endif // NEW_DICTIONARY_FORMAT + } // namespace latinime diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h index 3d3007ce0..154ac9b36 100644 --- a/native/src/unigram_dictionary.h +++ b/native/src/unigram_dictionary.h @@ -17,9 +17,14 @@ #ifndef LATINIME_UNIGRAM_DICTIONARY_H #define LATINIME_UNIGRAM_DICTIONARY_H +#include <stdint.h> #include "defines.h" #include "proximity_info.h" +#ifndef NULL +#define NULL 0 +#endif + namespace latinime { class UnigramDictionary { @@ -31,8 +36,11 @@ class UnigramDictionary { } ProximityType; public: - UnigramDictionary(const unsigned char *dict, int typedLetterMultipler, int fullWordMultiplier, - int maxWordLength, int maxWords, int maxProximityChars, const bool isLatestDictVersion); + UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultipler, + int fullWordMultiplier, int maxWordLength, int maxWords, int maxProximityChars, + const bool isLatestDictVersion); + bool isValidWord(unsigned short *word, int length); + int getBigramPosition(int pos, unsigned short *word, int offset, int length) const; int getSuggestions(const ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, const int flags, unsigned short *outWords, int *frequencies); @@ -56,34 +64,30 @@ private: bool checkIfDictVersionIsLatest(); int getAddress(int *pos); int getFreq(int *pos); - int wideStrLen(unsigned short *str); - bool sameAsTyped(unsigned short *word, int length); + bool sameAsTyped(const unsigned short *word, int length) const; bool addWord(unsigned short *word, int length, int frequency); - unsigned short toBaseLowerCase(unsigned short c); + void addWordAlternatesSpellings(const uint8_t* const root, int pos, int depth, int finalFreq); void getWordsRec(const int childrenCount, const int pos, const int depth, const int maxDepth, const bool traverseAllNodes, const int snr, const int inputIndex, const int diffs, const int skipPos, const int excessivePos, const int transposedPos, int *nextLetters, const int nextLettersSize); bool getSplitTwoWordsSuggestion(const int inputLength, const int firstWordStartPos, const int firstWordLength, - const int secondWordStartPos, const int secondWordLength); + const int secondWordStartPos, const int secondWordLength, const bool isSpaceProximity); bool getMissingSpaceWords(const int inputLength, const int missingSpacePos); bool getMistypedSpaceWords(const int inputLength, const int spaceProximityPos); // Keep getWordsOld for comparing performance between getWords and getWordsOld void getWordsOld(const int initialPos, const int inputLength, const int skipPos, const int excessivePos, const int transposedPos, int *nextLetters, const int nextLettersSize); - void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize); int calculateFinalFreq(const int inputIndex, const int depth, const int snr, const int skipPos, const int excessivePos, const int transposedPos, const int freq, const bool sameLength) const; - void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word, - const int inputIndex, const int depth, const int snr, int *nextLetters, - const int nextLettersSize, const int skipPos, const int excessivePos, - const int transposedPos, const int freq); - void onTerminalWhenUserTypedLengthIsSameAsInputLength(unsigned short *word, - const int inputIndex, const int depth, const int snr, const int skipPos, - const int excessivePos, const int transposedPos, const int freq); + void onTerminal(unsigned short int* word, const int depth, + const uint8_t* const root, const uint8_t flags, int pos, + const int inputIndex, const int matchWeight, const int skipPos, + const int excessivePos, const int transposedPos, const int freq, const bool sameLength, + int *nextLetters, const int nextLettersSize); bool needsToSkipCurrentNode(const unsigned short c, const int inputIndex, const int skipPos, const int depth); ProximityType getMatchedProximityId(const int *currentChars, const unsigned short c, @@ -94,7 +98,7 @@ private: const int diffs, const int skipPos, const int excessivePos, const int transposedPos, int *nextLetters, const int nextLettersSize, int *newCount, int *newChildPosition, bool *newTraverseAllNodes, int *newSnr, int*newInputIndex, int *newDiffs, - int *nextSiblingPosition); + int *nextSiblingPosition, int *nextOutputIndex); int getBestWordFreq(const int startInputIndex, const int inputLength, unsigned short *word); // Process a node by considering missing space bool processCurrentNodeForExactMatch(const int firstChildPos, @@ -104,7 +108,8 @@ private: inline const int* getInputCharsAt(const int index) const { return mInputCodes + (index * MAX_PROXIMITY_CHARS); } - const unsigned char *DICT; + + const uint8_t* const DICT_ROOT; const int MAX_WORD_LENGTH; const int MAX_WORDS; const int MAX_PROXIMITY_CHARS; @@ -138,11 +143,10 @@ private: int mStackInputIndex[MAX_WORD_LENGTH_INTERNAL]; int mStackDiffs[MAX_WORD_LENGTH_INTERNAL]; int mStackSiblingPos[MAX_WORD_LENGTH_INTERNAL]; + int mStackOutputIndex[MAX_WORD_LENGTH_INTERNAL]; int mNextLettersFrequency[NEXT_LETTERS_SIZE]; }; -// ---------------------------------------------------------------------------- - -}; // namespace latinime +} // namespace latinime #endif // LATINIME_UNIGRAM_DICTIONARY_H |