diff options
Diffstat (limited to 'dictionary/src/dictionary.cpp')
-rw-r--r-- | dictionary/src/dictionary.cpp | 58 |
1 files changed, 33 insertions, 25 deletions
diff --git a/dictionary/src/dictionary.cpp b/dictionary/src/dictionary.cpp index b37f4c926..cc711f419 100644 --- a/dictionary/src/dictionary.cpp +++ b/dictionary/src/dictionary.cpp @@ -49,11 +49,8 @@ Dictionary::~Dictionary() } int Dictionary::getSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies, - int maxWordLength, int maxWords, int maxAlternatives) + int maxWordLength, int maxWords, int maxAlternatives, int skipPos) { - memset(frequencies, 0, maxWords * sizeof(*frequencies)); - memset(outWords, 0, maxWords * maxWordLength * sizeof(*outWords)); - mFrequencies = frequencies; mOutputChars = outWords; mInputCodes = codes; @@ -62,8 +59,10 @@ int Dictionary::getSuggestions(int *codes, int codesSize, unsigned short *outWor mMaxWordLength = maxWordLength; mMaxWords = maxWords; mWords = 0; + mSkipPos = skipPos; + mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2; - getWordsRec(0, 0, mInputLength * 3, false, 1, 0); + getWordsRec(0, 0, mInputLength * 3, false, 1, 0, 0); if (DEBUG_DICT) LOGI("Returning %d words", mWords); return mWords; @@ -110,7 +109,11 @@ bool Dictionary::addWord(unsigned short *word, int length, int frequency) { word[length] = 0; - if (DEBUG_DICT) LOGI("Found word = %s, freq = %d : \n", word, frequency); + if (DEBUG_DICT) { + char s[length + 1]; + for (int i = 0; i <= length; i++) s[i] = word[i]; + LOGI("Found word = %s, freq = %d : \n", s, frequency); + } // Find the right insertion point int insertAt = 0; @@ -144,16 +147,14 @@ Dictionary::addWord(unsigned short *word, int length, int frequency) } unsigned short -Dictionary::toLowerCase(unsigned short c, const int depth) { +Dictionary::toLowerCase(unsigned short c) { if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) { c = BASE_CHARS[c]; } - if (depth == 0) { - if (c >='A' && c <= 'Z') { - c |= 32; - } else if (c > 127) { - c = u_tolower(c); - } + if (c >='A' && c <= 'Z') { + c |= 32; + } else if (c > 127) { + c = u_tolower(c); } return c; } @@ -178,12 +179,16 @@ Dictionary::sameAsTyped(unsigned short *word, int length) static char QUOTE = '\''; void -Dictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion, int snr, int inputIndex) +Dictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion, int snr, int inputIndex, + int diffs) { // Optimization: Prune out words that are too long compared to how much was typed. if (depth > maxDepth) { return; } + if (diffs > mMaxEditDistance) { + return; + } int count = getCount(&pos); int *currentChars = NULL; if (mInputLength <= inputIndex) { @@ -194,7 +199,7 @@ Dictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion, int s for (int i = 0; i < count; i++) { unsigned short c = getChar(&pos); - unsigned short lowerC = toLowerCase(c, depth); + unsigned short lowerC = toLowerCase(c); bool terminal = getTerminal(&pos); int childrenAddress = getAddress(&pos); int freq = 1; @@ -207,38 +212,41 @@ Dictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion, int s } if (childrenAddress != 0) { getWordsRec(childrenAddress, depth + 1, maxDepth, - completion, snr, inputIndex); + completion, snr, inputIndex, diffs); } - } else if (c == QUOTE && currentChars[0] != QUOTE) { - // Skip the ' and continue deeper - mWord[depth] = QUOTE; + } else if (c == QUOTE && currentChars[0] != QUOTE || mSkipPos == depth) { + // Skip the ' or other letter and continue deeper + mWord[depth] = c; if (childrenAddress != 0) { - getWordsRec(childrenAddress, depth + 1, maxDepth, false, snr, inputIndex); + getWordsRec(childrenAddress, depth + 1, maxDepth, false, snr, inputIndex, diffs); } } else { int j = 0; while (currentChars[j] > 0) { - int addedWeight = j == 0 ? mTypedLetterMultiplier : 1; if (currentChars[j] == lowerC || currentChars[j] == c) { + int addedWeight = j == 0 ? mTypedLetterMultiplier : 1; mWord[depth] = c; if (mInputLength == inputIndex + 1) { if (terminal) { if (//INCLUDE_TYPED_WORD_IF_VALID || !sameAsTyped(mWord, depth + 1)) { - addWord(mWord, depth + 1, - (freq * snr * addedWeight * mFullWordMultiplier)); + int finalFreq = freq * snr * addedWeight; + if (mSkipPos < 0) finalFreq *= mFullWordMultiplier; + addWord(mWord, depth + 1, finalFreq); } } if (childrenAddress != 0) { getWordsRec(childrenAddress, depth + 1, - maxDepth, true, snr * addedWeight, inputIndex + 1); + maxDepth, true, snr * addedWeight, inputIndex + 1, + diffs + (j > 0)); } } else if (childrenAddress != 0) { getWordsRec(childrenAddress, depth + 1, maxDepth, - false, snr * addedWeight, inputIndex + 1); + false, snr * addedWeight, inputIndex + 1, diffs + (j > 0)); } } j++; + if (mSkipPos >= 0) break; } } } |