diff options
Diffstat (limited to 'native/src/dictionary.cpp')
-rw-r--r-- | native/src/dictionary.cpp | 61 |
1 files changed, 57 insertions, 4 deletions
diff --git a/native/src/dictionary.cpp b/native/src/dictionary.cpp index a21b80a48..05692f7ef 100644 --- a/native/src/dictionary.cpp +++ b/native/src/dictionary.cpp @@ -17,17 +17,23 @@ #include <stdio.h> +#define LOG_TAG "LatinIME: dictionary.cpp" + #include "dictionary.h" namespace latinime { Dictionary::Dictionary(void *dict, int typedLetterMultiplier, int fullWordMultiplier, int maxWordLength, int maxWords, int maxAlternatives) + : DICT((unsigned char*) dict), + // Checks whether it has the latest dictionary or the old dictionary + IS_LATEST_DICT_VERSION((((unsigned char*) dict)[0] & 0xFF) >= DICTIONARY_VERSION_MIN) { - mUnigramDictionary = new UnigramDictionary(dict, typedLetterMultiplier, fullWordMultiplier, - maxWordLength, maxWords, maxAlternatives, this); - mBigramDictionary = new BigramDictionary(dict, typedLetterMultiplier, fullWordMultiplier, - maxWordLength, maxWords, maxAlternatives, this); + LOGI("IN NATIVE SUGGEST Version: %d \n", (DICT[0] & 0xFF)); + mUnigramDictionary = new UnigramDictionary(DICT, typedLetterMultiplier, fullWordMultiplier, + maxWordLength, maxWords, maxAlternatives, IS_LATEST_DICT_VERSION); + mBigramDictionary = new BigramDictionary(DICT, maxWordLength, maxAlternatives, + IS_LATEST_DICT_VERSION, hasBigram(), this); } Dictionary::~Dictionary() @@ -35,4 +41,51 @@ Dictionary::~Dictionary() delete mUnigramDictionary; delete mBigramDictionary; } + +bool Dictionary::hasBigram() { + return ((DICT[1] & 0xFF) == 1); +} + +// TODO: use uint16_t instead of unsigned short +bool Dictionary::isValidWord(unsigned short *word, int length) +{ + if (IS_LATEST_DICT_VERSION) { + return (isValidWordRec(DICTIONARY_HEADER_SIZE, word, 0, length) != NOT_VALID_WORD); + } else { + return (isValidWordRec(0, word, 0, length) != NOT_VALID_WORD); + } +} + +int Dictionary::isValidWordRec(int pos, unsigned short *word, int offset, int length) { + // returns address of bigram data of that word + // return -99 if not found + + int count = Dictionary::getCount(DICT, &pos); + unsigned short currentChar = (unsigned short) word[offset]; + for (int j = 0; j < count; j++) { + unsigned short c = Dictionary::getChar(DICT, &pos); + int terminal = Dictionary::getTerminal(DICT, &pos); + int childPos = Dictionary::getAddress(DICT, &pos); + if (c == currentChar) { + if (offset == length - 1) { + if (terminal) { + return (pos+1); + } + } else { + if (childPos != 0) { + int t = isValidWordRec(childPos, word, offset + 1, length); + if (t > 0) { + return t; + } + } + } + } + if (terminal) { + Dictionary::getFreq(DICT, IS_LATEST_DICT_VERSION, &pos); + } + // There could be two instances of each alphabet - upper and lower case. So continue + // looking ... + } + return NOT_VALID_WORD; +} } // namespace latinime |