diff options
author | 2011-06-16 22:33:41 +0900 | |
---|---|---|
committer | 2011-06-16 22:33:41 +0900 | |
commit | 8124e64dccb2027fcdcf19b76490769087f55cc2 (patch) | |
tree | 33ca9611b1844e9a8e26014efd4f0090843ab316 /native/src/unigram_dictionary.cpp | |
parent | 3f4385511b100b369c1c0b89acf532398c3ecb30 (diff) | |
download | latinime-8124e64dccb2027fcdcf19b76490769087f55cc2.tar.gz latinime-8124e64dccb2027fcdcf19b76490769087f55cc2.tar.xz latinime-8124e64dccb2027fcdcf19b76490769087f55cc2.zip |
New dict format, step 2
Move some methods around and make static some methods
Bug: 4392433
Change-Id: I2bbe98aec118a416d21d1e293638e1d324505b9b
Diffstat (limited to 'native/src/unigram_dictionary.cpp')
-rw-r--r-- | native/src/unigram_dictionary.cpp | 48 |
1 files changed, 45 insertions, 3 deletions
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp index cb8f5076c..91e3c8134 100644 --- a/native/src/unigram_dictionary.cpp +++ b/native/src/unigram_dictionary.cpp @@ -265,8 +265,7 @@ void UnigramDictionary::initSuggestions(const int *codes, const int codesSize, mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2; } -void UnigramDictionary::registerNextLetter( - unsigned short c, int *nextLetters, int nextLettersSize) { +static inline void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize) { if (c < nextLettersSize) { nextLetters[c]++; } @@ -322,7 +321,7 @@ bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency) return false; } -unsigned short UnigramDictionary::toBaseLowerCase(unsigned short c) { +static inline unsigned short toBaseLowerCase(unsigned short c) { if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) { c = BASE_CHARS[c]; } @@ -924,4 +923,47 @@ inline bool UnigramDictionary::processCurrentNodeForExactMatch(const int firstCh return false; } } + +// TODO: use uint32_t instead of unsigned short +bool UnigramDictionary::isValidWord(unsigned short *word, int length) { + if (IS_LATEST_DICT_VERSION) { + return (isValidWordRec(DICTIONARY_HEADER_SIZE, word, 0, length) != NOT_VALID_WORD); + } else { + return (isValidWordRec(0, word, 0, length) != NOT_VALID_WORD); + } +} + +int UnigramDictionary::isValidWordRec(int pos, unsigned short *word, int offset, int length) { + // returns address of bigram data of that word + // return -99 if not found + + int count = Dictionary::getCount(DICT_ROOT, &pos); + unsigned short currentChar = (unsigned short) word[offset]; + for (int j = 0; j < count; j++) { + unsigned short c = Dictionary::getChar(DICT_ROOT, &pos); + int terminal = Dictionary::getTerminal(DICT_ROOT, &pos); + int childPos = Dictionary::getAddress(DICT_ROOT, &pos); + if (c == currentChar) { + if (offset == length - 1) { + if (terminal) { + return (pos+1); + } + } else { + if (childPos != 0) { + int t = isValidWordRec(childPos, word, offset + 1, length); + if (t > 0) { + return t; + } + } + } + } + if (terminal) { + Dictionary::getFreq(DICT_ROOT, IS_LATEST_DICT_VERSION, &pos); + } + // There could be two instances of each alphabet - upper and lower case. So continue + // looking ... + } + return NOT_VALID_WORD; +} + } // namespace latinime |