diff options
Diffstat (limited to 'native/jni/src/bigram_dictionary.cpp')
-rw-r--r-- | native/jni/src/bigram_dictionary.cpp | 23 |
1 files changed, 13 insertions, 10 deletions
diff --git a/native/jni/src/bigram_dictionary.cpp b/native/jni/src/bigram_dictionary.cpp index c592542bd..b57227880 100644 --- a/native/jni/src/bigram_dictionary.cpp +++ b/native/jni/src/bigram_dictionary.cpp @@ -20,15 +20,18 @@ #include "bigram_dictionary.h" -#include "char_utils.h" #include "defines.h" +#include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/binary_format.h" #include "suggest/core/dictionary/bloom_filter.h" +#include "suggest/core/dictionary/char_utils.h" #include "suggest/core/dictionary/dictionary.h" +#include "suggest/core/dictionary/probability_utils.h" namespace latinime { -BigramDictionary::BigramDictionary(const uint8_t *const streamStart) : DICT_ROOT(streamStart) { +BigramDictionary::BigramDictionary(const BinaryDictionaryInfo *const binaryDictionaryInfo) + : mBinaryDictionaryInfo(binaryDictionaryInfo) { if (DEBUG_DICT) { AKLOGI("BigramDictionary - constructor"); } @@ -52,7 +55,7 @@ void BigramDictionary::addWordBigram(int *word, int length, int probability, int int insertAt = 0; while (insertAt < MAX_RESULTS) { if (probability > bigramProbability[insertAt] || (bigramProbability[insertAt] == probability - && length < getCodePointCount(MAX_WORD_LENGTH, + && length < CharUtils::getCodePointCount(MAX_WORD_LENGTH, bigramCodePoints + insertAt * MAX_WORD_LENGTH))) { break; } @@ -103,7 +106,7 @@ int BigramDictionary::getBigrams(const int *prevWord, int prevWordLength, int *i // TODO: remove unused arguments, and refrain from storing stuff in members of this class // TODO: have "in" arguments before "out" ones, and make out args explicit in the name - const uint8_t *const root = DICT_ROOT; + const uint8_t *const root = mBinaryDictionaryInfo->getDictRoot(); int pos = getBigramListPositionForWord(prevWord, prevWordLength, false /* forceLowerCaseSearch */); // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams @@ -134,7 +137,7 @@ int BigramDictionary::getBigrams(const int *prevWord, int prevWordLength, int *i // resulting probability is 8 - although in the practice it's never bigger than 3 or 4 // in very bad cases. This means that sometimes, we'll see some bigrams interverted // here, but it can't get too bad. - const int probability = BinaryFormat::computeProbabilityForBigram( + const int probability = ProbabilityUtils::computeProbabilityForBigram( unigramProbability, bigramProbabilityTemp); addWordBigram(bigramBuffer, length, probability, bigramProbability, bigramCodePoints, outputTypes); @@ -149,7 +152,7 @@ int BigramDictionary::getBigrams(const int *prevWord, int prevWordLength, int *i int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength, const bool forceLowerCaseSearch) const { if (0 >= prevWordLength) return 0; - const uint8_t *const root = DICT_ROOT; + const uint8_t *const root = mBinaryDictionaryInfo->getDictRoot(); int pos = BinaryFormat::getTerminalPosition(root, prevWord, prevWordLength, forceLowerCaseSearch); @@ -170,7 +173,7 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in void BigramDictionary::fillBigramAddressToProbabilityMapAndFilter(const int *prevWord, const int prevWordLength, std::map<int, int> *map, uint8_t *filter) const { memset(filter, 0, BIGRAM_FILTER_BYTE_SIZE); - const uint8_t *const root = DICT_ROOT; + const uint8_t *const root = mBinaryDictionaryInfo->getDictRoot(); int pos = getBigramListPositionForWord(prevWord, prevWordLength, false /* forceLowerCaseSearch */); if (0 == pos) { @@ -196,9 +199,9 @@ bool BigramDictionary::checkFirstCharacter(int *word, int *inputCodePoints) cons // what user typed. int maxAlt = MAX_ALTERNATIVES; - const int firstBaseLowerCodePoint = toBaseLowerCase(*word); + const int firstBaseLowerCodePoint = CharUtils::toBaseLowerCase(*word); while (maxAlt > 0) { - if (toBaseLowerCase(*inputCodePoints) == firstBaseLowerCodePoint) { + if (CharUtils::toBaseLowerCase(*inputCodePoints) == firstBaseLowerCodePoint) { return true; } inputCodePoints++; @@ -209,7 +212,7 @@ bool BigramDictionary::checkFirstCharacter(int *word, int *inputCodePoints) cons bool BigramDictionary::isValidBigram(const int *word1, int length1, const int *word2, int length2) const { - const uint8_t *const root = DICT_ROOT; + const uint8_t *const root = mBinaryDictionaryInfo->getDictRoot(); int pos = getBigramListPositionForWord(word1, length1, false /* forceLowerCaseSearch */); // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams if (0 == pos) return false; |