diff options
author | 2013-06-03 00:51:43 +0000 | |
---|---|---|
committer | 2013-06-03 00:51:44 +0000 | |
commit | 712fefd85fa024259b81e50e98b18b749b9bde69 (patch) | |
tree | 25a0322bc2d2e688f9315626958a176fd2c22787 /native/jni/src/unigram_dictionary.cpp | |
parent | 88ad30f40b05128d891fd412bb684bdbdc514446 (diff) | |
parent | 0ecfb9424754341d7ee41557fc1f913cb6ca79c2 (diff) | |
download | latinime-712fefd85fa024259b81e50e98b18b749b9bde69.tar.gz latinime-712fefd85fa024259b81e50e98b18b749b9bde69.tar.xz latinime-712fefd85fa024259b81e50e98b18b749b9bde69.zip |
Merge "Use BinaryDictionaryInfo instead of raw pointers."
Diffstat (limited to 'native/jni/src/unigram_dictionary.cpp')
-rw-r--r-- | native/jni/src/unigram_dictionary.cpp | 56 |
1 files changed, 32 insertions, 24 deletions
diff --git a/native/jni/src/unigram_dictionary.cpp b/native/jni/src/unigram_dictionary.cpp index 1d979dec0..8fd015c62 100644 --- a/native/jni/src/unigram_dictionary.cpp +++ b/native/jni/src/unigram_dictionary.cpp @@ -19,6 +19,7 @@ #define LOG_TAG "LatinIME: unigram_dictionary.cpp" #include "defines.h" +#include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/binary_format.h" #include "suggest/core/dictionary/char_utils.h" #include "suggest/core/dictionary/dictionary.h" @@ -33,8 +34,9 @@ namespace latinime { // TODO: check the header -UnigramDictionary::UnigramDictionary(const uint8_t *const streamStart, const unsigned int dictFlags) - : DICT_ROOT(streamStart), ROOT_POS(0), +UnigramDictionary::UnigramDictionary( + const BinaryDictionaryInfo *const binaryDicitonaryInfo, const uint8_t dictFlags) + : mBinaryDicitonaryInfo(binaryDicitonaryInfo), MAX_DIGRAPH_SEARCH_DEPTH(DEFAULT_MAX_DIGRAPH_SEARCH_DEPTH), DICT_FLAGS(dictFlags) { if (DEBUG_DICT) { AKLOGI("UnigramDictionary - constructor"); @@ -316,9 +318,10 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance, correction->setCorrectionParams(0, 0, 0, -1 /* spaceProximityPos */, -1 /* missingSpacePos */, useFullEditDistance, doAutoCompletion, maxErrors); - int rootPosition = ROOT_POS; + int rootPosition = mBinaryDicitonaryInfo->getRootPosition(); // Get the number of children of root, then increment the position - int childCount = BinaryFormat::getGroupCountAndForwardPointer(DICT_ROOT, &rootPosition); + int childCount = BinaryFormat::getGroupCountAndForwardPointer( + mBinaryDicitonaryInfo->getDictRoot(), &rootPosition); int outputIndex = 0; correction->initCorrectionState(rootPosition, childCount, (inputSize <= 0)); @@ -748,7 +751,7 @@ int UnigramDictionary::getMostProbableWordLikeInner(const int *const inWord, con int newWord[MAX_WORD_LENGTH]; int depth = 0; int maxFreq = -1; - const uint8_t *const root = DICT_ROOT; + const uint8_t *const root = 
mBinaryDicitonaryInfo->getDictRoot(); int stackChildCount[MAX_WORD_LENGTH]; int stackInputIndex[MAX_WORD_LENGTH]; int stackSiblingPos[MAX_WORD_LENGTH]; @@ -807,7 +810,7 @@ int UnigramDictionary::getMostProbableWordLikeInner(const int *const inWord, con } int UnigramDictionary::getProbability(const int *const inWord, const int length) const { - const uint8_t *const root = DICT_ROOT; + const uint8_t *const root = mBinaryDicitonaryInfo->getDictRoot(); int pos = BinaryFormat::getTerminalPosition(root, inWord, length, false /* forceLowerCaseSearch */); if (NOT_VALID_WORD == pos) { @@ -824,7 +827,7 @@ int UnigramDictionary::getProbability(const int *const inWord, const int length) if (hasMultipleChars) { pos = BinaryFormat::skipOtherCharacters(root, pos); } else { - BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos); + BinaryFormat::getCodePointAndForwardPointer(root, &pos); } const int unigramProbability = BinaryFormat::readProbabilityWithoutMovingPointer(root, pos); return unigramProbability; @@ -866,7 +869,8 @@ bool UnigramDictionary::processCurrentNode(const int initialPos, // - FLAG_HAS_MULTIPLE_CHARS: whether this node has multiple char or not. // - FLAG_IS_TERMINAL: whether this node is a terminal or not (it may still have children) // - FLAG_HAS_BIGRAMS: whether this node has bigrams or not - const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(DICT_ROOT, &pos); + const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer( + mBinaryDicitonaryInfo->getDictRoot(), &pos); const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags)); const bool isTerminalNode = (0 != (BinaryFormat::FLAG_IS_TERMINAL & flags)); @@ -877,7 +881,8 @@ bool UnigramDictionary::processCurrentNode(const int initialPos, // else if FLAG_IS_TERMINAL: the probability // else if MASK_GROUP_ADDRESS_TYPE is not NONE: the children address // Note that you can't have a node that both is not a terminal and has no children. 
- int c = BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos); + int c = BinaryFormat::getCodePointAndForwardPointer( + mBinaryDicitonaryInfo->getDictRoot(), &pos); ASSERT(NOT_A_CODE_POINT != c); // We are going to loop through each character and make it look like it's a different @@ -891,8 +896,8 @@ bool UnigramDictionary::processCurrentNode(const int initialPos, // We prefetch the next char. If 'c' is the last char of this node, we will have // NOT_A_CODE_POINT in the next char. From this we can decide whether this virtual node // should behave as a terminal or not and whether we have children. - const int nextc = hasMultipleChars - ? BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos) : NOT_A_CODE_POINT; + const int nextc = hasMultipleChars ? BinaryFormat::getCodePointAndForwardPointer( + mBinaryDicitonaryInfo->getDictRoot(), &pos) : NOT_A_CODE_POINT; const bool isLastChar = (NOT_A_CODE_POINT == nextc); // If there are more chars in this nodes, then this virtual node is not a terminal. // If we are on the last char, this virtual node is a terminal if this node is. @@ -912,11 +917,11 @@ bool UnigramDictionary::processCurrentNode(const int initialPos, // We don't have to output other values because we return false, as in // "don't traverse children". if (!isLastChar) { - pos = BinaryFormat::skipOtherCharacters(DICT_ROOT, pos); + pos = BinaryFormat::skipOtherCharacters(mBinaryDicitonaryInfo->getDictRoot(), pos); } pos = BinaryFormat::skipProbability(flags, pos); - *nextSiblingPosition = - BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos); + *nextSiblingPosition = BinaryFormat::skipChildrenPosAndAttributes( + mBinaryDicitonaryInfo->getDictRoot(), flags, pos); return false; } @@ -929,11 +934,11 @@ bool UnigramDictionary::processCurrentNode(const int initialPos, if (isTerminalNode) { // The probability should be here, because we come here only if this is actually // a terminal node, and we are on its last char. 
- const int unigramProbability = - BinaryFormat::readProbabilityWithoutMovingPointer(DICT_ROOT, pos); + const int unigramProbability = BinaryFormat::readProbabilityWithoutMovingPointer( + mBinaryDicitonaryInfo->getDictRoot(), pos); const int childrenAddressPos = BinaryFormat::skipProbability(flags, pos); const int attributesPos = BinaryFormat::skipChildrenPosition(flags, childrenAddressPos); - TerminalAttributes terminalAttributes(DICT_ROOT, flags, attributesPos); + TerminalAttributes terminalAttributes(mBinaryDicitonaryInfo, flags, attributesPos); // bigramMap contains the bigram frequencies indexed by addresses for fast lookup. // bigramFilter is a bloom filter of said frequencies for even faster rejection. const int probability = ProbabilityUtils::getProbability( @@ -953,16 +958,16 @@ bool UnigramDictionary::processCurrentNode(const int initialPos, // remaining char in this group for there can't be any. if (!hasChildren) { pos = BinaryFormat::skipProbability(flags, pos); - *nextSiblingPosition = - BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos); + *nextSiblingPosition = BinaryFormat::skipChildrenPosAndAttributes( + mBinaryDicitonaryInfo->getDictRoot(), flags, pos); return false; } // Optimization: Prune out words that are too long compared to how much was typed. if (correction->needsToPrune()) { pos = BinaryFormat::skipProbability(flags, pos); - *nextSiblingPosition = - BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos); + *nextSiblingPosition = BinaryFormat::skipChildrenPosAndAttributes( + mBinaryDicitonaryInfo->getDictRoot(), flags, pos); if (DEBUG_DICT_FULL) { AKLOGI("Traversing was pruned."); } @@ -981,9 +986,12 @@ bool UnigramDictionary::processCurrentNode(const int initialPos, // Once this is read, we still need to output the number of nodes in the immediate children of // this node, so we read and output it before returning true, as in "please traverse children". 
pos = BinaryFormat::skipProbability(flags, pos); - int childrenPos = BinaryFormat::readChildrenPosition(DICT_ROOT, flags, pos); - *nextSiblingPosition = BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos); - *newCount = BinaryFormat::getGroupCountAndForwardPointer(DICT_ROOT, &childrenPos); + int childrenPos = BinaryFormat::readChildrenPosition( + mBinaryDicitonaryInfo->getDictRoot(), flags, pos); + *nextSiblingPosition = BinaryFormat::skipChildrenPosAndAttributes( + mBinaryDicitonaryInfo->getDictRoot(), flags, pos); + *newCount = BinaryFormat::getGroupCountAndForwardPointer( + mBinaryDicitonaryInfo->getDictRoot(), &childrenPos); *newChildrenPosition = childrenPos; return true; } |