aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/src/unigram_dictionary.cpp
diff options
context:
space:
mode:
authorKeisuke Kuroyanagi <ksk@google.com>2013-06-03 00:51:43 +0000
committerAndroid (Google) Code Review <android-gerrit@google.com>2013-06-03 00:51:44 +0000
commit712fefd85fa024259b81e50e98b18b749b9bde69 (patch)
tree25a0322bc2d2e688f9315626958a176fd2c22787 /native/jni/src/unigram_dictionary.cpp
parent88ad30f40b05128d891fd412bb684bdbdc514446 (diff)
parent0ecfb9424754341d7ee41557fc1f913cb6ca79c2 (diff)
downloadlatinime-712fefd85fa024259b81e50e98b18b749b9bde69.tar.gz
latinime-712fefd85fa024259b81e50e98b18b749b9bde69.tar.xz
latinime-712fefd85fa024259b81e50e98b18b749b9bde69.zip
Merge "Use BinaryDictonaryInfo instead of raw pointers."
Diffstat (limited to 'native/jni/src/unigram_dictionary.cpp')
-rw-r--r--native/jni/src/unigram_dictionary.cpp56
1 files changed, 32 insertions, 24 deletions
diff --git a/native/jni/src/unigram_dictionary.cpp b/native/jni/src/unigram_dictionary.cpp
index 1d979dec0..8fd015c62 100644
--- a/native/jni/src/unigram_dictionary.cpp
+++ b/native/jni/src/unigram_dictionary.cpp
@@ -19,6 +19,7 @@
#define LOG_TAG "LatinIME: unigram_dictionary.cpp"
#include "defines.h"
+#include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/core/dictionary/binary_format.h"
#include "suggest/core/dictionary/char_utils.h"
#include "suggest/core/dictionary/dictionary.h"
@@ -33,8 +34,9 @@
namespace latinime {
// TODO: check the header
-UnigramDictionary::UnigramDictionary(const uint8_t *const streamStart, const unsigned int dictFlags)
- : DICT_ROOT(streamStart), ROOT_POS(0),
+UnigramDictionary::UnigramDictionary(
+ const BinaryDictionaryInfo *const binaryDicitonaryInfo, const uint8_t dictFlags)
+ : mBinaryDicitonaryInfo(binaryDicitonaryInfo),
MAX_DIGRAPH_SEARCH_DEPTH(DEFAULT_MAX_DIGRAPH_SEARCH_DEPTH), DICT_FLAGS(dictFlags) {
if (DEBUG_DICT) {
AKLOGI("UnigramDictionary - constructor");
@@ -316,9 +318,10 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
correction->setCorrectionParams(0, 0, 0,
-1 /* spaceProximityPos */, -1 /* missingSpacePos */, useFullEditDistance,
doAutoCompletion, maxErrors);
- int rootPosition = ROOT_POS;
+ int rootPosition = mBinaryDicitonaryInfo->getRootPosition();
// Get the number of children of root, then increment the position
- int childCount = BinaryFormat::getGroupCountAndForwardPointer(DICT_ROOT, &rootPosition);
+ int childCount = BinaryFormat::getGroupCountAndForwardPointer(
+ mBinaryDicitonaryInfo->getDictRoot(), &rootPosition);
int outputIndex = 0;
correction->initCorrectionState(rootPosition, childCount, (inputSize <= 0));
@@ -748,7 +751,7 @@ int UnigramDictionary::getMostProbableWordLikeInner(const int *const inWord, con
int newWord[MAX_WORD_LENGTH];
int depth = 0;
int maxFreq = -1;
- const uint8_t *const root = DICT_ROOT;
+ const uint8_t *const root = mBinaryDicitonaryInfo->getDictRoot();
int stackChildCount[MAX_WORD_LENGTH];
int stackInputIndex[MAX_WORD_LENGTH];
int stackSiblingPos[MAX_WORD_LENGTH];
@@ -807,7 +810,7 @@ int UnigramDictionary::getMostProbableWordLikeInner(const int *const inWord, con
}
int UnigramDictionary::getProbability(const int *const inWord, const int length) const {
- const uint8_t *const root = DICT_ROOT;
+ const uint8_t *const root = mBinaryDicitonaryInfo->getDictRoot();
int pos = BinaryFormat::getTerminalPosition(root, inWord, length,
false /* forceLowerCaseSearch */);
if (NOT_VALID_WORD == pos) {
@@ -824,7 +827,7 @@ int UnigramDictionary::getProbability(const int *const inWord, const int length)
if (hasMultipleChars) {
pos = BinaryFormat::skipOtherCharacters(root, pos);
} else {
- BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos);
+ BinaryFormat::getCodePointAndForwardPointer(root, &pos);
}
const int unigramProbability = BinaryFormat::readProbabilityWithoutMovingPointer(root, pos);
return unigramProbability;
@@ -866,7 +869,8 @@ bool UnigramDictionary::processCurrentNode(const int initialPos,
// - FLAG_HAS_MULTIPLE_CHARS: whether this node has multiple chars or not.
// - FLAG_IS_TERMINAL: whether this node is a terminal or not (it may still have children)
// - FLAG_HAS_BIGRAMS: whether this node has bigrams or not
- const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(DICT_ROOT, &pos);
+ const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(
+ mBinaryDicitonaryInfo->getDictRoot(), &pos);
const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
const bool isTerminalNode = (0 != (BinaryFormat::FLAG_IS_TERMINAL & flags));
@@ -877,7 +881,8 @@ bool UnigramDictionary::processCurrentNode(const int initialPos,
// else if FLAG_IS_TERMINAL: the probability
// else if MASK_GROUP_ADDRESS_TYPE is not NONE: the children address
// Note that you can't have a node that both is not a terminal and has no children.
- int c = BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos);
+ int c = BinaryFormat::getCodePointAndForwardPointer(
+ mBinaryDicitonaryInfo->getDictRoot(), &pos);
ASSERT(NOT_A_CODE_POINT != c);
// We are going to loop through each character and make it look like it's a different
@@ -891,8 +896,8 @@ bool UnigramDictionary::processCurrentNode(const int initialPos,
// We prefetch the next char. If 'c' is the last char of this node, we will have
// NOT_A_CODE_POINT in the next char. From this we can decide whether this virtual node
// should behave as a terminal or not and whether we have children.
- const int nextc = hasMultipleChars
- ? BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos) : NOT_A_CODE_POINT;
+ const int nextc = hasMultipleChars ? BinaryFormat::getCodePointAndForwardPointer(
+ mBinaryDicitonaryInfo->getDictRoot(), &pos) : NOT_A_CODE_POINT;
const bool isLastChar = (NOT_A_CODE_POINT == nextc);
// If there are more chars in this node, then this virtual node is not a terminal.
// If we are on the last char, this virtual node is a terminal if this node is.
@@ -912,11 +917,11 @@ bool UnigramDictionary::processCurrentNode(const int initialPos,
// We don't have to output other values because we return false, as in
// "don't traverse children".
if (!isLastChar) {
- pos = BinaryFormat::skipOtherCharacters(DICT_ROOT, pos);
+ pos = BinaryFormat::skipOtherCharacters(mBinaryDicitonaryInfo->getDictRoot(), pos);
}
pos = BinaryFormat::skipProbability(flags, pos);
- *nextSiblingPosition =
- BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos);
+ *nextSiblingPosition = BinaryFormat::skipChildrenPosAndAttributes(
+ mBinaryDicitonaryInfo->getDictRoot(), flags, pos);
return false;
}
@@ -929,11 +934,11 @@ bool UnigramDictionary::processCurrentNode(const int initialPos,
if (isTerminalNode) {
// The probability should be here, because we come here only if this is actually
// a terminal node, and we are on its last char.
- const int unigramProbability =
- BinaryFormat::readProbabilityWithoutMovingPointer(DICT_ROOT, pos);
+ const int unigramProbability = BinaryFormat::readProbabilityWithoutMovingPointer(
+ mBinaryDicitonaryInfo->getDictRoot(), pos);
const int childrenAddressPos = BinaryFormat::skipProbability(flags, pos);
const int attributesPos = BinaryFormat::skipChildrenPosition(flags, childrenAddressPos);
- TerminalAttributes terminalAttributes(DICT_ROOT, flags, attributesPos);
+ TerminalAttributes terminalAttributes(mBinaryDicitonaryInfo, flags, attributesPos);
// bigramMap contains the bigram frequencies indexed by addresses for fast lookup.
// bigramFilter is a bloom filter of said frequencies for even faster rejection.
const int probability = ProbabilityUtils::getProbability(
@@ -953,16 +958,16 @@ bool UnigramDictionary::processCurrentNode(const int initialPos,
// remaining char in this group for there can't be any.
if (!hasChildren) {
pos = BinaryFormat::skipProbability(flags, pos);
- *nextSiblingPosition =
- BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos);
+ *nextSiblingPosition = BinaryFormat::skipChildrenPosAndAttributes(
+ mBinaryDicitonaryInfo->getDictRoot(), flags, pos);
return false;
}
// Optimization: Prune out words that are too long compared to how much was typed.
if (correction->needsToPrune()) {
pos = BinaryFormat::skipProbability(flags, pos);
- *nextSiblingPosition =
- BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos);
+ *nextSiblingPosition = BinaryFormat::skipChildrenPosAndAttributes(
+ mBinaryDicitonaryInfo->getDictRoot(), flags, pos);
if (DEBUG_DICT_FULL) {
AKLOGI("Traversing was pruned.");
}
@@ -981,9 +986,12 @@ bool UnigramDictionary::processCurrentNode(const int initialPos,
// Once this is read, we still need to output the number of nodes in the immediate children of
// this node, so we read and output it before returning true, as in "please traverse children".
pos = BinaryFormat::skipProbability(flags, pos);
- int childrenPos = BinaryFormat::readChildrenPosition(DICT_ROOT, flags, pos);
- *nextSiblingPosition = BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos);
- *newCount = BinaryFormat::getGroupCountAndForwardPointer(DICT_ROOT, &childrenPos);
+ int childrenPos = BinaryFormat::readChildrenPosition(
+ mBinaryDicitonaryInfo->getDictRoot(), flags, pos);
+ *nextSiblingPosition = BinaryFormat::skipChildrenPosAndAttributes(
+ mBinaryDicitonaryInfo->getDictRoot(), flags, pos);
+ *newCount = BinaryFormat::getGroupCountAndForwardPointer(
+ mBinaryDicitonaryInfo->getDictRoot(), &childrenPos);
*newChildrenPosition = childrenPos;
return true;
}