aboutsummaryrefslogtreecommitdiffstats
path: root/native/src/unigram_dictionary.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'native/src/unigram_dictionary.cpp')
-rw-r--r--native/src/unigram_dictionary.cpp97
1 files changed, 43 insertions, 54 deletions
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index eb28538f1..f5648d3df 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@@ -167,12 +167,6 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x
LOGI("%s %i", s, mFrequencies[j]);
#endif
}
- LOGI("Next letters: ");
- for (int k = 0; k < NEXT_LETTERS_SIZE; k++) {
- if (mNextLettersFrequency[k] > 0) {
- LOGI("%c = %d,", k, mNextLettersFrequency[k]);
- }
- }
}
PROF_END(20);
PROF_CLOSE;
@@ -194,7 +188,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
PROF_END(0);
PROF_START(1);
- getSuggestionCandidates(-1, -1, -1, mNextLettersFrequency, NEXT_LETTERS_SIZE, MAX_DEPTH);
+ getSuggestionCandidates(-1, -1, -1, MAX_DEPTH);
PROF_END(1);
PROF_START(2);
@@ -204,7 +198,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
if (DEBUG_DICT) {
LOGI("--- Suggest missing characters %d", i);
}
- getSuggestionCandidates(i, -1, -1, NULL, 0, MAX_DEPTH);
+ getSuggestionCandidates(i, -1, -1, MAX_DEPTH);
}
}
PROF_END(2);
@@ -217,7 +211,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
if (DEBUG_DICT) {
LOGI("--- Suggest excessive characters %d", i);
}
- getSuggestionCandidates(-1, i, -1, NULL, 0, MAX_DEPTH);
+ getSuggestionCandidates(-1, i, -1, MAX_DEPTH);
}
}
PROF_END(3);
@@ -230,7 +224,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
if (DEBUG_DICT) {
LOGI("--- Suggest transposed characters %d", i);
}
- getSuggestionCandidates(-1, -1, i, NULL, 0, mInputLength - 1);
+ getSuggestionCandidates(-1, -1, i, mInputLength - 1);
}
}
PROF_END(4);
@@ -348,8 +342,7 @@ static const char QUOTE = '\'';
static const char SPACE = ' ';
void UnigramDictionary::getSuggestionCandidates(const int skipPos,
- const int excessivePos, const int transposedPos, int *nextLetters,
- const int nextLettersSize, const int maxDepth) {
+ const int excessivePos, const int transposedPos, const int maxDepth) {
if (DEBUG_DICT) {
LOGI("getSuggestionCandidates %d", maxDepth);
assert(transposedPos + 1 < mInputLength);
@@ -365,29 +358,31 @@ void UnigramDictionary::getSuggestionCandidates(const int skipPos,
mStackChildCount[0] = childCount;
mStackTraverseAll[0] = (mInputLength <= 0);
- mStackMatchCount[0] = 0;
mStackInputIndex[0] = 0;
mStackDiffs[0] = 0;
mStackSiblingPos[0] = rootPosition;
mStackOutputIndex[0] = 0;
+ mStackMatchedCount[0] = 0;
+ mCorrectionState->initDepth();
// Depth first search
while (depth >= 0) {
if (mStackChildCount[depth] > 0) {
--mStackChildCount[depth];
bool traverseAllNodes = mStackTraverseAll[depth];
- int matchCount = mStackMatchCount[depth];
int inputIndex = mStackInputIndex[depth];
int diffs = mStackDiffs[depth];
int siblingPos = mStackSiblingPos[depth];
int outputIndex = mStackOutputIndex[depth];
int firstChildPos;
+ mCorrectionState->slideTree(mStackMatchedCount[depth]);
+
// depth will never be greater than maxDepth because in that case,
// needsToTraverseChildrenNodes should be false
const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, outputIndex,
- maxDepth, traverseAllNodes, matchCount, inputIndex, diffs,
- nextLetters, nextLettersSize, mCorrectionState, &childCount,
- &firstChildPos, &traverseAllNodes, &matchCount, &inputIndex, &diffs,
+ maxDepth, traverseAllNodes, inputIndex, diffs,
+ mCorrectionState, &childCount,
+ &firstChildPos, &traverseAllNodes, &inputIndex, &diffs,
&siblingPos, &outputIndex);
// Update next sibling pos
mStackSiblingPos[depth] = siblingPos;
@@ -396,15 +391,21 @@ void UnigramDictionary::getSuggestionCandidates(const int skipPos,
++depth;
mStackChildCount[depth] = childCount;
mStackTraverseAll[depth] = traverseAllNodes;
- mStackMatchCount[depth] = matchCount;
mStackInputIndex[depth] = inputIndex;
mStackDiffs[depth] = diffs;
mStackSiblingPos[depth] = firstChildPos;
mStackOutputIndex[depth] = outputIndex;
+
+ int matchedCount;
+ mCorrectionState->goDownTree(&matchedCount);
+ mStackMatchedCount[depth] = matchedCount;
+ } else {
+ mCorrectionState->slideTree(mStackMatchedCount[depth]);
}
} else {
// Goes to parent sibling node
--depth;
+ mCorrectionState->goUpTree(mStackMatchedCount[depth]);
}
}
}
@@ -445,24 +446,13 @@ inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
}
-inline void UnigramDictionary::onTerminal(unsigned short int* word, const int depth,
- const uint8_t* const root, const uint8_t flags, const int pos,
- const int inputIndex, const int matchCount, const int freq, const bool sameLength,
- int* nextLetters, const int nextLettersSize, CorrectionState *correctionState) {
- const int skipPos = correctionState->getSkipPos();
-
- const bool isSameAsTyped = sameLength ? mProximityInfo->sameAsTyped(word, depth + 1) : false;
- if (isSameAsTyped) return;
-
- if (depth >= MIN_SUGGEST_DEPTH) {
- const int finalFreq = correctionState->getFinalFreq(inputIndex, depth, matchCount,
- freq, sameLength);
- if (!isSameAsTyped)
- addWord(word, depth + 1, finalFreq);
- }
-
- if (sameLength && depth >= mInputLength && skipPos < 0) {
- registerNextLetter(word[mInputLength], nextLetters, nextLettersSize);
+inline void UnigramDictionary::onTerminal(unsigned short int* word, const int outputIndex,
+ const int inputIndex, const int freq, CorrectionState *correctionState) {
+ if (!mProximityInfo->sameAsTyped(word, outputIndex + 1) && outputIndex >= MIN_SUGGEST_DEPTH) {
+ const int finalFreq = correctionState->getFinalFreq(inputIndex, outputIndex, freq);
+ if (finalFreq >= 0) {
+ addWord(word, outputIndex + 1, finalFreq);
+ }
}
}
@@ -677,11 +667,11 @@ int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offs
// there aren't any more nodes at this level, it merely returns the address of the first byte after
// the current node in nextSiblingPosition. Thus, the caller must keep count of the nodes at any
// given level, as output into newCount when traversing this level's parent.
-inline bool UnigramDictionary::processCurrentNode(const int initialPos, const int initialDepth,
- const int maxDepth, const bool initialTraverseAllNodes, int matchCount, int inputIndex,
- const int initialDiffs, int *nextLetters, const int nextLettersSize,
+inline bool UnigramDictionary::processCurrentNode(const int initialPos, const int initialOutputPos,
+ const int maxDepth, const bool initialTraverseAllNodes, int inputIndex,
+ const int initialDiffs,
CorrectionState *correctionState, int *newCount, int *newChildrenPosition,
- bool *newTraverseAllNodes, int *newMatchRate, int *newInputIndex, int *newDiffs,
+ bool *newTraverseAllNodes, int *newInputIndex, int *newDiffs,
int *nextSiblingPosition, int *newOutputIndex) {
const int skipPos = correctionState->getSkipPos();
const int excessivePos = correctionState->getExcessivePos();
@@ -690,7 +680,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, const in
correctionState->checkState();
}
int pos = initialPos;
- int depth = initialDepth;
+ int internalOutputPos = initialOutputPos;
int traverseAllNodes = initialTraverseAllNodes;
int diffs = initialDiffs;
@@ -736,15 +726,16 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, const in
// This has to be done for each virtual char (this forwards the "inputIndex" which
// is the index in the user-inputted chars, as read by proximity chars.
- if (excessivePos == depth && inputIndex < mInputLength - 1) ++inputIndex;
- if (traverseAllNodes || needsToSkipCurrentNode(c, inputIndex, skipPos, depth)) {
- mWord[depth] = c;
+ if (excessivePos == internalOutputPos && inputIndex < mInputLength - 1) {
+ ++inputIndex;
+ }
+ if (traverseAllNodes || needsToSkipCurrentNode(c, inputIndex, skipPos, internalOutputPos)) {
+ mWord[internalOutputPos] = c;
if (traverseAllNodes && isTerminal) {
// The frequency should be here, because we come here only if this is actually
// a terminal node, and we are on its last char.
const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos);
- onTerminal(mWord, depth, DICT_ROOT, flags, pos, inputIndex, matchCount,
- freq, false, nextLetters, nextLettersSize, mCorrectionState);
+ onTerminal(mWord, internalOutputPos, inputIndex, freq, mCorrectionState);
}
if (!hasChildren) {
// If we don't have children here, that means we finished processing all
@@ -784,18 +775,17 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, const in
BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos);
return false;
}
- mWord[depth] = c;
+ mWord[internalOutputPos] = c;
// If inputIndex is greater than mInputLength, that means there is no
// proximity chars. So, we don't need to check proximity.
if (ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) {
- ++matchCount;
+ correctionState->charMatched();
}
const bool isSameAsUserTypedLength = mInputLength == inputIndex + 1
|| (excessivePos == mInputLength - 1 && inputIndex == mInputLength - 2);
if (isSameAsUserTypedLength && isTerminal) {
const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos);
- onTerminal(mWord, depth, DICT_ROOT, flags, pos, inputIndex, matchCount,
- freq, true, nextLetters, nextLettersSize, mCorrectionState);
+ onTerminal(mWord, internalOutputPos, inputIndex, freq, mCorrectionState);
}
// This character matched the typed character (enough to traverse the node at least)
// so we just evaluated it. Now we should evaluate this virtual node's children - that
@@ -821,7 +811,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, const in
++inputIndex;
}
// Optimization: Prune out words that are too long compared to how much was typed.
- if (depth >= maxDepth || diffs > mMaxEditDistance) {
+ if (internalOutputPos >= maxDepth || diffs > mMaxEditDistance) {
// We are giving up parsing this node and its children. Skip the rest of the node,
// output the sibling position, and return that we don't want to traverse children.
if (!isLastChar) {
@@ -838,7 +828,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, const in
// contain NOT_A_CHARACTER.
c = nextc;
// Also, the next char is one "virtual node" depth more than this char.
- ++depth;
+ ++internalOutputPos;
} while (NOT_A_CHARACTER != c);
// If inputIndex is greater than mInputLength, that means there are no proximity chars.
@@ -850,10 +840,9 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, const in
// All the output values that are purely computation by this function are held in local
// variables. Output them to the caller.
*newTraverseAllNodes = traverseAllNodes;
- *newMatchRate = matchCount;
*newDiffs = diffs;
*newInputIndex = inputIndex;
- *newOutputIndex = depth;
+ *newOutputIndex = internalOutputPos;
// Now we finished processing this node, and we want to traverse children. If there are no
// children, we can't come here.