diff options
Diffstat (limited to 'native/src/unigram_dictionary.cpp')
-rw-r--r-- | native/src/unigram_dictionary.cpp | 88 |
1 files changed, 36 insertions, 52 deletions
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp index 93d2b8418..bbfaea454 100644 --- a/native/src/unigram_dictionary.cpp +++ b/native/src/unigram_dictionary.cpp @@ -48,11 +48,11 @@ UnigramDictionary::UnigramDictionary(const uint8_t* const streamStart, int typed if (DEBUG_DICT) { LOGI("UnigramDictionary - constructor"); } - mCorrectionState = new CorrectionState(typedLetterMultiplier, fullWordMultiplier); + mCorrection = new Correction(typedLetterMultiplier, fullWordMultiplier); } UnigramDictionary::~UnigramDictionary() { - delete mCorrectionState; + delete mCorrection; } static inline unsigned int getCodesBufferSize(const int* codes, const int codesSize, @@ -184,7 +184,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, if (DEBUG_DICT) assert(codesSize == mInputLength); const int maxDepth = min(mInputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH); - mCorrectionState->initCorrectionState(mProximityInfo, mInputLength, maxDepth); + mCorrection->initCorrection(mProximityInfo, mInputLength, maxDepth); PROF_END(0); PROF_START(1); @@ -237,7 +237,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, if (DEBUG_DICT) { LOGI("--- Suggest missing space characters %d", i); } - getMissingSpaceWords(mInputLength, i, mCorrectionState); + getMissingSpaceWords(mInputLength, i, mCorrection); } } PROF_END(5); @@ -256,7 +256,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, i, x, y, proximityInfo->hasSpaceProximity(x, y)); } if (proximityInfo->hasSpaceProximity(x, y)) { - getMistypedSpaceWords(mInputLength, i, mCorrectionState); + getMistypedSpaceWords(mInputLength, i, mCorrection); } } } @@ -347,49 +347,33 @@ void UnigramDictionary::getSuggestionCandidates(const int skipPos, assert(excessivePos < mInputLength); assert(missingPos < mInputLength); } - mCorrectionState->setCorrectionParams(skipPos, excessivePos, transposedPos, + mCorrection->setCorrectionParams(skipPos, excessivePos, transposedPos, -1 /* spaceProximityPos */, -1 /* missingSpacePos */); int rootPosition = ROOT_POS; // Get the number of children of root, then increment the position int childCount = Dictionary::getCount(DICT_ROOT, &rootPosition); - int depth = 0; + int outputIndex = 0; - mStackChildCount[0] = childCount; - mStackTraverseAll[0] = (mInputLength <= 0); - mStackInputIndex[0] = 0; - mStackDiffs[0] = 0; - mStackSiblingPos[0] = rootPosition; - mStackOutputIndex[0] = 0; - mStackMatchedCount[0] = 0; + mCorrection->initCorrectionState(rootPosition, childCount, (mInputLength <= 0)); // Depth first search - while (depth >= 0) { - if (mStackChildCount[depth] > 0) { - --mStackChildCount[depth]; - int siblingPos = mStackSiblingPos[depth]; + while (outputIndex >= 0) { + if (mCorrection->initProcessState(outputIndex)) { + int siblingPos = mCorrection->getTreeSiblingPos(outputIndex); int firstChildPos; - mCorrectionState->initProcessState( - mStackMatchedCount[depth], mStackInputIndex[depth], mStackOutputIndex[depth], - mStackTraverseAll[depth], mStackDiffs[depth]); - // needsToTraverseChildrenNodes should be false const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, - mCorrectionState, &childCount, &firstChildPos, &siblingPos); + mCorrection, &childCount, &firstChildPos, &siblingPos); // Update next sibling pos - mStackSiblingPos[depth] = siblingPos; + mCorrection->setTreeSiblingPos(outputIndex, siblingPos); + if (needsToTraverseChildrenNodes) { // Goes to child node - ++depth; - mStackChildCount[depth] = childCount; - mStackSiblingPos[depth] = firstChildPos; - - mCorrectionState->getProcessState(&mStackMatchedCount[depth], - &mStackInputIndex[depth], &mStackOutputIndex[depth], - &mStackTraverseAll[depth], &mStackDiffs[depth]); + outputIndex = mCorrection->goDownTree(outputIndex, childCount, firstChildPos); } } else { // Goes to parent sibling node - --depth; + outputIndex = mCorrection->getTreeParentIndex(outputIndex); } } } @@ -409,17 +393,17 @@ inline static void multiplyIntCapped(const int multiplier, int *base) { } void UnigramDictionary::getMissingSpaceWords( - const int inputLength, const int missingSpacePos, CorrectionState *correctionState) { - correctionState->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */, + const int inputLength, const int missingSpacePos, Correction *correction) { + correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */, -1 /* transposedPos */, -1 /* spaceProximityPos */, missingSpacePos); - getSplitTwoWordsSuggestion(inputLength, correctionState); + getSplitTwoWordsSuggestion(inputLength, correction); } void UnigramDictionary::getMistypedSpaceWords( - const int inputLength, const int spaceProximityPos, CorrectionState *correctionState) { - correctionState->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */, + const int inputLength, const int spaceProximityPos, Correction *correction) { + correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */, -1 /* transposedPos */, spaceProximityPos, -1 /* missingSpacePos */); - getSplitTwoWordsSuggestion(inputLength, correctionState); + getSplitTwoWordsSuggestion(inputLength, correction); } inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c, @@ -429,19 +413,19 @@ inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c, return (c == QUOTE && userTypedChar != QUOTE) || skipPos == depth; } -inline void UnigramDictionary::onTerminal(const int freq, CorrectionState *correctionState) { +inline void UnigramDictionary::onTerminal(const int freq, Correction *correction) { int wordLength; unsigned short* wordPointer; - const int finalFreq = correctionState->getFinalFreq(freq, &wordPointer, &wordLength); + const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength); if (finalFreq >= 0) { addWord(wordPointer, wordLength, finalFreq); } } void UnigramDictionary::getSplitTwoWordsSuggestion( - const int inputLength, CorrectionState* correctionState) { - const int spaceProximityPos = correctionState->getSpaceProximityPos(); - const int missingSpacePos = correctionState->getMissingSpacePos(); + const int inputLength, Correction* correction) { + const int spaceProximityPos = correction->getSpaceProximityPos(); + const int missingSpacePos = correction->getMissingSpacePos(); if (DEBUG_DICT) { int inputCount = 0; if (spaceProximityPos >= 0) ++inputCount; @@ -485,7 +469,7 @@ void UnigramDictionary::getSplitTwoWordsSuggestion( word[i] = mWord[i - firstWordLength - 1]; } - const int pairFreq = mCorrectionState->getFreqForSplitTwoWords(firstFreq, secondFreq); + const int pairFreq = mCorrection->getFreqForSplitTwoWords(firstFreq, secondFreq); if (DEBUG_DICT) { LOGI("Split two words: %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength); } @@ -650,10 +634,10 @@ int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offs // the current node in nextSiblingPosition. Thus, the caller must keep count of the nodes at any // given level, as output into newCount when traversing this level's parent. inline bool UnigramDictionary::processCurrentNode(const int initialPos, - CorrectionState *correctionState, int *newCount, + Correction *correction, int *newCount, int *newChildrenPosition, int *nextSiblingPosition) { if (DEBUG_DICT) { - correctionState->checkState(); + correction->checkState(); } int pos = initialPos; @@ -697,12 +681,12 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, // If we are on the last char, this virtual node is a terminal if this node is. const bool isTerminal = isLastChar && isTerminalNode; - CorrectionState::CorrectionStateType stateType = correctionState->processCharAndCalcState( + Correction::CorrectionType stateType = correction->processCharAndCalcState( c, isTerminal); - if (stateType == CorrectionState::TRAVERSE_ALL_ON_TERMINAL - || stateType == CorrectionState::ON_TERMINAL) { + if (stateType == Correction::TRAVERSE_ALL_ON_TERMINAL + || stateType == Correction::ON_TERMINAL) { needsToInvokeOnTerminal = true; - } else if (stateType == CorrectionState::UNRELATED) { + } else if (stateType == Correction::UNRELATED) { // We found that this is an unrelated character, so we should give up traversing // this node and its children entirely. // However we may not be on the last virtual node yet so we skip the remaining @@ -730,7 +714,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, // The frequency should be here, because we come here only if this is actually // a terminal node, and we are on its last char. const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos); - onTerminal(freq, mCorrectionState); + onTerminal(freq, mCorrection); } // If there are more chars in this node, then this virtual node has children. @@ -751,7 +735,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, } // Optimization: Prune out words that are too long compared to how much was typed. - if (correctionState->needsToPrune()) { + if (correction->needsToPrune()) { pos = BinaryFormat::skipFrequency(flags, pos); *nextSiblingPosition = BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos); |