/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "suggest/core/suggest.h"

#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_priority_queue.h"
#include "suggest/core/dicnode/dic_node_vector.h"
#include "suggest/core/dictionary/binary_dictionary_shortcut_iterator.h"
#include "suggest/core/dictionary/dictionary.h"
#include "suggest/core/dictionary/digraph_utils.h"
#include "suggest/core/dictionary/shortcut_utils.h"
#include "suggest/core/layout/proximity_info.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "suggest/core/policy/scoring.h"
#include "suggest/core/policy/traversal.h"
#include "suggest/core/policy/weighting.h"
#include "suggest/core/session/dic_traverse_session.h"

namespace latinime {

// Initialization of class constants.
const int Suggest::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
const int Suggest::MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE = 2;
const float Suggest::AUTOCORRECT_CLASSIFICATION_THRESHOLD = 0.33f;

/**
 * Returns a set of suggestions for the given input touch points. The commitPoint argument
 * indicates whether to prematurely commit the suggested words up to the given point for
 * sentence-level suggestion.
 *
 * Note: Currently does not support concurrent calls across threads. Continuous suggestion is
 * automatically activated for sequential calls that share the same starting input.
 * TODO: Stop detecting continuous suggestion. Start using traverseSession instead.
 */
int Suggest::getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs,
        int *inputYs, int *times, int *pointerIds, int *inputCodePoints, int inputSize,
        int commitPoint, int *outWords, int *frequencies, int *outputIndices, int *outputTypes,
        int *outputAutoCommitFirstWordConfidence) const {
    PROF_OPEN;
    PROF_START(0);
    const float maxSpatialDistance = TRAVERSAL->getMaxSpatialDistance();
    DicTraverseSession *tSession = static_cast<DicTraverseSession *>(traverseSession);
    tSession->setupForGetSuggestions(pInfo, inputCodePoints, inputSize, inputXs, inputYs, times,
            pointerIds, maxSpatialDistance, TRAVERSAL->getMaxPointerCount());
    // TODO: Add the way to evaluate cache

    initializeSearch(tSession, commitPoint);
    PROF_END(0);
    PROF_START(1);

    // keep expanding search dicNodes until all have terminated.
    while (tSession->getDicTraverseCache()->activeSize() > 0) {
        expandCurrentDicNodes(tSession);
        tSession->getDicTraverseCache()->advanceActiveDicNodes();
        tSession->getDicTraverseCache()->advanceInputIndex(inputSize);
    }
    PROF_END(1);
    PROF_START(2);
    const int size = outputSuggestions(tSession, frequencies, outWords, outputIndices,
            outputTypes, outputAutoCommitFirstWordConfidence);
    PROF_END(2);
    PROF_CLOSE;
    return size;
}

/**
 * Initializes the search at the root of the lexicon trie. Note that when possible the search
 * will continue suggestion from where it left off during the last call.
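 * Otherwise, the cache is reset and traversal restarts from a fresh root DicNode.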
 */
void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPoint) const {
    if (!traverseSession->getProximityInfoState(0)->isUsed()) {
        return;
    }
    // Never auto partial commit for now.
    commitPoint = 0;

    if (traverseSession->getInputSize() > MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE
            && traverseSession->isContinuousSuggestionPossible()) {
        if (commitPoint == 0) {
            // Continue suggestion
            traverseSession->getDicTraverseCache()->continueSearch();
        } else {
            // Continue suggestion after partial commit.
            DicNode *topDicNode =
                    traverseSession->getDicTraverseCache()->setCommitPoint(commitPoint);
            traverseSession->setPrevWordPtNodePos(topDicNode->getPrevWordPtNodePos());
            traverseSession->getDicTraverseCache()->continueSearch();
            traverseSession->setPartiallyCommited();
        }
    } else {
        // Restart recognition at the root.
        traverseSession->resetCache(TRAVERSAL->getMaxCacheSize(traverseSession->getInputSize()),
                MAX_RESULTS);
        // Create a new dic node here
        DicNode rootNode;
        DicNodeUtils::initAsRoot(traverseSession->getDictionaryStructurePolicy(),
                traverseSession->getPrevWordPtNodePos(), &rootNode);
        traverseSession->getDicTraverseCache()->copyPushActive(&rootNode);
    }
}

/**
 * Outputs the final list of suggestions (i.e., terminal nodes).
 */
int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequencies,
        int *outputCodePoints, int *outputIndicesToPartialCommit, int *outputTypes,
        int *outputAutoCommitFirstWordConfidence) const {
#if DEBUG_EVALUATE_MOST_PROBABLE_STRING
    const int terminalSize = 0;
#else
    const int terminalSize = min(MAX_RESULTS,
            static_cast<int>(traverseSession->getDicTraverseCache()->terminalSize()));
#endif
    DicNode terminals[MAX_RESULTS]; // Avoiding non-POD variable length array
    for (int index = terminalSize - 1; index >= 0; --index) {
        traverseSession->getDicTraverseCache()->popTerminal(&terminals[index]);
    }

    const float languageWeight = SCORING->getAdjustedLanguageWeight(
            traverseSession, terminals, terminalSize);

    int outputWordIndex = 0;
    // Insert most probable word at index == 0 as long as there is one terminal at least
    const bool hasMostProbableString = SCORING->getMostProbableString(traverseSession,
            terminalSize, languageWeight, &outputCodePoints[0], &outputTypes[0], &frequencies[0]);
    if (hasMostProbableString) {
        outputIndicesToPartialCommit[outputWordIndex] = NOT_AN_INDEX;
        ++outputWordIndex;
    }

    // Initial value of the loop index for terminal nodes (words)
    int doubleLetterTerminalIndex = -1;
    DoubleLetterLevel doubleLetterLevel = NOT_A_DOUBLE_LETTER;
    SCORING->searchWordWithDoubleLetter(terminals, terminalSize,
            &doubleLetterTerminalIndex, &doubleLetterLevel);

    int maxScore = S_INT_MIN;
    // Force autocorrection for obvious long multi-word suggestions when the top suggestion is
    // a long multiple words suggestion.
    // TODO: Implement a smarter auto-commit method for handling multi-word suggestions.
    // traverseSession->isPartiallyCommited() always returns false because we never auto partial
    // commit for now.
    const bool forceCommitMultiWords = (terminalSize > 0) ?
            TRAVERSAL->autoCorrectsToMultiWordSuggestionIfTop()
                    && (traverseSession->isPartiallyCommited()
                            || (traverseSession->getInputSize()
                                    >= MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT
                                    && terminals[0].hasMultipleWords()))
            : false;
    // TODO: have partial commit work even with multiple pointers.
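    // When a single pointer is used, each output entry below also records (in
    // outputIndicesToPartialCommit) the input index of the first letter of the suggestion's
    // second word, so that the caller can partially commit the leading word; otherwise
    // NOT_AN_INDEX is reported.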
    const bool outputSecondWordFirstLetterInputIndex =
            traverseSession->isOnlyOnePointerUsed(0 /* pointerId */);

    if (terminalSize > 0) {
        // If we have no suggestions, don't write this
        outputAutoCommitFirstWordConfidence[0] = computeFirstWordConfidence(&terminals[0]);
    }

    // Output suggestion results here
    for (int terminalIndex = 0; terminalIndex < terminalSize && outputWordIndex < MAX_RESULTS;
            ++terminalIndex) {
        DicNode *terminalDicNode = &terminals[terminalIndex];
        if (DEBUG_GEO_FULL) {
            terminalDicNode->dump("OUT:");
        }
        const float doubleLetterCost = SCORING->getDoubleLetterDemotionDistanceCost(
                terminalIndex, doubleLetterTerminalIndex, doubleLetterLevel);
        const float compoundDistance =
                terminalDicNode->getCompoundDistance(languageWeight) + doubleLetterCost;
        const bool isPossiblyOffensiveWord =
                traverseSession->getDictionaryStructurePolicy()->getProbability(
                        terminalDicNode->getProbability(), NOT_A_PROBABILITY) <= 0;
        const bool isExactMatch = terminalDicNode->isExactMatch();
        const bool isFirstCharUppercase = terminalDicNode->isFirstCharUppercase();
        // Heuristic: We exclude freq=0 first-char-uppercase words from exact match.
        // (e.g. "AMD" and "and")
        const bool isSafeExactMatch = isExactMatch
                && !(isPossiblyOffensiveWord && isFirstCharUppercase);
        const int outputTypeFlags =
                (isPossiblyOffensiveWord ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0)
                | (isSafeExactMatch ? Dictionary::KIND_FLAG_EXACT_MATCH : 0);

        // Entries that are blacklisted or do not represent a word should not be output.
        const bool isValidWord = !terminalDicNode->isBlacklistedOrNotAWord();

        // Increase output score of top typing suggestion to ensure autocorrection.
        // TODO: Better integration with java side autocorrection logic.
        const int finalScore = SCORING->calculateFinalScore(
                compoundDistance, traverseSession->getInputSize(),
                terminalDicNode->isExactMatch()
                        || (forceCommitMultiWords && terminalDicNode->hasMultipleWords())
                        || (isValidWord && SCORING->doesAutoCorrectValidWord()));
        if (maxScore < finalScore && isValidWord) {
            maxScore = finalScore;
        }

        // Don't output invalid words. However, we still need to submit their shortcuts if any.
        if (isValidWord) {
            outputTypes[outputWordIndex] = Dictionary::KIND_CORRECTION | outputTypeFlags;
            frequencies[outputWordIndex] = finalScore;
            if (outputSecondWordFirstLetterInputIndex) {
                outputIndicesToPartialCommit[outputWordIndex] =
                        terminalDicNode->getSecondWordFirstInputIndex(
                                traverseSession->getProximityInfoState(0));
            } else {
                outputIndicesToPartialCommit[outputWordIndex] = NOT_AN_INDEX;
            }
            // Populate the outputChars array with the suggested word.
            const int startIndex = outputWordIndex * MAX_WORD_LENGTH;
            terminalDicNode->outputResult(&outputCodePoints[startIndex]);
            ++outputWordIndex;
        }

        if (!terminalDicNode->hasMultipleWords()) {
            BinaryDictionaryShortcutIterator shortcutIt(
                    traverseSession->getDictionaryStructurePolicy()
                            ->getShortcutsStructurePolicy(),
                    traverseSession->getDictionaryStructurePolicy()
                            ->getShortcutPositionOfPtNode(terminalDicNode->getPtNodePos()));
            // Shortcut is not supported for multiple words suggestions.
            // TODO: Check shortcuts during traversal for multiple words suggestions.
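            // Any shortcut targets emitted below inherit the base word's partial-commit input
            // index (or NOT_AN_INDEX), mirroring the handling of the word itself above.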
            const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode);
            const int updatedOutputWordIndex = ShortcutUtils::outputShortcuts(&shortcutIt,
                    outputWordIndex, finalScore, outputCodePoints, frequencies, outputTypes,
                    sameAsTyped);
            const int secondWordFirstInputIndex = terminalDicNode->getSecondWordFirstInputIndex(
                    traverseSession->getProximityInfoState(0));
            for (int i = outputWordIndex; i < updatedOutputWordIndex; ++i) {
                if (outputSecondWordFirstLetterInputIndex) {
                    outputIndicesToPartialCommit[i] = secondWordFirstInputIndex;
                } else {
                    outputIndicesToPartialCommit[i] = NOT_AN_INDEX;
                }
            }
            outputWordIndex = updatedOutputWordIndex;
        }
        DicNode::managedDelete(terminalDicNode);
    }

    if (hasMostProbableString) {
        SCORING->safetyNetForMostProbableString(terminalSize, maxScore, &outputCodePoints[0],
                &frequencies[0]);
    }
    return outputWordIndex;
}

int Suggest::computeFirstWordConfidence(const DicNode *const terminalDicNode) const {
    // Get the number of spaces in the first suggestion
    const int spaceCount = terminalDicNode->getTotalNodeSpaceCount();
    // Get the number of characters in the first suggestion
    const int length = terminalDicNode->getTotalNodeCodePointCount();
    // Get the distance for the first word of the suggestion
    const float distance = terminalDicNode->getNormalizedCompoundDistanceAfterFirstWord();
    // Arbitrarily, we give a score whose useful values range from 0 to 1,000,000.
    // 1,000,000 will be the cutoff to auto-commit. It's fine if the number is under 0 or
    // above 1,000,000 : under 0 just means it's very bad to commit, and above 1,000,000 means
    // we are very confident.
    // Expected space count is 1 ~ 5
    static const int MIN_EXPECTED_SPACE_COUNT = 1;
    static const int MAX_EXPECTED_SPACE_COUNT = 5;
    // Expected length is about 4 ~ 30
    static const int MIN_EXPECTED_LENGTH = 4;
    static const int MAX_EXPECTED_LENGTH = 30;
    // Expected distance is about 0.2 ~ 2.0, but consider 0.0 ~ 2.0
    static const float MIN_EXPECTED_DISTANCE = 0.0f;
    static const float MAX_EXPECTED_DISTANCE = 2.0f;
    // This is not strict: it's where most stuff will be falling, but it's still fine if it's
    // outside these values. We want to output a value that reflects all of these. Each factor
    // contributes a bit.

    // We need at least a space.
    if (spaceCount < 1) return NOT_A_FIRST_WORD_CONFIDENCE;

    // The smaller the edit distance, the higher the contribution. MIN_EXPECTED_DISTANCE means
    // full contribution, while MAX_EXPECTED_DISTANCE means no contribution according to the
    // weight of the distance. Clamp to avoid overflows.
    const float clampedDistance = distance < MIN_EXPECTED_DISTANCE ? MIN_EXPECTED_DISTANCE
            : distance > MAX_EXPECTED_DISTANCE ? MAX_EXPECTED_DISTANCE : distance;
    const int distanceContribution = DISTANCE_WEIGHT_FOR_AUTO_COMMIT
            * (MAX_EXPECTED_DISTANCE - clampedDistance)
            / (MAX_EXPECTED_DISTANCE - MIN_EXPECTED_DISTANCE);
    // The larger the suggestion length, the larger the contribution. MIN_EXPECTED_LENGTH is no
    // contribution, MAX_EXPECTED_LENGTH is full contribution according to the weight of the
    // length. Length is guaranteed to be between 1 and 48, so we don't need to clamp.
    const int lengthContribution = LENGTH_WEIGHT_FOR_AUTO_COMMIT
            * (length - MIN_EXPECTED_LENGTH) / (MAX_EXPECTED_LENGTH - MIN_EXPECTED_LENGTH);
    // The more spaces, the larger the contribution. MIN_EXPECTED_SPACE_COUNT space is no
    // contribution, MAX_EXPECTED_SPACE_COUNT spaces is full contribution according to the
    // weight of the space count.
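    // As a concrete illustration (input values chosen arbitrarily, not taken from real data):
    // distance = 0.5 yields (2.0 - 0.5) / 2.0 = 75% of the distance weight, length = 12 yields
    // (12 - 4) / 26 ~= 31% of the length weight, and spaceCount = 2 yields (2 - 1) / 4 = 25% of
    // the space count weight computed just below.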
    const int spaceContribution = SPACE_COUNT_WEIGHT_FOR_AUTO_COMMIT
            * (spaceCount - MIN_EXPECTED_SPACE_COUNT)
            / (MAX_EXPECTED_SPACE_COUNT - MIN_EXPECTED_SPACE_COUNT);
    return distanceContribution + lengthContribution + spaceContribution;
}

/**
 * Expands the dicNodes in the current search priority queue by advancing to the possible child
 * nodes based on the next touch point(s) (or no touch points for lookahead)
 */
void Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const {
    const int inputSize = traverseSession->getInputSize();
    DicNodeVector childDicNodes(TRAVERSAL->getDefaultExpandDicNodeSize());
    DicNode correctionDicNode;

    // TODO: Find more efficient caching
    const bool shouldDepthLevelCache = TRAVERSAL->shouldDepthLevelCache(traverseSession);
    if (shouldDepthLevelCache) {
        traverseSession->getDicTraverseCache()->updateLastCachedInputIndex();
    }
    if (DEBUG_CACHE) {
        AKLOGI("expandCurrentDicNodes depth level cache = %d, inputSize = %d",
                shouldDepthLevelCache, inputSize);
    }
    while (traverseSession->getDicTraverseCache()->activeSize() > 0) {
        DicNode dicNode;
        traverseSession->getDicTraverseCache()->popActive(&dicNode);
        if (dicNode.isTotalInputSizeExceedingLimit()) {
            return;
        }
        childDicNodes.clear();
        const int point0Index = dicNode.getInputIndex(0);
        const bool canDoLookAheadCorrection =
                TRAVERSAL->canDoLookAheadCorrection(traverseSession, &dicNode);
        const bool isLookAheadCorrection = canDoLookAheadCorrection
                && traverseSession->getDicTraverseCache()
                        ->isLookAheadCorrectionInputIndex(static_cast<int>(point0Index));
        const bool isCompletion = dicNode.isCompletion(inputSize);

        const bool shouldNodeLevelCache =
                TRAVERSAL->shouldNodeLevelCache(traverseSession, &dicNode);
        if (shouldDepthLevelCache || shouldNodeLevelCache) {
            if (DEBUG_CACHE) {
                dicNode.dump("PUSH_CACHE");
            }
            traverseSession->getDicTraverseCache()->copyPushContinue(&dicNode);
            dicNode.setCached();
        }

        if (dicNode.isInDigraph()) {
            // Finish digraph handling if the node is in the middle of a digraph expansion.
            processDicNodeAsDigraph(traverseSession, &dicNode);
        } else if (isLookAheadCorrection) {
            // The algorithm maintains a small set of "deferred" nodes that have not consumed the
            // latest touch point yet. These are needed to apply look-ahead correction operations
            // that require special handling of the latest touch point. For example, with
            // insertions (e.g., "thiis" -> "this") the latest touch point should not be consumed
            // at all.
            processDicNodeAsTransposition(traverseSession, &dicNode);
            processDicNodeAsInsertion(traverseSession, &dicNode);
        } else { // !isLookAheadCorrection
            // Only consider typing error corrections if the normalized compound distance is
            // below a spatial distance threshold.
            // NOTE: the threshold may need to be updated if the scoring model changes.
            // TODO: Remove. Do not prune node here.
            const bool allowsErrorCorrections = TRAVERSAL->allowsErrorCorrections(&dicNode);
            // Process for handling space substitution (e.g., hevis => he is)
            if (allowsErrorCorrections
                    && TRAVERSAL->isSpaceSubstitutionTerminal(traverseSession, &dicNode)) {
                createNextWordDicNode(traverseSession, &dicNode, true /* spaceSubstitution */);
            }

            DicNodeUtils::getAllChildDicNodes(
                    &dicNode, traverseSession->getDictionaryStructurePolicy(), &childDicNodes);

            const int childDicNodesSize = childDicNodes.getSizeAndLock();
            for (int i = 0; i < childDicNodesSize; ++i) {
                DicNode *const childDicNode = childDicNodes[i];
                if (isCompletion) {
                    // Handle forward lookahead when the lexicon letter exceeds the input size.
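                    // Such completion nodes are weighted with CT_COMPLETION in weightChildNode()
                    // instead of being aligned to a concrete touch point.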
                    processDicNodeAsMatch(traverseSession, childDicNode);
                    continue;
                }
                if (DigraphUtils::hasDigraphForCodePoint(
                        traverseSession->getDictionaryStructurePolicy()
                                ->getHeaderStructurePolicy(),
                        childDicNode->getNodeCodePoint())) {
                    correctionDicNode.initByCopy(childDicNode);
                    correctionDicNode.advanceDigraphIndex();
                    processDicNodeAsDigraph(traverseSession, &correctionDicNode);
                }
                if (TRAVERSAL->isOmission(traverseSession, &dicNode, childDicNode,
                        allowsErrorCorrections)) {
                    // TODO: (Gesture) Change weight between omission and substitution errors
                    // TODO: (Gesture) Terminal node should not be handled as omission
                    correctionDicNode.initByCopy(childDicNode);
                    processDicNodeAsOmission(traverseSession, &correctionDicNode);
                }
                const ProximityType proximityType = TRAVERSAL->getProximityType(
                        traverseSession, &dicNode, childDicNode);
                switch (proximityType) {
                    // TODO: Consider the difference of proximityType here
                    case MATCH_CHAR:
                    case PROXIMITY_CHAR:
                        processDicNodeAsMatch(traverseSession, childDicNode);
                        break;
                    case ADDITIONAL_PROXIMITY_CHAR:
                        if (allowsErrorCorrections) {
                            processDicNodeAsAdditionalProximityChar(traverseSession, &dicNode,
                                    childDicNode);
                        }
                        break;
                    case SUBSTITUTION_CHAR:
                        if (allowsErrorCorrections) {
                            processDicNodeAsSubstitution(traverseSession, &dicNode,
                                    childDicNode);
                        }
                        break;
                    case UNRELATED_CHAR:
                        // Just drop this dicNode and do nothing.
                        break;
                    default:
                        // Just drop this dicNode and do nothing.
                        break;
                }
            }

            // Push the dicNode for look-ahead correction
            if (allowsErrorCorrections && canDoLookAheadCorrection) {
                traverseSession->getDicTraverseCache()->copyPushNextActive(&dicNode);
            }
        }
    }
}

void Suggest::processTerminalDicNode(
        DicTraverseSession *traverseSession, DicNode *dicNode) const {
    if (dicNode->getCompoundDistance() >= static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) {
        return;
    }
    if (!dicNode->isTerminalDicNode()) {
        return;
    }
    if (dicNode->shouldBeFilteredBySafetyNetForBigram()) {
        return;
    }
    // Create a non-cached node here.
    DicNode terminalDicNode;
    DicNodeUtils::initByCopy(dicNode, &terminalDicNode);
    if (TRAVERSAL->needsToTraverseAllUserInput()
            && dicNode->getInputIndex(0) < traverseSession->getInputSize()) {
        Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_TERMINAL_INSERTION, traverseSession,
                0, &terminalDicNode, traverseSession->getMultiBigramMap());
    }
    Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_TERMINAL, traverseSession,
            0, &terminalDicNode, traverseSession->getMultiBigramMap());
    traverseSession->getDicTraverseCache()->copyPushTerminal(&terminalDicNode);
}

/**
 * Adds the expanded dicNode to the next search priority queue. Also creates an additional next
 * word (by the space omission error correction) search path if input dicNode is on a terminal.
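 * The input dicNode is released with DicNode::managedDelete() once processing is complete.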
 */
void Suggest::processExpandedDicNode(
        DicTraverseSession *traverseSession, DicNode *dicNode) const {
    processTerminalDicNode(traverseSession, dicNode);
    if (dicNode->getCompoundDistance() < static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) {
        if (TRAVERSAL->isSpaceOmissionTerminal(traverseSession, dicNode)) {
            createNextWordDicNode(traverseSession, dicNode, false /* spaceSubstitution */);
        }
        const int allowsLookAhead = !(dicNode->hasMultipleWords()
                && dicNode->isCompletion(traverseSession->getInputSize()));
        if (dicNode->hasChildren() && allowsLookAhead) {
            traverseSession->getDicTraverseCache()->copyPushNextActive(dicNode);
        }
    }
    DicNode::managedDelete(dicNode);
}

void Suggest::processDicNodeAsMatch(DicTraverseSession *traverseSession,
        DicNode *childDicNode) const {
    weightChildNode(traverseSession, childDicNode);
    processExpandedDicNode(traverseSession, childDicNode);
}

void Suggest::processDicNodeAsAdditionalProximityChar(DicTraverseSession *traverseSession,
        DicNode *dicNode, DicNode *childDicNode) const {
    // Note: Most types of corrections don't need to look up the bigram information since they do
    // not treat the node as a terminal. There is no need to pass the bigram map in these cases.
    Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_ADDITIONAL_PROXIMITY,
            traverseSession, dicNode, childDicNode, 0 /* multiBigramMap */);
    weightChildNode(traverseSession, childDicNode);
    processExpandedDicNode(traverseSession, childDicNode);
}

void Suggest::processDicNodeAsSubstitution(DicTraverseSession *traverseSession,
        DicNode *dicNode, DicNode *childDicNode) const {
    Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_SUBSTITUTION, traverseSession,
            dicNode, childDicNode, 0 /* multiBigramMap */);
    weightChildNode(traverseSession, childDicNode);
    processExpandedDicNode(traverseSession, childDicNode);
}

// Process the DicNode codepoint as a digraph. This means that composite glyphs like the German
// u-umlaut are expanded to the transliteration "ue". Note that this happens in parallel with
// the normal non-digraph traversal, so both "uber" and "ueber" can be corrected to "[u-umlaut]ber".
void Suggest::processDicNodeAsDigraph(DicTraverseSession *traverseSession,
        DicNode *childDicNode) const {
    weightChildNode(traverseSession, childDicNode);
    childDicNode->advanceDigraphIndex();
    processExpandedDicNode(traverseSession, childDicNode);
}

/**
 * Handle the dicNode as an omission error (e.g., ths => this). Skip the current letter and
 * consider matches for all possible next letters. Note that just skipping the current letter
 * without any other conditions tends to flood the search DicNodes cache with omission DicNodes.
 * Instead, check the possible *next* letters after the omission to better limit search to
 * plausible omissions.
 * Note that apostrophes are handled as omissions.
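 * The CT_OMISSION cost is applied to each candidate child via
 * Weighting::addCostAndForwardInputIndex().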
 */
void Suggest::processDicNodeAsOmission(
        DicTraverseSession *traverseSession, DicNode *dicNode) const {
    DicNodeVector childDicNodes;
    DicNodeUtils::getAllChildDicNodes(
            dicNode, traverseSession->getDictionaryStructurePolicy(), &childDicNodes);

    const int size = childDicNodes.getSizeAndLock();
    for (int i = 0; i < size; i++) {
        DicNode *const childDicNode = childDicNodes[i];
        // Treat this word as omission
        Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_OMISSION, traverseSession,
                dicNode, childDicNode, 0 /* multiBigramMap */);
        weightChildNode(traverseSession, childDicNode);
        if (!TRAVERSAL->isPossibleOmissionChildNode(traverseSession, dicNode, childDicNode)) {
            continue;
        }
        processExpandedDicNode(traverseSession, childDicNode);
    }
}

/**
 * Handle the dicNode as an insertion error (e.g., thiis => this). Skip the current touch point
 * and consider matches for the next touch point.
 */
void Suggest::processDicNodeAsInsertion(DicTraverseSession *traverseSession,
        DicNode *dicNode) const {
    const int16_t pointIndex = dicNode->getInputIndex(0);
    DicNodeVector childDicNodes;
    DicNodeUtils::getAllChildDicNodes(dicNode, traverseSession->getDictionaryStructurePolicy(),
            &childDicNodes);
    const int size = childDicNodes.getSizeAndLock();
    for (int i = 0; i < size; i++) {
        if (traverseSession->getProximityInfoState(0)->getPrimaryCodePointAt(pointIndex + 1)
                != childDicNodes[i]->getNodeCodePoint()) {
            continue;
        }
        DicNode *const childDicNode = childDicNodes[i];
        Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_INSERTION, traverseSession,
                dicNode, childDicNode, 0 /* multiBigramMap */);
        processExpandedDicNode(traverseSession, childDicNode);
    }
}

/**
 * Handle the dicNode as a transposition error (e.g., thsi => this). Swap the next two touch
 * points.
 */
void Suggest::processDicNodeAsTransposition(DicTraverseSession *traverseSession,
        DicNode *dicNode) const {
    const int16_t pointIndex = dicNode->getInputIndex(0);
    DicNodeVector childDicNodes1;
    DicNodeUtils::getAllChildDicNodes(dicNode, traverseSession->getDictionaryStructurePolicy(),
            &childDicNodes1);
    const int childSize1 = childDicNodes1.getSizeAndLock();
    for (int i = 0; i < childSize1; i++) {
        const ProximityType matchedId1 = traverseSession->getProximityInfoState(0)
                ->getProximityType(pointIndex + 1, childDicNodes1[i]->getNodeCodePoint(),
                        true /* checkProximityChars */);
        if (!ProximityInfoUtils::isMatchOrProximityChar(matchedId1)) {
            continue;
        }
        if (childDicNodes1[i]->hasChildren()) {
            DicNodeVector childDicNodes2;
            DicNodeUtils::getAllChildDicNodes(childDicNodes1[i],
                    traverseSession->getDictionaryStructurePolicy(), &childDicNodes2);
            const int childSize2 = childDicNodes2.getSizeAndLock();
            for (int j = 0; j < childSize2; j++) {
                DicNode *const childDicNode2 = childDicNodes2[j];
                const ProximityType matchedId2 = traverseSession->getProximityInfoState(0)
                        ->getProximityType(pointIndex, childDicNode2->getNodeCodePoint(),
                                true /* checkProximityChars */);
                if (!ProximityInfoUtils::isMatchOrProximityChar(matchedId2)) {
                    continue;
                }
                Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_TRANSPOSITION,
                        traverseSession, childDicNodes1[i], childDicNode2,
                        0 /* multiBigramMap */);
                processExpandedDicNode(traverseSession, childDicNode2);
            }
        }
        DicNode::managedDelete(childDicNodes1[i]);
    }
}

/**
 * Weight child dicNode by aligning it to the key
 */
void Suggest::weightChildNode(DicTraverseSession *traverseSession, DicNode *dicNode) const {
    const int inputSize = traverseSession->getInputSize();
    if (dicNode->isCompletion(inputSize)) {
        Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_COMPLETION,
                traverseSession, 0 /* parentDicNode */, dicNode, 0 /* multiBigramMap */);
    } else { // !isCompletion
        Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_MATCH,
                traverseSession, 0 /* parentDicNode */, dicNode, 0 /* multiBigramMap */);
    }
}

/**
 * Creates a new dicNode that represents a space insertion at the end of the input dicNode. Also
 * incorporates the unigram / bigram score for the ending word into the new dicNode.
 */
void Suggest::createNextWordDicNode(DicTraverseSession *traverseSession, DicNode *dicNode,
        const bool spaceSubstitution) const {
    if (!TRAVERSAL->isGoodToTraverseNextWord(dicNode)) {
        return;
    }
    // Create a non-cached node here.
    DicNode newDicNode;
    DicNodeUtils::initAsRootWithPreviousWord(
            traverseSession->getDictionaryStructurePolicy(), dicNode, &newDicNode);
    const CorrectionType correctionType = spaceSubstitution ?
            CT_NEW_WORD_SPACE_SUBSTITUTION : CT_NEW_WORD_SPACE_OMISSION;
    Weighting::addCostAndForwardInputIndex(WEIGHTING, correctionType, traverseSession, dicNode,
            &newDicNode, traverseSession->getMultiBigramMap());
    if (newDicNode.getCompoundDistance() < static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) {
        // newDicNode is worth continuing to traverse.
        // CAVEAT: This pruning is important for speed. Remove this when we can afford not to
        // prune here because here is not the right place to do pruning. Pruning should take
        // place only in DicNodePriorityQueue.
        traverseSession->getDicTraverseCache()->copyPushNextActive(&newDicNode);
    }
}

} // namespace latinime