aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/src/suggest/core/suggest.cpp
blob: 2eda414f43bb843035715de5801ff12abd9c6d36 (about) (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "suggest/core/suggest.h"

#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_priority_queue.h"
#include "suggest/core/dicnode/dic_node_vector.h"
#include "suggest/core/dictionary/binary_dictionary_shortcut_iterator.h"
#include "suggest/core/dictionary/dictionary.h"
#include "suggest/core/dictionary/digraph_utils.h"
#include "suggest/core/dictionary/shortcut_utils.h"
#include "suggest/core/layout/proximity_info.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "suggest/core/policy/scoring.h"
#include "suggest/core/policy/traversal.h"
#include "suggest/core/policy/weighting.h"
#include "suggest/core/session/dic_traverse_session.h"

namespace latinime {

// Initialization of class constants.
const int Suggest::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
const int Suggest::MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE = 2;
const float Suggest::AUTOCORRECT_CLASSIFICATION_THRESHOLD = 0.33f;

/**
 * Returns a set of suggestions for the given input touch points. The commitPoint argument indicates
 * whether to prematurely commit the suggested words up to the given point for sentence-level
 * suggestion.
 *
 * Note: Currently does not support concurrent calls across threads. Continuous suggestion is
 * automatically activated for sequential calls that share the same starting input.
 * TODO: Stop detecting continuous suggestion. Start using traverseSession instead.
 */
int Suggest::getSuggestions(ProximityInfo *pInfo, void *traverseSession,
        int *inputXs, int *inputYs, int *times, int *pointerIds, int *inputCodePoints,
        int inputSize, int commitPoint, int *outWords, int *frequencies, int *outputIndices,
        int *outputTypes, int *outputAutoCommitFirstWordConfidence) const {
    PROF_OPEN;
    PROF_START(0);
    const float maxSpatialDistance = TRAVERSAL->getMaxSpatialDistance();
    DicTraverseSession *tSession = static_cast<DicTraverseSession *>(traverseSession);
    tSession->setupForGetSuggestions(pInfo, inputCodePoints, inputSize, inputXs, inputYs, times,
            pointerIds, maxSpatialDistance, TRAVERSAL->getMaxPointerCount());
    // TODO: Add the way to evaluate cache

    initializeSearch(tSession, commitPoint);
    PROF_END(0);
    PROF_START(1);

    // keep expanding search dicNodes until all have terminated.
    while (tSession->getDicTraverseCache()->activeSize() > 0) {
        expandCurrentDicNodes(tSession);
        tSession->getDicTraverseCache()->advanceActiveDicNodes();
        tSession->getDicTraverseCache()->advanceInputIndex(inputSize);
    }
    PROF_END(1);
    PROF_START(2);
    const int size = outputSuggestions(tSession, frequencies, outWords, outputIndices, outputTypes,
            outputAutoCommitFirstWordConfidence);
    PROF_END(2);
    PROF_CLOSE;
    return size;
}

/**
 * Initializes the search at the root of the lexicon trie. Note that when possible the search will
 * continue suggestion from where it left off during the last call.
 */
void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPoint) const {
    if (!traverseSession->getProximityInfoState(0)->isUsed()) {
        return;
    }

    // Never auto partial commit for now.
    commitPoint = 0;

    if (traverseSession->getInputSize() > MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE
            && traverseSession->isContinuousSuggestionPossible()) {
        if (commitPoint == 0) {
            // Continue suggestion
            traverseSession->getDicTraverseCache()->continueSearch();
        } else {
            // Continue suggestion after partial commit.
            DicNode *topDicNode =
                    traverseSession->getDicTraverseCache()->setCommitPoint(commitPoint);
            traverseSession->setPrevWordPtNodePos(topDicNode->getPrevWordPtNodePos());
            traverseSession->getDicTraverseCache()->continueSearch();
            traverseSession->setPartiallyCommited();
        }
    } else {
        // Restart recognition at the root.
        traverseSession->resetCache(TRAVERSAL->getMaxCacheSize(traverseSession->getInputSize()),
                MAX_RESULTS);
        // Create a new dic node here
        DicNode rootNode;
        DicNodeUtils::initAsRoot(traverseSession->getDictionaryStructurePolicy(),
                traverseSession->getPrevWordPtNodePos(), &rootNode);
        traverseSession->getDicTraverseCache()->copyPushActive(&rootNode);
    }
}

/**
 * Outputs the final list of suggestions (i.e., terminal nodes).
 */
int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequencies,
        int *outputCodePoints, int *outputIndicesToPartialCommit, int *outputTypes,
        int *outputAutoCommitFirstWordConfidence) const {
#if DEBUG_EVALUATE_MOST_PROBABLE_STRING
    const int terminalSize = 0;
#else
    const int terminalSize = min(MAX_RESULTS,
            static_cast<int>(traverseSession->getDicTraverseCache()->terminalSize()));
#endif
    DicNode terminals[MAX_RESULTS]; // Avoiding non-POD variable length array

    for (int index = terminalSize - 1; index >= 0; --index) {
        traverseSession->getDicTraverseCache()->popTerminal(&terminals[index]);
    }

    const float languageWeight = SCORING->getAdjustedLanguageWeight(
            traverseSession, terminals, terminalSize);

    int outputWordIndex = 0;
    // Insert most probable word at index == 0 as long as there is one terminal at least
    const bool hasMostProbableString =
            SCORING->getMostProbableString(traverseSession, terminalSize, languageWeight,
                    &outputCodePoints[0], &outputTypes[0], &frequencies[0]);
    if (hasMostProbableString) {
        outputIndicesToPartialCommit[outputWordIndex] = NOT_AN_INDEX;
        ++outputWordIndex;
    }

    // Initial value of the loop index for terminal nodes (words)
    int doubleLetterTerminalIndex = -1;
    DoubleLetterLevel doubleLetterLevel = NOT_A_DOUBLE_LETTER;
    SCORING->searchWordWithDoubleLetter(terminals, terminalSize,
            &doubleLetterTerminalIndex, &doubleLetterLevel);

    int maxScore = S_INT_MIN;
    // Force autocorrection for obvious long multi-word suggestions when the top suggestion is
    // a long multiple words suggestion.
    // TODO: Implement a smarter auto-commit method for handling multi-word suggestions.
    // traverseSession->isPartiallyCommited() always returns false because we never auto partial
    // commit for now.
    const bool forceCommitMultiWords = (terminalSize > 0) ?
            TRAVERSAL->autoCorrectsToMultiWordSuggestionIfTop()
                    && (traverseSession->isPartiallyCommited()
                            || (traverseSession->getInputSize()
                                    >= MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT
                                            && terminals[0].hasMultipleWords())) : false;
    // TODO: have partial commit work even with multiple pointers.
    const bool outputSecondWordFirstLetterInputIndex =
            traverseSession->isOnlyOnePointerUsed(0 /* pointerId */);
    if (terminalSize > 0) {
        // If we have no suggestions, don't write this
        outputAutoCommitFirstWordConfidence[0] =
                computeFirstWordConfidence(&terminals[0]);
    }

    // Output suggestion results here
    for (int terminalIndex = 0; terminalIndex < terminalSize && outputWordIndex < MAX_RESULTS;
            ++terminalIndex) {
        DicNode *terminalDicNode = &terminals[terminalIndex];
        if (DEBUG_GEO_FULL) {
            terminalDicNode->dump("OUT:");
        }
        const float doubleLetterCost = SCORING->getDoubleLetterDemotionDistanceCost(
                terminalIndex, doubleLetterTerminalIndex, doubleLetterLevel);
        const float compoundDistance = terminalDicNode->getCompoundDistance(languageWeight)
                + doubleLetterCost;
        const bool isPossiblyOffensiveWord =
                traverseSession->getDictionaryStructurePolicy()->getProbability(
                        terminalDicNode->getProbability(), NOT_A_PROBABILITY) <= 0;
        const bool isExactMatch = terminalDicNode->isExactMatch();
        const bool isFirstCharUppercase = terminalDicNode->isFirstCharUppercase();
        // Heuristic: We exclude freq=0 first-char-uppercase words from exact match.
        // (e.g. "AMD" and "and")
        const bool isSafeExactMatch = isExactMatch
                && !(isPossiblyOffensiveWord && isFirstCharUppercase);
        const int outputTypeFlags =
                (isPossiblyOffensiveWord ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0)
                | (isSafeExactMatch ? Dictionary::KIND_FLAG_EXACT_MATCH : 0);

        // Entries that are blacklisted or do not represent a word should not be output.
        const bool isValidWord = !terminalDicNode->isBlacklistedOrNotAWord();

        // Increase output score of top typing suggestion to ensure autocorrection.
        // TODO: Better integration with java side autocorrection logic.
        const int finalScore = SCORING->calculateFinalScore(
                compoundDistance, traverseSession->getInputSize(),
                terminalDicNode->isExactMatch()
                        || (forceCommitMultiWords && terminalDicNode->hasMultipleWords())
                                || (isValidWord && SCORING->doesAutoCorrectValidWord()));
        if (maxScore < finalScore && isValidWord) {
            maxScore = finalScore;
        }

        // Don't output invalid words. However, we still need to submit their shortcuts if any.
        if (isValidWord) {
            outputTypes[outputWordIndex] = Dictionary::KIND_CORRECTION | outputTypeFlags;
            frequencies[outputWordIndex] = finalScore;
            if (outputSecondWordFirstLetterInputIndex) {
                outputIndicesToPartialCommit[outputWordIndex] =
                        terminalDicNode->getSecondWordFirstInputIndex(
                                traverseSession->getProximityInfoState(0));
            } else {
                outputIndicesToPartialCommit[outputWordIndex] = NOT_AN_INDEX;
            }
            // Populate the outputChars array with the suggested word.
            const int startIndex = outputWordIndex * MAX_WORD_LENGTH;
            terminalDicNode->outputResult(&outputCodePoints[startIndex]);
            ++outputWordIndex;
        }

        if (!terminalDicNode->hasMultipleWords()) {
            BinaryDictionaryShortcutIterator shortcutIt(
                    traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(),
                    traverseSession->getDictionaryStructurePolicy()
                            ->getShortcutPositionOfPtNode(terminalDicNode->getPtNodePos()));
            // Shortcut is not supported for multiple words suggestions.
            // TODO: Check shortcuts during traversal for multiple words suggestions.
            const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode);
            const int updatedOutputWordIndex = ShortcutUtils::outputShortcuts(&shortcutIt,
                    outputWordIndex,  finalScore, outputCodePoints, frequencies, outputTypes,
                    sameAsTyped);
            const int secondWordFirstInputIndex = terminalDicNode->getSecondWordFirstInputIndex(
                    traverseSession->getProximityInfoState(0));
            for (int i = outputWordIndex; i < updatedOutputWordIndex; ++i) {
                if (outputSecondWordFirstLetterInputIndex) {
                    outputIndicesToPartialCommit[i] = secondWordFirstInputIndex;
                } else {
                    outputIndicesToPartialCommit[i] = NOT_AN_INDEX;
                }
            }
            outputWordIndex = updatedOutputWordIndex;
        }
        DicNode::managedDelete(terminalDicNode);
    }

    if (hasMostProbableString) {
        SCORING->safetyNetForMostProbableString(terminalSize, maxScore,
                &outputCodePoints[0], &frequencies[0]);
    }
    return outputWordIndex;
}

int Suggest::computeFirstWordConfidence(const DicNode *const terminalDicNode) const {
    // Get the number of spaces in the first suggestion
    const int spaceCount = terminalDicNode->getTotalNodeSpaceCount();
    // Get the number of characters in the first suggestion
    const int length = terminalDicNode->getTotalNodeCodePointCount();
    // Get the distance for the first word of the suggestion
    const float distance = terminalDicNode->getNormalizedCompoundDistanceAfterFirstWord();

    // Arbitrarily, we give a score whose useful values range from 0 to 1,000,000.
    // 1,000,000 will be the cutoff to auto-commit. It's fine if the number is under 0 or
    // above 1,000,000 : under 0 just means it's very bad to commit, and above 1,000,000 means
    // we are very confident.
    // Expected space count is 1 ~ 5
    static const int MIN_EXPECTED_SPACE_COUNT = 1;
    static const int MAX_EXPECTED_SPACE_COUNT = 5;
    // Expected length is about 4 ~ 30
    static const int MIN_EXPECTED_LENGTH = 4;
    static const int MAX_EXPECTED_LENGTH = 30;
    // Expected distance is about 0.2 ~ 2.0, but consider 0.0 ~ 2.0
    static const float MIN_EXPECTED_DISTANCE = 0.0;
    static const float MAX_EXPECTED_DISTANCE = 2.0;
    // This is not strict: it's where most stuff will be falling, but it's still fine if it's
    // outside these values. We want to output a value that reflects all of these. Each factor
    // contributes a bit.

    // We need at least a space.
    if (spaceCount < 1) return NOT_A_FIRST_WORD_CONFIDENCE;

    // The smaller the edit distance, the higher the contribution. MIN_EXPECTED_DISTANCE means 0
    // contribution, while MAX_EXPECTED_DISTANCE means full contribution according to the
    // weight of the distance. Clamp to avoid overflows.
    const float clampedDistance = distance < MIN_EXPECTED_DISTANCE ? MIN_EXPECTED_DISTANCE
            : distance > MAX_EXPECTED_DISTANCE ? MAX_EXPECTED_DISTANCE : distance;
    const int distanceContribution = DISTANCE_WEIGHT_FOR_AUTO_COMMIT
            * (MAX_EXPECTED_DISTANCE - clampedDistance)
            / (MAX_EXPECTED_DISTANCE - MIN_EXPECTED_DISTANCE);
    // The larger the suggestion length, the larger the contribution. MIN_EXPECTED_LENGTH is no
    // contribution, MAX_EXPECTED_LENGTH is full contribution according to the weight of the
    // length. Length is guaranteed to be between 1 and 48, so we don't need to clamp.
    const int lengthContribution = LENGTH_WEIGHT_FOR_AUTO_COMMIT
            * (length - MIN_EXPECTED_LENGTH) / (MAX_EXPECTED_LENGTH - MIN_EXPECTED_LENGTH);
    // The more spaces, the larger the contribution. MIN_EXPECTED_SPACE_COUNT space is no
    // contribution, MAX_EXPECTED_SPACE_COUNT spaces is full contribution according to the
    // weight of the space count.
    const int spaceContribution = SPACE_COUNT_WEIGHT_FOR_AUTO_COMMIT
            * (spaceCount - MIN_EXPECTED_SPACE_COUNT)
            / (MAX_EXPECTED_SPACE_COUNT - MIN_EXPECTED_SPACE_COUNT);

    return distanceContribution + lengthContribution + spaceContribution;
}

/**
 * Expands the dicNodes in the current search priority queue by advancing to the possible child
 * nodes based on the next touch point(s) (or no touch points for lookahead)
 */
void Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const {
    const int inputSize = traverseSession->getInputSize();
    DicNodeVector childDicNodes(TRAVERSAL->getDefaultExpandDicNodeSize());
    DicNode correctionDicNode;

    // TODO: Find more efficient caching
    const bool shouldDepthLevelCache = TRAVERSAL->shouldDepthLevelCache(traverseSession);
    if (shouldDepthLevelCache) {
        traverseSession->getDicTraverseCache()->updateLastCachedInputIndex();
    }
    if (DEBUG_CACHE) {
        AKLOGI("expandCurrentDicNodes depth level cache = %d, inputSize = %d",
                shouldDepthLevelCache, inputSize);
    }
    while (traverseSession->getDicTraverseCache()->activeSize() > 0) {
        DicNode dicNode;
        traverseSession->getDicTraverseCache()->popActive(&dicNode);
        if (dicNode.isTotalInputSizeExceedingLimit()) {
            return;
        }
        childDicNodes.clear();
        const int point0Index = dicNode.getInputIndex(0);
        const bool canDoLookAheadCorrection =
                TRAVERSAL->canDoLookAheadCorrection(traverseSession, &dicNode);
        const bool isLookAheadCorrection = canDoLookAheadCorrection
                && traverseSession->getDicTraverseCache()->
                        isLookAheadCorrectionInputIndex(static_cast<int>(point0Index));
        const bool isCompletion = dicNode.isCompletion(inputSize);

        const bool shouldNodeLevelCache =
                TRAVERSAL->shouldNodeLevelCache(traverseSession, &dicNode);
        if (shouldDepthLevelCache || shouldNodeLevelCache) {
            if (DEBUG_CACHE) {
                dicNode.dump("PUSH_CACHE");
            }
            traverseSession->getDicTraverseCache()->copyPushContinue(&dicNode);
            dicNode.setCached();
        }

        if (dicNode.isInDigraph()) {
            // Finish digraph handling if the node is in the middle of a digraph expansion.
            processDicNodeAsDigraph(traverseSession, &dicNode);
        } else if (isLookAheadCorrection) {
            // The algorithm maintains a small set of "deferred" nodes that have not consumed the
            // latest touch point yet. These are needed to apply look-ahead correction operations
            // that require special handling of the latest touch point. For example, with insertions
            // (e.g., "thiis" -> "this") the latest touch point should not be consumed at all.
            processDicNodeAsTransposition(traverseSession, &dicNode);
            processDicNodeAsInsertion(traverseSession, &dicNode);
        } else { // !isLookAheadCorrection
            // Only consider typing error corrections if the normalized compound distance is
            // below a spatial distance threshold.
            // NOTE: the threshold may need to be updated if scoring model changes.
            // TODO: Remove. Do not prune node here.
            const bool allowsErrorCorrections = TRAVERSAL->allowsErrorCorrections(&dicNode);
            // Process for handling space substitution (e.g., hevis => he is)
            if (allowsErrorCorrections
                    && TRAVERSAL->isSpaceSubstitutionTerminal(traverseSession, &dicNode)) {
                createNextWordDicNode(traverseSession, &dicNode, true /* spaceSubstitution */);
            }

            DicNodeUtils::getAllChildDicNodes(
                    &dicNode, traverseSession->getDictionaryStructurePolicy(), &childDicNodes);

            const int childDicNodesSize = childDicNodes.getSizeAndLock();
            for (int i = 0; i < childDicNodesSize; ++i) {
                DicNode *const childDicNode = childDicNodes[i];
                if (isCompletion) {
                    // Handle forward lookahead when the lexicon letter exceeds the input size.
                    processDicNodeAsMatch(traverseSession, childDicNode);
                    continue;
                }
                if (DigraphUtils::hasDigraphForCodePoint(
                        traverseSession->getDictionaryStructurePolicy()
                                ->getHeaderStructurePolicy(),
                        childDicNode->getNodeCodePoint())) {
                    correctionDicNode.initByCopy(childDicNode);
                    correctionDicNode.advanceDigraphIndex();
                    processDicNodeAsDigraph(traverseSession, &correctionDicNode);
                }
                if (TRAVERSAL->isOmission(traverseSession, &dicNode, childDicNode,
                        allowsErrorCorrections)) {
                    // TODO: (Gesture) Change weight between omission and substitution errors
                    // TODO: (Gesture) Terminal node should not be handled as omission
                    correctionDicNode.initByCopy(childDicNode);
                    processDicNodeAsOmission(traverseSession, &correctionDicNode);
                }
                const ProximityType proximityType = TRAVERSAL->getProximityType(
                        traverseSession, &dicNode, childDicNode);
                switch (proximityType) {
                    // TODO: Consider the difference of proximityType here
                    case MATCH_CHAR:
                    case PROXIMITY_CHAR:
                        processDicNodeAsMatch(traverseSession, childDicNode);
                        break;
                    case ADDITIONAL_PROXIMITY_CHAR:
                        if (allowsErrorCorrections) {
                            processDicNodeAsAdditionalProximityChar(traverseSession, &dicNode,
                                    childDicNode);
                        }
                        break;
                    case SUBSTITUTION_CHAR:
                        if (allowsErrorCorrections) {
                            processDicNodeAsSubstitution(traverseSession, &dicNode, childDicNode);
                        }
                        break;
                    case UNRELATED_CHAR:
                        // Just drop this dicNode and do nothing.
                        break;
                    default:
                        // Just drop this dicNode and do nothing.
                        break;
                }
            }

            // Push the dicNode for look-ahead correction
            if (allowsErrorCorrections && canDoLookAheadCorrection) {
                traverseSession->getDicTraverseCache()->copyPushNextActive(&dicNode);
            }
        }
    }
}

void Suggest::processTerminalDicNode(
        DicTraverseSession *traverseSession, DicNode *dicNode) const {
    if (dicNode->getCompoundDistance() >= static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) {
        return;
    }
    if (!dicNode->isTerminalDicNode()) {
        return;
    }
    if (dicNode->shouldBeFilteredBySafetyNetForBigram()) {
        return;
    }
    // Create a non-cached node here.
    DicNode terminalDicNode;
    DicNodeUtils::initByCopy(dicNode, &terminalDicNode);
    if (TRAVERSAL->needsToTraverseAllUserInput()
            && dicNode->getInputIndex(0) < traverseSession->getInputSize()) {
        Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_TERMINAL_INSERTION, traverseSession, 0,
                &terminalDicNode, traverseSession->getMultiBigramMap());
    }
    Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_TERMINAL, traverseSession, 0,
            &terminalDicNode, traverseSession->getMultiBigramMap());
    traverseSession->getDicTraverseCache()->copyPushTerminal(&terminalDicNode);
}

/**
 * Adds the expanded dicNode to the next search priority queue. Also creates an additional next word
 * (by the space omission error correction) search path if input dicNode is on a terminal.
 */
void Suggest::processExpandedDicNode(
        DicTraverseSession *traverseSession, DicNode *dicNode) const {
    processTerminalDicNode(traverseSession, dicNode);
    if (dicNode->getCompoundDistance() < static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) {
        if (TRAVERSAL->isSpaceOmissionTerminal(traverseSession, dicNode)) {
            createNextWordDicNode(traverseSession, dicNode, false /* spaceSubstitution */);
        }
        const int allowsLookAhead = !(dicNode->hasMultipleWords()
                && dicNode->isCompletion(traverseSession->getInputSize()));
        if (dicNode->hasChildren() && allowsLookAhead) {
            traverseSession->getDicTraverseCache()->copyPushNextActive(dicNode);
        }
    }
    DicNode::managedDelete(dicNode);
}

void Suggest::processDicNodeAsMatch(DicTraverseSession *traverseSession,
        DicNode *childDicNode) const {
    weightChildNode(traverseSession, childDicNode);
    processExpandedDicNode(traverseSession, childDicNode);
}

void Suggest::processDicNodeAsAdditionalProximityChar(DicTraverseSession *traverseSession,
        DicNode *dicNode, DicNode *childDicNode) const {
    // Note: Most types of corrections don't need to look up the bigram information since they do
    // not treat the node as a terminal. There is no need to pass the bigram map in these cases.
    Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_ADDITIONAL_PROXIMITY,
            traverseSession, dicNode, childDicNode, 0 /* multiBigramMap */);
    weightChildNode(traverseSession, childDicNode);
    processExpandedDicNode(traverseSession, childDicNode);
}

void Suggest::processDicNodeAsSubstitution(DicTraverseSession *traverseSession,
        DicNode *dicNode, DicNode *childDicNode) const {
    Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_SUBSTITUTION, traverseSession,
            dicNode, childDicNode, 0 /* multiBigramMap */);
    weightChildNode(traverseSession, childDicNode);
    processExpandedDicNode(traverseSession, childDicNode);
}

// Process the DicNode codepoint as a digraph. This means that composite glyphs like the German
// u-umlaut is expanded to the transliteration "ue". Note that this happens in parallel with
// the normal non-digraph traversal, so both "uber" and "ueber" can be corrected to "[u-umlaut]ber".
void Suggest::processDicNodeAsDigraph(DicTraverseSession *traverseSession,
        DicNode *childDicNode) const {
    weightChildNode(traverseSession, childDicNode);
    childDicNode->advanceDigraphIndex();
    processExpandedDicNode(traverseSession, childDicNode);
}

/**
 * Handle the dicNode as an omission error (e.g., ths => this). Skip the current letter and consider
 * matches for all possible next letters. Note that just skipping the current letter without any
 * other conditions tends to flood the search DicNodes cache with omission DicNodes. Instead, check
 * the possible *next* letters after the omission to better limit search to plausible omissions.
 * Note that apostrophes are handled as omissions.
 */
void Suggest::processDicNodeAsOmission(
        DicTraverseSession *traverseSession, DicNode *dicNode) const {
    DicNodeVector childDicNodes;
    DicNodeUtils::getAllChildDicNodes(
            dicNode, traverseSession->getDictionaryStructurePolicy(), &childDicNodes);

    const int size = childDicNodes.getSizeAndLock();
    for (int i = 0; i < size; i++) {
        DicNode *const childDicNode = childDicNodes[i];
        // Treat this word as omission
        Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_OMISSION, traverseSession,
                dicNode, childDicNode, 0 /* multiBigramMap */);
        weightChildNode(traverseSession, childDicNode);
        if (!TRAVERSAL->isPossibleOmissionChildNode(traverseSession, dicNode, childDicNode)) {
            continue;
        }
        processExpandedDicNode(traverseSession, childDicNode);
    }
}

/**
 * Handle the dicNode as an insertion error (e.g., thiis => this). Skip the current touch point and
 * consider matches for the next touch point.
 */
void Suggest::processDicNodeAsInsertion(DicTraverseSession *traverseSession,
        DicNode *dicNode) const {
    const int16_t pointIndex = dicNode->getInputIndex(0);
    DicNodeVector childDicNodes;
    DicNodeUtils::getAllChildDicNodes(dicNode, traverseSession->getDictionaryStructurePolicy(),
            &childDicNodes);
    const int size = childDicNodes.getSizeAndLock();
    for (int i = 0; i < size; i++) {
        if (traverseSession->getProximityInfoState(0)->getPrimaryCodePointAt(pointIndex + 1)
                != childDicNodes[i]->getNodeCodePoint()) {
            continue;
        }
        DicNode *const childDicNode = childDicNodes[i];
        Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_INSERTION, traverseSession,
                dicNode, childDicNode, 0 /* multiBigramMap */);
        processExpandedDicNode(traverseSession, childDicNode);
    }
}

/**
 * Handle the dicNode as a transposition error (e.g., thsi => this). Swap the next two touch points.
 */
void Suggest::processDicNodeAsTransposition(DicTraverseSession *traverseSession,
        DicNode *dicNode) const {
    const int16_t pointIndex = dicNode->getInputIndex(0);
    DicNodeVector childDicNodes1;
    DicNodeUtils::getAllChildDicNodes(dicNode, traverseSession->getDictionaryStructurePolicy(),
            &childDicNodes1);
    const int childSize1 = childDicNodes1.getSizeAndLock();
    for (int i = 0; i < childSize1; i++) {
        const ProximityType matchedId1 = traverseSession->getProximityInfoState(0)
                ->getProximityType(pointIndex + 1, childDicNodes1[i]->getNodeCodePoint(),
                        true /* checkProximityChars */);
        if (!ProximityInfoUtils::isMatchOrProximityChar(matchedId1)) {
            continue;
        }
        if (childDicNodes1[i]->hasChildren()) {
            DicNodeVector childDicNodes2;
            DicNodeUtils::getAllChildDicNodes(childDicNodes1[i],
                    traverseSession->getDictionaryStructurePolicy(), &childDicNodes2);
            const int childSize2 = childDicNodes2.getSizeAndLock();
            for (int j = 0; j < childSize2; j++) {
                DicNode *const childDicNode2 = childDicNodes2[j];
                const ProximityType matchedId2 = traverseSession->getProximityInfoState(0)
                        ->getProximityType(pointIndex, childDicNode2->getNodeCodePoint(),
                                true /* checkProximityChars */);
                if (!ProximityInfoUtils::isMatchOrProximityChar(matchedId2)) {
                    continue;
                }
                Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_TRANSPOSITION,
                        traverseSession, childDicNodes1[i], childDicNode2, 0 /* multiBigramMap */);
                processExpandedDicNode(traverseSession, childDicNode2);
            }
        }
        DicNode::managedDelete(childDicNodes1[i]);
    }
}

/**
 * Weight child dicNode by aligning it to the key
 */
void Suggest::weightChildNode(DicTraverseSession *traverseSession, DicNode *dicNode) const {
    const int inputSize = traverseSession->getInputSize();
    if (dicNode->isCompletion(inputSize)) {
        Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_COMPLETION, traverseSession,
                0 /* parentDicNode */, dicNode, 0 /* multiBigramMap */);
    } else { // completion
        Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_MATCH, traverseSession,
                0 /* parentDicNode */, dicNode, 0 /* multiBigramMap */);
    }
}

/**
 * Creates a new dicNode that represents a space insertion at the end of the input dicNode. Also
 * incorporates the unigram / bigram score for the ending word into the new dicNode.
 */
void Suggest::createNextWordDicNode(DicTraverseSession *traverseSession, DicNode *dicNode,
        const bool spaceSubstitution) const {
    if (!TRAVERSAL->isGoodToTraverseNextWord(dicNode)) {
        return;
    }

    // Create a non-cached node here.
    DicNode newDicNode;
    DicNodeUtils::initAsRootWithPreviousWord(
            traverseSession->getDictionaryStructurePolicy(), dicNode, &newDicNode);
    const CorrectionType correctionType = spaceSubstitution ?
            CT_NEW_WORD_SPACE_SUBSTITUTION : CT_NEW_WORD_SPACE_OMISSION;
    Weighting::addCostAndForwardInputIndex(WEIGHTING, correctionType, traverseSession, dicNode,
            &newDicNode, traverseSession->getMultiBigramMap());
    if (newDicNode.getCompoundDistance() < static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) {
        // newDicNode is worth continuing to traverse.
        // CAVEAT: This pruning is important for speed. Remove this when we can afford not to prune
        // here because here is not the right place to do pruning. Pruning should take place only
        // in DicNodePriorityQueue.
        traverseSession->getDicTraverseCache()->copyPushNextActive(&newDicNode);
    }
}
} // namespace latinime