diff options
Diffstat (limited to 'native/jni/src')
13 files changed, 1421 insertions, 0 deletions
diff --git a/native/jni/src/suggest/core/dictionary/shortcut_utils.h b/native/jni/src/suggest/core/dictionary/shortcut_utils.h new file mode 100644 index 000000000..e592136cc --- /dev/null +++ b/native/jni/src/suggest/core/dictionary/shortcut_utils.h @@ -0,0 +1,65 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_SHORTCUT_UTILS +#define LATINIME_SHORTCUT_UTILS + +#include "defines.h" +#include "dic_node_utils.h" +#include "terminal_attributes.h" + +namespace latinime { + +class ShortcutUtils { + public: + static int outputShortcuts(const TerminalAttributes *const terminalAttributes, + int outputWordIndex, const int finalScore, int *const outputCodePoints, + int *const frequencies, int *const outputTypes, const bool sameAsTyped) { + TerminalAttributes::ShortcutIterator iterator = terminalAttributes->getShortcutIterator(); + while (iterator.hasNextShortcutTarget() && outputWordIndex < MAX_RESULTS) { + int shortcutTarget[MAX_WORD_LENGTH]; + int shortcutProbability; + const int shortcutTargetStringLength = iterator.getNextShortcutTarget( + MAX_WORD_LENGTH, shortcutTarget, &shortcutProbability); + int shortcutScore; + int kind; + if (shortcutProbability == BinaryFormat::WHITELIST_SHORTCUT_PROBABILITY + && sameAsTyped) { + shortcutScore = S_INT_MAX; + kind = Dictionary::KIND_WHITELIST; + } else { + // shortcut entry's score == its base entry's score - 1 + shortcutScore = finalScore; + // Protection against int underflow + shortcutScore = max(S_INT_MIN + 1, shortcutScore) - 1; + kind = Dictionary::KIND_CORRECTION; + } + outputTypes[outputWordIndex] = kind; + frequencies[outputWordIndex] = shortcutScore; + frequencies[outputWordIndex] = max(S_INT_MIN + 1, shortcutScore) - 1; + const int startIndex2 = outputWordIndex * MAX_WORD_LENGTH; + DicNodeUtils::appendTwoWords(0, 0, shortcutTarget, shortcutTargetStringLength, + &outputCodePoints[startIndex2]); + ++outputWordIndex; + } + return outputWordIndex; + } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(ShortcutUtils); +}; +} // namespace latinime +#endif // LATINIME_SHORTCUT_UTILS diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp new file mode 100644 index 000000000..7fba1d504 --- /dev/null +++ b/native/jni/src/suggest/core/suggest.cpp @@ -0,0 +1,518 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "char_utils.h" +#include "dictionary.h" +#include "dic_node_priority_queue.h" +#include "dic_node_vector.h" +#include "dic_traverse_session.h" +#include "proximity_info.h" +#include "scoring.h" +#include "shortcut_utils.h" +#include "suggest.h" +#include "terminal_attributes.h" +#include "traversal.h" +#include "weighting.h" + +namespace latinime { + +// Initialization of class constants. +const int Suggest::LOOKAHEAD_DIC_NODES_CACHE_SIZE = 25; +const int Suggest::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16; +const int Suggest::MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE = 2; +const float Suggest::AUTOCORRECT_CLASSIFICATION_THRESHOLD = 0.33f; +const float Suggest::AUTOCORRECT_LANGUAGE_FEATURE_THRESHOLD = 0.6f; + +const bool Suggest::CORRECT_SPACE_OMISSION = true; +const bool Suggest::CORRECT_TRANSPOSITION = true; +const bool Suggest::CORRECT_INSERTION = true; +const bool Suggest::CORRECT_OMISSION_G = true; + +/** + * Returns a set of suggestions for the given input touch points. The commitPoint argument indicates + * whether to prematurely commit the suggested words up to the given point for sentence-level + * suggestion. + * + * Note: Currently does not support concurrent calls across threads. Continuous suggestion is + * automatically activated for sequential calls that share the same starting input. + * TODO: Stop detecting continuous suggestion. Start using traverseSession instead. + */ +int Suggest::getSuggestions(ProximityInfo *pInfo, void *traverseSession, + int *inputXs, int *inputYs, int *times, int *pointerIds, int *inputCodePoints, + int inputSize, int commitPoint, int *outWords, int *frequencies, int *outputIndices, + int *outputTypes) const { + PROF_OPEN; + PROF_START(0); + const float maxSpatialDistance = TRAVERSAL->getMaxSpatialDistance(); + DicTraverseSession *tSession = static_cast<DicTraverseSession *>(traverseSession); + tSession->setupForGetSuggestions(pInfo, inputCodePoints, inputSize, inputXs, inputYs, times, + pointerIds, maxSpatialDistance, TRAVERSAL->getMaxPointerCount()); + // TODO: Add the way to evaluate cache + + initializeSearch(tSession, commitPoint); + PROF_END(0); + PROF_START(1); + + // keep expanding search dicNodes until all have terminated. + while (tSession->getDicTraverseCache()->activeSize() > 0) { + expandCurrentDicNodes(tSession); + tSession->getDicTraverseCache()->advanceActiveDicNodes(); + tSession->getDicTraverseCache()->advanceInputIndex(inputSize); + } + PROF_END(1); + PROF_START(2); + const int size = outputSuggestions(tSession, frequencies, outWords, outputIndices, outputTypes); + PROF_END(2); + PROF_CLOSE; + return size; +} + +/** + * Initializes the search at the root of the lexicon trie. Note that when possible the search will + * continue suggestion from where it left off during the last call. + */ +void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPoint) const { + if (!traverseSession->getProximityInfoState(0)->isUsed()) { + return; + } + if (TRAVERSAL->allowPartialCommit()) { + commitPoint = 0; + } + + if (traverseSession->getInputSize() > MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE + && traverseSession->isContinuousSuggestionPossible()) { + if (commitPoint == 0) { + // Continue suggestion + traverseSession->getDicTraverseCache()->continueSearch(); + } else { + // Continue suggestion after partial commit. + DicNode *topDicNode = + traverseSession->getDicTraverseCache()->setCommitPoint(commitPoint); + traverseSession->setPrevWordPos(topDicNode->getPrevWordNodePos()); + traverseSession->getDicTraverseCache()->continueSearch(); + traverseSession->setPartiallyCommited(); + } + } else { + // Restart recognition at the root. + traverseSession->resetCache(TRAVERSAL->getMaxCacheSize(), MAX_RESULTS); + // Create a new dic node here + DicNode rootNode; + DicNodeUtils::initAsRoot(traverseSession->getDicRootPos(), + traverseSession->getOffsetDict(), traverseSession->getPrevWordPos(), &rootNode); + traverseSession->getDicTraverseCache()->copyPushActive(&rootNode); + } +} + +/** + * Outputs the final list of suggestions (i.e., terminal nodes). + */ +int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequencies, + int *outputCodePoints, int *spaceIndices, int *outputTypes) const { + const int terminalSize = min(MAX_RESULTS, + static_cast<int>(traverseSession->getDicTraverseCache()->terminalSize())); + DicNode terminals[MAX_RESULTS]; // Avoiding non-POD variable length array + + for (int index = terminalSize - 1; index >= 0; --index) { + traverseSession->getDicTraverseCache()->popTerminal(&terminals[index]); + } + + const float languageWeight = SCORING->getAdjustedLanguageWeight( + traverseSession, terminals, terminalSize); + + int outputWordIndex = 0; + // Insert most probable word at index == 0 as long as there is one terminal at least + const bool hasMostProbableString = + SCORING->getMostProbableString(traverseSession, terminalSize, languageWeight, + &outputCodePoints[0], &outputTypes[0], &frequencies[0]); + if (hasMostProbableString) { + ++outputWordIndex; + } + + // Initial value of the loop index for terminal nodes (words) + int doubleLetterTerminalIndex = -1; + DoubleLetterLevel doubleLetterLevel = NOT_A_DOUBLE_LETTER; + SCORING->searchWordWithDoubleLetter(terminals, terminalSize, + &doubleLetterTerminalIndex, &doubleLetterLevel); + + int maxScore = S_INT_MIN; + // Output suggestion results here + for (int terminalIndex = 0; terminalIndex < terminalSize && outputWordIndex < MAX_RESULTS; + ++terminalIndex) { + DicNode *terminalDicNode = &terminals[terminalIndex]; + if (DEBUG_GEO_FULL) { + terminalDicNode->dump("OUT:"); + } + const float doubleLetterCost = SCORING->getDoubleLetterDemotionDistanceCost( + terminalIndex, doubleLetterTerminalIndex, doubleLetterLevel); + const float compoundDistance = terminalDicNode->getCompoundDistance(languageWeight) + + doubleLetterCost; + const TerminalAttributes terminalAttributes(traverseSession->getOffsetDict(), + terminalDicNode->getFlags(), terminalDicNode->getAttributesPos()); + const int originalTerminalProbability = terminalDicNode->getProbability(); + + // Do not suggest words with a 0 probability, or entries that are blacklisted or do not + // represent a word. However, we should still submit their shortcuts if any. + const bool isValidWord = + originalTerminalProbability > 0 && !terminalAttributes.isBlacklistedOrNotAWord(); + // Increase output score of top typing suggestion to ensure autocorrection. + // TODO: Better integration with java side autocorrection logic. + // Force autocorrection for obvious long multi-word suggestions. + const bool isForceCommitMultiWords = TRAVERSAL->allowPartialCommit() + && (traverseSession->isPartiallyCommited() + || (traverseSession->getInputSize() >= MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT + && terminalDicNode->hasMultipleWords())); + + const int finalScore = SCORING->calculateFinalScore( + compoundDistance, traverseSession->getInputSize(), + isForceCommitMultiWords || (isValidWord && SCORING->doesAutoCorrectValidWord())); + + maxScore = max(maxScore, finalScore); + + if (TRAVERSAL->allowPartialCommit()) { + // Index for top typing suggestion should be 0. + if (isValidWord && outputWordIndex == 0) { + terminalDicNode->outputSpacePositionsResult(spaceIndices); + } + } + + // Do not suggest words with a 0 probability, or entries that are blacklisted or do not + // represent a word. However, we should still submit their shortcuts if any. + if (isValidWord) { + outputTypes[outputWordIndex] = Dictionary::KIND_CORRECTION; + frequencies[outputWordIndex] = finalScore; + // Populate the outputChars array with the suggested word. + const int startIndex = outputWordIndex * MAX_WORD_LENGTH; + terminalDicNode->outputResult(&outputCodePoints[startIndex]); + ++outputWordIndex; + } + + const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode); + outputWordIndex = ShortcutUtils::outputShortcuts(&terminalAttributes, outputWordIndex, + finalScore, outputCodePoints, frequencies, outputTypes, sameAsTyped); + DicNode::managedDelete(terminalDicNode); + } + + if (hasMostProbableString) { + SCORING->safetyNetForMostProbableString(terminalSize, maxScore, + &outputCodePoints[0], &frequencies[0]); + } + return outputWordIndex; +} + +/** + * Expands the dicNodes in the current search priority queue by advancing to the possible child + * nodes based on the next touch point(s) (or no touch points for lookahead) + */ +void Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const { + const int inputSize = traverseSession->getInputSize(); + DicNodeVector childDicNodes(TRAVERSAL->getDefaultExpandDicNodeSize()); + DicNode omissionDicNode; + + // TODO: Find more efficient caching + const bool shouldDepthLevelCache = TRAVERSAL->shouldDepthLevelCache(traverseSession); + if (shouldDepthLevelCache) { + traverseSession->getDicTraverseCache()->updateLastCachedInputIndex(); + } + if (DEBUG_CACHE) { + AKLOGI("expandCurrentDicNodes depth level cache = %d, inputSize = %d", + shouldDepthLevelCache, inputSize); + } + while (traverseSession->getDicTraverseCache()->activeSize() > 0) { + DicNode dicNode; + traverseSession->getDicTraverseCache()->popActive(&dicNode); + if (dicNode.isTotalInputSizeExceedingLimit()) { + return; + } + childDicNodes.clear(); + const int point0Index = dicNode.getInputIndex(0); + const bool canDoLookAheadCorrection = + TRAVERSAL->canDoLookAheadCorrection(traverseSession, &dicNode); + const bool isLookAheadCorrection = canDoLookAheadCorrection + && traverseSession->getDicTraverseCache()-> + isLookAheadCorrectionInputIndex(static_cast<int>(point0Index)); + const bool isCompletion = dicNode.isCompletion(inputSize); + + const bool shouldNodeLevelCache = + TRAVERSAL->shouldNodeLevelCache(traverseSession, &dicNode); + if (shouldDepthLevelCache || shouldNodeLevelCache) { + if (DEBUG_CACHE) { + dicNode.dump("PUSH_CACHE"); + } + traverseSession->getDicTraverseCache()->copyPushContinue(&dicNode); + dicNode.setCached(); + } + + if (isLookAheadCorrection) { + // The algorithm maintains a small set of "deferred" nodes that have not consumed the + // latest touch point yet. These are needed to apply look-ahead correction operations + // that require special handling of the latest touch point. For example, with insertions + // (e.g., "thiis" -> "this") the latest touch point should not be consumed at all. + if (CORRECT_TRANSPOSITION) { + processDicNodeAsTransposition(traverseSession, &dicNode); + } + if (CORRECT_INSERTION) { + processDicNodeAsInsertion(traverseSession, &dicNode); + } + } else { // !isLookAheadCorrection + // Only consider typing error corrections if the normalized compound distance is + // below a spatial distance threshold. + // NOTE: the threshold may need to be updated if scoring model changes. + // TODO: Remove. Do not prune node here. + const bool allowsErrorCorrections = TRAVERSAL->allowsErrorCorrections(&dicNode); + // Process for handling space substitution (e.g., hevis => he is) + if (allowsErrorCorrections + && TRAVERSAL->isSpaceSubstitutionTerminal(traverseSession, &dicNode)) { + createNextWordDicNode(traverseSession, &dicNode, true /* spaceSubstitution */); + } + + DicNodeUtils::getAllChildDicNodes( + &dicNode, traverseSession->getOffsetDict(), &childDicNodes); + + const int childDicNodesSize = childDicNodes.getSizeAndLock(); + for (int i = 0; i < childDicNodesSize; ++i) { + DicNode *const childDicNode = childDicNodes[i]; + if (isCompletion) { + // Handle forward lookahead when the lexicon letter exceeds the input size. + processDicNodeAsMatch(traverseSession, childDicNode); + continue; + } + if (allowsErrorCorrections + && TRAVERSAL->isOmission(traverseSession, &dicNode, childDicNode)) { + // TODO: (Gesture) Change weight between omission and substitution errors + // TODO: (Gesture) Terminal node should not be handled as omission + omissionDicNode.initByCopy(childDicNode); + processDicNodeAsOmission(traverseSession, &omissionDicNode); + } + const ProximityType proximityType = TRAVERSAL->getProximityType( + traverseSession, &dicNode, childDicNode); + switch (proximityType) { + // TODO: Consider the difference of proximityType here + case MATCH_CHAR: + case PROXIMITY_CHAR: + processDicNodeAsMatch(traverseSession, childDicNode); + break; + case ADDITIONAL_PROXIMITY_CHAR: + if (allowsErrorCorrections) { + processDicNodeAsAdditionalProximityChar(traverseSession, &dicNode, + childDicNode); + } + break; + case SUBSTITUTION_CHAR: + if (allowsErrorCorrections) { + processDicNodeAsSubstitution(traverseSession, &dicNode, childDicNode); + } + break; + case UNRELATED_CHAR: + // Just drop this node and do nothing. + break; + default: + // Just drop this node and do nothing. + break; + } + } + + // Push the node for look-ahead correction + if (allowsErrorCorrections && canDoLookAheadCorrection) { + traverseSession->getDicTraverseCache()->copyPushNextActive(&dicNode); + } + } + } +} + +void Suggest::processTerminalDicNode( + DicTraverseSession *traverseSession, DicNode *dicNode) const { + if (dicNode->getCompoundDistance() >= static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) { + return; + } + if (!dicNode->isTerminalWordNode()) { + return; + } + if (TRAVERSAL->needsToTraverseAllUserInput() + && dicNode->getInputIndex(0) < traverseSession->getInputSize()) { + return; + } + + if (dicNode->shouldBeFilterdBySafetyNetForBigram()) { + return; + } + // Create a non-cached node here. + DicNode terminalDicNode; + DicNodeUtils::initByCopy(dicNode, &terminalDicNode); + Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_TERMINAL, traverseSession, 0, + &terminalDicNode, traverseSession->getBigramCacheMap()); + traverseSession->getDicTraverseCache()->copyPushTerminal(&terminalDicNode); +} + +/** + * Adds the expanded dicNode to the next search priority queue. Also creates an additional next word + * (by the space omission error correction) search path if input dicNode is on a terminal node. + */ +void Suggest::processExpandedDicNode( + DicTraverseSession *traverseSession, DicNode *dicNode) const { + processTerminalDicNode(traverseSession, dicNode); + if (dicNode->getCompoundDistance() < static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) { + if (TRAVERSAL->isSpaceOmissionTerminal(traverseSession, dicNode)) { + createNextWordDicNode(traverseSession, dicNode, false /* spaceSubstitution */); + } + const int allowsLookAhead = !(dicNode->hasMultipleWords() + && dicNode->isCompletion(traverseSession->getInputSize())); + if (dicNode->hasChildren() && allowsLookAhead) { + traverseSession->getDicTraverseCache()->copyPushNextActive(dicNode); + } + } + DicNode::managedDelete(dicNode); +} + +void Suggest::processDicNodeAsMatch(DicTraverseSession *traverseSession, + DicNode *childDicNode) const { + weightChildNode(traverseSession, childDicNode); + processExpandedDicNode(traverseSession, childDicNode); +} + +void Suggest::processDicNodeAsAdditionalProximityChar(DicTraverseSession *traverseSession, + DicNode *dicNode, DicNode *childDicNode) const { + Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_ADDITIONAL_PROXIMITY, + traverseSession, dicNode, childDicNode, 0 /* bigramCacheMap */); + weightChildNode(traverseSession, childDicNode); + processExpandedDicNode(traverseSession, childDicNode); +} + +void Suggest::processDicNodeAsSubstitution(DicTraverseSession *traverseSession, + DicNode *dicNode, DicNode *childDicNode) const { + Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_SUBSTITUTION, traverseSession, + dicNode, childDicNode, 0 /* bigramCacheMap */); + weightChildNode(traverseSession, childDicNode); + processExpandedDicNode(traverseSession, childDicNode); +} + +/** + * Handle the dicNode as an omission error (e.g., ths => this). Skip the current letter and consider + * matches for all possible next letters. Note that just skipping the current letter without any + * other conditions tends to flood the search dic nodes cache with omission nodes. Instead, check + * the possible *next* letters after the omission to better limit search to plausible omissions. + * Note that apostrophes are handled as omissions. + */ +void Suggest::processDicNodeAsOmission( + DicTraverseSession *traverseSession, DicNode *dicNode) const { + // If the omission is surely intentional that it should incur zero cost. + const bool isZeroCostOmission = dicNode->isZeroCostOmission(); + DicNodeVector childDicNodes; + + DicNodeUtils::getAllChildDicNodes(dicNode, traverseSession->getOffsetDict(), &childDicNodes); + + const int size = childDicNodes.getSizeAndLock(); + for (int i = 0; i < size; i++) { + DicNode *const childDicNode = childDicNodes[i]; + if (!isZeroCostOmission) { + // Treat this word as omission + Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_OMISSION, traverseSession, + dicNode, childDicNode, 0 /* bigramCacheMap */); + } + weightChildNode(traverseSession, childDicNode); + + if (!TRAVERSAL->isPossibleOmissionChildNode(traverseSession, dicNode, childDicNode)) { + DicNode::managedDelete(childDicNode); + continue; + } + processExpandedDicNode(traverseSession, childDicNode); + } +} + +/** + * Handle the dicNode as an insertion error (e.g., thiis => this). Skip the current touch point and + * consider matches for the next touch point. + */ +void Suggest::processDicNodeAsInsertion(DicTraverseSession *traverseSession, + DicNode *dicNode) const { + const int16_t pointIndex = dicNode->getInputIndex(0); + DicNodeVector childDicNodes; + DicNodeUtils::getProximityChildDicNodes(dicNode, traverseSession->getOffsetDict(), + traverseSession->getProximityInfoState(0), pointIndex + 1, true, &childDicNodes); + const int size = childDicNodes.getSizeAndLock(); + for (int i = 0; i < size; i++) { + DicNode *const childDicNode = childDicNodes[i]; + Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_INSERTION, traverseSession, + dicNode, childDicNode, 0 /* bigramCacheMap */); + processExpandedDicNode(traverseSession, childDicNode); + } +} + +/** + * Handle the dicNode as a transposition error (e.g., thsi => this). Swap the next two touch points. + */ +void Suggest::processDicNodeAsTransposition(DicTraverseSession *traverseSession, + DicNode *dicNode) const { + const int16_t pointIndex = dicNode->getInputIndex(0); + DicNodeVector childDicNodes1; + DicNodeUtils::getProximityChildDicNodes(dicNode, traverseSession->getOffsetDict(), + traverseSession->getProximityInfoState(0), pointIndex + 1, false, &childDicNodes1); + const int childSize1 = childDicNodes1.getSizeAndLock(); + for (int i = 0; i < childSize1; i++) { + if (childDicNodes1[i]->hasChildren()) { + DicNodeVector childDicNodes2; + DicNodeUtils::getProximityChildDicNodes( + childDicNodes1[i], traverseSession->getOffsetDict(), + traverseSession->getProximityInfoState(0), pointIndex, false, &childDicNodes2); + const int childSize2 = childDicNodes2.getSizeAndLock(); + for (int j = 0; j < childSize2; j++) { + DicNode *const childDicNode2 = childDicNodes2[j]; + Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_TRANSPOSITION, + traverseSession, childDicNodes1[i], childDicNode2, 0 /* bigramCacheMap */); + processExpandedDicNode(traverseSession, childDicNode2); + } + } + DicNode::managedDelete(childDicNodes1[i]); + } +} + +/** + * Weight child node by aligning it to the key + */ +void Suggest::weightChildNode(DicTraverseSession *traverseSession, DicNode *dicNode) const { + const int inputSize = traverseSession->getInputSize(); + if (dicNode->isCompletion(inputSize)) { + Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_COMPLETION, traverseSession, + 0 /* parentDicNode */, dicNode, 0 /* bigramCacheMap */); + } else { // completion + Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_MATCH, traverseSession, + 0 /* parentDicNode */, dicNode, 0 /* bigramCacheMap */); + } +} + +/** + * Creates a new dicNode that represents a space insertion at the end of the input dicNode. Also + * incorporates the unigram / bigram score for the ending word into the new dicNode. + */ +void Suggest::createNextWordDicNode(DicTraverseSession *traverseSession, DicNode *dicNode, + const bool spaceSubstitution) const { + if (!TRAVERSAL->isGoodToTraverseNextWord(dicNode)) { + return; + } + + // Create a non-cached node here. + DicNode newDicNode; + DicNodeUtils::initAsRootWithPreviousWord(traverseSession->getDicRootPos(), + traverseSession->getOffsetDict(), dicNode, &newDicNode); + Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_NEW_WORD, traverseSession, dicNode, + &newDicNode, traverseSession->getBigramCacheMap()); + if (spaceSubstitution) { + // Merge this with CT_NEW_WORD + Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_SPACE_SUBSTITUTION, + traverseSession, 0, &newDicNode, 0 /* bigramCacheMap */); + } + traverseSession->getDicTraverseCache()->copyPushNextActive(&newDicNode); +} +} // namespace latinime diff --git a/native/jni/src/suggest/core/suggest.h b/native/jni/src/suggest/core/suggest.h new file mode 100644 index 000000000..75d646bdd --- /dev/null +++ b/native/jni/src/suggest/core/suggest.h @@ -0,0 +1,95 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_SUGGEST_IMPL_H +#define LATINIME_SUGGEST_IMPL_H + +#include "defines.h" +#include "suggest_interface.h" +#include "suggest_policy.h" + +namespace latinime { + +class DicNode; +class DicTraverseSession; +class ProximityInfo; +class Scoring; +class Traversal; +class Weighting; + +class Suggest : public SuggestInterface { + public: + AK_FORCE_INLINE Suggest(const SuggestPolicy *const suggestPolicy) + : TRAVERSAL(suggestPolicy->getTraversal()), + SCORING(suggestPolicy->getScoring()), WEIGHTING(suggestPolicy->getWeighting()) {} + AK_FORCE_INLINE virtual ~Suggest() {} + int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs, + int *times, int *pointerIds, int *inputCodePoints, int inputSize, int commitPoint, + int *outWords, int *frequencies, int *outputIndices, int *outputTypes) const; + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(Suggest); + void createNextWordDicNode(DicTraverseSession *traverseSession, DicNode *dicNode, + const bool spaceSubstitution) const; + int outputSuggestions(DicTraverseSession *traverseSession, int *frequencies, + int *outputCodePoints, int *outputIndices, int *outputTypes) const; + void initializeSearch(DicTraverseSession *traverseSession, int commitPoint) const; + void expandCurrentDicNodes(DicTraverseSession *traverseSession) const; + void processTerminalDicNode(DicTraverseSession *traverseSession, DicNode *dicNode) const; + void processExpandedDicNode(DicTraverseSession *traverseSession, DicNode *dicNode) const; + void weightChildNode(DicTraverseSession *traverseSession, DicNode *dicNode) const; + float getAutocorrectScore(DicTraverseSession *traverseSession, DicNode *dicNode) const; + void generateFeatures( + DicTraverseSession *traverseSession, DicNode *dicNode, float *features) const; + void processDicNodeAsOmission(DicTraverseSession *traverseSession, DicNode *dicNode) const; + void processDicNodeAsTransposition(DicTraverseSession *traverseSession, + DicNode *dicNode) const; + void processDicNodeAsInsertion(DicTraverseSession *traverseSession, DicNode *dicNode) const; + void processDicNodeAsAdditionalProximityChar(DicTraverseSession *traverseSession, + DicNode *dicNode, DicNode *childDicNode) const; + void processDicNodeAsSubstitution(DicTraverseSession *traverseSession, DicNode *dicNode, + DicNode *childDicNode) const; + void processDicNodeAsMatch(DicTraverseSession *traverseSession, + DicNode *childDicNode) const; + + // Dic nodes cache size for lookahead (autocompletion) + static const int LOOKAHEAD_DIC_NODES_CACHE_SIZE; + // Max characters to lookahead + static const int MAX_LOOKAHEAD; + // Inputs longer than this will autocorrect if the suggestion is multi-word + static const int MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT; + static const int MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE; + // Base value for converting costs into scores (low so will not autocorrect without classifier) + static const float BASE_OUTPUT_SCORE; + + // Threshold for autocorrection classifier + static const float AUTOCORRECT_CLASSIFICATION_THRESHOLD; + // Threshold for computing the language model feature for autocorrect classification + static const float AUTOCORRECT_LANGUAGE_FEATURE_THRESHOLD; + + // Typing error correction settings + static const bool CORRECT_SPACE_OMISSION; + static const bool CORRECT_TRANSPOSITION; + static const bool CORRECT_INSERTION; + + const Traversal *const TRAVERSAL; + const Scoring *const SCORING; + const Weighting *const WEIGHTING; + + static const bool CORRECT_OMISSION_G; +}; +} // namespace latinime +#endif // LATINIME_SUGGEST_IMPL_H diff --git a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp new file mode 100644 index 000000000..90985d0fe --- /dev/null +++ b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp @@ -0,0 +1,52 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "scoring_params.h" + +namespace latinime { +// TODO: RENAME all +const float ScoringParams::MAX_SPATIAL_DISTANCE = 1.0f; +const int ScoringParams::THRESHOLD_NEXT_WORD_PROBABILITY = 40; +const int ScoringParams::THRESHOLD_NEXT_WORD_PROBABILITY_FOR_CAPPED = 120; +const float ScoringParams::AUTOCORRECT_OUTPUT_THRESHOLD = 1.0f; +const int ScoringParams::MAX_CACHE_DIC_NODE_SIZE = 125; +const int ScoringParams::THRESHOLD_SHORT_WORD_LENGTH = 4; + +const float ScoringParams::DISTANCE_WEIGHT_LENGTH = 0.132f; +const float ScoringParams::PROXIMITY_COST = 0.086f; +const float ScoringParams::FIRST_PROXIMITY_COST = 0.104f; +const float ScoringParams::OMISSION_COST = 0.388f; +const float ScoringParams::OMISSION_COST_SAME_CHAR = 0.431f; +const float ScoringParams::OMISSION_COST_FIRST_CHAR = 0.532f; +const float ScoringParams::INSERTION_COST = 0.670f; +const float ScoringParams::INSERTION_COST_SAME_CHAR = 0.526f; +const float ScoringParams::INSERTION_COST_FIRST_CHAR = 0.563f; +const float ScoringParams::TRANSPOSITION_COST = 0.494f; +const float ScoringParams::SPACE_SUBSTITUTION_COST = 0.239f; +const float ScoringParams::ADDITIONAL_PROXIMITY_COST = 0.380f; +const float ScoringParams::SUBSTITUTION_COST = 0.363f; +const float ScoringParams::COST_NEW_WORD = 0.054f; +const float ScoringParams::COST_NEW_WORD_CAPITALIZED = 0.174f; +const float ScoringParams::DISTANCE_WEIGHT_LANGUAGE = 1.123f; +const float ScoringParams::COST_FIRST_LOOKAHEAD = 0.462f; +const float ScoringParams::COST_LOOKAHEAD = 0.092f; +const float ScoringParams::HAS_PROXIMITY_TERMINAL_COST = 0.126f; +const float ScoringParams::HAS_EDIT_CORRECTION_TERMINAL_COST = 0.056f; +const float ScoringParams::HAS_MULTI_WORD_TERMINAL_COST = 0.136f; +const float ScoringParams::TYPING_BASE_OUTPUT_SCORE = 1.0f; +const float ScoringParams::TYPING_MAX_OUTPUT_SCORE_PER_INPUT = 0.1f; +const float ScoringParams::MAX_NORM_DISTANCE_FOR_EDIT = 0.1f; +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/typing/scoring_params.h b/native/jni/src/suggest/policyimpl/typing/scoring_params.h new file mode 100644 index 000000000..8f104b362 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/typing/scoring_params.h @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_SCORING_PARAMS_H +#define LATINIME_SCORING_PARAMS_H + +#include "defines.h" + +namespace latinime { + +class ScoringParams { + public: + // Fixed model parameters + static const float MAX_SPATIAL_DISTANCE; + static const int THRESHOLD_NEXT_WORD_PROBABILITY; + static const int THRESHOLD_NEXT_WORD_PROBABILITY_FOR_CAPPED; + static const float AUTOCORRECT_OUTPUT_THRESHOLD; + static const int MAX_CACHE_DIC_NODE_SIZE; + static const int THRESHOLD_SHORT_WORD_LENGTH; + + // Numerically optimized parameters (currently for tap typing only). + // TODO: add ability to modify these constants programmatically. + // TODO: explore optimization of gesture parameters. + static const float DISTANCE_WEIGHT_LENGTH; + static const float PROXIMITY_COST; + static const float FIRST_PROXIMITY_COST; + static const float OMISSION_COST; + static const float OMISSION_COST_SAME_CHAR; + static const float OMISSION_COST_FIRST_CHAR; + static const float INSERTION_COST; + static const float INSERTION_COST_SAME_CHAR; + static const float INSERTION_COST_FIRST_CHAR; + static const float TRANSPOSITION_COST; + static const float SPACE_SUBSTITUTION_COST; + static const float ADDITIONAL_PROXIMITY_COST; + static const float SUBSTITUTION_COST; + static const float COST_NEW_WORD; + static const float COST_NEW_WORD_CAPITALIZED; + static const float DISTANCE_WEIGHT_LANGUAGE; + static const float COST_FIRST_LOOKAHEAD; + static const float COST_LOOKAHEAD; + static const float HAS_PROXIMITY_TERMINAL_COST; + static const float HAS_EDIT_CORRECTION_TERMINAL_COST; + static const float HAS_MULTI_WORD_TERMINAL_COST; + static const float TYPING_BASE_OUTPUT_SCORE; + static const float TYPING_MAX_OUTPUT_SCORE_PER_INPUT; + static const float MAX_NORM_DISTANCE_FOR_EDIT; + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(ScoringParams); +}; +} // namespace latinime +#endif // LATINIME_SCORING_PARAMS_H diff --git a/native/jni/src/suggest/policyimpl/typing/typing_scoring.cpp b/native/jni/src/suggest/policyimpl/typing/typing_scoring.cpp new file mode 100644 index 000000000..53f68f20f --- /dev/null +++ b/native/jni/src/suggest/policyimpl/typing/typing_scoring.cpp @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "typing_scoring.h" + +namespace latinime { +const TypingScoring TypingScoring::sInstance; +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/typing/typing_scoring.h b/native/jni/src/suggest/policyimpl/typing/typing_scoring.h new file mode 100644 index 000000000..ed941f0ae --- /dev/null +++ b/native/jni/src/suggest/policyimpl/typing/typing_scoring.h @@ -0,0 +1,82 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_TYPING_SCORING_H +#define LATINIME_TYPING_SCORING_H + +#include "defines.h" +#include "scoring.h" +#include "scoring_params.h" + +namespace latinime { + +class DicNode; +class DicTraverseSession; + +class TypingScoring : public Scoring { + public: + static const TypingScoring *getInstance() { return &sInstance; } + + AK_FORCE_INLINE bool getMostProbableString( + const DicTraverseSession *const traverseSession, const int terminalSize, + const float languageWeight, int *const outputCodePoints, int *const type, + int *const freq) const { + return false; + } + + AK_FORCE_INLINE void safetyNetForMostProbableString(const int terminalSize, + const int maxScore, int *const outputCodePoints, int *const frequencies) const { + } + + AK_FORCE_INLINE void searchWordWithDoubleLetter(DicNode *terminals, + const int terminalSize, int *doubleLetterTerminalIndex, + DoubleLetterLevel *doubleLetterLevel) const { + } + + AK_FORCE_INLINE float getAdjustedLanguageWeight(DicTraverseSession *const traverseSession, + DicNode *const terminals, const int size) const { + return 1.0f; + } + + AK_FORCE_INLINE int calculateFinalScore(const float compoundDistance, + const int inputSize, const bool forceCommit) const { + const float maxDistance = ScoringParams::DISTANCE_WEIGHT_LANGUAGE + + static_cast<float>(inputSize) * ScoringParams::TYPING_MAX_OUTPUT_SCORE_PER_INPUT; + return static_cast<int>((ScoringParams::TYPING_BASE_OUTPUT_SCORE + - (compoundDistance / maxDistance) + + (forceCommit ? ScoringParams::AUTOCORRECT_OUTPUT_THRESHOLD : 0.0f)) + * SUGGEST_INTERFACE_OUTPUT_SCALE); + } + + AK_FORCE_INLINE float getDoubleLetterDemotionDistanceCost(const int terminalIndex, + const int doubleLetterTerminalIndex, + const DoubleLetterLevel doubleLetterLevel) const { + return 0.0f; + } + + AK_FORCE_INLINE bool doesAutoCorrectValidWord() const { + return false; + } + + private: + DISALLOW_COPY_AND_ASSIGN(TypingScoring); + static const TypingScoring sInstance; + + TypingScoring() {} + ~TypingScoring() {} +}; +} // namespace latinime +#endif // LATINIME_TYPING_SCORING_H diff --git a/native/jni/src/suggest/policyimpl/typing/typing_suggest_policy.cpp b/native/jni/src/suggest/policyimpl/typing/typing_suggest_policy.cpp new file mode 100644 index 000000000..ebba37531 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/typing/typing_suggest_policy.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest.h" +#include "typing_suggest.h" +#include "typing_suggest_policy.h" + +namespace latinime { + +const TypingSuggestPolicy TypingSuggestPolicy::sInstance; + +// A factory method for a "typing" Suggest instance +static SuggestInterface *getTypingSuggestInstance() { + return new Suggest(TypingSuggestPolicy::getInstance()); +} + +// An ad-hoc internal class to register the factory method getTypingSuggestInstance() defined above +class TypingSuggestFactoryRegisterer { + public: + TypingSuggestFactoryRegisterer() { + TypingSuggest::setTypingSuggestFactoryMethod(getTypingSuggestInstance); + } + private: + DISALLOW_COPY_AND_ASSIGN(TypingSuggestFactoryRegisterer); +}; + +// To invoke the TypingSuggestFactoryRegisterer's constructor in the global constructor +static TypingSuggestFactoryRegisterer typingSuggestFactoryregisterer; +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/typing/typing_suggest_policy.h b/native/jni/src/suggest/policyimpl/typing/typing_suggest_policy.h new file mode 100644 index 000000000..55668fc25 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/typing/typing_suggest_policy.h @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_TYPING_SUGGEST_POLICY_H +#define LATINIME_TYPING_SUGGEST_POLICY_H + +#include "defines.h" +#include "suggest_policy.h" +#include "typing_scoring.h" +#include "typing_traversal.h" +#include "typing_weighting.h" + +namespace latinime { + +class Scoring; +class Traversal; +class Weighting; + +class TypingSuggestPolicy : public SuggestPolicy { + public: + static const TypingSuggestPolicy *getInstance() { return &sInstance; } + + TypingSuggestPolicy() {} + virtual ~TypingSuggestPolicy() {} + AK_FORCE_INLINE const Traversal *getTraversal() const { + return TypingTraversal::getInstance(); + } + + AK_FORCE_INLINE const Scoring *getScoring() const { + return TypingScoring::getInstance(); + } + + AK_FORCE_INLINE const Weighting *getWeighting() const { + return TypingWeighting::getInstance(); + } + + private: + DISALLOW_COPY_AND_ASSIGN(TypingSuggestPolicy); + static const TypingSuggestPolicy sInstance; +}; +} // namespace latinime +#endif // LATINIME_TYPING_SUGGEST_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/typing/typing_traversal.cpp b/native/jni/src/suggest/policyimpl/typing/typing_traversal.cpp new file mode 100644 index 000000000..68c614e77 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/typing/typing_traversal.cpp @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "typing_traversal.h" + +namespace latinime { +const bool TypingTraversal::CORRECT_OMISSION = true; +const bool TypingTraversal::CORRECT_SPACE_SUBSTITUTION = true; +const bool TypingTraversal::CORRECT_SPACE_OMISSION = true; +const TypingTraversal TypingTraversal::sInstance; +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h new file mode 100644 index 000000000..16153f8bb --- /dev/null +++ b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h @@ -0,0 +1,184 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_TYPING_TRAVERSAL_H +#define LATINIME_TYPING_TRAVERSAL_H + +#include <stdint.h> + +#include "char_utils.h" +#include "defines.h" +#include "dic_node.h" +#include "dic_node_vector.h" +#include "dic_traverse_session.h" +#include "proximity_info_state.h" +#include "scoring_params.h" +#include "traversal.h" + +namespace latinime { +class TypingTraversal : public Traversal { + public: + static const TypingTraversal *getInstance() { return &sInstance; } + + AK_FORCE_INLINE int getMaxPointerCount() const { + return MAX_POINTER_COUNT; + } + + AK_FORCE_INLINE bool allowsErrorCorrections(const DicNode *const dicNode) const { + return dicNode->getNormalizedSpatialDistance() + < ScoringParams::MAX_NORM_DISTANCE_FOR_EDIT; + } + + AK_FORCE_INLINE bool isOmission(const DicTraverseSession *const traverseSession, + const DicNode *const dicNode, const DicNode *const childDicNode) const { + if (!CORRECT_OMISSION) { + return false; + } + const int inputSize = traverseSession->getInputSize(); + // TODO: Don't refer to isCompletion? + if (dicNode->isCompletion(inputSize)) { + return false; + } + if (dicNode->canBeIntentionalOmission()) { + return true; + } + const int point0Index = dicNode->getInputIndex(0); + const int currentBaseLowerCodePoint = + toBaseLowerCase(childDicNode->getNodeCodePoint()); + const int typedBaseLowerCodePoint = + toBaseLowerCase(traverseSession->getProximityInfoState(0) + ->getPrimaryCodePointAt(point0Index)); + return (currentBaseLowerCodePoint != typedBaseLowerCodePoint); + } + + AK_FORCE_INLINE bool isSpaceSubstitutionTerminal( + const DicTraverseSession *const traverseSession, const DicNode *const dicNode) const { + if (!CORRECT_SPACE_SUBSTITUTION) { + return false; + } + if (!canDoLookAheadCorrection(traverseSession, dicNode)) { + return false; + } + const int point0Index = dicNode->getInputIndex(0); + return dicNode->isTerminalWordNode() + && traverseSession->getProximityInfoState(0)-> + hasSpaceProximity(point0Index); + } + + AK_FORCE_INLINE bool isSpaceOmissionTerminal( + const DicTraverseSession *const traverseSession, const DicNode *const dicNode) const { + if (!CORRECT_SPACE_OMISSION) { + return false; + } + const int inputSize = traverseSession->getInputSize(); + // TODO: Don't refer to isCompletion? + if (dicNode->isCompletion(inputSize)) { + return false; + } + if (!dicNode->isTerminalWordNode()) { + return false; + } + const int16_t pointIndex = dicNode->getInputIndex(0); + return pointIndex <= inputSize && !dicNode->isTotalInputSizeExceedingLimit() + && !dicNode->shouldBeFilterdBySafetyNetForBigram(); + } + + AK_FORCE_INLINE bool shouldDepthLevelCache( + const DicTraverseSession *const traverseSession) const { + const int inputSize = traverseSession->getInputSize(); + return traverseSession->isCacheBorderForTyping(inputSize); + } + + AK_FORCE_INLINE bool shouldNodeLevelCache( + const DicTraverseSession *const traverseSession, const DicNode *const dicNode) const { + return false; + } + + AK_FORCE_INLINE bool canDoLookAheadCorrection( + const DicTraverseSession *const traverseSession, const DicNode *const dicNode) const { + const int inputSize = traverseSession->getInputSize(); + return dicNode->canDoLookAheadCorrection(inputSize); + } + + AK_FORCE_INLINE ProximityType getProximityType( + const DicTraverseSession *const traverseSession, const DicNode *const dicNode, + const DicNode *const childDicNode) const { + return traverseSession->getProximityInfoState(0)->getProximityType( + dicNode->getInputIndex(0), childDicNode->getNodeCodePoint(), + true /* checkProximityChars */); + } + + AK_FORCE_INLINE bool needsToTraverseAllUserInput() const { + return true; + } + + AK_FORCE_INLINE float getMaxSpatialDistance() const { + return ScoringParams::MAX_SPATIAL_DISTANCE; + } + + AK_FORCE_INLINE bool allowPartialCommit() const { + return true; + } + + AK_FORCE_INLINE int getDefaultExpandDicNodeSize() const { + return DicNodeVector::DEFAULT_NODES_SIZE_FOR_OPTIMIZATION; + } + + AK_FORCE_INLINE bool sameAsTyped( + const DicTraverseSession *const traverseSession, const DicNode *const dicNode) const { + return traverseSession->getProximityInfoState(0)->sameAsTyped( + dicNode->getOutputWordBuf(), dicNode->getDepth()); + } + + AK_FORCE_INLINE int getMaxCacheSize() const { + return ScoringParams::MAX_CACHE_DIC_NODE_SIZE; + } + + AK_FORCE_INLINE bool isPossibleOmissionChildNode( + const DicTraverseSession *const traverseSession, const DicNode *const parentDicNode, + const DicNode *const dicNode) const { + const ProximityType proximityType = + getProximityType(traverseSession, parentDicNode, dicNode); + if (!DicNodeUtils::isProximityChar(proximityType)) { + return false; + } + return true; + } + + AK_FORCE_INLINE bool isGoodToTraverseNextWord(const DicNode *const dicNode) const { + const int probability = dicNode->getProbability(); + if (probability < ScoringParams::THRESHOLD_NEXT_WORD_PROBABILITY) { + return false; + } + const int c = dicNode->getOutputWordBuf()[0]; + const bool shortCappedWord = dicNode->getDepth() + < ScoringParams::THRESHOLD_SHORT_WORD_LENGTH && isAsciiUpper(c); + return !shortCappedWord + || probability >= ScoringParams::THRESHOLD_NEXT_WORD_PROBABILITY_FOR_CAPPED; + } + + private: + DISALLOW_COPY_AND_ASSIGN(TypingTraversal); + static const bool CORRECT_OMISSION; + static const bool CORRECT_SPACE_SUBSTITUTION; + static const bool CORRECT_SPACE_OMISSION; + static const TypingTraversal sInstance; + + TypingTraversal() {} + ~TypingTraversal() {} +}; +} // namespace latinime +#endif // LATINIME_TYPING_TRAVERSAL_H diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp b/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp new file mode 100644 index 000000000..6e4b2fb6a --- /dev/null +++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dic_node.h" +#include "scoring_params.h" +#include "typing_weighting.h" + +namespace latinime { +const TypingWeighting TypingWeighting::sInstance; +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h new file mode 100644 index 000000000..e8075f41a --- /dev/null +++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h @@ -0,0 +1,194 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_TYPING_WEIGHTING_H +#define LATINIME_TYPING_WEIGHTING_H + +#include "defines.h" +#include "dic_node_utils.h" +#include "dic_traverse_session.h" +#include "weighting.h" + +namespace latinime { + +class DicNode; +struct DicNode_InputStateG; + +class TypingWeighting : public Weighting { + public: + static const TypingWeighting *getInstance() { return &sInstance; } + + protected: + float getTerminalSpatialCost( + const DicTraverseSession *const traverseSession, const DicNode *const dicNode) const { + float cost = 0.0f; + if (dicNode->hasMultipleWords()) { + cost += ScoringParams::HAS_MULTI_WORD_TERMINAL_COST; + } + if (dicNode->getProximityCorrectionCount() > 0) { + cost += ScoringParams::HAS_PROXIMITY_TERMINAL_COST; + } + if (dicNode->getEditCorrectionCount() > 0) { + cost += ScoringParams::HAS_EDIT_CORRECTION_TERMINAL_COST; + } + return cost; + } + + float getOmissionCost(const DicNode *const parentDicNode, const DicNode *const dicNode) const { + bool sameCodePoint = false; + bool isFirstLetterOmission = false; + float cost = 0.0f; + sameCodePoint = dicNode->isSameNodeCodePoint(parentDicNode); + // If the traversal omitted the first letter then the dicNode should now be on the second. + isFirstLetterOmission = dicNode->getDepth() == 2; + if (isFirstLetterOmission) { + cost = ScoringParams::OMISSION_COST_FIRST_CHAR; + } else { + cost = sameCodePoint ? ScoringParams::OMISSION_COST_SAME_CHAR + : ScoringParams::OMISSION_COST; + } + return cost; + } + + float getMatchedCost( + const DicTraverseSession *const traverseSession, const DicNode *const dicNode, + DicNode_InputStateG *inputStateG) const { + const int pointIndex = dicNode->getInputIndex(0); + // Note: min() required since length can be MAX_POINT_TO_KEY_LENGTH for characters not on + // the keyboard (like accented letters) + const float length = min(ScoringParams::MAX_SPATIAL_DISTANCE, + traverseSession->getProximityInfoState(0)->getPointToKeyLength( + pointIndex, dicNode->getNodeCodePoint())); + const float weightedDistance = length * ScoringParams::DISTANCE_WEIGHT_LENGTH; + const bool isFirstChar = pointIndex == 0; + const bool isProximity = isProximityDicNode(traverseSession, dicNode); + const float cost = isProximity ? (isFirstChar ? ScoringParams::FIRST_PROXIMITY_COST + : ScoringParams::PROXIMITY_COST) : 0.0f; + return weightedDistance + cost; + } + + bool isProximityDicNode( + const DicTraverseSession *const traverseSession, const DicNode *const dicNode) const { + const int pointIndex = dicNode->getInputIndex(0); + const int primaryCodePoint = toBaseLowerCase( + traverseSession->getProximityInfoState(0)->getPrimaryCodePointAt(pointIndex)); + const int dicNodeChar = toBaseLowerCase(dicNode->getNodeCodePoint()); + return primaryCodePoint != dicNodeChar; + } + + float getTranspositionCost( + const DicTraverseSession *const traverseSession, const DicNode *const parentDicNode, + const DicNode *const dicNode) const { + const int16_t parentPointIndex = parentDicNode->getInputIndex(0); + const int prevCodePoint = parentDicNode->getNodeCodePoint(); + const float distance1 = traverseSession->getProximityInfoState(0)->getPointToKeyLength( + parentPointIndex + 1, prevCodePoint); + const int codePoint = dicNode->getNodeCodePoint(); + const float distance2 = traverseSession->getProximityInfoState(0)->getPointToKeyLength( + parentPointIndex, codePoint); + const float distance = distance1 + distance2; + const float weightedLengthDistance = + distance * ScoringParams::DISTANCE_WEIGHT_LENGTH; + return ScoringParams::TRANSPOSITION_COST + weightedLengthDistance; + } + + float getInsertionCost( + const DicTraverseSession *const traverseSession, + const DicNode *const parentDicNode, const DicNode *const dicNode) const { + const int16_t parentPointIndex = parentDicNode->getInputIndex(0); + const int prevCodePoint = + traverseSession->getProximityInfoState(0)->getPrimaryCodePointAt(parentPointIndex); + + const int currentCodePoint = dicNode->getNodeCodePoint(); + const bool sameCodePoint = prevCodePoint == currentCodePoint; + const float dist = traverseSession->getProximityInfoState(0)->getPointToKeyLength( + parentPointIndex + 1, currentCodePoint); + const float weightedDistance = dist * ScoringParams::DISTANCE_WEIGHT_LENGTH; + const bool singleChar = dicNode->getDepth() == 1; + const float cost = (singleChar ? ScoringParams::INSERTION_COST_FIRST_CHAR : 0.0f) + + (sameCodePoint ? ScoringParams::INSERTION_COST_SAME_CHAR + : ScoringParams::INSERTION_COST); + return cost + weightedDistance; + } + + float getNewWordCost(const DicNode *const dicNode) const { + const bool isCapitalized = dicNode->isCapitalized(); + return isCapitalized ? + ScoringParams::COST_NEW_WORD_CAPITALIZED : ScoringParams::COST_NEW_WORD; + } + + float getNewWordBigramCost( + const DicTraverseSession *const traverseSession, const DicNode *const dicNode, + hash_map_compat<int, int16_t> *const bigramCacheMap) const { + return DicNodeUtils::getBigramNodeImprobability(traverseSession->getOffsetDict(), + dicNode, bigramCacheMap); + } + + float getCompletionCost(const DicTraverseSession *const traverseSession, + const DicNode *const dicNode) const { + // The auto completion starts when the input index is same as the input size + const bool firstCompletion = dicNode->getInputIndex(0) + == traverseSession->getInputSize(); + // TODO: Change the cost for the first completion for the gesture? + const float cost = firstCompletion ? ScoringParams::COST_FIRST_LOOKAHEAD + : ScoringParams::COST_LOOKAHEAD; + return cost; + } + + float getTerminalLanguageCost(const DicTraverseSession *const traverseSession, + const DicNode *const dicNode, const float dicNodeLanguageImprobability) const { + const bool hasEditCount = dicNode->getEditCorrectionCount() > 0; + const bool isSameLength = dicNode->getDepth() == traverseSession->getInputSize(); + const bool hasMultipleWords = dicNode->hasMultipleWords(); + const bool hasProximityErrors = dicNode->getProximityCorrectionCount() > 0; + // Gesture input is always assumed to have proximity errors + // because the input word shouldn't be treated as perfect + const bool isExactMatch = !hasEditCount && !hasMultipleWords + && !hasProximityErrors && isSameLength; + + const float totalPrevWordsLanguageCost = dicNode->getTotalPrevWordsLanguageCost(); + const float languageImprobability = isExactMatch ? 0.0f : dicNodeLanguageImprobability; + const float languageWeight = ScoringParams::DISTANCE_WEIGHT_LANGUAGE; + // TODO: Caveat: The following equation should be: + // totalPrevWordsLanguageCost + (languageImprobability * languageWeight); + return (totalPrevWordsLanguageCost + languageImprobability) * languageWeight; + } + + AK_FORCE_INLINE bool needsToNormalizeCompoundDistance() const { + return false; + } + + AK_FORCE_INLINE float getAdditionalProximityCost() const { + return ScoringParams::ADDITIONAL_PROXIMITY_COST; + } + + AK_FORCE_INLINE float getSubstitutionCost() const { + return ScoringParams::SUBSTITUTION_COST; + } + + AK_FORCE_INLINE float getSpaceSubstitutionCost() const { + return ScoringParams::SPACE_SUBSTITUTION_COST; + } + + private: + DISALLOW_COPY_AND_ASSIGN(TypingWeighting); + static const TypingWeighting sInstance; + + TypingWeighting() {} + ~TypingWeighting() {} +}; +} // namespace latinime +#endif // LATINIME_TYPING_WEIGHTING_H |