aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--native/jni/Android.mk3
-rw-r--r--native/jni/NativeFileList.mk1
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node.h9
-rw-r--r--native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h14
-rw-r--r--native/jni/src/suggest/core/dictionary/error_type_utils.h5
-rw-r--r--native/jni/src/suggest/core/dictionary/shortcut_utils.h64
-rw-r--r--native/jni/src/suggest/core/dictionary/suggestions_output_utils.cpp265
-rw-r--r--native/jni/src/suggest/core/dictionary/suggestions_output_utils.h52
-rw-r--r--native/jni/src/suggest/core/policy/scoring.h5
-rw-r--r--native/jni/src/suggest/core/policy/traversal.h3
-rw-r--r--native/jni/src/suggest/core/suggest.cpp209
-rw-r--r--native/jni/src/suggest/core/suggest.h12
-rw-r--r--native/jni/src/suggest/policyimpl/typing/scoring_params.cpp6
-rw-r--r--native/jni/src/suggest/policyimpl/typing/scoring_params.h5
-rw-r--r--native/jni/src/suggest/policyimpl/typing/typing_scoring.h34
-rw-r--r--native/jni/src/suggest/policyimpl/typing/typing_traversal.h10
16 files changed, 382 insertions, 315 deletions
diff --git a/native/jni/Android.mk b/native/jni/Android.mk
index 7827db302..3b3da96cc 100644
--- a/native/jni/Android.mk
+++ b/native/jni/Android.mk
@@ -28,7 +28,8 @@ LATIN_IME_SRC_DIR := src
LOCAL_C_INCLUDES += $(LOCAL_PATH)/$(LATIN_IME_SRC_DIR)
LOCAL_CFLAGS += -Werror -Wall -Wextra -Weffc++ -Wformat=2 -Wcast-qual -Wcast-align \
- -Wwrite-strings -Wfloat-equal -Wpointer-arith -Winit-self -Wredundant-decls -Wno-system-headers
+ -Wwrite-strings -Wfloat-equal -Wpointer-arith -Winit-self -Wredundant-decls \
+ -Woverloaded-virtual -Wstrict-null-sentinel -Wsign-promo -Wno-system-headers
ifeq ($(TARGET_ARCH), arm)
ifeq ($(TARGET_GCC_VERSION), 4.6)
diff --git a/native/jni/NativeFileList.mk b/native/jni/NativeFileList.mk
index d80a1115f..d2f22598e 100644
--- a/native/jni/NativeFileList.mk
+++ b/native/jni/NativeFileList.mk
@@ -31,6 +31,7 @@ LATIN_IME_CORE_SRC_FILES := \
digraph_utils.cpp \
error_type_utils.cpp \
multi_bigram_map.cpp \
+ suggestions_output_utils.cpp \
unigram_property.cpp) \
$(addprefix suggest/core/layout/, \
additional_proximity_chars.cpp \
diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h
index 069852d6e..558667eb0 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node.h
@@ -23,6 +23,7 @@
#include "suggest/core/dicnode/internal/dic_node_state.h"
#include "suggest/core/dicnode/internal/dic_node_properties.h"
#include "suggest/core/dictionary/digraph_utils.h"
+#include "suggest/core/dictionary/error_type_utils.h"
#include "utils/char_utils.h"
#if DEBUG_DICT
@@ -493,8 +494,8 @@ class DicNode {
mDicNodeState.mDicNodeStateScoring.advanceDigraphIndex();
}
- bool isExactMatch() const {
- return mDicNodeState.mDicNodeStateScoring.isExactMatch();
+ ErrorTypeUtils::ErrorType getContainedErrorTypes() const {
+ return mDicNodeState.mDicNodeStateScoring.getContainedErrorTypes();
}
bool isBlacklistedOrNotAWord() const {
@@ -535,8 +536,8 @@ class DicNode {
return false;
}
// Promote exact matches to prevent them from being pruned.
- const bool leftExactMatch = isExactMatch();
- const bool rightExactMatch = right->isExactMatch();
+ const bool leftExactMatch = ErrorTypeUtils::isExactMatch(getContainedErrorTypes());
+ const bool rightExactMatch = ErrorTypeUtils::isExactMatch(right->getContainedErrorTypes());
if (leftExactMatch != rightExactMatch) {
return leftExactMatch;
}
diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h
index 74f9eee92..11c201e52 100644
--- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h
+++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h
@@ -32,7 +32,7 @@ class DicNodeStateScoring {
mDigraphIndex(DigraphUtils::NOT_A_DIGRAPH_INDEX),
mEditCorrectionCount(0), mProximityCorrectionCount(0),
mNormalizedCompoundDistance(0.0f), mSpatialDistance(0.0f), mLanguageDistance(0.0f),
- mRawLength(0.0f), mContainingErrorTypes(ErrorTypeUtils::NOT_AN_ERROR),
+ mRawLength(0.0f), mContainedErrorTypes(ErrorTypeUtils::NOT_AN_ERROR),
mNormalizedCompoundDistanceAfterFirstWord(MAX_VALUE_FOR_WEIGHTING) {
}
@@ -48,7 +48,7 @@ class DicNodeStateScoring {
mDoubleLetterLevel = NOT_A_DOUBLE_LETTER;
mDigraphIndex = DigraphUtils::NOT_A_DIGRAPH_INDEX;
mNormalizedCompoundDistanceAfterFirstWord = MAX_VALUE_FOR_WEIGHTING;
- mContainingErrorTypes = ErrorTypeUtils::NOT_AN_ERROR;
+ mContainedErrorTypes = ErrorTypeUtils::NOT_AN_ERROR;
}
AK_FORCE_INLINE void init(const DicNodeStateScoring *const scoring) {
@@ -60,7 +60,7 @@ class DicNodeStateScoring {
mRawLength = scoring->mRawLength;
mDoubleLetterLevel = scoring->mDoubleLetterLevel;
mDigraphIndex = scoring->mDigraphIndex;
- mContainingErrorTypes = scoring->mContainingErrorTypes;
+ mContainedErrorTypes = scoring->mContainedErrorTypes;
mNormalizedCompoundDistanceAfterFirstWord =
scoring->mNormalizedCompoundDistanceAfterFirstWord;
}
@@ -69,7 +69,7 @@ class DicNodeStateScoring {
const int inputSize, const int totalInputIndex,
const ErrorTypeUtils::ErrorType errorType) {
addDistance(spatialCost, languageCost, doNormalization, inputSize, totalInputIndex);
- mContainingErrorTypes = mContainingErrorTypes | errorType;
+ mContainedErrorTypes = mContainedErrorTypes | errorType;
if (ErrorTypeUtils::isEditCorrectionError(errorType)) {
++mEditCorrectionCount;
}
@@ -169,8 +169,8 @@ class DicNodeStateScoring {
}
}
- bool isExactMatch() const {
- return ErrorTypeUtils::isExactMatch(mContainingErrorTypes);
+ ErrorTypeUtils::ErrorType getContainedErrorTypes() const {
+ return mContainedErrorTypes;
}
private:
@@ -188,7 +188,7 @@ class DicNodeStateScoring {
float mLanguageDistance;
float mRawLength;
// All accumulated error types so far
- ErrorTypeUtils::ErrorType mContainingErrorTypes;
+ ErrorTypeUtils::ErrorType mContainedErrorTypes;
float mNormalizedCompoundDistanceAfterFirstWord;
AK_FORCE_INLINE void addDistance(float spatialDistance, float languageDistance,
diff --git a/native/jni/src/suggest/core/dictionary/error_type_utils.h b/native/jni/src/suggest/core/dictionary/error_type_utils.h
index ab4a65e48..1122291a6 100644
--- a/native/jni/src/suggest/core/dictionary/error_type_utils.h
+++ b/native/jni/src/suggest/core/dictionary/error_type_utils.h
@@ -47,9 +47,8 @@ class ErrorTypeUtils {
// A new word error should be an edit correction error or a proximity correction error.
static const ErrorType NEW_WORD;
- // TODO: Differentiate errors.
- static bool isExactMatch(const ErrorType containingErrors) {
- return (containingErrors & ~ERRORS_TREATED_AS_AN_EXACT_MATCH) == 0;
+ static bool isExactMatch(const ErrorType containedErrorTypes) {
+ return (containedErrorTypes & ~ERRORS_TREATED_AS_AN_EXACT_MATCH) == 0;
}
static bool isEditCorrectionError(const ErrorType errorType) {
diff --git a/native/jni/src/suggest/core/dictionary/shortcut_utils.h b/native/jni/src/suggest/core/dictionary/shortcut_utils.h
deleted file mode 100644
index 9ccef020f..000000000
--- a/native/jni/src/suggest/core/dictionary/shortcut_utils.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_SHORTCUT_UTILS
-#define LATINIME_SHORTCUT_UTILS
-
-#include "defines.h"
-#include "suggest/core/dicnode/dic_node_utils.h"
-#include "suggest/core/dictionary/binary_dictionary_shortcut_iterator.h"
-
-namespace latinime {
-
-class ShortcutUtils {
- public:
- static int outputShortcuts(BinaryDictionaryShortcutIterator *const shortcutIt,
- int outputWordIndex, const int finalScore, int *const outputCodePoints,
- int *const frequencies, int *const outputTypes, const bool sameAsTyped) {
- int shortcutTarget[MAX_WORD_LENGTH];
- while (shortcutIt->hasNextShortcutTarget() && outputWordIndex < MAX_RESULTS) {
- bool isWhilelist;
- int shortcutTargetStringLength;
- shortcutIt->nextShortcutTarget(MAX_WORD_LENGTH, shortcutTarget,
- &shortcutTargetStringLength, &isWhilelist);
- int shortcutScore;
- int kind;
- if (isWhilelist && sameAsTyped) {
- shortcutScore = S_INT_MAX;
- kind = Dictionary::KIND_WHITELIST;
- } else {
- // shortcut entry's score == its base entry's score - 1
- shortcutScore = finalScore;
- // Protection against int underflow
- shortcutScore = max(S_INT_MIN + 1, shortcutScore) - 1;
- kind = Dictionary::KIND_SHORTCUT;
- }
- outputTypes[outputWordIndex] = kind;
- frequencies[outputWordIndex] = shortcutScore;
- frequencies[outputWordIndex] = max(S_INT_MIN + 1, shortcutScore) - 1;
- const int startIndex2 = outputWordIndex * MAX_WORD_LENGTH;
- DicNodeUtils::appendTwoWords(0, 0, shortcutTarget, shortcutTargetStringLength,
- &outputCodePoints[startIndex2]);
- ++outputWordIndex;
- }
- return outputWordIndex;
- }
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(ShortcutUtils);
-};
-} // namespace latinime
-#endif // LATINIME_SHORTCUT_UTILS
diff --git a/native/jni/src/suggest/core/dictionary/suggestions_output_utils.cpp b/native/jni/src/suggest/core/dictionary/suggestions_output_utils.cpp
new file mode 100644
index 000000000..d219757da
--- /dev/null
+++ b/native/jni/src/suggest/core/dictionary/suggestions_output_utils.cpp
@@ -0,0 +1,265 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/core/dictionary/suggestions_output_utils.h"
+
+#include "suggest/core/dicnode/dic_node.h"
+#include "suggest/core/dicnode/dic_node_utils.h"
+#include "suggest/core/dictionary/binary_dictionary_shortcut_iterator.h"
+#include "suggest/core/dictionary/dictionary.h"
+#include "suggest/core/dictionary/error_type_utils.h"
+#include "suggest/core/policy/scoring.h"
+#include "suggest/core/session/dic_traverse_session.h"
+
+namespace latinime {
+
+const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
+
+// TODO: Split this method.
+/* static */ int SuggestionsOutputUtils::outputSuggestions(
+ const Scoring *const scoringPolicy, DicTraverseSession *traverseSession,
+ int *frequencies, int *outputCodePoints, int *outputIndicesToPartialCommit,
+ int *outputTypes, int *outputAutoCommitFirstWordConfidence) {
+#if DEBUG_EVALUATE_MOST_PROBABLE_STRING
+ const int terminalSize = 0;
+#else
+ const int terminalSize = min(MAX_RESULTS,
+ static_cast<int>(traverseSession->getDicTraverseCache()->terminalSize()));
+#endif
+ DicNode terminals[MAX_RESULTS]; // Avoiding non-POD variable length array
+
+ for (int index = terminalSize - 1; index >= 0; --index) {
+ traverseSession->getDicTraverseCache()->popTerminal(&terminals[index]);
+ }
+
+ const float languageWeight = scoringPolicy->getAdjustedLanguageWeight(
+ traverseSession, terminals, terminalSize);
+
+ int outputWordIndex = 0;
+ // Insert the most probable word at index == 0 as long as there is at least one terminal
+ const bool hasMostProbableString =
+ scoringPolicy->getMostProbableString(traverseSession, terminalSize, languageWeight,
+ &outputCodePoints[0], &outputTypes[0], &frequencies[0]);
+ if (hasMostProbableString) {
+ outputIndicesToPartialCommit[outputWordIndex] = NOT_AN_INDEX;
+ ++outputWordIndex;
+ }
+
+ // Passed by pointer to searchWordWithDoubleLetter(); preset to -1 / NOT_A_DOUBLE_LETTER
+ int doubleLetterTerminalIndex = -1;
+ DoubleLetterLevel doubleLetterLevel = NOT_A_DOUBLE_LETTER;
+ scoringPolicy->searchWordWithDoubleLetter(terminals, terminalSize,
+ &doubleLetterTerminalIndex, &doubleLetterLevel);
+
+ int maxScore = S_INT_MIN;
+ // Force autocorrection for obvious long multi-word suggestions when the top suggestion is
+ // a long multiple words suggestion.
+ // TODO: Implement a smarter auto-commit method for handling multi-word suggestions.
+ // traverseSession->isPartiallyCommited() always returns false because we never auto partial
+ // commit for now.
+ const bool forceCommitMultiWords = (terminalSize > 0) ?
+ scoringPolicy->autoCorrectsToMultiWordSuggestionIfTop()
+ && (traverseSession->isPartiallyCommited()
+ || (traverseSession->getInputSize()
+ >= MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT
+ && terminals[0].hasMultipleWords())) : false;
+ // TODO: have partial commit work even with multiple pointers.
+ const bool outputSecondWordFirstLetterInputIndex =
+ traverseSession->isOnlyOnePointerUsed(0 /* pointerId */);
+ if (terminalSize > 0) {
+ // The confidence is only written when there is at least one terminal
+ outputAutoCommitFirstWordConfidence[0] =
+ computeFirstWordConfidence(&terminals[0]);
+ }
+
+ // Output suggestion results here
+ for (int terminalIndex = 0; terminalIndex < terminalSize && outputWordIndex < MAX_RESULTS;
+ ++terminalIndex) {
+ DicNode *terminalDicNode = &terminals[terminalIndex];
+ if (DEBUG_GEO_FULL) {
+ terminalDicNode->dump("OUT:");
+ }
+ const float doubleLetterCost = scoringPolicy->getDoubleLetterDemotionDistanceCost(
+ terminalIndex, doubleLetterTerminalIndex, doubleLetterLevel);
+ const float compoundDistance = terminalDicNode->getCompoundDistance(languageWeight)
+ + doubleLetterCost;
+ const bool isPossiblyOffensiveWord =
+ traverseSession->getDictionaryStructurePolicy()->getProbability(
+ terminalDicNode->getProbability(), NOT_A_PROBABILITY) <= 0;
+ const bool isExactMatch =
+ ErrorTypeUtils::isExactMatch(terminalDicNode->getContainedErrorTypes());
+ const bool isFirstCharUppercase = terminalDicNode->isFirstCharUppercase();
+ // Heuristic: We exclude freq=0 first-char-uppercase words from exact match.
+ // (e.g. "AMD" and "and")
+ const bool isSafeExactMatch = isExactMatch
+ && !(isPossiblyOffensiveWord && isFirstCharUppercase);
+ const int outputTypeFlags =
+ (isPossiblyOffensiveWord ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0)
+ | (isSafeExactMatch ? Dictionary::KIND_FLAG_EXACT_MATCH : 0);
+
+ // Entries that are blacklisted or do not represent a word should not be output.
+ const bool isValidWord = !terminalDicNode->isBlacklistedOrNotAWord();
+
+ // Increase output score of top typing suggestion to ensure autocorrection.
+ // TODO: Better integration with java side autocorrection logic.
+ const int finalScore = scoringPolicy->calculateFinalScore(
+ compoundDistance, traverseSession->getInputSize(),
+ terminalDicNode->getContainedErrorTypes(),
+ (forceCommitMultiWords && terminalDicNode->hasMultipleWords())
+ || (isValidWord && scoringPolicy->doesAutoCorrectValidWord()));
+ if (maxScore < finalScore && isValidWord) {
+ maxScore = finalScore;
+ }
+
+ // Don't output invalid words. However, we still need to submit their shortcuts if any.
+ if (isValidWord) {
+ outputTypes[outputWordIndex] = Dictionary::KIND_CORRECTION | outputTypeFlags;
+ frequencies[outputWordIndex] = finalScore;
+ if (outputSecondWordFirstLetterInputIndex) {
+ outputIndicesToPartialCommit[outputWordIndex] =
+ terminalDicNode->getSecondWordFirstInputIndex(
+ traverseSession->getProximityInfoState(0));
+ } else {
+ outputIndicesToPartialCommit[outputWordIndex] = NOT_AN_INDEX;
+ }
+ // Populate the outputCodePoints array with the suggested word.
+ const int startIndex = outputWordIndex * MAX_WORD_LENGTH;
+ terminalDicNode->outputResult(&outputCodePoints[startIndex]);
+ ++outputWordIndex;
+ }
+
+ if (!terminalDicNode->hasMultipleWords()) {
+ BinaryDictionaryShortcutIterator shortcutIt(
+ traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(),
+ traverseSession->getDictionaryStructurePolicy()
+ ->getShortcutPositionOfPtNode(terminalDicNode->getPtNodePos()));
+ // Shortcut is not supported for multiple words suggestions.
+ // TODO: Check shortcuts during traversal for multiple words suggestions.
+ const bool sameAsTyped = scoringPolicy->sameAsTyped(traverseSession, terminalDicNode);
+ const int shortcutBaseScore = scoringPolicy->doesAutoCorrectValidWord() ?
+ scoringPolicy->calculateFinalScore(compoundDistance,
+ traverseSession->getInputSize(),
+ terminalDicNode->getContainedErrorTypes(),
+ true /* forceCommit */) : finalScore;
+ const int updatedOutputWordIndex = outputShortcuts(&shortcutIt,
+ outputWordIndex, shortcutBaseScore, outputCodePoints, frequencies, outputTypes,
+ sameAsTyped);
+ const int secondWordFirstInputIndex = terminalDicNode->getSecondWordFirstInputIndex(
+ traverseSession->getProximityInfoState(0));
+ for (int i = outputWordIndex; i < updatedOutputWordIndex; ++i) {
+ if (outputSecondWordFirstLetterInputIndex) {
+ outputIndicesToPartialCommit[i] = secondWordFirstInputIndex;
+ } else {
+ outputIndicesToPartialCommit[i] = NOT_AN_INDEX;
+ }
+ }
+ outputWordIndex = updatedOutputWordIndex;
+ }
+ DicNode::managedDelete(terminalDicNode);
+ }
+
+ if (hasMostProbableString) {
+ scoringPolicy->safetyNetForMostProbableString(terminalSize, maxScore,
+ &outputCodePoints[0], &frequencies[0]);
+ }
+ return outputWordIndex;
+}
+
+/* static */ int SuggestionsOutputUtils::computeFirstWordConfidence(
+ const DicNode *const terminalDicNode) {
+ // Get the number of spaces in the first suggestion
+ const int spaceCount = terminalDicNode->getTotalNodeSpaceCount();
+ // Get the number of characters in the first suggestion
+ const int length = terminalDicNode->getTotalNodeCodePointCount();
+ // Get the distance for the first word of the suggestion
+ const float distance = terminalDicNode->getNormalizedCompoundDistanceAfterFirstWord();
+
+ // Arbitrarily, we give a score whose useful values range from 0 to 1,000,000.
+ // 1,000,000 will be the cutoff to auto-commit. It's fine if the number is under 0 or
+ // above 1,000,000 : under 0 just means it's very bad to commit, and above 1,000,000 means
+ // we are very confident.
+ // Expected space count is 1 ~ 5
+ static const int MIN_EXPECTED_SPACE_COUNT = 1;
+ static const int MAX_EXPECTED_SPACE_COUNT = 5;
+ // Expected length is about 4 ~ 30
+ static const int MIN_EXPECTED_LENGTH = 4;
+ static const int MAX_EXPECTED_LENGTH = 30;
+ // Expected distance is about 0.2 ~ 2.0, but consider 0.0 ~ 2.0
+ static const float MIN_EXPECTED_DISTANCE = 0.0;
+ static const float MAX_EXPECTED_DISTANCE = 2.0;
+ // This is not strict: it's where most stuff will be falling, but it's still fine if it's
+ // outside these values. We want to output a value that reflects all of these. Each factor
+ // contributes a bit.
+
+ // We need at least a space.
+ if (spaceCount < 1) return NOT_A_FIRST_WORD_CONFIDENCE;
+
+ // The smaller the edit distance, the higher the contribution. MAX_EXPECTED_DISTANCE means 0
+ // contribution, while MIN_EXPECTED_DISTANCE means full contribution according to the
+ // weight of the distance. Clamp to avoid overflows.
+ const float clampedDistance = distance < MIN_EXPECTED_DISTANCE ? MIN_EXPECTED_DISTANCE
+ : distance > MAX_EXPECTED_DISTANCE ? MAX_EXPECTED_DISTANCE : distance;
+ const int distanceContribution = DISTANCE_WEIGHT_FOR_AUTO_COMMIT
+ * (MAX_EXPECTED_DISTANCE - clampedDistance)
+ / (MAX_EXPECTED_DISTANCE - MIN_EXPECTED_DISTANCE);
+ // The larger the suggestion length, the larger the contribution. MIN_EXPECTED_LENGTH is no
+ // contribution, MAX_EXPECTED_LENGTH is full contribution according to the weight of the
+ // length. Length is guaranteed to be between 1 and 48, so we don't need to clamp.
+ const int lengthContribution = LENGTH_WEIGHT_FOR_AUTO_COMMIT
+ * (length - MIN_EXPECTED_LENGTH) / (MAX_EXPECTED_LENGTH - MIN_EXPECTED_LENGTH);
+ // The more spaces, the larger the contribution. MIN_EXPECTED_SPACE_COUNT space is no
+ // contribution, MAX_EXPECTED_SPACE_COUNT spaces is full contribution according to the
+ // weight of the space count.
+ const int spaceContribution = SPACE_COUNT_WEIGHT_FOR_AUTO_COMMIT
+ * (spaceCount - MIN_EXPECTED_SPACE_COUNT)
+ / (MAX_EXPECTED_SPACE_COUNT - MIN_EXPECTED_SPACE_COUNT);
+
+ return distanceContribution + lengthContribution + spaceContribution;
+}
+
+// Appends the targets yielded by shortcutIt to the output arrays, starting at outputWordIndex
+// and stopping once MAX_RESULTS entries are filled. Returns the updated outputWordIndex.
+/* static */ int SuggestionsOutputUtils::outputShortcuts(
+        BinaryDictionaryShortcutIterator *const shortcutIt,
+        int outputWordIndex, const int finalScore, int *const outputCodePoints,
+        int *const frequencies, int *const outputTypes, const bool sameAsTyped) {
+    int shortcutTarget[MAX_WORD_LENGTH];
+    while (shortcutIt->hasNextShortcutTarget() && outputWordIndex < MAX_RESULTS) {
+        bool isWhitelist;
+        int shortcutTargetStringLength;
+        shortcutIt->nextShortcutTarget(MAX_WORD_LENGTH, shortcutTarget,
+                &shortcutTargetStringLength, &isWhitelist);
+        int shortcutScore;
+        int kind;
+        if (isWhitelist && sameAsTyped) {
+            shortcutScore = S_INT_MAX;
+            kind = Dictionary::KIND_WHITELIST;
+        } else {
+            // Shortcut score == base entry's score - 1; clamp first to avoid int underflow.
+            shortcutScore = max(S_INT_MIN + 1, finalScore) - 1;
+            kind = Dictionary::KIND_SHORTCUT;
+        }
+        outputTypes[outputWordIndex] = kind;
+        // The stored score is one below shortcutScore; the clamp again guards the subtraction.
+        frequencies[outputWordIndex] = max(S_INT_MIN + 1, shortcutScore) - 1;
+        const int startIndex2 = outputWordIndex * MAX_WORD_LENGTH;
+        DicNodeUtils::appendTwoWords(0, 0, shortcutTarget, shortcutTargetStringLength,
+                &outputCodePoints[startIndex2]);
+        ++outputWordIndex;
+    }
+    return outputWordIndex;
+}
+} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/suggestions_output_utils.h b/native/jni/src/suggest/core/dictionary/suggestions_output_utils.h
new file mode 100644
index 000000000..460e26082
--- /dev/null
+++ b/native/jni/src/suggest/core/dictionary/suggestions_output_utils.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SUGGESTIONS_OUTPUT_UTILS
+#define LATINIME_SUGGESTIONS_OUTPUT_UTILS
+
+#include "defines.h"
+
+namespace latinime {
+
+class BinaryDictionaryShortcutIterator;
+class DicNode;
+class DicTraverseSession;
+class Scoring;
+
+class SuggestionsOutputUtils {
+ public:
+ /**
+ * Outputs the final list of suggestions (i.e., terminal nodes) and returns how many were output.
+ */
+ static int outputSuggestions(const Scoring *const scoringPolicy,
+ DicTraverseSession *traverseSession, int *frequencies, int *outputCodePoints,
+ int *outputIndicesToPartialCommit, int *outputTypes,
+ int *outputAutoCommitFirstWordConfidence);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(SuggestionsOutputUtils);
+
+ // Inputs at least this long may be force-committed when the top suggestion is multi-word
+ static const int MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT;
+
+ static int computeFirstWordConfidence(const DicNode *const terminalDicNode);
+
+ static int outputShortcuts(BinaryDictionaryShortcutIterator *const shortcutIt,
+ int outputWordIndex, const int finalScore, int *const outputCodePoints,
+ int *const frequencies, int *const outputTypes, const bool sameAsTyped);
+};
+} // namespace latinime
+#endif // LATINIME_SUGGESTIONS_OUTPUT_UTILS
diff --git a/native/jni/src/suggest/core/policy/scoring.h b/native/jni/src/suggest/core/policy/scoring.h
index 102e856f5..5ae3d2146 100644
--- a/native/jni/src/suggest/core/policy/scoring.h
+++ b/native/jni/src/suggest/core/policy/scoring.h
@@ -28,7 +28,7 @@ class DicTraverseSession;
class Scoring {
public:
virtual int calculateFinalScore(const float compoundDistance, const int inputSize,
- const bool forceCommit) const = 0;
+ const ErrorTypeUtils::ErrorType containedErrorTypes, const bool forceCommit) const = 0;
virtual bool getMostProbableString(const DicTraverseSession *const traverseSession,
const int terminalSize, const float languageWeight, int *const outputCodePoints,
int *const type, int *const freq) const = 0;
@@ -43,6 +43,9 @@ class Scoring {
const int doubleLetterTerminalIndex,
const DoubleLetterLevel doubleLetterLevel) const = 0;
virtual bool doesAutoCorrectValidWord() const = 0;
+ virtual bool autoCorrectsToMultiWordSuggestionIfTop() const = 0;
+ virtual bool sameAsTyped(const DicTraverseSession *const traverseSession,
+ const DicNode *const dicNode) const = 0;
protected:
Scoring() {}
diff --git a/native/jni/src/suggest/core/policy/traversal.h b/native/jni/src/suggest/core/policy/traversal.h
index e935533f2..d3b8da0cc 100644
--- a/native/jni/src/suggest/core/policy/traversal.h
+++ b/native/jni/src/suggest/core/policy/traversal.h
@@ -41,11 +41,8 @@ class Traversal {
const DicNode *const dicNode) const = 0;
virtual ProximityType getProximityType(const DicTraverseSession *const traverseSession,
const DicNode *const dicNode, const DicNode *const childDicNode) const = 0;
- virtual bool sameAsTyped(const DicTraverseSession *const traverseSession,
- const DicNode *const dicNode) const = 0;
virtual bool needsToTraverseAllUserInput() const = 0;
virtual float getMaxSpatialDistance() const = 0;
- virtual bool autoCorrectsToMultiWordSuggestionIfTop() const = 0;
virtual int getDefaultExpandDicNodeSize() const = 0;
virtual int getMaxCacheSize(const int inputSize) const = 0;
virtual bool isPossibleOmissionChildNode(const DicTraverseSession *const traverseSession,
diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp
index 5377ec4e8..7cd237284 100644
--- a/native/jni/src/suggest/core/suggest.cpp
+++ b/native/jni/src/suggest/core/suggest.cpp
@@ -19,13 +19,11 @@
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_priority_queue.h"
#include "suggest/core/dicnode/dic_node_vector.h"
-#include "suggest/core/dictionary/binary_dictionary_shortcut_iterator.h"
#include "suggest/core/dictionary/dictionary.h"
#include "suggest/core/dictionary/digraph_utils.h"
-#include "suggest/core/dictionary/shortcut_utils.h"
+#include "suggest/core/dictionary/suggestions_output_utils.h"
#include "suggest/core/layout/proximity_info.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
-#include "suggest/core/policy/scoring.h"
#include "suggest/core/policy/traversal.h"
#include "suggest/core/policy/weighting.h"
#include "suggest/core/session/dic_traverse_session.h"
@@ -33,9 +31,7 @@
namespace latinime {
// Initialization of class constants.
-const int Suggest::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
const int Suggest::MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE = 2;
-const float Suggest::AUTOCORRECT_CLASSIFICATION_THRESHOLD = 0.33f;
/**
* Returns a set of suggestions for the given input touch points. The commitPoint argument indicates
@@ -70,8 +66,8 @@ int Suggest::getSuggestions(ProximityInfo *pInfo, void *traverseSession,
}
PROF_END(1);
PROF_START(2);
- const int size = outputSuggestions(tSession, frequencies, outWords, outputIndices, outputTypes,
- outputAutoCommitFirstWordConfidence);
+ const int size = SuggestionsOutputUtils::outputSuggestions(SCORING, tSession, frequencies,
+ outWords, outputIndices, outputTypes, outputAutoCommitFirstWordConfidence);
PROF_END(2);
PROF_CLOSE;
return size;
@@ -115,205 +111,6 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPo
}
/**
- * Outputs the final list of suggestions (i.e., terminal nodes).
- */
-int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequencies,
- int *outputCodePoints, int *outputIndicesToPartialCommit, int *outputTypes,
- int *outputAutoCommitFirstWordConfidence) const {
-#if DEBUG_EVALUATE_MOST_PROBABLE_STRING
- const int terminalSize = 0;
-#else
- const int terminalSize = min(MAX_RESULTS,
- static_cast<int>(traverseSession->getDicTraverseCache()->terminalSize()));
-#endif
- DicNode terminals[MAX_RESULTS]; // Avoiding non-POD variable length array
-
- for (int index = terminalSize - 1; index >= 0; --index) {
- traverseSession->getDicTraverseCache()->popTerminal(&terminals[index]);
- }
-
- const float languageWeight = SCORING->getAdjustedLanguageWeight(
- traverseSession, terminals, terminalSize);
-
- int outputWordIndex = 0;
- // Insert most probable word at index == 0 as long as there is one terminal at least
- const bool hasMostProbableString =
- SCORING->getMostProbableString(traverseSession, terminalSize, languageWeight,
- &outputCodePoints[0], &outputTypes[0], &frequencies[0]);
- if (hasMostProbableString) {
- outputIndicesToPartialCommit[outputWordIndex] = NOT_AN_INDEX;
- ++outputWordIndex;
- }
-
- // Initial value of the loop index for terminal nodes (words)
- int doubleLetterTerminalIndex = -1;
- DoubleLetterLevel doubleLetterLevel = NOT_A_DOUBLE_LETTER;
- SCORING->searchWordWithDoubleLetter(terminals, terminalSize,
- &doubleLetterTerminalIndex, &doubleLetterLevel);
-
- int maxScore = S_INT_MIN;
- // Force autocorrection for obvious long multi-word suggestions when the top suggestion is
- // a long multiple words suggestion.
- // TODO: Implement a smarter auto-commit method for handling multi-word suggestions.
- // traverseSession->isPartiallyCommited() always returns false because we never auto partial
- // commit for now.
- const bool forceCommitMultiWords = (terminalSize > 0) ?
- TRAVERSAL->autoCorrectsToMultiWordSuggestionIfTop()
- && (traverseSession->isPartiallyCommited()
- || (traverseSession->getInputSize()
- >= MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT
- && terminals[0].hasMultipleWords())) : false;
- // TODO: have partial commit work even with multiple pointers.
- const bool outputSecondWordFirstLetterInputIndex =
- traverseSession->isOnlyOnePointerUsed(0 /* pointerId */);
- if (terminalSize > 0) {
- // If we have no suggestions, don't write this
- outputAutoCommitFirstWordConfidence[0] =
- computeFirstWordConfidence(&terminals[0]);
- }
-
- // Output suggestion results here
- for (int terminalIndex = 0; terminalIndex < terminalSize && outputWordIndex < MAX_RESULTS;
- ++terminalIndex) {
- DicNode *terminalDicNode = &terminals[terminalIndex];
- if (DEBUG_GEO_FULL) {
- terminalDicNode->dump("OUT:");
- }
- const float doubleLetterCost = SCORING->getDoubleLetterDemotionDistanceCost(
- terminalIndex, doubleLetterTerminalIndex, doubleLetterLevel);
- const float compoundDistance = terminalDicNode->getCompoundDistance(languageWeight)
- + doubleLetterCost;
- const bool isPossiblyOffensiveWord =
- traverseSession->getDictionaryStructurePolicy()->getProbability(
- terminalDicNode->getProbability(), NOT_A_PROBABILITY) <= 0;
- const bool isExactMatch = terminalDicNode->isExactMatch();
- const bool isFirstCharUppercase = terminalDicNode->isFirstCharUppercase();
- // Heuristic: We exclude freq=0 first-char-uppercase words from exact match.
- // (e.g. "AMD" and "and")
- const bool isSafeExactMatch = isExactMatch
- && !(isPossiblyOffensiveWord && isFirstCharUppercase);
- const int outputTypeFlags =
- (isPossiblyOffensiveWord ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0)
- | (isSafeExactMatch ? Dictionary::KIND_FLAG_EXACT_MATCH : 0);
-
- // Entries that are blacklisted or do not represent a word should not be output.
- const bool isValidWord = !terminalDicNode->isBlacklistedOrNotAWord();
-
- // Increase output score of top typing suggestion to ensure autocorrection.
- // TODO: Better integration with java side autocorrection logic.
- const int finalScore = SCORING->calculateFinalScore(
- compoundDistance, traverseSession->getInputSize(),
- terminalDicNode->isExactMatch()
- || (forceCommitMultiWords && terminalDicNode->hasMultipleWords())
- || (isValidWord && SCORING->doesAutoCorrectValidWord()));
- if (maxScore < finalScore && isValidWord) {
- maxScore = finalScore;
- }
-
- // Don't output invalid words. However, we still need to submit their shortcuts if any.
- if (isValidWord) {
- outputTypes[outputWordIndex] = Dictionary::KIND_CORRECTION | outputTypeFlags;
- frequencies[outputWordIndex] = finalScore;
- if (outputSecondWordFirstLetterInputIndex) {
- outputIndicesToPartialCommit[outputWordIndex] =
- terminalDicNode->getSecondWordFirstInputIndex(
- traverseSession->getProximityInfoState(0));
- } else {
- outputIndicesToPartialCommit[outputWordIndex] = NOT_AN_INDEX;
- }
- // Populate the outputChars array with the suggested word.
- const int startIndex = outputWordIndex * MAX_WORD_LENGTH;
- terminalDicNode->outputResult(&outputCodePoints[startIndex]);
- ++outputWordIndex;
- }
-
- if (!terminalDicNode->hasMultipleWords()) {
- BinaryDictionaryShortcutIterator shortcutIt(
- traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(),
- traverseSession->getDictionaryStructurePolicy()
- ->getShortcutPositionOfPtNode(terminalDicNode->getPtNodePos()));
- // Shortcut is not supported for multiple words suggestions.
- // TODO: Check shortcuts during traversal for multiple words suggestions.
- const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode);
- const int shortcutBaseScore = SCORING->doesAutoCorrectValidWord() ?
- SCORING->calculateFinalScore(compoundDistance, traverseSession->getInputSize(),
- true /* forceCommit */) : finalScore;
- const int updatedOutputWordIndex = ShortcutUtils::outputShortcuts(&shortcutIt,
- outputWordIndex, shortcutBaseScore, outputCodePoints, frequencies, outputTypes,
- sameAsTyped);
- const int secondWordFirstInputIndex = terminalDicNode->getSecondWordFirstInputIndex(
- traverseSession->getProximityInfoState(0));
- for (int i = outputWordIndex; i < updatedOutputWordIndex; ++i) {
- if (outputSecondWordFirstLetterInputIndex) {
- outputIndicesToPartialCommit[i] = secondWordFirstInputIndex;
- } else {
- outputIndicesToPartialCommit[i] = NOT_AN_INDEX;
- }
- }
- outputWordIndex = updatedOutputWordIndex;
- }
- DicNode::managedDelete(terminalDicNode);
- }
-
- if (hasMostProbableString) {
- SCORING->safetyNetForMostProbableString(terminalSize, maxScore,
- &outputCodePoints[0], &frequencies[0]);
- }
- return outputWordIndex;
-}
-
-int Suggest::computeFirstWordConfidence(const DicNode *const terminalDicNode) const {
- // Get the number of spaces in the first suggestion
- const int spaceCount = terminalDicNode->getTotalNodeSpaceCount();
- // Get the number of characters in the first suggestion
- const int length = terminalDicNode->getTotalNodeCodePointCount();
- // Get the distance for the first word of the suggestion
- const float distance = terminalDicNode->getNormalizedCompoundDistanceAfterFirstWord();
-
- // Arbitrarily, we give a score whose useful values range from 0 to 1,000,000.
- // 1,000,000 will be the cutoff to auto-commit. It's fine if the number is under 0 or
- // above 1,000,000 : under 0 just means it's very bad to commit, and above 1,000,000 means
- // we are very confident.
- // Expected space count is 1 ~ 5
- static const int MIN_EXPECTED_SPACE_COUNT = 1;
- static const int MAX_EXPECTED_SPACE_COUNT = 5;
- // Expected length is about 4 ~ 30
- static const int MIN_EXPECTED_LENGTH = 4;
- static const int MAX_EXPECTED_LENGTH = 30;
- // Expected distance is about 0.2 ~ 2.0, but consider 0.0 ~ 2.0
- static const float MIN_EXPECTED_DISTANCE = 0.0;
- static const float MAX_EXPECTED_DISTANCE = 2.0;
- // This is not strict: it's where most stuff will be falling, but it's still fine if it's
- // outside these values. We want to output a value that reflects all of these. Each factor
- // contributes a bit.
-
- // We need at least a space.
- if (spaceCount < 1) return NOT_A_FIRST_WORD_CONFIDENCE;
-
- // The smaller the edit distance, the higher the contribution. MIN_EXPECTED_DISTANCE means 0
- // contribution, while MAX_EXPECTED_DISTANCE means full contribution according to the
- // weight of the distance. Clamp to avoid overflows.
- const float clampedDistance = distance < MIN_EXPECTED_DISTANCE ? MIN_EXPECTED_DISTANCE
- : distance > MAX_EXPECTED_DISTANCE ? MAX_EXPECTED_DISTANCE : distance;
- const int distanceContribution = DISTANCE_WEIGHT_FOR_AUTO_COMMIT
- * (MAX_EXPECTED_DISTANCE - clampedDistance)
- / (MAX_EXPECTED_DISTANCE - MIN_EXPECTED_DISTANCE);
- // The larger the suggestion length, the larger the contribution. MIN_EXPECTED_LENGTH is no
- // contribution, MAX_EXPECTED_LENGTH is full contribution according to the weight of the
- // length. Length is guaranteed to be between 1 and 48, so we don't need to clamp.
- const int lengthContribution = LENGTH_WEIGHT_FOR_AUTO_COMMIT
- * (length - MIN_EXPECTED_LENGTH) / (MAX_EXPECTED_LENGTH - MIN_EXPECTED_LENGTH);
- // The more spaces, the larger the contribution. MIN_EXPECTED_SPACE_COUNT space is no
- // contribution, MAX_EXPECTED_SPACE_COUNT spaces is full contribution according to the
- // weight of the space count.
- const int spaceContribution = SPACE_COUNT_WEIGHT_FOR_AUTO_COMMIT
- * (spaceCount - MIN_EXPECTED_SPACE_COUNT)
- / (MAX_EXPECTED_SPACE_COUNT - MIN_EXPECTED_SPACE_COUNT);
-
- return distanceContribution + lengthContribution + spaceContribution;
-}
-
-/**
* Expands the dicNodes in the current search priority queue by advancing to the possible child
* nodes based on the next touch point(s) (or no touch points for lookahead)
*/
diff --git a/native/jni/src/suggest/core/suggest.h b/native/jni/src/suggest/core/suggest.h
index b20343d29..5feb04faa 100644
--- a/native/jni/src/suggest/core/suggest.h
+++ b/native/jni/src/suggest/core/suggest.h
@@ -55,18 +55,11 @@ class Suggest : public SuggestInterface {
DISALLOW_IMPLICIT_CONSTRUCTORS(Suggest);
void createNextWordDicNode(DicTraverseSession *traverseSession, DicNode *dicNode,
const bool spaceSubstitution) const;
- int outputSuggestions(DicTraverseSession *traverseSession, int *frequencies,
- int *outputCodePoints, int *outputIndicesToPartialCommit, int *outputTypes,
- int *outputAutoCommitFirstWordConfidence) const;
- int computeFirstWordConfidence(const DicNode *const terminalDicNode) const;
void initializeSearch(DicTraverseSession *traverseSession, int commitPoint) const;
void expandCurrentDicNodes(DicTraverseSession *traverseSession) const;
void processTerminalDicNode(DicTraverseSession *traverseSession, DicNode *dicNode) const;
void processExpandedDicNode(DicTraverseSession *traverseSession, DicNode *dicNode) const;
void weightChildNode(DicTraverseSession *traverseSession, DicNode *dicNode) const;
- float getAutocorrectScore(DicTraverseSession *traverseSession, DicNode *dicNode) const;
- void generateFeatures(
- DicTraverseSession *traverseSession, DicNode *dicNode, float *features) const;
void processDicNodeAsOmission(DicTraverseSession *traverseSession, DicNode *dicNode) const;
void processDicNodeAsDigraph(DicTraverseSession *traverseSession, DicNode *dicNode) const;
void processDicNodeAsTransposition(DicTraverseSession *traverseSession,
@@ -79,13 +72,8 @@ class Suggest : public SuggestInterface {
void processDicNodeAsMatch(DicTraverseSession *traverseSession,
DicNode *childDicNode) const;
- // Inputs longer than this will autocorrect if the suggestion is multi-word
- static const int MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT;
static const int MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE;
- // Threshold for autocorrection classifier
- static const float AUTOCORRECT_CLASSIFICATION_THRESHOLD;
-
const Traversal *const TRAVERSAL;
const Scoring *const SCORING;
const Weighting *const WEIGHTING;
diff --git a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp
index 104eb2a7a..7b332064c 100644
--- a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp
+++ b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp
@@ -22,6 +22,12 @@ const float ScoringParams::MAX_SPATIAL_DISTANCE = 1.0f;
const int ScoringParams::THRESHOLD_NEXT_WORD_PROBABILITY = 40;
const int ScoringParams::THRESHOLD_NEXT_WORD_PROBABILITY_FOR_CAPPED = 120;
const float ScoringParams::AUTOCORRECT_OUTPUT_THRESHOLD = 1.0f;
+
+const float ScoringParams::EXACT_MATCH_PROMOTION = 1.1f;
+const float ScoringParams::CASE_ERROR_PENALTY_FOR_EXACT_MATCH = 0.01f;
+const float ScoringParams::ACCENT_ERROR_PENALTY_FOR_EXACT_MATCH = 0.02f;
+const float ScoringParams::DIGRAPH_PENALTY_FOR_EXACT_MATCH = 0.03f;
+
// TODO: Unlimit max cache dic node size
const int ScoringParams::MAX_CACHE_DIC_NODE_SIZE = 170;
const int ScoringParams::MAX_CACHE_DIC_NODE_SIZE_FOR_SINGLE_POINT = 310;
diff --git a/native/jni/src/suggest/policyimpl/typing/scoring_params.h b/native/jni/src/suggest/policyimpl/typing/scoring_params.h
index 7d4b5c3c7..de7410d39 100644
--- a/native/jni/src/suggest/policyimpl/typing/scoring_params.h
+++ b/native/jni/src/suggest/policyimpl/typing/scoring_params.h
@@ -32,6 +32,11 @@ class ScoringParams {
static const int MAX_CACHE_DIC_NODE_SIZE_FOR_SINGLE_POINT;
static const int THRESHOLD_SHORT_WORD_LENGTH;
+ static const float EXACT_MATCH_PROMOTION;
+ static const float CASE_ERROR_PENALTY_FOR_EXACT_MATCH;
+ static const float ACCENT_ERROR_PENALTY_FOR_EXACT_MATCH;
+ static const float DIGRAPH_PENALTY_FOR_EXACT_MATCH;
+
// Numerically optimized parameters (currently for tap typing only).
// TODO: add ability to modify these constants programmatically.
// TODO: explore optimization of gesture parameters.
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_scoring.h b/native/jni/src/suggest/policyimpl/typing/typing_scoring.h
index 56ffcc93e..186e3ba08 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_scoring.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_scoring.h
@@ -18,7 +18,9 @@
#define LATINIME_TYPING_SCORING_H
#include "defines.h"
+#include "suggest/core/dictionary/error_type_utils.h"
#include "suggest/core/policy/scoring.h"
+#include "suggest/core/session/dic_traverse_session.h"
#include "suggest/policyimpl/typing/scoring_params.h"
namespace latinime {
@@ -52,12 +54,26 @@ class TypingScoring : public Scoring {
}
AK_FORCE_INLINE int calculateFinalScore(const float compoundDistance,
- const int inputSize, const bool forceCommit) const {
+ const int inputSize, const ErrorTypeUtils::ErrorType containedErrorTypes,
+ const bool forceCommit) const {
const float maxDistance = ScoringParams::DISTANCE_WEIGHT_LANGUAGE
+ static_cast<float>(inputSize) * ScoringParams::TYPING_MAX_OUTPUT_SCORE_PER_INPUT;
- const float score = ScoringParams::TYPING_BASE_OUTPUT_SCORE
- - compoundDistance / maxDistance
- + (forceCommit ? ScoringParams::AUTOCORRECT_OUTPUT_THRESHOLD : 0.0f);
+ float score = ScoringParams::TYPING_BASE_OUTPUT_SCORE - compoundDistance / maxDistance;
+ if (forceCommit) {
+ score += ScoringParams::AUTOCORRECT_OUTPUT_THRESHOLD;
+ }
+ if (ErrorTypeUtils::isExactMatch(containedErrorTypes)) {
+ score += ScoringParams::EXACT_MATCH_PROMOTION;
+ if ((ErrorTypeUtils::MATCH_WITH_CASE_ERROR & containedErrorTypes) != 0) {
+ score -= ScoringParams::CASE_ERROR_PENALTY_FOR_EXACT_MATCH;
+ }
+ if ((ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR & containedErrorTypes) != 0) {
+ score -= ScoringParams::ACCENT_ERROR_PENALTY_FOR_EXACT_MATCH;
+ }
+ if ((ErrorTypeUtils::MATCH_WITH_DIGRAPH & containedErrorTypes) != 0) {
+ score -= ScoringParams::DIGRAPH_PENALTY_FOR_EXACT_MATCH;
+ }
+ }
return static_cast<int>(score * SUGGEST_INTERFACE_OUTPUT_SCALE);
}
@@ -71,6 +87,16 @@ class TypingScoring : public Scoring {
return false;
}
+ AK_FORCE_INLINE bool autoCorrectsToMultiWordSuggestionIfTop() const {
+ return true;
+ }
+
+ AK_FORCE_INLINE bool sameAsTyped(
+ const DicTraverseSession *const traverseSession, const DicNode *const dicNode) const {
+ return traverseSession->getProximityInfoState(0)->sameAsTyped(
+ dicNode->getOutputWordBuf(), dicNode->getNodeCodePointCount());
+ }
+
private:
DISALLOW_COPY_AND_ASSIGN(TypingScoring);
static const TypingScoring sInstance;
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h
index fd0ac9eb6..3db00ad3a 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h
@@ -137,20 +137,10 @@ class TypingTraversal : public Traversal {
return ScoringParams::MAX_SPATIAL_DISTANCE;
}
- AK_FORCE_INLINE bool autoCorrectsToMultiWordSuggestionIfTop() const {
- return true;
- }
-
AK_FORCE_INLINE int getDefaultExpandDicNodeSize() const {
return DicNodeVector::DEFAULT_NODES_SIZE_FOR_OPTIMIZATION;
}
- AK_FORCE_INLINE bool sameAsTyped(
- const DicTraverseSession *const traverseSession, const DicNode *const dicNode) const {
- return traverseSession->getProximityInfoState(0)->sameAsTyped(
- dicNode->getOutputWordBuf(), dicNode->getNodeCodePointCount());
- }
-
AK_FORCE_INLINE int getMaxCacheSize(const int inputSize) const {
return (inputSize <= 1) ? ScoringParams::MAX_CACHE_DIC_NODE_SIZE_FOR_SINGLE_POINT
: ScoringParams::MAX_CACHE_DIC_NODE_SIZE;