diff options
Diffstat (limited to 'native/jni/src')
5 files changed, 19 insertions, 14 deletions
diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h index 92783dec7..4225bb3e5 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node.h +++ b/native/jni/src/suggest/core/dicnode/dic_node.h @@ -219,7 +219,7 @@ class DicNode { return (prevWordLen == 1 && currentWordLen == 1); } - bool isCapitalized() const { + bool isFirstCharUppercase() const { const int c = getOutputWordBuf()[0]; return isAsciiUpper(c); } diff --git a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp index 299ca83ab..f87989286 100644 --- a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp +++ b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp @@ -39,7 +39,7 @@ const float ScoringParams::SPACE_SUBSTITUTION_COST = 0.319f; const float ScoringParams::ADDITIONAL_PROXIMITY_COST = 0.380f; const float ScoringParams::SUBSTITUTION_COST = 0.403f; const float ScoringParams::COST_NEW_WORD = 0.042f; -const float ScoringParams::COST_NEW_WORD_CAPITALIZED = 0.174f; +const float ScoringParams::COST_SECOND_OR_LATER_WORD_FIRST_CHAR_UPPERCASE = 0.25f; const float ScoringParams::DISTANCE_WEIGHT_LANGUAGE = 1.123f; const float ScoringParams::COST_FIRST_LOOKAHEAD = 0.545f; const float ScoringParams::COST_LOOKAHEAD = 0.073f; @@ -48,5 +48,5 @@ const float ScoringParams::HAS_EDIT_CORRECTION_TERMINAL_COST = 0.038f; const float ScoringParams::HAS_MULTI_WORD_TERMINAL_COST = 0.444f; const float ScoringParams::TYPING_BASE_OUTPUT_SCORE = 1.0f; const float ScoringParams::TYPING_MAX_OUTPUT_SCORE_PER_INPUT = 0.1f; -const float ScoringParams::MAX_NORM_DISTANCE_FOR_EDIT = 0.1f; +const float ScoringParams::NORMALIZED_SPATIAL_DISTANCE_THRESHOLD_FOR_EDIT = 0.06f; } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/typing/scoring_params.h b/native/jni/src/suggest/policyimpl/typing/scoring_params.h index 8f104b362..53ac999c1 100644 --- a/native/jni/src/suggest/policyimpl/typing/scoring_params.h +++ b/native/jni/src/suggest/policyimpl/typing/scoring_params.h @@ -48,7 +48,7 @@ class ScoringParams { static const float ADDITIONAL_PROXIMITY_COST; static const float SUBSTITUTION_COST; static const float COST_NEW_WORD; - static const float COST_NEW_WORD_CAPITALIZED; + static const float COST_SECOND_OR_LATER_WORD_FIRST_CHAR_UPPERCASE; static const float DISTANCE_WEIGHT_LANGUAGE; static const float COST_FIRST_LOOKAHEAD; static const float COST_LOOKAHEAD; @@ -57,7 +57,7 @@ class ScoringParams { static const float HAS_MULTI_WORD_TERMINAL_COST; static const float TYPING_BASE_OUTPUT_SCORE; static const float TYPING_MAX_OUTPUT_SCORE_PER_INPUT; - static const float MAX_NORM_DISTANCE_FOR_EDIT; + static const float NORMALIZED_SPATIAL_DISTANCE_THRESHOLD_FOR_EDIT; private: DISALLOW_IMPLICIT_CONSTRUCTORS(ScoringParams); diff --git a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h index fb1fb79d1..12110d54f 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h +++ b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h @@ -39,7 +39,7 @@ class TypingTraversal : public Traversal { AK_FORCE_INLINE bool allowsErrorCorrections(const DicNode *const dicNode) const { return dicNode->getNormalizedSpatialDistance() - < ScoringParams::MAX_NORM_DISTANCE_FOR_EDIT; + < ScoringParams::NORMALIZED_SPATIAL_DISTANCE_THRESHOLD_FOR_EDIT; } AK_FORCE_INLINE bool isOmission(const DicTraverseSession *const traverseSession, diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h index e6fa1bdc4..3938c0ec5 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h +++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h @@ -80,8 +80,18 @@ class TypingWeighting : public Weighting { const bool isFirstChar = pointIndex == 0; const bool isProximity = isProximityDicNode(traverseSession, dicNode); - const float cost = isProximity ? (isFirstChar ? ScoringParams::FIRST_PROXIMITY_COST + float cost = isProximity ? (isFirstChar ? ScoringParams::FIRST_PROXIMITY_COST : ScoringParams::PROXIMITY_COST) : 0.0f; + if (dicNode->getDepth() == 2) { + // At the second character of the current word, we check if the first char is uppercase + // and the word is a second or later word of a multiple word suggestion. We demote it + // if so. + const bool isSecondOrLaterWordFirstCharUppercase = + dicNode->hasMultipleWords() && dicNode->isFirstCharUppercase(); + if (isSecondOrLaterWordFirstCharUppercase) { + cost += ScoringParams::COST_SECOND_OR_LATER_WORD_FIRST_CHAR_UPPERCASE; + } + } return weightedDistance + cost; } @@ -129,10 +139,7 @@ class TypingWeighting : public Weighting { float getNewWordCost(const DicTraverseSession *const traverseSession, const DicNode *const dicNode) const { - const bool isCapitalized = dicNode->isCapitalized(); - const float cost = isCapitalized ? - ScoringParams::COST_NEW_WORD_CAPITALIZED : ScoringParams::COST_NEW_WORD; - return cost * traverseSession->getMultiWordCostMultiplier(); + return ScoringParams::COST_NEW_WORD * traverseSession->getMultiWordCostMultiplier(); } float getNewWordBigramCost(const DicTraverseSession *const traverseSession, @@ -174,9 +181,7 @@ class TypingWeighting : public Weighting { AK_FORCE_INLINE float getSpaceSubstitutionCost(const DicTraverseSession *const traverseSession, const DicNode *const dicNode) const { - const bool isCapitalized = dicNode->isCapitalized(); - const float cost = ScoringParams::SPACE_SUBSTITUTION_COST + (isCapitalized ? - ScoringParams::COST_NEW_WORD_CAPITALIZED : ScoringParams::COST_NEW_WORD); + const float cost = ScoringParams::SPACE_SUBSTITUTION_COST + ScoringParams::COST_NEW_WORD; return cost * traverseSession->getMultiWordCostMultiplier(); } |