aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/src
diff options
context:
space:
mode:
author Keisuke Kuroyanagi <ksk@google.com> 2014-10-28 17:11:14 +0900
committer Keisuke Kuroyanagi <ksk@google.com> 2014-10-28 17:11:14 +0900
commit 8a809f3433c3b96a3a9002a5bdc166f8c689eeb0 (patch)
tree 246c42f5996ce3b01764704cb21c2bf9f004642a /native/jni/src
parent dd5737b0fa3bf68f90534457ed8d23437caf5420 (diff)
download latinime-8a809f3433c3b96a3a9002a5bdc166f8c689eeb0.tar.gz
latinime-8a809f3433c3b96a3a9002a5bdc166f8c689eeb0.tar.xz
latinime-8a809f3433c3b96a3a9002a5bdc166f8c689eeb0.zip
Improve space substitution error correction.
Bug: 17432052

[Category diff]
+1 262   -1 93
+2 2     -2 18
+3 18    -3 2
+4 111   -4 148
+5 295   -5 217
+6 51    -6 276
+7 139   -7 124

[Weighted category diff]
+1 276   -1 100
+2 4     -2 20
+3 20    -3 4
+4 118   -4 160
+5 309   -5 225
+6 52    -6 298
+7 163   -7 135

show diff for ./en_user_log_phones_2011_08.csv
+1 173   -1 28
+2 2     -2 17
+3 17    -3 2
+4 63    -4 82
+5 120   -5 51
+6 24    -6 220
+7 88    -7 87

Change-Id: I9d673acb0ff632828ae2e0ead56e76e3a20411c6
Diffstat (limited to 'native/jni/src')
-rw-r--r-- native/jni/src/suggest/core/policy/weighting.cpp | 2
-rw-r--r-- native/jni/src/suggest/core/policy/weighting.h | 2
-rw-r--r-- native/jni/src/suggest/core/suggest.cpp | 3
-rw-r--r-- native/jni/src/suggest/policyimpl/typing/scoring_params.cpp | 6
-rw-r--r-- native/jni/src/suggest/policyimpl/typing/scoring_params.h | 2
-rw-r--r-- native/jni/src/suggest/policyimpl/typing/typing_weighting.h | 10
6 files changed, 14 insertions, 11 deletions
diff --git a/native/jni/src/suggest/core/policy/weighting.cpp b/native/jni/src/suggest/core/policy/weighting.cpp
index a06e7d070..450203d98 100644
--- a/native/jni/src/suggest/core/policy/weighting.cpp
+++ b/native/jni/src/suggest/core/policy/weighting.cpp
@@ -119,7 +119,7 @@ static inline void profile(const CorrectionType correctionType, DicNode *const n
return weighting->getSubstitutionCost()
+ weighting->getMatchedCost(traverseSession, dicNode, inputStateG);
case CT_NEW_WORD_SPACE_OMISSION:
- return weighting->getNewWordSpatialCost(traverseSession, dicNode, inputStateG);
+ return weighting->getSpaceOmissionCost(traverseSession, dicNode, inputStateG);
case CT_MATCH:
return weighting->getMatchedCost(traverseSession, dicNode, inputStateG);
case CT_COMPLETION:
diff --git a/native/jni/src/suggest/core/policy/weighting.h b/native/jni/src/suggest/core/policy/weighting.h
index bd6b3cf41..863c4eabe 100644
--- a/native/jni/src/suggest/core/policy/weighting.h
+++ b/native/jni/src/suggest/core/policy/weighting.h
@@ -57,7 +57,7 @@ class Weighting {
const DicTraverseSession *const traverseSession,
const DicNode *const parentDicNode, const DicNode *const dicNode) const = 0;
- virtual float getNewWordSpatialCost(const DicTraverseSession *const traverseSession,
+ virtual float getSpaceOmissionCost(const DicTraverseSession *const traverseSession,
const DicNode *const dicNode, DicNode_InputStateG *const inputStateG) const = 0;
virtual float getNewWordBigramLanguageCost(
diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp
index c71526293..68a36454e 100644
--- a/native/jni/src/suggest/core/suggest.cpp
+++ b/native/jni/src/suggest/core/suggest.cpp
@@ -160,8 +160,7 @@ void Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const {
// TODO: Remove. Do not prune node here.
const bool allowsErrorCorrections = TRAVERSAL->allowsErrorCorrections(&dicNode);
// Process for handling space substitution (e.g., hevis => he is)
- if (allowsErrorCorrections
- && TRAVERSAL->isSpaceSubstitutionTerminal(traverseSession, &dicNode)) {
+ if (TRAVERSAL->isSpaceSubstitutionTerminal(traverseSession, &dicNode)) {
createNextWordDicNode(traverseSession, &dicNode, true /* spaceSubstitution */);
}
diff --git a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp
index 6a2db687d..a6f9a8b23 100644
--- a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp
+++ b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp
@@ -48,17 +48,17 @@ const float ScoringParams::INSERTION_COST_SAME_CHAR = 0.5508f;
const float ScoringParams::INSERTION_COST_PROXIMITY_CHAR = 0.674f;
const float ScoringParams::INSERTION_COST_FIRST_CHAR = 0.639f;
const float ScoringParams::TRANSPOSITION_COST = 0.5608f;
-const float ScoringParams::SPACE_SUBSTITUTION_COST = 0.334f;
+const float ScoringParams::SPACE_SUBSTITUTION_COST = 0.33f;
+const float ScoringParams::SPACE_OMISSION_COST = 0.1f;
const float ScoringParams::ADDITIONAL_PROXIMITY_COST = 0.37972f;
const float ScoringParams::SUBSTITUTION_COST = 0.3806f;
-const float ScoringParams::COST_NEW_WORD = 0.0314f;
const float ScoringParams::COST_SECOND_OR_LATER_WORD_FIRST_CHAR_UPPERCASE = 0.3224f;
const float ScoringParams::DISTANCE_WEIGHT_LANGUAGE = 1.1214f;
const float ScoringParams::COST_FIRST_COMPLETION = 0.4836f;
const float ScoringParams::COST_COMPLETION = 0.00624f;
const float ScoringParams::HAS_PROXIMITY_TERMINAL_COST = 0.0683f;
const float ScoringParams::HAS_EDIT_CORRECTION_TERMINAL_COST = 0.0362f;
-const float ScoringParams::HAS_MULTI_WORD_TERMINAL_COST = 0.4182f;
+const float ScoringParams::HAS_MULTI_WORD_TERMINAL_COST = 0.3482f;
const float ScoringParams::TYPING_BASE_OUTPUT_SCORE = 1.0f;
const float ScoringParams::TYPING_MAX_OUTPUT_SCORE_PER_INPUT = 0.1f;
const float ScoringParams::NORMALIZED_SPATIAL_DISTANCE_THRESHOLD_FOR_EDIT = 0.095f;
diff --git a/native/jni/src/suggest/policyimpl/typing/scoring_params.h b/native/jni/src/suggest/policyimpl/typing/scoring_params.h
index 731424f3d..b8f889559 100644
--- a/native/jni/src/suggest/policyimpl/typing/scoring_params.h
+++ b/native/jni/src/suggest/policyimpl/typing/scoring_params.h
@@ -56,9 +56,9 @@ class ScoringParams {
static const float INSERTION_COST_FIRST_CHAR;
static const float TRANSPOSITION_COST;
static const float SPACE_SUBSTITUTION_COST;
+ static const float SPACE_OMISSION_COST;
static const float ADDITIONAL_PROXIMITY_COST;
static const float SUBSTITUTION_COST;
- static const float COST_NEW_WORD;
static const float COST_SECOND_OR_LATER_WORD_FIRST_CHAR_UPPERCASE;
static const float DISTANCE_WEIGHT_LANGUAGE;
static const float COST_FIRST_COMPLETION;
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
index 84077174d..1338ac81a 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
@@ -150,9 +150,10 @@ class TypingWeighting : public Weighting {
return cost + weightedDistance;
}
- float getNewWordSpatialCost(const DicTraverseSession *const traverseSession,
+ float getSpaceOmissionCost(const DicTraverseSession *const traverseSession,
const DicNode *const dicNode, DicNode_InputStateG *inputStateG) const {
- return ScoringParams::COST_NEW_WORD * traverseSession->getMultiWordCostMultiplier();
+ const float cost = ScoringParams::SPACE_OMISSION_COST;
+ return cost * traverseSession->getMultiWordCostMultiplier();
}
float getNewWordBigramLanguageCost(const DicTraverseSession *const traverseSession,
@@ -202,7 +203,10 @@ class TypingWeighting : public Weighting {
AK_FORCE_INLINE float getSpaceSubstitutionCost(const DicTraverseSession *const traverseSession,
const DicNode *const dicNode) const {
- const float cost = ScoringParams::SPACE_SUBSTITUTION_COST + ScoringParams::COST_NEW_WORD;
+ const int inputIndex = dicNode->getInputIndex(0);
+ const float distanceToSpaceKey = traverseSession->getProximityInfoState(0)
+ ->getPointToKeyLength(inputIndex, KEYCODE_SPACE);
+ const float cost = ScoringParams::SPACE_SUBSTITUTION_COST * distanceToSpaceKey;
return cost * traverseSession->getMultiWordCostMultiplier();
}