aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--dictionaries/en_GB_wordlist.combined.gzbin859873 -> 859882 bytes
-rw-r--r--dictionaries/en_US_wordlist.combined.gzbin876950 -> 876956 bytes
-rw-r--r--dictionaries/en_wordlist.combined.gzbin908317 -> 908324 bytes
-rw-r--r--dictionaries/fr_wordlist.combined.gzbin1107091 -> 1106515 bytes
-rw-r--r--dictionaries/pt_BR_wordlist.combined.gzbin878434 -> 878432 bytes
-rw-r--r--java/res/raw/main_en.dictbin1069824 -> 1069833 bytes
-rw-r--r--java/res/raw/main_fr.dictbin1329615 -> 1329175 bytes
-rw-r--r--java/res/raw/main_pt_br.dictbin1091993 -> 1091988 bytes
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java53
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node.h8
-rw-r--r--native/jni/src/suggest/policyimpl/typing/scoring_params.cpp20
-rw-r--r--native/jni/src/suggest/policyimpl/typing/scoring_params.h1
-rw-r--r--native/jni/src/suggest/policyimpl/typing/typing_weighting.h5
13 files changed, 57 insertions, 30 deletions
diff --git a/dictionaries/en_GB_wordlist.combined.gz b/dictionaries/en_GB_wordlist.combined.gz
index 5e2a9df4f..ff78db7f6 100644
--- a/dictionaries/en_GB_wordlist.combined.gz
+++ b/dictionaries/en_GB_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/en_US_wordlist.combined.gz b/dictionaries/en_US_wordlist.combined.gz
index 33ef1c136..4edc96712 100644
--- a/dictionaries/en_US_wordlist.combined.gz
+++ b/dictionaries/en_US_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/en_wordlist.combined.gz b/dictionaries/en_wordlist.combined.gz
index c39f0526d..95a7369e1 100644
--- a/dictionaries/en_wordlist.combined.gz
+++ b/dictionaries/en_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/fr_wordlist.combined.gz b/dictionaries/fr_wordlist.combined.gz
index 4b552617d..0763b627c 100644
--- a/dictionaries/fr_wordlist.combined.gz
+++ b/dictionaries/fr_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/pt_BR_wordlist.combined.gz b/dictionaries/pt_BR_wordlist.combined.gz
index 83dbe79c4..0dd847244 100644
--- a/dictionaries/pt_BR_wordlist.combined.gz
+++ b/dictionaries/pt_BR_wordlist.combined.gz
Binary files differ
diff --git a/java/res/raw/main_en.dict b/java/res/raw/main_en.dict
index 120e19b60..3a41257e5 100644
--- a/java/res/raw/main_en.dict
+++ b/java/res/raw/main_en.dict
Binary files differ
diff --git a/java/res/raw/main_fr.dict b/java/res/raw/main_fr.dict
index fb43a1a18..31fb2af85 100644
--- a/java/res/raw/main_fr.dict
+++ b/java/res/raw/main_fr.dict
Binary files differ
diff --git a/java/res/raw/main_pt_br.dict b/java/res/raw/main_pt_br.dict
index 8c1449919..557d46e89 100644
--- a/java/res/raw/main_pt_br.dict
+++ b/java/res/raw/main_pt_br.dict
Binary files differ
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
index 2f5e847f7..a54344ab5 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
@@ -384,13 +384,12 @@ public final class BinaryDictInputOutput {
/**
* Compute the maximum size of a node, assuming 3-byte addresses for everything, and caches
- * it in the 'actualSize' member of the node, then returns it.
+ * it in the 'actualSize' member of the node.
*
* @param node the node to compute the maximum size of.
* @param options file format options.
- * @return the size of the node.
*/
- private static int calculateNodeMaximumSize(final Node node, final FormatOptions options) {
+ private static void calculateNodeMaximumSize(final Node node, final FormatOptions options) {
int size = getGroupCountSize(node);
for (CharGroup g : node.mData) {
final int groupSize = getCharGroupMaximumSize(g, options);
@@ -401,7 +400,6 @@ public final class BinaryDictInputOutput {
size += FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
}
node.mCachedSize = size;
- return size;
}
/**
@@ -591,22 +589,48 @@ public final class BinaryDictInputOutput {
}
/**
- * Computes the byte size of a list of nodes and updates each node cached position.
+ * Initializes the cached addresses of nodes from their size.
+ *
+ * @param flatNodes the array of nodes.
+ * @param formatOptions file format options.
+ * @return the byte size of the entire stack.
+ */
+ private static int initializeNodesCachedAddresses(final ArrayList<Node> flatNodes,
+ final FormatOptions formatOptions) {
+ int nodeOffset = 0;
+ for (final Node n : flatNodes) {
+ n.mCachedAddressBeforeUpdate = nodeOffset;
+ int groupCountSize = getGroupCountSize(n);
+ int groupOffset = 0;
+ for (final CharGroup g : n.mData) {
+ g.mCachedAddress = groupCountSize + nodeOffset + groupOffset;
+ groupOffset += g.mCachedSize;
+ }
+ final int nodeSize = groupCountSize + groupOffset
+ + (formatOptions.mSupportsDynamicUpdate
+ ? FormatSpec.FORWARD_LINK_ADDRESS_SIZE : 0);
+ nodeOffset += n.mCachedSize;
+ }
+ return nodeOffset;
+ }
+
+ /**
+ * Updates the cached addresses of nodes after recomputing their new positions.
*
* @param flatNodes the array of nodes.
* @param formatOptions file format options.
* @return the byte size of the entire stack.
*/
- // TODO: rename this method when all it does is fill back the cached addresses before update
- // with cached addresses after update.
- private static int stackNodes(final ArrayList<Node> flatNodes,
+ private static int updateNodeCachedAddresses(final ArrayList<Node> flatNodes,
final FormatOptions formatOptions) {
int nodeOffset = 0;
for (final Node n : flatNodes) {
n.mCachedAddressBeforeUpdate = n.mCachedAddressAfterUpdate;
int groupCountSize = getGroupCountSize(n);
int groupOffset = 0;
- for (CharGroup g : n.mData) {
+ for (final CharGroup g : n.mData) {
+ // TODO: just copy cached address after update into cached address before update
+ // when the two fields are separated.
g.mCachedAddress = groupCountSize + nodeOffset + groupOffset;
groupOffset += g.mCachedSize;
}
@@ -614,6 +638,7 @@ public final class BinaryDictInputOutput {
+ (formatOptions.mSupportsDynamicUpdate
? FormatSpec.FORWARD_LINK_ADDRESS_SIZE : 0);
if (nodeSize != n.mCachedSize) {
+ // TODO: remove this test when the addresses are separated
throw new RuntimeException("Bug : Stored and computed node size differ");
}
if (nodeOffset != n.mCachedAddressAfterUpdate) {
@@ -665,12 +690,8 @@ public final class BinaryDictInputOutput {
private static ArrayList<Node> computeAddresses(final FusionDictionary dict,
final ArrayList<Node> flatNodes, final FormatOptions formatOptions) {
// First get the worst possible sizes and offsets
- int offset = 0;
- for (final Node n : flatNodes) {
- n.mCachedAddressAfterUpdate = offset;
- offset += calculateNodeMaximumSize(n, formatOptions);
- }
- offset = stackNodes(flatNodes, formatOptions);
+ for (final Node n : flatNodes) calculateNodeMaximumSize(n, formatOptions);
+ final int offset = initializeNodesCachedAddresses(flatNodes, formatOptions);
MakedictLog.i("Compressing the array addresses. Original size : " + offset);
MakedictLog.i("(Recursively seen size : " + offset + ")");
@@ -689,7 +710,7 @@ public final class BinaryDictInputOutput {
nodeStartOffset += newNodeSize;
changesDone |= changed;
}
- stackNodes(flatNodes, formatOptions);
+ updateNodeCachedAddresses(flatNodes, formatOptions);
++passes;
if (passes > MAX_PASSES) throw new RuntimeException("Too many passes - probably a bug");
} while (changesDone);
diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h
index e22e999f2..cbed2043c 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node.h
@@ -328,12 +328,12 @@ class DicNode {
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() > 0;
}
- float getProximityCorrectionCount() const {
- return static_cast<float>(mDicNodeState.mDicNodeStateScoring.getProximityCorrectionCount());
+ int getProximityCorrectionCount() const {
+ return mDicNodeState.mDicNodeStateScoring.getProximityCorrectionCount();
}
- float getEditCorrectionCount() const {
- return static_cast<float>(mDicNodeState.mDicNodeStateScoring.getEditCorrectionCount());
+ int getEditCorrectionCount() const {
+ return mDicNodeState.mDicNodeStateScoring.getEditCorrectionCount();
}
// Used to prune nodes
diff --git a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp
index 2659e4a23..a8f797c5c 100644
--- a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp
+++ b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp
@@ -22,12 +22,14 @@ const float ScoringParams::MAX_SPATIAL_DISTANCE = 1.0f;
const int ScoringParams::THRESHOLD_NEXT_WORD_PROBABILITY = 40;
const int ScoringParams::THRESHOLD_NEXT_WORD_PROBABILITY_FOR_CAPPED = 120;
const float ScoringParams::AUTOCORRECT_OUTPUT_THRESHOLD = 1.0f;
-const int ScoringParams::MAX_CACHE_DIC_NODE_SIZE = 125;
+// TODO: Unlimit max cache dic node size
+const int ScoringParams::MAX_CACHE_DIC_NODE_SIZE = 170;
const int ScoringParams::THRESHOLD_SHORT_WORD_LENGTH = 4;
const float ScoringParams::DISTANCE_WEIGHT_LENGTH = 0.132f;
-const float ScoringParams::PROXIMITY_COST = 0.086f;
-const float ScoringParams::FIRST_PROXIMITY_COST = 0.104f;
+const float ScoringParams::PROXIMITY_COST = 0.095f;
+const float ScoringParams::FIRST_CHAR_PROXIMITY_COST = 0.102f;
+const float ScoringParams::FIRST_PROXIMITY_COST = 0.019f;
const float ScoringParams::OMISSION_COST = 0.458f;
const float ScoringParams::OMISSION_COST_SAME_CHAR = 0.491f;
const float ScoringParams::OMISSION_COST_FIRST_CHAR = 0.582f;
@@ -35,19 +37,19 @@ const float ScoringParams::INSERTION_COST = 0.730f;
const float ScoringParams::INSERTION_COST_SAME_CHAR = 0.586f;
const float ScoringParams::INSERTION_COST_PROXIMITY_CHAR = 0.70f;
const float ScoringParams::INSERTION_COST_FIRST_CHAR = 0.623f;
-const float ScoringParams::TRANSPOSITION_COST = 0.516f;
+const float ScoringParams::TRANSPOSITION_COST = 0.526f;
const float ScoringParams::SPACE_SUBSTITUTION_COST = 0.319f;
const float ScoringParams::ADDITIONAL_PROXIMITY_COST = 0.380f;
-const float ScoringParams::SUBSTITUTION_COST = 0.403f;
+const float ScoringParams::SUBSTITUTION_COST = 0.383f;
const float ScoringParams::COST_NEW_WORD = 0.042f;
const float ScoringParams::COST_SECOND_OR_LATER_WORD_FIRST_CHAR_UPPERCASE = 0.25f;
const float ScoringParams::DISTANCE_WEIGHT_LANGUAGE = 1.123f;
const float ScoringParams::COST_FIRST_LOOKAHEAD = 0.545f;
const float ScoringParams::COST_LOOKAHEAD = 0.073f;
-const float ScoringParams::HAS_PROXIMITY_TERMINAL_COST = 0.105f;
-const float ScoringParams::HAS_EDIT_CORRECTION_TERMINAL_COST = 0.038f;
-const float ScoringParams::HAS_MULTI_WORD_TERMINAL_COST = 0.444f;
+const float ScoringParams::HAS_PROXIMITY_TERMINAL_COST = 0.093f;
+const float ScoringParams::HAS_EDIT_CORRECTION_TERMINAL_COST = 0.041f;
+const float ScoringParams::HAS_MULTI_WORD_TERMINAL_COST = 0.447f;
const float ScoringParams::TYPING_BASE_OUTPUT_SCORE = 1.0f;
const float ScoringParams::TYPING_MAX_OUTPUT_SCORE_PER_INPUT = 0.1f;
-const float ScoringParams::NORMALIZED_SPATIAL_DISTANCE_THRESHOLD_FOR_EDIT = 0.06f;
+const float ScoringParams::NORMALIZED_SPATIAL_DISTANCE_THRESHOLD_FOR_EDIT = 0.045f;
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/typing/scoring_params.h b/native/jni/src/suggest/policyimpl/typing/scoring_params.h
index c39c41779..4ebcc7dc3 100644
--- a/native/jni/src/suggest/policyimpl/typing/scoring_params.h
+++ b/native/jni/src/suggest/policyimpl/typing/scoring_params.h
@@ -36,6 +36,7 @@ class ScoringParams {
// TODO: explore optimization of gesture parameters.
static const float DISTANCE_WEIGHT_LENGTH;
static const float PROXIMITY_COST;
+ static const float FIRST_CHAR_PROXIMITY_COST;
static const float FIRST_PROXIMITY_COST;
static const float OMISSION_COST;
static const float OMISSION_COST_SAME_CHAR;
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
index 830aa80de..1bb160738 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
@@ -81,8 +81,11 @@ class TypingWeighting : public Weighting {
const bool isFirstChar = pointIndex == 0;
const bool isProximity = isProximityDicNode(traverseSession, dicNode);
- float cost = isProximity ? (isFirstChar ? ScoringParams::FIRST_PROXIMITY_COST
+ float cost = isProximity ? (isFirstChar ? ScoringParams::FIRST_CHAR_PROXIMITY_COST
: ScoringParams::PROXIMITY_COST) : 0.0f;
+ if (isProximity && dicNode->getProximityCorrectionCount() == 0) {
+ cost += ScoringParams::FIRST_PROXIMITY_COST;
+ }
if (dicNode->getNodeCodePointCount() == 2) {
// At the second character of the current word, we check if the first char is uppercase
// and the word is a second or later word of a multiple word suggestion. We demote it