diff options
-rw-r--r-- | dictionaries/en_GB_wordlist.combined.gz | bin | 859873 -> 859882 bytes | |||
-rw-r--r-- | dictionaries/en_US_wordlist.combined.gz | bin | 876950 -> 876956 bytes | |||
-rw-r--r-- | dictionaries/en_wordlist.combined.gz | bin | 908317 -> 908324 bytes | |||
-rw-r--r-- | dictionaries/fr_wordlist.combined.gz | bin | 1107091 -> 1106515 bytes | |||
-rw-r--r-- | dictionaries/pt_BR_wordlist.combined.gz | bin | 878434 -> 878432 bytes | |||
-rw-r--r-- | java/res/raw/main_en.dict | bin | 1069824 -> 1069833 bytes | |||
-rw-r--r-- | java/res/raw/main_fr.dict | bin | 1329615 -> 1329175 bytes | |||
-rw-r--r-- | java/res/raw/main_pt_br.dict | bin | 1091993 -> 1091988 bytes | |||
-rw-r--r-- | java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java | 53 | ||||
-rw-r--r-- | native/jni/src/suggest/core/dicnode/dic_node.h | 8 | ||||
-rw-r--r-- | native/jni/src/suggest/policyimpl/typing/scoring_params.cpp | 20 | ||||
-rw-r--r-- | native/jni/src/suggest/policyimpl/typing/scoring_params.h | 1 | ||||
-rw-r--r-- | native/jni/src/suggest/policyimpl/typing/typing_weighting.h | 5 |
13 files changed, 57 insertions, 30 deletions
diff --git a/dictionaries/en_GB_wordlist.combined.gz b/dictionaries/en_GB_wordlist.combined.gz Binary files differindex 5e2a9df4f..ff78db7f6 100644 --- a/dictionaries/en_GB_wordlist.combined.gz +++ b/dictionaries/en_GB_wordlist.combined.gz diff --git a/dictionaries/en_US_wordlist.combined.gz b/dictionaries/en_US_wordlist.combined.gz Binary files differindex 33ef1c136..4edc96712 100644 --- a/dictionaries/en_US_wordlist.combined.gz +++ b/dictionaries/en_US_wordlist.combined.gz diff --git a/dictionaries/en_wordlist.combined.gz b/dictionaries/en_wordlist.combined.gz Binary files differindex c39f0526d..95a7369e1 100644 --- a/dictionaries/en_wordlist.combined.gz +++ b/dictionaries/en_wordlist.combined.gz diff --git a/dictionaries/fr_wordlist.combined.gz b/dictionaries/fr_wordlist.combined.gz Binary files differindex 4b552617d..0763b627c 100644 --- a/dictionaries/fr_wordlist.combined.gz +++ b/dictionaries/fr_wordlist.combined.gz diff --git a/dictionaries/pt_BR_wordlist.combined.gz b/dictionaries/pt_BR_wordlist.combined.gz Binary files differindex 83dbe79c4..0dd847244 100644 --- a/dictionaries/pt_BR_wordlist.combined.gz +++ b/dictionaries/pt_BR_wordlist.combined.gz diff --git a/java/res/raw/main_en.dict b/java/res/raw/main_en.dict Binary files differindex 120e19b60..3a41257e5 100644 --- a/java/res/raw/main_en.dict +++ b/java/res/raw/main_en.dict diff --git a/java/res/raw/main_fr.dict b/java/res/raw/main_fr.dict Binary files differindex fb43a1a18..31fb2af85 100644 --- a/java/res/raw/main_fr.dict +++ b/java/res/raw/main_fr.dict diff --git a/java/res/raw/main_pt_br.dict b/java/res/raw/main_pt_br.dict Binary files differindex 8c1449919..557d46e89 100644 --- a/java/res/raw/main_pt_br.dict +++ b/java/res/raw/main_pt_br.dict diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java index 2f5e847f7..a54344ab5 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java @@ -384,13 +384,12 @@ public final class BinaryDictInputOutput { /** * Compute the maximum size of a node, assuming 3-byte addresses for everything, and caches - * it in the 'actualSize' member of the node, then returns it. + * it in the 'actualSize' member of the node. * * @param node the node to compute the maximum size of. * @param options file format options. - * @return the size of the node. */ - private static int calculateNodeMaximumSize(final Node node, final FormatOptions options) { + private static void calculateNodeMaximumSize(final Node node, final FormatOptions options) { int size = getGroupCountSize(node); for (CharGroup g : node.mData) { final int groupSize = getCharGroupMaximumSize(g, options); @@ -401,7 +400,6 @@ public final class BinaryDictInputOutput { size += FormatSpec.FORWARD_LINK_ADDRESS_SIZE; } node.mCachedSize = size; - return size; } /** @@ -591,22 +589,48 @@ public final class BinaryDictInputOutput { } /** - * Computes the byte size of a list of nodes and updates each node cached position. + * Initializes the cached addresses of nodes from their size. + * + * @param flatNodes the array of nodes. + * @param formatOptions file format options. + * @return the byte size of the entire stack. + */ + private static int initializeNodesCachedAddresses(final ArrayList<Node> flatNodes, + final FormatOptions formatOptions) { + int nodeOffset = 0; + for (final Node n : flatNodes) { + n.mCachedAddressBeforeUpdate = nodeOffset; + int groupCountSize = getGroupCountSize(n); + int groupOffset = 0; + for (final CharGroup g : n.mData) { + g.mCachedAddress = groupCountSize + nodeOffset + groupOffset; + groupOffset += g.mCachedSize; + } + final int nodeSize = groupCountSize + groupOffset + + (formatOptions.mSupportsDynamicUpdate + ? FormatSpec.FORWARD_LINK_ADDRESS_SIZE : 0); + nodeOffset += n.mCachedSize; + } + return nodeOffset; + } + + /** + * Updates the cached addresses of nodes after recomputing their new positions. * * @param flatNodes the array of nodes. * @param formatOptions file format options. * @return the byte size of the entire stack. */ - // TODO: rename this method when all it does is fill back the cached addresses before update - // with cached addresses after update. - private static int stackNodes(final ArrayList<Node> flatNodes, + private static int updateNodeCachedAddresses(final ArrayList<Node> flatNodes, final FormatOptions formatOptions) { int nodeOffset = 0; for (final Node n : flatNodes) { n.mCachedAddressBeforeUpdate = n.mCachedAddressAfterUpdate; int groupCountSize = getGroupCountSize(n); int groupOffset = 0; - for (CharGroup g : n.mData) { + for (final CharGroup g : n.mData) { + // TODO: just copy cached address after update into cached address before update + // when the two fields are separated. g.mCachedAddress = groupCountSize + nodeOffset + groupOffset; groupOffset += g.mCachedSize; } @@ -614,6 +638,7 @@ public final class BinaryDictInputOutput { + (formatOptions.mSupportsDynamicUpdate ? FormatSpec.FORWARD_LINK_ADDRESS_SIZE : 0); if (nodeSize != n.mCachedSize) { + // TODO: remove this test when the addresses are separated throw new RuntimeException("Bug : Stored and computed node size differ"); } if (nodeOffset != n.mCachedAddressAfterUpdate) { @@ -665,12 +690,8 @@ public final class BinaryDictInputOutput { private static ArrayList<Node> computeAddresses(final FusionDictionary dict, final ArrayList<Node> flatNodes, final FormatOptions formatOptions) { // First get the worst possible sizes and offsets - int offset = 0; - for (final Node n : flatNodes) { - n.mCachedAddressAfterUpdate = offset; - offset += calculateNodeMaximumSize(n, formatOptions); - } - offset = stackNodes(flatNodes, formatOptions); + for (final Node n : flatNodes) calculateNodeMaximumSize(n, formatOptions); + final int offset = initializeNodesCachedAddresses(flatNodes, formatOptions); MakedictLog.i("Compressing the array addresses. Original size : " + offset); MakedictLog.i("(Recursively seen size : " + offset + ")"); @@ -689,7 +710,7 @@ public final class BinaryDictInputOutput { nodeStartOffset += newNodeSize; changesDone |= changed; } - stackNodes(flatNodes, formatOptions); + updateNodeCachedAddresses(flatNodes, formatOptions); ++passes; if (passes > MAX_PASSES) throw new RuntimeException("Too many passes - probably a bug"); } while (changesDone); diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h index e22e999f2..cbed2043c 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node.h +++ b/native/jni/src/suggest/core/dicnode/dic_node.h @@ -328,12 +328,12 @@ class DicNode { return mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() > 0; } - float getProximityCorrectionCount() const { - return static_cast<float>(mDicNodeState.mDicNodeStateScoring.getProximityCorrectionCount()); + int getProximityCorrectionCount() const { + return mDicNodeState.mDicNodeStateScoring.getProximityCorrectionCount(); } - float getEditCorrectionCount() const { - return static_cast<float>(mDicNodeState.mDicNodeStateScoring.getEditCorrectionCount()); + int getEditCorrectionCount() const { + return mDicNodeState.mDicNodeStateScoring.getEditCorrectionCount(); } // Used to prune nodes diff --git a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp index 2659e4a23..a8f797c5c 100644 --- a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp +++ b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp @@ -22,12 +22,14 @@ const float ScoringParams::MAX_SPATIAL_DISTANCE = 1.0f; const int ScoringParams::THRESHOLD_NEXT_WORD_PROBABILITY = 40; const int ScoringParams::THRESHOLD_NEXT_WORD_PROBABILITY_FOR_CAPPED = 120; const float ScoringParams::AUTOCORRECT_OUTPUT_THRESHOLD = 1.0f; -const int ScoringParams::MAX_CACHE_DIC_NODE_SIZE = 125; +// TODO: Unlimit max cache dic node size +const int ScoringParams::MAX_CACHE_DIC_NODE_SIZE = 170; const int ScoringParams::THRESHOLD_SHORT_WORD_LENGTH = 4; const float ScoringParams::DISTANCE_WEIGHT_LENGTH = 0.132f; -const float ScoringParams::PROXIMITY_COST = 0.086f; -const float ScoringParams::FIRST_PROXIMITY_COST = 0.104f; +const float ScoringParams::PROXIMITY_COST = 0.095f; +const float ScoringParams::FIRST_CHAR_PROXIMITY_COST = 0.102f; +const float ScoringParams::FIRST_PROXIMITY_COST = 0.019f; const float ScoringParams::OMISSION_COST = 0.458f; const float ScoringParams::OMISSION_COST_SAME_CHAR = 0.491f; const float ScoringParams::OMISSION_COST_FIRST_CHAR = 0.582f; @@ -35,19 +37,19 @@ const float ScoringParams::INSERTION_COST = 0.730f; const float ScoringParams::INSERTION_COST_SAME_CHAR = 0.586f; const float ScoringParams::INSERTION_COST_PROXIMITY_CHAR = 0.70f; const float ScoringParams::INSERTION_COST_FIRST_CHAR = 0.623f; -const float ScoringParams::TRANSPOSITION_COST = 0.516f; +const float ScoringParams::TRANSPOSITION_COST = 0.526f; const float ScoringParams::SPACE_SUBSTITUTION_COST = 0.319f; const float ScoringParams::ADDITIONAL_PROXIMITY_COST = 0.380f; -const float ScoringParams::SUBSTITUTION_COST = 0.403f; +const float ScoringParams::SUBSTITUTION_COST = 0.383f; const float ScoringParams::COST_NEW_WORD = 0.042f; const float ScoringParams::COST_SECOND_OR_LATER_WORD_FIRST_CHAR_UPPERCASE = 0.25f; const float ScoringParams::DISTANCE_WEIGHT_LANGUAGE = 1.123f; const float ScoringParams::COST_FIRST_LOOKAHEAD = 0.545f; const float ScoringParams::COST_LOOKAHEAD = 0.073f; -const float ScoringParams::HAS_PROXIMITY_TERMINAL_COST = 0.105f; -const float ScoringParams::HAS_EDIT_CORRECTION_TERMINAL_COST = 0.038f; -const float ScoringParams::HAS_MULTI_WORD_TERMINAL_COST = 0.444f; +const float ScoringParams::HAS_PROXIMITY_TERMINAL_COST = 0.093f; +const float ScoringParams::HAS_EDIT_CORRECTION_TERMINAL_COST = 0.041f; +const float ScoringParams::HAS_MULTI_WORD_TERMINAL_COST = 0.447f; const float ScoringParams::TYPING_BASE_OUTPUT_SCORE = 1.0f; const float ScoringParams::TYPING_MAX_OUTPUT_SCORE_PER_INPUT = 0.1f; -const float ScoringParams::NORMALIZED_SPATIAL_DISTANCE_THRESHOLD_FOR_EDIT = 0.06f; +const float ScoringParams::NORMALIZED_SPATIAL_DISTANCE_THRESHOLD_FOR_EDIT = 0.045f; } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/typing/scoring_params.h b/native/jni/src/suggest/policyimpl/typing/scoring_params.h index c39c41779..4ebcc7dc3 100644 --- a/native/jni/src/suggest/policyimpl/typing/scoring_params.h +++ b/native/jni/src/suggest/policyimpl/typing/scoring_params.h @@ -36,6 +36,7 @@ class ScoringParams { // TODO: explore optimization of gesture parameters. static const float DISTANCE_WEIGHT_LENGTH; static const float PROXIMITY_COST; + static const float FIRST_CHAR_PROXIMITY_COST; static const float FIRST_PROXIMITY_COST; static const float OMISSION_COST; static const float OMISSION_COST_SAME_CHAR; diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h index 830aa80de..1bb160738 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h +++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h @@ -81,8 +81,11 @@ class TypingWeighting : public Weighting { const bool isFirstChar = pointIndex == 0; const bool isProximity = isProximityDicNode(traverseSession, dicNode); - float cost = isProximity ? (isFirstChar ? ScoringParams::FIRST_PROXIMITY_COST + float cost = isProximity ? (isFirstChar ? ScoringParams::FIRST_CHAR_PROXIMITY_COST : ScoringParams::PROXIMITY_COST) : 0.0f; + if (isProximity && dicNode->getProximityCorrectionCount() == 0) { + cost += ScoringParams::FIRST_PROXIMITY_COST; + } if (dicNode->getNodeCodePointCount() == 2) { // At the second character of the current word, we check if the first char is uppercase // and the word is a second or later word of a multiple word suggestion. We demote it |