diff options
13 files changed, 85 insertions, 84 deletions
diff --git a/java/res/values-sw/strings.xml b/java/res/values-sw/strings.xml index 50ae4dcae..cd31740b6 100644 --- a/java/res/values-sw/strings.xml +++ b/java/res/values-sw/strings.xml @@ -75,7 +75,7 @@ <string name="gesture_floating_preview_text_summary" msgid="4472696213996203533">"Onyesha neno lililopendekezwa unapoonyesha ishara"</string> <string name="added_word" msgid="8993883354622484372">"<xliff:g id="WORD">%s</xliff:g> : Imehifadhiwa"</string> <string name="label_go_key" msgid="1635148082137219148">"Nenda"</string> - <string name="label_next_key" msgid="362972844525672568">"Ifuatayo"</string> + <string name="label_next_key" msgid="362972844525672568">"Inayofuata"</string> <string name="label_previous_key" msgid="1211868118071386787">"Iliyotangulia"</string> <string name="label_done_key" msgid="2441578748772529288">"Kwisha"</string> <string name="label_send_key" msgid="2815056534433717444">"Tuma"</string> diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java index c355fd60a..dd3b49f58 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java @@ -216,11 +216,40 @@ public final class BinaryDictionary extends Dictionary { // TODO: Add a batch process version (isValidBigramMultiple?) to avoid excessive numbers of jni // calls when checking for changes in an entire dictionary. - public boolean isValidBigram(final String word1, final String word2) { - if (TextUtils.isEmpty(word1) || TextUtils.isEmpty(word2)) return false; + public boolean isValidBigram(final String word0, final String word1) { + if (TextUtils.isEmpty(word0) || TextUtils.isEmpty(word1)) return false; + final int[] codePoints0 = StringUtils.toCodePointArray(word0); final int[] codePoints1 = StringUtils.toCodePointArray(word1); - final int[] codePoints2 = StringUtils.toCodePointArray(word2); - return isValidBigramNative(mNativeDict, codePoints1, codePoints2); + return isValidBigramNative(mNativeDict, codePoints0, codePoints1); + } + + // Add a unigram entry to binary dictionary in native code. + public void addUnigramWord(final String word, final int probability) { + if (TextUtils.isEmpty(word)) { + return; + } + final int[] codePoints = StringUtils.toCodePointArray(word); + addUnigramWordNative(mNativeDict, codePoints, probability); + } + + // Add a bigram entry to binary dictionary in native code. + public void addBigramWords(final String word0, final String word1, final int probability) { + if (TextUtils.isEmpty(word0) || TextUtils.isEmpty(word1)) { + return; + } + final int[] codePoints0 = StringUtils.toCodePointArray(word0); + final int[] codePoints1 = StringUtils.toCodePointArray(word1); + addBigramWordsNative(mNativeDict, codePoints0, codePoints1, probability); + } + + // Remove a bigram entry form binary dictionary in native code. + public void removeBigramWords(final String word0, final String word1) { + if (TextUtils.isEmpty(word0) || TextUtils.isEmpty(word1)) { + return; + } + final int[] codePoints0 = StringUtils.toCodePointArray(word0); + final int[] codePoints1 = StringUtils.toCodePointArray(word1); + removeBigramWordsNative(mNativeDict, codePoints0, codePoints1); } @Override diff --git a/java/src/com/android/inputmethod/research/ResearchLogger.java b/java/src/com/android/inputmethod/research/ResearchLogger.java index 98b58c4f2..ed047e13a 100644 --- a/java/src/com/android/inputmethod/research/ResearchLogger.java +++ b/java/src/com/android/inputmethod/research/ResearchLogger.java @@ -1760,7 +1760,7 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang public static void suddenJumpingTouchEventHandler_onTouchEvent(final MotionEvent me) { if (me != null) { getInstance().enqueueEvent(LOGSTATEMENT_SUDDENJUMPINGTOUCHEVENTHANDLER_ONTOUCHEVENT, - me.toString()); + MotionEvent.obtain(me)); } } diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h index be40c9d83..973da67e4 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node.h +++ b/native/jni/src/suggest/core/dicnode/dic_node.h @@ -504,6 +504,12 @@ class DicNode { if (!right->isUsed()) { return false; } + // Promote exact matches to prevent them from being pruned. + const bool leftExactMatch = isExactMatch(); + const bool rightExactMatch = right->isExactMatch(); + if (leftExactMatch != rightExactMatch) { + return leftExactMatch; + } const float diff = right->getNormalizedCompoundDistance() - getNormalizedCompoundDistance(); static const float MIN_DIFF = 0.000001f; diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp index ff304d2b2..708800938 100644 --- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp @@ -123,9 +123,10 @@ int BigramDictionary::getPredictions(const int *prevWord, int prevWordLength, in for (BinaryDictionaryBigramsIterator bigramsIt(mBinaryDictionaryInfo, pos); bigramsIt.hasNext(); /* no-op */) { bigramsIt.next(); - const int length = BinaryFormat::getWordAtAddress( - mBinaryDictionaryInfo->getDictRoot(), bigramsIt.getBigramPos(), - MAX_WORD_LENGTH, bigramBuffer, &unigramProbability); + const int length = mBinaryDictionaryInfo->getStructurePolicy()-> + getCodePointsAndProbabilityAndReturnCodePointCount( + mBinaryDictionaryInfo, bigramsIt.getBigramPos(), MAX_WORD_LENGTH, + bigramBuffer, &unigramProbability); // inputSize == 0 means we are trying to find bigram predictions. if (inputSize < 1 || checkFirstCharacter(bigramBuffer, inputCodePoints)) { @@ -153,18 +154,8 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in int pos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord( mBinaryDictionaryInfo, prevWord, prevWordLength, forceLowerCaseSearch); if (NOT_VALID_WORD == pos) return 0; - const uint8_t *const root = mBinaryDictionaryInfo->getDictRoot(); - const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); - if (0 == (flags & BinaryFormat::FLAG_HAS_BIGRAMS)) return 0; - if (0 == (flags & BinaryFormat::FLAG_HAS_MULTIPLE_CHARS)) { - BinaryFormat::getCodePointAndForwardPointer(root, &pos); - } else { - pos = BinaryFormat::skipOtherCharacters(root, pos); - } - pos = BinaryFormat::skipProbability(flags, pos); - pos = BinaryFormat::skipChildrenPosition(flags, pos); - pos = BinaryFormat::skipShortcuts(root, flags, pos); - return pos; + return BinaryFormat::getBigramListPositionForWordPosition( + mBinaryDictionaryInfo->getDictRoot(), pos); } bool BigramDictionary::checkFirstCharacter(int *word, int *inputCodePoints) const { diff --git a/native/jni/src/suggest/core/dictionary/binary_format.h b/native/jni/src/suggest/core/dictionary/binary_format.h index 9557d8ce7..d3d597b5f 100644 --- a/native/jni/src/suggest/core/dictionary/binary_format.h +++ b/native/jni/src/suggest/core/dictionary/binary_format.h @@ -71,8 +71,9 @@ class BinaryFormat { static bool hasChildrenInFlags(const uint8_t flags); static int getTerminalPosition(const uint8_t *const root, const int *const inWord, const int length, const bool forceLowerCaseSearch); - static int getWordAtAddress(const uint8_t *const root, const int address, const int maxDepth, - int *outWord, int *outUnigramProbability); + static int getCodePointsAndProbabilityAndReturnCodePointCount( + const uint8_t *const root, const int nodePos, const int maxCodePointCount, + int *outCodePoints, int *outUnigramProbability); static int getBigramListPositionForWordPosition(const uint8_t *const root, int position); private: @@ -342,8 +343,9 @@ AK_FORCE_INLINE int BinaryFormat::getTerminalPosition(const uint8_t *const root, * outUnigramProbability: a pointer to an int to write the probability into. * Return value : the length of the word, of 0 if the word was not found. */ -AK_FORCE_INLINE int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int address, - const int maxDepth, int *outWord, int *outUnigramProbability) { +AK_FORCE_INLINE int BinaryFormat::getCodePointsAndProbabilityAndReturnCodePointCount( + const uint8_t *const root, const int nodePos, + const int maxCodePointCount, int *outCodePoints, int *outUnigramProbability) { int pos = 0; int wordPos = 0; @@ -353,7 +355,7 @@ AK_FORCE_INLINE int BinaryFormat::getWordAtAddress(const uint8_t *const root, co // The only reason we count nodes is because we want to reduce the probability of infinite // looping in case there is a bug. Since we know there is an upper bound to the depth we are // supposed to traverse, it does not hurt to count iterations. - for (int loopCount = maxDepth; loopCount > 0; --loopCount) { + for (int loopCount = maxCodePointCount; loopCount > 0; --loopCount) { int lastCandidateGroupPos = 0; // Let's loop through char groups in this node searching for either the terminal // or one of its ascendants. @@ -362,17 +364,17 @@ AK_FORCE_INLINE int BinaryFormat::getWordAtAddress(const uint8_t *const root, co const int startPos = pos; const uint8_t flags = getFlagsAndForwardPointer(root, &pos); const int character = getCodePointAndForwardPointer(root, &pos); - if (address == startPos) { + if (nodePos == startPos) { // We found the address. Copy the rest of the word in the buffer and return // the length. - outWord[wordPos] = character; + outCodePoints[wordPos] = character; if (FLAG_HAS_MULTIPLE_CHARS & flags) { int nextChar = getCodePointAndForwardPointer(root, &pos); // We count chars in order to avoid infinite loops if the file is broken or // if there is some other bug - int charCount = maxDepth; + int charCount = maxCodePointCount; while (NOT_A_CODE_POINT != nextChar && --charCount > 0) { - outWord[++wordPos] = nextChar; + outCodePoints[++wordPos] = nextChar; nextChar = getCodePointAndForwardPointer(root, &pos); } } @@ -399,7 +401,7 @@ AK_FORCE_INLINE int BinaryFormat::getWordAtAddress(const uint8_t *const root, co if (hasChildren) { // Here comes the tricky part. First, read the children position. const int childrenPos = readChildrenPosition(root, flags, pos); - if (childrenPos > address) { + if (childrenPos > nodePos) { // If the children pos is greater than address, it means the previous chargroup, // which address is stored in lastCandidateGroupPos, was the right one. found = true; @@ -429,12 +431,12 @@ AK_FORCE_INLINE int BinaryFormat::getWordAtAddress(const uint8_t *const root, co const int lastChar = getCodePointAndForwardPointer(root, &lastCandidateGroupPos); // We copy all the characters in this group to the buffer - outWord[wordPos] = lastChar; + outCodePoints[wordPos] = lastChar; if (FLAG_HAS_MULTIPLE_CHARS & lastFlags) { int nextChar = getCodePointAndForwardPointer(root, &lastCandidateGroupPos); - int charCount = maxDepth; + int charCount = maxCodePointCount; while (-1 != nextChar && --charCount > 0) { - outWord[++wordPos] = nextChar; + outCodePoints[++wordPos] = nextChar; nextChar = getCodePointAndForwardPointer(root, &lastCandidateGroupPos); } } diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_policy.h index ab42c13b4..48ba5b8c2 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_policy.h @@ -50,8 +50,9 @@ class DictionaryStructurePolicy { const BinaryDictionaryInfo *const binaryDictionaryInfo, const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const = 0; - virtual void getWordAtPosition(const BinaryDictionaryInfo *const binaryDictionaryInfo, - const int terminalNodePos, const int maxDepth, int *const outWord, + virtual int getCodePointsAndProbabilityAndReturnCodePointCount( + const BinaryDictionaryInfo *const binaryDictionaryInfo, + const int nodePos, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) const = 0; virtual int getTerminalNodePositionOfWord( diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.cpp b/native/jni/src/suggest/core/session/dic_traverse_session.cpp index 774d6074e..71d369876 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.cpp +++ b/native/jni/src/suggest/core/session/dic_traverse_session.cpp @@ -18,10 +18,8 @@ #include "defines.h" #include "jni.h" -#include "suggest/core/dicnode/dic_node_utils.h" #include "suggest/core/dictionary/binary_dictionary_header.h" #include "suggest/core/dictionary/binary_dictionary_info.h" -#include "suggest/core/dictionary/binary_format.h" #include "suggest/core/dictionary/dictionary.h" namespace latinime { @@ -29,23 +27,22 @@ namespace latinime { void DicTraverseSession::init(const Dictionary *const dictionary, const int *prevWord, int prevWordLength, const SuggestOptions *const suggestOptions) { mDictionary = dictionary; - mMultiWordCostMultiplier = mDictionary->getBinaryDictionaryInfo() - ->getHeader()->getMultiWordCostMultiplier(); + const BinaryDictionaryInfo *const binaryDictionaryInfo = + mDictionary->getBinaryDictionaryInfo(); + mMultiWordCostMultiplier = binaryDictionaryInfo->getHeader()->getMultiWordCostMultiplier(); mSuggestOptions = suggestOptions; if (!prevWord) { mPrevWordPos = NOT_VALID_WORD; return; } // TODO: merge following similar calls to getTerminalPosition into one case-insensitive call. - mPrevWordPos = BinaryFormat::getTerminalPosition( - dictionary->getBinaryDictionaryInfo()->getDictRoot(), prevWord, - prevWordLength, false /* forceLowerCaseSearch */); + mPrevWordPos = binaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord( + binaryDictionaryInfo, prevWord, prevWordLength, false /* forceLowerCaseSearch */); if (mPrevWordPos == NOT_VALID_WORD) { // Check bigrams for lower-cased previous word if original was not found. Useful for // auto-capitalized words like "The [current_word]". - mPrevWordPos = BinaryFormat::getTerminalPosition( - dictionary->getBinaryDictionaryInfo()->getDictRoot(), prevWord, - prevWordLength, true /* forceLowerCaseSearch */); + mPrevWordPos = binaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord( + binaryDictionaryInfo, prevWord, prevWordLength, true /* forceLowerCaseSearch */); } } diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp index 6e9aff5ec..c6da6f003 100644 --- a/native/jni/src/suggest/core/suggest.cpp +++ b/native/jni/src/suggest/core/suggest.cpp @@ -36,7 +36,6 @@ namespace latinime { const int Suggest::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16; const int Suggest::MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE = 2; const float Suggest::AUTOCORRECT_CLASSIFICATION_THRESHOLD = 0.33f; -const int Suggest::FINAL_SCORE_PENALTY_FOR_NOT_BEST_EXACT_MATCHED_WORD = 1; /** * Returns a set of suggestions for the given input touch points. The commitPoint argument indicates @@ -149,8 +148,6 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen &doubleLetterTerminalIndex, &doubleLetterLevel); int maxScore = S_INT_MIN; - int bestExactMatchedNodeTerminalIndex = -1; - int bestExactMatchedNodeOutputWordIndex = -1; // Force autocorrection for obvious long multi-word suggestions when the top suggestion is // a long multiple words suggestion. // TODO: Implement a smarter auto-commit method for handling multi-word suggestions. @@ -191,8 +188,9 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen // TODO: Better integration with java side autocorrection logic. const int finalScore = SCORING->calculateFinalScore( compoundDistance, traverseSession->getInputSize(), - (forceCommitMultiWords && terminalDicNode->hasMultipleWords()) - || (isValidWord && SCORING->doesAutoCorrectValidWord())); + terminalDicNode->isExactMatch() + || (forceCommitMultiWords && terminalDicNode->hasMultipleWords()) + || (isValidWord && SCORING->doesAutoCorrectValidWord())); maxScore = max(maxScore, finalScore); // TODO: Implement a smarter auto-commit method for handling multi-word suggestions. @@ -205,25 +203,6 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen if (isValidWord) { outputTypes[outputWordIndex] = Dictionary::KIND_CORRECTION | outputTypeFlags; frequencies[outputWordIndex] = finalScore; - if (isSafeExactMatch) { - // Demote exact matches that are not the highest probable node among all exact - // matches. - const bool isBestTerminal = bestExactMatchedNodeTerminalIndex < 0 - || terminals[bestExactMatchedNodeTerminalIndex].getProbability() - < terminalDicNode->getProbability(); - const int outputWordIndexToBeDemoted = isBestTerminal ? - bestExactMatchedNodeOutputWordIndex : outputWordIndex; - if (outputWordIndexToBeDemoted >= 0) { - frequencies[outputWordIndexToBeDemoted] -= - FINAL_SCORE_PENALTY_FOR_NOT_BEST_EXACT_MATCHED_WORD; - } - if (isBestTerminal) { - // Updates the best exact matched node index. - bestExactMatchedNodeTerminalIndex = terminalIndex; - // Updates the best exact matched output word index. - bestExactMatchedNodeOutputWordIndex = outputWordIndex; - } - } // Populate the outputChars array with the suggested word. const int startIndex = outputWordIndex * MAX_WORD_LENGTH; terminalDicNode->outputResult(&outputCodePoints[startIndex]); diff --git a/native/jni/src/suggest/core/suggest.h b/native/jni/src/suggest/core/suggest.h index 752bde9ac..875cbe4e0 100644 --- a/native/jni/src/suggest/core/suggest.h +++ b/native/jni/src/suggest/core/suggest.h @@ -82,8 +82,6 @@ class Suggest : public SuggestInterface { // Threshold for autocorrection classifier static const float AUTOCORRECT_CLASSIFICATION_THRESHOLD; - // Final score penalty to exact match words that are not the most probable exact match. - static const int FINAL_SCORE_PENALTY_FOR_NOT_BEST_EXACT_MATCHED_WORD; const Traversal *const TRAVERSAL; const Scoring *const SCORING; diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp index c995af98a..c807fb7c9 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp @@ -33,11 +33,13 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, // TODO: Move children creating methods form DicNodeUtils. } -void PatriciaTriePolicy::getWordAtPosition(const BinaryDictionaryInfo *const binaryDictionaryInfo, - const int terminalNodePos, const int maxDepth, int *const outWord, +int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( + const BinaryDictionaryInfo *const binaryDictionaryInfo, + const int nodePos, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) const { - BinaryFormat::getWordAtAddress(binaryDictionaryInfo->getDictRoot(), terminalNodePos, - maxDepth, outWord, outUnigramProbability); + return BinaryFormat::getCodePointsAndProbabilityAndReturnCodePointCount( + binaryDictionaryInfo->getDictRoot(), nodePos, + maxCodePointCount, outCodePoints, outUnigramProbability); } int PatriciaTriePolicy::getTerminalNodePositionOfWord( diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h index 9b9338145..0a16e414a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h @@ -36,8 +36,9 @@ class PatriciaTriePolicy : public DictionaryStructurePolicy { const BinaryDictionaryInfo *const binaryDictionaryInfo, const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const; - void getWordAtPosition(const BinaryDictionaryInfo *const binaryDictionaryInfo, - const int terminalNodePos, const int maxDepth, int *const outWord, + int getCodePointsAndProbabilityAndReturnCodePointCount( + const BinaryDictionaryInfo *const binaryDictionaryInfo, + const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) const; int getTerminalNodePositionOfWord( diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h index e098f353e..830aa80de 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h +++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h @@ -169,12 +169,7 @@ class TypingWeighting : public Weighting { float getTerminalLanguageCost(const DicTraverseSession *const traverseSession, const DicNode *const dicNode, const float dicNodeLanguageImprobability) const { - // We promote exact matches here to prevent them from being pruned. The final score of - // exact match nodes might be demoted later in Suggest::outputSuggestions if there are - // multiple exact matches. - const float languageImprobability = (dicNode->isExactMatch()) ? - 0.0f : dicNodeLanguageImprobability; - return languageImprobability * ScoringParams::DISTANCE_WEIGHT_LANGUAGE; + return dicNodeLanguageImprobability * ScoringParams::DISTANCE_WEIGHT_LANGUAGE; } AK_FORCE_INLINE bool needsToNormalizeCompoundDistance() const { |