diff options
Diffstat (limited to 'native')
5 files changed, 42 insertions, 5 deletions
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index fb98f3183..6e2219d87 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -249,7 +249,7 @@ static void latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, env->GetFloatArrayRegion(inOutLanguageWeight, 0, 1 /* len */, &languageWeight); SuggestionResults suggestionResults(MAX_RESULTS); const PrevWordsInfo prevWordsInfo(prevWordCodePoints, prevWordCodePointsLength, - false /* isStartOfSentence */); + isBeginningOfSentence); if (givenSuggestOptions.isGesture() || inputSize > 0) { // TODO: Use SuggestionResults to return suggestions. dictionary->getSuggestions(pInfo, traverseSession, xCoordinates, yCoordinates, diff --git a/native/jni/src/suggest/core/result/suggestions_output_utils.cpp b/native/jni/src/suggest/core/result/suggestions_output_utils.cpp index 23908255b..7b0e7e1b4 100644 --- a/native/jni/src/suggest/core/result/suggestions_output_utils.cpp +++ b/native/jni/src/suggest/core/result/suggestions_output_utils.cpp @@ -26,6 +26,7 @@ #include "suggest/core/policy/scoring.h" #include "suggest/core/result/suggestion_results.h" #include "suggest/core/session/dic_traverse_session.h" +#include "suggest/core/suggest_options.h" namespace latinime { @@ -105,6 +106,11 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16; // Entries that are blacklisted or do not represent a word should not be output. const bool isValidWord = !terminalDicNode->isBlacklistedOrNotAWord(); + // When we have to block offensive words, non-exact matched offensive words should not be + // output. + const bool blockOffensiveWords = traverseSession->getSuggestOptions()->blockOffensiveWords(); + const bool isBlockedOffensiveWord = blockOffensiveWords && isPossiblyOffensiveWord + && !isSafeExactMatch; // Increase output score of top typing suggestion to ensure autocorrection. // TODO: Better integration with java side autocorrection logic. @@ -115,8 +121,9 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16; || (isValidWord && scoringPolicy->doesAutoCorrectValidWord()), boostExactMatches); - // Don't output invalid words. However, we still need to submit their shortcuts if any. - if (isValidWord) { + // Don't output invalid or blocked offensive words. However, we still need to submit their + // shortcuts if any. + if (isValidWord && !isBlockedOffensiveWord) { int codePoints[MAX_WORD_LENGTH]; terminalDicNode->outputResult(codePoints); const int indexToPartialCommit = outputSecondWordFirstLetterInputIndex ? diff --git a/native/jni/src/suggest/core/session/prev_words_info.h b/native/jni/src/suggest/core/session/prev_words_info.h index 56c53c1c2..640f6a2fc 100644 --- a/native/jni/src/suggest/core/session/prev_words_info.h +++ b/native/jni/src/suggest/core/session/prev_words_info.h @@ -85,6 +85,14 @@ class PrevWordsInfo { return mPrevWordCodePointCount[n - 1]; } + // n is 1-indexed. + bool isNthPrevWordBeginningOfSentence(const int n) const { + if (n <= 0 || n > MAX_PREV_WORD_COUNT_FOR_N_GRAM) { + return false; + } + return mIsBeginningOfSentence[n - 1]; + } + private: DISALLOW_COPY_AND_ASSIGN(PrevWordsInfo); diff --git a/native/jni/src/suggest/core/suggest_options.h b/native/jni/src/suggest/core/suggest_options.h index 1b21aafcf..2e22a7ac3 100644 --- a/native/jni/src/suggest/core/suggest_options.h +++ b/native/jni/src/suggest/core/suggest_options.h @@ -34,6 +34,10 @@ class SuggestOptions{ return getBoolOption(USE_FULL_EDIT_DISTANCE); } + AK_FORCE_INLINE bool blockOffensiveWords() const { + return getBoolOption(BLOCK_OFFENSIVE_WORDS); + } + AK_FORCE_INLINE bool getAdditionalFeaturesBoolOption(const int key) const { return getBoolOption(key + ADDITIONAL_FEATURES_OPTIONS); } @@ -45,9 +49,10 @@ class SuggestOptions{ // reorder options. static const int IS_GESTURE = 0; static const int USE_FULL_EDIT_DISTANCE = 1; + static const int BLOCK_OFFENSIVE_WORDS = 2; // Additional features options are stored after the other options and used as setting values of // experimental features. - static const int ADDITIONAL_FEATURES_OPTIONS = 2; + static const int ADDITIONAL_FEATURES_OPTIONS = 3; const int *const mOptions; const int mLength; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index a8cc1609a..aec3b8ea3 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -251,7 +251,24 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI false /* tryLowerCaseSearch */); // TODO: Support N-gram. if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) { - return false; + if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)) { + const std::vector<UnigramProperty::ShortcutProperty> shortcuts; + const UnigramProperty beginningOfSentenceUnigramProperty( + true /* representsBeginningOfSentence */, true /* isNotAWord */, + false /* isBlacklisted */, MAX_PROBABILITY /* probability */, + NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts); + if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */), + prevWordsInfo->getNthPrevWordCodePointCount(1 /* n */), + &beginningOfSentenceUnigramProperty)) { + AKLOGE("Cannot add unigram entry for the beginning-of-sentence."); + return false; + } + // Refresh Terminal PtNode positions. + prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos, + false /* tryLowerCaseSearch */); + } else { + return false; + } } const int word1Pos = getTerminalPtNodePositionOfWord( bigramProperty->getTargetCodePoints()->data(), |