about | summary | refs | log | tree | commit | diff | stats
path: root/native/jni
diff options
context:
space:
mode:
author: Adrian Velicu <adrianv@google.com> 2014-10-21 07:39:18 +0000
committer: Android (Google) Code Review <android-gerrit@google.com> 2014-10-21 07:39:18 +0000
commit: c51b9b5b3f9b9b80d07186691ddfa09502fd4659 (patch)
tree: 6ec9485b9d5a9a8b6e4ee8622998131800d17a91 /native/jni
parent: 2b087f9a12d8bae2bd3adc659684df615e1c96db (diff)
parent: 05172bf1a5693c2e108e91436b98ecd35d2dadad (diff)
download: latinime-c51b9b5b3f9b9b80d07186691ddfa09502fd4659.tar.gz
latinime-c51b9b5b3f9b9b80d07186691ddfa09502fd4659.tar.xz
latinime-c51b9b5b3f9b9b80d07186691ddfa09502fd4659.zip
Merge "Renaming "blacklist" flag to "possibly offensive""
Diffstat (limited to 'native/jni')
-rw-r--r-- native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp 15
-rw-r--r-- native/jni/src/suggest/core/dictionary/property/unigram_property.h 49
-rw-r--r-- native/jni/src/suggest/core/dictionary/property/word_property.cpp 2
-rw-r--r-- native/jni/src/suggest/core/dictionary/word_attributes.h 8
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp 11
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp 4
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp 23
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h 8
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.cpp 4
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h 10
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h 13
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp 4
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp 2
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp 13
14 files changed, 107 insertions, 59 deletions
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index f8dadb488..b01acead7 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -358,7 +358,7 @@ static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz,
static bool latinime_BinaryDictionary_addUnigramEntry(JNIEnv *env, jclass clazz, jlong dict,
jintArray word, jint probability, jintArray shortcutTarget, jint shortcutProbability,
- jboolean isBeginningOfSentence, jboolean isNotAWord, jboolean isBlacklisted,
+ jboolean isBeginningOfSentence, jboolean isNotAWord, jboolean isPossiblyOffensive,
jint timestamp) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
@@ -377,8 +377,8 @@ static bool latinime_BinaryDictionary_addUnigramEntry(JNIEnv *env, jclass clazz,
}
// Use 1 for count to indicate that the word has been input.
const UnigramProperty unigramProperty(isBeginningOfSentence, isNotAWord,
- isBlacklisted, probability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */),
- std::move(shortcuts));
+ isPossiblyOffensive, probability, HistoricalInfo(timestamp, 0 /* level */,
+ 1 /* count */), std::move(shortcuts));
return dictionary->addUnigramEntry(CodePointArrayView(codePoints, codePointCount),
&unigramProperty);
}
@@ -480,8 +480,8 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
env->GetFieldID(languageModelParamClass, "mShortcutProbability", "I");
jfieldID isNotAWordFieldId =
env->GetFieldID(languageModelParamClass, "mIsNotAWord", "Z");
- jfieldID isBlacklistedFieldId =
- env->GetFieldID(languageModelParamClass, "mIsBlacklisted", "Z");
+ jfieldID isPossiblyOffensiveFieldId =
+ env->GetFieldID(languageModelParamClass, "mIsPossiblyOffensive", "Z");
env->DeleteLocalRef(languageModelParamClass);
for (int i = startIndex; i < languageModelParamCount; ++i) {
@@ -504,7 +504,8 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
jint unigramProbability = env->GetIntField(languageModelParam, unigramProbabilityFieldId);
jint timestamp = env->GetIntField(languageModelParam, timestampFieldId);
jboolean isNotAWord = env->GetBooleanField(languageModelParam, isNotAWordFieldId);
- jboolean isBlacklisted = env->GetBooleanField(languageModelParam, isBlacklistedFieldId);
+ jboolean isPossiblyOffensive = env->GetBooleanField(languageModelParam,
+ isPossiblyOffensiveFieldId);
jintArray shortcutTarget = static_cast<jintArray>(
env->GetObjectField(languageModelParam, shortcutTargetFieldId));
std::vector<UnigramProperty::ShortcutProperty> shortcuts;
@@ -519,7 +520,7 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
}
// Use 1 for count to indicate that the word has been input.
const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord,
- isBlacklisted, unigramProbability,
+ isPossiblyOffensive, unigramProbability,
HistoricalInfo(timestamp, 0 /* level */, 1 /* count */), std::move(shortcuts));
dictionary->addUnigramEntry(CodePointArrayView(word1CodePoints, word1Length),
&unigramProperty);
diff --git a/native/jni/src/suggest/core/dictionary/property/unigram_property.h b/native/jni/src/suggest/core/dictionary/property/unigram_property.h
index 5ed2e2602..f194f979a 100644
--- a/native/jni/src/suggest/core/dictionary/property/unigram_property.h
+++ b/native/jni/src/suggest/core/dictionary/property/unigram_property.h
@@ -49,21 +49,44 @@ class UnigramProperty {
};
UnigramProperty()
- : mRepresentsBeginningOfSentence(false), mIsNotAWord(false), mIsBlacklisted(false),
- mProbability(NOT_A_PROBABILITY), mHistoricalInfo(), mShortcuts() {}
+ : mRepresentsBeginningOfSentence(false), mIsNotAWord(false),
+ mIsBlacklisted(false), mIsPossiblyOffensive(false), mProbability(NOT_A_PROBABILITY),
+ mHistoricalInfo(), mShortcuts() {}
+ // In contexts which do not support the Blacklisted flag (v2, v4<403)
UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord,
- const bool isBlacklisted, const int probability, const HistoricalInfo historicalInfo,
- const std::vector<ShortcutProperty> &&shortcuts)
+ const bool isPossiblyOffensive, const int probability,
+ const HistoricalInfo historicalInfo, const std::vector<ShortcutProperty> &&shortcuts)
: mRepresentsBeginningOfSentence(representsBeginningOfSentence),
- mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability),
+ mIsNotAWord(isNotAWord), mIsBlacklisted(false),
+ mIsPossiblyOffensive(isPossiblyOffensive), mProbability(probability),
mHistoricalInfo(historicalInfo), mShortcuts(std::move(shortcuts)) {}
- // Without shortcuts.
+ // Without shortcuts, in contexts which do not support the Blacklisted flag (v2, v4<403)
UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord,
- const bool isBlacklisted, const int probability, const HistoricalInfo historicalInfo)
+ const bool isPossiblyOffensive, const int probability,
+ const HistoricalInfo historicalInfo)
: mRepresentsBeginningOfSentence(representsBeginningOfSentence),
- mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability),
+ mIsNotAWord(isNotAWord), mIsBlacklisted(false),
+ mIsPossiblyOffensive(isPossiblyOffensive), mProbability(probability),
+ mHistoricalInfo(historicalInfo), mShortcuts() {}
+
+ // In contexts which DO support the Blacklisted flag (v403)
+ UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord,
+ const bool isBlacklisted, const bool isPossiblyOffensive, const int probability,
+ const HistoricalInfo historicalInfo, const std::vector<ShortcutProperty> &&shortcuts)
+ : mRepresentsBeginningOfSentence(representsBeginningOfSentence),
+ mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted),
+ mIsPossiblyOffensive(isPossiblyOffensive), mProbability(probability),
+ mHistoricalInfo(historicalInfo), mShortcuts(std::move(shortcuts)) {}
+
+ // Without shortcuts, in contexts which DO support the Blacklisted flag (v403)
+ UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord,
+ const bool isBlacklisted, const bool isPossiblyOffensive, const int probability,
+ const HistoricalInfo historicalInfo)
+ : mRepresentsBeginningOfSentence(representsBeginningOfSentence),
+ mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted),
+ mIsPossiblyOffensive(isPossiblyOffensive), mProbability(probability),
mHistoricalInfo(historicalInfo), mShortcuts() {}
bool representsBeginningOfSentence() const {
@@ -74,13 +97,12 @@ class UnigramProperty {
return mIsNotAWord;
}
- bool isBlacklisted() const {
- return mIsBlacklisted;
+ bool isPossiblyOffensive() const {
+ return mIsPossiblyOffensive;
}
- bool isPossiblyOffensive() const {
- // TODO: Have dedicated flag.
- return mProbability == 0;
+ bool isBlacklisted() const {
+ return mIsBlacklisted;
}
bool hasShortcuts() const {
@@ -106,6 +128,7 @@ class UnigramProperty {
const bool mRepresentsBeginningOfSentence;
const bool mIsNotAWord;
const bool mIsBlacklisted;
+ const bool mIsPossiblyOffensive;
const int mProbability;
const HistoricalInfo mHistoricalInfo;
const std::vector<ShortcutProperty> mShortcuts;
diff --git a/native/jni/src/suggest/core/dictionary/property/word_property.cpp b/native/jni/src/suggest/core/dictionary/property/word_property.cpp
index caac8fe79..a707f1ba2 100644
--- a/native/jni/src/suggest/core/dictionary/property/word_property.cpp
+++ b/native/jni/src/suggest/core/dictionary/property/word_property.cpp
@@ -28,7 +28,7 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
JniDataUtils::outputCodePoints(env, outCodePoints, 0 /* start */,
MAX_WORD_LENGTH /* maxLength */, mCodePoints.data(), mCodePoints.size(),
false /* needsNullTermination */);
- jboolean flags[] = {mUnigramProperty.isNotAWord(), mUnigramProperty.isBlacklisted(),
+ jboolean flags[] = {mUnigramProperty.isNotAWord(), mUnigramProperty.isPossiblyOffensive(),
!mNgrams.empty(), mUnigramProperty.hasShortcuts(),
mUnigramProperty.representsBeginningOfSentence()};
env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags);
diff --git a/native/jni/src/suggest/core/dictionary/word_attributes.h b/native/jni/src/suggest/core/dictionary/word_attributes.h
index 6e9da3570..5351e7d7d 100644
--- a/native/jni/src/suggest/core/dictionary/word_attributes.h
+++ b/native/jni/src/suggest/core/dictionary/word_attributes.h
@@ -43,6 +43,14 @@ class WordAttributes {
return mIsNotAWord;
}
+ // Whether or not a word is possibly offensive.
+ // * Static dictionaries <v202, as well as dynamic dictionaries <v403, will set this based on
+ // whether or not the probability of the word is zero.
+ // * Static dictionaries >=v203 will set this based on the IS_POSSIBLY_OFFENSIVE PtNode flag.
+ // * Dynamic dictionaries >=v403 will set this based on the IS_POSSIBLY_OFFENSIVE language model
+ // flag (the PtNode flag IS_BLACKLISTED is ignored and kept as zero)
+ //
+ // See the ::getWordAttributes function for each of these dictionary policies for more details.
bool isPossiblyOffensive() const {
return mIsPossiblyOffensive;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp
index 6243f14cc..d558b949a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp
@@ -245,7 +245,7 @@ bool Ver4PatriciaTrieNodeWriter::addNgramEntry(const WordIdArrayView prevWordIds
if (!sourcePtNodeParams.hasBigrams()) {
// Update has bigrams flag.
return updatePtNodeFlags(sourcePtNodeParams.getHeadPos(),
- sourcePtNodeParams.isBlacklisted(), sourcePtNodeParams.isNotAWord(),
+ sourcePtNodeParams.isPossiblyOffensive(), sourcePtNodeParams.isNotAWord(),
sourcePtNodeParams.isTerminal(), sourcePtNodeParams.hasShortcutTargets(),
true /* hasBigrams */,
sourcePtNodeParams.getCodePointCount() > 1 /* hasMultipleChars */);
@@ -316,7 +316,7 @@ bool Ver4PatriciaTrieNodeWriter::addShortcutTarget(const PtNodeParams *const ptN
if (!ptNodeParams->hasShortcutTargets()) {
// Update has shortcut targets flag.
return updatePtNodeFlags(ptNodeParams->getHeadPos(),
- ptNodeParams->isBlacklisted(), ptNodeParams->isNotAWord(),
+ ptNodeParams->isPossiblyOffensive(), ptNodeParams->isNotAWord(),
ptNodeParams->isTerminal(), true /* hasShortcutTargets */,
ptNodeParams->hasBigrams(),
ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
@@ -330,7 +330,7 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeHasBigramsAndShortcutTargetsFlags(
ptNodeParams->getTerminalId()) != NOT_A_DICT_POS;
const bool hasShortcutTargets = mBuffers->getShortcutDictContent()->getShortcutListHeadPos(
ptNodeParams->getTerminalId()) != NOT_A_DICT_POS;
- return updatePtNodeFlags(ptNodeParams->getHeadPos(), ptNodeParams->isBlacklisted(),
+ return updatePtNodeFlags(ptNodeParams->getHeadPos(), ptNodeParams->isPossiblyOffensive(),
ptNodeParams->isNotAWord(), ptNodeParams->isTerminal(), hasShortcutTargets,
hasBigrams, ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
}
@@ -386,8 +386,9 @@ bool Ver4PatriciaTrieNodeWriter::writePtNodeAndGetTerminalIdAndAdvancePosition(
ptNodeParams->getChildrenPos(), ptNodeWritingPos)) {
return false;
}
- return updatePtNodeFlags(nodePos, ptNodeParams->isBlacklisted(), ptNodeParams->isNotAWord(),
- isTerminal, ptNodeParams->hasShortcutTargets(), ptNodeParams->hasBigrams(),
+ return updatePtNodeFlags(nodePos, ptNodeParams->isPossiblyOffensive(),
+ ptNodeParams->isNotAWord(), isTerminal, ptNodeParams->hasShortcutTargets(),
+ ptNodeParams->hasBigrams(),
ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
index d0dccc3be..29f9ba37f 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
@@ -608,8 +608,8 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
}
}
const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
- ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
- *historicalInfo, std::move(shortcuts));
+ ptNodeParams.isNotAWord(), ptNodeParams.isPossiblyOffensive(),
+ ptNodeParams.getProbability(), *historicalInfo, std::move(shortcuts));
return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams);
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp
index 92fd6f214..e524e86e5 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp
@@ -146,7 +146,7 @@ bool DynamicPtUpdatingHelper::setPtNodeProbability(const PtNodeParams *const ori
const int movedPos = mBuffer->getTailPosition();
int writingPos = movedPos;
const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(originalPtNodeParams,
- unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
+ unigramProperty->isNotAWord(), unigramProperty->isPossiblyOffensive(),
true /* isTerminal */, originalPtNodeParams->getParentPos(),
originalPtNodeParams->getCodePointArrayView(), unigramProperty->getProbability()));
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
@@ -180,8 +180,9 @@ bool DynamicPtUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
return false;
}
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
- unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(), true /* isTerminal */,
- parentPtNodePos, ptNodeCodePoints, unigramProperty->getProbability()));
+ unigramProperty->isNotAWord(), unigramProperty->isPossiblyOffensive(),
+ true /* isTerminal */, parentPtNodePos, ptNodeCodePoints,
+ unigramProperty->getProbability()));
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
unigramProperty, &writingPos)) {
return false;
@@ -214,7 +215,7 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
reallocatingPtNodeParams->getCodePointArrayView().limit(overlappingCodePointCount);
if (addsExtraChild) {
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
- false /* isNotAWord */, false /* isBlacklisted */, false /* isTerminal */,
+ false /* isNotAWord */, false /* isPossiblyOffensive */, false /* isTerminal */,
reallocatingPtNodeParams->getParentPos(), firstPtNodeCodePoints,
NOT_A_PROBABILITY));
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) {
@@ -222,7 +223,7 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
}
} else {
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
- unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
+ unigramProperty->isNotAWord(), unigramProperty->isPossiblyOffensive(),
true /* isTerminal */, reallocatingPtNodeParams->getParentPos(),
firstPtNodeCodePoints, unigramProperty->getProbability()));
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
@@ -240,7 +241,7 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
// Write the 2nd part of the reallocating node.
const int secondPartOfReallocatedPtNodePos = writingPos;
const PtNodeParams childPartPtNodeParams(getUpdatedPtNodeParams(reallocatingPtNodeParams,
- reallocatingPtNodeParams->isNotAWord(), reallocatingPtNodeParams->isBlacklisted(),
+ reallocatingPtNodeParams->isNotAWord(), reallocatingPtNodeParams->isPossiblyOffensive(),
reallocatingPtNodeParams->isTerminal(), firstPartOfReallocatedPtNodePos,
reallocatingPtNodeParams->getCodePointArrayView().skip(overlappingCodePointCount),
reallocatingPtNodeParams->getProbability()));
@@ -249,7 +250,7 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
}
if (addsExtraChild) {
const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode(
- unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
+ unigramProperty->isNotAWord(), unigramProperty->isPossiblyOffensive(),
true /* isTerminal */, firstPartOfReallocatedPtNodePos,
newPtNodeCodePoints.skip(overlappingCodePointCount),
unigramProperty->getProbability()));
@@ -276,20 +277,20 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
const PtNodeParams DynamicPtUpdatingHelper::getUpdatedPtNodeParams(
const PtNodeParams *const originalPtNodeParams, const bool isNotAWord,
- const bool isBlacklisted, const bool isTerminal, const int parentPos,
+ const bool isPossiblyOffensive, const bool isTerminal, const int parentPos,
const CodePointArrayView codePoints, const int probability) const {
const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
- isBlacklisted, isNotAWord, isTerminal, false /* hasShortcutTargets */,
+ isPossiblyOffensive, isNotAWord, isTerminal, false /* hasShortcutTargets */,
false /* hasBigrams */, codePoints.size() > 1u /* hasMultipleChars */,
CHILDREN_POSITION_FIELD_SIZE);
return PtNodeParams(originalPtNodeParams, flags, parentPos, codePoints, probability);
}
const PtNodeParams DynamicPtUpdatingHelper::getPtNodeParamsForNewPtNode(const bool isNotAWord,
- const bool isBlacklisted, const bool isTerminal, const int parentPos,
+ const bool isPossiblyOffensive, const bool isTerminal, const int parentPos,
const CodePointArrayView codePoints, const int probability) const {
const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
- isBlacklisted, isNotAWord, isTerminal, false /* hasShortcutTargets */,
+ isPossiblyOffensive, isNotAWord, isTerminal, false /* hasShortcutTargets */,
false /* hasBigrams */, codePoints.size() > 1u /* hasMultipleChars */,
CHILDREN_POSITION_FIELD_SIZE);
return PtNodeParams(flags, parentPos, codePoints, probability);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h
index 2bbe2f4dc..db5f6ab17 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h
@@ -85,12 +85,12 @@ class DynamicPtUpdatingHelper {
const CodePointArrayView newPtNodeCodePoints);
const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams,
- const bool isNotAWord, const bool isBlacklisted, const bool isTerminal,
+ const bool isNotAWord, const bool isPossiblyOffensive, const bool isTerminal,
const int parentPos, const CodePointArrayView codePoints, const int probability) const;
- const PtNodeParams getPtNodeParamsForNewPtNode(const bool isNotAWord, const bool isBlacklisted,
- const bool isTerminal, const int parentPos, const CodePointArrayView codePoints,
- const int probability) const;
+ const PtNodeParams getPtNodeParamsForNewPtNode(const bool isNotAWord,
+ const bool isPossiblyOffensive, const bool isTerminal, const int parentPos,
+ const CodePointArrayView codePoints, const int probability) const;
};
} // namespace latinime
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_UPDATING_HELPER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.cpp
index 6a498b2f4..b8d78bf10 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.cpp
@@ -41,8 +41,8 @@ const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_SHORTCUT_TARGETS = 0x08
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_BIGRAMS = 0x04;
// Flag for non-words (typically, shortcut only entries)
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_NOT_A_WORD = 0x02;
-// Flag for blacklist
-const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_BLACKLISTED = 0x01;
+// Flag for possibly offensive words
+const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_POSSIBLY_OFFENSIVE = 0x01;
/* static */ int PtReadingUtils::getPtNodeArraySizeAndAdvancePosition(
const uint8_t *const buffer, int *const pos) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h
index a69ec4435..6a2bf5d3c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h
@@ -54,8 +54,8 @@ class PatriciaTrieReadingUtils {
/**
* Node Flags
*/
- static AK_FORCE_INLINE bool isBlacklisted(const NodeFlags flags) {
- return (flags & FLAG_IS_BLACKLISTED) != 0;
+ static AK_FORCE_INLINE bool isPossiblyOffensive(const NodeFlags flags) {
+ return (flags & FLAG_IS_POSSIBLY_OFFENSIVE) != 0;
}
static AK_FORCE_INLINE bool isNotAWord(const NodeFlags flags) {
@@ -82,12 +82,12 @@ class PatriciaTrieReadingUtils {
return FLAG_CHILDREN_POSITION_TYPE_NOPOSITION != (MASK_CHILDREN_POSITION_TYPE & flags);
}
- static AK_FORCE_INLINE NodeFlags createAndGetFlags(const bool isBlacklisted,
+ static AK_FORCE_INLINE NodeFlags createAndGetFlags(const bool isPossiblyOffensive,
const bool isNotAWord, const bool isTerminal, const bool hasShortcutTargets,
const bool hasBigrams, const bool hasMultipleChars,
const int childrenPositionFieldSize) {
NodeFlags nodeFlags = 0;
- nodeFlags = isBlacklisted ? (nodeFlags | FLAG_IS_BLACKLISTED) : nodeFlags;
+ nodeFlags = isPossiblyOffensive ? (nodeFlags | FLAG_IS_POSSIBLY_OFFENSIVE) : nodeFlags;
nodeFlags = isNotAWord ? (nodeFlags | FLAG_IS_NOT_A_WORD) : nodeFlags;
nodeFlags = isTerminal ? (nodeFlags | FLAG_IS_TERMINAL) : nodeFlags;
nodeFlags = hasShortcutTargets ? (nodeFlags | FLAG_HAS_SHORTCUT_TARGETS) : nodeFlags;
@@ -127,7 +127,7 @@ class PatriciaTrieReadingUtils {
static const NodeFlags FLAG_HAS_SHORTCUT_TARGETS;
static const NodeFlags FLAG_HAS_BIGRAMS;
static const NodeFlags FLAG_IS_NOT_A_WORD;
- static const NodeFlags FLAG_IS_BLACKLISTED;
+ static const NodeFlags FLAG_IS_POSSIBLY_OFFENSIVE;
};
} // namespace latinime
#endif /* LATINIME_PATRICIA_TRIE_NODE_READING_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
index 3ff1829bd..585e87a24 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
@@ -145,7 +145,18 @@ class PtNodeParams {
}
AK_FORCE_INLINE bool isBlacklisted() const {
- return PatriciaTrieReadingUtils::isBlacklisted(mFlags);
+ // Note: this method will be removed in the next change.
+ // It is used in getProbabilityOfWord and getWordAttributes for both v402 and v403.
+ // * getProbabilityOfWord will be changed to no longer return NOT_A_PROBABILITY
+ // when isBlacklisted (i.e. to only check if isNotAWord or isDeleted)
+ // * getWordAttributes will be changed to always return blacklisted=false and
+ // isPossiblyOffensive according to the function below (instead of the current
+ // behaviour of checking if the probability is zero)
+ return PatriciaTrieReadingUtils::isPossiblyOffensive(mFlags);
+ }
+
+ AK_FORCE_INLINE bool isPossiblyOffensive() const {
+ return PatriciaTrieReadingUtils::isPossiblyOffensive(mFlags);
}
AK_FORCE_INLINE bool isNotAWord() const {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
index b7f1199c5..ca44da9fb 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
@@ -476,8 +476,8 @@ const WordProperty PatriciaTriePolicy::getWordProperty(
}
}
const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
- ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
- HistoricalInfo(), std::move(shortcuts));
+ ptNodeParams.isNotAWord(), ptNodeParams.isPossiblyOffensive(),
+ ptNodeParams.getProbability(), HistoricalInfo(), std::move(shortcuts));
return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams);
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
index 794c63ffd..3488f7d2a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
@@ -342,7 +342,7 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeFlags(const int ptNodePos, const bo
// Create node flags and write them.
PatriciaTrieReadingUtils::NodeFlags nodeFlags =
PatriciaTrieReadingUtils::createAndGetFlags(false /* isNotAWord */,
- false /* isBlacklisted */, isTerminal, false /* hasShortcutTargets */,
+ false /* isPossiblyOffensive */, isTerminal, false /* hasShortcutTargets */,
false /* hasBigrams */, hasMultipleChars, CHILDREN_POSITION_FIELD_SIZE);
if (!DynamicPtWritingUtils::writeFlags(mTrieBuffer, nodeFlags, ptNodePos)) {
AKLOGE("Cannot write PtNode flags. flags: %x, pos: %d", nodeFlags, ptNodePos);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index ead1bde50..d90066cec 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -299,7 +299,8 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramContext *const ngramContex
}
const UnigramProperty beginningOfSentenceUnigramProperty(
true /* representsBeginningOfSentence */, true /* isNotAWord */,
- false /* isBlacklisted */, MAX_PROBABILITY /* probability */, HistoricalInfo());
+ false /* isBlacklisted */, false /* isPossiblyOffensive */,
+ MAX_PROBABILITY /* probability */, HistoricalInfo());
if (!addUnigramEntry(ngramContext->getNthPrevWordCodePoints(1 /* n */),
&beginningOfSentenceUnigramProperty)) {
AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
@@ -375,8 +376,9 @@ bool Ver4PatriciaTriePolicy::updateEntriesForWordWithNgramContext(
if (wordId == NOT_A_WORD_ID) {
// The word is not in the dictionary.
const UnigramProperty unigramProperty(false /* representsBeginningOfSentence */,
- false /* isNotAWord */, false /* isBlacklisted */, NOT_A_PROBABILITY,
- HistoricalInfo(historicalInfo.getTimestamp(), 0 /* level */, 0 /* count */));
+ false /* isNotAWord */, false /* isBlacklisted */, false /* isPossiblyOffensive */,
+ NOT_A_PROBABILITY, HistoricalInfo(historicalInfo.getTimestamp(), 0 /* level */,
+ 0 /* count */));
if (!addUnigramEntry(wordCodePoints, &unigramProperty)) {
AKLOGE("Cannot add unigarm entry in updateEntriesForWordWithNgramContext().");
return false;
@@ -391,7 +393,7 @@ bool Ver4PatriciaTriePolicy::updateEntriesForWordWithNgramContext(
&& ngramContext->isNthPrevWordBeginningOfSentence(1 /* n */)) {
const UnigramProperty beginningOfSentenceUnigramProperty(
true /* representsBeginningOfSentence */,
- true /* isNotAWord */, false /* isBlacklisted */, NOT_A_PROBABILITY,
+ true /* isNotAWord */, false /* isPossiblyOffensive */, NOT_A_PROBABILITY,
HistoricalInfo(historicalInfo.getTimestamp(), 0 /* level */, 0 /* count */));
if (!addUnigramEntry(ngramContext->getNthPrevWordCodePoints(1 /* n */),
&beginningOfSentenceUnigramProperty)) {
@@ -529,7 +531,8 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
}
const UnigramProperty unigramProperty(probabilityEntry.representsBeginningOfSentence(),
probabilityEntry.isNotAWord(), probabilityEntry.isBlacklisted(),
- probabilityEntry.getProbability(), *historicalInfo, std::move(shortcuts));
+ probabilityEntry.isPossiblyOffensive(), probabilityEntry.getProbability(),
+ *historicalInfo, std::move(shortcuts));
return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams);
}