about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- native/jni/src/suggest/core/dicnode/dic_node_utils.cpp | 4
-rw-r--r-- native/jni/src/suggest/core/dictionary/dictionary.cpp | 7
-rw-r--r-- native/jni/src/suggest/core/dictionary/word_attributes.h | 60
-rw-r--r-- native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h | 5
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp | 23
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h | 4
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp | 20
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h | 4
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp | 15
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h | 2
10 files changed, 116 insertions, 28 deletions
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
index 19f92cc0b..26c7e3357 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
+++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
@@ -72,10 +72,10 @@ namespace latinime {
if (dicNode->hasMultipleWords() && !dicNode->isValidMultipleWordSuggestion()) {
return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
}
- const int probability = dictionaryStructurePolicy->getProbabilityOfWordInContext(
+ const WordAttributes wordAttributes = dictionaryStructurePolicy->getWordAttributesInContext(
dicNode->getPrevWordIds(), dicNode->getWordId(), multiBigramMap);
// TODO: This equation to calculate the improbability looks unreasonable. Investigate this.
- const float cost = static_cast<float>(MAX_PROBABILITY - probability)
+ const float cost = static_cast<float>(MAX_PROBABILITY - wordAttributes.getProbability())
/ static_cast<float>(MAX_PROBABILITY);
return cost;
}
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index c9725d1b0..1de405104 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -84,9 +84,10 @@ void Dictionary::NgramListenerForPrediction::onVisitEntry(const int ngramProbabi
if (codePointCount <= 0) {
return;
}
- const int probability = mDictStructurePolicy->getProbabilityOfWordInContext(mPrevWordIds.data(),
- targetWordId, nullptr /* multiBigramMap */);
- mSuggestionResults->addPrediction(targetWordCodePoints, codePointCount, probability);
+ const WordAttributes wordAttributes = mDictStructurePolicy->getWordAttributesInContext(
+ mPrevWordIds.data(), targetWordId, nullptr /* multiBigramMap */);
+ mSuggestionResults->addPrediction(targetWordCodePoints, codePointCount,
+ wordAttributes.getProbability());
}
void Dictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
diff --git a/native/jni/src/suggest/core/dictionary/word_attributes.h b/native/jni/src/suggest/core/dictionary/word_attributes.h
new file mode 100644
index 000000000..6e9da3570
--- /dev/null
+++ b/native/jni/src/suggest/core/dictionary/word_attributes.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_WORD_ATTRIBUTES_H
+#define LATINIME_WORD_ATTRIBUTES_H
+
+#include "defines.h"
+
+namespace latinime {
+
+// Probability plus blacklist / not-a-word / possibly-offensive flags of one word.
+class WordAttributes {
+ public:
+    // Creates invalid attributes: NOT_A_PROBABILITY with every flag cleared.
+    WordAttributes()
+            : mProbability(NOT_A_PROBABILITY), mIsBlacklisted(false), mIsNotAWord(false),
+              mIsPossiblyOffensive(false) {}
+
+    // Stores the arguments exactly as given; no validation or clamping is performed.
+    WordAttributes(const int probability, const bool isBlacklisted, const bool isNotAWord,
+            const bool isPossiblyOffensive)
+            : mProbability(probability), mIsBlacklisted(isBlacklisted), mIsNotAWord(isNotAWord),
+              mIsPossiblyOffensive(isPossiblyOffensive) {}
+
+    // Probability passed at construction; NOT_A_PROBABILITY for invalid attributes.
+    int getProbability() const { return mProbability; }
+
+    // Blacklist flag passed at construction (false for invalid attributes).
+    bool isBlacklisted() const { return mIsBlacklisted; }
+
+    // Not-a-word flag passed at construction (false for invalid attributes).
+    bool isNotAWord() const { return mIsNotAWord; }
+
+    // Possibly-offensive flag passed at construction (false for invalid attributes).
+    bool isPossiblyOffensive() const { return mIsPossiblyOffensive; }
+
+ private:
+    DISALLOW_ASSIGNMENT_OPERATOR(WordAttributes);
+
+    int mProbability;
+    bool mIsBlacklisted;
+    bool mIsNotAWord;
+    bool mIsPossiblyOffensive;
+};
+
+} // namespace latinime
+#endif /* LATINIME_WORD_ATTRIBUTES_H */
diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
index 4e55418ae..7414f696c 100644
--- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
@@ -22,6 +22,7 @@
#include "defines.h"
#include "suggest/core/dictionary/binary_dictionary_shortcut_iterator.h"
#include "suggest/core/dictionary/property/word_property.h"
+#include "suggest/core/dictionary/word_attributes.h"
#include "utils/int_array_view.h"
namespace latinime {
@@ -57,8 +58,8 @@ class DictionaryStructureWithBufferPolicy {
virtual int getWordId(const CodePointArrayView wordCodePoints,
const bool forceLowerCaseSearch) const = 0;
- virtual int getProbabilityOfWordInContext(const int *const prevWordIds, const int wordId,
- MultiBigramMap *const multiBigramMap) const = 0;
+ virtual const WordAttributes getWordAttributesInContext(const int *const prevWordIds,
+ const int wordId, MultiBigramMap *const multiBigramMap) const = 0;
// TODO: Remove
virtual int getProbability(const int unigramProbability, const int bigramProbability) const = 0;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
index df3daa816..547cc997c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
@@ -118,24 +118,33 @@ int Ver4PatriciaTriePolicy::getWordId(const CodePointArrayView wordCodePoints,
return getWordIdFromTerminalPtNodePos(ptNodePos);
}
-int Ver4PatriciaTriePolicy::getProbabilityOfWordInContext(const int *const prevWordIds,
- const int wordId, MultiBigramMap *const multiBigramMap) const {
+const WordAttributes Ver4PatriciaTriePolicy::getWordAttributesInContext(
+ const int *const prevWordIds, const int wordId,
+ MultiBigramMap *const multiBigramMap) const {  // Returns probability and flags for wordId.
if (wordId == NOT_A_WORD_ID) {
- return NOT_A_PROBABILITY;
+ return WordAttributes();  // default-constructed, i.e. invalid attributes
}
const int ptNodePos = getTerminalPtNodePosFromWordId(wordId);
const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
if (multiBigramMap) {
- return multiBigramMap->getBigramProbability(this /* structurePolicy */, prevWordIds,
- wordId, ptNodeParams.getProbability());
+ const int probability = multiBigramMap->getBigramProbability(this /* structurePolicy */,
+ prevWordIds, wordId, ptNodeParams.getProbability());  // node probability as unigram input
+ return getWordAttributes(probability, ptNodeParams);  // a bigram map wins over prevWordIds
}
if (prevWordIds) {
const int probability = getProbabilityOfWord(prevWordIds, wordId);
if (probability != NOT_A_PROBABILITY) {
- return probability;
+ return getWordAttributes(probability, ptNodeParams);  // in-context probability was found
}
}
- return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
+ return getWordAttributes(getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY),
+ ptNodeParams);  // fallback: unigram probability with no bigram probability
+}
+
+const WordAttributes Ver4PatriciaTriePolicy::getWordAttributes(const int probability,
+ const PtNodeParams &ptNodeParams) const {  // Packs probability with the PtNode's flags.
+ return WordAttributes(probability, ptNodeParams.isBlacklisted(), ptNodeParams.isNotAWord(),
+ ptNodeParams.getProbability() == 0);  // possibly offensive iff the node's own probability is 0
}
int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
index 06d704174..871b556e1 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
@@ -91,7 +91,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int getWordId(const CodePointArrayView wordCodePoints, const bool forceLowerCaseSearch) const;
- int getProbabilityOfWordInContext(const int *const prevWordIds, const int wordId,
+ const WordAttributes getWordAttributesInContext(const int *const prevWordIds, const int wordId,
MultiBigramMap *const multiBigramMap) const;
int getProbability(const int unigramProbability, const int bigramProbability) const;
@@ -166,6 +166,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int getShortcutPositionOfPtNode(const int ptNodePos) const;
int getWordIdFromTerminalPtNodePos(const int ptNodePos) const;
int getTerminalPtNodePosFromWordId(const int wordId) const;
+ const WordAttributes getWordAttributes(const int probability,
+ const PtNodeParams &ptNodeParams) const;
};
} // namespace v402
} // namespace backward
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
index 80bbf47c0..44148e817 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
@@ -282,25 +282,33 @@ int PatriciaTriePolicy::getWordId(const CodePointArrayView wordCodePoints,
return getWordIdFromTerminalPtNodePos(ptNodePos);
}
-int PatriciaTriePolicy::getProbabilityOfWordInContext(const int *const prevWordIds,
+const WordAttributes PatriciaTriePolicy::getWordAttributesInContext(const int *const prevWordIds,
const int wordId, MultiBigramMap *const multiBigramMap) const {
if (wordId == NOT_A_WORD_ID) {
- return NOT_A_PROBABILITY;
+ return WordAttributes();  // default-constructed, i.e. invalid attributes
}
const int ptNodePos = getTerminalPtNodePosFromWordId(wordId);
const PtNodeParams ptNodeParams =
mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
if (multiBigramMap) {
- return multiBigramMap->getBigramProbability(this /* structurePolicy */, prevWordIds,
- wordId, ptNodeParams.getProbability());
+ const int probability = multiBigramMap->getBigramProbability(this /* structurePolicy */,
+ prevWordIds, wordId, ptNodeParams.getProbability());  // node probability as unigram input
+ return getWordAttributes(probability, ptNodeParams);  // a bigram map wins over prevWordIds
}
if (prevWordIds) {
const int bigramProbability = getProbabilityOfWord(prevWordIds, wordId);
if (bigramProbability != NOT_A_PROBABILITY) {
- return bigramProbability;
+ return getWordAttributes(bigramProbability, ptNodeParams);  // in-context probability found
}
}
- return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
+ return getWordAttributes(getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY),
+ ptNodeParams);  // fallback: unigram probability with no bigram probability
+}
+
+const WordAttributes PatriciaTriePolicy::getWordAttributes(const int probability,
+ const PtNodeParams &ptNodeParams) const {  // Packs probability with the PtNode's flags.
+ return WordAttributes(probability, ptNodeParams.isBlacklisted(), ptNodeParams.isNotAWord(),
+ ptNodeParams.getProbability() == 0);  // possibly offensive iff the node's own probability is 0
}
int PatriciaTriePolicy::getProbability(const int unigramProbability,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
index a2d6b6fa6..8c1665d7d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
@@ -66,7 +66,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int getWordId(const CodePointArrayView wordCodePoints, const bool forceLowerCaseSearch) const;
- int getProbabilityOfWordInContext(const int *const prevWordIds, const int wordId,
+ const WordAttributes getWordAttributesInContext(const int *const prevWordIds, const int wordId,
MultiBigramMap *const multiBigramMap) const;
int getProbability(const int unigramProbability, const int bigramProbability) const;
@@ -163,6 +163,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
DicNodeVector *const childDicNodes) const;
int getWordIdFromTerminalPtNodePos(const int ptNodePos) const;
int getTerminalPtNodePosFromWordId(const int wordId) const;
+ const WordAttributes getWordAttributes(const int probability,
+ const PtNodeParams &ptNodeParams) const;
};
} // namespace latinime
#endif // LATINIME_PATRICIA_TRIE_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index 308c35585..e4462550e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -113,14 +113,19 @@ int Ver4PatriciaTriePolicy::getWordId(const CodePointArrayView wordCodePoints,
return ptNodeParams.getTerminalId();
}
-int Ver4PatriciaTriePolicy::getProbabilityOfWordInContext(const int *const prevWordIds,
- const int wordId, MultiBigramMap *const multiBigramMap) const {
+const WordAttributes Ver4PatriciaTriePolicy::getWordAttributesInContext(
+ const int *const prevWordIds, const int wordId,
+ MultiBigramMap *const multiBigramMap) const {  // multiBigramMap is not used in this body
if (wordId == NOT_A_WORD_ID) {
- return NOT_A_PROBABILITY;
+ return WordAttributes();  // default-constructed, i.e. invalid attributes
}
+ const int ptNodePos =
+ mBuffers->getTerminalPositionLookupTable()->getTerminalPtNodePosition(wordId);  // word id -> node
+ const PtNodeParams ptNodeParams = mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
// TODO: Support n-gram.
- return mBuffers->getLanguageModelDictContent()->getWordProbability(
- WordIdArrayView::singleElementView(prevWordIds), wordId);
+ return WordAttributes(mBuffers->getLanguageModelDictContent()->getWordProbability(
+ WordIdArrayView::singleElementView(prevWordIds), wordId), ptNodeParams.isBlacklisted(),
+ ptNodeParams.isNotAWord(), ptNodeParams.getProbability() == 0);  // offensive iff node prob is 0
}
int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
index c9df9df4b..980c16e4a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
@@ -68,7 +68,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int getWordId(const CodePointArrayView wordCodePoints, const bool forceLowerCaseSearch) const;
- int getProbabilityOfWordInContext(const int *const prevWordIds, const int wordId,
+ const WordAttributes getWordAttributesInContext(const int *const prevWordIds, const int wordId,
MultiBigramMap *const multiBigramMap) const;
int getProbability(const int unigramProbability, const int bigramProbability) const;