aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_utils.cpp7
-rw-r--r--native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h4
-rw-r--r--native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp3
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.cpp2
-rw-r--r--native/jni/src/suggest/core/dictionary/multi_bigram_map.h21
-rw-r--r--native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h5
-rw-r--r--native/jni/src/suggest/core/suggest.cpp4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp18
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp18
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/probability_utils.h (renamed from native/jni/src/suggest/core/dictionary/probability_utils.h)0
12 files changed, 62 insertions, 28 deletions
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
index bb54e608e..e81591992 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
+++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
@@ -21,7 +21,6 @@
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_vector.h"
#include "suggest/core/dictionary/multi_bigram_map.h"
-#include "suggest/core/dictionary/probability_utils.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "utils/char_utils.h"
@@ -93,13 +92,15 @@ namespace latinime {
if (NOT_A_VALID_WORD_POS == wordPos || NOT_A_VALID_WORD_POS == prevWordPos) {
// Note: Normally wordPos comes from the dictionary and should never equal
// NOT_A_VALID_WORD_POS.
- return ProbabilityUtils::backoff(unigramProbability);
+ return dictionaryStructurePolicy->getProbability(unigramProbability,
+ NOT_A_PROBABILITY);
}
if (multiBigramMap) {
return multiBigramMap->getBigramProbability(dictionaryStructurePolicy, prevWordPos,
wordPos, unigramProbability);
}
- return ProbabilityUtils::backoff(unigramProbability);
+ return dictionaryStructurePolicy->getProbability(unigramProbability,
+ NOT_A_PROBABILITY);
}
////////////////
diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h
index f437c95f6..9bc96877e 100644
--- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h
+++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h
@@ -116,10 +116,6 @@ class DicNodeStatePrevWord {
return mPrevWordStart;
}
- int16_t getPrevWordProbability() const {
- return mPrevWordProbability;
- }
-
int getPrevWordNodePos() const {
return mPrevWordNodePos;
}
diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
index e74a1dbc8..cf1cd8815 100644
--- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
@@ -23,7 +23,6 @@
#include "defines.h"
#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
#include "suggest/core/dictionary/dictionary.h"
-#include "suggest/core/dictionary/probability_utils.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "utils/char_utils.h"
@@ -131,7 +130,7 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng
// resulting probability is 8 - although in the practice it's never bigger than 3 or 4
// in very bad cases. This means that sometimes, we'll see some bigrams interverted
// here, but it can't get too bad.
- const int probability = ProbabilityUtils::computeProbabilityForBigram(
+ const int probability = mDictionaryStructurePolicy->getProbability(
unigramProbability, bigramsIt.getProbability());
addWordBigram(bigramBuffer, codePointCount, probability, outBigramProbability,
outBigramCodePoints, outputTypes);
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index 8418a608a..02ece639c 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -90,7 +90,7 @@ int Dictionary::getProbability(const int *word, int length) const {
if (NOT_A_VALID_WORD_POS == pos) {
return NOT_A_PROBABILITY;
}
- return getDictionaryStructurePolicy()->getUnigramProbability(pos);
+ return getDictionaryStructurePolicy()->getUnigramProbabilityOfPtNode(pos);
}
bool Dictionary::isValidBigram(const int *word0, int length0, const int *word1, int length1) const {
diff --git a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h
index fb4a80083..9efe5f6f9 100644
--- a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h
+++ b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h
@@ -22,7 +22,6 @@
#include "defines.h"
#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
#include "suggest/core/dictionary/bloom_filter.h"
-#include "suggest/core/dictionary/probability_utils.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "utils/hash_map_compat.h"
@@ -43,11 +42,12 @@ class MultiBigramMap {
hash_map_compat<int, BigramMap>::const_iterator mapPosition =
mBigramMaps.find(wordPosition);
if (mapPosition != mBigramMaps.end()) {
- return mapPosition->second.getBigramProbability(nextWordPosition, unigramProbability);
+ return mapPosition->second.getBigramProbability(structurePolicy, nextWordPosition,
+ unigramProbability);
}
if (mBigramMaps.size() < MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP) {
addBigramsForWordPosition(structurePolicy, wordPosition);
- return mBigramMaps[wordPosition].getBigramProbability(
+ return mBigramMaps[wordPosition].getBigramProbability(structurePolicy,
nextWordPosition, unigramProbability);
}
return readBigramProbabilityFromBinaryDictionary(structurePolicy, wordPosition,
@@ -82,17 +82,17 @@ class MultiBigramMap {
}
AK_FORCE_INLINE int getBigramProbability(
+ const DictionaryStructureWithBufferPolicy *const structurePolicy,
const int nextWordPosition, const int unigramProbability) const {
+ int bigramProbability = NOT_A_PROBABILITY;
if (mBloomFilter.isInFilter(nextWordPosition)) {
const hash_map_compat<int, int>::const_iterator bigramProbabilityIt =
mBigramMap.find(nextWordPosition);
if (bigramProbabilityIt != mBigramMap.end()) {
- const int bigramProbability = bigramProbabilityIt->second;
- return ProbabilityUtils::computeProbabilityForBigram(
- unigramProbability, bigramProbability);
+ bigramProbability = bigramProbabilityIt->second;
}
}
- return ProbabilityUtils::backoff(unigramProbability);
+ return structurePolicy->getProbability(unigramProbability, bigramProbability);
}
private:
@@ -111,17 +111,18 @@ class MultiBigramMap {
AK_FORCE_INLINE int readBigramProbabilityFromBinaryDictionary(
const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos,
const int nextWordPosition, const int unigramProbability) {
+ int bigramProbability = NOT_A_PROBABILITY;
const int bigramsListPos = structurePolicy->getBigramsPositionOfNode(nodePos);
BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
bigramsListPos);
while (bigramsIt.hasNext()) {
bigramsIt.next();
if (bigramsIt.getBigramPos() == nextWordPosition) {
- return ProbabilityUtils::computeProbabilityForBigram(
- unigramProbability, bigramsIt.getProbability());
+ bigramProbability = bigramsIt.getProbability();
+ break;
}
}
- return ProbabilityUtils::backoff(unigramProbability);
+ return structurePolicy->getProbability(unigramProbability, bigramProbability);
}
static const size_t MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP;
diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
index 532411509..c8cbbcfdf 100644
--- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
@@ -47,7 +47,10 @@ class DictionaryStructureWithBufferPolicy {
virtual int getTerminalNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const = 0;
- virtual int getUnigramProbability(const int nodePos) const = 0;
+ virtual int getProbability(const int unigramProbability,
+ const int bigramProbability) const = 0;
+
+ virtual int getUnigramProbabilityOfPtNode(const int nodePos) const = 0;
virtual int getShortcutPositionOfNode(const int nodePos) const = 0;
diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp
index 7d8dd21c5..e788e914a 100644
--- a/native/jni/src/suggest/core/suggest.cpp
+++ b/native/jni/src/suggest/core/suggest.cpp
@@ -171,7 +171,9 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
terminalIndex, doubleLetterTerminalIndex, doubleLetterLevel);
const float compoundDistance = terminalDicNode->getCompoundDistance(languageWeight)
+ doubleLetterCost;
- const bool isPossiblyOffensiveWord = terminalDicNode->getProbability() <= 0;
+ const bool isPossiblyOffensiveWord =
+ traverseSession->getDictionaryStructurePolicy()->getProbability(
+ terminalDicNode->getProbability(), NOT_A_PROBABILITY) <= 0;
const bool isExactMatch = terminalDicNode->isExactMatch();
const bool isFirstCharUppercase = terminalDicNode->isFirstCharUppercase();
// Heuristic: We exclude freq=0 first-char-uppercase words from exact match.
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
index 3b9878b82..945677b50 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
@@ -24,6 +24,7 @@
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
namespace latinime {
@@ -134,7 +135,20 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in
return NOT_A_VALID_WORD_POS;
}
-int DynamicPatriciaTriePolicy::getUnigramProbability(const int nodePos) const {
+int DynamicPatriciaTriePolicy::getProbability(const int unigramProbability,
+ const int bigramProbability) const {
+ // TODO: check mHeaderPolicy.usesForgettingCurve();
+ if (unigramProbability == NOT_A_PROBABILITY) {
+ return NOT_A_PROBABILITY;
+ } else if (bigramProbability == NOT_A_PROBABILITY) {
+ return ProbabilityUtils::backoff(unigramProbability);
+ } else {
+ return ProbabilityUtils::computeProbabilityForBigram(unigramProbability,
+ bigramProbability);
+ }
+}
+
+int DynamicPatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int nodePos) const {
if (nodePos == NOT_A_VALID_WORD_POS) {
return NOT_A_PROBABILITY;
}
@@ -144,7 +158,7 @@ int DynamicPatriciaTriePolicy::getUnigramProbability(const int nodePos) const {
if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) {
return NOT_A_PROBABILITY;
}
- return nodeReader.getProbability();
+ return getProbability(nodeReader.getProbability(), NOT_A_PROBABILITY);
}
int DynamicPatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) const {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
index 5873d3d65..cdab0e16a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
@@ -57,7 +57,9 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int getTerminalNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const;
- int getUnigramProbability(const int nodePos) const;
+ int getProbability(const int unigramProbability, const int bigramProbability) const;
+
+ int getUnigramProbabilityOfPtNode(const int nodePos) const;
int getShortcutPositionOfNode(const int nodePos) const;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp
index adcf2dbdf..d5a83a938 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp
@@ -21,6 +21,7 @@
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_vector.h"
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
namespace latinime {
@@ -306,7 +307,19 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
}
}
-int PatriciaTriePolicy::getUnigramProbability(const int nodePos) const {
+int PatriciaTriePolicy::getProbability(const int unigramProbability,
+ const int bigramProbability) const {
+ if (unigramProbability == NOT_A_PROBABILITY) {
+ return NOT_A_PROBABILITY;
+ } else if (bigramProbability == NOT_A_PROBABILITY) {
+ return ProbabilityUtils::backoff(unigramProbability);
+ } else {
+ return ProbabilityUtils::computeProbabilityForBigram(unigramProbability,
+ bigramProbability);
+ }
+}
+
+int PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int nodePos) const {
if (nodePos == NOT_A_VALID_WORD_POS) {
return NOT_A_PROBABILITY;
}
@@ -324,7 +337,8 @@ int PatriciaTriePolicy::getUnigramProbability(const int nodePos) const {
return NOT_A_PROBABILITY;
}
PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
- return PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
+ return getProbability(PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(
+ mDictRoot, &pos), NOT_A_PROBABILITY);
}
int PatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) const {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
index d0567fd85..75d976205 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
@@ -56,7 +56,9 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int getTerminalNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const;
- int getUnigramProbability(const int nodePos) const;
+ int getProbability(const int unigramProbability, const int bigramProbability) const;
+
+ int getUnigramProbabilityOfPtNode(const int nodePos) const;
int getShortcutPositionOfNode(const int nodePos) const;
diff --git a/native/jni/src/suggest/core/dictionary/probability_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/probability_utils.h
index 21fe355b8..21fe355b8 100644
--- a/native/jni/src/suggest/core/dictionary/probability_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/probability_utils.h