about | summary | refs | log | tree | commit | diff | stats
path: root/native/jni/src/binary_format.h
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni/src/binary_format.h')
-rw-r--r-- native/jni/src/binary_format.h 20
1 files changed, 13 insertions, 7 deletions
diff --git a/native/jni/src/binary_format.h b/native/jni/src/binary_format.h
index 71ade48a3..b87593ca9 100644
--- a/native/jni/src/binary_format.h
+++ b/native/jni/src/binary_format.h
@@ -18,6 +18,7 @@
#define LATINIME_BINARY_FORMAT_H
#include <limits>
+#include "bloom_filter.h"
#include "unigram_dictionary.h"
namespace latinime {
@@ -66,8 +67,8 @@ class BinaryFormat {
const int length);
static int getWordAtAddress(const uint8_t* const root, const int address, const int maxDepth,
uint16_t* outWord);
- static int getProbability(const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
- const int unigramFreq);
+ static int getProbability(const int position, const std::map<int, int> *bigramMap,
+ const uint8_t *bigramFilter, const int unigramFreq);
// Flags for special processing
// Those *must* match the flags in makedict (BinaryDictInputOutput#*_PROCESSING_FLAG) or
@@ -520,13 +521,18 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t* const root, const int a
}
// This should probably return a probability in log space.
-inline int BinaryFormat::getProbability(const std::map<int, int> *bigramMap,
+inline int BinaryFormat::getProbability(const int position, const std::map<int, int> *bigramMap,
const uint8_t *bigramFilter, const int unigramFreq) {
- // TODO: use the bigram filter for fast rejection, then the bigram map for lookup
- // to get the bigram probability. If the bigram is not found, use the unigram frequency.
- // Don't forget that they can be null.
+ if (!bigramMap || !bigramFilter) return unigramFreq;
+ if (!isInFilter(bigramFilter, position)) return unigramFreq;
+ const std::map<int, int>::const_iterator bigramFreq = bigramMap->find(position);
+ if (bigramFreq != bigramMap->end()) {
+ // TODO: return the frequency in bigramFreq->second
+ return unigramFreq;
+ } else {
+ return unigramFreq;
+ }
// TODO: if the unigram frequency is used, compute the actual probability
- return unigramFreq;
}
} // namespace latinime