diff options
author | 2012-05-10 11:56:33 -0700 | |
---|---|---|
committer | 2012-05-10 11:56:33 -0700 | |
commit | f5a2399300f7420115695985a43e4fe489555eba (patch) | |
tree | a5f893bf3a22ecaec04c364406311b5e6419101f /native/jni/src/binary_format.h | |
parent | dd589f0ffb4c8bf74dfe28039e23a03b9696be1f (diff) | |
parent | 6cb23a49bfc36ea2fad56f1c7f3730cd38667627 (diff) | |
download | latinime-f5a2399300f7420115695985a43e4fe489555eba.tar.gz latinime-f5a2399300f7420115695985a43e4fe489555eba.tar.xz latinime-f5a2399300f7420115695985a43e4fe489555eba.zip |
am 6cb23a49: Merge "Perform the actual bigram frequency lookup." into jb-dev
* commit '6cb23a49bfc36ea2fad56f1c7f3730cd38667627':
Perform the actual bigram frequency lookup.
Diffstat (limited to 'native/jni/src/binary_format.h')
-rw-r--r-- | native/jni/src/binary_format.h | 20 |
1 files changed, 13 insertions, 7 deletions
```diff
diff --git a/native/jni/src/binary_format.h b/native/jni/src/binary_format.h
index 71ade48a3..b87593ca9 100644
--- a/native/jni/src/binary_format.h
+++ b/native/jni/src/binary_format.h
@@ -18,6 +18,7 @@
 #define LATINIME_BINARY_FORMAT_H
 
 #include <limits>
+#include "bloom_filter.h"
 #include "unigram_dictionary.h"
 
 namespace latinime {
@@ -66,8 +67,8 @@ class BinaryFormat {
             const int length);
     static int getWordAtAddress(const uint8_t* const root, const int address,
             const int maxDepth, uint16_t* outWord);
-    static int getProbability(const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
-            const int unigramFreq);
+    static int getProbability(const int position, const std::map<int, int> *bigramMap,
+            const uint8_t *bigramFilter, const int unigramFreq);
 
     // Flags for special processing
     // Those *must* match the flags in makedict (BinaryDictInputOutput#*_PROCESSING_FLAG) or
@@ -520,13 +521,18 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t* const root, const int a
 }
 
 // This should probably return a probability in log space.
-inline int BinaryFormat::getProbability(const std::map<int, int> *bigramMap,
+inline int BinaryFormat::getProbability(const int position, const std::map<int, int> *bigramMap,
         const uint8_t *bigramFilter, const int unigramFreq) {
-    // TODO: use the bigram filter for fast rejection, then the bigram map for lookup
-    // to get the bigram probability. If the bigram is not found, use the unigram frequency.
-    // Don't forget that they can be null.
+    if (!bigramMap || !bigramFilter) return unigramFreq;
+    if (!isInFilter(bigramFilter, position)) return unigramFreq;
+    const std::map<int, int>::const_iterator bigramFreq = bigramMap->find(position);
+    if (bigramFreq != bigramMap->end()) {
+        // TODO: return the frequency in bigramFreq->second
+        return unigramFreq;
+    } else {
+        return unigramFreq;
+    }
     // TODO: if the unigram frequency is used, compute the actual probability
-    return unigramFreq;
 }
 
 } // namespace latinime
```