diff options
author | 2012-05-29 00:25:49 -0700 | |
---|---|---|
committer | 2012-05-29 00:25:49 -0700 | |
commit | c941bc9bd3eb5b564f8b5f63ff07a44097262671 (patch) | |
tree | f64db4c02b03b9cc236fefb21b7871f1af80ac15 | |
parent | ca134ccc2a65f568741f0111dcd7c29aa6560ac6 (diff) | |
parent | 22cf695834009929a6c0fbcd75a9ff56efa04d83 (diff) | |
download | latinime-c941bc9bd3eb5b564f8b5f63ff07a44097262671.tar.gz latinime-c941bc9bd3eb5b564f8b5f63ff07a44097262671.tar.xz latinime-c941bc9bd3eb5b564f8b5f63ff07a44097262671.zip |
am 22cf6958: Merge "Split a method to reconstruct freq from uni/bi freq" into jb-dev
* commit '22cf695834009929a6c0fbcd75a9ff56efa04d83':
Split a method to reconstruct freq from uni/bi freq
-rw-r--r-- | native/jni/src/binary_format.h | 19 |
1 file changed, 12 insertions, 7 deletions
diff --git a/native/jni/src/binary_format.h b/native/jni/src/binary_format.h index 4a1649c5e..e42589b81 100644 --- a/native/jni/src/binary_format.h +++ b/native/jni/src/binary_format.h @@ -67,6 +67,7 @@ class BinaryFormat { const int length); static int getWordAtAddress(const uint8_t* const root, const int address, const int maxDepth, uint16_t* outWord); + static int computeFrequencyForBigram(const int unigramFreq, const int bigramFreq); static int getProbability(const int position, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, const int unigramFreq); @@ -529,6 +530,16 @@ static inline int backoff(const int unigramFreq) { // return unigramFreq > 8 ? unigramFreq - 8 : (0 == unigramFreq ? 0 : 8); } +inline int BinaryFormat::computeFrequencyForBigram(const int unigramFreq, const int bigramFreq) { + // We divide the range [unigramFreq..255] in 16.5 steps - in other words, we want the + // unigram frequency to be the median value of the 17th step from the top. A value of + // 0 for the bigram frequency represents the middle of the 16th step from the top, + // while a value of 15 represents the middle of the top step. + // See makedict.BinaryDictInputOutput for details. + const float stepSize = ((float)MAX_FREQ - unigramFreq) / (1.5f + MAX_BIGRAM_FREQ); + return (int)(unigramFreq + bigramFreq * stepSize); +} + // This returns a probability in log space. inline int BinaryFormat::getProbability(const int position, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, const int unigramFreq) { @@ -537,13 +548,7 @@ inline int BinaryFormat::getProbability(const int position, const std::map<int, const std::map<int, int>::const_iterator bigramFreqIt = bigramMap->find(position); if (bigramFreqIt != bigramMap->end()) { const int bigramFreq = bigramFreqIt->second; - // We divide the range [unigramFreq..255] in 16.5 steps - in other words, we want the - // unigram frequency to be the median value of the 17th step from the top. 
A value of - // 0 for the bigram frequency represents the middle of the 16th step from the top, - // while a value of 15 represents the middle of the top step. - // See makedict.BinaryDictInputOutput for details. - const float stepSize = ((float)MAX_FREQ - unigramFreq) / (1.5f + MAX_BIGRAM_FREQ); - return (int)(unigramFreq + bigramFreq * stepSize); + return computeFrequencyForBigram(unigramFreq, bigramFreq); } else { return backoff(unigramFreq); } |