aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/src
diff options
context:
space:
mode:
author: Jean Chalard <jchalard@google.com> 2012-05-29 00:23:18 -0700
committer: Android (Google) Code Review <android-gerrit@google.com> 2012-05-29 00:23:18 -0700
commit: 22cf695834009929a6c0fbcd75a9ff56efa04d83 (patch)
tree: 3e114e1f5bac23e97e012d881a02efdc6f787104 /native/jni/src
parent: bf61bba35feda09a2ad08475481ce042879ff5bb (diff)
parent: 19ebd936462ee6e4796b8755be82d67437406845 (diff)
downloadlatinime-22cf695834009929a6c0fbcd75a9ff56efa04d83.tar.gz
latinime-22cf695834009929a6c0fbcd75a9ff56efa04d83.tar.xz
latinime-22cf695834009929a6c0fbcd75a9ff56efa04d83.zip
Merge "Split a method to reconstruct freq from uni/bi freq" into jb-dev
Diffstat (limited to 'native/jni/src')
-rw-r--r-- native/jni/src/binary_format.h 19
1 files changed, 12 insertions, 7 deletions
diff --git a/native/jni/src/binary_format.h b/native/jni/src/binary_format.h
index 4a1649c5e..e42589b81 100644
--- a/native/jni/src/binary_format.h
+++ b/native/jni/src/binary_format.h
@@ -67,6 +67,7 @@ class BinaryFormat {
const int length);
static int getWordAtAddress(const uint8_t* const root, const int address, const int maxDepth,
uint16_t* outWord);
+ static int computeFrequencyForBigram(const int unigramFreq, const int bigramFreq);
static int getProbability(const int position, const std::map<int, int> *bigramMap,
const uint8_t *bigramFilter, const int unigramFreq);
@@ -529,6 +530,16 @@ static inline int backoff(const int unigramFreq) {
// return unigramFreq > 8 ? unigramFreq - 8 : (0 == unigramFreq ? 0 : 8);
}
+inline int BinaryFormat::computeFrequencyForBigram(const int unigramFreq, const int bigramFreq) {
+ // Rebuilds a frequency from a unigram frequency and a discretized bigram frequency.
+ // The range [unigramFreq..MAX_FREQ] (described as [unigramFreq..255]) is divided into
+ // 1.5 + MAX_BIGRAM_FREQ steps (described as 16.5); the result is unigramFreq plus bigramFreq
+ // steps, truncated to int. Intent: 0 = middle of the 16th step from the top, 15 = middle of the
+ // top step. NOTE(review): 0 returns unigramFreq itself; check makedict.BinaryDictInputOutput.
+ const float stepSize = ((float)MAX_FREQ - unigramFreq) / (1.5f + MAX_BIGRAM_FREQ);
+ return (int)(unigramFreq + bigramFreq * stepSize);
+}
+
// This returns a probability in log space.
inline int BinaryFormat::getProbability(const int position, const std::map<int, int> *bigramMap,
const uint8_t *bigramFilter, const int unigramFreq) {
@@ -537,13 +548,7 @@ inline int BinaryFormat::getProbability(const int position, const std::map<int,
const std::map<int, int>::const_iterator bigramFreqIt = bigramMap->find(position);
if (bigramFreqIt != bigramMap->end()) {
const int bigramFreq = bigramFreqIt->second;
- // We divide the range [unigramFreq..255] in 16.5 steps - in other words, we want the
- // unigram frequency to be the median value of the 17th step from the top. A value of
- // 0 for the bigram frequency represents the middle of the 16th step from the top,
- // while a value of 15 represents the middle of the top step.
- // See makedict.BinaryDictInputOutput for details.
- const float stepSize = ((float)MAX_FREQ - unigramFreq) / (1.5f + MAX_BIGRAM_FREQ);
- return (int)(unigramFreq + bigramFreq * stepSize);
+ return computeFrequencyForBigram(unigramFreq, bigramFreq);
} else {
return backoff(unigramFreq);
}