aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/src
diff options
context:
space:
mode:
authorJean Chalard <jchalard@google.com>2012-05-29 00:59:14 -0700
committerAndroid Git Automerger <android-git-automerger@android.com>2012-05-29 00:59:14 -0700
commite3864d429e699469644fa7ae867fd48e8206bd04 (patch)
tree44dc2e6988e36c880176d4db7a351ebd68eb9e2b /native/jni/src
parent18f650172d29800edb772d3798391b2d430426df (diff)
parentb074dfead504fba4039e4eaaded07c10244ff292 (diff)
downloadlatinime-e3864d429e699469644fa7ae867fd48e8206bd04.tar.gz
latinime-e3864d429e699469644fa7ae867fd48e8206bd04.tar.xz
latinime-e3864d429e699469644fa7ae867fd48e8206bd04.zip
am b074dfea: Merge "Add a comment on a caveat for future reference." into jb-dev
* commit 'b074dfead504fba4039e4eaaded07c10244ff292': Add a comment on a caveat for future reference.
Diffstat (limited to 'native/jni/src')
-rw-r--r--native/jni/src/bigram_dictionary.cpp5
1 files changed, 5 insertions, 0 deletions
diff --git a/native/jni/src/bigram_dictionary.cpp b/native/jni/src/bigram_dictionary.cpp
index 8d0c8597f..eb4bf8d1a 100644
--- a/native/jni/src/bigram_dictionary.cpp
+++ b/native/jni/src/bigram_dictionary.cpp
@@ -126,6 +126,11 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in
// codesSize == 0 means we are trying to find bigram predictions.
if (codesSize < 1 || checkFirstCharacter(bigramBuffer)) {
const int bigramFreq = UnigramDictionary::MASK_ATTRIBUTE_FREQUENCY & bigramFlags;
+ // Due to space constraints, the frequency for bigrams is approximate - the lower the
+ // unigram frequency, the worse the precision. The theoretical maximum error in the
+ // resulting frequency is 8 - although in practice it's never bigger than 3 or 4
+ // in very bad cases. This means that sometimes, we'll see some bigrams in inverted
+ // order here, but it can't get too bad.
const int frequency =
BinaryFormat::computeFrequencyForBigram(unigramFreq, bigramFreq);
if (addWordBigram(bigramBuffer, length, frequency)) {