diff options
author | 2012-05-29 16:50:25 +0900 | |
---|---|---|
committer | 2012-05-29 16:50:25 +0900 | |
commit | 46fe49fc05df02228222b8a8e49d4cc8e1f0ea3c (patch) | |
tree | e2c06c93e01d559c25ef9e1468769ce7bc7e7d47 | |
parent | e308459531a4dd64ee80aa76e351725180ad856e (diff) | |
download | latinime-46fe49fc05df02228222b8a8e49d4cc8e1f0ea3c.tar.gz latinime-46fe49fc05df02228222b8a8e49d4cc8e1f0ea3c.tar.xz latinime-46fe49fc05df02228222b8a8e49d4cc8e1f0ea3c.zip |
Add a comment on a caveat for future reference.
Change-Id: I328a0cd4346275aac960e1369bf370688a004e11
-rw-r--r-- | native/jni/src/bigram_dictionary.cpp | 5 |
1 files changed, 5 insertions, 0 deletions
diff --git a/native/jni/src/bigram_dictionary.cpp b/native/jni/src/bigram_dictionary.cpp index 8d0c8597f..eb4bf8d1a 100644 --- a/native/jni/src/bigram_dictionary.cpp +++ b/native/jni/src/bigram_dictionary.cpp @@ -126,6 +126,11 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in // codesSize == 0 means we are trying to find bigram predictions. if (codesSize < 1 || checkFirstCharacter(bigramBuffer)) { const int bigramFreq = UnigramDictionary::MASK_ATTRIBUTE_FREQUENCY & bigramFlags; + // Due to space constraints, the frequency for bigrams is approximate - the lower the + // unigram frequency, the worse the precision. The theoretical maximum error in + // resulting frequency is 8 - although in practice it's never bigger than 3 or 4 + // in very bad cases. This means that sometimes, we'll see some bigrams inverted + // here, but it can't get too bad. const int frequency = BinaryFormat::computeFrequencyForBigram(unigramFreq, bigramFreq); if (addWordBigram(bigramBuffer, length, frequency)) { |