aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Jean Chalard <jchalard@google.com> 2012-05-29 00:40:32 -0700
committer: Android Git Automerger <android-git-automerger@android.com> 2012-05-29 00:40:32 -0700
commit: 01fcf0dab0393ddf08b678d5e31560e7020335a4 (patch)
tree: 712d7ad40b7bba0267e9c9a3c979dd9796a8b9e8
parent: 14c72f071ea951a4ad5ce068c7944b135e859e48 (diff)
parent: 18f650172d29800edb772d3798391b2d430426df (diff)
download: latinime-01fcf0dab0393ddf08b678d5e31560e7020335a4.tar.gz
latinime-01fcf0dab0393ddf08b678d5e31560e7020335a4.tar.xz
latinime-01fcf0dab0393ddf08b678d5e31560e7020335a4.zip
am 18f65017: am bfba64bc: Merge "Compute the correct frequency for bigram prediction" into jb-dev
* commit '18f650172d29800edb772d3798391b2d430426df': Compute the correct frequency for bigram prediction
-rw-r--r-- native/jni/src/bigram_dictionary.cpp | 7
-rw-r--r-- native/jni/src/binary_format.h | 6
2 files changed, 9 insertions, 4 deletions
diff --git a/native/jni/src/bigram_dictionary.cpp b/native/jni/src/bigram_dictionary.cpp
index 8c73f4400..8d0c8597f 100644
--- a/native/jni/src/bigram_dictionary.cpp
+++ b/native/jni/src/bigram_dictionary.cpp
@@ -117,14 +117,17 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in
do {
bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
uint16_t bigramBuffer[MAX_WORD_LENGTH];
+ int unigramFreq;
const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags,
&pos);
const int length = BinaryFormat::getWordAtAddress(root, bigramPos, MAX_WORD_LENGTH,
- bigramBuffer);
+ bigramBuffer, &unigramFreq);
// codesSize == 0 means we are trying to find bigram predictions.
if (codesSize < 1 || checkFirstCharacter(bigramBuffer)) {
- const int frequency = UnigramDictionary::MASK_ATTRIBUTE_FREQUENCY & bigramFlags;
+ const int bigramFreq = UnigramDictionary::MASK_ATTRIBUTE_FREQUENCY & bigramFlags;
+ const int frequency =
+ BinaryFormat::computeFrequencyForBigram(unigramFreq, bigramFreq);
if (addWordBigram(bigramBuffer, length, frequency)) {
++bigramCount;
}
diff --git a/native/jni/src/binary_format.h b/native/jni/src/binary_format.h
index 85fdd9418..51bf8ebbc 100644
--- a/native/jni/src/binary_format.h
+++ b/native/jni/src/binary_format.h
@@ -66,7 +66,7 @@ class BinaryFormat {
static int getTerminalPosition(const uint8_t* const root, const int32_t* const inWord,
const int length);
static int getWordAtAddress(const uint8_t* const root, const int address, const int maxDepth,
- uint16_t* outWord);
+ uint16_t* outWord, int* outUnigramFrequency);
static int computeFrequencyForBigram(const int unigramFreq, const int bigramFreq);
static int getProbability(const int position, const std::map<int, int> *bigramMap,
const uint8_t *bigramFilter, const int unigramFreq);
@@ -391,10 +391,11 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t* const root,
* address: the byte position of the last chargroup of the word we are searching for (this is
* what is stored as the "bigram address" in each bigram)
* outword: an array to write the found word, with MAX_WORD_LENGTH size.
+ * outUnigramFrequency: a pointer to an int to write the frequency into.
* Return value : the length of the word, or 0 if the word was not found.
*/
inline int BinaryFormat::getWordAtAddress(const uint8_t* const root, const int address,
- const int maxDepth, uint16_t* outWord) {
+ const int maxDepth, uint16_t* outWord, int* outUnigramFrequency) {
int pos = 0;
int wordPos = 0;
@@ -427,6 +428,7 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t* const root, const int a
nextChar = getCharCodeAndForwardPointer(root, &pos);
}
}
+ *outUnigramFrequency = readFrequencyWithoutMovingPointer(root, pos);
return ++wordPos;
}
// We need to skip past this char group, so skip any remaining chars after the