aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/src/binary_format.h
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni/src/binary_format.h')
-rw-r--r--native/jni/src/binary_format.h15
1 files changed, 12 insertions, 3 deletions
diff --git a/native/jni/src/binary_format.h b/native/jni/src/binary_format.h
index f59302460..d5d67c108 100644
--- a/native/jni/src/binary_format.h
+++ b/native/jni/src/binary_format.h
@@ -62,10 +62,11 @@ class BinaryFormat {
static bool hasChildrenInFlags(const uint8_t flags);
static int getAttributeAddressAndForwardPointer(const uint8_t* const dict, const uint8_t flags,
int *pos);
- static int getTerminalPosition(const uint8_t* const root, const uint16_t* const inWord,
+ static int getTerminalPosition(const uint8_t* const root, const int32_t* const inWord,
const int length);
static int getWordAtAddress(const uint8_t* const root, const int address, const int maxDepth,
uint16_t* outWord);
+ static int getProbability(const int bigramListPosition, const int unigramFreq);
// Flags for special processing
// Those *must* match the flags in makedict (BinaryDictInputOutput#*_PROCESSING_FLAG) or
@@ -304,7 +305,7 @@ inline int BinaryFormat::getAttributeAddressAndForwardPointer(const uint8_t* con
// This function gets the byte position of the last chargroup of the exact matching word in the
// dictionary. If no match is found, it returns NOT_VALID_WORD.
inline int BinaryFormat::getTerminalPosition(const uint8_t* const root,
- const uint16_t* const inWord, const int length) {
+ const int32_t* const inWord, const int length) {
int pos = 0;
int wordPos = 0;
@@ -313,7 +314,7 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t* const root,
// there was no match (or we would have found it).
if (wordPos > length) return NOT_VALID_WORD;
int charGroupCount = BinaryFormat::getGroupCountAndForwardPointer(root, &pos);
- const uint16_t wChar = inWord[wordPos];
+ const int32_t wChar = inWord[wordPos];
while (true) {
// If there are no more character groups in this node, it means we could not
// find a matching character for this depth, therefore there is no match.
@@ -517,6 +518,14 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t* const root, const int a
return 0;
}
+// Stub: returns unigramFreq unchanged. This should probably return a probability in log space.
+inline int BinaryFormat::getProbability(const int bigramListPosition, const int unigramFreq) {
+ // TODO: look up the bigram probability via bigramListPosition (currently unused). If the
+ // bigram is not found, fall back to the unigram frequency.
+ // TODO: when the unigram frequency is used, compute the actual probability from it.
+ return unigramFreq;
+}
+
} // namespace latinime
#endif // LATINIME_BINARY_FORMAT_H