aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/src/bigram_dictionary.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni/src/bigram_dictionary.cpp')
-rw-r--r--native/jni/src/bigram_dictionary.cpp23
1 files changed, 13 insertions, 10 deletions
diff --git a/native/jni/src/bigram_dictionary.cpp b/native/jni/src/bigram_dictionary.cpp
index c592542bd..b57227880 100644
--- a/native/jni/src/bigram_dictionary.cpp
+++ b/native/jni/src/bigram_dictionary.cpp
@@ -20,15 +20,18 @@
#include "bigram_dictionary.h"
-#include "char_utils.h"
#include "defines.h"
+#include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/core/dictionary/binary_format.h"
#include "suggest/core/dictionary/bloom_filter.h"
+#include "suggest/core/dictionary/char_utils.h"
#include "suggest/core/dictionary/dictionary.h"
+#include "suggest/core/dictionary/probability_utils.h"
namespace latinime {
-BigramDictionary::BigramDictionary(const uint8_t *const streamStart) : DICT_ROOT(streamStart) {
+BigramDictionary::BigramDictionary(const BinaryDictionaryInfo *const binaryDictionaryInfo)
+ : mBinaryDictionaryInfo(binaryDictionaryInfo) {
if (DEBUG_DICT) {
AKLOGI("BigramDictionary - constructor");
}
@@ -52,7 +55,7 @@ void BigramDictionary::addWordBigram(int *word, int length, int probability, int
int insertAt = 0;
while (insertAt < MAX_RESULTS) {
if (probability > bigramProbability[insertAt] || (bigramProbability[insertAt] == probability
- && length < getCodePointCount(MAX_WORD_LENGTH,
+ && length < CharUtils::getCodePointCount(MAX_WORD_LENGTH,
bigramCodePoints + insertAt * MAX_WORD_LENGTH))) {
break;
}
@@ -103,7 +106,7 @@ int BigramDictionary::getBigrams(const int *prevWord, int prevWordLength, int *i
// TODO: remove unused arguments, and refrain from storing stuff in members of this class
// TODO: have "in" arguments before "out" ones, and make out args explicit in the name
- const uint8_t *const root = DICT_ROOT;
+ const uint8_t *const root = mBinaryDictionaryInfo->getDictRoot();
int pos = getBigramListPositionForWord(prevWord, prevWordLength,
false /* forceLowerCaseSearch */);
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
@@ -134,7 +137,7 @@ int BigramDictionary::getBigrams(const int *prevWord, int prevWordLength, int *i
// resulting probability is 8 - although in the practice it's never bigger than 3 or 4
// in very bad cases. This means that sometimes, we'll see some bigrams interverted
// here, but it can't get too bad.
- const int probability = BinaryFormat::computeProbabilityForBigram(
+ const int probability = ProbabilityUtils::computeProbabilityForBigram(
unigramProbability, bigramProbabilityTemp);
addWordBigram(bigramBuffer, length, probability, bigramProbability, bigramCodePoints,
outputTypes);
@@ -149,7 +152,7 @@ int BigramDictionary::getBigrams(const int *prevWord, int prevWordLength, int *i
int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
const bool forceLowerCaseSearch) const {
if (0 >= prevWordLength) return 0;
- const uint8_t *const root = DICT_ROOT;
+ const uint8_t *const root = mBinaryDictionaryInfo->getDictRoot();
int pos = BinaryFormat::getTerminalPosition(root, prevWord, prevWordLength,
forceLowerCaseSearch);
@@ -170,7 +173,7 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in
void BigramDictionary::fillBigramAddressToProbabilityMapAndFilter(const int *prevWord,
const int prevWordLength, std::map<int, int> *map, uint8_t *filter) const {
memset(filter, 0, BIGRAM_FILTER_BYTE_SIZE);
- const uint8_t *const root = DICT_ROOT;
+ const uint8_t *const root = mBinaryDictionaryInfo->getDictRoot();
int pos = getBigramListPositionForWord(prevWord, prevWordLength,
false /* forceLowerCaseSearch */);
if (0 == pos) {
@@ -196,9 +199,9 @@ bool BigramDictionary::checkFirstCharacter(int *word, int *inputCodePoints) cons
// what user typed.
int maxAlt = MAX_ALTERNATIVES;
- const int firstBaseLowerCodePoint = toBaseLowerCase(*word);
+ const int firstBaseLowerCodePoint = CharUtils::toBaseLowerCase(*word);
while (maxAlt > 0) {
- if (toBaseLowerCase(*inputCodePoints) == firstBaseLowerCodePoint) {
+ if (CharUtils::toBaseLowerCase(*inputCodePoints) == firstBaseLowerCodePoint) {
return true;
}
inputCodePoints++;
@@ -209,7 +212,7 @@ bool BigramDictionary::checkFirstCharacter(int *word, int *inputCodePoints) cons
bool BigramDictionary::isValidBigram(const int *word1, int length1, const int *word2,
int length2) const {
- const uint8_t *const root = DICT_ROOT;
+ const uint8_t *const root = mBinaryDictionaryInfo->getDictRoot();
int pos = getBigramListPositionForWord(word1, length1, false /* forceLowerCaseSearch */);
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
if (0 == pos) return false;