aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp 24
-rw-r--r-- native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h 5
-rw-r--r-- native/jni/src/suggest/core/session/dic_traverse_session.cpp 17
-rw-r--r-- native/jni/src/suggest/core/session/prev_words_info.h 57
4 files changed, 64 insertions, 39 deletions
diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
index 847fa1b02..295e760d6 100644
--- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
@@ -48,21 +48,10 @@ BigramDictionary::~BigramDictionary() {
*/
void BigramDictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
SuggestionResults *const outSuggestionResults) const {
- int pos = getBigramListPositionForWord(prevWordsInfo->getPrevWordCodePoints(),
- prevWordsInfo->getPrevWordCodePointCount(), false /* forceLowerCaseSearch */);
- // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
- if (NOT_A_DICT_POS == pos) {
- // If no bigrams for this exact word, search again in lower case.
- pos = getBigramListPositionForWord(prevWordsInfo->getPrevWordCodePoints(),
- prevWordsInfo->getPrevWordCodePointCount(), true /* forceLowerCaseSearch */);
- }
- // If still no bigrams, we really don't have them!
- if (NOT_A_DICT_POS == pos) return;
-
int unigramProbability = 0;
int bigramCodePoints[MAX_WORD_LENGTH];
- BinaryDictionaryBigramsIterator bigramsIt(
- mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos);
+ BinaryDictionaryBigramsIterator bigramsIt =
+ prevWordsInfo->getBigramsIteratorForPrediction(mDictionaryStructurePolicy);
while (bigramsIt.hasNext()) {
bigramsIt.next();
if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) {
@@ -98,16 +87,11 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in
int BigramDictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo,
const int *word1, int length1) const {
- int pos = getBigramListPositionForWord(prevWordsInfo->getPrevWordCodePoints(),
- prevWordsInfo->getPrevWordCodePointCount(), false /* forceLowerCaseSearch */);
- // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
- if (NOT_A_DICT_POS == pos) return NOT_A_PROBABILITY;
int nextWordPos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(word1, length1,
false /* forceLowerCaseSearch */);
if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY;
-
- BinaryDictionaryBigramsIterator bigramsIt(
- mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos);
+ BinaryDictionaryBigramsIterator bigramsIt =
+ prevWordsInfo->getBigramsIteratorForPrediction(mDictionaryStructurePolicy);
while (bigramsIt.hasNext()) {
bigramsIt.next();
if (bigramsIt.getBigramPos() == nextWordPos
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h
index d16ac47fe..bc9d57671 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h
@@ -30,6 +30,11 @@ class BinaryDictionaryBigramsIterator {
mBigramPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY),
mHasNext(pos != NOT_A_DICT_POS) {}
+    // Move constructor. Initializes every field (structure policy, read position, current
+    // bigram position, probability and has-next flag) from the corresponding field of
+    // bigramsIterator; the source object itself is left unmodified.
+    // Marked noexcept because it only performs member-wise copies of what appear to be a
+    // pointer and scalar fields, so returning an iterator by value can never throw here.
+    BinaryDictionaryBigramsIterator(BinaryDictionaryBigramsIterator &&bigramsIterator) noexcept
+            : mBigramsStructurePolicy(bigramsIterator.mBigramsStructurePolicy),
+              mPos(bigramsIterator.mPos), mBigramPos(bigramsIterator.mBigramPos),
+              mProbability(bigramsIterator.mProbability), mHasNext(bigramsIterator.mHasNext) {}
+
AK_FORCE_INLINE bool hasNext() const {
return mHasNext;
}
diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.cpp b/native/jni/src/suggest/core/session/dic_traverse_session.cpp
index b9e9db719..dc2b66a2c 100644
--- a/native/jni/src/suggest/core/session/dic_traverse_session.cpp
+++ b/native/jni/src/suggest/core/session/dic_traverse_session.cpp
@@ -35,21 +35,8 @@ void DicTraverseSession::init(const Dictionary *const dictionary,
mMultiWordCostMultiplier = getDictionaryStructurePolicy()->getHeaderStructurePolicy()
->getMultiWordCostMultiplier();
mSuggestOptions = suggestOptions;
- if (!prevWordsInfo->getPrevWordCodePoints()) {
- mPrevWordsPtNodePos[0] = NOT_A_DICT_POS;
- return;
- }
- // TODO: merge following similar calls to getTerminalPosition into one case-insensitive call.
- mPrevWordsPtNodePos[0] = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
- prevWordsInfo->getPrevWordCodePoints(), prevWordsInfo->getPrevWordCodePointCount(),
- false /* forceLowerCaseSearch */);
- if (mPrevWordsPtNodePos[0] == NOT_A_DICT_POS) {
- // Check bigrams for lower-cased previous word if original was not found. Useful for
- // auto-capitalized words like "The [current_word]".
- mPrevWordsPtNodePos[0] = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
- prevWordsInfo->getPrevWordCodePoints(), prevWordsInfo->getPrevWordCodePointCount(),
- true /* forceLowerCaseSearch */);
- }
+ prevWordsInfo->getPrevWordsTerminalPtNodePos(
+ getDictionaryStructurePolicy(), mPrevWordsPtNodePos);
}
void DicTraverseSession::setupForGetSuggestions(const ProximityInfo *pInfo,
diff --git a/native/jni/src/suggest/core/session/prev_words_info.h b/native/jni/src/suggest/core/session/prev_words_info.h
index bc685945e..70a99ef38 100644
--- a/native/jni/src/suggest/core/session/prev_words_info.h
+++ b/native/jni/src/suggest/core/session/prev_words_info.h
@@ -18,6 +18,8 @@
#define LATINIME_PREV_WORDS_INFO_H
#include "defines.h"
+#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
+#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
namespace latinime {
@@ -38,17 +40,64 @@ class PrevWordsInfo {
mPrevWordCodePointCount[0] = prevWordCodePointCount;
mIsBeginningOfSentence[0] = isBeginningOfSentence;
}
- const int *getPrevWordCodePoints() const {
- return mPrevWordCodePoints[0];
+
+    // Looks up the terminal PtNode position of every stored previous word in the given
+    // dictionary and writes the results to outPrevWordsTerminalPtNodePos, one entry per word.
+    // The output array must have room for NELEMS(mPrevWordCodePoints) entries; entries for
+    // words that cannot be resolved are set to NOT_A_DICT_POS by the lookup helper.
+    void getPrevWordsTerminalPtNodePos(
+            const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
+            int *const outPrevWordsTerminalPtNodePos) const {
+        const size_t prevWordCount = NELEMS(mPrevWordCodePoints);
+        for (size_t wordIndex = 0; wordIndex < prevWordCount; ++wordIndex) {
+            const int terminalPtNodePos = getTerminalPtNodePosOfWord(dictStructurePolicy,
+                    mPrevWordCodePoints[wordIndex], mPrevWordCodePointCount[wordIndex],
+                    mIsBeginningOfSentence[wordIndex]);
+            outPrevWordsTerminalPtNodePos[wordIndex] = terminalPtNodePos;
+        }
     }
- int getPrevWordCodePointCount() const {
- return mPrevWordCodePointCount[0];
+    // Builds an iterator over the bigram list of the first previous word.
+    // The word is looked up as typed first; only when that yields NOT_A_DICT_POS (word not in
+    // the dictionary, or no bigrams) is the lookup repeated with a forced lower-case search.
+    // If both lookups fail the iterator is constructed with NOT_A_DICT_POS.
+    BinaryDictionaryBigramsIterator getBigramsIteratorForPrediction(
+            const DictionaryStructureWithBufferPolicy *const dictStructurePolicy) const {
+        const int exactCasePos = getBigramListPositionForWord(dictStructurePolicy,
+                mPrevWordCodePoints[0], mPrevWordCodePointCount[0],
+                false /* forceLowerCaseSearch */);
+        // The lower-case lookup is evaluated lazily, i.e. only when the exact-case one failed.
+        const int bigramListPos = (NOT_A_DICT_POS != exactCasePos)
+                ? exactCasePos
+                : getBigramListPositionForWord(dictStructurePolicy, mPrevWordCodePoints[0],
+                        mPrevWordCodePointCount[0], true /* forceLowerCaseSearch */);
+        return BinaryDictionaryBigramsIterator(
+                dictStructurePolicy->getBigramsStructurePolicy(), bigramListPos);
     }
private:
DISALLOW_COPY_AND_ASSIGN(PrevWordsInfo);
+    // Returns the terminal PtNode position of the given word, or NOT_A_DICT_POS when either
+    // the policy or the code point array is null.
+    // The exact-case lookup wins; the lower-case lookup is only a fallback, which helps with
+    // auto-capitalized previous words such as "The [current_word]".
+    // Note: isBeginningOfSentence is accepted but not used by this implementation.
+    static int getTerminalPtNodePosOfWord(
+            const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
+            const int *const wordCodePoints, const int wordCodePointCount,
+            const bool isBeginningOfSentence) {
+        if (dictStructurePolicy && wordCodePoints) {
+            const int exactCasePos = dictStructurePolicy->getTerminalPtNodePositionOfWord(
+                    wordCodePoints, wordCodePointCount, false /* forceLowerCaseSearch */);
+            if (NOT_A_DICT_POS == exactCasePos) {
+                // Original spelling was not found; retry ignoring capitalization.
+                return dictStructurePolicy->getTerminalPtNodePositionOfWord(
+                        wordCodePoints, wordCodePointCount, true /* forceLowerCaseSearch */);
+            }
+            return exactCasePos;
+        }
+        return NOT_A_DICT_POS;
+    }
+
+    // Returns the bigram list position for the terminal PtNode of the given word.
+    // Yields NOT_A_DICT_POS when the policy or the code point array is null, when the code
+    // point count is not positive, or when the word has no terminal PtNode under the requested
+    // case handling. Otherwise the result of getBigramsPositionOfPtNode for that node is
+    // returned.
+    static int getBigramListPositionForWord(
+            const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
+            const int *wordCodePoints, const int wordCodePointCount,
+            const bool forceLowerCaseSearch) {
+        // Guard the policy pointer too, consistent with getTerminalPtNodePosOfWord, so a null
+        // policy yields NOT_A_DICT_POS instead of a null dereference below.
+        if (!dictStructurePolicy || !wordCodePoints || wordCodePointCount <= 0) {
+            return NOT_A_DICT_POS;
+        }
+        const int terminalPtNodePos = dictStructurePolicy->getTerminalPtNodePositionOfWord(
+                wordCodePoints, wordCodePointCount, forceLowerCaseSearch);
+        if (NOT_A_DICT_POS == terminalPtNodePos) return NOT_A_DICT_POS;
+        return dictStructurePolicy->getBigramsPositionOfPtNode(terminalPtNodePos);
+    }
+
void clear() {
for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) {
mPrevWordCodePoints[i] = nullptr;