about | summary | refs | log | tree | commit | diff | stats
path: root/native
diff options
context:
space:
mode:
Diffstat (limited to 'native')
-rw-r--r-- native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp | 53
-rw-r--r-- native/jni/com_android_inputmethod_latin_DicTraverseSession.cpp | 10
-rw-r--r-- native/jni/src/suggest/core/dictionary/dictionary.cpp | 42
-rw-r--r-- native/jni/src/suggest/core/dictionary/dictionary.h | 18
-rw-r--r-- native/jni/src/suggest/core/dictionary/dictionary_utils.cpp | 8
-rw-r--r-- native/jni/src/suggest/core/dictionary/property/historical_info.h | 10
-rw-r--r-- native/jni/src/suggest/core/dictionary/property/ngram_property.h | 10
-rw-r--r-- native/jni/src/suggest/core/dictionary/property/unigram_property.h | 20
-rw-r--r-- native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h | 8
-rw-r--r-- native/jni/src/suggest/core/policy/traversal.h | 2
-rw-r--r-- native/jni/src/suggest/core/session/dic_traverse_session.cpp | 6
-rw-r--r-- native/jni/src/suggest/core/session/dic_traverse_session.h | 4
-rw-r--r-- native/jni/src/suggest/core/session/ngram_context.h (renamed from native/jni/src/suggest/core/session/prev_words_info.h) | 31
-rw-r--r-- native/jni/src/suggest/core/suggest.cpp | 4
-rw-r--r-- native/jni/src/suggest/core/suggest_options.h | 9
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp | 2
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp | 53
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h | 8
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp | 2
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h | 9
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp | 2
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h | 4
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp | 2
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp | 47
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h | 6
-rw-r--r-- native/jni/src/suggest/policyimpl/typing/scoring_params.cpp | 4
-rw-r--r-- native/jni/src/suggest/policyimpl/typing/scoring_params.h | 4
-rw-r--r-- native/jni/src/suggest/policyimpl/typing/typing_traversal.h | 26
-rw-r--r-- native/jni/src/utils/jni_data_utils.h | 6
29 files changed, 229 insertions, 181 deletions
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index 76c7fdd6f..f8dadb488 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -28,7 +28,7 @@
#include "suggest/core/dictionary/property/unigram_property.h"
#include "suggest/core/dictionary/property/word_property.h"
#include "suggest/core/result/suggestion_results.h"
-#include "suggest/core/session/prev_words_info.h"
+#include "suggest/core/session/ngram_context.h"
#include "suggest/core/suggest_options.h"
#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
#include "utils/char_utils.h"
@@ -242,15 +242,15 @@ static void latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz,
env->GetFloatArrayRegion(inOutWeightOfLangModelVsSpatialModel, 0, 1 /* len */,
&weightOfLangModelVsSpatialModel);
SuggestionResults suggestionResults(MAX_RESULTS);
- const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
+ const NgramContext ngramContext = JniDataUtils::constructNgramContext(env,
prevWordCodePointArrays, isBeginningOfSentenceArray, prevWordCount);
if (givenSuggestOptions.isGesture() || inputSize > 0) {
// TODO: Use SuggestionResults to return suggestions.
dictionary->getSuggestions(pInfo, traverseSession, xCoordinates, yCoordinates,
- times, pointerIds, inputCodePoints, inputSize, &prevWordsInfo,
+ times, pointerIds, inputCodePoints, inputSize, &ngramContext,
&givenSuggestOptions, weightOfLangModelVsSpatialModel, &suggestionResults);
} else {
- dictionary->getPredictions(&prevWordsInfo, &suggestionResults);
+ dictionary->getPredictions(&ngramContext, &suggestionResults);
}
if (DEBUG_DICT) {
suggestionResults.dumpSuggestions();
@@ -289,10 +289,10 @@ static jint latinime_BinaryDictionary_getNgramProbability(JNIEnv *env, jclass cl
const jsize wordLength = env->GetArrayLength(word);
int wordCodePoints[wordLength];
env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
- const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
+ const NgramContext ngramContext = JniDataUtils::constructNgramContext(env,
prevWordCodePointArrays, isBeginningOfSentenceArray,
env->GetArrayLength(prevWordCodePointArrays));
- return dictionary->getNgramProbability(&prevWordsInfo,
+ return dictionary->getNgramProbability(&ngramContext,
CodePointArrayView(wordCodePoints, wordLength));
}
@@ -327,8 +327,9 @@ static jint latinime_BinaryDictionary_getNextWord(JNIEnv *env, jclass clazz,
static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz,
jlong dict, jintArray word, jboolean isBeginningOfSentence, jintArray outCodePoints,
- jbooleanArray outFlags, jintArray outProbabilityInfo, jobject outBigramTargets,
- jobject outBigramProbabilityInfo, jobject outShortcutTargets,
+ jbooleanArray outFlags, jintArray outProbabilityInfo, jobject /* outNgramPrevWordsArray */,
+ jobject /* outNgramPrevWordIsBeginningOfSentenceArray */, jobject outNgramTargets,
+ jobject outNgramProbabilityInfo, jobject outShortcutTargets,
jobject outShortcutProbabilities) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return;
@@ -351,7 +352,7 @@ static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz,
const WordProperty wordProperty = dictionary->getWordProperty(
CodePointArrayView(wordCodePoints, codePointCount));
wordProperty.outputProperties(env, outCodePoints, outFlags, outProbabilityInfo,
- outBigramTargets, outBigramProbabilityInfo, outShortcutTargets,
+ outNgramTargets, outNgramProbabilityInfo, outShortcutTargets,
outShortcutProbabilities);
}
@@ -401,7 +402,7 @@ static bool latinime_BinaryDictionary_addNgramEntry(JNIEnv *env, jclass clazz, j
if (!dictionary) {
return false;
}
- const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
+ const NgramContext ngramContext = JniDataUtils::constructNgramContext(env,
prevWordCodePointArrays, isBeginningOfSentenceArray,
env->GetArrayLength(prevWordCodePointArrays));
jsize wordLength = env->GetArrayLength(word);
@@ -410,7 +411,7 @@ static bool latinime_BinaryDictionary_addNgramEntry(JNIEnv *env, jclass clazz, j
// Use 1 for count to indicate the ngram has inputted.
const NgramProperty ngramProperty(CodePointArrayView(wordCodePoints, wordLength).toVector(),
probability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */));
- return dictionary->addNgramEntry(&prevWordsInfo, &ngramProperty);
+ return dictionary->addNgramEntry(&ngramContext, &ngramProperty);
}
static bool latinime_BinaryDictionary_removeNgramEntry(JNIEnv *env, jclass clazz, jlong dict,
@@ -420,31 +421,32 @@ static bool latinime_BinaryDictionary_removeNgramEntry(JNIEnv *env, jclass clazz
if (!dictionary) {
return false;
}
- const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
+ const NgramContext ngramContext = JniDataUtils::constructNgramContext(env,
prevWordCodePointArrays, isBeginningOfSentenceArray,
env->GetArrayLength(prevWordCodePointArrays));
jsize codePointCount = env->GetArrayLength(word);
int wordCodePoints[codePointCount];
env->GetIntArrayRegion(word, 0, codePointCount, wordCodePoints);
- return dictionary->removeNgramEntry(&prevWordsInfo,
+ return dictionary->removeNgramEntry(&ngramContext,
CodePointArrayView(wordCodePoints, codePointCount));
}
-static bool latinime_BinaryDictionary_updateCounter(JNIEnv *env, jclass clazz, jlong dict,
- jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray,
- jintArray word, jboolean isValidWord, jint count, jint timestamp) {
+static bool latinime_BinaryDictionary_updateEntriesForWordWithNgramContext(JNIEnv *env,
+ jclass clazz, jlong dict, jobjectArray prevWordCodePointArrays,
+ jbooleanArray isBeginningOfSentenceArray, jintArray word, jboolean isValidWord, jint count,
+ jint timestamp) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return false;
}
- const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
+ const NgramContext ngramContext = JniDataUtils::constructNgramContext(env,
prevWordCodePointArrays, isBeginningOfSentenceArray,
env->GetArrayLength(prevWordCodePointArrays));
jsize codePointCount = env->GetArrayLength(word);
int wordCodePoints[codePointCount];
env->GetIntArrayRegion(word, 0, codePointCount, wordCodePoints);
const HistoricalInfo historicalInfo(timestamp, 0 /* level */, count);
- return dictionary->updateCounter(&prevWordsInfo,
+ return dictionary->updateEntriesForWordWithNgramContext(&ngramContext,
CodePointArrayView(wordCodePoints, codePointCount), isValidWord == JNI_TRUE,
historicalInfo);
}
@@ -527,9 +529,9 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
const NgramProperty ngramProperty(
CodePointArrayView(word1CodePoints, word1Length).toVector(),
bigramProbability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */));
- const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length,
+ const NgramContext ngramContext(word0CodePoints, word0Length,
false /* isBeginningOfSentence */);
- dictionary->addNgramEntry(&prevWordsInfo, &ngramProperty);
+ dictionary->addNgramEntry(&ngramContext, &ngramProperty);
}
if (dictionary->needsToRunGC(true /* mindsBlockByGC */)) {
return i + 1;
@@ -639,10 +641,10 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j
return false;
}
}
- const PrevWordsInfo prevWordsInfo(wordCodePoints, wordCodePointCount,
+ const NgramContext ngramContext(wordCodePoints, wordCodePointCount,
wordProperty.getUnigramProperty()->representsBeginningOfSentence());
for (const NgramProperty &ngramProperty : *wordProperty.getNgramProperties()) {
- if (!dictionaryStructureWithBufferPolicy->addNgramEntry(&prevWordsInfo,
+ if (!dictionaryStructureWithBufferPolicy->addNgramEntry(&ngramContext,
&ngramProperty)) {
LogUtils::logToJava(env, "Cannot add ngram to the new dict.");
return false;
@@ -718,7 +720,8 @@ static const JNINativeMethod sMethods[] = {
{
const_cast<char *>("getWordPropertyNative"),
const_cast<char *>("(J[IZ[I[Z[ILjava/util/ArrayList;Ljava/util/ArrayList;"
- "Ljava/util/ArrayList;Ljava/util/ArrayList;)V"),
+ "Ljava/util/ArrayList;Ljava/util/ArrayList;Ljava/util/ArrayList;"
+ "Ljava/util/ArrayList;)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getWordProperty)
},
{
@@ -747,9 +750,9 @@ static const JNINativeMethod sMethods[] = {
reinterpret_cast<void *>(latinime_BinaryDictionary_removeNgramEntry)
},
{
- const_cast<char *>("updateCounterNative"),
+ const_cast<char *>("updateEntriesForWordWithNgramContextNative"),
const_cast<char *>("(J[[I[Z[IZII)Z"),
- reinterpret_cast<void *>(latinime_BinaryDictionary_updateCounter)
+ reinterpret_cast<void *>(latinime_BinaryDictionary_updateEntriesForWordWithNgramContext)
},
{
const_cast<char *>("addMultipleDictionaryEntriesNative"),
diff --git a/native/jni/com_android_inputmethod_latin_DicTraverseSession.cpp b/native/jni/com_android_inputmethod_latin_DicTraverseSession.cpp
index 766064153..3c6bff3b6 100644
--- a/native/jni/com_android_inputmethod_latin_DicTraverseSession.cpp
+++ b/native/jni/com_android_inputmethod_latin_DicTraverseSession.cpp
@@ -22,7 +22,7 @@
#include "jni.h"
#include "jni_common.h"
#include "suggest/core/session/dic_traverse_session.h"
-#include "suggest/core/session/prev_words_info.h"
+#include "suggest/core/session/ngram_context.h"
namespace latinime {
class Dictionary;
@@ -40,14 +40,14 @@ static void latinime_initDicTraverseSession(JNIEnv *env, jclass clazz, jlong tra
}
Dictionary *dict = reinterpret_cast<Dictionary *>(dictionary);
if (!previousWord) {
- PrevWordsInfo prevWordsInfo;
- ts->init(dict, &prevWordsInfo, 0 /* suggestOptions */);
+ NgramContext emptyNgramContext;
+ ts->init(dict, &emptyNgramContext, 0 /* suggestOptions */);
return;
}
int prevWord[previousWordLength];
env->GetIntArrayRegion(previousWord, 0, previousWordLength, prevWord);
- PrevWordsInfo prevWordsInfo(prevWord, previousWordLength, false /* isStartOfSentence */);
- ts->init(dict, &prevWordsInfo, 0 /* suggestOptions */);
+ NgramContext ngramContext(prevWord, previousWordLength, false /* isStartOfSentence */);
+ ts->init(dict, &ngramContext, 0 /* suggestOptions */);
}
static void latinime_releaseDicTraverseSession(JNIEnv *env, jclass clazz, jlong traverseSession) {
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index 7a69d3ceb..697e99ffb 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -23,7 +23,7 @@
#include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "suggest/core/result/suggestion_results.h"
#include "suggest/core/session/dic_traverse_session.h"
-#include "suggest/core/session/prev_words_info.h"
+#include "suggest/core/session/ngram_context.h"
#include "suggest/core/suggest.h"
#include "suggest/core/suggest_options.h"
#include "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h"
@@ -46,11 +46,11 @@ Dictionary::Dictionary(JNIEnv *env, DictionaryStructureWithBufferPolicy::Structu
void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
- int inputSize, const PrevWordsInfo *const prevWordsInfo,
+ int inputSize, const NgramContext *const ngramContext,
const SuggestOptions *const suggestOptions, const float weightOfLangModelVsSpatialModel,
SuggestionResults *const outSuggestionResults) const {
TimeKeeper::setCurrentTime();
- traverseSession->init(this, prevWordsInfo, suggestOptions);
+ traverseSession->init(this, ngramContext, suggestOptions);
const auto &suggest = suggestOptions->isGesture() ? mGestureSuggest : mTypingSuggest;
suggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
ycoordinates, times, pointerIds, inputCodePoints, inputSize,
@@ -58,10 +58,10 @@ void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession
}
Dictionary::NgramListenerForPrediction::NgramListenerForPrediction(
- const PrevWordsInfo *const prevWordsInfo, const WordIdArrayView prevWordIds,
+ const NgramContext *const ngramContext, const WordIdArrayView prevWordIds,
SuggestionResults *const suggestionResults,
const DictionaryStructureWithBufferPolicy *const dictStructurePolicy)
- : mPrevWordsInfo(prevWordsInfo), mPrevWordIds(prevWordIds),
+ : mNgramContext(ngramContext), mPrevWordIds(prevWordIds),
mSuggestionResults(suggestionResults), mDictStructurePolicy(dictStructurePolicy) {}
void Dictionary::NgramListenerForPrediction::onVisitEntry(const int ngramProbability,
@@ -69,7 +69,7 @@ void Dictionary::NgramListenerForPrediction::onVisitEntry(const int ngramProbabi
if (targetWordId == NOT_A_WORD_ID) {
return;
}
- if (mPrevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)
+ if (mNgramContext->isNthPrevWordBeginningOfSentence(1 /* n */)
&& ngramProbability == NOT_A_PROBABILITY) {
return;
}
@@ -85,20 +85,20 @@ void Dictionary::NgramListenerForPrediction::onVisitEntry(const int ngramProbabi
wordAttributes.getProbability());
}
-void Dictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
+void Dictionary::getPredictions(const NgramContext *const ngramContext,
SuggestionResults *const outSuggestionResults) const {
TimeKeeper::setCurrentTime();
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
- const WordIdArrayView prevWordIds = prevWordsInfo->getPrevWordIds(
+ const WordIdArrayView prevWordIds = ngramContext->getPrevWordIds(
mDictionaryStructureWithBufferPolicy.get(), &prevWordIdArray,
true /* tryLowerCaseSearch */);
- NgramListenerForPrediction listener(prevWordsInfo, prevWordIds, outSuggestionResults,
+ NgramListenerForPrediction listener(ngramContext, prevWordIds, outSuggestionResults,
mDictionaryStructureWithBufferPolicy.get());
mDictionaryStructureWithBufferPolicy->iterateNgramEntries(prevWordIds, &listener);
}
int Dictionary::getProbability(const CodePointArrayView codePoints) const {
- return getNgramProbability(nullptr /* prevWordsInfo */, codePoints);
+ return getNgramProbability(nullptr /* ngramContext */, codePoints);
}
int Dictionary::getMaxProbabilityOfExactMatches(const CodePointArrayView codePoints) const {
@@ -107,18 +107,18 @@ int Dictionary::getMaxProbabilityOfExactMatches(const CodePointArrayView codePoi
mDictionaryStructureWithBufferPolicy.get(), codePoints);
}
-int Dictionary::getNgramProbability(const PrevWordsInfo *const prevWordsInfo,
+int Dictionary::getNgramProbability(const NgramContext *const ngramContext,
const CodePointArrayView codePoints) const {
TimeKeeper::setCurrentTime();
const int wordId = mDictionaryStructureWithBufferPolicy->getWordId(codePoints,
false /* forceLowerCaseSearch */);
if (wordId == NOT_A_WORD_ID) return NOT_A_PROBABILITY;
- if (!prevWordsInfo) {
+ if (!ngramContext) {
return getDictionaryStructurePolicy()->getProbabilityOfWord(WordIdArrayView(), wordId);
}
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
- const WordIdArrayView prevWordIds = prevWordsInfo->getPrevWordIds
- (mDictionaryStructureWithBufferPolicy.get(), &prevWordIdArray,
+ const WordIdArrayView prevWordIds = ngramContext->getPrevWordIds(
+ mDictionaryStructureWithBufferPolicy.get(), &prevWordIdArray,
true /* tryLowerCaseSearch */);
return getDictionaryStructurePolicy()->getProbabilityOfWord(prevWordIds, wordId);
}
@@ -140,24 +140,24 @@ bool Dictionary::removeUnigramEntry(const CodePointArrayView codePoints) {
return mDictionaryStructureWithBufferPolicy->removeUnigramEntry(codePoints);
}
-bool Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+bool Dictionary::addNgramEntry(const NgramContext *const ngramContext,
const NgramProperty *const ngramProperty) {
TimeKeeper::setCurrentTime();
- return mDictionaryStructureWithBufferPolicy->addNgramEntry(prevWordsInfo, ngramProperty);
+ return mDictionaryStructureWithBufferPolicy->addNgramEntry(ngramContext, ngramProperty);
}
-bool Dictionary::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+bool Dictionary::removeNgramEntry(const NgramContext *const ngramContext,
const CodePointArrayView codePoints) {
TimeKeeper::setCurrentTime();
- return mDictionaryStructureWithBufferPolicy->removeNgramEntry(prevWordsInfo, codePoints);
+ return mDictionaryStructureWithBufferPolicy->removeNgramEntry(ngramContext, codePoints);
}
-bool Dictionary::updateCounter(const PrevWordsInfo *const prevWordsInfo,
+bool Dictionary::updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext,
const CodePointArrayView codePoints, const bool isValidWord,
const HistoricalInfo historicalInfo) {
TimeKeeper::setCurrentTime();
- return mDictionaryStructureWithBufferPolicy->updateCounter(prevWordsInfo, codePoints,
- isValidWord, historicalInfo);
+ return mDictionaryStructureWithBufferPolicy->updateEntriesForWordWithNgramContext(ngramContext,
+ codePoints, isValidWord, historicalInfo);
}
bool Dictionary::flush(const char *const filePath) {
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h
index a58dbfbd7..843aec473 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h
@@ -33,7 +33,7 @@ namespace latinime {
class DictionaryStructureWithBufferPolicy;
class DicTraverseSession;
-class PrevWordsInfo;
+class NgramContext;
class ProximityInfo;
class SuggestionResults;
class SuggestOptions;
@@ -66,18 +66,18 @@ class Dictionary {
void getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
- int inputSize, const PrevWordsInfo *const prevWordsInfo,
+ int inputSize, const NgramContext *const ngramContext,
const SuggestOptions *const suggestOptions, const float weightOfLangModelVsSpatialModel,
SuggestionResults *const outSuggestionResults) const;
- void getPredictions(const PrevWordsInfo *const prevWordsInfo,
+ void getPredictions(const NgramContext *const ngramContext,
SuggestionResults *const outSuggestionResults) const;
int getProbability(const CodePointArrayView codePoints) const;
int getMaxProbabilityOfExactMatches(const CodePointArrayView codePoints) const;
- int getNgramProbability(const PrevWordsInfo *const prevWordsInfo,
+ int getNgramProbability(const NgramContext *const ngramContext,
const CodePointArrayView codePoints) const;
bool addUnigramEntry(const CodePointArrayView codePoints,
@@ -85,13 +85,13 @@ class Dictionary {
bool removeUnigramEntry(const CodePointArrayView codePoints);
- bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+ bool addNgramEntry(const NgramContext *const ngramContext,
const NgramProperty *const ngramProperty);
- bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+ bool removeNgramEntry(const NgramContext *const ngramContext,
const CodePointArrayView codePoints);
- bool updateCounter(const PrevWordsInfo *const prevWordsInfo,
+ bool updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext,
const CodePointArrayView codePoints, const bool isValidWord,
const HistoricalInfo historicalInfo);
@@ -123,7 +123,7 @@ class Dictionary {
class NgramListenerForPrediction : public NgramListener {
public:
- NgramListenerForPrediction(const PrevWordsInfo *const prevWordsInfo,
+ NgramListenerForPrediction(const NgramContext *const ngramContext,
const WordIdArrayView prevWordIds, SuggestionResults *const suggestionResults,
const DictionaryStructureWithBufferPolicy *const dictStructurePolicy);
virtual void onVisitEntry(const int ngramProbability, const int targetWordId);
@@ -131,7 +131,7 @@ class Dictionary {
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(NgramListenerForPrediction);
- const PrevWordsInfo *const mPrevWordsInfo;
+ const NgramContext *const mNgramContext;
const WordIdArrayView mPrevWordIds;
SuggestionResults *const mSuggestionResults;
const DictionaryStructureWithBufferPolicy *const mDictStructurePolicy;
diff --git a/native/jni/src/suggest/core/dictionary/dictionary_utils.cpp b/native/jni/src/suggest/core/dictionary/dictionary_utils.cpp
index b85f3622a..9573c37bc 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary_utils.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary_utils.cpp
@@ -21,7 +21,7 @@
#include "suggest/core/dicnode/dic_node_vector.h"
#include "suggest/core/dictionary/dictionary.h"
#include "suggest/core/dictionary/digraph_utils.h"
-#include "suggest/core/session/prev_words_info.h"
+#include "suggest/core/session/ngram_context.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "utils/int_array_view.h"
@@ -33,10 +33,10 @@ namespace latinime {
std::vector<DicNode> current;
std::vector<DicNode> next;
- // No prev words information.
- PrevWordsInfo emptyPrevWordsInfo;
+ // No ngram context.
+ NgramContext emptyNgramContext;
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
- const WordIdArrayView prevWordIds = emptyPrevWordsInfo.getPrevWordIds(
+ const WordIdArrayView prevWordIds = emptyNgramContext.getPrevWordIds(
dictionaryStructurePolicy, &prevWordIdArray, false /* tryLowerCaseSearch */);
current.emplace_back();
DicNodeUtils::initAsRoot(dictionaryStructurePolicy, prevWordIds, &current.front());
diff --git a/native/jni/src/suggest/core/dictionary/property/historical_info.h b/native/jni/src/suggest/core/dictionary/property/historical_info.h
index 5ed9ebfca..f9bd6fd8c 100644
--- a/native/jni/src/suggest/core/dictionary/property/historical_info.h
+++ b/native/jni/src/suggest/core/dictionary/property/historical_info.h
@@ -47,12 +47,12 @@ class HistoricalInfo {
}
private:
- // Default copy constructor and assign operator are used for using in std::vector.
+ // Default copy constructor is used for using in std::vector.
+ DISALLOW_ASSIGNMENT_OPERATOR(HistoricalInfo);
- // TODO: Make members const.
- int mTimestamp;
- int mLevel;
- int mCount;
+ const int mTimestamp;
+ const int mLevel;
+ const int mCount;
};
} // namespace latinime
#endif /* LATINIME_HISTORICAL_INFO_H */
diff --git a/native/jni/src/suggest/core/dictionary/property/ngram_property.h b/native/jni/src/suggest/core/dictionary/property/ngram_property.h
index dce460099..8709799f9 100644
--- a/native/jni/src/suggest/core/dictionary/property/ngram_property.h
+++ b/native/jni/src/suggest/core/dictionary/property/ngram_property.h
@@ -44,13 +44,13 @@ class NgramProperty {
}
private:
- // Default copy constructor and assign operator are used for using in std::vector.
+ // Default copy constructor is used for using in std::vector.
DISALLOW_DEFAULT_CONSTRUCTOR(NgramProperty);
+ DISALLOW_ASSIGNMENT_OPERATOR(NgramProperty);
- // TODO: Make members const.
- std::vector<int> mTargetCodePoints;
- int mProbability;
- HistoricalInfo mHistoricalInfo;
+ const std::vector<int> mTargetCodePoints;
+ const int mProbability;
+ const HistoricalInfo mHistoricalInfo;
};
} // namespace latinime
#endif // LATINIME_NGRAM_PROPERTY_H
diff --git a/native/jni/src/suggest/core/dictionary/property/unigram_property.h b/native/jni/src/suggest/core/dictionary/property/unigram_property.h
index d1f0ab4ca..5ed2e2602 100644
--- a/native/jni/src/suggest/core/dictionary/property/unigram_property.h
+++ b/native/jni/src/suggest/core/dictionary/property/unigram_property.h
@@ -41,12 +41,11 @@ class UnigramProperty {
}
private:
- // Default copy constructor and assign operator are used for using in std::vector.
+ // Default copy constructor is used for using in std::vector.
DISALLOW_DEFAULT_CONSTRUCTOR(ShortcutProperty);
- // TODO: Make members const.
- std::vector<int> mTargetCodePoints;
- int mProbability;
+ const std::vector<int> mTargetCodePoints;
+ const int mProbability;
};
UnigramProperty()
@@ -104,13 +103,12 @@ class UnigramProperty {
// Default copy constructor is used for using as a return value.
DISALLOW_ASSIGNMENT_OPERATOR(UnigramProperty);
- // TODO: Make members const.
- bool mRepresentsBeginningOfSentence;
- bool mIsNotAWord;
- bool mIsBlacklisted;
- int mProbability;
- HistoricalInfo mHistoricalInfo;
- std::vector<ShortcutProperty> mShortcuts;
+ const bool mRepresentsBeginningOfSentence;
+ const bool mIsNotAWord;
+ const bool mIsBlacklisted;
+ const int mProbability;
+ const HistoricalInfo mHistoricalInfo;
+ const std::vector<ShortcutProperty> mShortcuts;
};
} // namespace latinime
#endif // LATINIME_UNIGRAM_PROPERTY_H
diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
index 6624b7921..ceda5c03f 100644
--- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
@@ -33,7 +33,7 @@ class DicNodeVector;
class DictionaryHeaderStructurePolicy;
class MultiBigramMap;
class NgramListener;
-class PrevWordsInfo;
+class NgramContext;
class UnigramProperty;
/*
@@ -81,15 +81,15 @@ class DictionaryStructureWithBufferPolicy {
virtual bool removeUnigramEntry(const CodePointArrayView wordCodePoints) = 0;
// Returns whether the update was success or not.
- virtual bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+ virtual bool addNgramEntry(const NgramContext *const ngramContext,
const NgramProperty *const ngramProperty) = 0;
// Returns whether the update was success or not.
- virtual bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+ virtual bool removeNgramEntry(const NgramContext *const ngramContext,
const CodePointArrayView wordCodePoints) = 0;
// Returns whether the update was success or not.
- virtual bool updateCounter(const PrevWordsInfo *const prevWordsInfo,
+ virtual bool updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext,
const CodePointArrayView wordCodePoints, const bool isValidWord,
const HistoricalInfo historicalInfo) = 0;
diff --git a/native/jni/src/suggest/core/policy/traversal.h b/native/jni/src/suggest/core/policy/traversal.h
index 6dfa7e314..5b6616d9a 100644
--- a/native/jni/src/suggest/core/policy/traversal.h
+++ b/native/jni/src/suggest/core/policy/traversal.h
@@ -44,7 +44,7 @@ class Traversal {
virtual bool needsToTraverseAllUserInput() const = 0;
virtual float getMaxSpatialDistance() const = 0;
virtual int getDefaultExpandDicNodeSize() const = 0;
- virtual int getMaxCacheSize(const int inputSize) const = 0;
+ virtual int getMaxCacheSize(const int inputSize, const float weightForLocale) const = 0;
virtual int getTerminalCacheSize() const = 0;
virtual bool isPossibleOmissionChildNode(const DicTraverseSession *const traverseSession,
const DicNode *const parentDicNode, const DicNode *const dicNode) const = 0;
diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.cpp b/native/jni/src/suggest/core/session/dic_traverse_session.cpp
index b4d01d0f0..52dc2f86c 100644
--- a/native/jni/src/suggest/core/session/dic_traverse_session.cpp
+++ b/native/jni/src/suggest/core/session/dic_traverse_session.cpp
@@ -20,7 +20,7 @@
#include "suggest/core/dictionary/dictionary.h"
#include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
-#include "suggest/core/session/prev_words_info.h"
+#include "suggest/core/session/ngram_context.h"
namespace latinime {
@@ -30,12 +30,12 @@ const int DicTraverseSession::DICTIONARY_SIZE_THRESHOLD_TO_USE_LARGE_CACHE_FOR_S
256 * 1024;
void DicTraverseSession::init(const Dictionary *const dictionary,
- const PrevWordsInfo *const prevWordsInfo, const SuggestOptions *const suggestOptions) {
+ const NgramContext *const ngramContext, const SuggestOptions *const suggestOptions) {
mDictionary = dictionary;
mMultiWordCostMultiplier = getDictionaryStructurePolicy()->getHeaderStructurePolicy()
->getMultiWordCostMultiplier();
mSuggestOptions = suggestOptions;
- mPrevWordIdCount = prevWordsInfo->getPrevWordIds(getDictionaryStructurePolicy(),
+ mPrevWordIdCount = ngramContext->getPrevWordIds(getDictionaryStructurePolicy(),
&mPrevWordIdArray, true /* tryLowerCaseSearch */).size();
}
diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.h b/native/jni/src/suggest/core/session/dic_traverse_session.h
index 9f841aa3c..bc53167f0 100644
--- a/native/jni/src/suggest/core/session/dic_traverse_session.h
+++ b/native/jni/src/suggest/core/session/dic_traverse_session.h
@@ -30,7 +30,7 @@ namespace latinime {
class Dictionary;
class DictionaryStructureWithBufferPolicy;
-class PrevWordsInfo;
+class NgramContext;
class ProximityInfo;
class SuggestOptions;
@@ -61,7 +61,7 @@ class DicTraverseSession {
// Non virtual inline destructor -- never inherit this class
AK_FORCE_INLINE ~DicTraverseSession() {}
- void init(const Dictionary *dictionary, const PrevWordsInfo *const prevWordsInfo,
+ void init(const Dictionary *dictionary, const NgramContext *const ngramContext,
const SuggestOptions *const suggestOptions);
// TODO: Remove and merge into init
void setupForGetSuggestions(const ProximityInfo *pInfo, const int *inputCodePoints,
diff --git a/native/jni/src/suggest/core/session/prev_words_info.h b/native/jni/src/suggest/core/session/ngram_context.h
index 553d5ad07..64c71410f 100644
--- a/native/jni/src/suggest/core/session/prev_words_info.h
+++ b/native/jni/src/suggest/core/session/ngram_context.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef LATINIME_PREV_WORDS_INFO_H
-#define LATINIME_PREV_WORDS_INFO_H
+#ifndef LATINIME_NGRAM_CONTEXT_H
+#define LATINIME_NGRAM_CONTEXT_H
#include <array>
@@ -26,25 +26,26 @@
namespace latinime {
-class PrevWordsInfo {
+// Rename to NgramContext.
+class NgramContext {
public:
// No prev word information.
- PrevWordsInfo() : mPrevWordCount(0) {
+ NgramContext() : mPrevWordCount(0) {
clear();
}
- PrevWordsInfo(const PrevWordsInfo &prevWordsInfo)
- : mPrevWordCount(prevWordsInfo.mPrevWordCount) {
+ NgramContext(const NgramContext &ngramContext)
+ : mPrevWordCount(ngramContext.mPrevWordCount) {
for (size_t i = 0; i < mPrevWordCount; ++i) {
- mPrevWordCodePointCount[i] = prevWordsInfo.mPrevWordCodePointCount[i];
- memmove(mPrevWordCodePoints[i], prevWordsInfo.mPrevWordCodePoints[i],
+ mPrevWordCodePointCount[i] = ngramContext.mPrevWordCodePointCount[i];
+ memmove(mPrevWordCodePoints[i], ngramContext.mPrevWordCodePoints[i],
sizeof(mPrevWordCodePoints[i][0]) * mPrevWordCodePointCount[i]);
- mIsBeginningOfSentence[i] = prevWordsInfo.mIsBeginningOfSentence[i];
+ mIsBeginningOfSentence[i] = ngramContext.mIsBeginningOfSentence[i];
}
}
// Construct from previous words.
- PrevWordsInfo(const int prevWordCodePoints[][MAX_WORD_LENGTH],
+ NgramContext(const int prevWordCodePoints[][MAX_WORD_LENGTH],
const int *const prevWordCodePointCount, const bool *const isBeginningOfSentence,
const size_t prevWordCount)
: mPrevWordCount(std::min(NELEMS(mPrevWordCodePoints), prevWordCount)) {
@@ -61,7 +62,7 @@ class PrevWordsInfo {
}
// Construct from a previous word.
- PrevWordsInfo(const int *const prevWordCodePoints, const int prevWordCodePointCount,
+ NgramContext(const int *const prevWordCodePoints, const int prevWordCodePointCount,
const bool isBeginningOfSentence) : mPrevWordCount(1) {
clear();
if (prevWordCodePointCount > MAX_WORD_LENGTH || !prevWordCodePoints) {
@@ -78,8 +79,8 @@ class PrevWordsInfo {
}
// TODO: Remove.
- const PrevWordsInfo getTrimmedPrevWordsInfo(const size_t maxPrevWordCount) const {
- return PrevWordsInfo(mPrevWordCodePoints, mPrevWordCodePointCount, mIsBeginningOfSentence,
+ const NgramContext getTrimmedNgramContext(const size_t maxPrevWordCount) const {
+ return NgramContext(mPrevWordCodePoints, mPrevWordCodePointCount, mIsBeginningOfSentence,
std::min(mPrevWordCount, maxPrevWordCount));
}
@@ -122,7 +123,7 @@ class PrevWordsInfo {
}
private:
- DISALLOW_ASSIGNMENT_OPERATOR(PrevWordsInfo);
+ DISALLOW_ASSIGNMENT_OPERATOR(NgramContext);
static int getWordId(const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
const int *const wordCodePoints, const int wordCodePointCount,
@@ -165,4 +166,4 @@ class PrevWordsInfo {
bool mIsBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
};
} // namespace latinime
-#endif // LATINIME_PREV_WORDS_INFO_H
+#endif // LATINIME_NGRAM_CONTEXT_H
diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp
index 457414f2b..cf2df86a6 100644
--- a/native/jni/src/suggest/core/suggest.cpp
+++ b/native/jni/src/suggest/core/suggest.cpp
@@ -28,6 +28,7 @@
#include "suggest/core/policy/weighting.h"
#include "suggest/core/result/suggestions_output_utils.h"
#include "suggest/core/session/dic_traverse_session.h"
+#include "suggest/core/suggest_options.h"
namespace latinime {
@@ -88,7 +89,8 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession) const {
traverseSession->getDicTraverseCache()->continueSearch();
} else {
// Restart recognition at the root.
- traverseSession->resetCache(TRAVERSAL->getMaxCacheSize(traverseSession->getInputSize()),
+ traverseSession->resetCache(TRAVERSAL->getMaxCacheSize(traverseSession->getInputSize(),
+ traverseSession->getSuggestOptions()->weightForLocale()),
TRAVERSAL->getTerminalCacheSize());
// Create a new dic node here
DicNode rootNode;
diff --git a/native/jni/src/suggest/core/suggest_options.h b/native/jni/src/suggest/core/suggest_options.h
index d456680dd..4d331292b 100644
--- a/native/jni/src/suggest/core/suggest_options.h
+++ b/native/jni/src/suggest/core/suggest_options.h
@@ -42,6 +42,12 @@ class SuggestOptions{
return getBoolOption(SPACE_AWARE_GESTURE_ENABLED);
}
+ AK_FORCE_INLINE float weightForLocale() const {
+ // The option holds the locale weight as an int scaled by 1000; divide by 1000 to get the
+ // real value. NativeSuggestOptions#setWeightForLocale does the opposite processing in Java.
+ return static_cast<float>(getIntOption(WEIGHT_FOR_LOCALE_IN_THOUSANDS)) / 1000.0f;
+ }
+
AK_FORCE_INLINE bool getAdditionalFeaturesBoolOption(const int key) const {
return getBoolOption(key + ADDITIONAL_FEATURES_OPTIONS);
}
@@ -55,9 +61,10 @@ class SuggestOptions{
static const int USE_FULL_EDIT_DISTANCE = 1;
static const int BLOCK_OFFENSIVE_WORDS = 2;
static const int SPACE_AWARE_GESTURE_ENABLED = 3;
+ static const int WEIGHT_FOR_LOCALE_IN_THOUSANDS = 4;
// Additional features options are stored after the other options and used as setting values of
// experimental features.
- static const int ADDITIONAL_FEATURES_OPTIONS = 4;
+ static const int ADDITIONAL_FEATURES_OPTIONS = 5;
const int *const mOptions;
const int mLength;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp
index 8d169743c..6243f14cc 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp
@@ -310,7 +310,7 @@ bool Ver4PatriciaTrieNodeWriter::addShortcutTarget(const PtNodeParams *const ptN
const int shortcutProbability) {
if (!mShortcutPolicy->addNewShortcut(ptNodeParams->getTerminalId(),
targetCodePoints, targetCodePointCount, shortcutProbability)) {
- AKLOGE("Cannot add new shortuct entry. terminalId: %d", ptNodeParams->getTerminalId());
+ AKLOGE("Cannot add new shortcut entry. terminalId: %d", ptNodeParams->getTerminalId());
return false;
}
if (!ptNodeParams->hasShortcutTargets()) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
index 36eafa1e9..0eae934ae 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
@@ -33,7 +33,7 @@
#include "suggest/core/dictionary/property/ngram_property.h"
#include "suggest/core/dictionary/property/unigram_property.h"
#include "suggest/core/dictionary/property/word_property.h"
-#include "suggest/core/session/prev_words_info.h"
+#include "suggest/core/session/ngram_context.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
@@ -186,7 +186,9 @@ int Ver4PatriciaTriePolicy::getProbabilityOfWord(const WordIdArrayView prevWordI
if (bigramsIt.getBigramPos() == ptNodePos
&& bigramsIt.getProbability() != NOT_A_PROBABILITY) {
const int bigramConditionalProbability = getBigramConditionalProbability(
- prevWordPtNodeParams.getProbability(), bigramsIt.getProbability());
+ prevWordPtNodeParams.getProbability(),
+ prevWordPtNodeParams.representsBeginningOfSentence(),
+ bigramsIt.getProbability());
return getProbability(ptNodeParams.getProbability(), bigramConditionalProbability);
}
}
@@ -209,15 +211,19 @@ void Ver4PatriciaTriePolicy::iterateNgramEntries(const WordIdArrayView prevWordI
while (bigramsIt.hasNext()) {
bigramsIt.next();
const int bigramConditionalProbability = getBigramConditionalProbability(
- prevWordPtNodeParams.getProbability(), bigramsIt.getProbability());
+ prevWordPtNodeParams.getProbability(),
+ prevWordPtNodeParams.representsBeginningOfSentence(), bigramsIt.getProbability());
listener->onVisitEntry(bigramConditionalProbability,
getWordIdFromTerminalPtNodePos(bigramsIt.getBigramPos()));
}
}
int Ver4PatriciaTriePolicy::getBigramConditionalProbability(const int prevWordUnigramProbability,
- const int bigramProbability) const {
+ const bool isInBeginningOfSentenceContext, const int bigramProbability) const {
if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
+ if (isInBeginningOfSentenceContext) {
+ return bigramProbability;
+ }
// Calculate conditional probability.
return std::min(MAX_PROBABILITY - prevWordUnigramProbability + bigramProbability,
MAX_PROBABILITY);
@@ -338,7 +344,7 @@ bool Ver4PatriciaTriePolicy::removeUnigramEntry(const CodePointArrayView wordCod
return mNodeWriter.suppressUnigramEntry(&ptNodeParams);
}
-bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramContext *const ngramContext,
const NgramProperty *const ngramProperty) {
if (!mBuffers->isUpdatable()) {
AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
@@ -349,8 +355,8 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
mDictBuffer->getTailPosition());
return false;
}
- if (!prevWordsInfo->isValid()) {
- AKLOGE("prev words info is not valid for adding n-gram entry to the dictionary.");
+ if (!ngramContext->isValid()) {
+ AKLOGE("Ngram context is not valid for adding n-gram entry to the dictionary.");
return false;
}
if (ngramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
@@ -359,23 +365,23 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
return false;
}
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
- const WordIdArrayView prevWordIds = prevWordsInfo->getPrevWordIds(this, &prevWordIdArray,
+ const WordIdArrayView prevWordIds = ngramContext->getPrevWordIds(this, &prevWordIdArray,
false /* tryLowerCaseSearch */);
if (prevWordIds.empty()) {
return false;
}
if (prevWordIds[0] == NOT_A_WORD_ID) {
- if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)) {
+ if (ngramContext->isNthPrevWordBeginningOfSentence(1 /* n */)) {
const UnigramProperty beginningOfSentenceUnigramProperty(
true /* representsBeginningOfSentence */, true /* isNotAWord */,
false /* isBlacklisted */, MAX_PROBABILITY /* probability */, HistoricalInfo());
- if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
+ if (!addUnigramEntry(ngramContext->getNthPrevWordCodePoints(1 /* n */),
&beginningOfSentenceUnigramProperty)) {
AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
return false;
}
// Refresh word ids.
- prevWordsInfo->getPrevWordIds(this, &prevWordIdArray, false /* tryLowerCaseSearch */);
+ ngramContext->getPrevWordIds(this, &prevWordIdArray, false /* tryLowerCaseSearch */);
} else {
return false;
}
@@ -399,7 +405,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
}
}
-bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+bool Ver4PatriciaTriePolicy::removeNgramEntry(const NgramContext *const ngramContext,
const CodePointArrayView wordCodePoints) {
if (!mBuffers->isUpdatable()) {
AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
@@ -410,8 +416,8 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
mDictBuffer->getTailPosition());
return false;
}
- if (!prevWordsInfo->isValid()) {
- AKLOGE("prev words info is not valid for removing n-gram entry form the dictionary.");
+ if (!ngramContext->isValid()) {
+ AKLOGE("Ngram context is not valid for removing n-gram entry form the dictionary.");
return false;
}
if (wordCodePoints.size() > MAX_WORD_LENGTH) {
@@ -419,7 +425,7 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
wordCodePoints.size());
}
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
- const WordIdArrayView prevWordIds = prevWordsInfo->getPrevWordIds(this, &prevWordIdArray,
+ const WordIdArrayView prevWordIds = ngramContext->getPrevWordIds(this, &prevWordIdArray,
false /* tryLowerCaseSearch */);
if (prevWordIds.firstOrDefault(NOT_A_WORD_ID) == NOT_A_WORD_ID) {
return false;
@@ -440,26 +446,27 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
}
-bool Ver4PatriciaTriePolicy::updateCounter(const PrevWordsInfo *const prevWordsInfo,
- const CodePointArrayView wordCodePoints, const bool isValidWord,
- const HistoricalInfo historicalInfo) {
+bool Ver4PatriciaTriePolicy::updateEntriesForWordWithNgramContext(
+ const NgramContext *const ngramContext, const CodePointArrayView wordCodePoints,
+ const bool isValidWord, const HistoricalInfo historicalInfo) {
if (!mBuffers->isUpdatable()) {
- AKLOGI("Warning: updateCounter() is called for non-updatable dictionary.");
+ AKLOGI("Warning: updateEntriesForWordWithNgramContext() is called for non-updatable "
+ "dictionary.");
return false;
}
const int probability = isValidWord ? DUMMY_PROBABILITY_FOR_VALID_WORDS : NOT_A_PROBABILITY;
const UnigramProperty unigramProperty(false /* representsBeginningOfSentence */,
false /* isNotAWord */, false /*isBlacklisted*/, probability, historicalInfo);
if (!addUnigramEntry(wordCodePoints, &unigramProperty)) {
- AKLOGE("Cannot update unigarm entry in updateCounter().");
+ AKLOGE("Cannot update unigarm entry in updateEntriesForWordWithNgramContext().");
return false;
}
- const int probabilityForNgram = prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)
+ const int probabilityForNgram = ngramContext->isNthPrevWordBeginningOfSentence(1 /* n */)
? NOT_A_PROBABILITY : probability;
const NgramProperty ngramProperty(wordCodePoints.toVector(), probabilityForNgram,
historicalInfo);
- if (!addNgramEntry(prevWordsInfo, &ngramProperty)) {
- AKLOGE("Cannot update unigarm entry in updateCounter().");
+ if (!addNgramEntry(ngramContext, &ngramProperty)) {
+ AKLOGE("Cannot update unigarm entry in updateEntriesForWordWithNgramContext().");
return false;
}
return true;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
index b82563e61..1ad5e7e36 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
@@ -112,13 +112,13 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
bool removeUnigramEntry(const CodePointArrayView wordCodePoints);
- bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+ bool addNgramEntry(const NgramContext *const ngramContext,
const NgramProperty *const ngramProperty);
- bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+ bool removeNgramEntry(const NgramContext *const ngramContext,
const CodePointArrayView wordCodePoints);
- bool updateCounter(const PrevWordsInfo *const prevWordsInfo,
+ bool updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext,
const CodePointArrayView wordCodePoints, const bool isValidWord,
const HistoricalInfo historicalInfo);
@@ -175,7 +175,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
const WordAttributes getWordAttributes(const int probability,
const PtNodeParams &ptNodeParams) const;
int getBigramConditionalProbability(const int prevWordUnigramProbability,
- const int bigramProbability) const;
+ const bool isInBeginningOfSentenceContext, const int bigramProbability) const;
};
} // namespace v402
} // namespace backward
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
index d3d684bfa..b7f1199c5 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
@@ -23,7 +23,7 @@
#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
#include "suggest/core/dictionary/multi_bigram_map.h"
#include "suggest/core/dictionary/ngram_listener.h"
-#include "suggest/core/session/prev_words_info.h"
+#include "suggest/core/session/ngram_context.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
index 32a95bb6c..b17681388 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
@@ -93,25 +93,26 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
return false;
}
- bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+ bool addNgramEntry(const NgramContext *const ngramContext,
const NgramProperty *const ngramProperty) {
// This method should not be called for non-updatable dictionary.
AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
return false;
}
- bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+ bool removeNgramEntry(const NgramContext *const ngramContext,
const CodePointArrayView wordCodePoints) {
// This method should not be called for non-updatable dictionary.
AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
return false;
}
- bool updateCounter(const PrevWordsInfo *const prevWordsInfo,
+ bool updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext,
const CodePointArrayView wordCodePoints, const bool isValidWord,
const HistoricalInfo historicalInfo) {
// This method should not be called for non-updatable dictionary.
- AKLOGI("Warning: updateCounter() is called for non-updatable dictionary.");
+ AKLOGI("Warning: updateEntriesForWordWithNgramContext() is called for non-updatable "
+ "dictionary.");
return false;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp
index dc0ed96d0..90d4687dd 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp
@@ -37,7 +37,7 @@ const PtNodeParams Ver2ParticiaTrieNodeReader::fetchPtNodeParamsInBufferFromPtNo
int shortcutPos = NOT_A_DICT_POS;
int bigramPos = NOT_A_DICT_POS;
int siblingPos = NOT_A_DICT_POS;
- PatriciaTrieReadingUtils::readPtNodeInfo(mBuffer.data(), ptNodePos, mShortuctPolicy,
+ PatriciaTrieReadingUtils::readPtNodeInfo(mBuffer.data(), ptNodePos, mShortcutPolicy,
mBigramPolicy, mCodePointTable, &flags, &mergedNodeCodePointCount, mergedNodeCodePoints,
&probability, &childrenPos, &shortcutPos, &bigramPos, &siblingPos);
if (mergedNodeCodePointCount <= 0) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h
index 24ec5bcca..838d37314 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h
@@ -35,7 +35,7 @@ class Ver2ParticiaTrieNodeReader : public PtNodeReader {
const DictionaryBigramsStructurePolicy *const bigramPolicy,
const DictionaryShortcutsStructurePolicy *const shortcutPolicy,
const int *const codePointTable)
- : mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortuctPolicy(shortcutPolicy),
+ : mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy),
mCodePointTable(codePointTable) {}
virtual const PtNodeParams fetchPtNodeParamsInBufferFromPtNodePos(const int ptNodePos) const;
@@ -45,7 +45,7 @@ class Ver2ParticiaTrieNodeReader : public PtNodeReader {
const ReadOnlyByteArrayView mBuffer;
const DictionaryBigramsStructurePolicy *const mBigramPolicy;
- const DictionaryShortcutsStructurePolicy *const mShortuctPolicy;
+ const DictionaryShortcutsStructurePolicy *const mShortcutPolicy;
const int *const mCodePointTable;
};
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
index f13512d5a..d28006ae9 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
@@ -285,7 +285,7 @@ bool Ver4PatriciaTrieNodeWriter::addShortcutTarget(const PtNodeParams *const ptN
const int shortcutProbability) {
if (!mShortcutPolicy->addNewShortcut(ptNodeParams->getTerminalId(),
targetCodePoints, targetCodePointCount, shortcutProbability)) {
- AKLOGE("Cannot add new shortuct entry. terminalId: %d", ptNodeParams->getTerminalId());
+ AKLOGE("Cannot add new shortcut entry. terminalId: %d", ptNodeParams->getTerminalId());
return false;
}
return true;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index 036197c41..445bbe07e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -26,7 +26,7 @@
#include "suggest/core/dictionary/property/ngram_property.h"
#include "suggest/core/dictionary/property/unigram_property.h"
#include "suggest/core/dictionary/property/word_property.h"
-#include "suggest/core/session/prev_words_info.h"
+#include "suggest/core/session/ngram_context.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
@@ -266,7 +266,7 @@ bool Ver4PatriciaTriePolicy::removeUnigramEntry(const CodePointArrayView wordCod
return true;
}
-bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramContext *const ngramContext,
const NgramProperty *const ngramProperty) {
if (!mBuffers->isUpdatable()) {
AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
@@ -277,8 +277,8 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
mDictBuffer->getTailPosition());
return false;
}
- if (!prevWordsInfo->isValid()) {
- AKLOGE("prev words info is not valid for adding n-gram entry to the dictionary.");
+ if (!ngramContext->isValid()) {
+ AKLOGE("Ngram context is not valid for adding n-gram entry to the dictionary.");
return false;
}
if (ngramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
@@ -287,7 +287,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
return false;
}
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
- const WordIdArrayView prevWordIds = prevWordsInfo->getPrevWordIds(this, &prevWordIdArray,
+ const WordIdArrayView prevWordIds = ngramContext->getPrevWordIds(this, &prevWordIdArray,
false /* tryLowerCaseSearch */);
if (prevWordIds.empty()) {
return false;
@@ -296,19 +296,19 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
if (prevWordIds[i] != NOT_A_WORD_ID) {
continue;
}
- if (!prevWordsInfo->isNthPrevWordBeginningOfSentence(i + 1 /* n */)) {
+ if (!ngramContext->isNthPrevWordBeginningOfSentence(i + 1 /* n */)) {
return false;
}
const UnigramProperty beginningOfSentenceUnigramProperty(
true /* representsBeginningOfSentence */, true /* isNotAWord */,
false /* isBlacklisted */, MAX_PROBABILITY /* probability */, HistoricalInfo());
- if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
+ if (!addUnigramEntry(ngramContext->getNthPrevWordCodePoints(1 /* n */),
&beginningOfSentenceUnigramProperty)) {
AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
return false;
}
// Refresh word ids.
- prevWordsInfo->getPrevWordIds(this, &prevWordIdArray, false /* tryLowerCaseSearch */);
+ ngramContext->getPrevWordIds(this, &prevWordIdArray, false /* tryLowerCaseSearch */);
}
const int wordId = getWordId(CodePointArrayView(*ngramProperty->getTargetCodePoints()),
false /* forceLowerCaseSearch */);
@@ -326,7 +326,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
}
}
-bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+bool Ver4PatriciaTriePolicy::removeNgramEntry(const NgramContext *const ngramContext,
const CodePointArrayView wordCodePoints) {
if (!mBuffers->isUpdatable()) {
AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
@@ -337,8 +337,8 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
mDictBuffer->getTailPosition());
return false;
}
- if (!prevWordsInfo->isValid()) {
- AKLOGE("prev words info is not valid for removing n-gram entry form the dictionary.");
+ if (!ngramContext->isValid()) {
+ AKLOGE("Ngram context is not valid for removing n-gram entry form the dictionary.");
return false;
}
if (wordCodePoints.size() > MAX_WORD_LENGTH) {
@@ -346,7 +346,7 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
wordCodePoints.size());
}
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
- const WordIdArrayView prevWordIds = prevWordsInfo->getPrevWordIds(this, &prevWordIdArray,
+ const WordIdArrayView prevWordIds = ngramContext->getPrevWordIds(this, &prevWordIdArray,
false /* tryLowerCaseSearch */);
if (prevWordIds.empty() || prevWordIds.contains(NOT_A_WORD_ID)) {
return false;
@@ -363,29 +363,30 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
}
}
-bool Ver4PatriciaTriePolicy::updateCounter(const PrevWordsInfo *const prevWordsInfo,
- const CodePointArrayView wordCodePoints, const bool isValidWord,
- const HistoricalInfo historicalInfo) {
+bool Ver4PatriciaTriePolicy::updateEntriesForWordWithNgramContext(
+ const NgramContext *const ngramContext, const CodePointArrayView wordCodePoints,
+ const bool isValidWord, const HistoricalInfo historicalInfo) {
if (!mBuffers->isUpdatable()) {
- AKLOGI("Warning: updateCounter() is called for non-updatable dictionary.");
+ AKLOGI("Warning: updateEntriesForWordWithNgramContext() is called for non-updatable "
+ "dictionary.");
return false;
}
// TODO: Have count up method in language model dict content.
const int probability = isValidWord ? DUMMY_PROBABILITY_FOR_VALID_WORDS : NOT_A_PROBABILITY;
const UnigramProperty unigramProperty(false /* representsBeginningOfSentence */,
- false /* isNotAWord */, false /*isBlacklisted*/, probability, historicalInfo);
+ false /* isNotAWord */, false /* isBlacklisted */, probability, historicalInfo);
if (!addUnigramEntry(wordCodePoints, &unigramProperty)) {
- AKLOGE("Cannot update unigarm entry in updateCounter().");
+ AKLOGE("Cannot update unigarm entry in updateEntriesForWordWithNgramContext().");
return false;
}
- const int probabilityForNgram = prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)
+ const int probabilityForNgram = ngramContext->isNthPrevWordBeginningOfSentence(1 /* n */)
? NOT_A_PROBABILITY : probability;
const NgramProperty ngramProperty(wordCodePoints.toVector(), probabilityForNgram,
historicalInfo);
- for (size_t i = 1; i <= prevWordsInfo->getPrevWordCount(); ++i) {
- const PrevWordsInfo trimmedPrevWordsInfo(prevWordsInfo->getTrimmedPrevWordsInfo(i));
- if (!addNgramEntry(&trimmedPrevWordsInfo, &ngramProperty)) {
- AKLOGE("Cannot update ngram entry in updateCounter().");
+ for (size_t i = 1; i <= ngramContext->getPrevWordCount(); ++i) {
+ const NgramContext trimmedNgramContext(ngramContext->getTrimmedNgramContext(i));
+ if (!addNgramEntry(&trimmedNgramContext, &ngramProperty)) {
+ AKLOGE("Cannot update ngram entry in updateEntriesForWordWithNgramContext().");
return false;
}
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
index 662bb8d4b..60e30f209 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
@@ -92,13 +92,13 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
bool removeUnigramEntry(const CodePointArrayView wordCodePoints);
- bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+ bool addNgramEntry(const NgramContext *const ngramContext,
const NgramProperty *const ngramProperty);
- bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+ bool removeNgramEntry(const NgramContext *const ngramContext,
const CodePointArrayView wordCodePoints);
- bool updateCounter(const PrevWordsInfo *const prevWordsInfo,
+ bool updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext,
const CodePointArrayView wordCodePoints, const bool isValidWord,
const HistoricalInfo historicalInfo);
diff --git a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp
index 3fc566e7a..b621eef22 100644
--- a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp
+++ b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp
@@ -31,6 +31,7 @@ const float ScoringParams::DIGRAPH_PENALTY_FOR_EXACT_MATCH = 0.03f;
// TODO: Unlimit max cache dic node size
const int ScoringParams::MAX_CACHE_DIC_NODE_SIZE = 170;
const int ScoringParams::MAX_CACHE_DIC_NODE_SIZE_FOR_SINGLE_POINT = 310;
+const int ScoringParams::MAX_CACHE_DIC_NODE_SIZE_FOR_LOW_PROBABILITY_LOCALE = 50;
const int ScoringParams::THRESHOLD_SHORT_WORD_LENGTH = 4;
const float ScoringParams::DISTANCE_WEIGHT_LENGTH = 0.1524f;
@@ -61,4 +62,7 @@ const float ScoringParams::HAS_MULTI_WORD_TERMINAL_COST = 0.4182f;
const float ScoringParams::TYPING_BASE_OUTPUT_SCORE = 1.0f;
const float ScoringParams::TYPING_MAX_OUTPUT_SCORE_PER_INPUT = 0.1f;
const float ScoringParams::NORMALIZED_SPATIAL_DISTANCE_THRESHOLD_FOR_EDIT = 0.095f;
+const float ScoringParams::LOCALE_WEIGHT_THRESHOLD_FOR_SPACE_SUBSTITUTION = 0.99f;
+const float ScoringParams::LOCALE_WEIGHT_THRESHOLD_FOR_SPACE_OMISSION = 0.99f;
+const float ScoringParams::LOCALE_WEIGHT_THRESHOLD_FOR_SMALL_CACHE_SIZE = 0.99f;
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/typing/scoring_params.h b/native/jni/src/suggest/policyimpl/typing/scoring_params.h
index b12de6d87..731424f3d 100644
--- a/native/jni/src/suggest/policyimpl/typing/scoring_params.h
+++ b/native/jni/src/suggest/policyimpl/typing/scoring_params.h
@@ -30,6 +30,7 @@ class ScoringParams {
static const float AUTOCORRECT_OUTPUT_THRESHOLD;
static const int MAX_CACHE_DIC_NODE_SIZE;
static const int MAX_CACHE_DIC_NODE_SIZE_FOR_SINGLE_POINT;
+ static const int MAX_CACHE_DIC_NODE_SIZE_FOR_LOW_PROBABILITY_LOCALE;
static const int THRESHOLD_SHORT_WORD_LENGTH;
static const float EXACT_MATCH_PROMOTION;
@@ -68,6 +69,9 @@ class ScoringParams {
static const float TYPING_BASE_OUTPUT_SCORE;
static const float TYPING_MAX_OUTPUT_SCORE_PER_INPUT;
static const float NORMALIZED_SPATIAL_DISTANCE_THRESHOLD_FOR_EDIT;
+ static const float LOCALE_WEIGHT_THRESHOLD_FOR_SPACE_SUBSTITUTION;
+ static const float LOCALE_WEIGHT_THRESHOLD_FOR_SPACE_OMISSION;
+ static const float LOCALE_WEIGHT_THRESHOLD_FOR_SMALL_CACHE_SIZE;
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(ScoringParams);
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h
index b64ee8be4..b9b6314ae 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h
@@ -26,6 +26,7 @@
#include "suggest/core/layout/proximity_info_utils.h"
#include "suggest/core/policy/traversal.h"
#include "suggest/core/session/dic_traverse_session.h"
+#include "suggest/core/suggest_options.h"
#include "suggest/policyimpl/typing/scoring_params.h"
#include "utils/char_utils.h"
@@ -77,6 +78,13 @@ class TypingTraversal : public Traversal {
if (!CORRECT_NEW_WORD_SPACE_SUBSTITUTION) {
return false;
}
+ if (traverseSession->getSuggestOptions()->weightForLocale()
+ < ScoringParams::LOCALE_WEIGHT_THRESHOLD_FOR_SPACE_SUBSTITUTION) {
+ // Space substitution is heavy, so we skip doing it if the weight for this language
+ // is low because we anticipate the suggestions out of this dictionary are not for
+ // the language the user intends to type in.
+ return false;
+ }
if (!canDoLookAheadCorrection(traverseSession, dicNode)) {
return false;
}
@@ -91,6 +99,13 @@ class TypingTraversal : public Traversal {
if (!CORRECT_NEW_WORD_SPACE_OMISSION) {
return false;
}
+ if (traverseSession->getSuggestOptions()->weightForLocale()
+ < ScoringParams::LOCALE_WEIGHT_THRESHOLD_FOR_SPACE_OMISSION) {
+ // Space omission is heavy, so we skip doing it if the weight for this language
+ // is low because we anticipate the suggestions out of this dictionary are not for
+ // the language the user intends to type in.
+ return false;
+ }
const int inputSize = traverseSession->getInputSize();
// TODO: Don't refer to isCompletion?
if (dicNode->isCompletion(inputSize)) {
@@ -141,9 +156,14 @@ class TypingTraversal : public Traversal {
return DicNodeVector::DEFAULT_NODES_SIZE_FOR_OPTIMIZATION;
}
- AK_FORCE_INLINE int getMaxCacheSize(const int inputSize) const {
- return (inputSize <= 1) ? ScoringParams::MAX_CACHE_DIC_NODE_SIZE_FOR_SINGLE_POINT
- : ScoringParams::MAX_CACHE_DIC_NODE_SIZE;
+ AK_FORCE_INLINE int getMaxCacheSize(const int inputSize, const float weightForLocale) const {
+ if (inputSize <= 1) {
+ return ScoringParams::MAX_CACHE_DIC_NODE_SIZE_FOR_SINGLE_POINT;
+ }
+ if (weightForLocale < ScoringParams::LOCALE_WEIGHT_THRESHOLD_FOR_SMALL_CACHE_SIZE) {
+ return ScoringParams::MAX_CACHE_DIC_NODE_SIZE_FOR_LOW_PROBABILITY_LOCALE;
+ }
+ return ScoringParams::MAX_CACHE_DIC_NODE_SIZE;
}
AK_FORCE_INLINE int getTerminalCacheSize() const {
diff --git a/native/jni/src/utils/jni_data_utils.h b/native/jni/src/utils/jni_data_utils.h
index 235a03bba..25cc41742 100644
--- a/native/jni/src/utils/jni_data_utils.h
+++ b/native/jni/src/utils/jni_data_utils.h
@@ -21,7 +21,7 @@
#include "defines.h"
#include "jni.h"
-#include "suggest/core/session/prev_words_info.h"
+#include "suggest/core/session/ngram_context.h"
#include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h"
#include "utils/char_utils.h"
@@ -96,7 +96,7 @@ class JniDataUtils {
}
}
- static PrevWordsInfo constructPrevWordsInfo(JNIEnv *env, jobjectArray prevWordCodePointArrays,
+ static NgramContext constructNgramContext(JNIEnv *env, jobjectArray prevWordCodePointArrays,
jbooleanArray isBeginningOfSentenceArray, const size_t prevWordCount) {
int prevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH];
int prevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
@@ -119,7 +119,7 @@ class JniDataUtils {
&isBeginningOfSentenceBoolean);
isBeginningOfSentence[i] = isBeginningOfSentenceBoolean == JNI_TRUE;
}
- return PrevWordsInfo(prevWordCodePoints, prevWordCodePointCount, isBeginningOfSentence,
+ return NgramContext(prevWordCodePoints, prevWordCodePointCount, isBeginningOfSentence,
prevWordCount);
}