aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/src
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni/src')
-rw-r--r--native/jni/src/defines.h2
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.cpp45
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.h18
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary_utils.cpp8
-rw-r--r--native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h8
-rw-r--r--native/jni/src/suggest/core/session/dic_traverse_session.cpp6
-rw-r--r--native/jni/src/suggest/core/session/dic_traverse_session.h4
-rw-r--r--native/jni/src/suggest/core/session/ngram_context.h (renamed from native/jni/src/suggest/core/session/prev_words_info.h)31
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp8
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h31
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.cpp4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp124
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h8
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h9
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp50
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h6
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp55
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h5
-rw-r--r--native/jni/src/utils/jni_data_utils.h6
24 files changed, 215 insertions, 225 deletions
diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h
index e55c9eb8a..885118524 100644
--- a/native/jni/src/defines.h
+++ b/native/jni/src/defines.h
@@ -119,7 +119,7 @@ static inline void dumpWordInfo(const int *word, const int length, const int ran
const int probability) {
static char charBuf[50];
const int N = intArrayToCharArray(word, length, charBuf, NELEMS(charBuf));
- if (N > 1) {
+ if (N > 0) {
AKLOGI("%2d [ %s ] (%d)", rank, charBuf, probability);
}
}
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index 8d3f8a9f8..697e99ffb 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -23,7 +23,7 @@
#include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "suggest/core/result/suggestion_results.h"
#include "suggest/core/session/dic_traverse_session.h"
-#include "suggest/core/session/prev_words_info.h"
+#include "suggest/core/session/ngram_context.h"
#include "suggest/core/suggest.h"
#include "suggest/core/suggest_options.h"
#include "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h"
@@ -46,25 +46,22 @@ Dictionary::Dictionary(JNIEnv *env, DictionaryStructureWithBufferPolicy::Structu
void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
- int inputSize, const PrevWordsInfo *const prevWordsInfo,
+ int inputSize, const NgramContext *const ngramContext,
const SuggestOptions *const suggestOptions, const float weightOfLangModelVsSpatialModel,
SuggestionResults *const outSuggestionResults) const {
TimeKeeper::setCurrentTime();
- traverseSession->init(this, prevWordsInfo, suggestOptions);
+ traverseSession->init(this, ngramContext, suggestOptions);
const auto &suggest = suggestOptions->isGesture() ? mGestureSuggest : mTypingSuggest;
suggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
ycoordinates, times, pointerIds, inputCodePoints, inputSize,
weightOfLangModelVsSpatialModel, outSuggestionResults);
- if (DEBUG_DICT) {
- outSuggestionResults->dumpSuggestions();
- }
}
Dictionary::NgramListenerForPrediction::NgramListenerForPrediction(
- const PrevWordsInfo *const prevWordsInfo, const WordIdArrayView prevWordIds,
+ const NgramContext *const ngramContext, const WordIdArrayView prevWordIds,
SuggestionResults *const suggestionResults,
const DictionaryStructureWithBufferPolicy *const dictStructurePolicy)
- : mPrevWordsInfo(prevWordsInfo), mPrevWordIds(prevWordIds),
+ : mNgramContext(ngramContext), mPrevWordIds(prevWordIds),
mSuggestionResults(suggestionResults), mDictStructurePolicy(dictStructurePolicy) {}
void Dictionary::NgramListenerForPrediction::onVisitEntry(const int ngramProbability,
@@ -72,7 +69,7 @@ void Dictionary::NgramListenerForPrediction::onVisitEntry(const int ngramProbabi
if (targetWordId == NOT_A_WORD_ID) {
return;
}
- if (mPrevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)
+ if (mNgramContext->isNthPrevWordBeginningOfSentence(1 /* n */)
&& ngramProbability == NOT_A_PROBABILITY) {
return;
}
@@ -88,20 +85,20 @@ void Dictionary::NgramListenerForPrediction::onVisitEntry(const int ngramProbabi
wordAttributes.getProbability());
}
-void Dictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
+void Dictionary::getPredictions(const NgramContext *const ngramContext,
SuggestionResults *const outSuggestionResults) const {
TimeKeeper::setCurrentTime();
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
- const WordIdArrayView prevWordIds = prevWordsInfo->getPrevWordIds(
+ const WordIdArrayView prevWordIds = ngramContext->getPrevWordIds(
mDictionaryStructureWithBufferPolicy.get(), &prevWordIdArray,
true /* tryLowerCaseSearch */);
- NgramListenerForPrediction listener(prevWordsInfo, prevWordIds, outSuggestionResults,
+ NgramListenerForPrediction listener(ngramContext, prevWordIds, outSuggestionResults,
mDictionaryStructureWithBufferPolicy.get());
mDictionaryStructureWithBufferPolicy->iterateNgramEntries(prevWordIds, &listener);
}
int Dictionary::getProbability(const CodePointArrayView codePoints) const {
- return getNgramProbability(nullptr /* prevWordsInfo */, codePoints);
+ return getNgramProbability(nullptr /* ngramContext */, codePoints);
}
int Dictionary::getMaxProbabilityOfExactMatches(const CodePointArrayView codePoints) const {
@@ -110,18 +107,18 @@ int Dictionary::getMaxProbabilityOfExactMatches(const CodePointArrayView codePoi
mDictionaryStructureWithBufferPolicy.get(), codePoints);
}
-int Dictionary::getNgramProbability(const PrevWordsInfo *const prevWordsInfo,
+int Dictionary::getNgramProbability(const NgramContext *const ngramContext,
const CodePointArrayView codePoints) const {
TimeKeeper::setCurrentTime();
const int wordId = mDictionaryStructureWithBufferPolicy->getWordId(codePoints,
false /* forceLowerCaseSearch */);
if (wordId == NOT_A_WORD_ID) return NOT_A_PROBABILITY;
- if (!prevWordsInfo) {
+ if (!ngramContext) {
return getDictionaryStructurePolicy()->getProbabilityOfWord(WordIdArrayView(), wordId);
}
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
- const WordIdArrayView prevWordIds = prevWordsInfo->getPrevWordIds
- (mDictionaryStructureWithBufferPolicy.get(), &prevWordIdArray,
+ const WordIdArrayView prevWordIds = ngramContext->getPrevWordIds(
+ mDictionaryStructureWithBufferPolicy.get(), &prevWordIdArray,
true /* tryLowerCaseSearch */);
return getDictionaryStructurePolicy()->getProbabilityOfWord(prevWordIds, wordId);
}
@@ -143,24 +140,24 @@ bool Dictionary::removeUnigramEntry(const CodePointArrayView codePoints) {
return mDictionaryStructureWithBufferPolicy->removeUnigramEntry(codePoints);
}
-bool Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+bool Dictionary::addNgramEntry(const NgramContext *const ngramContext,
const NgramProperty *const ngramProperty) {
TimeKeeper::setCurrentTime();
- return mDictionaryStructureWithBufferPolicy->addNgramEntry(prevWordsInfo, ngramProperty);
+ return mDictionaryStructureWithBufferPolicy->addNgramEntry(ngramContext, ngramProperty);
}
-bool Dictionary::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+bool Dictionary::removeNgramEntry(const NgramContext *const ngramContext,
const CodePointArrayView codePoints) {
TimeKeeper::setCurrentTime();
- return mDictionaryStructureWithBufferPolicy->removeNgramEntry(prevWordsInfo, codePoints);
+ return mDictionaryStructureWithBufferPolicy->removeNgramEntry(ngramContext, codePoints);
}
-bool Dictionary::updateCounter(const PrevWordsInfo *const prevWordsInfo,
+bool Dictionary::updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext,
const CodePointArrayView codePoints, const bool isValidWord,
const HistoricalInfo historicalInfo) {
TimeKeeper::setCurrentTime();
- return mDictionaryStructureWithBufferPolicy->updateCounter(prevWordsInfo, codePoints,
- isValidWord, historicalInfo);
+ return mDictionaryStructureWithBufferPolicy->updateEntriesForWordWithNgramContext(ngramContext,
+ codePoints, isValidWord, historicalInfo);
}
bool Dictionary::flush(const char *const filePath) {
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h
index a58dbfbd7..843aec473 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h
@@ -33,7 +33,7 @@ namespace latinime {
class DictionaryStructureWithBufferPolicy;
class DicTraverseSession;
-class PrevWordsInfo;
+class NgramContext;
class ProximityInfo;
class SuggestionResults;
class SuggestOptions;
@@ -66,18 +66,18 @@ class Dictionary {
void getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
- int inputSize, const PrevWordsInfo *const prevWordsInfo,
+ int inputSize, const NgramContext *const ngramContext,
const SuggestOptions *const suggestOptions, const float weightOfLangModelVsSpatialModel,
SuggestionResults *const outSuggestionResults) const;
- void getPredictions(const PrevWordsInfo *const prevWordsInfo,
+ void getPredictions(const NgramContext *const ngramContext,
SuggestionResults *const outSuggestionResults) const;
int getProbability(const CodePointArrayView codePoints) const;
int getMaxProbabilityOfExactMatches(const CodePointArrayView codePoints) const;
- int getNgramProbability(const PrevWordsInfo *const prevWordsInfo,
+ int getNgramProbability(const NgramContext *const ngramContext,
const CodePointArrayView codePoints) const;
bool addUnigramEntry(const CodePointArrayView codePoints,
@@ -85,13 +85,13 @@ class Dictionary {
bool removeUnigramEntry(const CodePointArrayView codePoints);
- bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+ bool addNgramEntry(const NgramContext *const ngramContext,
const NgramProperty *const ngramProperty);
- bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+ bool removeNgramEntry(const NgramContext *const ngramContext,
const CodePointArrayView codePoints);
- bool updateCounter(const PrevWordsInfo *const prevWordsInfo,
+ bool updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext,
const CodePointArrayView codePoints, const bool isValidWord,
const HistoricalInfo historicalInfo);
@@ -123,7 +123,7 @@ class Dictionary {
class NgramListenerForPrediction : public NgramListener {
public:
- NgramListenerForPrediction(const PrevWordsInfo *const prevWordsInfo,
+ NgramListenerForPrediction(const NgramContext *const ngramContext,
const WordIdArrayView prevWordIds, SuggestionResults *const suggestionResults,
const DictionaryStructureWithBufferPolicy *const dictStructurePolicy);
virtual void onVisitEntry(const int ngramProbability, const int targetWordId);
@@ -131,7 +131,7 @@ class Dictionary {
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(NgramListenerForPrediction);
- const PrevWordsInfo *const mPrevWordsInfo;
+ const NgramContext *const mNgramContext;
const WordIdArrayView mPrevWordIds;
SuggestionResults *const mSuggestionResults;
const DictionaryStructureWithBufferPolicy *const mDictStructurePolicy;
diff --git a/native/jni/src/suggest/core/dictionary/dictionary_utils.cpp b/native/jni/src/suggest/core/dictionary/dictionary_utils.cpp
index b85f3622a..9573c37bc 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary_utils.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary_utils.cpp
@@ -21,7 +21,7 @@
#include "suggest/core/dicnode/dic_node_vector.h"
#include "suggest/core/dictionary/dictionary.h"
#include "suggest/core/dictionary/digraph_utils.h"
-#include "suggest/core/session/prev_words_info.h"
+#include "suggest/core/session/ngram_context.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "utils/int_array_view.h"
@@ -33,10 +33,10 @@ namespace latinime {
std::vector<DicNode> current;
std::vector<DicNode> next;
- // No prev words information.
- PrevWordsInfo emptyPrevWordsInfo;
+ // No ngram context.
+ NgramContext emptyNgramContext;
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
- const WordIdArrayView prevWordIds = emptyPrevWordsInfo.getPrevWordIds(
+ const WordIdArrayView prevWordIds = emptyNgramContext.getPrevWordIds(
dictionaryStructurePolicy, &prevWordIdArray, false /* tryLowerCaseSearch */);
current.emplace_back();
DicNodeUtils::initAsRoot(dictionaryStructurePolicy, prevWordIds, &current.front());
diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
index 6624b7921..ceda5c03f 100644
--- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
@@ -33,7 +33,7 @@ class DicNodeVector;
class DictionaryHeaderStructurePolicy;
class MultiBigramMap;
class NgramListener;
-class PrevWordsInfo;
+class NgramContext;
class UnigramProperty;
/*
@@ -81,15 +81,15 @@ class DictionaryStructureWithBufferPolicy {
virtual bool removeUnigramEntry(const CodePointArrayView wordCodePoints) = 0;
// Returns whether the update was success or not.
- virtual bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+ virtual bool addNgramEntry(const NgramContext *const ngramContext,
const NgramProperty *const ngramProperty) = 0;
// Returns whether the update was success or not.
- virtual bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+ virtual bool removeNgramEntry(const NgramContext *const ngramContext,
const CodePointArrayView wordCodePoints) = 0;
// Returns whether the update was success or not.
- virtual bool updateCounter(const PrevWordsInfo *const prevWordsInfo,
+ virtual bool updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext,
const CodePointArrayView wordCodePoints, const bool isValidWord,
const HistoricalInfo historicalInfo) = 0;
diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.cpp b/native/jni/src/suggest/core/session/dic_traverse_session.cpp
index b4d01d0f0..52dc2f86c 100644
--- a/native/jni/src/suggest/core/session/dic_traverse_session.cpp
+++ b/native/jni/src/suggest/core/session/dic_traverse_session.cpp
@@ -20,7 +20,7 @@
#include "suggest/core/dictionary/dictionary.h"
#include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
-#include "suggest/core/session/prev_words_info.h"
+#include "suggest/core/session/ngram_context.h"
namespace latinime {
@@ -30,12 +30,12 @@ const int DicTraverseSession::DICTIONARY_SIZE_THRESHOLD_TO_USE_LARGE_CACHE_FOR_S
256 * 1024;
void DicTraverseSession::init(const Dictionary *const dictionary,
- const PrevWordsInfo *const prevWordsInfo, const SuggestOptions *const suggestOptions) {
+ const NgramContext *const ngramContext, const SuggestOptions *const suggestOptions) {
mDictionary = dictionary;
mMultiWordCostMultiplier = getDictionaryStructurePolicy()->getHeaderStructurePolicy()
->getMultiWordCostMultiplier();
mSuggestOptions = suggestOptions;
- mPrevWordIdCount = prevWordsInfo->getPrevWordIds(getDictionaryStructurePolicy(),
+ mPrevWordIdCount = ngramContext->getPrevWordIds(getDictionaryStructurePolicy(),
&mPrevWordIdArray, true /* tryLowerCaseSearch */).size();
}
diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.h b/native/jni/src/suggest/core/session/dic_traverse_session.h
index 9f841aa3c..bc53167f0 100644
--- a/native/jni/src/suggest/core/session/dic_traverse_session.h
+++ b/native/jni/src/suggest/core/session/dic_traverse_session.h
@@ -30,7 +30,7 @@ namespace latinime {
class Dictionary;
class DictionaryStructureWithBufferPolicy;
-class PrevWordsInfo;
+class NgramContext;
class ProximityInfo;
class SuggestOptions;
@@ -61,7 +61,7 @@ class DicTraverseSession {
// Non virtual inline destructor -- never inherit this class
AK_FORCE_INLINE ~DicTraverseSession() {}
- void init(const Dictionary *dictionary, const PrevWordsInfo *const prevWordsInfo,
+ void init(const Dictionary *dictionary, const NgramContext *const ngramContext,
const SuggestOptions *const suggestOptions);
// TODO: Remove and merge into init
void setupForGetSuggestions(const ProximityInfo *pInfo, const int *inputCodePoints,
diff --git a/native/jni/src/suggest/core/session/prev_words_info.h b/native/jni/src/suggest/core/session/ngram_context.h
index 553d5ad07..64c71410f 100644
--- a/native/jni/src/suggest/core/session/prev_words_info.h
+++ b/native/jni/src/suggest/core/session/ngram_context.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef LATINIME_PREV_WORDS_INFO_H
-#define LATINIME_PREV_WORDS_INFO_H
+#ifndef LATINIME_NGRAM_CONTEXT_H
+#define LATINIME_NGRAM_CONTEXT_H
#include <array>
@@ -26,25 +26,26 @@
namespace latinime {
-class PrevWordsInfo {
+// Rename to NgramContext.
+class NgramContext {
public:
// No prev word information.
- PrevWordsInfo() : mPrevWordCount(0) {
+ NgramContext() : mPrevWordCount(0) {
clear();
}
- PrevWordsInfo(const PrevWordsInfo &prevWordsInfo)
- : mPrevWordCount(prevWordsInfo.mPrevWordCount) {
+ NgramContext(const NgramContext &ngramContext)
+ : mPrevWordCount(ngramContext.mPrevWordCount) {
for (size_t i = 0; i < mPrevWordCount; ++i) {
- mPrevWordCodePointCount[i] = prevWordsInfo.mPrevWordCodePointCount[i];
- memmove(mPrevWordCodePoints[i], prevWordsInfo.mPrevWordCodePoints[i],
+ mPrevWordCodePointCount[i] = ngramContext.mPrevWordCodePointCount[i];
+ memmove(mPrevWordCodePoints[i], ngramContext.mPrevWordCodePoints[i],
sizeof(mPrevWordCodePoints[i][0]) * mPrevWordCodePointCount[i]);
- mIsBeginningOfSentence[i] = prevWordsInfo.mIsBeginningOfSentence[i];
+ mIsBeginningOfSentence[i] = ngramContext.mIsBeginningOfSentence[i];
}
}
// Construct from previous words.
- PrevWordsInfo(const int prevWordCodePoints[][MAX_WORD_LENGTH],
+ NgramContext(const int prevWordCodePoints[][MAX_WORD_LENGTH],
const int *const prevWordCodePointCount, const bool *const isBeginningOfSentence,
const size_t prevWordCount)
: mPrevWordCount(std::min(NELEMS(mPrevWordCodePoints), prevWordCount)) {
@@ -61,7 +62,7 @@ class PrevWordsInfo {
}
// Construct from a previous word.
- PrevWordsInfo(const int *const prevWordCodePoints, const int prevWordCodePointCount,
+ NgramContext(const int *const prevWordCodePoints, const int prevWordCodePointCount,
const bool isBeginningOfSentence) : mPrevWordCount(1) {
clear();
if (prevWordCodePointCount > MAX_WORD_LENGTH || !prevWordCodePoints) {
@@ -78,8 +79,8 @@ class PrevWordsInfo {
}
// TODO: Remove.
- const PrevWordsInfo getTrimmedPrevWordsInfo(const size_t maxPrevWordCount) const {
- return PrevWordsInfo(mPrevWordCodePoints, mPrevWordCodePointCount, mIsBeginningOfSentence,
+ const NgramContext getTrimmedNgramContext(const size_t maxPrevWordCount) const {
+ return NgramContext(mPrevWordCodePoints, mPrevWordCodePointCount, mIsBeginningOfSentence,
std::min(mPrevWordCount, maxPrevWordCount));
}
@@ -122,7 +123,7 @@ class PrevWordsInfo {
}
private:
- DISALLOW_ASSIGNMENT_OPERATOR(PrevWordsInfo);
+ DISALLOW_ASSIGNMENT_OPERATOR(NgramContext);
static int getWordId(const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
const int *const wordCodePoints, const int wordCodePointCount,
@@ -165,4 +166,4 @@ class PrevWordsInfo {
bool mIsBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
};
} // namespace latinime
-#endif // LATINIME_PREV_WORDS_INFO_H
+#endif // LATINIME_NGRAM_CONTEXT_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
index 6ed65d921..4c4dfc578 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
@@ -35,23 +35,15 @@ const char *const HeaderPolicy::EXTENDED_REGION_SIZE_KEY = "EXTENDED_REGION_SIZE
// count.
const char *const HeaderPolicy::HAS_HISTORICAL_INFO_KEY = "HAS_HISTORICAL_INFO";
const char *const HeaderPolicy::LOCALE_KEY = "locale"; // match Java declaration
-const char *const HeaderPolicy::FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP_KEY =
- "FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP";
const char *const HeaderPolicy::FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY =
"FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID";
-const char *const HeaderPolicy::FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS_KEY =
- "FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS";
const char *const HeaderPolicy::MAX_UNIGRAM_COUNT_KEY = "MAX_UNIGRAM_COUNT";
const char *const HeaderPolicy::MAX_BIGRAM_COUNT_KEY = "MAX_BIGRAM_COUNT";
const int HeaderPolicy::DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE = 100;
const float HeaderPolicy::MULTIPLE_WORD_COST_MULTIPLIER_SCALE = 100.0f;
-const int HeaderPolicy::DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP = 2;
const int HeaderPolicy::DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID = 3;
-// 30 days
-const int HeaderPolicy::DEFAULT_FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS =
- 30 * 24 * 60 * 60;
const int HeaderPolicy::DEFAULT_MAX_UNIGRAM_COUNT = 10000;
const int HeaderPolicy::DEFAULT_MAX_BIGRAM_COUNT = 10000;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
index daf40d4f9..bc8eaded3 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
@@ -53,15 +53,9 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
EXTENDED_REGION_SIZE_KEY, 0 /* defaultValue */)),
mHasHistoricalInfoOfWords(HeaderReadWriteUtils::readBoolAttributeValue(
&mAttributeMap, HAS_HISTORICAL_INFO_KEY, false /* defaultValue */)),
- mForgettingCurveOccurrencesToLevelUp(HeaderReadWriteUtils::readIntAttributeValue(
- &mAttributeMap, FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP_KEY,
- DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP)),
mForgettingCurveProbabilityValuesTableId(HeaderReadWriteUtils::readIntAttributeValue(
&mAttributeMap, FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY,
DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID)),
- mForgettingCurveDurationToLevelDown(HeaderReadWriteUtils::readIntAttributeValue(
- &mAttributeMap, FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS_KEY,
- DEFAULT_FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS)),
mMaxUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(
&mAttributeMap, MAX_UNIGRAM_COUNT_KEY, DEFAULT_MAX_UNIGRAM_COUNT)),
mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue(
@@ -86,15 +80,9 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0),
mHasHistoricalInfoOfWords(HeaderReadWriteUtils::readBoolAttributeValue(
&mAttributeMap, HAS_HISTORICAL_INFO_KEY, false /* defaultValue */)),
- mForgettingCurveOccurrencesToLevelUp(HeaderReadWriteUtils::readIntAttributeValue(
- &mAttributeMap, FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP_KEY,
- DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP)),
mForgettingCurveProbabilityValuesTableId(HeaderReadWriteUtils::readIntAttributeValue(
&mAttributeMap, FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY,
DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID)),
- mForgettingCurveDurationToLevelDown(HeaderReadWriteUtils::readIntAttributeValue(
- &mAttributeMap, FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS_KEY,
- DEFAULT_FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS)),
mMaxUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(
&mAttributeMap, MAX_UNIGRAM_COUNT_KEY, DEFAULT_MAX_UNIGRAM_COUNT)),
mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue(
@@ -113,12 +101,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
mUnigramCount(headerPolicy->mUnigramCount), mBigramCount(headerPolicy->mBigramCount),
mExtendedRegionSize(headerPolicy->mExtendedRegionSize),
mHasHistoricalInfoOfWords(headerPolicy->mHasHistoricalInfoOfWords),
- mForgettingCurveOccurrencesToLevelUp(
- headerPolicy->mForgettingCurveOccurrencesToLevelUp),
mForgettingCurveProbabilityValuesTableId(
headerPolicy->mForgettingCurveProbabilityValuesTableId),
- mForgettingCurveDurationToLevelDown(
- headerPolicy->mForgettingCurveDurationToLevelDown),
mMaxUnigramCount(headerPolicy->mMaxUnigramCount),
mMaxBigramCount(headerPolicy->mMaxBigramCount),
mCodePointTable(headerPolicy->mCodePointTable) {}
@@ -130,8 +114,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
mRequiresGermanUmlautProcessing(false), mIsDecayingDict(false),
mDate(0), mLastDecayedTime(0), mUnigramCount(0), mBigramCount(0),
mExtendedRegionSize(0), mHasHistoricalInfoOfWords(false),
- mForgettingCurveOccurrencesToLevelUp(0), mForgettingCurveProbabilityValuesTableId(0),
- mForgettingCurveDurationToLevelDown(0), mMaxUnigramCount(0), mMaxBigramCount(0),
+ mForgettingCurveProbabilityValuesTableId(0), mMaxUnigramCount(0), mMaxBigramCount(0),
mCodePointTable(nullptr) {}
~HeaderPolicy() {}
@@ -217,18 +200,10 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
return &mAttributeMap;
}
- AK_FORCE_INLINE int getForgettingCurveOccurrencesToLevelUp() const {
- return mForgettingCurveOccurrencesToLevelUp;
- }
-
AK_FORCE_INLINE int getForgettingCurveProbabilityValuesTableId() const {
return mForgettingCurveProbabilityValuesTableId;
}
- AK_FORCE_INLINE int getForgettingCurveDurationToLevelDown() const {
- return mForgettingCurveDurationToLevelDown;
- }
-
AK_FORCE_INLINE int getMaxUnigramCount() const {
return mMaxUnigramCount;
}
@@ -280,9 +255,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
static const char *const MAX_BIGRAM_COUNT_KEY;
static const int DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE;
static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE;
- static const int DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP;
static const int DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID;
- static const int DEFAULT_FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS;
static const int DEFAULT_MAX_UNIGRAM_COUNT;
static const int DEFAULT_MAX_BIGRAM_COUNT;
@@ -300,9 +273,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
const int mBigramCount;
const int mExtendedRegionSize;
const bool mHasHistoricalInfoOfWords;
- const int mForgettingCurveOccurrencesToLevelUp;
const int mForgettingCurveProbabilityValuesTableId;
- const int mForgettingCurveDurationToLevelDown;
const int mMaxUnigramCount;
const int mMaxBigramCount;
const int *const mCodePointTable;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.cpp
index 4a740d47b..ef6166ffd 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.cpp
@@ -74,8 +74,8 @@ bool ProbabilityDictContent::setProbabilityEntry(const int terminalId,
return false;
}
writingPos += getEntrySize();
- mSize++;
}
+ mSize = terminalId + 1;
}
return writeEntry(probabilityEntry, entryPos);
}
@@ -100,7 +100,6 @@ bool ProbabilityDictContent::flushToFile(const char *const dictPath) const {
bool ProbabilityDictContent::runGC(
const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
const ProbabilityDictContent *const originalProbabilityDictContent) {
- mSize = 0;
for (TerminalPositionLookupTable::TerminalIdMap::const_iterator it = terminalIdMap->begin();
it != terminalIdMap->end(); ++it) {
const ProbabilityEntry probabilityEntry =
@@ -109,7 +108,6 @@ bool ProbabilityDictContent::runGC(
AKLOGE("Cannot set probability entry in runGC. terminalId: %d", it->second);
return false;
}
- mSize++;
}
return true;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp
index 8d169743c..6243f14cc 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp
@@ -310,7 +310,7 @@ bool Ver4PatriciaTrieNodeWriter::addShortcutTarget(const PtNodeParams *const ptN
const int shortcutProbability) {
if (!mShortcutPolicy->addNewShortcut(ptNodeParams->getTerminalId(),
targetCodePoints, targetCodePointCount, shortcutProbability)) {
- AKLOGE("Cannot add new shortuct entry. terminalId: %d", ptNodeParams->getTerminalId());
+ AKLOGE("Cannot add new shortcut entry. terminalId: %d", ptNodeParams->getTerminalId());
return false;
}
if (!ptNodeParams->hasShortcutTargets()) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
index 11f7b305f..1c61bd4b9 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
@@ -33,7 +33,7 @@
#include "suggest/core/dictionary/property/ngram_property.h"
#include "suggest/core/dictionary/property/unigram_property.h"
#include "suggest/core/dictionary/property/word_property.h"
-#include "suggest/core/session/prev_words_info.h"
+#include "suggest/core/session/ngram_context.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
@@ -146,18 +146,15 @@ const WordAttributes Ver4PatriciaTriePolicy::getWordAttributes(const int probabi
int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability,
const int bigramProbability) const {
- if (mHeaderPolicy->isDecayingDict()) {
- // Both probabilities are encoded. Decode them and get probability.
- return ForgettingCurveUtils::getProbability(unigramProbability, bigramProbability);
- } else {
- if (unigramProbability == NOT_A_PROBABILITY) {
- return NOT_A_PROBABILITY;
- } else if (bigramProbability == NOT_A_PROBABILITY) {
- return ProbabilityUtils::backoff(unigramProbability);
- } else {
- return bigramProbability;
- }
+ // In the v4 format, bigramProbability is a conditional probability.
+ const int bigramConditionalProbability = bigramProbability;
+ if (unigramProbability == NOT_A_PROBABILITY) {
+ return NOT_A_PROBABILITY;
}
+ if (bigramConditionalProbability == NOT_A_PROBABILITY) {
+ return ProbabilityUtils::backoff(unigramProbability);
+ }
+ return bigramConditionalProbability;
}
int Ver4PatriciaTriePolicy::getProbabilityOfWord(const WordIdArrayView prevWordIds,
@@ -170,37 +167,66 @@ int Ver4PatriciaTriePolicy::getProbabilityOfWord(const WordIdArrayView prevWordI
if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) {
return NOT_A_PROBABILITY;
}
- if (!prevWordIds.empty()) {
- const int bigramsPosition = getBigramsPositionOfPtNode(
- getTerminalPtNodePosFromWordId(prevWordIds[0]));
- BinaryDictionaryBigramsIterator bigramsIt(&mBigramPolicy, bigramsPosition);
- while (bigramsIt.hasNext()) {
- bigramsIt.next();
- if (bigramsIt.getBigramPos() == ptNodePos
- && bigramsIt.getProbability() != NOT_A_PROBABILITY) {
- return getProbability(ptNodeParams.getProbability(), bigramsIt.getProbability());
- }
- }
+ if (prevWordIds.empty()) {
+ return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
+ }
+ if (prevWordIds[0] == NOT_A_WORD_ID) {
return NOT_A_PROBABILITY;
}
- return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
+ const PtNodeParams prevWordPtNodeParams =
+ mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(prevWordIds[0]);
+ if (prevWordPtNodeParams.isDeleted()) {
+ return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
+ }
+ const int bigramsPosition = mBuffers->getBigramDictContent()->getBigramListHeadPos(
+ prevWordPtNodeParams.getTerminalId());
+ BinaryDictionaryBigramsIterator bigramsIt(&mBigramPolicy, bigramsPosition);
+ while (bigramsIt.hasNext()) {
+ bigramsIt.next();
+ if (bigramsIt.getBigramPos() == ptNodePos
+ && bigramsIt.getProbability() != NOT_A_PROBABILITY) {
+ const int bigramConditionalProbability = getBigramConditionalProbability(
+ prevWordPtNodeParams.getProbability(), bigramsIt.getProbability());
+ return getProbability(ptNodeParams.getProbability(), bigramConditionalProbability);
+ }
+ }
+ return NOT_A_PROBABILITY;
}
void Ver4PatriciaTriePolicy::iterateNgramEntries(const WordIdArrayView prevWordIds,
NgramListener *const listener) const {
- if (prevWordIds.empty()) {
+ if (prevWordIds.firstOrDefault(NOT_A_DICT_POS) == NOT_A_DICT_POS) {
+ return;
+ }
+ const PtNodeParams prevWordPtNodeParams =
+ mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(prevWordIds[0]);
+ if (prevWordPtNodeParams.isDeleted()) {
return;
}
- const int bigramsPosition = getBigramsPositionOfPtNode(
- getTerminalPtNodePosFromWordId(prevWordIds[0]));
+ const int bigramsPosition = mBuffers->getBigramDictContent()->getBigramListHeadPos(
+ prevWordPtNodeParams.getTerminalId());
BinaryDictionaryBigramsIterator bigramsIt(&mBigramPolicy, bigramsPosition);
while (bigramsIt.hasNext()) {
bigramsIt.next();
- listener->onVisitEntry(bigramsIt.getProbability(),
+ const int bigramConditionalProbability = getBigramConditionalProbability(
+ prevWordPtNodeParams.getProbability(), bigramsIt.getProbability());
+ listener->onVisitEntry(bigramConditionalProbability,
getWordIdFromTerminalPtNodePos(bigramsIt.getBigramPos()));
}
}
+int Ver4PatriciaTriePolicy::getBigramConditionalProbability(const int prevWordUnigramProbability,
+ const int bigramProbability) const {
+ if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
+ // Calculate conditional probability.
+ return std::min(MAX_PROBABILITY - prevWordUnigramProbability + bigramProbability,
+ MAX_PROBABILITY);
+ } else {
+ // bigramProbability is a conditional probability.
+ return bigramProbability;
+ }
+}
+
BinaryDictionaryShortcutIterator Ver4PatriciaTriePolicy::getShortcutIterator(
const int wordId) const {
const int shortcutPos = getShortcutPositionOfPtNode(getTerminalPtNodePosFromWordId(wordId));
@@ -312,7 +338,7 @@ bool Ver4PatriciaTriePolicy::removeUnigramEntry(const CodePointArrayView wordCod
return mNodeWriter.suppressUnigramEntry(&ptNodeParams);
}
-bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramContext *const ngramContext,
const NgramProperty *const ngramProperty) {
if (!mBuffers->isUpdatable()) {
AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
@@ -323,8 +349,8 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
mDictBuffer->getTailPosition());
return false;
}
- if (!prevWordsInfo->isValid()) {
- AKLOGE("prev words info is not valid for adding n-gram entry to the dictionary.");
+ if (!ngramContext->isValid()) {
+ AKLOGE("Ngram context is not valid for adding n-gram entry to the dictionary.");
return false;
}
if (ngramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
@@ -333,23 +359,23 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
return false;
}
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
- const WordIdArrayView prevWordIds = prevWordsInfo->getPrevWordIds(this, &prevWordIdArray,
+ const WordIdArrayView prevWordIds = ngramContext->getPrevWordIds(this, &prevWordIdArray,
false /* tryLowerCaseSearch */);
if (prevWordIds.empty()) {
return false;
}
if (prevWordIds[0] == NOT_A_WORD_ID) {
- if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)) {
+ if (ngramContext->isNthPrevWordBeginningOfSentence(1 /* n */)) {
const UnigramProperty beginningOfSentenceUnigramProperty(
true /* representsBeginningOfSentence */, true /* isNotAWord */,
false /* isBlacklisted */, MAX_PROBABILITY /* probability */, HistoricalInfo());
- if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
+ if (!addUnigramEntry(ngramContext->getNthPrevWordCodePoints(1 /* n */),
&beginningOfSentenceUnigramProperty)) {
AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
return false;
}
// Refresh word ids.
- prevWordsInfo->getPrevWordIds(this, &prevWordIdArray, false /* tryLowerCaseSearch */);
+ ngramContext->getPrevWordIds(this, &prevWordIdArray, false /* tryLowerCaseSearch */);
} else {
return false;
}
@@ -373,7 +399,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
}
}
-bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+bool Ver4PatriciaTriePolicy::removeNgramEntry(const NgramContext *const ngramContext,
const CodePointArrayView wordCodePoints) {
if (!mBuffers->isUpdatable()) {
AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
@@ -384,8 +410,8 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
mDictBuffer->getTailPosition());
return false;
}
- if (!prevWordsInfo->isValid()) {
- AKLOGE("prev words info is not valid for removing n-gram entry form the dictionary.");
+ if (!ngramContext->isValid()) {
+ AKLOGE("Ngram context is not valid for removing n-gram entry form the dictionary.");
return false;
}
if (wordCodePoints.size() > MAX_WORD_LENGTH) {
@@ -393,7 +419,7 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
wordCodePoints.size());
}
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
- const WordIdArrayView prevWordIds = prevWordsInfo->getPrevWordIds(this, &prevWordIdArray,
+ const WordIdArrayView prevWordIds = ngramContext->getPrevWordIds(this, &prevWordIdArray,
false /* tryLowerCaseSerch */);
if (prevWordIds.firstOrDefault(NOT_A_WORD_ID) == NOT_A_WORD_ID) {
return false;
@@ -414,23 +440,27 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
}
-bool Ver4PatriciaTriePolicy::updateCounter(const PrevWordsInfo *const prevWordsInfo,
- const CodePointArrayView wordCodePoints, const bool isValidWord,
- const HistoricalInfo historicalInfo) {
+bool Ver4PatriciaTriePolicy::updateEntriesForWordWithNgramContext(
+ const NgramContext *const ngramContext, const CodePointArrayView wordCodePoints,
+ const bool isValidWord, const HistoricalInfo historicalInfo) {
if (!mBuffers->isUpdatable()) {
- AKLOGI("Warning: updateCounter() is called for non-updatable dictionary.");
+ AKLOGI("Warning: updateEntriesForWordWithNgramContext() is called for non-updatable "
+ "dictionary.");
return false;
}
const int probability = isValidWord ? DUMMY_PROBABILITY_FOR_VALID_WORDS : NOT_A_PROBABILITY;
const UnigramProperty unigramProperty(false /* representsBeginningOfSentence */,
false /* isNotAWord */, false /*isBlacklisted*/, probability, historicalInfo);
if (!addUnigramEntry(wordCodePoints, &unigramProperty)) {
- AKLOGE("Cannot update unigarm entry in updateCounter().");
+ AKLOGE("Cannot update unigarm entry in updateEntriesForWordWithNgramContext().");
return false;
}
- const NgramProperty ngramProperty(wordCodePoints.toVector(), probability, historicalInfo);
- if (!addNgramEntry(prevWordsInfo, &ngramProperty)) {
- AKLOGE("Cannot update unigarm entry in updateCounter().");
+ const int probabilityForNgram = ngramContext->isNthPrevWordBeginningOfSentence(1 /* n */)
+ ? NOT_A_PROBABILITY : probability;
+ const NgramProperty ngramProperty(wordCodePoints.toVector(), probabilityForNgram,
+ historicalInfo);
+ if (!addNgramEntry(ngramContext, &ngramProperty)) {
+ AKLOGE("Cannot update unigarm entry in updateEntriesForWordWithNgramContext().");
return false;
}
return true;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
index 995d7764f..4aa399c3e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
@@ -112,13 +112,13 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
bool removeUnigramEntry(const CodePointArrayView wordCodePoints);
- bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+ bool addNgramEntry(const NgramContext *const ngramContext,
const NgramProperty *const ngramProperty);
- bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+ bool removeNgramEntry(const NgramContext *const ngramContext,
const CodePointArrayView wordCodePoints);
- bool updateCounter(const PrevWordsInfo *const prevWordsInfo,
+ bool updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext,
const CodePointArrayView wordCodePoints, const bool isValidWord,
const HistoricalInfo historicalInfo);
@@ -174,6 +174,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int getTerminalPtNodePosFromWordId(const int wordId) const;
const WordAttributes getWordAttributes(const int probability,
const PtNodeParams &ptNodeParams) const;
+ int getBigramConditionalProbability(const int prevWordUnigramProbability,
+ const int bigramProbability) const;
};
} // namespace v402
} // namespace backward
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
index d3d684bfa..b7f1199c5 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
@@ -23,7 +23,7 @@
#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
#include "suggest/core/dictionary/multi_bigram_map.h"
#include "suggest/core/dictionary/ngram_listener.h"
-#include "suggest/core/session/prev_words_info.h"
+#include "suggest/core/session/ngram_context.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
index 32a95bb6c..b17681388 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
@@ -93,25 +93,26 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
return false;
}
- bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+ bool addNgramEntry(const NgramContext *const ngramContext,
const NgramProperty *const ngramProperty) {
// This method should not be called for non-updatable dictionary.
AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
return false;
}
- bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+ bool removeNgramEntry(const NgramContext *const ngramContext,
const CodePointArrayView wordCodePoints) {
// This method should not be called for non-updatable dictionary.
AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
return false;
}
- bool updateCounter(const PrevWordsInfo *const prevWordsInfo,
+ bool updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext,
const CodePointArrayView wordCodePoints, const bool isValidWord,
const HistoricalInfo historicalInfo) {
// This method should not be called for non-updatable dictionary.
- AKLOGI("Warning: updateCounter() is called for non-updatable dictionary.");
+ AKLOGI("Warning: updateEntriesForWordWithNgramContext() is called for non-updatable "
+ "dictionary.");
return false;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp
index dc0ed96d0..90d4687dd 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp
@@ -37,7 +37,7 @@ const PtNodeParams Ver2ParticiaTrieNodeReader::fetchPtNodeParamsInBufferFromPtNo
int shortcutPos = NOT_A_DICT_POS;
int bigramPos = NOT_A_DICT_POS;
int siblingPos = NOT_A_DICT_POS;
- PatriciaTrieReadingUtils::readPtNodeInfo(mBuffer.data(), ptNodePos, mShortuctPolicy,
+ PatriciaTrieReadingUtils::readPtNodeInfo(mBuffer.data(), ptNodePos, mShortcutPolicy,
mBigramPolicy, mCodePointTable, &flags, &mergedNodeCodePointCount, mergedNodeCodePoints,
&probability, &childrenPos, &shortcutPos, &bigramPos, &siblingPos);
if (mergedNodeCodePointCount <= 0) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h
index 24ec5bcca..838d37314 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h
@@ -35,7 +35,7 @@ class Ver2ParticiaTrieNodeReader : public PtNodeReader {
const DictionaryBigramsStructurePolicy *const bigramPolicy,
const DictionaryShortcutsStructurePolicy *const shortcutPolicy,
const int *const codePointTable)
- : mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortuctPolicy(shortcutPolicy),
+ : mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy),
mCodePointTable(codePointTable) {}
virtual const PtNodeParams fetchPtNodeParamsInBufferFromPtNodePos(const int ptNodePos) const;
@@ -45,7 +45,7 @@ class Ver2ParticiaTrieNodeReader : public PtNodeReader {
const ReadOnlyByteArrayView mBuffer;
const DictionaryBigramsStructurePolicy *const mBigramPolicy;
- const DictionaryShortcutsStructurePolicy *const mShortuctPolicy;
+ const DictionaryShortcutsStructurePolicy *const mShortcutPolicy;
const int *const mCodePointTable;
};
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
index f13512d5a..d28006ae9 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
@@ -285,7 +285,7 @@ bool Ver4PatriciaTrieNodeWriter::addShortcutTarget(const PtNodeParams *const ptN
const int shortcutProbability) {
if (!mShortcutPolicy->addNewShortcut(ptNodeParams->getTerminalId(),
targetCodePoints, targetCodePointCount, shortcutProbability)) {
- AKLOGE("Cannot add new shortuct entry. terminalId: %d", ptNodeParams->getTerminalId());
+ AKLOGE("Cannot add new shortcut entry. terminalId: %d", ptNodeParams->getTerminalId());
return false;
}
return true;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index 41b109f95..445bbe07e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -26,7 +26,7 @@
#include "suggest/core/dictionary/property/ngram_property.h"
#include "suggest/core/dictionary/property/unigram_property.h"
#include "suggest/core/dictionary/property/word_property.h"
-#include "suggest/core/session/prev_words_info.h"
+#include "suggest/core/session/ngram_context.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
@@ -266,7 +266,7 @@ bool Ver4PatriciaTriePolicy::removeUnigramEntry(const CodePointArrayView wordCod
return true;
}
-bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramContext *const ngramContext,
const NgramProperty *const ngramProperty) {
if (!mBuffers->isUpdatable()) {
AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
@@ -277,8 +277,8 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
mDictBuffer->getTailPosition());
return false;
}
- if (!prevWordsInfo->isValid()) {
- AKLOGE("prev words info is not valid for adding n-gram entry to the dictionary.");
+ if (!ngramContext->isValid()) {
+ AKLOGE("Ngram context is not valid for adding n-gram entry to the dictionary.");
return false;
}
if (ngramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
@@ -287,7 +287,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
return false;
}
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
- const WordIdArrayView prevWordIds = prevWordsInfo->getPrevWordIds(this, &prevWordIdArray,
+ const WordIdArrayView prevWordIds = ngramContext->getPrevWordIds(this, &prevWordIdArray,
false /* tryLowerCaseSearch */);
if (prevWordIds.empty()) {
return false;
@@ -296,19 +296,19 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
if (prevWordIds[i] != NOT_A_WORD_ID) {
continue;
}
- if (!prevWordsInfo->isNthPrevWordBeginningOfSentence(i + 1 /* n */)) {
+ if (!ngramContext->isNthPrevWordBeginningOfSentence(i + 1 /* n */)) {
return false;
}
const UnigramProperty beginningOfSentenceUnigramProperty(
true /* representsBeginningOfSentence */, true /* isNotAWord */,
false /* isBlacklisted */, MAX_PROBABILITY /* probability */, HistoricalInfo());
- if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
+ if (!addUnigramEntry(ngramContext->getNthPrevWordCodePoints(1 /* n */),
&beginningOfSentenceUnigramProperty)) {
AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
return false;
}
// Refresh word ids.
- prevWordsInfo->getPrevWordIds(this, &prevWordIdArray, false /* tryLowerCaseSearch */);
+ ngramContext->getPrevWordIds(this, &prevWordIdArray, false /* tryLowerCaseSearch */);
}
const int wordId = getWordId(CodePointArrayView(*ngramProperty->getTargetCodePoints()),
false /* forceLowerCaseSearch */);
@@ -326,7 +326,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
}
}
-bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+bool Ver4PatriciaTriePolicy::removeNgramEntry(const NgramContext *const ngramContext,
const CodePointArrayView wordCodePoints) {
if (!mBuffers->isUpdatable()) {
AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
@@ -337,8 +337,8 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
mDictBuffer->getTailPosition());
return false;
}
- if (!prevWordsInfo->isValid()) {
- AKLOGE("prev words info is not valid for removing n-gram entry form the dictionary.");
+ if (!ngramContext->isValid()) {
+ AKLOGE("Ngram context is not valid for removing n-gram entry form the dictionary.");
return false;
}
if (wordCodePoints.size() > MAX_WORD_LENGTH) {
@@ -346,7 +346,7 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
wordCodePoints.size());
}
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
- const WordIdArrayView prevWordIds = prevWordsInfo->getPrevWordIds(this, &prevWordIdArray,
+ const WordIdArrayView prevWordIds = ngramContext->getPrevWordIds(this, &prevWordIdArray,
false /* tryLowerCaseSerch */);
if (prevWordIds.empty() || prevWordIds.contains(NOT_A_WORD_ID)) {
return false;
@@ -363,26 +363,30 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
}
}
-bool Ver4PatriciaTriePolicy::updateCounter(const PrevWordsInfo *const prevWordsInfo,
- const CodePointArrayView wordCodePoints, const bool isValidWord,
- const HistoricalInfo historicalInfo) {
+bool Ver4PatriciaTriePolicy::updateEntriesForWordWithNgramContext(
+ const NgramContext *const ngramContext, const CodePointArrayView wordCodePoints,
+ const bool isValidWord, const HistoricalInfo historicalInfo) {
if (!mBuffers->isUpdatable()) {
- AKLOGI("Warning: updateCounter() is called for non-updatable dictionary.");
+ AKLOGI("Warning: updateEntriesForWordWithNgramContext() is called for non-updatable "
+ "dictionary.");
return false;
}
// TODO: Have count up method in language model dict content.
const int probability = isValidWord ? DUMMY_PROBABILITY_FOR_VALID_WORDS : NOT_A_PROBABILITY;
const UnigramProperty unigramProperty(false /* representsBeginningOfSentence */,
- false /* isNotAWord */, false /*isBlacklisted*/, probability, historicalInfo);
+ false /* isNotAWord */, false /* isBlacklisted */, probability, historicalInfo);
if (!addUnigramEntry(wordCodePoints, &unigramProperty)) {
- AKLOGE("Cannot update unigarm entry in updateCounter().");
+ AKLOGE("Cannot update unigarm entry in updateEntriesForWordWithNgramContext().");
return false;
}
- const NgramProperty ngramProperty(wordCodePoints.toVector(), probability, historicalInfo);
- for (size_t i = 1; i <= prevWordsInfo->getPrevWordCount(); ++i) {
- const PrevWordsInfo trimmedPrevWordsInfo(prevWordsInfo->getTrimmedPrevWordsInfo(i));
- if (!addNgramEntry(&trimmedPrevWordsInfo, &ngramProperty)) {
- AKLOGE("Cannot update ngram entry in updateCounter().");
+ const int probabilityForNgram = ngramContext->isNthPrevWordBeginningOfSentence(1 /* n */)
+ ? NOT_A_PROBABILITY : probability;
+ const NgramProperty ngramProperty(wordCodePoints.toVector(), probabilityForNgram,
+ historicalInfo);
+ for (size_t i = 1; i <= ngramContext->getPrevWordCount(); ++i) {
+ const NgramContext trimmedNgramContext(ngramContext->getTrimmedNgramContext(i));
+ if (!addNgramEntry(&trimmedNgramContext, &ngramProperty)) {
+ AKLOGE("Cannot update ngram entry in updateEntriesForWordWithNgramContext().");
return false;
}
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
index 662bb8d4b..60e30f209 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
@@ -92,13 +92,13 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
bool removeUnigramEntry(const CodePointArrayView wordCodePoints);
- bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+ bool addNgramEntry(const NgramContext *const ngramContext,
const NgramProperty *const ngramProperty);
- bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+ bool removeNgramEntry(const NgramContext *const ngramContext,
const CodePointArrayView wordCodePoints);
- bool updateCounter(const PrevWordsInfo *const prevWordsInfo,
+ bool updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext,
const CodePointArrayView wordCodePoints, const bool isValidWord,
const HistoricalInfo historicalInfo);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
index af4bc186a..e5ef2abf8 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
@@ -29,10 +29,14 @@ namespace latinime {
const int ForgettingCurveUtils::MULTIPLIER_TWO_IN_PROBABILITY_SCALE = 8;
const int ForgettingCurveUtils::DECAY_INTERVAL_SECONDS = 2 * 60 * 60;
-const int ForgettingCurveUtils::MAX_LEVEL = 3;
-const int ForgettingCurveUtils::MIN_VISIBLE_LEVEL = 1;
-const int ForgettingCurveUtils::MAX_ELAPSED_TIME_STEP_COUNT = 15;
-const int ForgettingCurveUtils::DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD = 14;
+const int ForgettingCurveUtils::MAX_LEVEL = 15;
+const int ForgettingCurveUtils::MIN_VISIBLE_LEVEL = 2;
+const int ForgettingCurveUtils::MAX_ELAPSED_TIME_STEP_COUNT = 31;
+const int ForgettingCurveUtils::DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD = 30;
+const int ForgettingCurveUtils::OCCURRENCES_TO_RAISE_THE_LEVEL = 1;
+// TODO: Evaluate whether this should be 7.5 days.
+// 15 days
+const int ForgettingCurveUtils::DURATION_TO_LOWER_THE_LEVEL_IN_SECONDS = 15 * 24 * 60 * 60;
const float ForgettingCurveUtils::UNIGRAM_COUNT_HARD_LIMIT_WEIGHT = 1.2;
const float ForgettingCurveUtils::BIGRAM_COUNT_HARD_LIMIT_WEIGHT = 1.2;
@@ -54,19 +58,23 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
|| (originalHistoricalInfo->getLevel() == newHistoricalInfo->getLevel()
&& originalHistoricalInfo->getCount() < newHistoricalInfo->getCount())) {
// Initial information.
+ int count = newHistoricalInfo->getCount();
+ if (count >= OCCURRENCES_TO_RAISE_THE_LEVEL) {
+ const int level = clampToValidLevelRange(newHistoricalInfo->getLevel() + 1);
+ return HistoricalInfo(timestamp, level, 0 /* count */);
+ }
const int level = clampToValidLevelRange(newHistoricalInfo->getLevel());
- const int count = clampToValidCountRange(newHistoricalInfo->getCount(), headerPolicy);
- return HistoricalInfo(timestamp, level, count);
+ return HistoricalInfo(timestamp, level, clampToValidCountRange(count, headerPolicy));
} else {
const int updatedCount = originalHistoricalInfo->getCount() + 1;
- if (updatedCount >= headerPolicy->getForgettingCurveOccurrencesToLevelUp()) {
+ if (updatedCount >= OCCURRENCES_TO_RAISE_THE_LEVEL) {
// The count exceeds the max value the level can be incremented.
if (originalHistoricalInfo->getLevel() >= MAX_LEVEL) {
// The level is already max.
return HistoricalInfo(timestamp,
originalHistoricalInfo->getLevel(), originalHistoricalInfo->getCount());
} else {
- // Level up.
+ // Raise the level.
return HistoricalInfo(timestamp,
originalHistoricalInfo->getLevel() + 1, 0 /* count */);
}
@@ -79,31 +87,18 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
/* static */ int ForgettingCurveUtils::decodeProbability(
const HistoricalInfo *const historicalInfo, const HeaderPolicy *const headerPolicy) {
const int elapsedTimeStepCount = getElapsedTimeStepCount(historicalInfo->getTimestamp(),
- headerPolicy->getForgettingCurveDurationToLevelDown());
+ DURATION_TO_LOWER_THE_LEVEL_IN_SECONDS);
return sProbabilityTable.getProbability(
headerPolicy->getForgettingCurveProbabilityValuesTableId(),
clampToValidLevelRange(historicalInfo->getLevel()),
clampToValidTimeStepCountRange(elapsedTimeStepCount));
}
-/* static */ int ForgettingCurveUtils::getProbability(const int unigramProbability,
- const int bigramProbability) {
- if (unigramProbability == NOT_A_PROBABILITY) {
- return NOT_A_PROBABILITY;
- } else if (bigramProbability == NOT_A_PROBABILITY) {
- return std::min(backoff(unigramProbability), MAX_PROBABILITY);
- } else {
- // TODO: Investigate better way to handle bigram probability.
- return std::min(std::max(unigramProbability,
- bigramProbability + MULTIPLIER_TWO_IN_PROBABILITY_SCALE), MAX_PROBABILITY);
- }
-}
-
/* static */ bool ForgettingCurveUtils::needsToKeep(const HistoricalInfo *const historicalInfo,
const HeaderPolicy *const headerPolicy) {
return historicalInfo->getLevel() > 0
|| getElapsedTimeStepCount(historicalInfo->getTimestamp(),
- headerPolicy->getForgettingCurveDurationToLevelDown())
+ DURATION_TO_LOWER_THE_LEVEL_IN_SECONDS)
< DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD;
}
@@ -113,14 +108,14 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
if (originalHistoricalInfo->getTimestamp() == NOT_A_TIMESTAMP) {
return HistoricalInfo();
}
- const int durationToLevelDownInSeconds = headerPolicy->getForgettingCurveDurationToLevelDown();
+ const int durationToLevelDownInSeconds = DURATION_TO_LOWER_THE_LEVEL_IN_SECONDS;
const int elapsedTimeStep = getElapsedTimeStepCount(
originalHistoricalInfo->getTimestamp(), durationToLevelDownInSeconds);
if (elapsedTimeStep <= MAX_ELAPSED_TIME_STEP_COUNT) {
// No need to update historical info.
return *originalHistoricalInfo;
}
- // Level down.
+ // Lower the level.
const int maxLevelDownAmonut = elapsedTimeStep / (MAX_ELAPSED_TIME_STEP_COUNT + 1);
const int levelDownAmount = (maxLevelDownAmonut >= originalHistoricalInfo->getLevel()) ?
originalHistoricalInfo->getLevel() : maxLevelDownAmonut;
@@ -170,7 +165,7 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
/* static */ int ForgettingCurveUtils::clampToValidCountRange(const int count,
const HeaderPolicy *const headerPolicy) {
- return std::min(std::max(count, 0), headerPolicy->getForgettingCurveOccurrencesToLevelUp() - 1);
+ return std::min(std::max(count, 0), OCCURRENCES_TO_RAISE_THE_LEVEL - 1);
}
/* static */ int ForgettingCurveUtils::clampToValidLevelRange(const int level) {
@@ -187,9 +182,9 @@ const int ForgettingCurveUtils::ProbabilityTable::MODEST_PROBABILITY_TABLE_ID =
const int ForgettingCurveUtils::ProbabilityTable::STRONG_PROBABILITY_TABLE_ID = 2;
const int ForgettingCurveUtils::ProbabilityTable::AGGRESSIVE_PROBABILITY_TABLE_ID = 3;
const int ForgettingCurveUtils::ProbabilityTable::WEAK_MAX_PROBABILITY = 127;
-const int ForgettingCurveUtils::ProbabilityTable::MODEST_BASE_PROBABILITY = 32;
-const int ForgettingCurveUtils::ProbabilityTable::STRONG_BASE_PROBABILITY = 35;
-const int ForgettingCurveUtils::ProbabilityTable::AGGRESSIVE_BASE_PROBABILITY = 40;
+const int ForgettingCurveUtils::ProbabilityTable::MODEST_BASE_PROBABILITY = 8;
+const int ForgettingCurveUtils::ProbabilityTable::STRONG_BASE_PROBABILITY = 9;
+const int ForgettingCurveUtils::ProbabilityTable::AGGRESSIVE_BASE_PROBABILITY = 10;
ForgettingCurveUtils::ProbabilityTable::ProbabilityTable() : mTables() {
@@ -202,7 +197,7 @@ ForgettingCurveUtils::ProbabilityTable::ProbabilityTable() : mTables() {
const float endProbability = getBaseProbabilityForLevel(tableId, level - 1);
for (int timeStepCount = 0; timeStepCount <= MAX_ELAPSED_TIME_STEP_COUNT;
++timeStepCount) {
- if (level == 0) {
+ if (level < MIN_VISIBLE_LEVEL) {
mTables[tableId][level][timeStepCount] = NOT_A_PROBABILITY;
continue;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
index 10abb405a..ccbc4a98d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
@@ -39,9 +39,6 @@ class ForgettingCurveUtils {
static int decodeProbability(const HistoricalInfo *const historicalInfo,
const HeaderPolicy *const headerPolicy);
- static int getProbability(const int encodedUnigramProbability,
- const int encodedBigramProbability);
-
static bool needsToKeep(const HistoricalInfo *const historicalInfo,
const HeaderPolicy *const headerPolicy);
@@ -101,6 +98,8 @@ class ForgettingCurveUtils {
static const int MIN_VISIBLE_LEVEL;
static const int MAX_ELAPSED_TIME_STEP_COUNT;
static const int DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD;
+ static const int OCCURRENCES_TO_RAISE_THE_LEVEL;
+ static const int DURATION_TO_LOWER_THE_LEVEL_IN_SECONDS;
static const float UNIGRAM_COUNT_HARD_LIMIT_WEIGHT;
static const float BIGRAM_COUNT_HARD_LIMIT_WEIGHT;
diff --git a/native/jni/src/utils/jni_data_utils.h b/native/jni/src/utils/jni_data_utils.h
index 235a03bba..25cc41742 100644
--- a/native/jni/src/utils/jni_data_utils.h
+++ b/native/jni/src/utils/jni_data_utils.h
@@ -21,7 +21,7 @@
#include "defines.h"
#include "jni.h"
-#include "suggest/core/session/prev_words_info.h"
+#include "suggest/core/session/ngram_context.h"
#include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h"
#include "utils/char_utils.h"
@@ -96,7 +96,7 @@ class JniDataUtils {
}
}
- static PrevWordsInfo constructPrevWordsInfo(JNIEnv *env, jobjectArray prevWordCodePointArrays,
+ static NgramContext constructNgramContext(JNIEnv *env, jobjectArray prevWordCodePointArrays,
jbooleanArray isBeginningOfSentenceArray, const size_t prevWordCount) {
int prevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH];
int prevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
@@ -119,7 +119,7 @@ class JniDataUtils {
&isBeginningOfSentenceBoolean);
isBeginningOfSentence[i] = isBeginningOfSentenceBoolean == JNI_TRUE;
}
- return PrevWordsInfo(prevWordCodePoints, prevWordCodePointCount, isBeginningOfSentence,
+ return NgramContext(prevWordCodePoints, prevWordCodePointCount, isBeginningOfSentence,
prevWordCount);
}