about summary refs log tree commit diff stats
path: root/native/jni/src
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni/src')
-rw-r--r-- native/jni/src/suggest/core/dictionary/dictionary.cpp 12
-rw-r--r-- native/jni/src/suggest/core/dictionary/dictionary.h 7
-rw-r--r-- native/jni/src/suggest/core/dictionary/property/historical_info.h (renamed from native/jni/src/suggest/policyimpl/dictionary/utils/historical_info.h) 12
-rw-r--r-- native/jni/src/suggest/core/dictionary/property/ngram_property.h (renamed from native/jni/src/suggest/core/dictionary/property/bigram_property.h) 34
-rw-r--r-- native/jni/src/suggest/core/dictionary/property/unigram_property.h 34
-rw-r--r-- native/jni/src/suggest/core/dictionary/property/word_property.cpp 21
-rw-r--r-- native/jni/src/suggest/core/dictionary/property/word_property.h 14
-rw-r--r-- native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h 8
-rw-r--r-- native/jni/src/suggest/core/session/prev_words_info.h 14
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.cpp 19
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h 6
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.cpp 4
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_entry.h 2
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.cpp 2
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h 2
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp 7
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h 2
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp 52
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h 8
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.cpp 4
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h 1
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp 4
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h 4
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h 4
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp 10
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h 11
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp 2
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h 18
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp 8
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h 2
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp 55
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h 9
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp 8
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h 6
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp 12
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h 2
36 files changed, 245 insertions, 175 deletions
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index a3bb408c3..8d3f8a9f8 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -144,9 +144,9 @@ bool Dictionary::removeUnigramEntry(const CodePointArrayView codePoints) {
}
bool Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
- const BigramProperty *const bigramProperty) {
+ const NgramProperty *const ngramProperty) {
TimeKeeper::setCurrentTime();
- return mDictionaryStructureWithBufferPolicy->addNgramEntry(prevWordsInfo, bigramProperty);
+ return mDictionaryStructureWithBufferPolicy->addNgramEntry(prevWordsInfo, ngramProperty);
}
bool Dictionary::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
@@ -155,6 +155,14 @@ bool Dictionary::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
return mDictionaryStructureWithBufferPolicy->removeNgramEntry(prevWordsInfo, codePoints);
}
+bool Dictionary::updateCounter(const PrevWordsInfo *const prevWordsInfo,
+ const CodePointArrayView codePoints, const bool isValidWord,
+ const HistoricalInfo historicalInfo) {
+ TimeKeeper::setCurrentTime();
+ return mDictionaryStructureWithBufferPolicy->updateCounter(prevWordsInfo, codePoints,
+ isValidWord, historicalInfo);
+}
+
bool Dictionary::flush(const char *const filePath) {
TimeKeeper::setCurrentTime();
return mDictionaryStructureWithBufferPolicy->flush(filePath);
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h
index 324e3504a..a58dbfbd7 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h
@@ -22,6 +22,7 @@
#include "defines.h"
#include "jni.h"
#include "suggest/core/dictionary/ngram_listener.h"
+#include "suggest/core/dictionary/property/historical_info.h"
#include "suggest/core/dictionary/property/word_property.h"
#include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
@@ -85,11 +86,15 @@ class Dictionary {
bool removeUnigramEntry(const CodePointArrayView codePoints);
bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
- const BigramProperty *const bigramProperty);
+ const NgramProperty *const ngramProperty);
bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
const CodePointArrayView codePoints);
+ bool updateCounter(const PrevWordsInfo *const prevWordsInfo,
+ const CodePointArrayView codePoints, const bool isValidWord,
+ const HistoricalInfo historicalInfo);
+
bool flush(const char *const filePath);
bool flushWithGC(const char *const filePath);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/historical_info.h b/native/jni/src/suggest/core/dictionary/property/historical_info.h
index 428ca8626..5ed9ebfca 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/historical_info.h
+++ b/native/jni/src/suggest/core/dictionary/property/historical_info.h
@@ -34,7 +34,7 @@ class HistoricalInfo {
return mTimestamp != NOT_A_TIMESTAMP;
}
- int getTimeStamp() const {
+ int getTimestamp() const {
return mTimestamp;
}
@@ -47,12 +47,12 @@ class HistoricalInfo {
}
private:
- // Copy constructor is public to use this class as a type of return value.
- DISALLOW_ASSIGNMENT_OPERATOR(HistoricalInfo);
+ // Default copy constructor and assign operator are used for using in std::vector.
- const int mTimestamp;
- const int mLevel;
- const int mCount;
+ // TODO: Make members const.
+ int mTimestamp;
+ int mLevel;
+ int mCount;
};
} // namespace latinime
#endif /* LATINIME_HISTORICAL_INFO_H */
diff --git a/native/jni/src/suggest/core/dictionary/property/bigram_property.h b/native/jni/src/suggest/core/dictionary/property/ngram_property.h
index 9e0baa032..dce460099 100644
--- a/native/jni/src/suggest/core/dictionary/property/bigram_property.h
+++ b/native/jni/src/suggest/core/dictionary/property/ngram_property.h
@@ -14,22 +14,22 @@
* limitations under the License.
*/
-#ifndef LATINIME_BIGRAM_PROPERTY_H
-#define LATINIME_BIGRAM_PROPERTY_H
+#ifndef LATINIME_NGRAM_PROPERTY_H
+#define LATINIME_NGRAM_PROPERTY_H
#include <vector>
#include "defines.h"
+#include "suggest/core/dictionary/property/historical_info.h"
namespace latinime {
-// TODO: Change to NgramProperty.
-class BigramProperty {
+class NgramProperty {
public:
- BigramProperty(const std::vector<int> &&targetCodePoints, const int probability,
- const int timestamp, const int level, const int count)
+ NgramProperty(const std::vector<int> &&targetCodePoints, const int probability,
+ const HistoricalInfo historicalInfo)
: mTargetCodePoints(std::move(targetCodePoints)), mProbability(probability),
- mTimestamp(timestamp), mLevel(level), mCount(count) {}
+ mHistoricalInfo(historicalInfo) {}
const std::vector<int> *getTargetCodePoints() const {
return &mTargetCodePoints;
@@ -39,28 +39,18 @@ class BigramProperty {
return mProbability;
}
- int getTimestamp() const {
- return mTimestamp;
- }
-
- int getLevel() const {
- return mLevel;
- }
-
- int getCount() const {
- return mCount;
+ const HistoricalInfo getHistoricalInfo() const {
+ return mHistoricalInfo;
}
private:
// Default copy constructor and assign operator are used for using in std::vector.
- DISALLOW_DEFAULT_CONSTRUCTOR(BigramProperty);
+ DISALLOW_DEFAULT_CONSTRUCTOR(NgramProperty);
// TODO: Make members const.
std::vector<int> mTargetCodePoints;
int mProbability;
- int mTimestamp;
- int mLevel;
- int mCount;
+ HistoricalInfo mHistoricalInfo;
};
} // namespace latinime
-#endif // LATINIME_WORD_PROPERTY_H
+#endif // LATINIME_NGRAM_PROPERTY_H
diff --git a/native/jni/src/suggest/core/dictionary/property/unigram_property.h b/native/jni/src/suggest/core/dictionary/property/unigram_property.h
index b7e7d6686..d1f0ab4ca 100644
--- a/native/jni/src/suggest/core/dictionary/property/unigram_property.h
+++ b/native/jni/src/suggest/core/dictionary/property/unigram_property.h
@@ -20,6 +20,7 @@
#include <vector>
#include "defines.h"
+#include "suggest/core/dictionary/property/historical_info.h"
namespace latinime {
@@ -50,15 +51,21 @@ class UnigramProperty {
UnigramProperty()
: mRepresentsBeginningOfSentence(false), mIsNotAWord(false), mIsBlacklisted(false),
- mProbability(NOT_A_PROBABILITY), mTimestamp(NOT_A_TIMESTAMP), mLevel(0), mCount(0),
- mShortcuts() {}
+ mProbability(NOT_A_PROBABILITY), mHistoricalInfo(), mShortcuts() {}
UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord,
- const bool isBlacklisted, const int probability, const int timestamp, const int level,
- const int count, const std::vector<ShortcutProperty> *const shortcuts)
+ const bool isBlacklisted, const int probability, const HistoricalInfo historicalInfo,
+ const std::vector<ShortcutProperty> &&shortcuts)
: mRepresentsBeginningOfSentence(representsBeginningOfSentence),
mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability),
- mTimestamp(timestamp), mLevel(level), mCount(count), mShortcuts(*shortcuts) {}
+ mHistoricalInfo(historicalInfo), mShortcuts(std::move(shortcuts)) {}
+
+ // Without shortcuts.
+ UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord,
+ const bool isBlacklisted, const int probability, const HistoricalInfo historicalInfo)
+ : mRepresentsBeginningOfSentence(representsBeginningOfSentence),
+ mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability),
+ mHistoricalInfo(historicalInfo), mShortcuts() {}
bool representsBeginningOfSentence() const {
return mRepresentsBeginningOfSentence;
@@ -85,16 +92,8 @@ class UnigramProperty {
return mProbability;
}
- int getTimestamp() const {
- return mTimestamp;
- }
-
- int getLevel() const {
- return mLevel;
- }
-
- int getCount() const {
- return mCount;
+ const HistoricalInfo getHistoricalInfo() const {
+ return mHistoricalInfo;
}
const std::vector<ShortcutProperty> &getShortcuts() const {
@@ -110,10 +109,7 @@ class UnigramProperty {
bool mIsNotAWord;
bool mIsBlacklisted;
int mProbability;
- // Historical information
- int mTimestamp;
- int mLevel;
- int mCount;
+ HistoricalInfo mHistoricalInfo;
std::vector<ShortcutProperty> mShortcuts;
};
} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/property/word_property.cpp b/native/jni/src/suggest/core/dictionary/property/word_property.cpp
index 66daf3e3f..caac8fe79 100644
--- a/native/jni/src/suggest/core/dictionary/property/word_property.cpp
+++ b/native/jni/src/suggest/core/dictionary/property/word_property.cpp
@@ -17,6 +17,7 @@
#include "suggest/core/dictionary/property/word_property.h"
#include "utils/jni_data_utils.h"
+#include "suggest/core/dictionary/property/historical_info.h"
namespace latinime {
@@ -28,11 +29,12 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
MAX_WORD_LENGTH /* maxLength */, mCodePoints.data(), mCodePoints.size(),
false /* needsNullTermination */);
jboolean flags[] = {mUnigramProperty.isNotAWord(), mUnigramProperty.isBlacklisted(),
- !mBigrams.empty(), mUnigramProperty.hasShortcuts(),
+ !mNgrams.empty(), mUnigramProperty.hasShortcuts(),
mUnigramProperty.representsBeginningOfSentence()};
env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags);
- int probabilityInfo[] = {mUnigramProperty.getProbability(), mUnigramProperty.getTimestamp(),
- mUnigramProperty.getLevel(), mUnigramProperty.getCount()};
+ const HistoricalInfo &historicalInfo = mUnigramProperty.getHistoricalInfo();
+ int probabilityInfo[] = {mUnigramProperty.getProbability(), historicalInfo.getTimestamp(),
+ historicalInfo.getLevel(), historicalInfo.getCount()};
env->SetIntArrayRegion(outProbabilityInfo, 0 /* start */, NELEMS(probabilityInfo),
probabilityInfo);
@@ -42,18 +44,19 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z");
// Output bigrams.
- for (const auto &bigramProperty : mBigrams) {
- const std::vector<int> *const word1CodePoints = bigramProperty.getTargetCodePoints();
+ // TODO: Support n-gram
+ for (const auto &ngramProperty : mNgrams) {
+ const std::vector<int> *const word1CodePoints = ngramProperty.getTargetCodePoints();
jintArray bigramWord1CodePointArray = env->NewIntArray(word1CodePoints->size());
JniDataUtils::outputCodePoints(env, bigramWord1CodePointArray, 0 /* start */,
word1CodePoints->size(), word1CodePoints->data(), word1CodePoints->size(),
false /* needsNullTermination */);
env->CallBooleanMethod(outBigramTargets, addMethodId, bigramWord1CodePointArray);
env->DeleteLocalRef(bigramWord1CodePointArray);
-
- int bigramProbabilityInfo[] = {bigramProperty.getProbability(),
- bigramProperty.getTimestamp(), bigramProperty.getLevel(),
- bigramProperty.getCount()};
+ const HistoricalInfo &ngramHistoricalInfo = ngramProperty.getHistoricalInfo();
+ int bigramProbabilityInfo[] = {ngramProperty.getProbability(),
+ ngramHistoricalInfo.getTimestamp(), ngramHistoricalInfo.getLevel(),
+ ngramHistoricalInfo.getCount()};
jintArray bigramProbabilityInfoArray = env->NewIntArray(NELEMS(bigramProbabilityInfo));
env->SetIntArrayRegion(bigramProbabilityInfoArray, 0 /* start */,
NELEMS(bigramProbabilityInfo), bigramProbabilityInfo);
diff --git a/native/jni/src/suggest/core/dictionary/property/word_property.h b/native/jni/src/suggest/core/dictionary/property/word_property.h
index 4e6febb3f..0c23e8225 100644
--- a/native/jni/src/suggest/core/dictionary/property/word_property.h
+++ b/native/jni/src/suggest/core/dictionary/property/word_property.h
@@ -21,7 +21,7 @@
#include "defines.h"
#include "jni.h"
-#include "suggest/core/dictionary/property/bigram_property.h"
+#include "suggest/core/dictionary/property/ngram_property.h"
#include "suggest/core/dictionary/property/unigram_property.h"
namespace latinime {
@@ -31,12 +31,12 @@ class WordProperty {
public:
// Default constructor is used to create an instance that indicates an invalid word.
WordProperty()
- : mCodePoints(), mUnigramProperty(), mBigrams() {}
+ : mCodePoints(), mUnigramProperty(), mNgrams() {}
WordProperty(const std::vector<int> &&codePoints, const UnigramProperty *const unigramProperty,
- const std::vector<BigramProperty> *const bigrams)
+ const std::vector<NgramProperty> *const bigrams)
: mCodePoints(std::move(codePoints)), mUnigramProperty(*unigramProperty),
- mBigrams(*bigrams) {}
+ mNgrams(*bigrams) {}
void outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags,
jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilities,
@@ -46,8 +46,8 @@ class WordProperty {
return &mUnigramProperty;
}
- const std::vector<BigramProperty> *getBigramProperties() const {
- return &mBigrams;
+ const std::vector<NgramProperty> *getNgramProperties() const {
+ return &mNgrams;
}
private:
@@ -56,7 +56,7 @@ class WordProperty {
const std::vector<int> mCodePoints;
const UnigramProperty mUnigramProperty;
- const std::vector<BigramProperty> mBigrams;
+ const std::vector<NgramProperty> mNgrams;
};
} // namespace latinime
#endif // LATINIME_WORD_PROPERTY_H
diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
index 1546b2610..6624b7921 100644
--- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
@@ -21,6 +21,7 @@
#include "defines.h"
#include "suggest/core/dictionary/binary_dictionary_shortcut_iterator.h"
+#include "suggest/core/dictionary/property/historical_info.h"
#include "suggest/core/dictionary/property/word_property.h"
#include "suggest/core/dictionary/word_attributes.h"
#include "utils/int_array_view.h"
@@ -81,12 +82,17 @@ class DictionaryStructureWithBufferPolicy {
// Returns whether the update was success or not.
virtual bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
- const BigramProperty *const bigramProperty) = 0;
+ const NgramProperty *const ngramProperty) = 0;
// Returns whether the update was success or not.
virtual bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
const CodePointArrayView wordCodePoints) = 0;
+ // Returns whether the update was success or not.
+ virtual bool updateCounter(const PrevWordsInfo *const prevWordsInfo,
+ const CodePointArrayView wordCodePoints, const bool isValidWord,
+ const HistoricalInfo historicalInfo) = 0;
+
// Returns whether the flush was success or not.
virtual bool flush(const char *const filePath) = 0;
diff --git a/native/jni/src/suggest/core/session/prev_words_info.h b/native/jni/src/suggest/core/session/prev_words_info.h
index 02e82a8e0..553d5ad07 100644
--- a/native/jni/src/suggest/core/session/prev_words_info.h
+++ b/native/jni/src/suggest/core/session/prev_words_info.h
@@ -33,7 +33,7 @@ class PrevWordsInfo {
clear();
}
- PrevWordsInfo(PrevWordsInfo &&prevWordsInfo)
+ PrevWordsInfo(const PrevWordsInfo &prevWordsInfo)
: mPrevWordCount(prevWordsInfo.mPrevWordCount) {
for (size_t i = 0; i < mPrevWordCount; ++i) {
mPrevWordCodePointCount[i] = prevWordsInfo.mPrevWordCodePointCount[i];
@@ -73,6 +73,16 @@ class PrevWordsInfo {
mIsBeginningOfSentence[0] = isBeginningOfSentence;
}
+ size_t getPrevWordCount() const {
+ return mPrevWordCount;
+ }
+
+ // TODO: Remove.
+ const PrevWordsInfo getTrimmedPrevWordsInfo(const size_t maxPrevWordCount) const {
+ return PrevWordsInfo(mPrevWordCodePoints, mPrevWordCodePointCount, mIsBeginningOfSentence,
+ std::min(mPrevWordCount, maxPrevWordCount));
+ }
+
bool isValid() const {
if (mPrevWordCodePointCount[0] > 0) {
return true;
@@ -112,7 +122,7 @@ class PrevWordsInfo {
}
private:
- DISALLOW_COPY_AND_ASSIGN(PrevWordsInfo);
+ DISALLOW_ASSIGNMENT_OPERATOR(PrevWordsInfo);
static int getWordId(const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
const int *const wordCodePoints, const int wordCodePointCount,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.cpp
index 3e8e059f2..bc0f47f79 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.cpp
@@ -24,7 +24,7 @@
#include "suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h"
-#include "suggest/core/dictionary/property/bigram_property.h"
+#include "suggest/core/dictionary/property/ngram_property.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h"
#include "suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.h"
@@ -60,7 +60,7 @@ void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const out
}
bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId,
- const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) {
+ const NgramProperty *const ngramProperty, bool *const outAddedNewEntry) {
// 1. The word has no bigrams yet.
// 2. The word has bigrams, and there is the target in the list.
// 3. The word has bigrams, and there is an invalid entry that can be reclaimed.
@@ -79,7 +79,7 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
newTargetTerminalId);
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(&newBigramEntry,
- bigramProperty);
+ ngramProperty);
// Write an entry.
const int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
if (!mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, writingPos)) {
@@ -112,7 +112,7 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
newTargetTerminalId);
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
- &newBigramEntry, bigramProperty);
+ &newBigramEntry, ngramProperty);
if (!mBigramDictContent->writeBigramEntryAtTail(&bigramEntryToWrite)) {
return false;
}
@@ -138,7 +138,7 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
const BigramEntry updatedBigramEntry =
originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId);
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
- &updatedBigramEntry, bigramProperty);
+ &updatedBigramEntry, ngramProperty);
return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate);
}
@@ -264,18 +264,17 @@ int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
const BigramEntry *const originalBigramEntry,
- const BigramProperty *const bigramProperty) const {
+ const NgramProperty *const ngramProperty) const {
// TODO: Consolidate historical info and probability.
if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
- const HistoricalInfo historicalInfoForUpdate(bigramProperty->getTimestamp(),
- bigramProperty->getLevel(), bigramProperty->getCount());
+ const HistoricalInfo &historicalInfoForUpdate = ngramProperty->getHistoricalInfo();
const HistoricalInfo updatedHistoricalInfo =
ForgettingCurveUtils::createUpdatedHistoricalInfo(
- originalBigramEntry->getHistoricalInfo(), bigramProperty->getProbability(),
+ originalBigramEntry->getHistoricalInfo(), ngramProperty->getProbability(),
&historicalInfoForUpdate, mHeaderPolicy);
return originalBigramEntry->updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo);
} else {
- return originalBigramEntry->updateProbabilityAndGetEntry(bigramProperty->getProbability());
+ return originalBigramEntry->updateProbabilityAndGetEntry(ngramProperty->getProbability());
}
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h
index 50a4c9743..aac6f5470 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h
@@ -36,7 +36,7 @@ namespace v402 {
class BigramDictContent;
} // namespace v402
} // namespace backward
-class BigramProperty;
+class NgramProperty;
namespace backward {
namespace v402 {
} // namespace v402
@@ -64,7 +64,7 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
}
bool addNewEntry(const int terminalId, const int newTargetTerminalId,
- const BigramProperty *const bigramProperty, bool *const outAddedNewEntry);
+ const NgramProperty *const ngramProperty, bool *const outAddedNewEntry);
bool removeEntry(const int terminalId, const int targetTerminalId);
@@ -80,7 +80,7 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
int *const outTailEntryPos) const;
const BigramEntry createUpdatedBigramEntryFrom(const BigramEntry *const originalBigramEntry,
- const BigramProperty *const bigramProperty) const;
+ const NgramProperty *const ngramProperty) const;
bool updateHasNextFlag(const bool hasNext, const int bigramEntryPos);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.cpp
index e2dd93c5e..9e1adff70 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.cpp
@@ -83,10 +83,10 @@ bool BigramDictContent::writeBigramEntryAndAdvancePosition(
}
if (mHasHistoricalInfo) {
const HistoricalInfo *const historicalInfo = bigramEntryToWrite->getHistoricalInfo();
- if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getTimeStamp(),
+ if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getTimestamp(),
Ver4DictConstants::TIME_STAMP_FIELD_SIZE, entryWritingPos)) {
AKLOGE("Cannot write bigram timestamps. pos: %d, timestamp: %d", *entryWritingPos,
- historicalInfo->getTimeStamp());
+ historicalInfo->getTimestamp());
return false;
}
if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getLevel(),
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_entry.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_entry.h
index 40968b4d8..480095a2f 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_entry.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_entry.h
@@ -25,8 +25,8 @@
#define LATINIME_BACKWARD_V402_BIGRAM_ENTRY_H
#include "defines.h"
+#include "suggest/core/dictionary/property/historical_info.h"
#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
-#include "suggest/policyimpl/dictionary/utils/historical_info.h"
namespace latinime {
namespace backward {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.cpp
index c671647d4..4a740d47b 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.cpp
@@ -147,7 +147,7 @@ bool ProbabilityDictContent::writeEntry(const ProbabilityEntry *const probabilit
}
if (mHasHistoricalInfo) {
const HistoricalInfo *const historicalInfo = probabilityEntry->getHistoricalInfo();
- if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getTimeStamp(),
+ if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getTimestamp(),
Ver4DictConstants::TIME_STAMP_FIELD_SIZE, &writingPos)) {
AKLOGE("Cannot write timestamp in probability dict content. pos: %d", writingPos);
return false;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h
index 8ccfa33dc..4111a49c0 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h
@@ -25,8 +25,8 @@
#define LATINIME_BACKWARD_V402_PROBABILITY_ENTRY_H
#include "defines.h"
+#include "suggest/core/dictionary/property/historical_info.h"
#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
-#include "suggest/policyimpl/dictionary/utils/historical_info.h"
namespace latinime {
namespace backward {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp
index 97a8bcc98..8d169743c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp
@@ -232,8 +232,8 @@ bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
}
bool Ver4PatriciaTrieNodeWriter::addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
- const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) {
- if (!mBigramPolicy->addNewEntry(prevWordIds[0], wordId, bigramProperty, outAddedNewEntry)) {
+ const NgramProperty *const ngramProperty, bool *const outAddedNewEntry) {
+ if (!mBigramPolicy->addNewEntry(prevWordIds[0], wordId, ngramProperty, outAddedNewEntry)) {
AKLOGE("Cannot add new bigram entry. prevWordId: %d, wordId: %d",
prevWordIds[0], wordId);
return false;
@@ -396,8 +396,7 @@ const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom(
const UnigramProperty *const unigramProperty) const {
// TODO: Consolidate historical info and probability.
if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
- const HistoricalInfo historicalInfoForUpdate(unigramProperty->getTimestamp(),
- unigramProperty->getLevel(), unigramProperty->getCount());
+ const HistoricalInfo &historicalInfoForUpdate = unigramProperty->getHistoricalInfo();
const HistoricalInfo updatedHistoricalInfo =
ForgettingCurveUtils::createUpdatedHistoricalInfo(
originalProbabilityEntry->getHistoricalInfo(),
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h
index 9d8a55bff..d0bab50f8 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h
@@ -94,7 +94,7 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos);
virtual bool addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
- const BigramProperty *const bigramProperty, bool *const outAddedNewEntry);
+ const NgramProperty *const ngramProperty, bool *const outAddedNewEntry);
virtual bool removeNgramEntry(const WordIdArrayView prevWordIds, const int wordId);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
index f752f89f1..11f7b305f 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
@@ -30,7 +30,7 @@
#include "suggest/core/dicnode/dic_node_vector.h"
#include "suggest/core/dictionary/multi_bigram_map.h"
#include "suggest/core/dictionary/ngram_listener.h"
-#include "suggest/core/dictionary/property/bigram_property.h"
+#include "suggest/core/dictionary/property/ngram_property.h"
#include "suggest/core/dictionary/property/unigram_property.h"
#include "suggest/core/dictionary/property/word_property.h"
#include "suggest/core/session/prev_words_info.h"
@@ -52,6 +52,7 @@ const char *const Ver4PatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_C
const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024;
const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
Ver4DictConstants::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
+const int Ver4PatriciaTriePolicy::DUMMY_PROBABILITY_FOR_VALID_WORDS = 1;
void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const {
@@ -312,7 +313,7 @@ bool Ver4PatriciaTriePolicy::removeUnigramEntry(const CodePointArrayView wordCod
}
bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
- const BigramProperty *const bigramProperty) {
+ const NgramProperty *const ngramProperty) {
if (!mBuffers->isUpdatable()) {
AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
return false;
@@ -326,9 +327,9 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
AKLOGE("prev words info is not valid for adding n-gram entry to the dictionary.");
return false;
}
- if (bigramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
+ if (ngramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
AKLOGE("The word is too long to insert the ngram to the dictionary. "
- "length: %zd", bigramProperty->getTargetCodePoints()->size());
+ "length: %zd", ngramProperty->getTargetCodePoints()->size());
return false;
}
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
@@ -339,11 +340,9 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
}
if (prevWordIds[0] == NOT_A_WORD_ID) {
if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)) {
- const std::vector<UnigramProperty::ShortcutProperty> shortcuts;
const UnigramProperty beginningOfSentenceUnigramProperty(
true /* representsBeginningOfSentence */, true /* isNotAWord */,
- false /* isBlacklisted */, MAX_PROBABILITY /* probability */,
- NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
+ false /* isBlacklisted */, MAX_PROBABILITY /* probability */, HistoricalInfo());
if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
&beginningOfSentenceUnigramProperty)) {
AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
@@ -356,7 +355,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
}
}
const int wordPos = getTerminalPtNodePosFromWordId(getWordId(
- CodePointArrayView(*bigramProperty->getTargetCodePoints()),
+ CodePointArrayView(*ngramProperty->getTargetCodePoints()),
false /* forceLowerCaseSearch */));
if (wordPos == NOT_A_DICT_POS) {
return false;
@@ -364,7 +363,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
bool addedNewBigram = false;
const int prevWordPtNodePos = getTerminalPtNodePosFromWordId(prevWordIds[0]);
if (mUpdatingHelper.addNgramEntry(PtNodePosArrayView::singleElementView(&prevWordPtNodePos),
- wordPos, bigramProperty, &addedNewBigram)) {
+ wordPos, ngramProperty, &addedNewBigram)) {
if (addedNewBigram) {
mBigramCount++;
}
@@ -414,6 +413,29 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
}
}
+
+// Records one observation of |wordCodePoints| following the context in |prevWordsInfo|.
+// The unigram entry for the word is added/updated first, then the n-gram entry from the context
+// to the word; both carry |historicalInfo|. The probability handed to the entries is only a
+// placeholder: DUMMY_PROBABILITY_FOR_VALID_WORDS when |isValidWord| is true, NOT_A_PROBABILITY
+// otherwise.
+// Returns false when the dictionary is not updatable or when either entry cannot be written.
+// A unigram update that already succeeded is not undone if the n-gram update then fails.
+bool Ver4PatriciaTriePolicy::updateCounter(const PrevWordsInfo *const prevWordsInfo,
+        const CodePointArrayView wordCodePoints, const bool isValidWord,
+        const HistoricalInfo historicalInfo) {
+    if (!mBuffers->isUpdatable()) {
+        AKLOGI("Warning: updateCounter() is called for non-updatable dictionary.");
+        return false;
+    }
+    const int probability = isValidWord ? DUMMY_PROBABILITY_FOR_VALID_WORDS : NOT_A_PROBABILITY;
+    const UnigramProperty unigramProperty(false /* representsBeginningOfSentence */,
+            false /* isNotAWord */, false /*isBlacklisted*/, probability, historicalInfo);
+    if (!addUnigramEntry(wordCodePoints, &unigramProperty)) {
+        AKLOGE("Cannot update unigram entry in updateCounter().");
+        return false;
+    }
+    const NgramProperty ngramProperty(wordCodePoints.toVector(), probability, historicalInfo);
+    if (!addNgramEntry(prevWordsInfo, &ngramProperty)) {
+        // Name the n-gram step here so the log distinguishes it from the unigram failure above.
+        AKLOGE("Cannot update ngram entry in updateCounter().");
+        return false;
+    }
+    return true;
+}
+
bool Ver4PatriciaTriePolicy::flush(const char *const filePath) {
if (!mBuffers->isUpdatable()) {
AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath);
@@ -499,7 +521,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
ptNodeParams.getTerminalId());
const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo();
// Fetch bigram information.
- std::vector<BigramProperty> bigrams;
+ std::vector<NgramProperty> ngrams;
const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos);
if (bigramListPos != NOT_A_DICT_POS) {
int bigramWord1CodePoints[MAX_WORD_LENGTH];
@@ -526,10 +548,9 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
ForgettingCurveUtils::decodeProbability(
bigramEntry.getHistoricalInfo(), mHeaderPolicy) :
bigramEntry.getProbability();
- bigrams.emplace_back(
+ ngrams.emplace_back(
CodePointArrayView(bigramWord1CodePoints, codePointCount).toVector(),
- probability, historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
- historicalInfo->getCount());
+ probability, *historicalInfo);
}
}
// Fetch shortcut information.
@@ -552,9 +573,8 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
}
const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
- historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
- historicalInfo->getCount(), &shortcuts);
- return WordProperty(wordCodePoints.toVector(), &unigramProperty, &bigrams);
+ *historicalInfo, std::move(shortcuts));
+ return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams);
}
int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
index 8420c94d0..995d7764f 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
@@ -59,6 +59,7 @@ namespace backward {
namespace v402 {
// Word id = Position of a PtNode that represents the word.
+// Max supported n-gram is bigram.
class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public:
Ver4PatriciaTriePolicy(Ver4DictBuffers::Ver4DictBuffersPtr buffers)
@@ -112,11 +113,15 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
bool removeUnigramEntry(const CodePointArrayView wordCodePoints);
bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
- const BigramProperty *const bigramProperty);
+ const NgramProperty *const ngramProperty);
bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
const CodePointArrayView wordCodePoints);
+ bool updateCounter(const PrevWordsInfo *const prevWordsInfo,
+ const CodePointArrayView wordCodePoints, const bool isValidWord,
+ const HistoricalInfo historicalInfo);
+
bool flush(const char *const filePath);
bool flushWithGC(const char *const filePath);
@@ -146,6 +151,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
// prevent the dictionary from overflowing.
static const int MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
+ static const int DUMMY_PROBABILITY_FOR_VALID_WORDS;
const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers;
const HeaderPolicy *const mHeaderPolicy;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.cpp
index 3fb4caa08..2887dc6b1 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.cpp
@@ -216,7 +216,7 @@ bool Ver4PatriciaTrieWritingHelper::truncateUnigrams(
probabilityEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) :
probabilityEntry.getProbability();
priorityQueue.push(DictProbability(terminalPos, probability,
- probabilityEntry.getHistoricalInfo()->getTimeStamp()));
+ probabilityEntry.getHistoricalInfo()->getTimestamp()));
}
// Delete unigrams.
@@ -263,7 +263,7 @@ bool Ver4PatriciaTrieWritingHelper::truncateBigrams(const int maxBigramCount) {
bigramEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) :
bigramEntry.getProbability();
priorityQueue.push(DictProbability(entryPos, probability,
- bigramEntry.getHistoricalInfo()->getTimeStamp()));
+ bigramEntry.getHistoricalInfo()->getTimestamp()));
}
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h
index 2aa402748..b8a4a92e8 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h
@@ -76,6 +76,7 @@ class DynamicPtGcEventListeners {
int mValidUnigramCount;
};
+ // TODO: Remove when we stop supporting v402 format.
// Updates all bigram entries that are held by valid PtNodes. This removes useless bigram
// entries.
class TraversePolicyToUpdateBigramProbability
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp
index 3b58d7d6d..92fd6f214 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp
@@ -82,7 +82,7 @@ bool DynamicPtUpdatingHelper::addUnigramWord(DynamicPtReadingHelper *const readi
}
bool DynamicPtUpdatingHelper::addNgramEntry(const PtNodePosArrayView prevWordsPtNodePos,
- const int wordPos, const BigramProperty *const bigramProperty,
+ const int wordPos, const NgramProperty *const ngramProperty,
bool *const outAddedNewEntry) {
if (prevWordsPtNodePos.empty()) {
return false;
@@ -96,7 +96,7 @@ bool DynamicPtUpdatingHelper::addNgramEntry(const PtNodePosArrayView prevWordsPt
const WordIdArrayView prevWordIds(prevWordTerminalIds, prevWordsPtNodePos.size());
const int wordId =
mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(wordPos).getTerminalId();
- return mPtNodeWriter->addNgramEntry(prevWordIds, wordId, bigramProperty, outAddedNewEntry);
+ return mPtNodeWriter->addNgramEntry(prevWordIds, wordId, ngramProperty, outAddedNewEntry);
}
bool DynamicPtUpdatingHelper::removeNgramEntry(const PtNodePosArrayView prevWordsPtNodePos,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h
index 710047e8c..2bbe2f4dc 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h
@@ -23,7 +23,7 @@
namespace latinime {
-class BigramProperty;
+class NgramProperty;
class BufferWithExtendableBuffer;
class DynamicPtReadingHelper;
class PtNodeReader;
@@ -46,7 +46,7 @@ class DynamicPtUpdatingHelper {
// TODO: Remove after stopping supporting v402.
// Add an n-gram entry.
bool addNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, const int wordPos,
- const BigramProperty *const bigramProperty, bool *const outAddedNewEntry);
+ const NgramProperty *const ngramProperty, bool *const outAddedNewEntry);
// TODO: Remove after stopping supporting v402.
// Remove an n-gram entry.
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h
index 955d779ac..954db9b0a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h
@@ -25,7 +25,7 @@
namespace latinime {
-class BigramProperty;
+class NgramProperty;
class UnigramProperty;
// Interface class used to write PtNode information.
@@ -72,7 +72,7 @@ class PtNodeWriter {
const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos) = 0;
virtual bool addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
- const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) = 0;
+ const NgramProperty *const ngramProperty, bool *const outAddedNewEntry) = 0;
virtual bool removeNgramEntry(const WordIdArrayView prevWordIds, const int wordId) = 0;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
index 13cf9a5a8..d3d684bfa 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
@@ -436,7 +436,7 @@ const WordProperty PatriciaTriePolicy::getWordProperty(
const PtNodeParams ptNodeParams =
mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
// Fetch bigram information.
- std::vector<BigramProperty> bigrams;
+ std::vector<NgramProperty> ngrams;
const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos);
int bigramWord1CodePoints[MAX_WORD_LENGTH];
BinaryDictionaryBigramsIterator bigramsIt(&mBigramListPolicy, bigramListPos);
@@ -450,9 +450,9 @@ const WordProperty PatriciaTriePolicy::getWordProperty(
getWordIdFromTerminalPtNodePos(bigramsIt.getBigramPos()), MAX_WORD_LENGTH,
bigramWord1CodePoints, &word1Probability);
const int probability = getProbability(word1Probability, bigramsIt.getProbability());
- bigrams.emplace_back(
+ ngrams.emplace_back(
CodePointArrayView(bigramWord1CodePoints, word1CodePointCount).toVector(),
- probability, NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */);
+ probability, HistoricalInfo());
}
}
// Fetch shortcut information.
@@ -477,8 +477,8 @@ const WordProperty PatriciaTriePolicy::getWordProperty(
}
const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
- NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
- return WordProperty(wordCodePoints.toVector(), &unigramProperty, &bigrams);
+ HistoricalInfo(), std::move(shortcuts));
+ return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams);
}
int PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
index 0d679c5dc..32a95bb6c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
@@ -38,6 +38,7 @@ class DicNode;
class DicNodeVector;
// Word id = Position of a PtNode that represents the word.
+// Max supported n-gram is bigram.
class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public:
PatriciaTriePolicy(MmappedBuffer::MmappedBufferPtr mmappedBuffer)
@@ -93,7 +94,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
}
bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
- const BigramProperty *const bigramProperty) {
+ const NgramProperty *const ngramProperty) {
// This method should not be called for non-updatable dictionary.
AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
return false;
@@ -106,6 +107,14 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
return false;
}
+ bool updateCounter(const PrevWordsInfo *const prevWordsInfo,
+ const CodePointArrayView wordCodePoints, const bool isValidWord,
+ const HistoricalInfo historicalInfo) {
+ // This method should not be called for non-updatable dictionary; it only warns and fails.
+ AKLOGI("Warning: updateCounter() is called for non-updatable dictionary.");
+ return false;
+ }
+
bool flush(const char *const filePath) {
// This method should not be called for non-updatable dictionary.
AKLOGI("Warning: flush() is called for non-updatable dictionary.");
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
index 139230228..956dabb4f 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
@@ -296,7 +296,7 @@ bool LanguageModelDictContent::getEntryInfo(const HeaderPolicy *const headerPoli
ForgettingCurveUtils::decodeProbability(probabilityEntry.getHistoricalInfo(),
headerPolicy) : probabilityEntry.getProbability();
outEntryInfo->emplace_back(probability,
- probabilityEntry.getHistoricalInfo()->getTimeStamp(),
+ probabilityEntry.getHistoricalInfo()->getTimestamp(),
entry.key(), targetLevel, prevWordIds->data());
}
return true;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h
index e1e10ca17..fa1415633 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h
@@ -21,10 +21,10 @@
#include <cstdint>
#include "defines.h"
-#include "suggest/core/dictionary/property/bigram_property.h"
+#include "suggest/core/dictionary/property/historical_info.h"
+#include "suggest/core/dictionary/property/ngram_property.h"
#include "suggest/core/dictionary/property/unigram_property.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
-#include "suggest/policyimpl/dictionary/utils/historical_info.h"
namespace latinime {
@@ -53,15 +53,13 @@ class ProbabilityEntry {
unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
unigramProperty->isPossiblyOffensive())),
mProbability(unigramProperty->getProbability()),
- mHistoricalInfo(unigramProperty->getTimestamp(), unigramProperty->getLevel(),
- unigramProperty->getCount()) {}
+ mHistoricalInfo(unigramProperty->getHistoricalInfo()) {}
- // Create from bigram property.
+ // Create from ngram property.
// TODO: Set flags.
- ProbabilityEntry(const BigramProperty *const bigramProperty)
- : mFlags(0), mProbability(bigramProperty->getProbability()),
- mHistoricalInfo(bigramProperty->getTimestamp(), bigramProperty->getLevel(),
- bigramProperty->getCount()) {}
+ ProbabilityEntry(const NgramProperty *const ngramProperty)
+ : mFlags(0), mProbability(ngramProperty->getProbability()),
+ mHistoricalInfo(ngramProperty->getHistoricalInfo()) {}
bool isValid() const {
return (mFlags & Ver4DictConstants::FLAG_NOT_A_VALID_ENTRY) == 0;
@@ -103,7 +101,7 @@ class ProbabilityEntry {
uint64_t encodedEntry = static_cast<uint64_t>(mFlags);
if (hasHistoricalInfo) {
encodedEntry = (encodedEntry << (Ver4DictConstants::TIME_STAMP_FIELD_SIZE * CHAR_BIT))
- ^ static_cast<uint64_t>(mHistoricalInfo.getTimeStamp());
+ ^ static_cast<uint64_t>(mHistoricalInfo.getTimestamp());
encodedEntry = (encodedEntry << (Ver4DictConstants::WORD_LEVEL_FIELD_SIZE * CHAR_BIT))
^ static_cast<uint64_t>(mHistoricalInfo.getLevel());
encodedEntry = (encodedEntry << (Ver4DictConstants::WORD_COUNT_FIELD_SIZE * CHAR_BIT))
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
index a1a33d27a..f13512d5a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
@@ -61,6 +61,7 @@ bool Ver4PatriciaTrieNodeWriter::markPtNodeAsDeleted(
}
}
+// TODO: Quit using bigramLinkedNodePos.
bool Ver4PatriciaTrieNodeWriter::markPtNodeAsMoved(
const PtNodeParams *const toBeUpdatedPtNodeParams,
const int movedPos, const int bigramLinkedNodePos) {
@@ -208,15 +209,16 @@ bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
terminalId, &probabilityEntryToWrite);
}
+// TODO: Support counting ngram entries.
bool Ver4PatriciaTrieNodeWriter::addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
- const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) {
+ const NgramProperty *const ngramProperty, bool *const outAddedNewBigram) {
LanguageModelDictContent *const languageModelDictContent =
mBuffers->getMutableLanguageModelDictContent();
const ProbabilityEntry probabilityEntry =
languageModelDictContent->getNgramProbabilityEntry(prevWordIds, wordId);
- const ProbabilityEntry probabilityEntryOfBigramProperty(bigramProperty);
+ const ProbabilityEntry probabilityEntryOfNgramProperty(ngramProperty);
const ProbabilityEntry updatedProbabilityEntry = createUpdatedEntryFrom(
- &probabilityEntry, &probabilityEntryOfBigramProperty);
+ &probabilityEntry, &probabilityEntryOfNgramProperty);
if (!languageModelDictContent->setNgramProbabilityEntry(
prevWordIds, wordId, &updatedProbabilityEntry)) {
AKLOGE("Cannot add new ngram entry. prevWordId[0]: %d, prevWordId.size(): %zd, wordId: %d",
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
index 17915273b..ea4f09904 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
@@ -74,7 +74,7 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos);
virtual bool addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
- const BigramProperty *const bigramProperty, bool *const outAddedNewEntry);
+ const NgramProperty *const ngramProperty, bool *const outAddedNewEntry);
virtual bool removeNgramEntry(const WordIdArrayView prevWordIds, const int wordId);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index 0f0696410..41b109f95 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -23,7 +23,7 @@
#include "suggest/core/dicnode/dic_node_vector.h"
#include "suggest/core/dictionary/multi_bigram_map.h"
#include "suggest/core/dictionary/ngram_listener.h"
-#include "suggest/core/dictionary/property/bigram_property.h"
+#include "suggest/core/dictionary/property/ngram_property.h"
#include "suggest/core/dictionary/property/unigram_property.h"
#include "suggest/core/dictionary/property/word_property.h"
#include "suggest/core/session/prev_words_info.h"
@@ -43,6 +43,7 @@ const char *const Ver4PatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_C
const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024;
const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
Ver4DictConstants::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
+const int Ver4PatriciaTriePolicy::DUMMY_PROBABILITY_FOR_VALID_WORDS = 1;
void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const {
@@ -266,7 +267,7 @@ bool Ver4PatriciaTriePolicy::removeUnigramEntry(const CodePointArrayView wordCod
}
bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
- const BigramProperty *const bigramProperty) {
+ const NgramProperty *const ngramProperty) {
if (!mBuffers->isUpdatable()) {
AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
return false;
@@ -280,9 +281,9 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
AKLOGE("prev words info is not valid for adding n-gram entry to the dictionary.");
return false;
}
- if (bigramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
+ if (ngramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
AKLOGE("The word is too long to insert the ngram to the dictionary. "
- "length: %zd", bigramProperty->getTargetCodePoints()->size());
+ "length: %zd", ngramProperty->getTargetCodePoints()->size());
return false;
}
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
@@ -298,11 +299,9 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
if (!prevWordsInfo->isNthPrevWordBeginningOfSentence(i + 1 /* n */)) {
return false;
}
- const std::vector<UnigramProperty::ShortcutProperty> shortcuts;
const UnigramProperty beginningOfSentenceUnigramProperty(
true /* representsBeginningOfSentence */, true /* isNotAWord */,
- false /* isBlacklisted */, MAX_PROBABILITY /* probability */,
- NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
+ false /* isBlacklisted */, MAX_PROBABILITY /* probability */, HistoricalInfo());
if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
&beginningOfSentenceUnigramProperty)) {
AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
@@ -311,13 +310,13 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
// Refresh word ids.
prevWordsInfo->getPrevWordIds(this, &prevWordIdArray, false /* tryLowerCaseSearch */);
}
- const int wordId = getWordId(CodePointArrayView(*bigramProperty->getTargetCodePoints()),
+ const int wordId = getWordId(CodePointArrayView(*ngramProperty->getTargetCodePoints()),
false /* forceLowerCaseSearch */);
if (wordId == NOT_A_WORD_ID) {
return false;
}
bool addedNewEntry = false;
- if (mNodeWriter.addNgramEntry(prevWordIds, wordId, bigramProperty, &addedNewEntry)) {
+ if (mNodeWriter.addNgramEntry(prevWordIds, wordId, ngramProperty, &addedNewEntry)) {
if (addedNewEntry) {
mBigramCount++;
}
@@ -364,6 +363,32 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
}
}
+// Records one observation of |wordCodePoints| following the context in |prevWordsInfo|.
+// The unigram entry for the word is added/updated first. Then, for each i from 1 up to
+// prevWordsInfo->getPrevWordCount(), an n-gram entry targeting the word is added/updated for the
+// context returned by getTrimmedPrevWordsInfo(i). All entries carry |historicalInfo|. The
+// probability handed to the entries is only a placeholder: DUMMY_PROBABILITY_FOR_VALID_WORDS
+// when |isValidWord| is true, NOT_A_PROBABILITY otherwise.
+// Returns false when the dictionary is not updatable or as soon as one entry cannot be written;
+// updates made before the failing step are not undone.
+bool Ver4PatriciaTriePolicy::updateCounter(const PrevWordsInfo *const prevWordsInfo,
+        const CodePointArrayView wordCodePoints, const bool isValidWord,
+        const HistoricalInfo historicalInfo) {
+    if (!mBuffers->isUpdatable()) {
+        AKLOGI("Warning: updateCounter() is called for non-updatable dictionary.");
+        return false;
+    }
+    // TODO: Have count up method in language model dict content.
+    const int probability = isValidWord ? DUMMY_PROBABILITY_FOR_VALID_WORDS : NOT_A_PROBABILITY;
+    const UnigramProperty unigramProperty(false /* representsBeginningOfSentence */,
+            false /* isNotAWord */, false /*isBlacklisted*/, probability, historicalInfo);
+    if (!addUnigramEntry(wordCodePoints, &unigramProperty)) {
+        AKLOGE("Cannot update unigram entry in updateCounter().");
+        return false;
+    }
+    const NgramProperty ngramProperty(wordCodePoints.toVector(), probability, historicalInfo);
+    for (size_t i = 1; i <= prevWordsInfo->getPrevWordCount(); ++i) {
+        const PrevWordsInfo trimmedPrevWordsInfo(prevWordsInfo->getTrimmedPrevWordsInfo(i));
+        if (!addNgramEntry(&trimmedPrevWordsInfo, &ngramProperty)) {
+            AKLOGE("Cannot update ngram entry in updateCounter().");
+            return false;
+        }
+    }
+    return true;
+}
+
bool Ver4PatriciaTriePolicy::flush(const char *const filePath) {
if (!mBuffers->isUpdatable()) {
AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath);
@@ -451,7 +476,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo();
// Fetch bigram information.
// TODO: Support n-gram.
- std::vector<BigramProperty> bigrams;
+ std::vector<NgramProperty> ngrams;
const WordIdArrayView prevWordIds = WordIdArrayView::singleElementView(&wordId);
int bigramWord1CodePoints[MAX_WORD_LENGTH];
for (const auto entry : mBuffers->getLanguageModelDictContent()->getProbabilityEntries(
@@ -463,9 +488,8 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
const int probability = probabilityEntry.hasHistoricalInfo() ?
ForgettingCurveUtils::decodeProbability(historicalInfo, mHeaderPolicy) :
probabilityEntry.getProbability();
- bigrams.emplace_back(CodePointArrayView(bigramWord1CodePoints, codePointCount).toVector(),
- probability, historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
- historicalInfo->getCount());
+ ngrams.emplace_back(CodePointArrayView(bigramWord1CodePoints, codePointCount).toVector(),
+ probability, *historicalInfo);
}
// Fetch shortcut information.
std::vector<UnigramProperty::ShortcutProperty> shortcuts;
@@ -487,9 +511,8 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
}
const UnigramProperty unigramProperty(probabilityEntry.representsBeginningOfSentence(),
probabilityEntry.isNotAWord(), probabilityEntry.isBlacklisted(),
- probabilityEntry.getProbability(), historicalInfo->getTimeStamp(),
- historicalInfo->getLevel(), historicalInfo->getCount(), &shortcuts);
- return WordProperty(wordCodePoints.toVector(), &unigramProperty, &bigrams);
+ probabilityEntry.getProbability(), *historicalInfo, std::move(shortcuts));
+ return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams);
}
int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
index c9bde2cf5..662bb8d4b 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
@@ -37,6 +37,7 @@ namespace latinime {
class DicNode;
class DicNodeVector;
+// TODO: Support counting ngram entries.
// Word id = Artificial id that is stored in the PtNode looked up by the word.
class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public:
@@ -92,11 +93,15 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
bool removeUnigramEntry(const CodePointArrayView wordCodePoints);
bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
- const BigramProperty *const bigramProperty);
+ const NgramProperty *const ngramProperty);
bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
const CodePointArrayView wordCodePoints);
+ bool updateCounter(const PrevWordsInfo *const prevWordsInfo,
+ const CodePointArrayView wordCodePoints, const bool isValidWord,
+ const HistoricalInfo historicalInfo);
+
bool flush(const char *const filePath);
bool flushWithGC(const char *const filePath);
@@ -126,6 +131,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
// prevent the dictionary from overflowing.
static const int MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
+ // TODO: Remove
+ static const int DUMMY_PROBABILITY_FOR_VALID_WORDS;
const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers;
const HeaderPolicy *const mHeaderPolicy;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
index 442abadee..e1ff973de 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
@@ -114,14 +114,6 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
return false;
}
- readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
- DynamicPtGcEventListeners::TraversePolicyToUpdateBigramProbability
- traversePolicyToUpdateBigramProbability(&ptNodeWriter);
- if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
- &traversePolicyToUpdateBigramProbability)) {
- return false;
- }
-
// Mapping from positions in mBuffer to positions in bufferToWrite.
PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap;
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h
index b6278c4cb..3569d0576 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h
@@ -33,6 +33,7 @@ class Ver4PatriciaTrieWritingHelper {
Ver4PatriciaTrieWritingHelper(Ver4DictBuffers *const buffers)
: mBuffers(buffers) {}
+ // TODO: Support counting ngram entries.
bool writeToDictFile(const char *const dictDirPath, const int unigramCount,
const int bigramCount) const;
@@ -70,11 +71,6 @@ class Ver4PatriciaTrieWritingHelper {
Ver4DictBuffers *const buffersToWrite, int *const outUnigramCount,
int *const outBigramCount);
- bool truncateUnigrams(const Ver4PatriciaTrieNodeReader *const ptNodeReader,
- Ver4PatriciaTrieNodeWriter *const ptNodeWriter, const int maxUnigramCount);
-
- bool truncateBigrams(const int maxBigramCount);
-
Ver4DictBuffers *const mBuffers;
};
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
index fed0ae77e..af4bc186a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
@@ -43,7 +43,7 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
/* static */ const HistoricalInfo ForgettingCurveUtils::createUpdatedHistoricalInfo(
const HistoricalInfo *const originalHistoricalInfo, const int newProbability,
const HistoricalInfo *const newHistoricalInfo, const HeaderPolicy *const headerPolicy) {
- const int timestamp = newHistoricalInfo->getTimeStamp();
+ const int timestamp = newHistoricalInfo->getTimestamp();
if (newProbability != NOT_A_PROBABILITY && originalHistoricalInfo->getLevel() == 0) {
// Add entry as a valid word.
const int level = clampToVisibleEntryLevelRange(newHistoricalInfo->getLevel());
@@ -78,7 +78,7 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
/* static */ int ForgettingCurveUtils::decodeProbability(
const HistoricalInfo *const historicalInfo, const HeaderPolicy *const headerPolicy) {
- const int elapsedTimeStepCount = getElapsedTimeStepCount(historicalInfo->getTimeStamp(),
+ const int elapsedTimeStepCount = getElapsedTimeStepCount(historicalInfo->getTimestamp(),
headerPolicy->getForgettingCurveDurationToLevelDown());
return sProbabilityTable.getProbability(
headerPolicy->getForgettingCurveProbabilityValuesTableId(),
@@ -102,7 +102,7 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
/* static */ bool ForgettingCurveUtils::needsToKeep(const HistoricalInfo *const historicalInfo,
const HeaderPolicy *const headerPolicy) {
return historicalInfo->getLevel() > 0
- || getElapsedTimeStepCount(historicalInfo->getTimeStamp(),
+ || getElapsedTimeStepCount(historicalInfo->getTimestamp(),
headerPolicy->getForgettingCurveDurationToLevelDown())
< DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD;
}
@@ -110,12 +110,12 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
/* static */ const HistoricalInfo ForgettingCurveUtils::createHistoricalInfoToSave(
const HistoricalInfo *const originalHistoricalInfo,
const HeaderPolicy *const headerPolicy) {
- if (originalHistoricalInfo->getTimeStamp() == NOT_A_TIMESTAMP) {
+ if (originalHistoricalInfo->getTimestamp() == NOT_A_TIMESTAMP) {
return HistoricalInfo();
}
const int durationToLevelDownInSeconds = headerPolicy->getForgettingCurveDurationToLevelDown();
const int elapsedTimeStep = getElapsedTimeStepCount(
- originalHistoricalInfo->getTimeStamp(), durationToLevelDownInSeconds);
+ originalHistoricalInfo->getTimestamp(), durationToLevelDownInSeconds);
if (elapsedTimeStep <= MAX_ELAPSED_TIME_STEP_COUNT) {
// No need to update historical info.
return *originalHistoricalInfo;
@@ -124,7 +124,7 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
const int maxLevelDownAmonut = elapsedTimeStep / (MAX_ELAPSED_TIME_STEP_COUNT + 1);
const int levelDownAmount = (maxLevelDownAmonut >= originalHistoricalInfo->getLevel()) ?
originalHistoricalInfo->getLevel() : maxLevelDownAmonut;
- const int adjustedTimestampInSeconds = originalHistoricalInfo->getTimeStamp() +
+ const int adjustedTimestampInSeconds = originalHistoricalInfo->getTimestamp() +
levelDownAmount * durationToLevelDownInSeconds;
return HistoricalInfo(adjustedTimestampInSeconds,
originalHistoricalInfo->getLevel() - levelDownAmount, 0 /* count */);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
index 313eb6b64..10abb405a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
@@ -20,7 +20,7 @@
#include <vector>
#include "defines.h"
-#include "suggest/policyimpl/dictionary/utils/historical_info.h"
+#include "suggest/core/dictionary/property/historical_info.h"
namespace latinime {