about summary refs log tree commit diff stats
path: root/native/jni/src
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni/src')
-rw-r--r-- native/jni/src/defines.h 7
-rw-r--r-- native/jni/src/suggest/core/dictionary/dictionary.cpp 7
-rw-r--r-- native/jni/src/suggest/core/dictionary/dictionary.h 4
-rw-r--r-- native/jni/src/suggest/core/dictionary/property/word_property.h 8
-rw-r--r-- native/jni/src/suggest/core/layout/proximity_info.cpp 2
-rw-r--r-- native/jni/src/suggest/core/layout/proximity_info_state_utils.h 2
-rw-r--r-- native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h 2
-rw-r--r-- native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h 4
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h 6
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp 14
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h 1
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp 16
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp) 2
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h (renamed from native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h) 0
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp 91
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h 30
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h 11
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp) 4
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h (renamed from native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h) 0
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h (renamed from native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_policy.h) 2
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h 8
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v2/shortcut/shortcut_list_policy.h (renamed from native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h) 2
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp) 118
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h (renamed from native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h) 12
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp 6
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h 11
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h (renamed from native/jni/src/suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h) 2
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp 11
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h 6
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp 2
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp 44
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h 15
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp 32
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h 8
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp 4
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h 4
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp 12
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h 4
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp 50
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h 9
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp 14
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h 2
42 files changed, 366 insertions, 223 deletions
diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h
index 761063f8a..a80c97530 100644
--- a/native/jni/src/defines.h
+++ b/native/jni/src/defines.h
@@ -293,13 +293,6 @@ static inline void prof_out(void) {
#define M_PI_F 3.14159265f
#define MAX_PERCENTILE 100
-// Number of base-10 digits in the largest integer + 1 to leave room for a zero terminator.
-// As such, this is the maximum number of characters will be needed to represent an int as a
-// string, including the terminator; this is used as the size of a string buffer large enough to
-// hold any value that is intended to fit in an integer, e.g. in the code that reads the header
-// of the binary dictionary where a {key,value} string pair scheme is used.
-#define LARGEST_INT_DIGIT_COUNT 11
-
#define NOT_A_CODE_POINT (-1)
#define NOT_A_DISTANCE (-1)
#define NOT_A_COORDINATE (-1)
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index e288413a3..fdc893653 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -88,11 +88,10 @@ void Dictionary::addUnigramWord(const int *const word, const int length,
mDictionaryStructureWithBufferPolicy->addUnigramWord(word, length, unigramProperty);
}
-void Dictionary::addBigramWords(const int *const word0, const int length0, const int *const word1,
- const int length1, const int probability, const int timestamp) {
+void Dictionary::addBigramWords(const int *const word0, const int length0,
+ const BigramProperty *const bigramProperty) {
TimeKeeper::setCurrentTime();
- mDictionaryStructureWithBufferPolicy->addBigramWords(word0, length0, word1, length1,
- probability, timestamp);
+ mDictionaryStructureWithBufferPolicy->addBigramWords(word0, length0, bigramProperty);
}
void Dictionary::removeBigramWords(const int *const word0, const int length0,
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h
index b6149b338..f0a7e5b6a 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h
@@ -76,8 +76,8 @@ class Dictionary {
void addUnigramWord(const int *const codePoints, const int codePointCount,
const UnigramProperty *const unigramProperty);
- void addBigramWords(const int *const word0, const int length0, const int *const word1,
- const int length1, const int probability, const int timestamp);
+ void addBigramWords(const int *const word0, const int length0,
+ const BigramProperty *const bigramProperty);
void removeBigramWords(const int *const word0, const int length0, const int *const word1,
const int length1);
diff --git a/native/jni/src/suggest/core/dictionary/property/word_property.h b/native/jni/src/suggest/core/dictionary/property/word_property.h
index 5519a917c..aa3e0b68a 100644
--- a/native/jni/src/suggest/core/dictionary/property/word_property.h
+++ b/native/jni/src/suggest/core/dictionary/property/word_property.h
@@ -42,6 +42,14 @@ class WordProperty {
jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilities,
jobject outShortcutTargets, jobject outShortcutProbabilities) const;
+ const UnigramProperty *getUnigramProperty() const {
+ return &mUnigramProperty;
+ }
+
+ const std::vector<BigramProperty> *getBigramProperties() const {
+ return &mBigrams;
+ }
+
private:
// Default copy constructor is used for using as a return value.
DISALLOW_ASSIGNMENT_OPERATOR(WordProperty);
diff --git a/native/jni/src/suggest/core/layout/proximity_info.cpp b/native/jni/src/suggest/core/layout/proximity_info.cpp
index c40a2bdca..4c75a188e 100644
--- a/native/jni/src/suggest/core/layout/proximity_info.cpp
+++ b/native/jni/src/suggest/core/layout/proximity_info.cpp
@@ -226,7 +226,7 @@ int ProximityInfo::getKeyCenterXOfKeyIdG(
// When the referencePointY is NOT_A_COORDINATE, this method calculates the return value without
// using the line segment.
int ProximityInfo::getKeyCenterYOfKeyIdG(
- const int keyId, const int referencePointY, const bool isGeometric) const {
+ const int keyId, const int referencePointY, const bool isGeometric) const {
// TODO: Remove "isGeometric" and have separate "proximity_info"s for gesture and typing.
if (keyId < 0) {
return 0;
diff --git a/native/jni/src/suggest/core/layout/proximity_info_state_utils.h b/native/jni/src/suggest/core/layout/proximity_info_state_utils.h
index 71e83a80c..211a79737 100644
--- a/native/jni/src/suggest/core/layout/proximity_info_state_utils.h
+++ b/native/jni/src/suggest/core/layout/proximity_info_state_utils.h
@@ -56,7 +56,7 @@ class ProximityInfoStateUtils {
const std::vector<int> *const sampledLengthCache,
const std::vector<int> *const sampledInputIndice,
std::vector<float> *sampledSpeedRates, std::vector<float> *sampledDirections);
- static void refreshBeelineSpeedRates(const int mostCommonKeyWidth, const float averageSpeed,
+ static void refreshBeelineSpeedRates(const int mostCommonKeyWidth, const float averageSpeed,
const int inputSize, const int *const xCoordinates, const int *const yCoordinates,
const int *times, const int sampledInputSize,
const std::vector<int> *const sampledInputXs,
diff --git a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
index a8dab9fcd..845e629e6 100644
--- a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
@@ -49,6 +49,8 @@ class DictionaryHeaderStructurePolicy {
virtual bool shouldBoostExactMatches() const = 0;
+ virtual const std::vector<int> *getLocale() const = 0;
+
protected:
DictionaryHeaderStructurePolicy() {}
diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
index 807f9b8dd..ce5a49f83 100644
--- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
@@ -73,8 +73,8 @@ class DictionaryStructureWithBufferPolicy {
const UnigramProperty *const unigramProperty) = 0;
// Returns whether the update was success or not.
- virtual bool addBigramWords(const int *const word0, const int length0, const int *const word1,
- const int length1, const int probability, const int timestamp) = 0;
+ virtual bool addBigramWords(const int *const word0, const int length0,
+ const BigramProperty *const bigramProperty) = 0;
// Returns whether the update was success or not.
virtual bool removeBigramWords(const int *const word0, const int length0,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
index 251a71941..da24302c2 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
@@ -139,6 +139,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
switch (mDictFormatVersion) {
case FormatUtils::VERSION_2:
return FormatUtils::VERSION_2;
+ case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
+ return FormatUtils::VERSION_4_ONLY_FOR_TESTING;
case FormatUtils::VERSION_4:
return FormatUtils::VERSION_4;
default:
@@ -238,6 +240,10 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
const int unigramCount, const int bigramCount, const int extendedRegionSize,
DictionaryHeaderStructurePolicy::AttributeMap *outAttributeMap) const;
+ AK_FORCE_INLINE const std::vector<int> *getLocale() const {
+ return &mLocale;
+ }
+
private:
DISALLOW_COPY_AND_ASSIGN(HeaderPolicy);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
index d20accfbc..2a9028a9e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
@@ -26,6 +26,13 @@
namespace latinime {
+// Number of base-10 digits in the largest integer + 1 to leave room for a zero terminator.
+// As such, this is the maximum number of characters needed to represent a non-negative int as a
+// string, including the terminator (a leading '-' needs one more); this is used as the size of a
+// string buffer for values that are intended to fit in an integer, e.g. in the code that reads the
+// header of the binary dictionary where a {key,value} string pair scheme is used.
+const int HeaderReadWriteUtils::LARGEST_INT_DIGIT_COUNT = 11;
+
const int HeaderReadWriteUtils::MAX_ATTRIBUTE_KEY_LENGTH = 256;
const int HeaderReadWriteUtils::MAX_ATTRIBUTE_VALUE_LENGTH = 256;
@@ -91,8 +98,9 @@ typedef DictionaryHeaderStructurePolicy::AttributeMap AttributeMap;
case FormatUtils::VERSION_2:
// Version 2 dictionary writing is not supported.
return false;
+ case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
case FormatUtils::VERSION_4:
- return buffer->writeUintAndAdvancePosition(FormatUtils::VERSION_4 /* data */,
+ return buffer->writeUintAndAdvancePosition(version /* data */,
HEADER_DICTIONARY_VERSION_SIZE, writingPos);
default:
return false;
@@ -154,8 +162,8 @@ typedef DictionaryHeaderStructurePolicy::AttributeMap AttributeMap;
/* static */ void HeaderReadWriteUtils::setIntAttributeInner(AttributeMap *const headerAttributes,
const AttributeMap::key_type *const key, const int value) {
AttributeMap::mapped_type valueVector;
- char charBuf[LARGEST_INT_DIGIT_COUNT + 1];
- snprintf(charBuf, LARGEST_INT_DIGIT_COUNT + 1, "%d", value);
+ char charBuf[LARGEST_INT_DIGIT_COUNT];
+ snprintf(charBuf, sizeof(charBuf), "%d", value);
insertCharactersIntoVector(charBuf, &valueVector);
(*headerAttributes)[*key] = valueVector;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h
index a6b4c4e14..9b90488fc 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h
@@ -92,6 +92,7 @@ class HeaderReadWriteUtils {
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderReadWriteUtils);
+ static const int LARGEST_INT_DIGIT_COUNT;
static const int MAX_ATTRIBUTE_KEY_LENGTH;
static const int MAX_ATTRIBUTE_VALUE_LENGTH;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
index be7e43b98..c4d18608c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
@@ -52,9 +52,11 @@ namespace latinime {
DictionaryStructureWithBufferPolicyFactory:: newPolicyForOnMemoryDict(
const int formatVersion, const std::vector<int> &locale,
const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap) {
- switch (formatVersion) {
+ FormatUtils::FORMAT_VERSION dictFormatVersion = FormatUtils::getFormatVersion(formatVersion);
+ switch (dictFormatVersion) {
+ case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
case FormatUtils::VERSION_4: {
- HeaderPolicy headerPolicy(FormatUtils::VERSION_4, locale, attributeMap);
+ HeaderPolicy headerPolicy(dictFormatVersion, locale, attributeMap);
Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers =
Ver4DictBuffers::createVer4DictBuffers(&headerPolicy,
Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE);
@@ -87,11 +89,13 @@ namespace latinime {
if (!mmappedBuffer) {
return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(nullptr);
}
- switch (FormatUtils::detectFormatVersion(mmappedBuffer->getBuffer(),
- mmappedBuffer->getBufferSize())) {
+ const FormatUtils::FORMAT_VERSION formatVersion = FormatUtils::detectFormatVersion(
+ mmappedBuffer->getBuffer(), mmappedBuffer->getBufferSize());
+ switch (formatVersion) {
case FormatUtils::VERSION_2:
AKLOGE("Given path is a directory but the format is version 2. path: %s", path);
break;
+ case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
case FormatUtils::VERSION_4: {
const int dictDirPathBufSize = strlen(headerFilePath) + 1 /* terminator */;
char dictPath[dictDirPathBufSize];
@@ -102,7 +106,8 @@ namespace latinime {
return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(nullptr);
}
Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers(
- Ver4DictBuffers::openVer4DictBuffers(dictPath, std::move(mmappedBuffer)));
+ Ver4DictBuffers::openVer4DictBuffers(dictPath, std::move(mmappedBuffer),
+ formatVersion));
if (!dictBuffers || !dictBuffers->isValid()) {
AKLOGE("DICT: The dictionary doesn't satisfy ver4 format requirements. path: %s",
path);
@@ -135,6 +140,7 @@ namespace latinime {
case FormatUtils::VERSION_2:
return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(
new PatriciaTriePolicy(std::move(mmappedBuffer)));
+ case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
case FormatUtils::VERSION_4:
AKLOGE("Given path is a file but the format is version 4. path: %s", path);
break;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.cpp
index 7d0d09631..08b4e0b5e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h"
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h
index 15f924a6a..15f924a6a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp
index e02dd5550..9e575858a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp
@@ -16,6 +16,7 @@
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h"
+#include "suggest/core/dictionary/property/unigram_property.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
@@ -29,9 +30,8 @@ const int DynamicPtUpdatingHelper::CHILDREN_POSITION_FIELD_SIZE = 3;
bool DynamicPtUpdatingHelper::addUnigramWord(
DynamicPtReadingHelper *const readingHelper,
- const int *const wordCodePoints, const int codePointCount, const int probability,
- const bool isNotAWord, const bool isBlacklisted, const int timestamp,
- bool *const outAddedNewUnigram) {
+ const int *const wordCodePoints, const int codePointCount,
+ const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram) {
int parentPos = NOT_A_DICT_POS;
while (!readingHelper->isEnd()) {
const PtNodeParams ptNodeParams(readingHelper->getPtNodeParams());
@@ -53,20 +53,18 @@ bool DynamicPtUpdatingHelper::addUnigramWord(
if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(ptNodeParams, j,
wordCodePoints[matchedCodePointCount + j])) {
*outAddedNewUnigram = true;
- return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, isNotAWord, isBlacklisted,
- probability, timestamp, wordCodePoints + matchedCodePointCount,
+ return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, unigramProperty,
+ wordCodePoints + matchedCodePointCount,
codePointCount - matchedCodePointCount);
}
}
// All characters are matched.
if (codePointCount == readingHelper->getTotalCodePointCount(ptNodeParams)) {
- return setPtNodeProbability(&ptNodeParams, isNotAWord, isBlacklisted, probability,
- timestamp, outAddedNewUnigram);
+ return setPtNodeProbability(&ptNodeParams, unigramProperty, outAddedNewUnigram);
}
if (!ptNodeParams.hasChildren()) {
*outAddedNewUnigram = true;
- return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams,
- isNotAWord, isBlacklisted, probability, timestamp,
+ return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams, unigramProperty,
wordCodePoints + readingHelper->getTotalCodePointCount(ptNodeParams),
codePointCount - readingHelper->getTotalCodePointCount(ptNodeParams));
}
@@ -83,17 +81,17 @@ bool DynamicPtUpdatingHelper::addUnigramWord(
return createAndInsertNodeIntoPtNodeArray(parentPos,
wordCodePoints + readingHelper->getPrevTotalCodePointCount(),
codePointCount - readingHelper->getPrevTotalCodePointCount(),
- isNotAWord, isBlacklisted, probability, timestamp, &pos);
+ unigramProperty, &pos);
}
bool DynamicPtUpdatingHelper::addBigramWords(const int word0Pos, const int word1Pos,
- const int probability, const int timestamp, bool *const outAddedNewBigram) {
+ const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) {
const PtNodeParams sourcePtNodeParams(
mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word0Pos));
const PtNodeParams targetPtNodeParams(
mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word1Pos));
- return mPtNodeWriter->addNewBigramEntry(&sourcePtNodeParams, &targetPtNodeParams, probability,
- timestamp, outAddedNewBigram);
+ return mPtNodeWriter->addNewBigramEntry(&sourcePtNodeParams, &targetPtNodeParams,
+ bigramProperty, outAddedNewBigram);
}
// Remove a bigram relation from word0Pos to word1Pos.
@@ -115,36 +113,34 @@ bool DynamicPtUpdatingHelper::addShortcutTarget(const int wordPos,
bool DynamicPtUpdatingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos,
const int *const nodeCodePoints, const int nodeCodePointCount,
- const bool isNotAWord, const bool isBlacklisted, const int probability,
- const int timestamp, int *const forwardLinkFieldPos) {
+ const UnigramProperty *const unigramProperty, int *const forwardLinkFieldPos) {
const int newPtNodeArrayPos = mBuffer->getTailPosition();
if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
newPtNodeArrayPos, forwardLinkFieldPos)) {
return false;
}
return createNewPtNodeArrayWithAChildPtNode(parentPos, nodeCodePoints, nodeCodePointCount,
- isNotAWord, isBlacklisted, probability, timestamp);
+ unigramProperty);
}
-bool DynamicPtUpdatingHelper::setPtNodeProbability(
- const PtNodeParams *const originalPtNodeParams, const bool isNotAWord,
- const bool isBlacklisted, const int probability, const int timestamp,
- bool *const outAddedNewUnigram) {
+bool DynamicPtUpdatingHelper::setPtNodeProbability(const PtNodeParams *const originalPtNodeParams,
+ const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram) {
if (originalPtNodeParams->isTerminal()) {
// Overwrites the probability.
*outAddedNewUnigram = false;
- return mPtNodeWriter->updatePtNodeProbability(originalPtNodeParams, probability, timestamp);
+ return mPtNodeWriter->updatePtNodeUnigramProperty(originalPtNodeParams, unigramProperty);
} else {
// Make the node terminal and write the probability.
*outAddedNewUnigram = true;
const int movedPos = mBuffer->getTailPosition();
int writingPos = movedPos;
const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(originalPtNodeParams,
- isNotAWord, isBlacklisted, true /* isTerminal */,
- originalPtNodeParams->getParentPos(), originalPtNodeParams->getCodePointCount(),
- originalPtNodeParams->getCodePoints(), probability));
+ unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
+ true /* isTerminal */, originalPtNodeParams->getParentPos(),
+ originalPtNodeParams->getCodePointCount(), originalPtNodeParams->getCodePoints(),
+ unigramProperty->getProbability()));
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
- timestamp, &writingPos)) {
+ unigramProperty, &writingPos)) {
return false;
}
if (!mPtNodeWriter->markPtNodeAsMoved(originalPtNodeParams, movedPos, movedPos)) {
@@ -155,31 +151,30 @@ bool DynamicPtUpdatingHelper::setPtNodeProbability(
}
bool DynamicPtUpdatingHelper::createChildrenPtNodeArrayAndAChildPtNode(
- const PtNodeParams *const parentPtNodeParams, const bool isNotAWord,
- const bool isBlacklisted, const int probability, const int timestamp,
+ const PtNodeParams *const parentPtNodeParams, const UnigramProperty *const unigramProperty,
const int *const codePoints, const int codePointCount) {
const int newPtNodeArrayPos = mBuffer->getTailPosition();
if (!mPtNodeWriter->updateChildrenPosition(parentPtNodeParams, newPtNodeArrayPos)) {
return false;
}
return createNewPtNodeArrayWithAChildPtNode(parentPtNodeParams->getHeadPos(), codePoints,
- codePointCount, isNotAWord, isBlacklisted, probability, timestamp);
+ codePointCount, unigramProperty);
}
bool DynamicPtUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
const int parentPtNodePos, const int *const nodeCodePoints, const int nodeCodePointCount,
- const bool isNotAWord, const bool isBlacklisted, const int probability,
- const int timestamp) {
+ const UnigramProperty *const unigramProperty) {
int writingPos = mBuffer->getTailPosition();
if (!DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
1 /* arraySize */, &writingPos)) {
return false;
}
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
- isNotAWord, isBlacklisted, true /* isTerminal */,
- parentPtNodePos, nodeCodePointCount, nodeCodePoints, probability));
- if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, timestamp,
- &writingPos)) {
+ unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(), true /* isTerminal */,
+ parentPtNodePos, nodeCodePointCount, nodeCodePoints,
+ unigramProperty->getProbability()));
+ if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
+ unigramProperty, &writingPos)) {
return false;
}
if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
@@ -192,13 +187,13 @@ bool DynamicPtUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
// Returns whether the dictionary updating was succeeded or not.
bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
- const bool isNotAWord, const bool isBlacklisted, const int probabilityOfNewPtNode,
- const int timestamp, const int *const newNodeCodePoints, const int newNodeCodePointCount) {
+ const UnigramProperty *const unigramProperty, const int *const newNodeCodePoints,
+ const int newNodeCodePointCount) {
// When addsExtraChild is true, split the reallocating PtNode and add new child.
// Reallocating PtNode: abcde, newNode: abcxy.
// abc (1st, not terminal) __ de (2nd)
// \_ xy (extra child, terminal)
- // Otherwise, this method makes 1st part terminal and write probabilityOfNewPtNode.
+ // Otherwise, this method makes 1st part terminal and writes information in unigramProperty.
// Reallocating PtNode: abcde, newNode: abc.
// abc (1st, terminal) __ de (2nd)
const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount;
@@ -216,11 +211,12 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
}
} else {
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
- isNotAWord, isBlacklisted, true /* isTerminal */,
- reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount,
- reallocatingPtNodeParams->getCodePoints(), probabilityOfNewPtNode));
+ unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
+ true /* isTerminal */, reallocatingPtNodeParams->getParentPos(),
+ overlappingCodePointCount, reallocatingPtNodeParams->getCodePoints(),
+ unigramProperty->getProbability()));
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
- timestamp, &writingPos)) {
+ unigramProperty, &writingPos)) {
return false;
}
}
@@ -244,11 +240,12 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
}
if (addsExtraChild) {
const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode(
- isNotAWord, isBlacklisted, true /* isTerminal */,
- firstPartOfReallocatedPtNodePos, newNodeCodePointCount - overlappingCodePointCount,
- newNodeCodePoints + overlappingCodePointCount, probabilityOfNewPtNode));
+ unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
+ true /* isTerminal */, firstPartOfReallocatedPtNodePos,
+ newNodeCodePointCount - overlappingCodePointCount,
+ newNodeCodePoints + overlappingCodePointCount, unigramProperty->getProbability()));
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&extraChildPtNodeParams,
- timestamp, &writingPos)) {
+ unigramProperty, &writingPos)) {
return false;
}
}
@@ -269,8 +266,8 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
}
const PtNodeParams DynamicPtUpdatingHelper::getUpdatedPtNodeParams(
- const PtNodeParams *const originalPtNodeParams, const bool isNotAWord,
- const bool isBlacklisted, const bool isTerminal, const int parentPos,
+ const PtNodeParams *const originalPtNodeParams,
+ const bool isNotAWord, const bool isBlacklisted, const bool isTerminal, const int parentPos,
const int codePointCount, const int *const codePoints, const int probability) const {
const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
isBlacklisted, isNotAWord, isTerminal, originalPtNodeParams->hasShortcutTargets(),
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h
index 9b2815263..f10d15a9b 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h
@@ -22,10 +22,12 @@
namespace latinime {
+class BigramProperty;
class BufferWithExtendableBuffer;
class DynamicPtReadingHelper;
class PtNodeReader;
class PtNodeWriter;
+class UnigramProperty;
class DynamicPtUpdatingHelper {
public:
@@ -37,13 +39,12 @@ class DynamicPtUpdatingHelper {
// Add a word to the dictionary. If the word already exists, update the probability.
bool addUnigramWord(DynamicPtReadingHelper *const readingHelper,
- const int *const wordCodePoints, const int codePointCount, const int probability,
- const bool isNotAWord, const bool isBlacklisted, const int timestamp,
- bool *const outAddedNewUnigram);
+ const int *const wordCodePoints, const int codePointCount,
+ const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram);
// Add a bigram relation from word0Pos to word1Pos.
- bool addBigramWords(const int word0Pos, const int word1Pos, const int probability,
- const int timestamp, bool *const outAddedNewBigram);
+ bool addBigramWords(const int word0Pos, const int word1Pos,
+ const BigramProperty *const bigramProperty, bool *const outAddedNewBigram);
// Remove a bigram relation from word0Pos to word1Pos.
bool removeBigramWords(const int word0Pos, const int word1Pos);
@@ -62,25 +63,22 @@ class DynamicPtUpdatingHelper {
PtNodeWriter *const mPtNodeWriter;
bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
- const int nodeCodePointCount, const bool isNotAWord, const bool isBlacklisted,
- const int probability, const int timestamp, int *const forwardLinkFieldPos);
+ const int nodeCodePointCount, const UnigramProperty *const unigramProperty,
+ int *const forwardLinkFieldPos);
- bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams, const bool isNotAWord,
- const bool isBlacklisted, const int probability, const int timestamp,
- bool *const outAddedNewUnigram);
+ bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams,
+ const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram);
bool createChildrenPtNodeArrayAndAChildPtNode(const PtNodeParams *const parentPtNodeParams,
- const bool isNotAWord, const bool isBlacklisted, const int probability,
- const int timestamp, const int *const codePoints, const int codePointCount);
+ const UnigramProperty *const unigramProperty, const int *const codePoints,
+ const int codePointCount);
bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints,
- const int nodeCodePointCount, const bool isNotAWord, const bool isBlacklisted,
- const int probability, const int timestamp);
+ const int nodeCodePointCount, const UnigramProperty *const unigramProperty);
bool reallocatePtNodeAndAddNewPtNodes(
const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
- const bool isNotAWord, const bool isBlacklisted, const int probabilityOfNewPtNode,
- const int timestamp, const int *const newNodeCodePoints,
+ const UnigramProperty *const unigramProperty, const int *const newNodeCodePoints,
const int newNodeCodePointCount);
const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h
index e843f074a..a8029f73f 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h
@@ -24,6 +24,9 @@
namespace latinime {
+class BigramProperty;
+class UnigramProperty;
+
// Interface class used to write PtNode information.
class PtNodeWriter {
public:
@@ -51,8 +54,8 @@ class PtNodeWriter {
virtual bool markPtNodeAsWillBecomeNonTerminal(
const PtNodeParams *const toBeUpdatedPtNodeParams) = 0;
- virtual bool updatePtNodeProbability(const PtNodeParams *const toBeUpdatedPtNodeParams,
- const int probability, const int timestamp) = 0;
+ virtual bool updatePtNodeUnigramProperty(const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const UnigramProperty *const unigramProperty) = 0;
virtual bool updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(
const PtNodeParams *const toBeUpdatedPtNodeParams,
@@ -65,10 +68,10 @@ class PtNodeWriter {
int *const ptNodeWritingPos) = 0;
virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
- const int timestamp, int *const ptNodeWritingPos) = 0;
+ const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos) = 0;
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
- const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp,
+ const PtNodeParams *const targetPtNodeParam, const BigramProperty *const bigramProperty,
bool *const outAddedNewBigram) = 0;
virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.cpp
index 847dcdee5..91c76941c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
@@ -44,7 +44,7 @@ const int ShortcutListReadingUtils::WHITELIST_SHORTCUT_PROBABILITY = 15;
}
/* static */ int ShortcutListReadingUtils::readShortcutTarget(
- const uint8_t *const dictRoot, const int maxLength, int *const outWord, int *const pos) {
+ const uint8_t *const dictRoot, const int maxLength, int *const outWord, int *const pos) {
return ByteArrayUtils::readStringAndAdvancePosition(dictRoot, maxLength, outWord, pos);
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h
index d065bf7fd..d065bf7fd 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h
index a898e2afc..00bb502dc 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h
@@ -21,7 +21,7 @@
#include "defines.h"
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
-#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h"
namespace latinime {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
index 85f46603e..54d1e0f6d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
@@ -22,9 +22,9 @@
#include "defines.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
-#include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
-#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v2/shortcut/shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h"
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
@@ -88,8 +88,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
return false;
}
- bool addBigramWords(const int *const word0, const int length0, const int *const word1,
- const int length1, const int probability, const int timestamp) {
+ bool addBigramWords(const int *const word0, const int length0,
+ const BigramProperty *const bigramProperty) {
// This method should not be called for non-updatable dictionary.
AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
return false;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/shortcut/shortcut_list_policy.h
index 6d2b4778c..8e16ccc05 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/shortcut/shortcut_list_policy.h
@@ -21,7 +21,7 @@
#include "defines.h"
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
-#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h"
namespace latinime {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp
index 5df2096a4..7a52fd180 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp
@@ -14,10 +14,11 @@
* limitations under the License.
*/
-#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h"
-#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
+#include "suggest/core/dictionary/property/bigram_property.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
@@ -49,13 +50,18 @@ void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const out
}
bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId,
- const int newProbability, const int timestamp, bool *const outAddedNewEntry) {
+ const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) {
+ // 1. The word has no bigrams yet.
+ // 2. The word has bigrams, and there is the target in the list.
+ // 3. The word has bigrams, and there is an invalid entry that can be reclaimed.
+ // 4. The word has bigrams. We have to append new bigram entry to the list.
+ // 5. Same as 4, but the list is the last entry of the content file.
if (outAddedNewEntry) {
*outAddedNewEntry = false;
}
const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
if (bigramListPos == NOT_A_DICT_POS) {
- // Updating PtNode doesn't have a bigram list.
+ // Case 1. PtNode that doesn't have a bigram list.
// Create new bigram list.
if (!mBigramDictContent->createNewBigramList(terminalId)) {
return false;
@@ -63,7 +69,7 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
newTargetTerminalId);
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(&newBigramEntry,
- newProbability, timestamp);
+ bigramProperty);
// Write an entry.
const int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
if (!mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, writingPos)) {
@@ -75,42 +81,55 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
return true;
}
- const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos);
- if (entryPosToUpdate != NOT_A_DICT_POS) {
- // Overwrite existing entry.
- const BigramEntry originalBigramEntry =
- mBigramDictContent->getBigramEntry(entryPosToUpdate);
- if (!originalBigramEntry.isValid()) {
- // Reuse invalid entry.
- if (outAddedNewEntry) {
- *outAddedNewEntry = true;
+ int tailEntryPos = NOT_A_DICT_POS;
+ const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos,
+ &tailEntryPos);
+ if (tailEntryPos != NOT_A_DICT_POS || entryPosToUpdate == NOT_A_DICT_POS) {
+ // Case 4, 5.
+ // Add new entry to the bigram list.
+ if (tailEntryPos == NOT_A_DICT_POS) {
+ // Case 4. Create new bigram list.
+ if (!mBigramDictContent->createNewBigramList(terminalId)) {
+ return false;
+ }
+ const int destPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+ // Copy existing bigram list.
+ if (!mBigramDictContent->copyBigramList(bigramListPos, destPos, &tailEntryPos)) {
+ return false;
}
}
- const BigramEntry updatedBigramEntry =
- originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId);
+ // Write new entry at the tail position of the bigram content.
+ const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
+ newTargetTerminalId);
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
- &updatedBigramEntry, newProbability, timestamp);
- return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate);
+ &newBigramEntry, bigramProperty);
+ if (!mBigramDictContent->writeBigramEntryAtTail(&bigramEntryToWrite)) {
+ return false;
+ }
+ // Update has next flag of the tail entry.
+ if (!updateHasNextFlag(true /* hasNext */, tailEntryPos)) {
+ return false;
+ }
+ if (outAddedNewEntry) {
+ *outAddedNewEntry = true;
+ }
+ return true;
}
- // Add new entry to the bigram list.
- // Create new bigram list.
- if (!mBigramDictContent->createNewBigramList(terminalId)) {
- return false;
+ // Case 2. Overwrite the existing entry. Case 3. Reclaim and reuse the existing invalid entry.
+ const BigramEntry originalBigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate);
+ if (!originalBigramEntry.isValid()) {
+ // Case 3. Reuse the existing invalid entry. outAddedNewEntry is false when an existing
+ // entry is updated.
+ if (outAddedNewEntry) {
+ *outAddedNewEntry = true;
+ }
}
- // Write new entry at a head position of the bigram list.
- int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
- const BigramEntry newBigramEntry(true /* hasNext */, NOT_A_PROBABILITY, newTargetTerminalId);
+ const BigramEntry updatedBigramEntry =
+ originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId);
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
- &newBigramEntry, newProbability, timestamp);
- if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(&bigramEntryToWrite, &writingPos)) {
- return false;
- }
- if (outAddedNewEntry) {
- *outAddedNewEntry = true;
- }
- // Append existing entries by copying.
- return mBigramDictContent->copyBigramList(bigramListPos, writingPos);
+ &updatedBigramEntry, bigramProperty);
+ return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate);
}
bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) {
@@ -119,7 +138,8 @@ bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTer
// Bigram list doesn't exist.
return false;
}
- const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos);
+ const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos,
+ nullptr /* outTailEntryPos */);
if (entryPosToUpdate == NOT_A_DICT_POS) {
// Bigram entry doesn't exist.
return false;
@@ -204,7 +224,10 @@ int Ver4BigramListPolicy::getBigramEntryConut(const int terminalId) {
}
int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
- const int bigramListPos) const {
+ const int bigramListPos, int *const outTailEntryPos) const {
+ if (outTailEntryPos) {
+ *outTailEntryPos = NOT_A_DICT_POS;
+ }
bool hasNext = true;
int invalidEntryPos = NOT_A_DICT_POS;
int readingPos = bigramListPos;
@@ -220,23 +243,36 @@ int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
// Invalid entry that can be reused is found.
invalidEntryPos = entryPos;
}
+ if (!hasNext && mBigramDictContent->isContentTailPos(readingPos)) {
+ if (outTailEntryPos) {
+ *outTailEntryPos = entryPos;
+ }
+ }
}
return invalidEntryPos;
}
const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
- const BigramEntry *const originalBigramEntry, const int newProbability,
- const int timestamp) const {
+ const BigramEntry *const originalBigramEntry,
+ const BigramProperty *const bigramProperty) const {
// TODO: Consolidate historical info and probability.
if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
+ const HistoricalInfo historicalInfoForUpdate(bigramProperty->getTimestamp(),
+ bigramProperty->getLevel(), bigramProperty->getCount());
const HistoricalInfo updatedHistoricalInfo =
ForgettingCurveUtils::createUpdatedHistoricalInfo(
- originalBigramEntry->getHistoricalInfo(), newProbability, timestamp,
- mHeaderPolicy);
+ originalBigramEntry->getHistoricalInfo(), bigramProperty->getProbability(),
+ &historicalInfoForUpdate, mHeaderPolicy);
return originalBigramEntry->updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo);
} else {
- return originalBigramEntry->updateProbabilityAndGetEntry(newProbability);
+ return originalBigramEntry->updateProbabilityAndGetEntry(bigramProperty->getProbability());
}
}
+bool Ver4BigramListPolicy::updateHasNextFlag(const bool hasNext, const int bigramEntryPos) {
+ const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(bigramEntryPos);
+ const BigramEntry updatedBigramEntry = bigramEntry.updateHasNextAndGetEntry(hasNext);
+ return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, bigramEntryPos);
+}
+
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h
index 5b6c5a173..1613941c4 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h
@@ -24,6 +24,7 @@
namespace latinime {
class BigramDictContent;
+class BigramProperty;
class HeaderPolicy;
class TerminalPositionLookupTable;
@@ -43,8 +44,8 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
// Do nothing because we don't need to skip bigram lists in ver4 dictionaries.
}
- bool addNewEntry(const int terminalId, const int newTargetTerminalId, const int newProbability,
- const int timestamp, bool *const outAddedNewEntry);
+ bool addNewEntry(const int terminalId, const int newTargetTerminalId,
+ const BigramProperty *const bigramProperty, bool *const outAddedNewEntry);
bool removeEntry(const int terminalId, const int targetTerminalId);
@@ -56,10 +57,13 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy);
- int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos) const;
+ int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos,
+ int *const outTailEntryPos) const;
const BigramEntry createUpdatedBigramEntryFrom(const BigramEntry *const originalBigramEntry,
- const int newProbability, const int timestamp) const;
+ const BigramProperty *const bigramProperty) const;
+
+ bool updateHasNextFlag(const bool hasNext, const int bigramEntryPos);
BigramDictContent *const mBigramDictContent;
const TerminalPositionLookupTable *const mTerminalPositionLookupTable;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
index 279f5b33a..56f19dbae 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
@@ -113,13 +113,17 @@ bool BigramDictContent::writeBigramEntryAndAdvancePosition(
return true;
}
-bool BigramDictContent::copyBigramList(const int bigramListPos, const int toPos) {
+bool BigramDictContent::copyBigramList(const int bigramListPos, const int toPos,
+ int *const outTailEntryPos) {
int readingPos = bigramListPos;
int writingPos = toPos;
bool hasNext = true;
while (hasNext) {
const BigramEntry bigramEntry = getBigramEntryAndAdvancePosition(&readingPos);
hasNext = bigramEntry.hasNext();
+ if (!hasNext) {
+ *outTailEntryPos = writingPos;
+ }
if (!writeBigramEntryAndAdvancePosition(&bigramEntry, &writingPos)) {
AKLOGE("Cannot write bigram entry to copy. pos: %d", writingPos);
return false;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
index ba2a05209..944e0f9e2 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
@@ -58,6 +58,11 @@ class BigramDictContent : public SparseTableDictContent {
return addressLookupTable->get(terminalId);
}
+ bool writeBigramEntryAtTail(const BigramEntry *const bigramEntryToWrite) {
+ int writingPos = getContentBuffer()->getTailPosition();
+ return writeBigramEntryAndAdvancePosition(bigramEntryToWrite, &writingPos);
+ }
+
bool writeBigramEntry(const BigramEntry *const bigramEntryToWrite, const int entryWritingPos) {
int writingPos = entryWritingPos;
return writeBigramEntryAndAdvancePosition(bigramEntryToWrite, &writingPos);
@@ -71,7 +76,7 @@ class BigramDictContent : public SparseTableDictContent {
return getUpdatableAddressLookupTable()->set(terminalId, bigramListPos);
}
- bool copyBigramList(const int bigramListPos, const int toPos);
+ bool copyBigramList(const int bigramListPos, const int toPos, int *const outTailEntryPos);
bool flushToFile(const char *const dictPath) const {
return flush(dictPath, Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION,
@@ -83,6 +88,10 @@ class BigramDictContent : public SparseTableDictContent {
const BigramDictContent *const originalBigramDictContent,
int *const outBigramEntryCount);
+ bool isContentTailPos(const int pos) const {
+ return pos == getContentBuffer()->getTailPosition();
+ }
+
private:
DISALLOW_COPY_AND_ASSIGN(BigramDictContent);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h
index fe984615c..790273541 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h
@@ -19,7 +19,7 @@
#include "defines.h"
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
-#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
index 95f654498..77ed38b89 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
@@ -27,7 +27,8 @@
namespace latinime {
/* static */ Ver4DictBuffers::Ver4DictBuffersPtr Ver4DictBuffers::openVer4DictBuffers(
- const char *const dictPath, MmappedBuffer::MmappedBufferPtr headerBuffer) {
+ const char *const dictPath, MmappedBuffer::MmappedBufferPtr headerBuffer,
+ const FormatUtils::FORMAT_VERSION formatVersion) {
if (!headerBuffer) {
ASSERT(false);
AKLOGE("The header buffer must be valid to open ver4 dict buffers.");
@@ -35,7 +36,8 @@ namespace latinime {
}
// TODO: take only dictDirPath, and open both header and trie files in the constructor below
const bool isUpdatable = headerBuffer->isUpdatable();
- return Ver4DictBuffersPtr(new Ver4DictBuffers(dictPath, std::move(headerBuffer), isUpdatable));
+ return Ver4DictBuffersPtr(new Ver4DictBuffers(dictPath, std::move(headerBuffer), isUpdatable,
+ formatVersion));
}
bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath,
@@ -113,11 +115,12 @@ bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath,
}
Ver4DictBuffers::Ver4DictBuffers(const char *const dictPath,
- MmappedBuffer::MmappedBufferPtr headerBuffer, const bool isUpdatable)
+ MmappedBuffer::MmappedBufferPtr headerBuffer, const bool isUpdatable,
+ const FormatUtils::FORMAT_VERSION formatVersion)
: mHeaderBuffer(std::move(headerBuffer)),
mDictBuffer(MmappedBuffer::openBuffer(dictPath,
Ver4DictConstants::TRIE_FILE_EXTENSION, isUpdatable)),
- mHeaderPolicy(mHeaderBuffer->getBuffer(), FormatUtils::VERSION_4),
+ mHeaderPolicy(mHeaderBuffer->getBuffer(), formatVersion),
mExpandableHeaderBuffer(mHeaderBuffer ? mHeaderBuffer->getBuffer() : nullptr,
mHeaderPolicy.getSize(),
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
index fc41432f4..df177c14a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
@@ -36,7 +36,8 @@ class Ver4DictBuffers {
typedef std::unique_ptr<Ver4DictBuffers> Ver4DictBuffersPtr;
static Ver4DictBuffersPtr openVer4DictBuffers(const char *const dictDirPath,
- MmappedBuffer::MmappedBufferPtr headerBuffer);
+ MmappedBuffer::MmappedBufferPtr headerBuffer,
+ const FormatUtils::FORMAT_VERSION formatVersion);
static AK_FORCE_INLINE Ver4DictBuffersPtr createVer4DictBuffers(
const HeaderPolicy *const headerPolicy, const int maxTrieSize) {
@@ -120,7 +121,8 @@ class Ver4DictBuffers {
DISALLOW_COPY_AND_ASSIGN(Ver4DictBuffers);
Ver4DictBuffers(const char *const dictDirPath,
- const MmappedBuffer::MmappedBufferPtr headerBuffer, const bool isUpdatable);
+ const MmappedBuffer::MmappedBufferPtr headerBuffer, const bool isUpdatable,
+ const FormatUtils::FORMAT_VERSION formatVersion);
Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
index 67420a252..0a435e91c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
@@ -95,4 +95,4 @@ const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProce
}
}
-}
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
index 38ff42fee..f89d3d7a0 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
@@ -16,13 +16,14 @@
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
-#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
+#include "suggest/core/dictionary/property/unigram_property.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
-#include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
+#include "suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
@@ -75,7 +76,7 @@ bool Ver4PatriciaTrieNodeWriter::markPtNodeAsMoved(
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
DynamicPtReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */,
- false /* isDeleted */, false /* willBecomeNonTerminal */);
+ false /* isDeleted */, false /* willBecomeNonTerminal */);
int writingPos = toBeUpdatedPtNodeParams->getHeadPos();
// Update flags.
if (!DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, updatedFlags,
@@ -133,9 +134,11 @@ bool Ver4PatriciaTrieNodeWriter::markPtNodeAsWillBecomeNonTerminal(
&writingPos);
}
-bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbability(
- const PtNodeParams *const toBeUpdatedPtNodeParams, const int newProbability,
- const int timestamp) {
+bool Ver4PatriciaTrieNodeWriter::updatePtNodeUnigramProperty(
+ const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const UnigramProperty *const unigramProperty) {
+ // Update probability and historical information.
+ // TODO: Update other information in the unigram property.
if (!toBeUpdatedPtNodeParams->isTerminal()) {
return false;
}
@@ -143,7 +146,7 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbability(
mBuffers->getProbabilityDictContent()->getProbabilityEntry(
toBeUpdatedPtNodeParams->getTerminalId());
const ProbabilityEntry probabilityEntry = createUpdatedEntryFrom(&originalProbabilityEntry,
- newProbability, timestamp);
+ unigramProperty);
return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry);
}
@@ -204,7 +207,8 @@ bool Ver4PatriciaTrieNodeWriter::writePtNodeAndAdvancePosition(
bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
- const PtNodeParams *const ptNodeParams, const int timestamp, int *const ptNodeWritingPos) {
+ const PtNodeParams *const ptNodeParams, const UnigramProperty *const unigramProperty,
+ int *const ptNodeWritingPos) {
int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
if (!writePtNodeAndGetTerminalIdAndAdvancePosition(ptNodeParams, &terminalId,
ptNodeWritingPos)) {
@@ -213,17 +217,16 @@ bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
// Write probability.
ProbabilityEntry newProbabilityEntry;
const ProbabilityEntry probabilityEntryToWrite = createUpdatedEntryFrom(
- &newProbabilityEntry, ptNodeParams->getProbability(), timestamp);
+ &newProbabilityEntry, unigramProperty);
return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(terminalId,
&probabilityEntryToWrite);
}
bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry(
- const PtNodeParams *const sourcePtNodeParams,
- const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp,
- bool *const outAddedNewBigram) {
+ const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam,
+ const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) {
if (!mBigramPolicy->addNewEntry(sourcePtNodeParams->getTerminalId(),
- targetPtNodeParam->getTerminalId(), probability, timestamp, outAddedNewBigram)) {
+ targetPtNodeParam->getTerminalId(), bigramProperty, outAddedNewBigram)) {
AKLOGE("Cannot add new bigram entry. terminalId: %d, targetTerminalId: %d",
sourcePtNodeParams->getTerminalId(), targetPtNodeParam->getTerminalId());
return false;
@@ -379,18 +382,21 @@ bool Ver4PatriciaTrieNodeWriter::writePtNodeAndGetTerminalIdAndAdvancePosition(
}
+// Builds the probability entry to store from |originalProbabilityEntry| and |unigramProperty|.
+// When the header policy reports historical info for words, the timestamp, level and count of
+// |unigramProperty| are combined with the original historical info by
+// ForgettingCurveUtils::createUpdatedHistoricalInfo(); otherwise only the probability is
+// replaced. |originalProbabilityEntry| itself is not modified; a new entry is returned.
const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom(
- const ProbabilityEntry *const originalProbabilityEntry, const int newProbability,
- const int timestamp) const {
+ const ProbabilityEntry *const originalProbabilityEntry,
+ const UnigramProperty *const unigramProperty) const {
// TODO: Consolidate historical info and probability.
if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
+ // Bundle the incoming timestamp/level/count so they can be handed over as one argument.
+ const HistoricalInfo historicalInfoForUpdate(unigramProperty->getTimestamp(),
+ unigramProperty->getLevel(), unigramProperty->getCount());
const HistoricalInfo updatedHistoricalInfo =
ForgettingCurveUtils::createUpdatedHistoricalInfo(
- originalProbabilityEntry->getHistoricalInfo(), newProbability, timestamp,
- mHeaderPolicy);
+ originalProbabilityEntry->getHistoricalInfo(),
+ unigramProperty->getProbability(), &historicalInfoForUpdate, mHeaderPolicy);
return originalProbabilityEntry->createEntryWithUpdatedHistoricalInfo(
&updatedHistoricalInfo);
} else {
- return originalProbabilityEntry->createEntryWithUpdatedProbability(newProbability);
+ // No historical info in this dictionary: only the probability value is carried over.
+ return originalProbabilityEntry->createEntryWithUpdatedProbability(
+ unigramProperty->getProbability());
}
}
@@ -409,4 +415,4 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeFlags(const int ptNodePos,
return true;
}
-}
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
index b2b0504a1..e90bc44c0 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
@@ -57,8 +57,8 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
virtual bool markPtNodeAsWillBecomeNonTerminal(
const PtNodeParams *const toBeUpdatedPtNodeParams);
- virtual bool updatePtNodeProbability(const PtNodeParams *const toBeUpdatedPtNodeParams,
- const int newProbability, const int timestamp);
+ virtual bool updatePtNodeUnigramProperty(const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const UnigramProperty *const unigramProperty);
virtual bool updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(
const PtNodeParams *const toBeUpdatedPtNodeParams, bool *const outNeedsToKeepPtNode);
@@ -73,10 +73,10 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
int *const ptNodeWritingPos);
virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
- const int timestamp, int *const ptNodeWritingPos);
+ const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos);
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
- const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp,
+ const PtNodeParams *const targetPtNodeParam, const BigramProperty *const bigramProperty,
bool *const outAddedNewBigram);
virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,
@@ -102,11 +102,12 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
const PtNodeParams *const ptNodeParams, int *const outTerminalId,
int *const ptNodeWritingPos);
- // Create updated probability entry using given probability and timestamp. In addition to the
+ // Create updated probability entry using given unigram property. In addition to the
// probability, this method updates historical information if needed.
+ // TODO: Update flags belonging to the unigram property.
const ProbabilityEntry createUpdatedEntryFrom(
- const ProbabilityEntry *const originalProbabilityEntry, const int newProbability,
- const int timestamp) const;
+ const ProbabilityEntry *const originalProbabilityEntry,
+ const UnigramProperty *const unigramProperty) const;
bool updatePtNodeFlags(const int ptNodePos, const bool isBlacklisted, const bool isNotAWord,
const bool isTerminal, const bool hasShortcutTargets, const bool hasBigrams,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index f8587f53a..8373dc549 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -183,9 +183,7 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len
readingHelper.initWithPtNodeArrayPos(getRootPosition());
bool addedNewUnigram = false;
if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length,
- unigramProperty->getProbability(), unigramProperty->isNotAWord(),
- unigramProperty->isBlacklisted(), unigramProperty->getTimestamp(),
- &addedNewUnigram)) {
+ unigramProperty, &addedNewUnigram)) {
if (addedNewUnigram) {
mUnigramCount++;
}
@@ -215,8 +213,7 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len
}
bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int length0,
- const int *const word1, const int length1, const int probability,
- const int timestamp) {
+ const BigramProperty *const bigramProperty) {
if (!mBuffers->isUpdatable()) {
AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
return false;
@@ -226,9 +223,10 @@ bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int le
mDictBuffer->getTailPosition());
return false;
}
- if (length0 > MAX_WORD_LENGTH || length1 > MAX_WORD_LENGTH) {
+ // Reject the bigram when either the source word or the target word of |bigramProperty| is
+ // longer than MAX_WORD_LENGTH.
+ if (length0 > MAX_WORD_LENGTH
+ || bigramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
AKLOGE("Either src word or target word is too long to insert the bigram to the dictionary. "
- "length0: %d, length1: %d", length0, length1);
+ // size() presumably yields an unsigned size_t (confirm against BigramProperty); "%d"
+ // expects an int, so convert explicitly instead of passing a mismatched vararg.
+ "length0: %d, length1: %d", length0,
+ static_cast<int>(bigramProperty->getTargetCodePoints()->size()));
return false;
}
const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
@@ -236,14 +234,14 @@ bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int le
if (word0Pos == NOT_A_DICT_POS) {
return false;
}
- const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1,
- false /* forceLowerCaseSearch */);
+ const int word1Pos = getTerminalPtNodePositionOfWord(
+ bigramProperty->getTargetCodePoints()->data(),
+ bigramProperty->getTargetCodePoints()->size(), false /* forceLowerCaseSearch */);
if (word1Pos == NOT_A_DICT_POS) {
return false;
}
bool addedNewBigram = false;
- if (mUpdatingHelper.addBigramWords(word0Pos, word1Pos, probability, timestamp,
- &addedNewBigram)) {
+ if (mUpdatingHelper.addBigramWords(word0Pos, word1Pos, bigramProperty, &addedNewBigram)) {
if (addedNewBigram) {
mBigramCount++;
}
@@ -431,6 +429,9 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
}
int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints) {
+ // TODO: Return code point count like other methods.
+ // Null termination.
+ outCodePoints[0] = 0;
if (token == 0) {
mTerminalPtNodePositionsForIteratingWords.clear();
DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy(
@@ -447,8 +448,13 @@ int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const
}
const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token];
int unigramProbability = NOT_A_PROBABILITY;
- getCodePointsAndProbabilityAndReturnCodePointCount(terminalPtNodePos, MAX_WORD_LENGTH,
- outCodePoints, &unigramProbability);
+ const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
+ terminalPtNodePos, MAX_WORD_LENGTH, outCodePoints, &unigramProbability);
+ if (codePointCount < MAX_WORD_LENGTH) {
+ // Null termination. outCodePoints have to be null terminated or contain MAX_WORD_LENGTH
+ // code points.
+ outCodePoints[codePointCount] = 0;
+ }
const int nextToken = token + 1;
if (nextToken >= terminalPtNodePositionsVectorSize) {
// All words have been iterated.
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
index 8f981def5..b78576484 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
@@ -21,10 +21,10 @@
#include "defines.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
-#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
-#include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h"
+#include "suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
@@ -93,8 +93,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
bool addUnigramWord(const int *const word, const int length,
const UnigramProperty *const unigramProperty);
- bool addBigramWords(const int *const word0, const int length0, const int *const word1,
- const int length1, const int probability, const int timestamp);
+ bool addBigramWords(const int *const word0, const int length0,
+ const BigramProperty *const bigramProperty);
bool removeBigramWords(const int *const word0, const int length0, const int *const word1,
const int length1);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
index 12298d967..f31c50253 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
@@ -19,9 +19,9 @@
#include <cstring>
#include <queue>
-#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
-#include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
index 23cbe3aa3..a2e88a46c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
@@ -37,13 +37,13 @@ class BufferWithExtendableBuffer {
+ // Wraps |originalBuffer| of |originalBufferSize| bytes and records |maxAdditionalBufferSize|.
+ // mAdditionalBuffer is now constructed with 0 rather than EXTEND_ADDITIONAL_BUFFER_SIZE_STEP
+ // (presumably so no additional storage exists until it is first needed -- confirm against the
+ // buffer extension logic).
BufferWithExtendableBuffer(uint8_t *const originalBuffer, const int originalBufferSize,
const int maxAdditionalBufferSize)
: mOriginalBuffer(originalBuffer), mOriginalBufferSize(originalBufferSize),
- mAdditionalBuffer(EXTEND_ADDITIONAL_BUFFER_SIZE_STEP), mUsedAdditionalBufferSize(0),
+ mAdditionalBuffer(0), mUsedAdditionalBufferSize(0),
mMaxAdditionalBufferSize(maxAdditionalBufferSize) {}
// Without original buffer.
+ // Starts with a null original buffer of size 0 and records |maxAdditionalBufferSize|.
+ // mAdditionalBuffer is now constructed with 0 rather than EXTEND_ADDITIONAL_BUFFER_SIZE_STEP.
BufferWithExtendableBuffer(const int maxAdditionalBufferSize)
: mOriginalBuffer(0), mOriginalBufferSize(0),
- mAdditionalBuffer(EXTEND_ADDITIONAL_BUFFER_SIZE_STEP), mUsedAdditionalBufferSize(0),
+ mAdditionalBuffer(0), mUsedAdditionalBufferSize(0),
mMaxAdditionalBufferSize(maxAdditionalBufferSize) {}
AK_FORCE_INLINE int getTailPosition() const {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
index 87fa5994c..7bc7b0a48 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
@@ -34,9 +34,12 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
const int dictVersion, const std::vector<int> localeAsCodePointVector,
const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap) {
TimeKeeper::setCurrentTime();
- switch (dictVersion) {
+ const FormatUtils::FORMAT_VERSION formatVersion = FormatUtils::getFormatVersion(dictVersion);
+ switch (formatVersion) {
+ case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
case FormatUtils::VERSION_4:
- return createEmptyV4DictFile(filePath, localeAsCodePointVector, attributeMap);
+ return createEmptyV4DictFile(filePath, localeAsCodePointVector, attributeMap,
+ formatVersion);
default:
AKLOGE("Cannot create dictionary %s because format version %d is not supported.",
filePath, dictVersion);
@@ -46,8 +49,9 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
/* static */ bool DictFileWritingUtils::createEmptyV4DictFile(const char *const dirPath,
const std::vector<int> localeAsCodePointVector,
- const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap) {
- HeaderPolicy headerPolicy(FormatUtils::VERSION_4, localeAsCodePointVector, attributeMap);
+ const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap,
+ const FormatUtils::FORMAT_VERSION formatVersion) {
+ HeaderPolicy headerPolicy(formatVersion, localeAsCodePointVector, attributeMap);
Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers(
Ver4DictBuffers::createVer4DictBuffers(&headerPolicy,
Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE));
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h
index 54ec651f7..a822989db 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h
@@ -21,6 +21,7 @@
#include "defines.h"
#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h"
+#include "suggest/policyimpl/dictionary/utils/format_utils.h"
namespace latinime {
@@ -46,7 +47,8 @@ class DictFileWritingUtils {
static bool createEmptyV4DictFile(const char *const filePath,
const std::vector<int> localeAsCodePointVector,
- const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap);
+ const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap,
+ const FormatUtils::FORMAT_VERSION formatVersion);
static bool flushBufferToFile(const char *const filePath,
const BufferWithExtendableBuffer *const buffer);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
index c7d3df984..fed0ae77e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
@@ -30,7 +30,7 @@ const int ForgettingCurveUtils::MULTIPLIER_TWO_IN_PROBABILITY_SCALE = 8;
const int ForgettingCurveUtils::DECAY_INTERVAL_SECONDS = 2 * 60 * 60;
const int ForgettingCurveUtils::MAX_LEVEL = 3;
-const int ForgettingCurveUtils::MIN_VALID_LEVEL = 1;
+const int ForgettingCurveUtils::MIN_VISIBLE_LEVEL = 1;
const int ForgettingCurveUtils::MAX_ELAPSED_TIME_STEP_COUNT = 15;
const int ForgettingCurveUtils::DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD = 14;
@@ -41,25 +41,34 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
// TODO: Revise the logic to decide the initial probability depending on the given probability.
/* static */ const HistoricalInfo ForgettingCurveUtils::createUpdatedHistoricalInfo(
- const HistoricalInfo *const originalHistoricalInfo,
- const int newProbability, const int timestamp, const HeaderPolicy *const headerPolicy) {
+ const HistoricalInfo *const originalHistoricalInfo, const int newProbability,
+ const HistoricalInfo *const newHistoricalInfo, const HeaderPolicy *const headerPolicy) {
+ const int timestamp = newHistoricalInfo->getTimeStamp();
if (newProbability != NOT_A_PROBABILITY && originalHistoricalInfo->getLevel() == 0) {
- return HistoricalInfo(timestamp, MIN_VALID_LEVEL /* level */, 0 /* count */);
- } else if (!originalHistoricalInfo->isValid()) {
+ // Add entry as a valid word.
+ const int level = clampToVisibleEntryLevelRange(newHistoricalInfo->getLevel());
+ const int count = clampToValidCountRange(newHistoricalInfo->getCount(), headerPolicy);
+ return HistoricalInfo(timestamp, level, count);
+ } else if (!originalHistoricalInfo->isValid()
+ || originalHistoricalInfo->getLevel() < newHistoricalInfo->getLevel()
+ || (originalHistoricalInfo->getLevel() == newHistoricalInfo->getLevel()
+ && originalHistoricalInfo->getCount() < newHistoricalInfo->getCount())) {
// Initial information.
- return HistoricalInfo(timestamp, 0 /* level */, 1 /* count */);
+ const int level = clampToValidLevelRange(newHistoricalInfo->getLevel());
+ const int count = clampToValidCountRange(newHistoricalInfo->getCount(), headerPolicy);
+ return HistoricalInfo(timestamp, level, count);
} else {
const int updatedCount = originalHistoricalInfo->getCount() + 1;
if (updatedCount >= headerPolicy->getForgettingCurveOccurrencesToLevelUp()) {
// The count exceeds the max value the level can be incremented.
if (originalHistoricalInfo->getLevel() >= MAX_LEVEL) {
// The level is already max.
- return HistoricalInfo(timestamp, originalHistoricalInfo->getLevel(),
- originalHistoricalInfo->getCount());
+ return HistoricalInfo(timestamp,
+ originalHistoricalInfo->getLevel(), originalHistoricalInfo->getCount());
} else {
// Level up.
- return HistoricalInfo(timestamp, originalHistoricalInfo->getLevel() + 1,
- 0 /* count */);
+ return HistoricalInfo(timestamp,
+ originalHistoricalInfo->getLevel() + 1, 0 /* count */);
}
} else {
return HistoricalInfo(timestamp, originalHistoricalInfo->getLevel(), updatedCount);
@@ -73,8 +82,8 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
headerPolicy->getForgettingCurveDurationToLevelDown());
return sProbabilityTable.getProbability(
headerPolicy->getForgettingCurveProbabilityValuesTableId(),
- std::min(std::max(historicalInfo->getLevel(), 0), MAX_LEVEL),
- std::min(std::max(elapsedTimeStepCount, 0), MAX_ELAPSED_TIME_STEP_COUNT));
+ clampToValidLevelRange(historicalInfo->getLevel()),
+ clampToValidTimeStepCountRange(elapsedTimeStepCount));
}
/* static */ int ForgettingCurveUtils::getProbability(const int unigramProbability,
@@ -155,6 +164,23 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
return elapsedTimeInSeconds / timeStepDurationInSeconds;
}
+// Restricts |level| to the range [MIN_VISIBLE_LEVEL, MAX_LEVEL].
+/* static */ int ForgettingCurveUtils::clampToVisibleEntryLevelRange(const int level) {
+    // Apply the lower bound first, then the upper bound.
+    const int lowerBounded = level < MIN_VISIBLE_LEVEL ? MIN_VISIBLE_LEVEL : level;
+    return MAX_LEVEL < lowerBounded ? MAX_LEVEL : lowerBounded;
+}
+
+// Restricts |count| to the range [0, occurrencesToLevelUp - 1], where occurrencesToLevelUp is
+// headerPolicy->getForgettingCurveOccurrencesToLevelUp(). The result is never negative.
+/* static */ int ForgettingCurveUtils::clampToValidCountRange(const int count,
+        const HeaderPolicy *const headerPolicy) {
+    // A non-positive level-up threshold would make the upper bound negative and the "clamped"
+    // count with it; floor the upper bound at 0 so the returned count stays valid.
+    const int maxCount =
+            std::max(headerPolicy->getForgettingCurveOccurrencesToLevelUp() - 1, 0);
+    return std::min(std::max(count, 0), maxCount);
+}
+
+// Restricts |level| to the range [0, MAX_LEVEL].
+/* static */ int ForgettingCurveUtils::clampToValidLevelRange(const int level) {
+    // Apply the lower bound first, then the upper bound.
+    const int nonNegativeLevel = level < 0 ? 0 : level;
+    return MAX_LEVEL < nonNegativeLevel ? MAX_LEVEL : nonNegativeLevel;
+}
+
+// Restricts |timeStepCount| to the range [0, MAX_ELAPSED_TIME_STEP_COUNT].
+/* static */ int ForgettingCurveUtils::clampToValidTimeStepCountRange(const int timeStepCount) {
+    // Apply the lower bound first, then the upper bound.
+    const int nonNegativeCount = timeStepCount < 0 ? 0 : timeStepCount;
+    return MAX_ELAPSED_TIME_STEP_COUNT < nonNegativeCount
+            ? MAX_ELAPSED_TIME_STEP_COUNT : nonNegativeCount;
+}
+
const int ForgettingCurveUtils::ProbabilityTable::PROBABILITY_TABLE_COUNT = 4;
const int ForgettingCurveUtils::ProbabilityTable::WEAK_PROBABILITY_TABLE_ID = 0;
const int ForgettingCurveUtils::ProbabilityTable::MODEST_PROBABILITY_TABLE_ID = 1;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
index bb8690939..3ff80aeec 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
@@ -30,7 +30,7 @@ class ForgettingCurveUtils {
public:
static const HistoricalInfo createUpdatedHistoricalInfo(
const HistoricalInfo *const originalHistoricalInfo, const int newProbability,
- const int timestamp, const HeaderPolicy *const headerPolicy);
+ const HistoricalInfo *const newHistoricalInfo, const HeaderPolicy *const headerPolicy);
static const HistoricalInfo createHistoricalInfoToSave(
const HistoricalInfo *const originalHistoricalInfo,
@@ -93,7 +93,7 @@ class ForgettingCurveUtils {
static const int DECAY_INTERVAL_SECONDS;
static const int MAX_LEVEL;
- static const int MIN_VALID_LEVEL;
+ static const int MIN_VISIBLE_LEVEL;
static const int MAX_ELAPSED_TIME_STEP_COUNT;
static const int DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD;
@@ -103,8 +103,11 @@ class ForgettingCurveUtils {
static const ProbabilityTable sProbabilityTable;
static int backoff(const int unigramProbability);
-
static int getElapsedTimeStepCount(const int timestamp, const int durationToLevelDown);
+ // Clamps |level| to [MIN_VISIBLE_LEVEL, MAX_LEVEL].
+ static int clampToVisibleEntryLevelRange(const int level);
+ // Clamps |level| to [0, MAX_LEVEL].
+ static int clampToValidLevelRange(const int level);
+ // Clamps |count| to [0, headerPolicy->getForgettingCurveOccurrencesToLevelUp() - 1].
+ static int clampToValidCountRange(const int count, const HeaderPolicy *const headerPolicy);
+ // Clamps |timeStepCount| to [0, MAX_ELAPSED_TIME_STEP_COUNT].
+ static int clampToValidTimeStepCountRange(const int timeStepCount);
};
} // namespace latinime
#endif /* LATINIME_FORGETTING_CURVE_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
index cd3c403fa..a8518cdca 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
@@ -25,6 +25,18 @@ const uint32_t FormatUtils::MAGIC_NUMBER = 0x9BC13AFE;
// Magic number (4 bytes), version (2 bytes), flags (2 bytes), header size (4 bytes) = 12
const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 12;
+// Maps a raw integer version to the FORMAT_VERSION enumerator with the same value. Any value
+// that is not VERSION_2, VERSION_4_ONLY_FOR_TESTING or VERSION_4 yields UNKNOWN_VERSION.
+/* static */ FormatUtils::FORMAT_VERSION FormatUtils::getFormatVersion(const int formatVersion) {
+    if (formatVersion == VERSION_2) {
+        return VERSION_2;
+    }
+    if (formatVersion == VERSION_4_ONLY_FOR_TESTING) {
+        return VERSION_4_ONLY_FOR_TESTING;
+    }
+    if (formatVersion == VERSION_4) {
+        return VERSION_4;
+    }
+    return UNKNOWN_VERSION;
+}
/* static */ FormatUtils::FORMAT_VERSION FormatUtils::detectFormatVersion(
const uint8_t *const dict, const int dictSize) {
// The magic number is stored big-endian.
@@ -46,6 +58,8 @@ const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 12;
// same so we use them for both here.
if (ByteArrayUtils::readUint16(dict, 4) == VERSION_2) {
return VERSION_2;
+ } else if (ByteArrayUtils::readUint16(dict, 4) == VERSION_4_ONLY_FOR_TESTING) {
+ return VERSION_4_ONLY_FOR_TESTING;
} else if (ByteArrayUtils::readUint16(dict, 4) == VERSION_4) {
return VERSION_4;
} else {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
index 759b1c9b2..20dfb9d8c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
@@ -31,6 +31,7 @@ class FormatUtils {
enum FORMAT_VERSION {
// These MUST have the same values as the relevant constants in FormatSpec.java.
VERSION_2 = 2,
+ // Recognized as its own version value by getFormatVersion() and detectFormatVersion().
+ VERSION_4_ONLY_FOR_TESTING = 399,
VERSION_4 = 401,
UNKNOWN_VERSION = -1
};
@@ -39,6 +40,7 @@ class FormatUtils {
// unsupported or obsolete dictionary formats.
static const uint32_t MAGIC_NUMBER;
+ // Converts a raw version number into a FORMAT_VERSION; values that match no known version
+ // give UNKNOWN_VERSION.
+ static FORMAT_VERSION getFormatVersion(const int formatVersion);
static FORMAT_VERSION detectFormatVersion(const uint8_t *const dict, const int dictSize);
private: