aboutsummaryrefslogtreecommitdiffstats
path: root/native
diff options
context:
space:
mode:
Diffstat (limited to 'native')
-rw-r--r--native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp1
-rw-r--r--native/jni/src/suggest/core/dictionary/shortcut_utils.h2
-rw-r--r--native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp16
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h10
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp8
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h15
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp42
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h7
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp6
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp19
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h5
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp148
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h70
-rw-r--r--native/jni/src/suggest/policyimpl/typing/scoring_params.cpp48
16 files changed, 239 insertions, 164 deletions
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index 38159b0f3..8f21c50ec 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -67,7 +67,6 @@ static jboolean latinime_BinaryDictionary_createEmptyDictFile(JNIEnv *env, jclas
valueChars[valueUtf8Length] = '\0';
HeaderReadWriteUtils::AttributeMap::mapped_type value;
HeaderReadWriteUtils::insertCharactersIntoVector(valueChars, &value);
-
attributeMap[key] = value;
}
diff --git a/native/jni/src/suggest/core/dictionary/shortcut_utils.h b/native/jni/src/suggest/core/dictionary/shortcut_utils.h
index 461d7b454..9ccef020f 100644
--- a/native/jni/src/suggest/core/dictionary/shortcut_utils.h
+++ b/native/jni/src/suggest/core/dictionary/shortcut_utils.h
@@ -44,7 +44,7 @@ class ShortcutUtils {
shortcutScore = finalScore;
// Protection against int underflow
shortcutScore = max(S_INT_MIN + 1, shortcutScore) - 1;
- kind = Dictionary::KIND_CORRECTION;
+ kind = Dictionary::KIND_SHORTCUT;
}
outputTypes[outputWordIndex] = kind;
frequencies[outputWordIndex] = shortcutScore;
diff --git a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
index a6829b476..5492c6070 100644
--- a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
@@ -37,6 +37,8 @@ class DictionaryHeaderStructurePolicy {
virtual float getMultiWordCostMultiplier() const = 0;
+ virtual int getLastDecayedTime() const = 0;
+
virtual void readHeaderValueOrQuestionMark(const char *const key, int *outValue,
int outValueSize) const = 0;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
index 9c3dc393f..b1170e251 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
@@ -43,7 +43,7 @@ void DynamicBigramListPolicy::getNextBigram(int *const outBigramPos, int *const
}
*outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags);
*outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags);
- if (mIsDecayingDict && !ForgettingCurveUtils::isValidBigram(*outProbability)) {
+ if (mIsDecayingDict && !ForgettingCurveUtils::isValidEncodedProbability(*outProbability)) {
// This bigram is too weak to output.
*outBigramPos = NOT_A_DICT_POS;
} else {
@@ -261,8 +261,8 @@ bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTarg
const int originalProbability = BigramListReadWriteUtils::getProbabilityFromFlags(
bigramFlags);
const int probabilityToWrite = mIsDecayingDict ?
- ForgettingCurveUtils::getUpdatedBigramProbabilityDelta(
- originalProbability, probability) : probability;
+ ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
+ probability) : probability;
const BigramListReadWriteUtils::BigramFlags updatedFlags =
BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags,
probabilityToWrite);
@@ -294,7 +294,7 @@ bool DynamicBigramListPolicy::writeNewBigramEntry(const int bigramTargetPos, con
int *const writingPos) {
// hasNext is false because we are adding a new bigram entry at the end of the bigram list.
const int probabilityToWrite = mIsDecayingDict ?
- ForgettingCurveUtils::getUpdatedBigramProbabilityDelta(NOT_A_PROBABILITY, probability) :
+ ForgettingCurveUtils::getUpdatedEncodedProbability(NOT_A_PROBABILITY, probability) :
probability;
return BigramListReadWriteUtils::createAndWriteBigramEntry(mBuffer, bigramTargetPos,
probabilityToWrite, false /* hasNext */, writingPos);
@@ -360,14 +360,14 @@ int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos(
}
bool DynamicBigramListPolicy::updateProbabilityForDecay(
- BigramListReadWriteUtils::BigramFlags bigramFlags, const int targetPtNodePos,
+ const BigramListReadWriteUtils::BigramFlags bigramFlags, const int targetPtNodePos,
int *const bigramEntryPos, bool *const outRemoved) const {
*outRemoved = false;
if (mIsDecayingDict) {
// Update bigram probability for decaying.
- const int newProbability = ForgettingCurveUtils::getBigramProbabilityDeltaToSave(
- BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags));
- if (ForgettingCurveUtils::isValidBigram(newProbability)) {
+ const int newProbability = ForgettingCurveUtils::getEncodedProbabilityToSave(
+ BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags), mHeaderPolicy);
+ if (ForgettingCurveUtils::isValidEncodedProbability(newProbability)) {
// Write new probability.
const BigramListReadWriteUtils::BigramFlags updatedBigramFlags =
BigramListReadWriteUtils::setProbabilityInFlags(
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
index b358b4ed5..0504b59d5 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
@@ -27,6 +27,7 @@
namespace latinime {
class BufferWithExtendableBuffer;
+class DictionaryHeaderStructurePolicy;
class DictionaryShortcutsStructurePolicy;
/*
@@ -34,10 +35,12 @@ class DictionaryShortcutsStructurePolicy;
*/
class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
public:
- DynamicBigramListPolicy(BufferWithExtendableBuffer *const buffer,
+ DynamicBigramListPolicy(const DictionaryHeaderStructurePolicy *const headerPolicy,
+ BufferWithExtendableBuffer *const buffer,
const DictionaryShortcutsStructurePolicy *const shortcutPolicy,
const bool isDecayingDict)
- : mBuffer(buffer), mShortcutPolicy(shortcutPolicy), mIsDecayingDict(isDecayingDict) {}
+ : mHeaderPolicy(headerPolicy), mBuffer(buffer), mShortcutPolicy(shortcutPolicy),
+ mIsDecayingDict(isDecayingDict) {}
~DynamicBigramListPolicy() {}
@@ -74,6 +77,7 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
static const int CONTINUING_BIGRAM_LINK_COUNT_LIMIT;
static const int BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT;
+ const DictionaryHeaderStructurePolicy *const mHeaderPolicy;
BufferWithExtendableBuffer *const mBuffer;
const DictionaryShortcutsStructurePolicy *const mShortcutPolicy;
const bool mIsDecayingDict;
@@ -81,7 +85,7 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
// Follow bigram link and return the position of bigram target PtNode that is currently valid.
int followBigramLinkAndGetCurrentBigramPtNodePos(const int originalBigramPos) const;
- bool updateProbabilityForDecay(BigramListReadWriteUtils::BigramFlags bigramFlags,
+ bool updateProbabilityForDecay(const BigramListReadWriteUtils::BigramFlags bigramFlags,
const int targetPtNodePos, int *const bigramEntryPos, bool *const outRemoved) const;
};
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
index 26ea6ab68..5724c5d88 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
@@ -16,6 +16,7 @@
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h"
+#include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
namespace latinime {
@@ -29,15 +30,16 @@ bool DynamicPatriciaTrieGcEventListeners
bool isUselessPtNode = !node->isTerminal();
if (node->isTerminal() && mIsDecayingDict) {
const int newProbability =
- ForgettingCurveUtils::getUnigramProbabilityToSave(node->getProbability());
+ ForgettingCurveUtils::getEncodedProbabilityToSave(node->getProbability(),
+ mHeaderPolicy);
int writingPos = node->getProbabilityFieldPos();
// Update probability.
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(
mBuffer, newProbability, &writingPos)) {
return false;
}
- if (!ForgettingCurveUtils::isValidUnigram(newProbability)) {
- isUselessPtNode = false;
+ if (!ForgettingCurveUtils::isValidEncodedProbability(newProbability)) {
+ isUselessPtNode = true;
}
}
if (mChildrenValue > 0) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h
index 463715af5..9755120b0 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h
@@ -29,6 +29,8 @@
namespace latinime {
+class DictionaryHeaderStructurePolicy;
+
class DynamicPatriciaTrieGcEventListeners {
public:
// Updates all PtNodes that can be reached from the root. Checks if each PtNode is useless or
@@ -38,10 +40,12 @@ class DynamicPatriciaTrieGcEventListeners {
: public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
public:
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
+ const DictionaryHeaderStructurePolicy *const headerPolicy,
DynamicPatriciaTrieWritingHelper *const writingHelper,
BufferWithExtendableBuffer *const buffer, const bool isDecayingDict)
- : mWritingHelper(writingHelper), mBuffer(buffer), mIsDecayingDict(isDecayingDict),
- mValueStack(), mChildrenValue(0), mValidUnigramCount(0) {}
+ : mHeaderPolicy(headerPolicy), mWritingHelper(writingHelper), mBuffer(buffer),
+ mIsDecayingDict(isDecayingDict), mValueStack(), mChildrenValue(0),
+ mValidUnigramCount(0) {}
~TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted() {};
@@ -56,6 +60,7 @@ class DynamicPatriciaTrieGcEventListeners {
bool onDescend(const int ptNodeArrayPos) {
mValueStack.push_back(0);
+ mChildrenValue = 0;
return true;
}
@@ -72,9 +77,10 @@ class DynamicPatriciaTrieGcEventListeners {
DISALLOW_IMPLICIT_CONSTRUCTORS(
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted);
+ const DictionaryHeaderStructurePolicy *const mHeaderPolicy;
DynamicPatriciaTrieWritingHelper *const mWritingHelper;
BufferWithExtendableBuffer *const mBuffer;
- const int mIsDecayingDict;
+ const bool mIsDecayingDict;
std::vector<int> mValueStack;
int mChildrenValue;
int mValidUnigramCount;
@@ -85,7 +91,8 @@ class DynamicPatriciaTrieGcEventListeners {
class TraversePolicyToUpdateBigramProbability
: public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
public:
- TraversePolicyToUpdateBigramProbability(DynamicBigramListPolicy *const bigramPolicy)
+ TraversePolicyToUpdateBigramProbability(
+ DynamicBigramListPolicy *const bigramPolicy)
: mBigramPolicy(bigramPolicy), mValidBigramEntryCount(0) {}
bool onAscend() { return true; }
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
index 60d0db0c0..a8ea69f3c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
@@ -37,12 +37,13 @@ namespace latinime {
// BinaryDictionaryDecayingTests.
const char *const DynamicPatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
const char *const DynamicPatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
+const char *const DynamicPatriciaTriePolicy::MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT";
+const char *const DynamicPatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT";
const char *const DynamicPatriciaTriePolicy::SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY =
"SET_NEEDS_TO_DECAY_FOR_TESTING";
const int DynamicPatriciaTriePolicy::MAX_DICT_EXTENDED_REGION_SIZE = 1024 * 1024;
const int DynamicPatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - 1024;
-const int DynamicPatriciaTriePolicy::DECAY_INTERVAL_FOR_DECAYING_DICTS = 2 * 60 * 60;
void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const {
@@ -314,15 +315,15 @@ void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) {
AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
return;
}
- const bool runGCwithDecay = needsToDecay();
- DynamicBigramListPolicy bigramListPolicyForGC(&mBufferWithExtendableBuffer,
- &mShortcutListPolicy, runGCwithDecay);
+ const bool needsToDecay = mHeaderPolicy.isDecayingDict()
+ && (mNeedsToDecayForTesting || ForgettingCurveUtils::needsToDecay(
+ false /* mindsBlockByDecay */, mUnigramCount, mBigramCount, &mHeaderPolicy));
+ DynamicBigramListPolicy bigramListPolicyForGC(&mHeaderPolicy, &mBufferWithExtendableBuffer,
+ &mShortcutListPolicy, needsToDecay);
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
- &bigramListPolicyForGC, &mShortcutListPolicy, runGCwithDecay);
+ &bigramListPolicyForGC, &mShortcutListPolicy, needsToDecay);
writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy);
- if (runGCwithDecay) {
- mNeedsToDecayForTesting = false;
- }
+ mNeedsToDecayForTesting = false;
}
bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
@@ -344,16 +345,8 @@ bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
// Needs to reduce dictionary size.
return true;
} else if (mHeaderPolicy.isDecayingDict()) {
- if (mUnigramCount >= ForgettingCurveUtils::MAX_UNIGRAM_COUNT) {
- // Unigram count exceeds the limit.
- return true;
- } else if (mBigramCount >= ForgettingCurveUtils::MAX_BIGRAM_COUNT) {
- // Bigram count exceeds the limit.
- return true;
- } else if (mindsBlockByGC && needsToDecay()) {
- // Time to update probabilities for decaying.
- return true;
- }
+ return mNeedsToDecayForTesting || ForgettingCurveUtils::needsToDecay(
+ mindsBlockByGC, mUnigramCount, mBigramCount, &mHeaderPolicy);
}
return false;
}
@@ -364,14 +357,17 @@ void DynamicPatriciaTriePolicy::getProperty(const char *const query, char *const
snprintf(outResult, maxResultLength, "%d", mUnigramCount);
} else if (strncmp(query, BIGRAM_COUNT_QUERY, maxResultLength) == 0) {
snprintf(outResult, maxResultLength, "%d", mBigramCount);
+ } else if (strncmp(query, MAX_UNIGRAM_COUNT_QUERY, maxResultLength) == 0) {
+ snprintf(outResult, maxResultLength, "%d",
+ mHeaderPolicy.isDecayingDict() ? ForgettingCurveUtils::MAX_UNIGRAM_COUNT :
+ static_cast<int>(DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE));
+ } else if (strncmp(query, MAX_BIGRAM_COUNT_QUERY, maxResultLength) == 0) {
+ snprintf(outResult, maxResultLength, "%d",
+ mHeaderPolicy.isDecayingDict() ? ForgettingCurveUtils::MAX_BIGRAM_COUNT :
+ static_cast<int>(DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE));
} else if (strncmp(query, SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY, maxResultLength) == 0) {
mNeedsToDecayForTesting = true;
}
}
-bool DynamicPatriciaTriePolicy::needsToDecay() const {
- return mHeaderPolicy.isDecayingDict() && (mNeedsToDecayForTesting
- || mHeaderPolicy.getLastDecayedTime() + DECAY_INTERVAL_FOR_DECAYING_DICTS < time(0));
-}
-
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
index c3bbe9977..be97ee1a5 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
@@ -37,7 +37,7 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(),
mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
mShortcutListPolicy(&mBufferWithExtendableBuffer),
- mBigramListPolicy(&mBufferWithExtendableBuffer, &mShortcutListPolicy,
+ mBigramListPolicy(&mHeaderPolicy, &mBufferWithExtendableBuffer, &mShortcutListPolicy,
mHeaderPolicy.isDecayingDict()),
mUnigramCount(mHeaderPolicy.getUnigramCount()),
mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {}
@@ -102,10 +102,11 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
static const char *const UNIGRAM_COUNT_QUERY;
static const char *const BIGRAM_COUNT_QUERY;
+ static const char *const MAX_UNIGRAM_COUNT_QUERY;
+ static const char *const MAX_BIGRAM_COUNT_QUERY;
static const char *const SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY;
static const int MAX_DICT_EXTENDED_REGION_SIZE;
static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
- static const int DECAY_INTERVAL_FOR_DECAYING_DICTS;
const MmappedBuffer *const mBuffer;
const HeaderPolicy mHeaderPolicy;
@@ -115,8 +116,6 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int mUnigramCount;
int mBigramCount;
int mNeedsToDecayForTesting;
-
- bool needsToDecay() const;
};
} // namespace latinime
#endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp
index 601ee663b..f108c219f 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp
@@ -93,6 +93,12 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreor
if (!listener->onDescend(getPosOfLastPtNodeArrayHead())) {
return false;
}
+ if (isEnd()) {
+ // Empty dictionary. Needs to notify the listener of the tail of empty PtNode array.
+ if (!listener->onReadingPtNodeArrayTail()) {
+ return false;
+ }
+ }
pushReadingStateToStack();
while (!isEnd()) {
if (alreadyVisitedAllPtNodesInArray) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h
index 512a4d818..a71c06971 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h
@@ -279,7 +279,9 @@ class DynamicPatriciaTrieReadingHelper {
} else {
mReadingState = mReadingStateStack.back();
mReadingStateStack.pop_back();
- fetchPtNodeInfo();
+ if (!isEnd()) {
+ fetchPtNodeInfo();
+ }
}
}
};
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
index 9dc2abe20..052558bfc 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
@@ -165,7 +165,10 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNod
MAX_DICTIONARY_SIZE);
int unigramCount = 0;
int bigramCount = 0;
- if (!runGC(rootPtNodeArrayPos, &newDictBuffer, &unigramCount, &bigramCount)) {
+ if (mNeedsToDecay) {
+ ForgettingCurveUtils::sTimeKeeper.setCurrentTime();
+ }
+ if (!runGC(rootPtNodeArrayPos, headerPolicy, &newDictBuffer, &unigramCount, &bigramCount)) {
return;
}
BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
@@ -237,7 +240,8 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
int parentOffsetFieldPos = nodeReader->getHeadPos()
+ DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
- mBuffer, movedPos, nodeReader->getHeadPos(), &parentOffsetFieldPos)) {
+ mBuffer, bigramLinkedNodePos, nodeReader->getHeadPos(),
+ &parentOffsetFieldPos)) {
// Parent offset cannot be written because of a bug or a broken dictionary; thus,
// we give up to update dictionary.
return false;
@@ -481,14 +485,14 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
}
bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
- BufferWithExtendableBuffer *const bufferToWrite, int *const outUnigramCount,
- int *const outBigramCount) {
+ const HeaderPolicy *const headerPolicy, BufferWithExtendableBuffer *const bufferToWrite,
+ int *const outUnigramCount, int *const outBigramCount) {
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPatriciaTrieGcEventListeners
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
- this, mBuffer, mNeedsToDecay);
+ headerPolicy, this, mBuffer, mNeedsToDecay);
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
&traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
return false;
@@ -505,7 +509,6 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
&traversePolicyToUpdateBigramProbability)) {
return false;
}
-
if (mNeedsToDecay && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount()
> ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC) {
// TODO: Remove more bigrams.
@@ -524,7 +527,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
// Create policy instance for the GCed dictionary.
DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite);
- DynamicBigramListPolicy newDictBigramPolicy(bufferToWrite, &newDictShortcutPolicy,
+ DynamicBigramListPolicy newDictBigramPolicy(headerPolicy, bufferToWrite, &newDictShortcutPolicy,
mNeedsToDecay);
// Create reading helper for the GCed dictionary.
DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictBigramPolicy,
@@ -545,7 +548,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
int DynamicPatriciaTrieWritingHelper::getUpdatedProbability(const int originalProbability,
const int newProbability) {
if (mNeedsToDecay) {
- return ForgettingCurveUtils::getUpdatedUnigramProbability(originalProbability,
+ return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
newProbability);
} else {
return newProbability;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
index 0caf29120..ca8664729 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
@@ -128,8 +128,9 @@ class DynamicPatriciaTrieWritingHelper {
const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
const int newNodeCodePointCount);
- bool runGC(const int rootPtNodeArrayPos, BufferWithExtendableBuffer *const bufferToWrite,
- int *const outUnigramCount, int *const outBigramCount);
+ bool runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy,
+ BufferWithExtendableBuffer *const bufferToWrite, int *const outUnigramCount,
+ int *const outBigramCount);
int getUpdatedProbability(const int originalProbability, const int newProbability);
};
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
index c525a8ab5..1632fd072 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
@@ -14,8 +14,13 @@
* limitations under the License.
*/
+#include <cmath>
+#include <ctime>
+#include <stdlib.h>
+
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+#include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
namespace latinime {
@@ -26,106 +31,125 @@ const int ForgettingCurveUtils::MAX_BIGRAM_COUNT = 12000;
const int ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC = 10000;
const int ForgettingCurveUtils::MAX_COMPUTED_PROBABILITY = 127;
-const int ForgettingCurveUtils::MAX_UNIGRAM_PROBABILITY = 120;
-const int ForgettingCurveUtils::MIN_VALID_UNIGRAM_PROBABILITY = 24;
-const int ForgettingCurveUtils::UNIGRAM_PROBABILITY_STEP = 8;
-const int ForgettingCurveUtils::MAX_BIGRAM_PROBABILITY_DELTA = 15;
-const int ForgettingCurveUtils::MIN_VALID_BIGRAM_PROBABILITY_DELTA = 3;
-const int ForgettingCurveUtils::BIGRAM_PROBABILITY_DELTA_STEP = 1;
+const int ForgettingCurveUtils::MAX_ENCODED_PROBABILITY = 15;
+const int ForgettingCurveUtils::MIN_VALID_ENCODED_PROBABILITY = 3;
+const int ForgettingCurveUtils::ENCODED_PROBABILITY_STEP = 1;
+// Currently, we try to decay each uni/bigram once every 2 hours. Accordingly, the expected
+// duration of the decay is approximately 66hours.
+const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f;
+const int ForgettingCurveUtils::DECAY_INTERVAL_SECONDS = 2 * 60 * 60;
+
+const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityTable;
+ForgettingCurveUtils::TimeKeeper ForgettingCurveUtils::sTimeKeeper;
+
+void ForgettingCurveUtils::TimeKeeper::setCurrentTime() {
+ mCurrentTime = time(0);
+}
/* static */ int ForgettingCurveUtils::getProbability(const int encodedUnigramProbability,
- const int encodedBigramProbabilityDelta) {
+ const int encodedBigramProbability) {
if (encodedUnigramProbability == NOT_A_PROBABILITY) {
return NOT_A_PROBABILITY;
- } else if (encodedBigramProbabilityDelta == NOT_A_PROBABILITY) {
- const int rawProbability = ProbabilityUtils::backoff(decodeUnigramProbability(
- encodedUnigramProbability));
- return min(getDecayedProbability(rawProbability), MAX_COMPUTED_PROBABILITY);
+ } else if (encodedBigramProbability == NOT_A_PROBABILITY) {
+ return backoff(decodeProbability(encodedUnigramProbability));
} else {
- const int rawProbability = ProbabilityUtils::computeProbabilityForBigram(
- decodeUnigramProbability(encodedUnigramProbability),
- decodeBigramProbabilityDelta(encodedBigramProbabilityDelta));
- return min(getDecayedProbability(rawProbability), MAX_COMPUTED_PROBABILITY);
+ const int unigramProbability = decodeProbability(encodedUnigramProbability);
+ const int bigramProbability = decodeProbability(encodedBigramProbability);
+ return min(max(unigramProbability, bigramProbability), MAX_COMPUTED_PROBABILITY);
}
}
-/* static */ int ForgettingCurveUtils::getUpdatedUnigramProbability(
+// Caveat: Unlike getProbability(), this method doesn't assume special bigram probability encoding
+// (i.e. unigram probability + bigram probability delta).
+/* static */ int ForgettingCurveUtils::getUpdatedEncodedProbability(
const int originalEncodedProbability, const int newProbability) {
if (originalEncodedProbability == NOT_A_PROBABILITY) {
- // The unigram is not in this dictionary.
+ // The bigram relation is not in this dictionary.
if (newProbability == NOT_A_PROBABILITY) {
- // The unigram is not in other dictionaries.
+ // The bigram target is not in other dictionaries.
return 0;
} else {
- return MIN_VALID_UNIGRAM_PROBABILITY;
+ return MIN_VALID_ENCODED_PROBABILITY;
}
} else {
if (newProbability != NOT_A_PROBABILITY
- && originalEncodedProbability < MIN_VALID_UNIGRAM_PROBABILITY) {
- return MIN_VALID_UNIGRAM_PROBABILITY;
+ && originalEncodedProbability < MIN_VALID_ENCODED_PROBABILITY) {
+ return MIN_VALID_ENCODED_PROBABILITY;
}
- return min(originalEncodedProbability + UNIGRAM_PROBABILITY_STEP, MAX_UNIGRAM_PROBABILITY);
+ return min(originalEncodedProbability + ENCODED_PROBABILITY_STEP, MAX_ENCODED_PROBABILITY);
}
}
-/* static */ int ForgettingCurveUtils::getUnigramProbabilityToSave(const int encodedProbability) {
- return max(encodedProbability - UNIGRAM_PROBABILITY_STEP, 0);
-}
-
-/* static */ int ForgettingCurveUtils::getBigramProbabilityDeltaToSave(
- const int encodedProbabilityDelta) {
- return max(encodedProbabilityDelta - BIGRAM_PROBABILITY_DELTA_STEP, 0);
+/* static */ int ForgettingCurveUtils::isValidEncodedProbability(const int encodedProbability) {
+ return encodedProbability >= MIN_VALID_ENCODED_PROBABILITY;
}
-/* static */ int ForgettingCurveUtils::getUpdatedBigramProbabilityDelta(
- const int originalEncodedProbabilityDelta, const int newProbability) {
- if (originalEncodedProbabilityDelta == NOT_A_PROBABILITY) {
- // The bigram relation is not in this dictionary.
- if (newProbability == NOT_A_PROBABILITY) {
- // The bigram target is not in other dictionaries.
- return 0;
- } else {
- return MIN_VALID_BIGRAM_PROBABILITY_DELTA;
+/* static */ int ForgettingCurveUtils::getEncodedProbabilityToSave(const int encodedProbability,
+ const DictionaryHeaderStructurePolicy *const headerPolicy) {
+ const int elapsedTime = sTimeKeeper.peekCurrentTime() - headerPolicy->getLastDecayedTime();
+ const int decayIterationCount = max(elapsedTime / DECAY_INTERVAL_SECONDS, 1);
+ int currentEncodedProbability = max(min(encodedProbability, MAX_ENCODED_PROBABILITY), 0);
+ // TODO: Implement the decay in more proper way.
+ for (int i = 0; i < decayIterationCount; ++i) {
+ const float currentRate = static_cast<float>(currentEncodedProbability)
+ / static_cast<float>(MAX_ENCODED_PROBABILITY);
+ const float thresholdToDecay = (1.0f - MIN_PROBABILITY_TO_DECAY) * currentRate;
+ const float randValue = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
+ if (thresholdToDecay < randValue) {
+ currentEncodedProbability = max(currentEncodedProbability - ENCODED_PROBABILITY_STEP,
+ 0);
}
- } else {
- if (newProbability != NOT_A_PROBABILITY
- && originalEncodedProbabilityDelta < MIN_VALID_BIGRAM_PROBABILITY_DELTA) {
- return MIN_VALID_BIGRAM_PROBABILITY_DELTA;
- }
- return min(originalEncodedProbabilityDelta + BIGRAM_PROBABILITY_DELTA_STEP,
- MAX_BIGRAM_PROBABILITY_DELTA);
}
+ return currentEncodedProbability;
}
-/* static */ int ForgettingCurveUtils::isValidUnigram(const int encodedUnigramProbability) {
- return encodedUnigramProbability >= MIN_VALID_UNIGRAM_PROBABILITY;
-}
-
-/* static */ int ForgettingCurveUtils::isValidBigram(const int encodedBigramProbabilityDelta) {
- return encodedBigramProbabilityDelta >= MIN_VALID_BIGRAM_PROBABILITY_DELTA;
+/* static */ bool ForgettingCurveUtils::needsToDecay(const bool mindsBlockByDecay,
+ const int unigramCount, const int bigramCount,
+ const DictionaryHeaderStructurePolicy *const headerPolicy) {
+ if (unigramCount >= ForgettingCurveUtils::MAX_UNIGRAM_COUNT) {
+ // Unigram count exceeds the limit.
+ return true;
+ } else if (bigramCount >= ForgettingCurveUtils::MAX_BIGRAM_COUNT) {
+ // Bigram count exceeds the limit.
+ return true;
+ }
+ if (mindsBlockByDecay) {
+ return false;
+ }
+ if (headerPolicy->getLastDecayedTime() + DECAY_INTERVAL_SECONDS < time(0)) {
+ // Time to decay.
+ return true;
+ }
+ return false;
}
-/* static */ int ForgettingCurveUtils::decodeUnigramProbability(const int encodedProbability) {
- const int probability = encodedProbability - MIN_VALID_UNIGRAM_PROBABILITY;
- if (probability < 0) {
+/* static */ int ForgettingCurveUtils::decodeProbability(const int encodedProbability) {
+ if (encodedProbability < MIN_VALID_ENCODED_PROBABILITY) {
return NOT_A_PROBABILITY;
} else {
- return min(probability, MAX_UNIGRAM_PROBABILITY);
+ return min(sProbabilityTable.getProbability(encodedProbability), MAX_ENCODED_PROBABILITY);
}
}
-/* static */ int ForgettingCurveUtils::decodeBigramProbabilityDelta(
- const int encodedProbabilityDelta) {
- const int probabilityDelta = encodedProbabilityDelta - MIN_VALID_BIGRAM_PROBABILITY_DELTA;
- if (probabilityDelta < 0) {
+// See comments in ProbabilityUtils::backoff().
+/* static */ int ForgettingCurveUtils::backoff(const int unigramProbability) {
+ if (unigramProbability == NOT_A_PROBABILITY) {
return NOT_A_PROBABILITY;
} else {
- return min(probabilityDelta, MAX_BIGRAM_PROBABILITY_DELTA);
+ return max(unigramProbability - 8, 0);
}
}
-/* static */ int ForgettingCurveUtils::getDecayedProbability(const int rawProbability) {
- return rawProbability;
+ForgettingCurveUtils::ProbabilityTable::ProbabilityTable() : mTable() {
+ // Table entry is as follows:
+ // 1, 1, 1, 2, 3, 5, 6, 9, 13, 18, 25, 34, 48, 66, 91, 127.
+ // Note that first MIN_VALID_ENCODED_PROBABILITY values are not used.
+ mTable.resize(MAX_ENCODED_PROBABILITY + 1);
+ for (int i = 0; i <= MAX_ENCODED_PROBABILITY; ++i) {
+ const int probability = static_cast<int>(powf(static_cast<float>(MAX_COMPUTED_PROBABILITY),
+ static_cast<float>(i) / static_cast<float>(MAX_ENCODED_PROBABILITY)));
+ mTable[i] = min(MAX_COMPUTED_PROBABILITY, max(0, probability));
+ }
}
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
index 11ffb2e2f..2ad423874 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
@@ -17,54 +17,84 @@
#ifndef LATINIME_FORGETTING_CURVE_UTILS_H
#define LATINIME_FORGETTING_CURVE_UTILS_H
+#include <vector>
+
#include "defines.h"
namespace latinime {
+class DictionaryHeaderStructurePolicy;
+
// TODO: Check the elapsed time and decrease the probability depending on the time. Time field is
// required to introduced to each terminal PtNode and bigram entry.
// TODO: Quit using bigram probability to indicate the delta.
-// TODO: Quit using bigram probability delta.
class ForgettingCurveUtils {
public:
+ class TimeKeeper {
+ public:
+ TimeKeeper() : mCurrentTime(0) {}
+ void setCurrentTime();
+ int peekCurrentTime() const { return mCurrentTime; };
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(TimeKeeper);
+
+ int mCurrentTime;
+ };
+
static const int MAX_UNIGRAM_COUNT;
static const int MAX_UNIGRAM_COUNT_AFTER_GC;
static const int MAX_BIGRAM_COUNT;
static const int MAX_BIGRAM_COUNT_AFTER_GC;
- static int getProbability(const int encodedUnigramProbability,
- const int encodedBigramProbabilityDelta);
+ static TimeKeeper sTimeKeeper;
- static int getUpdatedUnigramProbability(const int originalEncodedProbability,
- const int newProbability);
+ static int getProbability(const int encodedUnigramProbability,
+ const int encodedBigramProbability);
- static int getUpdatedBigramProbabilityDelta(const int originalEncodedProbabilityDelta,
+ static int getUpdatedEncodedProbability(const int originalEncodedProbability,
const int newProbability);
- static int isValidUnigram(const int encodedUnigramProbability);
-
- static int isValidBigram(const int encodedProbabilityDelta);
+ static int isValidEncodedProbability(const int encodedProbability);
- static int getUnigramProbabilityToSave(const int encodedProbability);
+ static int getEncodedProbabilityToSave(const int encodedProbability,
+ const DictionaryHeaderStructurePolicy *const headerPolicy);
- static int getBigramProbabilityDeltaToSave(const int encodedProbabilityDelta);
+ static bool needsToDecay(const bool mindsBlockByDecay, const int unigramCount,
+ const int bigramCount, const DictionaryHeaderStructurePolicy *const headerPolicy);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(ForgettingCurveUtils);
+ class ProbabilityTable {
+ public:
+ ProbabilityTable();
+
+ int getProbability(const int encodedProbability) const {
+ if (encodedProbability < 0 || encodedProbability > static_cast<int>(mTable.size())) {
+ return NOT_A_PROBABILITY;
+ }
+ return mTable[encodedProbability];
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ProbabilityTable);
+
+ std::vector<int> mTable;
+ };
+
static const int MAX_COMPUTED_PROBABILITY;
- static const int MAX_UNIGRAM_PROBABILITY;
- static const int MIN_VALID_UNIGRAM_PROBABILITY;
- static const int UNIGRAM_PROBABILITY_STEP;
- static const int MAX_BIGRAM_PROBABILITY_DELTA;
- static const int MIN_VALID_BIGRAM_PROBABILITY_DELTA;
- static const int BIGRAM_PROBABILITY_DELTA_STEP;
+ static const int MAX_ENCODED_PROBABILITY;
+ static const int MIN_VALID_ENCODED_PROBABILITY;
+ static const int ENCODED_PROBABILITY_STEP;
+ static const float MIN_PROBABILITY_TO_DECAY;
+ static const int DECAY_INTERVAL_SECONDS;
- static int decodeUnigramProbability(const int encodedProbability);
+ static const ProbabilityTable sProbabilityTable;
- static int decodeBigramProbabilityDelta(const int encodedProbability);
+ static int decodeProbability(const int encodedProbability);
- static int getDecayedProbability(const int rawProbability);
+ static int backoff(const int unigramProbability);
};
} // namespace latinime
#endif /* LATINIME_FORGETTING_CURVE_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp
index ecceb60d3..104eb2a7a 100644
--- a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp
+++ b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp
@@ -27,30 +27,30 @@ const int ScoringParams::MAX_CACHE_DIC_NODE_SIZE = 170;
const int ScoringParams::MAX_CACHE_DIC_NODE_SIZE_FOR_SINGLE_POINT = 310;
const int ScoringParams::THRESHOLD_SHORT_WORD_LENGTH = 4;
-const float ScoringParams::DISTANCE_WEIGHT_LENGTH = 0.132f;
-const float ScoringParams::PROXIMITY_COST = 0.095f;
-const float ScoringParams::FIRST_CHAR_PROXIMITY_COST = 0.102f;
-const float ScoringParams::FIRST_PROXIMITY_COST = 0.019f;
-const float ScoringParams::OMISSION_COST = 0.458f;
-const float ScoringParams::OMISSION_COST_SAME_CHAR = 0.491f;
-const float ScoringParams::OMISSION_COST_FIRST_CHAR = 0.582f;
-const float ScoringParams::INSERTION_COST = 0.730f;
-const float ScoringParams::TERMINAL_INSERTION_COST = 0.93f;
-const float ScoringParams::INSERTION_COST_SAME_CHAR = 0.586f;
-const float ScoringParams::INSERTION_COST_PROXIMITY_CHAR = 0.70f;
-const float ScoringParams::INSERTION_COST_FIRST_CHAR = 0.623f;
-const float ScoringParams::TRANSPOSITION_COST = 0.526f;
-const float ScoringParams::SPACE_SUBSTITUTION_COST = 0.319f;
-const float ScoringParams::ADDITIONAL_PROXIMITY_COST = 0.380f;
-const float ScoringParams::SUBSTITUTION_COST = 0.383f;
-const float ScoringParams::COST_NEW_WORD = 0.042f;
-const float ScoringParams::COST_SECOND_OR_LATER_WORD_FIRST_CHAR_UPPERCASE = 0.25f;
-const float ScoringParams::DISTANCE_WEIGHT_LANGUAGE = 1.123f;
-const float ScoringParams::COST_FIRST_LOOKAHEAD = 0.545f;
-const float ScoringParams::COST_LOOKAHEAD = 0.073f;
-const float ScoringParams::HAS_PROXIMITY_TERMINAL_COST = 0.093f;
-const float ScoringParams::HAS_EDIT_CORRECTION_TERMINAL_COST = 0.041f;
-const float ScoringParams::HAS_MULTI_WORD_TERMINAL_COST = 0.447f;
+const float ScoringParams::DISTANCE_WEIGHT_LENGTH = 0.1524f;
+const float ScoringParams::PROXIMITY_COST = 0.0694f;
+const float ScoringParams::FIRST_CHAR_PROXIMITY_COST = 0.072f;
+const float ScoringParams::FIRST_PROXIMITY_COST = 0.07788f;
+const float ScoringParams::OMISSION_COST = 0.4676f;
+const float ScoringParams::OMISSION_COST_SAME_CHAR = 0.399f;
+const float ScoringParams::OMISSION_COST_FIRST_CHAR = 0.5256f;
+const float ScoringParams::INSERTION_COST = 0.7248f;
+const float ScoringParams::TERMINAL_INSERTION_COST = 0.8128f;
+const float ScoringParams::INSERTION_COST_SAME_CHAR = 0.5508f;
+const float ScoringParams::INSERTION_COST_PROXIMITY_CHAR = 0.674f;
+const float ScoringParams::INSERTION_COST_FIRST_CHAR = 0.639f;
+const float ScoringParams::TRANSPOSITION_COST = 0.5608f;
+const float ScoringParams::SPACE_SUBSTITUTION_COST = 0.339f;
+const float ScoringParams::ADDITIONAL_PROXIMITY_COST = 0.4576f;
+const float ScoringParams::SUBSTITUTION_COST = 0.3806f;
+const float ScoringParams::COST_NEW_WORD = 0.0312f;
+const float ScoringParams::COST_SECOND_OR_LATER_WORD_FIRST_CHAR_UPPERCASE = 0.3224f;
+const float ScoringParams::DISTANCE_WEIGHT_LANGUAGE = 1.1214f;
+const float ScoringParams::COST_FIRST_LOOKAHEAD = 0.4836f;
+const float ScoringParams::COST_LOOKAHEAD = 0.00624f;
+const float ScoringParams::HAS_PROXIMITY_TERMINAL_COST = 0.06836f;
+const float ScoringParams::HAS_EDIT_CORRECTION_TERMINAL_COST = 0.0362f;
+const float ScoringParams::HAS_MULTI_WORD_TERMINAL_COST = 0.4182f;
const float ScoringParams::TYPING_BASE_OUTPUT_SCORE = 1.0f;
const float ScoringParams::TYPING_MAX_OUTPUT_SCORE_PER_INPUT = 0.1f;
const float ScoringParams::NORMALIZED_SPATIAL_DISTANCE_THRESHOLD_FOR_EDIT = 0.045f;