aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/src
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni/src')
-rw-r--r--native/jni/src/suggest/core/policy/weighting.cpp2
-rw-r--r--native/jni/src/suggest/core/policy/weighting.h2
-rw-r--r--native/jni/src/suggest/core/suggest.cpp3
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp9
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h14
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.cpp30
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.h97
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp2
-rw-r--r--native/jni/src/suggest/policyimpl/typing/scoring_params.cpp6
-rw-r--r--native/jni/src/suggest/policyimpl/typing/scoring_params.h2
-rw-r--r--native/jni/src/suggest/policyimpl/typing/typing_weighting.h10
12 files changed, 161 insertions, 18 deletions
diff --git a/native/jni/src/suggest/core/policy/weighting.cpp b/native/jni/src/suggest/core/policy/weighting.cpp
index a06e7d070..450203d98 100644
--- a/native/jni/src/suggest/core/policy/weighting.cpp
+++ b/native/jni/src/suggest/core/policy/weighting.cpp
@@ -119,7 +119,7 @@ static inline void profile(const CorrectionType correctionType, DicNode *const n
return weighting->getSubstitutionCost()
+ weighting->getMatchedCost(traverseSession, dicNode, inputStateG);
case CT_NEW_WORD_SPACE_OMISSION:
- return weighting->getNewWordSpatialCost(traverseSession, dicNode, inputStateG);
+ return weighting->getSpaceOmissionCost(traverseSession, dicNode, inputStateG);
case CT_MATCH:
return weighting->getMatchedCost(traverseSession, dicNode, inputStateG);
case CT_COMPLETION:
diff --git a/native/jni/src/suggest/core/policy/weighting.h b/native/jni/src/suggest/core/policy/weighting.h
index bd6b3cf41..863c4eabe 100644
--- a/native/jni/src/suggest/core/policy/weighting.h
+++ b/native/jni/src/suggest/core/policy/weighting.h
@@ -57,7 +57,7 @@ class Weighting {
const DicTraverseSession *const traverseSession,
const DicNode *const parentDicNode, const DicNode *const dicNode) const = 0;
- virtual float getNewWordSpatialCost(const DicTraverseSession *const traverseSession,
+ virtual float getSpaceOmissionCost(const DicTraverseSession *const traverseSession,
const DicNode *const dicNode, DicNode_InputStateG *const inputStateG) const = 0;
virtual float getNewWordBigramLanguageCost(
diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp
index c71526293..68a36454e 100644
--- a/native/jni/src/suggest/core/suggest.cpp
+++ b/native/jni/src/suggest/core/suggest.cpp
@@ -160,8 +160,7 @@ void Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const {
// TODO: Remove. Do not prune node here.
const bool allowsErrorCorrections = TRAVERSAL->allowsErrorCorrections(&dicNode);
// Process for handling space substitution (e.g., hevis => he is)
- if (allowsErrorCorrections
- && TRAVERSAL->isSpaceSubstitutionTerminal(traverseSession, &dicNode)) {
+ if (TRAVERSAL->isSpaceSubstitutionTerminal(traverseSession, &dicNode)) {
createNextWordDicNode(traverseSession, &dicNode, true /* spaceSubstitution */);
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
index 509bd683b..05a3a6356 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
@@ -24,9 +24,11 @@
namespace latinime {
const int LanguageModelDictContent::DUMMY_PROBABILITY_FOR_VALID_WORDS = 1;
+const int LanguageModelDictContent::TRIE_MAP_BUFFER_INDEX = 0;
+const int LanguageModelDictContent::GLOBAL_COUNTERS_BUFFER_INDEX = 1;
bool LanguageModelDictContent::save(FILE *const file) const {
- return mTrieMap.save(file);
+ return mTrieMap.save(file) && mGlobalCounters.save(file);
}
bool LanguageModelDictContent::runGC(
@@ -212,6 +214,9 @@ bool LanguageModelDictContent::updateAllEntriesOnInputWord(const WordIdArrayView
if (!setProbabilityEntry(wordId, &updatedUnigramProbabilityEntry)) {
return false;
}
+ mGlobalCounters.incrementTotalCount();
+ mGlobalCounters.updateMaxValueOfCounters(
+ updatedUnigramProbabilityEntry.getHistoricalInfo()->getCount());
for (size_t i = 0; i < prevWordIds.size(); ++i) {
if (prevWordIds[i] == NOT_A_WORD_ID) {
break;
@@ -225,6 +230,8 @@ bool LanguageModelDictContent::updateAllEntriesOnInputWord(const WordIdArrayView
if (!setNgramProbabilityEntry(limitedPrevWordIds, wordId, &updatedNgramProbabilityEntry)) {
return false;
}
+ mGlobalCounters.updateMaxValueOfCounters(
+ updatedUnigramProbabilityEntry.getHistoricalInfo()->getCount());
if (!originalNgramProbabilityEntry.isValid()) {
entryCountersToUpdate->incrementNgramCount(i + 2);
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h
index 1cccf92d2..5b92b96e3 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h
@@ -22,6 +22,7 @@
#include "defines.h"
#include "suggest/core/dictionary/word_attributes.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
@@ -131,15 +132,17 @@ class LanguageModelDictContent {
const ProbabilityEntry mProbabilityEntry;
};
- LanguageModelDictContent(const ReadWriteByteArrayView trieMapBuffer,
+ LanguageModelDictContent(const ReadWriteByteArrayView *const buffers,
const bool hasHistoricalInfo)
- : mTrieMap(trieMapBuffer), mHasHistoricalInfo(hasHistoricalInfo) {}
+ : mTrieMap(buffers[TRIE_MAP_BUFFER_INDEX]),
+ mGlobalCounters(buffers[GLOBAL_COUNTERS_BUFFER_INDEX]),
+ mHasHistoricalInfo(hasHistoricalInfo) {}
explicit LanguageModelDictContent(const bool hasHistoricalInfo)
- : mTrieMap(), mHasHistoricalInfo(hasHistoricalInfo) {}
+ : mTrieMap(), mGlobalCounters(), mHasHistoricalInfo(hasHistoricalInfo) {}
bool isNearSizeLimit() const {
- return mTrieMap.isNearSizeLimit();
+ return mTrieMap.isNearSizeLimit() || mGlobalCounters.needsToHalveCounters();
}
bool save(FILE *const file) const;
@@ -218,8 +221,11 @@ class LanguageModelDictContent {
// TODO: Remove
static const int DUMMY_PROBABILITY_FOR_VALID_WORDS;
+ static const int TRIE_MAP_BUFFER_INDEX;
+ static const int GLOBAL_COUNTERS_BUFFER_INDEX;
TrieMap mTrieMap;
+ LanguageModelDictContentGlobalCounters mGlobalCounters;
const bool mHasHistoricalInfo;
bool runGCInner(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.cpp
new file mode 100644
index 000000000..9518ab419
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.h"
+
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+
+namespace latinime {
+
+const int LanguageModelDictContentGlobalCounters::COUNTER_VALUE_NEAR_LIMIT_THRESHOLD =
+ (1 << (Ver4DictConstants::WORD_COUNT_FIELD_SIZE * CHAR_BIT)) - 64;
+const int LanguageModelDictContentGlobalCounters::TOTAL_COUNT_VALUE_NEAR_LIMIT_THRESHOLD = 1 << 30;
+const int LanguageModelDictContentGlobalCounters::COUNTER_SIZE_IN_BYTES = 4;
+const int LanguageModelDictContentGlobalCounters::TOTAL_COUNT_INDEX = 0;
+const int LanguageModelDictContentGlobalCounters::MAX_VALUE_OF_COUNTERS_INDEX = 1;
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.h
new file mode 100644
index 000000000..9953aa425
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_LANGUAGE_MODEL_DICT_CONTENT_GLOBAL_COUNTERS_H
+#define LATINIME_LANGUAGE_MODEL_DICT_CONTENT_GLOBAL_COUNTERS_H
+
+#include <cstdio>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
+#include "utils/byte_array_view.h"
+
+namespace latinime {
+
+class LanguageModelDictContentGlobalCounters {
+ public:
+ explicit LanguageModelDictContentGlobalCounters(const ReadWriteByteArrayView buffer)
+ : mBuffer(buffer, 0 /* maxAdditionalBufferSize */),
+ mTotalCount(readValue(mBuffer, TOTAL_COUNT_INDEX)),
+ mMaxValueOfCounters(readValue(mBuffer, MAX_VALUE_OF_COUNTERS_INDEX)) {}
+
+ LanguageModelDictContentGlobalCounters()
+ : mBuffer(0 /* maxAdditionalBufferSize */), mTotalCount(0), mMaxValueOfCounters(0) {}
+
+ bool needsToHalveCounters() const {
+ return mMaxValueOfCounters >= COUNTER_VALUE_NEAR_LIMIT_THRESHOLD
+ || mTotalCount >= TOTAL_COUNT_VALUE_NEAR_LIMIT_THRESHOLD;
+ }
+
+ int getTotalCount() const {
+ return mTotalCount;
+ }
+
+ bool save(FILE *const file) const {
+ BufferWithExtendableBuffer bufferToWrite(
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
+ if (!bufferToWrite.writeUint(mTotalCount, COUNTER_SIZE_IN_BYTES,
+ TOTAL_COUNT_INDEX * COUNTER_SIZE_IN_BYTES)) {
+ return false;
+ }
+ if (!bufferToWrite.writeUint(mMaxValueOfCounters, COUNTER_SIZE_IN_BYTES,
+ MAX_VALUE_OF_COUNTERS_INDEX * COUNTER_SIZE_IN_BYTES)) {
+ return false;
+ }
+ return DictFileWritingUtils::writeBufferToFileTail(file, &bufferToWrite);
+ }
+
+ void incrementTotalCount() {
+ mTotalCount += 1;
+ }
+
+ void updateMaxValueOfCounters(const int count) {
+ mMaxValueOfCounters = std::max(count, mMaxValueOfCounters);
+ }
+
+ void halveCounters() {
+ mMaxValueOfCounters /= 2;
+ mTotalCount /= 2;
+ }
+
+private:
+ DISALLOW_COPY_AND_ASSIGN(LanguageModelDictContentGlobalCounters);
+
+ const static int COUNTER_VALUE_NEAR_LIMIT_THRESHOLD;
+ const static int TOTAL_COUNT_VALUE_NEAR_LIMIT_THRESHOLD;
+ const static int COUNTER_SIZE_IN_BYTES;
+ const static int TOTAL_COUNT_INDEX;
+ const static int MAX_VALUE_OF_COUNTERS_INDEX;
+
+ BufferWithExtendableBuffer mBuffer;
+ int mTotalCount;
+ int mMaxValueOfCounters;
+
+ static int readValue(const BufferWithExtendableBuffer &buffer, const int index) {
+ const int pos = COUNTER_SIZE_IN_BYTES * index;
+ if (pos + COUNTER_SIZE_IN_BYTES > buffer.getTailPosition()) {
+ return 0;
+ }
+ return buffer.readUint(COUNTER_SIZE_IN_BYTES, pos);
+ }
+};
+} // namespace latinime
+#endif /* LATINIME_LANGUAGE_MODEL_DICT_CONTENT_GLOBAL_COUNTERS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
index 45f88e9b2..4d088dcab 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
@@ -179,7 +179,7 @@ Ver4DictBuffers::Ver4DictBuffers(MmappedBuffer::MmappedBufferPtr &&headerBuffer,
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mTerminalPositionLookupTable(
contentBuffers[Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX]),
- mLanguageModelDictContent(contentBuffers[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX],
+ mLanguageModelDictContent(&contentBuffers[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX],
mHeaderPolicy.hasHistoricalInfoOfWords()),
mShortcutDictContent(&contentBuffers[Ver4DictConstants::SHORTCUT_BUFFERS_INDEX]),
mIsUpdatable(mDictBuffer->isUpdatable()) {}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
index 8e6cb974b..eb6080a24 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
@@ -67,6 +67,6 @@ const int Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK = 0x80;
const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT = 1;
const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT = 3;
-const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT = 1;
+const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT = 2;
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp
index 6a2db687d..a6f9a8b23 100644
--- a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp
+++ b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp
@@ -48,17 +48,17 @@ const float ScoringParams::INSERTION_COST_SAME_CHAR = 0.5508f;
const float ScoringParams::INSERTION_COST_PROXIMITY_CHAR = 0.674f;
const float ScoringParams::INSERTION_COST_FIRST_CHAR = 0.639f;
const float ScoringParams::TRANSPOSITION_COST = 0.5608f;
-const float ScoringParams::SPACE_SUBSTITUTION_COST = 0.334f;
+const float ScoringParams::SPACE_SUBSTITUTION_COST = 0.33f;
+const float ScoringParams::SPACE_OMISSION_COST = 0.1f;
const float ScoringParams::ADDITIONAL_PROXIMITY_COST = 0.37972f;
const float ScoringParams::SUBSTITUTION_COST = 0.3806f;
-const float ScoringParams::COST_NEW_WORD = 0.0314f;
const float ScoringParams::COST_SECOND_OR_LATER_WORD_FIRST_CHAR_UPPERCASE = 0.3224f;
const float ScoringParams::DISTANCE_WEIGHT_LANGUAGE = 1.1214f;
const float ScoringParams::COST_FIRST_COMPLETION = 0.4836f;
const float ScoringParams::COST_COMPLETION = 0.00624f;
const float ScoringParams::HAS_PROXIMITY_TERMINAL_COST = 0.0683f;
const float ScoringParams::HAS_EDIT_CORRECTION_TERMINAL_COST = 0.0362f;
-const float ScoringParams::HAS_MULTI_WORD_TERMINAL_COST = 0.4182f;
+const float ScoringParams::HAS_MULTI_WORD_TERMINAL_COST = 0.3482f;
const float ScoringParams::TYPING_BASE_OUTPUT_SCORE = 1.0f;
const float ScoringParams::TYPING_MAX_OUTPUT_SCORE_PER_INPUT = 0.1f;
const float ScoringParams::NORMALIZED_SPATIAL_DISTANCE_THRESHOLD_FOR_EDIT = 0.095f;
diff --git a/native/jni/src/suggest/policyimpl/typing/scoring_params.h b/native/jni/src/suggest/policyimpl/typing/scoring_params.h
index 731424f3d..b8f889559 100644
--- a/native/jni/src/suggest/policyimpl/typing/scoring_params.h
+++ b/native/jni/src/suggest/policyimpl/typing/scoring_params.h
@@ -56,9 +56,9 @@ class ScoringParams {
static const float INSERTION_COST_FIRST_CHAR;
static const float TRANSPOSITION_COST;
static const float SPACE_SUBSTITUTION_COST;
+ static const float SPACE_OMISSION_COST;
static const float ADDITIONAL_PROXIMITY_COST;
static const float SUBSTITUTION_COST;
- static const float COST_NEW_WORD;
static const float COST_SECOND_OR_LATER_WORD_FIRST_CHAR_UPPERCASE;
static const float DISTANCE_WEIGHT_LANGUAGE;
static const float COST_FIRST_COMPLETION;
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
index 84077174d..1338ac81a 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
@@ -150,9 +150,10 @@ class TypingWeighting : public Weighting {
return cost + weightedDistance;
}
- float getNewWordSpatialCost(const DicTraverseSession *const traverseSession,
+ float getSpaceOmissionCost(const DicTraverseSession *const traverseSession,
const DicNode *const dicNode, DicNode_InputStateG *inputStateG) const {
- return ScoringParams::COST_NEW_WORD * traverseSession->getMultiWordCostMultiplier();
+ const float cost = ScoringParams::SPACE_OMISSION_COST;
+ return cost * traverseSession->getMultiWordCostMultiplier();
}
float getNewWordBigramLanguageCost(const DicTraverseSession *const traverseSession,
@@ -202,7 +203,10 @@ class TypingWeighting : public Weighting {
AK_FORCE_INLINE float getSpaceSubstitutionCost(const DicTraverseSession *const traverseSession,
const DicNode *const dicNode) const {
- const float cost = ScoringParams::SPACE_SUBSTITUTION_COST + ScoringParams::COST_NEW_WORD;
+ const int inputIndex = dicNode->getInputIndex(0);
+ const float distanceToSpaceKey = traverseSession->getProximityInfoState(0)
+ ->getPointToKeyLength(inputIndex, KEYCODE_SPACE);
+ const float cost = ScoringParams::SPACE_SUBSTITUTION_COST * distanceToSpaceKey;
return cost * traverseSession->getMultiWordCostMultiplier();
}