diff options
Diffstat (limited to 'native/jni/src')
7 files changed, 19 insertions, 10 deletions
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp index 8f42df6d2..028e9ecbf 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp @@ -29,10 +29,10 @@ bool DynamicPtGcEventListeners // PtNode is useless when the PtNode is not a terminal and doesn't have any not useless // children. bool isUselessPtNode = !ptNodeParams->isTerminal(); - if (ptNodeParams->isTerminal()) { + if (ptNodeParams->isTerminal() && !ptNodeParams->representsNonWordInfo()) { bool needsToKeepPtNode = true; - if (!mPtNodeWriter->updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(ptNodeParams, - &needsToKeepPtNode)) { + if (!mPtNodeWriter->updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC( + ptNodeParams, &needsToKeepPtNode)) { AKLOGE("Cannot update PtNode probability or get needs to keep PtNode after GC."); return false; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h index bef401f87..5704c2e90 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h @@ -160,7 +160,8 @@ class PtNodeParams { } AK_FORCE_INLINE bool representsNonWordInfo() const { - return getCodePointCount() > 0 && CharUtils::isInUnicodeSpace(getCodePoints()[0]); + return getCodePointCount() > 0 && CharUtils::isInUnicodeSpace(getCodePoints()[0]) + && isNotAWord(); } // Parent node position diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp index f31c50253..e868ddf6f 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp @@ -213,13 +213,16 @@ bool Ver4PatriciaTrieWritingHelper::truncateUnigrams( // Delete unigrams. while (static_cast<int>(priorityQueue.size()) > maxUnigramCount) { const int ptNodePos = priorityQueue.top().getDictPos(); + priorityQueue.pop(); const PtNodeParams ptNodeParams = ptNodeReader->fetchNodeInfoInBufferFromPtNodePos(ptNodePos); + if (ptNodeParams.representsNonWordInfo()) { + continue; + } if (!ptNodeWriter->markPtNodeAsWillBecomeNonTerminal(&ptNodeParams)) { AKLOGE("Cannot mark PtNode as willBecomeNonterminal. PtNode pos: %d", ptNodePos); return false; } - priorityQueue.pop(); } return true; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.cpp index d3e0c237f..4a126ff85 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.cpp @@ -33,7 +33,7 @@ namespace latinime { const int mmapFd = open(path, O_RDONLY); if (mmapFd < 0) { AKLOGE("DICT: Can't open the source. path=%s errno=%d", path, errno); - return MmappedBufferPtr(nullptr); + return nullptr; } const int pagesize = sysconf(_SC_PAGESIZE); const int offset = bufferOffset % pagesize; @@ -45,13 +45,13 @@ namespace latinime { if (mmappedBuffer == MAP_FAILED) { AKLOGE("DICT: Can't mmap dictionary. errno=%d", errno); close(mmapFd); - return MmappedBufferPtr(nullptr); + return nullptr; } uint8_t *const buffer = static_cast<uint8_t *>(mmappedBuffer) + offset; if (!buffer) { AKLOGE("DICT: buffer is null"); close(mmapFd); - return MmappedBufferPtr(nullptr); + return nullptr; } return MmappedBufferPtr(new MmappedBuffer(buffer, bufferSize, mmappedBuffer, alignedSize, mmapFd, isUpdatable)); @@ -61,7 +61,7 @@ namespace latinime { const char *const path, const bool isUpdatable) { const int fileSize = FileUtils::getFileSize(path); if (fileSize == -1) { - return MmappedBufferPtr(nullptr); + return nullptr; } else if (fileSize == 0) { return MmappedBufferPtr(new MmappedBuffer(isUpdatable)); } else { @@ -76,7 +76,7 @@ namespace latinime { const int filePathLength = snprintf(filePath, filePathBufferSize, "%s%s", dirPath, fileName); if (filePathLength >= filePathBufferSize) { - return MmappedBufferPtr(nullptr); + return nullptr; } return openBuffer(filePath, isUpdatable); } diff --git a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp index fa9600c74..3fc566e7a 100644 --- a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp +++ b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp @@ -37,6 +37,7 @@ const float ScoringParams::DISTANCE_WEIGHT_LENGTH = 0.1524f; const float ScoringParams::PROXIMITY_COST = 0.0694f; const float ScoringParams::FIRST_CHAR_PROXIMITY_COST = 0.072f; const float ScoringParams::FIRST_PROXIMITY_COST = 0.07788f; +const float ScoringParams::INTENTIONAL_OMISSION_COST = 0.1f; const float ScoringParams::OMISSION_COST = 0.467f; const float ScoringParams::OMISSION_COST_SAME_CHAR = 0.345f; const float ScoringParams::OMISSION_COST_FIRST_CHAR = 0.5256f; diff --git a/native/jni/src/suggest/policyimpl/typing/scoring_params.h b/native/jni/src/suggest/policyimpl/typing/scoring_params.h index b66962019..b12de6d87 100644 --- a/native/jni/src/suggest/policyimpl/typing/scoring_params.h +++ b/native/jni/src/suggest/policyimpl/typing/scoring_params.h @@ -44,6 +44,7 @@ class ScoringParams { static const float PROXIMITY_COST; static const float FIRST_CHAR_PROXIMITY_COST; static const float FIRST_PROXIMITY_COST; + static const float INTENTIONAL_OMISSION_COST; static const float OMISSION_COST; static const float OMISSION_COST_SAME_CHAR; static const float OMISSION_COST_FIRST_CHAR; diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h index 0ba439b47..84077174d 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h +++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h @@ -54,12 +54,15 @@ class TypingWeighting : public Weighting { float getOmissionCost(const DicNode *const parentDicNode, const DicNode *const dicNode) const { const bool isZeroCostOmission = parentDicNode->isZeroCostOmission(); + const bool isIntentionalOmission = parentDicNode->canBeIntentionalOmission(); const bool sameCodePoint = dicNode->isSameNodeCodePoint(parentDicNode); // If the traversal omitted the first letter then the dicNode should now be on the second. const bool isFirstLetterOmission = dicNode->getNodeCodePointCount() == 2; float cost = 0.0f; if (isZeroCostOmission) { cost = 0.0f; + } else if (isIntentionalOmission) { + cost = ScoringParams::INTENTIONAL_OMISSION_COST; } else if (isFirstLetterOmission) { cost = ScoringParams::OMISSION_COST_FIRST_CHAR; } else { |