about | summary | refs | log | tree | commit | diff | stats
path: root/native/jni/src
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni/src')
-rw-r--r-- native/jni/src/suggest/core/dictionary/error_type_utils.cpp 19
-rw-r--r-- native/jni/src/suggest/core/dictionary/error_type_utils.h 5
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp 16
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h 5
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp 20
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h 5
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp 6
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp 15
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h 2
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h 24
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp 2
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h 2
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp 41
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h 6
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp 6
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp 10
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h 5
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.cpp 71
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h 11
-rw-r--r-- native/jni/src/suggest/policyimpl/typing/typing_scoring.h 4
-rw-r--r-- native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp 29
-rw-r--r-- native/jni/src/utils/int_array_view.h 5
22 files changed, 240 insertions, 69 deletions
diff --git a/native/jni/src/suggest/core/dictionary/error_type_utils.cpp b/native/jni/src/suggest/core/dictionary/error_type_utils.cpp
index b6bf7a98c..1e2494e92 100644
--- a/native/jni/src/suggest/core/dictionary/error_type_utils.cpp
+++ b/native/jni/src/suggest/core/dictionary/error_type_utils.cpp
@@ -19,17 +19,18 @@
namespace latinime {
const ErrorTypeUtils::ErrorType ErrorTypeUtils::NOT_AN_ERROR = 0x0;
-const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_CASE_ERROR = 0x1;
-const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR = 0x2;
-const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_DIGRAPH = 0x4;
-const ErrorTypeUtils::ErrorType ErrorTypeUtils::INTENTIONAL_OMISSION = 0x8;
-const ErrorTypeUtils::ErrorType ErrorTypeUtils::EDIT_CORRECTION = 0x10;
-const ErrorTypeUtils::ErrorType ErrorTypeUtils::PROXIMITY_CORRECTION = 0x20;
-const ErrorTypeUtils::ErrorType ErrorTypeUtils::COMPLETION = 0x40;
-const ErrorTypeUtils::ErrorType ErrorTypeUtils::NEW_WORD = 0x80;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_WRONG_CASE = 0x1;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_MISSING_ACCENT = 0x2;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_WRONG_ACCENT = 0x4;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_DIGRAPH = 0x8;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::INTENTIONAL_OMISSION = 0x10;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::EDIT_CORRECTION = 0x20;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::PROXIMITY_CORRECTION = 0x40;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::COMPLETION = 0x80;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::NEW_WORD = 0x100;
const ErrorTypeUtils::ErrorType ErrorTypeUtils::ERRORS_TREATED_AS_AN_EXACT_MATCH =
- NOT_AN_ERROR | MATCH_WITH_CASE_ERROR | MATCH_WITH_ACCENT_ERROR | MATCH_WITH_DIGRAPH;
+ NOT_AN_ERROR | MATCH_WITH_WRONG_CASE | MATCH_WITH_MISSING_ACCENT | MATCH_WITH_DIGRAPH;
const ErrorTypeUtils::ErrorType
ErrorTypeUtils::ERRORS_TREATED_AS_AN_EXACT_MATCH_WITH_INTENTIONAL_OMISSION =
diff --git a/native/jni/src/suggest/core/dictionary/error_type_utils.h b/native/jni/src/suggest/core/dictionary/error_type_utils.h
index e3e76b238..fd1d5fcff 100644
--- a/native/jni/src/suggest/core/dictionary/error_type_utils.h
+++ b/native/jni/src/suggest/core/dictionary/error_type_utils.h
@@ -30,8 +30,9 @@ class ErrorTypeUtils {
typedef uint32_t ErrorType;
static const ErrorType NOT_AN_ERROR;
- static const ErrorType MATCH_WITH_CASE_ERROR;
- static const ErrorType MATCH_WITH_ACCENT_ERROR;
+ static const ErrorType MATCH_WITH_WRONG_CASE;
+ static const ErrorType MATCH_WITH_MISSING_ACCENT;
+ static const ErrorType MATCH_WITH_WRONG_ACCENT;
static const ErrorType MATCH_WITH_DIGRAPH;
// Treat error as an intentional omission when the CorrectionType is omission and the node can
// be intentional omission.
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp
index 278f2b199..97a8bcc98 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp
@@ -234,8 +234,8 @@ bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
bool Ver4PatriciaTrieNodeWriter::addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) {
if (!mBigramPolicy->addNewEntry(prevWordIds[0], wordId, bigramProperty, outAddedNewEntry)) {
- AKLOGE("Cannot add new bigram entry. terminalId: %d, targetTerminalId: %d",
- sourcePtNodeParams->getTerminalId(), targetPtNodeParam->getTerminalId());
+ AKLOGE("Cannot add new bigram entry. prevWordId: %d, wordId: %d",
+ prevWordIds[0], wordId);
return false;
}
const int ptNodePos =
@@ -425,6 +425,18 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeFlags(const int ptNodePos,
return true;
}
+bool Ver4PatriciaTrieNodeWriter::suppressUnigramEntry(const PtNodeParams *const ptNodeParams) {
+ if (!mHeaderPolicy->hasHistoricalInfoOfWords()) {
+ // Require historical info to suppress unigram entry.
+ return false;
+ }
+ const HistoricalInfo suppressedHistorycalInfo(0 /* timestamp */, 0 /* level */, 0 /* count */);
+ const ProbabilityEntry probabilityEntryToWrite =
+ ProbabilityEntry().createEntryWithUpdatedHistoricalInfo(&suppressedHistorycalInfo);
+ return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
+ ptNodeParams->getTerminalId(), &probabilityEntryToWrite);
+}
+
} // namespace v402
} // namespace backward
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h
index d49d9a666..9d8a55bff 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h
@@ -111,6 +111,11 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
bool updatePtNodeHasBigramsAndShortcutTargetsFlags(const PtNodeParams *const ptNodeParams);
+ // Suppress the unigram so that the word is not used for generating suggestions. This method
+ // works only for dictionaries with historical info. Suppressed entries are still included in
+ // the unigram count; they will be removed from the dictionary during GC.
+ bool suppressUnigramEntry(const PtNodeParams *const ptNodeParams);
+
private:
DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeWriter);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
index 1296b8acd..9c6452e40 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
@@ -210,7 +210,7 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int le
}
for (const auto &shortcut : unigramProperty->getShortcuts()) {
if (shortcut.getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
- AKLOGE("One of shortcut targets is too long to insert to the dictionary, length: %d",
+ AKLOGE("One of shortcut targets is too long to insert to the dictionary, length: %zd",
shortcut.getTargetCodePoints()->size());
return false;
}
@@ -245,7 +245,7 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int le
if (!mUpdatingHelper.addShortcutTarget(wordPos,
shortcut.getTargetCodePoints()->data(),
shortcut.getTargetCodePoints()->size(), shortcut.getProbability())) {
- AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %d, "
+ AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %zd, "
"probability: %d", wordPos, shortcut.getTargetCodePoints()->size(),
shortcut.getProbability());
return false;
@@ -258,6 +258,20 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int le
}
}
+bool Ver4PatriciaTriePolicy::removeUnigramEntry(const int *const word, const int length) {
+ if (!mBuffers->isUpdatable()) {
+ AKLOGI("Warning: removeUnigramEntry() is called for non-updatable dictionary.");
+ return false;
+ }
+ const int ptNodePos = getTerminalPtNodePositionOfWord(word, length,
+ false /* forceLowerCaseSearch */);
+ if (ptNodePos == NOT_A_DICT_POS) {
+ return false;
+ }
+ const PtNodeParams ptNodeParams = mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
+ return mNodeWriter.suppressUnigramEntry(&ptNodeParams);
+}
+
bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
const BigramProperty *const bigramProperty) {
if (!mBuffers->isUpdatable()) {
@@ -275,7 +289,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
}
if (bigramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
AKLOGE("The word is too long to insert the ngram to the dictionary. "
- "length: %d", bigramProperty->getTargetCodePoints()->size());
+ "length: %zd", bigramProperty->getTargetCodePoints()->size());
return false;
}
int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
index 9e989b268..d77499636 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
@@ -108,10 +108,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
bool addUnigramEntry(const int *const word, const int length,
const UnigramProperty *const unigramProperty);
- bool removeUnigramEntry(const int *const word, const int length) {
- // Removing unigram entry is not supported.
- return false;
- }
+ bool removeUnigramEntry(const int *const word, const int length);
bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
const BigramProperty *const bigramProperty);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
index e4ea3da16..9fa93efc9 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
@@ -111,8 +111,7 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str
return nullptr;
}
const FormatUtils::FORMAT_VERSION formatVersion = FormatUtils::detectFormatVersion(
- mmappedBuffer->getReadOnlyByteArrayView().data(),
- mmappedBuffer->getReadOnlyByteArrayView().size());
+ mmappedBuffer->getReadOnlyByteArrayView());
switch (formatVersion) {
case FormatUtils::VERSION_2:
AKLOGE("Given path is a directory but the format is version 2. path: %s", path);
@@ -174,8 +173,7 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str
if (!mmappedBuffer) {
return nullptr;
}
- switch (FormatUtils::detectFormatVersion(mmappedBuffer->getReadOnlyByteArrayView().data(),
- mmappedBuffer->getReadOnlyByteArrayView().size())) {
+ switch (FormatUtils::detectFormatVersion(mmappedBuffer->getReadOnlyByteArrayView())) {
case FormatUtils::VERSION_2:
return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(
new PatriciaTriePolicy(std::move(mmappedBuffer)));
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
index 5dc91ba10..f3bc4a0cb 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
@@ -46,7 +46,7 @@ ProbabilityEntry LanguageModelDictContent::getNgramProbabilityEntry(
bool LanguageModelDictContent::setNgramProbabilityEntry(const WordIdArrayView prevWordIds,
const int terminalId, const ProbabilityEntry *const probabilityEntry) {
- const int bitmapEntryIndex = getBitmapEntryIndex(prevWordIds);
+ const int bitmapEntryIndex = createAndGetBitmapEntryIndex(prevWordIds);
if (bitmapEntryIndex == TrieMap::INVALID_INDEX) {
return false;
}
@@ -80,6 +80,19 @@ bool LanguageModelDictContent::runGCInner(
return true;
}
+int LanguageModelDictContent::createAndGetBitmapEntryIndex(const WordIdArrayView prevWordIds) {
+ if (prevWordIds.empty()) {
+ return mTrieMap.getRootBitmapEntryIndex();
+ }
+ const int lastBitmapEntryIndex =
+ getBitmapEntryIndex(prevWordIds.limit(prevWordIds.size() - 1));
+ if (lastBitmapEntryIndex == TrieMap::INVALID_INDEX) {
+ return TrieMap::INVALID_INDEX;
+ }
+ return mTrieMap.getNextLevelBitmapEntryIndex(prevWordIds[prevWordIds.size() - 1],
+ lastBitmapEntryIndex);
+}
+
int LanguageModelDictContent::getBitmapEntryIndex(const WordIdArrayView prevWordIds) const {
int bitmapEntryIndex = mTrieMap.getRootBitmapEntryIndex();
for (const int wordId : prevWordIds) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h
index 18f2e0170..104ee2520 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h
@@ -76,7 +76,7 @@ class LanguageModelDictContent {
bool runGCInner(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
const TrieMap::TrieMapRange trieMapRange, const int nextLevelBitmapEntryIndex,
int *const outNgramCount);
-
+ int createAndGetBitmapEntryIndex(const WordIdArrayView prevWordIds);
int getBitmapEntryIndex(const WordIdArrayView prevWordIds) const;
};
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h
index feff6b57f..ed77bd20e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h
@@ -21,6 +21,8 @@
#include <cstdint>
#include "defines.h"
+#include "suggest/core/dictionary/property/bigram_property.h"
+#include "suggest/core/dictionary/property/unigram_property.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/utils/historical_info.h"
@@ -45,6 +47,20 @@ class ProbabilityEntry {
const HistoricalInfo *const historicalInfo)
: mFlags(flags), mProbability(probability), mHistoricalInfo(*historicalInfo) {}
+ // Create from unigram property.
+ // TODO: Set flags.
+ ProbabilityEntry(const UnigramProperty *const unigramProperty)
+ : mFlags(0), mProbability(unigramProperty->getProbability()),
+ mHistoricalInfo(unigramProperty->getTimestamp(), unigramProperty->getLevel(),
+ unigramProperty->getCount()) {}
+
+ // Create from bigram property.
+ // TODO: Set flags.
+ ProbabilityEntry(const BigramProperty *const bigramProperty)
+ : mFlags(0), mProbability(bigramProperty->getProbability()),
+ mHistoricalInfo(bigramProperty->getTimestamp(), bigramProperty->getLevel(),
+ bigramProperty->getCount()) {}
+
const ProbabilityEntry createEntryWithUpdatedProbability(const int probability) const {
return ProbabilityEntry(mFlags, probability, &mHistoricalInfo);
}
@@ -54,6 +70,10 @@ class ProbabilityEntry {
return ProbabilityEntry(mFlags, mProbability, historicalInfo);
}
+ bool isValid() const {
+ return (mProbability != NOT_A_PROBABILITY) || hasHistoricalInfo();
+ }
+
bool hasHistoricalInfo() const {
return mHistoricalInfo.isValid();
}
@@ -89,7 +109,7 @@ class ProbabilityEntry {
static ProbabilityEntry decode(const uint64_t encodedEntry, const bool hasHistoricalInfo) {
if (hasHistoricalInfo) {
const int flags = readFromEncodedEntry(encodedEntry,
- Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE,
+ Ver4DictConstants::FLAGS_IN_LANGUAGE_MODEL_SIZE,
Ver4DictConstants::TIME_STAMP_FIELD_SIZE
+ Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
+ Ver4DictConstants::WORD_COUNT_FIELD_SIZE);
@@ -106,7 +126,7 @@ class ProbabilityEntry {
return ProbabilityEntry(flags, NOT_A_PROBABILITY, &historicalInfo);
} else {
const int flags = readFromEncodedEntry(encodedEntry,
- Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE,
+ Ver4DictConstants::FLAGS_IN_LANGUAGE_MODEL_SIZE,
Ver4DictConstants::PROBABILITY_SIZE);
const int probability = readFromEncodedEntry(encodedEntry,
Ver4DictConstants::PROBABILITY_SIZE, 0 /* pos */);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
index 93d4e562d..e622442ba 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
@@ -46,7 +46,7 @@ const int Ver4DictConstants::SHORTCUT_BUFFERS_INDEX =
const int Ver4DictConstants::NOT_A_TERMINAL_ID = -1;
const int Ver4DictConstants::PROBABILITY_SIZE = 1;
-const int Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE = 1;
+const int Ver4DictConstants::FLAGS_IN_LANGUAGE_MODEL_SIZE = 1;
const int Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3;
const int Ver4DictConstants::NOT_A_TERMINAL_ADDRESS = 0;
const int Ver4DictConstants::TERMINAL_ID_FIELD_SIZE = 4;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
index 6950ca70f..8d29f60d4 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
@@ -41,7 +41,7 @@ class Ver4DictConstants {
static const int NOT_A_TERMINAL_ID;
static const int PROBABILITY_SIZE;
- static const int FLAGS_IN_PROBABILITY_FILE_SIZE;
+ static const int FLAGS_IN_LANGUAGE_MODEL_SIZE;
static const int TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE;
static const int NOT_A_TERMINAL_ADDRESS;
static const int TERMINAL_ID_FIELD_SIZE;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
index 857222f5d..2c848cb29 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
@@ -145,10 +145,11 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeUnigramProperty(
const ProbabilityEntry originalProbabilityEntry =
mBuffers->getLanguageModelDictContent()->getProbabilityEntry(
toBeUpdatedPtNodeParams->getTerminalId());
- const ProbabilityEntry probabilityEntry = createUpdatedEntryFrom(&originalProbabilityEntry,
- unigramProperty);
+ const ProbabilityEntry probabilityEntryOfUnigramProperty = ProbabilityEntry(unigramProperty);
+ const ProbabilityEntry updatedProbabilityEntry =
+ createUpdatedEntryFrom(&originalProbabilityEntry, &probabilityEntryOfUnigramProperty);
return mBuffers->getMutableLanguageModelDictContent()->setProbabilityEntry(
- toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry);
+ toBeUpdatedPtNodeParams->getTerminalId(), &updatedProbabilityEntry);
}
bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(
@@ -216,16 +217,36 @@ bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
}
// Write probability.
ProbabilityEntry newProbabilityEntry;
+ const ProbabilityEntry probabilityEntryOfUnigramProperty = ProbabilityEntry(unigramProperty);
const ProbabilityEntry probabilityEntryToWrite = createUpdatedEntryFrom(
- &newProbabilityEntry, unigramProperty);
+ &newProbabilityEntry, &probabilityEntryOfUnigramProperty);
return mBuffers->getMutableLanguageModelDictContent()->setProbabilityEntry(
terminalId, &probabilityEntryToWrite);
}
bool Ver4PatriciaTrieNodeWriter::addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) {
+ // TODO: Support n-gram.
+ LanguageModelDictContent *const languageModelDictContent =
+ mBuffers->getMutableLanguageModelDictContent();
+ const ProbabilityEntry probabilityEntry =
+ languageModelDictContent->getNgramProbabilityEntry(
+ prevWordIds.limit(1 /* maxSize */), wordId);
+ const ProbabilityEntry probabilityEntryOfBigramProperty(bigramProperty);
+ const ProbabilityEntry updatedProbabilityEntry = createUpdatedEntryFrom(
+ &probabilityEntry, &probabilityEntryOfBigramProperty);
+ if (!languageModelDictContent->setNgramProbabilityEntry(
+ prevWordIds.limit(1 /* maxSize */), wordId, &updatedProbabilityEntry)) {
+ AKLOGE("Cannot add new ngram entry. prevWordId: %d, wordId: %d",
+ prevWordIds[0], wordId);
+ return false;
+ }
+ if (!probabilityEntry.isValid() && outAddedNewBigram) {
+ *outAddedNewBigram = true;
+ }
+ // TODO: Remove.
if (!mBigramPolicy->addNewEntry(prevWordIds[0], wordId, bigramProperty, outAddedNewBigram)) {
- AKLOGE("Cannot add new bigram entry. terminalId: %d, targetTerminalId: %d",
+ AKLOGE("Cannot add new bigram entry. prevWordId: %d, wordId: %d",
prevWordIds[0], wordId);
return false;
}
@@ -234,6 +255,7 @@ bool Ver4PatriciaTrieNodeWriter::addNgramEntry(const WordIdArrayView prevWordIds
bool Ver4PatriciaTrieNodeWriter::removeNgramEntry(const WordIdArrayView prevWordIds,
const int wordId) {
+ // TODO: Remove.
return mBigramPolicy->removeEntry(prevWordIds[0], wordId);
}
@@ -352,20 +374,19 @@ bool Ver4PatriciaTrieNodeWriter::writePtNodeAndGetTerminalIdAndAdvancePosition(
const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom(
const ProbabilityEntry *const originalProbabilityEntry,
- const UnigramProperty *const unigramProperty) const {
+ const ProbabilityEntry *const probabilityEntry) const {
// TODO: Consolidate historical info and probability.
if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
- const HistoricalInfo historicalInfoForUpdate(unigramProperty->getTimestamp(),
- unigramProperty->getLevel(), unigramProperty->getCount());
const HistoricalInfo updatedHistoricalInfo =
ForgettingCurveUtils::createUpdatedHistoricalInfo(
originalProbabilityEntry->getHistoricalInfo(),
- unigramProperty->getProbability(), &historicalInfoForUpdate, mHeaderPolicy);
+ probabilityEntry->getProbability(), probabilityEntry->getHistoricalInfo(),
+ mHeaderPolicy);
return originalProbabilityEntry->createEntryWithUpdatedHistoricalInfo(
&updatedHistoricalInfo);
} else {
return originalProbabilityEntry->createEntryWithUpdatedProbability(
- unigramProperty->getProbability());
+ probabilityEntry->getProbability());
}
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
index 6703dba04..5d73b6ea3 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
@@ -98,12 +98,12 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
const PtNodeParams *const ptNodeParams, int *const outTerminalId,
int *const ptNodeWritingPos);
- // Create updated probability entry using given unigram property. In addition to the
+ // Create updated probability entry using given probability entry. In addition to the
// probability, this method updates historical information if needed.
- // TODO: Update flags belonging to the unigram property.
+ // TODO: Update flags.
const ProbabilityEntry createUpdatedEntryFrom(
const ProbabilityEntry *const originalProbabilityEntry,
- const UnigramProperty *const unigramProperty) const;
+ const ProbabilityEntry *const probabilityEntry) const;
bool updatePtNodeFlags(const int ptNodePos, const bool isBlacklisted, const bool isNotAWord,
const bool isTerminal, const bool hasMultipleChars);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index 723808399..d8f4595ff 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -200,7 +200,7 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int le
}
for (const auto &shortcut : unigramProperty->getShortcuts()) {
if (shortcut.getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
- AKLOGE("One of shortcut targets is too long to insert to the dictionary, length: %d",
+ AKLOGE("One of shortcut targets is too long to insert to the dictionary, length: %zd",
shortcut.getTargetCodePoints()->size());
return false;
}
@@ -235,7 +235,7 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int le
if (!mUpdatingHelper.addShortcutTarget(wordPos,
shortcut.getTargetCodePoints()->data(),
shortcut.getTargetCodePoints()->size(), shortcut.getProbability())) {
- AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %d, "
+ AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %zd, "
"probability: %d", wordPos, shortcut.getTargetCodePoints()->size(),
shortcut.getProbability());
return false;
@@ -286,7 +286,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
}
if (bigramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
AKLOGE("The word is too long to insert the ngram to the dictionary. "
- "length: %d", bigramProperty->getTargetCodePoints()->size());
+ "length: %zd", bigramProperty->getTargetCodePoints()->size());
return false;
}
int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
index 1916ea560..e6e7167c2 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
@@ -23,7 +23,7 @@ namespace latinime {
const uint32_t FormatUtils::MAGIC_NUMBER = 0x9BC13AFE;
// Magic number (4 bytes), version (2 bytes), flags (2 bytes), header size (4 bytes) = 12
-const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 12;
+const size_t FormatUtils::DICTIONARY_MINIMUM_SIZE = 12;
/* static */ FormatUtils::FORMAT_VERSION FormatUtils::getFormatVersion(const int formatVersion) {
switch (formatVersion) {
@@ -40,14 +40,14 @@ const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 12;
}
}
/* static */ FormatUtils::FORMAT_VERSION FormatUtils::detectFormatVersion(
- const uint8_t *const dict, const int dictSize) {
+ const ReadOnlyByteArrayView dictBuffer) {
// The magic number is stored big-endian.
// If the dictionary is less than 4 bytes, we can't even read the magic number, so we don't
// understand this format.
- if (dictSize < DICTIONARY_MINIMUM_SIZE) {
+ if (dictBuffer.size() < DICTIONARY_MINIMUM_SIZE) {
return UNKNOWN_VERSION;
}
- const uint32_t magicNumber = ByteArrayUtils::readUint32(dict, 0);
+ const uint32_t magicNumber = ByteArrayUtils::readUint32(dictBuffer.data(), 0);
switch (magicNumber) {
case MAGIC_NUMBER:
// The layout of the header is as follows:
@@ -58,7 +58,7 @@ const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 12;
// Conceptually this converts the hardcoded value of the bytes in the file into
// the symbolic value we use in the code. But we want the constants to be the
// same so we use them for both here.
- return getFormatVersion(ByteArrayUtils::readUint16(dict, 4));
+ return getFormatVersion(ByteArrayUtils::readUint16(dictBuffer.data(), 4));
default:
return UNKNOWN_VERSION;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
index 55ad5799f..51ad9877c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
@@ -20,6 +20,7 @@
#include <cstdint>
#include "defines.h"
+#include "utils/byte_array_view.h"
namespace latinime {
@@ -42,12 +43,12 @@ class FormatUtils {
static const uint32_t MAGIC_NUMBER;
static FORMAT_VERSION getFormatVersion(const int formatVersion);
- static FORMAT_VERSION detectFormatVersion(const uint8_t *const dict, const int dictSize);
+ static FORMAT_VERSION detectFormatVersion(const ReadOnlyByteArrayView dictBuffer);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(FormatUtils);
- static const int DICTIONARY_MINIMUM_SIZE;
+ static const size_t DICTIONARY_MINIMUM_SIZE;
};
} // namespace latinime
#endif /* LATINIME_FORMAT_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.cpp
index 407b8efd0..e630aba9a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.cpp
@@ -26,6 +26,7 @@ const int TrieMap::FIELD1_SIZE = 3;
const int TrieMap::ENTRY_SIZE = FIELD0_SIZE + FIELD1_SIZE;
const uint32_t TrieMap::VALUE_FLAG = 0x400000;
const uint32_t TrieMap::VALUE_MASK = 0x3FFFFF;
+const uint32_t TrieMap::INVALID_VALUE_IN_KEY_VALUE_ENTRY = VALUE_MASK;
const uint32_t TrieMap::TERMINAL_LINK_FLAG = 0x800000;
const uint32_t TrieMap::TERMINAL_LINK_MASK = 0x7FFFFF;
const int TrieMap::NUM_OF_BITS_USED_FOR_ONE_LEVEL = 5;
@@ -34,6 +35,7 @@ const int TrieMap::MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL = 1 << NUM_OF_BITS_USED_FOR_O
const int TrieMap::ROOT_BITMAP_ENTRY_INDEX = 0;
const int TrieMap::ROOT_BITMAP_ENTRY_POS = MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL * FIELD0_SIZE;
const TrieMap::Entry TrieMap::EMPTY_BITMAP_ENTRY = TrieMap::Entry(0, 0);
+const int TrieMap::TERMINAL_LINKED_ENTRY_COUNT = 2; // Value entry and bitmap entry.
const uint64_t TrieMap::MAX_VALUE =
(static_cast<uint64_t>(1) << ((FIELD0_SIZE + FIELD1_SIZE) * CHAR_BIT)) - 1;
const int TrieMap::MAX_BUFFER_SIZE = TERMINAL_LINK_MASK * ENTRY_SIZE;
@@ -76,7 +78,7 @@ int TrieMap::getNextLevelBitmapEntryIndex(const int key, const int bitmapEntryIn
return terminalEntry.getValueEntryIndex() + 1;
}
// Create a value entry and a bitmap entry.
- const int valueEntryIndex = allocateTable(2 /* entryCount */);
+ const int valueEntryIndex = allocateTable(TERMINAL_LINKED_ENTRY_COUNT);
if (!writeEntry(Entry(0, terminalEntry.getValue()), valueEntryIndex)) {
return INVALID_INDEX;
}
@@ -108,6 +110,31 @@ bool TrieMap::save(FILE *const file) const {
return DictFileWritingUtils::writeBufferToFileTail(file, &mBuffer);
}
+bool TrieMap::remove(const int key, const int bitmapEntryIndex) { /*
+ * Removes the entry for `key` from the trie level whose bitmap entry is stored at
+ * `bitmapEntryIndex`.
+ *
+ * The terminal entry is overwritten in place with the invalid-value marker (VALUE_FLAG and
+ * INVALID_VALUE_IN_KEY_VALUE_ENTRY share no bits, so the XOR below simply combines them).
+ * If the entry, as it was before being overwritten, has a terminal link, the next-level
+ * bitmap entry is read first, then the linked value/bitmap pair is freed, and then
+ * removeInner() is called on that bitmap entry.
+ *
+ * Returns false if the key is not found or if any write or free operation fails;
+ * returns true otherwise.
+ */
+ const Entry bitmapEntry = readEntry(bitmapEntryIndex);
+ const uint32_t unsignedKey = static_cast<uint32_t>(key);
+ const int terminalEntryIndex = getTerminalEntryIndex(
+ unsignedKey, getBitShuffledKey(unsignedKey), bitmapEntry, 0 /* level */);
+ if (terminalEntryIndex == INVALID_INDEX) {
+ // Not found.
+ return false;
+ }
+ const Entry terminalEntry = readEntry(terminalEntryIndex);
+ if (!writeField1(VALUE_FLAG ^ INVALID_VALUE_IN_KEY_VALUE_ENTRY , terminalEntryIndex)) {
+ return false;
+ }
+ if (terminalEntry.hasTerminalLink()) {
+ const Entry nextLevelBitmapEntry = readEntry(terminalEntry.getValueEntryIndex() + 1);
+ if (!freeTable(terminalEntry.getValueEntryIndex(), TERMINAL_LINKED_ENTRY_COUNT)) {
+ return false;
+ }
+ if (!removeInner(nextLevelBitmapEntry)){
+ return false;
+ }
+ }
+ return true;
+}
+
/**
* Iterate next entry in a certain level.
*
@@ -129,7 +156,7 @@ const TrieMap::Result TrieMap::iterateNext(std::vector<TableIterationState> *con
if (entry.isBitmapEntry()) {
// Move to child.
iterationState->emplace_back(popCount(entry.getBitmap()), entry.getTableIndex());
- } else {
+ } else if (entry.isValidTerminalEntry()) {
if (outKey) {
*outKey = entry.getKey();
}
@@ -162,12 +189,12 @@ uint32_t TrieMap::getBitShuffledKey(const uint32_t key) const {
}
bool TrieMap::writeValue(const uint64_t value, const int terminalEntryIndex) {
- if (value <= VALUE_MASK) {
+ if (value < VALUE_MASK) {
// Write value into the terminal entry.
return writeField1(value | VALUE_FLAG, terminalEntryIndex);
}
// Create value entry and write value.
- const int valueEntryIndex = allocateTable(2 /* entryCount */);
+ const int valueEntryIndex = allocateTable(TERMINAL_LINKED_ENTRY_COUNT);
if (!writeEntry(Entry(value >> (FIELD1_SIZE * CHAR_BIT), value), valueEntryIndex)) {
return false;
}
@@ -227,6 +254,9 @@ int TrieMap::getTerminalEntryIndex(const uint32_t key, const uint32_t hashedKey,
// Move to the next level.
return getTerminalEntryIndex(key, hashedKey, entry, level + 1);
}
+ if (!entry.isValidTerminalEntry()) {
+ return INVALID_INDEX;
+ }
if (entry.getKey() == key) {
// Terminal entry is found.
return entryIndex;
@@ -287,6 +317,10 @@ bool TrieMap::putInternal(const uint32_t key, const uint64_t value, const uint32
// Bitmap entry is found. Go to the next level.
return putInternal(key, value, hashedKey, entryIndex, entry, level + 1);
}
+ if (!entry.isValidTerminalEntry()) {
+ // Overwrite invalid terminal entry.
+ return writeTerminalEntry(key, value, entryIndex);
+ }
if (entry.getKey() == key) {
// Terminal entry for the key is found. Update the value.
return updateValue(entry, value, entryIndex);
@@ -384,4 +418,33 @@ bool TrieMap::addNewEntryByExpandingTable(const uint32_t key, const uint64_t val
return true;
}
+bool TrieMap::removeInner(const Entry &bitmapEntry) { /*
+ * Releases the table referenced by `bitmapEntry` and everything reachable from it.
+ *
+ * The table holds popCount(bitmap) entries starting at the bitmap entry's table index.
+ * Nested bitmap entries are handled by recursion. Each remaining (terminal) entry is
+ * overwritten with the invalid-value marker; if the entry, as read before the overwrite,
+ * has a terminal link, its next-level bitmap entry is read, the linked value/bitmap pair
+ * is freed, and the next level is removed recursively. The table itself is freed last.
+ *
+ * Returns false as soon as any write or free operation fails; otherwise returns the
+ * result of freeing this table.
+ */
+ const int tableSize = popCount(bitmapEntry.getBitmap());
+ for (int i = 0; i < tableSize; ++i) {
+ const int entryIndex = bitmapEntry.getTableIndex() + i;
+ const Entry entry = readEntry(entryIndex);
+ if (entry.isBitmapEntry()) {
+ // Delete next bitmap entry recursively.
+ if (!removeInner(entry)) {
+ return false;
+ }
+ } else {
+ // Invalidate terminal entry just in case.
+ if (!writeField1(VALUE_FLAG ^ INVALID_VALUE_IN_KEY_VALUE_ENTRY , entryIndex)) {
+ return false;
+ }
+ if (entry.hasTerminalLink()) {
+ const Entry nextLevelBitmapEntry = readEntry(entry.getValueEntryIndex() + 1);
+ if (!freeTable(entry.getValueEntryIndex(), TERMINAL_LINKED_ENTRY_COUNT)) {
+ return false;
+ }
+ if (!removeInner(nextLevelBitmapEntry)) {
+ return false;
+ }
+ }
+ }
+ }
+ return freeTable(bitmapEntry.getTableIndex(), tableSize);
+}
+
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h b/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h
index 3e5c4010c..6d91790b2 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h
@@ -202,6 +202,8 @@ class TrieMap {
bool save(FILE *const file) const;
+ bool remove(const int key, const int bitmapEntryIndex);
+
private:
DISALLOW_COPY_AND_ASSIGN(TrieMap);
@@ -245,6 +247,11 @@ class TrieMap {
}
// For terminal entry.
+ AK_FORCE_INLINE bool isValidTerminalEntry() const { /*
+ * Returns true when this entry has a terminal link, or when its value bits
+ * (mData1 & VALUE_MASK) differ from INVALID_VALUE_IN_KEY_VALUE_ENTRY. It is false only
+ * for an entry without a terminal link whose value bits equal that marker.
+ */
+ return hasTerminalLink() || ((mData1 & VALUE_MASK) != INVALID_VALUE_IN_KEY_VALUE_ENTRY);
+ }
+
+ // For terminal entry.
AK_FORCE_INLINE uint32_t getValueEntryIndex() const {
return mData1 & TERMINAL_LINK_MASK;
}
@@ -272,6 +279,7 @@ class TrieMap {
static const int ENTRY_SIZE;
static const uint32_t VALUE_FLAG;
static const uint32_t VALUE_MASK;
+ static const uint32_t INVALID_VALUE_IN_KEY_VALUE_ENTRY;
static const uint32_t TERMINAL_LINK_FLAG;
static const uint32_t TERMINAL_LINK_MASK;
static const int NUM_OF_BITS_USED_FOR_ONE_LEVEL;
@@ -280,6 +288,7 @@ class TrieMap {
static const int ROOT_BITMAP_ENTRY_INDEX;
static const int ROOT_BITMAP_ENTRY_POS;
static const Entry EMPTY_BITMAP_ENTRY;
+ static const int TERMINAL_LINKED_ENTRY_COUNT;
static const int MAX_BUFFER_SIZE;
uint32_t getBitShuffledKey(const uint32_t key) const;
@@ -378,6 +387,8 @@ class TrieMap {
AK_FORCE_INLINE int getTailEntryIndex() const {
return (mBuffer.getTailPosition() - ROOT_BITMAP_ENTRY_POS) / ENTRY_SIZE;
}
+
+ bool removeInner(const Entry &bitmapEntry);
};
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_scoring.h b/native/jni/src/suggest/policyimpl/typing/typing_scoring.h
index 04cb6603a..52c4251f0 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_scoring.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_scoring.h
@@ -51,10 +51,10 @@ class TypingScoring : public Scoring {
}
if (boostExactMatches && ErrorTypeUtils::isExactMatch(containedErrorTypes)) {
score += ScoringParams::EXACT_MATCH_PROMOTION;
- if ((ErrorTypeUtils::MATCH_WITH_CASE_ERROR & containedErrorTypes) != 0) {
+ if ((ErrorTypeUtils::MATCH_WITH_WRONG_CASE & containedErrorTypes) != 0) {
score -= ScoringParams::CASE_ERROR_PENALTY_FOR_EXACT_MATCH;
}
- if ((ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR & containedErrorTypes) != 0) {
+ if ((ErrorTypeUtils::MATCH_WITH_MISSING_ACCENT & containedErrorTypes) != 0) {
score -= ScoringParams::ACCENT_ERROR_PENALTY_FOR_EXACT_MATCH;
}
if ((ErrorTypeUtils::MATCH_WITH_DIGRAPH & containedErrorTypes) != 0) {
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp b/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp
index 54f65c786..1d590c353 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp
+++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp
@@ -36,25 +36,34 @@ ErrorTypeUtils::ErrorType TypingWeighting::getErrorType(const CorrectionType cor
// Compare the node code point with original primary code point on the keyboard.
const ProximityInfoState *const pInfoState =
traverseSession->getProximityInfoState(0);
- const int primaryOriginalCodePoint = pInfoState->getPrimaryOriginalCodePointAt(
+ const int primaryCodePoint = pInfoState->getPrimaryCodePointAt(
dicNode->getInputIndex(0));
const int nodeCodePoint = dicNode->getNodeCodePoint();
- if (primaryOriginalCodePoint == nodeCodePoint) {
+ // TODO: Check whether the input code point is on the keyboard.
+ if (primaryCodePoint == nodeCodePoint) {
// Node code point is same as original code point on the keyboard.
return ErrorTypeUtils::NOT_AN_ERROR;
- } else if (CharUtils::toLowerCase(primaryOriginalCodePoint) ==
+ } else if (CharUtils::toLowerCase(primaryCodePoint) ==
CharUtils::toLowerCase(nodeCodePoint)) {
// Only cases of the code points are different.
- return ErrorTypeUtils::MATCH_WITH_CASE_ERROR;
- } else if (CharUtils::toBaseCodePoint(primaryOriginalCodePoint) ==
- CharUtils::toBaseCodePoint(nodeCodePoint)) {
+ return ErrorTypeUtils::MATCH_WITH_WRONG_CASE;
+ } else if (primaryCodePoint == CharUtils::toBaseCodePoint(nodeCodePoint)) {
// Node code point is a variant of original code point.
- return ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR;
- } else {
+ return ErrorTypeUtils::MATCH_WITH_MISSING_ACCENT;
+ } else if (CharUtils::toBaseCodePoint(primaryCodePoint)
+ == CharUtils::toBaseCodePoint(nodeCodePoint)) {
+ // Base code points are the same but the code point is intentionally input.
+ return ErrorTypeUtils::MATCH_WITH_WRONG_ACCENT;
+ } else if (CharUtils::toLowerCase(primaryCodePoint)
+ == CharUtils::toBaseLowerCase(nodeCodePoint)) {
// Node code point is a variant of original code point and the cases are also
// different.
- return ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR
- | ErrorTypeUtils::MATCH_WITH_CASE_ERROR;
+ return ErrorTypeUtils::MATCH_WITH_MISSING_ACCENT
+ | ErrorTypeUtils::MATCH_WITH_WRONG_CASE;
+ } else {
+ // Base code points are the same and the cases are different.
+ return ErrorTypeUtils::MATCH_WITH_WRONG_ACCENT
+ | ErrorTypeUtils::MATCH_WITH_WRONG_CASE;
}
}
break;
diff --git a/native/jni/src/utils/int_array_view.h b/native/jni/src/utils/int_array_view.h
index c1ddc9812..53f2d2971 100644
--- a/native/jni/src/utils/int_array_view.h
+++ b/native/jni/src/utils/int_array_view.h
@@ -91,6 +91,11 @@ class IntArrayView {
return mPtr + mSize;
}
+ // Returns a view built from the same mPtr whose size is min(maxSize, mSize).
+ const IntArrayView limit(const size_t maxSize) const {
+ return IntArrayView(mPtr, std::min(maxSize, mSize));
+ }
+
private:
DISALLOW_ASSIGNMENT_OPERATOR(IntArrayView);