aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/src
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni/src')
-rw-r--r--native/jni/src/suggest/core/dictionary/error_type_utils.cpp19
-rw-r--r--native/jni/src/suggest/core/dictionary/error_type_utils.h5
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp12
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h5
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp20
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h5
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp6
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h8
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp17
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h6
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h36
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h8
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h7
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h15
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h6
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp28
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h3
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h3
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp19
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp11
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp10
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h5
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.cpp6
-rw-r--r--native/jni/src/suggest/policyimpl/typing/typing_scoring.h4
-rw-r--r--native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp29
-rw-r--r--native/jni/src/utils/byte_array_view.h8
27 files changed, 181 insertions, 122 deletions
diff --git a/native/jni/src/suggest/core/dictionary/error_type_utils.cpp b/native/jni/src/suggest/core/dictionary/error_type_utils.cpp
index b6bf7a98c..1e2494e92 100644
--- a/native/jni/src/suggest/core/dictionary/error_type_utils.cpp
+++ b/native/jni/src/suggest/core/dictionary/error_type_utils.cpp
@@ -19,17 +19,18 @@
namespace latinime {
const ErrorTypeUtils::ErrorType ErrorTypeUtils::NOT_AN_ERROR = 0x0;
-const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_CASE_ERROR = 0x1;
-const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR = 0x2;
-const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_DIGRAPH = 0x4;
-const ErrorTypeUtils::ErrorType ErrorTypeUtils::INTENTIONAL_OMISSION = 0x8;
-const ErrorTypeUtils::ErrorType ErrorTypeUtils::EDIT_CORRECTION = 0x10;
-const ErrorTypeUtils::ErrorType ErrorTypeUtils::PROXIMITY_CORRECTION = 0x20;
-const ErrorTypeUtils::ErrorType ErrorTypeUtils::COMPLETION = 0x40;
-const ErrorTypeUtils::ErrorType ErrorTypeUtils::NEW_WORD = 0x80;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_WRONG_CASE = 0x1;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_MISSING_ACCENT = 0x2;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_WRONG_ACCENT = 0x4;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_DIGRAPH = 0x8;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::INTENTIONAL_OMISSION = 0x10;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::EDIT_CORRECTION = 0x20;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::PROXIMITY_CORRECTION = 0x40;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::COMPLETION = 0x80;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::NEW_WORD = 0x100;
const ErrorTypeUtils::ErrorType ErrorTypeUtils::ERRORS_TREATED_AS_AN_EXACT_MATCH =
- NOT_AN_ERROR | MATCH_WITH_CASE_ERROR | MATCH_WITH_ACCENT_ERROR | MATCH_WITH_DIGRAPH;
+ NOT_AN_ERROR | MATCH_WITH_WRONG_CASE | MATCH_WITH_MISSING_ACCENT | MATCH_WITH_DIGRAPH;
const ErrorTypeUtils::ErrorType
ErrorTypeUtils::ERRORS_TREATED_AS_AN_EXACT_MATCH_WITH_INTENTIONAL_OMISSION =
diff --git a/native/jni/src/suggest/core/dictionary/error_type_utils.h b/native/jni/src/suggest/core/dictionary/error_type_utils.h
index e3e76b238..fd1d5fcff 100644
--- a/native/jni/src/suggest/core/dictionary/error_type_utils.h
+++ b/native/jni/src/suggest/core/dictionary/error_type_utils.h
@@ -30,8 +30,9 @@ class ErrorTypeUtils {
typedef uint32_t ErrorType;
static const ErrorType NOT_AN_ERROR;
- static const ErrorType MATCH_WITH_CASE_ERROR;
- static const ErrorType MATCH_WITH_ACCENT_ERROR;
+ static const ErrorType MATCH_WITH_WRONG_CASE;
+ static const ErrorType MATCH_WITH_MISSING_ACCENT;
+ static const ErrorType MATCH_WITH_WRONG_ACCENT;
static const ErrorType MATCH_WITH_DIGRAPH;
// Treat error as an intentional omission when the CorrectionType is omission and the node can
// be intentional omission.
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp
index f7179f68d..97a8bcc98 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp
@@ -425,6 +425,18 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeFlags(const int ptNodePos,
return true;
}
+bool Ver4PatriciaTrieNodeWriter::suppressUnigramEntry(const PtNodeParams *const ptNodeParams) {
+ if (!mHeaderPolicy->hasHistoricalInfoOfWords()) {
+ // Require historical info to suppress unigram entry.
+ return false;
+ }
+ const HistoricalInfo suppressedHistorycalInfo(0 /* timestamp */, 0 /* level */, 0 /* count */);
+ const ProbabilityEntry probabilityEntryToWrite =
+ ProbabilityEntry().createEntryWithUpdatedHistoricalInfo(&suppressedHistorycalInfo);
+ return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
+ ptNodeParams->getTerminalId(), &probabilityEntryToWrite);
+}
+
} // namespace v402
} // namespace backward
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h
index d49d9a666..9d8a55bff 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h
@@ -111,6 +111,11 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
bool updatePtNodeHasBigramsAndShortcutTargetsFlags(const PtNodeParams *const ptNodeParams);
+ // Suppress unigram not to use the word for generating suggestions. So, this method can be used
+ // only for dictionaries with historical info. Also, suppressed entries are included in unigram
+ // count. They will be removed from the dictionary during GC.
+ bool suppressUnigramEntry(const PtNodeParams *const ptNodeParams);
+
private:
DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeWriter);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
index 1296b8acd..9c6452e40 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
@@ -210,7 +210,7 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int le
}
for (const auto &shortcut : unigramProperty->getShortcuts()) {
if (shortcut.getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
- AKLOGE("One of shortcut targets is too long to insert to the dictionary, length: %d",
+ AKLOGE("One of shortcut targets is too long to insert to the dictionary, length: %zd",
shortcut.getTargetCodePoints()->size());
return false;
}
@@ -245,7 +245,7 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int le
if (!mUpdatingHelper.addShortcutTarget(wordPos,
shortcut.getTargetCodePoints()->data(),
shortcut.getTargetCodePoints()->size(), shortcut.getProbability())) {
- AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %d, "
+ AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %zd, "
"probability: %d", wordPos, shortcut.getTargetCodePoints()->size(),
shortcut.getProbability());
return false;
@@ -258,6 +258,20 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int le
}
}
+bool Ver4PatriciaTriePolicy::removeUnigramEntry(const int *const word, const int length) {
+ if (!mBuffers->isUpdatable()) {
+ AKLOGI("Warning: removeUnigramEntry() is called for non-updatable dictionary.");
+ return false;
+ }
+ const int ptNodePos = getTerminalPtNodePositionOfWord(word, length,
+ false /* forceLowerCaseSearch */);
+ if (ptNodePos == NOT_A_DICT_POS) {
+ return false;
+ }
+ const PtNodeParams ptNodeParams = mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
+ return mNodeWriter.suppressUnigramEntry(&ptNodeParams);
+}
+
bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
const BigramProperty *const bigramProperty) {
if (!mBuffers->isUpdatable()) {
@@ -275,7 +289,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
}
if (bigramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
AKLOGE("The word is too long to insert the ngram to the dictionary. "
- "length: %d", bigramProperty->getTargetCodePoints()->size());
+ "length: %zd", bigramProperty->getTargetCodePoints()->size());
return false;
}
int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
index 9e989b268..d77499636 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
@@ -108,10 +108,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
bool addUnigramEntry(const int *const word, const int length,
const UnigramProperty *const unigramProperty);
- bool removeUnigramEntry(const int *const word, const int length) {
- // Removing unigram entry is not supported.
- return false;
- }
+ bool removeUnigramEntry(const int *const word, const int length);
bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
const BigramProperty *const bigramProperty);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
index e4ea3da16..9fa93efc9 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
@@ -111,8 +111,7 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str
return nullptr;
}
const FormatUtils::FORMAT_VERSION formatVersion = FormatUtils::detectFormatVersion(
- mmappedBuffer->getReadOnlyByteArrayView().data(),
- mmappedBuffer->getReadOnlyByteArrayView().size());
+ mmappedBuffer->getReadOnlyByteArrayView());
switch (formatVersion) {
case FormatUtils::VERSION_2:
AKLOGE("Given path is a directory but the format is version 2. path: %s", path);
@@ -174,8 +173,7 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str
if (!mmappedBuffer) {
return nullptr;
}
- switch (FormatUtils::detectFormatVersion(mmappedBuffer->getReadOnlyByteArrayView().data(),
- mmappedBuffer->getReadOnlyByteArrayView().size())) {
+ switch (FormatUtils::detectFormatVersion(mmappedBuffer->getReadOnlyByteArrayView())) {
case FormatUtils::VERSION_2:
return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(
new PatriciaTriePolicy(std::move(mmappedBuffer)));
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
index 361dd2c74..20bae5943 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
@@ -17,7 +17,6 @@
#ifndef LATINIME_BIGRAM_DICT_CONTENT_H
#define LATINIME_BIGRAM_DICT_CONTENT_H
-#include <cstdint>
#include <cstdio>
#include "defines.h"
@@ -28,11 +27,12 @@
namespace latinime {
+class ReadWriteByteArrayView;
+
class BigramDictContent : public SparseTableDictContent {
public:
- BigramDictContent(uint8_t *const *buffers, const int *bufferSizes, const bool hasHistoricalInfo)
- : SparseTableDictContent(buffers, bufferSizes,
- Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
+ BigramDictContent(const ReadWriteByteArrayView *const buffers, const bool hasHistoricalInfo)
+ : SparseTableDictContent(buffers, Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
mHasHistoricalInfo(hasHistoricalInfo) {}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
index f3bc4a0cb..bbcea2ee0 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
@@ -45,12 +45,25 @@ ProbabilityEntry LanguageModelDictContent::getNgramProbabilityEntry(
}
bool LanguageModelDictContent::setNgramProbabilityEntry(const WordIdArrayView prevWordIds,
- const int terminalId, const ProbabilityEntry *const probabilityEntry) {
+ const int wordId, const ProbabilityEntry *const probabilityEntry) {
+ if (wordId == Ver4DictConstants::NOT_A_TERMINAL_ID) {
+ return false;
+ }
const int bitmapEntryIndex = createAndGetBitmapEntryIndex(prevWordIds);
if (bitmapEntryIndex == TrieMap::INVALID_INDEX) {
return false;
}
- return mTrieMap.put(terminalId, probabilityEntry->encode(mHasHistoricalInfo), bitmapEntryIndex);
+ return mTrieMap.put(wordId, probabilityEntry->encode(mHasHistoricalInfo), bitmapEntryIndex);
+}
+
+bool LanguageModelDictContent::removeNgramProbabilityEntry(const WordIdArrayView prevWordIds,
+ const int wordId) {
+ const int bitmapEntryIndex = getBitmapEntryIndex(prevWordIds);
+ if (bitmapEntryIndex == TrieMap::INVALID_INDEX) {
+ // Cannot find bitmap entry for the probability entry. The entry doesn't exist.
+ return false;
+ }
+ return mTrieMap.remove(wordId, bitmapEntryIndex);
}
bool LanguageModelDictContent::runGCInner(
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h
index 104ee2520..bd07f2f62 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h
@@ -61,12 +61,18 @@ class LanguageModelDictContent {
return setNgramProbabilityEntry(WordIdArrayView(), wordId, probabilityEntry);
}
+ bool removeProbabilityEntry(const int wordId) {
+ return removeNgramProbabilityEntry(WordIdArrayView(), wordId);
+ }
+
ProbabilityEntry getNgramProbabilityEntry(const WordIdArrayView prevWordIds,
const int wordId) const;
bool setNgramProbabilityEntry(const WordIdArrayView prevWordIds, const int wordId,
const ProbabilityEntry *const probabilityEntry);
+ bool removeNgramProbabilityEntry(const WordIdArrayView prevWordIds, const int wordId);
+
private:
DISALLOW_COPY_AND_ASSIGN(LanguageModelDictContent);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h
index ed77bd20e..3dfaba755 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h
@@ -43,14 +43,13 @@ class ProbabilityEntry {
: mFlags(flags), mProbability(probability), mHistoricalInfo() {}
// Entry with historical information.
- ProbabilityEntry(const int flags, const int probability,
- const HistoricalInfo *const historicalInfo)
- : mFlags(flags), mProbability(probability), mHistoricalInfo(*historicalInfo) {}
+ ProbabilityEntry(const int flags, const HistoricalInfo *const historicalInfo)
+ : mFlags(flags), mProbability(NOT_A_PROBABILITY), mHistoricalInfo(*historicalInfo) {}
// Create from unigram property.
- // TODO: Set flags.
ProbabilityEntry(const UnigramProperty *const unigramProperty)
- : mFlags(0), mProbability(unigramProperty->getProbability()),
+ : mFlags(createFlags(unigramProperty->representsBeginningOfSentence())),
+ mProbability(unigramProperty->getProbability()),
mHistoricalInfo(unigramProperty->getTimestamp(), unigramProperty->getLevel(),
unigramProperty->getCount()) {}
@@ -61,15 +60,6 @@ class ProbabilityEntry {
mHistoricalInfo(bigramProperty->getTimestamp(), bigramProperty->getLevel(),
bigramProperty->getCount()) {}
- const ProbabilityEntry createEntryWithUpdatedProbability(const int probability) const {
- return ProbabilityEntry(mFlags, probability, &mHistoricalInfo);
- }
-
- const ProbabilityEntry createEntryWithUpdatedHistoricalInfo(
- const HistoricalInfo *const historicalInfo) const {
- return ProbabilityEntry(mFlags, mProbability, historicalInfo);
- }
-
bool isValid() const {
return (mProbability != NOT_A_PROBABILITY) || hasHistoricalInfo();
}
@@ -78,7 +68,7 @@ class ProbabilityEntry {
return mHistoricalInfo.isValid();
}
- int getFlags() const {
+ uint8_t getFlags() const {
return mFlags;
}
@@ -90,6 +80,10 @@ class ProbabilityEntry {
return &mHistoricalInfo;
}
+ bool representsBeginningOfSentence() const {
+ return (mFlags & Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE) != 0;
+ }
+
uint64_t encode(const bool hasHistoricalInfo) const {
uint64_t encodedEntry = static_cast<uint64_t>(mFlags);
if (hasHistoricalInfo) {
@@ -123,7 +117,7 @@ class ProbabilityEntry {
const int count = readFromEncodedEntry(encodedEntry,
Ver4DictConstants::WORD_COUNT_FIELD_SIZE, 0 /* pos */);
const HistoricalInfo historicalInfo(timestamp, level, count);
- return ProbabilityEntry(flags, NOT_A_PROBABILITY, &historicalInfo);
+ return ProbabilityEntry(flags, &historicalInfo);
} else {
const int flags = readFromEncodedEntry(encodedEntry,
Ver4DictConstants::FLAGS_IN_LANGUAGE_MODEL_SIZE,
@@ -138,7 +132,7 @@ class ProbabilityEntry {
// Copy constructor is public to use this class as a type of return value.
DISALLOW_ASSIGNMENT_OPERATOR(ProbabilityEntry);
- const int mFlags;
+ const uint8_t mFlags;
const int mProbability;
const HistoricalInfo mHistoricalInfo;
@@ -146,6 +140,14 @@ class ProbabilityEntry {
return static_cast<int>(
(encodedEntry >> (pos * CHAR_BIT)) & ((1ull << (size * CHAR_BIT)) - 1));
}
+
+ static uint8_t createFlags(const bool representsBeginningOfSentence) {
+ uint8_t flags = 0;
+ if (representsBeginningOfSentence) {
+ flags ^= Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE;
+ }
+ return flags;
+ }
};
} // namespace latinime
#endif /* LATINIME_PROBABILITY_ENTRY_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h
index 7b12aff16..85c9ce8d8 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h
@@ -17,7 +17,6 @@
#ifndef LATINIME_SHORTCUT_DICT_CONTENT_H
#define LATINIME_SHORTCUT_DICT_CONTENT_H
-#include <cstdint>
#include <cstdio>
#include "defines.h"
@@ -27,11 +26,12 @@
namespace latinime {
+class ReadWriteByteArrayView;
+
class ShortcutDictContent : public SparseTableDictContent {
public:
- ShortcutDictContent(uint8_t *const *buffers, const int *bufferSizes)
- : SparseTableDictContent(buffers, bufferSizes,
- Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
+ ShortcutDictContent(const ReadWriteByteArrayView *const buffers)
+ : SparseTableDictContent(buffers, Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
ShortcutDictContent()
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h
index 921774181..309c434cf 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h
@@ -17,7 +17,6 @@
#ifndef LATINIME_SINGLE_DICT_CONTENT_H
#define LATINIME_SINGLE_DICT_CONTENT_H
-#include <cstdint>
#include <cstdio>
#include "defines.h"
@@ -30,9 +29,9 @@ namespace latinime {
class SingleDictContent {
public:
- SingleDictContent(uint8_t *const buffer, const int bufferSize)
- : mExpandableContentBuffer(ReadWriteByteArrayView(buffer, bufferSize),
- BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE) {}
+ SingleDictContent(const ReadWriteByteArrayView buffer)
+ : mExpandableContentBuffer(buffer,
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE) {}
SingleDictContent()
: mExpandableContentBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE) {}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h
index c98dd11fd..0ce2da7bf 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h
@@ -17,7 +17,6 @@
#ifndef LATINIME_SPARSE_TABLE_DICT_CONTENT_H
#define LATINIME_SPARSE_TABLE_DICT_CONTENT_H
-#include <cstdint>
#include <cstdio>
#include "defines.h"
@@ -31,19 +30,13 @@ namespace latinime {
// TODO: Support multiple contents.
class SparseTableDictContent {
public:
- AK_FORCE_INLINE SparseTableDictContent(uint8_t *const *buffers, const int *bufferSizes,
+ AK_FORCE_INLINE SparseTableDictContent(const ReadWriteByteArrayView *const buffers,
const int sparseTableBlockSize, const int sparseTableDataSize)
- : mExpandableLookupTableBuffer(
- ReadWriteByteArrayView(buffers[LOOKUP_TABLE_BUFFER_INDEX],
- bufferSizes[LOOKUP_TABLE_BUFFER_INDEX]),
+ : mExpandableLookupTableBuffer(buffers[LOOKUP_TABLE_BUFFER_INDEX],
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
- mExpandableAddressTableBuffer(
- ReadWriteByteArrayView(buffers[ADDRESS_TABLE_BUFFER_INDEX],
- bufferSizes[ADDRESS_TABLE_BUFFER_INDEX]),
+ mExpandableAddressTableBuffer(buffers[ADDRESS_TABLE_BUFFER_INDEX],
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
- mExpandableContentBuffer(
- ReadWriteByteArrayView(buffers[CONTENT_BUFFER_INDEX],
- bufferSizes[CONTENT_BUFFER_INDEX]),
+ mExpandableContentBuffer(buffers[CONTENT_BUFFER_INDEX],
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer,
sparseTableBlockSize, sparseTableDataSize) {}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h
index b2262bf1e..febcbe5b4 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h
@@ -17,13 +17,13 @@
#ifndef LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H
#define LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H
-#include <cstdint>
#include <cstdio>
#include <unordered_map>
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "utils/byte_array_view.h"
namespace latinime {
@@ -31,8 +31,8 @@ class TerminalPositionLookupTable : public SingleDictContent {
public:
typedef std::unordered_map<int, int> TerminalIdMap;
- TerminalPositionLookupTable(uint8_t *const buffer, const int bufferSize)
- : SingleDictContent(buffer, bufferSize),
+ TerminalPositionLookupTable(const ReadWriteByteArrayView buffer)
+ : SingleDictContent(buffer),
mSize(getBuffer()->getTailPosition()
/ Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE) {}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
index 3c8008dc4..1f40e3dd2 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
@@ -45,16 +45,13 @@ namespace latinime {
if (!bodyBuffer) {
return Ver4DictBuffersPtr(nullptr);
}
- std::vector<uint8_t *> buffers;
- std::vector<int> bufferSizes;
+ std::vector<ReadWriteByteArrayView> buffers;
const ReadWriteByteArrayView buffer = bodyBuffer->getReadWriteByteArrayView();
int position = 0;
while (position < static_cast<int>(buffer.size())) {
const int bufferSize = ByteArrayUtils::readUint32AndAdvancePosition(
buffer.data(), &position);
- const ReadWriteByteArrayView subBuffer = buffer.subView(position, bufferSize);
- buffers.push_back(subBuffer.data());
- bufferSizes.push_back(subBuffer.size());
+ buffers.push_back(buffer.subView(position, bufferSize));
position += bufferSize;
if (bufferSize < 0 || position < 0 || position > static_cast<int>(buffer.size())) {
AKLOGE("The dict body file is corrupted.");
@@ -66,7 +63,7 @@ namespace latinime {
return Ver4DictBuffersPtr(nullptr);
}
return Ver4DictBuffersPtr(new Ver4DictBuffers(std::move(headerBuffer), std::move(bodyBuffer),
- formatVersion, buffers, bufferSizes));
+ formatVersion, buffers));
}
bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath,
@@ -178,29 +175,20 @@ bool Ver4DictBuffers::flushDictBuffers(FILE *const file) const {
Ver4DictBuffers::Ver4DictBuffers(MmappedBuffer::MmappedBufferPtr &&headerBuffer,
MmappedBuffer::MmappedBufferPtr &&bodyBuffer,
const FormatUtils::FORMAT_VERSION formatVersion,
- const std::vector<uint8_t *> &contentBuffers, const std::vector<int> &contentBufferSizes)
+ const std::vector<ReadWriteByteArrayView> &contentBuffers)
: mHeaderBuffer(std::move(headerBuffer)), mDictBuffer(std::move(bodyBuffer)),
mHeaderPolicy(mHeaderBuffer->getReadOnlyByteArrayView().data(), formatVersion),
mExpandableHeaderBuffer(mHeaderBuffer->getReadWriteByteArrayView(),
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
- mExpandableTrieBuffer(
- ReadWriteByteArrayView(contentBuffers[Ver4DictConstants::TRIE_BUFFER_INDEX],
- contentBufferSizes[Ver4DictConstants::TRIE_BUFFER_INDEX]),
+ mExpandableTrieBuffer(contentBuffers[Ver4DictConstants::TRIE_BUFFER_INDEX],
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mTerminalPositionLookupTable(
- contentBuffers[Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX],
- contentBufferSizes[
- Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX]),
- mLanguageModelDictContent(
- ReadWriteByteArrayView(
- contentBuffers[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX],
- contentBufferSizes[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX]),
+ contentBuffers[Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX]),
+ mLanguageModelDictContent(contentBuffers[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX],
mHeaderPolicy.hasHistoricalInfoOfWords()),
mBigramDictContent(&contentBuffers[Ver4DictConstants::BIGRAM_BUFFERS_INDEX],
- &contentBufferSizes[Ver4DictConstants::BIGRAM_BUFFERS_INDEX],
mHeaderPolicy.hasHistoricalInfoOfWords()),
- mShortcutDictContent(&contentBuffers[Ver4DictConstants::SHORTCUT_BUFFERS_INDEX],
- &contentBufferSizes[Ver4DictConstants::SHORTCUT_BUFFERS_INDEX]),
+ mShortcutDictContent(&contentBuffers[Ver4DictConstants::SHORTCUT_BUFFERS_INDEX]),
mIsUpdatable(mDictBuffer->isUpdatable()) {}
Ver4DictBuffers::Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize)
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
index 68027dcb8..70a7983f1 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
@@ -122,8 +122,7 @@ class Ver4DictBuffers {
Ver4DictBuffers(MmappedBuffer::MmappedBufferPtr &&headerBuffer,
MmappedBuffer::MmappedBufferPtr &&bodyBuffer,
const FormatUtils::FORMAT_VERSION formatVersion,
- const std::vector<uint8_t *> &contentBuffers,
- const std::vector<int> &contentBufferSizes);
+ const std::vector<ReadWriteByteArrayView> &contentBuffers);
Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
index e622442ba..b085a6661 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
@@ -54,6 +54,8 @@ const int Ver4DictConstants::TIME_STAMP_FIELD_SIZE = 4;
const int Ver4DictConstants::WORD_LEVEL_FIELD_SIZE = 1;
const int Ver4DictConstants::WORD_COUNT_FIELD_SIZE = 1;
+const uint8_t Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE = 0x1;
+
const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 16;
const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE = 4;
const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 64;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
index 8d29f60d4..230b3052d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
@@ -20,6 +20,7 @@
#include "defines.h"
#include <cstddef>
+#include <cstdint>
namespace latinime {
@@ -48,6 +49,8 @@ class Ver4DictConstants {
static const int TIME_STAMP_FIELD_SIZE;
static const int WORD_LEVEL_FIELD_SIZE;
static const int WORD_COUNT_FIELD_SIZE;
+ // Flags in probability entry.
+ static const uint8_t FLAG_REPRESENTS_BEGINNING_OF_SENTENCE;
static const int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE;
static const int BIGRAM_ADDRESS_TABLE_DATA_SIZE;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
index 2c848cb29..fb6840ba6 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
@@ -164,8 +164,8 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbabilityAndGetNeedsToKeepPtNodeA
if (originalProbabilityEntry.hasHistoricalInfo()) {
const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
originalProbabilityEntry.getHistoricalInfo(), mHeaderPolicy);
- const ProbabilityEntry probabilityEntry =
- originalProbabilityEntry.createEntryWithUpdatedHistoricalInfo(&historicalInfo);
+ const ProbabilityEntry probabilityEntry(originalProbabilityEntry.getFlags(),
+ &historicalInfo);
if (!mBuffers->getMutableLanguageModelDictContent()->setProbabilityEntry(
toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry)) {
AKLOGE("Cannot write updated probability entry. terminalId: %d",
@@ -255,6 +255,14 @@ bool Ver4PatriciaTrieNodeWriter::addNgramEntry(const WordIdArrayView prevWordIds
bool Ver4PatriciaTrieNodeWriter::removeNgramEntry(const WordIdArrayView prevWordIds,
const int wordId) {
+ // TODO: Support n-gram.
+ LanguageModelDictContent *const languageModelDictContent =
+ mBuffers->getMutableLanguageModelDictContent();
+ if (!languageModelDictContent->removeNgramProbabilityEntry(prevWordIds.limit(1 /* maxSize */),
+ wordId)) {
+ // TODO: Uncomment.
+ // return false;
+ }
// TODO: Remove.
return mBigramPolicy->removeEntry(prevWordIds[0], wordId);
}
@@ -375,18 +383,15 @@ bool Ver4PatriciaTrieNodeWriter::writePtNodeAndGetTerminalIdAndAdvancePosition(
const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom(
const ProbabilityEntry *const originalProbabilityEntry,
const ProbabilityEntry *const probabilityEntry) const {
- // TODO: Consolidate historical info and probability.
if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
const HistoricalInfo updatedHistoricalInfo =
ForgettingCurveUtils::createUpdatedHistoricalInfo(
originalProbabilityEntry->getHistoricalInfo(),
probabilityEntry->getProbability(), probabilityEntry->getHistoricalInfo(),
mHeaderPolicy);
- return originalProbabilityEntry->createEntryWithUpdatedHistoricalInfo(
- &updatedHistoricalInfo);
+ return ProbabilityEntry(probabilityEntry->getFlags(), &updatedHistoricalInfo);
} else {
- return originalProbabilityEntry->createEntryWithUpdatedProbability(
- probabilityEntry->getProbability());
+ return *probabilityEntry;
}
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index 723808399..04e3018da 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -200,7 +200,7 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int le
}
for (const auto &shortcut : unigramProperty->getShortcuts()) {
if (shortcut.getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
- AKLOGE("One of shortcut targets is too long to insert to the dictionary, length: %d",
+ AKLOGE("One of shortcut targets is too long to insert to the dictionary, length: %zd",
shortcut.getTargetCodePoints()->size());
return false;
}
@@ -235,7 +235,7 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int le
if (!mUpdatingHelper.addShortcutTarget(wordPos,
shortcut.getTargetCodePoints()->data(),
shortcut.getTargetCodePoints()->size(), shortcut.getProbability())) {
- AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %d, "
+ AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %zd, "
"probability: %d", wordPos, shortcut.getTargetCodePoints()->size(),
shortcut.getProbability());
return false;
@@ -263,6 +263,11 @@ bool Ver4PatriciaTriePolicy::removeUnigramEntry(const int *const word, const int
AKLOGE("Cannot remove unigram. ptNodePos: %d", ptNodePos);
return false;
}
+ if (!mBuffers->getMutableLanguageModelDictContent()->removeProbabilityEntry(
+ ptNodeParams.getTerminalId())) {
+ // TODO: Uncomment.
+ // return false;
+ }
if (!ptNodeParams.representsNonWordInfo()) {
mUnigramCount--;
}
@@ -286,7 +291,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
}
if (bigramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
AKLOGE("The word is too long to insert the ngram to the dictionary. "
- "length: %d", bigramProperty->getTargetCodePoints()->size());
+ "length: %zd", bigramProperty->getTargetCodePoints()->size());
return false;
}
int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
index 1916ea560..e6e7167c2 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
@@ -23,7 +23,7 @@ namespace latinime {
const uint32_t FormatUtils::MAGIC_NUMBER = 0x9BC13AFE;
// Magic number (4 bytes), version (2 bytes), flags (2 bytes), header size (4 bytes) = 12
-const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 12;
+const size_t FormatUtils::DICTIONARY_MINIMUM_SIZE = 12;
/* static */ FormatUtils::FORMAT_VERSION FormatUtils::getFormatVersion(const int formatVersion) {
switch (formatVersion) {
@@ -40,14 +40,14 @@ const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 12;
}
}
/* static */ FormatUtils::FORMAT_VERSION FormatUtils::detectFormatVersion(
- const uint8_t *const dict, const int dictSize) {
+ const ReadOnlyByteArrayView dictBuffer) {
// The magic number is stored big-endian.
// If the dictionary is less than 4 bytes, we can't even read the magic number, so we don't
// understand this format.
- if (dictSize < DICTIONARY_MINIMUM_SIZE) {
+ if (dictBuffer.size() < DICTIONARY_MINIMUM_SIZE) {
return UNKNOWN_VERSION;
}
- const uint32_t magicNumber = ByteArrayUtils::readUint32(dict, 0);
+ const uint32_t magicNumber = ByteArrayUtils::readUint32(dictBuffer.data(), 0);
switch (magicNumber) {
case MAGIC_NUMBER:
// The layout of the header is as follows:
@@ -58,7 +58,7 @@ const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 12;
// Conceptually this converts the hardcoded value of the bytes in the file into
// the symbolic value we use in the code. But we want the constants to be the
// same so we use them for both here.
- return getFormatVersion(ByteArrayUtils::readUint16(dict, 4));
+ return getFormatVersion(ByteArrayUtils::readUint16(dictBuffer.data(), 4));
default:
return UNKNOWN_VERSION;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
index 55ad5799f..51ad9877c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
@@ -20,6 +20,7 @@
#include <cstdint>
#include "defines.h"
+#include "utils/byte_array_view.h"
namespace latinime {
@@ -42,12 +43,12 @@ class FormatUtils {
static const uint32_t MAGIC_NUMBER;
static FORMAT_VERSION getFormatVersion(const int formatVersion);
- static FORMAT_VERSION detectFormatVersion(const uint8_t *const dict, const int dictSize);
+ static FORMAT_VERSION detectFormatVersion(const ReadOnlyByteArrayView dictBuffer);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(FormatUtils);
- static const int DICTIONARY_MINIMUM_SIZE;
+ static const size_t DICTIONARY_MINIMUM_SIZE;
};
} // namespace latinime
#endif /* LATINIME_FORMAT_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.cpp
index e630aba9a..944a59c52 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.cpp
@@ -420,6 +420,10 @@ bool TrieMap::addNewEntryByExpandingTable(const uint32_t key, const uint64_t val
bool TrieMap::removeInner(const Entry &bitmapEntry) {
const int tableSize = popCount(bitmapEntry.getBitmap());
+ if (tableSize <= 0) {
+ // The table is empty. No need to remove any entries.
+ return true;
+ }
for (int i = 0; i < tableSize; ++i) {
const int entryIndex = bitmapEntry.getTableIndex() + i;
const Entry entry = readEntry(entryIndex);
@@ -444,7 +448,7 @@ bool TrieMap::removeInner(const Entry &bitmapEntry) {
}
}
}
- return freeTable(bitmapEntry.getTableIndex(), tableSize);
+ return true;
}
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_scoring.h b/native/jni/src/suggest/policyimpl/typing/typing_scoring.h
index 04cb6603a..52c4251f0 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_scoring.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_scoring.h
@@ -51,10 +51,10 @@ class TypingScoring : public Scoring {
}
if (boostExactMatches && ErrorTypeUtils::isExactMatch(containedErrorTypes)) {
score += ScoringParams::EXACT_MATCH_PROMOTION;
- if ((ErrorTypeUtils::MATCH_WITH_CASE_ERROR & containedErrorTypes) != 0) {
+ if ((ErrorTypeUtils::MATCH_WITH_WRONG_CASE & containedErrorTypes) != 0) {
score -= ScoringParams::CASE_ERROR_PENALTY_FOR_EXACT_MATCH;
}
- if ((ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR & containedErrorTypes) != 0) {
+ if ((ErrorTypeUtils::MATCH_WITH_MISSING_ACCENT & containedErrorTypes) != 0) {
score -= ScoringParams::ACCENT_ERROR_PENALTY_FOR_EXACT_MATCH;
}
if ((ErrorTypeUtils::MATCH_WITH_DIGRAPH & containedErrorTypes) != 0) {
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp b/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp
index 54f65c786..1d590c353 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp
+++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp
@@ -36,25 +36,34 @@ ErrorTypeUtils::ErrorType TypingWeighting::getErrorType(const CorrectionType cor
// Compare the node code point with original primary code point on the keyboard.
const ProximityInfoState *const pInfoState =
traverseSession->getProximityInfoState(0);
- const int primaryOriginalCodePoint = pInfoState->getPrimaryOriginalCodePointAt(
+ const int primaryCodePoint = pInfoState->getPrimaryCodePointAt(
dicNode->getInputIndex(0));
const int nodeCodePoint = dicNode->getNodeCodePoint();
- if (primaryOriginalCodePoint == nodeCodePoint) {
+ // TODO: Check whether the input code point is on the keyboard.
+ if (primaryCodePoint == nodeCodePoint) {
// Node code point is same as original code point on the keyboard.
return ErrorTypeUtils::NOT_AN_ERROR;
- } else if (CharUtils::toLowerCase(primaryOriginalCodePoint) ==
+ } else if (CharUtils::toLowerCase(primaryCodePoint) ==
CharUtils::toLowerCase(nodeCodePoint)) {
// Only cases of the code points are different.
- return ErrorTypeUtils::MATCH_WITH_CASE_ERROR;
- } else if (CharUtils::toBaseCodePoint(primaryOriginalCodePoint) ==
- CharUtils::toBaseCodePoint(nodeCodePoint)) {
+ return ErrorTypeUtils::MATCH_WITH_WRONG_CASE;
+ } else if (primaryCodePoint == CharUtils::toBaseCodePoint(nodeCodePoint)) {
// Node code point is a variant of original code point.
- return ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR;
- } else {
+ return ErrorTypeUtils::MATCH_WITH_MISSING_ACCENT;
+ } else if (CharUtils::toBaseCodePoint(primaryCodePoint)
+ == CharUtils::toBaseCodePoint(nodeCodePoint)) {
+ // Base code points are the same but the code point is intentionally input.
+ return ErrorTypeUtils::MATCH_WITH_WRONG_ACCENT;
+ } else if (CharUtils::toLowerCase(primaryCodePoint)
+ == CharUtils::toBaseLowerCase(nodeCodePoint)) {
// Node code point is a variant of original code point and the cases are also
// different.
- return ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR
- | ErrorTypeUtils::MATCH_WITH_CASE_ERROR;
+ return ErrorTypeUtils::MATCH_WITH_MISSING_ACCENT
+ | ErrorTypeUtils::MATCH_WITH_WRONG_CASE;
+ } else {
+ // Base code points are the same and the cases are different.
+ return ErrorTypeUtils::MATCH_WITH_WRONG_ACCENT
+ | ErrorTypeUtils::MATCH_WITH_WRONG_CASE;
}
}
break;
diff --git a/native/jni/src/utils/byte_array_view.h b/native/jni/src/utils/byte_array_view.h
index 2c97c6d58..10d7ae278 100644
--- a/native/jni/src/utils/byte_array_view.h
+++ b/native/jni/src/utils/byte_array_view.h
@@ -77,10 +77,12 @@ class ReadWriteByteArrayView {
}
private:
- DISALLOW_ASSIGNMENT_OPERATOR(ReadWriteByteArrayView);
+ // Default copy constructor and assignment operator are used for using this class with STL
+ // containers.
- uint8_t *const mPtr;
- const size_t mSize;
+ // These members cannot be const to have the assignment operator.
+ uint8_t *mPtr;
+ size_t mSize;
};
} // namespace latinime