aboutsummaryrefslogtreecommitdiffstats
path: root/native
diff options
context:
space:
mode:
Diffstat (limited to 'native')
-rw-r--r--native/jni/NativeFileList.mk7
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node.h11
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_utils.cpp11
-rw-r--r--native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp64
-rw-r--r--native/jni/src/suggest/core/dictionary/multi_bigram_map.h25
-rw-r--r--native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h4
-rw-r--r--native/jni/src/suggest/core/policy/scoring.h1
-rw-r--r--native/jni/src/suggest/core/result/suggestions_output_utils.cpp10
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/single_dict_content.h6
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.h14
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.cpp11
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp29
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h14
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp20
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp8
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h1
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp46
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h11
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h9
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp13
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h12
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dict_content.h35
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp95
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h83
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp159
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h66
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h52
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h6
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h19
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp50
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h14
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp11
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h3
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp5
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h10
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp27
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h8
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp27
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp10
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp24
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h22
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h16
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.cpp10
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h10
-rw-r--r--native/jni/src/suggest/policyimpl/typing/typing_scoring.h4
-rw-r--r--native/jni/src/utils/byte_array_view.h87
-rw-r--r--native/jni/src/utils/int_array_view.h105
-rw-r--r--native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp59
-rw-r--r--native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/probability_entry_test.cpp60
-rw-r--r--native/jni/tests/utils/int_array_view_test.cpp61
52 files changed, 918 insertions, 553 deletions
diff --git a/native/jni/NativeFileList.mk b/native/jni/NativeFileList.mk
index ef6f3d69e..7a732a588 100644
--- a/native/jni/NativeFileList.mk
+++ b/native/jni/NativeFileList.mk
@@ -72,7 +72,7 @@ LATIN_IME_CORE_SRC_FILES := \
ver4_pt_node_array_reader.cpp) \
$(addprefix suggest/policyimpl/dictionary/structure/v4/content/, \
bigram_dict_content.cpp \
- probability_dict_content.cpp \
+ language_model_dict_content.cpp \
shortcut_dict_content.cpp \
sparse_table_dict_content.cpp \
terminal_position_lookup_table.cpp) \
@@ -125,6 +125,9 @@ LATIN_IME_CORE_TEST_FILES := \
defines_test.cpp \
suggest/core/layout/normal_distribution_2d_test.cpp \
suggest/core/dictionary/bloom_filter_test.cpp \
+ suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp \
+ suggest/policyimpl/dictionary/structure/v4/content/probability_entry_test.cpp \
suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer_test.cpp \
suggest/policyimpl/dictionary/utils/trie_map_test.cpp \
- utils/autocorrection_threshold_utils_test.cpp
+ utils/autocorrection_threshold_utils_test.cpp \
+ utils/int_array_view_test.cpp
diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h
index 92f39ea25..d1b2c87be 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node.h
@@ -117,7 +117,7 @@ class DicNode {
int newPrevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
newPrevWordsPtNodePos[0] = dicNode->mDicNodeProperties.getPtNodePos();
for (size_t i = 1; i < NELEMS(newPrevWordsPtNodePos); ++i) {
- newPrevWordsPtNodePos[i] = dicNode->getNthPrevWordTerminalPtNodePos(i);
+ newPrevWordsPtNodePos[i] = dicNode->getPrevWordsTerminalPtNodePos()[i - 1];
}
mDicNodeProperties.init(rootPtNodeArrayPos, newPrevWordsPtNodePos);
mDicNodeState.initAsRootWithPreviousWord(&dicNode->mDicNodeState,
@@ -208,12 +208,9 @@ class DicNode {
return mDicNodeProperties.getPtNodePos();
}
- // Used to get n-gram probability in DicNodeUtils. n is 1-indexed.
- int getNthPrevWordTerminalPtNodePos(const int n) const {
- if (n <= 0 || n > MAX_PREV_WORD_COUNT_FOR_N_GRAM) {
- return NOT_A_DICT_POS;
- }
- return mDicNodeProperties.getPrevWordsTerminalPtNodePos()[n - 1];
+ // TODO: Use view class to return PtNodePos array.
+ const int *getPrevWordsTerminalPtNodePos() const {
+ return mDicNodeProperties.getPrevWordsTerminalPtNodePos();
}
// Used in DicNodeUtils
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
index 4445f4aaf..69ea67418 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
+++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
@@ -85,17 +85,10 @@ namespace latinime {
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) {
const int unigramProbability = dicNode->getProbability();
- const int ptNodePos = dicNode->getPtNodePos();
- const int prevWordTerminalPtNodePos = dicNode->getNthPrevWordTerminalPtNodePos(1 /* n */);
- if (NOT_A_DICT_POS == ptNodePos || NOT_A_DICT_POS == prevWordTerminalPtNodePos) {
- // Note: Normally wordPos comes from the dictionary and should never equal
- // NOT_A_VALID_WORD_POS.
- return dictionaryStructurePolicy->getProbability(unigramProbability,
- NOT_A_PROBABILITY);
- }
if (multiBigramMap) {
+ const int *const prevWordsPtNodePos = dicNode->getPrevWordsTerminalPtNodePos();
return multiBigramMap->getBigramProbability(dictionaryStructurePolicy,
- prevWordTerminalPtNodePos, ptNodePos, unigramProbability);
+ prevWordsPtNodePos, dicNode->getPtNodePos(), unigramProbability);
}
return dictionaryStructurePolicy->getProbability(unigramProbability,
NOT_A_PROBABILITY);
diff --git a/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp b/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp
index 012e4dc9c..91f33a8dd 100644
--- a/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp
+++ b/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp
@@ -35,34 +35,30 @@ const int MultiBigramMap::BigramMap::DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP =
// Also caches the bigrams if there is space remaining and they have not been cached already.
int MultiBigramMap::getBigramProbability(
const DictionaryStructureWithBufferPolicy *const structurePolicy,
- const int wordPosition, const int nextWordPosition, const int unigramProbability) {
+ const int *const prevWordsPtNodePos, const int nextWordPosition,
+ const int unigramProbability) {
+ if (!prevWordsPtNodePos || prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
+ return structurePolicy->getProbability(unigramProbability, NOT_A_PROBABILITY);
+ }
std::unordered_map<int, BigramMap>::const_iterator mapPosition =
- mBigramMaps.find(wordPosition);
+ mBigramMaps.find(prevWordsPtNodePos[0]);
if (mapPosition != mBigramMaps.end()) {
return mapPosition->second.getBigramProbability(structurePolicy, nextWordPosition,
unigramProbability);
}
if (mBigramMaps.size() < MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP) {
- addBigramsForWordPosition(structurePolicy, wordPosition);
- return mBigramMaps[wordPosition].getBigramProbability(structurePolicy,
+ addBigramsForWordPosition(structurePolicy, prevWordsPtNodePos);
+ return mBigramMaps[prevWordsPtNodePos[0]].getBigramProbability(structurePolicy,
nextWordPosition, unigramProbability);
}
- return readBigramProbabilityFromBinaryDictionary(structurePolicy, wordPosition,
+ return readBigramProbabilityFromBinaryDictionary(structurePolicy, prevWordsPtNodePos,
nextWordPosition, unigramProbability);
}
void MultiBigramMap::BigramMap::init(
- const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos) {
- BinaryDictionaryBigramsIterator bigramsIt =
- structurePolicy->getBigramsIteratorOfPtNode(nodePos);
- while (bigramsIt.hasNext()) {
- bigramsIt.next();
- if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) {
- continue;
- }
- mBigramMap[bigramsIt.getBigramPos()] = bigramsIt.getProbability();
- mBloomFilter.setInFilter(bigramsIt.getBigramPos());
- }
+ const DictionaryStructureWithBufferPolicy *const structurePolicy,
+ const int *const prevWordsPtNodePos) {
+ structurePolicy->iterateNgramEntries(prevWordsPtNodePos, this /* listener */);
}
int MultiBigramMap::BigramMap::getBigramProbability(
@@ -79,25 +75,33 @@ int MultiBigramMap::BigramMap::getBigramProbability(
return structurePolicy->getProbability(unigramProbability, bigramProbability);
}
+void MultiBigramMap::BigramMap::onVisitEntry(const int ngramProbability,
+ const int targetPtNodePos) {
+ if (targetPtNodePos == NOT_A_DICT_POS) {
+ return;
+ }
+ mBigramMap[targetPtNodePos] = ngramProbability;
+ mBloomFilter.setInFilter(targetPtNodePos);
+}
+
void MultiBigramMap::addBigramsForWordPosition(
- const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position) {
- mBigramMaps[position].init(structurePolicy, position);
+ const DictionaryStructureWithBufferPolicy *const structurePolicy,
+ const int *const prevWordsPtNodePos) {
+ if (prevWordsPtNodePos) {
+ mBigramMaps[prevWordsPtNodePos[0]].init(structurePolicy, prevWordsPtNodePos);
+ }
}
int MultiBigramMap::readBigramProbabilityFromBinaryDictionary(
- const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos,
- const int nextWordPosition, const int unigramProbability) {
- int bigramProbability = NOT_A_PROBABILITY;
- BinaryDictionaryBigramsIterator bigramsIt =
- structurePolicy->getBigramsIteratorOfPtNode(nodePos);
- while (bigramsIt.hasNext()) {
- bigramsIt.next();
- if (bigramsIt.getBigramPos() == nextWordPosition) {
- bigramProbability = bigramsIt.getProbability();
- break;
- }
+ const DictionaryStructureWithBufferPolicy *const structurePolicy,
+ const int *const prevWordsPtNodePos, const int nextWordPosition,
+ const int unigramProbability) {
+ const int bigramProbability = structurePolicy->getProbabilityOfPtNode(prevWordsPtNodePos,
+ nextWordPosition);
+ if (bigramProbability != NOT_A_PROBABILITY) {
+ return bigramProbability;
}
- return structurePolicy->getProbability(unigramProbability, bigramProbability);
+ return structurePolicy->getProbability(unigramProbability, NOT_A_PROBABILITY);
}
} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h
index 195b5e22f..ad36dde83 100644
--- a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h
+++ b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h
@@ -23,6 +23,7 @@
#include "defines.h"
#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
#include "suggest/core/dictionary/bloom_filter.h"
+#include "suggest/core/dictionary/ngram_listener.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
namespace latinime {
@@ -38,7 +39,8 @@ class MultiBigramMap {
// Look up the bigram probability for the given word pair from the cached bigram maps.
// Also caches the bigrams if there is space remaining and they have not been cached already.
int getBigramProbability(const DictionaryStructureWithBufferPolicy *const structurePolicy,
- const int wordPosition, const int nextWordPosition, const int unigramProbability);
+ const int *const prevWordsPtNodePos, const int nextWordPosition,
+ const int unigramProbability);
void clear() {
mBigramMaps.clear();
@@ -47,32 +49,35 @@ class MultiBigramMap {
private:
DISALLOW_COPY_AND_ASSIGN(MultiBigramMap);
- class BigramMap {
+ class BigramMap : public NgramListener {
public:
BigramMap() : mBigramMap(DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP), mBloomFilter() {}
- ~BigramMap() {}
+ // Copy constructor needed for std::unordered_map.
+ BigramMap(const BigramMap &bigramMap)
+ : mBigramMap(bigramMap.mBigramMap), mBloomFilter(bigramMap.mBloomFilter) {}
+ virtual ~BigramMap() {}
void init(const DictionaryStructureWithBufferPolicy *const structurePolicy,
- const int nodePos);
-
+ const int *const prevWordsPtNodePos);
int getBigramProbability(
const DictionaryStructureWithBufferPolicy *const structurePolicy,
const int nextWordPosition, const int unigramProbability) const;
+ virtual void onVisitEntry(const int ngramProbability, const int targetPtNodePos);
private:
- // NOTE: The BigramMap class doesn't use DISALLOW_COPY_AND_ASSIGN() because its default
- // copy constructor is needed for use in hash_map.
static const int DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP;
std::unordered_map<int, int> mBigramMap;
BloomFilter mBloomFilter;
};
void addBigramsForWordPosition(
- const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position);
+ const DictionaryStructureWithBufferPolicy *const structurePolicy,
+ const int *const prevWordsPtNodePos);
int readBigramProbabilityFromBinaryDictionary(
- const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos,
- const int nextWordPosition, const int unigramProbability);
+ const DictionaryStructureWithBufferPolicy *const structurePolicy,
+ const int *const prevWordsPtNodePos, const int nextWordPosition,
+ const int unigramProbability);
static const size_t MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP;
std::unordered_map<int, BigramMap> mBigramMaps;
diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
index 7e3bf3ff6..e91f07682 100644
--- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
@@ -20,14 +20,12 @@
#include <memory>
#include "defines.h"
-#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
#include "suggest/core/dictionary/property/word_property.h"
namespace latinime {
class DicNode;
class DicNodeVector;
-class DictionaryBigramsStructurePolicy;
class DictionaryHeaderStructurePolicy;
class DictionaryShortcutsStructurePolicy;
class NgramListener;
@@ -67,8 +65,6 @@ class DictionaryStructureWithBufferPolicy {
virtual int getShortcutPositionOfPtNode(const int nodePos) const = 0;
- virtual BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int nodePos) const = 0;
-
virtual const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const = 0;
virtual const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const = 0;
diff --git a/native/jni/src/suggest/core/policy/scoring.h b/native/jni/src/suggest/core/policy/scoring.h
index 292194bf2..9e75cace4 100644
--- a/native/jni/src/suggest/core/policy/scoring.h
+++ b/native/jni/src/suggest/core/policy/scoring.h
@@ -37,7 +37,6 @@ class Scoring {
DicNode *const terminals, const int size) const = 0;
virtual float getDoubleLetterDemotionDistanceCost(
const DicNode *const terminalDicNode) const = 0;
- virtual bool doesAutoCorrectValidWord() const = 0;
virtual bool autoCorrectsToMultiWordSuggestionIfTop() const = 0;
virtual bool sameAsTyped(const DicTraverseSession *const traverseSession,
const DicNode *const dicNode) const = 0;
diff --git a/native/jni/src/suggest/core/result/suggestions_output_utils.cpp b/native/jni/src/suggest/core/result/suggestions_output_utils.cpp
index 7b0e7e1b4..0b99b75ec 100644
--- a/native/jni/src/suggest/core/result/suggestions_output_utils.cpp
+++ b/native/jni/src/suggest/core/result/suggestions_output_utils.cpp
@@ -117,8 +117,7 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
const int finalScore = scoringPolicy->calculateFinalScore(
compoundDistance, traverseSession->getInputSize(),
terminalDicNode->getContainedErrorTypes(),
- (forceCommitMultiWords && terminalDicNode->hasMultipleWords())
- || (isValidWord && scoringPolicy->doesAutoCorrectValidWord()),
+ (forceCommitMultiWords && terminalDicNode->hasMultipleWords()),
boostExactMatches);
// Don't output invalid or blocked offensive words. However, we still need to submit their
@@ -145,12 +144,7 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
traverseSession->getDictionaryStructurePolicy()
->getShortcutPositionOfPtNode(terminalDicNode->getPtNodePos()));
const bool sameAsTyped = scoringPolicy->sameAsTyped(traverseSession, terminalDicNode);
- const int shortcutBaseScore = scoringPolicy->doesAutoCorrectValidWord() ?
- scoringPolicy->calculateFinalScore(compoundDistance,
- traverseSession->getInputSize(),
- terminalDicNode->getContainedErrorTypes(),
- true /* forceCommit */, boostExactMatches) : finalScore;
- outputShortcuts(&shortcutIt, shortcutBaseScore, sameAsTyped, outSuggestionResults);
+ outputShortcuts(&shortcutIt, finalScore, sameAsTyped, outSuggestionResults);
}
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/single_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/single_dict_content.h
index 6433650b0..49f446814 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/single_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/single_dict_content.h
@@ -30,6 +30,7 @@
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+#include "utils/byte_array_view.h"
namespace latinime {
namespace backward {
@@ -40,8 +41,9 @@ class SingleDictContent : public DictContent {
SingleDictContent(const char *const dictPath, const char *const contentFileName,
const bool isUpdatable)
: mMmappedBuffer(MmappedBuffer::openBuffer(dictPath, contentFileName, isUpdatable)),
- mExpandableContentBuffer(mMmappedBuffer ? mMmappedBuffer->getBuffer() : nullptr,
- mMmappedBuffer ? mMmappedBuffer->getBufferSize() : 0,
+ mExpandableContentBuffer(
+ mMmappedBuffer ? mMmappedBuffer->getReadWriteByteArrayView() :
+ ReadWriteByteArrayView(),
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mIsValid(mMmappedBuffer) {}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.h
index c7233edd3..3c626df11 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.h
@@ -31,6 +31,7 @@
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
#include "suggest/policyimpl/dictionary/utils/sparse_table.h"
+#include "utils/byte_array_view.h"
namespace latinime {
namespace backward {
@@ -50,15 +51,16 @@ class SparseTableDictContent : public DictContent {
mContentBuffer(
MmappedBuffer::openBuffer(dictPath, contentFileName, isUpdatable)),
mExpandableLookupTableBuffer(
- mLookupTableBuffer ? mLookupTableBuffer->getBuffer() : nullptr,
- mLookupTableBuffer ? mLookupTableBuffer->getBufferSize() : 0,
+ mLookupTableBuffer ? mLookupTableBuffer->getReadWriteByteArrayView() :
+ ReadWriteByteArrayView(),
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mExpandableAddressTableBuffer(
- mAddressTableBuffer ? mAddressTableBuffer->getBuffer() : nullptr,
- mAddressTableBuffer ? mAddressTableBuffer->getBufferSize() : 0,
+ mAddressTableBuffer ? mAddressTableBuffer->getReadWriteByteArrayView() :
+ ReadWriteByteArrayView(),
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
- mExpandableContentBuffer(mContentBuffer ? mContentBuffer->getBuffer() : nullptr,
- mContentBuffer ? mContentBuffer->getBufferSize() : 0,
+ mExpandableContentBuffer(
+ mContentBuffer ? mContentBuffer->getReadWriteByteArrayView() :
+ ReadWriteByteArrayView(),
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer,
sparseTableBlockSize, sparseTableDataSize),
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.cpp
index 93f192976..3dfbd1c94 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.cpp
@@ -30,6 +30,7 @@
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
#include "suggest/policyimpl/dictionary/utils/file_utils.h"
+#include "utils/byte_array_view.h"
namespace latinime {
namespace backward {
@@ -130,12 +131,12 @@ Ver4DictBuffers::Ver4DictBuffers(const char *const dictPath,
: mHeaderBuffer(std::move(headerBuffer)),
mDictBuffer(MmappedBuffer::openBuffer(dictPath,
Ver4DictConstants::TRIE_FILE_EXTENSION, isUpdatable)),
- mHeaderPolicy(mHeaderBuffer->getBuffer(), formatVersion),
- mExpandableHeaderBuffer(mHeaderBuffer ? mHeaderBuffer->getBuffer() : nullptr,
- mHeaderPolicy.getSize(),
+ mHeaderPolicy(mHeaderBuffer->getReadOnlyByteArrayView().data(), formatVersion),
+ mExpandableHeaderBuffer(mHeaderBuffer->getReadWriteByteArrayView(),
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
- mExpandableTrieBuffer(mDictBuffer ? mDictBuffer->getBuffer() : nullptr,
- mDictBuffer ? mDictBuffer->getBufferSize() : 0,
+ mExpandableTrieBuffer(
+ mDictBuffer ? mDictBuffer->getReadWriteByteArrayView() :
+ ReadWriteByteArrayView(),
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mTerminalPositionLookupTable(dictPath, isUpdatable),
mProbabilityDictContent(dictPath, mHeaderPolicy.hasHistoricalInfoOfWords(), isUpdatable),
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp
index 4220a9561..278f2b199 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp
@@ -231,30 +231,31 @@ bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
&probabilityEntryToWrite);
}
-bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry(
- const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam,
- const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) {
- if (!mBigramPolicy->addNewEntry(sourcePtNodeParams->getTerminalId(),
- targetPtNodeParam->getTerminalId(), bigramProperty, outAddedNewBigram)) {
+bool Ver4PatriciaTrieNodeWriter::addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
+ const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) {
+ if (!mBigramPolicy->addNewEntry(prevWordIds[0], wordId, bigramProperty, outAddedNewEntry)) {
AKLOGE("Cannot add new bigram entry. terminalId: %d, targetTerminalId: %d",
sourcePtNodeParams->getTerminalId(), targetPtNodeParam->getTerminalId());
return false;
}
- if (!sourcePtNodeParams->hasBigrams()) {
+ const int ptNodePos =
+ mBuffers->getTerminalPositionLookupTable()->getTerminalPtNodePosition(prevWordIds[0]);
+ const PtNodeParams sourcePtNodeParams =
+ mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
+ if (!sourcePtNodeParams.hasBigrams()) {
// Update has bigrams flag.
- return updatePtNodeFlags(sourcePtNodeParams->getHeadPos(),
- sourcePtNodeParams->isBlacklisted(), sourcePtNodeParams->isNotAWord(),
- sourcePtNodeParams->isTerminal(), sourcePtNodeParams->hasShortcutTargets(),
+ return updatePtNodeFlags(sourcePtNodeParams.getHeadPos(),
+ sourcePtNodeParams.isBlacklisted(), sourcePtNodeParams.isNotAWord(),
+ sourcePtNodeParams.isTerminal(), sourcePtNodeParams.hasShortcutTargets(),
true /* hasBigrams */,
- sourcePtNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
+ sourcePtNodeParams.getCodePointCount() > 1 /* hasMultipleChars */);
}
return true;
}
-bool Ver4PatriciaTrieNodeWriter::removeBigramEntry(
- const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam) {
- return mBigramPolicy->removeEntry(sourcePtNodeParams->getTerminalId(),
- targetPtNodeParam->getTerminalId());
+bool Ver4PatriciaTrieNodeWriter::removeNgramEntry(const WordIdArrayView prevWordIds,
+ const int wordId) {
+ return mBigramPolicy->removeEntry(prevWordIds[0], wordId);
}
bool Ver4PatriciaTrieNodeWriter::updateAllBigramEntriesAndDeleteUselessEntries(
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h
index 08226ea26..d49d9a666 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h
@@ -29,6 +29,7 @@
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
#include "suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h"
+#include "utils/int_array_view.h"
namespace latinime {
namespace backward {
@@ -61,8 +62,8 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
const PtNodeArrayReader *const ptNodeArrayReader,
Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy)
: mTrieBuffer(trieBuffer), mBuffers(buffers), mHeaderPolicy(headerPolicy),
- mReadingHelper(ptNodeReader, ptNodeArrayReader), mBigramPolicy(bigramPolicy),
- mShortcutPolicy(shortcutPolicy) {}
+ mPtNodeReader(ptNodeReader), mReadingHelper(ptNodeReader, ptNodeArrayReader),
+ mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy) {}
virtual ~Ver4PatriciaTrieNodeWriter() {}
@@ -92,12 +93,10 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos);
- virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
- const PtNodeParams *const targetPtNodeParam, const BigramProperty *const bigramProperty,
- bool *const outAddedNewBigram);
+ virtual bool addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
+ const BigramProperty *const bigramProperty, bool *const outAddedNewEntry);
- virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,
- const PtNodeParams *const targetPtNodeParam);
+ virtual bool removeNgramEntry(const WordIdArrayView prevWordIds, const int wordId);
virtual bool updateAllBigramEntriesAndDeleteUselessEntries(
const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount);
@@ -135,6 +134,7 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
BufferWithExtendableBuffer *const mTrieBuffer;
Ver4DictBuffers *const mBuffers;
const HeaderPolicy *const mHeaderPolicy;
+ const PtNodeReader *const mPtNodeReader;
DynamicPtReadingHelper mReadingHelper;
Ver4BigramListPolicy *const mBigramPolicy;
Ver4ShortcutListPolicy *const mShortcutPolicy;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
index 994c42505..1296b8acd 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
@@ -142,8 +142,8 @@ int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const int *const prevWordsPtN
return NOT_A_PROBABILITY;
}
if (prevWordsPtNodePos) {
- BinaryDictionaryBigramsIterator bigramsIt =
- getBigramsIteratorOfPtNode(prevWordsPtNodePos[0]);
+ const int bigramsPosition = getBigramsPositionOfPtNode(prevWordsPtNodePos[0]);
+ BinaryDictionaryBigramsIterator bigramsIt(&mBigramPolicy, bigramsPosition);
while (bigramsIt.hasNext()) {
bigramsIt.next();
if (bigramsIt.getBigramPos() == ptNodePos
@@ -161,7 +161,8 @@ void Ver4PatriciaTriePolicy::iterateNgramEntries(const int *const prevWordsPtNod
if (!prevWordsPtNodePos) {
return;
}
- BinaryDictionaryBigramsIterator bigramsIt = getBigramsIteratorOfPtNode(prevWordsPtNodePos[0]);
+ const int bigramsPosition = getBigramsPositionOfPtNode(prevWordsPtNodePos[0]);
+ BinaryDictionaryBigramsIterator bigramsIt(&mBigramPolicy, bigramsPosition);
while (bigramsIt.hasNext()) {
bigramsIt.next();
listener->onVisitEntry(bigramsIt.getProbability(), bigramsIt.getBigramPos());
@@ -180,12 +181,6 @@ int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) con
ptNodeParams.getTerminalId());
}
-BinaryDictionaryBigramsIterator Ver4PatriciaTriePolicy::getBigramsIteratorOfPtNode(
- const int ptNodePos) const {
- const int bigramsPosition = getBigramsPositionOfPtNode(ptNodePos);
- return BinaryDictionaryBigramsIterator(&mBigramPolicy, bigramsPosition);
-}
-
int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS;
@@ -314,8 +309,8 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
return false;
}
bool addedNewBigram = false;
- if (mUpdatingHelper.addBigramWords(prevWordsPtNodePos[0], word1Pos, bigramProperty,
- &addedNewBigram)) {
+ if (mUpdatingHelper.addNgramEntry(PtNodePosArrayView::fromObject(prevWordsPtNodePos),
+ word1Pos, bigramProperty, &addedNewBigram)) {
if (addedNewBigram) {
mBigramCount++;
}
@@ -355,7 +350,8 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
if (wordPos == NOT_A_DICT_POS) {
return false;
}
- if (mUpdatingHelper.removeBigramWords(prevWordsPtNodePos[0], wordPos)) {
+ if (mUpdatingHelper.removeNgramEntry(
+ PtNodePosArrayView::fromObject(prevWordsPtNodePos), wordPos)) {
mBigramCount--;
return true;
} else {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
index ff69de7c0..9e989b268 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
@@ -97,8 +97,6 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int getShortcutPositionOfPtNode(const int ptNodePos) const;
- BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int ptNodePos) const;
-
const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
return mHeaderPolicy;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
index e4b5fa267..e4ea3da16 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
@@ -31,6 +31,7 @@
#include "suggest/policyimpl/dictionary/utils/file_utils.h"
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+#include "utils/byte_array_view.h"
namespace latinime {
@@ -110,7 +111,8 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str
return nullptr;
}
const FormatUtils::FORMAT_VERSION formatVersion = FormatUtils::detectFormatVersion(
- mmappedBuffer->getBuffer(), mmappedBuffer->getBufferSize());
+ mmappedBuffer->getReadOnlyByteArrayView().data(),
+ mmappedBuffer->getReadOnlyByteArrayView().size());
switch (formatVersion) {
case FormatUtils::VERSION_2:
AKLOGE("Given path is a directory but the format is version 2. path: %s", path);
@@ -172,8 +174,8 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str
if (!mmappedBuffer) {
return nullptr;
}
- switch (FormatUtils::detectFormatVersion(mmappedBuffer->getBuffer(),
- mmappedBuffer->getBufferSize())) {
+ switch (FormatUtils::detectFormatVersion(mmappedBuffer->getReadOnlyByteArrayView().data(),
+ mmappedBuffer->getReadOnlyByteArrayView().size())) {
case FormatUtils::VERSION_2:
return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(
new PatriciaTriePolicy(std::move(mmappedBuffer)));
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h
index 2e05bf397..b7262581a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h
@@ -26,7 +26,6 @@
namespace latinime {
-class DictionaryBigramsStructurePolicy;
class DictionaryShortcutsStructurePolicy;
class PtNodeArrayReader;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp
index f31c914d2..3c62e2e56 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp
@@ -84,23 +84,39 @@ bool DynamicPtUpdatingHelper::addUnigramWord(
unigramProperty, &pos);
}
-bool DynamicPtUpdatingHelper::addBigramWords(const int word0Pos, const int word1Pos,
- const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) {
- const PtNodeParams sourcePtNodeParams(
- mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(word0Pos));
- const PtNodeParams targetPtNodeParams(
- mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(word1Pos));
- return mPtNodeWriter->addNewBigramEntry(&sourcePtNodeParams, &targetPtNodeParams,
- bigramProperty, outAddedNewBigram);
+bool DynamicPtUpdatingHelper::addNgramEntry(const PtNodePosArrayView prevWordsPtNodePos,
+ const int wordPos, const BigramProperty *const bigramProperty,
+ bool *const outAddedNewEntry) {
+ if (prevWordsPtNodePos.empty()) {
+ return false;
+ }
+ ASSERT(prevWordsPtNodePos.size() <= MAX_PREV_WORD_COUNT_FOR_N_GRAM);
+ int prevWordTerminalIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+ for (size_t i = 0; i < prevWordsPtNodePos.size(); ++i) {
+ prevWordTerminalIds[i] = mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(
+ prevWordsPtNodePos[i]).getTerminalId();
+ }
+ const WordIdArrayView prevWordIds(prevWordTerminalIds, prevWordsPtNodePos.size());
+ const int wordId =
+ mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(wordPos).getTerminalId();
+ return mPtNodeWriter->addNgramEntry(prevWordIds, wordId, bigramProperty, outAddedNewEntry);
}
-// Remove a bigram relation from word0Pos to word1Pos.
-bool DynamicPtUpdatingHelper::removeBigramWords(const int word0Pos, const int word1Pos) {
- const PtNodeParams sourcePtNodeParams(
- mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(word0Pos));
- const PtNodeParams targetPtNodeParams(
- mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(word1Pos));
- return mPtNodeWriter->removeBigramEntry(&sourcePtNodeParams, &targetPtNodeParams);
+bool DynamicPtUpdatingHelper::removeNgramEntry(const PtNodePosArrayView prevWordsPtNodePos,
+ const int wordPos) {
+ if (prevWordsPtNodePos.empty()) {
+ return false;
+ }
+ ASSERT(prevWordsPtNodePos.size() <= MAX_PREV_WORD_COUNT_FOR_N_GRAM);
+ int prevWordTerminalIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+ for (size_t i = 0; i < prevWordsPtNodePos.size(); ++i) {
+ prevWordTerminalIds[i] = mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(
+ prevWordsPtNodePos[i]).getTerminalId();
+ }
+ const WordIdArrayView prevWordIds(prevWordTerminalIds, prevWordsPtNodePos.size());
+ const int wordId =
+ mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(wordPos).getTerminalId();
+ return mPtNodeWriter->removeNgramEntry(prevWordIds, wordId);
}
bool DynamicPtUpdatingHelper::addShortcutTarget(const int wordPos,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h
index f10d15a9b..97c05c1ea 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h
@@ -19,6 +19,7 @@
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+#include "utils/int_array_view.h"
namespace latinime {
@@ -42,12 +43,12 @@ class DynamicPtUpdatingHelper {
const int *const wordCodePoints, const int codePointCount,
const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram);
- // Add a bigram relation from word0Pos to word1Pos.
- bool addBigramWords(const int word0Pos, const int word1Pos,
- const BigramProperty *const bigramProperty, bool *const outAddedNewBigram);
+ // Add an n-gram entry.
+ bool addNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, const int wordPos,
+ const BigramProperty *const bigramProperty, bool *const outAddedNewEntry);
- // Remove a bigram relation from word0Pos to word1Pos.
- bool removeBigramWords(const int word0Pos, const int word1Pos);
+ // Remove an n-gram entry.
+ bool removeNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, const int wordPos);
// Add a shortcut target.
bool addShortcutTarget(const int wordPos, const int *const targetCodePoints,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h
index a8029f73f..955d779ac 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h
@@ -21,6 +21,7 @@
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+#include "utils/int_array_view.h"
namespace latinime {
@@ -70,12 +71,10 @@ class PtNodeWriter {
virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos) = 0;
- virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
- const PtNodeParams *const targetPtNodeParam, const BigramProperty *const bigramProperty,
- bool *const outAddedNewBigram) = 0;
+ virtual bool addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
+ const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) = 0;
- virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,
- const PtNodeParams *const targetPtNodeParam) = 0;
+ virtual bool removeNgramEntry(const WordIdArrayView prevWordIds, const int wordId) = 0;
virtual bool updateAllBigramEntriesAndDeleteUselessEntries(
const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount) = 0;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
index 53415aeb6..ea32eb2a9 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
@@ -311,8 +311,8 @@ int PatriciaTriePolicy::getProbabilityOfPtNode(const int *const prevWordsPtNodeP
return NOT_A_PROBABILITY;
}
if (prevWordsPtNodePos) {
- BinaryDictionaryBigramsIterator bigramsIt =
- getBigramsIteratorOfPtNode(prevWordsPtNodePos[0]);
+ const int bigramsPosition = getBigramsPositionOfPtNode(prevWordsPtNodePos[0]);
+ BinaryDictionaryBigramsIterator bigramsIt(&mBigramListPolicy, bigramsPosition);
while (bigramsIt.hasNext()) {
bigramsIt.next();
if (bigramsIt.getBigramPos() == ptNodePos
@@ -330,7 +330,8 @@ void PatriciaTriePolicy::iterateNgramEntries(const int *const prevWordsPtNodePos
if (!prevWordsPtNodePos) {
return;
}
- BinaryDictionaryBigramsIterator bigramsIt = getBigramsIteratorOfPtNode(prevWordsPtNodePos[0]);
+ const int bigramsPosition = getBigramsPositionOfPtNode(prevWordsPtNodePos[0]);
+ BinaryDictionaryBigramsIterator bigramsIt(&mBigramListPolicy, bigramsPosition);
while (bigramsIt.hasNext()) {
bigramsIt.next();
listener->onVisitEntry(bigramsIt.getProbability(), bigramsIt.getBigramPos());
@@ -344,12 +345,6 @@ int PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
return mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos).getShortcutPos();
}
-BinaryDictionaryBigramsIterator PatriciaTriePolicy::getBigramsIteratorOfPtNode(
- const int ptNodePos) const {
- const int bigramsPosition = getBigramsPositionOfPtNode(ptNodePos);
- return BinaryDictionaryBigramsIterator(&mBigramListPolicy, bigramsPosition);
-}
-
int PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
index 07cb72b23..70351d147 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
@@ -29,6 +29,7 @@
#include "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h"
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+#include "utils/byte_array_view.h"
namespace latinime {
@@ -39,9 +40,12 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public:
PatriciaTriePolicy(MmappedBuffer::MmappedBufferPtr mmappedBuffer)
: mMmappedBuffer(std::move(mmappedBuffer)),
- mHeaderPolicy(mMmappedBuffer->getBuffer(), FormatUtils::VERSION_2),
- mDictRoot(mMmappedBuffer->getBuffer() + mHeaderPolicy.getSize()),
- mDictBufferSize(mMmappedBuffer->getBufferSize() - mHeaderPolicy.getSize()),
+ mHeaderPolicy(mMmappedBuffer->getReadOnlyByteArrayView().data(),
+ FormatUtils::VERSION_2),
+ mDictRoot(mMmappedBuffer->getReadOnlyByteArrayView().data()
+ + mHeaderPolicy.getSize()),
+ mDictBufferSize(mMmappedBuffer->getReadOnlyByteArrayView().size()
+ - mHeaderPolicy.getSize()),
mBigramListPolicy(mDictRoot, mDictBufferSize), mShortcutListPolicy(mDictRoot),
mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy),
mPtNodeArrayReader(mDictRoot, mDictBufferSize),
@@ -70,8 +74,6 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int getShortcutPositionOfPtNode(const int ptNodePos) const;
- BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int ptNodePos) const;
-
const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
return &mHeaderPolicy;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dict_content.h
deleted file mode 100644
index c264aeac4..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dict_content.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_DICT_CONTENT_H
-#define LATINIME_DICT_CONTENT_H
-
-#include "defines.h"
-
-namespace latinime {
-
-class DictContent {
- public:
- virtual ~DictContent() {}
-
- protected:
- DictContent() {}
-
- private:
- DISALLOW_COPY_AND_ASSIGN(DictContent);
-};
-} // namespace latinime
-#endif /* LATINIME_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
new file mode 100644
index 000000000..5dc91ba10
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h"
+
+namespace latinime {
+
+bool LanguageModelDictContent::save(FILE *const file) const {
+ return mTrieMap.save(file);
+}
+
+bool LanguageModelDictContent::runGC(
+ const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+ const LanguageModelDictContent *const originalContent,
+ int *const outNgramCount) {
+ return runGCInner(terminalIdMap, originalContent->mTrieMap.getEntriesInRootLevel(),
+ 0 /* nextLevelBitmapEntryIndex */, outNgramCount);
+}
+
+ProbabilityEntry LanguageModelDictContent::getNgramProbabilityEntry(
+ const WordIdArrayView prevWordIds, const int wordId) const {
+ const int bitmapEntryIndex = getBitmapEntryIndex(prevWordIds);
+ if (bitmapEntryIndex == TrieMap::INVALID_INDEX) {
+ return ProbabilityEntry();
+ }
+ const TrieMap::Result result = mTrieMap.get(wordId, bitmapEntryIndex);
+ if (!result.mIsValid) {
+ // Not found.
+ return ProbabilityEntry();
+ }
+ return ProbabilityEntry::decode(result.mValue, mHasHistoricalInfo);
+}
+
+bool LanguageModelDictContent::setNgramProbabilityEntry(const WordIdArrayView prevWordIds,
+ const int terminalId, const ProbabilityEntry *const probabilityEntry) {
+ const int bitmapEntryIndex = getBitmapEntryIndex(prevWordIds);
+ if (bitmapEntryIndex == TrieMap::INVALID_INDEX) {
+ return false;
+ }
+ return mTrieMap.put(terminalId, probabilityEntry->encode(mHasHistoricalInfo), bitmapEntryIndex);
+}
+
+bool LanguageModelDictContent::runGCInner(
+ const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+ const TrieMap::TrieMapRange trieMapRange,
+ const int nextLevelBitmapEntryIndex, int *const outNgramCount) {
+ for (auto &entry : trieMapRange) {
+ const auto it = terminalIdMap->find(entry.key());
+ if (it == terminalIdMap->end() || it->second == Ver4DictConstants::NOT_A_TERMINAL_ID) {
+ // The word has been removed.
+ continue;
+ }
+ if (!mTrieMap.put(it->second, entry.value(), nextLevelBitmapEntryIndex)) {
+ return false;
+ }
+ if (outNgramCount) {
+ *outNgramCount += 1;
+ }
+ if (entry.hasNextLevelMap()) {
+ if (!runGCInner(terminalIdMap, entry.getEntriesInNextLevel(),
+ mTrieMap.getNextLevelBitmapEntryIndex(it->second, nextLevelBitmapEntryIndex),
+ outNgramCount)) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+int LanguageModelDictContent::getBitmapEntryIndex(const WordIdArrayView prevWordIds) const {
+ int bitmapEntryIndex = mTrieMap.getRootBitmapEntryIndex();
+ for (const int wordId : prevWordIds) {
+ const TrieMap::Result result = mTrieMap.get(wordId, bitmapEntryIndex);
+ if (!result.mIsValid) {
+ return TrieMap::INVALID_INDEX;
+ }
+ bitmapEntryIndex = result.mNextLevelBitmapEntryIndex;
+ }
+ return bitmapEntryIndex;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h
new file mode 100644
index 000000000..18f2e0170
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_LANGUAGE_MODEL_DICT_CONTENT_H
+#define LATINIME_LANGUAGE_MODEL_DICT_CONTENT_H
+
+#include <cstdio>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/utils/trie_map.h"
+#include "utils/byte_array_view.h"
+#include "utils/int_array_view.h"
+
+namespace latinime {
+
+/**
+ * Class representing language model.
+ *
+ * This class provides methods to get and store unigram/n-gram probability information and flags.
+ */
+class LanguageModelDictContent {
+ public:
+ LanguageModelDictContent(const ReadWriteByteArrayView trieMapBuffer,
+ const bool hasHistoricalInfo)
+ : mTrieMap(trieMapBuffer), mHasHistoricalInfo(hasHistoricalInfo) {}
+
+ explicit LanguageModelDictContent(const bool hasHistoricalInfo)
+ : mTrieMap(), mHasHistoricalInfo(hasHistoricalInfo) {}
+
+ bool isNearSizeLimit() const {
+ return mTrieMap.isNearSizeLimit();
+ }
+
+ bool save(FILE *const file) const;
+
+ bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+ const LanguageModelDictContent *const originalContent,
+ int *const outNgramCount);
+
+ ProbabilityEntry getProbabilityEntry(const int wordId) const {
+ return getNgramProbabilityEntry(WordIdArrayView(), wordId);
+ }
+
+ bool setProbabilityEntry(const int wordId, const ProbabilityEntry *const probabilityEntry) {
+ return setNgramProbabilityEntry(WordIdArrayView(), wordId, probabilityEntry);
+ }
+
+ ProbabilityEntry getNgramProbabilityEntry(const WordIdArrayView prevWordIds,
+ const int wordId) const;
+
+ bool setNgramProbabilityEntry(const WordIdArrayView prevWordIds, const int wordId,
+ const ProbabilityEntry *const probabilityEntry);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(LanguageModelDictContent);
+
+ TrieMap mTrieMap;
+ const bool mHasHistoricalInfo;
+
+ bool runGCInner(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+ const TrieMap::TrieMapRange trieMapRange, const int nextLevelBitmapEntryIndex,
+ int *const outNgramCount);
+
+ int getBitmapEntryIndex(const WordIdArrayView prevWordIds) const;
+};
+} // namespace latinime
+#endif /* LATINIME_LANGUAGE_MODEL_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp
deleted file mode 100644
index 2425b3b2f..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h"
-
-#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
-#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
-
-namespace latinime {
-
-const ProbabilityEntry ProbabilityDictContent::getProbabilityEntry(const int terminalId) const {
- if (terminalId < 0 || terminalId >= mSize) {
- // This method can be called with invalid terminal id during GC.
- return ProbabilityEntry(0 /* flags */, NOT_A_PROBABILITY);
- }
- const BufferWithExtendableBuffer *const buffer = getBuffer();
- int entryPos = getEntryPos(terminalId);
- const int flags = buffer->readUintAndAdvancePosition(
- Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &entryPos);
- const int probability = buffer->readUintAndAdvancePosition(
- Ver4DictConstants::PROBABILITY_SIZE, &entryPos);
- if (mHasHistoricalInfo) {
- const int timestamp = buffer->readUintAndAdvancePosition(
- Ver4DictConstants::TIME_STAMP_FIELD_SIZE, &entryPos);
- const int level = buffer->readUintAndAdvancePosition(
- Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, &entryPos);
- const int count = buffer->readUintAndAdvancePosition(
- Ver4DictConstants::WORD_COUNT_FIELD_SIZE, &entryPos);
- const HistoricalInfo historicalInfo(timestamp, level, count);
- return ProbabilityEntry(flags, probability, &historicalInfo);
- } else {
- return ProbabilityEntry(flags, probability);
- }
-}
-
-bool ProbabilityDictContent::setProbabilityEntry(const int terminalId,
- const ProbabilityEntry *const probabilityEntry) {
- if (terminalId < 0) {
- return false;
- }
- const int entryPos = getEntryPos(terminalId);
- if (terminalId >= mSize) {
- ProbabilityEntry dummyEntry;
- // Write new entry.
- int writingPos = getBuffer()->getTailPosition();
- while (writingPos <= entryPos) {
- // Fulfilling with dummy entries until writingPos.
- if (!writeEntry(&dummyEntry, writingPos)) {
- AKLOGE("Cannot write dummy entry. pos: %d, mSize: %d", writingPos, mSize);
- return false;
- }
- writingPos += getEntrySize();
- mSize++;
- }
- }
- return writeEntry(probabilityEntry, entryPos);
-}
-
-bool ProbabilityDictContent::flushToFile(FILE *const file) const {
- if (getEntryPos(mSize) < getBuffer()->getTailPosition()) {
- ProbabilityDictContent probabilityDictContentToWrite(mHasHistoricalInfo);
- for (int i = 0; i < mSize; ++i) {
- const ProbabilityEntry probabilityEntry = getProbabilityEntry(i);
- if (!probabilityDictContentToWrite.setProbabilityEntry(i, &probabilityEntry)) {
- AKLOGE("Cannot set probability entry in flushToFile. terminalId: %d", i);
- return false;
- }
- }
- return probabilityDictContentToWrite.flush(file);
- } else {
- return flush(file);
- }
-}
-
-bool ProbabilityDictContent::runGC(
- const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
- const ProbabilityDictContent *const originalProbabilityDictContent) {
- mSize = 0;
- for (TerminalPositionLookupTable::TerminalIdMap::const_iterator it = terminalIdMap->begin();
- it != terminalIdMap->end(); ++it) {
- const ProbabilityEntry probabilityEntry =
- originalProbabilityDictContent->getProbabilityEntry(it->first);
- if (!setProbabilityEntry(it->second, &probabilityEntry)) {
- AKLOGE("Cannot set probability entry in runGC. terminalId: %d", it->second);
- return false;
- }
- mSize++;
- }
- return true;
-}
-
-int ProbabilityDictContent::getEntrySize() const {
- if (mHasHistoricalInfo) {
- return Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE
- + Ver4DictConstants::PROBABILITY_SIZE
- + Ver4DictConstants::TIME_STAMP_FIELD_SIZE
- + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
- + Ver4DictConstants::WORD_COUNT_FIELD_SIZE;
- } else {
- return Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE
- + Ver4DictConstants::PROBABILITY_SIZE;
- }
-}
-
-int ProbabilityDictContent::getEntryPos(const int terminalId) const {
- return terminalId * getEntrySize();
-}
-
-bool ProbabilityDictContent::writeEntry(const ProbabilityEntry *const probabilityEntry,
- const int entryPos) {
- BufferWithExtendableBuffer *const bufferToWrite = getWritableBuffer();
- int writingPos = entryPos;
- if (!bufferToWrite->writeUintAndAdvancePosition(probabilityEntry->getFlags(),
- Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &writingPos)) {
- AKLOGE("Cannot write flags in probability dict content. pos: %d", writingPos);
- return false;
- }
- if (!bufferToWrite->writeUintAndAdvancePosition(probabilityEntry->getProbability(),
- Ver4DictConstants::PROBABILITY_SIZE, &writingPos)) {
- AKLOGE("Cannot write probability in probability dict content. pos: %d", writingPos);
- return false;
- }
- if (mHasHistoricalInfo) {
- const HistoricalInfo *const historicalInfo = probabilityEntry->getHistoricalInfo();
- if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getTimeStamp(),
- Ver4DictConstants::TIME_STAMP_FIELD_SIZE, &writingPos)) {
- AKLOGE("Cannot write timestamp in probability dict content. pos: %d", writingPos);
- return false;
- }
- if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getLevel(),
- Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, &writingPos)) {
- AKLOGE("Cannot write level in probability dict content. pos: %d", writingPos);
- return false;
- }
- if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getCount(),
- Ver4DictConstants::WORD_COUNT_FIELD_SIZE, &writingPos)) {
- AKLOGE("Cannot write count in probability dict content. pos: %d", writingPos);
- return false;
- }
- }
- return true;
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h
deleted file mode 100644
index 80e992c1c..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_PROBABILITY_DICT_CONTENT_H
-#define LATINIME_PROBABILITY_DICT_CONTENT_H
-
-#include <cstdint>
-#include <cstdio>
-
-#include "defines.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
-#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
-
-namespace latinime {
-
-class ProbabilityEntry;
-
-class ProbabilityDictContent : public SingleDictContent {
- public:
- ProbabilityDictContent(uint8_t *const buffer, const int bufferSize,
- const bool hasHistoricalInfo)
- : SingleDictContent(buffer, bufferSize),
- mHasHistoricalInfo(hasHistoricalInfo),
- mSize(getBuffer()->getTailPosition() / getEntrySize()) {}
-
- ProbabilityDictContent(const bool hasHistoricalInfo)
- : mHasHistoricalInfo(hasHistoricalInfo), mSize(0) {}
-
- const ProbabilityEntry getProbabilityEntry(const int terminalId) const;
-
- bool setProbabilityEntry(const int terminalId, const ProbabilityEntry *const probabilityEntry);
-
- bool flushToFile(FILE *const file) const;
-
- bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
- const ProbabilityDictContent *const originalProbabilityDictContent);
-
- private:
- DISALLOW_COPY_AND_ASSIGN(ProbabilityDictContent);
-
- int getEntrySize() const;
-
- int getEntryPos(const int terminalId) const;
-
- bool writeEntry(const ProbabilityEntry *const probabilityEntry, const int entryPos);
-
- bool mHasHistoricalInfo;
- int mSize;
-};
-} // namespace latinime
-#endif /* LATINIME_PROBABILITY_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h
index 36ba82be1..feff6b57f 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h
@@ -17,6 +17,9 @@
#ifndef LATINIME_PROBABILITY_ENTRY_H
#define LATINIME_PROBABILITY_ENTRY_H
+#include <climits>
+#include <cstdint>
+
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/utils/historical_info.h"
@@ -67,6 +70,50 @@ class ProbabilityEntry {
return &mHistoricalInfo;
}
+ uint64_t encode(const bool hasHistoricalInfo) const {
+ uint64_t encodedEntry = static_cast<uint64_t>(mFlags);
+ if (hasHistoricalInfo) {
+ encodedEntry = (encodedEntry << (Ver4DictConstants::TIME_STAMP_FIELD_SIZE * CHAR_BIT))
+ ^ static_cast<uint64_t>(mHistoricalInfo.getTimeStamp());
+ encodedEntry = (encodedEntry << (Ver4DictConstants::WORD_LEVEL_FIELD_SIZE * CHAR_BIT))
+ ^ static_cast<uint64_t>(mHistoricalInfo.getLevel());
+ encodedEntry = (encodedEntry << (Ver4DictConstants::WORD_COUNT_FIELD_SIZE * CHAR_BIT))
+ ^ static_cast<uint64_t>(mHistoricalInfo.getCount());
+ } else {
+ encodedEntry = (encodedEntry << (Ver4DictConstants::PROBABILITY_SIZE * CHAR_BIT))
+ ^ static_cast<uint64_t>(mProbability);
+ }
+ return encodedEntry;
+ }
+
+ static ProbabilityEntry decode(const uint64_t encodedEntry, const bool hasHistoricalInfo) {
+ if (hasHistoricalInfo) {
+ const int flags = readFromEncodedEntry(encodedEntry,
+ Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE,
+ Ver4DictConstants::TIME_STAMP_FIELD_SIZE
+ + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
+ + Ver4DictConstants::WORD_COUNT_FIELD_SIZE);
+ const int timestamp = readFromEncodedEntry(encodedEntry,
+ Ver4DictConstants::TIME_STAMP_FIELD_SIZE,
+ Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
+ + Ver4DictConstants::WORD_COUNT_FIELD_SIZE);
+ const int level = readFromEncodedEntry(encodedEntry,
+ Ver4DictConstants::WORD_LEVEL_FIELD_SIZE,
+ Ver4DictConstants::WORD_COUNT_FIELD_SIZE);
+ const int count = readFromEncodedEntry(encodedEntry,
+ Ver4DictConstants::WORD_COUNT_FIELD_SIZE, 0 /* pos */);
+ const HistoricalInfo historicalInfo(timestamp, level, count);
+ return ProbabilityEntry(flags, NOT_A_PROBABILITY, &historicalInfo);
+ } else {
+ const int flags = readFromEncodedEntry(encodedEntry,
+ Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE,
+ Ver4DictConstants::PROBABILITY_SIZE);
+ const int probability = readFromEncodedEntry(encodedEntry,
+ Ver4DictConstants::PROBABILITY_SIZE, 0 /* pos */);
+ return ProbabilityEntry(flags, probability);
+ }
+ }
+
private:
// Copy constructor is public to use this class as a type of return value.
DISALLOW_ASSIGNMENT_OPERATOR(ProbabilityEntry);
@@ -74,6 +121,11 @@ class ProbabilityEntry {
const int mFlags;
const int mProbability;
const HistoricalInfo mHistoricalInfo;
+
+ static int readFromEncodedEntry(const uint64_t encodedEntry, const int size, const int pos) {
+ return static_cast<int>(
+ (encodedEntry >> (pos * CHAR_BIT)) & ((1ull << (size * CHAR_BIT)) - 1));
+ }
};
} // namespace latinime
#endif /* LATINIME_PROBABILITY_ENTRY_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h
index 69a11425f..921774181 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h
@@ -21,17 +21,17 @@
#include <cstdio>
#include "defines.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
+#include "utils/byte_array_view.h"
namespace latinime {
-class SingleDictContent : public DictContent {
+class SingleDictContent {
public:
SingleDictContent(uint8_t *const buffer, const int bufferSize)
- : mExpandableContentBuffer(buffer, bufferSize,
+ : mExpandableContentBuffer(ReadWriteByteArrayView(buffer, bufferSize),
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE) {}
SingleDictContent()
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h
index cdf870bd2..c98dd11fd 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h
@@ -21,26 +21,29 @@
#include <cstdio>
#include "defines.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/sparse_table.h"
+#include "utils/byte_array_view.h"
namespace latinime {
// TODO: Support multiple contents.
-class SparseTableDictContent : public DictContent {
+class SparseTableDictContent {
public:
AK_FORCE_INLINE SparseTableDictContent(uint8_t *const *buffers, const int *bufferSizes,
const int sparseTableBlockSize, const int sparseTableDataSize)
- : mExpandableLookupTableBuffer(buffers[LOOKUP_TABLE_BUFFER_INDEX],
- bufferSizes[LOOKUP_TABLE_BUFFER_INDEX],
+ : mExpandableLookupTableBuffer(
+ ReadWriteByteArrayView(buffers[LOOKUP_TABLE_BUFFER_INDEX],
+ bufferSizes[LOOKUP_TABLE_BUFFER_INDEX]),
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
- mExpandableAddressTableBuffer(buffers[ADDRESS_TABLE_BUFFER_INDEX],
- bufferSizes[ADDRESS_TABLE_BUFFER_INDEX],
+ mExpandableAddressTableBuffer(
+ ReadWriteByteArrayView(buffers[ADDRESS_TABLE_BUFFER_INDEX],
+ bufferSizes[ADDRESS_TABLE_BUFFER_INDEX]),
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
- mExpandableContentBuffer(buffers[CONTENT_BUFFER_INDEX],
- bufferSizes[CONTENT_BUFFER_INDEX],
+ mExpandableContentBuffer(
+ ReadWriteByteArrayView(buffers[CONTENT_BUFFER_INDEX],
+ bufferSizes[CONTENT_BUFFER_INDEX]),
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer,
sparseTableBlockSize, sparseTableDataSize) {}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
index 36ab9963a..3c8008dc4 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
@@ -26,6 +26,7 @@
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
#include "suggest/policyimpl/dictionary/utils/file_utils.h"
+#include "utils/byte_array_view.h"
namespace latinime {
@@ -46,14 +47,16 @@ namespace latinime {
}
std::vector<uint8_t *> buffers;
std::vector<int> bufferSizes;
- uint8_t *const buffer = bodyBuffer->getBuffer();
+ const ReadWriteByteArrayView buffer = bodyBuffer->getReadWriteByteArrayView();
int position = 0;
- while (position < bodyBuffer->getBufferSize()) {
- const int bufferSize = ByteArrayUtils::readUint32AndAdvancePosition(buffer, &position);
- buffers.push_back(buffer + position);
- bufferSizes.push_back(bufferSize);
+ while (position < static_cast<int>(buffer.size())) {
+ const int bufferSize = ByteArrayUtils::readUint32AndAdvancePosition(
+ buffer.data(), &position);
+ const ReadWriteByteArrayView subBuffer = buffer.subView(position, bufferSize);
+ buffers.push_back(subBuffer.data());
+ bufferSizes.push_back(subBuffer.size());
position += bufferSize;
- if (bufferSize < 0 || position < 0 || position > bodyBuffer->getBufferSize()) {
+ if (bufferSize < 0 || position < 0 || position > static_cast<int>(buffer.size())) {
AKLOGE("The dict body file is corrupted.");
return Ver4DictBuffersPtr(nullptr);
}
@@ -118,7 +121,7 @@ bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath,
}
FILE *const file = fdopen(fd, "wb");
if (!file) {
- AKLOGE("fdopen failed for the file %s. errno: %d", filePath, errno);
+ AKLOGE("fdopen failed for the file %s. errno: %d", bodyFilePath, errno);
ASSERT(false);
return false;
}
@@ -146,27 +149,27 @@ bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath,
bool Ver4DictBuffers::flushDictBuffers(FILE *const file) const {
// Write trie.
if (!DictFileWritingUtils::writeBufferToFileTail(file, &mExpandableTrieBuffer)) {
- AKLOGE("Trie cannot be written. %s", tmpDirPath);
+ AKLOGE("Trie cannot be written.");
return false;
}
// Write terminal position lookup table.
if (!mTerminalPositionLookupTable.flushToFile(file)) {
- AKLOGE("Terminal position lookup table cannot be written. %s", tmpDirPath);
+ AKLOGE("Terminal position lookup table cannot be written.");
return false;
}
- // Write probability dict content.
- if (!mProbabilityDictContent.flushToFile(file)) {
- AKLOGE("Probability dict content cannot be written. %s", tmpDirPath);
+ // Write language model content.
+ if (!mLanguageModelDictContent.save(file)) {
+ AKLOGE("Language model dict content cannot be written.");
return false;
}
// Write bigram dict content.
if (!mBigramDictContent.flushToFile(file)) {
- AKLOGE("Bigram dict content cannot be written. %s", tmpDirPath);
+ AKLOGE("Bigram dict content cannot be written.");
return false;
}
// Write shortcut dict content.
if (!mShortcutDictContent.flushToFile(file)) {
- AKLOGE("Shortcut dict content cannot be written. %s", tmpDirPath);
+ AKLOGE("Shortcut dict content cannot be written.");
return false;
}
return true;
@@ -177,20 +180,21 @@ Ver4DictBuffers::Ver4DictBuffers(MmappedBuffer::MmappedBufferPtr &&headerBuffer,
const FormatUtils::FORMAT_VERSION formatVersion,
const std::vector<uint8_t *> &contentBuffers, const std::vector<int> &contentBufferSizes)
: mHeaderBuffer(std::move(headerBuffer)), mDictBuffer(std::move(bodyBuffer)),
- mHeaderPolicy(mHeaderBuffer->getBuffer(), formatVersion),
- mExpandableHeaderBuffer(mHeaderBuffer ? mHeaderBuffer->getBuffer() : nullptr,
- mHeaderPolicy.getSize(),
+ mHeaderPolicy(mHeaderBuffer->getReadOnlyByteArrayView().data(), formatVersion),
+ mExpandableHeaderBuffer(mHeaderBuffer->getReadWriteByteArrayView(),
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
- mExpandableTrieBuffer(contentBuffers[Ver4DictConstants::TRIE_BUFFER_INDEX],
- contentBufferSizes[Ver4DictConstants::TRIE_BUFFER_INDEX],
+ mExpandableTrieBuffer(
+ ReadWriteByteArrayView(contentBuffers[Ver4DictConstants::TRIE_BUFFER_INDEX],
+ contentBufferSizes[Ver4DictConstants::TRIE_BUFFER_INDEX]),
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mTerminalPositionLookupTable(
contentBuffers[Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX],
contentBufferSizes[
Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX]),
- mProbabilityDictContent(
- contentBuffers[Ver4DictConstants::PROBABILITY_BUFFER_INDEX],
- contentBufferSizes[Ver4DictConstants::PROBABILITY_BUFFER_INDEX],
+ mLanguageModelDictContent(
+ ReadWriteByteArrayView(
+ contentBuffers[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX],
+ contentBufferSizes[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX]),
mHeaderPolicy.hasHistoricalInfoOfWords()),
mBigramDictContent(&contentBuffers[Ver4DictConstants::BIGRAM_BUFFERS_INDEX],
&contentBufferSizes[Ver4DictConstants::BIGRAM_BUFFERS_INDEX],
@@ -203,7 +207,7 @@ Ver4DictBuffers::Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const i
: mHeaderBuffer(nullptr), mDictBuffer(nullptr), mHeaderPolicy(headerPolicy),
mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
mExpandableTrieBuffer(maxTrieSize), mTerminalPositionLookupTable(),
- mProbabilityDictContent(headerPolicy->hasHistoricalInfoOfWords()),
+ mLanguageModelDictContent(headerPolicy->hasHistoricalInfoOfWords()),
mBigramDictContent(headerPolicy->hasHistoricalInfoOfWords()), mShortcutDictContent(),
mIsUpdatable(true) {}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
index 433411cb8..68027dcb8 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
@@ -23,7 +23,7 @@
#include "defines.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
@@ -52,7 +52,7 @@ class Ver4DictBuffers {
AK_FORCE_INLINE bool isNearSizeLimit() const {
return mExpandableTrieBuffer.isNearSizeLimit()
|| mTerminalPositionLookupTable.isNearSizeLimit()
- || mProbabilityDictContent.isNearSizeLimit()
+ || mLanguageModelDictContent.isNearSizeLimit()
|| mBigramDictContent.isNearSizeLimit()
|| mShortcutDictContent.isNearSizeLimit();
}
@@ -81,12 +81,12 @@ class Ver4DictBuffers {
return &mTerminalPositionLookupTable;
}
- AK_FORCE_INLINE ProbabilityDictContent *getMutableProbabilityDictContent() {
- return &mProbabilityDictContent;
+ AK_FORCE_INLINE LanguageModelDictContent *getMutableLanguageModelDictContent() {
+ return &mLanguageModelDictContent;
}
- AK_FORCE_INLINE const ProbabilityDictContent *getProbabilityDictContent() const {
- return &mProbabilityDictContent;
+ AK_FORCE_INLINE const LanguageModelDictContent *getLanguageModelDictContent() const {
+ return &mLanguageModelDictContent;
}
AK_FORCE_INLINE BigramDictContent *getMutableBigramDictContent() {
@@ -135,7 +135,7 @@ class Ver4DictBuffers {
BufferWithExtendableBuffer mExpandableHeaderBuffer;
BufferWithExtendableBuffer mExpandableTrieBuffer;
TerminalPositionLookupTable mTerminalPositionLookupTable;
- ProbabilityDictContent mProbabilityDictContent;
+ LanguageModelDictContent mLanguageModelDictContent;
BigramDictContent mBigramDictContent;
ShortcutDictContent mShortcutDictContent;
const int mIsUpdatable;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
index d45dfe377..93d4e562d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
@@ -27,18 +27,20 @@ const int Ver4DictConstants::MAX_DICTIONARY_SIZE = 8 * 1024 * 1024;
// limited to 1MB to prevent from inefficient traversing.
const int Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE = 1 * 1024 * 1024;
-// NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT for Trie, TerminalAddressLookupTable and Probability.
+// NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT for Trie and TerminalAddressLookupTable.
+// NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT for language model.
// NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT for bigram and shortcut.
const size_t Ver4DictConstants::NUM_OF_CONTENT_BUFFERS_IN_BODY_FILE =
- NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT * 3
+ NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT * 2
+ + NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT
+ NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT * 2;
const int Ver4DictConstants::TRIE_BUFFER_INDEX = 0;
const int Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX =
TRIE_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT;
-const int Ver4DictConstants::PROBABILITY_BUFFER_INDEX =
+const int Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX =
TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT;
const int Ver4DictConstants::BIGRAM_BUFFERS_INDEX =
- PROBABILITY_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT;
+ LANGUAGE_MODEL_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT;
const int Ver4DictConstants::SHORTCUT_BUFFERS_INDEX =
BIGRAM_BUFFERS_INDEX + NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT;
@@ -73,5 +75,6 @@ const int Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK = 0x80;
const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT = 1;
const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT = 3;
+const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT = 1;
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
index e8f6739ba..6950ca70f 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
@@ -35,7 +35,7 @@ class Ver4DictConstants {
static const size_t NUM_OF_CONTENT_BUFFERS_IN_BODY_FILE;
static const int TRIE_BUFFER_INDEX;
static const int TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX;
- static const int PROBABILITY_BUFFER_INDEX;
+ static const int LANGUAGE_MODEL_BUFFER_INDEX;
static const int BIGRAM_BUFFERS_INDEX;
static const int SHORTCUT_BUFFERS_INDEX;
@@ -71,6 +71,7 @@ class Ver4DictConstants {
static const size_t NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT;
static const size_t NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT;
+ static const size_t NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT;
};
} // namespace latinime
#endif /* LATINIME_VER4_DICT_CONSTANTS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
index 0a435e91c..731092efd 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
@@ -18,7 +18,7 @@
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
@@ -61,8 +61,9 @@ const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProce
terminalIdFieldPos += mBuffer->getOriginalBufferSize();
}
terminalId = Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(dictBuf, &pos);
+ // TODO: Quit reading probability here.
const ProbabilityEntry probabilityEntry =
- mProbabilityDictContent->getProbabilityEntry(terminalId);
+ mLanguageModelDictContent->getProbabilityEntry(terminalId);
if (probabilityEntry.hasHistoricalInfo()) {
probability = ForgettingCurveUtils::decodeProbability(
probabilityEntry.getHistoricalInfo(), mHeaderPolicy);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h
index 22ed4a6c0..a91ad5728 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h
@@ -25,18 +25,18 @@ namespace latinime {
class BufferWithExtendableBuffer;
class HeaderPolicy;
-class ProbabilityDictContent;
+class LanguageModelDictContent;
/*
* This class is used for helping to read nodes of ver4 patricia trie. This class handles moved
- * node and reads node attributes including probability form probabilityBuffer.
+ * node and reads node attributes including probability form language model.
*/
class Ver4PatriciaTrieNodeReader : public PtNodeReader {
public:
Ver4PatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer,
- const ProbabilityDictContent *const probabilityDictContent,
+ const LanguageModelDictContent *const languageModelDictContent,
const HeaderPolicy *const headerPolicy)
- : mBuffer(buffer), mProbabilityDictContent(probabilityDictContent),
+ : mBuffer(buffer), mLanguageModelDictContent(languageModelDictContent),
mHeaderPolicy(headerPolicy) {}
~Ver4PatriciaTrieNodeReader() {}
@@ -50,7 +50,7 @@ class Ver4PatriciaTrieNodeReader : public PtNodeReader {
DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeReader);
const BufferWithExtendableBuffer *const mBuffer;
- const ProbabilityDictContent *const mProbabilityDictContent;
+ const LanguageModelDictContent *const mLanguageModelDictContent;
const HeaderPolicy *const mHeaderPolicy;
const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
index 3d8da9173..857222f5d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
@@ -143,11 +143,11 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeUnigramProperty(
return false;
}
const ProbabilityEntry originalProbabilityEntry =
- mBuffers->getProbabilityDictContent()->getProbabilityEntry(
+ mBuffers->getLanguageModelDictContent()->getProbabilityEntry(
toBeUpdatedPtNodeParams->getTerminalId());
const ProbabilityEntry probabilityEntry = createUpdatedEntryFrom(&originalProbabilityEntry,
unigramProperty);
- return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
+ return mBuffers->getMutableLanguageModelDictContent()->setProbabilityEntry(
toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry);
}
@@ -158,14 +158,14 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbabilityAndGetNeedsToKeepPtNodeA
return false;
}
const ProbabilityEntry originalProbabilityEntry =
- mBuffers->getProbabilityDictContent()->getProbabilityEntry(
+ mBuffers->getLanguageModelDictContent()->getProbabilityEntry(
toBeUpdatedPtNodeParams->getTerminalId());
if (originalProbabilityEntry.hasHistoricalInfo()) {
const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
originalProbabilityEntry.getHistoricalInfo(), mHeaderPolicy);
const ProbabilityEntry probabilityEntry =
originalProbabilityEntry.createEntryWithUpdatedHistoricalInfo(&historicalInfo);
- if (!mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
+ if (!mBuffers->getMutableLanguageModelDictContent()->setProbabilityEntry(
toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry)) {
AKLOGE("Cannot write updated probability entry. terminalId: %d",
toBeUpdatedPtNodeParams->getTerminalId());
@@ -218,26 +218,23 @@ bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
ProbabilityEntry newProbabilityEntry;
const ProbabilityEntry probabilityEntryToWrite = createUpdatedEntryFrom(
&newProbabilityEntry, unigramProperty);
- return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(terminalId,
- &probabilityEntryToWrite);
+ return mBuffers->getMutableLanguageModelDictContent()->setProbabilityEntry(
+ terminalId, &probabilityEntryToWrite);
}
-bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry(
- const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam,
+bool Ver4PatriciaTrieNodeWriter::addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) {
- if (!mBigramPolicy->addNewEntry(sourcePtNodeParams->getTerminalId(),
- targetPtNodeParam->getTerminalId(), bigramProperty, outAddedNewBigram)) {
+ if (!mBigramPolicy->addNewEntry(prevWordIds[0], wordId, bigramProperty, outAddedNewBigram)) {
AKLOGE("Cannot add new bigram entry. terminalId: %d, targetTerminalId: %d",
- sourcePtNodeParams->getTerminalId(), targetPtNodeParam->getTerminalId());
+ prevWordIds[0], wordId);
return false;
}
return true;
}
-bool Ver4PatriciaTrieNodeWriter::removeBigramEntry(
- const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam) {
- return mBigramPolicy->removeEntry(sourcePtNodeParams->getTerminalId(),
- targetPtNodeParam->getTerminalId());
+bool Ver4PatriciaTrieNodeWriter::removeNgramEntry(const WordIdArrayView prevWordIds,
+ const int wordId) {
+ return mBigramPolicy->removeEntry(prevWordIds[0], wordId);
}
bool Ver4PatriciaTrieNodeWriter::updateAllBigramEntriesAndDeleteUselessEntries(
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
index 162dc9b1d..6703dba04 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
@@ -75,12 +75,10 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos);
- virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
- const PtNodeParams *const targetPtNodeParam, const BigramProperty *const bigramProperty,
- bool *const outAddedNewBigram);
+ virtual bool addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
+ const BigramProperty *const bigramProperty, bool *const outAddedNewEntry);
- virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,
- const PtNodeParams *const targetPtNodeParam);
+ virtual bool removeNgramEntry(const WordIdArrayView prevWordIds, const int wordId);
virtual bool updateAllBigramEntriesAndDeleteUselessEntries(
const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index 22f7e1182..723808399 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -132,8 +132,8 @@ int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const int *const prevWordsPtN
return NOT_A_PROBABILITY;
}
if (prevWordsPtNodePos) {
- BinaryDictionaryBigramsIterator bigramsIt =
- getBigramsIteratorOfPtNode(prevWordsPtNodePos[0]);
+ const int bigramsPosition = getBigramsPositionOfPtNode(prevWordsPtNodePos[0]);
+ BinaryDictionaryBigramsIterator bigramsIt(&mBigramPolicy, bigramsPosition);
while (bigramsIt.hasNext()) {
bigramsIt.next();
if (bigramsIt.getBigramPos() == ptNodePos
@@ -151,7 +151,8 @@ void Ver4PatriciaTriePolicy::iterateNgramEntries(const int *const prevWordsPtNod
if (!prevWordsPtNodePos) {
return;
}
- BinaryDictionaryBigramsIterator bigramsIt = getBigramsIteratorOfPtNode(prevWordsPtNodePos[0]);
+ const int bigramsPosition = getBigramsPositionOfPtNode(prevWordsPtNodePos[0]);
+ BinaryDictionaryBigramsIterator bigramsIt(&mBigramPolicy, bigramsPosition);
while (bigramsIt.hasNext()) {
bigramsIt.next();
listener->onVisitEntry(bigramsIt.getProbability(), bigramsIt.getBigramPos());
@@ -170,12 +171,6 @@ int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) con
ptNodeParams.getTerminalId());
}
-BinaryDictionaryBigramsIterator Ver4PatriciaTriePolicy::getBigramsIteratorOfPtNode(
- const int ptNodePos) const {
- const int bigramsPosition = getBigramsPositionOfPtNode(ptNodePos);
- return BinaryDictionaryBigramsIterator(&mBigramPolicy, bigramsPosition);
-}
-
int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS;
@@ -297,6 +292,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
false /* tryLowerCaseSearch */);
+ const auto prevWordsPtNodePosView = PtNodePosArrayView::fromFixedSizeArray(prevWordsPtNodePos);
// TODO: Support N-gram.
if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)) {
@@ -324,10 +320,10 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
if (word1Pos == NOT_A_DICT_POS) {
return false;
}
- bool addedNewBigram = false;
- if (mUpdatingHelper.addBigramWords(prevWordsPtNodePos[0], word1Pos, bigramProperty,
- &addedNewBigram)) {
- if (addedNewBigram) {
+ bool addedNewEntry = false;
+ if (mUpdatingHelper.addNgramEntry(prevWordsPtNodePosView, word1Pos, bigramProperty,
+ &addedNewEntry)) {
+ if (addedNewEntry) {
mBigramCount++;
}
return true;
@@ -357,6 +353,7 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
false /* tryLowerCaseSerch */);
+ const auto prevWordsPtNodePosView = PtNodePosArrayView::fromFixedSizeArray(prevWordsPtNodePos);
// TODO: Support N-gram.
if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
return false;
@@ -366,7 +363,7 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
if (wordPos == NOT_A_DICT_POS) {
return false;
}
- if (mUpdatingHelper.removeBigramWords(prevWordsPtNodePos[0], wordPos)) {
+ if (mUpdatingHelper.removeNgramEntry(prevWordsPtNodePosView, wordPos)) {
mBigramCount--;
return true;
} else {
@@ -457,7 +454,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
std::vector<int> codePointVector(ptNodeParams.getCodePoints(),
ptNodeParams.getCodePoints() + ptNodeParams.getCodePointCount());
const ProbabilityEntry probabilityEntry =
- mBuffers->getProbabilityDictContent()->getProbabilityEntry(
+ mBuffers->getLanguageModelDictContent()->getProbabilityEntry(
ptNodeParams.getTerminalId());
const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo();
// Fetch bigram information.
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
index c5b6a80c0..faad4290d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
@@ -46,7 +46,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
mBuffers->getTerminalPositionLookupTable(), mHeaderPolicy),
mShortcutPolicy(mBuffers->getMutableShortcutDictContent(),
mBuffers->getTerminalPositionLookupTable()),
- mNodeReader(mDictBuffer, mBuffers->getProbabilityDictContent(), mHeaderPolicy),
+ mNodeReader(mDictBuffer, mBuffers->getLanguageModelDictContent(), mHeaderPolicy),
mPtNodeArrayReader(mDictBuffer),
mNodeWriter(mDictBuffer, mBuffers.get(), mHeaderPolicy, &mNodeReader,
&mPtNodeArrayReader, &mBigramPolicy, &mShortcutPolicy),
@@ -79,8 +79,6 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int getShortcutPositionOfPtNode(const int ptNodePos) const;
- BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int ptNodePos) const;
-
const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
return mHeaderPolicy;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
index 0e658f8e3..4220312e0 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
@@ -75,7 +75,7 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
const HeaderPolicy *const headerPolicy, Ver4DictBuffers *const buffersToWrite,
int *const outUnigramCount, int *const outBigramCount) {
Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(),
- mBuffers->getProbabilityDictContent(), headerPolicy);
+ mBuffers->getLanguageModelDictContent(), headerPolicy);
Ver4PtNodeArrayReader ptNodeArrayReader(mBuffers->getTrieBuffer());
Ver4BigramListPolicy bigramPolicy(mBuffers->getMutableBigramDictContent(),
mBuffers->getTerminalPositionLookupTable(), headerPolicy);
@@ -138,7 +138,7 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
// Create policy instances for the GCed dictionary.
Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(),
- buffersToWrite->getProbabilityDictContent(), headerPolicy);
+ buffersToWrite->getLanguageModelDictContent(), headerPolicy);
Ver4PtNodeArrayReader newPtNodeArrayreader(buffersToWrite->getTrieBuffer());
Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getMutableBigramDictContent(),
buffersToWrite->getTerminalPositionLookupTable(), headerPolicy);
@@ -154,8 +154,8 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
return false;
}
// Run GC for probability dict content.
- if (!buffersToWrite->getMutableProbabilityDictContent()->runGC(&terminalIdMap,
- mBuffers->getProbabilityDictContent())) {
+ if (!buffersToWrite->getMutableLanguageModelDictContent()->runGC(&terminalIdMap,
+ mBuffers->getLanguageModelDictContent(), nullptr /* outNgramCount */)) {
return false;
}
// Run GC for bigram dict content.
@@ -201,7 +201,7 @@ bool Ver4PatriciaTrieWritingHelper::truncateUnigrams(
continue;
}
const ProbabilityEntry probabilityEntry =
- mBuffers->getProbabilityDictContent()->getProbabilityEntry(i);
+ mBuffers->getLanguageModelDictContent()->getProbabilityEntry(i);
const int probability = probabilityEntry.hasHistoricalInfo() ?
ForgettingCurveUtils::decodeProbability(
probabilityEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) :
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
index 825b72c6a..833063c17 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
@@ -25,7 +25,7 @@ const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 12
uint32_t BufferWithExtendableBuffer::readUint(const int size, const int pos) const {
const bool readingPosIsInAdditionalBuffer = isInAdditionalBuffer(pos);
- const int posInBuffer = readingPosIsInAdditionalBuffer ? pos - mOriginalBufferSize : pos;
+ const int posInBuffer = readingPosIsInAdditionalBuffer ? pos - mOriginalBuffer.size() : pos;
return ByteArrayUtils::readUint(getBuffer(readingPosIsInAdditionalBuffer), size, posInBuffer);
}
@@ -40,12 +40,12 @@ void BufferWithExtendableBuffer::readCodePointsAndAdvancePosition(const int maxC
int *const outCodePoints, int *outCodePointCount, int *const pos) const {
const bool readingPosIsInAdditionalBuffer = isInAdditionalBuffer(*pos);
if (readingPosIsInAdditionalBuffer) {
- *pos -= mOriginalBufferSize;
+ *pos -= mOriginalBuffer.size();
}
*outCodePointCount = ByteArrayUtils::readStringAndAdvancePosition(
getBuffer(readingPosIsInAdditionalBuffer), maxCodePointCount, outCodePoints, pos);
if (readingPosIsInAdditionalBuffer) {
- *pos += mOriginalBufferSize;
+ *pos += mOriginalBuffer.size();
}
}
@@ -69,13 +69,14 @@ bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data
return false;
}
const bool usesAdditionalBuffer = isInAdditionalBuffer(*pos);
- uint8_t *const buffer = usesAdditionalBuffer ? &mAdditionalBuffer[0] : mOriginalBuffer;
+ uint8_t *const buffer =
+ usesAdditionalBuffer ? mAdditionalBuffer.data() : mOriginalBuffer.data();
if (usesAdditionalBuffer) {
- *pos -= mOriginalBufferSize;
+ *pos -= mOriginalBuffer.size();
}
ByteArrayUtils::writeUintAndAdvancePosition(buffer, data, size, pos);
if (usesAdditionalBuffer) {
- *pos += mOriginalBufferSize;
+ *pos += mOriginalBuffer.size();
}
return true;
}
@@ -88,14 +89,15 @@ bool BufferWithExtendableBuffer::writeCodePointsAndAdvancePosition(const int *co
return false;
}
const bool usesAdditionalBuffer = isInAdditionalBuffer(*pos);
- uint8_t *const buffer = usesAdditionalBuffer ? &mAdditionalBuffer[0] : mOriginalBuffer;
+ uint8_t *const buffer =
+ usesAdditionalBuffer ? mAdditionalBuffer.data() : mOriginalBuffer.data();
if (usesAdditionalBuffer) {
- *pos -= mOriginalBufferSize;
+ *pos -= mOriginalBuffer.size();
}
ByteArrayUtils::writeCodePointsAndAdvancePosition(buffer, codePoints, codePointCount,
writesTerminator, pos);
if (usesAdditionalBuffer) {
- *pos += mOriginalBufferSize;
+ *pos += mOriginalBuffer.size();
}
return true;
}
@@ -119,7 +121,7 @@ bool BufferWithExtendableBuffer::checkAndPrepareWriting(const int pos, const int
const size_t totalRequiredSize = static_cast<size_t>(pos + size);
if (!isInAdditionalBuffer(pos)) {
// Here don't need to care about the additional buffer.
- if (static_cast<size_t>(mOriginalBufferSize) < totalRequiredSize) {
+ if (mOriginalBuffer.size() < totalRequiredSize) {
// Violate the boundary.
return false;
}
@@ -137,7 +139,7 @@ bool BufferWithExtendableBuffer::checkAndPrepareWriting(const int pos, const int
return false;
}
const size_t extendSize = totalRequiredSize -
- std::min(mAdditionalBuffer.size() + mOriginalBufferSize, totalRequiredSize);
+ std::min(mAdditionalBuffer.size() + mOriginalBuffer.size(), totalRequiredSize);
if (extendSize > 0 && !extendBuffer(extendSize)) {
// Failed to extend the buffer.
return false;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
index 5e1362eee..fad83aa25 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
@@ -23,6 +23,7 @@
#include "defines.h"
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
+#include "utils/byte_array_view.h"
namespace latinime {
@@ -34,20 +35,18 @@ class BufferWithExtendableBuffer {
public:
static const size_t DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE;
- BufferWithExtendableBuffer(uint8_t *const originalBuffer, const int originalBufferSize,
+ BufferWithExtendableBuffer(const ReadWriteByteArrayView originalBuffer,
const int maxAdditionalBufferSize)
- : mOriginalBuffer(originalBuffer), mOriginalBufferSize(originalBufferSize),
- mAdditionalBuffer(0), mUsedAdditionalBufferSize(0),
+ : mOriginalBuffer(originalBuffer), mAdditionalBuffer(), mUsedAdditionalBufferSize(0),
mMaxAdditionalBufferSize(maxAdditionalBufferSize) {}
// Without original buffer.
BufferWithExtendableBuffer(const int maxAdditionalBufferSize)
- : mOriginalBuffer(0), mOriginalBufferSize(0),
- mAdditionalBuffer(0), mUsedAdditionalBufferSize(0),
+ : mOriginalBuffer(), mAdditionalBuffer(), mUsedAdditionalBufferSize(0),
mMaxAdditionalBufferSize(maxAdditionalBufferSize) {}
AK_FORCE_INLINE int getTailPosition() const {
- return mOriginalBufferSize + mUsedAdditionalBufferSize;
+ return mOriginalBuffer.size() + mUsedAdditionalBufferSize;
}
AK_FORCE_INLINE int getUsedAdditionalBufferSize() const {
@@ -58,16 +57,16 @@ class BufferWithExtendableBuffer {
* For reading.
*/
AK_FORCE_INLINE bool isInAdditionalBuffer(const int position) const {
- return position >= mOriginalBufferSize;
+ return position >= static_cast<int>(mOriginalBuffer.size());
}
// TODO: Resolve the issue that the address can be changed when the vector is resized.
// CAVEAT!: Be careful about array out of bound access with buffers
AK_FORCE_INLINE const uint8_t *getBuffer(const bool usesAdditionalBuffer) const {
if (usesAdditionalBuffer) {
- return &mAdditionalBuffer[0];
+ return mAdditionalBuffer.data();
} else {
- return mOriginalBuffer;
+ return mOriginalBuffer.data();
}
}
@@ -79,7 +78,7 @@ class BufferWithExtendableBuffer {
int *const outCodePoints, int *outCodePointCount, int *const pos) const;
AK_FORCE_INLINE int getOriginalBufferSize() const {
- return mOriginalBufferSize;
+ return mOriginalBuffer.size();
}
AK_FORCE_INLINE bool isNearSizeLimit() const {
@@ -110,8 +109,7 @@ class BufferWithExtendableBuffer {
static const int NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE;
static const size_t EXTEND_ADDITIONAL_BUFFER_SIZE_STEP;
- uint8_t *const mOriginalBuffer;
- const int mOriginalBufferSize;
+ const ReadWriteByteArrayView mOriginalBuffer;
std::vector<uint8_t> mAdditionalBuffer;
int mUsedAdditionalBufferSize;
const size_t mMaxAdditionalBufferSize;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h
index 8460087ab..e25310373 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h
@@ -21,6 +21,7 @@
#include <memory>
#include "defines.h"
+#include "utils/byte_array_view.h"
namespace latinime {
@@ -39,12 +40,12 @@ class MmappedBuffer {
~MmappedBuffer();
- AK_FORCE_INLINE uint8_t *getBuffer() const {
- return mBuffer;
+ ReadWriteByteArrayView getReadWriteByteArrayView() const {
+ return mByteArrayView;
}
- AK_FORCE_INLINE int getBufferSize() const {
- return mBufferSize;
+ ReadOnlyByteArrayView getReadOnlyByteArrayView() const {
+ return mByteArrayView.getReadOnlyView();
}
AK_FORCE_INLINE bool isUpdatable() const {
@@ -55,18 +56,17 @@ class MmappedBuffer {
AK_FORCE_INLINE MmappedBuffer(uint8_t *const buffer, const int bufferSize,
void *const mmappedBuffer, const int alignedSize, const int mmapFd,
const bool isUpdatable)
- : mBuffer(buffer), mBufferSize(bufferSize), mMmappedBuffer(mmappedBuffer),
+ : mByteArrayView(buffer, bufferSize), mMmappedBuffer(mmappedBuffer),
mAlignedSize(alignedSize), mMmapFd(mmapFd), mIsUpdatable(isUpdatable) {}
// Empty file. We have to handle an empty file as a valid part of a dictionary.
AK_FORCE_INLINE MmappedBuffer(const bool isUpdatable)
- : mBuffer(nullptr), mBufferSize(0), mMmappedBuffer(nullptr), mAlignedSize(0),
+ : mByteArrayView(), mMmappedBuffer(nullptr), mAlignedSize(0),
mMmapFd(0), mIsUpdatable(isUpdatable) {}
DISALLOW_IMPLICIT_CONSTRUCTORS(MmappedBuffer);
- uint8_t *const mBuffer;
- const int mBufferSize;
+ const ReadWriteByteArrayView mByteArrayView;
void *const mMmappedBuffer;
const int mAlignedSize;
const int mMmapFd;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.cpp
index c70047638..407b8efd0 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.cpp
@@ -16,6 +16,8 @@
#include "suggest/policyimpl/dictionary/utils/trie_map.h"
+#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
+
namespace latinime {
const int TrieMap::INVALID_INDEX = -1;
@@ -41,6 +43,9 @@ TrieMap::TrieMap() : mBuffer(MAX_BUFFER_SIZE) {
writeEntry(EMPTY_BITMAP_ENTRY, ROOT_BITMAP_ENTRY_INDEX);
}
+TrieMap::TrieMap(const ReadWriteByteArrayView buffer)
+ : mBuffer(buffer, BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE) {}
+
void TrieMap::dump(const int from, const int to) const {
AKLOGI("BufSize: %d", mBuffer.getTailPosition());
for (int i = from; i < to; ++i) {
@@ -98,6 +103,11 @@ bool TrieMap::put(const int key, const uint64_t value, const int bitmapEntryInde
return putInternal(unsignedKey, value, getBitShuffledKey(unsignedKey), bitmapEntryIndex,
readEntry(bitmapEntryIndex), 0 /* level */);
}
+
+bool TrieMap::save(FILE *const file) const {
+ return DictFileWritingUtils::writeBufferToFileTail(file, &mBuffer);
+}
+
/**
* Iterate next entry in a certain level.
*
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h b/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h
index b5bcc3bc8..3e5c4010c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h
@@ -19,10 +19,12 @@
#include <climits>
#include <cstdint>
+#include <cstdio>
#include <vector>
#include "defines.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "utils/byte_array_view.h"
namespace latinime {
@@ -159,12 +161,18 @@ class TrieMap {
static const uint64_t MAX_VALUE;
TrieMap();
+ // Construct TrieMap using existing data in the memory region written by save().
+ TrieMap(const ReadWriteByteArrayView buffer);
void dump(const int from = 0, const int to = 0) const;
bool isNearSizeLimit() const {
return mBuffer.isNearSizeLimit();
}
+ int getRootBitmapEntryIndex() const {
+ return ROOT_BITMAP_ENTRY_INDEX;
+ }
+
// Returns bitmapEntryIndex. Create the next level map if it doesn't exist.
int getNextLevelBitmapEntryIndex(const int key) {
return getNextLevelBitmapEntryIndex(key, ROOT_BITMAP_ENTRY_INDEX);
@@ -192,6 +200,8 @@ class TrieMap {
return TrieMapRange(this, bitmapEntryIndex);
}
+ bool save(FILE *const file) const;
+
private:
DISALLOW_COPY_AND_ASSIGN(TrieMap);
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_scoring.h b/native/jni/src/suggest/policyimpl/typing/typing_scoring.h
index 66ea62406..04cb6603a 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_scoring.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_scoring.h
@@ -69,10 +69,6 @@ class TypingScoring : public Scoring {
return 0.0f;
}
- AK_FORCE_INLINE bool doesAutoCorrectValidWord() const {
- return false;
- }
-
AK_FORCE_INLINE bool autoCorrectsToMultiWordSuggestionIfTop() const {
return true;
}
diff --git a/native/jni/src/utils/byte_array_view.h b/native/jni/src/utils/byte_array_view.h
new file mode 100644
index 000000000..2c97c6d58
--- /dev/null
+++ b/native/jni/src/utils/byte_array_view.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_BYTE_ARRAY_VIEW_H
+#define LATINIME_BYTE_ARRAY_VIEW_H
+
+#include <cstdint>
+#include <cstdlib>
+
+#include "defines.h"
+
+namespace latinime {
+
+/**
+ * Helper class used to keep track of read accesses for a given memory region.
+ */
+class ReadOnlyByteArrayView {
+ public:
+ ReadOnlyByteArrayView() : mPtr(nullptr), mSize(0) {}
+
+ ReadOnlyByteArrayView(const uint8_t *const ptr, const size_t size)
+ : mPtr(ptr), mSize(size) {}
+
+ AK_FORCE_INLINE size_t size() const {
+ return mSize;
+ }
+
+ AK_FORCE_INLINE const uint8_t *data() const {
+ return mPtr;
+ }
+
+ private:
+ DISALLOW_ASSIGNMENT_OPERATOR(ReadOnlyByteArrayView);
+
+ const uint8_t *const mPtr;
+ const size_t mSize;
+};
+
+/**
+ * Helper class used to keep track of read-write accesses for a given memory region.
+ */
+class ReadWriteByteArrayView {
+ public:
+ ReadWriteByteArrayView() : mPtr(nullptr), mSize(0) {}
+
+ ReadWriteByteArrayView(uint8_t *const ptr, const size_t size)
+ : mPtr(ptr), mSize(size) {}
+
+ AK_FORCE_INLINE size_t size() const {
+ return mSize;
+ }
+
+ AK_FORCE_INLINE uint8_t *data() const {
+ return mPtr;
+ }
+
+ AK_FORCE_INLINE ReadOnlyByteArrayView getReadOnlyView() const {
+ return ReadOnlyByteArrayView(mPtr, mSize);
+ }
+
+ ReadWriteByteArrayView subView(const size_t start, const size_t n) const {
+ ASSERT(start + n <= mSize);
+ return ReadWriteByteArrayView(mPtr + start, n);
+ }
+
+ private:
+ DISALLOW_ASSIGNMENT_OPERATOR(ReadWriteByteArrayView);
+
+ uint8_t *const mPtr;
+ const size_t mSize;
+};
+
+} // namespace latinime
+#endif // LATINIME_BYTE_ARRAY_VIEW_H
diff --git a/native/jni/src/utils/int_array_view.h b/native/jni/src/utils/int_array_view.h
new file mode 100644
index 000000000..c1ddc9812
--- /dev/null
+++ b/native/jni/src/utils/int_array_view.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_INT_ARRAY_VIEW_H
+#define LATINIME_INT_ARRAY_VIEW_H
+
+#include <cstdint>
+#include <cstdlib>
+#include <vector>
+
+#include "defines.h"
+
+namespace latinime {
+
+/**
+ * Helper class used to provide a read-only view of a given range of integer array. This class
+ * does not take ownership of the underlying integer array but is designed to be a lightweight
+ * object that obeys value semantics.
+ *
+ * Example:
+ * <code>
+ * bool constinsX(IntArrayView view) {
+ * for (size_t i = 0; i < view.size(); ++i) {
+ * if (view[i] == 'X') {
+ * return true;
+ * }
+ * }
+ * return false;
+ * }
+ *
+ * const int codePointArray[] = { 'A', 'B', 'X', 'Z' };
+ * auto view = IntArrayView(codePointArray, NELEMS(codePointArray));
+ * const bool hasX = constinsX(view);
+ * </code>
+ */
+class IntArrayView {
+ public:
+ IntArrayView() : mPtr(nullptr), mSize(0) {}
+
+ IntArrayView(const int *const ptr, const size_t size)
+ : mPtr(ptr), mSize(size) {}
+
+ explicit IntArrayView(const std::vector<int> &vector)
+ : mPtr(vector.data()), mSize(vector.size()) {}
+
+ template <int N>
+ AK_FORCE_INLINE static IntArrayView fromFixedSizeArray(const int (&array)[N]) {
+ return IntArrayView(array, N);
+ }
+
+ // Returns a view that points one int object. Does not take ownership of the given object.
+ AK_FORCE_INLINE static IntArrayView fromObject(const int *const object) {
+ return IntArrayView(object, 1);
+ }
+
+ AK_FORCE_INLINE int operator[](const size_t index) const {
+ ASSERT(index < mSize);
+ return mPtr[index];
+ }
+
+ AK_FORCE_INLINE bool empty() const {
+ return size() == 0;
+ }
+
+ AK_FORCE_INLINE size_t size() const {
+ return mSize;
+ }
+
+ AK_FORCE_INLINE const int *data() const {
+ return mPtr;
+ }
+
+ AK_FORCE_INLINE const int *begin() const {
+ return mPtr;
+ }
+
+ AK_FORCE_INLINE const int *end() const {
+ return mPtr + mSize;
+ }
+
+ private:
+ DISALLOW_ASSIGNMENT_OPERATOR(IntArrayView);
+
+ const int *const mPtr;
+ const size_t mSize;
+};
+
+using WordIdArrayView = IntArrayView;
+using PtNodePosArrayView = IntArrayView;
+
+} // namespace latinime
+#endif // LATINIME_MEMORY_VIEW_H
diff --git a/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp b/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp
new file mode 100644
index 000000000..6eef2040b
--- /dev/null
+++ b/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h"
+
+#include <gtest/gtest.h>
+
+#include "utils/int_array_view.h"
+
+namespace latinime {
+namespace {
+
+TEST(LanguageModelDictContentTest, TestUnigramProbability) {
+ LanguageModelDictContent LanguageModelDictContent(false /* useHistoricalInfo */);
+
+ const int flag = 0xFF;
+ const int probability = 10;
+ const int wordId = 100;
+ const ProbabilityEntry probabilityEntry(flag, probability);
+ LanguageModelDictContent.setProbabilityEntry(wordId, &probabilityEntry);
+ const ProbabilityEntry entry =
+ LanguageModelDictContent.getProbabilityEntry(wordId);
+ EXPECT_EQ(flag, entry.getFlags());
+ EXPECT_EQ(probability, entry.getProbability());
+}
+
+TEST(LanguageModelDictContentTest, TestUnigramProbabilityWithHistoricalInfo) {
+ LanguageModelDictContent LanguageModelDictContent(true /* useHistoricalInfo */);
+
+ const int flag = 0xF0;
+ const int timestamp = 0x3FFFFFFF;
+ const int level = 3;
+ const int count = 10;
+ const int wordId = 100;
+ const HistoricalInfo historicalInfo(timestamp, level, count);
+ const ProbabilityEntry probabilityEntry(flag, NOT_A_PROBABILITY, &historicalInfo);
+ LanguageModelDictContent.setProbabilityEntry(wordId, &probabilityEntry);
+ const ProbabilityEntry entry = LanguageModelDictContent.getProbabilityEntry(wordId);
+ EXPECT_EQ(flag, entry.getFlags());
+ EXPECT_EQ(timestamp, entry.getHistoricalInfo()->getTimeStamp());
+ EXPECT_EQ(level, entry.getHistoricalInfo()->getLevel());
+ EXPECT_EQ(count, entry.getHistoricalInfo()->getCount());
+}
+
+} // namespace
+} // namespace latinime
diff --git a/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/probability_entry_test.cpp b/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/probability_entry_test.cpp
new file mode 100644
index 000000000..db94550ef
--- /dev/null
+++ b/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/probability_entry_test.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
+
+#include <gtest/gtest.h>
+
+#include "defines.h"
+
+namespace latinime {
+namespace {
+
+TEST(ProbabilityEntryTest, TestEncodeDecode) {
+ const int flag = 0xFF;
+ const int probability = 10;
+
+ const ProbabilityEntry entry(flag, probability);
+ const uint64_t encodedEntry = entry.encode(false /* hasHistoricalInfo */);
+ const ProbabilityEntry decodedEntry =
+ ProbabilityEntry::decode(encodedEntry, false /* hasHistoricalInfo */);
+ EXPECT_EQ(0xFF0Aull, encodedEntry);
+ EXPECT_EQ(flag, decodedEntry.getFlags());
+ EXPECT_EQ(probability, decodedEntry.getProbability());
+}
+
+TEST(ProbabilityEntryTest, TestEncodeDecodeWithHistoricalInfo) {
+ const int flag = 0xF0;
+ const int timestamp = 0x3FFFFFFF;
+ const int level = 3;
+ const int count = 10;
+
+ const HistoricalInfo historicalInfo(timestamp, level, count);
+ const ProbabilityEntry entry(flag, NOT_A_PROBABILITY, &historicalInfo);
+
+ const uint64_t encodedEntry = entry.encode(true /* hasHistoricalInfo */);
+ EXPECT_EQ(0xF03FFFFFFF030Aull, encodedEntry);
+ const ProbabilityEntry decodedEntry =
+ ProbabilityEntry::decode(encodedEntry, true /* hasHistoricalInfo */);
+
+ EXPECT_EQ(flag, decodedEntry.getFlags());
+ EXPECT_EQ(timestamp, decodedEntry.getHistoricalInfo()->getTimeStamp());
+ EXPECT_EQ(level, decodedEntry.getHistoricalInfo()->getLevel());
+ EXPECT_EQ(count, decodedEntry.getHistoricalInfo()->getCount());
+}
+
+} // namespace
+} // namespace latinime
diff --git a/native/jni/tests/utils/int_array_view_test.cpp b/native/jni/tests/utils/int_array_view_test.cpp
new file mode 100644
index 000000000..bd843ab02
--- /dev/null
+++ b/native/jni/tests/utils/int_array_view_test.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/int_array_view.h"
+
+#include <gtest/gtest.h>
+
+#include <vector>
+
+namespace latinime {
+namespace {
+
+TEST(IntArrayViewTest, TestAccess) {
+ const std::vector<int> intVector = {3, 2, 1, 0, -1, -2};
+ IntArrayView intArrayView(intVector);
+ EXPECT_EQ(intVector.size(), intArrayView.size());
+ for (int i = 0; i < static_cast<int>(intVector.size()); ++i) {
+ EXPECT_EQ(intVector[i], intArrayView[i]);
+ }
+}
+
+TEST(IntArrayViewTest, TestIteration) {
+ const std::vector<int> intVector = {3, 2, 1, 0, -1, -2};
+ IntArrayView intArrayView(intVector);
+ size_t expectedIndex = 0;
+ for (const int element : intArrayView) {
+ EXPECT_EQ(intVector[expectedIndex], element);
+ ++expectedIndex;
+ }
+ EXPECT_EQ(expectedIndex, intArrayView.size());
+}
+
+TEST(IntArrayViewTest, TestConstructFromArray) {
+ const size_t ARRAY_SIZE = 100;
+ int intArray[ARRAY_SIZE];
+ const auto intArrayView = IntArrayView::fromFixedSizeArray(intArray);
+ EXPECT_EQ(ARRAY_SIZE, intArrayView.size());
+}
+
+TEST(IntArrayViewTest, TestConstructFromObject) {
+ const int object = 10;
+ const auto intArrayView = IntArrayView::fromObject(&object);
+ EXPECT_EQ(1, intArrayView.size());
+ EXPECT_EQ(object, intArrayView[0]);
+}
+
+} // namespace
+} // namespace latinime