aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/src
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni/src')
-rw-r--r--native/jni/src/defines.h20
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node.h79
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_utils.cpp42
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_utils.h12
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_vector.h12
-rw-r--r--native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h79
-rw-r--r--native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h16
-rw-r--r--native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h41
-rw-r--r--native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp4
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.cpp40
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.h26
-rw-r--r--native/jni/src/suggest/core/dictionary/error_type_utils.cpp34
-rw-r--r--native/jni/src/suggest/core/dictionary/error_type_utils.h69
-rw-r--r--native/jni/src/suggest/core/layout/proximity_info.cpp16
-rw-r--r--native/jni/src/suggest/core/layout/proximity_info.h11
-rw-r--r--native/jni/src/suggest/core/layout/proximity_info_state.cpp6
-rw-r--r--native/jni/src/suggest/core/layout/proximity_info_state.h2
-rw-r--r--native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h7
-rw-r--r--native/jni/src/suggest/core/policy/weighting.cpp5
-rw-r--r--native/jni/src/suggest/core/policy/weighting.h3
-rw-r--r--native/jni/src/suggest/core/session/dic_traverse_session.cpp8
-rw-r--r--native/jni/src/suggest/core/session/dic_traverse_session.h27
-rw-r--r--native/jni/src/suggest/core/suggest.cpp22
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp23
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h65
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp53
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp124
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h163
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp558
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h138
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp3
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h71
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp78
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h (renamed from native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h)6
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h235
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h39
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h59
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp)29
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h (renamed from native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h)21
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp)2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h (renamed from native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h)0
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp)62
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h (renamed from native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h)39
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.cpp107
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h62
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.cpp236
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.h82
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp)191
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h (renamed from native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h)34
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp)150
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h (renamed from native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h)177
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp)2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h (renamed from native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h)0
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.cpp279
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.h93
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.cpp139
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h85
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp)2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h (renamed from native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h)2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h62
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dict_content.h36
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h50
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h63
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h58
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h83
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h57
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h92
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp48
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h56
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp95
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h58
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp163
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h111
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp38
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h40
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp30
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h18
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h18
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp11
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h50
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h1
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.cpp98
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h60
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp41
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h52
-rw-r--r--native/jni/src/suggest/policyimpl/typing/typing_traversal.h4
-rw-r--r--native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp47
-rw-r--r--native/jni/src/suggest/policyimpl/typing/typing_weighting.h3
-rw-r--r--native/jni/src/utils/exclusive_ownership_pointer.h91
93 files changed, 3912 insertions, 1722 deletions
diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h
index 742e388e4..fbcd612b7 100644
--- a/native/jni/src/defines.h
+++ b/native/jni/src/defines.h
@@ -392,24 +392,4 @@ typedef enum {
// Create new word with space substitution
CT_NEW_WORD_SPACE_SUBSTITUTION,
} CorrectionType;
-
-// ErrorType is mainly decided by CorrectionType but it is also depending on if
-// the correction has really been performed or not.
-typedef enum {
- // Substitution, omission and transposition
- ET_EDIT_CORRECTION,
- // Proximity error
- ET_PROXIMITY_CORRECTION,
- // Completion
- ET_COMPLETION,
- // New word
- // TODO: Remove.
- // A new word error should be an edit correction error or a proximity correction error.
- ET_NEW_WORD,
- // Treat error as an intentional omission when the CorrectionType is omission and the node can
- // be intentional omission.
- ET_INTENTIONAL_OMISSION,
- // Not treated as an error. Tracked for checking exact match
- ET_NOT_AN_ERROR
-} ErrorType;
#endif // LATINIME_DEFINES_H
diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h
index 49cfdecac..0b2b4a9e8 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node.h
@@ -99,7 +99,7 @@ class DicNode {
virtual ~DicNode() {}
// Init for copy
- void initByCopy(const DicNode *dicNode) {
+ void initByCopy(const DicNode *const dicNode) {
mIsUsed = true;
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
mDicNodeProperties.init(&dicNode->mDicNodeProperties);
@@ -107,25 +107,25 @@ class DicNode {
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
}
- // Init for root with prevWordNodePos which is used for bigram
- void initAsRoot(const int rootGroupPos, const int prevWordNodePos) {
+ // Init for root with prevWordPtNodePos which is used for bigram
+ void initAsRoot(const int rootPtNodeArrayPos, const int prevWordPtNodePos) {
mIsUsed = true;
mIsCachedForNextSuggestion = false;
mDicNodeProperties.init(
- NOT_A_DICT_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */,
+ NOT_A_DICT_POS /* pos */, rootPtNodeArrayPos, NOT_A_CODE_POINT /* nodeCodePoint */,
NOT_A_PROBABILITY /* probability */, false /* isTerminal */,
true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */,
0 /* terminalDepth */);
- mDicNodeState.init(prevWordNodePos);
+ mDicNodeState.init(prevWordPtNodePos);
PROF_NODE_RESET(mProfiler);
}
// Init for root with previous word
- void initAsRootWithPreviousWord(DicNode *dicNode, const int rootGroupPos) {
+ void initAsRootWithPreviousWord(const DicNode *const dicNode, const int rootPtNodeArrayPos) {
mIsUsed = true;
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
mDicNodeProperties.init(
- NOT_A_DICT_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */,
+ NOT_A_DICT_POS /* pos */, rootPtNodeArrayPos, NOT_A_CODE_POINT /* nodeCodePoint */,
NOT_A_PROBABILITY /* probability */, false /* isTerminal */,
true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */,
0 /* terminalDepth */);
@@ -138,7 +138,7 @@ class DicNode {
mDicNodeState.mDicNodeStatePrevWord.init(
dicNode->mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() + 1,
dicNode->mDicNodeProperties.getProbability(),
- dicNode->mDicNodeProperties.getPos(),
+ dicNode->mDicNodeProperties.getPtNodePos(),
dicNode->mDicNodeState.mDicNodeStatePrevWord.mPrevWord,
dicNode->mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(),
dicNode->getOutputWordBuf(),
@@ -148,26 +148,27 @@ class DicNode {
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
}
- void initAsPassingChild(DicNode *parentNode) {
+ void initAsPassingChild(DicNode *parentDicNode) {
mIsUsed = true;
- mIsCachedForNextSuggestion = parentNode->mIsCachedForNextSuggestion;
- const int c = parentNode->getNodeTypedCodePoint();
- mDicNodeProperties.init(&parentNode->mDicNodeProperties, c);
- mDicNodeState.init(&parentNode->mDicNodeState);
- PROF_NODE_COPY(&parentNode->mProfiler, mProfiler);
+ mIsCachedForNextSuggestion = parentDicNode->mIsCachedForNextSuggestion;
+ const int parentCodePoint = parentDicNode->getNodeTypedCodePoint();
+ mDicNodeProperties.init(&parentDicNode->mDicNodeProperties, parentCodePoint);
+ mDicNodeState.init(&parentDicNode->mDicNodeState);
+ PROF_NODE_COPY(&parentDicNode->mProfiler, mProfiler);
}
- void initAsChild(const DicNode *const dicNode, const int pos, const int childrenPos,
- const int probability, const bool isTerminal, const bool hasChildren,
- const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount,
- const int *const mergedNodeCodePoints) {
+ void initAsChild(const DicNode *const dicNode, const int ptNodePos,
+ const int childrenPtNodeArrayPos, const int probability, const bool isTerminal,
+ const bool hasChildren, const bool isBlacklistedOrNotAWord,
+ const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
mIsUsed = true;
uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1);
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
const uint16_t newLeavingDepth = static_cast<uint16_t>(
dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount);
- mDicNodeProperties.init(pos, childrenPos, mergedNodeCodePoints[0], probability,
- isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth, newLeavingDepth);
+ mDicNodeProperties.init(ptNodePos, childrenPtNodeArrayPos, mergedNodeCodePoints[0],
+ probability, isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth,
+ newLeavingDepth);
mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount,
mergedNodeCodePoints);
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
@@ -234,7 +235,7 @@ class DicNode {
}
bool isFirstWord() const {
- return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos() == NOT_A_DICT_POS;
+ return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos() == NOT_A_DICT_POS;
}
bool isCompletion(const int inputSize) const {
@@ -246,29 +247,30 @@ class DicNode {
}
// Used to get bigram probability in DicNodeUtils
- int getPos() const {
- return mDicNodeProperties.getPos();
+ int getPtNodePos() const {
+ return mDicNodeProperties.getPtNodePos();
}
// Used to get bigram probability in DicNodeUtils
- int getPrevWordPos() const {
- return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos();
+ int getPrevWordTerminalPtNodePos() const {
+ return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos();
}
// Used in DicNodeUtils
- int getChildrenPos() const {
- return mDicNodeProperties.getChildrenPos();
+ int getChildrenPtNodeArrayPos() const {
+ return mDicNodeProperties.getChildrenPtNodeArrayPos();
}
int getProbability() const {
return mDicNodeProperties.getProbability();
}
- AK_FORCE_INLINE bool isTerminalWordNode() const {
- const bool isTerminalNodes = mDicNodeProperties.isTerminal();
- const int currentNodeDepth = getNodeCodePointCount();
- const int terminalNodeDepth = mDicNodeProperties.getLeavingDepth();
- return isTerminalNodes && currentNodeDepth > 0 && currentNodeDepth == terminalNodeDepth;
+ AK_FORCE_INLINE bool isTerminalDicNode() const {
+ const bool isTerminalPtNode = mDicNodeProperties.isTerminal();
+ const int currentDicNodeDepth = getNodeCodePointCount();
+ const int terminalDicNodeDepth = mDicNodeProperties.getLeavingDepth();
+ return isTerminalPtNode && currentDicNodeDepth > 0
+ && currentDicNodeDepth == terminalDicNodeDepth;
}
bool shouldBeFilteredBySafetyNetForBigram() const {
@@ -374,8 +376,8 @@ class DicNode {
}
// Used to commit input partially
- int getPrevWordNodePos() const {
- return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos();
+ int getPrevWordPtNodePos() const {
+ return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos();
}
AK_FORCE_INLINE const int *getOutputWordBuf() const {
@@ -410,7 +412,7 @@ class DicNode {
// TODO: Remove once touch path is merged into ProximityInfoState
// Note: Returned codepoint may be a digraph codepoint if the node is in a composite glyph.
int getNodeCodePoint() const {
- const int codePoint = mDicNodeProperties.getNodeCodePoint();
+ const int codePoint = mDicNodeProperties.getDicNodeCodePoint();
const DigraphUtils::DigraphCodePointIndex digraphIndex =
mDicNodeState.mDicNodeStateScoring.getDigraphIndex();
if (digraphIndex == DigraphUtils::NOT_A_DIGRAPH_INDEX) {
@@ -423,8 +425,8 @@ class DicNode {
// Utils for cost calculation //
////////////////////////////////
AK_FORCE_INLINE bool isSameNodeCodePoint(const DicNode *const dicNode) const {
- return mDicNodeProperties.getNodeCodePoint()
- == dicNode->mDicNodeProperties.getNodeCodePoint();
+ return mDicNodeProperties.getDicNodeCodePoint()
+ == dicNode->mDicNodeProperties.getDicNodeCodePoint();
}
// TODO: remove
@@ -574,7 +576,8 @@ class DicNode {
// Caveat: Must not be called outside Weighting
// This restriction is guaranteed by "friend"
AK_FORCE_INLINE void addCost(const float spatialCost, const float languageCost,
- const bool doNormalization, const int inputSize, const ErrorType errorType) {
+ const bool doNormalization, const int inputSize,
+ const ErrorTypeUtils::ErrorType errorType) {
if (DEBUG_GEO_FULL) {
LOGI_SHOW_ADD_COST_PROP;
}
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
index ec65114c7..5540b6df5 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
+++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
@@ -22,7 +22,6 @@
#include "suggest/core/dicnode/dic_node_vector.h"
#include "suggest/core/dictionary/multi_bigram_map.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
-#include "utils/char_utils.h"
namespace latinime {
@@ -32,19 +31,20 @@ namespace latinime {
/* static */ void DicNodeUtils::initAsRoot(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- const int prevWordNodePos, DicNode *const newRootNode) {
- newRootNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordNodePos);
+ const int prevWordPtNodePos, DicNode *const newRootDicNode) {
+ newRootDicNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordPtNodePos);
}
/*static */ void DicNodeUtils::initAsRootWithPreviousWord(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- DicNode *const prevWordLastNode, DicNode *const newRootNode) {
- newRootNode->initAsRootWithPreviousWord(
- prevWordLastNode, dictionaryStructurePolicy->getRootPosition());
+ const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode) {
+ newRootDicNode->initAsRootWithPreviousWord(
+ prevWordLastDicNode, dictionaryStructurePolicy->getRootPosition());
}
-/* static */ void DicNodeUtils::initByCopy(DicNode *srcNode, DicNode *destNode) {
- destNode->initByCopy(srcNode);
+/* static */ void DicNodeUtils::initByCopy(const DicNode *const srcDicNode,
+ DicNode *const destDicNode) {
+ destDicNode->initByCopy(srcDicNode);
}
///////////////////////////////////
@@ -52,14 +52,14 @@ namespace latinime {
///////////////////////////////////
/* static */ void DicNodeUtils::getAllChildDicNodes(DicNode *dicNode,
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- DicNodeVector *childDicNodes) {
+ DicNodeVector *const childDicNodes) {
if (dicNode->isTotalInputSizeExceedingLimit()) {
return;
}
if (!dicNode->isLeavingNode()) {
childDicNodes->pushPassingChild(dicNode);
} else {
- dictionaryStructurePolicy->createAndGetAllChildNodes(dicNode, childDicNodes);
+ dictionaryStructurePolicy->createAndGetAllChildDicNodes(dicNode, childDicNodes);
}
}
@@ -71,11 +71,11 @@ namespace latinime {
*/
/* static */ float DicNodeUtils::getBigramNodeImprobability(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- const DicNode *const node, MultiBigramMap *multiBigramMap) {
- if (node->hasMultipleWords() && !node->isValidMultipleWordSuggestion()) {
+ const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) {
+ if (dicNode->hasMultipleWords() && !dicNode->isValidMultipleWordSuggestion()) {
return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
}
- const int probability = getBigramNodeProbability(dictionaryStructurePolicy, node,
+ const int probability = getBigramNodeProbability(dictionaryStructurePolicy, dicNode,
multiBigramMap);
// TODO: This equation to calculate the improbability looks unreasonable. Investigate this.
const float cost = static_cast<float>(MAX_PROBABILITY - probability)
@@ -85,19 +85,19 @@ namespace latinime {
/* static */ int DicNodeUtils::getBigramNodeProbability(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- const DicNode *const node, MultiBigramMap *multiBigramMap) {
- const int unigramProbability = node->getProbability();
- const int wordPos = node->getPos();
- const int prevWordPos = node->getPrevWordPos();
- if (NOT_A_DICT_POS == wordPos || NOT_A_DICT_POS == prevWordPos) {
+ const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) {
+ const int unigramProbability = dicNode->getProbability();
+ const int ptNodePos = dicNode->getPtNodePos();
+ const int prevWordTerminalPtNodePos = dicNode->getPrevWordTerminalPtNodePos();
+ if (NOT_A_DICT_POS == ptNodePos || NOT_A_DICT_POS == prevWordTerminalPtNodePos) {
// Note: Normally wordPos comes from the dictionary and should never equal
// NOT_A_VALID_WORD_POS.
return dictionaryStructurePolicy->getProbability(unigramProbability,
NOT_A_PROBABILITY);
}
if (multiBigramMap) {
- return multiBigramMap->getBigramProbability(dictionaryStructurePolicy, prevWordPos,
- wordPos, unigramProbability);
+ return multiBigramMap->getBigramProbability(dictionaryStructurePolicy,
+ prevWordTerminalPtNodePos, ptNodePos, unigramProbability);
}
return dictionaryStructurePolicy->getProbability(unigramProbability,
NOT_A_PROBABILITY);
@@ -109,7 +109,7 @@ namespace latinime {
// TODO: Move to char_utils?
/* static */ int DicNodeUtils::appendTwoWords(const int *const src0, const int16_t length0,
- const int *const src1, const int16_t length1, int *dest) {
+ const int *const src1, const int16_t length1, int *const dest) {
int actualLength0 = 0;
for (int i = 0; i < length0; ++i) {
if (src0[i] == 0) {
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.h b/native/jni/src/suggest/core/dicnode/dic_node_utils.h
index 3fb351a61..3f1514a52 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_utils.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.h
@@ -31,20 +31,20 @@ class MultiBigramMap;
class DicNodeUtils {
public:
static int appendTwoWords(const int *src0, const int16_t length0, const int *src1,
- const int16_t length1, int *dest);
+ const int16_t length1, int *const dest);
static void initAsRoot(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- const int prevWordNodePos, DicNode *newRootNode);
+ const int prevWordPtNodePos, DicNode *const newRootDicNode);
static void initAsRootWithPreviousWord(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- DicNode *prevWordLastNode, DicNode *newRootNode);
- static void initByCopy(DicNode *srcNode, DicNode *destNode);
+ const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode);
+ static void initByCopy(const DicNode *const srcDicNode, DicNode *const destDicNode);
static void getAllChildDicNodes(DicNode *dicNode,
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
DicNodeVector *childDicNodes);
static float getBigramNodeImprobability(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- const DicNode *const node, MultiBigramMap *const multiBigramMap);
+ const DicNode *const dicNode, MultiBigramMap *const multiBigramMap);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DicNodeUtils);
@@ -53,7 +53,7 @@ class DicNodeUtils {
static int getBigramNodeProbability(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- const DicNode *const node, MultiBigramMap *multiBigramMap);
+ const DicNode *const dicNode, MultiBigramMap *const multiBigramMap);
};
} // namespace latinime
#endif // LATINIME_DIC_NODE_UTILS_H
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_vector.h b/native/jni/src/suggest/core/dicnode/dic_node_vector.h
index 42addae8d..9364e7751 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_vector.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node_vector.h
@@ -62,14 +62,14 @@ class DicNodeVector {
mDicNodes.back().initAsPassingChild(dicNode);
}
- void pushLeavingChild(const DicNode *const dicNode, const int pos, const int childrenPos,
- const int probability, const bool isTerminal, const bool hasChildren,
- const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount,
- const int *const mergedNodeCodePoints) {
+ void pushLeavingChild(const DicNode *const dicNode, const int ptNodePos,
+ const int childrenPtNodeArrayPos, const int probability, const bool isTerminal,
+ const bool hasChildren, const bool isBlacklistedOrNotAWord,
+ const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
ASSERT(!mLock);
mDicNodes.push_back(mEmptyNode);
- mDicNodes.back().initAsChild(dicNode, pos, childrenPos, probability, isTerminal,
- hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount,
+ mDicNodes.back().initAsChild(dicNode, ptNodePos, childrenPtNodeArrayPos, probability,
+ isTerminal, hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount,
mergedNodeCodePoints);
}
diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h
index 9e0f62ceb..c41a7243a 100644
--- a/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h
+++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h
@@ -24,15 +24,14 @@
namespace latinime {
/**
- * Node for traversing the lexicon trie.
+ * PtNode information related to the DicNode from the lexicon trie.
*/
-// TODO: Introduce a dictionary node class which has attribute members required to understand the
-// dictionary structure.
class DicNodeProperties {
public:
AK_FORCE_INLINE DicNodeProperties()
- : mPos(0), mChildrenPos(0), mProbability(0), mNodeCodePoint(0), mIsTerminal(false),
- mHasChildren(false), mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0) {}
+ : mPtNodePos(0), mChildrenPtNodeArrayPos(0), mProbability(0), mDicNodeCodePoint(0),
+ mIsTerminal(false), mHasChildrenPtNodes(false), mIsBlacklistedOrNotAWord(false),
+ mDepth(0), mLeavingDepth(0) {}
virtual ~DicNodeProperties() {}
@@ -40,57 +39,57 @@ class DicNodeProperties {
void init(const int pos, const int childrenPos, const int nodeCodePoint, const int probability,
const bool isTerminal, const bool hasChildren, const bool isBlacklistedOrNotAWord,
const uint16_t depth, const uint16_t leavingDepth) {
- mPos = pos;
- mChildrenPos = childrenPos;
- mNodeCodePoint = nodeCodePoint;
+ mPtNodePos = pos;
+ mChildrenPtNodeArrayPos = childrenPos;
+ mDicNodeCodePoint = nodeCodePoint;
mProbability = probability;
mIsTerminal = isTerminal;
- mHasChildren = hasChildren;
+ mHasChildrenPtNodes = hasChildren;
mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord;
mDepth = depth;
mLeavingDepth = leavingDepth;
}
// Init for copy
- void init(const DicNodeProperties *const nodeProp) {
- mPos = nodeProp->mPos;
- mChildrenPos = nodeProp->mChildrenPos;
- mNodeCodePoint = nodeProp->mNodeCodePoint;
- mProbability = nodeProp->mProbability;
- mIsTerminal = nodeProp->mIsTerminal;
- mHasChildren = nodeProp->mHasChildren;
- mIsBlacklistedOrNotAWord = nodeProp->mIsBlacklistedOrNotAWord;
- mDepth = nodeProp->mDepth;
- mLeavingDepth = nodeProp->mLeavingDepth;
+ void init(const DicNodeProperties *const dicNodeProp) {
+ mPtNodePos = dicNodeProp->mPtNodePos;
+ mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos;
+ mDicNodeCodePoint = dicNodeProp->mDicNodeCodePoint;
+ mProbability = dicNodeProp->mProbability;
+ mIsTerminal = dicNodeProp->mIsTerminal;
+ mHasChildrenPtNodes = dicNodeProp->mHasChildrenPtNodes;
+ mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
+ mDepth = dicNodeProp->mDepth;
+ mLeavingDepth = dicNodeProp->mLeavingDepth;
}
// Init as passing child
- void init(const DicNodeProperties *const nodeProp, const int codePoint) {
- mPos = nodeProp->mPos;
- mChildrenPos = nodeProp->mChildrenPos;
- mNodeCodePoint = codePoint; // Overwrite the node char of a passing child
- mProbability = nodeProp->mProbability;
- mIsTerminal = nodeProp->mIsTerminal;
- mHasChildren = nodeProp->mHasChildren;
- mIsBlacklistedOrNotAWord = nodeProp->mIsBlacklistedOrNotAWord;
- mDepth = nodeProp->mDepth + 1; // Increment the depth of a passing child
- mLeavingDepth = nodeProp->mLeavingDepth;
+ void init(const DicNodeProperties *const dicNodeProp, const int codePoint) {
+ mPtNodePos = dicNodeProp->mPtNodePos;
+ mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos;
+ mDicNodeCodePoint = codePoint; // Overwrite the node char of a passing child
+ mProbability = dicNodeProp->mProbability;
+ mIsTerminal = dicNodeProp->mIsTerminal;
+ mHasChildrenPtNodes = dicNodeProp->mHasChildrenPtNodes;
+ mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
+ mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child
+ mLeavingDepth = dicNodeProp->mLeavingDepth;
}
- int getPos() const {
- return mPos;
+ int getPtNodePos() const {
+ return mPtNodePos;
}
- int getChildrenPos() const {
- return mChildrenPos;
+ int getChildrenPtNodeArrayPos() const {
+ return mChildrenPtNodeArrayPos;
}
int getProbability() const {
return mProbability;
}
- int getNodeCodePoint() const {
- return mNodeCodePoint;
+ int getDicNodeCodePoint() const {
+ return mDicNodeCodePoint;
}
uint16_t getDepth() const {
@@ -107,7 +106,7 @@ class DicNodeProperties {
}
bool hasChildren() const {
- return mHasChildren || mDepth != mLeavingDepth;
+ return mHasChildrenPtNodes || mDepth != mLeavingDepth;
}
bool isBlacklistedOrNotAWord() const {
@@ -118,12 +117,12 @@ class DicNodeProperties {
// Caution!!!
// Use a default copy constructor and an assign operator because shallow copies are ok
// for this class
- int mPos;
- int mChildrenPos;
+ int mPtNodePos;
+ int mChildrenPtNodeArrayPos;
int mProbability;
- int mNodeCodePoint;
+ int mDicNodeCodePoint;
bool mIsTerminal;
- bool mHasChildren;
+ bool mHasChildrenPtNodes;
bool mIsBlacklistedOrNotAWord;
uint16_t mDepth;
uint16_t mLeavingDepth;
diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h
index b8986203d..dba57056b 100644
--- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h
+++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h
@@ -30,7 +30,7 @@ class DicNodeStatePrevWord {
public:
AK_FORCE_INLINE DicNodeStatePrevWord()
: mPrevWordCount(0), mPrevWordLength(0), mPrevWordStart(0), mPrevWordProbability(0),
- mPrevWordNodePos(NOT_A_DICT_POS), mSecondWordFirstInputIndex(NOT_AN_INDEX) {
+ mPrevWordPtNodePos(NOT_A_DICT_POS), mSecondWordFirstInputIndex(NOT_AN_INDEX) {
memset(mPrevWord, 0, sizeof(mPrevWord));
}
@@ -41,7 +41,7 @@ class DicNodeStatePrevWord {
mPrevWordCount = 0;
mPrevWordStart = 0;
mPrevWordProbability = -1;
- mPrevWordNodePos = NOT_A_DICT_POS;
+ mPrevWordPtNodePos = NOT_A_DICT_POS;
mSecondWordFirstInputIndex = NOT_AN_INDEX;
}
@@ -50,7 +50,7 @@ class DicNodeStatePrevWord {
mPrevWordCount = 0;
mPrevWordStart = 0;
mPrevWordProbability = -1;
- mPrevWordNodePos = prevWordNodePos;
+ mPrevWordPtNodePos = prevWordNodePos;
mSecondWordFirstInputIndex = NOT_AN_INDEX;
}
@@ -60,7 +60,7 @@ class DicNodeStatePrevWord {
mPrevWordCount = prevWord->mPrevWordCount;
mPrevWordStart = prevWord->mPrevWordStart;
mPrevWordProbability = prevWord->mPrevWordProbability;
- mPrevWordNodePos = prevWord->mPrevWordNodePos;
+ mPrevWordPtNodePos = prevWord->mPrevWordPtNodePos;
mSecondWordFirstInputIndex = prevWord->mSecondWordFirstInputIndex;
memcpy(mPrevWord, prevWord->mPrevWord, prevWord->mPrevWordLength * sizeof(mPrevWord[0]));
}
@@ -71,7 +71,7 @@ class DicNodeStatePrevWord {
const int prevWordSecondWordFirstInputIndex, const int lastInputIndex) {
mPrevWordCount = min(prevWordCount, static_cast<int16_t>(MAX_RESULTS));
mPrevWordProbability = prevWordProbability;
- mPrevWordNodePos = prevWordNodePos;
+ mPrevWordPtNodePos = prevWordNodePos;
int twoWordsLen =
DicNodeUtils::appendTwoWords(src0, length0, src1, length1, mPrevWord);
if (twoWordsLen >= MAX_WORD_LENGTH) {
@@ -116,8 +116,8 @@ class DicNodeStatePrevWord {
return mPrevWordStart;
}
- int getPrevWordNodePos() const {
- return mPrevWordNodePos;
+ int getPrevWordPtNodePos() const {
+ return mPrevWordPtNodePos;
}
int getPrevWordCodePointAt(const int id) const {
@@ -147,7 +147,7 @@ class DicNodeStatePrevWord {
int16_t mPrevWordLength;
int16_t mPrevWordStart;
int16_t mPrevWordProbability;
- int mPrevWordNodePos;
+ int mPrevWordPtNodePos;
int mSecondWordFirstInputIndex;
};
} // namespace latinime
diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h
index 3c85d0e9d..74f9eee92 100644
--- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h
+++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h
@@ -21,6 +21,7 @@
#include "defines.h"
#include "suggest/core/dictionary/digraph_utils.h"
+#include "suggest/core/dictionary/error_type_utils.h"
namespace latinime {
@@ -31,7 +32,7 @@ class DicNodeStateScoring {
mDigraphIndex(DigraphUtils::NOT_A_DIGRAPH_INDEX),
mEditCorrectionCount(0), mProximityCorrectionCount(0),
mNormalizedCompoundDistance(0.0f), mSpatialDistance(0.0f), mLanguageDistance(0.0f),
- mRawLength(0.0f), mExactMatch(true),
+ mRawLength(0.0f), mContainingErrorTypes(ErrorTypeUtils::NOT_AN_ERROR),
mNormalizedCompoundDistanceAfterFirstWord(MAX_VALUE_FOR_WEIGHTING) {
}
@@ -47,7 +48,7 @@ class DicNodeStateScoring {
mDoubleLetterLevel = NOT_A_DOUBLE_LETTER;
mDigraphIndex = DigraphUtils::NOT_A_DIGRAPH_INDEX;
mNormalizedCompoundDistanceAfterFirstWord = MAX_VALUE_FOR_WEIGHTING;
- mExactMatch = true;
+ mContainingErrorTypes = ErrorTypeUtils::NOT_AN_ERROR;
}
AK_FORCE_INLINE void init(const DicNodeStateScoring *const scoring) {
@@ -59,34 +60,21 @@ class DicNodeStateScoring {
mRawLength = scoring->mRawLength;
mDoubleLetterLevel = scoring->mDoubleLetterLevel;
mDigraphIndex = scoring->mDigraphIndex;
- mExactMatch = scoring->mExactMatch;
+ mContainingErrorTypes = scoring->mContainingErrorTypes;
mNormalizedCompoundDistanceAfterFirstWord =
scoring->mNormalizedCompoundDistanceAfterFirstWord;
}
void addCost(const float spatialCost, const float languageCost, const bool doNormalization,
- const int inputSize, const int totalInputIndex, const ErrorType errorType) {
+ const int inputSize, const int totalInputIndex,
+ const ErrorTypeUtils::ErrorType errorType) {
addDistance(spatialCost, languageCost, doNormalization, inputSize, totalInputIndex);
- switch (errorType) {
- case ET_EDIT_CORRECTION:
- ++mEditCorrectionCount;
- mExactMatch = false;
- break;
- case ET_PROXIMITY_CORRECTION:
- ++mProximityCorrectionCount;
- mExactMatch = false;
- break;
- case ET_COMPLETION:
- mExactMatch = false;
- break;
- case ET_NEW_WORD:
- mExactMatch = false;
- break;
- case ET_INTENTIONAL_OMISSION:
- mExactMatch = false;
- break;
- case ET_NOT_AN_ERROR:
- break;
+ mContainingErrorTypes = mContainingErrorTypes | errorType;
+ if (ErrorTypeUtils::isEditCorrectionError(errorType)) {
+ ++mEditCorrectionCount;
+ }
+ if (ErrorTypeUtils::isProximityCorrectionError(errorType)) {
+ ++mProximityCorrectionCount;
}
}
@@ -182,7 +170,7 @@ class DicNodeStateScoring {
}
bool isExactMatch() const {
- return mExactMatch;
+ return ErrorTypeUtils::isExactMatch(mContainingErrorTypes);
}
private:
@@ -199,7 +187,8 @@ class DicNodeStateScoring {
float mSpatialDistance;
float mLanguageDistance;
float mRawLength;
- bool mExactMatch;
+ // All accumulated error types so far
+ ErrorTypeUtils::ErrorType mContainingErrorTypes;
float mNormalizedCompoundDistanceAfterFirstWord;
AK_FORCE_INLINE void addDistance(float spatialDistance, float languageDistance,
diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
index 71f4ef6ea..c2a15a312 100644
--- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
@@ -144,7 +144,7 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng
int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
const bool forceLowerCaseSearch) const {
if (0 >= prevWordLength) return NOT_A_DICT_POS;
- int pos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(prevWord, prevWordLength,
+ int pos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(prevWord, prevWordLength,
forceLowerCaseSearch);
if (NOT_A_DICT_POS == pos) return NOT_A_DICT_POS;
return mDictionaryStructurePolicy->getBigramsPositionOfPtNode(pos);
@@ -155,7 +155,7 @@ int BigramDictionary::getBigramProbability(const int *word0, int length0, const
int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */);
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
if (NOT_A_DICT_POS == pos) return NOT_A_PROBABILITY;
- int nextWordPos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(word1, length1,
+ int nextWordPos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(word1, length1,
false /* forceLowerCaseSearch */);
if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY;
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index 59ead1894..7b83f27df 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -21,9 +21,7 @@
#include <stdint.h>
#include "defines.h"
-#include "suggest/core/dictionary/bigram_dictionary.h"
#include "suggest/core/policy/dictionary_header_structure_policy.h"
-#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "suggest/core/session/dic_traverse_session.h"
#include "suggest/core/suggest.h"
#include "suggest/core/suggest_options.h"
@@ -35,22 +33,15 @@ namespace latinime {
const int Dictionary::HEADER_ATTRIBUTE_BUFFER_SIZE = 32;
-Dictionary::Dictionary(JNIEnv *env,
- DictionaryStructureWithBufferPolicy *const dictionaryStructureWithBufferPolicy)
+Dictionary::Dictionary(JNIEnv *env, const DictionaryStructureWithBufferPolicy::StructurePoilcyPtr
+ &dictionaryStructureWithBufferPolicy)
: mDictionaryStructureWithBufferPolicy(dictionaryStructureWithBufferPolicy),
- mBigramDictionary(new BigramDictionary(mDictionaryStructureWithBufferPolicy)),
+ mBigramDictionary(new BigramDictionary(mDictionaryStructureWithBufferPolicy.get())),
mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())),
mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) {
logDictionaryInfo(env);
}
-Dictionary::~Dictionary() {
- delete mBigramDictionary;
- delete mGestureSuggest;
- delete mTypingSuggest;
- delete mDictionaryStructureWithBufferPolicy;
-}
-
int Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
int inputSize, int *prevWordCodePoints, int prevWordLength, int commitPoint,
@@ -60,7 +51,7 @@ int Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession
if (suggestOptions->isGesture()) {
DicTraverseSession::initSessionInstance(
traverseSession, this, prevWordCodePoints, prevWordLength, suggestOptions);
- result = mGestureSuggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
+ result = mGestureSuggest.get()->getSuggestions(proximityInfo, traverseSession, xcoordinates,
ycoordinates, times, pointerIds, inputCodePoints, inputSize, commitPoint, outWords,
frequencies, spaceIndices, outputTypes, outputAutoCommitFirstWordConfidence);
if (DEBUG_DICT) {
@@ -70,7 +61,7 @@ int Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession
} else {
DicTraverseSession::initSessionInstance(
traverseSession, this, prevWordCodePoints, prevWordLength, suggestOptions);
- result = mTypingSuggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
+ result = mTypingSuggest.get()->getSuggestions(proximityInfo, traverseSession, xcoordinates,
ycoordinates, times, pointerIds, inputCodePoints, inputSize, commitPoint,
outWords, frequencies, spaceIndices, outputTypes,
outputAutoCommitFirstWordConfidence);
@@ -84,11 +75,12 @@ int Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession
int Dictionary::getBigrams(const int *word, int length, int *outWords, int *frequencies,
int *outputTypes) const {
if (length <= 0) return 0;
- return mBigramDictionary->getPredictions(word, length, outWords, frequencies, outputTypes);
+ return mBigramDictionary.get()->getPredictions(word, length, outWords, frequencies,
+ outputTypes);
}
int Dictionary::getProbability(const int *word, int length) const {
- int pos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(word, length,
+ int pos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(word, length,
false /* forceLowerCaseSearch */);
if (NOT_A_DICT_POS == pos) {
return NOT_A_PROBABILITY;
@@ -98,39 +90,39 @@ int Dictionary::getProbability(const int *word, int length) const {
int Dictionary::getBigramProbability(const int *word0, int length0, const int *word1,
int length1) const {
- return mBigramDictionary->getBigramProbability(word0, length0, word1, length1);
+ return mBigramDictionary.get()->getBigramProbability(word0, length0, word1, length1);
}
void Dictionary::addUnigramWord(const int *const word, const int length, const int probability) {
- mDictionaryStructureWithBufferPolicy->addUnigramWord(word, length, probability);
+ mDictionaryStructureWithBufferPolicy.get()->addUnigramWord(word, length, probability);
}
void Dictionary::addBigramWords(const int *const word0, const int length0, const int *const word1,
const int length1, const int probability) {
- mDictionaryStructureWithBufferPolicy->addBigramWords(word0, length0, word1, length1,
+ mDictionaryStructureWithBufferPolicy.get()->addBigramWords(word0, length0, word1, length1,
probability);
}
void Dictionary::removeBigramWords(const int *const word0, const int length0,
const int *const word1, const int length1) {
- mDictionaryStructureWithBufferPolicy->removeBigramWords(word0, length0, word1, length1);
+ mDictionaryStructureWithBufferPolicy.get()->removeBigramWords(word0, length0, word1, length1);
}
void Dictionary::flush(const char *const filePath) {
- mDictionaryStructureWithBufferPolicy->flush(filePath);
+ mDictionaryStructureWithBufferPolicy.get()->flush(filePath);
}
void Dictionary::flushWithGC(const char *const filePath) {
- mDictionaryStructureWithBufferPolicy->flushWithGC(filePath);
+ mDictionaryStructureWithBufferPolicy.get()->flushWithGC(filePath);
}
bool Dictionary::needsToRunGC(const bool mindsBlockByGC) {
- return mDictionaryStructureWithBufferPolicy->needsToRunGC(mindsBlockByGC);
+ return mDictionaryStructureWithBufferPolicy.get()->needsToRunGC(mindsBlockByGC);
}
void Dictionary::getProperty(const char *const query, char *const outResult,
const int maxResultLength) {
- return mDictionaryStructureWithBufferPolicy->getProperty(query, outResult, maxResultLength);
+ return mDictionaryStructureWithBufferPolicy.get()->getProperty(query, outResult, maxResultLength);
}
void Dictionary::logDictionaryInfo(JNIEnv *const env) const {
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h
index 0195d5bf0..e52a40f1d 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h
@@ -21,14 +21,16 @@
#include "defines.h"
#include "jni.h"
+#include "suggest/core/dictionary/bigram_dictionary.h"
+#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+#include "suggest/core/suggest_interface.h"
+#include "utils/exclusive_ownership_pointer.h"
namespace latinime {
-class BigramDictionary;
class DictionaryStructureWithBufferPolicy;
class DicTraverseSession;
class ProximityInfo;
-class SuggestInterface;
class SuggestOptions;
class Dictionary {
@@ -53,8 +55,8 @@ class Dictionary {
static const int KIND_FLAG_POSSIBLY_OFFENSIVE = 0x80000000;
static const int KIND_FLAG_EXACT_MATCH = 0x40000000;
- Dictionary(JNIEnv *env,
- DictionaryStructureWithBufferPolicy *const dictionaryStructureWithBufferPoilcy);
+ Dictionary(JNIEnv *env, const DictionaryStructureWithBufferPolicy::StructurePoilcyPtr
+ &dictionaryStructureWithBufferPoilcy);
int getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
@@ -87,20 +89,22 @@ class Dictionary {
const int maxResultLength);
const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const {
- return mDictionaryStructureWithBufferPolicy;
+ return mDictionaryStructureWithBufferPolicy.get();
}
- virtual ~Dictionary();
-
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Dictionary);
+ typedef ExclusiveOwnershipPointer<BigramDictionary> BigramDictionaryPtr;
+ typedef ExclusiveOwnershipPointer<SuggestInterface> SuggestInterfacePtr;
+
static const int HEADER_ATTRIBUTE_BUFFER_SIZE;
- DictionaryStructureWithBufferPolicy *const mDictionaryStructureWithBufferPolicy;
- const BigramDictionary *const mBigramDictionary;
- const SuggestInterface *const mGestureSuggest;
- const SuggestInterface *const mTypingSuggest;
+ const DictionaryStructureWithBufferPolicy::StructurePoilcyPtr
+ mDictionaryStructureWithBufferPolicy;
+ const BigramDictionaryPtr mBigramDictionary;
+ const SuggestInterfacePtr mGestureSuggest;
+ const SuggestInterfacePtr mTypingSuggest;
void logDictionaryInfo(JNIEnv *const env) const;
};
diff --git a/native/jni/src/suggest/core/dictionary/error_type_utils.cpp b/native/jni/src/suggest/core/dictionary/error_type_utils.cpp
new file mode 100644
index 000000000..0635fef7e
--- /dev/null
+++ b/native/jni/src/suggest/core/dictionary/error_type_utils.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/core/dictionary/error_type_utils.h"
+
+namespace latinime {
+
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::NOT_AN_ERROR = 0x0;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_CASE_ERROR = 0x1;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR = 0x2;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_DIGRAPH = 0x4;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::INTENTIONAL_OMISSION = 0x8;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::EDIT_CORRECTION = 0x10;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::PROXIMITY_CORRECTION = 0x20;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::COMPLETION = 0x40;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::NEW_WORD = 0x80;
+
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::ERRORS_TREATED_AS_AN_EXACT_MATCH =
+ NOT_AN_ERROR | MATCH_WITH_CASE_ERROR | MATCH_WITH_ACCENT_ERROR | MATCH_WITH_DIGRAPH;
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/error_type_utils.h b/native/jni/src/suggest/core/dictionary/error_type_utils.h
new file mode 100644
index 000000000..ab4a65e48
--- /dev/null
+++ b/native/jni/src/suggest/core/dictionary/error_type_utils.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_ERROR_TYPE_UTILS_H
+#define LATINIME_ERROR_TYPE_UTILS_H
+
+#include <stdint.h>
+
+#include "defines.h"
+
+namespace latinime {
+
+class ErrorTypeUtils {
+ public:
+ // ErrorType is mainly decided by CorrectionType but it is also depending on if
+ // the correction has really been performed or not.
+ typedef uint32_t ErrorType;
+
+ static const ErrorType NOT_AN_ERROR;
+ static const ErrorType MATCH_WITH_CASE_ERROR;
+ static const ErrorType MATCH_WITH_ACCENT_ERROR;
+ static const ErrorType MATCH_WITH_DIGRAPH;
+ // Treat error as an intentional omission when the CorrectionType is omission and the node can
+ // be intentional omission.
+ static const ErrorType INTENTIONAL_OMISSION;
+ // Substitution, omission and transposition
+ static const ErrorType EDIT_CORRECTION;
+ // Proximity error
+ static const ErrorType PROXIMITY_CORRECTION;
+ // Completion
+ static const ErrorType COMPLETION;
+ // New word
+ // TODO: Remove.
+ // A new word error should be an edit correction error or a proximity correction error.
+ static const ErrorType NEW_WORD;
+
+ // TODO: Differentiate errors.
+ static bool isExactMatch(const ErrorType containingErrors) {
+ return (containingErrors & ~ERRORS_TREATED_AS_AN_EXACT_MATCH) == 0;
+ }
+
+ static bool isEditCorrectionError(const ErrorType errorType) {
+ return (errorType & EDIT_CORRECTION) != 0;
+ }
+
+ static bool isProximityCorrectionError(const ErrorType errorType) {
+ return (errorType & PROXIMITY_CORRECTION) != 0;
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(ErrorTypeUtils);
+
+ static const ErrorType ERRORS_TREATED_AS_AN_EXACT_MATCH;
+};
+} // namespace latinime
+#endif // LATINIME_ERROR_TYPE_UTILS_H
diff --git a/native/jni/src/suggest/core/layout/proximity_info.cpp b/native/jni/src/suggest/core/layout/proximity_info.cpp
index e64476d82..ee8e59ef9 100644
--- a/native/jni/src/suggest/core/layout/proximity_info.cpp
+++ b/native/jni/src/suggest/core/layout/proximity_info.cpp
@@ -71,7 +71,7 @@ ProximityInfo::ProximityInfo(JNIEnv *env, const jstring localeJStr,
&& sweetSpotCenterYs && sweetSpotRadii),
mProximityCharsArray(new int[GRID_WIDTH * GRID_HEIGHT * MAX_PROXIMITY_CHARS_SIZE
/* proximityCharsLength */]),
- mCodeToKeyMap() {
+ mLowerCodePointToKeyMap() {
/* Let's check the input array length here to make sure */
const jsize proximityCharsLength = env->GetArrayLength(proximityChars);
if (proximityCharsLength != GRID_WIDTH * GRID_HEIGHT * MAX_PROXIMITY_CHARS_SIZE) {
@@ -147,7 +147,14 @@ int ProximityInfo::getCodePointOf(const int keyIndex) const {
if (keyIndex < 0 || keyIndex >= KEY_COUNT) {
return NOT_A_CODE_POINT;
}
- return mKeyIndexToCodePointG[keyIndex];
+ return mKeyIndexToLowerCodePointG[keyIndex];
+}
+
+int ProximityInfo::getOriginalCodePointOf(const int keyIndex) const {
+ if (keyIndex < 0 || keyIndex >= KEY_COUNT) {
+ return NOT_A_CODE_POINT;
+ }
+ return mKeyIndexToOriginalCodePoint[keyIndex];
}
void ProximityInfo::initializeG() {
@@ -164,8 +171,9 @@ void ProximityInfo::initializeG() {
const float gapY = sweetSpotCenterY - mCenterYsG[i];
mSweetSpotCenterYsG[i] = static_cast<int>(mCenterYsG[i] + gapY * verticalScale);
}
- mCodeToKeyMap[lowerCode] = i;
- mKeyIndexToCodePointG[i] = lowerCode;
+ mLowerCodePointToKeyMap[lowerCode] = i;
+ mKeyIndexToOriginalCodePoint[i] = code;
+ mKeyIndexToLowerCodePointG[i] = lowerCode;
}
for (int i = 0; i < KEY_COUNT; i++) {
mKeyKeyDistancesG[i][i] = 0;
diff --git a/native/jni/src/suggest/core/layout/proximity_info.h b/native/jni/src/suggest/core/layout/proximity_info.h
index f25949001..a91b9d674 100644
--- a/native/jni/src/suggest/core/layout/proximity_info.h
+++ b/native/jni/src/suggest/core/layout/proximity_info.h
@@ -39,6 +39,7 @@ class ProximityInfo {
float getNormalizedSquaredDistanceFromCenterFloatG(
const int keyId, const int x, const int y, const bool isGeometric) const;
int getCodePointOf(const int keyIndex) const;
+ int getOriginalCodePointOf(const int keyIndex) const;
bool hasSweetSpotData(const int keyIndex) const {
// When there are no calibration data for a key,
// the radius of the key is assigned to zero.
@@ -76,11 +77,11 @@ class ProximityInfo {
ProximityInfoUtils::initializeProximities(inputCodes, inputXCoordinates, inputYCoordinates,
inputSize, mKeyXCoordinates, mKeyYCoordinates, mKeyWidths, mKeyHeights,
mProximityCharsArray, CELL_HEIGHT, CELL_WIDTH, GRID_WIDTH, MOST_COMMON_KEY_WIDTH,
- KEY_COUNT, mLocaleStr, &mCodeToKeyMap, allInputCodes);
+ KEY_COUNT, mLocaleStr, &mLowerCodePointToKeyMap, allInputCodes);
}
AK_FORCE_INLINE int getKeyIndexOf(const int c) const {
- return ProximityInfoUtils::getKeyIndexOf(KEY_COUNT, c, &mCodeToKeyMap);
+ return ProximityInfoUtils::getKeyIndexOf(KEY_COUNT, c, &mLowerCodePointToKeyMap);
}
AK_FORCE_INLINE bool isCodePointOnKeyboard(const int codePoint) const {
@@ -117,9 +118,9 @@ class ProximityInfo {
// Sweet spots for geometric input. Note that we have extra sweet spots only for Y coordinates.
float mSweetSpotCenterYsG[MAX_KEY_COUNT_IN_A_KEYBOARD];
float mSweetSpotRadii[MAX_KEY_COUNT_IN_A_KEYBOARD];
- hash_map_compat<int, int> mCodeToKeyMap;
-
- int mKeyIndexToCodePointG[MAX_KEY_COUNT_IN_A_KEYBOARD];
+ hash_map_compat<int, int> mLowerCodePointToKeyMap;
+ int mKeyIndexToOriginalCodePoint[MAX_KEY_COUNT_IN_A_KEYBOARD];
+ int mKeyIndexToLowerCodePointG[MAX_KEY_COUNT_IN_A_KEYBOARD];
int mCenterXsG[MAX_KEY_COUNT_IN_A_KEYBOARD];
int mCenterYsG[MAX_KEY_COUNT_IN_A_KEYBOARD];
int mKeyKeyDistancesG[MAX_KEY_COUNT_IN_A_KEYBOARD][MAX_KEY_COUNT_IN_A_KEYBOARD];
diff --git a/native/jni/src/suggest/core/layout/proximity_info_state.cpp b/native/jni/src/suggest/core/layout/proximity_info_state.cpp
index fbabd92f2..bb4b41714 100644
--- a/native/jni/src/suggest/core/layout/proximity_info_state.cpp
+++ b/native/jni/src/suggest/core/layout/proximity_info_state.cpp
@@ -30,6 +30,12 @@
namespace latinime {
+int ProximityInfoState::getPrimaryOriginalCodePointAt(const int index) const {
+ const int primaryCodePoint = getPrimaryCodePointAt(index);
+ const int keyIndex = mProximityInfo->getKeyIndexOf(primaryCodePoint);
+ return mProximityInfo->getOriginalCodePointOf(keyIndex);
+}
+
// TODO: Remove the dependency of "isGeometric"
void ProximityInfoState::initInputParams(const int pointerId, const float maxPointToKeyLength,
const ProximityInfo *proximityInfo, const int *const inputCodes, const int inputSize,
diff --git a/native/jni/src/suggest/core/layout/proximity_info_state.h b/native/jni/src/suggest/core/layout/proximity_info_state.h
index c94060fa9..e941e43d8 100644
--- a/native/jni/src/suggest/core/layout/proximity_info_state.h
+++ b/native/jni/src/suggest/core/layout/proximity_info_state.h
@@ -65,6 +65,8 @@ class ProximityInfoState {
return getProximityCodePointsAt(index)[0];
}
+ int getPrimaryOriginalCodePointAt(const int index) const;
+
inline bool sameAsTyped(const int *word, int length) const {
if (length != mSampledInputSize) {
return false;
diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
index 41f82049f..e649844dc 100644
--- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
@@ -18,6 +18,7 @@
#define LATINIME_DICTIONARY_STRUCTURE_POLICY_H
#include "defines.h"
+#include "utils/exclusive_ownership_pointer.h"
namespace latinime {
@@ -33,18 +34,20 @@ class DictionaryShortcutsStructurePolicy;
*/
class DictionaryStructureWithBufferPolicy {
public:
+ typedef ExclusiveOwnershipPointer<DictionaryStructureWithBufferPolicy> StructurePoilcyPtr;
+
virtual ~DictionaryStructureWithBufferPolicy() {}
virtual int getRootPosition() const = 0;
- virtual void createAndGetAllChildNodes(const DicNode *const dicNode,
+ virtual void createAndGetAllChildDicNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const = 0;
virtual int getCodePointsAndProbabilityAndReturnCodePointCount(
const int nodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const = 0;
- virtual int getTerminalNodePositionOfWord(const int *const inWord,
+ virtual int getTerminalPtNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const = 0;
virtual int getProbability(const int unigramProbability,
diff --git a/native/jni/src/suggest/core/policy/weighting.cpp b/native/jni/src/suggest/core/policy/weighting.cpp
index 0c4016893..c202b81fe 100644
--- a/native/jni/src/suggest/core/policy/weighting.cpp
+++ b/native/jni/src/suggest/core/policy/weighting.cpp
@@ -20,6 +20,7 @@
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_profiler.h"
#include "suggest/core/dicnode/dic_node_utils.h"
+#include "suggest/core/dictionary/error_type_utils.h"
#include "suggest/core/session/dic_traverse_session.h"
namespace latinime {
@@ -82,8 +83,8 @@ static inline void profile(const CorrectionType correctionType, DicNode *const n
traverseSession, parentDicNode, dicNode, &inputStateG);
const float languageCost = Weighting::getLanguageCost(weighting, correctionType,
traverseSession, parentDicNode, dicNode, multiBigramMap);
- const ErrorType errorType = weighting->getErrorType(correctionType, traverseSession,
- parentDicNode, dicNode);
+ const ErrorTypeUtils::ErrorType errorType = weighting->getErrorType(correctionType,
+ traverseSession, parentDicNode, dicNode);
profile(correctionType, dicNode);
if (inputStateG.mNeedsToUpdateInputStateG) {
dicNode->updateInputIndexG(&inputStateG);
diff --git a/native/jni/src/suggest/core/policy/weighting.h b/native/jni/src/suggest/core/policy/weighting.h
index 2d49e98a6..bd6b3cf41 100644
--- a/native/jni/src/suggest/core/policy/weighting.h
+++ b/native/jni/src/suggest/core/policy/weighting.h
@@ -18,6 +18,7 @@
#define LATINIME_WEIGHTING_H
#include "defines.h"
+#include "suggest/core/dictionary/error_type_utils.h"
namespace latinime {
@@ -84,7 +85,7 @@ class Weighting {
virtual float getSpaceSubstitutionCost(const DicTraverseSession *const traverseSession,
const DicNode *const dicNode) const = 0;
- virtual ErrorType getErrorType(const CorrectionType correctionType,
+ virtual ErrorTypeUtils::ErrorType getErrorType(const CorrectionType correctionType,
const DicTraverseSession *const traverseSession,
const DicNode *const parentDicNode, const DicNode *const dicNode) const = 0;
diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.cpp b/native/jni/src/suggest/core/session/dic_traverse_session.cpp
index 50f2bbd8d..5070491f4 100644
--- a/native/jni/src/suggest/core/session/dic_traverse_session.cpp
+++ b/native/jni/src/suggest/core/session/dic_traverse_session.cpp
@@ -35,16 +35,16 @@ void DicTraverseSession::init(const Dictionary *const dictionary, const int *pre
->getMultiWordCostMultiplier();
mSuggestOptions = suggestOptions;
if (!prevWord) {
- mPrevWordPos = NOT_A_DICT_POS;
+ mPrevWordPtNodePos = NOT_A_DICT_POS;
return;
}
// TODO: merge following similar calls to getTerminalPosition into one case-insensitive call.
- mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(
+ mPrevWordPtNodePos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
prevWord, prevWordLength, false /* forceLowerCaseSearch */);
- if (mPrevWordPos == NOT_A_DICT_POS) {
+ if (mPrevWordPtNodePos == NOT_A_DICT_POS) {
// Check bigrams for lower-cased previous word if original was not found. Useful for
// auto-capitalized words like "The [current_word]".
- mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(
+ mPrevWordPtNodePos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
prevWord, prevWordLength, true /* forceLowerCaseSearch */);
}
}
diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.h b/native/jni/src/suggest/core/session/dic_traverse_session.h
index e0b1c67d9..6e4dda44d 100644
--- a/native/jni/src/suggest/core/session/dic_traverse_session.h
+++ b/native/jni/src/suggest/core/session/dic_traverse_session.h
@@ -59,7 +59,7 @@ class DicTraverseSession {
}
AK_FORCE_INLINE DicTraverseSession(JNIEnv *env, jstring localeStr, bool usesLargeCache)
- : mPrevWordPos(NOT_A_DICT_POS), mProximityInfo(0),
+ : mPrevWordPtNodePos(NOT_A_DICT_POS), mProximityInfo(0),
mDictionary(0), mSuggestOptions(0), mDicNodesCache(usesLargeCache),
mMultiBigramMap(), mInputSize(0), mPartiallyCommited(false), mMaxPointerCount(1),
mMultiWordCostMultiplier(1.0f) {
@@ -86,11 +86,9 @@ class DicTraverseSession {
//--------------------
const ProximityInfo *getProximityInfo() const { return mProximityInfo; }
const SuggestOptions *getSuggestOptions() const { return mSuggestOptions; }
- int getPrevWordPos() const { return mPrevWordPos; }
+ int getPrevWordPtNodePos() const { return mPrevWordPtNodePos; }
// TODO: REMOVE
- void setPrevWordPos(int pos) { mPrevWordPos = pos; }
- // TODO: Use proper parameter when changed
- int getDicRootPos() const { return 0; }
+ void setPrevWordPtNodePos(const int ptNodePos) { mPrevWordPtNodePos = ptNodePos; }
DicNodesCache *getDicTraverseCache() { return &mDicNodesCache; }
MultiBigramMap *getMultiBigramMap() { return &mMultiBigramMap; }
const ProximityInfoState *getProximityInfoState(int id) const {
@@ -119,26 +117,13 @@ class DicTraverseSession {
return true;
}
- void getSearchKeys(const DicNode *node, std::vector<int> *const outputSearchKeyVector) const {
- for (int i = 0; i < MAX_POINTER_COUNT_G; ++i) {
- if (!mProximityInfoStates[i].isUsed()) {
- continue;
- }
- const int pointerId = node->getInputIndex(i);
- const std::vector<int> *const searchKeyVector =
- mProximityInfoStates[i].getSearchKeyVector(pointerId);
- outputSearchKeyVector->insert(outputSearchKeyVector->end(), searchKeyVector->begin(),
- searchKeyVector->end());
- }
- }
-
- ProximityType getProximityTypeG(const DicNode *const node, const int childCodePoint) const {
+ ProximityType getProximityTypeG(const DicNode *const dicNode, const int childCodePoint) const {
ProximityType proximityType = UNRELATED_CHAR;
for (int i = 0; i < MAX_POINTER_COUNT_G; ++i) {
if (!mProximityInfoStates[i].isUsed()) {
continue;
}
- const int pointerId = node->getInputIndex(i);
+ const int pointerId = dicNode->getInputIndex(i);
proximityType = mProximityInfoStates[i].getProximityTypeG(pointerId, childCodePoint);
ASSERT(proximityType == UNRELATED_CHAR || proximityType == MATCH_CHAR);
// TODO: Make this more generic
@@ -192,7 +177,7 @@ class DicTraverseSession {
const int *const inputYs, const int *const times, const int *const pointerIds,
const int inputSize, const float maxSpatialDistance, const int maxPointerCount);
- int mPrevWordPos;
+ int mPrevWordPtNodePos;
const ProximityInfo *mProximityInfo;
const Dictionary *mDictionary;
const SuggestOptions *mSuggestOptions;
diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp
index 73ccebc88..2eda414f4 100644
--- a/native/jni/src/suggest/core/suggest.cpp
+++ b/native/jni/src/suggest/core/suggest.cpp
@@ -98,7 +98,7 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPo
// Continue suggestion after partial commit.
DicNode *topDicNode =
traverseSession->getDicTraverseCache()->setCommitPoint(commitPoint);
- traverseSession->setPrevWordPos(topDicNode->getPrevWordNodePos());
+ traverseSession->setPrevWordPtNodePos(topDicNode->getPrevWordPtNodePos());
traverseSession->getDicTraverseCache()->continueSearch();
traverseSession->setPartiallyCommited();
}
@@ -109,7 +109,7 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPo
// Create a new dic node here
DicNode rootNode;
DicNodeUtils::initAsRoot(traverseSession->getDictionaryStructurePolicy(),
- traverseSession->getPrevWordPos(), &rootNode);
+ traverseSession->getPrevWordPtNodePos(), &rootNode);
traverseSession->getDicTraverseCache()->copyPushActive(&rootNode);
}
}
@@ -231,7 +231,7 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
BinaryDictionaryShortcutIterator shortcutIt(
traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(),
traverseSession->getDictionaryStructurePolicy()
- ->getShortcutPositionOfPtNode(terminalDicNode->getPos()));
+ ->getShortcutPositionOfPtNode(terminalDicNode->getPtNodePos()));
// Shortcut is not supported for multiple words suggestions.
// TODO: Check shortcuts during traversal for multiple words suggestions.
const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode);
@@ -421,15 +421,15 @@ void Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const {
}
break;
case UNRELATED_CHAR:
- // Just drop this node and do nothing.
+ // Just drop this dicNode and do nothing.
break;
default:
- // Just drop this node and do nothing.
+ // Just drop this dicNode and do nothing.
break;
}
}
- // Push the node for look-ahead correction
+ // Push the dicNode for look-ahead correction
if (allowsErrorCorrections && canDoLookAheadCorrection) {
traverseSession->getDicTraverseCache()->copyPushNextActive(&dicNode);
}
@@ -442,7 +442,7 @@ void Suggest::processTerminalDicNode(
if (dicNode->getCompoundDistance() >= static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) {
return;
}
- if (!dicNode->isTerminalWordNode()) {
+ if (!dicNode->isTerminalDicNode()) {
return;
}
if (dicNode->shouldBeFilteredBySafetyNetForBigram()) {
@@ -463,7 +463,7 @@ void Suggest::processTerminalDicNode(
/**
* Adds the expanded dicNode to the next search priority queue. Also creates an additional next word
- * (by the space omission error correction) search path if input dicNode is on a terminal node.
+ * (by the space omission error correction) search path if input dicNode is on a terminal.
*/
void Suggest::processExpandedDicNode(
DicTraverseSession *traverseSession, DicNode *dicNode) const {
@@ -505,7 +505,7 @@ void Suggest::processDicNodeAsSubstitution(DicTraverseSession *traverseSession,
processExpandedDicNode(traverseSession, childDicNode);
}
-// Process the node codepoint as a digraph. This means that composite glyphs like the German
+// Process the DicNode codepoint as a digraph. This means that composite glyphs like the German
// u-umlaut is expanded to the transliteration "ue". Note that this happens in parallel with
// the normal non-digraph traversal, so both "uber" and "ueber" can be corrected to "[u-umlaut]ber".
void Suggest::processDicNodeAsDigraph(DicTraverseSession *traverseSession,
@@ -518,7 +518,7 @@ void Suggest::processDicNodeAsDigraph(DicTraverseSession *traverseSession,
/**
* Handle the dicNode as an omission error (e.g., ths => this). Skip the current letter and consider
* matches for all possible next letters. Note that just skipping the current letter without any
- * other conditions tends to flood the search dic nodes cache with omission nodes. Instead, check
+ * other conditions tends to flood the search DicNodes cache with omission DicNodes. Instead, check
* the possible *next* letters after the omission to better limit search to plausible omissions.
* Note that apostrophes are handled as omissions.
*/
@@ -605,7 +605,7 @@ void Suggest::processDicNodeAsTransposition(DicTraverseSession *traverseSession,
}
/**
- * Weight child node by aligning it to the key
+ * Weight child dicNode by aligning it to the key
*/
void Suggest::weightChildNode(DicTraverseSession *traverseSession, DicNode *dicNode) const {
const int inputSize = traverseSession->getInputSize();
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp
index 1926b9831..de9fc9bbc 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp
@@ -16,7 +16,7 @@
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
index b1170e251..83a32fb0b 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
@@ -17,8 +17,8 @@
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
@@ -157,8 +157,9 @@ bool DynamicBigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(
}
const int bigramTargetNodePos =
followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
- nodeReader.fetchNodeInfoInBufferFromPtNodePos(bigramTargetNodePos);
- if (nodeReader.isDeleted() || !nodeReader.isTerminal()
+ const PtNodeParams ptNodeParams(nodeReader.fetchNodeInfoInBufferFromPtNodePos(
+ bigramTargetNodePos));
+ if (ptNodeParams.isDeleted() || !ptNodeParams.isTerminal()
|| bigramTargetNodePos == NOT_A_DICT_POS) {
// The target is no longer valid terminal. Invalidate the current bigram entry.
if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
@@ -342,20 +343,22 @@ int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos(
if (originalBigramPos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS;
}
- int currentPos = originalBigramPos;
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy);
- nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos);
+ int currentPos = NOT_A_DICT_POS;
int bigramLinkCount = 0;
- while (nodeReader.getBigramLinkedNodePos() != NOT_A_DICT_POS) {
- currentPos = nodeReader.getBigramLinkedNodePos();
- nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos);
+ int bigramLinkedNodePos = originalBigramPos;
+ do {
+ currentPos = bigramLinkedNodePos;
+ const PtNodeParams ptNodeParams(nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos));
+ bigramLinkedNodePos = ptNodeParams.getBigramLinkedNodePos();
bigramLinkCount++;
if (bigramLinkCount > CONTINUING_BIGRAM_LINK_COUNT_LIMIT) {
AKLOGE("Bigram link is invalid. start position: %d", originalBigramPos);
ASSERT(false);
return NOT_A_DICT_POS;
}
- }
+ bigramLinkedNodePos = ptNodeParams.getBigramLinkedNodePos();
+ } while (bigramLinkedNodePos != NOT_A_DICT_POS);
return currentPos;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
index 0504b59d5..5de456656 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
@@ -22,7 +22,7 @@
#include "defines.h"
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h"
namespace latinime {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h
new file mode 100644
index 000000000..875a0ff9b
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_BIGRAM_LIST_POLICY_H
+#define LATINIME_VER4_BIGRAM_LIST_POLICY_H
+
+#include "defines.h"
+#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
+#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
+
+namespace latinime {
+
+class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
+ public:
+ Ver4BigramListPolicy(const BigramDictContent *const bigramDictContent,
+ const TerminalPositionLookupTable *const terminalPositionLookupTable)
+ : mBigramDictContent(bigramDictContent),
+ mTerminalPositionLookupTable(terminalPositionLookupTable) {}
+
+ void getNextBigram(int *const outBigramPos, int *const outProbability,
+ bool *const outHasNext, int *const bigramEntryPos) const {
+ int bigramFlags = 0;
+ int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
+ mBigramDictContent->getBigramEntryAndAdvancePosition(&bigramFlags, &targetTerminalId,
+ bigramEntryPos);
+ if (outProbability) {
+ *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags);
+ }
+ if (outHasNext) {
+ *outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags);
+ }
+ if (outBigramPos) {
+ // Lookup target PtNode position.
+ *outBigramPos =
+ mTerminalPositionLookupTable->getTerminalPtNodePosition(targetTerminalId);
+ }
+ }
+
+ void skipAllBigrams(int *const pos) const {
+ // Do nothing because we don't need to skip bigram lists in ver4 dictionaries.
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy);
+
+ const BigramDictContent *const mBigramDictContent;
+ const TerminalPositionLookupTable *const mTerminalPositionLookupTable;
+};
+} // namespace latinime
+#endif /* LATINIME_VER4_BIGRAM_LIST_POLICY_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp
deleted file mode 100644
index ff80dd2f6..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h"
-
-#include <stdint.h>
-
-#include "defines.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h"
-#include "suggest/policyimpl/dictionary/patricia_trie_policy.h"
-#include "suggest/policyimpl/dictionary/utils/format_utils.h"
-#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
-
-namespace latinime {
-
-/* static */ DictionaryStructureWithBufferPolicy *DictionaryStructureWithBufferPolicyFactory
- ::newDictionaryStructureWithBufferPolicy(const char *const path, const int bufOffset,
- const int size, const bool isUpdatable) {
- // Allocated buffer in MmapedBuffer::openBuffer() will be freed in the destructor of
- // impl classes of DictionaryStructureWithBufferPolicy.
- const MmappedBuffer *const mmapedBuffer = MmappedBuffer::openBuffer(path, bufOffset, size,
- isUpdatable);
- if (!mmapedBuffer) {
- return 0;
- }
- switch (FormatUtils::detectFormatVersion(mmapedBuffer->getBuffer(),
- mmapedBuffer->getBufferSize())) {
- case FormatUtils::VERSION_2:
- return new PatriciaTriePolicy(mmapedBuffer);
- case FormatUtils::VERSION_3:
- return new DynamicPatriciaTriePolicy(mmapedBuffer);
- default:
- AKLOGE("DICT: dictionary format is unknown, bad magic number");
- delete mmapedBuffer;
- ASSERT(false);
- return 0;
- }
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp
deleted file mode 100644
index 2fa3111d3..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
-
-#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
-#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
-
-namespace latinime {
-
-void DynamicPatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode(
- const int ptNodePos, const int maxCodePointCount, int *const outCodePoints) {
- if (ptNodePos < 0 || ptNodePos >= mBuffer->getTailPosition()) {
- // Reading invalid position because of bug or broken dictionary.
- AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d",
- ptNodePos, mBuffer->getTailPosition());
- ASSERT(false);
- invalidatePtNodeInfo();
- return;
- }
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodePos);
- const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
- int pos = ptNodePos;
- mHeadPos = ptNodePos;
- if (usesAdditionalBuffer) {
- pos -= mBuffer->getOriginalBufferSize();
- }
- mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
- const int parentPosOffset =
- DynamicPatriciaTrieReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(dictBuf,
- &pos);
- mParentPos = DynamicPatriciaTrieReadingUtils::getParentPtNodePos(parentPosOffset, mHeadPos);
- if (outCodePoints != 0) {
- mCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
- dictBuf, mFlags, maxCodePointCount, outCodePoints, &pos);
- } else {
- mCodePointCount = PatriciaTrieReadingUtils::skipCharacters(
- dictBuf, mFlags, MAX_WORD_LENGTH, &pos);
- }
- if (isTerminal()) {
- mProbabilityFieldPos = pos;
- if (usesAdditionalBuffer) {
- mProbabilityFieldPos += mBuffer->getOriginalBufferSize();
- }
- mProbability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictBuf, &pos);
- } else {
- mProbabilityFieldPos = NOT_A_DICT_POS;
- mProbability = NOT_A_PROBABILITY;
- }
- mChildrenPosFieldPos = pos;
- if (usesAdditionalBuffer) {
- mChildrenPosFieldPos += mBuffer->getOriginalBufferSize();
- }
- mChildrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
- dictBuf, &pos);
- if (usesAdditionalBuffer && mChildrenPos != NOT_A_DICT_POS) {
- mChildrenPos += mBuffer->getOriginalBufferSize();
- }
- if (mSiblingPos == NOT_A_DICT_POS) {
- if (DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) {
- mBigramLinkedNodePos = mChildrenPos;
- } else {
- mBigramLinkedNodePos = NOT_A_DICT_POS;
- }
- }
- if (usesAdditionalBuffer) {
- pos += mBuffer->getOriginalBufferSize();
- }
- if (PatriciaTrieReadingUtils::hasShortcutTargets(mFlags)) {
- mShortcutPos = pos;
- mShortcutsPolicy->skipAllShortcuts(&pos);
- } else {
- mShortcutPos = NOT_A_DICT_POS;
- }
- if (PatriciaTrieReadingUtils::hasBigrams(mFlags)) {
- mBigramPos = pos;
- mBigramsPolicy->skipAllBigrams(&pos);
- } else {
- mBigramPos = NOT_A_DICT_POS;
- }
- // Update siblingPos if needed.
- if (mSiblingPos == NOT_A_DICT_POS) {
- // Sibling position is the tail position of current node.
- mSiblingPos = pos;
- }
- // Read destination node if the read node is a moved node.
- if (DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) {
- // The destination position is stored at the same place as the parent position.
- fetchPtNodeInfoFromBufferAndProcessMovedPtNode(mParentPos, maxCodePointCount,
- outCodePoints);
- }
-}
-
-void DynamicPatriciaTrieNodeReader::invalidatePtNodeInfo() {
- mHeadPos = NOT_A_DICT_POS;
- mFlags = 0;
- mParentPos = NOT_A_DICT_POS;
- mCodePointCount = 0;
- mProbabilityFieldPos = NOT_A_DICT_POS;
- mProbability = NOT_A_PROBABILITY;
- mChildrenPosFieldPos = NOT_A_DICT_POS;
- mChildrenPos = NOT_A_DICT_POS;
- mBigramLinkedNodePos = NOT_A_DICT_POS;
- mShortcutPos = NOT_A_DICT_POS;
- mBigramPos = NOT_A_DICT_POS;
- mSiblingPos = NOT_A_DICT_POS;
-}
-
-}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h
deleted file mode 100644
index 3b36d425f..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H
-#define LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H
-
-#include <stdint.h>
-
-#include "defines.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
-
-namespace latinime {
-
-class BufferWithExtendableBuffer;
-class DictionaryBigramsStructurePolicy;
-class DictionaryShortcutsStructurePolicy;
-
-/*
- * This class is used for helping to read nodes of dynamic patricia trie. This class handles moved
- * node and reads node attributes.
- */
-class DynamicPatriciaTrieNodeReader {
- public:
- DynamicPatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer,
- const DictionaryBigramsStructurePolicy *const bigramsPolicy,
- const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
- : mBuffer(buffer), mBigramsPolicy(bigramsPolicy),
- mShortcutsPolicy(shortcutsPolicy), mHeadPos(NOT_A_DICT_POS), mFlags(0),
- mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbabilityFieldPos(NOT_A_DICT_POS),
- mProbability(NOT_A_PROBABILITY), mChildrenPosFieldPos(NOT_A_DICT_POS),
- mChildrenPos(NOT_A_DICT_POS), mBigramLinkedNodePos(NOT_A_DICT_POS),
- mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS),
- mSiblingPos(NOT_A_DICT_POS) {}
-
- ~DynamicPatriciaTrieNodeReader() {}
-
- // Reads PtNode information from dictionary buffer and updates members with the information.
- AK_FORCE_INLINE void fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) {
- fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(ptNodePos ,
- 0 /* maxCodePointCount */, 0 /* outCodePoints */);
- }
-
- AK_FORCE_INLINE void fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(
- const int ptNodePos, const int maxCodePointCount, int *const outCodePoints) {
- mSiblingPos = NOT_A_DICT_POS;
- mBigramLinkedNodePos = NOT_A_DICT_POS;
- fetchPtNodeInfoFromBufferAndProcessMovedPtNode(ptNodePos, maxCodePointCount, outCodePoints);
- }
-
- // HeadPos is different from NodePos when the current PtNode is a moved PtNode.
- AK_FORCE_INLINE int getHeadPos() const {
- return mHeadPos;
- }
-
- // Flags
- AK_FORCE_INLINE bool isDeleted() const {
- return DynamicPatriciaTrieReadingUtils::isDeleted(mFlags);
- }
-
- AK_FORCE_INLINE bool hasChildren() const {
- return mChildrenPos != NOT_A_DICT_POS;
- }
-
- AK_FORCE_INLINE bool isTerminal() const {
- return PatriciaTrieReadingUtils::isTerminal(mFlags);
- }
-
- AK_FORCE_INLINE bool isBlacklisted() const {
- return PatriciaTrieReadingUtils::isBlacklisted(mFlags);
- }
-
- AK_FORCE_INLINE bool isNotAWord() const {
- return PatriciaTrieReadingUtils::isNotAWord(mFlags);
- }
-
- // Parent node position
- AK_FORCE_INLINE int getParentPos() const {
- return mParentPos;
- }
-
- // Number of code points
- AK_FORCE_INLINE uint8_t getCodePointCount() const {
- return mCodePointCount;
- }
-
- // Probability
- AK_FORCE_INLINE int getProbabilityFieldPos() const {
- return mProbabilityFieldPos;
- }
-
- AK_FORCE_INLINE int getProbability() const {
- return mProbability;
- }
-
- // Children PtNode array position
- AK_FORCE_INLINE int getChildrenPosFieldPos() const {
- return mChildrenPosFieldPos;
- }
-
- AK_FORCE_INLINE int getChildrenPos() const {
- return mChildrenPos;
- }
-
- // Bigram linked node position.
- AK_FORCE_INLINE int getBigramLinkedNodePos() const {
- return mBigramLinkedNodePos;
- }
-
- // Shortcutlist position
- AK_FORCE_INLINE int getShortcutPos() const {
- return mShortcutPos;
- }
-
- // Bigrams position
- AK_FORCE_INLINE int getBigramsPos() const {
- return mBigramPos;
- }
-
- // Sibling node position
- AK_FORCE_INLINE int getSiblingNodePos() const {
- return mSiblingPos;
- }
-
- private:
- DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieNodeReader);
-
- const BufferWithExtendableBuffer *const mBuffer;
- const DictionaryBigramsStructurePolicy *const mBigramsPolicy;
- const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy;
- int mHeadPos;
- DynamicPatriciaTrieReadingUtils::NodeFlags mFlags;
- int mParentPos;
- uint8_t mCodePointCount;
- int mProbabilityFieldPos;
- int mProbability;
- int mChildrenPosFieldPos;
- int mChildrenPos;
- int mBigramLinkedNodePos;
- int mShortcutPos;
- int mBigramPos;
- int mSiblingPos;
-
- void fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
- const int maxCodePointCount, int *const outCodePoints);
-
- void invalidatePtNodeInfo();
-};
-} // namespace latinime
-#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
deleted file mode 100644
index 052558bfc..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
+++ /dev/null
@@ -1,558 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
-
-#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
-#include "suggest/policyimpl/dictionary/header/header_policy.h"
-#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
-#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
-#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
-#include "utils/hash_map_compat.h"
-
-namespace latinime {
-
-const int DynamicPatriciaTrieWritingHelper::CHILDREN_POSITION_FIELD_SIZE = 3;
-// TODO: Make MAX_DICTIONARY_SIZE 8MB.
-const size_t DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE = 2 * 1024 * 1024;
-
-bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
- DynamicPatriciaTrieReadingHelper *const readingHelper,
- const int *const wordCodePoints, const int codePointCount, const int probability,
- bool *const outAddedNewUnigram) {
- int parentPos = NOT_A_DICT_POS;
- while (!readingHelper->isEnd()) {
- const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount();
- if (!readingHelper->isMatchedCodePoint(0 /* index */,
- wordCodePoints[matchedCodePointCount])) {
- // The first code point is different from target code point. Skip this node and read
- // the next sibling node.
- readingHelper->readNextSiblingNode();
- continue;
- }
- // Check following merged node code points.
- const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper->getNodeReader();
- const int nodeCodePointCount = nodeReader->getCodePointCount();
- for (int j = 1; j < nodeCodePointCount; ++j) {
- const int nextIndex = matchedCodePointCount + j;
- if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(j,
- wordCodePoints[matchedCodePointCount + j])) {
- *outAddedNewUnigram = true;
- return reallocatePtNodeAndAddNewPtNodes(nodeReader,
- readingHelper->getMergedNodeCodePoints(), j,
- getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */,
- probability),
- wordCodePoints + matchedCodePointCount,
- codePointCount - matchedCodePointCount);
- }
- }
- // All characters are matched.
- if (codePointCount == readingHelper->getTotalCodePointCount()) {
- return setPtNodeProbability(nodeReader, probability,
- readingHelper->getMergedNodeCodePoints(), outAddedNewUnigram);
- }
- if (!nodeReader->hasChildren()) {
- *outAddedNewUnigram = true;
- return createChildrenPtNodeArrayAndAChildPtNode(nodeReader,
- getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability),
- wordCodePoints + readingHelper->getTotalCodePointCount(),
- codePointCount - readingHelper->getTotalCodePointCount());
- }
- // Advance to the children nodes.
- parentPos = nodeReader->getHeadPos();
- readingHelper->readChildNode();
- }
- if (readingHelper->isError()) {
- // The dictionary is invalid.
- return false;
- }
- int pos = readingHelper->getPosOfLastForwardLinkField();
- *outAddedNewUnigram = true;
- return createAndInsertNodeIntoPtNodeArray(parentPos,
- wordCodePoints + readingHelper->getPrevTotalCodePointCount(),
- codePointCount - readingHelper->getPrevTotalCodePointCount(),
- getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), &pos);
-}
-
-bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos,
- const int probability, bool *const outAddedNewBigram) {
- int mMergedNodeCodePoints[MAX_WORD_LENGTH];
- DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
- nodeReader.fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(word0Pos, MAX_WORD_LENGTH,
- mMergedNodeCodePoints);
- // Move node to add bigram entry.
- const int newNodePos = mBuffer->getTailPosition();
- if (!markNodeAsMovedAndSetPosition(&nodeReader, newNodePos, newNodePos)) {
- return false;
- }
- int writingPos = newNodePos;
- // Write a new PtNode using original PtNode's info to the tail of the dictionary in mBuffer.
- if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, &nodeReader, nodeReader.getParentPos(),
- mMergedNodeCodePoints, nodeReader.getCodePointCount(), nodeReader.getProbability(),
- &writingPos)) {
- return false;
- }
- nodeReader.fetchNodeInfoInBufferFromPtNodePos(newNodePos);
- if (nodeReader.getBigramsPos() != NOT_A_DICT_POS) {
- // Insert a new bigram entry into the existing bigram list.
- int bigramListPos = nodeReader.getBigramsPos();
- return mBigramPolicy->addNewBigramEntryToBigramList(word1Pos, probability, &bigramListPos,
- outAddedNewBigram);
- } else {
- // The PtNode doesn't have a bigram list.
- *outAddedNewBigram = true;
- // First, Write a bigram entry at the tail position of the PtNode.
- if (!mBigramPolicy->writeNewBigramEntry(word1Pos, probability, &writingPos)) {
- return false;
- }
- // Then, Mark as the PtNode having bigram list in the flags.
- const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
- PatriciaTrieReadingUtils::createAndGetFlags(nodeReader.isBlacklisted(),
- nodeReader.isNotAWord(), nodeReader.getProbability() != NOT_A_PROBABILITY,
- nodeReader.getShortcutPos() != NOT_A_DICT_POS, true /* hasBigrams */,
- nodeReader.getCodePointCount() > 1, CHILDREN_POSITION_FIELD_SIZE);
- writingPos = newNodePos;
- // Write updated flags into the moved PtNode's flags field.
- return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
- &writingPos);
- }
-}
-
-// Remove a bigram relation from word0Pos to word1Pos.
-bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) {
- DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
- nodeReader.fetchNodeInfoInBufferFromPtNodePos(word0Pos);
- if (nodeReader.getBigramsPos() == NOT_A_DICT_POS) {
- return false;
- }
- return mBigramPolicy->removeBigram(nodeReader.getBigramsPos(), word1Pos);
-}
-
-void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileName,
- const HeaderPolicy *const headerPolicy, const int unigramCount, const int bigramCount) {
- BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
- const int extendedRegionSize = headerPolicy->getExtendedRegionSize() +
- mBuffer->getUsedAdditionalBufferSize();
- if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */,
- false /* updatesLastDecayedTime */, unigramCount, bigramCount, extendedRegionSize)) {
- return;
- }
- DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, mBuffer);
-}
-
-void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos,
- const char *const fileName, const HeaderPolicy *const headerPolicy) {
- BufferWithExtendableBuffer newDictBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */,
- MAX_DICTIONARY_SIZE);
- int unigramCount = 0;
- int bigramCount = 0;
- if (mNeedsToDecay) {
- ForgettingCurveUtils::sTimeKeeper.setCurrentTime();
- }
- if (!runGC(rootPtNodeArrayPos, headerPolicy, &newDictBuffer, &unigramCount, &bigramCount)) {
- return;
- }
- BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
- if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */,
- mNeedsToDecay, unigramCount, bigramCount, 0 /* extendedRegionSize */)) {
- return;
- }
- DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, &newDictBuffer);
-}
-
-bool DynamicPatriciaTrieWritingHelper::markNodeAsDeleted(
- const DynamicPatriciaTrieNodeReader *const nodeToUpdate) {
- int pos = nodeToUpdate->getHeadPos();
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
- const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
- if (usesAdditionalBuffer) {
- pos -= mBuffer->getOriginalBufferSize();
- }
- // Read original flags
- const PatriciaTrieReadingUtils::NodeFlags originalFlags =
- PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
- const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
- DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */,
- true /* isDeleted */);
- int writingPos = nodeToUpdate->getHeadPos();
- // Update flags.
- return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
- &writingPos);
-}
-
-bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
- const DynamicPatriciaTrieNodeReader *const originalNode, const int movedPos,
- const int bigramLinkedNodePos) {
- int pos = originalNode->getHeadPos();
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
- const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
- if (usesAdditionalBuffer) {
- pos -= mBuffer->getOriginalBufferSize();
- }
- // Read original flags
- const PatriciaTrieReadingUtils::NodeFlags originalFlags =
- PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
- const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
- DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */,
- false /* isDeleted */);
- int writingPos = originalNode->getHeadPos();
- // Update flags.
- if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
- &writingPos)) {
- return false;
- }
- // Update moved position, which is stored in the parent offset field.
- if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
- mBuffer, movedPos, originalNode->getHeadPos(), &writingPos)) {
- return false;
- }
- // Update bigram linked node position, which is stored in the children position field.
- int childrenPosFieldPos = originalNode->getChildrenPosFieldPos();
- if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
- mBuffer, bigramLinkedNodePos, &childrenPosFieldPos)) {
- return false;
- }
- if (originalNode->hasChildren()) {
- // Update children's parent position.
- DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
- const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
- readingHelper.initWithPtNodeArrayPos(originalNode->getChildrenPos());
- while (!readingHelper.isEnd()) {
- int parentOffsetFieldPos = nodeReader->getHeadPos()
- + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
- if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
- mBuffer, bigramLinkedNodePos, nodeReader->getHeadPos(),
- &parentOffsetFieldPos)) {
- // Parent offset cannot be written because of a bug or a broken dictionary; thus,
- // we give up to update dictionary.
- return false;
- }
- readingHelper.readNextSiblingNode();
- }
- }
- return true;
-}
-
-// Write new PtNode at writingPos.
-bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(
- BufferWithExtendableBuffer *const bufferToWrite, const bool isBlacklisted,
- const bool isNotAWord, const int parentPos, const int *const codePoints,
- const int codePointCount, const int probability, const int childrenPos,
- const int originalBigramListPos, const int originalShortcutListPos,
- int *const writingPos) {
- const int nodePos = *writingPos;
- // Write dummy flags. The Node flags are updated with appropriate flags at the last step of the
- // PtNode writing.
- if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(bufferToWrite,
- 0 /* nodeFlags */, writingPos)) {
- return false;
- }
- // Calculate a parent offset and write the offset.
- if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(bufferToWrite,
- parentPos, nodePos, writingPos)) {
- return false;
- }
- // Write code points
- if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(bufferToWrite,
- codePoints, codePointCount, writingPos)) {
- return false;
- }
- // Write probability when the probability is a valid probability, which means this node is
- // terminal.
- if (probability != NOT_A_PROBABILITY) {
- if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(bufferToWrite,
- probability, writingPos)) {
- return false;
- }
- }
- // Write children position
- if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(bufferToWrite,
- childrenPos, writingPos)) {
- return false;
- }
- // Copy shortcut list when the originalShortcutListPos is valid dictionary position.
- if (originalShortcutListPos != NOT_A_DICT_POS) {
- int fromPos = originalShortcutListPos;
- if (!mShortcutPolicy->copyAllShortcutsAndReturnIfSucceededOrNot(bufferToWrite, &fromPos,
- writingPos)) {
- return false;
- }
- }
- // Copy bigram list when the originalBigramListPos is valid dictionary position.
- int bigramCount = 0;
- if (originalBigramListPos != NOT_A_DICT_POS) {
- int fromPos = originalBigramListPos;
- if (!mBigramPolicy->copyAllBigrams(bufferToWrite, &fromPos, writingPos, &bigramCount)) {
- return false;
- }
- }
- // Create node flags and write them.
- PatriciaTrieReadingUtils::NodeFlags nodeFlags =
- PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord,
- probability != NOT_A_PROBABILITY /* isTerminal */,
- originalShortcutListPos != NOT_A_DICT_POS /* hasShortcutTargets */,
- bigramCount > 0 /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */,
- CHILDREN_POSITION_FIELD_SIZE);
- int flagsFieldPos = nodePos;
- if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(bufferToWrite, nodeFlags,
- &flagsFieldPos)) {
- return false;
- }
- return true;
-}
-
-bool DynamicPatriciaTrieWritingHelper::writePtNodeToBuffer(
- BufferWithExtendableBuffer *const bufferToWrite, const int parentPos,
- const int *const codePoints, const int codePointCount, const int probability,
- int *const writingPos) {
- return writePtNodeWithFullInfoToBuffer(bufferToWrite, false /* isBlacklisted */,
- false /* isNotAWord */, parentPos, codePoints, codePointCount, probability,
- NOT_A_DICT_POS /* childrenPos */, NOT_A_DICT_POS /* originalBigramsPos */,
- NOT_A_DICT_POS /* originalShortcutPos */, writingPos);
-}
-
-bool DynamicPatriciaTrieWritingHelper::writePtNodeToBufferByCopyingPtNodeInfo(
- BufferWithExtendableBuffer *const bufferToWrite,
- const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos,
- const int *const codePoints, const int codePointCount, const int probability,
- int *const writingPos) {
- return writePtNodeWithFullInfoToBuffer(bufferToWrite, originalNode->isBlacklisted(),
- originalNode->isNotAWord(), parentPos, codePoints, codePointCount, probability,
- originalNode->getChildrenPos(), originalNode->getBigramsPos(),
- originalNode->getShortcutPos(), writingPos);
-}
-
-bool DynamicPatriciaTrieWritingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos,
- const int *const nodeCodePoints, const int nodeCodePointCount, const int probability,
- int *const forwardLinkFieldPos) {
- const int newPtNodeArrayPos = mBuffer->getTailPosition();
- if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
- newPtNodeArrayPos, forwardLinkFieldPos)) {
- return false;
- }
- return createNewPtNodeArrayWithAChildPtNode(parentPos, nodeCodePoints, nodeCodePointCount,
- probability);
-}
-
-bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability(
- const DynamicPatriciaTrieNodeReader *const originalPtNode, const int probability,
- const int *const codePoints, bool *const outAddedNewUnigram) {
- if (originalPtNode->isTerminal()) {
- // Overwrites the probability.
- *outAddedNewUnigram = false;
- const int probabilityToWrite = getUpdatedProbability(originalPtNode->getProbability(),
- probability);
- int probabilityFieldPos = originalPtNode->getProbabilityFieldPos();
- if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
- probabilityToWrite, &probabilityFieldPos)) {
- return false;
- }
- } else {
- // Make the node terminal and write the probability.
- *outAddedNewUnigram = true;
- int movedPos = mBuffer->getTailPosition();
- if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos, movedPos)) {
- return false;
- }
- if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, originalPtNode,
- originalPtNode->getParentPos(), codePoints, originalPtNode->getCodePointCount(),
- getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability),
- &movedPos)) {
- return false;
- }
- }
- return true;
-}
-
-bool DynamicPatriciaTrieWritingHelper::createChildrenPtNodeArrayAndAChildPtNode(
- const DynamicPatriciaTrieNodeReader *const parentNode, const int probability,
- const int *const codePoints, const int codePointCount) {
- const int newPtNodeArrayPos = mBuffer->getTailPosition();
- int childrenPosFieldPos = parentNode->getChildrenPosFieldPos();
- if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
- newPtNodeArrayPos, &childrenPosFieldPos)) {
- return false;
- }
- return createNewPtNodeArrayWithAChildPtNode(parentNode->getHeadPos(), codePoints,
- codePointCount, probability);
-}
-
-bool DynamicPatriciaTrieWritingHelper::createNewPtNodeArrayWithAChildPtNode(
- const int parentPtNodePos, const int *const nodeCodePoints, const int nodeCodePointCount,
- const int probability) {
- int writingPos = mBuffer->getTailPosition();
- if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
- 1 /* arraySize */, &writingPos)) {
- return false;
- }
- if (!writePtNodeToBuffer(mBuffer, parentPtNodePos, nodeCodePoints, nodeCodePointCount,
- probability, &writingPos)) {
- return false;
- }
- if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
- NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
- return false;
- }
- return true;
-}
-
-// Returns whether the dictionary updating was succeeded or not.
-bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
- const DynamicPatriciaTrieNodeReader *const reallocatingPtNode,
- const int *const reallocatingPtNodeCodePoints, const int overlappingCodePointCount,
- const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
- const int newNodeCodePointCount) {
- // When addsExtraChild is true, split the reallocating PtNode and add new child.
- // Reallocating PtNode: abcde, newNode: abcxy.
- // abc (1st, not terminal) __ de (2nd)
- // \_ xy (extra child, terminal)
- // Otherwise, this method makes 1st part terminal and write probabilityOfNewPtNode.
- // Reallocating PtNode: abcde, newNode: abc.
- // abc (1st, terminal) __ de (2nd)
- const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount;
- const int firstPartOfReallocatedPtNodePos = mBuffer->getTailPosition();
- int writingPos = firstPartOfReallocatedPtNodePos;
- // Write the 1st part of the reallocating node. The children position will be updated later
- // with actual children position.
- const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode;
- if (!writePtNodeToBuffer(mBuffer, reallocatingPtNode->getParentPos(),
- reallocatingPtNodeCodePoints, overlappingCodePointCount, newProbability,
- &writingPos)) {
- return false;
- }
- const int actualChildrenPos = writingPos;
- // Create new children PtNode array.
- const size_t newPtNodeCount = addsExtraChild ? 2 : 1;
- if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
- newPtNodeCount, &writingPos)) {
- return false;
- }
- // Write the 2nd part of the reallocating node.
- const int secondPartOfReallocatedPtNodePos = writingPos;
- if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, reallocatingPtNode,
- firstPartOfReallocatedPtNodePos,
- reallocatingPtNodeCodePoints + overlappingCodePointCount,
- reallocatingPtNode->getCodePointCount() - overlappingCodePointCount,
- reallocatingPtNode->getProbability(), &writingPos)) {
- return false;
- }
- if (addsExtraChild) {
- if (!writePtNodeToBuffer(mBuffer, firstPartOfReallocatedPtNodePos,
- newNodeCodePoints + overlappingCodePointCount,
- newNodeCodePointCount - overlappingCodePointCount, probabilityOfNewPtNode,
- &writingPos)) {
- return false;
- }
- }
- if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
- NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
- return false;
- }
- // Update original reallocatingPtNode as moved.
- if (!markNodeAsMovedAndSetPosition(reallocatingPtNode, firstPartOfReallocatedPtNodePos,
- secondPartOfReallocatedPtNodePos)) {
- return false;
- }
- // Load node info. Information of the 1st part will be fetched.
- DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
- nodeReader.fetchNodeInfoInBufferFromPtNodePos(firstPartOfReallocatedPtNodePos);
- // Update children position.
- int childrenPosFieldPos = nodeReader.getChildrenPosFieldPos();
- if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
- actualChildrenPos, &childrenPosFieldPos)) {
- return false;
- }
- return true;
-}
-
-bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
- const HeaderPolicy *const headerPolicy, BufferWithExtendableBuffer *const bufferToWrite,
- int *const outUnigramCount, int *const outBigramCount) {
- DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
- readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
- DynamicPatriciaTrieGcEventListeners
- ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
- traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
- headerPolicy, this, mBuffer, mNeedsToDecay);
- if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
- &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
- return false;
- }
- if (mNeedsToDecay && traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
- .getValidUnigramCount() > ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC) {
- // TODO: Remove more unigrams.
- }
-
- readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
- DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability
- traversePolicyToUpdateBigramProbability(mBigramPolicy);
- if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
- &traversePolicyToUpdateBigramProbability)) {
- return false;
- }
- if (mNeedsToDecay && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount()
- > ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC) {
- // TODO: Remove more bigrams.
- }
-
- // Mapping from positions in mBuffer to positions in bufferToWrite.
- DictPositionRelocationMap dictPositionRelocationMap;
- readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
- DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
- traversePolicyToPlaceAndWriteValidPtNodesToBuffer(this, bufferToWrite,
- &dictPositionRelocationMap);
- if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
- &traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) {
- return false;
- }
-
- // Create policy instance for the GCed dictionary.
- DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite);
- DynamicBigramListPolicy newDictBigramPolicy(headerPolicy, bufferToWrite, &newDictShortcutPolicy,
- mNeedsToDecay);
- // Create reading helper for the GCed dictionary.
- DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictBigramPolicy,
- &newDictShortcutPolicy);
- newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
- DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
- traversePolicyToUpdateAllPositionFields(this, &newDictBigramPolicy, bufferToWrite,
- &dictPositionRelocationMap);
- if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
- &traversePolicyToUpdateAllPositionFields)) {
- return false;
- }
- *outUnigramCount = traversePolicyToUpdateAllPositionFields.getUnigramCount();
- *outBigramCount = traversePolicyToUpdateAllPositionFields.getBigramCount();
- return true;
-}
-
-int DynamicPatriciaTrieWritingHelper::getUpdatedProbability(const int originalProbability,
- const int newProbability) {
- if (mNeedsToDecay) {
- return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
- newProbability);
- } else {
- return newProbability;
- }
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
deleted file mode 100644
index ca8664729..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H
-#define LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H
-
-#include <stdint.h>
-
-#include "defines.h"
-#include "utils/hash_map_compat.h"
-
-namespace latinime {
-
-class BufferWithExtendableBuffer;
-class DynamicBigramListPolicy;
-class DynamicPatriciaTrieNodeReader;
-class DynamicPatriciaTrieReadingHelper;
-class DynamicShortcutListPolicy;
-class HeaderPolicy;
-
-class DynamicPatriciaTrieWritingHelper {
- public:
- typedef hash_map_compat<int, int> PtNodeArrayPositionRelocationMap;
- typedef hash_map_compat<int, int> PtNodePositionRelocationMap;
- struct DictPositionRelocationMap {
- public:
- DictPositionRelocationMap()
- : mPtNodeArrayPositionRelocationMap(), mPtNodePositionRelocationMap() {}
-
- PtNodeArrayPositionRelocationMap mPtNodeArrayPositionRelocationMap;
- PtNodePositionRelocationMap mPtNodePositionRelocationMap;
-
- private:
- DISALLOW_COPY_AND_ASSIGN(DictPositionRelocationMap);
- };
-
- static const size_t MAX_DICTIONARY_SIZE;
-
- DynamicPatriciaTrieWritingHelper(BufferWithExtendableBuffer *const buffer,
- DynamicBigramListPolicy *const bigramPolicy,
- DynamicShortcutListPolicy *const shortcutPolicy, const bool needsToDecay)
- : mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy),
- mNeedsToDecay(needsToDecay) {}
-
- ~DynamicPatriciaTrieWritingHelper() {}
-
- // Add a word to the dictionary. If the word already exists, update the probability.
- bool addUnigramWord(DynamicPatriciaTrieReadingHelper *const readingHelper,
- const int *const wordCodePoints, const int codePointCount, const int probability,
- bool *const outAddedNewUnigram);
-
- // Add a bigram relation from word0Pos to word1Pos.
- bool addBigramWords(const int word0Pos, const int word1Pos, const int probability,
- bool *const outAddedNewBigram);
-
- // Remove a bigram relation from word0Pos to word1Pos.
- bool removeBigramWords(const int word0Pos, const int word1Pos);
-
- void writeToDictFile(const char *const fileName, const HeaderPolicy *const headerPolicy,
- const int unigramCount, const int bigramCount);
-
- void writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const fileName,
- const HeaderPolicy *const headerPolicy);
-
- // CAVEAT: This method must be called only from inner classes of
- // DynamicPatriciaTrieGcEventListeners.
- bool markNodeAsDeleted(const DynamicPatriciaTrieNodeReader *const nodeToUpdate);
-
- // CAVEAT: This method must be called only from this class or inner classes of
- // DynamicPatriciaTrieGcEventListeners.
- bool writePtNodeToBufferByCopyingPtNodeInfo(BufferWithExtendableBuffer *const bufferToWrite,
- const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos,
- const int *const codePoints, const int codePointCount, const int probability,
- int *const writingPos);
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper);
-
- static const int CHILDREN_POSITION_FIELD_SIZE;
-
- BufferWithExtendableBuffer *const mBuffer;
- DynamicBigramListPolicy *const mBigramPolicy;
- DynamicShortcutListPolicy *const mShortcutPolicy;
- const bool mNeedsToDecay;
-
- bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate,
- const int movedPos, const int bigramLinkedNodePos);
-
- bool writePtNodeWithFullInfoToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
- const bool isBlacklisted, const bool isNotAWord,
- const int parentPos, const int *const codePoints, const int codePointCount,
- const int probability, const int childrenPos, const int originalBigramListPos,
- const int originalShortcutListPos, int *const writingPos);
-
- bool writePtNodeToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
- const int parentPos, const int *const codePoints, const int codePointCount,
- const int probability, int *const writingPos);
-
- bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
- const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos);
-
- bool setPtNodeProbability(const DynamicPatriciaTrieNodeReader *const originalNode,
- const int probability, const int *const codePoints, bool *const outAddedNewUnigram);
-
- bool createChildrenPtNodeArrayAndAChildPtNode(
- const DynamicPatriciaTrieNodeReader *const parentNode, const int probability,
- const int *const codePoints, const int codePointCount);
-
- bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints,
- const int nodeCodePointCount, const int probability);
-
- bool reallocatePtNodeAndAddNewPtNodes(
- const DynamicPatriciaTrieNodeReader *const reallocatingPtNode,
- const int *const reallocatingPtNodeCodePoints, const int overlappingCodePointCount,
- const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
- const int newNodeCodePointCount);
-
- bool runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy,
- BufferWithExtendableBuffer *const bufferToWrite, int *const outUnigramCount,
- int *const outBigramCount);
-
- int getUpdatedProbability(const int originalProbability, const int newProbability);
-};
-} // namespace latinime
-#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
index a9c7805a8..7c06a7117 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
@@ -30,8 +30,8 @@ namespace latinime {
class HeaderPolicy : public DictionaryHeaderStructurePolicy {
public:
// Reads information from existing dictionary buffer.
- HeaderPolicy(const uint8_t *const dictBuf, const int dictSize)
- : mDictFormatVersion(FormatUtils::detectFormatVersion(dictBuf, dictSize)),
+ HeaderPolicy(const uint8_t *const dictBuf, const FormatUtils::FORMAT_VERSION formatVersion)
+ : mDictFormatVersion(formatVersion),
mDictionaryFlags(HeaderReadWriteUtils::getFlags(dictBuf)),
mSize(HeaderReadWriteUtils::getHeaderSize(dictBuf)),
mAttributeMap(createAttributeMapAndReadAllAttributes(dictBuf)),
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
index 5ded8f6a1..5ef8e50b4 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
@@ -118,6 +118,9 @@ const char *const HeaderReadWriteUtils::REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY
case FormatUtils::VERSION_3:
return buffer->writeUintAndAdvancePosition(3 /* data */,
HEADER_DICTIONARY_VERSION_SIZE, writingPos);
+ case FormatUtils::VERSION_4:
+ return buffer->writeUintAndAdvancePosition(4 /* data */,
+ HEADER_DICTIONARY_VERSION_SIZE, writingPos);
default:
return false;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h
new file mode 100644
index 000000000..12d3579fd
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_SHORTCUT_LIST_POLICY_H
+#define LATINIME_VER4_SHORTCUT_LIST_POLICY_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
+#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
+
+namespace latinime {
+
+class Ver4ShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
+ public:
+ Ver4ShortcutListPolicy(const ShortcutDictContent *const shortcutDictContent,
+ const TerminalPositionLookupTable *const terminalPositionLookupTable)
+ : mShortcutDictContent(shortcutDictContent),
+ mTerminalPositionLookupTable(terminalPositionLookupTable) {}
+
+ ~Ver4ShortcutListPolicy() {}
+
+ int getStartPos(const int pos) const {
+ // The first shortcut entry is located at the head position of the shortcut list.
+ return pos;
+ }
+
+ void getNextShortcut(const int maxCodePointCount, int *const outCodePoint,
+ int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext,
+ int *const pos) const {
+ int shortcutFlags = 0;
+ if (outCodePoint && outCodePointCount) {
+ mShortcutDictContent->getShortcutEntryAndAdvancePosition(maxCodePointCount,
+ outCodePoint, outCodePointCount, &shortcutFlags, pos);
+ }
+ if (outHasNext) {
+ *outHasNext = ShortcutListReadingUtils::hasNext(shortcutFlags);
+ }
+ if (outIsWhitelist) {
+ *outIsWhitelist = ShortcutListReadingUtils::isWhitelist(shortcutFlags);
+ }
+ }
+
+ void skipAllShortcuts(int *const pos) const {
+ // Do nothing because we don't need to skip shortcut lists in ver4 dictionaries.
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4ShortcutListPolicy);
+
+ const ShortcutDictContent *const mShortcutDictContent;
+ const TerminalPositionLookupTable *const mTerminalPositionLookupTable;
+};
+} // namespace latinime
+#endif // LATINIME_VER4_SHORTCUT_LIST_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
new file mode 100644
index 000000000..3ab6a8e21
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
+
+#include <stdint.h>
+#include <string>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h"
+#include "suggest/policyimpl/dictionary/utils/format_utils.h"
+#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+
+namespace latinime {
+
+/* static */ DictionaryStructureWithBufferPolicy::StructurePoilcyPtr
+ DictionaryStructureWithBufferPolicyFactory
+ ::newDictionaryStructureWithBufferPolicy(const char *const path,
+ const int bufOffset, const int size, const bool isUpdatable) {
+ // Allocated buffer in MmapedBuffer::newBuffer() will be freed in the destructor of
+ // MmappedBufferWrapper if the instance has the responsibility.
+ MmappedBuffer::MmappedBufferPtr mmappedBuffer(MmappedBuffer::openBuffer(path, bufOffset, size,
+ isUpdatable));
+ if (!mmappedBuffer.get()) {
+ return DictionaryStructureWithBufferPolicy::StructurePoilcyPtr(0);
+ }
+ switch (FormatUtils::detectFormatVersion(mmappedBuffer.get()->getBuffer(),
+ mmappedBuffer.get()->getBufferSize())) {
+ case FormatUtils::VERSION_2:
+ return DictionaryStructureWithBufferPolicy::StructurePoilcyPtr(
+ new PatriciaTriePolicy(mmappedBuffer));
+ case FormatUtils::VERSION_3:
+ return DictionaryStructureWithBufferPolicy::StructurePoilcyPtr(
+ new DynamicPatriciaTriePolicy(mmappedBuffer));
+ case FormatUtils::VERSION_4: {
+ std::string dictDirPath(path);
+ const std::string::size_type pos =
+ dictDirPath.rfind(Ver4DictConstants::TRIE_FILE_EXTENSION);
+ if (pos == std::string::npos) {
+ // Dictionary file name is not valid as a version 4 dictionary.
+ return DictionaryStructureWithBufferPolicy::StructurePoilcyPtr(0);
+ }
+ // Removing extension to get the base path.
+ dictDirPath.erase(pos);
+ const Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers(
+ Ver4DictBuffers::openVer4DictBuffers(dictDirPath.c_str(), mmappedBuffer));
+ if (!dictBuffers.get()->isValid()) {
+ AKLOGE("DICT: The dictionary doesn't satisfy ver4 format requirements.");
+ ASSERT(false);
+ return DictionaryStructureWithBufferPolicy::StructurePoilcyPtr(0);
+ }
+ return DictionaryStructureWithBufferPolicy::StructurePoilcyPtr(
+ new Ver4PatriciaTriePolicy(dictBuffers));
+ }
+ default:
+ AKLOGE("DICT: dictionary format is unknown, bad magic number");
+ ASSERT(false);
+ return DictionaryStructureWithBufferPolicy::StructurePoilcyPtr(0);
+ }
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h
index 8cebc3b16..1359575f1 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h
@@ -21,13 +21,15 @@
#include "defines.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+#include "utils/exclusive_ownership_pointer.h"
namespace latinime {
class DictionaryStructureWithBufferPolicyFactory {
public:
- static DictionaryStructureWithBufferPolicy *newDictionaryStructureWithBufferPolicy(
- const char *const path, const int bufOffset, const int size, const bool isUpdatable);
+ static DictionaryStructureWithBufferPolicy::StructurePoilcyPtr
+ newDictionaryStructureWithBufferPolicy(const char *const path, const int bufOffset,
+ const int size, const bool isUpdatable);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryStructureWithBufferPolicyFactory);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
new file mode 100644
index 000000000..6a4cfee3c
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
@@ -0,0 +1,235 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PT_NODE_PARAMS_H
+#define LATINIME_PT_NODE_PARAMS_H
+
+#include <cstring>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+
+namespace latinime {
+
+// This class has information of a PtNode. This class is immutable.
+class PtNodeParams {
+ public:
+ // Invalid PtNode.
+ PtNodeParams() : mHeadPos(NOT_A_DICT_POS), mFlags(0), mParentPos(NOT_A_DICT_POS),
+ mCodePointCount(0), mCodePoints(), mTerminalIdFieldPos(NOT_A_DICT_POS),
+ mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID), mProbabilityFieldPos(NOT_A_DICT_POS),
+ mProbability(NOT_A_PROBABILITY), mChildrenPosFieldPos(NOT_A_DICT_POS),
+ mChildrenPos(NOT_A_DICT_POS), mBigramLinkedNodePos(NOT_A_DICT_POS),
+ mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS),
+ mSiblingPos(NOT_A_DICT_POS) {}
+
+ PtNodeParams(const PtNodeParams& ptNodeParams)
+ : mHeadPos(ptNodeParams.mHeadPos), mFlags(ptNodeParams.mFlags),
+ mParentPos(ptNodeParams.mParentPos), mCodePointCount(ptNodeParams.mCodePointCount),
+ mCodePoints(), mTerminalIdFieldPos(ptNodeParams.mTerminalIdFieldPos),
+ mTerminalId(ptNodeParams.mTerminalId),
+ mProbabilityFieldPos(ptNodeParams.mProbabilityFieldPos),
+ mProbability(ptNodeParams.mProbability),
+ mChildrenPosFieldPos(ptNodeParams.mChildrenPosFieldPos),
+ mChildrenPos(ptNodeParams.mChildrenPos),
+ mBigramLinkedNodePos(ptNodeParams.mBigramLinkedNodePos),
+ mShortcutPos(ptNodeParams.mShortcutPos), mBigramPos(ptNodeParams.mBigramPos),
+ mSiblingPos(ptNodeParams.mSiblingPos) {
+ memcpy(mCodePoints, ptNodeParams.getCodePoints(), sizeof(int) * mCodePointCount);
+ }
+
+ // PtNode without terminal id.
+ PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags,
+ const int parentPos, const int codePointCount, const int *const codePoints,
+ const int probabilityFieldPos, const int probability, const int childrenPosFieldPos,
+ const int childrenPos, const int bigramLinkedNodePos, const int shortcutPos,
+ const int bigramPos, const int siblingPos)
+ : mHeadPos(headPos), mFlags(flags), mParentPos(parentPos),
+ mCodePointCount(codePointCount), mCodePoints(),
+ mTerminalIdFieldPos(NOT_A_DICT_POS),
+ mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID),
+ mProbabilityFieldPos(probabilityFieldPos), mProbability(probability),
+ mChildrenPosFieldPos(childrenPosFieldPos), mChildrenPos(childrenPos),
+ mBigramLinkedNodePos(bigramLinkedNodePos), mShortcutPos(shortcutPos),
+ mBigramPos(bigramPos), mSiblingPos(siblingPos) {
+ memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
+ }
+
+ // PtNode with a terminal id.
+ PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags,
+ const int parentPos, const int codePointCount, const int *const codePoints,
+ const int terminalIdFieldPos, const int terminalId, const int probability,
+ const int childrenPosFieldPos, const int childrenPos, const int bigramLinkedNodePos,
+ const int siblingPos)
+ : mHeadPos(headPos), mFlags(flags), mParentPos(parentPos),
+ mCodePointCount(codePointCount), mCodePoints(),
+ mTerminalIdFieldPos(terminalIdFieldPos), mTerminalId(terminalId),
+ mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability),
+ mChildrenPosFieldPos(childrenPosFieldPos), mChildrenPos(childrenPos),
+ mBigramLinkedNodePos(bigramLinkedNodePos), mShortcutPos(terminalId),
+ mBigramPos(terminalId), mSiblingPos(siblingPos) {
+ memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
+ }
+
+ // Construct new params by updating existing PtNode params.
+ PtNodeParams(const PtNodeParams *const ptNodeParams,
+ const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos,
+ const int codePointCount, const int *const codePoints, const int probability)
+ : mHeadPos(ptNodeParams->getHeadPos()), mFlags(flags), mParentPos(parentPos),
+ mCodePointCount(codePointCount), mCodePoints(),
+ mTerminalIdFieldPos(ptNodeParams->getTerminalIdFieldPos()),
+ mTerminalId(ptNodeParams->getTerminalId()),
+ mProbabilityFieldPos(ptNodeParams->getProbabilityFieldPos()),
+ mProbability(probability),
+ mChildrenPosFieldPos(ptNodeParams->getChildrenPosFieldPos()),
+ mChildrenPos(ptNodeParams->getChildrenPos()),
+ mBigramLinkedNodePos(ptNodeParams->getBigramLinkedNodePos()),
+ mShortcutPos(ptNodeParams->getShortcutPos()),
+ mBigramPos(ptNodeParams->getBigramsPos()),
+ mSiblingPos(ptNodeParams->getSiblingNodePos()) {
+ memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
+ }
+
+ PtNodeParams(const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos,
+ const int codePointCount, const int *const codePoints, const int probability)
+ : mHeadPos(NOT_A_DICT_POS), mFlags(flags), mParentPos(parentPos),
+ mCodePointCount(codePointCount), mCodePoints(),
+ mTerminalIdFieldPos(NOT_A_DICT_POS),
+ mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID),
+ mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability),
+ mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(NOT_A_DICT_POS),
+ mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS),
+ mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_DICT_POS) {
+ memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
+ }
+
+ AK_FORCE_INLINE bool isValid() const {
+ return mCodePointCount > 0;
+ }
+
+ // Head position of the PtNode
+ AK_FORCE_INLINE int getHeadPos() const {
+ return mHeadPos;
+ }
+
+ // Flags
+ AK_FORCE_INLINE bool isDeleted() const {
+ return DynamicPatriciaTrieReadingUtils::isDeleted(mFlags);
+ }
+
+ AK_FORCE_INLINE bool hasChildren() const {
+ return mChildrenPos != NOT_A_DICT_POS;
+ }
+
+ AK_FORCE_INLINE bool isTerminal() const {
+ return PatriciaTrieReadingUtils::isTerminal(mFlags);
+ }
+
+ AK_FORCE_INLINE bool isBlacklisted() const {
+ return PatriciaTrieReadingUtils::isBlacklisted(mFlags);
+ }
+
+ AK_FORCE_INLINE bool isNotAWord() const {
+ return PatriciaTrieReadingUtils::isNotAWord(mFlags);
+ }
+
+ // Parent node position
+ AK_FORCE_INLINE int getParentPos() const {
+ return mParentPos;
+ }
+
+ // Number of code points
+ AK_FORCE_INLINE uint8_t getCodePointCount() const {
+ return mCodePointCount;
+ }
+
+ AK_FORCE_INLINE const int *getCodePoints() const {
+ return mCodePoints;
+ }
+
+ // Probability
+ AK_FORCE_INLINE int getTerminalIdFieldPos() const {
+ return mTerminalIdFieldPos;
+ }
+
+ AK_FORCE_INLINE int getTerminalId() const {
+ return mTerminalId;
+ }
+
+ // Probability
+ AK_FORCE_INLINE int getProbabilityFieldPos() const {
+ return mProbabilityFieldPos;
+ }
+
+ AK_FORCE_INLINE int getProbability() const {
+ return mProbability;
+ }
+
+ // Children PtNode array position
+ AK_FORCE_INLINE int getChildrenPosFieldPos() const {
+ return mChildrenPosFieldPos;
+ }
+
+ AK_FORCE_INLINE int getChildrenPos() const {
+ return mChildrenPos;
+ }
+
+ // Bigram linked node position.
+ AK_FORCE_INLINE int getBigramLinkedNodePos() const {
+ return mBigramLinkedNodePos;
+ }
+
+ // Shortcutlist position
+ AK_FORCE_INLINE int getShortcutPos() const {
+ return mShortcutPos;
+ }
+
+ // Bigrams position
+ AK_FORCE_INLINE int getBigramsPos() const {
+ return mBigramPos;
+ }
+
+ // Sibling node position
+ AK_FORCE_INLINE int getSiblingNodePos() const {
+ return mSiblingPos;
+ }
+
+ private:
+ // This class have a public copy constructor to be used as a return value.
+
+ // Disallowing the assignment operator.
+ PtNodeParams &operator=(PtNodeParams &ptNodeParams);
+
+ const int mHeadPos;
+ const PatriciaTrieReadingUtils::NodeFlags mFlags;
+ const int mParentPos;
+ const uint8_t mCodePointCount;
+ int mCodePoints[MAX_WORD_LENGTH];
+ const int mTerminalIdFieldPos;
+ const int mTerminalId;
+ const int mProbabilityFieldPos;
+ const int mProbability;
+ const int mChildrenPosFieldPos;
+ const int mChildrenPos;
+ const int mBigramLinkedNodePos;
+ const int mShortcutPos;
+ const int mBigramPos;
+ const int mSiblingPos;
+};
+} // namespace latinime
+#endif /* LATINIME_PT_NODE_PARAMS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h
new file mode 100644
index 000000000..c6b2a8bed
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PT_NODE_READER_H
+#define LATINIME_PT_NODE_READER_H
+
+#include "defines.h"
+
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+
+namespace latinime {
+
+// Interface class used to read PtNode information.
+class PtNodeReader {
+ public:
+ virtual ~PtNodeReader() {}
+ virtual const PtNodeParams fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) const = 0;
+
+ protected:
+ PtNodeReader() {};
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(PtNodeReader);
+};
+} // namespace latinime
+#endif /* LATINIME_PT_NODE_READER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h
new file mode 100644
index 000000000..0f1e635bc
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PT_NODE_WRITER_H
+#define LATINIME_PT_NODE_WRITER_H
+
+#include "defines.h"
+
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+
+namespace latinime {
+
+// Interface class used to write PtNode information.
+class PtNodeWriter {
+ public:
+ virtual ~PtNodeWriter() {}
+
+ virtual bool markPtNodeAsDeleted(const PtNodeParams *const toBeUpdatedPtNodeParams) = 0;
+
+ virtual bool markPtNodeAsMoved(const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const int movedPos, const int bigramLinkedNodePos) = 0;
+
+ virtual bool updatePtNodeProbability(const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const int probability) = 0;
+
+ virtual bool updateChildrenPosition(const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const int newChildrenPosition) = 0;
+
+ virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
+ int *const ptNodeWritingPos) = 0;
+
+ virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
+ const PtNodeParams *const targetPtNodeParam, const int probability,
+ bool *const outAddedNewBigram) = 0;
+
+ virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,
+ const PtNodeParams *const targetPtNodeParam) = 0;
+
+ protected:
+ PtNodeWriter() {};
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(PtNodeWriter);
+};
+} // namespace latinime
+#endif /* LATINIME_PT_NODE_WRITER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
index 8a84bd261..960c1b936 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
@@ -15,22 +15,22 @@
*/
-#include "suggest/policyimpl/dictionary/patricia_trie_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h"
#include "defines.h"
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_vector.h"
-#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
namespace latinime {
-void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
+void PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const {
if (!dicNode->hasChildren()) {
return;
}
- int nextPos = dicNode->getChildrenPos();
+ int nextPos = dicNode->getChildrenPtNodeArrayPos();
if (nextPos < 0 || nextPos >= mDictBufferSize) {
AKLOGE("Children PtNode array position is invalid. pos: %d, dict size: %d",
nextPos, mDictBufferSize);
@@ -52,14 +52,14 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
// This retrieves code points and the probability of the word by its terminal position.
// Due to the fact that words are ordered in the dictionary in a strict breadth-first order,
-// it is possible to check for this with advantageous complexity. For each node, we search
+// it is possible to check for this with advantageous complexity. For each PtNode array, we search
// for PtNodes with children and compare the children position with the position we look for.
// When we shoot the position we look for, it means the word we look for is in the children
// of the previous PtNode. The only tricky part is the fact that if we arrive at the end of a
// PtNode array with the last PtNode's children position still less than what we are searching for,
// we must descend the last PtNode's children (for example, if the word we are searching for starts
// with a z, it's the last PtNode of the root array, so all children addresses will be smaller
-// than the position we look for, and we have to descend the z node).
+// than the position we look for, and we have to descend the z PtNode).
/* Parameters :
* ptNodePos: the byte position of the terminal PtNode of the word we are searching for (this is
* what is stored as the "bigram position" in each bigram)
@@ -74,9 +74,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
int pos = getRootPosition();
int wordPos = 0;
// One iteration of the outer loop iterates through PtNode arrays. As stated above, we will
- // only traverse nodes that are actually a part of the terminal we are searching, so each time
- // we enter this loop we are one depth level further than last time.
- // The only reason we count nodes is because we want to reduce the probability of infinite
+ // only traverse PtNodes that are actually a part of the terminal we are searching, so each
+ // time we enter this loop we are one depth level further than last time.
+ // The only reason we count PtNodes is because we want to reduce the probability of infinite
// looping in case there is a bug. Since we know there is an upper bound to the depth we are
// supposed to traverse, it does not hurt to count iterations.
for (int loopCount = maxCodePointCount; loopCount > 0; --loopCount) {
@@ -140,8 +140,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
found = true;
} else if (1 >= ptNodeCount) {
// However if we are on the LAST PtNode of this array, and we have NOT shot the
- // position we should descend THIS node. So we trick the lastCandidatePtNodePos
- // so that we will descend this PtNode, not the previous one.
+ // position we should descend THIS PtNode. So we trick the
+ // lastCandidatePtNodePos so that we will descend this PtNode, not the previous
+ // one.
lastCandidatePtNodePos = startPos;
found = true;
} else {
@@ -149,7 +150,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
found = false;
}
} else {
- // Even if we don't have children here, we could still be on the last PtNode of /
+ // Even if we don't have children here, we could still be on the last PtNode of
// this array. If this is the case, we should descend the last PtNode that had
// children, and their position is already in lastCandidatePtNodePos.
found = (1 >= ptNodeCount);
@@ -230,9 +231,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
return 0;
}
-// This function gets the position of the terminal node of the exact matching word in the
+// This function gets the position of the terminal PtNode of the exact matching word in the
// dictionary. If no match is found, it returns NOT_A_DICT_POS.
-int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
+int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const {
int pos = getRootPosition();
int wordPos = 0;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
index 0f8662aea..5d99632a5 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
@@ -24,6 +24,7 @@
#include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h"
+#include "suggest/policyimpl/dictionary/utils/format_utils.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
namespace latinime {
@@ -33,28 +34,26 @@ class DicNodeVector;
class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public:
- PatriciaTriePolicy(const MmappedBuffer *const buffer)
- : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), buffer->getBufferSize()),
- mDictRoot(mBuffer->getBuffer() + mHeaderPolicy.getSize()),
- mDictBufferSize(mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
+ PatriciaTriePolicy(const MmappedBuffer::MmappedBufferPtr &mmappedBuffer)
+ : mMmappedBuffer(mmappedBuffer),
+ mHeaderPolicy(mMmappedBuffer.get()->getBuffer(), FormatUtils::VERSION_2),
+ mDictRoot(mMmappedBuffer.get()->getBuffer() + mHeaderPolicy.getSize()),
+ mDictBufferSize(mMmappedBuffer.get()->getBufferSize()
+ - mHeaderPolicy.getSize()),
mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {}
- ~PatriciaTriePolicy() {
- delete mBuffer;
- }
-
AK_FORCE_INLINE int getRootPosition() const {
return 0;
}
- void createAndGetAllChildNodes(const DicNode *const dicNode,
+ void createAndGetAllChildDicNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const;
int getCodePointsAndProbabilityAndReturnCodePointCount(
const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const;
- int getTerminalNodePositionOfWord(const int *const inWord,
+ int getTerminalPtNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const;
int getProbability(const int unigramProbability, const int bigramProbability) const;
@@ -124,7 +123,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);
- const MmappedBuffer *const mBuffer;
+ const MmappedBuffer::MmappedBufferPtr mMmappedBuffer;
const HeaderPolicy mHeaderPolicy;
const uint8_t *const mDictRoot;
const int mDictBufferSize;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.cpp
index 7df55815f..82b3593c8 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
#include "defines.h"
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h
index 8420ee95a..8420ee95a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.cpp
index 5724c5d88..a252c6fb2 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.cpp
@@ -14,25 +14,27 @@
* limitations under the License.
*/
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h"
#include "suggest/core/policy/dictionary_header_structure_policy.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
namespace latinime {
bool DynamicPatriciaTrieGcEventListeners
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
- ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
- const int *const nodeCodePoints) {
+ ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
// PtNode is useless when the PtNode is not a terminal and doesn't have any not useless
// children.
- bool isUselessPtNode = !node->isTerminal();
- if (node->isTerminal() && mIsDecayingDict) {
+ bool isUselessPtNode = !ptNodeParams->isTerminal();
+ if (ptNodeParams->isTerminal() && mIsDecayingDict) {
const int newProbability =
- ForgettingCurveUtils::getEncodedProbabilityToSave(node->getProbability(),
+ ForgettingCurveUtils::getEncodedProbabilityToSave(ptNodeParams->getProbability(),
mHeaderPolicy);
- int writingPos = node->getProbabilityFieldPos();
+ int writingPos = ptNodeParams->getProbabilityFieldPos();
// Update probability.
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(
mBuffer, newProbability, &writingPos)) {
@@ -44,9 +46,9 @@ bool DynamicPatriciaTrieGcEventListeners
}
if (mChildrenValue > 0) {
isUselessPtNode = false;
- } else if (node->isTerminal()) {
+ } else if (ptNodeParams->isTerminal()) {
// Remove children as all children are useless.
- int writingPos = node->getChildrenPosFieldPos();
+ int writingPos = ptNodeParams->getChildrenPosFieldPos();
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
mBuffer, NOT_A_DICT_POS /* childrenPosition */, &writingPos)) {
return false;
@@ -54,12 +56,12 @@ bool DynamicPatriciaTrieGcEventListeners
}
if (isUselessPtNode) {
// Current PtNode is no longer needed. Mark it as deleted.
- if (!mWritingHelper->markNodeAsDeleted(node)) {
+ if (!mPtNodeWriter->markPtNodeAsDeleted(ptNodeParams)) {
return false;
}
} else {
mValueStack.back() += 1;
- if (node->isTerminal()) {
+ if (ptNodeParams->isTerminal()) {
mValidUnigramCount += 1;
}
}
@@ -67,10 +69,9 @@ bool DynamicPatriciaTrieGcEventListeners
}
bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability
- ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
- const int *const nodeCodePoints) {
- if (!node->isDeleted()) {
- int pos = node->getBigramsPos();
+ ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
+ if (!ptNodeParams->isDeleted()) {
+ int pos = ptNodeParams->getBigramsPos();
if (pos != NOT_A_DICT_POS) {
int bigramEntryCount = 0;
if (!mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(&pos,
@@ -117,31 +118,27 @@ bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNo
// Write valid PtNode to buffer and memorize mapping from the old position to the new position.
bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
- ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
- const int *const nodeCodePoints) {
- if (node->isDeleted()) {
+ ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
+ if (ptNodeParams->isDeleted()) {
// Current PtNode is not written in new buffer because it has been deleted.
mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert(
DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::value_type(
- node->getHeadPos(), NOT_A_DICT_POS));
+ ptNodeParams->getHeadPos(), NOT_A_DICT_POS));
return true;
}
int writingPos = mBufferToWrite->getTailPosition();
mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert(
DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::value_type(
- node->getHeadPos(), writingPos));
+ ptNodeParams->getHeadPos(), writingPos));
mValidPtNodeCount++;
// Writes current PtNode.
- return mWritingHelper->writePtNodeToBufferByCopyingPtNodeInfo(mBufferToWrite, node,
- node->getParentPos(), nodeCodePoints, node->getCodePointCount(),
- node->getProbability(), &writingPos);
+ return mPtNodeWriter->writePtNodeAndAdvancePosition(ptNodeParams, &writingPos);
}
bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
- ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
- const int *const nodeCodePoints) {
+ ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
// Updates parent position.
- int parentPos = node->getParentPos();
+ int parentPos = ptNodeParams->getParentPos();
if (parentPos != NOT_A_DICT_POS) {
DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::const_iterator it =
mDictPositionRelocationMap->mPtNodePositionRelocationMap.find(parentPos);
@@ -149,15 +146,16 @@ bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionField
parentPos = it->second;
}
}
- int writingPos = node->getHeadPos() + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
+ int writingPos = ptNodeParams->getHeadPos()
+ + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
// Write updated parent offset.
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mBufferToWrite,
- parentPos, node->getHeadPos(), &writingPos)) {
+ parentPos, ptNodeParams->getHeadPos(), &writingPos)) {
return false;
}
// Updates children position.
- int childrenPos = node->getChildrenPos();
+ int childrenPos = ptNodeParams->getChildrenPos();
if (childrenPos != NOT_A_DICT_POS) {
DynamicPatriciaTrieWritingHelper::PtNodeArrayPositionRelocationMap::const_iterator it =
mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.find(childrenPos);
@@ -165,14 +163,14 @@ bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionField
childrenPos = it->second;
}
}
- writingPos = node->getChildrenPosFieldPos();
+ writingPos = ptNodeParams->getChildrenPosFieldPos();
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBufferToWrite,
childrenPos, &writingPos)) {
return false;
}
// Updates bigram target PtNode positions in the bigram list.
- int bigramsPos = node->getBigramsPos();
+ int bigramsPos = ptNodeParams->getBigramsPos();
if (bigramsPos != NOT_A_DICT_POS) {
int bigramEntryCount;
if (!mBigramPolicy->updateAllBigramTargetPtNodePositions(&bigramsPos,
@@ -181,7 +179,7 @@ bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionField
}
mBigramCount += bigramEntryCount;
}
- if (node->isTerminal()) {
+ if (ptNodeParams->isTerminal()) {
mUnigramCount++;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h
index 9755120b0..a0b3b4d3a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h
@@ -21,15 +21,16 @@
#include "defines.h"
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "utils/hash_map_compat.h"
namespace latinime {
class DictionaryHeaderStructurePolicy;
+class PtNodeWriter;
+class PtNodeParams;
class DynamicPatriciaTrieGcEventListeners {
public:
@@ -41,9 +42,9 @@ class DynamicPatriciaTrieGcEventListeners {
public:
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
const DictionaryHeaderStructurePolicy *const headerPolicy,
- DynamicPatriciaTrieWritingHelper *const writingHelper,
- BufferWithExtendableBuffer *const buffer, const bool isDecayingDict)
- : mHeaderPolicy(headerPolicy), mWritingHelper(writingHelper), mBuffer(buffer),
+ PtNodeWriter *const ptNodeWriter, BufferWithExtendableBuffer *const buffer,
+ const bool isDecayingDict)
+ : mHeaderPolicy(headerPolicy), mPtNodeWriter(ptNodeWriter), mBuffer(buffer),
mIsDecayingDict(isDecayingDict), mValueStack(), mChildrenValue(0),
mValidUnigramCount(0) {}
@@ -66,8 +67,7 @@ class DynamicPatriciaTrieGcEventListeners {
bool onReadingPtNodeArrayTail() { return true; }
- bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
- const int *const nodeCodePoints);
+ bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
int getValidUnigramCount() const {
return mValidUnigramCount;
@@ -78,7 +78,7 @@ class DynamicPatriciaTrieGcEventListeners {
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted);
const DictionaryHeaderStructurePolicy *const mHeaderPolicy;
- DynamicPatriciaTrieWritingHelper *const mWritingHelper;
+ PtNodeWriter *const mPtNodeWriter;
BufferWithExtendableBuffer *const mBuffer;
const bool mIsDecayingDict;
std::vector<int> mValueStack;
@@ -101,8 +101,7 @@ class DynamicPatriciaTrieGcEventListeners {
bool onReadingPtNodeArrayTail() { return true; }
- bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
- const int *const nodeCodePoints);
+ bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
int getValidBigramEntryCount() const {
return mValidBigramEntryCount;
@@ -119,11 +118,10 @@ class DynamicPatriciaTrieGcEventListeners {
: public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
public:
TraversePolicyToPlaceAndWriteValidPtNodesToBuffer(
- DynamicPatriciaTrieWritingHelper *const writingHelper,
- BufferWithExtendableBuffer *const bufferToWrite,
+ PtNodeWriter *const ptNodeWriter, BufferWithExtendableBuffer *const bufferToWrite,
DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
dictPositionRelocationMap)
- : mWritingHelper(writingHelper), mBufferToWrite(bufferToWrite),
+ : mPtNodeWriter(ptNodeWriter), mBufferToWrite(bufferToWrite),
mDictPositionRelocationMap(dictPositionRelocationMap), mValidPtNodeCount(0),
mPtNodeArraySizeFieldPos(NOT_A_DICT_POS) {};
@@ -133,13 +131,12 @@ class DynamicPatriciaTrieGcEventListeners {
bool onReadingPtNodeArrayTail();
- bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
- const int *const nodeCodePoints);
+ bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToPlaceAndWriteValidPtNodesToBuffer);
- DynamicPatriciaTrieWritingHelper *const mWritingHelper;
+ PtNodeWriter *const mPtNodeWriter;
BufferWithExtendableBuffer *const mBufferToWrite;
DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
mDictPositionRelocationMap;
@@ -151,13 +148,11 @@ class DynamicPatriciaTrieGcEventListeners {
: public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
public:
TraversePolicyToUpdateAllPositionFields(
- DynamicPatriciaTrieWritingHelper *const writingHelper,
DynamicBigramListPolicy *const bigramPolicy,
BufferWithExtendableBuffer *const bufferToWrite,
const DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
dictPositionRelocationMap)
- : mWritingHelper(writingHelper), mBigramPolicy(bigramPolicy),
- mBufferToWrite(bufferToWrite),
+ : mBigramPolicy(bigramPolicy), mBufferToWrite(bufferToWrite),
mDictPositionRelocationMap(dictPositionRelocationMap), mUnigramCount(0),
mBigramCount(0) {};
@@ -167,8 +162,7 @@ class DynamicPatriciaTrieGcEventListeners {
bool onReadingPtNodeArrayTail() { return true; }
- bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
- const int *const nodeCodePoints);
+ bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
int getUnigramCount() const {
return mUnigramCount;
@@ -181,7 +175,6 @@ class DynamicPatriciaTrieGcEventListeners {
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateAllPositionFields);
- DynamicPatriciaTrieWritingHelper *const mWritingHelper;
DynamicBigramListPolicy *const mBigramPolicy;
BufferWithExtendableBuffer *const mBufferToWrite;
const DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.cpp
new file mode 100644
index 000000000..3393ce662
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.cpp
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
+
+#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
+#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+const PtNodeParams DynamicPatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode(
+ const int ptNodePos, const int siblingNodePos, const int bigramLinkedNodePos) const {
+ if (ptNodePos < 0 || ptNodePos >= mBuffer->getTailPosition()) {
+ // Reading invalid position because of bug or broken dictionary.
+ AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d",
+ ptNodePos, mBuffer->getTailPosition());
+ ASSERT(false);
+ return PtNodeParams();
+ }
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodePos);
+ const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
+ int pos = ptNodePos;
+ const int headPos = ptNodePos;
+ if (usesAdditionalBuffer) {
+ pos -= mBuffer->getOriginalBufferSize();
+ }
+ const PatriciaTrieReadingUtils::NodeFlags flags =
+ PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
+ const int parentPosOffset =
+ DynamicPatriciaTrieReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(dictBuf,
+ &pos);
+ const int parentPos =
+ DynamicPatriciaTrieReadingUtils::getParentPtNodePos(parentPosOffset, headPos);
+ int codePoints[MAX_WORD_LENGTH];
+ const int codePonitCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
+ dictBuf, flags, MAX_WORD_LENGTH, codePoints, &pos);
+ int probability = NOT_A_PROBABILITY;
+ int probabilityFieldPos = NOT_A_DICT_POS;
+ if (PatriciaTrieReadingUtils::isTerminal(flags)) {
+ probabilityFieldPos = pos;
+ if (usesAdditionalBuffer) {
+ probabilityFieldPos += mBuffer->getOriginalBufferSize();
+ }
+ probability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictBuf, &pos);
+ }
+ int childrenPosFieldPos = pos;
+ if (usesAdditionalBuffer) {
+ childrenPosFieldPos += mBuffer->getOriginalBufferSize();
+ }
+ int childrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
+ dictBuf, &pos);
+ if (usesAdditionalBuffer && childrenPos != NOT_A_DICT_POS) {
+ childrenPos += mBuffer->getOriginalBufferSize();
+ }
+ int newBigramLinkedNodePos = bigramLinkedNodePos;
+ if (siblingNodePos == NOT_A_DICT_POS) {
+ if (DynamicPatriciaTrieReadingUtils::isMoved(flags)) {
+ newBigramLinkedNodePos = childrenPos;
+ }
+ }
+ if (usesAdditionalBuffer) {
+ pos += mBuffer->getOriginalBufferSize();
+ }
+ int shortcutsPos = NOT_A_DICT_POS;
+ if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
+ shortcutsPos = pos;
+ mShortcutsPolicy->skipAllShortcuts(&pos);
+ }
+ int bigramsPos = NOT_A_DICT_POS;
+ if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
+ bigramsPos = pos;
+ mBigramsPolicy->skipAllBigrams(&pos);
+ }
+ int newSiblingNodePos = siblingNodePos;
+ if (siblingNodePos == NOT_A_DICT_POS) {
+ // Sibling position is the tail position of current node.
+ newSiblingNodePos = pos;
+ }
+ // Read destination node if the read node is a moved node.
+ if (DynamicPatriciaTrieReadingUtils::isMoved(flags)) {
+ // The destination position is stored at the same place as the parent position.
+ return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(parentPos, newSiblingNodePos,
+ newBigramLinkedNodePos);
+ } else {
+ return PtNodeParams(headPos, flags, parentPos, codePonitCount, codePoints,
+ probabilityFieldPos, probability, childrenPosFieldPos, childrenPos,
+ newBigramLinkedNodePos, shortcutsPos, bigramsPos, newSiblingNodePos);
+ }
+}
+
+}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h
new file mode 100644
index 000000000..ef0067b48
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H
+#define LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
+
+namespace latinime {
+
+class BufferWithExtendableBuffer;
+class DictionaryBigramsStructurePolicy;
+class DictionaryShortcutsStructurePolicy;
+
+/*
+ * This class is used for helping to read nodes of dynamic patricia trie. This class handles moved
+ * node and reads node attributes.
+ */
+class DynamicPatriciaTrieNodeReader : public PtNodeReader {
+ public:
+ DynamicPatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer,
+ const DictionaryBigramsStructurePolicy *const bigramsPolicy,
+ const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
+ : mBuffer(buffer), mBigramsPolicy(bigramsPolicy),
+ mShortcutsPolicy(shortcutsPolicy) {}
+
+ ~DynamicPatriciaTrieNodeReader() {}
+
+ virtual const PtNodeParams fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) const {
+ return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(ptNodePos,
+ NOT_A_DICT_POS /* siblingNodePos */, NOT_A_DICT_POS /* bigramLinkedNodePos */);
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieNodeReader);
+
+ const BufferWithExtendableBuffer *const mBuffer;
+ const DictionaryBigramsStructurePolicy *const mBigramsPolicy;
+ const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy;
+
+ const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
+ const int siblingNodePos, const int bigramLinkedNodePos) const;
+};
+} // namespace latinime
+#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.cpp
new file mode 100644
index 000000000..3b1e16cc7
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.cpp
@@ -0,0 +1,236 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.h"
+
+#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+const int DynamicPatriciaTrieNodeWriter::CHILDREN_POSITION_FIELD_SIZE = 3;
+
+bool DynamicPatriciaTrieNodeWriter::markPtNodeAsDeleted(
+ const PtNodeParams *const toBeUpdatedPtNodeParams) {
+ int pos = toBeUpdatedPtNodeParams->getHeadPos();
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
+ const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
+ if (usesAdditionalBuffer) {
+ pos -= mBuffer->getOriginalBufferSize();
+ }
+ // Read original flags
+ const PatriciaTrieReadingUtils::NodeFlags originalFlags =
+ PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
+ const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
+ DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */,
+ true /* isDeleted */);
+ int writingPos = toBeUpdatedPtNodeParams->getHeadPos();
+ // Update flags.
+ return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
+ &writingPos);
+}
+
+bool DynamicPatriciaTrieNodeWriter::markPtNodeAsMoved(
+ const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const int movedPos, const int bigramLinkedNodePos) {
+ int pos = toBeUpdatedPtNodeParams->getHeadPos();
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
+ const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
+ if (usesAdditionalBuffer) {
+ pos -= mBuffer->getOriginalBufferSize();
+ }
+ // Read original flags
+ const PatriciaTrieReadingUtils::NodeFlags originalFlags =
+ PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
+ const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
+ DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */,
+ false /* isDeleted */);
+ int writingPos = toBeUpdatedPtNodeParams->getHeadPos();
+ // Update flags.
+ if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
+ &writingPos)) {
+ return false;
+ }
+ // Update moved position, which is stored in the parent offset field.
+ if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
+ mBuffer, movedPos, toBeUpdatedPtNodeParams->getHeadPos(), &writingPos)) {
+ return false;
+ }
+ // Update bigram linked node position, which is stored in the children position field.
+ int childrenPosFieldPos = toBeUpdatedPtNodeParams->getChildrenPosFieldPos();
+ if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
+ mBuffer, bigramLinkedNodePos, &childrenPosFieldPos)) {
+ return false;
+ }
+ if (toBeUpdatedPtNodeParams->hasChildren()) {
+ // Update children's parent position.
+ mReadingHelper.initWithPtNodeArrayPos(toBeUpdatedPtNodeParams->getChildrenPos());
+ while (!mReadingHelper.isEnd()) {
+ const PtNodeParams childPtNodeParams(mReadingHelper.getPtNodeParams());
+ int parentOffsetFieldPos = childPtNodeParams.getHeadPos()
+ + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
+ if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
+ mBuffer, bigramLinkedNodePos, childPtNodeParams.getHeadPos(),
+ &parentOffsetFieldPos)) {
+ // Parent offset cannot be written because of a bug or a broken dictionary; thus,
+ // we give up to update dictionary.
+ return false;
+ }
+ mReadingHelper.readNextSiblingNode(childPtNodeParams);
+ }
+ }
+ return true;
+}
+
+bool DynamicPatriciaTrieNodeWriter::updatePtNodeProbability(
+ const PtNodeParams *const toBeUpdatedPtNodeParams, const int newProbability) {
+ if (!toBeUpdatedPtNodeParams->isTerminal()) {
+ return false;
+ }
+ int probabilityFieldPos = toBeUpdatedPtNodeParams->getProbabilityFieldPos();
+ return DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
+ newProbability, &probabilityFieldPos);
+}
+
+bool DynamicPatriciaTrieNodeWriter::updateChildrenPosition(
+ const PtNodeParams *const toBeUpdatedPtNodeParams, const int newChildrenPosition) {
+ int childrenPosFieldPos = toBeUpdatedPtNodeParams->getChildrenPosFieldPos();
+ return DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
+ newChildrenPosition, &childrenPosFieldPos);
+}
+
+bool DynamicPatriciaTrieNodeWriter::writePtNodeAndAdvancePosition(
+ const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) {
+ const int nodePos = *ptNodeWritingPos;
+ // Write dummy flags. The Node flags are updated with appropriate flags at the last step of the
+ // PtNode writing.
+ if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer,
+ 0 /* nodeFlags */, ptNodeWritingPos)) {
+ return false;
+ }
+ // Calculate a parent offset and write the offset.
+ if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mBuffer,
+ ptNodeParams->getParentPos(), nodePos, ptNodeWritingPos)) {
+ return false;
+ }
+ // Write code points
+ if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mBuffer,
+ ptNodeParams->getCodePoints(), ptNodeParams->getCodePointCount(), ptNodeWritingPos)) {
+ return false;
+ }
+ // Write probability when the probability is a valid probability, which means this node is
+ // terminal.
+ if (ptNodeParams->getProbability() != NOT_A_PROBABILITY) {
+ if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
+ ptNodeParams->getProbability(), ptNodeWritingPos)) {
+ return false;
+ }
+ }
+ // Write children position
+ if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
+ ptNodeParams->getChildrenPos(), ptNodeWritingPos)) {
+ return false;
+ }
+ // Copy shortcut list when the originalShortcutListPos is valid dictionary position.
+ if (ptNodeParams->getShortcutPos() != NOT_A_DICT_POS) {
+ int fromPos = ptNodeParams->getShortcutPos();
+ if (!mShortcutPolicy->copyAllShortcutsAndReturnIfSucceededOrNot(mBuffer, &fromPos,
+ ptNodeWritingPos)) {
+ return false;
+ }
+ }
+ // Copy bigram list when the originalBigramListPos is valid dictionary position.
+ int bigramCount = 0;
+ if (ptNodeParams->getBigramsPos() != NOT_A_DICT_POS) {
+ int fromPos = ptNodeParams->getBigramsPos();
+ if (!mBigramPolicy->copyAllBigrams(mBuffer, &fromPos, ptNodeWritingPos, &bigramCount)) {
+ return false;
+ }
+ }
+ // Create node flags and write them.
+ PatriciaTrieReadingUtils::NodeFlags nodeFlags =
+ PatriciaTrieReadingUtils::createAndGetFlags(ptNodeParams->isBlacklisted(),
+ ptNodeParams->isNotAWord(),
+ ptNodeParams->getProbability() != NOT_A_PROBABILITY /* isTerminal */,
+ ptNodeParams->getShortcutPos() != NOT_A_DICT_POS /* hasShortcutTargets */,
+ bigramCount > 0 /* hasBigrams */,
+ ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */,
+ CHILDREN_POSITION_FIELD_SIZE);
+ int flagsFieldPos = nodePos;
+ if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, nodeFlags,
+ &flagsFieldPos)) {
+ return false;
+ }
+ return true;
+}
+
+bool DynamicPatriciaTrieNodeWriter::addNewBigramEntry(
+ const PtNodeParams *const sourcePtNodeParams,
+ const PtNodeParams *const targetPtNodeParam, const int probability,
+ bool *const outAddedNewBigram) {
+ const int newNodePos = mBuffer->getTailPosition();
+ int writingPos = newNodePos;
+ // Write a new PtNode using original PtNode's info to the tail of the dictionary in mBuffer.
+ if (!writePtNodeAndAdvancePosition(sourcePtNodeParams, &writingPos)) {
+ return false;
+ }
+ if (!markPtNodeAsMoved(sourcePtNodeParams, newNodePos, newNodePos)) {
+ return false;
+ }
+ const PtNodeParams newPtNodeParams(
+ mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(newNodePos));
+ if (newPtNodeParams.getBigramsPos() != NOT_A_DICT_POS) {
+ // Insert a new bigram entry into the existing bigram list.
+ int bigramListPos = newPtNodeParams.getBigramsPos();
+ return mBigramPolicy->addNewBigramEntryToBigramList(targetPtNodeParam->getHeadPos(),
+ probability, &bigramListPos, outAddedNewBigram);
+ } else {
+ // The PtNode doesn't have a bigram list.
+ *outAddedNewBigram = true;
+ // First, Write a bigram entry at the tail position of the PtNode.
+ if (!mBigramPolicy->writeNewBigramEntry(targetPtNodeParam->getHeadPos(), probability,
+ &writingPos)) {
+ return false;
+ }
+ // Then, Mark as the PtNode having bigram list in the flags.
+ const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
+ PatriciaTrieReadingUtils::createAndGetFlags(newPtNodeParams.isBlacklisted(),
+ newPtNodeParams.isNotAWord(),
+ newPtNodeParams.getProbability() != NOT_A_PROBABILITY,
+ newPtNodeParams.getShortcutPos() != NOT_A_DICT_POS, true /* hasBigrams */,
+ newPtNodeParams.getCodePointCount() > 1, CHILDREN_POSITION_FIELD_SIZE);
+ writingPos = newNodePos;
+ // Write updated flags into the moved PtNode's flags field.
+ return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
+ &writingPos);
+ }
+}
+
+bool DynamicPatriciaTrieNodeWriter::removeBigramEntry(
+ const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam) {
+ if (sourcePtNodeParams->getBigramsPos() == NOT_A_DICT_POS) {
+ return false;
+ }
+ return mBigramPolicy->removeBigram(sourcePtNodeParams->getBigramsPos(),
+ targetPtNodeParam->getHeadPos());
+}
+
+}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.h
new file mode 100644
index 000000000..87ed1b159
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_WRITER_H
+#define LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_WRITER_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
+
+namespace latinime {
+
+class BufferWithExtendableBuffer;
+class DynamicBigramListPolicy;
+class DynamicShortcutListPolicy;
+
+/*
+ * This class is used for helping to writes nodes of dynamic patricia trie.
+ */
+class DynamicPatriciaTrieNodeWriter : public PtNodeWriter {
+ public:
+ DynamicPatriciaTrieNodeWriter(BufferWithExtendableBuffer *const buffer,
+ const DynamicPatriciaTrieNodeReader *const ptNodeReader,
+ DynamicBigramListPolicy *const bigramPolicy,
+ DynamicShortcutListPolicy *const shortcutPolicy)
+ : mBuffer(buffer), mPtNodeReader(ptNodeReader), mReadingHelper(mBuffer, ptNodeReader),
+ mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy) {}
+
+ virtual ~DynamicPatriciaTrieNodeWriter() {}
+
+ virtual bool markPtNodeAsDeleted(const PtNodeParams *const toBeUpdatedPtNodeParams);
+
+ virtual bool markPtNodeAsMoved(const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const int movedPos, const int bigramLinkedNodePos);
+
+ virtual bool updatePtNodeProbability(const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const int newProbability);
+
+ virtual bool updateChildrenPosition(const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const int newChildrenPosition);
+
+ virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
+ int *const ptNodeWritingPos);
+
+ virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
+ const PtNodeParams *const targetPtNodeParam, const int probability,
+ bool *const outAddedNewBigram);
+
+ virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,
+ const PtNodeParams *const targetPtNodeParam);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieNodeWriter);
+
+ static const int CHILDREN_POSITION_FIELD_SIZE;
+
+ BufferWithExtendableBuffer *const mBuffer;
+ const DynamicPatriciaTrieNodeReader *const mPtNodeReader;
+ DynamicPatriciaTrieReadingHelper mReadingHelper;
+ DynamicBigramListPolicy *const mBigramPolicy;
+ DynamicShortcutListPolicy *const mShortcutPolicy;
+
+};
+} // namespace latinime
+#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_WRITER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.cpp
index 495b146c2..6b58f5fb7 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h"
#include <cstdio>
#include <cstring>
@@ -23,11 +23,12 @@
#include "defines.h"
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_vector.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
-#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
@@ -45,118 +46,49 @@ const int DynamicPatriciaTriePolicy::MAX_DICT_EXTENDED_REGION_SIZE = 1024 * 1024
const int DynamicPatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - 1024;
-void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
+void DynamicPatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const {
if (!dicNode->hasChildren()) {
return;
}
- DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
- getBigramsStructurePolicy(), getShortcutsStructurePolicy());
- readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPos());
- const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
+ DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader);
+ readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos());
while (!readingHelper.isEnd()) {
- bool isTerminal = nodeReader->isTerminal() && !nodeReader->isDeleted();
+ const PtNodeParams ptNodeParams(readingHelper.getPtNodeParams());
+ if (!ptNodeParams.isValid()) {
+ break;
+ }
+ bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted();
if (isTerminal && mHeaderPolicy.isDecayingDict()) {
// A DecayingDict may have a terminal PtNode that has a terminal DicNode whose
// probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a
// valid terminal DicNode.
- isTerminal = getProbability(nodeReader->getProbability(), NOT_A_PROBABILITY)
+ isTerminal = getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY)
!= NOT_A_PROBABILITY;
}
- childDicNodes->pushLeavingChild(dicNode, nodeReader->getHeadPos(),
- nodeReader->getChildrenPos(), nodeReader->getProbability(), isTerminal,
- nodeReader->hasChildren(), nodeReader->isBlacklisted() || nodeReader->isNotAWord(),
- nodeReader->getCodePointCount(), readingHelper.getMergedNodeCodePoints());
- readingHelper.readNextSiblingNode();
+ childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(),
+ ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal,
+ ptNodeParams.hasChildren(),
+ ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord(),
+ ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints());
+ readingHelper.readNextSiblingNode(ptNodeParams);
}
}
int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const {
- // This method traverses parent nodes from the terminal by following parent pointers; thus,
- // node code points are stored in the buffer in the reverse order.
- int reverseCodePoints[maxCodePointCount];
- DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
- getBigramsStructurePolicy(), getShortcutsStructurePolicy());
- // First, read the terminal node and get its probability.
+ DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader);
readingHelper.initWithPtNodePos(ptNodePos);
- if (!readingHelper.isValidTerminalNode()) {
- // Node at the ptNodePos is not a valid terminal node.
- *outUnigramProbability = NOT_A_PROBABILITY;
- return 0;
- }
- // Store terminal node probability.
- *outUnigramProbability = readingHelper.getNodeReader()->getProbability();
- // Then, following parent node link to the dictionary root and fetch node code points.
- while (!readingHelper.isEnd()) {
- if (readingHelper.getTotalCodePointCount() > maxCodePointCount) {
- // The ptNodePos is not a valid terminal node position in the dictionary.
- *outUnigramProbability = NOT_A_PROBABILITY;
- return 0;
- }
- // Store node code points to buffer in the reverse order.
- readingHelper.fetchMergedNodeCodePointsInReverseOrder(
- readingHelper.getPrevTotalCodePointCount(), reverseCodePoints);
- // Follow parent node toward the root node.
- readingHelper.readParentNode();
- }
- if (readingHelper.isError()) {
- // The node position or the dictionary is invalid.
- *outUnigramProbability = NOT_A_PROBABILITY;
- return 0;
- }
- // Reverse the stored code points to output them.
- const int codePointCount = readingHelper.getTotalCodePointCount();
- for (int i = 0; i < codePointCount; ++i) {
- outCodePoints[i] = reverseCodePoints[codePointCount - i - 1];
- }
- return codePointCount;
+ return readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount(maxCodePointCount,
+ outCodePoints, outUnigramProbability);
}
-int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
+int DynamicPatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const {
- int searchCodePoints[length];
- for (int i = 0; i < length; ++i) {
- searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i];
- }
- DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
- getBigramsStructurePolicy(), getShortcutsStructurePolicy());
+ DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader);
readingHelper.initWithPtNodeArrayPos(getRootPosition());
- const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
- while (!readingHelper.isEnd()) {
- const int matchedCodePointCount = readingHelper.getPrevTotalCodePointCount();
- if (readingHelper.getTotalCodePointCount() > length
- || !readingHelper.isMatchedCodePoint(0 /* index */,
- searchCodePoints[matchedCodePointCount])) {
- // Current node has too many code points or its first code point is different from
- // target code point. Skip this node and read the next sibling node.
- readingHelper.readNextSiblingNode();
- continue;
- }
- // Check following merged node code points.
- const int nodeCodePointCount = nodeReader->getCodePointCount();
- for (int j = 1; j < nodeCodePointCount; ++j) {
- if (!readingHelper.isMatchedCodePoint(
- j, searchCodePoints[matchedCodePointCount + j])) {
- // Different code point is found. The given word is not included in the dictionary.
- return NOT_A_DICT_POS;
- }
- }
- // All characters are matched.
- if (length == readingHelper.getTotalCodePointCount()) {
- // Terminal position is found.
- return nodeReader->getHeadPos();
- }
- if (!nodeReader->hasChildren()) {
- return NOT_A_DICT_POS;
- }
- // Advance to the children nodes.
- readingHelper.readChildNode();
- }
- // If we already traversed the tree further than the word is long, there means
- // there was no match (or we would have found it).
- return NOT_A_DICT_POS;
+ return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
}
int DynamicPatriciaTriePolicy::getProbability(const int unigramProbability,
@@ -179,44 +111,38 @@ int DynamicPatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos
if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_PROBABILITY;
}
- DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
- getBigramsStructurePolicy(), getShortcutsStructurePolicy());
- nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
- if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) {
+ const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos));
+ if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) {
return NOT_A_PROBABILITY;
}
- return getProbability(nodeReader.getProbability(), NOT_A_PROBABILITY);
+ return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
}
int DynamicPatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS;
}
- DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
- getBigramsStructurePolicy(), getShortcutsStructurePolicy());
- nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
- if (nodeReader.isDeleted()) {
+ const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos));
+ if (ptNodeParams.isDeleted()) {
return NOT_A_DICT_POS;
}
- return nodeReader.getShortcutPos();
+ return ptNodeParams.getShortcutPos();
}
int DynamicPatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS;
}
- DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
- getBigramsStructurePolicy(), getShortcutsStructurePolicy());
- nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
- if (nodeReader.isDeleted()) {
+ const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos));
+ if (ptNodeParams.isDeleted()) {
return NOT_A_DICT_POS;
}
- return nodeReader.getBigramsPos();
+ return ptNodeParams.getBigramsPos();
}
bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int length,
const int probability) {
- if (!mBuffer->isUpdatable()) {
+ if (!mMmappedBuffer.get()->isUpdatable()) {
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
return false;
}
@@ -225,13 +151,10 @@ bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int
AKLOGE("The dictionary is too large to dynamically update.");
return false;
}
- DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
- getBigramsStructurePolicy(), getShortcutsStructurePolicy());
+ DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader);
readingHelper.initWithPtNodeArrayPos(getRootPosition());
- DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
- &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict());
bool addedNewUnigram = false;
- if (writingHelper.addUnigramWord(&readingHelper, word, length, probability,
+ if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, probability,
&addedNewUnigram)) {
if (addedNewUnigram) {
mUnigramCount++;
@@ -244,7 +167,7 @@ bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int
bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int length0,
const int *const word1, const int length1, const int probability) {
- if (!mBuffer->isUpdatable()) {
+ if (!mMmappedBuffer.get()->isUpdatable()) {
AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
return false;
}
@@ -253,20 +176,18 @@ bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int
AKLOGE("The dictionary is too large to dynamically update.");
return false;
}
- const int word0Pos = getTerminalNodePositionOfWord(word0, length0,
+ const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
false /* forceLowerCaseSearch */);
if (word0Pos == NOT_A_DICT_POS) {
return false;
}
- const int word1Pos = getTerminalNodePositionOfWord(word1, length1,
+ const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1,
false /* forceLowerCaseSearch */);
if (word1Pos == NOT_A_DICT_POS) {
return false;
}
- DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
- &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict());
bool addedNewBigram = false;
- if (writingHelper.addBigramWords(word0Pos, word1Pos, probability, &addedNewBigram)) {
+ if (mUpdatingHelper.addBigramWords(word0Pos, word1Pos, probability, &addedNewBigram)) {
if (addedNewBigram) {
mBigramCount++;
}
@@ -278,7 +199,7 @@ bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int
bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0,
const int *const word1, const int length1) {
- if (!mBuffer->isUpdatable()) {
+ if (!mMmappedBuffer.get()->isUpdatable()) {
AKLOGI("Warning: removeBigramWords() is called for non-updatable dictionary.");
return false;
}
@@ -287,19 +208,17 @@ bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const
AKLOGE("The dictionary is too large to dynamically update.");
return false;
}
- const int word0Pos = getTerminalNodePositionOfWord(word0, length0,
+ const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
false /* forceLowerCaseSearch */);
if (word0Pos == NOT_A_DICT_POS) {
return false;
}
- const int word1Pos = getTerminalNodePositionOfWord(word1, length1,
+ const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1,
false /* forceLowerCaseSearch */);
if (word1Pos == NOT_A_DICT_POS) {
return false;
}
- DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
- &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict());
- if (writingHelper.removeBigramWords(word0Pos, word1Pos)) {
+ if (mUpdatingHelper.removeBigramWords(word0Pos, word1Pos)) {
mBigramCount--;
return true;
} else {
@@ -308,17 +227,17 @@ bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const
}
void DynamicPatriciaTriePolicy::flush(const char *const filePath) {
- if (!mBuffer->isUpdatable()) {
+ if (!mMmappedBuffer.get()->isUpdatable()) {
AKLOGI("Warning: flush() is called for non-updatable dictionary.");
return;
}
- DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
- &mBigramListPolicy, &mShortcutListPolicy, false /* needsToDecay */);
+ DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, &mNodeReader,
+ &mNodeWriter, &mBigramListPolicy, &mShortcutListPolicy, false /* needsToDecay */);
writingHelper.writeToDictFile(filePath, &mHeaderPolicy, mUnigramCount, mBigramCount);
}
void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) {
- if (!mBuffer->isUpdatable()) {
+ if (!mMmappedBuffer.get()->isUpdatable()) {
AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
return;
}
@@ -327,14 +246,14 @@ void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) {
false /* mindsBlockByDecay */, mUnigramCount, mBigramCount, &mHeaderPolicy));
DynamicBigramListPolicy bigramListPolicyForGC(&mHeaderPolicy, &mBufferWithExtendableBuffer,
&mShortcutListPolicy, needsToDecay);
- DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
- &bigramListPolicyForGC, &mShortcutListPolicy, needsToDecay);
+ DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, &mNodeReader,
+ &mNodeWriter, &bigramListPolicyForGC, &mShortcutListPolicy, needsToDecay);
writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy);
mNeedsToDecayForTesting = false;
}
bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
- if (!mBuffer->isUpdatable()) {
+ if (!mMmappedBuffer.get()->isUpdatable()) {
AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
return false;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h
index be97ee1a5..454698430 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h
@@ -22,7 +22,11 @@
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/format_utils.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
namespace latinime {
@@ -32,32 +36,37 @@ class DicNodeVector;
class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public:
- DynamicPatriciaTriePolicy(const MmappedBuffer *const buffer)
- : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), buffer->getBufferSize()),
- mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(),
- mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
+ DynamicPatriciaTriePolicy(const MmappedBuffer::MmappedBufferPtr &mmappedBuffer)
+ : mMmappedBuffer(mmappedBuffer),
+ mHeaderPolicy(mMmappedBuffer.get()->getBuffer(), FormatUtils::VERSION_3),
+ mBufferWithExtendableBuffer(mMmappedBuffer.get()->getBuffer()
+ + mHeaderPolicy.getSize(), mMmappedBuffer.get()->getBufferSize()
+ - mHeaderPolicy.getSize(),
+ BufferWithExtendableBuffer
+ ::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mShortcutListPolicy(&mBufferWithExtendableBuffer),
mBigramListPolicy(&mHeaderPolicy, &mBufferWithExtendableBuffer, &mShortcutListPolicy,
mHeaderPolicy.isDecayingDict()),
+ mNodeReader(&mBufferWithExtendableBuffer, &mBigramListPolicy, &mShortcutListPolicy),
+ mNodeWriter(&mBufferWithExtendableBuffer, &mNodeReader, &mBigramListPolicy,
+ &mShortcutListPolicy),
+ mUpdatingHelper(&mBufferWithExtendableBuffer, &mNodeReader, &mNodeWriter,
+ mHeaderPolicy.isDecayingDict()),
mUnigramCount(mHeaderPolicy.getUnigramCount()),
mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {}
- ~DynamicPatriciaTriePolicy() {
- delete mBuffer;
- }
-
AK_FORCE_INLINE int getRootPosition() const {
return 0;
}
- void createAndGetAllChildNodes(const DicNode *const dicNode,
+ void createAndGetAllChildDicNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const;
int getCodePointsAndProbabilityAndReturnCodePointCount(
const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const;
- int getTerminalNodePositionOfWord(const int *const inWord,
+ int getTerminalPtNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const;
int getProbability(const int unigramProbability, const int bigramProbability) const;
@@ -108,11 +117,14 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
static const int MAX_DICT_EXTENDED_REGION_SIZE;
static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
- const MmappedBuffer *const mBuffer;
+ const MmappedBuffer::MmappedBufferPtr mMmappedBuffer;
const HeaderPolicy mHeaderPolicy;
BufferWithExtendableBuffer mBufferWithExtendableBuffer;
DynamicShortcutListPolicy mShortcutListPolicy;
DynamicBigramListPolicy mBigramListPolicy;
+ DynamicPatriciaTrieNodeReader mNodeReader;
+ DynamicPatriciaTrieNodeWriter mNodeWriter;
+ DynamicPatriciaTrieUpdatingHelper mUpdatingHelper;
int mUnigramCount;
int mBigramCount;
int mNeedsToDecayForTesting;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp
index f108c219f..c3fe03d37 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp
@@ -14,15 +14,18 @@
* limitations under the License.
*/
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
+#include "utils/char_utils.h"
namespace latinime {
// To avoid infinite loop caused by invalid or malicious forward links.
const int DynamicPatriciaTrieReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
-const int DynamicPatriciaTrieReadingHelper::MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
+const int DynamicPatriciaTrieReadingHelper::MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
const size_t DynamicPatriciaTrieReadingHelper::MAX_READING_STATE_STACK_SIZE = MAX_WORD_LENGTH;
// Visits all PtNodes in post-order depth first manner.
@@ -37,22 +40,26 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPostorderDepthFirstMa
return false;
}
while (!isEnd()) {
+ const PtNodeParams ptNodeParams(getPtNodeParams());
+ if (!ptNodeParams.isValid()) {
+ break;
+ }
if (!alreadyVisitedChildren) {
- if (mNodeReader.hasChildren()) {
+ if (ptNodeParams.hasChildren()) {
// Move to the first child.
- if (!listener->onDescend(mNodeReader.getChildrenPos())) {
+ if (!listener->onDescend(ptNodeParams.getChildrenPos())) {
return false;
}
pushReadingStateToStack();
- readChildNode();
+ readChildNode(ptNodeParams);
} else {
alreadyVisitedChildren = true;
}
} else {
- if (!listener->onVisitingPtNode(&mNodeReader, mMergedNodeCodePoints)) {
+ if (!listener->onVisitingPtNode(&ptNodeParams)) {
return false;
}
- readNextSiblingNode();
+ readNextSiblingNode(ptNodeParams);
if (isEnd()) {
// All PtNodes in current linked PtNode arrays have been visited.
// Return to the parent.
@@ -101,10 +108,14 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreor
}
pushReadingStateToStack();
while (!isEnd()) {
+ const PtNodeParams ptNodeParams(getPtNodeParams());
+ if (!ptNodeParams.isValid()) {
+ break;
+ }
if (alreadyVisitedAllPtNodesInArray) {
if (alreadyVisitedChildren) {
// Move to next sibling PtNode's children.
- readNextSiblingNode();
+ readNextSiblingNode(ptNodeParams);
if (isEnd()) {
// Return to the parent PTNode.
if (!listener->onAscend()) {
@@ -120,13 +131,13 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreor
alreadyVisitedChildren = false;
}
} else {
- if (mNodeReader.hasChildren()) {
+ if (ptNodeParams.hasChildren()) {
// Move to the first child.
- if (!listener->onDescend(mNodeReader.getChildrenPos())) {
+ if (!listener->onDescend(ptNodeParams.getChildrenPos())) {
return false;
}
pushReadingStateToStack();
- readChildNode();
+ readChildNode(ptNodeParams);
// Push state to return the head of PtNode array.
pushReadingStateToStack();
alreadyVisitedAllPtNodesInArray = false;
@@ -136,10 +147,10 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreor
}
}
} else {
- if (!listener->onVisitingPtNode(&mNodeReader, mMergedNodeCodePoints)) {
+ if (!listener->onVisitingPtNode(&ptNodeParams)) {
return false;
}
- readNextSiblingNode();
+ readNextSiblingNode(ptNodeParams);
if (isEnd()) {
if (!listener->onReadingPtNodeArrayTail()) {
return false;
@@ -158,6 +169,89 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreor
return !isError();
}
+int DynamicPatriciaTrieReadingHelper::getCodePointsAndProbabilityAndReturnCodePointCount(
+ const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) {
+ // This method traverses parent nodes from the terminal by following parent pointers; thus,
+ // node code points are stored in the buffer in the reverse order.
+ int reverseCodePoints[maxCodePointCount];
+ const PtNodeParams terminalPtNodeParams(getPtNodeParams());
+ // First, read the terminal node and get its probability.
+ if (!isValidTerminalNode(terminalPtNodeParams)) {
+ // Node at the ptNodePos is not a valid terminal node.
+ *outUnigramProbability = NOT_A_PROBABILITY;
+ return 0;
+ }
+ // Store terminal node probability.
+ *outUnigramProbability = terminalPtNodeParams.getProbability();
+ // Then, following parent node link to the dictionary root and fetch node code points.
+ int totalCodePointCount = 0;
+ while (!isEnd()) {
+ const PtNodeParams ptNodeParams(getPtNodeParams());
+ totalCodePointCount = getTotalCodePointCount(ptNodeParams);
+ if (!ptNodeParams.isValid() || totalCodePointCount > maxCodePointCount) {
+ // The ptNodePos is not a valid terminal node position in the dictionary.
+ *outUnigramProbability = NOT_A_PROBABILITY;
+ return 0;
+ }
+ // Store node code points to buffer in the reverse order.
+ fetchMergedNodeCodePointsInReverseOrder(ptNodeParams, getPrevTotalCodePointCount(),
+ reverseCodePoints);
+ // Follow parent node toward the root node.
+ readParentNode(ptNodeParams);
+ }
+ if (isError()) {
+ // The node position or the dictionary is invalid.
+ *outUnigramProbability = NOT_A_PROBABILITY;
+ return 0;
+ }
+ // Reverse the stored code points to output them.
+ for (int i = 0; i < totalCodePointCount; ++i) {
+ outCodePoints[i] = reverseCodePoints[totalCodePointCount - i - 1];
+ }
+ return totalCodePointCount;
+}
+
+int DynamicPatriciaTrieReadingHelper::getTerminalPtNodePositionOfWord(const int *const inWord,
+ const int length, const bool forceLowerCaseSearch) {
+ int searchCodePoints[length];
+ for (int i = 0; i < length; ++i) {
+ searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i];
+ }
+ while (!isEnd()) {
+ const PtNodeParams ptNodeParams(getPtNodeParams());
+ const int matchedCodePointCount = getPrevTotalCodePointCount();
+ if (getTotalCodePointCount(ptNodeParams) > length
+ || !isMatchedCodePoint(ptNodeParams, 0 /* index */,
+ searchCodePoints[matchedCodePointCount])) {
+ // Current node has too many code points or its first code point is different from
+ // target code point. Skip this node and read the next sibling node.
+ readNextSiblingNode(ptNodeParams);
+ continue;
+ }
+ // Check following merged node code points.
+ const int nodeCodePointCount = ptNodeParams.getCodePointCount();
+ for (int j = 1; j < nodeCodePointCount; ++j) {
+ if (!isMatchedCodePoint(ptNodeParams, j, searchCodePoints[matchedCodePointCount + j])) {
+ // Different code point is found. The given word is not included in the dictionary.
+ return NOT_A_DICT_POS;
+ }
+ }
+ // All characters are matched.
+ if (length == getTotalCodePointCount(ptNodeParams)) {
+ // Terminal position is found.
+ return ptNodeParams.getHeadPos();
+ }
+ if (!ptNodeParams.hasChildren()) {
+ return NOT_A_DICT_POS;
+ }
+ // Advance to the children nodes.
+ readChildNode(ptNodeParams);
+ }
+ // If we already traversed the tree further than the word is long, there means
+ // there was no match (or we would have found it).
+ return NOT_A_DICT_POS;
+}
+
// Read node array size and process empty node arrays. Nodes and arrays are counted up in this
// method to avoid an infinite loop.
void DynamicPatriciaTrieReadingHelper::nextPtNodeArray() {
@@ -170,35 +264,41 @@ void DynamicPatriciaTrieReadingHelper::nextPtNodeArray() {
mReadingState.mPos = NOT_A_DICT_POS;
return;
}
- mReadingState.mPosOfLastPtNodeArrayHead = mReadingState.mPos;
+ mReadingState.mPosOfThisPtNodeArrayHead = mReadingState.mPos;
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos);
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
if (usesAdditionalBuffer) {
mReadingState.mPos -= mBuffer->getOriginalBufferSize();
}
- mReadingState.mNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
- dictBuf, &mReadingState.mPos);
+ mReadingState.mRemainingPtNodeCountInThisArray =
+ PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(dictBuf,
+ &mReadingState.mPos);
if (usesAdditionalBuffer) {
mReadingState.mPos += mBuffer->getOriginalBufferSize();
}
// Count up nodes and node arrays to avoid infinite loop.
- mReadingState.mTotalNodeCount += mReadingState.mNodeCount;
- mReadingState.mNodeArrayCount++;
- if (mReadingState.mNodeCount < 0
- || mReadingState.mTotalNodeCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP
- || mReadingState.mNodeArrayCount > MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) {
+ mReadingState.mTotalPtNodeIndexInThisArrayChain +=
+ mReadingState.mRemainingPtNodeCountInThisArray;
+ mReadingState.mPtNodeArrayIndexInThisArrayChain++;
+ if (mReadingState.mRemainingPtNodeCountInThisArray < 0
+ || mReadingState.mTotalPtNodeIndexInThisArrayChain
+ > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP
+ || mReadingState.mPtNodeArrayIndexInThisArrayChain
+ > MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) {
// Invalid dictionary.
AKLOGI("Invalid dictionary. nodeCount: %d, totalNodeCount: %d, MAX_CHILD_COUNT: %d"
"nodeArrayCount: %d, MAX_NODE_ARRAY_COUNT: %d",
- mReadingState.mNodeCount, mReadingState.mTotalNodeCount,
- MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP, mReadingState.mNodeArrayCount,
- MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP);
+ mReadingState.mRemainingPtNodeCountInThisArray,
+ mReadingState.mTotalPtNodeIndexInThisArrayChain,
+ MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP,
+ mReadingState.mPtNodeArrayIndexInThisArrayChain,
+ MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP);
ASSERT(false);
mIsError = true;
mReadingState.mPos = NOT_A_DICT_POS;
return;
}
- if (mReadingState.mNodeCount == 0) {
+ if (mReadingState.mRemainingPtNodeCountInThisArray == 0) {
// Empty node array. Try following forward link.
followForwardLink();
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h
index a71c06971..197027313 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h
@@ -21,9 +21,8 @@
#include <vector>
#include "defines.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
namespace latinime {
@@ -35,6 +34,7 @@ class DictionaryShortcutsStructurePolicy;
* This class is used for traversing dynamic patricia trie. This class supports iterating nodes and
* dealing with additional buffer. This class counts nodes and node arrays to avoid infinite loop.
*/
+// TODO: Move to pt_common.
class DynamicPatriciaTrieReadingHelper {
public:
class TraversingEventListener {
@@ -51,8 +51,7 @@ class DynamicPatriciaTrieReadingHelper {
virtual bool onReadingPtNodeArrayTail() = 0;
// Returns whether the event handling was succeeded or not.
- virtual bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
- const int *const nodeCodePoints) = 0;
+ virtual bool onVisitingPtNode(const PtNodeParams *const node) = 0;
protected:
TraversingEventListener() {};
@@ -62,10 +61,9 @@ class DynamicPatriciaTrieReadingHelper {
};
DynamicPatriciaTrieReadingHelper(const BufferWithExtendableBuffer *const buffer,
- const DictionaryBigramsStructurePolicy *const bigramsPolicy,
- const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
+ const PtNodeReader *const ptNodeReader)
: mIsError(false), mReadingState(), mBuffer(buffer),
- mNodeReader(mBuffer, bigramsPolicy, shortcutsPolicy), mReadingStateStack() {}
+ mPtNodeReader(ptNodeReader), mReadingStateStack() {}
~DynamicPatriciaTrieReadingHelper() {}
@@ -84,15 +82,12 @@ class DynamicPatriciaTrieReadingHelper {
} else {
mIsError = false;
mReadingState.mPos = ptNodeArrayPos;
- mReadingState.mPrevTotalCodePointCount = 0;
- mReadingState.mTotalNodeCount = 0;
- mReadingState.mNodeArrayCount = 0;
+ mReadingState.mTotalCodePointCountSinceInitialization = 0;
+ mReadingState.mTotalPtNodeIndexInThisArrayChain = 0;
+ mReadingState.mPtNodeArrayIndexInThisArrayChain = 0;
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
mReadingStateStack.clear();
nextPtNodeArray();
- if (!isEnd()) {
- fetchPtNodeInfo();
- }
}
}
@@ -103,94 +98,88 @@ class DynamicPatriciaTrieReadingHelper {
} else {
mIsError = false;
mReadingState.mPos = ptNodePos;
- mReadingState.mNodeCount = 1;
- mReadingState.mPrevTotalCodePointCount = 0;
- mReadingState.mTotalNodeCount = 1;
- mReadingState.mNodeArrayCount = 1;
+ mReadingState.mRemainingPtNodeCountInThisArray = 1;
+ mReadingState.mTotalCodePointCountSinceInitialization = 0;
+ mReadingState.mTotalPtNodeIndexInThisArrayChain = 1;
+ mReadingState.mPtNodeArrayIndexInThisArrayChain = 1;
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
- mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS;
+ mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS;
mReadingStateStack.clear();
- fetchPtNodeInfo();
}
}
- AK_FORCE_INLINE const DynamicPatriciaTrieNodeReader* getNodeReader() const {
- return &mNodeReader;
+ AK_FORCE_INLINE const PtNodeParams getPtNodeParams() const {
+ if (isEnd()) {
+ return PtNodeParams();
+ }
+ return mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(mReadingState.mPos);
}
- AK_FORCE_INLINE bool isValidTerminalNode() const {
- return !isEnd() && !mNodeReader.isDeleted() && mNodeReader.isTerminal();
+ AK_FORCE_INLINE bool isValidTerminalNode(const PtNodeParams &ptNodeParams) const {
+ return !isEnd() && !ptNodeParams.isDeleted() && ptNodeParams.isTerminal();
}
- AK_FORCE_INLINE bool isMatchedCodePoint(const int index, const int codePoint) const {
- return mMergedNodeCodePoints[index] == codePoint;
+ AK_FORCE_INLINE bool isMatchedCodePoint(const PtNodeParams &ptNodeParams, const int index,
+ const int codePoint) const {
+ return ptNodeParams.getCodePoints()[index] == codePoint;
}
// Return code point count exclude the last read node's code points.
AK_FORCE_INLINE int getPrevTotalCodePointCount() const {
- return mReadingState.mPrevTotalCodePointCount;
+ return mReadingState.mTotalCodePointCountSinceInitialization;
}
// Return code point count include the last read node's code points.
- AK_FORCE_INLINE int getTotalCodePointCount() const {
- return mReadingState.mPrevTotalCodePointCount + mNodeReader.getCodePointCount();
+ AK_FORCE_INLINE int getTotalCodePointCount(const PtNodeParams &ptNodeParams) const {
+ return mReadingState.mTotalCodePointCountSinceInitialization
+ + ptNodeParams.getCodePointCount();
}
- AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder(
+ AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder(const PtNodeParams &ptNodeParams,
const int index, int *const outCodePoints) const {
- const int nodeCodePointCount = mNodeReader.getCodePointCount();
+ const int nodeCodePointCount = ptNodeParams.getCodePointCount();
+ const int *const nodeCodePoints = ptNodeParams.getCodePoints();
for (int i = 0; i < nodeCodePointCount; ++i) {
- outCodePoints[index + i] = mMergedNodeCodePoints[nodeCodePointCount - 1 - i];
+ outCodePoints[index + i] = nodeCodePoints[nodeCodePointCount - 1 - i];
}
}
- AK_FORCE_INLINE const int *getMergedNodeCodePoints() const {
- return mMergedNodeCodePoints;
- }
-
- AK_FORCE_INLINE void readNextSiblingNode() {
- mReadingState.mNodeCount -= 1;
- mReadingState.mPos = mNodeReader.getSiblingNodePos();
- if (mReadingState.mNodeCount <= 0) {
+ AK_FORCE_INLINE void readNextSiblingNode(const PtNodeParams &ptNodeParams) {
+ mReadingState.mRemainingPtNodeCountInThisArray -= 1;
+ mReadingState.mPos = ptNodeParams.getSiblingNodePos();
+ if (mReadingState.mRemainingPtNodeCountInThisArray <= 0) {
// All nodes in the current node array have been read.
followForwardLink();
- if (!isEnd()) {
- fetchPtNodeInfo();
- }
- } else {
- fetchPtNodeInfo();
}
}
// Read the first child node of the current node.
- AK_FORCE_INLINE void readChildNode() {
- if (mNodeReader.hasChildren()) {
- mReadingState.mPrevTotalCodePointCount += mNodeReader.getCodePointCount();
- mReadingState.mTotalNodeCount = 0;
- mReadingState.mNodeArrayCount = 0;
- mReadingState.mPos = mNodeReader.getChildrenPos();
+ AK_FORCE_INLINE void readChildNode(const PtNodeParams &ptNodeParams) {
+ if (ptNodeParams.hasChildren()) {
+ mReadingState.mTotalCodePointCountSinceInitialization +=
+ ptNodeParams.getCodePointCount();
+ mReadingState.mTotalPtNodeIndexInThisArrayChain = 0;
+ mReadingState.mPtNodeArrayIndexInThisArrayChain = 0;
+ mReadingState.mPos = ptNodeParams.getChildrenPos();
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
// Read children node array.
nextPtNodeArray();
- if (!isEnd()) {
- fetchPtNodeInfo();
- }
} else {
mReadingState.mPos = NOT_A_DICT_POS;
}
}
// Read the parent node of the current node.
- AK_FORCE_INLINE void readParentNode() {
- if (mNodeReader.getParentPos() != NOT_A_DICT_POS) {
- mReadingState.mPrevTotalCodePointCount += mNodeReader.getCodePointCount();
- mReadingState.mTotalNodeCount = 1;
- mReadingState.mNodeArrayCount = 1;
- mReadingState.mNodeCount = 1;
- mReadingState.mPos = mNodeReader.getParentPos();
+ AK_FORCE_INLINE void readParentNode(const PtNodeParams &ptNodeParams) {
+ if (ptNodeParams.getParentPos() != NOT_A_DICT_POS) {
+ mReadingState.mTotalCodePointCountSinceInitialization +=
+ ptNodeParams.getCodePointCount();
+ mReadingState.mTotalPtNodeIndexInThisArrayChain = 1;
+ mReadingState.mPtNodeArrayIndexInThisArrayChain = 1;
+ mReadingState.mRemainingPtNodeCountInThisArray = 1;
+ mReadingState.mPos = ptNodeParams.getParentPos();
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
- mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS;
- fetchPtNodeInfo();
+ mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS;
} else {
mReadingState.mPos = NOT_A_DICT_POS;
}
@@ -201,13 +190,7 @@ class DynamicPatriciaTrieReadingHelper {
}
AK_FORCE_INLINE int getPosOfLastPtNodeArrayHead() const {
- return mReadingState.mPosOfLastPtNodeArrayHead;
- }
-
- AK_FORCE_INLINE void reloadCurrentPtNodeInfo() {
- if (!isEnd()) {
- fetchPtNodeInfo();
- }
+ return mReadingState.mPosOfThisPtNodeArrayHead;
}
bool traverseAllPtNodesInPostorderDepthFirstManner(TraversingEventListener *const listener);
@@ -215,53 +198,54 @@ class DynamicPatriciaTrieReadingHelper {
bool traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
TraversingEventListener *const listener);
+ int getCodePointsAndProbabilityAndReturnCodePointCount(const int maxCodePointCount,
+ int *const outCodePoints, int *const outUnigramProbability);
+
+ int getTerminalPtNodePositionOfWord(const int *const inWord, const int length,
+ const bool forceLowerCaseSearch);
+
private:
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieReadingHelper);
- class ReadingState {
+ // This class encapsulates the reading state of a position in the dictionary. It points at a
+ // specific PtNode in the dictionary.
+ class PtNodeReadingState {
public:
// Note that copy constructor and assignment operator are used for this class to use
// std::vector.
- ReadingState() : mPos(NOT_A_DICT_POS), mNodeCount(0), mPrevTotalCodePointCount(0),
- mTotalNodeCount(0), mNodeArrayCount(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS),
- mPosOfLastPtNodeArrayHead(NOT_A_DICT_POS) {}
+ PtNodeReadingState() : mPos(NOT_A_DICT_POS), mRemainingPtNodeCountInThisArray(0),
+ mTotalCodePointCountSinceInitialization(0), mTotalPtNodeIndexInThisArrayChain(0),
+ mPtNodeArrayIndexInThisArrayChain(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS),
+ mPosOfThisPtNodeArrayHead(NOT_A_DICT_POS) {}
int mPos;
- // Node count of a node array.
- int mNodeCount;
- int mPrevTotalCodePointCount;
- int mTotalNodeCount;
- int mNodeArrayCount;
+ // Remaining node count in the current array.
+ int mRemainingPtNodeCountInThisArray;
+ int mTotalCodePointCountSinceInitialization;
+ // Counter of PtNodes used to avoid infinite loops caused by broken or malicious links.
+ int mTotalPtNodeIndexInThisArrayChain;
+ // Counter of PtNode arrays used to avoid infinite loops caused by cyclic links of empty
+ // PtNode arrays.
+ int mPtNodeArrayIndexInThisArrayChain;
int mPosOfLastForwardLinkField;
- int mPosOfLastPtNodeArrayHead;
+ int mPosOfThisPtNodeArrayHead;
};
static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP;
- static const int MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP;
+ static const int MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP;
static const size_t MAX_READING_STATE_STACK_SIZE;
// TODO: Introduce error code to track what caused the error.
bool mIsError;
- ReadingState mReadingState;
+ PtNodeReadingState mReadingState;
const BufferWithExtendableBuffer *const mBuffer;
- DynamicPatriciaTrieNodeReader mNodeReader;
- int mMergedNodeCodePoints[MAX_WORD_LENGTH];
- std::vector<ReadingState> mReadingStateStack;
+ const PtNodeReader *const mPtNodeReader;
+ std::vector<PtNodeReadingState> mReadingStateStack;
void nextPtNodeArray();
void followForwardLink();
- AK_FORCE_INLINE void fetchPtNodeInfo() {
- mNodeReader.fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(mReadingState.mPos,
- MAX_WORD_LENGTH, mMergedNodeCodePoints);
- if (mNodeReader.getCodePointCount() <= 0) {
- // Empty node is not allowed.
- mIsError = true;
- mReadingState.mPos = NOT_A_DICT_POS;
- }
- }
-
AK_FORCE_INLINE void pushReadingStateToStack() {
if (mReadingStateStack.size() > MAX_READING_STATE_STACK_SIZE) {
AKLOGI("Reading state stack overflow. Max size: %zd", MAX_READING_STATE_STACK_SIZE);
@@ -279,9 +263,6 @@ class DynamicPatriciaTrieReadingHelper {
} else {
mReadingState = mReadingStateStack.back();
mReadingStateStack.pop_back();
- if (!isEnd()) {
- fetchPtNodeInfo();
- }
}
}
};
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.cpp
index d68446db6..e94925365 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
#include "defines.h"
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h
index 67c3cc57e..67c3cc57e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.cpp
new file mode 100644
index 000000000..cbc0d2304
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.cpp
@@ -0,0 +1,279 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.h"
+
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
+#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+
+namespace latinime {
+
+const int DynamicPatriciaTrieUpdatingHelper::CHILDREN_POSITION_FIELD_SIZE = 3;
+
+bool DynamicPatriciaTrieUpdatingHelper::addUnigramWord(
+ DynamicPatriciaTrieReadingHelper *const readingHelper,
+ const int *const wordCodePoints, const int codePointCount, const int probability,
+ bool *const outAddedNewUnigram) {
+ int parentPos = NOT_A_DICT_POS;
+ while (!readingHelper->isEnd()) {
+ const PtNodeParams ptNodeParams(readingHelper->getPtNodeParams());
+ if (!ptNodeParams.isValid()) {
+ break;
+ }
+ const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount();
+ if (!readingHelper->isMatchedCodePoint(ptNodeParams, 0 /* index */,
+ wordCodePoints[matchedCodePointCount])) {
+ // The first code point is different from target code point. Skip this node and read
+ // the next sibling node.
+ readingHelper->readNextSiblingNode(ptNodeParams);
+ continue;
+ }
+ // Check following merged node code points.
+ const int nodeCodePointCount = ptNodeParams.getCodePointCount();
+ for (int j = 1; j < nodeCodePointCount; ++j) {
+ const int nextIndex = matchedCodePointCount + j;
+ if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(ptNodeParams, j,
+ wordCodePoints[matchedCodePointCount + j])) {
+ *outAddedNewUnigram = true;
+ return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j,
+ getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */,
+ probability),
+ wordCodePoints + matchedCodePointCount,
+ codePointCount - matchedCodePointCount);
+ }
+ }
+ // All characters are matched.
+ if (codePointCount == readingHelper->getTotalCodePointCount(ptNodeParams)) {
+ return setPtNodeProbability(&ptNodeParams, probability, outAddedNewUnigram);
+ }
+ if (!ptNodeParams.hasChildren()) {
+ *outAddedNewUnigram = true;
+ return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams,
+ getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability),
+ wordCodePoints + readingHelper->getTotalCodePointCount(ptNodeParams),
+ codePointCount - readingHelper->getTotalCodePointCount(ptNodeParams));
+ }
+ // Advance to the children nodes.
+ parentPos = ptNodeParams.getHeadPos();
+ readingHelper->readChildNode(ptNodeParams);
+ }
+ if (readingHelper->isError()) {
+ // The dictionary is invalid.
+ return false;
+ }
+ int pos = readingHelper->getPosOfLastForwardLinkField();
+ *outAddedNewUnigram = true;
+ return createAndInsertNodeIntoPtNodeArray(parentPos,
+ wordCodePoints + readingHelper->getPrevTotalCodePointCount(),
+ codePointCount - readingHelper->getPrevTotalCodePointCount(),
+ getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), &pos);
+}
+
+bool DynamicPatriciaTrieUpdatingHelper::addBigramWords(const int word0Pos, const int word1Pos,
+ const int probability, bool *const outAddedNewBigram) {
+ const PtNodeParams sourcePtNodeParams(
+ mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word0Pos));
+ const PtNodeParams targetPtNodeParams(
+ mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word1Pos));
+ return mPtNodeWriter->addNewBigramEntry(&sourcePtNodeParams, &targetPtNodeParams, probability,
+ outAddedNewBigram);
+}
+
+// Remove a bigram relation from word0Pos to word1Pos.
+bool DynamicPatriciaTrieUpdatingHelper::removeBigramWords(const int word0Pos, const int word1Pos) {
+ const PtNodeParams sourcePtNodeParams(
+ mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word0Pos));
+ const PtNodeParams targetPtNodeParams(
+ mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word1Pos));
+ return mPtNodeWriter->removeBigramEntry(&sourcePtNodeParams, &targetPtNodeParams);
+}
+
+bool DynamicPatriciaTrieUpdatingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos,
+ const int *const nodeCodePoints, const int nodeCodePointCount, const int probability,
+ int *const forwardLinkFieldPos) {
+ const int newPtNodeArrayPos = mBuffer->getTailPosition();
+ if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
+ newPtNodeArrayPos, forwardLinkFieldPos)) {
+ return false;
+ }
+ return createNewPtNodeArrayWithAChildPtNode(parentPos, nodeCodePoints, nodeCodePointCount,
+ probability);
+}
+
+bool DynamicPatriciaTrieUpdatingHelper::setPtNodeProbability(
+ const PtNodeParams *const originalPtNodeParams, const int probability,
+ bool *const outAddedNewUnigram) {
+ if (originalPtNodeParams->isTerminal()) {
+ // Overwrites the probability.
+ *outAddedNewUnigram = false;
+ const int probabilityToWrite = getUpdatedProbability(
+ originalPtNodeParams->getProbability(), probability);
+ return mPtNodeWriter->updatePtNodeProbability(originalPtNodeParams, probabilityToWrite);
+ } else {
+ // Make the node terminal and write the probability.
+ *outAddedNewUnigram = true;
+ const int movedPos = mBuffer->getTailPosition();
+ int writingPos = movedPos;
+ const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(originalPtNodeParams,
+ originalPtNodeParams->getParentPos(), originalPtNodeParams->getCodePointCount(),
+ originalPtNodeParams->getCodePoints(),
+ getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability)));
+ if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) {
+ return false;
+ }
+ if (!mPtNodeWriter->markPtNodeAsMoved(originalPtNodeParams, movedPos, movedPos)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool DynamicPatriciaTrieUpdatingHelper::createChildrenPtNodeArrayAndAChildPtNode(
+ const PtNodeParams *const parentPtNodeParams, const int probability,
+ const int *const codePoints, const int codePointCount) {
+ const int newPtNodeArrayPos = mBuffer->getTailPosition();
+ if (!mPtNodeWriter->updateChildrenPosition(parentPtNodeParams, newPtNodeArrayPos)) {
+ return false;
+ }
+ return createNewPtNodeArrayWithAChildPtNode(parentPtNodeParams->getHeadPos(), codePoints,
+ codePointCount, probability);
+}
+
+bool DynamicPatriciaTrieUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
+ const int parentPtNodePos, const int *const nodeCodePoints, const int nodeCodePointCount,
+ const int probability) {
+ int writingPos = mBuffer->getTailPosition();
+ if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
+ 1 /* arraySize */, &writingPos)) {
+ return false;
+ }
+ const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
+ parentPtNodePos, nodeCodePointCount, nodeCodePoints, probability));
+ if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) {
+ return false;
+ }
+ if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
+ NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
+ return false;
+ }
+ return true;
+}
+
+// Returns whether the dictionary updating was succeeded or not.
+bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
+ const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
+ const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
+ const int newNodeCodePointCount) {
+ // When addsExtraChild is true, split the reallocating PtNode and add new child.
+ // Reallocating PtNode: abcde, newNode: abcxy.
+ // abc (1st, not terminal) __ de (2nd)
+ // \_ xy (extra child, terminal)
+ // Otherwise, this method makes 1st part terminal and write probabilityOfNewPtNode.
+ // Reallocating PtNode: abcde, newNode: abc.
+ // abc (1st, terminal) __ de (2nd)
+ const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount;
+ const int firstPartOfReallocatedPtNodePos = mBuffer->getTailPosition();
+ int writingPos = firstPartOfReallocatedPtNodePos;
+ // Write the 1st part of the reallocating node. The children position will be updated later
+ // with actual children position.
+ const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode;
+ const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
+ reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount,
+ reallocatingPtNodeParams->getCodePoints(), newProbability));
+ if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) {
+ return false;
+ }
+ const int actualChildrenPos = writingPos;
+ // Create new children PtNode array.
+ const size_t newPtNodeCount = addsExtraChild ? 2 : 1;
+ if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
+ newPtNodeCount, &writingPos)) {
+ return false;
+ }
+ // Write the 2nd part of the reallocating node.
+ const int secondPartOfReallocatedPtNodePos = writingPos;
+ const PtNodeParams childPartPtNodeParams(getUpdatedPtNodeParams(reallocatingPtNodeParams,
+ firstPartOfReallocatedPtNodePos,
+ reallocatingPtNodeParams->getCodePointCount() - overlappingCodePointCount,
+ reallocatingPtNodeParams->getCodePoints() + overlappingCodePointCount,
+ reallocatingPtNodeParams->getProbability()));
+ if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&childPartPtNodeParams, &writingPos)) {
+ return false;
+ }
+ if (addsExtraChild) {
+ const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode(
+ firstPartOfReallocatedPtNodePos, newNodeCodePointCount - overlappingCodePointCount,
+ newNodeCodePoints + overlappingCodePointCount, probabilityOfNewPtNode));
+ if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&extraChildPtNodeParams, &writingPos)) {
+ return false;
+ }
+ }
+ if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
+ NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
+ return false;
+ }
+ // Update original reallocating PtNode as moved.
+ if (!mPtNodeWriter->markPtNodeAsMoved(reallocatingPtNodeParams, firstPartOfReallocatedPtNodePos,
+ secondPartOfReallocatedPtNodePos)) {
+ return false;
+ }
+ // Load node info. Information of the 1st part will be fetched.
+ const PtNodeParams ptNodeParams(
+ mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(firstPartOfReallocatedPtNodePos));
+ // Update children position.
+ return mPtNodeWriter->updateChildrenPosition(&ptNodeParams, actualChildrenPos);
+}
+
+int DynamicPatriciaTrieUpdatingHelper::getUpdatedProbability(const int originalProbability,
+ const int newProbability) const {
+ if (mNeedsToDecay) {
+ return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
+ newProbability);
+ } else {
+ return newProbability;
+ }
+}
+
+const PtNodeParams DynamicPatriciaTrieUpdatingHelper::getUpdatedPtNodeParams(
+ const PtNodeParams *const originalPtNodeParams, const int parentPos,
+ const int codePointCount, const int *const codePoints, const int probability) const {
+ const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
+ originalPtNodeParams->isBlacklisted(), originalPtNodeParams->isNotAWord(),
+ probability != NOT_A_PROBABILITY /* isTerminal */,
+ originalPtNodeParams->getShortcutPos() != NOT_A_DICT_POS /* hasShortcutTargets */,
+ originalPtNodeParams->getBigramsPos() != NOT_A_DICT_POS /* hasBigrams */,
+ codePointCount > 1 /* hasMultipleChars */, CHILDREN_POSITION_FIELD_SIZE);
+ return PtNodeParams(originalPtNodeParams, flags, parentPos, codePointCount, codePoints,
+ probability);
+}
+
+const PtNodeParams DynamicPatriciaTrieUpdatingHelper::getPtNodeParamsForNewPtNode(
+ const int parentPos, const int codePointCount, const int *const codePoints,
+ const int probability) const {
+ const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
+ false /* isBlacklisted */, false /* isNotAWord */,
+ probability != NOT_A_PROBABILITY /* isTerminal */,
+ false /* hasShortcutTargets */, false /* hasBigrams */,
+ codePointCount > 1 /* hasMultipleChars */, CHILDREN_POSITION_FIELD_SIZE);
+ return PtNodeParams(flags, parentPos, codePointCount, codePoints, probability);
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.h
new file mode 100644
index 000000000..b9800cd80
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_UPDATING_HELPER_H
+#define LATINIME_DYNAMIC_PATRICIA_TRIE_UPDATING_HELPER_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+#include "utils/hash_map_compat.h"
+
+namespace latinime {
+
+class BufferWithExtendableBuffer;
+class DynamicPatriciaTrieReadingHelper;
+class PtNodeReader;
+class PtNodeWriter;
+
+// TODO: Move to pt_common.
+class DynamicPatriciaTrieUpdatingHelper {
+ public:
+ DynamicPatriciaTrieUpdatingHelper(BufferWithExtendableBuffer *const buffer,
+ const PtNodeReader *const ptNodeReader, PtNodeWriter *const ptNodeWriter,
+ const bool needsToDecay)
+ : mBuffer(buffer), mPtNodeReader(ptNodeReader), mPtNodeWriter(ptNodeWriter),
+ mNeedsToDecay(needsToDecay) {}
+
+ ~DynamicPatriciaTrieUpdatingHelper() {}
+
+ // Add a word to the dictionary. If the word already exists, update the probability.
+ bool addUnigramWord(DynamicPatriciaTrieReadingHelper *const readingHelper,
+ const int *const wordCodePoints, const int codePointCount, const int probability,
+ bool *const outAddedNewUnigram);
+
+ // Add a bigram relation from word0Pos to word1Pos.
+ bool addBigramWords(const int word0Pos, const int word1Pos, const int probability,
+ bool *const outAddedNewBigram);
+
+ // Remove a bigram relation from word0Pos to word1Pos.
+ bool removeBigramWords(const int word0Pos, const int word1Pos);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieUpdatingHelper);
+
+ static const int CHILDREN_POSITION_FIELD_SIZE;
+
+ BufferWithExtendableBuffer *const mBuffer;
+ const PtNodeReader *const mPtNodeReader;
+ PtNodeWriter *const mPtNodeWriter;
+ const bool mNeedsToDecay;
+
+ bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
+ const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos);
+
+ bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams, const int probability,
+ bool *const outAddedNewUnigram);
+
+ bool createChildrenPtNodeArrayAndAChildPtNode(const PtNodeParams *const parentPtNodeParams,
+ const int probability, const int *const codePoints, const int codePointCount);
+
+ bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints,
+ const int nodeCodePointCount, const int probability);
+
+ bool reallocatePtNodeAndAddNewPtNodes(
+ const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
+ const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
+ const int newNodeCodePointCount);
+
+ int getUpdatedProbability(const int originalProbability, const int newProbability) const;
+
+ const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams,
+ const int parentPos, const int codePointCount, const int *const codePoints,
+ const int probability) const;
+
+ const PtNodeParams getPtNodeParamsForNewPtNode(const int parentPos, const int codePointCount,
+ const int *const codePoints, const int probability) const;
+};
+} // namespace latinime
+#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_UPDATING_HELPER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.cpp
new file mode 100644
index 000000000..d97cec53b
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.cpp
@@ -0,0 +1,139 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h"
+
+#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
+#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h"
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
+#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
+#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
+#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+#include "utils/hash_map_compat.h"
+
+namespace latinime {
+
+// TODO: Make MAX_DICTIONARY_SIZE 8MB.
+const size_t DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE = 2 * 1024 * 1024;
+
+void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileName,
+ const HeaderPolicy *const headerPolicy, const int unigramCount, const int bigramCount) {
+ BufferWithExtendableBuffer headerBuffer(
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
+ const int extendedRegionSize = headerPolicy->getExtendedRegionSize() +
+ mBuffer->getUsedAdditionalBufferSize();
+ if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */,
+ false /* updatesLastDecayedTime */, unigramCount, bigramCount, extendedRegionSize)) {
+ return;
+ }
+ DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, mBuffer);
+}
+
+void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos,
+ const char *const fileName, const HeaderPolicy *const headerPolicy) {
+ BufferWithExtendableBuffer newDictBuffer(MAX_DICTIONARY_SIZE);
+ int unigramCount = 0;
+ int bigramCount = 0;
+ if (mNeedsToDecay) {
+ ForgettingCurveUtils::sTimeKeeper.setCurrentTime();
+ }
+ if (!runGC(rootPtNodeArrayPos, headerPolicy, &newDictBuffer, &unigramCount, &bigramCount)) {
+ return;
+ }
+ BufferWithExtendableBuffer headerBuffer(
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
+ if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */,
+ mNeedsToDecay, unigramCount, bigramCount, 0 /* extendedRegionSize */)) {
+ return;
+ }
+ DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, &newDictBuffer);
+}
+
+// TODO: Make this method version independent.
+bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
+ const HeaderPolicy *const headerPolicy, BufferWithExtendableBuffer *const bufferToWrite,
+ int *const outUnigramCount, int *const outBigramCount) {
+ DynamicPatriciaTrieNodeReader ptNodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
+ DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, &ptNodeReader);
+ readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+ DynamicPatriciaTrieGcEventListeners
+ ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
+ traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
+ headerPolicy, mPtNodeWriter, mBuffer, mNeedsToDecay);
+ if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
+ &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
+ return false;
+ }
+ if (mNeedsToDecay && traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
+ .getValidUnigramCount() > ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC) {
+ // TODO: Remove more unigrams.
+ }
+
+ readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+ DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability
+ traversePolicyToUpdateBigramProbability(mBigramPolicy);
+ if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
+ &traversePolicyToUpdateBigramProbability)) {
+ return false;
+ }
+ if (mNeedsToDecay && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount()
+ > ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC) {
+ // TODO: Remove more bigrams.
+ }
+
+ // Mapping from positions in mBuffer to positions in bufferToWrite.
+ DictPositionRelocationMap dictPositionRelocationMap;
+ readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+ DynamicPatriciaTrieNodeWriter newPtNodeWriter(bufferToWrite,
+ &ptNodeReader, mBigramPolicy, mShortcutPolicy);
+ DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
+ traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&newPtNodeWriter, bufferToWrite,
+ &dictPositionRelocationMap);
+ if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
+ &traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) {
+ return false;
+ }
+
+ // Create policy instance for the GCed dictionary.
+ DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite);
+ DynamicBigramListPolicy newDictBigramPolicy(headerPolicy, bufferToWrite, &newDictShortcutPolicy,
+ mNeedsToDecay);
+ // Create reading node reader and reading helper for the GCed dictionary.
+ DynamicPatriciaTrieNodeReader newDictNodeReader(bufferToWrite, &newDictBigramPolicy,
+ &newDictShortcutPolicy);
+ DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictNodeReader);
+ newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+ DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
+ traversePolicyToUpdateAllPositionFields(&newDictBigramPolicy, bufferToWrite,
+ &dictPositionRelocationMap);
+ if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
+ &traversePolicyToUpdateAllPositionFields)) {
+ return false;
+ }
+ *outUnigramCount = traversePolicyToUpdateAllPositionFields.getUnigramCount();
+ *outBigramCount = traversePolicyToUpdateAllPositionFields.getBigramCount();
+ return true;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h
new file mode 100644
index 000000000..7223dea8b
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H
+#define LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "utils/hash_map_compat.h"
+
+namespace latinime {
+
+class BufferWithExtendableBuffer;
+class DynamicBigramListPolicy;
+class DynamicPatriciaTrieReadingHelper;
+class DynamicShortcutListPolicy;
+class HeaderPolicy;
+class PtNodeReader;
+class PtNodeWriter;
+
+// TODO: Make it independent from a particular format and move to pt_common.
+class DynamicPatriciaTrieWritingHelper {
+ public:
+ typedef hash_map_compat<int, int> PtNodeArrayPositionRelocationMap;
+ typedef hash_map_compat<int, int> PtNodePositionRelocationMap;
+ struct DictPositionRelocationMap {
+ public:
+ DictPositionRelocationMap()
+ : mPtNodeArrayPositionRelocationMap(), mPtNodePositionRelocationMap() {}
+
+ PtNodeArrayPositionRelocationMap mPtNodeArrayPositionRelocationMap;
+ PtNodePositionRelocationMap mPtNodePositionRelocationMap;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DictPositionRelocationMap);
+ };
+
+ static const size_t MAX_DICTIONARY_SIZE;
+
+ DynamicPatriciaTrieWritingHelper(BufferWithExtendableBuffer *const buffer,
+ const PtNodeReader *const ptNodeReader, PtNodeWriter *const ptNodeWriter,
+ DynamicBigramListPolicy *const bigramPolicy,
+ DynamicShortcutListPolicy *const shortcutPolicy, const bool needsToDecay)
+ : mBuffer(buffer), mPtNodeReader(ptNodeReader), mPtNodeWriter(ptNodeWriter),
+ mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy),
+ mNeedsToDecay(needsToDecay) {}
+
+ ~DynamicPatriciaTrieWritingHelper() {}
+
+ void writeToDictFile(const char *const fileName, const HeaderPolicy *const headerPolicy,
+ const int unigramCount, const int bigramCount);
+
+ void writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const fileName,
+ const HeaderPolicy *const headerPolicy);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper);
+
+ BufferWithExtendableBuffer *const mBuffer;
+ const PtNodeReader *const mPtNodeReader;
+ PtNodeWriter *const mPtNodeWriter;
+ DynamicBigramListPolicy *const mBigramPolicy;
+ DynamicShortcutListPolicy *const mShortcutPolicy;
+ const bool mNeedsToDecay;
+
+ bool runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy,
+ BufferWithExtendableBuffer *const bufferToWrite, int *const outUnigramCount,
+ int *const outBigramCount);
+};
+} // namespace latinime
+#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.cpp
index 30ff10cd6..67733660b 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h"
#include <cstddef>
#include <cstdlib>
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h
index af76bc6b5..5654105ee 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h
@@ -20,7 +20,7 @@
#include <cstddef>
#include "defines.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
namespace latinime {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
new file mode 100644
index 000000000..5eed13e70
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_BIGRAM_DICT_CONTENT_H
+#define LATINIME_BIGRAM_DICT_CONTENT_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+
+namespace latinime {
+
+class BigramDictContent : public SparseTableDictContent {
+ public:
+ BigramDictContent(const char *const dictDirPath, const bool isUpdatable)
+ : SparseTableDictContent(dictDirPath,
+ Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION,
+ Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION,
+ Ver4DictConstants::BIGRAM_FILE_EXTENSION, isUpdatable,
+ Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
+ Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE) {}
+
+ void getBigramEntryAndAdvancePosition(int *const outBigramFlags,
+ int *const outTargetTerminalId, int *const bigramEntryPos) const {
+ const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer();
+ if (outBigramFlags) {
+ *outBigramFlags = bigramListBuffer->readUintAndAdvancePosition(
+ Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos);
+ }
+ if (outTargetTerminalId) {
+ *outTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition(
+ Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos);
+ }
+ }
+
+ // Returns head position of bigram list for a PtNode specified by terminalId.
+ int getBigramListHeadPos(const int terminalId) const {
+ const SparseTable *const addressLookupTable = getAddressLookupTable();
+ if (!addressLookupTable->contains(terminalId)) {
+ return NOT_A_DICT_POS;
+ }
+ return addressLookupTable->get(terminalId);
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(BigramDictContent);
+};
+} // namespace latinime
+#endif /* LATINIME_BIGRAM_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dict_content.h
new file mode 100644
index 000000000..0c2f47073
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dict_content.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_CONTENT_H
+#define LATINIME_DICT_CONTENT_H
+
+#include "defines.h"
+
+namespace latinime {
+
+class DictContent {
+ public:
+ virtual ~DictContent() {}
+ virtual bool isValid() const = 0;
+
+ protected:
+ DictContent() {}
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DictContent);
+};
+} // namespace latinime
+#endif /* LATINIME_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h
new file mode 100644
index 000000000..daaf08f61
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PROBABILITY_DICT_CONTENT_H
+#define LATINIME_PROBABILITY_DICT_CONTENT_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+class ProbabilityDictContent : public SingleDictContent {
+ public:
+ ProbabilityDictContent(const char *const dictDirPath, const bool isUpdatable)
+ : SingleDictContent(dictDirPath, Ver4DictConstants::FREQ_FILE_EXTENSION,
+ isUpdatable) {}
+
+ int getProbability(const int terminalId) const {
+ if (terminalId < 0 || terminalId >= getSize()) {
+ return NOT_A_PROBABILITY;
+ }
+ return Ver4PatriciaTrieReadingUtils::getProbability(getBuffer(), terminalId);
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(ProbabilityDictContent);
+
+ int getSize() const {
+ return getBuffer()->getTailPosition() / (Ver4DictConstants::PROBABILITY_SIZE
+ + Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE);
+ }
+};
+} // namespace latinime
+#endif /* LATINIME_PROBABILITY_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h
new file mode 100644
index 000000000..c10fbcb2a
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SHORTCUT_DICT_CONTENT_H
+#define LATINIME_SHORTCUT_DICT_CONTENT_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+
+namespace latinime {
+
+class ShortcutDictContent : public SparseTableDictContent {
+ public:
+ ShortcutDictContent(const char *const dictDirPath, const bool isUpdatable)
+ : SparseTableDictContent(dictDirPath,
+ Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION,
+ Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION,
+ Ver4DictConstants::SHORTCUT_FILE_EXTENSION, isUpdatable,
+ Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
+ Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
+
+ void getShortcutEntryAndAdvancePosition(const int maxCodePointCount,
+ int *const outCodePoint, int *const outCodePointCount, int *const outShortcutFlags,
+ int *const shortcutEntryPos) const {
+ const BufferWithExtendableBuffer *const shortcutListBuffer = getContentBuffer();
+ if (outShortcutFlags) {
+ *outShortcutFlags = shortcutListBuffer->readUintAndAdvancePosition(
+ Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos);
+ }
+ if (outCodePoint && outCodePointCount) {
+ shortcutListBuffer->readCodePointsAndAdvancePosition(
+ maxCodePointCount, outCodePoint, outCodePointCount, shortcutEntryPos);
+ }
+ }
+
+ // Returns head position of shortcut list for a PtNode specified by terminalId.
+ int getShortcutListHeadPos(const int terminalId) const {
+ const SparseTable *const addressLookupTable = getAddressLookupTable();
+ if (!addressLookupTable->contains(terminalId)) {
+ return NOT_A_DICT_POS;
+ }
+ return addressLookupTable->get(terminalId);
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(ShortcutDictContent);
+};
+} // namespace latinime
+#endif /* LATINIME_SHORTCUT_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h
new file mode 100644
index 000000000..4cb96da6a
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SINGLE_DICT_CONTENT_H
+#define LATINIME_SINGLE_DICT_CONTENT_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+
+namespace latinime {
+
+class SingleDictContent : public DictContent {
+ public:
+ SingleDictContent(const char *const dictDirPath, const char *const contentFileName,
+ const bool isUpdatable)
+ : mMmappedBuffer(MmappedBuffer::openBuffer(dictDirPath, contentFileName, isUpdatable)),
+ mExpandableContentBuffer(mMmappedBuffer.get() ? mMmappedBuffer.get()->getBuffer() : 0,
+ mMmappedBuffer.get() ? mMmappedBuffer.get()->getBufferSize() : 0,
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE) {}
+
+ virtual ~SingleDictContent() {}
+
+ virtual bool isValid() const {
+ return mMmappedBuffer.get() != 0;
+ }
+
+ protected:
+ BufferWithExtendableBuffer *getWritableBuffer() {
+ return &mExpandableContentBuffer;
+ }
+
+ const BufferWithExtendableBuffer *getBuffer() const {
+ return &mExpandableContentBuffer;
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(SingleDictContent);
+
+ const MmappedBuffer::MmappedBufferPtr mMmappedBuffer;
+ BufferWithExtendableBuffer mExpandableContentBuffer;
+};
+} // namespace latinime
+#endif /* LATINIME_SINGLE_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h
new file mode 100644
index 000000000..71868e9ca
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SPARSE_TABLE_DICT_CONTENT_H
+#define LATINIME_SPARSE_TABLE_DICT_CONTENT_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/sparse_table.h"
+
+namespace latinime {
+
+// TODO: Support multiple contents.
+class SparseTableDictContent : public DictContent {
+ public:
+ AK_FORCE_INLINE SparseTableDictContent(const char *const dictDirPath,
+ const char *const lookupTableFileName, const char *const addressTableFileName,
+ const char *const contentFileName, const bool isUpdatable,
+ const int sparseTableBlockSize, const int sparseTableDataSize)
+ : mLookupTableBuffer(
+ MmappedBuffer::openBuffer(dictDirPath, lookupTableFileName, isUpdatable)),
+ mAddressTableBuffer(
+ MmappedBuffer::openBuffer(dictDirPath, addressTableFileName, isUpdatable)),
+ mContentBuffer(MmappedBuffer::openBuffer(dictDirPath, contentFileName, isUpdatable)),
+ mExpandableLookupTableBuffer(
+ mLookupTableBuffer.get() ? mLookupTableBuffer.get()->getBuffer() : 0,
+ mLookupTableBuffer.get() ? mLookupTableBuffer.get()->getBufferSize() : 0,
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
+ mExpandableAddressTableBuffer(
+ mAddressTableBuffer.get() ? mAddressTableBuffer.get()->getBuffer() : 0,
+ mAddressTableBuffer.get() ? mAddressTableBuffer.get()->getBufferSize() : 0,
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
+ mExpandableContentBuffer(mContentBuffer.get() ? mContentBuffer.get()->getBuffer() : 0,
+ mContentBuffer.get() ? mContentBuffer.get()->getBufferSize() : 0,
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
+ mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer,
+ sparseTableBlockSize, sparseTableDataSize) {}
+
+ virtual ~SparseTableDictContent() {}
+
+ virtual bool isValid() const {
+ return mLookupTableBuffer.get() != 0 && mAddressTableBuffer.get() != 0
+ && mContentBuffer.get() != 0;
+ }
+
+ protected:
+ const SparseTable *getAddressLookupTable() const {
+ return &mAddressLookupTable;
+ }
+
+ const BufferWithExtendableBuffer *getContentBuffer() const {
+ return &mExpandableContentBuffer;
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTableDictContent);
+
+ // TODO: Have sparse table.
+ const MmappedBuffer::MmappedBufferPtr mLookupTableBuffer;
+ const MmappedBuffer::MmappedBufferPtr mAddressTableBuffer;
+ const MmappedBuffer::MmappedBufferPtr mContentBuffer;
+ BufferWithExtendableBuffer mExpandableLookupTableBuffer;
+ BufferWithExtendableBuffer mExpandableAddressTableBuffer;
+ BufferWithExtendableBuffer mExpandableContentBuffer;
+ SparseTable mAddressLookupTable;
+};
+} // namespace latinime
+#endif /* LATINIME_SPARSE_TABLE_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h
new file mode 100644
index 000000000..173d0da05
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H
+#define LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+class TerminalPositionLookupTable : public SingleDictContent {
+ public:
+ // TODO: Quit using headerRegionSize.
+ TerminalPositionLookupTable(const char *const dictDirPath, const bool isUpdatable,
+ const int headerRegionSize)
+ : SingleDictContent(dictDirPath,
+ Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION, isUpdatable),
+ mSize(getBuffer()->getTailPosition()
+ / Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE),
+ mHeaderRegionSize(headerRegionSize) {}
+
+ int getTerminalPtNodePosition(const int terminalId) const {
+ if (terminalId < 0 || terminalId >= mSize) {
+ return NOT_A_DICT_POS;
+ }
+ const int readingPos = terminalId * Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE;
+ return getBuffer()->readUint(Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE,
+ readingPos) - mHeaderRegionSize;
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalPositionLookupTable);
+
+ const int mSize;
+ const int mHeaderRegionSize;
+};
+} // namespace latinime
+#endif // LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
new file mode 100644
index 000000000..6ee6e63e4
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_DICT_BUFFER_H
+#define LATINIME_VER4_DICT_BUFFER_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+
+namespace latinime {
+
+class Ver4DictBuffers {
+ public:
+ typedef ExclusiveOwnershipPointer<Ver4DictBuffers> Ver4DictBuffersPtr;
+
+ static Ver4DictBuffersPtr openVer4DictBuffers(const char *const dictDirPath,
+ const MmappedBuffer::MmappedBufferPtr &dictBuffer) {
+ const bool isUpdatable = dictBuffer.get() ? dictBuffer.get()->isUpdatable() : false;
+ return Ver4DictBuffersPtr(new Ver4DictBuffers(dictDirPath, dictBuffer, isUpdatable));
+ }
+
+ AK_FORCE_INLINE bool isValid() const {
+ return mDictBuffer.get() != 0 && mProbabilityDictContent.isValid()
+ && mTerminalPositionLookupTable.isValid() && mBigramDictContent.isValid()
+ && mShortcutDictContent.isValid();
+ }
+
+ AK_FORCE_INLINE uint8_t *getRawDictBuffer() const {
+ return mDictBuffer.get()->getBuffer();
+ }
+
+ AK_FORCE_INLINE int getRawDictBufferSize() const {
+ return mDictBuffer.get()->getBufferSize();
+ }
+
+ AK_FORCE_INLINE const TerminalPositionLookupTable *getTerminalPositionLookupTable() const {
+ return &mTerminalPositionLookupTable;
+ }
+
+ AK_FORCE_INLINE const ProbabilityDictContent *getProbabilityDictContent() const {
+ return &mProbabilityDictContent;
+ }
+
+ AK_FORCE_INLINE const BigramDictContent *getBigramDictContent() const {
+ return &mBigramDictContent;
+ }
+
+ AK_FORCE_INLINE const ShortcutDictContent *getShortcutDictContent() const {
+ return &mShortcutDictContent;
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictBuffers);
+
+ AK_FORCE_INLINE Ver4DictBuffers(const char *const dictDirPath,
+ const MmappedBuffer::MmappedBufferPtr &dictBuffer, const bool isUpdatable)
+ : mDictBuffer(dictBuffer),
+ // TODO: Quit using getHeaderSize.
+ mTerminalPositionLookupTable(dictDirPath, isUpdatable,
+ HeaderReadWriteUtils::getHeaderSize(mDictBuffer.get()->getBuffer())),
+ mProbabilityDictContent(dictDirPath, isUpdatable),
+ mBigramDictContent(dictDirPath, isUpdatable),
+ mShortcutDictContent(dictDirPath, isUpdatable) {}
+
+ const MmappedBuffer::MmappedBufferPtr mDictBuffer;
+ TerminalPositionLookupTable mTerminalPositionLookupTable;
+ ProbabilityDictContent mProbabilityDictContent;
+ BigramDictContent mBigramDictContent;
+ ShortcutDictContent mShortcutDictContent;
+};
+} // namespace latinime
+#endif /* LATINIME_VER4_DICT_BUFFER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
new file mode 100644
index 000000000..20adb927f
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+
+namespace latinime {
+
+const char *const Ver4DictConstants::TRIE_FILE_EXTENSION = ".trie";
+const char *const Ver4DictConstants::FREQ_FILE_EXTENSION = ".freq";
+// tat = Terminal Address Table
+const char *const Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION = ".tat";
+const char *const Ver4DictConstants::BIGRAM_FILE_EXTENSION = ".bigram_freq";
+const char *const Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION = ".bigram_lookup";
+const char *const Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION = ".bigram_index_freq";
+const char *const Ver4DictConstants::SHORTCUT_FILE_EXTENSION = ".shortcut_shortcut";
+const char *const Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION = ".shortcut_lookup";
+const char *const Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION =
+ ".shortcut_index_shortcut";
+
+const int Ver4DictConstants::NOT_A_TERMINAL_ID = -1;
+const int Ver4DictConstants::PROBABILITY_SIZE = 1;
+const int Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE = 1;
+const int Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3;
+
+const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 4;
+const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE = 4;
+const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 16;
+const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4;
+
+const int Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE = 3;
+const int Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE = 1;
+
+const int Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE = 1;
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
new file mode 100644
index 000000000..522581873
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_DICT_CONSTANTS_H
+#define LATINIME_VER4_DICT_CONSTANTS_H
+
+#include "defines.h"
+
+namespace latinime {
+
+// Note that there are corresponding definitions in FormatSpec.java.
+class Ver4DictConstants {
+ public:
+ static const char *const TRIE_FILE_EXTENSION;
+ static const char *const FREQ_FILE_EXTENSION;
+ static const char *const TERMINAL_ADDRESS_TABLE_FILE_EXTENSION;
+ static const char *const BIGRAM_FILE_EXTENSION;
+ static const char *const BIGRAM_LOOKUP_TABLE_FILE_EXTENSION;
+ static const char *const BIGRAM_CONTENT_TABLE_FILE_EXTENSION;
+ static const char *const SHORTCUT_FILE_EXTENSION;
+ static const char *const SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION;
+ static const char *const SHORTCUT_CONTENT_TABLE_FILE_EXTENSION;
+
+ static const int NOT_A_TERMINAL_ID;
+ static const int PROBABILITY_SIZE;
+ static const int FLAGS_IN_PROBABILITY_FILE_SIZE;
+ static const int TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE;
+
+ static const int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE;
+ static const int BIGRAM_ADDRESS_TABLE_DATA_SIZE;
+ static const int SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE;
+ static const int SHORTCUT_ADDRESS_TABLE_DATA_SIZE;
+
+ static const int BIGRAM_FLAGS_FIELD_SIZE;
+ static const int BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
+
+ static const int SHORTCUT_FLAGS_FIELD_SIZE;
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictConstants);
+};
+} // namespace latinime
+#endif /* LATINIME_VER4_DICT_CONSTANTS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
new file mode 100644
index 000000000..b18bdc943
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
+
+#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode(
+ const int ptNodePos, const int siblingNodePos, const int bigramLinkedNodePos) const {
+ if (ptNodePos < 0 || ptNodePos >= mBuffer->getTailPosition()) {
+ // Reading invalid position because of bug or broken dictionary.
+ AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d",
+ ptNodePos, mBuffer->getTailPosition());
+ ASSERT(false);
+ return PtNodeParams();
+ }
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodePos);
+ const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
+ int pos = ptNodePos;
+ const int headPos = ptNodePos;
+ if (usesAdditionalBuffer) {
+ pos -= mBuffer->getOriginalBufferSize();
+ }
+ const PatriciaTrieReadingUtils::NodeFlags flags =
+ PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
+ const int parentPosOffset =
+ DynamicPatriciaTrieReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(
+ dictBuf, &pos);
+ const int parentPos =
+ DynamicPatriciaTrieReadingUtils::getParentPtNodePos(parentPosOffset, headPos);
+ int codePoints[MAX_WORD_LENGTH];
+ const int codePonitCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
+ dictBuf, flags, MAX_WORD_LENGTH, codePoints, &pos);
+ int terminalIdFieldPos = NOT_A_DICT_POS;
+ int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
+ int probability = NOT_A_PROBABILITY;
+ if (PatriciaTrieReadingUtils::isTerminal(flags)) {
+ terminalIdFieldPos = pos;
+ if (usesAdditionalBuffer) {
+ terminalIdFieldPos += mBuffer->getOriginalBufferSize();
+ }
+ terminalId = Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(dictBuf, &pos);
+ probability = mProbabilityDictContent->getProbability(terminalId);
+ }
+ int childrenPosFieldPos = pos;
+ if (usesAdditionalBuffer) {
+ childrenPosFieldPos += mBuffer->getOriginalBufferSize();
+ }
+ int childrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
+ dictBuf, &pos);
+ if (usesAdditionalBuffer && childrenPos != NOT_A_DICT_POS) {
+ childrenPos += mBuffer->getOriginalBufferSize();
+ }
+ int newBigramLinkedNodePos = bigramLinkedNodePos;
+ if (siblingNodePos == NOT_A_DICT_POS) {
+ if (DynamicPatriciaTrieReadingUtils::isMoved(flags)) {
+ newBigramLinkedNodePos = childrenPos;
+ }
+ }
+ if (usesAdditionalBuffer) {
+ pos += mBuffer->getOriginalBufferSize();
+ }
+ // Sibling position is the tail position of original PtNode.
+ int newSiblingNodePos = (siblingNodePos == NOT_A_DICT_POS) ? pos : siblingNodePos;
+ // Read destination node if the read node is a moved node.
+ if (DynamicPatriciaTrieReadingUtils::isMoved(flags)) {
+ // The destination position is stored at the same place as the parent position.
+ return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(parentPos, newSiblingNodePos,
+ newBigramLinkedNodePos);
+ } else {
+ return PtNodeParams(headPos, flags, parentPos, codePonitCount, codePoints,
+ terminalIdFieldPos, terminalId, probability, childrenPosFieldPos, childrenPos,
+ newBigramLinkedNodePos, newSiblingNodePos);
+ }
+}
+
+}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h
new file mode 100644
index 000000000..bdae0de7e
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_PATRICIA_TRIE_NODE_READER_H
+#define LATINIME_VER4_PATRICIA_TRIE_NODE_READER_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
+
+namespace latinime {
+
+class BufferWithExtendableBuffer;
+class ProbabilityDictContent;
+
+/*
+ * This class is used for helping to read nodes of ver4 patricia trie. This class handles moved
+ * node and reads node attributes including probability form probabilityBuffer.
+ */
+class Ver4PatriciaTrieNodeReader : public PtNodeReader {
+ public:
+ Ver4PatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer,
+ const ProbabilityDictContent *const probabilityDictContent)
+ : mBuffer(buffer), mProbabilityDictContent(probabilityDictContent) {}
+
+ ~Ver4PatriciaTrieNodeReader() {}
+
+ virtual const PtNodeParams fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) const {
+ return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(ptNodePos,
+ NOT_A_DICT_POS /* siblingNodePos */, NOT_A_DICT_POS /* bigramLinkedNodePos */);
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeReader);
+
+ const BufferWithExtendableBuffer *const mBuffer;
+ const ProbabilityDictContent *const mProbabilityDictContent;
+
+ const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
+ const int siblingNodePos, const int bigramLinkedNodePos) const;
+};
+} // namespace latinime
+#endif /* LATINIME_VER4_PATRICIA_TRIE_NODE_READER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
new file mode 100644
index 000000000..ae5a094d1
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -0,0 +1,163 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h"
+
+#include "suggest/core/dicnode/dic_node.h"
+#include "suggest/core/dicnode/dic_node_vector.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
+
+namespace latinime {
+
+void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
+ DicNodeVector *const childDicNodes) const {
+ if (!dicNode->hasChildren()) {
+ return;
+ }
+ DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader);
+ readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos());
+ while (!readingHelper.isEnd()) {
+ const PtNodeParams ptNodeParams = readingHelper.getPtNodeParams();
+ if (!ptNodeParams.isValid()) {
+ break;
+ }
+ bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted();
+ if (isTerminal && mHeaderPolicy.isDecayingDict()) {
+ // A DecayingDict may have a terminal PtNode that has a terminal DicNode whose
+ // probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a
+ // valid terminal DicNode.
+ isTerminal = getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY)
+ != NOT_A_PROBABILITY;
+ }
+ childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(),
+ ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal,
+ ptNodeParams.hasChildren(),
+ ptNodeParams.isBlacklisted()
+ || ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */,
+ ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints());
+ readingHelper.readNextSiblingNode(ptNodeParams);
+ }
+}
+
+int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
+ const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
+ int *const outUnigramProbability) const {
+ DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader);
+ readingHelper.initWithPtNodePos(ptNodePos);
+ return readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount(
+ maxCodePointCount, outCodePoints, outUnigramProbability);
+}
+
+int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
+ const int length, const bool forceLowerCaseSearch) const {
+ DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader);
+ readingHelper.initWithPtNodeArrayPos(getRootPosition());
+ return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
+}
+
+int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability,
+ const int bigramProbability) const {
+ if (mHeaderPolicy.isDecayingDict()) {
+ // Both probabilities are encoded. Decode them and get probability.
+ return ForgettingCurveUtils::getProbability(unigramProbability, bigramProbability);
+ } else {
+ if (unigramProbability == NOT_A_PROBABILITY) {
+ return NOT_A_PROBABILITY;
+ } else if (bigramProbability == NOT_A_PROBABILITY) {
+ return ProbabilityUtils::backoff(unigramProbability);
+ } else {
+ // bigramProbability is a bigram probability delta.
+ return ProbabilityUtils::computeProbabilityForBigram(unigramProbability,
+ bigramProbability);
+ }
+ }
+}
+
+int Ver4PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const {
+ if (ptNodePos == NOT_A_DICT_POS) {
+ return NOT_A_PROBABILITY;
+ }
+ const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos));
+ if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) {
+ return NOT_A_PROBABILITY;
+ }
+ return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
+}
+
+int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
+ if (ptNodePos == NOT_A_DICT_POS) {
+ return NOT_A_DICT_POS;
+ }
+ const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos));
+ if (ptNodeParams.isDeleted()) {
+ return NOT_A_DICT_POS;
+ }
+ return mBuffers.get()->getShortcutDictContent()->getShortcutListHeadPos(
+ ptNodeParams.getTerminalId());
+}
+
+int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
+ if (ptNodePos == NOT_A_DICT_POS) {
+ return NOT_A_DICT_POS;
+ }
+ const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos));
+ if (ptNodeParams.isDeleted()) {
+ return NOT_A_DICT_POS;
+ }
+ return mBuffers.get()->getBigramDictContent()->getBigramListHeadPos(
+ ptNodeParams.getTerminalId());
+}
+
+bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length,
+ const int probability) {
+ // TODO: Implement.
+ return false;
+}
+
+bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int length0,
+ const int *const word1, const int length1, const int probability) {
+ // TODO: Implement.
+ return false;
+}
+
+bool Ver4PatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0,
+ const int *const word1, const int length1) {
+ // TODO: Implement.
+ return false;
+}
+
+void Ver4PatriciaTriePolicy::flush(const char *const filePath) {
+ // TODO: Implement.
+}
+
+void Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {
+ // TODO: Implement.
+}
+
+bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
+ // TODO: Implement.
+ return false;
+}
+
+void Ver4PatriciaTriePolicy::getProperty(const char *const query, char *const outResult,
+ const int maxResultLength) {
+ // TODO: Implement.
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
new file mode 100644
index 000000000..10b9125f0
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_PATRICIA_TRIE_POLICY_H
+#define LATINIME_VER4_PATRICIA_TRIE_POLICY_H
+
+#include "defines.h"
+#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
+#include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+class DicNode;
+class DicNodeVector;
+
+// TODO: Implement.
+class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
+ public:
+ Ver4PatriciaTriePolicy(const Ver4DictBuffers::Ver4DictBuffersPtr &buffers)
+ : mBuffers(buffers),
+ mHeaderPolicy(mBuffers.get()->getRawDictBuffer(), FormatUtils::VERSION_4),
+ mDictBuffer(mBuffers.get()->getRawDictBuffer() + mHeaderPolicy.getSize(),
+ mBuffers.get()->getRawDictBufferSize() - mHeaderPolicy.getSize(),
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
+ mBigramPolicy(mBuffers.get()->getBigramDictContent(),
+ mBuffers.get()->getTerminalPositionLookupTable()),
+ mShortcutPolicy(mBuffers.get()->getShortcutDictContent(),
+ mBuffers.get()->getTerminalPositionLookupTable()),
+ mNodeReader(&mDictBuffer, mBuffers.get()->getProbabilityDictContent()) {};
+
+ AK_FORCE_INLINE int getRootPosition() const {
+ return 0;
+ }
+
+ void createAndGetAllChildDicNodes(const DicNode *const dicNode,
+ DicNodeVector *const childDicNodes) const;
+
+ int getCodePointsAndProbabilityAndReturnCodePointCount(
+ const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints,
+ int *const outUnigramProbability) const;
+
+ int getTerminalPtNodePositionOfWord(const int *const inWord,
+ const int length, const bool forceLowerCaseSearch) const;
+
+ int getProbability(const int unigramProbability, const int bigramProbability) const;
+
+ int getUnigramProbabilityOfPtNode(const int ptNodePos) const;
+
+ int getShortcutPositionOfPtNode(const int ptNodePos) const;
+
+ int getBigramsPositionOfPtNode(const int ptNodePos) const;
+
+ const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
+ return &mHeaderPolicy;
+ }
+
+ const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const {
+ return &mBigramPolicy;
+ }
+
+ const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
+ return &mShortcutPolicy;
+ }
+
+ bool addUnigramWord(const int *const word, const int length, const int probability);
+
+ bool addBigramWords(const int *const word0, const int length0, const int *const word1,
+ const int length1, const int probability);
+
+ bool removeBigramWords(const int *const word0, const int length0, const int *const word1,
+ const int length1);
+
+ void flush(const char *const filePath);
+
+ void flushWithGC(const char *const filePath);
+
+ bool needsToRunGC(const bool mindsBlockByGC) const;
+
+ void getProperty(const char *const query, char *const outResult,
+ const int maxResultLength);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy);
+
+ const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers;
+ const HeaderPolicy mHeaderPolicy;
+ BufferWithExtendableBuffer mDictBuffer;
+ const Ver4BigramListPolicy mBigramPolicy;
+ const Ver4ShortcutListPolicy mShortcutPolicy;
+ Ver4PatriciaTrieNodeReader mNodeReader;
+};
+} // namespace latinime
+#endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp
new file mode 100644
index 000000000..321fd6d40
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h"
+
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
+
+namespace latinime {
+
+/* static */ int Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(
+ const uint8_t *const buffer, int *pos) {
+ return ByteArrayUtils::readUint32AndAdvancePosition(buffer, pos);
+}
+
+/* static */ int Ver4PatriciaTrieReadingUtils::getProbability(
+ const BufferWithExtendableBuffer *const probabilityBuffer, const int terminalId) {
+ const int pos = terminalId * (Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE
+ + Ver4DictConstants::PROBABILITY_SIZE)
+ + Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE;
+ return probabilityBuffer->readUint(Ver4DictConstants::PROBABILITY_SIZE, pos);
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h
new file mode 100644
index 000000000..19e6dd331
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_PATRICIA_TRIE_READING_UTILS_H
+#define LATINIME_VER4_PATRICIA_TRIE_READING_UTILS_H
+
+#include <stdint.h>
+
+#include "defines.h"
+
+namespace latinime {
+
+class BufferWithExtendableBuffer;
+
+class Ver4PatriciaTrieReadingUtils {
+ public:
+ static int getTerminalIdAndAdvancePosition(const uint8_t *const buffer,
+ int *const pos);
+
+ static int getProbability(const BufferWithExtendableBuffer *const probabilityBuffer,
+ const int terminalId);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTrieReadingUtils);
+};
+} // namespace latinime
+#endif /* LATINIME_VER4_PATRICIA_TRIE_READING_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
index f692882f2..f17a0d1c0 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
@@ -18,11 +18,37 @@
namespace latinime {
-const size_t BufferWithExtendableBuffer::MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024;
+const size_t BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024;
const int BufferWithExtendableBuffer::NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE = 90;
// TODO: Needs to allocate larger memory corresponding to the current vector size.
const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 128 * 1024;
+uint32_t BufferWithExtendableBuffer::readUint(const int size, const int pos) const {
+ const bool readingPosIsInAdditionalBuffer = isInAdditionalBuffer(pos);
+ const int posInBuffer = readingPosIsInAdditionalBuffer ? pos - mOriginalBufferSize : pos;
+ return ByteArrayUtils::readUint(getBuffer(readingPosIsInAdditionalBuffer), size, posInBuffer);
+}
+
+uint32_t BufferWithExtendableBuffer::readUintAndAdvancePosition(const int size,
+ int *const pos) const {
+ const int value = readUint(size, *pos);
+ *pos += size;
+ return value;
+}
+
+void BufferWithExtendableBuffer::readCodePointsAndAdvancePosition(const int maxCodePointCount,
+ int *const outCodePoints, int *outCodePointCount, int *const pos) const {
+ const bool readingPosIsInAdditionalBuffer = isInAdditionalBuffer(*pos);
+ if (readingPosIsInAdditionalBuffer) {
+ *pos -= mOriginalBufferSize;
+ }
+ *outCodePointCount = ByteArrayUtils::readStringAndAdvancePosition(
+ getBuffer(readingPosIsInAdditionalBuffer), maxCodePointCount, outCodePointCount, pos);
+ if (readingPosIsInAdditionalBuffer) {
+ *pos += mOriginalBufferSize;
+ }
+}
+
bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data, const int size,
int *const pos) {
if (!(size >= 1 && size <= 4)) {
@@ -46,7 +72,7 @@ bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data
}
bool BufferWithExtendableBuffer::writeCodePointsAndAdvancePosition(const int *const codePoints,
- const int codePointCount, const bool writesTerminator ,int *const pos) {
+ const int codePointCount, const bool writesTerminator, int *const pos) {
const size_t size = ByteArrayUtils::calculateRequiredByteCountToStoreCodePoints(
codePoints, codePointCount, writesTerminator);
if (!checkAndPrepareWriting(*pos, size)) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
index 9dc34823c..13dce9b61 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
@@ -32,12 +32,20 @@ namespace latinime {
// raw pointer but provides several methods that handle boundary checking for writing data.
class BufferWithExtendableBuffer {
public:
+ static const size_t DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE;
+
BufferWithExtendableBuffer(uint8_t *const originalBuffer, const int originalBufferSize,
- const int maxAdditionalBufferSize = MAX_ADDITIONAL_BUFFER_SIZE)
+ const int maxAdditionalBufferSize)
: mOriginalBuffer(originalBuffer), mOriginalBufferSize(originalBufferSize),
mAdditionalBuffer(EXTEND_ADDITIONAL_BUFFER_SIZE_STEP), mUsedAdditionalBufferSize(0),
mMaxAdditionalBufferSize(maxAdditionalBufferSize) {}
+ // Without original buffer.
+ BufferWithExtendableBuffer(const int maxAdditionalBufferSize)
+ : mOriginalBuffer(0), mOriginalBufferSize(0),
+ mAdditionalBuffer(EXTEND_ADDITIONAL_BUFFER_SIZE_STEP), mUsedAdditionalBufferSize(0),
+ mMaxAdditionalBufferSize(maxAdditionalBufferSize) {}
+
AK_FORCE_INLINE int getTailPosition() const {
return mOriginalBufferSize + mUsedAdditionalBufferSize;
}
@@ -63,6 +71,13 @@ class BufferWithExtendableBuffer {
}
}
+ uint32_t readUint(const int size, const int pos) const;
+
+ uint32_t readUintAndAdvancePosition(const int size, int *const pos) const;
+
+ void readCodePointsAndAdvancePosition(const int maxCodePointCount,
+ int *const outCodePoints, int *outCodePointCount, int *const pos) const;
+
AK_FORCE_INLINE int getOriginalBufferSize() const {
return mOriginalBufferSize;
}
@@ -86,7 +101,6 @@ class BufferWithExtendableBuffer {
private:
DISALLOW_COPY_AND_ASSIGN(BufferWithExtendableBuffer);
- static const size_t MAX_ADDITIONAL_BUFFER_SIZE;
static const int NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE;
static const size_t EXTEND_ADDITIONAL_BUFFER_SIZE_STEP;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h
index 0c1576818..ebdd523e1 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h
@@ -114,6 +114,24 @@ class ByteArrayUtils {
return buffer[(*pos)++];
}
+ static AK_FORCE_INLINE int readUint(const uint8_t *const buffer,
+ const int size, const int pos) {
+ // size must be in 1 to 4.
+ ASSERT(size >= 1 && size <= 4);
+ switch (size) {
+ case 1:
+ return ByteArrayUtils::readUint8(buffer, pos);
+ case 2:
+ return ByteArrayUtils::readUint16(buffer, pos);
+ case 3:
+ return ByteArrayUtils::readUint24(buffer, pos);
+ case 4:
+ return ByteArrayUtils::readUint32(buffer, pos);
+ default:
+ return 0;
+ }
+ }
+
/**
* Code Point Reading
*
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
index 994826fa8..b48e5b005 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
@@ -20,7 +20,7 @@
#include <cstring>
#include "suggest/policyimpl/dictionary/header/header_policy.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
@@ -33,6 +33,9 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
switch (dictVersion) {
case 3:
return createEmptyV3DictFile(filePath, attributeMap);
+ case 4:
+ // TODO: Support version 4 dictionary format.
+ return false;
default:
// Only version 3 dictionary is supported for now.
return false;
@@ -41,12 +44,14 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
/* static */ bool DictFileWritingUtils::createEmptyV3DictFile(const char *const filePath,
const HeaderReadWriteUtils::AttributeMap *const attributeMap) {
- BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
+ BufferWithExtendableBuffer headerBuffer(
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
HeaderPolicy headerPolicy(FormatUtils::VERSION_3, attributeMap);
headerPolicy.writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */,
true /* updatesLastDecayedTime */, 0 /* unigramCount */, 0 /* bigramCount */,
0 /* extendedRegionSize */);
- BufferWithExtendableBuffer bodyBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
+ BufferWithExtendableBuffer bodyBuffer(
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
if (!DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(&bodyBuffer, 0 /* rootPos */)) {
return false;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h
new file mode 100644
index 000000000..59b894fa6
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_FILE_UTILS_H
+#define LATINIME_FILE_UTILS_H
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "defines.h"
+
+namespace latinime {
+
+class FileUtils {
+ public:
+ // Returns -1 on error.
+ static int getFileSize(const char *const filePath) {
+ const int fd = open(filePath, O_RDONLY);
+ if (fd == -1) {
+ return -1;
+ }
+ struct stat statBuf;
+ if (fstat(fd, &statBuf) != 0) {
+ close(fd);
+ return -1;
+ }
+ close(fd);
+ return static_cast<int>(statBuf.st_size);
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(FileUtils);
+};
+} // namespace latinime
+#endif /* LATINIME_FILE_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
index 1d77d5c27..4843650ad 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
@@ -45,6 +45,8 @@ const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 12;
return VERSION_2;
} else if (ByteArrayUtils::readUint16(dict, 4) == 3) {
return VERSION_3;
+ } else if (ByteArrayUtils::readUint16(dict, 4) == 4) {
+ return VERSION_4;
} else {
return UNKNOWN_VERSION;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
index 79ed0de29..b90393a53 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
@@ -31,6 +31,7 @@ class FormatUtils {
enum FORMAT_VERSION {
VERSION_2,
VERSION_3,
+ VERSION_4,
UNKNOWN_VERSION
};
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.cpp
new file mode 100644
index 000000000..28af97351
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.cpp
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+
+#include <cerrno>
+#include <climits>
+#include <cstdio>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "suggest/policyimpl/dictionary/utils/file_utils.h"
+
+namespace latinime {
+
+/* static */ MmappedBuffer::MmappedBufferPtr MmappedBuffer::openBuffer(
+ const char *const path, const int bufferOffset, const int bufferSize,
+ const bool isUpdatable) {
+ const int mmapFd = open(path, O_RDONLY);
+ if (mmapFd < 0) {
+ AKLOGE("DICT: Can't open the source. path=%s errno=%d", path, errno);
+ return MmappedBufferPtr(0);
+ }
+ const int pagesize = getpagesize();
+ const int offset = bufferOffset % pagesize;
+ int alignedOffset = bufferOffset - offset;
+ int alignedSize = bufferSize + offset;
+ const int protMode = isUpdatable ? PROT_READ | PROT_WRITE : PROT_READ;
+ void *const mmappedBuffer = mmap(0, alignedSize, protMode, MAP_PRIVATE, mmapFd,
+ alignedOffset);
+ if (mmappedBuffer == MAP_FAILED) {
+ AKLOGE("DICT: Can't mmap dictionary. errno=%d", errno);
+ close(mmapFd);
+ return MmappedBufferPtr(0);
+ }
+ uint8_t *const buffer = static_cast<uint8_t *>(mmappedBuffer) + offset;
+ if (!buffer) {
+ AKLOGE("DICT: buffer is null");
+ close(mmapFd);
+ return MmappedBufferPtr(0);
+ }
+ return MmappedBufferPtr(new MmappedBuffer(buffer, bufferSize, mmappedBuffer, alignedSize,
+ mmapFd, isUpdatable));
+}
+
+/* static */ MmappedBuffer::MmappedBufferPtr MmappedBuffer::openBuffer(
+ const char *const path, const bool isUpdatable) {
+ const int fileSize = FileUtils::getFileSize(path);
+ if (fileSize == -1) {
+ return MmappedBufferPtr(0);
+ } else if (fileSize == 0) {
+ return MmappedBufferPtr(new MmappedBuffer(isUpdatable));
+ } else {
+ return openBuffer(path, 0 /* bufferOffset */, fileSize, isUpdatable);
+ }
+}
+
+/* static */ MmappedBuffer::MmappedBufferPtr MmappedBuffer::openBuffer(
+ const char *const dirPath, const char *const fileName, const bool isUpdatable) {
+ const int filePathBufferSize = PATH_MAX + 1 /* terminator */;
+ char filePath[filePathBufferSize];
+ const int filePathLength = snprintf(filePath, filePathBufferSize, "%s%s", dirPath,
+ fileName);
+ if (filePathLength >= filePathBufferSize) {
+ return 0;
+ }
+ return openBuffer(filePath, isUpdatable);
+}
+
+MmappedBuffer::~MmappedBuffer() {
+ if (mAlignedSize == 0) {
+ return;
+ }
+ int ret = munmap(mMmappedBuffer, mAlignedSize);
+ if (ret != 0) {
+ AKLOGE("DICT: Failure in munmap. ret=%d errno=%d", ret, errno);
+ }
+ ret = close(mMmapFd);
+ if (ret != 0) {
+ AKLOGE("DICT: Failure in close. ret=%d errno=%d", ret, errno);
+ }
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h
index 6b69116eb..73a733b0c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h
@@ -17,58 +17,27 @@
#ifndef LATINIME_MMAPPED_BUFFER_H
#define LATINIME_MMAPPED_BUFFER_H
-#include <cerrno>
-#include <fcntl.h>
#include <stdint.h>
-#include <sys/mman.h>
-#include <unistd.h>
#include "defines.h"
+#include "utils/exclusive_ownership_pointer.h"
namespace latinime {
class MmappedBuffer {
public:
- static MmappedBuffer* openBuffer(const char *const path, const int bufferOffset,
- const int bufferSize, const bool isUpdatable) {
- const int openMode = isUpdatable ? O_RDWR : O_RDONLY;
- const int mmapFd = open(path, openMode);
- if (mmapFd < 0) {
- AKLOGE("DICT: Can't open the source. path=%s errno=%d", path, errno);
- return 0;
- }
- const int pagesize = getpagesize();
- const int offset = bufferOffset % pagesize;
- int alignedOffset = bufferOffset - offset;
- int alignedSize = bufferSize + offset;
- const int protMode = isUpdatable ? PROT_READ | PROT_WRITE : PROT_READ;
- void *const mmappedBuffer = mmap(0, alignedSize, protMode, MAP_PRIVATE, mmapFd,
- alignedOffset);
- if (mmappedBuffer == MAP_FAILED) {
- AKLOGE("DICT: Can't mmap dictionary. errno=%d", errno);
- close(mmapFd);
- return 0;
- }
- uint8_t *const buffer = static_cast<uint8_t *>(mmappedBuffer) + offset;
- if (!buffer) {
- AKLOGE("DICT: buffer is null");
- close(mmapFd);
- return 0;
- }
- return new MmappedBuffer(buffer, bufferSize, mmappedBuffer, alignedSize, mmapFd,
- isUpdatable);
- }
+ typedef ExclusiveOwnershipPointer<MmappedBuffer> MmappedBufferPtr;
- ~MmappedBuffer() {
- int ret = munmap(mMmappedBuffer, mAlignedSize);
- if (ret != 0) {
- AKLOGE("DICT: Failure in munmap. ret=%d errno=%d", ret, errno);
- }
- ret = close(mMmapFd);
- if (ret != 0) {
- AKLOGE("DICT: Failure in close. ret=%d errno=%d", ret, errno);
- }
- }
+ static MmappedBufferPtr openBuffer(const char *const path,
+ const int bufferOffset, const int bufferSize, const bool isUpdatable);
+
+ // Mmap entire file.
+ static MmappedBufferPtr openBuffer(const char *const path, const bool isUpdatable);
+
+ static MmappedBufferPtr openBuffer(const char *const dirPath, const char *const fileName,
+ const bool isUpdatable);
+
+ ~MmappedBuffer();
AK_FORCE_INLINE uint8_t *getBuffer() const {
return mBuffer;
@@ -89,6 +58,11 @@ class MmappedBuffer {
: mBuffer(buffer), mBufferSize(bufferSize), mMmappedBuffer(mmappedBuffer),
mAlignedSize(alignedSize), mMmapFd(mmapFd), mIsUpdatable(isUpdatable) {}
+ // Empty file. We have to handle an empty file as a valid part of a dictionary.
+ AK_FORCE_INLINE MmappedBuffer(const bool isUpdatable)
+ : mBuffer(0), mBufferSize(0), mMmappedBuffer(0), mAlignedSize(0), mMmapFd(0),
+ mIsUpdatable(isUpdatable) {}
+
DISALLOW_IMPLICIT_CONSTRUCTORS(MmappedBuffer);
uint8_t *const mBuffer;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp
new file mode 100644
index 000000000..2678b8c7b
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/utils/sparse_table.h"
+
+namespace latinime {
+
+const int SparseTable::NOT_EXIST = -1;
+
+bool SparseTable::contains(const int id) const {
+ const int readingPos = id / mBlockSize * mDataSize;
+ if (id < 0 || mIndexTableBuffer->getTailPosition() <= readingPos) {
+ return false;
+ }
+ const int index = mIndexTableBuffer->readUint(mDataSize, readingPos);
+ return index != NOT_EXIST;
+}
+
+uint32_t SparseTable::get(const int id) const {
+ const int indexTableIndex = id / mBlockSize;
+ int readingPos = indexTableIndex * mDataSize;
+ const int index = mIndexTableBuffer->readUint(mDataSize, readingPos);
+ const int offset = id % mBlockSize;
+ readingPos = (index * mDataSize + offset) * mBlockSize;
+ return mContentTableBuffer->readUint(mDataSize, readingPos);
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h
new file mode 100644
index 000000000..d71756c63
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SPARSE_TABLE_H
+#define LATINIME_SPARSE_TABLE_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+// Note that there is a corresponding implementation in SparseTable.java.
+// TODO: Support multiple content buffers.
+class SparseTable {
+ public:
+ SparseTable(BufferWithExtendableBuffer *const indexTableBuffer,
+ BufferWithExtendableBuffer *const contentTableBuffer, const int blockSize,
+ const int dataSize)
+ : mIndexTableBuffer(indexTableBuffer), mContentTableBuffer(contentTableBuffer),
+ mBlockSize(blockSize), mDataSize(dataSize) {}
+
+ bool contains(const int id) const;
+
+ uint32_t get(const int id) const;
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTable);
+
+ static const int NOT_EXIST;
+
+ BufferWithExtendableBuffer *const mIndexTableBuffer;
+ BufferWithExtendableBuffer *const mContentTableBuffer;
+ const int mBlockSize;
+ const int mDataSize;
+};
+} // namespace latinime
+#endif /* LATINIME_SPARSE_TABLE_H */
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h
index 007c19e0a..fd0ac9eb6 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h
@@ -81,7 +81,7 @@ class TypingTraversal : public Traversal {
return false;
}
const int point0Index = dicNode->getInputIndex(0);
- return dicNode->isTerminalWordNode()
+ return dicNode->isTerminalDicNode()
&& traverseSession->getProximityInfoState(0)->
hasSpaceProximity(point0Index);
}
@@ -96,7 +96,7 @@ class TypingTraversal : public Traversal {
if (dicNode->isCompletion(inputSize)) {
return false;
}
- if (!dicNode->isTerminalWordNode()) {
+ if (!dicNode->isTerminalDicNode()) {
return false;
}
const int16_t pointIndex = dicNode->getInputIndex(0);
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp b/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp
index 5b6b5e874..54f65c786 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp
+++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp
@@ -23,39 +23,64 @@ namespace latinime {
const TypingWeighting TypingWeighting::sInstance;
-ErrorType TypingWeighting::getErrorType(const CorrectionType correctionType,
+ErrorTypeUtils::ErrorType TypingWeighting::getErrorType(const CorrectionType correctionType,
const DicTraverseSession *const traverseSession, const DicNode *const parentDicNode,
const DicNode *const dicNode) const {
switch (correctionType) {
case CT_MATCH:
if (isProximityDicNode(traverseSession, dicNode)) {
- return ET_PROXIMITY_CORRECTION;
+ return ErrorTypeUtils::PROXIMITY_CORRECTION;
+ } else if (dicNode->isInDigraph()) {
+ return ErrorTypeUtils::MATCH_WITH_DIGRAPH;
} else {
- return ET_NOT_AN_ERROR;
+ // Compare the node code point with original primary code point on the keyboard.
+ const ProximityInfoState *const pInfoState =
+ traverseSession->getProximityInfoState(0);
+ const int primaryOriginalCodePoint = pInfoState->getPrimaryOriginalCodePointAt(
+ dicNode->getInputIndex(0));
+ const int nodeCodePoint = dicNode->getNodeCodePoint();
+ if (primaryOriginalCodePoint == nodeCodePoint) {
+ // Node code point is same as original code point on the keyboard.
+ return ErrorTypeUtils::NOT_AN_ERROR;
+ } else if (CharUtils::toLowerCase(primaryOriginalCodePoint) ==
+ CharUtils::toLowerCase(nodeCodePoint)) {
+ // Only cases of the code points are different.
+ return ErrorTypeUtils::MATCH_WITH_CASE_ERROR;
+ } else if (CharUtils::toBaseCodePoint(primaryOriginalCodePoint) ==
+ CharUtils::toBaseCodePoint(nodeCodePoint)) {
+ // Node code point is a variant of original code point.
+ return ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR;
+ } else {
+ // Node code point is a variant of original code point and the cases are also
+ // different.
+ return ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR
+ | ErrorTypeUtils::MATCH_WITH_CASE_ERROR;
+ }
}
+ break;
case CT_ADDITIONAL_PROXIMITY:
- return ET_PROXIMITY_CORRECTION;
+ return ErrorTypeUtils::PROXIMITY_CORRECTION;
case CT_OMISSION:
if (parentDicNode->canBeIntentionalOmission()) {
- return ET_INTENTIONAL_OMISSION;
+ return ErrorTypeUtils::INTENTIONAL_OMISSION;
} else {
- return ET_EDIT_CORRECTION;
+ return ErrorTypeUtils::EDIT_CORRECTION;
}
break;
case CT_SUBSTITUTION:
case CT_INSERTION:
case CT_TERMINAL_INSERTION:
case CT_TRANSPOSITION:
- return ET_EDIT_CORRECTION;
+ return ErrorTypeUtils::EDIT_CORRECTION;
case CT_NEW_WORD_SPACE_OMISSION:
case CT_NEW_WORD_SPACE_SUBSTITUTION:
- return ET_NEW_WORD;
+ return ErrorTypeUtils::NEW_WORD;
case CT_TERMINAL:
- return ET_NOT_AN_ERROR;
+ return ErrorTypeUtils::NOT_AN_ERROR;
case CT_COMPLETION:
- return ET_COMPLETION;
+ return ErrorTypeUtils::COMPLETION;
default:
- return ET_NOT_AN_ERROR;
+ return ErrorTypeUtils::NOT_AN_ERROR;
}
}
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
index 9f0a331e3..41314ef52 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
@@ -19,6 +19,7 @@
#include "defines.h"
#include "suggest/core/dicnode/dic_node_utils.h"
+#include "suggest/core/dictionary/error_type_utils.h"
#include "suggest/core/layout/touch_position_correction_utils.h"
#include "suggest/core/policy/weighting.h"
#include "suggest/core/session/dic_traverse_session.h"
@@ -204,7 +205,7 @@ class TypingWeighting : public Weighting {
return cost * traverseSession->getMultiWordCostMultiplier();
}
- ErrorType getErrorType(const CorrectionType correctionType,
+ ErrorTypeUtils::ErrorType getErrorType(const CorrectionType correctionType,
const DicTraverseSession *const traverseSession,
const DicNode *const parentDicNode, const DicNode *const dicNode) const;
diff --git a/native/jni/src/utils/exclusive_ownership_pointer.h b/native/jni/src/utils/exclusive_ownership_pointer.h
new file mode 100644
index 000000000..3cf78954a
--- /dev/null
+++ b/native/jni/src/utils/exclusive_ownership_pointer.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_EXCLUSIVE_OWNERSHIP_POINTER_H
+#define LATINIME_EXCLUSIVE_OWNERSHIP_POINTER_H
+
+#include "defines.h"
+
+namespace latinime {
+
+template<class T>
+class ExclusiveOwnershipPointer {
+ public:
+ // This instance become an owner of the raw pointer.
+ ExclusiveOwnershipPointer(T *const rawPointer)
+ : mPointer(rawPointer),
+ mSharedOwnerPtr(new (ExclusiveOwnershipPointer<T> *)(this)) {}
+
+ // Move the ownership.
+ ExclusiveOwnershipPointer(const ExclusiveOwnershipPointer<T> &pointer)
+ : mPointer(pointer.mPointer), mSharedOwnerPtr(pointer.mSharedOwnerPtr) {
+ transferOwnership(&pointer);
+ }
+
+ ~ExclusiveOwnershipPointer() {
+ deletePointersIfHavingOwnership();
+ }
+
+ // Move the ownership.
+ ExclusiveOwnershipPointer<T> &operator=(const ExclusiveOwnershipPointer<T> &pointer) {
+ // Delete pointers when this is an owner of another pointer.
+ deletePointersIfHavingOwnership();
+ mPointer = pointer.mPointer;
+ mSharedOwnerPtr = pointer.mSharedOwnerPtr;
+ transferOwnership(pointer);
+ return *this;
+ }
+
+ T *get() const {
+ return mPointer;
+ }
+
+ private:
+ // This class allows to copy and assign and ensures only one instance has the ownership of the
+ // managed pointer.
+
+ ExclusiveOwnershipPointer() : mPointer(0), mSharedOwnerPtr(0) {}
+
+ void transferOwnership(const ExclusiveOwnershipPointer<T> *const src) {
+ if (*mSharedOwnerPtr != src) {
+ AKLOGE("Failed to transfer the ownership because src is not the current owner."
+ "src: %p, owner: %p", src, *mSharedOwnerPtr);
+ ASSERT(false);
+ return;
+ }
+ // Transfer the ownership from src to this instance.
+ *mSharedOwnerPtr = this;
+ }
+
+ void deletePointersIfHavingOwnership() {
+ if (mSharedOwnerPtr && *mSharedOwnerPtr == this) {
+ if (mPointer) {
+ if (DEBUG_DICT) {
+ AKLOGI("Releasing pointer: %p", mPointer);
+ }
+ delete mPointer;
+ }
+ delete mSharedOwnerPtr;
+ }
+ }
+
+ T *mPointer;
+ // mSharedOwnerPtr points a shared memory space where the instance which has the ownership is
+ // stored.
+ ExclusiveOwnershipPointer<T> **mSharedOwnerPtr;
+};
+} // namespace latinime
+#endif /* LATINIME_EXCLUSIVE_OWNERSHIP_POINTER_H */