aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/src
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni/src')
-rw-r--r--native/jni/src/defines.h20
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node.h79
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_utils.cpp42
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_utils.h12
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_vector.h12
-rw-r--r--native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h79
-rw-r--r--native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h16
-rw-r--r--native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h41
-rw-r--r--native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp4
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.cpp40
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.h26
-rw-r--r--native/jni/src/suggest/core/dictionary/error_type_utils.cpp34
-rw-r--r--native/jni/src/suggest/core/dictionary/error_type_utils.h69
-rw-r--r--native/jni/src/suggest/core/layout/proximity_info.cpp16
-rw-r--r--native/jni/src/suggest/core/layout/proximity_info.h11
-rw-r--r--native/jni/src/suggest/core/layout/proximity_info_state.cpp6
-rw-r--r--native/jni/src/suggest/core/layout/proximity_info_state.h2
-rw-r--r--native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h7
-rw-r--r--native/jni/src/suggest/core/policy/weighting.cpp5
-rw-r--r--native/jni/src/suggest/core/policy/weighting.h3
-rw-r--r--native/jni/src/suggest/core/session/dic_traverse_session.cpp8
-rw-r--r--native/jni/src/suggest/core/session/dic_traverse_session.h27
-rw-r--r--native/jni/src/suggest/core/suggest.cpp22
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp23
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp53
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp124
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h163
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp3
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp78
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h (renamed from native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h)6
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h185
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h39
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp)29
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h (renamed from native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h)21
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp)2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h (renamed from native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h)0
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp)62
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h (renamed from native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h)19
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.cpp107
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h62
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp)140
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h (renamed from native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h)26
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp)66
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h (renamed from native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h)171
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp)2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h (renamed from native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h)0
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp)187
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h (renamed from native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h)21
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp)2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h (renamed from native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h)2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dict_content.h36
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h49
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h69
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h75
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp35
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h43
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp96
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h96
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h11
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp11
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h50
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h1
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.cpp99
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h60
-rw-r--r--native/jni/src/suggest/policyimpl/typing/typing_traversal.h4
-rw-r--r--native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp47
-rw-r--r--native/jni/src/suggest/policyimpl/typing/typing_weighting.h3
-rw-r--r--native/jni/src/utils/exclusive_ownership_pointer.h91
73 files changed, 2014 insertions, 1048 deletions
diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h
index 742e388e4..fbcd612b7 100644
--- a/native/jni/src/defines.h
+++ b/native/jni/src/defines.h
@@ -392,24 +392,4 @@ typedef enum {
// Create new word with space substitution
CT_NEW_WORD_SPACE_SUBSTITUTION,
} CorrectionType;
-
-// ErrorType is mainly decided by CorrectionType but it is also depending on if
-// the correction has really been performed or not.
-typedef enum {
- // Substitution, omission and transposition
- ET_EDIT_CORRECTION,
- // Proximity error
- ET_PROXIMITY_CORRECTION,
- // Completion
- ET_COMPLETION,
- // New word
- // TODO: Remove.
- // A new word error should be an edit correction error or a proximity correction error.
- ET_NEW_WORD,
- // Treat error as an intentional omission when the CorrectionType is omission and the node can
- // be intentional omission.
- ET_INTENTIONAL_OMISSION,
- // Not treated as an error. Tracked for checking exact match
- ET_NOT_AN_ERROR
-} ErrorType;
#endif // LATINIME_DEFINES_H
diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h
index 49cfdecac..0b2b4a9e8 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node.h
@@ -99,7 +99,7 @@ class DicNode {
virtual ~DicNode() {}
// Init for copy
- void initByCopy(const DicNode *dicNode) {
+ void initByCopy(const DicNode *const dicNode) {
mIsUsed = true;
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
mDicNodeProperties.init(&dicNode->mDicNodeProperties);
@@ -107,25 +107,25 @@ class DicNode {
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
}
- // Init for root with prevWordNodePos which is used for bigram
- void initAsRoot(const int rootGroupPos, const int prevWordNodePos) {
+ // Init for root with prevWordPtNodePos which is used for bigram
+ void initAsRoot(const int rootPtNodeArrayPos, const int prevWordPtNodePos) {
mIsUsed = true;
mIsCachedForNextSuggestion = false;
mDicNodeProperties.init(
- NOT_A_DICT_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */,
+ NOT_A_DICT_POS /* pos */, rootPtNodeArrayPos, NOT_A_CODE_POINT /* nodeCodePoint */,
NOT_A_PROBABILITY /* probability */, false /* isTerminal */,
true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */,
0 /* terminalDepth */);
- mDicNodeState.init(prevWordNodePos);
+ mDicNodeState.init(prevWordPtNodePos);
PROF_NODE_RESET(mProfiler);
}
// Init for root with previous word
- void initAsRootWithPreviousWord(DicNode *dicNode, const int rootGroupPos) {
+ void initAsRootWithPreviousWord(const DicNode *const dicNode, const int rootPtNodeArrayPos) {
mIsUsed = true;
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
mDicNodeProperties.init(
- NOT_A_DICT_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */,
+ NOT_A_DICT_POS /* pos */, rootPtNodeArrayPos, NOT_A_CODE_POINT /* nodeCodePoint */,
NOT_A_PROBABILITY /* probability */, false /* isTerminal */,
true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */,
0 /* terminalDepth */);
@@ -138,7 +138,7 @@ class DicNode {
mDicNodeState.mDicNodeStatePrevWord.init(
dicNode->mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() + 1,
dicNode->mDicNodeProperties.getProbability(),
- dicNode->mDicNodeProperties.getPos(),
+ dicNode->mDicNodeProperties.getPtNodePos(),
dicNode->mDicNodeState.mDicNodeStatePrevWord.mPrevWord,
dicNode->mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(),
dicNode->getOutputWordBuf(),
@@ -148,26 +148,27 @@ class DicNode {
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
}
- void initAsPassingChild(DicNode *parentNode) {
+ void initAsPassingChild(DicNode *parentDicNode) {
mIsUsed = true;
- mIsCachedForNextSuggestion = parentNode->mIsCachedForNextSuggestion;
- const int c = parentNode->getNodeTypedCodePoint();
- mDicNodeProperties.init(&parentNode->mDicNodeProperties, c);
- mDicNodeState.init(&parentNode->mDicNodeState);
- PROF_NODE_COPY(&parentNode->mProfiler, mProfiler);
+ mIsCachedForNextSuggestion = parentDicNode->mIsCachedForNextSuggestion;
+ const int parentCodePoint = parentDicNode->getNodeTypedCodePoint();
+ mDicNodeProperties.init(&parentDicNode->mDicNodeProperties, parentCodePoint);
+ mDicNodeState.init(&parentDicNode->mDicNodeState);
+ PROF_NODE_COPY(&parentDicNode->mProfiler, mProfiler);
}
- void initAsChild(const DicNode *const dicNode, const int pos, const int childrenPos,
- const int probability, const bool isTerminal, const bool hasChildren,
- const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount,
- const int *const mergedNodeCodePoints) {
+ void initAsChild(const DicNode *const dicNode, const int ptNodePos,
+ const int childrenPtNodeArrayPos, const int probability, const bool isTerminal,
+ const bool hasChildren, const bool isBlacklistedOrNotAWord,
+ const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
mIsUsed = true;
uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1);
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
const uint16_t newLeavingDepth = static_cast<uint16_t>(
dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount);
- mDicNodeProperties.init(pos, childrenPos, mergedNodeCodePoints[0], probability,
- isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth, newLeavingDepth);
+ mDicNodeProperties.init(ptNodePos, childrenPtNodeArrayPos, mergedNodeCodePoints[0],
+ probability, isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth,
+ newLeavingDepth);
mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount,
mergedNodeCodePoints);
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
@@ -234,7 +235,7 @@ class DicNode {
}
bool isFirstWord() const {
- return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos() == NOT_A_DICT_POS;
+ return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos() == NOT_A_DICT_POS;
}
bool isCompletion(const int inputSize) const {
@@ -246,29 +247,30 @@ class DicNode {
}
// Used to get bigram probability in DicNodeUtils
- int getPos() const {
- return mDicNodeProperties.getPos();
+ int getPtNodePos() const {
+ return mDicNodeProperties.getPtNodePos();
}
// Used to get bigram probability in DicNodeUtils
- int getPrevWordPos() const {
- return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos();
+ int getPrevWordTerminalPtNodePos() const {
+ return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos();
}
// Used in DicNodeUtils
- int getChildrenPos() const {
- return mDicNodeProperties.getChildrenPos();
+ int getChildrenPtNodeArrayPos() const {
+ return mDicNodeProperties.getChildrenPtNodeArrayPos();
}
int getProbability() const {
return mDicNodeProperties.getProbability();
}
- AK_FORCE_INLINE bool isTerminalWordNode() const {
- const bool isTerminalNodes = mDicNodeProperties.isTerminal();
- const int currentNodeDepth = getNodeCodePointCount();
- const int terminalNodeDepth = mDicNodeProperties.getLeavingDepth();
- return isTerminalNodes && currentNodeDepth > 0 && currentNodeDepth == terminalNodeDepth;
+ AK_FORCE_INLINE bool isTerminalDicNode() const {
+ const bool isTerminalPtNode = mDicNodeProperties.isTerminal();
+ const int currentDicNodeDepth = getNodeCodePointCount();
+ const int terminalDicNodeDepth = mDicNodeProperties.getLeavingDepth();
+ return isTerminalPtNode && currentDicNodeDepth > 0
+ && currentDicNodeDepth == terminalDicNodeDepth;
}
bool shouldBeFilteredBySafetyNetForBigram() const {
@@ -374,8 +376,8 @@ class DicNode {
}
// Used to commit input partially
- int getPrevWordNodePos() const {
- return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos();
+ int getPrevWordPtNodePos() const {
+ return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos();
}
AK_FORCE_INLINE const int *getOutputWordBuf() const {
@@ -410,7 +412,7 @@ class DicNode {
// TODO: Remove once touch path is merged into ProximityInfoState
// Note: Returned codepoint may be a digraph codepoint if the node is in a composite glyph.
int getNodeCodePoint() const {
- const int codePoint = mDicNodeProperties.getNodeCodePoint();
+ const int codePoint = mDicNodeProperties.getDicNodeCodePoint();
const DigraphUtils::DigraphCodePointIndex digraphIndex =
mDicNodeState.mDicNodeStateScoring.getDigraphIndex();
if (digraphIndex == DigraphUtils::NOT_A_DIGRAPH_INDEX) {
@@ -423,8 +425,8 @@ class DicNode {
// Utils for cost calculation //
////////////////////////////////
AK_FORCE_INLINE bool isSameNodeCodePoint(const DicNode *const dicNode) const {
- return mDicNodeProperties.getNodeCodePoint()
- == dicNode->mDicNodeProperties.getNodeCodePoint();
+ return mDicNodeProperties.getDicNodeCodePoint()
+ == dicNode->mDicNodeProperties.getDicNodeCodePoint();
}
// TODO: remove
@@ -574,7 +576,8 @@ class DicNode {
// Caveat: Must not be called outside Weighting
// This restriction is guaranteed by "friend"
AK_FORCE_INLINE void addCost(const float spatialCost, const float languageCost,
- const bool doNormalization, const int inputSize, const ErrorType errorType) {
+ const bool doNormalization, const int inputSize,
+ const ErrorTypeUtils::ErrorType errorType) {
if (DEBUG_GEO_FULL) {
LOGI_SHOW_ADD_COST_PROP;
}
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
index ec65114c7..5540b6df5 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
+++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
@@ -22,7 +22,6 @@
#include "suggest/core/dicnode/dic_node_vector.h"
#include "suggest/core/dictionary/multi_bigram_map.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
-#include "utils/char_utils.h"
namespace latinime {
@@ -32,19 +31,20 @@ namespace latinime {
/* static */ void DicNodeUtils::initAsRoot(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- const int prevWordNodePos, DicNode *const newRootNode) {
- newRootNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordNodePos);
+ const int prevWordPtNodePos, DicNode *const newRootDicNode) {
+ newRootDicNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordPtNodePos);
}
/*static */ void DicNodeUtils::initAsRootWithPreviousWord(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- DicNode *const prevWordLastNode, DicNode *const newRootNode) {
- newRootNode->initAsRootWithPreviousWord(
- prevWordLastNode, dictionaryStructurePolicy->getRootPosition());
+ const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode) {
+ newRootDicNode->initAsRootWithPreviousWord(
+ prevWordLastDicNode, dictionaryStructurePolicy->getRootPosition());
}
-/* static */ void DicNodeUtils::initByCopy(DicNode *srcNode, DicNode *destNode) {
- destNode->initByCopy(srcNode);
+/* static */ void DicNodeUtils::initByCopy(const DicNode *const srcDicNode,
+ DicNode *const destDicNode) {
+ destDicNode->initByCopy(srcDicNode);
}
///////////////////////////////////
@@ -52,14 +52,14 @@ namespace latinime {
///////////////////////////////////
/* static */ void DicNodeUtils::getAllChildDicNodes(DicNode *dicNode,
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- DicNodeVector *childDicNodes) {
+ DicNodeVector *const childDicNodes) {
if (dicNode->isTotalInputSizeExceedingLimit()) {
return;
}
if (!dicNode->isLeavingNode()) {
childDicNodes->pushPassingChild(dicNode);
} else {
- dictionaryStructurePolicy->createAndGetAllChildNodes(dicNode, childDicNodes);
+ dictionaryStructurePolicy->createAndGetAllChildDicNodes(dicNode, childDicNodes);
}
}
@@ -71,11 +71,11 @@ namespace latinime {
*/
/* static */ float DicNodeUtils::getBigramNodeImprobability(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- const DicNode *const node, MultiBigramMap *multiBigramMap) {
- if (node->hasMultipleWords() && !node->isValidMultipleWordSuggestion()) {
+ const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) {
+ if (dicNode->hasMultipleWords() && !dicNode->isValidMultipleWordSuggestion()) {
return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
}
- const int probability = getBigramNodeProbability(dictionaryStructurePolicy, node,
+ const int probability = getBigramNodeProbability(dictionaryStructurePolicy, dicNode,
multiBigramMap);
// TODO: This equation to calculate the improbability looks unreasonable. Investigate this.
const float cost = static_cast<float>(MAX_PROBABILITY - probability)
@@ -85,19 +85,19 @@ namespace latinime {
/* static */ int DicNodeUtils::getBigramNodeProbability(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- const DicNode *const node, MultiBigramMap *multiBigramMap) {
- const int unigramProbability = node->getProbability();
- const int wordPos = node->getPos();
- const int prevWordPos = node->getPrevWordPos();
- if (NOT_A_DICT_POS == wordPos || NOT_A_DICT_POS == prevWordPos) {
+ const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) {
+ const int unigramProbability = dicNode->getProbability();
+ const int ptNodePos = dicNode->getPtNodePos();
+ const int prevWordTerminalPtNodePos = dicNode->getPrevWordTerminalPtNodePos();
+ if (NOT_A_DICT_POS == ptNodePos || NOT_A_DICT_POS == prevWordTerminalPtNodePos) {
// Note: Normally wordPos comes from the dictionary and should never equal
// NOT_A_VALID_WORD_POS.
return dictionaryStructurePolicy->getProbability(unigramProbability,
NOT_A_PROBABILITY);
}
if (multiBigramMap) {
- return multiBigramMap->getBigramProbability(dictionaryStructurePolicy, prevWordPos,
- wordPos, unigramProbability);
+ return multiBigramMap->getBigramProbability(dictionaryStructurePolicy,
+ prevWordTerminalPtNodePos, ptNodePos, unigramProbability);
}
return dictionaryStructurePolicy->getProbability(unigramProbability,
NOT_A_PROBABILITY);
@@ -109,7 +109,7 @@ namespace latinime {
// TODO: Move to char_utils?
/* static */ int DicNodeUtils::appendTwoWords(const int *const src0, const int16_t length0,
- const int *const src1, const int16_t length1, int *dest) {
+ const int *const src1, const int16_t length1, int *const dest) {
int actualLength0 = 0;
for (int i = 0; i < length0; ++i) {
if (src0[i] == 0) {
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.h b/native/jni/src/suggest/core/dicnode/dic_node_utils.h
index 3fb351a61..3f1514a52 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_utils.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.h
@@ -31,20 +31,20 @@ class MultiBigramMap;
class DicNodeUtils {
public:
static int appendTwoWords(const int *src0, const int16_t length0, const int *src1,
- const int16_t length1, int *dest);
+ const int16_t length1, int *const dest);
static void initAsRoot(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- const int prevWordNodePos, DicNode *newRootNode);
+ const int prevWordPtNodePos, DicNode *const newRootDicNode);
static void initAsRootWithPreviousWord(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- DicNode *prevWordLastNode, DicNode *newRootNode);
- static void initByCopy(DicNode *srcNode, DicNode *destNode);
+ const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode);
+ static void initByCopy(const DicNode *const srcDicNode, DicNode *const destDicNode);
static void getAllChildDicNodes(DicNode *dicNode,
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
DicNodeVector *childDicNodes);
static float getBigramNodeImprobability(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- const DicNode *const node, MultiBigramMap *const multiBigramMap);
+ const DicNode *const dicNode, MultiBigramMap *const multiBigramMap);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DicNodeUtils);
@@ -53,7 +53,7 @@ class DicNodeUtils {
static int getBigramNodeProbability(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- const DicNode *const node, MultiBigramMap *multiBigramMap);
+ const DicNode *const dicNode, MultiBigramMap *const multiBigramMap);
};
} // namespace latinime
#endif // LATINIME_DIC_NODE_UTILS_H
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_vector.h b/native/jni/src/suggest/core/dicnode/dic_node_vector.h
index 42addae8d..9364e7751 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_vector.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node_vector.h
@@ -62,14 +62,14 @@ class DicNodeVector {
mDicNodes.back().initAsPassingChild(dicNode);
}
- void pushLeavingChild(const DicNode *const dicNode, const int pos, const int childrenPos,
- const int probability, const bool isTerminal, const bool hasChildren,
- const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount,
- const int *const mergedNodeCodePoints) {
+ void pushLeavingChild(const DicNode *const dicNode, const int ptNodePos,
+ const int childrenPtNodeArrayPos, const int probability, const bool isTerminal,
+ const bool hasChildren, const bool isBlacklistedOrNotAWord,
+ const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
ASSERT(!mLock);
mDicNodes.push_back(mEmptyNode);
- mDicNodes.back().initAsChild(dicNode, pos, childrenPos, probability, isTerminal,
- hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount,
+ mDicNodes.back().initAsChild(dicNode, ptNodePos, childrenPtNodeArrayPos, probability,
+ isTerminal, hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount,
mergedNodeCodePoints);
}
diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h
index 9e0f62ceb..c41a7243a 100644
--- a/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h
+++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h
@@ -24,15 +24,14 @@
namespace latinime {
/**
- * Node for traversing the lexicon trie.
+ * PtNode information related to the DicNode from the lexicon trie.
*/
-// TODO: Introduce a dictionary node class which has attribute members required to understand the
-// dictionary structure.
class DicNodeProperties {
public:
AK_FORCE_INLINE DicNodeProperties()
- : mPos(0), mChildrenPos(0), mProbability(0), mNodeCodePoint(0), mIsTerminal(false),
- mHasChildren(false), mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0) {}
+ : mPtNodePos(0), mChildrenPtNodeArrayPos(0), mProbability(0), mDicNodeCodePoint(0),
+ mIsTerminal(false), mHasChildrenPtNodes(false), mIsBlacklistedOrNotAWord(false),
+ mDepth(0), mLeavingDepth(0) {}
virtual ~DicNodeProperties() {}
@@ -40,57 +39,57 @@ class DicNodeProperties {
void init(const int pos, const int childrenPos, const int nodeCodePoint, const int probability,
const bool isTerminal, const bool hasChildren, const bool isBlacklistedOrNotAWord,
const uint16_t depth, const uint16_t leavingDepth) {
- mPos = pos;
- mChildrenPos = childrenPos;
- mNodeCodePoint = nodeCodePoint;
+ mPtNodePos = pos;
+ mChildrenPtNodeArrayPos = childrenPos;
+ mDicNodeCodePoint = nodeCodePoint;
mProbability = probability;
mIsTerminal = isTerminal;
- mHasChildren = hasChildren;
+ mHasChildrenPtNodes = hasChildren;
mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord;
mDepth = depth;
mLeavingDepth = leavingDepth;
}
// Init for copy
- void init(const DicNodeProperties *const nodeProp) {
- mPos = nodeProp->mPos;
- mChildrenPos = nodeProp->mChildrenPos;
- mNodeCodePoint = nodeProp->mNodeCodePoint;
- mProbability = nodeProp->mProbability;
- mIsTerminal = nodeProp->mIsTerminal;
- mHasChildren = nodeProp->mHasChildren;
- mIsBlacklistedOrNotAWord = nodeProp->mIsBlacklistedOrNotAWord;
- mDepth = nodeProp->mDepth;
- mLeavingDepth = nodeProp->mLeavingDepth;
+ void init(const DicNodeProperties *const dicNodeProp) {
+ mPtNodePos = dicNodeProp->mPtNodePos;
+ mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos;
+ mDicNodeCodePoint = dicNodeProp->mDicNodeCodePoint;
+ mProbability = dicNodeProp->mProbability;
+ mIsTerminal = dicNodeProp->mIsTerminal;
+ mHasChildrenPtNodes = dicNodeProp->mHasChildrenPtNodes;
+ mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
+ mDepth = dicNodeProp->mDepth;
+ mLeavingDepth = dicNodeProp->mLeavingDepth;
}
// Init as passing child
- void init(const DicNodeProperties *const nodeProp, const int codePoint) {
- mPos = nodeProp->mPos;
- mChildrenPos = nodeProp->mChildrenPos;
- mNodeCodePoint = codePoint; // Overwrite the node char of a passing child
- mProbability = nodeProp->mProbability;
- mIsTerminal = nodeProp->mIsTerminal;
- mHasChildren = nodeProp->mHasChildren;
- mIsBlacklistedOrNotAWord = nodeProp->mIsBlacklistedOrNotAWord;
- mDepth = nodeProp->mDepth + 1; // Increment the depth of a passing child
- mLeavingDepth = nodeProp->mLeavingDepth;
+ void init(const DicNodeProperties *const dicNodeProp, const int codePoint) {
+ mPtNodePos = dicNodeProp->mPtNodePos;
+ mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos;
+ mDicNodeCodePoint = codePoint; // Overwrite the node char of a passing child
+ mProbability = dicNodeProp->mProbability;
+ mIsTerminal = dicNodeProp->mIsTerminal;
+ mHasChildrenPtNodes = dicNodeProp->mHasChildrenPtNodes;
+ mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
+ mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child
+ mLeavingDepth = dicNodeProp->mLeavingDepth;
}
- int getPos() const {
- return mPos;
+ int getPtNodePos() const {
+ return mPtNodePos;
}
- int getChildrenPos() const {
- return mChildrenPos;
+ int getChildrenPtNodeArrayPos() const {
+ return mChildrenPtNodeArrayPos;
}
int getProbability() const {
return mProbability;
}
- int getNodeCodePoint() const {
- return mNodeCodePoint;
+ int getDicNodeCodePoint() const {
+ return mDicNodeCodePoint;
}
uint16_t getDepth() const {
@@ -107,7 +106,7 @@ class DicNodeProperties {
}
bool hasChildren() const {
- return mHasChildren || mDepth != mLeavingDepth;
+ return mHasChildrenPtNodes || mDepth != mLeavingDepth;
}
bool isBlacklistedOrNotAWord() const {
@@ -118,12 +117,12 @@ class DicNodeProperties {
// Caution!!!
// Use a default copy constructor and an assign operator because shallow copies are ok
// for this class
- int mPos;
- int mChildrenPos;
+ int mPtNodePos;
+ int mChildrenPtNodeArrayPos;
int mProbability;
- int mNodeCodePoint;
+ int mDicNodeCodePoint;
bool mIsTerminal;
- bool mHasChildren;
+ bool mHasChildrenPtNodes;
bool mIsBlacklistedOrNotAWord;
uint16_t mDepth;
uint16_t mLeavingDepth;
diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h
index b8986203d..dba57056b 100644
--- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h
+++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h
@@ -30,7 +30,7 @@ class DicNodeStatePrevWord {
public:
AK_FORCE_INLINE DicNodeStatePrevWord()
: mPrevWordCount(0), mPrevWordLength(0), mPrevWordStart(0), mPrevWordProbability(0),
- mPrevWordNodePos(NOT_A_DICT_POS), mSecondWordFirstInputIndex(NOT_AN_INDEX) {
+ mPrevWordPtNodePos(NOT_A_DICT_POS), mSecondWordFirstInputIndex(NOT_AN_INDEX) {
memset(mPrevWord, 0, sizeof(mPrevWord));
}
@@ -41,7 +41,7 @@ class DicNodeStatePrevWord {
mPrevWordCount = 0;
mPrevWordStart = 0;
mPrevWordProbability = -1;
- mPrevWordNodePos = NOT_A_DICT_POS;
+ mPrevWordPtNodePos = NOT_A_DICT_POS;
mSecondWordFirstInputIndex = NOT_AN_INDEX;
}
@@ -50,7 +50,7 @@ class DicNodeStatePrevWord {
mPrevWordCount = 0;
mPrevWordStart = 0;
mPrevWordProbability = -1;
- mPrevWordNodePos = prevWordNodePos;
+ mPrevWordPtNodePos = prevWordNodePos;
mSecondWordFirstInputIndex = NOT_AN_INDEX;
}
@@ -60,7 +60,7 @@ class DicNodeStatePrevWord {
mPrevWordCount = prevWord->mPrevWordCount;
mPrevWordStart = prevWord->mPrevWordStart;
mPrevWordProbability = prevWord->mPrevWordProbability;
- mPrevWordNodePos = prevWord->mPrevWordNodePos;
+ mPrevWordPtNodePos = prevWord->mPrevWordPtNodePos;
mSecondWordFirstInputIndex = prevWord->mSecondWordFirstInputIndex;
memcpy(mPrevWord, prevWord->mPrevWord, prevWord->mPrevWordLength * sizeof(mPrevWord[0]));
}
@@ -71,7 +71,7 @@ class DicNodeStatePrevWord {
const int prevWordSecondWordFirstInputIndex, const int lastInputIndex) {
mPrevWordCount = min(prevWordCount, static_cast<int16_t>(MAX_RESULTS));
mPrevWordProbability = prevWordProbability;
- mPrevWordNodePos = prevWordNodePos;
+ mPrevWordPtNodePos = prevWordNodePos;
int twoWordsLen =
DicNodeUtils::appendTwoWords(src0, length0, src1, length1, mPrevWord);
if (twoWordsLen >= MAX_WORD_LENGTH) {
@@ -116,8 +116,8 @@ class DicNodeStatePrevWord {
return mPrevWordStart;
}
- int getPrevWordNodePos() const {
- return mPrevWordNodePos;
+ int getPrevWordPtNodePos() const {
+ return mPrevWordPtNodePos;
}
int getPrevWordCodePointAt(const int id) const {
@@ -147,7 +147,7 @@ class DicNodeStatePrevWord {
int16_t mPrevWordLength;
int16_t mPrevWordStart;
int16_t mPrevWordProbability;
- int mPrevWordNodePos;
+ int mPrevWordPtNodePos;
int mSecondWordFirstInputIndex;
};
} // namespace latinime
diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h
index 3c85d0e9d..74f9eee92 100644
--- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h
+++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h
@@ -21,6 +21,7 @@
#include "defines.h"
#include "suggest/core/dictionary/digraph_utils.h"
+#include "suggest/core/dictionary/error_type_utils.h"
namespace latinime {
@@ -31,7 +32,7 @@ class DicNodeStateScoring {
mDigraphIndex(DigraphUtils::NOT_A_DIGRAPH_INDEX),
mEditCorrectionCount(0), mProximityCorrectionCount(0),
mNormalizedCompoundDistance(0.0f), mSpatialDistance(0.0f), mLanguageDistance(0.0f),
- mRawLength(0.0f), mExactMatch(true),
+ mRawLength(0.0f), mContainingErrorTypes(ErrorTypeUtils::NOT_AN_ERROR),
mNormalizedCompoundDistanceAfterFirstWord(MAX_VALUE_FOR_WEIGHTING) {
}
@@ -47,7 +48,7 @@ class DicNodeStateScoring {
mDoubleLetterLevel = NOT_A_DOUBLE_LETTER;
mDigraphIndex = DigraphUtils::NOT_A_DIGRAPH_INDEX;
mNormalizedCompoundDistanceAfterFirstWord = MAX_VALUE_FOR_WEIGHTING;
- mExactMatch = true;
+ mContainingErrorTypes = ErrorTypeUtils::NOT_AN_ERROR;
}
AK_FORCE_INLINE void init(const DicNodeStateScoring *const scoring) {
@@ -59,34 +60,21 @@ class DicNodeStateScoring {
mRawLength = scoring->mRawLength;
mDoubleLetterLevel = scoring->mDoubleLetterLevel;
mDigraphIndex = scoring->mDigraphIndex;
- mExactMatch = scoring->mExactMatch;
+ mContainingErrorTypes = scoring->mContainingErrorTypes;
mNormalizedCompoundDistanceAfterFirstWord =
scoring->mNormalizedCompoundDistanceAfterFirstWord;
}
void addCost(const float spatialCost, const float languageCost, const bool doNormalization,
- const int inputSize, const int totalInputIndex, const ErrorType errorType) {
+ const int inputSize, const int totalInputIndex,
+ const ErrorTypeUtils::ErrorType errorType) {
addDistance(spatialCost, languageCost, doNormalization, inputSize, totalInputIndex);
- switch (errorType) {
- case ET_EDIT_CORRECTION:
- ++mEditCorrectionCount;
- mExactMatch = false;
- break;
- case ET_PROXIMITY_CORRECTION:
- ++mProximityCorrectionCount;
- mExactMatch = false;
- break;
- case ET_COMPLETION:
- mExactMatch = false;
- break;
- case ET_NEW_WORD:
- mExactMatch = false;
- break;
- case ET_INTENTIONAL_OMISSION:
- mExactMatch = false;
- break;
- case ET_NOT_AN_ERROR:
- break;
+ mContainingErrorTypes = mContainingErrorTypes | errorType;
+ if (ErrorTypeUtils::isEditCorrectionError(errorType)) {
+ ++mEditCorrectionCount;
+ }
+ if (ErrorTypeUtils::isProximityCorrectionError(errorType)) {
+ ++mProximityCorrectionCount;
}
}
@@ -182,7 +170,7 @@ class DicNodeStateScoring {
}
bool isExactMatch() const {
- return mExactMatch;
+ return ErrorTypeUtils::isExactMatch(mContainingErrorTypes);
}
private:
@@ -199,7 +187,8 @@ class DicNodeStateScoring {
float mSpatialDistance;
float mLanguageDistance;
float mRawLength;
- bool mExactMatch;
+ // All accumulated error types so far
+ ErrorTypeUtils::ErrorType mContainingErrorTypes;
float mNormalizedCompoundDistanceAfterFirstWord;
AK_FORCE_INLINE void addDistance(float spatialDistance, float languageDistance,
diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
index 71f4ef6ea..c2a15a312 100644
--- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
@@ -144,7 +144,7 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng
int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
const bool forceLowerCaseSearch) const {
if (0 >= prevWordLength) return NOT_A_DICT_POS;
- int pos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(prevWord, prevWordLength,
+ int pos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(prevWord, prevWordLength,
forceLowerCaseSearch);
if (NOT_A_DICT_POS == pos) return NOT_A_DICT_POS;
return mDictionaryStructurePolicy->getBigramsPositionOfPtNode(pos);
@@ -155,7 +155,7 @@ int BigramDictionary::getBigramProbability(const int *word0, int length0, const
int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */);
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
if (NOT_A_DICT_POS == pos) return NOT_A_PROBABILITY;
- int nextWordPos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(word1, length1,
+ int nextWordPos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(word1, length1,
false /* forceLowerCaseSearch */);
if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY;
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index 59ead1894..7b83f27df 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -21,9 +21,7 @@
#include <stdint.h>
#include "defines.h"
-#include "suggest/core/dictionary/bigram_dictionary.h"
#include "suggest/core/policy/dictionary_header_structure_policy.h"
-#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "suggest/core/session/dic_traverse_session.h"
#include "suggest/core/suggest.h"
#include "suggest/core/suggest_options.h"
@@ -35,22 +33,15 @@ namespace latinime {
const int Dictionary::HEADER_ATTRIBUTE_BUFFER_SIZE = 32;
-Dictionary::Dictionary(JNIEnv *env,
- DictionaryStructureWithBufferPolicy *const dictionaryStructureWithBufferPolicy)
+Dictionary::Dictionary(JNIEnv *env, const DictionaryStructureWithBufferPolicy::StructurePoilcyPtr
+ &dictionaryStructureWithBufferPolicy)
: mDictionaryStructureWithBufferPolicy(dictionaryStructureWithBufferPolicy),
- mBigramDictionary(new BigramDictionary(mDictionaryStructureWithBufferPolicy)),
+ mBigramDictionary(new BigramDictionary(mDictionaryStructureWithBufferPolicy.get())),
mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())),
mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) {
logDictionaryInfo(env);
}
-Dictionary::~Dictionary() {
- delete mBigramDictionary;
- delete mGestureSuggest;
- delete mTypingSuggest;
- delete mDictionaryStructureWithBufferPolicy;
-}
-
int Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
int inputSize, int *prevWordCodePoints, int prevWordLength, int commitPoint,
@@ -60,7 +51,7 @@ int Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession
if (suggestOptions->isGesture()) {
DicTraverseSession::initSessionInstance(
traverseSession, this, prevWordCodePoints, prevWordLength, suggestOptions);
- result = mGestureSuggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
+ result = mGestureSuggest.get()->getSuggestions(proximityInfo, traverseSession, xcoordinates,
ycoordinates, times, pointerIds, inputCodePoints, inputSize, commitPoint, outWords,
frequencies, spaceIndices, outputTypes, outputAutoCommitFirstWordConfidence);
if (DEBUG_DICT) {
@@ -70,7 +61,7 @@ int Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession
} else {
DicTraverseSession::initSessionInstance(
traverseSession, this, prevWordCodePoints, prevWordLength, suggestOptions);
- result = mTypingSuggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
+ result = mTypingSuggest.get()->getSuggestions(proximityInfo, traverseSession, xcoordinates,
ycoordinates, times, pointerIds, inputCodePoints, inputSize, commitPoint,
outWords, frequencies, spaceIndices, outputTypes,
outputAutoCommitFirstWordConfidence);
@@ -84,11 +75,12 @@ int Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession
int Dictionary::getBigrams(const int *word, int length, int *outWords, int *frequencies,
int *outputTypes) const {
if (length <= 0) return 0;
- return mBigramDictionary->getPredictions(word, length, outWords, frequencies, outputTypes);
+ return mBigramDictionary.get()->getPredictions(word, length, outWords, frequencies,
+ outputTypes);
}
int Dictionary::getProbability(const int *word, int length) const {
- int pos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(word, length,
+ int pos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(word, length,
false /* forceLowerCaseSearch */);
if (NOT_A_DICT_POS == pos) {
return NOT_A_PROBABILITY;
@@ -98,39 +90,39 @@ int Dictionary::getProbability(const int *word, int length) const {
int Dictionary::getBigramProbability(const int *word0, int length0, const int *word1,
int length1) const {
- return mBigramDictionary->getBigramProbability(word0, length0, word1, length1);
+ return mBigramDictionary.get()->getBigramProbability(word0, length0, word1, length1);
}
void Dictionary::addUnigramWord(const int *const word, const int length, const int probability) {
- mDictionaryStructureWithBufferPolicy->addUnigramWord(word, length, probability);
+ mDictionaryStructureWithBufferPolicy.get()->addUnigramWord(word, length, probability);
}
void Dictionary::addBigramWords(const int *const word0, const int length0, const int *const word1,
const int length1, const int probability) {
- mDictionaryStructureWithBufferPolicy->addBigramWords(word0, length0, word1, length1,
+ mDictionaryStructureWithBufferPolicy.get()->addBigramWords(word0, length0, word1, length1,
probability);
}
void Dictionary::removeBigramWords(const int *const word0, const int length0,
const int *const word1, const int length1) {
- mDictionaryStructureWithBufferPolicy->removeBigramWords(word0, length0, word1, length1);
+ mDictionaryStructureWithBufferPolicy.get()->removeBigramWords(word0, length0, word1, length1);
}
void Dictionary::flush(const char *const filePath) {
- mDictionaryStructureWithBufferPolicy->flush(filePath);
+ mDictionaryStructureWithBufferPolicy.get()->flush(filePath);
}
void Dictionary::flushWithGC(const char *const filePath) {
- mDictionaryStructureWithBufferPolicy->flushWithGC(filePath);
+ mDictionaryStructureWithBufferPolicy.get()->flushWithGC(filePath);
}
bool Dictionary::needsToRunGC(const bool mindsBlockByGC) {
- return mDictionaryStructureWithBufferPolicy->needsToRunGC(mindsBlockByGC);
+ return mDictionaryStructureWithBufferPolicy.get()->needsToRunGC(mindsBlockByGC);
}
void Dictionary::getProperty(const char *const query, char *const outResult,
const int maxResultLength) {
- return mDictionaryStructureWithBufferPolicy->getProperty(query, outResult, maxResultLength);
+ return mDictionaryStructureWithBufferPolicy.get()->getProperty(query, outResult, maxResultLength);
}
void Dictionary::logDictionaryInfo(JNIEnv *const env) const {
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h
index 0195d5bf0..e52a40f1d 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h
@@ -21,14 +21,16 @@
#include "defines.h"
#include "jni.h"
+#include "suggest/core/dictionary/bigram_dictionary.h"
+#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+#include "suggest/core/suggest_interface.h"
+#include "utils/exclusive_ownership_pointer.h"
namespace latinime {
-class BigramDictionary;
class DictionaryStructureWithBufferPolicy;
class DicTraverseSession;
class ProximityInfo;
-class SuggestInterface;
class SuggestOptions;
class Dictionary {
@@ -53,8 +55,8 @@ class Dictionary {
static const int KIND_FLAG_POSSIBLY_OFFENSIVE = 0x80000000;
static const int KIND_FLAG_EXACT_MATCH = 0x40000000;
- Dictionary(JNIEnv *env,
- DictionaryStructureWithBufferPolicy *const dictionaryStructureWithBufferPoilcy);
+ Dictionary(JNIEnv *env, const DictionaryStructureWithBufferPolicy::StructurePoilcyPtr
+ &dictionaryStructureWithBufferPoilcy);
int getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
@@ -87,20 +89,22 @@ class Dictionary {
const int maxResultLength);
const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const {
- return mDictionaryStructureWithBufferPolicy;
+ return mDictionaryStructureWithBufferPolicy.get();
}
- virtual ~Dictionary();
-
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Dictionary);
+ typedef ExclusiveOwnershipPointer<BigramDictionary> BigramDictionaryPtr;
+ typedef ExclusiveOwnershipPointer<SuggestInterface> SuggestInterfacePtr;
+
static const int HEADER_ATTRIBUTE_BUFFER_SIZE;
- DictionaryStructureWithBufferPolicy *const mDictionaryStructureWithBufferPolicy;
- const BigramDictionary *const mBigramDictionary;
- const SuggestInterface *const mGestureSuggest;
- const SuggestInterface *const mTypingSuggest;
+ const DictionaryStructureWithBufferPolicy::StructurePoilcyPtr
+ mDictionaryStructureWithBufferPolicy;
+ const BigramDictionaryPtr mBigramDictionary;
+ const SuggestInterfacePtr mGestureSuggest;
+ const SuggestInterfacePtr mTypingSuggest;
void logDictionaryInfo(JNIEnv *const env) const;
};
diff --git a/native/jni/src/suggest/core/dictionary/error_type_utils.cpp b/native/jni/src/suggest/core/dictionary/error_type_utils.cpp
new file mode 100644
index 000000000..0635fef7e
--- /dev/null
+++ b/native/jni/src/suggest/core/dictionary/error_type_utils.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/core/dictionary/error_type_utils.h"
+
+namespace latinime {
+
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::NOT_AN_ERROR = 0x0;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_CASE_ERROR = 0x1;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR = 0x2;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_DIGRAPH = 0x4;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::INTENTIONAL_OMISSION = 0x8;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::EDIT_CORRECTION = 0x10;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::PROXIMITY_CORRECTION = 0x20;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::COMPLETION = 0x40;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::NEW_WORD = 0x80;
+
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::ERRORS_TREATED_AS_AN_EXACT_MATCH =
+ NOT_AN_ERROR | MATCH_WITH_CASE_ERROR | MATCH_WITH_ACCENT_ERROR | MATCH_WITH_DIGRAPH;
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/error_type_utils.h b/native/jni/src/suggest/core/dictionary/error_type_utils.h
new file mode 100644
index 000000000..ab4a65e48
--- /dev/null
+++ b/native/jni/src/suggest/core/dictionary/error_type_utils.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_ERROR_TYPE_UTILS_H
+#define LATINIME_ERROR_TYPE_UTILS_H
+
+#include <stdint.h>
+
+#include "defines.h"
+
+namespace latinime {
+
+class ErrorTypeUtils {
+ public:
+ // ErrorType is mainly decided by CorrectionType but it is also depending on if
+ // the correction has really been performed or not.
+ typedef uint32_t ErrorType;
+
+ static const ErrorType NOT_AN_ERROR;
+ static const ErrorType MATCH_WITH_CASE_ERROR;
+ static const ErrorType MATCH_WITH_ACCENT_ERROR;
+ static const ErrorType MATCH_WITH_DIGRAPH;
+ // Treat error as an intentional omission when the CorrectionType is omission and the node can
+ // be intentional omission.
+ static const ErrorType INTENTIONAL_OMISSION;
+ // Substitution, omission and transposition
+ static const ErrorType EDIT_CORRECTION;
+ // Proximity error
+ static const ErrorType PROXIMITY_CORRECTION;
+ // Completion
+ static const ErrorType COMPLETION;
+ // New word
+ // TODO: Remove.
+ // A new word error should be an edit correction error or a proximity correction error.
+ static const ErrorType NEW_WORD;
+
+ // TODO: Differentiate errors.
+ static bool isExactMatch(const ErrorType containingErrors) {
+ return (containingErrors & ~ERRORS_TREATED_AS_AN_EXACT_MATCH) == 0;
+ }
+
+ static bool isEditCorrectionError(const ErrorType errorType) {
+ return (errorType & EDIT_CORRECTION) != 0;
+ }
+
+ static bool isProximityCorrectionError(const ErrorType errorType) {
+ return (errorType & PROXIMITY_CORRECTION) != 0;
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(ErrorTypeUtils);
+
+ static const ErrorType ERRORS_TREATED_AS_AN_EXACT_MATCH;
+};
+} // namespace latinime
+#endif // LATINIME_ERROR_TYPE_UTILS_H
diff --git a/native/jni/src/suggest/core/layout/proximity_info.cpp b/native/jni/src/suggest/core/layout/proximity_info.cpp
index e64476d82..ee8e59ef9 100644
--- a/native/jni/src/suggest/core/layout/proximity_info.cpp
+++ b/native/jni/src/suggest/core/layout/proximity_info.cpp
@@ -71,7 +71,7 @@ ProximityInfo::ProximityInfo(JNIEnv *env, const jstring localeJStr,
&& sweetSpotCenterYs && sweetSpotRadii),
mProximityCharsArray(new int[GRID_WIDTH * GRID_HEIGHT * MAX_PROXIMITY_CHARS_SIZE
/* proximityCharsLength */]),
- mCodeToKeyMap() {
+ mLowerCodePointToKeyMap() {
/* Let's check the input array length here to make sure */
const jsize proximityCharsLength = env->GetArrayLength(proximityChars);
if (proximityCharsLength != GRID_WIDTH * GRID_HEIGHT * MAX_PROXIMITY_CHARS_SIZE) {
@@ -147,7 +147,14 @@ int ProximityInfo::getCodePointOf(const int keyIndex) const {
if (keyIndex < 0 || keyIndex >= KEY_COUNT) {
return NOT_A_CODE_POINT;
}
- return mKeyIndexToCodePointG[keyIndex];
+ return mKeyIndexToLowerCodePointG[keyIndex];
+}
+
+int ProximityInfo::getOriginalCodePointOf(const int keyIndex) const {
+ if (keyIndex < 0 || keyIndex >= KEY_COUNT) {
+ return NOT_A_CODE_POINT;
+ }
+ return mKeyIndexToOriginalCodePoint[keyIndex];
}
void ProximityInfo::initializeG() {
@@ -164,8 +171,9 @@ void ProximityInfo::initializeG() {
const float gapY = sweetSpotCenterY - mCenterYsG[i];
mSweetSpotCenterYsG[i] = static_cast<int>(mCenterYsG[i] + gapY * verticalScale);
}
- mCodeToKeyMap[lowerCode] = i;
- mKeyIndexToCodePointG[i] = lowerCode;
+ mLowerCodePointToKeyMap[lowerCode] = i;
+ mKeyIndexToOriginalCodePoint[i] = code;
+ mKeyIndexToLowerCodePointG[i] = lowerCode;
}
for (int i = 0; i < KEY_COUNT; i++) {
mKeyKeyDistancesG[i][i] = 0;
diff --git a/native/jni/src/suggest/core/layout/proximity_info.h b/native/jni/src/suggest/core/layout/proximity_info.h
index f25949001..a91b9d674 100644
--- a/native/jni/src/suggest/core/layout/proximity_info.h
+++ b/native/jni/src/suggest/core/layout/proximity_info.h
@@ -39,6 +39,7 @@ class ProximityInfo {
float getNormalizedSquaredDistanceFromCenterFloatG(
const int keyId, const int x, const int y, const bool isGeometric) const;
int getCodePointOf(const int keyIndex) const;
+ int getOriginalCodePointOf(const int keyIndex) const;
bool hasSweetSpotData(const int keyIndex) const {
// When there are no calibration data for a key,
// the radius of the key is assigned to zero.
@@ -76,11 +77,11 @@ class ProximityInfo {
ProximityInfoUtils::initializeProximities(inputCodes, inputXCoordinates, inputYCoordinates,
inputSize, mKeyXCoordinates, mKeyYCoordinates, mKeyWidths, mKeyHeights,
mProximityCharsArray, CELL_HEIGHT, CELL_WIDTH, GRID_WIDTH, MOST_COMMON_KEY_WIDTH,
- KEY_COUNT, mLocaleStr, &mCodeToKeyMap, allInputCodes);
+ KEY_COUNT, mLocaleStr, &mLowerCodePointToKeyMap, allInputCodes);
}
AK_FORCE_INLINE int getKeyIndexOf(const int c) const {
- return ProximityInfoUtils::getKeyIndexOf(KEY_COUNT, c, &mCodeToKeyMap);
+ return ProximityInfoUtils::getKeyIndexOf(KEY_COUNT, c, &mLowerCodePointToKeyMap);
}
AK_FORCE_INLINE bool isCodePointOnKeyboard(const int codePoint) const {
@@ -117,9 +118,9 @@ class ProximityInfo {
// Sweet spots for geometric input. Note that we have extra sweet spots only for Y coordinates.
float mSweetSpotCenterYsG[MAX_KEY_COUNT_IN_A_KEYBOARD];
float mSweetSpotRadii[MAX_KEY_COUNT_IN_A_KEYBOARD];
- hash_map_compat<int, int> mCodeToKeyMap;
-
- int mKeyIndexToCodePointG[MAX_KEY_COUNT_IN_A_KEYBOARD];
+ hash_map_compat<int, int> mLowerCodePointToKeyMap;
+ int mKeyIndexToOriginalCodePoint[MAX_KEY_COUNT_IN_A_KEYBOARD];
+ int mKeyIndexToLowerCodePointG[MAX_KEY_COUNT_IN_A_KEYBOARD];
int mCenterXsG[MAX_KEY_COUNT_IN_A_KEYBOARD];
int mCenterYsG[MAX_KEY_COUNT_IN_A_KEYBOARD];
int mKeyKeyDistancesG[MAX_KEY_COUNT_IN_A_KEYBOARD][MAX_KEY_COUNT_IN_A_KEYBOARD];
diff --git a/native/jni/src/suggest/core/layout/proximity_info_state.cpp b/native/jni/src/suggest/core/layout/proximity_info_state.cpp
index fbabd92f2..bb4b41714 100644
--- a/native/jni/src/suggest/core/layout/proximity_info_state.cpp
+++ b/native/jni/src/suggest/core/layout/proximity_info_state.cpp
@@ -30,6 +30,12 @@
namespace latinime {
+int ProximityInfoState::getPrimaryOriginalCodePointAt(const int index) const {
+ const int primaryCodePoint = getPrimaryCodePointAt(index);
+ const int keyIndex = mProximityInfo->getKeyIndexOf(primaryCodePoint);
+ return mProximityInfo->getOriginalCodePointOf(keyIndex);
+}
+
// TODO: Remove the dependency of "isGeometric"
void ProximityInfoState::initInputParams(const int pointerId, const float maxPointToKeyLength,
const ProximityInfo *proximityInfo, const int *const inputCodes, const int inputSize,
diff --git a/native/jni/src/suggest/core/layout/proximity_info_state.h b/native/jni/src/suggest/core/layout/proximity_info_state.h
index c94060fa9..e941e43d8 100644
--- a/native/jni/src/suggest/core/layout/proximity_info_state.h
+++ b/native/jni/src/suggest/core/layout/proximity_info_state.h
@@ -65,6 +65,8 @@ class ProximityInfoState {
return getProximityCodePointsAt(index)[0];
}
+ int getPrimaryOriginalCodePointAt(const int index) const;
+
inline bool sameAsTyped(const int *word, int length) const {
if (length != mSampledInputSize) {
return false;
diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
index 41f82049f..e649844dc 100644
--- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
@@ -18,6 +18,7 @@
#define LATINIME_DICTIONARY_STRUCTURE_POLICY_H
#include "defines.h"
+#include "utils/exclusive_ownership_pointer.h"
namespace latinime {
@@ -33,18 +34,20 @@ class DictionaryShortcutsStructurePolicy;
*/
class DictionaryStructureWithBufferPolicy {
public:
+ typedef ExclusiveOwnershipPointer<DictionaryStructureWithBufferPolicy> StructurePoilcyPtr;
+
virtual ~DictionaryStructureWithBufferPolicy() {}
virtual int getRootPosition() const = 0;
- virtual void createAndGetAllChildNodes(const DicNode *const dicNode,
+ virtual void createAndGetAllChildDicNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const = 0;
virtual int getCodePointsAndProbabilityAndReturnCodePointCount(
const int nodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const = 0;
- virtual int getTerminalNodePositionOfWord(const int *const inWord,
+ virtual int getTerminalPtNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const = 0;
virtual int getProbability(const int unigramProbability,
diff --git a/native/jni/src/suggest/core/policy/weighting.cpp b/native/jni/src/suggest/core/policy/weighting.cpp
index 0c4016893..c202b81fe 100644
--- a/native/jni/src/suggest/core/policy/weighting.cpp
+++ b/native/jni/src/suggest/core/policy/weighting.cpp
@@ -20,6 +20,7 @@
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_profiler.h"
#include "suggest/core/dicnode/dic_node_utils.h"
+#include "suggest/core/dictionary/error_type_utils.h"
#include "suggest/core/session/dic_traverse_session.h"
namespace latinime {
@@ -82,8 +83,8 @@ static inline void profile(const CorrectionType correctionType, DicNode *const n
traverseSession, parentDicNode, dicNode, &inputStateG);
const float languageCost = Weighting::getLanguageCost(weighting, correctionType,
traverseSession, parentDicNode, dicNode, multiBigramMap);
- const ErrorType errorType = weighting->getErrorType(correctionType, traverseSession,
- parentDicNode, dicNode);
+ const ErrorTypeUtils::ErrorType errorType = weighting->getErrorType(correctionType,
+ traverseSession, parentDicNode, dicNode);
profile(correctionType, dicNode);
if (inputStateG.mNeedsToUpdateInputStateG) {
dicNode->updateInputIndexG(&inputStateG);
diff --git a/native/jni/src/suggest/core/policy/weighting.h b/native/jni/src/suggest/core/policy/weighting.h
index 2d49e98a6..bd6b3cf41 100644
--- a/native/jni/src/suggest/core/policy/weighting.h
+++ b/native/jni/src/suggest/core/policy/weighting.h
@@ -18,6 +18,7 @@
#define LATINIME_WEIGHTING_H
#include "defines.h"
+#include "suggest/core/dictionary/error_type_utils.h"
namespace latinime {
@@ -84,7 +85,7 @@ class Weighting {
virtual float getSpaceSubstitutionCost(const DicTraverseSession *const traverseSession,
const DicNode *const dicNode) const = 0;
- virtual ErrorType getErrorType(const CorrectionType correctionType,
+ virtual ErrorTypeUtils::ErrorType getErrorType(const CorrectionType correctionType,
const DicTraverseSession *const traverseSession,
const DicNode *const parentDicNode, const DicNode *const dicNode) const = 0;
diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.cpp b/native/jni/src/suggest/core/session/dic_traverse_session.cpp
index 50f2bbd8d..5070491f4 100644
--- a/native/jni/src/suggest/core/session/dic_traverse_session.cpp
+++ b/native/jni/src/suggest/core/session/dic_traverse_session.cpp
@@ -35,16 +35,16 @@ void DicTraverseSession::init(const Dictionary *const dictionary, const int *pre
->getMultiWordCostMultiplier();
mSuggestOptions = suggestOptions;
if (!prevWord) {
- mPrevWordPos = NOT_A_DICT_POS;
+ mPrevWordPtNodePos = NOT_A_DICT_POS;
return;
}
// TODO: merge following similar calls to getTerminalPosition into one case-insensitive call.
- mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(
+ mPrevWordPtNodePos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
prevWord, prevWordLength, false /* forceLowerCaseSearch */);
- if (mPrevWordPos == NOT_A_DICT_POS) {
+ if (mPrevWordPtNodePos == NOT_A_DICT_POS) {
// Check bigrams for lower-cased previous word if original was not found. Useful for
// auto-capitalized words like "The [current_word]".
- mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(
+ mPrevWordPtNodePos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
prevWord, prevWordLength, true /* forceLowerCaseSearch */);
}
}
diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.h b/native/jni/src/suggest/core/session/dic_traverse_session.h
index e0b1c67d9..6e4dda44d 100644
--- a/native/jni/src/suggest/core/session/dic_traverse_session.h
+++ b/native/jni/src/suggest/core/session/dic_traverse_session.h
@@ -59,7 +59,7 @@ class DicTraverseSession {
}
AK_FORCE_INLINE DicTraverseSession(JNIEnv *env, jstring localeStr, bool usesLargeCache)
- : mPrevWordPos(NOT_A_DICT_POS), mProximityInfo(0),
+ : mPrevWordPtNodePos(NOT_A_DICT_POS), mProximityInfo(0),
mDictionary(0), mSuggestOptions(0), mDicNodesCache(usesLargeCache),
mMultiBigramMap(), mInputSize(0), mPartiallyCommited(false), mMaxPointerCount(1),
mMultiWordCostMultiplier(1.0f) {
@@ -86,11 +86,9 @@ class DicTraverseSession {
//--------------------
const ProximityInfo *getProximityInfo() const { return mProximityInfo; }
const SuggestOptions *getSuggestOptions() const { return mSuggestOptions; }
- int getPrevWordPos() const { return mPrevWordPos; }
+ int getPrevWordPtNodePos() const { return mPrevWordPtNodePos; }
// TODO: REMOVE
- void setPrevWordPos(int pos) { mPrevWordPos = pos; }
- // TODO: Use proper parameter when changed
- int getDicRootPos() const { return 0; }
+ void setPrevWordPtNodePos(const int ptNodePos) { mPrevWordPtNodePos = ptNodePos; }
DicNodesCache *getDicTraverseCache() { return &mDicNodesCache; }
MultiBigramMap *getMultiBigramMap() { return &mMultiBigramMap; }
const ProximityInfoState *getProximityInfoState(int id) const {
@@ -119,26 +117,13 @@ class DicTraverseSession {
return true;
}
- void getSearchKeys(const DicNode *node, std::vector<int> *const outputSearchKeyVector) const {
- for (int i = 0; i < MAX_POINTER_COUNT_G; ++i) {
- if (!mProximityInfoStates[i].isUsed()) {
- continue;
- }
- const int pointerId = node->getInputIndex(i);
- const std::vector<int> *const searchKeyVector =
- mProximityInfoStates[i].getSearchKeyVector(pointerId);
- outputSearchKeyVector->insert(outputSearchKeyVector->end(), searchKeyVector->begin(),
- searchKeyVector->end());
- }
- }
-
- ProximityType getProximityTypeG(const DicNode *const node, const int childCodePoint) const {
+ ProximityType getProximityTypeG(const DicNode *const dicNode, const int childCodePoint) const {
ProximityType proximityType = UNRELATED_CHAR;
for (int i = 0; i < MAX_POINTER_COUNT_G; ++i) {
if (!mProximityInfoStates[i].isUsed()) {
continue;
}
- const int pointerId = node->getInputIndex(i);
+ const int pointerId = dicNode->getInputIndex(i);
proximityType = mProximityInfoStates[i].getProximityTypeG(pointerId, childCodePoint);
ASSERT(proximityType == UNRELATED_CHAR || proximityType == MATCH_CHAR);
// TODO: Make this more generic
@@ -192,7 +177,7 @@ class DicTraverseSession {
const int *const inputYs, const int *const times, const int *const pointerIds,
const int inputSize, const float maxSpatialDistance, const int maxPointerCount);
- int mPrevWordPos;
+ int mPrevWordPtNodePos;
const ProximityInfo *mProximityInfo;
const Dictionary *mDictionary;
const SuggestOptions *mSuggestOptions;
diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp
index 73ccebc88..2eda414f4 100644
--- a/native/jni/src/suggest/core/suggest.cpp
+++ b/native/jni/src/suggest/core/suggest.cpp
@@ -98,7 +98,7 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPo
// Continue suggestion after partial commit.
DicNode *topDicNode =
traverseSession->getDicTraverseCache()->setCommitPoint(commitPoint);
- traverseSession->setPrevWordPos(topDicNode->getPrevWordNodePos());
+ traverseSession->setPrevWordPtNodePos(topDicNode->getPrevWordPtNodePos());
traverseSession->getDicTraverseCache()->continueSearch();
traverseSession->setPartiallyCommited();
}
@@ -109,7 +109,7 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPo
// Create a new dic node here
DicNode rootNode;
DicNodeUtils::initAsRoot(traverseSession->getDictionaryStructurePolicy(),
- traverseSession->getPrevWordPos(), &rootNode);
+ traverseSession->getPrevWordPtNodePos(), &rootNode);
traverseSession->getDicTraverseCache()->copyPushActive(&rootNode);
}
}
@@ -231,7 +231,7 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
BinaryDictionaryShortcutIterator shortcutIt(
traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(),
traverseSession->getDictionaryStructurePolicy()
- ->getShortcutPositionOfPtNode(terminalDicNode->getPos()));
+ ->getShortcutPositionOfPtNode(terminalDicNode->getPtNodePos()));
// Shortcut is not supported for multiple words suggestions.
// TODO: Check shortcuts during traversal for multiple words suggestions.
const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode);
@@ -421,15 +421,15 @@ void Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const {
}
break;
case UNRELATED_CHAR:
- // Just drop this node and do nothing.
+ // Just drop this dicNode and do nothing.
break;
default:
- // Just drop this node and do nothing.
+ // Just drop this dicNode and do nothing.
break;
}
}
- // Push the node for look-ahead correction
+ // Push the dicNode for look-ahead correction
if (allowsErrorCorrections && canDoLookAheadCorrection) {
traverseSession->getDicTraverseCache()->copyPushNextActive(&dicNode);
}
@@ -442,7 +442,7 @@ void Suggest::processTerminalDicNode(
if (dicNode->getCompoundDistance() >= static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) {
return;
}
- if (!dicNode->isTerminalWordNode()) {
+ if (!dicNode->isTerminalDicNode()) {
return;
}
if (dicNode->shouldBeFilteredBySafetyNetForBigram()) {
@@ -463,7 +463,7 @@ void Suggest::processTerminalDicNode(
/**
* Adds the expanded dicNode to the next search priority queue. Also creates an additional next word
- * (by the space omission error correction) search path if input dicNode is on a terminal node.
+ * (by the space omission error correction) search path if input dicNode is on a terminal.
*/
void Suggest::processExpandedDicNode(
DicTraverseSession *traverseSession, DicNode *dicNode) const {
@@ -505,7 +505,7 @@ void Suggest::processDicNodeAsSubstitution(DicTraverseSession *traverseSession,
processExpandedDicNode(traverseSession, childDicNode);
}
-// Process the node codepoint as a digraph. This means that composite glyphs like the German
+// Process the DicNode codepoint as a digraph. This means that composite glyphs like the German
// u-umlaut is expanded to the transliteration "ue". Note that this happens in parallel with
// the normal non-digraph traversal, so both "uber" and "ueber" can be corrected to "[u-umlaut]ber".
void Suggest::processDicNodeAsDigraph(DicTraverseSession *traverseSession,
@@ -518,7 +518,7 @@ void Suggest::processDicNodeAsDigraph(DicTraverseSession *traverseSession,
/**
* Handle the dicNode as an omission error (e.g., ths => this). Skip the current letter and consider
* matches for all possible next letters. Note that just skipping the current letter without any
- * other conditions tends to flood the search dic nodes cache with omission nodes. Instead, check
+ * other conditions tends to flood the search DicNodes cache with omission DicNodes. Instead, check
* the possible *next* letters after the omission to better limit search to plausible omissions.
* Note that apostrophes are handled as omissions.
*/
@@ -605,7 +605,7 @@ void Suggest::processDicNodeAsTransposition(DicTraverseSession *traverseSession,
}
/**
- * Weight child node by aligning it to the key
+ * Weight child dicNode by aligning it to the key
*/
void Suggest::weightChildNode(DicTraverseSession *traverseSession, DicNode *dicNode) const {
const int inputSize = traverseSession->getInputSize();
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp
index 1926b9831..de9fc9bbc 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp
@@ -16,7 +16,7 @@
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
index b1170e251..83a32fb0b 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
@@ -17,8 +17,8 @@
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
@@ -157,8 +157,9 @@ bool DynamicBigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(
}
const int bigramTargetNodePos =
followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
- nodeReader.fetchNodeInfoInBufferFromPtNodePos(bigramTargetNodePos);
- if (nodeReader.isDeleted() || !nodeReader.isTerminal()
+ const PtNodeParams ptNodeParams(nodeReader.fetchNodeInfoInBufferFromPtNodePos(
+ bigramTargetNodePos));
+ if (ptNodeParams.isDeleted() || !ptNodeParams.isTerminal()
|| bigramTargetNodePos == NOT_A_DICT_POS) {
// The target is no longer valid terminal. Invalidate the current bigram entry.
if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
@@ -342,20 +343,22 @@ int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos(
if (originalBigramPos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS;
}
- int currentPos = originalBigramPos;
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy);
- nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos);
+ int currentPos = NOT_A_DICT_POS;
int bigramLinkCount = 0;
- while (nodeReader.getBigramLinkedNodePos() != NOT_A_DICT_POS) {
- currentPos = nodeReader.getBigramLinkedNodePos();
- nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos);
+ int bigramLinkedNodePos = originalBigramPos;
+ do {
+ currentPos = bigramLinkedNodePos;
+ const PtNodeParams ptNodeParams(nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos));
+ bigramLinkedNodePos = ptNodeParams.getBigramLinkedNodePos();
bigramLinkCount++;
if (bigramLinkCount > CONTINUING_BIGRAM_LINK_COUNT_LIMIT) {
AKLOGE("Bigram link is invalid. start position: %d", originalBigramPos);
ASSERT(false);
return NOT_A_DICT_POS;
}
- }
+ bigramLinkedNodePos = ptNodeParams.getBigramLinkedNodePos();
+ } while (bigramLinkedNodePos != NOT_A_DICT_POS);
return currentPos;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
index 0504b59d5..5de456656 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
@@ -22,7 +22,7 @@
#include "defines.h"
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h"
namespace latinime {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp
deleted file mode 100644
index ff80dd2f6..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h"
-
-#include <stdint.h>
-
-#include "defines.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h"
-#include "suggest/policyimpl/dictionary/patricia_trie_policy.h"
-#include "suggest/policyimpl/dictionary/utils/format_utils.h"
-#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
-
-namespace latinime {
-
-/* static */ DictionaryStructureWithBufferPolicy *DictionaryStructureWithBufferPolicyFactory
- ::newDictionaryStructureWithBufferPolicy(const char *const path, const int bufOffset,
- const int size, const bool isUpdatable) {
- // Allocated buffer in MmapedBuffer::openBuffer() will be freed in the destructor of
- // impl classes of DictionaryStructureWithBufferPolicy.
- const MmappedBuffer *const mmapedBuffer = MmappedBuffer::openBuffer(path, bufOffset, size,
- isUpdatable);
- if (!mmapedBuffer) {
- return 0;
- }
- switch (FormatUtils::detectFormatVersion(mmapedBuffer->getBuffer(),
- mmapedBuffer->getBufferSize())) {
- case FormatUtils::VERSION_2:
- return new PatriciaTriePolicy(mmapedBuffer);
- case FormatUtils::VERSION_3:
- return new DynamicPatriciaTriePolicy(mmapedBuffer);
- default:
- AKLOGE("DICT: dictionary format is unknown, bad magic number");
- delete mmapedBuffer;
- ASSERT(false);
- return 0;
- }
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp
deleted file mode 100644
index 2fa3111d3..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
-
-#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
-#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
-
-namespace latinime {
-
-void DynamicPatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode(
- const int ptNodePos, const int maxCodePointCount, int *const outCodePoints) {
- if (ptNodePos < 0 || ptNodePos >= mBuffer->getTailPosition()) {
- // Reading invalid position because of bug or broken dictionary.
- AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d",
- ptNodePos, mBuffer->getTailPosition());
- ASSERT(false);
- invalidatePtNodeInfo();
- return;
- }
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodePos);
- const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
- int pos = ptNodePos;
- mHeadPos = ptNodePos;
- if (usesAdditionalBuffer) {
- pos -= mBuffer->getOriginalBufferSize();
- }
- mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
- const int parentPosOffset =
- DynamicPatriciaTrieReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(dictBuf,
- &pos);
- mParentPos = DynamicPatriciaTrieReadingUtils::getParentPtNodePos(parentPosOffset, mHeadPos);
- if (outCodePoints != 0) {
- mCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
- dictBuf, mFlags, maxCodePointCount, outCodePoints, &pos);
- } else {
- mCodePointCount = PatriciaTrieReadingUtils::skipCharacters(
- dictBuf, mFlags, MAX_WORD_LENGTH, &pos);
- }
- if (isTerminal()) {
- mProbabilityFieldPos = pos;
- if (usesAdditionalBuffer) {
- mProbabilityFieldPos += mBuffer->getOriginalBufferSize();
- }
- mProbability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictBuf, &pos);
- } else {
- mProbabilityFieldPos = NOT_A_DICT_POS;
- mProbability = NOT_A_PROBABILITY;
- }
- mChildrenPosFieldPos = pos;
- if (usesAdditionalBuffer) {
- mChildrenPosFieldPos += mBuffer->getOriginalBufferSize();
- }
- mChildrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
- dictBuf, &pos);
- if (usesAdditionalBuffer && mChildrenPos != NOT_A_DICT_POS) {
- mChildrenPos += mBuffer->getOriginalBufferSize();
- }
- if (mSiblingPos == NOT_A_DICT_POS) {
- if (DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) {
- mBigramLinkedNodePos = mChildrenPos;
- } else {
- mBigramLinkedNodePos = NOT_A_DICT_POS;
- }
- }
- if (usesAdditionalBuffer) {
- pos += mBuffer->getOriginalBufferSize();
- }
- if (PatriciaTrieReadingUtils::hasShortcutTargets(mFlags)) {
- mShortcutPos = pos;
- mShortcutsPolicy->skipAllShortcuts(&pos);
- } else {
- mShortcutPos = NOT_A_DICT_POS;
- }
- if (PatriciaTrieReadingUtils::hasBigrams(mFlags)) {
- mBigramPos = pos;
- mBigramsPolicy->skipAllBigrams(&pos);
- } else {
- mBigramPos = NOT_A_DICT_POS;
- }
- // Update siblingPos if needed.
- if (mSiblingPos == NOT_A_DICT_POS) {
- // Sibling position is the tail position of current node.
- mSiblingPos = pos;
- }
- // Read destination node if the read node is a moved node.
- if (DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) {
- // The destination position is stored at the same place as the parent position.
- fetchPtNodeInfoFromBufferAndProcessMovedPtNode(mParentPos, maxCodePointCount,
- outCodePoints);
- }
-}
-
-void DynamicPatriciaTrieNodeReader::invalidatePtNodeInfo() {
- mHeadPos = NOT_A_DICT_POS;
- mFlags = 0;
- mParentPos = NOT_A_DICT_POS;
- mCodePointCount = 0;
- mProbabilityFieldPos = NOT_A_DICT_POS;
- mProbability = NOT_A_PROBABILITY;
- mChildrenPosFieldPos = NOT_A_DICT_POS;
- mChildrenPos = NOT_A_DICT_POS;
- mBigramLinkedNodePos = NOT_A_DICT_POS;
- mShortcutPos = NOT_A_DICT_POS;
- mBigramPos = NOT_A_DICT_POS;
- mSiblingPos = NOT_A_DICT_POS;
-}
-
-}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h
deleted file mode 100644
index 3b36d425f..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H
-#define LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H
-
-#include <stdint.h>
-
-#include "defines.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
-
-namespace latinime {
-
-class BufferWithExtendableBuffer;
-class DictionaryBigramsStructurePolicy;
-class DictionaryShortcutsStructurePolicy;
-
-/*
- * This class is used for helping to read nodes of dynamic patricia trie. This class handles moved
- * node and reads node attributes.
- */
-class DynamicPatriciaTrieNodeReader {
- public:
- DynamicPatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer,
- const DictionaryBigramsStructurePolicy *const bigramsPolicy,
- const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
- : mBuffer(buffer), mBigramsPolicy(bigramsPolicy),
- mShortcutsPolicy(shortcutsPolicy), mHeadPos(NOT_A_DICT_POS), mFlags(0),
- mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbabilityFieldPos(NOT_A_DICT_POS),
- mProbability(NOT_A_PROBABILITY), mChildrenPosFieldPos(NOT_A_DICT_POS),
- mChildrenPos(NOT_A_DICT_POS), mBigramLinkedNodePos(NOT_A_DICT_POS),
- mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS),
- mSiblingPos(NOT_A_DICT_POS) {}
-
- ~DynamicPatriciaTrieNodeReader() {}
-
- // Reads PtNode information from dictionary buffer and updates members with the information.
- AK_FORCE_INLINE void fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) {
- fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(ptNodePos ,
- 0 /* maxCodePointCount */, 0 /* outCodePoints */);
- }
-
- AK_FORCE_INLINE void fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(
- const int ptNodePos, const int maxCodePointCount, int *const outCodePoints) {
- mSiblingPos = NOT_A_DICT_POS;
- mBigramLinkedNodePos = NOT_A_DICT_POS;
- fetchPtNodeInfoFromBufferAndProcessMovedPtNode(ptNodePos, maxCodePointCount, outCodePoints);
- }
-
- // HeadPos is different from NodePos when the current PtNode is a moved PtNode.
- AK_FORCE_INLINE int getHeadPos() const {
- return mHeadPos;
- }
-
- // Flags
- AK_FORCE_INLINE bool isDeleted() const {
- return DynamicPatriciaTrieReadingUtils::isDeleted(mFlags);
- }
-
- AK_FORCE_INLINE bool hasChildren() const {
- return mChildrenPos != NOT_A_DICT_POS;
- }
-
- AK_FORCE_INLINE bool isTerminal() const {
- return PatriciaTrieReadingUtils::isTerminal(mFlags);
- }
-
- AK_FORCE_INLINE bool isBlacklisted() const {
- return PatriciaTrieReadingUtils::isBlacklisted(mFlags);
- }
-
- AK_FORCE_INLINE bool isNotAWord() const {
- return PatriciaTrieReadingUtils::isNotAWord(mFlags);
- }
-
- // Parent node position
- AK_FORCE_INLINE int getParentPos() const {
- return mParentPos;
- }
-
- // Number of code points
- AK_FORCE_INLINE uint8_t getCodePointCount() const {
- return mCodePointCount;
- }
-
- // Probability
- AK_FORCE_INLINE int getProbabilityFieldPos() const {
- return mProbabilityFieldPos;
- }
-
- AK_FORCE_INLINE int getProbability() const {
- return mProbability;
- }
-
- // Children PtNode array position
- AK_FORCE_INLINE int getChildrenPosFieldPos() const {
- return mChildrenPosFieldPos;
- }
-
- AK_FORCE_INLINE int getChildrenPos() const {
- return mChildrenPos;
- }
-
- // Bigram linked node position.
- AK_FORCE_INLINE int getBigramLinkedNodePos() const {
- return mBigramLinkedNodePos;
- }
-
- // Shortcutlist position
- AK_FORCE_INLINE int getShortcutPos() const {
- return mShortcutPos;
- }
-
- // Bigrams position
- AK_FORCE_INLINE int getBigramsPos() const {
- return mBigramPos;
- }
-
- // Sibling node position
- AK_FORCE_INLINE int getSiblingNodePos() const {
- return mSiblingPos;
- }
-
- private:
- DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieNodeReader);
-
- const BufferWithExtendableBuffer *const mBuffer;
- const DictionaryBigramsStructurePolicy *const mBigramsPolicy;
- const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy;
- int mHeadPos;
- DynamicPatriciaTrieReadingUtils::NodeFlags mFlags;
- int mParentPos;
- uint8_t mCodePointCount;
- int mProbabilityFieldPos;
- int mProbability;
- int mChildrenPosFieldPos;
- int mChildrenPos;
- int mBigramLinkedNodePos;
- int mShortcutPos;
- int mBigramPos;
- int mSiblingPos;
-
- void fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
- const int maxCodePointCount, int *const outCodePoints);
-
- void invalidatePtNodeInfo();
-};
-} // namespace latinime
-#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
index a9c7805a8..7c06a7117 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
@@ -30,8 +30,8 @@ namespace latinime {
class HeaderPolicy : public DictionaryHeaderStructurePolicy {
public:
// Reads information from existing dictionary buffer.
- HeaderPolicy(const uint8_t *const dictBuf, const int dictSize)
- : mDictFormatVersion(FormatUtils::detectFormatVersion(dictBuf, dictSize)),
+ HeaderPolicy(const uint8_t *const dictBuf, const FormatUtils::FORMAT_VERSION formatVersion)
+ : mDictFormatVersion(formatVersion),
mDictionaryFlags(HeaderReadWriteUtils::getFlags(dictBuf)),
mSize(HeaderReadWriteUtils::getHeaderSize(dictBuf)),
mAttributeMap(createAttributeMapAndReadAllAttributes(dictBuf)),
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
index 5ded8f6a1..5ef8e50b4 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
@@ -118,6 +118,9 @@ const char *const HeaderReadWriteUtils::REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY
case FormatUtils::VERSION_3:
return buffer->writeUintAndAdvancePosition(3 /* data */,
HEADER_DICTIONARY_VERSION_SIZE, writingPos);
+ case FormatUtils::VERSION_4:
+ return buffer->writeUintAndAdvancePosition(4 /* data */,
+ HEADER_DICTIONARY_VERSION_SIZE, writingPos);
default:
return false;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
new file mode 100644
index 000000000..3ab6a8e21
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
+
+#include <stdint.h>
+#include <string>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h"
+#include "suggest/policyimpl/dictionary/utils/format_utils.h"
+#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+
+namespace latinime {
+
+/* static */ DictionaryStructureWithBufferPolicy::StructurePoilcyPtr
+ DictionaryStructureWithBufferPolicyFactory
+ ::newDictionaryStructureWithBufferPolicy(const char *const path,
+ const int bufOffset, const int size, const bool isUpdatable) {
+ // Allocated buffer in MmapedBuffer::newBuffer() will be freed in the destructor of
+ // MmappedBufferWrapper if the instance has the responsibility.
+ MmappedBuffer::MmappedBufferPtr mmappedBuffer(MmappedBuffer::openBuffer(path, bufOffset, size,
+ isUpdatable));
+ if (!mmappedBuffer.get()) {
+ return DictionaryStructureWithBufferPolicy::StructurePoilcyPtr(0);
+ }
+ switch (FormatUtils::detectFormatVersion(mmappedBuffer.get()->getBuffer(),
+ mmappedBuffer.get()->getBufferSize())) {
+ case FormatUtils::VERSION_2:
+ return DictionaryStructureWithBufferPolicy::StructurePoilcyPtr(
+ new PatriciaTriePolicy(mmappedBuffer));
+ case FormatUtils::VERSION_3:
+ return DictionaryStructureWithBufferPolicy::StructurePoilcyPtr(
+ new DynamicPatriciaTriePolicy(mmappedBuffer));
+ case FormatUtils::VERSION_4: {
+ std::string dictDirPath(path);
+ const std::string::size_type pos =
+ dictDirPath.rfind(Ver4DictConstants::TRIE_FILE_EXTENSION);
+ if (pos == std::string::npos) {
+ // Dictionary file name is not valid as a version 4 dictionary.
+ return DictionaryStructureWithBufferPolicy::StructurePoilcyPtr(0);
+ }
+ // Removing extension to get the base path.
+ dictDirPath.erase(pos);
+ const Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers(
+ Ver4DictBuffers::openVer4DictBuffers(dictDirPath.c_str(), mmappedBuffer));
+ if (!dictBuffers.get()->isValid()) {
+ AKLOGE("DICT: The dictionary doesn't satisfy ver4 format requirements.");
+ ASSERT(false);
+ return DictionaryStructureWithBufferPolicy::StructurePoilcyPtr(0);
+ }
+ return DictionaryStructureWithBufferPolicy::StructurePoilcyPtr(
+ new Ver4PatriciaTriePolicy(dictBuffers));
+ }
+ default:
+ AKLOGE("DICT: dictionary format is unknown, bad magic number");
+ ASSERT(false);
+ return DictionaryStructureWithBufferPolicy::StructurePoilcyPtr(0);
+ }
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h
index 8cebc3b16..1359575f1 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h
@@ -21,13 +21,15 @@
#include "defines.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+#include "utils/exclusive_ownership_pointer.h"
namespace latinime {
class DictionaryStructureWithBufferPolicyFactory {
public:
- static DictionaryStructureWithBufferPolicy *newDictionaryStructureWithBufferPolicy(
- const char *const path, const int bufOffset, const int size, const bool isUpdatable);
+ static DictionaryStructureWithBufferPolicy::StructurePoilcyPtr
+ newDictionaryStructureWithBufferPolicy(const char *const path, const int bufOffset,
+ const int size, const bool isUpdatable);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryStructureWithBufferPolicyFactory);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
new file mode 100644
index 000000000..7bdd829cd
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
@@ -0,0 +1,185 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PT_NODE_PARAMS_H
+#define LATINIME_PT_NODE_PARAMS_H
+
+#include <cstring>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+
+namespace latinime {
+
+// This class has information of a PtNode. This class is immutable.
+class PtNodeParams {
+ public:
+ // Invalid PtNode.
+ PtNodeParams() : mHeadPos(NOT_A_DICT_POS), mFlags(0), mParentPos(NOT_A_DICT_POS),
+ mCodePointCount(0), mCodePoints(), mTerminalIdFieldPos(NOT_A_DICT_POS),
+ mTerminalId(Ver4DictConstants::NOT_A_TERMINAL), mProbabilityFieldPos(NOT_A_DICT_POS),
+ mProbability(NOT_A_PROBABILITY), mChildrenPosFieldPos(NOT_A_DICT_POS),
+ mChildrenPos(NOT_A_DICT_POS), mBigramLinkedNodePos(NOT_A_DICT_POS),
+ mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS),
+ mSiblingPos(NOT_A_DICT_POS) {}
+
+ PtNodeParams(const PtNodeParams& ptNodeParams)
+ : mHeadPos(ptNodeParams.mHeadPos), mFlags(ptNodeParams.mFlags),
+ mParentPos(ptNodeParams.mParentPos), mCodePointCount(ptNodeParams.mCodePointCount),
+ mCodePoints(), mTerminalIdFieldPos(ptNodeParams.mTerminalIdFieldPos),
+ mTerminalId(ptNodeParams.mTerminalId),
+ mProbabilityFieldPos(ptNodeParams.mProbabilityFieldPos),
+ mProbability(ptNodeParams.mProbability),
+ mChildrenPosFieldPos(ptNodeParams.mChildrenPosFieldPos),
+ mChildrenPos(ptNodeParams.mChildrenPos),
+ mBigramLinkedNodePos(ptNodeParams.mBigramLinkedNodePos),
+ mShortcutPos(ptNodeParams.mShortcutPos), mBigramPos(ptNodeParams.mBigramPos),
+ mSiblingPos(ptNodeParams.mSiblingPos) {
+ memcpy(mCodePoints, ptNodeParams.getCodePoints(), sizeof(int) * mCodePointCount);
+ }
+
+ PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags,
+ const int parentPos, const int codePointCount, const int *const codePoints,
+ const int probabilityFieldPos, const int probability, const int childrenPosFieldPos,
+ const int childrenPos, const int bigramLinkedNodePos, const int shortcutPos,
+ const int bigramPos, const int siblingPos)
+ : mHeadPos(headPos), mFlags(flags), mParentPos(parentPos),
+ mCodePointCount(codePointCount), mCodePoints(),
+ mTerminalIdFieldPos(NOT_A_DICT_POS), mTerminalId(Ver4DictConstants::NOT_A_TERMINAL),
+ mProbabilityFieldPos(probabilityFieldPos), mProbability(probability),
+ mChildrenPosFieldPos(childrenPosFieldPos), mChildrenPos(childrenPos),
+ mBigramLinkedNodePos(bigramLinkedNodePos), mShortcutPos(shortcutPos),
+ mBigramPos(bigramPos), mSiblingPos(siblingPos) {
+ memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
+ }
+
+ AK_FORCE_INLINE bool isValid() const {
+ return mCodePointCount > 0;
+ }
+
+ // Head position of the PtNode
+ AK_FORCE_INLINE int getHeadPos() const {
+ return mHeadPos;
+ }
+
+ // Flags
+ AK_FORCE_INLINE bool isDeleted() const {
+ return DynamicPatriciaTrieReadingUtils::isDeleted(mFlags);
+ }
+
+ AK_FORCE_INLINE bool hasChildren() const {
+ return mChildrenPos != NOT_A_DICT_POS;
+ }
+
+ AK_FORCE_INLINE bool isTerminal() const {
+ return PatriciaTrieReadingUtils::isTerminal(mFlags);
+ }
+
+ AK_FORCE_INLINE bool isBlacklisted() const {
+ return PatriciaTrieReadingUtils::isBlacklisted(mFlags);
+ }
+
+ AK_FORCE_INLINE bool isNotAWord() const {
+ return PatriciaTrieReadingUtils::isNotAWord(mFlags);
+ }
+
+ // Parent node position
+ AK_FORCE_INLINE int getParentPos() const {
+ return mParentPos;
+ }
+
+ // Number of code points
+ AK_FORCE_INLINE uint8_t getCodePointCount() const {
+ return mCodePointCount;
+ }
+
+ AK_FORCE_INLINE const int *getCodePoints() const {
+ return mCodePoints;
+ }
+
+ // Probability
+ AK_FORCE_INLINE int getTerminalIdFieldPos() const {
+ return mTerminalIdFieldPos;
+ }
+
+ AK_FORCE_INLINE int getTerminalId() const {
+ return mTerminalId;
+ }
+
+ // Probability
+ AK_FORCE_INLINE int getProbabilityFieldPos() const {
+ return mProbabilityFieldPos;
+ }
+
+ AK_FORCE_INLINE int getProbability() const {
+ return mProbability;
+ }
+
+ // Children PtNode array position
+ AK_FORCE_INLINE int getChildrenPosFieldPos() const {
+ return mChildrenPosFieldPos;
+ }
+
+ AK_FORCE_INLINE int getChildrenPos() const {
+ return mChildrenPos;
+ }
+
+ // Bigram linked node position.
+ AK_FORCE_INLINE int getBigramLinkedNodePos() const {
+ return mBigramLinkedNodePos;
+ }
+
+ // Shortcutlist position
+ AK_FORCE_INLINE int getShortcutPos() const {
+ return mShortcutPos;
+ }
+
+ // Bigrams position
+ AK_FORCE_INLINE int getBigramsPos() const {
+ return mBigramPos;
+ }
+
+ // Sibling node position
+ AK_FORCE_INLINE int getSiblingNodePos() const {
+ return mSiblingPos;
+ }
+
+ private:
+ // This class have a public copy constructor to be used as a return value.
+
+ // Disallowing the assignment operator.
+ PtNodeParams &operator=(PtNodeParams &ptNodeParams);
+
+ const int mHeadPos;
+ const PatriciaTrieReadingUtils::NodeFlags mFlags;
+ const int mParentPos;
+ const uint8_t mCodePointCount;
+ int mCodePoints[MAX_WORD_LENGTH];
+ const int mTerminalIdFieldPos;
+ const int mTerminalId;
+ const int mProbabilityFieldPos;
+ const int mProbability;
+ const int mChildrenPosFieldPos;
+ const int mChildrenPos;
+ const int mBigramLinkedNodePos;
+ const int mShortcutPos;
+ const int mBigramPos;
+ const int mSiblingPos;
+};
+} // namespace latinime
+#endif /* LATINIME_PT_NODE_PARAMS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h
new file mode 100644
index 000000000..c6b2a8bed
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PT_NODE_READER_H
+#define LATINIME_PT_NODE_READER_H
+
+#include "defines.h"
+
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+
+namespace latinime {
+
+// Interface class used to read PtNode information.
+class PtNodeReader {
+ public:
+ virtual ~PtNodeReader() {}
+ virtual const PtNodeParams fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) const = 0;
+
+ protected:
+ PtNodeReader() {};
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(PtNodeReader);
+};
+} // namespace latinime
+#endif /* LATINIME_PT_NODE_READER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
index 8a84bd261..960c1b936 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
@@ -15,22 +15,22 @@
*/
-#include "suggest/policyimpl/dictionary/patricia_trie_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h"
#include "defines.h"
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_vector.h"
-#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
namespace latinime {
-void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
+void PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const {
if (!dicNode->hasChildren()) {
return;
}
- int nextPos = dicNode->getChildrenPos();
+ int nextPos = dicNode->getChildrenPtNodeArrayPos();
if (nextPos < 0 || nextPos >= mDictBufferSize) {
AKLOGE("Children PtNode array position is invalid. pos: %d, dict size: %d",
nextPos, mDictBufferSize);
@@ -52,14 +52,14 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
// This retrieves code points and the probability of the word by its terminal position.
// Due to the fact that words are ordered in the dictionary in a strict breadth-first order,
-// it is possible to check for this with advantageous complexity. For each node, we search
+// it is possible to check for this with advantageous complexity. For each PtNode array, we search
// for PtNodes with children and compare the children position with the position we look for.
// When we shoot the position we look for, it means the word we look for is in the children
// of the previous PtNode. The only tricky part is the fact that if we arrive at the end of a
// PtNode array with the last PtNode's children position still less than what we are searching for,
// we must descend the last PtNode's children (for example, if the word we are searching for starts
// with a z, it's the last PtNode of the root array, so all children addresses will be smaller
-// than the position we look for, and we have to descend the z node).
+// than the position we look for, and we have to descend the z PtNode).
/* Parameters :
* ptNodePos: the byte position of the terminal PtNode of the word we are searching for (this is
* what is stored as the "bigram position" in each bigram)
@@ -74,9 +74,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
int pos = getRootPosition();
int wordPos = 0;
// One iteration of the outer loop iterates through PtNode arrays. As stated above, we will
- // only traverse nodes that are actually a part of the terminal we are searching, so each time
- // we enter this loop we are one depth level further than last time.
- // The only reason we count nodes is because we want to reduce the probability of infinite
+ // only traverse PtNodes that are actually a part of the terminal we are searching, so each
+ // time we enter this loop we are one depth level further than last time.
+ // The only reason we count PtNodes is because we want to reduce the probability of infinite
// looping in case there is a bug. Since we know there is an upper bound to the depth we are
// supposed to traverse, it does not hurt to count iterations.
for (int loopCount = maxCodePointCount; loopCount > 0; --loopCount) {
@@ -140,8 +140,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
found = true;
} else if (1 >= ptNodeCount) {
// However if we are on the LAST PtNode of this array, and we have NOT shot the
- // position we should descend THIS node. So we trick the lastCandidatePtNodePos
- // so that we will descend this PtNode, not the previous one.
+ // position we should descend THIS PtNode. So we trick the
+ // lastCandidatePtNodePos so that we will descend this PtNode, not the previous
+ // one.
lastCandidatePtNodePos = startPos;
found = true;
} else {
@@ -149,7 +150,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
found = false;
}
} else {
- // Even if we don't have children here, we could still be on the last PtNode of /
+ // Even if we don't have children here, we could still be on the last PtNode of
// this array. If this is the case, we should descend the last PtNode that had
// children, and their position is already in lastCandidatePtNodePos.
found = (1 >= ptNodeCount);
@@ -230,9 +231,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
return 0;
}
-// This function gets the position of the terminal node of the exact matching word in the
+// This function gets the position of the terminal PtNode of the exact matching word in the
// dictionary. If no match is found, it returns NOT_A_DICT_POS.
-int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
+int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const {
int pos = getRootPosition();
int wordPos = 0;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
index 0f8662aea..5d99632a5 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
@@ -24,6 +24,7 @@
#include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h"
+#include "suggest/policyimpl/dictionary/utils/format_utils.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
namespace latinime {
@@ -33,28 +34,26 @@ class DicNodeVector;
class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public:
- PatriciaTriePolicy(const MmappedBuffer *const buffer)
- : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), buffer->getBufferSize()),
- mDictRoot(mBuffer->getBuffer() + mHeaderPolicy.getSize()),
- mDictBufferSize(mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
+ PatriciaTriePolicy(const MmappedBuffer::MmappedBufferPtr &mmappedBuffer)
+ : mMmappedBuffer(mmappedBuffer),
+ mHeaderPolicy(mMmappedBuffer.get()->getBuffer(), FormatUtils::VERSION_2),
+ mDictRoot(mMmappedBuffer.get()->getBuffer() + mHeaderPolicy.getSize()),
+ mDictBufferSize(mMmappedBuffer.get()->getBufferSize()
+ - mHeaderPolicy.getSize()),
mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {}
- ~PatriciaTriePolicy() {
- delete mBuffer;
- }
-
AK_FORCE_INLINE int getRootPosition() const {
return 0;
}
- void createAndGetAllChildNodes(const DicNode *const dicNode,
+ void createAndGetAllChildDicNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const;
int getCodePointsAndProbabilityAndReturnCodePointCount(
const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const;
- int getTerminalNodePositionOfWord(const int *const inWord,
+ int getTerminalPtNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const;
int getProbability(const int unigramProbability, const int bigramProbability) const;
@@ -124,7 +123,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);
- const MmappedBuffer *const mBuffer;
+ const MmappedBuffer::MmappedBufferPtr mMmappedBuffer;
const HeaderPolicy mHeaderPolicy;
const uint8_t *const mDictRoot;
const int mDictBufferSize;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.cpp
index 7df55815f..82b3593c8 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
#include "defines.h"
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h
index 8420ee95a..8420ee95a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.cpp
index 5724c5d88..db4e86da1 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.cpp
@@ -14,25 +14,25 @@
* limitations under the License.
*/
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h"
#include "suggest/core/policy/dictionary_header_structure_policy.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
namespace latinime {
bool DynamicPatriciaTrieGcEventListeners
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
- ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
- const int *const nodeCodePoints) {
+ ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
// PtNode is useless when the PtNode is not a terminal and doesn't have any not useless
// children.
- bool isUselessPtNode = !node->isTerminal();
- if (node->isTerminal() && mIsDecayingDict) {
+ bool isUselessPtNode = !ptNodeParams->isTerminal();
+ if (ptNodeParams->isTerminal() && mIsDecayingDict) {
const int newProbability =
- ForgettingCurveUtils::getEncodedProbabilityToSave(node->getProbability(),
+ ForgettingCurveUtils::getEncodedProbabilityToSave(ptNodeParams->getProbability(),
mHeaderPolicy);
- int writingPos = node->getProbabilityFieldPos();
+ int writingPos = ptNodeParams->getProbabilityFieldPos();
// Update probability.
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(
mBuffer, newProbability, &writingPos)) {
@@ -44,9 +44,9 @@ bool DynamicPatriciaTrieGcEventListeners
}
if (mChildrenValue > 0) {
isUselessPtNode = false;
- } else if (node->isTerminal()) {
+ } else if (ptNodeParams->isTerminal()) {
// Remove children as all children are useless.
- int writingPos = node->getChildrenPosFieldPos();
+ int writingPos = ptNodeParams->getChildrenPosFieldPos();
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
mBuffer, NOT_A_DICT_POS /* childrenPosition */, &writingPos)) {
return false;
@@ -54,12 +54,12 @@ bool DynamicPatriciaTrieGcEventListeners
}
if (isUselessPtNode) {
// Current PtNode is no longer needed. Mark it as deleted.
- if (!mWritingHelper->markNodeAsDeleted(node)) {
+ if (!mWritingHelper->markNodeAsDeleted(ptNodeParams)) {
return false;
}
} else {
mValueStack.back() += 1;
- if (node->isTerminal()) {
+ if (ptNodeParams->isTerminal()) {
mValidUnigramCount += 1;
}
}
@@ -67,10 +67,9 @@ bool DynamicPatriciaTrieGcEventListeners
}
bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability
- ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
- const int *const nodeCodePoints) {
- if (!node->isDeleted()) {
- int pos = node->getBigramsPos();
+ ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
+ if (!ptNodeParams->isDeleted()) {
+ int pos = ptNodeParams->getBigramsPos();
if (pos != NOT_A_DICT_POS) {
int bigramEntryCount = 0;
if (!mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(&pos,
@@ -117,31 +116,29 @@ bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNo
// Write valid PtNode to buffer and memorize mapping from the old position to the new position.
bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
- ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
- const int *const nodeCodePoints) {
- if (node->isDeleted()) {
+ ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
+ if (ptNodeParams->isDeleted()) {
// Current PtNode is not written in new buffer because it has been deleted.
mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert(
DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::value_type(
- node->getHeadPos(), NOT_A_DICT_POS));
+ ptNodeParams->getHeadPos(), NOT_A_DICT_POS));
return true;
}
int writingPos = mBufferToWrite->getTailPosition();
mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert(
DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::value_type(
- node->getHeadPos(), writingPos));
+ ptNodeParams->getHeadPos(), writingPos));
mValidPtNodeCount++;
// Writes current PtNode.
- return mWritingHelper->writePtNodeToBufferByCopyingPtNodeInfo(mBufferToWrite, node,
- node->getParentPos(), nodeCodePoints, node->getCodePointCount(),
- node->getProbability(), &writingPos);
+ return mWritingHelper->writePtNodeToBufferByCopyingPtNodeInfo(mBufferToWrite, ptNodeParams,
+ ptNodeParams->getParentPos(), ptNodeParams->getCodePoints(),
+ ptNodeParams->getCodePointCount(), ptNodeParams->getProbability(), &writingPos);
}
bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
- ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
- const int *const nodeCodePoints) {
+ ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
// Updates parent position.
- int parentPos = node->getParentPos();
+ int parentPos = ptNodeParams->getParentPos();
if (parentPos != NOT_A_DICT_POS) {
DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::const_iterator it =
mDictPositionRelocationMap->mPtNodePositionRelocationMap.find(parentPos);
@@ -149,15 +146,16 @@ bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionField
parentPos = it->second;
}
}
- int writingPos = node->getHeadPos() + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
+ int writingPos = ptNodeParams->getHeadPos()
+ + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
// Write updated parent offset.
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mBufferToWrite,
- parentPos, node->getHeadPos(), &writingPos)) {
+ parentPos, ptNodeParams->getHeadPos(), &writingPos)) {
return false;
}
// Updates children position.
- int childrenPos = node->getChildrenPos();
+ int childrenPos = ptNodeParams->getChildrenPos();
if (childrenPos != NOT_A_DICT_POS) {
DynamicPatriciaTrieWritingHelper::PtNodeArrayPositionRelocationMap::const_iterator it =
mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.find(childrenPos);
@@ -165,14 +163,14 @@ bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionField
childrenPos = it->second;
}
}
- writingPos = node->getChildrenPosFieldPos();
+ writingPos = ptNodeParams->getChildrenPosFieldPos();
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBufferToWrite,
childrenPos, &writingPos)) {
return false;
}
// Updates bigram target PtNode positions in the bigram list.
- int bigramsPos = node->getBigramsPos();
+ int bigramsPos = ptNodeParams->getBigramsPos();
if (bigramsPos != NOT_A_DICT_POS) {
int bigramEntryCount;
if (!mBigramPolicy->updateAllBigramTargetPtNodePositions(&bigramsPos,
@@ -181,7 +179,7 @@ bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionField
}
mBigramCount += bigramEntryCount;
}
- if (node->isTerminal()) {
+ if (ptNodeParams->isTerminal()) {
mUnigramCount++;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h
index 9755120b0..cfe3c145c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h
@@ -21,15 +21,16 @@
#include "defines.h"
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "utils/hash_map_compat.h"
namespace latinime {
class DictionaryHeaderStructurePolicy;
+class PtNodeParams;
class DynamicPatriciaTrieGcEventListeners {
public:
@@ -66,8 +67,7 @@ class DynamicPatriciaTrieGcEventListeners {
bool onReadingPtNodeArrayTail() { return true; }
- bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
- const int *const nodeCodePoints);
+ bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
int getValidUnigramCount() const {
return mValidUnigramCount;
@@ -101,8 +101,7 @@ class DynamicPatriciaTrieGcEventListeners {
bool onReadingPtNodeArrayTail() { return true; }
- bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
- const int *const nodeCodePoints);
+ bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
int getValidBigramEntryCount() const {
return mValidBigramEntryCount;
@@ -133,8 +132,7 @@ class DynamicPatriciaTrieGcEventListeners {
bool onReadingPtNodeArrayTail();
- bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
- const int *const nodeCodePoints);
+ bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToPlaceAndWriteValidPtNodesToBuffer);
@@ -167,8 +165,7 @@ class DynamicPatriciaTrieGcEventListeners {
bool onReadingPtNodeArrayTail() { return true; }
- bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
- const int *const nodeCodePoints);
+ bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
int getUnigramCount() const {
return mUnigramCount;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.cpp
new file mode 100644
index 000000000..3393ce662
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.cpp
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
+
+#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
+#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+const PtNodeParams DynamicPatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode(
+ const int ptNodePos, const int siblingNodePos, const int bigramLinkedNodePos) const {
+ if (ptNodePos < 0 || ptNodePos >= mBuffer->getTailPosition()) {
+ // Reading invalid position because of bug or broken dictionary.
+ AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d",
+ ptNodePos, mBuffer->getTailPosition());
+ ASSERT(false);
+ return PtNodeParams();
+ }
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodePos);
+ const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
+ int pos = ptNodePos;
+ const int headPos = ptNodePos;
+ if (usesAdditionalBuffer) {
+ pos -= mBuffer->getOriginalBufferSize();
+ }
+ const PatriciaTrieReadingUtils::NodeFlags flags =
+ PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
+ const int parentPosOffset =
+ DynamicPatriciaTrieReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(dictBuf,
+ &pos);
+ const int parentPos =
+ DynamicPatriciaTrieReadingUtils::getParentPtNodePos(parentPosOffset, headPos);
+ int codePoints[MAX_WORD_LENGTH];
+ const int codePonitCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
+ dictBuf, flags, MAX_WORD_LENGTH, codePoints, &pos);
+ int probability = NOT_A_PROBABILITY;
+ int probabilityFieldPos = NOT_A_DICT_POS;
+ if (PatriciaTrieReadingUtils::isTerminal(flags)) {
+ probabilityFieldPos = pos;
+ if (usesAdditionalBuffer) {
+ probabilityFieldPos += mBuffer->getOriginalBufferSize();
+ }
+ probability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictBuf, &pos);
+ }
+ int childrenPosFieldPos = pos;
+ if (usesAdditionalBuffer) {
+ childrenPosFieldPos += mBuffer->getOriginalBufferSize();
+ }
+ int childrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
+ dictBuf, &pos);
+ if (usesAdditionalBuffer && childrenPos != NOT_A_DICT_POS) {
+ childrenPos += mBuffer->getOriginalBufferSize();
+ }
+ int newBigramLinkedNodePos = bigramLinkedNodePos;
+ if (siblingNodePos == NOT_A_DICT_POS) {
+ if (DynamicPatriciaTrieReadingUtils::isMoved(flags)) {
+ newBigramLinkedNodePos = childrenPos;
+ }
+ }
+ if (usesAdditionalBuffer) {
+ pos += mBuffer->getOriginalBufferSize();
+ }
+ int shortcutsPos = NOT_A_DICT_POS;
+ if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
+ shortcutsPos = pos;
+ mShortcutsPolicy->skipAllShortcuts(&pos);
+ }
+ int bigramsPos = NOT_A_DICT_POS;
+ if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
+ bigramsPos = pos;
+ mBigramsPolicy->skipAllBigrams(&pos);
+ }
+ int newSiblingNodePos = siblingNodePos;
+ if (siblingNodePos == NOT_A_DICT_POS) {
+ // Sibling position is the tail position of current node.
+ newSiblingNodePos = pos;
+ }
+ // Read destination node if the read node is a moved node.
+ if (DynamicPatriciaTrieReadingUtils::isMoved(flags)) {
+ // The destination position is stored at the same place as the parent position.
+ return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(parentPos, newSiblingNodePos,
+ newBigramLinkedNodePos);
+ } else {
+ return PtNodeParams(headPos, flags, parentPos, codePonitCount, codePoints,
+ probabilityFieldPos, probability, childrenPosFieldPos, childrenPos,
+ newBigramLinkedNodePos, shortcutsPos, bigramsPos, newSiblingNodePos);
+ }
+}
+
+}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h
new file mode 100644
index 000000000..b5abffeda
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H
+#define LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
+
+namespace latinime {
+
+class BufferWithExtendableBuffer;
+class DictionaryBigramsStructurePolicy;
+class DictionaryShortcutsStructurePolicy;
+
+/*
+ * This class is used for helping to read nodes of dynamic patricia trie. This class handles moved
+ * node and reads node attributes.
+ */
+class DynamicPatriciaTrieNodeReader : public PtNodeReader {
+ public:
+ DynamicPatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer,
+ const DictionaryBigramsStructurePolicy *const bigramsPolicy,
+ const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
+ : mBuffer(buffer), mBigramsPolicy(bigramsPolicy),
+ mShortcutsPolicy(shortcutsPolicy) {}
+
+ ~DynamicPatriciaTrieNodeReader() {}
+
+ virtual const PtNodeParams fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) const {
+ return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(ptNodePos,
+ NOT_A_DICT_POS /* siblingNodePos */, NOT_A_DICT_POS /* bigramLinkedNodePos */);
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieNodeReader);
+
+ const BufferWithExtendableBuffer *const mBuffer;
+ const DictionaryBigramsStructurePolicy *const mBigramsPolicy;
+ const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy;
+
+ const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
+ const int siblingNodePos, const int bigramLinkedNodePos) const;
+};
+} // namespace latinime
+#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.cpp
index 495b146c2..50882b3e9 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h"
#include <cstdio>
#include <cstring>
@@ -23,11 +23,11 @@
#include "defines.h"
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_vector.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
-#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
@@ -45,29 +45,32 @@ const int DynamicPatriciaTriePolicy::MAX_DICT_EXTENDED_REGION_SIZE = 1024 * 1024
const int DynamicPatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - 1024;
-void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
+void DynamicPatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const {
if (!dicNode->hasChildren()) {
return;
}
- DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
- getBigramsStructurePolicy(), getShortcutsStructurePolicy());
- readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPos());
- const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
+ DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader);
+ readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos());
while (!readingHelper.isEnd()) {
- bool isTerminal = nodeReader->isTerminal() && !nodeReader->isDeleted();
+ const PtNodeParams ptNodeParams(readingHelper.getPtNodeParams());
+ if (!ptNodeParams.isValid()) {
+ break;
+ }
+ bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted();
if (isTerminal && mHeaderPolicy.isDecayingDict()) {
// A DecayingDict may have a terminal PtNode that has a terminal DicNode whose
// probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a
// valid terminal DicNode.
- isTerminal = getProbability(nodeReader->getProbability(), NOT_A_PROBABILITY)
+ isTerminal = getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY)
!= NOT_A_PROBABILITY;
}
- childDicNodes->pushLeavingChild(dicNode, nodeReader->getHeadPos(),
- nodeReader->getChildrenPos(), nodeReader->getProbability(), isTerminal,
- nodeReader->hasChildren(), nodeReader->isBlacklisted() || nodeReader->isNotAWord(),
- nodeReader->getCodePointCount(), readingHelper.getMergedNodeCodePoints());
- readingHelper.readNextSiblingNode();
+ childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(),
+ ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal,
+ ptNodeParams.hasChildren(),
+ ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord(),
+ ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints());
+ readingHelper.readNextSiblingNode(ptNodeParams);
}
}
@@ -77,29 +80,33 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun
// This method traverses parent nodes from the terminal by following parent pointers; thus,
// node code points are stored in the buffer in the reverse order.
int reverseCodePoints[maxCodePointCount];
- DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
- getBigramsStructurePolicy(), getShortcutsStructurePolicy());
+ DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader);
// First, read the terminal node and get its probability.
readingHelper.initWithPtNodePos(ptNodePos);
- if (!readingHelper.isValidTerminalNode()) {
+
+ const PtNodeParams terminalPtNodeParams(readingHelper.getPtNodeParams());
+ if (!readingHelper.isValidTerminalNode(terminalPtNodeParams)) {
// Node at the ptNodePos is not a valid terminal node.
*outUnigramProbability = NOT_A_PROBABILITY;
return 0;
}
// Store terminal node probability.
- *outUnigramProbability = readingHelper.getNodeReader()->getProbability();
+ *outUnigramProbability = terminalPtNodeParams.getProbability();
// Then, following parent node link to the dictionary root and fetch node code points.
+ int totalCodePointCount = 0;
while (!readingHelper.isEnd()) {
- if (readingHelper.getTotalCodePointCount() > maxCodePointCount) {
+ const PtNodeParams ptNodeParams(readingHelper.getPtNodeParams());
+ totalCodePointCount = readingHelper.getTotalCodePointCount(ptNodeParams);
+ if (!ptNodeParams.isValid() || totalCodePointCount > maxCodePointCount) {
// The ptNodePos is not a valid terminal node position in the dictionary.
*outUnigramProbability = NOT_A_PROBABILITY;
return 0;
}
// Store node code points to buffer in the reverse order.
- readingHelper.fetchMergedNodeCodePointsInReverseOrder(
+ readingHelper.fetchMergedNodeCodePointsInReverseOrder(ptNodeParams,
readingHelper.getPrevTotalCodePointCount(), reverseCodePoints);
// Follow parent node toward the root node.
- readingHelper.readParentNode();
+ readingHelper.readParentNode(ptNodeParams);
}
if (readingHelper.isError()) {
// The node position or the dictionary is invalid.
@@ -107,52 +114,54 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun
return 0;
}
// Reverse the stored code points to output them.
- const int codePointCount = readingHelper.getTotalCodePointCount();
- for (int i = 0; i < codePointCount; ++i) {
- outCodePoints[i] = reverseCodePoints[codePointCount - i - 1];
+ for (int i = 0; i < totalCodePointCount; ++i) {
+ outCodePoints[i] = reverseCodePoints[totalCodePointCount - i - 1];
}
- return codePointCount;
+ return totalCodePointCount;
}
-int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
+int DynamicPatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const {
int searchCodePoints[length];
for (int i = 0; i < length; ++i) {
searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i];
}
- DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
- getBigramsStructurePolicy(), getShortcutsStructurePolicy());
+
+ DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader);
readingHelper.initWithPtNodeArrayPos(getRootPosition());
- const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
while (!readingHelper.isEnd()) {
+ const PtNodeParams ptNodeParams(readingHelper.getPtNodeParams());
+ if (!ptNodeParams.isValid()) {
+ break;
+ }
const int matchedCodePointCount = readingHelper.getPrevTotalCodePointCount();
- if (readingHelper.getTotalCodePointCount() > length
- || !readingHelper.isMatchedCodePoint(0 /* index */,
+ if (readingHelper.getTotalCodePointCount(ptNodeParams) > length
+ || !readingHelper.isMatchedCodePoint(ptNodeParams, 0 /* index */,
searchCodePoints[matchedCodePointCount])) {
// Current node has too many code points or its first code point is different from
// target code point. Skip this node and read the next sibling node.
- readingHelper.readNextSiblingNode();
+ readingHelper.readNextSiblingNode(ptNodeParams);
continue;
}
// Check following merged node code points.
- const int nodeCodePointCount = nodeReader->getCodePointCount();
+ const int nodeCodePointCount = ptNodeParams.getCodePointCount();
for (int j = 1; j < nodeCodePointCount; ++j) {
- if (!readingHelper.isMatchedCodePoint(
+ if (!readingHelper.isMatchedCodePoint(ptNodeParams,
j, searchCodePoints[matchedCodePointCount + j])) {
// Different code point is found. The given word is not included in the dictionary.
return NOT_A_DICT_POS;
}
}
// All characters are matched.
- if (length == readingHelper.getTotalCodePointCount()) {
+ if (length == readingHelper.getTotalCodePointCount(ptNodeParams)) {
// Terminal position is found.
- return nodeReader->getHeadPos();
+ return ptNodeParams.getHeadPos();
}
- if (!nodeReader->hasChildren()) {
+ if (!ptNodeParams.hasChildren()) {
return NOT_A_DICT_POS;
}
// Advance to the children nodes.
- readingHelper.readChildNode();
+ readingHelper.readChildNode(ptNodeParams);
}
// If we already traversed the tree further than the word is long, there means
// there was no match (or we would have found it).
@@ -179,44 +188,38 @@ int DynamicPatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos
if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_PROBABILITY;
}
- DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
- getBigramsStructurePolicy(), getShortcutsStructurePolicy());
- nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
- if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) {
+ const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos));
+ if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) {
return NOT_A_PROBABILITY;
}
- return getProbability(nodeReader.getProbability(), NOT_A_PROBABILITY);
+ return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
}
int DynamicPatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS;
}
- DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
- getBigramsStructurePolicy(), getShortcutsStructurePolicy());
- nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
- if (nodeReader.isDeleted()) {
+ const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos));
+ if (ptNodeParams.isDeleted()) {
return NOT_A_DICT_POS;
}
- return nodeReader.getShortcutPos();
+ return ptNodeParams.getShortcutPos();
}
int DynamicPatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS;
}
- DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
- getBigramsStructurePolicy(), getShortcutsStructurePolicy());
- nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
- if (nodeReader.isDeleted()) {
+ const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos));
+ if (ptNodeParams.isDeleted()) {
return NOT_A_DICT_POS;
}
- return nodeReader.getBigramsPos();
+ return ptNodeParams.getBigramsPos();
}
bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int length,
const int probability) {
- if (!mBuffer->isUpdatable()) {
+ if (!mMmappedBuffer.get()->isUpdatable()) {
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
return false;
}
@@ -225,8 +228,7 @@ bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int
AKLOGE("The dictionary is too large to dynamically update.");
return false;
}
- DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
- getBigramsStructurePolicy(), getShortcutsStructurePolicy());
+ DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader);
readingHelper.initWithPtNodeArrayPos(getRootPosition());
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
&mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict());
@@ -244,7 +246,7 @@ bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int
bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int length0,
const int *const word1, const int length1, const int probability) {
- if (!mBuffer->isUpdatable()) {
+ if (!mMmappedBuffer.get()->isUpdatable()) {
AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
return false;
}
@@ -253,12 +255,12 @@ bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int
AKLOGE("The dictionary is too large to dynamically update.");
return false;
}
- const int word0Pos = getTerminalNodePositionOfWord(word0, length0,
+ const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
false /* forceLowerCaseSearch */);
if (word0Pos == NOT_A_DICT_POS) {
return false;
}
- const int word1Pos = getTerminalNodePositionOfWord(word1, length1,
+ const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1,
false /* forceLowerCaseSearch */);
if (word1Pos == NOT_A_DICT_POS) {
return false;
@@ -278,7 +280,7 @@ bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int
bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0,
const int *const word1, const int length1) {
- if (!mBuffer->isUpdatable()) {
+ if (!mMmappedBuffer.get()->isUpdatable()) {
AKLOGI("Warning: removeBigramWords() is called for non-updatable dictionary.");
return false;
}
@@ -287,12 +289,12 @@ bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const
AKLOGE("The dictionary is too large to dynamically update.");
return false;
}
- const int word0Pos = getTerminalNodePositionOfWord(word0, length0,
+ const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
false /* forceLowerCaseSearch */);
if (word0Pos == NOT_A_DICT_POS) {
return false;
}
- const int word1Pos = getTerminalNodePositionOfWord(word1, length1,
+ const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1,
false /* forceLowerCaseSearch */);
if (word1Pos == NOT_A_DICT_POS) {
return false;
@@ -308,7 +310,7 @@ bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const
}
void DynamicPatriciaTriePolicy::flush(const char *const filePath) {
- if (!mBuffer->isUpdatable()) {
+ if (!mMmappedBuffer.get()->isUpdatable()) {
AKLOGI("Warning: flush() is called for non-updatable dictionary.");
return;
}
@@ -318,7 +320,7 @@ void DynamicPatriciaTriePolicy::flush(const char *const filePath) {
}
void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) {
- if (!mBuffer->isUpdatable()) {
+ if (!mMmappedBuffer.get()->isUpdatable()) {
AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
return;
}
@@ -334,7 +336,7 @@ void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) {
}
bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
- if (!mBuffer->isUpdatable()) {
+ if (!mMmappedBuffer.get()->isUpdatable()) {
AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
return false;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h
index be97ee1a5..7a81a9c0a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h
@@ -22,7 +22,9 @@
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/format_utils.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
namespace latinime {
@@ -32,32 +34,33 @@ class DicNodeVector;
class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public:
- DynamicPatriciaTriePolicy(const MmappedBuffer *const buffer)
- : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), buffer->getBufferSize()),
- mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(),
- mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
+ DynamicPatriciaTriePolicy(const MmappedBuffer::MmappedBufferPtr &mmappedBuffer)
+ : mMmappedBuffer(mmappedBuffer),
+ mHeaderPolicy(mMmappedBuffer.get()->getBuffer(), FormatUtils::VERSION_3),
+ mBufferWithExtendableBuffer(mMmappedBuffer.get()->getBuffer()
+ + mHeaderPolicy.getSize(), mMmappedBuffer.get()->getBufferSize()
+ - mHeaderPolicy.getSize(),
+ BufferWithExtendableBuffer
+ ::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mShortcutListPolicy(&mBufferWithExtendableBuffer),
mBigramListPolicy(&mHeaderPolicy, &mBufferWithExtendableBuffer, &mShortcutListPolicy,
mHeaderPolicy.isDecayingDict()),
+ mNodeReader(&mBufferWithExtendableBuffer, &mBigramListPolicy, &mShortcutListPolicy),
mUnigramCount(mHeaderPolicy.getUnigramCount()),
mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {}
- ~DynamicPatriciaTriePolicy() {
- delete mBuffer;
- }
-
AK_FORCE_INLINE int getRootPosition() const {
return 0;
}
- void createAndGetAllChildNodes(const DicNode *const dicNode,
+ void createAndGetAllChildDicNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const;
int getCodePointsAndProbabilityAndReturnCodePointCount(
const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const;
- int getTerminalNodePositionOfWord(const int *const inWord,
+ int getTerminalPtNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const;
int getProbability(const int unigramProbability, const int bigramProbability) const;
@@ -108,11 +111,12 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
static const int MAX_DICT_EXTENDED_REGION_SIZE;
static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
- const MmappedBuffer *const mBuffer;
+ const MmappedBuffer::MmappedBufferPtr mMmappedBuffer;
const HeaderPolicy mHeaderPolicy;
BufferWithExtendableBuffer mBufferWithExtendableBuffer;
DynamicShortcutListPolicy mShortcutListPolicy;
DynamicBigramListPolicy mBigramListPolicy;
+ DynamicPatriciaTrieNodeReader mNodeReader;
int mUnigramCount;
int mBigramCount;
int mNeedsToDecayForTesting;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp
index f108c219f..398ff21cf 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp
@@ -14,15 +14,17 @@
* limitations under the License.
*/
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
namespace latinime {
// To avoid infinite loop caused by invalid or malicious forward links.
const int DynamicPatriciaTrieReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
-const int DynamicPatriciaTrieReadingHelper::MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
+const int DynamicPatriciaTrieReadingHelper::MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
const size_t DynamicPatriciaTrieReadingHelper::MAX_READING_STATE_STACK_SIZE = MAX_WORD_LENGTH;
// Visits all PtNodes in post-order depth first manner.
@@ -37,22 +39,26 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPostorderDepthFirstMa
return false;
}
while (!isEnd()) {
+ const PtNodeParams ptNodeParams(getPtNodeParams());
+ if (!ptNodeParams.isValid()) {
+ break;
+ }
if (!alreadyVisitedChildren) {
- if (mNodeReader.hasChildren()) {
+ if (ptNodeParams.hasChildren()) {
// Move to the first child.
- if (!listener->onDescend(mNodeReader.getChildrenPos())) {
+ if (!listener->onDescend(ptNodeParams.getChildrenPos())) {
return false;
}
pushReadingStateToStack();
- readChildNode();
+ readChildNode(ptNodeParams);
} else {
alreadyVisitedChildren = true;
}
} else {
- if (!listener->onVisitingPtNode(&mNodeReader, mMergedNodeCodePoints)) {
+ if (!listener->onVisitingPtNode(&ptNodeParams)) {
return false;
}
- readNextSiblingNode();
+ readNextSiblingNode(ptNodeParams);
if (isEnd()) {
// All PtNodes in current linked PtNode arrays have been visited.
// Return to the parent.
@@ -101,10 +107,14 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreor
}
pushReadingStateToStack();
while (!isEnd()) {
+ const PtNodeParams ptNodeParams(getPtNodeParams());
+ if (!ptNodeParams.isValid()) {
+ break;
+ }
if (alreadyVisitedAllPtNodesInArray) {
if (alreadyVisitedChildren) {
// Move to next sibling PtNode's children.
- readNextSiblingNode();
+ readNextSiblingNode(ptNodeParams);
if (isEnd()) {
// Return to the parent PTNode.
if (!listener->onAscend()) {
@@ -120,13 +130,13 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreor
alreadyVisitedChildren = false;
}
} else {
- if (mNodeReader.hasChildren()) {
+ if (ptNodeParams.hasChildren()) {
// Move to the first child.
- if (!listener->onDescend(mNodeReader.getChildrenPos())) {
+ if (!listener->onDescend(ptNodeParams.getChildrenPos())) {
return false;
}
pushReadingStateToStack();
- readChildNode();
+ readChildNode(ptNodeParams);
// Push state to return the head of PtNode array.
pushReadingStateToStack();
alreadyVisitedAllPtNodesInArray = false;
@@ -136,10 +146,10 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreor
}
}
} else {
- if (!listener->onVisitingPtNode(&mNodeReader, mMergedNodeCodePoints)) {
+ if (!listener->onVisitingPtNode(&ptNodeParams)) {
return false;
}
- readNextSiblingNode();
+ readNextSiblingNode(ptNodeParams);
if (isEnd()) {
if (!listener->onReadingPtNodeArrayTail()) {
return false;
@@ -170,35 +180,41 @@ void DynamicPatriciaTrieReadingHelper::nextPtNodeArray() {
mReadingState.mPos = NOT_A_DICT_POS;
return;
}
- mReadingState.mPosOfLastPtNodeArrayHead = mReadingState.mPos;
+ mReadingState.mPosOfThisPtNodeArrayHead = mReadingState.mPos;
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos);
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
if (usesAdditionalBuffer) {
mReadingState.mPos -= mBuffer->getOriginalBufferSize();
}
- mReadingState.mNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
- dictBuf, &mReadingState.mPos);
+ mReadingState.mRemainingPtNodeCountInThisArray =
+ PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(dictBuf,
+ &mReadingState.mPos);
if (usesAdditionalBuffer) {
mReadingState.mPos += mBuffer->getOriginalBufferSize();
}
// Count up nodes and node arrays to avoid infinite loop.
- mReadingState.mTotalNodeCount += mReadingState.mNodeCount;
- mReadingState.mNodeArrayCount++;
- if (mReadingState.mNodeCount < 0
- || mReadingState.mTotalNodeCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP
- || mReadingState.mNodeArrayCount > MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) {
+ mReadingState.mTotalPtNodeIndexInThisArrayChain +=
+ mReadingState.mRemainingPtNodeCountInThisArray;
+ mReadingState.mPtNodeArrayIndexInThisArrayChain++;
+ if (mReadingState.mRemainingPtNodeCountInThisArray < 0
+ || mReadingState.mTotalPtNodeIndexInThisArrayChain
+ > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP
+ || mReadingState.mPtNodeArrayIndexInThisArrayChain
+ > MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) {
// Invalid dictionary.
AKLOGI("Invalid dictionary. nodeCount: %d, totalNodeCount: %d, MAX_CHILD_COUNT: %d"
"nodeArrayCount: %d, MAX_NODE_ARRAY_COUNT: %d",
- mReadingState.mNodeCount, mReadingState.mTotalNodeCount,
- MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP, mReadingState.mNodeArrayCount,
- MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP);
+ mReadingState.mRemainingPtNodeCountInThisArray,
+ mReadingState.mTotalPtNodeIndexInThisArrayChain,
+ MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP,
+ mReadingState.mPtNodeArrayIndexInThisArrayChain,
+ MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP);
ASSERT(false);
mIsError = true;
mReadingState.mPos = NOT_A_DICT_POS;
return;
}
- if (mReadingState.mNodeCount == 0) {
+ if (mReadingState.mRemainingPtNodeCountInThisArray == 0) {
// Empty node array. Try following forward link.
followForwardLink();
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h
index a71c06971..1e9218e58 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h
@@ -21,9 +21,8 @@
#include <vector>
#include "defines.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
namespace latinime {
@@ -35,6 +34,7 @@ class DictionaryShortcutsStructurePolicy;
* This class is used for traversing dynamic patricia trie. This class supports iterating nodes and
* dealing with additional buffer. This class counts nodes and node arrays to avoid infinite loop.
*/
+// TODO: Move to pt_common.
class DynamicPatriciaTrieReadingHelper {
public:
class TraversingEventListener {
@@ -51,8 +51,7 @@ class DynamicPatriciaTrieReadingHelper {
virtual bool onReadingPtNodeArrayTail() = 0;
// Returns whether the event handling was succeeded or not.
- virtual bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
- const int *const nodeCodePoints) = 0;
+ virtual bool onVisitingPtNode(const PtNodeParams *const node) = 0;
protected:
TraversingEventListener() {};
@@ -62,10 +61,9 @@ class DynamicPatriciaTrieReadingHelper {
};
DynamicPatriciaTrieReadingHelper(const BufferWithExtendableBuffer *const buffer,
- const DictionaryBigramsStructurePolicy *const bigramsPolicy,
- const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
+ const PtNodeReader *const ptNodeReader)
: mIsError(false), mReadingState(), mBuffer(buffer),
- mNodeReader(mBuffer, bigramsPolicy, shortcutsPolicy), mReadingStateStack() {}
+ mPtNodeReader(ptNodeReader), mReadingStateStack() {}
~DynamicPatriciaTrieReadingHelper() {}
@@ -84,15 +82,12 @@ class DynamicPatriciaTrieReadingHelper {
} else {
mIsError = false;
mReadingState.mPos = ptNodeArrayPos;
- mReadingState.mPrevTotalCodePointCount = 0;
- mReadingState.mTotalNodeCount = 0;
- mReadingState.mNodeArrayCount = 0;
+ mReadingState.mTotalCodePointCountSinceInitialization = 0;
+ mReadingState.mTotalPtNodeIndexInThisArrayChain = 0;
+ mReadingState.mPtNodeArrayIndexInThisArrayChain = 0;
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
mReadingStateStack.clear();
nextPtNodeArray();
- if (!isEnd()) {
- fetchPtNodeInfo();
- }
}
}
@@ -103,94 +98,88 @@ class DynamicPatriciaTrieReadingHelper {
} else {
mIsError = false;
mReadingState.mPos = ptNodePos;
- mReadingState.mNodeCount = 1;
- mReadingState.mPrevTotalCodePointCount = 0;
- mReadingState.mTotalNodeCount = 1;
- mReadingState.mNodeArrayCount = 1;
+ mReadingState.mRemainingPtNodeCountInThisArray = 1;
+ mReadingState.mTotalCodePointCountSinceInitialization = 0;
+ mReadingState.mTotalPtNodeIndexInThisArrayChain = 1;
+ mReadingState.mPtNodeArrayIndexInThisArrayChain = 1;
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
- mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS;
+ mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS;
mReadingStateStack.clear();
- fetchPtNodeInfo();
}
}
- AK_FORCE_INLINE const DynamicPatriciaTrieNodeReader* getNodeReader() const {
- return &mNodeReader;
+ AK_FORCE_INLINE const PtNodeParams getPtNodeParams() const {
+ if (isEnd()) {
+ return PtNodeParams();
+ }
+ return mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(mReadingState.mPos);
}
- AK_FORCE_INLINE bool isValidTerminalNode() const {
- return !isEnd() && !mNodeReader.isDeleted() && mNodeReader.isTerminal();
+ AK_FORCE_INLINE bool isValidTerminalNode(const PtNodeParams &ptNodeParams) const {
+ return !isEnd() && !ptNodeParams.isDeleted() && ptNodeParams.isTerminal();
}
- AK_FORCE_INLINE bool isMatchedCodePoint(const int index, const int codePoint) const {
- return mMergedNodeCodePoints[index] == codePoint;
+ AK_FORCE_INLINE bool isMatchedCodePoint(const PtNodeParams &ptNodeParams, const int index,
+ const int codePoint) const {
+ return ptNodeParams.getCodePoints()[index] == codePoint;
}
// Return code point count exclude the last read node's code points.
AK_FORCE_INLINE int getPrevTotalCodePointCount() const {
- return mReadingState.mPrevTotalCodePointCount;
+ return mReadingState.mTotalCodePointCountSinceInitialization;
}
// Return code point count include the last read node's code points.
- AK_FORCE_INLINE int getTotalCodePointCount() const {
- return mReadingState.mPrevTotalCodePointCount + mNodeReader.getCodePointCount();
+ AK_FORCE_INLINE int getTotalCodePointCount(const PtNodeParams &ptNodeParams) const {
+ return mReadingState.mTotalCodePointCountSinceInitialization
+ + ptNodeParams.getCodePointCount();
}
- AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder(
+ AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder(const PtNodeParams &ptNodeParams,
const int index, int *const outCodePoints) const {
- const int nodeCodePointCount = mNodeReader.getCodePointCount();
+ const int nodeCodePointCount = ptNodeParams.getCodePointCount();
+ const int *const nodeCodePoints = ptNodeParams.getCodePoints();
for (int i = 0; i < nodeCodePointCount; ++i) {
- outCodePoints[index + i] = mMergedNodeCodePoints[nodeCodePointCount - 1 - i];
+ outCodePoints[index + i] = nodeCodePoints[nodeCodePointCount - 1 - i];
}
}
- AK_FORCE_INLINE const int *getMergedNodeCodePoints() const {
- return mMergedNodeCodePoints;
- }
-
- AK_FORCE_INLINE void readNextSiblingNode() {
- mReadingState.mNodeCount -= 1;
- mReadingState.mPos = mNodeReader.getSiblingNodePos();
- if (mReadingState.mNodeCount <= 0) {
+ AK_FORCE_INLINE void readNextSiblingNode(const PtNodeParams &ptNodeParams) {
+ mReadingState.mRemainingPtNodeCountInThisArray -= 1;
+ mReadingState.mPos = ptNodeParams.getSiblingNodePos();
+ if (mReadingState.mRemainingPtNodeCountInThisArray <= 0) {
// All nodes in the current node array have been read.
followForwardLink();
- if (!isEnd()) {
- fetchPtNodeInfo();
- }
- } else {
- fetchPtNodeInfo();
}
}
// Read the first child node of the current node.
- AK_FORCE_INLINE void readChildNode() {
- if (mNodeReader.hasChildren()) {
- mReadingState.mPrevTotalCodePointCount += mNodeReader.getCodePointCount();
- mReadingState.mTotalNodeCount = 0;
- mReadingState.mNodeArrayCount = 0;
- mReadingState.mPos = mNodeReader.getChildrenPos();
+ AK_FORCE_INLINE void readChildNode(const PtNodeParams &ptNodeParams) {
+ if (ptNodeParams.hasChildren()) {
+ mReadingState.mTotalCodePointCountSinceInitialization +=
+ ptNodeParams.getCodePointCount();
+ mReadingState.mTotalPtNodeIndexInThisArrayChain = 0;
+ mReadingState.mPtNodeArrayIndexInThisArrayChain = 0;
+ mReadingState.mPos = ptNodeParams.getChildrenPos();
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
// Read children node array.
nextPtNodeArray();
- if (!isEnd()) {
- fetchPtNodeInfo();
- }
} else {
mReadingState.mPos = NOT_A_DICT_POS;
}
}
// Read the parent node of the current node.
- AK_FORCE_INLINE void readParentNode() {
- if (mNodeReader.getParentPos() != NOT_A_DICT_POS) {
- mReadingState.mPrevTotalCodePointCount += mNodeReader.getCodePointCount();
- mReadingState.mTotalNodeCount = 1;
- mReadingState.mNodeArrayCount = 1;
- mReadingState.mNodeCount = 1;
- mReadingState.mPos = mNodeReader.getParentPos();
+ AK_FORCE_INLINE void readParentNode(const PtNodeParams &ptNodeParams) {
+ if (ptNodeParams.getParentPos() != NOT_A_DICT_POS) {
+ mReadingState.mTotalCodePointCountSinceInitialization +=
+ ptNodeParams.getCodePointCount();
+ mReadingState.mTotalPtNodeIndexInThisArrayChain = 1;
+ mReadingState.mPtNodeArrayIndexInThisArrayChain = 1;
+ mReadingState.mRemainingPtNodeCountInThisArray = 1;
+ mReadingState.mPos = ptNodeParams.getParentPos();
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
- mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS;
- fetchPtNodeInfo();
+ mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS;
} else {
mReadingState.mPos = NOT_A_DICT_POS;
}
@@ -201,13 +190,7 @@ class DynamicPatriciaTrieReadingHelper {
}
AK_FORCE_INLINE int getPosOfLastPtNodeArrayHead() const {
- return mReadingState.mPosOfLastPtNodeArrayHead;
- }
-
- AK_FORCE_INLINE void reloadCurrentPtNodeInfo() {
- if (!isEnd()) {
- fetchPtNodeInfo();
- }
+ return mReadingState.mPosOfThisPtNodeArrayHead;
}
bool traverseAllPtNodesInPostorderDepthFirstManner(TraversingEventListener *const listener);
@@ -218,50 +201,45 @@ class DynamicPatriciaTrieReadingHelper {
private:
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieReadingHelper);
- class ReadingState {
+ // This class encapsulates the reading state of a position in the dictionary. It points at a
+ // specific PtNode in the dictionary.
+ class PtNodeReadingState {
public:
// Note that copy constructor and assignment operator are used for this class to use
// std::vector.
- ReadingState() : mPos(NOT_A_DICT_POS), mNodeCount(0), mPrevTotalCodePointCount(0),
- mTotalNodeCount(0), mNodeArrayCount(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS),
- mPosOfLastPtNodeArrayHead(NOT_A_DICT_POS) {}
+ PtNodeReadingState() : mPos(NOT_A_DICT_POS), mRemainingPtNodeCountInThisArray(0),
+ mTotalCodePointCountSinceInitialization(0), mTotalPtNodeIndexInThisArrayChain(0),
+ mPtNodeArrayIndexInThisArrayChain(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS),
+ mPosOfThisPtNodeArrayHead(NOT_A_DICT_POS) {}
int mPos;
- // Node count of a node array.
- int mNodeCount;
- int mPrevTotalCodePointCount;
- int mTotalNodeCount;
- int mNodeArrayCount;
+ // Remaining node count in the current array.
+ int mRemainingPtNodeCountInThisArray;
+ int mTotalCodePointCountSinceInitialization;
+ // Counter of PtNodes used to avoid infinite loops caused by broken or malicious links.
+ int mTotalPtNodeIndexInThisArrayChain;
+ // Counter of PtNode arrays used to avoid infinite loops caused by cyclic links of empty
+ // PtNode arrays.
+ int mPtNodeArrayIndexInThisArrayChain;
int mPosOfLastForwardLinkField;
- int mPosOfLastPtNodeArrayHead;
+ int mPosOfThisPtNodeArrayHead;
};
static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP;
- static const int MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP;
+ static const int MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP;
static const size_t MAX_READING_STATE_STACK_SIZE;
// TODO: Introduce error code to track what caused the error.
bool mIsError;
- ReadingState mReadingState;
+ PtNodeReadingState mReadingState;
const BufferWithExtendableBuffer *const mBuffer;
- DynamicPatriciaTrieNodeReader mNodeReader;
- int mMergedNodeCodePoints[MAX_WORD_LENGTH];
- std::vector<ReadingState> mReadingStateStack;
+ const PtNodeReader *const mPtNodeReader;
+ std::vector<PtNodeReadingState> mReadingStateStack;
void nextPtNodeArray();
void followForwardLink();
- AK_FORCE_INLINE void fetchPtNodeInfo() {
- mNodeReader.fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(mReadingState.mPos,
- MAX_WORD_LENGTH, mMergedNodeCodePoints);
- if (mNodeReader.getCodePointCount() <= 0) {
- // Empty node is not allowed.
- mIsError = true;
- mReadingState.mPos = NOT_A_DICT_POS;
- }
- }
-
AK_FORCE_INLINE void pushReadingStateToStack() {
if (mReadingStateStack.size() > MAX_READING_STATE_STACK_SIZE) {
AKLOGI("Reading state stack overflow. Max size: %zd", MAX_READING_STATE_STACK_SIZE);
@@ -279,9 +257,6 @@ class DynamicPatriciaTrieReadingHelper {
} else {
mReadingState = mReadingStateStack.back();
mReadingStateStack.pop_back();
- if (!isEnd()) {
- fetchPtNodeInfo();
- }
}
}
};
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.cpp
index d68446db6..e94925365 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
#include "defines.h"
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h
index 67c3cc57e..67c3cc57e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.cpp
index 052558bfc..05caaebac 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.cpp
@@ -14,16 +14,16 @@
* limitations under the License.
*/
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h"
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
-#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
@@ -41,24 +41,26 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
bool *const outAddedNewUnigram) {
int parentPos = NOT_A_DICT_POS;
while (!readingHelper->isEnd()) {
+ const PtNodeParams ptNodeParams(readingHelper->getPtNodeParams());
+ if (!ptNodeParams.isValid()) {
+ break;
+ }
const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount();
- if (!readingHelper->isMatchedCodePoint(0 /* index */,
+ if (!readingHelper->isMatchedCodePoint(ptNodeParams, 0 /* index */,
wordCodePoints[matchedCodePointCount])) {
// The first code point is different from target code point. Skip this node and read
// the next sibling node.
- readingHelper->readNextSiblingNode();
+ readingHelper->readNextSiblingNode(ptNodeParams);
continue;
}
// Check following merged node code points.
- const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper->getNodeReader();
- const int nodeCodePointCount = nodeReader->getCodePointCount();
+ const int nodeCodePointCount = ptNodeParams.getCodePointCount();
for (int j = 1; j < nodeCodePointCount; ++j) {
const int nextIndex = matchedCodePointCount + j;
- if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(j,
+ if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(ptNodeParams, j,
wordCodePoints[matchedCodePointCount + j])) {
*outAddedNewUnigram = true;
- return reallocatePtNodeAndAddNewPtNodes(nodeReader,
- readingHelper->getMergedNodeCodePoints(), j,
+ return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j,
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */,
probability),
wordCodePoints + matchedCodePointCount,
@@ -66,20 +68,19 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
}
}
// All characters are matched.
- if (codePointCount == readingHelper->getTotalCodePointCount()) {
- return setPtNodeProbability(nodeReader, probability,
- readingHelper->getMergedNodeCodePoints(), outAddedNewUnigram);
+ if (codePointCount == readingHelper->getTotalCodePointCount(ptNodeParams)) {
+ return setPtNodeProbability(&ptNodeParams, probability, outAddedNewUnigram);
}
- if (!nodeReader->hasChildren()) {
+ if (!ptNodeParams.hasChildren()) {
*outAddedNewUnigram = true;
- return createChildrenPtNodeArrayAndAChildPtNode(nodeReader,
+ return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams,
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability),
- wordCodePoints + readingHelper->getTotalCodePointCount(),
- codePointCount - readingHelper->getTotalCodePointCount());
+ wordCodePoints + readingHelper->getTotalCodePointCount(ptNodeParams),
+ codePointCount - readingHelper->getTotalCodePointCount(ptNodeParams));
}
// Advance to the children nodes.
- parentPos = nodeReader->getHeadPos();
- readingHelper->readChildNode();
+ parentPos = ptNodeParams.getHeadPos();
+ readingHelper->readChildNode(ptNodeParams);
}
if (readingHelper->isError()) {
// The dictionary is invalid.
@@ -95,26 +96,24 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos,
const int probability, bool *const outAddedNewBigram) {
- int mMergedNodeCodePoints[MAX_WORD_LENGTH];
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
- nodeReader.fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(word0Pos, MAX_WORD_LENGTH,
- mMergedNodeCodePoints);
+ const PtNodeParams ptNodeParams(nodeReader.fetchNodeInfoInBufferFromPtNodePos(word0Pos));
// Move node to add bigram entry.
const int newNodePos = mBuffer->getTailPosition();
- if (!markNodeAsMovedAndSetPosition(&nodeReader, newNodePos, newNodePos)) {
+ if (!markNodeAsMovedAndSetPosition(&ptNodeParams, newNodePos, newNodePos)) {
return false;
}
int writingPos = newNodePos;
// Write a new PtNode using original PtNode's info to the tail of the dictionary in mBuffer.
- if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, &nodeReader, nodeReader.getParentPos(),
- mMergedNodeCodePoints, nodeReader.getCodePointCount(), nodeReader.getProbability(),
- &writingPos)) {
+ if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, &ptNodeParams, ptNodeParams.getParentPos(),
+ ptNodeParams.getCodePoints(), ptNodeParams.getCodePointCount(),
+ ptNodeParams.getProbability(), &writingPos)) {
return false;
}
- nodeReader.fetchNodeInfoInBufferFromPtNodePos(newNodePos);
- if (nodeReader.getBigramsPos() != NOT_A_DICT_POS) {
+ const PtNodeParams newPtNodeParams(nodeReader.fetchNodeInfoInBufferFromPtNodePos(newNodePos));
+ if (newPtNodeParams.getBigramsPos() != NOT_A_DICT_POS) {
// Insert a new bigram entry into the existing bigram list.
- int bigramListPos = nodeReader.getBigramsPos();
+ int bigramListPos = newPtNodeParams.getBigramsPos();
return mBigramPolicy->addNewBigramEntryToBigramList(word1Pos, probability, &bigramListPos,
outAddedNewBigram);
} else {
@@ -126,10 +125,11 @@ bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const
}
// Then, Mark as the PtNode having bigram list in the flags.
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
- PatriciaTrieReadingUtils::createAndGetFlags(nodeReader.isBlacklisted(),
- nodeReader.isNotAWord(), nodeReader.getProbability() != NOT_A_PROBABILITY,
- nodeReader.getShortcutPos() != NOT_A_DICT_POS, true /* hasBigrams */,
- nodeReader.getCodePointCount() > 1, CHILDREN_POSITION_FIELD_SIZE);
+ PatriciaTrieReadingUtils::createAndGetFlags(newPtNodeParams.isBlacklisted(),
+ newPtNodeParams.isNotAWord(),
+ newPtNodeParams.getProbability() != NOT_A_PROBABILITY,
+ newPtNodeParams.getShortcutPos() != NOT_A_DICT_POS, true /* hasBigrams */,
+ newPtNodeParams.getCodePointCount() > 1, CHILDREN_POSITION_FIELD_SIZE);
writingPos = newNodePos;
// Write updated flags into the moved PtNode's flags field.
return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
@@ -140,16 +140,17 @@ bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const
// Remove a bigram relation from word0Pos to word1Pos.
bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) {
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
- nodeReader.fetchNodeInfoInBufferFromPtNodePos(word0Pos);
- if (nodeReader.getBigramsPos() == NOT_A_DICT_POS) {
+ const PtNodeParams ptNodeParams(nodeReader.fetchNodeInfoInBufferFromPtNodePos(word0Pos));
+ if (ptNodeParams.getBigramsPos() == NOT_A_DICT_POS) {
return false;
}
- return mBigramPolicy->removeBigram(nodeReader.getBigramsPos(), word1Pos);
+ return mBigramPolicy->removeBigram(ptNodeParams.getBigramsPos(), word1Pos);
}
void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileName,
const HeaderPolicy *const headerPolicy, const int unigramCount, const int bigramCount) {
- BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
+ BufferWithExtendableBuffer headerBuffer(
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
const int extendedRegionSize = headerPolicy->getExtendedRegionSize() +
mBuffer->getUsedAdditionalBufferSize();
if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */,
@@ -161,8 +162,7 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileNam
void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos,
const char *const fileName, const HeaderPolicy *const headerPolicy) {
- BufferWithExtendableBuffer newDictBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */,
- MAX_DICTIONARY_SIZE);
+ BufferWithExtendableBuffer newDictBuffer(MAX_DICTIONARY_SIZE);
int unigramCount = 0;
int bigramCount = 0;
if (mNeedsToDecay) {
@@ -171,7 +171,8 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNod
if (!runGC(rootPtNodeArrayPos, headerPolicy, &newDictBuffer, &unigramCount, &bigramCount)) {
return;
}
- BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
+ BufferWithExtendableBuffer headerBuffer(
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */,
mNeedsToDecay, unigramCount, bigramCount, 0 /* extendedRegionSize */)) {
return;
@@ -180,8 +181,8 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNod
}
bool DynamicPatriciaTrieWritingHelper::markNodeAsDeleted(
- const DynamicPatriciaTrieNodeReader *const nodeToUpdate) {
- int pos = nodeToUpdate->getHeadPos();
+ const PtNodeParams *const toBeUpdatedPtNodeParams) {
+ int pos = toBeUpdatedPtNodeParams->getHeadPos();
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
if (usesAdditionalBuffer) {
@@ -193,16 +194,16 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsDeleted(
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */,
true /* isDeleted */);
- int writingPos = nodeToUpdate->getHeadPos();
+ int writingPos = toBeUpdatedPtNodeParams->getHeadPos();
// Update flags.
return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
&writingPos);
}
bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
- const DynamicPatriciaTrieNodeReader *const originalNode, const int movedPos,
+ const PtNodeParams *const toBeUpdatedPtNodeParams, const int movedPos,
const int bigramLinkedNodePos) {
- int pos = originalNode->getHeadPos();
+ int pos = toBeUpdatedPtNodeParams->getHeadPos();
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
if (usesAdditionalBuffer) {
@@ -214,7 +215,7 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */,
false /* isDeleted */);
- int writingPos = originalNode->getHeadPos();
+ int writingPos = toBeUpdatedPtNodeParams->getHeadPos();
// Update flags.
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
&writingPos)) {
@@ -222,31 +223,32 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
}
// Update moved position, which is stored in the parent offset field.
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
- mBuffer, movedPos, originalNode->getHeadPos(), &writingPos)) {
+ mBuffer, movedPos, toBeUpdatedPtNodeParams->getHeadPos(), &writingPos)) {
return false;
}
// Update bigram linked node position, which is stored in the children position field.
- int childrenPosFieldPos = originalNode->getChildrenPosFieldPos();
+ int childrenPosFieldPos = toBeUpdatedPtNodeParams->getChildrenPosFieldPos();
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
mBuffer, bigramLinkedNodePos, &childrenPosFieldPos)) {
return false;
}
- if (originalNode->hasChildren()) {
+ if (toBeUpdatedPtNodeParams->hasChildren()) {
// Update children's parent position.
- DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
- const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
- readingHelper.initWithPtNodeArrayPos(originalNode->getChildrenPos());
+ DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
+ DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, &nodeReader);
+ readingHelper.initWithPtNodeArrayPos(toBeUpdatedPtNodeParams->getChildrenPos());
while (!readingHelper.isEnd()) {
- int parentOffsetFieldPos = nodeReader->getHeadPos()
+ const PtNodeParams childPtNodeParams(readingHelper.getPtNodeParams());
+ int parentOffsetFieldPos = childPtNodeParams.getHeadPos()
+ DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
- mBuffer, bigramLinkedNodePos, nodeReader->getHeadPos(),
+ mBuffer, bigramLinkedNodePos, childPtNodeParams.getHeadPos(),
&parentOffsetFieldPos)) {
// Parent offset cannot be written because of a bug or a broken dictionary; thus,
// we give up to update dictionary.
return false;
}
- readingHelper.readNextSiblingNode();
+ readingHelper.readNextSiblingNode(childPtNodeParams);
}
}
return true;
@@ -332,13 +334,13 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeToBuffer(
bool DynamicPatriciaTrieWritingHelper::writePtNodeToBufferByCopyingPtNodeInfo(
BufferWithExtendableBuffer *const bufferToWrite,
- const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos,
+ const PtNodeParams *const originalPtNodeParams, const int parentPos,
const int *const codePoints, const int codePointCount, const int probability,
int *const writingPos) {
- return writePtNodeWithFullInfoToBuffer(bufferToWrite, originalNode->isBlacklisted(),
- originalNode->isNotAWord(), parentPos, codePoints, codePointCount, probability,
- originalNode->getChildrenPos(), originalNode->getBigramsPos(),
- originalNode->getShortcutPos(), writingPos);
+ return writePtNodeWithFullInfoToBuffer(bufferToWrite, originalPtNodeParams->isBlacklisted(),
+ originalPtNodeParams->isNotAWord(), parentPos, codePoints, codePointCount, probability,
+ originalPtNodeParams->getChildrenPos(), originalPtNodeParams->getBigramsPos(),
+ originalPtNodeParams->getShortcutPos(), writingPos);
}
bool DynamicPatriciaTrieWritingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos,
@@ -354,14 +356,14 @@ bool DynamicPatriciaTrieWritingHelper::createAndInsertNodeIntoPtNodeArray(const
}
bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability(
- const DynamicPatriciaTrieNodeReader *const originalPtNode, const int probability,
- const int *const codePoints, bool *const outAddedNewUnigram) {
- if (originalPtNode->isTerminal()) {
+ const PtNodeParams *const originalPtNodeParams, const int probability,
+ bool *const outAddedNewUnigram) {
+ if (originalPtNodeParams->isTerminal()) {
// Overwrites the probability.
*outAddedNewUnigram = false;
- const int probabilityToWrite = getUpdatedProbability(originalPtNode->getProbability(),
- probability);
- int probabilityFieldPos = originalPtNode->getProbabilityFieldPos();
+ const int probabilityToWrite = getUpdatedProbability(
+ originalPtNodeParams->getProbability(), probability);
+ int probabilityFieldPos = originalPtNodeParams->getProbabilityFieldPos();
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
probabilityToWrite, &probabilityFieldPos)) {
return false;
@@ -370,11 +372,12 @@ bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability(
// Make the node terminal and write the probability.
*outAddedNewUnigram = true;
int movedPos = mBuffer->getTailPosition();
- if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos, movedPos)) {
+ if (!markNodeAsMovedAndSetPosition(originalPtNodeParams, movedPos, movedPos)) {
return false;
}
- if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, originalPtNode,
- originalPtNode->getParentPos(), codePoints, originalPtNode->getCodePointCount(),
+ if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, originalPtNodeParams,
+ originalPtNodeParams->getParentPos(), originalPtNodeParams->getCodePoints(),
+ originalPtNodeParams->getCodePointCount(),
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability),
&movedPos)) {
return false;
@@ -384,15 +387,15 @@ bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability(
}
bool DynamicPatriciaTrieWritingHelper::createChildrenPtNodeArrayAndAChildPtNode(
- const DynamicPatriciaTrieNodeReader *const parentNode, const int probability,
+ const PtNodeParams *const parentPtNodeParams, const int probability,
const int *const codePoints, const int codePointCount) {
const int newPtNodeArrayPos = mBuffer->getTailPosition();
- int childrenPosFieldPos = parentNode->getChildrenPosFieldPos();
+ int childrenPosFieldPos = parentPtNodeParams->getChildrenPosFieldPos();
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
newPtNodeArrayPos, &childrenPosFieldPos)) {
return false;
}
- return createNewPtNodeArrayWithAChildPtNode(parentNode->getHeadPos(), codePoints,
+ return createNewPtNodeArrayWithAChildPtNode(parentPtNodeParams->getHeadPos(), codePoints,
codePointCount, probability);
}
@@ -417,8 +420,7 @@ bool DynamicPatriciaTrieWritingHelper::createNewPtNodeArrayWithAChildPtNode(
// Returns whether the dictionary updating was succeeded or not.
bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
- const DynamicPatriciaTrieNodeReader *const reallocatingPtNode,
- const int *const reallocatingPtNodeCodePoints, const int overlappingCodePointCount,
+ const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
const int newNodeCodePointCount) {
// When addsExtraChild is true, split the reallocating PtNode and add new child.
@@ -434,8 +436,8 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
// Write the 1st part of the reallocating node. The children position will be updated later
// with actual children position.
const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode;
- if (!writePtNodeToBuffer(mBuffer, reallocatingPtNode->getParentPos(),
- reallocatingPtNodeCodePoints, overlappingCodePointCount, newProbability,
+ if (!writePtNodeToBuffer(mBuffer, reallocatingPtNodeParams->getParentPos(),
+ reallocatingPtNodeParams->getCodePoints(), overlappingCodePointCount, newProbability,
&writingPos)) {
return false;
}
@@ -448,11 +450,11 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
}
// Write the 2nd part of the reallocating node.
const int secondPartOfReallocatedPtNodePos = writingPos;
- if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, reallocatingPtNode,
+ if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, reallocatingPtNodeParams,
firstPartOfReallocatedPtNodePos,
- reallocatingPtNodeCodePoints + overlappingCodePointCount,
- reallocatingPtNode->getCodePointCount() - overlappingCodePointCount,
- reallocatingPtNode->getProbability(), &writingPos)) {
+ reallocatingPtNodeParams->getCodePoints() + overlappingCodePointCount,
+ reallocatingPtNodeParams->getCodePointCount() - overlappingCodePointCount,
+ reallocatingPtNodeParams->getProbability(), &writingPos)) {
return false;
}
if (addsExtraChild) {
@@ -467,16 +469,17 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
return false;
}
- // Update original reallocatingPtNode as moved.
- if (!markNodeAsMovedAndSetPosition(reallocatingPtNode, firstPartOfReallocatedPtNodePos,
+ // Update original reallocating PtNode as moved.
+ if (!markNodeAsMovedAndSetPosition(reallocatingPtNodeParams, firstPartOfReallocatedPtNodePos,
secondPartOfReallocatedPtNodePos)) {
return false;
}
// Load node info. Information of the 1st part will be fetched.
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
- nodeReader.fetchNodeInfoInBufferFromPtNodePos(firstPartOfReallocatedPtNodePos);
+ const PtNodeParams ptNodeParams(
+ nodeReader.fetchNodeInfoInBufferFromPtNodePos(firstPartOfReallocatedPtNodePos));
// Update children position.
- int childrenPosFieldPos = nodeReader.getChildrenPosFieldPos();
+ int childrenPosFieldPos = ptNodeParams.getChildrenPosFieldPos();
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
actualChildrenPos, &childrenPosFieldPos)) {
return false;
@@ -487,7 +490,8 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
const HeaderPolicy *const headerPolicy, BufferWithExtendableBuffer *const bufferToWrite,
int *const outUnigramCount, int *const outBigramCount) {
- DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
+ DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
+ DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, &nodeReader);
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPatriciaTrieGcEventListeners
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
@@ -529,9 +533,10 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite);
DynamicBigramListPolicy newDictBigramPolicy(headerPolicy, bufferToWrite, &newDictShortcutPolicy,
mNeedsToDecay);
- // Create reading helper for the GCed dictionary.
- DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictBigramPolicy,
+ // Create reading node reader and reading helper for the GCed dictionary.
+ DynamicPatriciaTrieNodeReader newDictNodeReader(bufferToWrite, &newDictBigramPolicy,
&newDictShortcutPolicy);
+ DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictNodeReader);
newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
traversePolicyToUpdateAllPositionFields(this, &newDictBigramPolicy, bufferToWrite,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h
index ca8664729..5614cb3ac 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h
@@ -26,11 +26,12 @@ namespace latinime {
class BufferWithExtendableBuffer;
class DynamicBigramListPolicy;
-class DynamicPatriciaTrieNodeReader;
class DynamicPatriciaTrieReadingHelper;
class DynamicShortcutListPolicy;
class HeaderPolicy;
+class PtNodeParams;
+// TODO: Make it independent from a particular format and move to pt_common.
class DynamicPatriciaTrieWritingHelper {
public:
typedef hash_map_compat<int, int> PtNodeArrayPositionRelocationMap;
@@ -77,12 +78,12 @@ class DynamicPatriciaTrieWritingHelper {
// CAVEAT: This method must be called only from inner classes of
// DynamicPatriciaTrieGcEventListeners.
- bool markNodeAsDeleted(const DynamicPatriciaTrieNodeReader *const nodeToUpdate);
+ bool markNodeAsDeleted(const PtNodeParams *const toBeUpdatedPtNodeParams);
// CAVEAT: This method must be called only from this class or inner classes of
// DynamicPatriciaTrieGcEventListeners.
bool writePtNodeToBufferByCopyingPtNodeInfo(BufferWithExtendableBuffer *const bufferToWrite,
- const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos,
+ const PtNodeParams *const originalPtNodeParams, const int parentPos,
const int *const codePoints, const int codePointCount, const int probability,
int *const writingPos);
@@ -96,7 +97,7 @@ class DynamicPatriciaTrieWritingHelper {
DynamicShortcutListPolicy *const mShortcutPolicy;
const bool mNeedsToDecay;
- bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate,
+ bool markNodeAsMovedAndSetPosition(const PtNodeParams *const toBeUpdatedPtNodeParams,
const int movedPos, const int bigramLinkedNodePos);
bool writePtNodeWithFullInfoToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
@@ -112,19 +113,17 @@ class DynamicPatriciaTrieWritingHelper {
bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos);
- bool setPtNodeProbability(const DynamicPatriciaTrieNodeReader *const originalNode,
- const int probability, const int *const codePoints, bool *const outAddedNewUnigram);
+ bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams, const int probability,
+ bool *const outAddedNewUnigram);
- bool createChildrenPtNodeArrayAndAChildPtNode(
- const DynamicPatriciaTrieNodeReader *const parentNode, const int probability,
- const int *const codePoints, const int codePointCount);
+ bool createChildrenPtNodeArrayAndAChildPtNode(const PtNodeParams *const parentPtNodeParams,
+ const int probability, const int *const codePoints, const int codePointCount);
bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints,
const int nodeCodePointCount, const int probability);
bool reallocatePtNodeAndAddNewPtNodes(
- const DynamicPatriciaTrieNodeReader *const reallocatingPtNode,
- const int *const reallocatingPtNodeCodePoints, const int overlappingCodePointCount,
+ const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
const int newNodeCodePointCount);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.cpp
index 30ff10cd6..67733660b 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h"
#include <cstddef>
#include <cstdlib>
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h
index af76bc6b5..5654105ee 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h
@@ -20,7 +20,7 @@
#include <cstddef>
#include "defines.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
namespace latinime {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dict_content.h
new file mode 100644
index 000000000..0c2f47073
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dict_content.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_CONTENT_H
+#define LATINIME_DICT_CONTENT_H
+
+#include "defines.h"
+
+namespace latinime {
+
+class DictContent {
+ public:
+ virtual ~DictContent() {}
+ virtual bool isValid() const = 0;
+
+ protected:
+ DictContent() {}
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DictContent);
+};
+} // namespace latinime
+#endif /* LATINIME_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h
new file mode 100644
index 000000000..e415881a9
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SINGLE_DICT_CONTENT_H
+#define LATINIME_SINGLE_DICT_CONTENT_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+
+namespace latinime {
+
+class SingleDictContent : public DictContent {
+ public:
+ SingleDictContent(const char *const dictDirPath, const char *const contentFileName,
+ const bool isUpdatable)
+ : mMmappedBuffer(MmappedBuffer::openBuffer(dictDirPath, contentFileName, isUpdatable)),
+ mExpandableContentBuffer(mMmappedBuffer.get() ? mMmappedBuffer.get()->getBuffer() : 0,
+ mMmappedBuffer.get() ? mMmappedBuffer.get()->getBufferSize() : 0,
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE) {}
+
+ virtual ~SingleDictContent() {}
+
+ virtual bool isValid() const {
+ return mMmappedBuffer.get() != 0;
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(SingleDictContent);
+
+ const MmappedBuffer::MmappedBufferPtr mMmappedBuffer;
+ BufferWithExtendableBuffer mExpandableContentBuffer;
+};
+} // namespace latinime
+#endif /* LATINIME_SINGLE_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h
new file mode 100644
index 000000000..446b51ed0
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SPARSE_TABLE_DICT_CONTENT_H
+#define LATINIME_SPARSE_TABLE_DICT_CONTENT_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+
+namespace latinime {
+
+// TODO: Support multiple contents.
+class SparseTableDictContent : public DictContent {
+ public:
+ AK_FORCE_INLINE SparseTableDictContent(const char *const dictDirPath,
+ const char *const lookupTableFileName, const char *const addressTableFileName,
+ const char *const contentFileName, const bool isUpdatable)
+ : mLookupTableBuffer(
+ MmappedBuffer::openBuffer(dictDirPath, lookupTableFileName, isUpdatable)),
+ mAddressTableBuffer(
+ MmappedBuffer::openBuffer(dictDirPath, addressTableFileName, isUpdatable)),
+ mContentBuffer(MmappedBuffer::openBuffer(dictDirPath, contentFileName, isUpdatable)),
+ mExpandableLookupTableBuffer(
+ mLookupTableBuffer.get() ? mLookupTableBuffer.get()->getBuffer() : 0,
+ mLookupTableBuffer.get() ? mLookupTableBuffer.get()->getBufferSize() : 0,
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
+ mExpandableAddressTableBuffer(
+ mAddressTableBuffer.get() ? mAddressTableBuffer.get()->getBuffer() : 0,
+ mAddressTableBuffer.get() ? mAddressTableBuffer.get()->getBufferSize() : 0,
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
+ mExpandableContentBuffer(mContentBuffer.get() ? mContentBuffer.get()->getBuffer() : 0,
+ mContentBuffer.get() ? mContentBuffer.get()->getBufferSize() : 0,
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE) {}
+
+ virtual ~SparseTableDictContent() {}
+
+ virtual bool isValid() const {
+ return mLookupTableBuffer.get() != 0 && mAddressTableBuffer.get() != 0
+ && mContentBuffer.get() != 0;
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTableDictContent);
+
+ // TODO: Have sparse table.
+ const MmappedBuffer::MmappedBufferPtr mLookupTableBuffer;
+ const MmappedBuffer::MmappedBufferPtr mAddressTableBuffer;
+ const MmappedBuffer::MmappedBufferPtr mContentBuffer;
+ BufferWithExtendableBuffer mExpandableLookupTableBuffer;
+ BufferWithExtendableBuffer mExpandableAddressTableBuffer;
+ BufferWithExtendableBuffer mExpandableContentBuffer;
+};
+} // namespace latinime
+#endif /* LATINIME_SPARSE_TABLE_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
new file mode 100644
index 000000000..1164c408a
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_DICT_BUFFER_H
+#define LATINIME_VER4_DICT_BUFFER_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+
+namespace latinime {
+
+class Ver4DictBuffers {
+ public:
+ typedef ExclusiveOwnershipPointer<Ver4DictBuffers> Ver4DictBuffersPtr;
+
+ static Ver4DictBuffersPtr openVer4DictBuffers(const char *const dictDirPath,
+ const MmappedBuffer::MmappedBufferPtr &dictBuffer) {
+ const bool isUpdatable = dictBuffer.get() ? dictBuffer.get()->isUpdatable() : false;
+ return Ver4DictBuffersPtr(new Ver4DictBuffers(dictDirPath, dictBuffer, isUpdatable));
+ }
+
+ AK_FORCE_INLINE bool isValid() const {
+ return mDictBuffer.get() != 0 && mProbabilityDictContent.isValid()
+ && mTerminalAddressTable.isValid() && mBigramDictContent.isValid()
+ && mShortcutDictContent.isValid();
+ }
+
+ AK_FORCE_INLINE const uint8_t *getRawDictBuffer() const {
+ return mDictBuffer.get()->getBuffer();
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictBuffers);
+
+ AK_FORCE_INLINE Ver4DictBuffers(const char *const dictDirPath,
+ const MmappedBuffer::MmappedBufferPtr &dictBuffer, const bool isUpdatable)
+ : mDictBuffer(dictBuffer),
+ mTerminalAddressTable(dictDirPath,
+ Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION, isUpdatable),
+ mProbabilityDictContent(dictDirPath, Ver4DictConstants::FREQ_FILE_EXTENSION,
+ isUpdatable),
+ mBigramDictContent(dictDirPath,
+ Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION,
+ Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION,
+ Ver4DictConstants::BIGRAM_FILE_EXTENSION, isUpdatable),
+ mShortcutDictContent(dictDirPath,
+ Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION,
+ Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION,
+ Ver4DictConstants::SHORTCUT_FILE_EXTENSION, isUpdatable) {}
+
+ const MmappedBuffer::MmappedBufferPtr mDictBuffer;
+ SingleDictContent mTerminalAddressTable;
+ SingleDictContent mProbabilityDictContent;
+ SparseTableDictContent mBigramDictContent;
+ SparseTableDictContent mShortcutDictContent;
+};
+} // namespace latinime
+#endif /* LATINIME_VER4_DICT_BUFFER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
new file mode 100644
index 000000000..0bfd07b04
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+
+namespace latinime {
+
+const char *const Ver4DictConstants::TRIE_FILE_EXTENSION = ".trie";
+const char *const Ver4DictConstants::FREQ_FILE_EXTENSION = ".freq";
+// tat = Terminal Address Table
+const char *const Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION = ".tat";
+const char *const Ver4DictConstants::BIGRAM_FILE_EXTENSION = ".bigram_freq";
+const char *const Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION = ".bigram_lookup";
+const char *const Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION = ".bigram_index_freq";
+const char *const Ver4DictConstants::SHORTCUT_FILE_EXTENSION = ".shortcut_shortcut";
+const char *const Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION = ".shortcut_lookup";
+const char *const Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION =
+ ".shortcut_index_shortcut";
+
+const int Ver4DictConstants::NOT_A_TERMINAL = -1;
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
new file mode 100644
index 000000000..6498ce428
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_DICT_CONSTANTS_H
+#define LATINIME_VER4_DICT_CONSTANTS_H
+
+#include "defines.h"
+
+namespace latinime {
+
+// Note that there are corresponding definitions in FormatSpec.java.
+class Ver4DictConstants {
+ public:
+ static const char *const TRIE_FILE_EXTENSION;
+ static const char *const FREQ_FILE_EXTENSION;
+ static const char *const TERMINAL_ADDRESS_TABLE_FILE_EXTENSION;
+ static const char *const BIGRAM_FILE_EXTENSION;
+ static const char *const BIGRAM_LOOKUP_TABLE_FILE_EXTENSION;
+ static const char *const BIGRAM_CONTENT_TABLE_FILE_EXTENSION;
+ static const char *const SHORTCUT_FILE_EXTENSION;
+ static const char *const SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION;
+ static const char *const SHORTCUT_CONTENT_TABLE_FILE_EXTENSION;
+
+ static const int NOT_A_TERMINAL;
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictConstants);
+};
+} // namespace latinime
+#endif /* LATINIME_VER4_DICT_CONSTANTS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
new file mode 100644
index 000000000..b9ee4891c
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h"
+
+namespace latinime {
+
+void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
+ DicNodeVector *const childDicNodes) const {
+ // TODO: Implement.
+}
+
+int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
+ const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
+ int *const outUnigramProbability) const {
+ // TODO: Implement.
+ return 0;
+}
+
+int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
+ const int length, const bool forceLowerCaseSearch) const {
+ // TODO: Implement.
+ return NOT_A_DICT_POS;
+}
+
+int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability,
+ const int bigramProbability) const {
+ // TODO: Implement.
+ return NOT_A_PROBABILITY;
+}
+
+int Ver4PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const {
+ // TODO: Implement.
+ return NOT_A_PROBABILITY;
+}
+
+int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
+ // TODO: Implement.
+ return NOT_A_DICT_POS;
+}
+
+int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
+ // TODO: Implement.
+ return NOT_A_DICT_POS;
+}
+
+bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length,
+ const int probability) {
+ // TODO: Implement.
+ return false;
+}
+
+bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int length0,
+ const int *const word1, const int length1, const int probability) {
+ // TODO: Implement.
+ return false;
+}
+
+bool Ver4PatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0,
+ const int *const word1, const int length1) {
+ // TODO: Implement.
+ return false;
+}
+
+void Ver4PatriciaTriePolicy::flush(const char *const filePath) {
+ // TODO: Implement.
+}
+
+void Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {
+ // TODO: Implement.
+}
+
+bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
+ // TODO: Implement.
+ return false;
+}
+
+void Ver4PatriciaTriePolicy::getProperty(const char *const query, char *const outResult,
+ const int maxResultLength) {
+ // TODO: Implement.
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
new file mode 100644
index 000000000..f7bfb3b0d
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_PATRICIA_TRIE_POLICY_H
+#define LATINIME_VER4_PATRICIA_TRIE_POLICY_H
+
+#include "defines.h"
+#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+class DicNode;
+class DicNodeVector;
+
+// TODO: Implement.
+class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
+ public:
+ Ver4PatriciaTriePolicy(const Ver4DictBuffers::Ver4DictBuffersPtr &buffers)
+ : mBuffers(buffers),
+ mHeaderPolicy(mBuffers.get()->getRawDictBuffer(), FormatUtils::VERSION_4) {};
+
+ AK_FORCE_INLINE int getRootPosition() const {
+ return 0;
+ }
+
+ void createAndGetAllChildDicNodes(const DicNode *const dicNode,
+ DicNodeVector *const childDicNodes) const;
+
+ int getCodePointsAndProbabilityAndReturnCodePointCount(
+ const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints,
+ int *const outUnigramProbability) const;
+
+ int getTerminalPtNodePositionOfWord(const int *const inWord,
+ const int length, const bool forceLowerCaseSearch) const;
+
+ int getProbability(const int unigramProbability, const int bigramProbability) const;
+
+ int getUnigramProbabilityOfPtNode(const int ptNodePos) const;
+
+ int getShortcutPositionOfPtNode(const int ptNodePos) const;
+
+ int getBigramsPositionOfPtNode(const int ptNodePos) const;
+
+ const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
+ return &mHeaderPolicy;
+ }
+
+ const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const {
+ return 0;
+ }
+
+ const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
+ return 0;
+ }
+
+ bool addUnigramWord(const int *const word, const int length, const int probability);
+
+ bool addBigramWords(const int *const word0, const int length0, const int *const word1,
+ const int length1, const int probability);
+
+ bool removeBigramWords(const int *const word0, const int length0, const int *const word1,
+ const int length1);
+
+ void flush(const char *const filePath);
+
+ void flushWithGC(const char *const filePath);
+
+ bool needsToRunGC(const bool mindsBlockByGC) const;
+
+ void getProperty(const char *const query, char *const outResult,
+ const int maxResultLength);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy);
+
+ const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers;
+ const HeaderPolicy mHeaderPolicy;
+};
+} // namespace latinime
+#endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
index f692882f2..5032131ab 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
@@ -18,7 +18,7 @@
namespace latinime {
-const size_t BufferWithExtendableBuffer::MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024;
+const size_t BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024;
const int BufferWithExtendableBuffer::NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE = 90;
// TODO: Needs to allocate larger memory corresponding to the current vector size.
const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 128 * 1024;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
index 9dc34823c..1e27a1bec 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
@@ -32,12 +32,20 @@ namespace latinime {
// raw pointer but provides several methods that handle boundary checking for writing data.
class BufferWithExtendableBuffer {
public:
+ static const size_t DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE;
+
BufferWithExtendableBuffer(uint8_t *const originalBuffer, const int originalBufferSize,
- const int maxAdditionalBufferSize = MAX_ADDITIONAL_BUFFER_SIZE)
+ const int maxAdditionalBufferSize)
: mOriginalBuffer(originalBuffer), mOriginalBufferSize(originalBufferSize),
mAdditionalBuffer(EXTEND_ADDITIONAL_BUFFER_SIZE_STEP), mUsedAdditionalBufferSize(0),
mMaxAdditionalBufferSize(maxAdditionalBufferSize) {}
+ // Without original buffer.
+ BufferWithExtendableBuffer(const int maxAdditionalBufferSize)
+ : mOriginalBuffer(0), mOriginalBufferSize(0),
+ mAdditionalBuffer(EXTEND_ADDITIONAL_BUFFER_SIZE_STEP), mUsedAdditionalBufferSize(0),
+ mMaxAdditionalBufferSize(maxAdditionalBufferSize) {}
+
AK_FORCE_INLINE int getTailPosition() const {
return mOriginalBufferSize + mUsedAdditionalBufferSize;
}
@@ -86,7 +94,6 @@ class BufferWithExtendableBuffer {
private:
DISALLOW_COPY_AND_ASSIGN(BufferWithExtendableBuffer);
- static const size_t MAX_ADDITIONAL_BUFFER_SIZE;
static const int NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE;
static const size_t EXTEND_ADDITIONAL_BUFFER_SIZE_STEP;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
index 994826fa8..b48e5b005 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
@@ -20,7 +20,7 @@
#include <cstring>
#include "suggest/policyimpl/dictionary/header/header_policy.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
@@ -33,6 +33,9 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
switch (dictVersion) {
case 3:
return createEmptyV3DictFile(filePath, attributeMap);
+ case 4:
+ // TODO: Support version 4 dictionary format.
+ return false;
default:
// Only version 3 dictionary is supported for now.
return false;
@@ -41,12 +44,14 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
/* static */ bool DictFileWritingUtils::createEmptyV3DictFile(const char *const filePath,
const HeaderReadWriteUtils::AttributeMap *const attributeMap) {
- BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
+ BufferWithExtendableBuffer headerBuffer(
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
HeaderPolicy headerPolicy(FormatUtils::VERSION_3, attributeMap);
headerPolicy.writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */,
true /* updatesLastDecayedTime */, 0 /* unigramCount */, 0 /* bigramCount */,
0 /* extendedRegionSize */);
- BufferWithExtendableBuffer bodyBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
+ BufferWithExtendableBuffer bodyBuffer(
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
if (!DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(&bodyBuffer, 0 /* rootPos */)) {
return false;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h
new file mode 100644
index 000000000..59b894fa6
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_FILE_UTILS_H
+#define LATINIME_FILE_UTILS_H
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "defines.h"
+
+namespace latinime {
+
+class FileUtils {
+ public:
+ // Returns -1 on error.
+ static int getFileSize(const char *const filePath) {
+ const int fd = open(filePath, O_RDONLY);
+ if (fd == -1) {
+ return -1;
+ }
+ struct stat statBuf;
+ if (fstat(fd, &statBuf) != 0) {
+ close(fd);
+ return -1;
+ }
+ close(fd);
+ return static_cast<int>(statBuf.st_size);
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(FileUtils);
+};
+} // namespace latinime
+#endif /* LATINIME_FILE_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
index 1d77d5c27..4843650ad 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
@@ -45,6 +45,8 @@ const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 12;
return VERSION_2;
} else if (ByteArrayUtils::readUint16(dict, 4) == 3) {
return VERSION_3;
+ } else if (ByteArrayUtils::readUint16(dict, 4) == 4) {
+ return VERSION_4;
} else {
return UNKNOWN_VERSION;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
index 79ed0de29..b90393a53 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
@@ -31,6 +31,7 @@ class FormatUtils {
enum FORMAT_VERSION {
VERSION_2,
VERSION_3,
+ VERSION_4,
UNKNOWN_VERSION
};
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.cpp
new file mode 100644
index 000000000..71f863290
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.cpp
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+
+#include <cerrno>
+#include <climits>
+#include <cstdio>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "suggest/policyimpl/dictionary/utils/file_utils.h"
+
+namespace latinime {
+
+/* static */ MmappedBuffer::MmappedBufferPtr MmappedBuffer::openBuffer(
+ const char *const path, const int bufferOffset, const int bufferSize,
+ const bool isUpdatable) {
+ const int openMode = isUpdatable ? O_RDWR : O_RDONLY;
+ const int mmapFd = open(path, openMode);
+ if (mmapFd < 0) {
+ AKLOGE("DICT: Can't open the source. path=%s errno=%d", path, errno);
+ return MmappedBufferPtr(0);
+ }
+ const int pagesize = getpagesize();
+ const int offset = bufferOffset % pagesize;
+ int alignedOffset = bufferOffset - offset;
+ int alignedSize = bufferSize + offset;
+ const int protMode = isUpdatable ? PROT_READ | PROT_WRITE : PROT_READ;
+ void *const mmappedBuffer = mmap(0, alignedSize, protMode, MAP_PRIVATE, mmapFd,
+ alignedOffset);
+ if (mmappedBuffer == MAP_FAILED) {
+ AKLOGE("DICT: Can't mmap dictionary. errno=%d", errno);
+ close(mmapFd);
+ return MmappedBufferPtr(0);
+ }
+ uint8_t *const buffer = static_cast<uint8_t *>(mmappedBuffer) + offset;
+ if (!buffer) {
+ AKLOGE("DICT: buffer is null");
+ close(mmapFd);
+ return MmappedBufferPtr(0);
+ }
+ return MmappedBufferPtr(new MmappedBuffer(buffer, bufferSize, mmappedBuffer, alignedSize,
+ mmapFd, isUpdatable));
+}
+
+/* static */ MmappedBuffer::MmappedBufferPtr MmappedBuffer::openBuffer(
+ const char *const path, const bool isUpdatable) {
+ const int fileSize = FileUtils::getFileSize(path);
+ if (fileSize == -1) {
+ return MmappedBufferPtr(0);
+ } else if (fileSize == 0) {
+ return MmappedBufferPtr(new MmappedBuffer(isUpdatable));
+ } else {
+ return openBuffer(path, 0 /* bufferOffset */, fileSize, isUpdatable);
+ }
+}
+
+/* static */ MmappedBuffer::MmappedBufferPtr MmappedBuffer::openBuffer(
+ const char *const dirPath, const char *const fileName, const bool isUpdatable) {
+ const int filePathBufferSize = PATH_MAX + 1 /* terminator */;
+ char filePath[filePathBufferSize];
+ const int filePathLength = snprintf(filePath, filePathBufferSize, "%s%s", dirPath,
+ fileName);
+ if (filePathLength >= filePathBufferSize) {
+ return 0;
+ }
+ return openBuffer(filePath, isUpdatable);
+}
+
+MmappedBuffer::~MmappedBuffer() {
+ if (mAlignedSize == 0) {
+ return;
+ }
+ int ret = munmap(mMmappedBuffer, mAlignedSize);
+ if (ret != 0) {
+ AKLOGE("DICT: Failure in munmap. ret=%d errno=%d", ret, errno);
+ }
+ ret = close(mMmapFd);
+ if (ret != 0) {
+ AKLOGE("DICT: Failure in close. ret=%d errno=%d", ret, errno);
+ }
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h
index 6b69116eb..73a733b0c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h
@@ -17,58 +17,27 @@
#ifndef LATINIME_MMAPPED_BUFFER_H
#define LATINIME_MMAPPED_BUFFER_H
-#include <cerrno>
-#include <fcntl.h>
#include <stdint.h>
-#include <sys/mman.h>
-#include <unistd.h>
#include "defines.h"
+#include "utils/exclusive_ownership_pointer.h"
namespace latinime {
class MmappedBuffer {
public:
- static MmappedBuffer* openBuffer(const char *const path, const int bufferOffset,
- const int bufferSize, const bool isUpdatable) {
- const int openMode = isUpdatable ? O_RDWR : O_RDONLY;
- const int mmapFd = open(path, openMode);
- if (mmapFd < 0) {
- AKLOGE("DICT: Can't open the source. path=%s errno=%d", path, errno);
- return 0;
- }
- const int pagesize = getpagesize();
- const int offset = bufferOffset % pagesize;
- int alignedOffset = bufferOffset - offset;
- int alignedSize = bufferSize + offset;
- const int protMode = isUpdatable ? PROT_READ | PROT_WRITE : PROT_READ;
- void *const mmappedBuffer = mmap(0, alignedSize, protMode, MAP_PRIVATE, mmapFd,
- alignedOffset);
- if (mmappedBuffer == MAP_FAILED) {
- AKLOGE("DICT: Can't mmap dictionary. errno=%d", errno);
- close(mmapFd);
- return 0;
- }
- uint8_t *const buffer = static_cast<uint8_t *>(mmappedBuffer) + offset;
- if (!buffer) {
- AKLOGE("DICT: buffer is null");
- close(mmapFd);
- return 0;
- }
- return new MmappedBuffer(buffer, bufferSize, mmappedBuffer, alignedSize, mmapFd,
- isUpdatable);
- }
+ typedef ExclusiveOwnershipPointer<MmappedBuffer> MmappedBufferPtr;
- ~MmappedBuffer() {
- int ret = munmap(mMmappedBuffer, mAlignedSize);
- if (ret != 0) {
- AKLOGE("DICT: Failure in munmap. ret=%d errno=%d", ret, errno);
- }
- ret = close(mMmapFd);
- if (ret != 0) {
- AKLOGE("DICT: Failure in close. ret=%d errno=%d", ret, errno);
- }
- }
+ static MmappedBufferPtr openBuffer(const char *const path,
+ const int bufferOffset, const int bufferSize, const bool isUpdatable);
+
+ // Mmap entire file.
+ static MmappedBufferPtr openBuffer(const char *const path, const bool isUpdatable);
+
+ static MmappedBufferPtr openBuffer(const char *const dirPath, const char *const fileName,
+ const bool isUpdatable);
+
+ ~MmappedBuffer();
AK_FORCE_INLINE uint8_t *getBuffer() const {
return mBuffer;
@@ -89,6 +58,11 @@ class MmappedBuffer {
: mBuffer(buffer), mBufferSize(bufferSize), mMmappedBuffer(mmappedBuffer),
mAlignedSize(alignedSize), mMmapFd(mmapFd), mIsUpdatable(isUpdatable) {}
+ // Empty file. We have to handle an empty file as a valid part of a dictionary.
+ AK_FORCE_INLINE MmappedBuffer(const bool isUpdatable)
+ : mBuffer(0), mBufferSize(0), mMmappedBuffer(0), mAlignedSize(0), mMmapFd(0),
+ mIsUpdatable(isUpdatable) {}
+
DISALLOW_IMPLICIT_CONSTRUCTORS(MmappedBuffer);
uint8_t *const mBuffer;
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h
index 007c19e0a..fd0ac9eb6 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h
@@ -81,7 +81,7 @@ class TypingTraversal : public Traversal {
return false;
}
const int point0Index = dicNode->getInputIndex(0);
- return dicNode->isTerminalWordNode()
+ return dicNode->isTerminalDicNode()
&& traverseSession->getProximityInfoState(0)->
hasSpaceProximity(point0Index);
}
@@ -96,7 +96,7 @@ class TypingTraversal : public Traversal {
if (dicNode->isCompletion(inputSize)) {
return false;
}
- if (!dicNode->isTerminalWordNode()) {
+ if (!dicNode->isTerminalDicNode()) {
return false;
}
const int16_t pointIndex = dicNode->getInputIndex(0);
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp b/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp
index 5b6b5e874..54f65c786 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp
+++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp
@@ -23,39 +23,64 @@ namespace latinime {
const TypingWeighting TypingWeighting::sInstance;
-ErrorType TypingWeighting::getErrorType(const CorrectionType correctionType,
+ErrorTypeUtils::ErrorType TypingWeighting::getErrorType(const CorrectionType correctionType,
const DicTraverseSession *const traverseSession, const DicNode *const parentDicNode,
const DicNode *const dicNode) const {
switch (correctionType) {
case CT_MATCH:
if (isProximityDicNode(traverseSession, dicNode)) {
- return ET_PROXIMITY_CORRECTION;
+ return ErrorTypeUtils::PROXIMITY_CORRECTION;
+ } else if (dicNode->isInDigraph()) {
+ return ErrorTypeUtils::MATCH_WITH_DIGRAPH;
} else {
- return ET_NOT_AN_ERROR;
+ // Compare the node code point with original primary code point on the keyboard.
+ const ProximityInfoState *const pInfoState =
+ traverseSession->getProximityInfoState(0);
+ const int primaryOriginalCodePoint = pInfoState->getPrimaryOriginalCodePointAt(
+ dicNode->getInputIndex(0));
+ const int nodeCodePoint = dicNode->getNodeCodePoint();
+ if (primaryOriginalCodePoint == nodeCodePoint) {
+ // Node code point is same as original code point on the keyboard.
+ return ErrorTypeUtils::NOT_AN_ERROR;
+ } else if (CharUtils::toLowerCase(primaryOriginalCodePoint) ==
+ CharUtils::toLowerCase(nodeCodePoint)) {
+ // Only cases of the code points are different.
+ return ErrorTypeUtils::MATCH_WITH_CASE_ERROR;
+ } else if (CharUtils::toBaseCodePoint(primaryOriginalCodePoint) ==
+ CharUtils::toBaseCodePoint(nodeCodePoint)) {
+ // Node code point is a variant of original code point.
+ return ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR;
+ } else {
+ // Node code point is a variant of original code point and the cases are also
+ // different.
+ return ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR
+ | ErrorTypeUtils::MATCH_WITH_CASE_ERROR;
+ }
}
+ break;
case CT_ADDITIONAL_PROXIMITY:
- return ET_PROXIMITY_CORRECTION;
+ return ErrorTypeUtils::PROXIMITY_CORRECTION;
case CT_OMISSION:
if (parentDicNode->canBeIntentionalOmission()) {
- return ET_INTENTIONAL_OMISSION;
+ return ErrorTypeUtils::INTENTIONAL_OMISSION;
} else {
- return ET_EDIT_CORRECTION;
+ return ErrorTypeUtils::EDIT_CORRECTION;
}
break;
case CT_SUBSTITUTION:
case CT_INSERTION:
case CT_TERMINAL_INSERTION:
case CT_TRANSPOSITION:
- return ET_EDIT_CORRECTION;
+ return ErrorTypeUtils::EDIT_CORRECTION;
case CT_NEW_WORD_SPACE_OMISSION:
case CT_NEW_WORD_SPACE_SUBSTITUTION:
- return ET_NEW_WORD;
+ return ErrorTypeUtils::NEW_WORD;
case CT_TERMINAL:
- return ET_NOT_AN_ERROR;
+ return ErrorTypeUtils::NOT_AN_ERROR;
case CT_COMPLETION:
- return ET_COMPLETION;
+ return ErrorTypeUtils::COMPLETION;
default:
- return ET_NOT_AN_ERROR;
+ return ErrorTypeUtils::NOT_AN_ERROR;
}
}
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
index 9f0a331e3..41314ef52 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
@@ -19,6 +19,7 @@
#include "defines.h"
#include "suggest/core/dicnode/dic_node_utils.h"
+#include "suggest/core/dictionary/error_type_utils.h"
#include "suggest/core/layout/touch_position_correction_utils.h"
#include "suggest/core/policy/weighting.h"
#include "suggest/core/session/dic_traverse_session.h"
@@ -204,7 +205,7 @@ class TypingWeighting : public Weighting {
return cost * traverseSession->getMultiWordCostMultiplier();
}
- ErrorType getErrorType(const CorrectionType correctionType,
+ ErrorTypeUtils::ErrorType getErrorType(const CorrectionType correctionType,
const DicTraverseSession *const traverseSession,
const DicNode *const parentDicNode, const DicNode *const dicNode) const;
diff --git a/native/jni/src/utils/exclusive_ownership_pointer.h b/native/jni/src/utils/exclusive_ownership_pointer.h
new file mode 100644
index 000000000..3cf78954a
--- /dev/null
+++ b/native/jni/src/utils/exclusive_ownership_pointer.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_EXCLUSIVE_OWNERSHIP_POINTER_H
+#define LATINIME_EXCLUSIVE_OWNERSHIP_POINTER_H
+
+#include "defines.h"
+
+namespace latinime {
+
+template<class T>
+class ExclusiveOwnershipPointer {
+ public:
+ // This instance become an owner of the raw pointer.
+ ExclusiveOwnershipPointer(T *const rawPointer)
+ : mPointer(rawPointer),
+ mSharedOwnerPtr(new (ExclusiveOwnershipPointer<T> *)(this)) {}
+
+ // Move the ownership.
+ ExclusiveOwnershipPointer(const ExclusiveOwnershipPointer<T> &pointer)
+ : mPointer(pointer.mPointer), mSharedOwnerPtr(pointer.mSharedOwnerPtr) {
+ transferOwnership(&pointer);
+ }
+
+ ~ExclusiveOwnershipPointer() {
+ deletePointersIfHavingOwnership();
+ }
+
+ // Move the ownership.
+ ExclusiveOwnershipPointer<T> &operator=(const ExclusiveOwnershipPointer<T> &pointer) {
+ // Delete pointers when this is an owner of another pointer.
+ deletePointersIfHavingOwnership();
+ mPointer = pointer.mPointer;
+ mSharedOwnerPtr = pointer.mSharedOwnerPtr;
+ transferOwnership(pointer);
+ return *this;
+ }
+
+ T *get() const {
+ return mPointer;
+ }
+
+ private:
+ // This class allows to copy and assign and ensures only one instance has the ownership of the
+ // managed pointer.
+
+ ExclusiveOwnershipPointer() : mPointer(0), mSharedOwnerPtr(0) {}
+
+ void transferOwnership(const ExclusiveOwnershipPointer<T> *const src) {
+ if (*mSharedOwnerPtr != src) {
+ AKLOGE("Failed to transfer the ownership because src is not the current owner."
+ "src: %p, owner: %p", src, *mSharedOwnerPtr);
+ ASSERT(false);
+ return;
+ }
+ // Transfer the ownership from src to this instance.
+ *mSharedOwnerPtr = this;
+ }
+
+ void deletePointersIfHavingOwnership() {
+ if (mSharedOwnerPtr && *mSharedOwnerPtr == this) {
+ if (mPointer) {
+ if (DEBUG_DICT) {
+ AKLOGI("Releasing pointer: %p", mPointer);
+ }
+ delete mPointer;
+ }
+ delete mSharedOwnerPtr;
+ }
+ }
+
+ T *mPointer;
+ // mSharedOwnerPtr points a shared memory space where the instance which has the ownership is
+ // stored.
+ ExclusiveOwnershipPointer<T> **mSharedOwnerPtr;
+};
+} // namespace latinime
+#endif /* LATINIME_EXCLUSIVE_OWNERSHIP_POINTER_H */