diff options
author | 2013-12-13 17:09:16 +0900 | |
---|---|---|
committer | 2013-12-13 17:13:32 +0900 | |
commit | 2fa3693c264a4c150ac307d9bb7f6f8f18cc4ffc (patch) | |
tree | 56a5652edf71dd19d04161f72e3e013608cc2a9c /native/jni/src/suggest/core | |
parent | 18d033405c18a8dc28f60ca22d1d0df23a679384 (diff) | |
download | latinime-2fa3693c264a4c150ac307d9bb7f6f8f18cc4ffc.tar.gz latinime-2fa3693c264a4c150ac307d9bb7f6f8f18cc4ffc.tar.xz latinime-2fa3693c264a4c150ac307d9bb7f6f8f18cc4ffc.zip |
Reset to 9bd6dac4708ad94fd0257c53e977df62b152e20c
The bulk merge from -bayo to klp-dev should not have been merged to master.
Change-Id: I527a03a76f5247e4939a672f27c314dc11cbb854
Diffstat (limited to 'native/jni/src/suggest/core')
30 files changed, 612 insertions, 319 deletions
diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h index 49cfdecac..0b2b4a9e8 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node.h +++ b/native/jni/src/suggest/core/dicnode/dic_node.h @@ -99,7 +99,7 @@ class DicNode { virtual ~DicNode() {} // Init for copy - void initByCopy(const DicNode *dicNode) { + void initByCopy(const DicNode *const dicNode) { mIsUsed = true; mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion; mDicNodeProperties.init(&dicNode->mDicNodeProperties); @@ -107,25 +107,25 @@ class DicNode { PROF_NODE_COPY(&dicNode->mProfiler, mProfiler); } - // Init for root with prevWordNodePos which is used for bigram - void initAsRoot(const int rootGroupPos, const int prevWordNodePos) { + // Init for root with prevWordPtNodePos which is used for bigram + void initAsRoot(const int rootPtNodeArrayPos, const int prevWordPtNodePos) { mIsUsed = true; mIsCachedForNextSuggestion = false; mDicNodeProperties.init( - NOT_A_DICT_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */, + NOT_A_DICT_POS /* pos */, rootPtNodeArrayPos, NOT_A_CODE_POINT /* nodeCodePoint */, NOT_A_PROBABILITY /* probability */, false /* isTerminal */, true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */, 0 /* terminalDepth */); - mDicNodeState.init(prevWordNodePos); + mDicNodeState.init(prevWordPtNodePos); PROF_NODE_RESET(mProfiler); } // Init for root with previous word - void initAsRootWithPreviousWord(DicNode *dicNode, const int rootGroupPos) { + void initAsRootWithPreviousWord(const DicNode *const dicNode, const int rootPtNodeArrayPos) { mIsUsed = true; mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion; mDicNodeProperties.init( - NOT_A_DICT_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */, + NOT_A_DICT_POS /* pos */, rootPtNodeArrayPos, NOT_A_CODE_POINT /* nodeCodePoint */, NOT_A_PROBABILITY /* probability */, false /* isTerminal */, true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */, 0 /* terminalDepth */); @@ -138,7 +138,7 @@ class DicNode { mDicNodeState.mDicNodeStatePrevWord.init( dicNode->mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() + 1, dicNode->mDicNodeProperties.getProbability(), - dicNode->mDicNodeProperties.getPos(), + dicNode->mDicNodeProperties.getPtNodePos(), dicNode->mDicNodeState.mDicNodeStatePrevWord.mPrevWord, dicNode->mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(), dicNode->getOutputWordBuf(), @@ -148,26 +148,27 @@ class DicNode { PROF_NODE_COPY(&dicNode->mProfiler, mProfiler); } - void initAsPassingChild(DicNode *parentNode) { + void initAsPassingChild(DicNode *parentDicNode) { mIsUsed = true; - mIsCachedForNextSuggestion = parentNode->mIsCachedForNextSuggestion; - const int c = parentNode->getNodeTypedCodePoint(); - mDicNodeProperties.init(&parentNode->mDicNodeProperties, c); - mDicNodeState.init(&parentNode->mDicNodeState); - PROF_NODE_COPY(&parentNode->mProfiler, mProfiler); + mIsCachedForNextSuggestion = parentDicNode->mIsCachedForNextSuggestion; + const int parentCodePoint = parentDicNode->getNodeTypedCodePoint(); + mDicNodeProperties.init(&parentDicNode->mDicNodeProperties, parentCodePoint); + mDicNodeState.init(&parentDicNode->mDicNodeState); + PROF_NODE_COPY(&parentDicNode->mProfiler, mProfiler); } - void initAsChild(const DicNode *const dicNode, const int pos, const int childrenPos, - const int probability, const bool isTerminal, const bool hasChildren, - const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount, - const int *const mergedNodeCodePoints) { + void initAsChild(const DicNode *const dicNode, const int ptNodePos, + const int childrenPtNodeArrayPos, const int probability, const bool isTerminal, + const bool hasChildren, const bool isBlacklistedOrNotAWord, + const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) { mIsUsed = true; uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1); mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion; const uint16_t newLeavingDepth = static_cast<uint16_t>( dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount); - mDicNodeProperties.init(pos, childrenPos, mergedNodeCodePoints[0], probability, - isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth, newLeavingDepth); + mDicNodeProperties.init(ptNodePos, childrenPtNodeArrayPos, mergedNodeCodePoints[0], + probability, isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth, + newLeavingDepth); mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount, mergedNodeCodePoints); PROF_NODE_COPY(&dicNode->mProfiler, mProfiler); @@ -234,7 +235,7 @@ class DicNode { } bool isFirstWord() const { - return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos() == NOT_A_DICT_POS; + return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos() == NOT_A_DICT_POS; } bool isCompletion(const int inputSize) const { @@ -246,29 +247,30 @@ class DicNode { } // Used to get bigram probability in DicNodeUtils - int getPos() const { - return mDicNodeProperties.getPos(); + int getPtNodePos() const { + return mDicNodeProperties.getPtNodePos(); } // Used to get bigram probability in DicNodeUtils - int getPrevWordPos() const { - return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos(); + int getPrevWordTerminalPtNodePos() const { + return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos(); } // Used in DicNodeUtils - int getChildrenPos() const { - return mDicNodeProperties.getChildrenPos(); + int getChildrenPtNodeArrayPos() const { + return mDicNodeProperties.getChildrenPtNodeArrayPos(); } int getProbability() const { return mDicNodeProperties.getProbability(); } - AK_FORCE_INLINE bool isTerminalWordNode() const { - const bool isTerminalNodes = mDicNodeProperties.isTerminal(); - const int currentNodeDepth = getNodeCodePointCount(); - const int terminalNodeDepth = mDicNodeProperties.getLeavingDepth(); - return isTerminalNodes && currentNodeDepth > 0 && currentNodeDepth == terminalNodeDepth; + AK_FORCE_INLINE bool isTerminalDicNode() const { + const bool isTerminalPtNode = mDicNodeProperties.isTerminal(); + const int currentDicNodeDepth = getNodeCodePointCount(); + const int terminalDicNodeDepth = mDicNodeProperties.getLeavingDepth(); + return isTerminalPtNode && currentDicNodeDepth > 0 + && currentDicNodeDepth == terminalDicNodeDepth; } bool shouldBeFilteredBySafetyNetForBigram() const { @@ -374,8 +376,8 @@ class DicNode { } // Used to commit input partially - int getPrevWordNodePos() const { - return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos(); + int getPrevWordPtNodePos() const { + return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos(); } AK_FORCE_INLINE const int *getOutputWordBuf() const { @@ -410,7 +412,7 @@ class DicNode { // TODO: Remove once touch path is merged into ProximityInfoState // Note: Returned codepoint may be a digraph codepoint if the node is in a composite glyph. int getNodeCodePoint() const { - const int codePoint = mDicNodeProperties.getNodeCodePoint(); + const int codePoint = mDicNodeProperties.getDicNodeCodePoint(); const DigraphUtils::DigraphCodePointIndex digraphIndex = mDicNodeState.mDicNodeStateScoring.getDigraphIndex(); if (digraphIndex == DigraphUtils::NOT_A_DIGRAPH_INDEX) { @@ -423,8 +425,8 @@ class DicNode { // Utils for cost calculation // //////////////////////////////// AK_FORCE_INLINE bool isSameNodeCodePoint(const DicNode *const dicNode) const { - return mDicNodeProperties.getNodeCodePoint() - == dicNode->mDicNodeProperties.getNodeCodePoint(); + return mDicNodeProperties.getDicNodeCodePoint() + == dicNode->mDicNodeProperties.getDicNodeCodePoint(); } // TODO: remove @@ -574,7 +576,8 @@ class DicNode { // Caveat: Must not be called outside Weighting // This restriction is guaranteed by "friend" AK_FORCE_INLINE void addCost(const float spatialCost, const float languageCost, - const bool doNormalization, const int inputSize, const ErrorType errorType) { + const bool doNormalization, const int inputSize, + const ErrorTypeUtils::ErrorType errorType) { if (DEBUG_GEO_FULL) { LOGI_SHOW_ADD_COST_PROP; } diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp index ec65114c7..5540b6df5 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp +++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp @@ -22,7 +22,6 @@ #include "suggest/core/dicnode/dic_node_vector.h" #include "suggest/core/dictionary/multi_bigram_map.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" -#include "utils/char_utils.h" namespace latinime { @@ -32,19 +31,20 @@ namespace latinime { /* static */ void DicNodeUtils::initAsRoot( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - const int prevWordNodePos, DicNode *const newRootNode) { - newRootNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordNodePos); + const int prevWordPtNodePos, DicNode *const newRootDicNode) { + newRootDicNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordPtNodePos); } /*static */ void DicNodeUtils::initAsRootWithPreviousWord( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - DicNode *const prevWordLastNode, DicNode *const newRootNode) { - newRootNode->initAsRootWithPreviousWord( - prevWordLastNode, dictionaryStructurePolicy->getRootPosition()); + const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode) { + newRootDicNode->initAsRootWithPreviousWord( + prevWordLastDicNode, dictionaryStructurePolicy->getRootPosition()); } -/* static */ void DicNodeUtils::initByCopy(DicNode *srcNode, DicNode *destNode) { - destNode->initByCopy(srcNode); +/* static */ void DicNodeUtils::initByCopy(const DicNode *const srcDicNode, + DicNode *const destDicNode) { + destDicNode->initByCopy(srcDicNode); } /////////////////////////////////// @@ -52,14 +52,14 @@ namespace latinime { /////////////////////////////////// /* static */ void DicNodeUtils::getAllChildDicNodes(DicNode *dicNode, const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - DicNodeVector *childDicNodes) { + DicNodeVector *const childDicNodes) { if (dicNode->isTotalInputSizeExceedingLimit()) { return; } if (!dicNode->isLeavingNode()) { childDicNodes->pushPassingChild(dicNode); } else { - dictionaryStructurePolicy->createAndGetAllChildNodes(dicNode, childDicNodes); + dictionaryStructurePolicy->createAndGetAllChildDicNodes(dicNode, childDicNodes); } } @@ -71,11 +71,11 @@ namespace latinime { */ /* static */ float DicNodeUtils::getBigramNodeImprobability( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - const DicNode *const node, MultiBigramMap *multiBigramMap) { - if (node->hasMultipleWords() && !node->isValidMultipleWordSuggestion()) { + const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) { + if (dicNode->hasMultipleWords() && !dicNode->isValidMultipleWordSuggestion()) { return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); } - const int probability = getBigramNodeProbability(dictionaryStructurePolicy, node, + const int probability = getBigramNodeProbability(dictionaryStructurePolicy, dicNode, multiBigramMap); // TODO: This equation to calculate the improbability looks unreasonable. Investigate this. const float cost = static_cast<float>(MAX_PROBABILITY - probability) @@ -85,19 +85,19 @@ namespace latinime { /* static */ int DicNodeUtils::getBigramNodeProbability( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - const DicNode *const node, MultiBigramMap *multiBigramMap) { - const int unigramProbability = node->getProbability(); - const int wordPos = node->getPos(); - const int prevWordPos = node->getPrevWordPos(); - if (NOT_A_DICT_POS == wordPos || NOT_A_DICT_POS == prevWordPos) { + const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) { + const int unigramProbability = dicNode->getProbability(); + const int ptNodePos = dicNode->getPtNodePos(); + const int prevWordTerminalPtNodePos = dicNode->getPrevWordTerminalPtNodePos(); + if (NOT_A_DICT_POS == ptNodePos || NOT_A_DICT_POS == prevWordTerminalPtNodePos) { // Note: Normally wordPos comes from the dictionary and should never equal // NOT_A_VALID_WORD_POS. return dictionaryStructurePolicy->getProbability(unigramProbability, NOT_A_PROBABILITY); } if (multiBigramMap) { - return multiBigramMap->getBigramProbability(dictionaryStructurePolicy, prevWordPos, - wordPos, unigramProbability); + return multiBigramMap->getBigramProbability(dictionaryStructurePolicy, + prevWordTerminalPtNodePos, ptNodePos, unigramProbability); } return dictionaryStructurePolicy->getProbability(unigramProbability, NOT_A_PROBABILITY); @@ -109,7 +109,7 @@ namespace latinime { // TODO: Move to char_utils? /* static */ int DicNodeUtils::appendTwoWords(const int *const src0, const int16_t length0, - const int *const src1, const int16_t length1, int *dest) { + const int *const src1, const int16_t length1, int *const dest) { int actualLength0 = 0; for (int i = 0; i < length0; ++i) { if (src0[i] == 0) { diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.h b/native/jni/src/suggest/core/dicnode/dic_node_utils.h index 3fb351a61..3f1514a52 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_utils.h +++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.h @@ -31,20 +31,20 @@ class MultiBigramMap; class DicNodeUtils { public: static int appendTwoWords(const int *src0, const int16_t length0, const int *src1, - const int16_t length1, int *dest); + const int16_t length1, int *const dest); static void initAsRoot( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - const int prevWordNodePos, DicNode *newRootNode); + const int prevWordPtNodePos, DicNode *const newRootDicNode); static void initAsRootWithPreviousWord( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - DicNode *prevWordLastNode, DicNode *newRootNode); - static void initByCopy(DicNode *srcNode, DicNode *destNode); + const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode); + static void initByCopy(const DicNode *const srcDicNode, DicNode *const destDicNode); static void getAllChildDicNodes(DicNode *dicNode, const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, DicNodeVector *childDicNodes); static float getBigramNodeImprobability( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - const DicNode *const node, MultiBigramMap *const multiBigramMap); + const DicNode *const dicNode, MultiBigramMap *const multiBigramMap); private: DISALLOW_IMPLICIT_CONSTRUCTORS(DicNodeUtils); @@ -53,7 +53,7 @@ class DicNodeUtils { static int getBigramNodeProbability( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - const DicNode *const node, MultiBigramMap *multiBigramMap); + const DicNode *const dicNode, MultiBigramMap *const multiBigramMap); }; } // namespace latinime #endif // LATINIME_DIC_NODE_UTILS_H diff --git a/native/jni/src/suggest/core/dicnode/dic_node_vector.h b/native/jni/src/suggest/core/dicnode/dic_node_vector.h index 42addae8d..9364e7751 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_vector.h +++ b/native/jni/src/suggest/core/dicnode/dic_node_vector.h @@ -62,14 +62,14 @@ class DicNodeVector { mDicNodes.back().initAsPassingChild(dicNode); } - void pushLeavingChild(const DicNode *const dicNode, const int pos, const int childrenPos, - const int probability, const bool isTerminal, const bool hasChildren, - const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount, - const int *const mergedNodeCodePoints) { + void pushLeavingChild(const DicNode *const dicNode, const int ptNodePos, + const int childrenPtNodeArrayPos, const int probability, const bool isTerminal, + const bool hasChildren, const bool isBlacklistedOrNotAWord, + const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) { ASSERT(!mLock); mDicNodes.push_back(mEmptyNode); - mDicNodes.back().initAsChild(dicNode, pos, childrenPos, probability, isTerminal, - hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount, + mDicNodes.back().initAsChild(dicNode, ptNodePos, childrenPtNodeArrayPos, probability, + isTerminal, hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount, mergedNodeCodePoints); } diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h index 9e0f62ceb..c41a7243a 100644 --- a/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h +++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h @@ -24,15 +24,14 @@ namespace latinime { /** - * Node for traversing the lexicon trie. + * PtNode information related to the DicNode from the lexicon trie. */ -// TODO: Introduce a dictionary node class which has attribute members required to understand the -// dictionary structure. class DicNodeProperties { public: AK_FORCE_INLINE DicNodeProperties() - : mPos(0), mChildrenPos(0), mProbability(0), mNodeCodePoint(0), mIsTerminal(false), - mHasChildren(false), mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0) {} + : mPtNodePos(0), mChildrenPtNodeArrayPos(0), mProbability(0), mDicNodeCodePoint(0), + mIsTerminal(false), mHasChildrenPtNodes(false), mIsBlacklistedOrNotAWord(false), + mDepth(0), mLeavingDepth(0) {} virtual ~DicNodeProperties() {} @@ -40,57 +39,57 @@ class DicNodeProperties { void init(const int pos, const int childrenPos, const int nodeCodePoint, const int probability, const bool isTerminal, const bool hasChildren, const bool isBlacklistedOrNotAWord, const uint16_t depth, const uint16_t leavingDepth) { - mPos = pos; - mChildrenPos = childrenPos; - mNodeCodePoint = nodeCodePoint; + mPtNodePos = pos; + mChildrenPtNodeArrayPos = childrenPos; + mDicNodeCodePoint = nodeCodePoint; mProbability = probability; mIsTerminal = isTerminal; - mHasChildren = hasChildren; + mHasChildrenPtNodes = hasChildren; mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord; mDepth = depth; mLeavingDepth = leavingDepth; } // Init for copy - void init(const DicNodeProperties *const nodeProp) { - mPos = nodeProp->mPos; - mChildrenPos = nodeProp->mChildrenPos; - mNodeCodePoint = nodeProp->mNodeCodePoint; - mProbability = nodeProp->mProbability; - mIsTerminal = nodeProp->mIsTerminal; - mHasChildren = nodeProp->mHasChildren; - mIsBlacklistedOrNotAWord = nodeProp->mIsBlacklistedOrNotAWord; - mDepth = nodeProp->mDepth; - mLeavingDepth = nodeProp->mLeavingDepth; + void init(const DicNodeProperties *const dicNodeProp) { + mPtNodePos = dicNodeProp->mPtNodePos; + mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos; + mDicNodeCodePoint = dicNodeProp->mDicNodeCodePoint; + mProbability = dicNodeProp->mProbability; + mIsTerminal = dicNodeProp->mIsTerminal; + mHasChildrenPtNodes = dicNodeProp->mHasChildrenPtNodes; + mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord; + mDepth = dicNodeProp->mDepth; + mLeavingDepth = dicNodeProp->mLeavingDepth; } // Init as passing child - void init(const DicNodeProperties *const nodeProp, const int codePoint) { - mPos = nodeProp->mPos; - mChildrenPos = nodeProp->mChildrenPos; - mNodeCodePoint = codePoint; // Overwrite the node char of a passing child - mProbability = nodeProp->mProbability; - mIsTerminal = nodeProp->mIsTerminal; - mHasChildren = nodeProp->mHasChildren; - mIsBlacklistedOrNotAWord = nodeProp->mIsBlacklistedOrNotAWord; - mDepth = nodeProp->mDepth + 1; // Increment the depth of a passing child - mLeavingDepth = nodeProp->mLeavingDepth; + void init(const DicNodeProperties *const dicNodeProp, const int codePoint) { + mPtNodePos = dicNodeProp->mPtNodePos; + mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos; + mDicNodeCodePoint = codePoint; // Overwrite the node char of a passing child + mProbability = dicNodeProp->mProbability; + mIsTerminal = dicNodeProp->mIsTerminal; + mHasChildrenPtNodes = dicNodeProp->mHasChildrenPtNodes; + mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord; + mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child + mLeavingDepth = dicNodeProp->mLeavingDepth; } - int getPos() const { - return mPos; + int getPtNodePos() const { + return mPtNodePos; } - int getChildrenPos() const { - return mChildrenPos; + int getChildrenPtNodeArrayPos() const { + return mChildrenPtNodeArrayPos; } int getProbability() const { return mProbability; } - int getNodeCodePoint() const { - return mNodeCodePoint; + int getDicNodeCodePoint() const { + return mDicNodeCodePoint; } uint16_t getDepth() const { @@ -107,7 +106,7 @@ class DicNodeProperties { } bool hasChildren() const { - return mHasChildren || mDepth != mLeavingDepth; + return mHasChildrenPtNodes || mDepth != mLeavingDepth; } bool isBlacklistedOrNotAWord() const { @@ -118,12 +117,12 @@ class DicNodeProperties { // Caution!!! // Use a default copy constructor and an assign operator because shallow copies are ok // for this class - int mPos; - int mChildrenPos; + int mPtNodePos; + int mChildrenPtNodeArrayPos; int mProbability; - int mNodeCodePoint; + int mDicNodeCodePoint; bool mIsTerminal; - bool mHasChildren; + bool mHasChildrenPtNodes; bool mIsBlacklistedOrNotAWord; uint16_t mDepth; uint16_t mLeavingDepth; diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h index b8986203d..dba57056b 100644 --- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h +++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h @@ -30,7 +30,7 @@ class DicNodeStatePrevWord { public: AK_FORCE_INLINE DicNodeStatePrevWord() : mPrevWordCount(0), mPrevWordLength(0), mPrevWordStart(0), mPrevWordProbability(0), - mPrevWordNodePos(NOT_A_DICT_POS), mSecondWordFirstInputIndex(NOT_AN_INDEX) { + mPrevWordPtNodePos(NOT_A_DICT_POS), mSecondWordFirstInputIndex(NOT_AN_INDEX) { memset(mPrevWord, 0, sizeof(mPrevWord)); } @@ -41,7 +41,7 @@ class DicNodeStatePrevWord { mPrevWordCount = 0; mPrevWordStart = 0; mPrevWordProbability = -1; - mPrevWordNodePos = NOT_A_DICT_POS; + mPrevWordPtNodePos = NOT_A_DICT_POS; mSecondWordFirstInputIndex = NOT_AN_INDEX; } @@ -50,7 +50,7 @@ class DicNodeStatePrevWord { mPrevWordCount = 0; mPrevWordStart = 0; mPrevWordProbability = -1; - mPrevWordNodePos = prevWordNodePos; + mPrevWordPtNodePos = prevWordNodePos; mSecondWordFirstInputIndex = NOT_AN_INDEX; } @@ -60,7 +60,7 @@ class DicNodeStatePrevWord { mPrevWordCount = prevWord->mPrevWordCount; mPrevWordStart = prevWord->mPrevWordStart; mPrevWordProbability = prevWord->mPrevWordProbability; - mPrevWordNodePos = prevWord->mPrevWordNodePos; + mPrevWordPtNodePos = prevWord->mPrevWordPtNodePos; mSecondWordFirstInputIndex = prevWord->mSecondWordFirstInputIndex; memcpy(mPrevWord, prevWord->mPrevWord, prevWord->mPrevWordLength * sizeof(mPrevWord[0])); } @@ -71,7 +71,7 @@ class DicNodeStatePrevWord { const int prevWordSecondWordFirstInputIndex, const int lastInputIndex) { mPrevWordCount = min(prevWordCount, static_cast<int16_t>(MAX_RESULTS)); mPrevWordProbability = prevWordProbability; - mPrevWordNodePos = prevWordNodePos; + mPrevWordPtNodePos = prevWordNodePos; int twoWordsLen = DicNodeUtils::appendTwoWords(src0, length0, src1, length1, mPrevWord); if (twoWordsLen >= MAX_WORD_LENGTH) { @@ -116,8 +116,8 @@ class DicNodeStatePrevWord { return mPrevWordStart; } - int getPrevWordNodePos() const { - return mPrevWordNodePos; + int getPrevWordPtNodePos() const { + return mPrevWordPtNodePos; } int getPrevWordCodePointAt(const int id) const { @@ -147,7 +147,7 @@ class DicNodeStatePrevWord { int16_t mPrevWordLength; int16_t mPrevWordStart; int16_t mPrevWordProbability; - int mPrevWordNodePos; + int mPrevWordPtNodePos; int mSecondWordFirstInputIndex; }; } // namespace latinime diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h index 3c85d0e9d..74f9eee92 100644 --- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h +++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h @@ -21,6 +21,7 @@ #include "defines.h" #include "suggest/core/dictionary/digraph_utils.h" +#include "suggest/core/dictionary/error_type_utils.h" namespace latinime { @@ -31,7 +32,7 @@ class DicNodeStateScoring { mDigraphIndex(DigraphUtils::NOT_A_DIGRAPH_INDEX), mEditCorrectionCount(0), mProximityCorrectionCount(0), mNormalizedCompoundDistance(0.0f), mSpatialDistance(0.0f), mLanguageDistance(0.0f), - mRawLength(0.0f), mExactMatch(true), + mRawLength(0.0f), mContainingErrorTypes(ErrorTypeUtils::NOT_AN_ERROR), mNormalizedCompoundDistanceAfterFirstWord(MAX_VALUE_FOR_WEIGHTING) { } @@ -47,7 +48,7 @@ class DicNodeStateScoring { mDoubleLetterLevel = NOT_A_DOUBLE_LETTER; mDigraphIndex = DigraphUtils::NOT_A_DIGRAPH_INDEX; mNormalizedCompoundDistanceAfterFirstWord = MAX_VALUE_FOR_WEIGHTING; - mExactMatch = true; + mContainingErrorTypes = ErrorTypeUtils::NOT_AN_ERROR; } AK_FORCE_INLINE void init(const DicNodeStateScoring *const scoring) { @@ -59,34 +60,21 @@ class DicNodeStateScoring { mRawLength = scoring->mRawLength; mDoubleLetterLevel = scoring->mDoubleLetterLevel; mDigraphIndex = scoring->mDigraphIndex; - mExactMatch = scoring->mExactMatch; + mContainingErrorTypes = scoring->mContainingErrorTypes; mNormalizedCompoundDistanceAfterFirstWord = scoring->mNormalizedCompoundDistanceAfterFirstWord; } void addCost(const float spatialCost, const float languageCost, const bool doNormalization, - const int inputSize, const int totalInputIndex, const ErrorType errorType) { + const int inputSize, const int totalInputIndex, + const ErrorTypeUtils::ErrorType errorType) { addDistance(spatialCost, languageCost, doNormalization, inputSize, totalInputIndex); - switch (errorType) { - case ET_EDIT_CORRECTION: - ++mEditCorrectionCount; - mExactMatch = false; - break; - case ET_PROXIMITY_CORRECTION: - ++mProximityCorrectionCount; - mExactMatch = false; - break; - case ET_COMPLETION: - mExactMatch = false; - break; - case ET_NEW_WORD: - mExactMatch = false; - break; - case ET_INTENTIONAL_OMISSION: - mExactMatch = false; - break; - case ET_NOT_AN_ERROR: - break; + mContainingErrorTypes = mContainingErrorTypes | errorType; + if (ErrorTypeUtils::isEditCorrectionError(errorType)) { + ++mEditCorrectionCount; + } + if (ErrorTypeUtils::isProximityCorrectionError(errorType)) { + ++mProximityCorrectionCount; } } @@ -182,7 +170,7 @@ class DicNodeStateScoring { } bool isExactMatch() const { - return mExactMatch; + return ErrorTypeUtils::isExactMatch(mContainingErrorTypes); } private: @@ -199,7 +187,8 @@ class DicNodeStateScoring { float mSpatialDistance; float mLanguageDistance; float mRawLength; - bool mExactMatch; + // All accumulated error types so far + ErrorTypeUtils::ErrorType mContainingErrorTypes; float mNormalizedCompoundDistanceAfterFirstWord; AK_FORCE_INLINE void addDistance(float spatialDistance, float languageDistance, diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp index 71f4ef6ea..2a62b555b 100644 --- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp @@ -144,7 +144,7 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength, const bool forceLowerCaseSearch) const { if (0 >= prevWordLength) return NOT_A_DICT_POS; - int pos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(prevWord, prevWordLength, + int pos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(prevWord, prevWordLength, forceLowerCaseSearch); if (NOT_A_DICT_POS == pos) return NOT_A_DICT_POS; return mDictionaryStructurePolicy->getBigramsPositionOfPtNode(pos); @@ -155,7 +155,7 @@ int BigramDictionary::getBigramProbability(const int *word0, int length0, const int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */); // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams if (NOT_A_DICT_POS == pos) return NOT_A_PROBABILITY; - int nextWordPos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(word1, length1, + int nextWordPos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(word1, length1, false /* forceLowerCaseSearch */); if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY; @@ -163,7 +163,8 @@ int BigramDictionary::getBigramProbability(const int *word0, int length0, const mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos); while (bigramsIt.hasNext()) { bigramsIt.next(); - if (bigramsIt.getBigramPos() == nextWordPos) { + if (bigramsIt.getBigramPos() == nextWordPos + && bigramsIt.getProbability() != NOT_A_PROBABILITY) { return mDictionaryStructurePolicy->getProbability( mDictionaryStructurePolicy->getUnigramProbabilityOfPtNode(nextWordPos), bigramsIt.getProbability()); diff --git a/native/jni/src/suggest/core/dictionary/bloom_filter.h b/native/jni/src/suggest/core/dictionary/bloom_filter.h index 5205456a8..5f9700486 100644 --- a/native/jni/src/suggest/core/dictionary/bloom_filter.h +++ b/native/jni/src/suggest/core/dictionary/bloom_filter.h @@ -50,6 +50,8 @@ class BloomFilter { } private: + DISALLOW_ASSIGNMENT_OPERATOR(BloomFilter); + // Size, in bytes, of the bloom filter index for bigrams // 128 gives us 1024 buckets. The probability of false positive is (1 - e ** (-kn/m))**k, // where k is the number of hash functions, n the number of bigrams, and m the number of diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index 59ead1894..e68c0a6d8 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -21,46 +21,39 @@ #include <stdint.h> #include "defines.h" -#include "suggest/core/dictionary/bigram_dictionary.h" #include "suggest/core/policy/dictionary_header_structure_policy.h" -#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "suggest/core/session/dic_traverse_session.h" #include "suggest/core/suggest.h" #include "suggest/core/suggest_options.h" #include "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h" #include "suggest/policyimpl/typing/typing_suggest_policy_factory.h" #include "utils/log_utils.h" +#include "utils/time_keeper.h" namespace latinime { const int Dictionary::HEADER_ATTRIBUTE_BUFFER_SIZE = 32; -Dictionary::Dictionary(JNIEnv *env, - DictionaryStructureWithBufferPolicy *const dictionaryStructureWithBufferPolicy) +Dictionary::Dictionary(JNIEnv *env, const DictionaryStructureWithBufferPolicy::StructurePolicyPtr + &dictionaryStructureWithBufferPolicy) : mDictionaryStructureWithBufferPolicy(dictionaryStructureWithBufferPolicy), - mBigramDictionary(new BigramDictionary(mDictionaryStructureWithBufferPolicy)), + mBigramDictionary(new BigramDictionary(mDictionaryStructureWithBufferPolicy.get())), mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())), mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) { logDictionaryInfo(env); } -Dictionary::~Dictionary() { - delete mBigramDictionary; - delete mGestureSuggest; - delete mTypingSuggest; - delete mDictionaryStructureWithBufferPolicy; -} - int Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession, int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints, int inputSize, int *prevWordCodePoints, int prevWordLength, int commitPoint, const SuggestOptions *const suggestOptions, int *outWords, int *frequencies, int *spaceIndices, int *outputTypes, int *outputAutoCommitFirstWordConfidence) const { + TimeKeeper::setCurrentTime(); int result = 0; if (suggestOptions->isGesture()) { DicTraverseSession::initSessionInstance( traverseSession, this, prevWordCodePoints, prevWordLength, suggestOptions); - result = mGestureSuggest->getSuggestions(proximityInfo, traverseSession, xcoordinates, + result = mGestureSuggest.get()->getSuggestions(proximityInfo, traverseSession, xcoordinates, ycoordinates, times, pointerIds, inputCodePoints, inputSize, commitPoint, outWords, frequencies, spaceIndices, outputTypes, outputAutoCommitFirstWordConfidence); if (DEBUG_DICT) { @@ -70,7 +63,7 @@ int Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession } else { DicTraverseSession::initSessionInstance( traverseSession, this, prevWordCodePoints, prevWordLength, suggestOptions); - result = mTypingSuggest->getSuggestions(proximityInfo, traverseSession, xcoordinates, + result = mTypingSuggest.get()->getSuggestions(proximityInfo, traverseSession, xcoordinates, ycoordinates, times, pointerIds, inputCodePoints, inputSize, commitPoint, outWords, frequencies, spaceIndices, outputTypes, outputAutoCommitFirstWordConfidence); @@ -83,12 +76,15 @@ int Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession int Dictionary::getBigrams(const int *word, int length, int *outWords, int *frequencies, int *outputTypes) const { + TimeKeeper::setCurrentTime(); if (length <= 0) return 0; - return mBigramDictionary->getPredictions(word, length, outWords, frequencies, outputTypes); + return mBigramDictionary.get()->getPredictions(word, length, outWords, frequencies, + outputTypes); } int Dictionary::getProbability(const int *word, int length) const { - int pos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(word, length, + TimeKeeper::setCurrentTime(); + int pos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(word, length, false /* forceLowerCaseSearch */); if (NOT_A_DICT_POS == pos) { return NOT_A_PROBABILITY; @@ -98,39 +94,60 @@ int Dictionary::getProbability(const int *word, int length) const { int Dictionary::getBigramProbability(const int *word0, int length0, const int *word1, int length1) const { - return mBigramDictionary->getBigramProbability(word0, length0, word1, length1); + TimeKeeper::setCurrentTime(); + return mBigramDictionary.get()->getBigramProbability(word0, length0, word1, length1); } -void Dictionary::addUnigramWord(const int *const word, const int length, const int probability) { - mDictionaryStructureWithBufferPolicy->addUnigramWord(word, length, probability); +void Dictionary::addUnigramWord(const int *const word, const int length, const int probability, + const int *const shortcutTargetCodePoints, const int shortcutLength, + const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted, + const int timestamp) { + TimeKeeper::setCurrentTime(); + mDictionaryStructureWithBufferPolicy.get()->addUnigramWord(word, length, probability, + shortcutTargetCodePoints, shortcutLength, shortcutProbability, isNotAWord, + isBlacklisted, timestamp); } void Dictionary::addBigramWords(const int *const word0, const int length0, const int *const word1, - const int length1, const int probability) { - mDictionaryStructureWithBufferPolicy->addBigramWords(word0, length0, word1, length1, - probability); + const int length1, const int probability, const int timestamp) { + TimeKeeper::setCurrentTime(); + mDictionaryStructureWithBufferPolicy.get()->addBigramWords(word0, length0, word1, length1, + probability, timestamp); } void Dictionary::removeBigramWords(const int *const word0, const int length0, const int *const word1, const int length1) { - mDictionaryStructureWithBufferPolicy->removeBigramWords(word0, length0, word1, length1); + TimeKeeper::setCurrentTime(); + mDictionaryStructureWithBufferPolicy.get()->removeBigramWords(word0, length0, word1, length1); } void Dictionary::flush(const char *const filePath) { - mDictionaryStructureWithBufferPolicy->flush(filePath); + TimeKeeper::setCurrentTime(); + mDictionaryStructureWithBufferPolicy.get()->flush(filePath); } void Dictionary::flushWithGC(const char *const filePath) { - mDictionaryStructureWithBufferPolicy->flushWithGC(filePath); + TimeKeeper::setCurrentTime(); + mDictionaryStructureWithBufferPolicy.get()->flushWithGC(filePath); } bool Dictionary::needsToRunGC(const bool mindsBlockByGC) { - return mDictionaryStructureWithBufferPolicy->needsToRunGC(mindsBlockByGC); + TimeKeeper::setCurrentTime(); + return mDictionaryStructureWithBufferPolicy.get()->needsToRunGC(mindsBlockByGC); } -void Dictionary::getProperty(const char *const query, char *const outResult, +void Dictionary::getProperty(const char *const query, const int queryLength, char *const outResult, const int maxResultLength) { - return mDictionaryStructureWithBufferPolicy->getProperty(query, outResult, maxResultLength); + TimeKeeper::setCurrentTime(); + return mDictionaryStructureWithBufferPolicy.get()->getProperty(query, queryLength, outResult, + maxResultLength); +} + +const UnigramProperty Dictionary::getUnigramProperty(const int *const codePoints, + const int codePointCount) { + TimeKeeper::setCurrentTime(); + return mDictionaryStructureWithBufferPolicy.get()->getUnigramProperty( + codePoints, codePointCount); } void Dictionary::logDictionaryInfo(JNIEnv *const env) const { diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h index 0195d5bf0..b37b4aa18 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.h +++ b/native/jni/src/suggest/core/dictionary/dictionary.h @@ -21,15 +21,20 @@ #include "defines.h" #include "jni.h" +#include "suggest/core/dictionary/bigram_dictionary.h" +#include "suggest/core/dictionary/unigram_property.h" +#include "suggest/core/policy/dictionary_header_structure_policy.h" +#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" +#include "suggest/core/suggest_interface.h" +#include "utils/exclusive_ownership_pointer.h" namespace latinime { -class BigramDictionary; class DictionaryStructureWithBufferPolicy; class DicTraverseSession; class ProximityInfo; -class SuggestInterface; class SuggestOptions; +class UnigramProperty; class Dictionary { public: @@ -53,8 +58,8 @@ class Dictionary { static const int KIND_FLAG_POSSIBLY_OFFENSIVE = 0x80000000; static const int KIND_FLAG_EXACT_MATCH = 0x40000000; - Dictionary(JNIEnv *env, - DictionaryStructureWithBufferPolicy *const dictionaryStructureWithBufferPoilcy); + Dictionary(JNIEnv *env, const DictionaryStructureWithBufferPolicy::StructurePolicyPtr + &dictionaryStructureWithBufferPolicy); int getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession, int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints, @@ -69,10 +74,13 @@ class Dictionary { int getBigramProbability(const int *word0, int length0, const int *word1, int length1) const; - void addUnigramWord(const int *const word, const int length, const int probability); + void addUnigramWord(const int *const word, const int length, const int probability, + const int *const shortcutTargetCodePoints, const int shortcutLength, + const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted, + const int timestamp); void addBigramWords(const int *const word0, const int length0, const int *const word1, - const int length1, const int probability); + const int length1, const int probability, const int timestamp); void removeBigramWords(const int *const word0, const int length0, const int *const word1, const int length1); @@ -83,24 +91,33 @@ class Dictionary { bool needsToRunGC(const bool mindsBlockByGC); - void getProperty(const char *const query, char *const outResult, + void getProperty(const char *const query, const int queryLength, char *const outResult, const int maxResultLength); + const UnigramProperty getUnigramProperty(const int *const codePoints, const int codePointCount); + const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const { - return mDictionaryStructureWithBufferPolicy; + return mDictionaryStructureWithBufferPolicy.get(); } - virtual ~Dictionary(); + int getFormatVersionNumber() const { + return mDictionaryStructureWithBufferPolicy.get()->getHeaderStructurePolicy() + ->getFormatVersionNumber(); + } private: DISALLOW_IMPLICIT_CONSTRUCTORS(Dictionary); + typedef ExclusiveOwnershipPointer<BigramDictionary> BigramDictionaryPtr; + typedef ExclusiveOwnershipPointer<SuggestInterface> SuggestInterfacePtr; + static const int HEADER_ATTRIBUTE_BUFFER_SIZE; - DictionaryStructureWithBufferPolicy *const mDictionaryStructureWithBufferPolicy; - const BigramDictionary *const mBigramDictionary; - const SuggestInterface *const mGestureSuggest; - const SuggestInterface *const mTypingSuggest; + const DictionaryStructureWithBufferPolicy::StructurePolicyPtr + mDictionaryStructureWithBufferPolicy; + const BigramDictionaryPtr mBigramDictionary; + const SuggestInterfacePtr mGestureSuggest; + const SuggestInterfacePtr mTypingSuggest; void logDictionaryInfo(JNIEnv *const env) const; }; diff --git a/native/jni/src/suggest/core/dictionary/digraph_utils.cpp b/native/jni/src/suggest/core/dictionary/digraph_utils.cpp index 3271c1bfb..5f9b8f3e2 100644 --- a/native/jni/src/suggest/core/dictionary/digraph_utils.cpp +++ b/native/jni/src/suggest/core/dictionary/digraph_utils.cpp @@ -28,11 +28,8 @@ const DigraphUtils::digraph_t DigraphUtils::GERMAN_UMLAUT_DIGRAPHS[] = { { 'a', 'e', 0x00E4 }, // U+00E4 : LATIN SMALL LETTER A WITH DIAERESIS { 'o', 'e', 0x00F6 }, // U+00F6 : LATIN SMALL LETTER O WITH DIAERESIS { 'u', 'e', 0x00FC } }; // U+00FC : LATIN SMALL LETTER U WITH DIAERESIS -const DigraphUtils::digraph_t DigraphUtils::FRENCH_LIGATURES_DIGRAPHS[] = - { { 'a', 'e', 0x00E6 }, // U+00E6 : LATIN SMALL LETTER AE - { 'o', 'e', 0x0153 } }; // U+0153 : LATIN SMALL LIGATURE OE const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] = - { DIGRAPH_TYPE_GERMAN_UMLAUT, DIGRAPH_TYPE_FRENCH_LIGATURES }; + { DIGRAPH_TYPE_GERMAN_UMLAUT }; /* static */ bool DigraphUtils::hasDigraphForCodePoint( const DictionaryHeaderStructurePolicy *const headerPolicy, @@ -50,9 +47,6 @@ const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] = if (headerPolicy->requiresGermanUmlautProcessing()) { return DIGRAPH_TYPE_GERMAN_UMLAUT; } - if (headerPolicy->requiresFrenchLigatureProcessing()) { - return DIGRAPH_TYPE_FRENCH_LIGATURES; - } return DIGRAPH_TYPE_NONE; } @@ -86,10 +80,6 @@ const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] = *digraphs = GERMAN_UMLAUT_DIGRAPHS; return NELEMS(GERMAN_UMLAUT_DIGRAPHS); } - if (digraphType == DIGRAPH_TYPE_FRENCH_LIGATURES) { - *digraphs = FRENCH_LIGATURES_DIGRAPHS; - return NELEMS(FRENCH_LIGATURES_DIGRAPHS); - } return 0; } diff --git a/native/jni/src/suggest/core/dictionary/digraph_utils.h b/native/jni/src/suggest/core/dictionary/digraph_utils.h index 6ae16e390..bec2cd6e2 100644 --- a/native/jni/src/suggest/core/dictionary/digraph_utils.h +++ b/native/jni/src/suggest/core/dictionary/digraph_utils.h @@ -34,7 +34,6 @@ class DigraphUtils { typedef enum { DIGRAPH_TYPE_NONE, DIGRAPH_TYPE_GERMAN_UMLAUT, - DIGRAPH_TYPE_FRENCH_LIGATURES } DigraphType; typedef struct { int first; int second; int compositeGlyph; } digraph_t; @@ -55,7 +54,6 @@ class DigraphUtils { const DigraphType digraphType, const int compositeGlyphCodePoint); static const digraph_t GERMAN_UMLAUT_DIGRAPHS[]; - static const digraph_t FRENCH_LIGATURES_DIGRAPHS[]; static const DigraphType USED_DIGRAPH_TYPES[]; }; } // namespace latinime diff --git a/native/jni/src/suggest/core/dictionary/error_type_utils.cpp b/native/jni/src/suggest/core/dictionary/error_type_utils.cpp new file mode 100644 index 000000000..0635fef7e --- /dev/null +++ b/native/jni/src/suggest/core/dictionary/error_type_utils.cpp @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/core/dictionary/error_type_utils.h" + +namespace latinime { + +const ErrorTypeUtils::ErrorType ErrorTypeUtils::NOT_AN_ERROR = 0x0; +const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_CASE_ERROR = 0x1; +const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR = 0x2; +const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_DIGRAPH = 0x4; +const ErrorTypeUtils::ErrorType ErrorTypeUtils::INTENTIONAL_OMISSION = 0x8; +const ErrorTypeUtils::ErrorType ErrorTypeUtils::EDIT_CORRECTION = 0x10; +const ErrorTypeUtils::ErrorType ErrorTypeUtils::PROXIMITY_CORRECTION = 0x20; +const ErrorTypeUtils::ErrorType ErrorTypeUtils::COMPLETION = 0x40; +const ErrorTypeUtils::ErrorType ErrorTypeUtils::NEW_WORD = 0x80; + +const ErrorTypeUtils::ErrorType ErrorTypeUtils::ERRORS_TREATED_AS_AN_EXACT_MATCH = + NOT_AN_ERROR | MATCH_WITH_CASE_ERROR | MATCH_WITH_ACCENT_ERROR | MATCH_WITH_DIGRAPH; + +} // namespace latinime diff --git a/native/jni/src/suggest/core/dictionary/error_type_utils.h b/native/jni/src/suggest/core/dictionary/error_type_utils.h new file mode 100644 index 000000000..ab4a65e48 --- /dev/null +++ b/native/jni/src/suggest/core/dictionary/error_type_utils.h @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_ERROR_TYPE_UTILS_H +#define LATINIME_ERROR_TYPE_UTILS_H + +#include <stdint.h> + +#include "defines.h" + +namespace latinime { + +class ErrorTypeUtils { + public: + // ErrorType is mainly decided by CorrectionType but it is also depending on if + // the correction has really been performed or not. + typedef uint32_t ErrorType; + + static const ErrorType NOT_AN_ERROR; + static const ErrorType MATCH_WITH_CASE_ERROR; + static const ErrorType MATCH_WITH_ACCENT_ERROR; + static const ErrorType MATCH_WITH_DIGRAPH; + // Treat error as an intentional omission when the CorrectionType is omission and the node can + // be intentional omission. + static const ErrorType INTENTIONAL_OMISSION; + // Substitution, omission and transposition + static const ErrorType EDIT_CORRECTION; + // Proximity error + static const ErrorType PROXIMITY_CORRECTION; + // Completion + static const ErrorType COMPLETION; + // New word + // TODO: Remove. + // A new word error should be an edit correction error or a proximity correction error. + static const ErrorType NEW_WORD; + + // TODO: Differentiate errors. + static bool isExactMatch(const ErrorType containingErrors) { + return (containingErrors & ~ERRORS_TREATED_AS_AN_EXACT_MATCH) == 0; + } + + static bool isEditCorrectionError(const ErrorType errorType) { + return (errorType & EDIT_CORRECTION) != 0; + } + + static bool isProximityCorrectionError(const ErrorType errorType) { + return (errorType & PROXIMITY_CORRECTION) != 0; + } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(ErrorTypeUtils); + + static const ErrorType ERRORS_TREATED_AS_AN_EXACT_MATCH; +}; +} // namespace latinime +#endif // LATINIME_ERROR_TYPE_UTILS_H diff --git a/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp b/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp index b1d2f4b4d..49d82e69a 100644 --- a/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp +++ b/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp @@ -30,4 +30,75 @@ const size_t MultiBigramMap::MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP = 25; // Most common previous word contexts currently have 100 bigrams const int MultiBigramMap::BigramMap::DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP = 100; +// Look up the bigram probability for the given word pair from the cached bigram maps. +// Also caches the bigrams if there is space remaining and they have not been cached already. +int MultiBigramMap::getBigramProbability( + const DictionaryStructureWithBufferPolicy *const structurePolicy, + const int wordPosition, const int nextWordPosition, const int unigramProbability) { + hash_map_compat<int, BigramMap>::const_iterator mapPosition = + mBigramMaps.find(wordPosition); + if (mapPosition != mBigramMaps.end()) { + return mapPosition->second.getBigramProbability(structurePolicy, nextWordPosition, + unigramProbability); + } + if (mBigramMaps.size() < MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP) { + addBigramsForWordPosition(structurePolicy, wordPosition); + return mBigramMaps[wordPosition].getBigramProbability(structurePolicy, + nextWordPosition, unigramProbability); + } + return readBigramProbabilityFromBinaryDictionary(structurePolicy, wordPosition, + nextWordPosition, unigramProbability); +} + +void MultiBigramMap::BigramMap::init( + const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos) { + const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos); + BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), + bigramsListPos); + while (bigramsIt.hasNext()) { + bigramsIt.next(); + if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) { + continue; + } + mBigramMap[bigramsIt.getBigramPos()] = bigramsIt.getProbability(); + mBloomFilter.setInFilter(bigramsIt.getBigramPos()); + } +} + +int MultiBigramMap::BigramMap::getBigramProbability( + const DictionaryStructureWithBufferPolicy *const structurePolicy, + const int nextWordPosition, const int unigramProbability) const { + int bigramProbability = NOT_A_PROBABILITY; + if (mBloomFilter.isInFilter(nextWordPosition)) { + const hash_map_compat<int, int>::const_iterator bigramProbabilityIt = + mBigramMap.find(nextWordPosition); + if (bigramProbabilityIt != mBigramMap.end()) { + bigramProbability = bigramProbabilityIt->second; + } + } + return structurePolicy->getProbability(unigramProbability, bigramProbability); +} + +void MultiBigramMap::addBigramsForWordPosition( + const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position) { + mBigramMaps[position].init(structurePolicy, position); +} + +int MultiBigramMap::readBigramProbabilityFromBinaryDictionary( + const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos, + const int nextWordPosition, const int unigramProbability) { + int bigramProbability = NOT_A_PROBABILITY; + const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos); + BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), + bigramsListPos); + while (bigramsIt.hasNext()) { + bigramsIt.next(); + if (bigramsIt.getBigramPos() == nextWordPosition) { + bigramProbability = bigramsIt.getProbability(); + break; + } + } + return structurePolicy->getProbability(unigramProbability, bigramProbability); +} + } // namespace latinime diff --git a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h index 4633c07b0..421b2681c 100644 --- a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h +++ b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h @@ -38,21 +38,7 @@ class MultiBigramMap { // Look up the bigram probability for the given word pair from the cached bigram maps. // Also caches the bigrams if there is space remaining and they have not been cached already. int getBigramProbability(const DictionaryStructureWithBufferPolicy *const structurePolicy, - const int wordPosition, const int nextWordPosition, const int unigramProbability) { - hash_map_compat<int, BigramMap>::const_iterator mapPosition = - mBigramMaps.find(wordPosition); - if (mapPosition != mBigramMaps.end()) { - return mapPosition->second.getBigramProbability(structurePolicy, nextWordPosition, - unigramProbability); - } - if (mBigramMaps.size() < MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP) { - addBigramsForWordPosition(structurePolicy, wordPosition); - return mBigramMaps[wordPosition].getBigramProbability(structurePolicy, - nextWordPosition, unigramProbability); - } - return readBigramProbabilityFromBinaryDictionary(structurePolicy, wordPosition, - nextWordPosition, unigramProbability); - } + const int wordPosition, const int nextWordPosition, const int unigramProbability); void clear() { mBigramMaps.clear(); @@ -67,33 +53,11 @@ class MultiBigramMap { ~BigramMap() {} void init(const DictionaryStructureWithBufferPolicy *const structurePolicy, - const int nodePos) { - const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos); - BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), - bigramsListPos); - while (bigramsIt.hasNext()) { - bigramsIt.next(); - if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) { - continue; - } - mBigramMap[bigramsIt.getBigramPos()] = bigramsIt.getProbability(); - mBloomFilter.setInFilter(bigramsIt.getBigramPos()); - } - } + const int nodePos); - AK_FORCE_INLINE int getBigramProbability( + int getBigramProbability( const DictionaryStructureWithBufferPolicy *const structurePolicy, - const int nextWordPosition, const int unigramProbability) const { - int bigramProbability = NOT_A_PROBABILITY; - if (mBloomFilter.isInFilter(nextWordPosition)) { - const hash_map_compat<int, int>::const_iterator bigramProbabilityIt = - mBigramMap.find(nextWordPosition); - if (bigramProbabilityIt != mBigramMap.end()) { - bigramProbability = bigramProbabilityIt->second; - } - } - return structurePolicy->getProbability(unigramProbability, bigramProbability); - } + const int nextWordPosition, const int unigramProbability) const; private: // NOTE: The BigramMap class doesn't use DISALLOW_COPY_AND_ASSIGN() because its default @@ -103,27 +67,12 @@ class MultiBigramMap { BloomFilter mBloomFilter; }; - AK_FORCE_INLINE void addBigramsForWordPosition( - const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position) { - mBigramMaps[position].init(structurePolicy, position); - } + void addBigramsForWordPosition( + const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position); - AK_FORCE_INLINE int readBigramProbabilityFromBinaryDictionary( + int readBigramProbabilityFromBinaryDictionary( const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos, - const int nextWordPosition, const int unigramProbability) { - int bigramProbability = NOT_A_PROBABILITY; - const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos); - BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), - bigramsListPos); - while (bigramsIt.hasNext()) { - bigramsIt.next(); - if (bigramsIt.getBigramPos() == nextWordPosition) { - bigramProbability = bigramsIt.getProbability(); - break; - } - } - return structurePolicy->getProbability(unigramProbability, bigramProbability); - } + const int nextWordPosition, const int unigramProbability); static const size_t MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP; hash_map_compat<int, BigramMap> mBigramMaps; diff --git a/native/jni/src/suggest/core/dictionary/unigram_property.cpp b/native/jni/src/suggest/core/dictionary/unigram_property.cpp new file mode 100644 index 000000000..16bbb69d8 --- /dev/null +++ b/native/jni/src/suggest/core/dictionary/unigram_property.cpp @@ -0,0 +1,52 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/core/dictionary/unigram_property.h" + +namespace latinime { + +void UnigramProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints, + jbooleanArray outFlags, jintArray outProbability, jintArray outHistoricalInfo, + jobject outShortcutTargets, jobject outShortcutProbabilities) const { + env->SetIntArrayRegion(outCodePoints, 0 /* start */, mCodePointCount, mCodePoints); + jboolean flags[] = {mIsNotAWord, mIsBlacklisted, mHasBigrams, mHasShortcuts}; + env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags); + env->SetIntArrayRegion(outProbability, 0 /* start */, 1 /* len */, &mProbability); + int historicalInfo[] = {mTimestamp, mLevel, mCount}; + env->SetIntArrayRegion(outHistoricalInfo, 0 /* start */, NELEMS(historicalInfo), + historicalInfo); + + jclass integerClass = env->FindClass("java/lang/Integer"); + jmethodID intToIntegerConstructorId = env->GetMethodID(integerClass, "<init>", "(I)V"); + jclass arrayListClass = env->FindClass("java/util/ArrayList"); + jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z"); + const int shortcutTargetCount = mShortcutTargets.size(); + for (int i = 0; i < shortcutTargetCount; ++i) { + jintArray shortcutTargetCodePointArray = env->NewIntArray(mShortcutTargets[i].size()); + env->SetIntArrayRegion(shortcutTargetCodePointArray, 0 /* start */, + mShortcutTargets[i].size(), &mShortcutTargets[i][0]); + env->CallVoidMethod(outShortcutTargets, addMethodId, shortcutTargetCodePointArray); + env->DeleteLocalRef(shortcutTargetCodePointArray); + jobject integerProbability = env->NewObject(integerClass, intToIntegerConstructorId, + mShortcutProbabilities[i]); + env->CallVoidMethod(outShortcutProbabilities, addMethodId, integerProbability); + env->DeleteLocalRef(integerProbability); + } + env->DeleteLocalRef(integerClass); + env->DeleteLocalRef(arrayListClass); +} + +} // namespace latinime diff --git a/native/jni/src/suggest/core/dictionary/unigram_property.h b/native/jni/src/suggest/core/dictionary/unigram_property.h new file mode 100644 index 000000000..c4ebb86ab --- /dev/null +++ b/native/jni/src/suggest/core/dictionary/unigram_property.h @@ -0,0 +1,87 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_UNIGRAM_PROPERTY_H +#define LATINIME_UNIGRAM_PROPERTY_H + +#include <cstring> +#include <vector> + +#include "defines.h" +#include "jni.h" + +namespace latinime { + +// This class is used for returning information belonging to a unigram to java side. +class UnigramProperty { + public: + // Invalid unigram. + UnigramProperty() + : mCodePoints(), mCodePointCount(0), mIsNotAWord(false), mIsBlacklisted(false), + mHasBigrams(false), mHasShortcuts(false), mProbability(NOT_A_PROBABILITY), + mTimestamp(0), mLevel(0), mCount(0), mShortcutTargets(), mShortcutProbabilities() {} + + UnigramProperty(const UnigramProperty &unigramProperty) + : mCodePoints(), mCodePointCount(unigramProperty.mCodePointCount), + mIsNotAWord(unigramProperty.mIsNotAWord), + mIsBlacklisted(unigramProperty.mIsBlacklisted), + mHasBigrams(unigramProperty.mHasBigrams), + mHasShortcuts(unigramProperty.mHasShortcuts), + mProbability(unigramProperty.mProbability), + mTimestamp(unigramProperty.mTimestamp), mLevel(unigramProperty.mLevel), + mCount(unigramProperty.mCount), mShortcutTargets(unigramProperty.mShortcutTargets), + mShortcutProbabilities(unigramProperty.mShortcutProbabilities) { + memcpy(mCodePoints, unigramProperty.mCodePoints, sizeof(mCodePoints)); + } + + UnigramProperty(const int *const codePoints, const int codePointCount, + const bool isNotAWord, const bool isBlacklisted, const bool hasBigrams, + const bool hasShortcuts, const int probability, const int timestamp, + const int level, const int count, + const std::vector<std::vector<int> > *const shortcutTargets, + const std::vector<int> *const shortcutProbabilities) + : mCodePoints(), mCodePointCount(codePointCount), + mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mHasBigrams(hasBigrams), + mHasShortcuts(hasShortcuts), mProbability(probability), mTimestamp(timestamp), + mLevel(level), mCount(count), mShortcutTargets(*shortcutTargets), + mShortcutProbabilities(*shortcutProbabilities) { + memcpy(mCodePoints, codePoints, sizeof(mCodePoints)); + } + + void outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags, + jintArray outProbability, jintArray outHistoricalInfo, jobject outShortcutTargets, + jobject outShortcutProbabilities) const; + + private: + DISALLOW_ASSIGNMENT_OPERATOR(UnigramProperty); + + int mCodePoints[MAX_WORD_LENGTH]; + int mCodePointCount; + bool mIsNotAWord; + bool mIsBlacklisted; + bool mHasBigrams; + bool mHasShortcuts; + int mProbability; + // Historical information + int mTimestamp; + int mLevel; + int mCount; + // Shortcut + std::vector<std::vector<int> > mShortcutTargets; + std::vector<int> mShortcutProbabilities; +}; +} // namespace latinime +#endif // LATINIME_UNIGRAM_PROPERTY_H diff --git a/native/jni/src/suggest/core/layout/proximity_info.cpp b/native/jni/src/suggest/core/layout/proximity_info.cpp index e64476d82..ee8e59ef9 100644 --- a/native/jni/src/suggest/core/layout/proximity_info.cpp +++ b/native/jni/src/suggest/core/layout/proximity_info.cpp @@ -71,7 +71,7 @@ ProximityInfo::ProximityInfo(JNIEnv *env, const jstring localeJStr, && sweetSpotCenterYs && sweetSpotRadii), mProximityCharsArray(new int[GRID_WIDTH * GRID_HEIGHT * MAX_PROXIMITY_CHARS_SIZE /* proximityCharsLength */]), - mCodeToKeyMap() { + mLowerCodePointToKeyMap() { /* Let's check the input array length here to make sure */ const jsize proximityCharsLength = env->GetArrayLength(proximityChars); if (proximityCharsLength != GRID_WIDTH * GRID_HEIGHT * MAX_PROXIMITY_CHARS_SIZE) { @@ -147,7 +147,14 @@ int ProximityInfo::getCodePointOf(const int keyIndex) const { if (keyIndex < 0 || keyIndex >= KEY_COUNT) { return NOT_A_CODE_POINT; } - return mKeyIndexToCodePointG[keyIndex]; + return mKeyIndexToLowerCodePointG[keyIndex]; +} + +int ProximityInfo::getOriginalCodePointOf(const int keyIndex) const { + if (keyIndex < 0 || keyIndex >= KEY_COUNT) { + return NOT_A_CODE_POINT; + } + return mKeyIndexToOriginalCodePoint[keyIndex]; } void ProximityInfo::initializeG() { @@ -164,8 +171,9 @@ void ProximityInfo::initializeG() { const float gapY = sweetSpotCenterY - mCenterYsG[i]; mSweetSpotCenterYsG[i] = static_cast<int>(mCenterYsG[i] + gapY * verticalScale); } - mCodeToKeyMap[lowerCode] = i; - mKeyIndexToCodePointG[i] = lowerCode; + mLowerCodePointToKeyMap[lowerCode] = i; + mKeyIndexToOriginalCodePoint[i] = code; + mKeyIndexToLowerCodePointG[i] = lowerCode; } for (int i = 0; i < KEY_COUNT; i++) { mKeyKeyDistancesG[i][i] = 0; diff --git a/native/jni/src/suggest/core/layout/proximity_info.h b/native/jni/src/suggest/core/layout/proximity_info.h index f25949001..a91b9d674 100644 --- a/native/jni/src/suggest/core/layout/proximity_info.h +++ b/native/jni/src/suggest/core/layout/proximity_info.h @@ -39,6 +39,7 @@ class ProximityInfo { float getNormalizedSquaredDistanceFromCenterFloatG( const int keyId, const int x, const int y, const bool isGeometric) const; int getCodePointOf(const int keyIndex) const; + int getOriginalCodePointOf(const int keyIndex) const; bool hasSweetSpotData(const int keyIndex) const { // When there are no calibration data for a key, // the radius of the key is assigned to zero. @@ -76,11 +77,11 @@ class ProximityInfo { ProximityInfoUtils::initializeProximities(inputCodes, inputXCoordinates, inputYCoordinates, inputSize, mKeyXCoordinates, mKeyYCoordinates, mKeyWidths, mKeyHeights, mProximityCharsArray, CELL_HEIGHT, CELL_WIDTH, GRID_WIDTH, MOST_COMMON_KEY_WIDTH, - KEY_COUNT, mLocaleStr, &mCodeToKeyMap, allInputCodes); + KEY_COUNT, mLocaleStr, &mLowerCodePointToKeyMap, allInputCodes); } AK_FORCE_INLINE int getKeyIndexOf(const int c) const { - return ProximityInfoUtils::getKeyIndexOf(KEY_COUNT, c, &mCodeToKeyMap); + return ProximityInfoUtils::getKeyIndexOf(KEY_COUNT, c, &mLowerCodePointToKeyMap); } AK_FORCE_INLINE bool isCodePointOnKeyboard(const int codePoint) const { @@ -117,9 +118,9 @@ class ProximityInfo { // Sweet spots for geometric input. Note that we have extra sweet spots only for Y coordinates. float mSweetSpotCenterYsG[MAX_KEY_COUNT_IN_A_KEYBOARD]; float mSweetSpotRadii[MAX_KEY_COUNT_IN_A_KEYBOARD]; - hash_map_compat<int, int> mCodeToKeyMap; - - int mKeyIndexToCodePointG[MAX_KEY_COUNT_IN_A_KEYBOARD]; + hash_map_compat<int, int> mLowerCodePointToKeyMap; + int mKeyIndexToOriginalCodePoint[MAX_KEY_COUNT_IN_A_KEYBOARD]; + int mKeyIndexToLowerCodePointG[MAX_KEY_COUNT_IN_A_KEYBOARD]; int mCenterXsG[MAX_KEY_COUNT_IN_A_KEYBOARD]; int mCenterYsG[MAX_KEY_COUNT_IN_A_KEYBOARD]; int mKeyKeyDistancesG[MAX_KEY_COUNT_IN_A_KEYBOARD][MAX_KEY_COUNT_IN_A_KEYBOARD]; diff --git a/native/jni/src/suggest/core/layout/proximity_info_state.cpp b/native/jni/src/suggest/core/layout/proximity_info_state.cpp index fbabd92f2..bb4b41714 100644 --- a/native/jni/src/suggest/core/layout/proximity_info_state.cpp +++ b/native/jni/src/suggest/core/layout/proximity_info_state.cpp @@ -30,6 +30,12 @@ namespace latinime { +int ProximityInfoState::getPrimaryOriginalCodePointAt(const int index) const { + const int primaryCodePoint = getPrimaryCodePointAt(index); + const int keyIndex = mProximityInfo->getKeyIndexOf(primaryCodePoint); + return mProximityInfo->getOriginalCodePointOf(keyIndex); +} + // TODO: Remove the dependency of "isGeometric" void ProximityInfoState::initInputParams(const int pointerId, const float maxPointToKeyLength, const ProximityInfo *proximityInfo, const int *const inputCodes, const int inputSize, diff --git a/native/jni/src/suggest/core/layout/proximity_info_state.h b/native/jni/src/suggest/core/layout/proximity_info_state.h index c94060fa9..e941e43d8 100644 --- a/native/jni/src/suggest/core/layout/proximity_info_state.h +++ b/native/jni/src/suggest/core/layout/proximity_info_state.h @@ -65,6 +65,8 @@ class ProximityInfoState { return getProximityCodePointsAt(index)[0]; } + int getPrimaryOriginalCodePointAt(const int index) const; + inline bool sameAsTyped(const int *word, int length) const { if (length != mSampledInputSize) { return false; diff --git a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h index 5492c6070..b76b13971 100644 --- a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h @@ -29,12 +29,10 @@ class DictionaryHeaderStructurePolicy { public: virtual ~DictionaryHeaderStructurePolicy() {} - virtual bool supportsDynamicUpdate() const = 0; + virtual int getFormatVersionNumber() const = 0; virtual bool requiresGermanUmlautProcessing() const = 0; - virtual bool requiresFrenchLigatureProcessing() const = 0; - virtual float getMultiWordCostMultiplier() const = 0; virtual int getLastDecayedTime() const = 0; diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index 41f82049f..c74a4ebbe 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -18,6 +18,8 @@ #define LATINIME_DICTIONARY_STRUCTURE_POLICY_H #include "defines.h" +#include "suggest/core/dictionary/unigram_property.h" +#include "utils/exclusive_ownership_pointer.h" namespace latinime { @@ -28,23 +30,25 @@ class DictionaryHeaderStructurePolicy; class DictionaryShortcutsStructurePolicy; /* - * This class abstracts structure of dictionaries. + * This class abstracts the structure of dictionaries. * Implement this policy to support additional dictionaries. */ class DictionaryStructureWithBufferPolicy { public: + typedef ExclusiveOwnershipPointer<DictionaryStructureWithBufferPolicy> StructurePolicyPtr; + virtual ~DictionaryStructureWithBufferPolicy() {} virtual int getRootPosition() const = 0; - virtual void createAndGetAllChildNodes(const DicNode *const dicNode, + virtual void createAndGetAllChildDicNodes(const DicNode *const dicNode, DicNodeVector *const childDicNodes) const = 0; virtual int getCodePointsAndProbabilityAndReturnCodePointCount( const int nodePos, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) const = 0; - virtual int getTerminalNodePositionOfWord(const int *const inWord, + virtual int getTerminalPtNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const = 0; virtual int getProbability(const int unigramProbability, @@ -64,11 +68,13 @@ class DictionaryStructureWithBufferPolicy { // Returns whether the update was success or not. virtual bool addUnigramWord(const int *const word, const int length, - const int probability) = 0; + const int probability, const int *const shortcutTargetCodePoints, + const int shortcutLength, const int shortcutProbability, const bool isNotAWord, + const bool isBlacklisted,const int timestamp) = 0; // Returns whether the update was success or not. virtual bool addBigramWords(const int *const word0, const int length0, const int *const word1, - const int length1, const int probability) = 0; + const int length1, const int probability, const int timestamp) = 0; // Returns whether the update was success or not. virtual bool removeBigramWords(const int *const word0, const int length0, @@ -82,9 +88,13 @@ class DictionaryStructureWithBufferPolicy { // Currently, this method is used only for testing. You may want to consider creating new // dedicated method instead of this if you want to use this in the production. - virtual void getProperty(const char *const query, char *const outResult, + virtual void getProperty(const char *const query, const int queryLength, char *const outResult, const int maxResultLength) = 0; + // Used for testing. + virtual const UnigramProperty getUnigramProperty(const int *const codePonts, + const int codePointCount) const = 0; + protected: DictionaryStructureWithBufferPolicy() {} diff --git a/native/jni/src/suggest/core/policy/weighting.cpp b/native/jni/src/suggest/core/policy/weighting.cpp index 0c4016893..c202b81fe 100644 --- a/native/jni/src/suggest/core/policy/weighting.cpp +++ b/native/jni/src/suggest/core/policy/weighting.cpp @@ -20,6 +20,7 @@ #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_profiler.h" #include "suggest/core/dicnode/dic_node_utils.h" +#include "suggest/core/dictionary/error_type_utils.h" #include "suggest/core/session/dic_traverse_session.h" namespace latinime { @@ -82,8 +83,8 @@ static inline void profile(const CorrectionType correctionType, DicNode *const n traverseSession, parentDicNode, dicNode, &inputStateG); const float languageCost = Weighting::getLanguageCost(weighting, correctionType, traverseSession, parentDicNode, dicNode, multiBigramMap); - const ErrorType errorType = weighting->getErrorType(correctionType, traverseSession, - parentDicNode, dicNode); + const ErrorTypeUtils::ErrorType errorType = weighting->getErrorType(correctionType, + traverseSession, parentDicNode, dicNode); profile(correctionType, dicNode); if (inputStateG.mNeedsToUpdateInputStateG) { dicNode->updateInputIndexG(&inputStateG); diff --git a/native/jni/src/suggest/core/policy/weighting.h b/native/jni/src/suggest/core/policy/weighting.h index 2d49e98a6..bd6b3cf41 100644 --- a/native/jni/src/suggest/core/policy/weighting.h +++ b/native/jni/src/suggest/core/policy/weighting.h @@ -18,6 +18,7 @@ #define LATINIME_WEIGHTING_H #include "defines.h" +#include "suggest/core/dictionary/error_type_utils.h" namespace latinime { @@ -84,7 +85,7 @@ class Weighting { virtual float getSpaceSubstitutionCost(const DicTraverseSession *const traverseSession, const DicNode *const dicNode) const = 0; - virtual ErrorType getErrorType(const CorrectionType correctionType, + virtual ErrorTypeUtils::ErrorType getErrorType(const CorrectionType correctionType, const DicTraverseSession *const traverseSession, const DicNode *const parentDicNode, const DicNode *const dicNode) const = 0; diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.cpp b/native/jni/src/suggest/core/session/dic_traverse_session.cpp index 50f2bbd8d..5070491f4 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.cpp +++ b/native/jni/src/suggest/core/session/dic_traverse_session.cpp @@ -35,16 +35,16 @@ void DicTraverseSession::init(const Dictionary *const dictionary, const int *pre ->getMultiWordCostMultiplier(); mSuggestOptions = suggestOptions; if (!prevWord) { - mPrevWordPos = NOT_A_DICT_POS; + mPrevWordPtNodePos = NOT_A_DICT_POS; return; } // TODO: merge following similar calls to getTerminalPosition into one case-insensitive call. - mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord( + mPrevWordPtNodePos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord( prevWord, prevWordLength, false /* forceLowerCaseSearch */); - if (mPrevWordPos == NOT_A_DICT_POS) { + if (mPrevWordPtNodePos == NOT_A_DICT_POS) { // Check bigrams for lower-cased previous word if original was not found. Useful for // auto-capitalized words like "The [current_word]". - mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord( + mPrevWordPtNodePos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord( prevWord, prevWordLength, true /* forceLowerCaseSearch */); } } diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.h b/native/jni/src/suggest/core/session/dic_traverse_session.h index e0b1c67d9..6e4dda44d 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.h +++ b/native/jni/src/suggest/core/session/dic_traverse_session.h @@ -59,7 +59,7 @@ class DicTraverseSession { } AK_FORCE_INLINE DicTraverseSession(JNIEnv *env, jstring localeStr, bool usesLargeCache) - : mPrevWordPos(NOT_A_DICT_POS), mProximityInfo(0), + : mPrevWordPtNodePos(NOT_A_DICT_POS), mProximityInfo(0), mDictionary(0), mSuggestOptions(0), mDicNodesCache(usesLargeCache), mMultiBigramMap(), mInputSize(0), mPartiallyCommited(false), mMaxPointerCount(1), mMultiWordCostMultiplier(1.0f) { @@ -86,11 +86,9 @@ class DicTraverseSession { //-------------------- const ProximityInfo *getProximityInfo() const { return mProximityInfo; } const SuggestOptions *getSuggestOptions() const { return mSuggestOptions; } - int getPrevWordPos() const { return mPrevWordPos; } + int getPrevWordPtNodePos() const { return mPrevWordPtNodePos; } // TODO: REMOVE - void setPrevWordPos(int pos) { mPrevWordPos = pos; } - // TODO: Use proper parameter when changed - int getDicRootPos() const { return 0; } + void setPrevWordPtNodePos(const int ptNodePos) { mPrevWordPtNodePos = ptNodePos; } DicNodesCache *getDicTraverseCache() { return &mDicNodesCache; } MultiBigramMap *getMultiBigramMap() { return &mMultiBigramMap; } const ProximityInfoState *getProximityInfoState(int id) const { @@ -119,26 +117,13 @@ class DicTraverseSession { return true; } - void getSearchKeys(const DicNode *node, std::vector<int> *const outputSearchKeyVector) const { - for (int i = 0; i < MAX_POINTER_COUNT_G; ++i) { - if (!mProximityInfoStates[i].isUsed()) { - continue; - } - const int pointerId = node->getInputIndex(i); - const std::vector<int> *const searchKeyVector = - mProximityInfoStates[i].getSearchKeyVector(pointerId); - outputSearchKeyVector->insert(outputSearchKeyVector->end(), searchKeyVector->begin(), - searchKeyVector->end()); - } - } - - ProximityType getProximityTypeG(const DicNode *const node, const int childCodePoint) const { + ProximityType getProximityTypeG(const DicNode *const dicNode, const int childCodePoint) const { ProximityType proximityType = UNRELATED_CHAR; for (int i = 0; i < MAX_POINTER_COUNT_G; ++i) { if (!mProximityInfoStates[i].isUsed()) { continue; } - const int pointerId = node->getInputIndex(i); + const int pointerId = dicNode->getInputIndex(i); proximityType = mProximityInfoStates[i].getProximityTypeG(pointerId, childCodePoint); ASSERT(proximityType == UNRELATED_CHAR || proximityType == MATCH_CHAR); // TODO: Make this more generic @@ -192,7 +177,7 @@ class DicTraverseSession { const int *const inputYs, const int *const times, const int *const pointerIds, const int inputSize, const float maxSpatialDistance, const int maxPointerCount); - int mPrevWordPos; + int mPrevWordPtNodePos; const ProximityInfo *mProximityInfo; const Dictionary *mDictionary; const SuggestOptions *mSuggestOptions; diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp index 73ccebc88..f84c84181 100644 --- a/native/jni/src/suggest/core/suggest.cpp +++ b/native/jni/src/suggest/core/suggest.cpp @@ -98,7 +98,7 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPo // Continue suggestion after partial commit. DicNode *topDicNode = traverseSession->getDicTraverseCache()->setCommitPoint(commitPoint); - traverseSession->setPrevWordPos(topDicNode->getPrevWordNodePos()); + traverseSession->setPrevWordPtNodePos(topDicNode->getPrevWordPtNodePos()); traverseSession->getDicTraverseCache()->continueSearch(); traverseSession->setPartiallyCommited(); } @@ -109,7 +109,7 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPo // Create a new dic node here DicNode rootNode; DicNodeUtils::initAsRoot(traverseSession->getDictionaryStructurePolicy(), - traverseSession->getPrevWordPos(), &rootNode); + traverseSession->getPrevWordPtNodePos(), &rootNode); traverseSession->getDicTraverseCache()->copyPushActive(&rootNode); } } @@ -231,12 +231,15 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen BinaryDictionaryShortcutIterator shortcutIt( traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(), traverseSession->getDictionaryStructurePolicy() - ->getShortcutPositionOfPtNode(terminalDicNode->getPos())); + ->getShortcutPositionOfPtNode(terminalDicNode->getPtNodePos())); // Shortcut is not supported for multiple words suggestions. // TODO: Check shortcuts during traversal for multiple words suggestions. const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode); + const int shortcutBaseScore = SCORING->doesAutoCorrectValidWord() ? + SCORING->calculateFinalScore(compoundDistance, traverseSession->getInputSize(), + true /* forceCommit */) : finalScore; const int updatedOutputWordIndex = ShortcutUtils::outputShortcuts(&shortcutIt, - outputWordIndex, finalScore, outputCodePoints, frequencies, outputTypes, + outputWordIndex, shortcutBaseScore, outputCodePoints, frequencies, outputTypes, sameAsTyped); const int secondWordFirstInputIndex = terminalDicNode->getSecondWordFirstInputIndex( traverseSession->getProximityInfoState(0)); @@ -421,15 +424,15 @@ void Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const { } break; case UNRELATED_CHAR: - // Just drop this node and do nothing. + // Just drop this dicNode and do nothing. break; default: - // Just drop this node and do nothing. + // Just drop this dicNode and do nothing. break; } } - // Push the node for look-ahead correction + // Push the dicNode for look-ahead correction if (allowsErrorCorrections && canDoLookAheadCorrection) { traverseSession->getDicTraverseCache()->copyPushNextActive(&dicNode); } @@ -442,7 +445,7 @@ void Suggest::processTerminalDicNode( if (dicNode->getCompoundDistance() >= static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) { return; } - if (!dicNode->isTerminalWordNode()) { + if (!dicNode->isTerminalDicNode()) { return; } if (dicNode->shouldBeFilteredBySafetyNetForBigram()) { @@ -463,7 +466,7 @@ void Suggest::processTerminalDicNode( /** * Adds the expanded dicNode to the next search priority queue. Also creates an additional next word - * (by the space omission error correction) search path if input dicNode is on a terminal node. + * (by the space omission error correction) search path if input dicNode is on a terminal. */ void Suggest::processExpandedDicNode( DicTraverseSession *traverseSession, DicNode *dicNode) const { @@ -505,7 +508,7 @@ void Suggest::processDicNodeAsSubstitution(DicTraverseSession *traverseSession, processExpandedDicNode(traverseSession, childDicNode); } -// Process the node codepoint as a digraph. This means that composite glyphs like the German +// Process the DicNode codepoint as a digraph. This means that composite glyphs like the German // u-umlaut is expanded to the transliteration "ue". Note that this happens in parallel with // the normal non-digraph traversal, so both "uber" and "ueber" can be corrected to "[u-umlaut]ber". void Suggest::processDicNodeAsDigraph(DicTraverseSession *traverseSession, @@ -518,7 +521,7 @@ void Suggest::processDicNodeAsDigraph(DicTraverseSession *traverseSession, /** * Handle the dicNode as an omission error (e.g., ths => this). Skip the current letter and consider * matches for all possible next letters. Note that just skipping the current letter without any - * other conditions tends to flood the search dic nodes cache with omission nodes. Instead, check + * other conditions tends to flood the search DicNodes cache with omission DicNodes. Instead, check * the possible *next* letters after the omission to better limit search to plausible omissions. * Note that apostrophes are handled as omissions. */ @@ -605,7 +608,7 @@ void Suggest::processDicNodeAsTransposition(DicTraverseSession *traverseSession, } /** - * Weight child node by aligning it to the key + * Weight child dicNode by aligning it to the key */ void Suggest::weightChildNode(DicTraverseSession *traverseSession, DicNode *dicNode) const { const int inputSize = traverseSession->getInputSize(); |