diff options
Diffstat (limited to 'native/jni/src/correction.h')
-rw-r--r-- | native/jni/src/correction.h | 195 |
1 files changed, 114 insertions, 81 deletions
diff --git a/native/jni/src/correction.h b/native/jni/src/correction.h index 3300a8491..f016d5453 100644 --- a/native/jni/src/correction.h +++ b/native/jni/src/correction.h @@ -17,11 +17,13 @@ #ifndef LATINIME_CORRECTION_H #define LATINIME_CORRECTION_H -#include <assert.h> +#include <cassert> +#include <cstring> // for memset() #include <stdint.h> -#include "correction_state.h" +#include "correction_state.h" #include "defines.h" +#include "proximity_info_state.h" namespace latinime { @@ -37,10 +39,108 @@ class Correction { NOT_ON_TERMINAL } CorrectionType; + Correction() + : mProximityInfo(0), mUseFullEditDistance(false), mDoAutoCompletion(false), + mMaxEditDistance(0), mMaxDepth(0), mInputSize(0), mSpaceProximityPos(0), + mMissingSpacePos(0), mTerminalInputIndex(0), mTerminalOutputIndex(0), mMaxErrors(0), + mTotalTraverseCount(0), mNeedsToTraverseAllNodes(false), mOutputIndex(0), + mInputIndex(0), mEquivalentCharCount(0), mProximityCount(0), mExcessiveCount(0), + mTransposedCount(0), mSkippedCount(0), mTransposedPos(0), mExcessivePos(0), + mSkipPos(0), mLastCharExceeded(false), mMatching(false), mProximityMatching(false), + mAdditionalProximityMatching(false), mExceeding(false), mTransposing(false), + mSkipping(false), mProximityInfoState() { + memset(mWord, 0, sizeof(mWord)); + memset(mDistances, 0, sizeof(mDistances)); + memset(mEditDistanceTable, 0, sizeof(mEditDistanceTable)); + // NOTE: mCorrectionStates is an array of instances. + // No need to initialize it explicitly here. + } + + virtual ~Correction() {} + void resetCorrection(); + void initCorrection( + const ProximityInfo *pi, const int inputSize, const int maxWordLength); + void initCorrectionState(const int rootPos, const int childCount, const bool traverseAll); + + // TODO: remove + void setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos, + const int spaceProximityPos, const int missingSpacePos, const bool useFullEditDistance, + const bool doAutoCompletion, const int maxErrors); + void checkState(); + bool sameAsTyped(); + bool initProcessState(const int index); + + int getInputIndex() const; + + bool needsToPrune() const; + + int pushAndGetTotalTraverseCount() { + return ++mTotalTraverseCount; + } + + int getFreqForSplitMultipleWords( + const int *freqArray, const int *wordLengthArray, const int wordCount, + const bool isSpaceProximity, const unsigned short *word); + int getFinalProbability(const int probability, unsigned short **word, int *wordLength); + int getFinalProbabilityForSubQueue(const int probability, unsigned short **word, + int *wordLength, const int inputSize); + + CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal); + + ///////////////////////// + // Tree helper methods + int goDownTree(const int parentIndex, const int childCount, const int firstChildPos); + + inline int getTreeSiblingPos(const int index) const { + return mCorrectionStates[index].mSiblingPos; + } + + inline void setTreeSiblingPos(const int index, const int pos) { + mCorrectionStates[index].mSiblingPos = pos; + } + + inline int getTreeParentIndex(const int index) const { + return mCorrectionStates[index].mParentIndex; + } + + class RankingAlgorithm { + public: + static int calculateFinalProbability(const int inputIndex, const int depth, + const int probability, int *editDistanceTable, const Correction *correction, + const int inputSize); + static int calcFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray, + const int wordCount, const Correction *correction, const bool isSpaceProximity, + const unsigned short *word); + static float calcNormalizedScore(const unsigned short *before, const int beforeLength, + const unsigned short *after, const int afterLength, const int score); + static int editDistance(const unsigned short *before, + const int beforeLength, const unsigned short *after, const int afterLength); + private: + static const int CODE_SPACE = ' '; + static const int MAX_INITIAL_SCORE = 255; + }; + + // proximity info state + void initInputParams(const ProximityInfo *proximityInfo, const int32_t *inputCodes, + const int inputSize, const int *xCoordinates, const int *yCoordinates) { + mProximityInfoState.initInputParams(0, MAX_POINT_TO_KEY_LENGTH, + proximityInfo, inputCodes, inputSize, xCoordinates, yCoordinates, 0, 0, false); + } + + const unsigned short *getPrimaryInputWord() const { + return mProximityInfoState.getPrimaryInputWord(); + } + + unsigned short getPrimaryCharAt(const int index) const { + return mProximityInfoState.getPrimaryCharAt(index); + } + + private: + DISALLOW_COPY_AND_ASSIGN(Correction); + ///////////////////////// // static inline utils // ///////////////////////// - static const int TWO_31ST_DIV_255 = S_INT_MAX / 255; static inline int capped255MultForFullMatchAccentsOrCapitalizationDifference(const int num) { return (num < TWO_31ST_DIV_255 ? 255 * num : S_INT_MAX); @@ -93,112 +193,45 @@ class Correction { } } - Correction(const int typedLetterMultiplier, const int fullWordMultiplier); - void resetCorrection(); - void initCorrection( - const ProximityInfo *pi, const int inputLength, const int maxWordLength); - void initCorrectionState(const int rootPos, const int childCount, const bool traverseAll); - - // TODO: remove - void setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos, - const int spaceProximityPos, const int missingSpacePos, const bool useFullEditDistance, - const bool doAutoCompletion, const int maxErrors); - void checkState(); - bool initProcessState(const int index); - - int getInputIndex(); - - virtual ~Correction(); - int getSpaceProximityPos() const { + inline int getSpaceProximityPos() const { return mSpaceProximityPos; } - int getMissingSpacePos() const { + inline int getMissingSpacePos() const { return mMissingSpacePos; } - int getSkipPos() const { + inline int getSkipPos() const { return mSkipPos; } - int getExcessivePos() const { + inline int getExcessivePos() const { return mExcessivePos; } - int getTransposedPos() const { + inline int getTransposedPos() const { return mTransposedPos; } - bool needsToPrune() const; - - int pushAndGetTotalTraverseCount() { - return ++mTotalTraverseCount; - } - - int getFreqForSplitMultipleWords( - const int *freqArray, const int *wordLengthArray, const int wordCount, - const bool isSpaceProximity, const unsigned short *word); - int getFinalProbability(const int probability, unsigned short **word, int* wordLength); - int getFinalProbabilityForSubQueue(const int probability, unsigned short **word, - int* wordLength, const int inputLength); - - CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal); - - ///////////////////////// - // Tree helper methods - int goDownTree(const int parentIndex, const int childCount, const int firstChildPos); - - inline int getTreeSiblingPos(const int index) const { - return mCorrectionStates[index].mSiblingPos; - } - - inline void setTreeSiblingPos(const int index, const int pos) { - mCorrectionStates[index].mSiblingPos = pos; - } - - inline int getTreeParentIndex(const int index) const { - return mCorrectionStates[index].mParentIndex; - } - - class RankingAlgorithm { - public: - static int calculateFinalProbability(const int inputIndex, const int depth, - const int probability, int *editDistanceTable, const Correction* correction, - const int inputLength); - static int calcFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray, - const int wordCount, const Correction* correction, const bool isSpaceProximity, - const unsigned short *word); - static float calcNormalizedScore(const unsigned short* before, const int beforeLength, - const unsigned short* after, const int afterLength, const int score); - static int editDistance(const unsigned short* before, - const int beforeLength, const unsigned short* after, const int afterLength); - private: - static const int CODE_SPACE = ' '; - static const int MAX_INITIAL_SCORE = 255; - static const int TYPED_LETTER_MULTIPLIER = 2; - static const int FULL_WORD_MULTIPLIER = 2; - }; - - private: inline void incrementInputIndex(); inline void incrementOutputIndex(); inline void startToTraverseAllNodes(); - inline bool isQuote(const unsigned short c); + inline bool isSingleQuote(const unsigned short c); inline CorrectionType processSkipChar( const int32_t c, const bool isTerminal, const bool inputIndexIncremented); inline CorrectionType processUnrelatedCorrectionType(); inline void addCharToCurrentWord(const int32_t c); inline int getFinalProbabilityInternal(const int probability, unsigned short **word, - int* wordLength, const int inputLength); + int *wordLength, const int inputSize); - const int TYPED_LETTER_MULTIPLIER; - const int FULL_WORD_MULTIPLIER; + static const int TYPED_LETTER_MULTIPLIER = 2; + static const int FULL_WORD_MULTIPLIER = 2; const ProximityInfo *mProximityInfo; bool mUseFullEditDistance; bool mDoAutoCompletion; int mMaxEditDistance; int mMaxDepth; - int mInputLength; + int mInputSize; int mSpaceProximityPos; int mMissingSpacePos; int mTerminalInputIndex; @@ -240,7 +273,7 @@ class Correction { bool mExceeding; bool mTransposing; bool mSkipping; - + ProximityInfoState mProximityInfoState; }; } // namespace latinime #endif // LATINIME_CORRECTION_H |