aboutsummaryrefslogtreecommitdiffstats
path: root/native/src/correction_state.cpp
diff options
context:
space:
mode:
author: satok <satok@google.com> 2011-08-04 18:31:57 +0900
committer: satok <satok@google.com> 2011-08-05 17:24:56 +0900
commit: 8876b75ca1c218949539dcc2fb6c88a19da9e3f8 (patch)
tree: 039ce458df8d2733e071fc52e826790884b2ce7d /native/src/correction_state.cpp
parent: 2e2906bc1793c0389d9d921bded04fb1de252ab6 (diff)
downloadlatinime-8876b75ca1c218949539dcc2fb6c88a19da9e3f8.tar.gz
latinime-8876b75ca1c218949539dcc2fb6c88a19da9e3f8.tar.xz
latinime-8876b75ca1c218949539dcc2fb6c88a19da9e3f8.zip
Move scoring part to the correction state
Change-Id: I2dc4a0869636fce5526f48b3a6267b6bdf61dbfb
Diffstat (limited to 'native/src/correction_state.cpp')
-rw-r--r-- native/src/correction_state.cpp 131
1 files changed, 122 insertions, 9 deletions
diff --git a/native/src/correction_state.cpp b/native/src/correction_state.cpp
index b2c77b00d..9000e9e9c 100644
--- a/native/src/correction_state.cpp
+++ b/native/src/correction_state.cpp
@@ -25,13 +25,31 @@
namespace latinime {
+//////////////////////
+// inline functions //
+//////////////////////
+static const char QUOTE = '\'';
+
+inline bool CorrectionState::needsToSkipCurrentNode(const unsigned short c) {
+ const unsigned short userTypedChar = mProximityInfo->getPrimaryCharAt(mInputIndex);
+ // Skip the ' or other letter and continue deeper
+ return (c == QUOTE && userTypedChar != QUOTE) || mSkipPos == mOutputIndex;
+}
+
+/////////////////////
+// CorrectionState //
+/////////////////////
+
CorrectionState::CorrectionState(const int typedLetterMultiplier, const int fullWordMultiplier)
: TYPED_LETTER_MULTIPLIER(typedLetterMultiplier), FULL_WORD_MULTIPLIER(fullWordMultiplier) {
}
-void CorrectionState::initCorrectionState(const ProximityInfo *pi, const int inputLength) {
+void CorrectionState::initCorrectionState(const ProximityInfo *pi, const int inputLength,
+ const int maxDepth) {
mProximityInfo = pi;
mInputLength = inputLength;
+ mMaxDepth = maxDepth;
+ mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;
}
void CorrectionState::setCorrectionParams(const int skipPos, const int excessivePos,
@@ -58,27 +76,37 @@ int CorrectionState::getFreqForSplitTwoWords(const int firstFreq, const int seco
return CorrectionState::RankingAlgorithm::calcFreqForSplitTwoWords(firstFreq, secondFreq, this);
}
-int CorrectionState::getFinalFreq(const unsigned short *word, const int freq) {
- if (mProximityInfo->sameAsTyped(word, mOutputIndex + 1) || mOutputIndex < MIN_SUGGEST_DEPTH) {
+int CorrectionState::getFinalFreq(const int freq, unsigned short **word, int *wordLength) {
+ const int outputIndex = mOutputIndex - 1;
+ const int inputIndex = (mCurrentStateType == TRAVERSE_ALL_ON_TERMINAL
+ || mCurrentStateType == TRAVERSE_ALL_NOT_ON_TERMINAL) ? mInputIndex : mInputIndex - 1;
+ *wordLength = outputIndex + 1;
+ if (mProximityInfo->sameAsTyped(mWord, outputIndex + 1) || outputIndex < MIN_SUGGEST_DEPTH) {
return -1;
}
- const bool sameLength = (mExcessivePos == mInputLength - 1) ? (mInputLength == mInputIndex + 2)
- : (mInputLength == mInputIndex + 1);
+ *word = mWord;
+ const bool sameLength = (mExcessivePos == mInputLength - 1) ? (mInputLength == inputIndex + 2)
+ : (mInputLength == inputIndex + 1);
return CorrectionState::RankingAlgorithm::calculateFinalFreq(
- mInputIndex, mOutputIndex, mMatchedCharCount, freq, sameLength, this);
+ inputIndex, outputIndex, mMatchedCharCount, freq, sameLength, this);
}
-void CorrectionState::initProcessState(
- const int matchCount, const int inputIndex, const int outputIndex) {
+void CorrectionState::initProcessState(const int matchCount, const int inputIndex,
+ const int outputIndex, const bool traverseAllNodes, const int diffs) {
mMatchedCharCount = matchCount;
mInputIndex = inputIndex;
mOutputIndex = outputIndex;
+ mTraverseAllNodes = traverseAllNodes;
+ mDiffs = diffs;
}
-void CorrectionState::getProcessState(int *matchedCount, int *inputIndex, int *outputIndex) {
+void CorrectionState::getProcessState(int *matchedCount, int *inputIndex, int *outputIndex,
+ bool *traverseAllNodes, int *diffs) {
*matchedCount = mMatchedCharCount;
*inputIndex = mInputIndex;
*outputIndex = mOutputIndex;
+ *traverseAllNodes = mTraverseAllNodes;
+ *diffs = mDiffs;
}
void CorrectionState::charMatched() {
@@ -95,6 +123,11 @@ int CorrectionState::getInputIndex() {
return mInputIndex;
}
+// TODO: remove
+bool CorrectionState::needsToTraverseAll() {
+ return mTraverseAllNodes;
+}
+
void CorrectionState::incrementInputIndex() {
++mInputIndex;
}
@@ -103,6 +136,86 @@ void CorrectionState::incrementOutputIndex() {
++mOutputIndex;
}
+void CorrectionState::startTraverseAll() {
+ mTraverseAllNodes = true;
+}
+
+bool CorrectionState::needsToPrune() const {
+ return (mOutputIndex - 1 >= (mTransposedPos >= 0 ? mInputLength - 1 : mMaxDepth)
+ || mDiffs > mMaxEditDistance);
+}
+
+CorrectionState::CorrectionStateType CorrectionState::processCharAndCalcState(
+ const int32_t c, const bool isTerminal) {
+ mCurrentStateType = NOT_ON_TERMINAL;
+ // This has to be done for each virtual char (this forwards the "inputIndex", which
+ // is the index in the user-inputted chars, as read by proximity chars).
+ if (mExcessivePos == mOutputIndex && mInputIndex < mInputLength - 1) {
+ incrementInputIndex();
+ }
+
+ if (mTraverseAllNodes || needsToSkipCurrentNode(c)) {
+ mWord[mOutputIndex] = c;
+ if (needsToTraverseAll() && isTerminal) {
+ mCurrentStateType = TRAVERSE_ALL_ON_TERMINAL;
+ } else {
+ mCurrentStateType = TRAVERSE_ALL_NOT_ON_TERMINAL;
+ }
+ } else {
+ int inputIndexForProximity = mInputIndex;
+
+ if (mTransposedPos >= 0) {
+ if (mInputIndex == mTransposedPos) {
+ ++inputIndexForProximity;
+ }
+ if (mInputIndex == (mTransposedPos + 1)) {
+ --inputIndexForProximity;
+ }
+ }
+
+ int matchedProximityCharId = mProximityInfo->getMatchedProximityId(
+ inputIndexForProximity, c, this);
+ if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId) {
+ mCurrentStateType = UNRELATED;
+ return mCurrentStateType;
+ }
+ mWord[mOutputIndex] = c;
+ // If inputIndex is greater than mInputLength, that means there are no
+ // proximity chars. So, we don't need to check proximity.
+ if (ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) {
+ charMatched();
+ }
+
+ if (ProximityInfo::NEAR_PROXIMITY_CHAR == matchedProximityCharId) {
+ incrementDiffs();
+ }
+
+ const bool isSameAsUserTypedLength = mInputLength
+ == getInputIndex() + 1
+ || (mExcessivePos == mInputLength - 1
+ && getInputIndex() == mInputLength - 2);
+ if (isSameAsUserTypedLength && isTerminal) {
+ mCurrentStateType = ON_TERMINAL;
+ }
+ // Start traversing all nodes after the index exceeds the user typed length
+ if (isSameAsUserTypedLength) {
+ startTraverseAll();
+ }
+
+ // Finally, we are ready to go to the next character, the next "virtual node".
+ // We should advance the input index.
+ // We do this in this branch of the 'if traverseAllNodes' because we are still matching
+ // characters to input; the other branch is not matching them but searching for
+ // completions, which is why it does not have to do it.
+ incrementInputIndex();
+ }
+
+ // Also, the next char is one "virtual node" depth more than this char.
+ incrementOutputIndex();
+
+ return mCurrentStateType;
+}
+
CorrectionState::~CorrectionState() {
}