aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- native/jni/src/correction.cpp 47
-rw-r--r-- native/jni/src/suggest/policyimpl/utils/damerau_levenshtein_edit_distance_policy.h 79
-rw-r--r-- native/jni/src/suggest/policyimpl/utils/edit_distance.h 70
-rw-r--r-- native/jni/src/suggest/policyimpl/utils/edit_distance_policy.h 43
4 files changed, 197 insertions, 42 deletions
diff --git a/native/jni/src/correction.cpp b/native/jni/src/correction.cpp
index 0c65939e0..61bf3f619 100644
--- a/native/jni/src/correction.cpp
+++ b/native/jni/src/correction.cpp
@@ -23,6 +23,8 @@
#include "defines.h"
#include "proximity_info_state.h"
#include "suggest_utils.h"
+#include "suggest/policyimpl/utils/edit_distance.h"
+#include "suggest/policyimpl/utils/damerau_levenshtein_edit_distance_policy.h"
namespace latinime {
@@ -906,50 +908,11 @@ inline static bool isUpperCase(unsigned short c) {
return totalFreq;
}
-/* Damerau-Levenshtein distance */
-inline static int editDistanceInternal(int *editDistanceTable, const int *before,
- const int beforeLength, const int *after, const int afterLength) {
- // dp[li][lo] dp[a][b] = dp[ a * lo + b]
- int *dp = editDistanceTable;
- const int li = beforeLength + 1;
- const int lo = afterLength + 1;
- for (int i = 0; i < li; ++i) {
- dp[lo * i] = i;
- }
- for (int i = 0; i < lo; ++i) {
- dp[i] = i;
- }
-
- for (int i = 0; i < li - 1; ++i) {
- for (int j = 0; j < lo - 1; ++j) {
- const int ci = toBaseLowerCase(before[i]);
- const int co = toBaseLowerCase(after[j]);
- const int cost = (ci == co) ? 0 : 1;
- dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1,
- min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost));
- if (i > 0 && j > 0 && ci == toBaseLowerCase(after[j - 1])
- && co == toBaseLowerCase(before[i - 1])) {
- dp[(i + 1) * lo + (j + 1)] = min(
- dp[(i + 1) * lo + (j + 1)], dp[(i - 1) * lo + (j - 1)] + cost);
- }
- }
- }
-
- if (DEBUG_EDIT_DISTANCE) {
- AKLOGI("IN = %d, OUT = %d", beforeLength, afterLength);
- for (int i = 0; i < li; ++i) {
- for (int j = 0; j < lo; ++j) {
- AKLOGI("EDIT[%d][%d], %d", i, j, dp[i * lo + j]);
- }
- }
- }
- return dp[li * lo - 1];
-}
-
/* static */ int Correction::RankingAlgorithm::editDistance(const int *before,
const int beforeLength, const int *after, const int afterLength) {
- int table[(beforeLength + 1) * (afterLength + 1)];
- return editDistanceInternal(table, before, beforeLength, after, afterLength);
+ const DamerauLevenshteinEditDistancePolicy damerauLevenshtein(
+ before, beforeLength, after, afterLength); // every cost is 0 or 1, so the float sum is whole
+ return static_cast<int>(EditDistance::getEditDistance(&damerauLevenshtein));
}
diff --git a/native/jni/src/suggest/policyimpl/utils/damerau_levenshtein_edit_distance_policy.h b/native/jni/src/suggest/policyimpl/utils/damerau_levenshtein_edit_distance_policy.h
new file mode 100644
index 000000000..ec1457455
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/utils/damerau_levenshtein_edit_distance_policy.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DAMERAU_LEVENSHTEIN_EDIT_DISTANCE_POLICY_H
+#define LATINIME_DAMERAU_LEVENSHTEIN_EDIT_DISTANCE_POLICY_H
+
+#include "char_utils.h"
+#include "suggest/policyimpl/utils/edit_distance_policy.h"
+
+namespace latinime {
+// Unit-cost EditDistancePolicy; characters are compared after toBaseLowerCase().
+class DamerauLevenshteinEditDistancePolicy : public EditDistancePolicy {
+ public:
+ DamerauLevenshteinEditDistancePolicy(const int *const string0, const int length0,
+ const int *const string1, const int length1)
+ : mString0(string0), mString0Length(length0), mString1(string1),
+ mString1Length(length1) {}
+ ~DamerauLevenshteinEditDistancePolicy() {}
+ // Returns 0 when the two characters are equal after toBaseLowerCase(), 1 otherwise.
+ AK_FORCE_INLINE float getSubstitutionCost(const int index0, const int index1) const {
+ const int c0 = toBaseLowerCase(mString0[index0]);
+ const int c1 = toBaseLowerCase(mString1[index1]);
+ return (c0 == c1) ? 0.0f : 1.0f;
+ }
+ // Always 1; the indices are not consulted.
+ AK_FORCE_INLINE float getDeletionCost(const int index0, const int index1) const {
+ return 1.0f;
+ }
+ // Always 1; the indices are not consulted.
+ AK_FORCE_INLINE float getInsertionCost(const int index0, const int index1) const {
+ return 1.0f;
+ }
+ // True only when both indices are > 0 and the two strings hold this character pair swapped.
+ AK_FORCE_INLINE bool allowTransposition(const int index0, const int index1) const {
+ const int c0 = toBaseLowerCase(mString0[index0]);
+ const int c1 = toBaseLowerCase(mString1[index1]);
+ if (index0 > 0 && index1 > 0 && c0 == toBaseLowerCase(mString1[index1 - 1])
+ && c1 == toBaseLowerCase(mString0[index0 - 1])) {
+ return true;
+ }
+ return false;
+ }
+ // A transposition is priced like a substitution at the same indices.
+ AK_FORCE_INLINE float getTranspositionCost(const int index0, const int index1) const {
+ return getSubstitutionCost(index0, index1);
+ }
+ // Length of string 0 as passed to the constructor.
+ AK_FORCE_INLINE int getString0Length() const {
+ return mString0Length;
+ }
+ // Length of string 1 as passed to the constructor.
+ AK_FORCE_INLINE int getString1Length() const {
+ return mString1Length;
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DamerauLevenshteinEditDistancePolicy);
+
+ const int *const mString0; // borrowed pointer; this class never frees it
+ const int mString0Length;
+ const int *const mString1; // borrowed pointer; this class never frees it
+ const int mString1Length;
+};
+} // namespace latinime
+
+#endif // LATINIME_DAMERAU_LEVENSHTEIN_EDIT_DISTANCE_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/utils/edit_distance.h b/native/jni/src/suggest/policyimpl/utils/edit_distance.h
new file mode 100644
index 000000000..cbbd66894
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/utils/edit_distance.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_EDIT_DISTANCE_H
+#define LATINIME_EDIT_DISTANCE_H
+
+#include "defines.h"
+#include "suggest/policyimpl/utils/edit_distance_policy.h"
+
+namespace latinime {
+
+class EditDistance { // computes a policy-weighted edit distance by filling a dynamic programming table
+ public:
+ // CAVEAT: There may be performance penalty if you need the edit distance as an integer value.
+ AK_FORCE_INLINE static float getEditDistance(const EditDistancePolicy *const policy) {
+ const int beforeLength = policy->getString0Length();
+ const int afterLength = policy->getString1Length();
+ float dp[(beforeLength + 1) * (afterLength + 1)]; // flat table: cell (i, j) is at (afterLength + 1) * i + j
+ for (int i = 0; i <= beforeLength; ++i) { // first column; the policy is queried with index -1
+ dp[(afterLength + 1) * i] = i * policy->getInsertionCost(i - 1, -1);
+ }
+ for (int i = 0; i <= afterLength; ++i) { // first row; the policy is again queried with index -1
+ dp[i] = i * policy->getDeletionCost(-1, i - 1);
+ }
+ // Each remaining cell takes the cheapest of an insertion, a deletion or a substitution.
+ for (int i = 0; i < beforeLength; ++i) {
+ for (int j = 0; j < afterLength; ++j) {
+ dp[(afterLength + 1) * (i + 1) + (j + 1)] = min(
+ dp[(afterLength + 1) * i + (j + 1)] + policy->getInsertionCost(i, j),
+ min(dp[(afterLength + 1) * (i + 1) + j] + policy->getDeletionCost(i, j),
+ dp[(afterLength + 1) * i + j]
+ + policy->getSubstitutionCost(i, j)));
+ if (i > 0 && j > 0 && policy->allowTransposition(i, j)) { // cell (i - 1, j - 1) must exist
+ dp[(afterLength + 1) * (i + 1) + (j + 1)] = min(
+ dp[(afterLength + 1) * (i + 1) + (j + 1)],
+ dp[(afterLength + 1) * (i - 1) + (j - 1)]
+ + policy->getTranspositionCost(i, j));
+ }
+ }
+ }
+ if (DEBUG_EDIT_DISTANCE) { // log every cell of the table
+ AKLOGI("IN = %d, OUT = %d", beforeLength, afterLength);
+ for (int i = 0; i < beforeLength + 1; ++i) {
+ for (int j = 0; j < afterLength + 1; ++j) {
+ AKLOGI("EDIT[%d][%d], %f", i, j, dp[(afterLength + 1) * i + j]);
+ }
+ }
+ }
+ return dp[(beforeLength + 1) * (afterLength + 1) - 1]; // last cell: both strings consumed in full
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(EditDistance);
+};
+} // namespace latinime
+
+#endif // LATINIME_EDIT_DISTANCE_H
diff --git a/native/jni/src/suggest/policyimpl/utils/edit_distance_policy.h b/native/jni/src/suggest/policyimpl/utils/edit_distance_policy.h
new file mode 100644
index 000000000..e3d1792cb
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/utils/edit_distance_policy.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_EDIT_DISTANCE_POLICY_H
+#define LATINIME_EDIT_DISTANCE_POLICY_H
+
+#include "defines.h"
+
+namespace latinime {
+
+class EditDistancePolicy { // abstract cost model; every query below is pure virtual
+ public: // NOTE(review): index0/index1 appear to be positions in string 0/string 1; callers may pass -1
+ virtual float getSubstitutionCost(const int index0, const int index1) const = 0;
+ virtual float getDeletionCost(const int index0, const int index1) const = 0;
+ virtual float getInsertionCost(const int index0, const int index1) const = 0;
+ virtual bool allowTransposition(const int index0, const int index1) const = 0;
+ virtual float getTranspositionCost(const int index0, const int index1) const = 0;
+ virtual int getString0Length() const = 0;
+ virtual int getString1Length() const = 0;
+ // Construction and destruction are reserved for subclasses.
+ protected:
+ EditDistancePolicy() {}
+ virtual ~EditDistancePolicy() {}
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(EditDistancePolicy);
+};
+} // namespace latinime
+
+#endif // LATINIME_EDIT_DISTANCE_POLICY_H