Merge "Limit the suggestions with an excessive character by filtering proximity characters"

author: satok <satok@google.com> 2010-12-09 08:45:45 -0800
committer: Android (Google) Code Review <android-gerrit@google.com> 2010-12-09 08:45:45 -0800
commit: 3f7eac0d2ce626b223c822765a08057c9f12d0cd (patch)
tree: 3ce54873daf2be8640cb26a935b1adb221e55393 /native/src
parent: 34235220c2dd5f54ba12cc197fbc1b37b5c53d68 (diff)
parent: e07baa6fabe1be1aef68caa9a55a073d4f118334 (diff)
download: latinime-3f7eac0d2ce626b223c822765a08057c9f12d0cd.tar.gz
latinime-3f7eac0d2ce626b223c822765a08057c9f12d0cd.tar.xz
latinime-3f7eac0d2ce626b223c822765a08057c9f12d0cd.zip
2 files changed, 30 insertions, 4 deletions
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index 55d879fd2..46332c74d 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@@ -58,8 +58,10 @@ int UnigramDictionary::getSuggestions(int *codes, int codesSize, unsigned short
     // Suggestion with excessive character
     if (SUGGEST_WORDS_WITH_EXCESSIVE_CHARACTER) {
         for (int i = 0; i < codesSize; ++i) {
-            if (DEBUG_DICT) LOGI("--- Suggest excessive characters %d", i);
-            getSuggestionCandidates(codesSize, -1, i, NULL, 0);
+            if (existsAdjacentProximityChars(i, codesSize)) {
+                if (DEBUG_DICT) LOGI("--- Suggest excessive characters %d", i);
+                getSuggestionCandidates(codesSize, -1, i, NULL, 0);
+            }
         }
     }
 
@@ -331,11 +333,34 @@ inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
     return (c == QUOTE && userTypedChar != QUOTE) || skipPos == depth;
 }
 
+// Returns true when the code typed at inputIndex (the first entry of that input
+// slot) also occurs among the proximity codes of the previous or the next input
+// position, and false otherwise, including when inputIndex is out of range.
+// getSuggestions() calls this to decide whether to request excessive-character
+// suggestions for a position.
+inline bool UnigramDictionary::existsAdjacentProximityChars(const int inputIndex,
+        const int inputLength) {
+    if (inputIndex < 0 || inputIndex >= inputLength) return false;
+    const int currentChar = *getInputCharsAt(inputIndex);
+    // Visit the left neighbor (inputIndex - 1), then the right one (inputIndex + 1).
+    for (int neighbor = inputIndex - 1; neighbor <= inputIndex + 1; neighbor += 2) {
+        if (neighbor < 0 || neighbor >= inputLength) continue;
+        const int *neighborChars = getInputCharsAt(neighbor);
+        // Bound the index BEFORE dereferencing: a slot holding a full
+        // MAX_PROXIMITY_CHARS codes has no non-positive terminator, so testing
+        // the value first would read one int past the slot.
+        for (int i = 0; i < MAX_PROXIMITY_CHARS && neighborChars[i] > 0; ++i) {
+            if (neighborChars[i] == currentChar) return true;
+        }
+    }
+    return false;
+}
+
 inline int UnigramDictionary::getMatchedProximityId(const int *currentChars,
         const unsigned short c, const int skipPos) {
     const unsigned short lowerC = toLowerCase(c);
     int j = 0;
-    while (currentChars[j] > 0) {
+    while (currentChars[j] > 0 && j < MAX_PROXIMITY_CHARS) {
         const bool matched = (currentChars[j] == lowerC || currentChars[j] == c);
         // If skipPos is defined, not to search proximity collections.
         // First char is what user typed.
diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h
index cdec46557..f8af55c92 100644
--- a/native/src/unigram_dictionary.h
+++ b/native/src/unigram_dictionary.h
@@ -69,7 +69,8 @@ private:
     bool processCurrentNodeForExactMatch(const int firstChildPos,
             const int startInputIndex, const int depth, unsigned short *word,
             int *newChildPosition, int *newCount, bool *newTerminal, int *newFreq, int *siblingPos);
-
+    bool existsAdjacentProximityChars(const int inputIndex, const int inputLength);
+    int* getInputCharsAt(const int index) {return mInputCodes + (index * MAX_PROXIMITY_CHARS);}
     const unsigned char *DICT;
     const int MAX_WORDS;
     const int MAX_WORD_LENGTH;
author	satok <satok@google.com>	2010-12-09 08:45:45 -0800
committer	Android (Google) Code Review <android-gerrit@google.com>	2010-12-09 08:45:45 -0800
commit	3f7eac0d2ce626b223c822765a08057c9f12d0cd (patch)
tree	3ce54873daf2be8640cb26a935b1adb221e55393 /native/src
parent	34235220c2dd5f54ba12cc197fbc1b37b5c53d68 (diff)
parent	e07baa6fabe1be1aef68caa9a55a073d4f118334 (diff)
download	latinime-3f7eac0d2ce626b223c822765a08057c9f12d0cd.tar.gz latinime-3f7eac0d2ce626b223c822765a08057c9f12d0cd.tar.xz latinime-3f7eac0d2ce626b223c822765a08057c9f12d0cd.zip