aboutsummaryrefslogtreecommitdiffstats
path: root/java/src/com/android/inputmethod/latin/RichInputConnection.java
diff options
context:
space:
mode:
author Keisuke Kuroyanagi <ksk@google.com> 2014-06-27 17:59:21 +0900
committer Keisuke Kuroyanagi <ksk@google.com> 2014-06-27 17:59:21 +0900
commit 1c2f1ada8305e36defa8572da687a4596bf083ea (patch)
tree 2a37c03d73a123c8a1e4ee2d66b49a6f7178a8a4 /java/src/com/android/inputmethod/latin/RichInputConnection.java
parent 05b1e0d42f9f103516103d4d33e61862c0851e9d (diff)
download latinime-1c2f1ada8305e36defa8572da687a4596bf083ea.tar.gz
latinime-1c2f1ada8305e36defa8572da687a4596bf083ea.tar.xz
latinime-1c2f1ada8305e36defa8572da687a4596bf083ea.zip
Find multiple previous word information to support n-gram.
Bug: 14425059
Change-Id: Ieace636334a9b2a094527341d4fcfc05958296c5
Diffstat (limited to 'java/src/com/android/inputmethod/latin/RichInputConnection.java')
-rw-r--r-- java/src/com/android/inputmethod/latin/RichInputConnection.java | 105
1 files changed, 59 insertions, 46 deletions
diff --git a/java/src/com/android/inputmethod/latin/RichInputConnection.java b/java/src/com/android/inputmethod/latin/RichInputConnection.java
index 3be6bccc6..8f252bd84 100644
--- a/java/src/com/android/inputmethod/latin/RichInputConnection.java
+++ b/java/src/com/android/inputmethod/latin/RichInputConnection.java
@@ -26,6 +26,7 @@ import android.view.inputmethod.ExtractedText;
import android.view.inputmethod.ExtractedTextRequest;
import android.view.inputmethod.InputConnection;
+import com.android.inputmethod.latin.PrevWordsInfo.WordInfo;
import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
import com.android.inputmethod.latin.utils.CapsModeUtils;
import com.android.inputmethod.latin.utils.DebugLogUtils;
@@ -49,8 +50,10 @@ public final class RichInputConnection {
private static final boolean DBG = false;
private static final boolean DEBUG_PREVIOUS_TEXT = false;
private static final boolean DEBUG_BATCH_NESTING = false;
- // Provision for a long word pair and a separator
- private static final int LOOKBACK_CHARACTER_NUM = Constants.DICTIONARY_MAX_WORD_LENGTH * 2 + 1;
+ // Provision for long words and separators between the words.
+ private static final int LOOKBACK_CHARACTER_NUM = Constants.DICTIONARY_MAX_WORD_LENGTH
+ * (Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1) /* words */
+ + Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM /* separators */;
private static final Pattern spaceRegex = Pattern.compile("\\s+");
private static final int INVALID_CURSOR_POSITION = -1;
@@ -544,22 +547,25 @@ public final class RichInputConnection {
return Arrays.binarySearch(sortedSeparators, code) >= 0;
}
- // Get information of the nth word before cursor. n = 1 retrieves the word immediately before
- // the cursor, n = 2 retrieves the word before that, and so on. This splits on whitespace only.
+ // Get context information from the nth word before the cursor. n = 1 retrieves the words
+ // immediately before the cursor, n = 2 retrieves the words before that, and so on. This splits
+ // on whitespace only.
// Also, it won't return words that end in a separator (if the nth word before the cursor
// ends in a separator, it returns information representing beginning-of-sentence).
- // Example :
- // (n = 1) "abc def|" -> def
- // (n = 1) "abc def |" -> def
- // (n = 1) "abc 'def|" -> 'def
+ // Example (when Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM is 2):
+ // (n = 1) "abc def|" -> abc, def
+ // (n = 1) "abc def |" -> abc, def
+ // (n = 1) "abc 'def|" -> empty, 'def
// (n = 1) "abc def. |" -> beginning-of-sentence
// (n = 1) "abc def . |" -> beginning-of-sentence
- // (n = 2) "abc def|" -> abc
- // (n = 2) "abc def |" -> abc
+ // (n = 2) "abc def|" -> beginning-of-sentence, abc
+ // (n = 2) "abc def |" -> beginning-of-sentence, abc
// (n = 2) "abc 'def|" -> empty. The context is different from "abc def", but we cannot
// represent this situation using PrevWordsInfo. See TODO in the method.
- // (n = 2) "abc def. |" -> abc
- // (n = 2) "abc def . |" -> def
+ // TODO: The next example's result should be "abc, def". This has to be fixed before we
+ // retrieve the prior context of Beginning-of-Sentence.
+ // (n = 2) "abc def. |" -> beginning-of-sentence, abc
+ // (n = 2) "abc def . |" -> abc, def
// (n = 2) "abc|" -> beginning-of-sentence
// (n = 2) "abc |" -> beginning-of-sentence
// (n = 2) "abc. def|" -> beginning-of-sentence
@@ -567,43 +573,50 @@ public final class RichInputConnection {
final SpacingAndPunctuations spacingAndPunctuations, final int n) {
if (prev == null) return PrevWordsInfo.EMPTY_PREV_WORDS_INFO;
final String[] w = spaceRegex.split(prev);
-
- // Referring to the word after the nth word.
- if ((n - 1) > 0 && (n - 1) <= w.length) {
- final String wordFollowingTheNthPrevWord = w[w.length - n + 1];
- if (!wordFollowingTheNthPrevWord.isEmpty()) {
- final char firstChar = wordFollowingTheNthPrevWord.charAt(0);
- if (spacingAndPunctuations.isWordConnector(firstChar)) {
- // The word following the n-th prev word is starting with a word connector.
- // TODO: Return meaningful context for this case.
- return PrevWordsInfo.EMPTY_PREV_WORDS_INFO;
+ final WordInfo[] prevWordsInfo = new WordInfo[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+ for (int i = 0; i < prevWordsInfo.length; i++) {
+ final int focusedWordIndex = w.length - n - i;
+ // Referring to the word after the focused word.
+ if ((focusedWordIndex + 1) >= 0 && (focusedWordIndex + 1) < w.length) {
+ final String wordFollowingTheNthPrevWord = w[focusedWordIndex + 1];
+ if (!wordFollowingTheNthPrevWord.isEmpty()) {
+ final char firstChar = wordFollowingTheNthPrevWord.charAt(0);
+ if (spacingAndPunctuations.isWordConnector(firstChar)) {
+ // The word following the focused word is starting with a word connector.
+ // TODO: Return meaningful context for this case.
+ prevWordsInfo[i] = WordInfo.EMPTY_WORD_INFO;
+ break;
+ }
}
}
+ // If we can't find (n + i) words, the context is beginning-of-sentence.
+ if (focusedWordIndex < 0) {
+ prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE;
+ break;
+ }
+ final String focusedWord = w[focusedWordIndex];
+ // If the word is empty, the context is beginning-of-sentence.
+ final int length = focusedWord.length();
+ if (length <= 0) {
+ prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE;
+ break;
+ }
+ // If ends in a sentence separator, the context is beginning-of-sentence.
+ final char lastChar = focusedWord.charAt(length - 1);
+ if (spacingAndPunctuations.isSentenceSeparator(lastChar)) {
+ prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE;
+ break;
+ }
+ // If ends in a word separator or connector, the context is unclear.
+ // TODO: Return meaningful context for this case.
+ if (spacingAndPunctuations.isWordSeparator(lastChar)
+ || spacingAndPunctuations.isWordConnector(lastChar)) {
+ prevWordsInfo[i] = WordInfo.EMPTY_WORD_INFO;
+ break;
+ }
+ prevWordsInfo[i] = new WordInfo(focusedWord);
}
-
- // If we can't find n words, or we found an empty word, the context is
- // beginning-of-sentence.
- if (w.length < n) {
- return PrevWordsInfo.BEGINNING_OF_SENTENCE;
- }
- final String nthPrevWord = w[w.length - n];
- final int length = nthPrevWord.length();
- if (length <= 0) {
- return PrevWordsInfo.BEGINNING_OF_SENTENCE;
- }
-
- // If ends in a sentence separator, the context is beginning-of-sentence.
- final char lastChar = nthPrevWord.charAt(length - 1);
- if (spacingAndPunctuations.isSentenceSeparator(lastChar)) {
- return PrevWordsInfo.BEGINNING_OF_SENTENCE;
- }
- // If ends in a word separator or connector, the context is unclear.
- // TODO: Return meaningful context for this case.
- if (spacingAndPunctuations.isWordSeparator(lastChar)
- || spacingAndPunctuations.isWordConnector(lastChar)) {
- return PrevWordsInfo.EMPTY_PREV_WORDS_INFO;
- }
- return new PrevWordsInfo(new PrevWordsInfo.WordInfo(nthPrevWord));
+ return new PrevWordsInfo(prevWordsInfo);
}
/**