Merge "Start-of-sentence should include newlines and non-period terminators."

author: Tom Ouyang <ouyang@google.com> 2015-03-17 22:24:59 +0000
committer: Android (Google) Code Review <android-gerrit@google.com> 2015-03-17 22:25:00 +0000
commit: edcf5853d1f9253e3a76e6db01731b9465da6ae0 (patch)
tree: 77ff08f17370cf5dd5f99746a5f4425da8ed6675 /java
parent: ea727b6b71398c9f78fbc7b79d3f0c73d301b938 (diff)
parent: 5aeb09213000eb571ea4f07d97e59254b8831eee (diff)
download: latinime-edcf5853d1f9253e3a76e6db01731b9465da6ae0.tar.gz latinime-edcf5853d1f9253e3a76e6db01731b9465da6ae0.tar.xz latinime-edcf5853d1f9253e3a76e6db01731b9465da6ae0.zip
1 file changed, 10 insertions, 4 deletions
diff --git a/java/src/com/android/inputmethod/latin/utils/NgramContextUtils.java b/java/src/com/android/inputmethod/latin/utils/NgramContextUtils.java
index 727df1a93..c05ffd693 100644
--- a/java/src/com/android/inputmethod/latin/utils/NgramContextUtils.java
+++ b/java/src/com/android/inputmethod/latin/utils/NgramContextUtils.java
@@ -31,6 +31,7 @@ public final class NgramContextUtils {
         // Intentional empty constructor for utility class.
     }
 
+    private static final Pattern NEWLINE_REGEX = Pattern.compile("[\\r\\n]+");
     private static final Pattern SPACE_REGEX = Pattern.compile("\\s+");
     // Get context information from nth word before the cursor. n = 1 retrieves the words
     // immediately before the cursor, n = 2 retrieves the words before that, and so on. This splits
@@ -58,7 +59,11 @@ public final class NgramContextUtils {
     public static NgramContext getNgramContextFromNthPreviousWord(final CharSequence prev,
             final SpacingAndPunctuations spacingAndPunctuations, final int n) {
         if (prev == null) return NgramContext.EMPTY_PREV_WORDS_INFO;
-        final String[] w = SPACE_REGEX.split(prev);
+        final String[] lines = NEWLINE_REGEX.split(prev);
+        if (lines.length == 0) {
+            return new NgramContext(WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO);
+        }
+        final String[] w = SPACE_REGEX.split(lines[lines.length - 1]);
         final WordInfo[] prevWordsInfo =
                 new WordInfo[DecoderSpecificConstants.MAX_PREV_WORD_COUNT_FOR_N_GRAM];
         Arrays.fill(prevWordsInfo, WordInfo.EMPTY_WORD_INFO);
@@ -81,16 +86,17 @@ public final class NgramContextUtils {
                 prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO;
                 break;
             }
+
             final String focusedWord = w[focusedWordIndex];
-            // If the word is, the context is beginning-of-sentence.
+            // If the word is empty, the context is beginning-of-sentence.
             final int length = focusedWord.length();
             if (length <= 0) {
                 prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO;
                 break;
             }
-            // If ends in a sentence separator, the context is beginning-of-sentence.
+            // If the word ends in a sentence terminator, the context is beginning-of-sentence.
             final char lastChar = focusedWord.charAt(length - 1);
-            if (spacingAndPunctuations.isSentenceSeparator(lastChar)) {
+            if (spacingAndPunctuations.isSentenceTerminator(lastChar)) {
                 prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO;
                 break;
             }
author	Tom Ouyang <ouyang@google.com>	2015-03-17 22:24:59 +0000
committer	Android (Google) Code Review <android-gerrit@google.com>	2015-03-17 22:25:00 +0000
commit	edcf5853d1f9253e3a76e6db01731b9465da6ae0 (patch)
tree	77ff08f17370cf5dd5f99746a5f4425da8ed6675 /java
parent	ea727b6b71398c9f78fbc7b79d3f0c73d301b938 (diff)
parent	5aeb09213000eb571ea4f07d97e59254b8831eee (diff)
download	latinime-edcf5853d1f9253e3a76e6db01731b9465da6ae0.tar.gz latinime-edcf5853d1f9253e3a76e6db01731b9465da6ae0.tar.xz latinime-edcf5853d1f9253e3a76e6db01731b9465da6ae0.zip