diff options
author | 2015-03-17 12:16:59 -0700 | |
---|---|---|
committer | 2015-03-17 15:18:42 -0700 | |
commit | 5aeb09213000eb571ea4f07d97e59254b8831eee (patch) | |
tree | 7fff9d8431463e7d4d19c9e825306ddf972b890c /java/src/com/android/inputmethod/latin/utils/NgramContextUtils.java | |
parent | 320cef5fe71baf049971eeac957d2fe73c57b09f (diff) | |
download | latinime-5aeb09213000eb571ea4f07d97e59254b8831eee.tar.gz latinime-5aeb09213000eb571ea4f07d97e59254b8831eee.tar.xz latinime-5aeb09213000eb571ea4f07d97e59254b8831eee.zip |
Start-of-sentence should include newlines and non-period terminators.
Bug: 19795382
Change-Id: Id6cc4a494a06de03d351aa6257632bd3b82e2ec4
Diffstat (limited to 'java/src/com/android/inputmethod/latin/utils/NgramContextUtils.java')
-rw-r--r-- | java/src/com/android/inputmethod/latin/utils/NgramContextUtils.java | 14 |
1 files changed, 10 insertions, 4 deletions
```diff
diff --git a/java/src/com/android/inputmethod/latin/utils/NgramContextUtils.java b/java/src/com/android/inputmethod/latin/utils/NgramContextUtils.java
index 727df1a93..c05ffd693 100644
--- a/java/src/com/android/inputmethod/latin/utils/NgramContextUtils.java
+++ b/java/src/com/android/inputmethod/latin/utils/NgramContextUtils.java
@@ -31,6 +31,7 @@ public final class NgramContextUtils {
         // Intentional empty constructor for utility class.
     }
+    private static final Pattern NEWLINE_REGEX = Pattern.compile("[\\r\\n]+");
     private static final Pattern SPACE_REGEX = Pattern.compile("\\s+");
     // Get context information from nth word before the cursor. n = 1 retrieves the words
     // immediately before the cursor, n = 2 retrieves the words before that, and so on. This splits
@@ -58,7 +59,11 @@ public final class NgramContextUtils {
     public static NgramContext getNgramContextFromNthPreviousWord(final CharSequence prev,
             final SpacingAndPunctuations spacingAndPunctuations, final int n) {
         if (prev == null) return NgramContext.EMPTY_PREV_WORDS_INFO;
-        final String[] w = SPACE_REGEX.split(prev);
+        final String[] lines = NEWLINE_REGEX.split(prev);
+        if (lines.length == 0) {
+            return new NgramContext(WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO);
+        }
+        final String[] w = SPACE_REGEX.split(lines[lines.length - 1]);
         final WordInfo[] prevWordsInfo =
                 new WordInfo[DecoderSpecificConstants.MAX_PREV_WORD_COUNT_FOR_N_GRAM];
         Arrays.fill(prevWordsInfo, WordInfo.EMPTY_WORD_INFO);
@@ -81,16 +86,17 @@ public final class NgramContextUtils {
                 prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO;
                 break;
             }
+
             final String focusedWord = w[focusedWordIndex];
-            // If the word is, the context is beginning-of-sentence.
+            // If the word is empty, the context is beginning-of-sentence.
             final int length = focusedWord.length();
             if (length <= 0) {
                 prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO;
                 break;
             }
-            // If ends in a sentence separator, the context is beginning-of-sentence.
+            // If the word ends in a sentence terminator, the context is beginning-of-sentence.
             final char lastChar = focusedWord.charAt(length - 1);
-            if (spacingAndPunctuations.isSentenceSeparator(lastChar)) {
+            if (spacingAndPunctuations.isSentenceTerminator(lastChar)) {
                 prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO;
                 break;
             }
```