diff options
author | 2024-12-16 21:45:41 -0500 | |
---|---|---|
committer | 2025-01-11 14:17:35 -0500 | |
commit | e9a0e66716dab4dd3184d009d8920de1961efdfa (patch) | |
tree | 02dcc096643d74645bf28459c2834c3d4a2ad7f2 /java/src/com/android/inputmethod/latin/utils/NgramContextUtils.java | |
parent | fb3b9360d70596d7e921de8bf7d3ca99564a077e (diff) | |
download | latinime-e9a0e66716dab4dd3184d009d8920de1961efdfa.tar.gz latinime-e9a0e66716dab4dd3184d009d8920de1961efdfa.tar.xz latinime-e9a0e66716dab4dd3184d009d8920de1961efdfa.zip |
Rename to Kelar Keyboard (org.kelar.inputmethod.latin)
Diffstat (limited to 'java/src/com/android/inputmethod/latin/utils/NgramContextUtils.java')
-rw-r--r-- | java/src/com/android/inputmethod/latin/utils/NgramContextUtils.java | 113 |
1 files changed, 0 insertions, 113 deletions
diff --git a/java/src/com/android/inputmethod/latin/utils/NgramContextUtils.java b/java/src/com/android/inputmethod/latin/utils/NgramContextUtils.java deleted file mode 100644 index c05ffd693..000000000 --- a/java/src/com/android/inputmethod/latin/utils/NgramContextUtils.java +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.android.inputmethod.latin.utils; - -import com.android.inputmethod.latin.NgramContext; -import com.android.inputmethod.latin.NgramContext.WordInfo; -import com.android.inputmethod.latin.define.DecoderSpecificConstants; -import com.android.inputmethod.latin.settings.SpacingAndPunctuations; - -import java.util.Arrays; -import java.util.regex.Pattern; - -import javax.annotation.Nonnull; - -public final class NgramContextUtils { - private NgramContextUtils() { - // Intentional empty constructor for utility class. - } - - private static final Pattern NEWLINE_REGEX = Pattern.compile("[\\r\\n]+"); - private static final Pattern SPACE_REGEX = Pattern.compile("\\s+"); - // Get context information from nth word before the cursor. n = 1 retrieves the words - // immediately before the cursor, n = 2 retrieves the words before that, and so on. This splits - // on whitespace only. - // Also, it won't return words that end in a separator (if the nth word before the cursor - // ends in a separator, it returns information representing beginning-of-sentence). - // Example (when Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM is 2): - // (n = 1) "abc def|" -> abc, def - // (n = 1) "abc def |" -> abc, def - // (n = 1) "abc 'def|" -> empty, 'def - // (n = 1) "abc def. |" -> beginning-of-sentence - // (n = 1) "abc def . |" -> beginning-of-sentence - // (n = 2) "abc def|" -> beginning-of-sentence, abc - // (n = 2) "abc def |" -> beginning-of-sentence, abc - // (n = 2) "abc 'def|" -> empty. The context is different from "abc def", but we cannot - // represent this situation using NgramContext. See TODO in the method. - // TODO: The next example's result should be "abc, def". This have to be fixed before we - // retrieve the prior context of Beginning-of-Sentence. - // (n = 2) "abc def. |" -> beginning-of-sentence, abc - // (n = 2) "abc def . |" -> abc, def - // (n = 2) "abc|" -> beginning-of-sentence - // (n = 2) "abc |" -> beginning-of-sentence - // (n = 2) "abc. def|" -> beginning-of-sentence - @Nonnull - public static NgramContext getNgramContextFromNthPreviousWord(final CharSequence prev, - final SpacingAndPunctuations spacingAndPunctuations, final int n) { - if (prev == null) return NgramContext.EMPTY_PREV_WORDS_INFO; - final String[] lines = NEWLINE_REGEX.split(prev); - if (lines.length == 0) { - return new NgramContext(WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO); - } - final String[] w = SPACE_REGEX.split(lines[lines.length - 1]); - final WordInfo[] prevWordsInfo = - new WordInfo[DecoderSpecificConstants.MAX_PREV_WORD_COUNT_FOR_N_GRAM]; - Arrays.fill(prevWordsInfo, WordInfo.EMPTY_WORD_INFO); - for (int i = 0; i < prevWordsInfo.length; i++) { - final int focusedWordIndex = w.length - n - i; - // Referring to the word after the focused word. - if ((focusedWordIndex + 1) >= 0 && (focusedWordIndex + 1) < w.length) { - final String wordFollowingTheNthPrevWord = w[focusedWordIndex + 1]; - if (!wordFollowingTheNthPrevWord.isEmpty()) { - final char firstChar = wordFollowingTheNthPrevWord.charAt(0); - if (spacingAndPunctuations.isWordConnector(firstChar)) { - // The word following the focused word is starting with a word connector. - // TODO: Return meaningful context for this case. - break; - } - } - } - // If we can't find (n + i) words, the context is beginning-of-sentence. - if (focusedWordIndex < 0) { - prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO; - break; - } - - final String focusedWord = w[focusedWordIndex]; - // If the word is empty, the context is beginning-of-sentence. - final int length = focusedWord.length(); - if (length <= 0) { - prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO; - break; - } - // If the word ends in a sentence terminator, the context is beginning-of-sentence. - final char lastChar = focusedWord.charAt(length - 1); - if (spacingAndPunctuations.isSentenceTerminator(lastChar)) { - prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO; - break; - } - // If ends in a word separator or connector, the context is unclear. - // TODO: Return meaningful context for this case. - if (spacingAndPunctuations.isWordSeparator(lastChar) - || spacingAndPunctuations.isWordConnector(lastChar)) { - break; - } - prevWordsInfo[i] = new WordInfo(focusedWord); - } - return new NgramContext(prevWordsInfo); - } -} |