aboutsummaryrefslogtreecommitdiffstats
path: root/java/src/com/android/inputmethod/latin/utils/NgramContextUtils.java
diff options
context:
space:
mode:
authorAmin Bandali <bandali@kelar.org>2024-12-16 21:45:41 -0500
committerAmin Bandali <bandali@kelar.org>2025-01-11 14:17:35 -0500
commite9a0e66716dab4dd3184d009d8920de1961efdfa (patch)
tree02dcc096643d74645bf28459c2834c3d4a2ad7f2 /java/src/com/android/inputmethod/latin/utils/NgramContextUtils.java
parentfb3b9360d70596d7e921de8bf7d3ca99564a077e (diff)
downloadlatinime-e9a0e66716dab4dd3184d009d8920de1961efdfa.tar.gz
latinime-e9a0e66716dab4dd3184d009d8920de1961efdfa.tar.xz
latinime-e9a0e66716dab4dd3184d009d8920de1961efdfa.zip
Rename to Kelar Keyboard (org.kelar.inputmethod.latin)
Diffstat (limited to 'java/src/com/android/inputmethod/latin/utils/NgramContextUtils.java')
-rw-r--r--java/src/com/android/inputmethod/latin/utils/NgramContextUtils.java113
1 files changed, 0 insertions, 113 deletions
diff --git a/java/src/com/android/inputmethod/latin/utils/NgramContextUtils.java b/java/src/com/android/inputmethod/latin/utils/NgramContextUtils.java
deleted file mode 100644
index c05ffd693..000000000
--- a/java/src/com/android/inputmethod/latin/utils/NgramContextUtils.java
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.inputmethod.latin.utils;
-
-import com.android.inputmethod.latin.NgramContext;
-import com.android.inputmethod.latin.NgramContext.WordInfo;
-import com.android.inputmethod.latin.define.DecoderSpecificConstants;
-import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
-
-import java.util.Arrays;
-import java.util.regex.Pattern;
-
-import javax.annotation.Nonnull;
-
-public final class NgramContextUtils {
- private NgramContextUtils() {
- // Intentional empty constructor for utility class.
- }
-
- private static final Pattern NEWLINE_REGEX = Pattern.compile("[\\r\\n]+");
- private static final Pattern SPACE_REGEX = Pattern.compile("\\s+");
- // Get context information from nth word before the cursor. n = 1 retrieves the words
- // immediately before the cursor, n = 2 retrieves the words before that, and so on. This splits
- // on whitespace only.
- // Also, it won't return words that end in a separator (if the nth word before the cursor
- // ends in a separator, it returns information representing beginning-of-sentence).
- // Example (when Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM is 2):
- // (n = 1) "abc def|" -> abc, def
- // (n = 1) "abc def |" -> abc, def
- // (n = 1) "abc 'def|" -> empty, 'def
- // (n = 1) "abc def. |" -> beginning-of-sentence
- // (n = 1) "abc def . |" -> beginning-of-sentence
- // (n = 2) "abc def|" -> beginning-of-sentence, abc
- // (n = 2) "abc def |" -> beginning-of-sentence, abc
- // (n = 2) "abc 'def|" -> empty. The context is different from "abc def", but we cannot
- // represent this situation using NgramContext. See TODO in the method.
- // TODO: The next example's result should be "abc, def". This have to be fixed before we
- // retrieve the prior context of Beginning-of-Sentence.
- // (n = 2) "abc def. |" -> beginning-of-sentence, abc
- // (n = 2) "abc def . |" -> abc, def
- // (n = 2) "abc|" -> beginning-of-sentence
- // (n = 2) "abc |" -> beginning-of-sentence
- // (n = 2) "abc. def|" -> beginning-of-sentence
- @Nonnull
- public static NgramContext getNgramContextFromNthPreviousWord(final CharSequence prev,
- final SpacingAndPunctuations spacingAndPunctuations, final int n) {
- if (prev == null) return NgramContext.EMPTY_PREV_WORDS_INFO;
- final String[] lines = NEWLINE_REGEX.split(prev);
- if (lines.length == 0) {
- return new NgramContext(WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO);
- }
- final String[] w = SPACE_REGEX.split(lines[lines.length - 1]);
- final WordInfo[] prevWordsInfo =
- new WordInfo[DecoderSpecificConstants.MAX_PREV_WORD_COUNT_FOR_N_GRAM];
- Arrays.fill(prevWordsInfo, WordInfo.EMPTY_WORD_INFO);
- for (int i = 0; i < prevWordsInfo.length; i++) {
- final int focusedWordIndex = w.length - n - i;
- // Referring to the word after the focused word.
- if ((focusedWordIndex + 1) >= 0 && (focusedWordIndex + 1) < w.length) {
- final String wordFollowingTheNthPrevWord = w[focusedWordIndex + 1];
- if (!wordFollowingTheNthPrevWord.isEmpty()) {
- final char firstChar = wordFollowingTheNthPrevWord.charAt(0);
- if (spacingAndPunctuations.isWordConnector(firstChar)) {
- // The word following the focused word is starting with a word connector.
- // TODO: Return meaningful context for this case.
- break;
- }
- }
- }
- // If we can't find (n + i) words, the context is beginning-of-sentence.
- if (focusedWordIndex < 0) {
- prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO;
- break;
- }
-
- final String focusedWord = w[focusedWordIndex];
- // If the word is empty, the context is beginning-of-sentence.
- final int length = focusedWord.length();
- if (length <= 0) {
- prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO;
- break;
- }
- // If the word ends in a sentence terminator, the context is beginning-of-sentence.
- final char lastChar = focusedWord.charAt(length - 1);
- if (spacingAndPunctuations.isSentenceTerminator(lastChar)) {
- prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO;
- break;
- }
- // If ends in a word separator or connector, the context is unclear.
- // TODO: Return meaningful context for this case.
- if (spacingAndPunctuations.isWordSeparator(lastChar)
- || spacingAndPunctuations.isWordConnector(lastChar)) {
- break;
- }
- prevWordsInfo[i] = new WordInfo(focusedWord);
- }
- return new NgramContext(prevWordsInfo);
- }
-}