Rename to Kelar Keyboard (org.kelar.inputmethod.latin)

author: Amin Bandali <bandali@kelar.org> 2024-12-16 21:45:41 -0500
committer: Amin Bandali <bandali@kelar.org> 2025-01-11 14:17:35 -0500
commit: e9a0e66716dab4dd3184d009d8920de1961efdfa (patch)
tree: 02dcc096643d74645bf28459c2834c3d4a2ad7f2 /java/src/org/kelar/inputmethod/latin/NgramContext.java
parent: fb3b9360d70596d7e921de8bf7d3ca99564a077e (diff)
download: latinime-e9a0e66716dab4dd3184d009d8920de1961efdfa.tar.gz
latinime-e9a0e66716dab4dd3184d009d8920de1961efdfa.tar.xz
latinime-e9a0e66716dab4dd3184d009d8920de1961efdfa.zip
1 files changed, 291 insertions, 0 deletions
diff --git a/java/src/org/kelar/inputmethod/latin/NgramContext.java b/java/src/org/kelar/inputmethod/latin/NgramContext.java
new file mode 100644
index 000000000..3555ab9a2
--- /dev/null
+++ b/java/src/org/kelar/inputmethod/latin/NgramContext.java
@@ -0,0 +1,291 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.kelar.inputmethod.latin;
+
+import android.text.TextUtils;
+
+import org.kelar.inputmethod.annotations.UsedForTesting;
+import org.kelar.inputmethod.latin.common.StringUtils;
+import org.kelar.inputmethod.latin.define.DecoderSpecificConstants;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import javax.annotation.Nonnull;
+
+/**
+ * Class to represent information of previous words. This class is used to add n-gram entries
+ * into binary dictionaries, to get predictions, and to get suggestions.
+ */
+public class NgramContext {
+    @Nonnull
+    public static final NgramContext EMPTY_PREV_WORDS_INFO =
+            new NgramContext(WordInfo.EMPTY_WORD_INFO);
+    @Nonnull
+    public static final NgramContext BEGINNING_OF_SENTENCE =
+            new NgramContext(WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO);
+
+    public static final String BEGINNING_OF_SENTENCE_TAG = "<S>";
+
+    public static final String CONTEXT_SEPARATOR = " ";
+
+    public static NgramContext getEmptyPrevWordsContext(int maxPrevWordCount) {
+        return new NgramContext(maxPrevWordCount, WordInfo.EMPTY_WORD_INFO);
+    }
+
+    /**
+     * Word information used to represent previous words information.
+     */
+    public static class WordInfo {
+        @Nonnull
+        public static final WordInfo EMPTY_WORD_INFO = new WordInfo(null);
+        @Nonnull
+        public static final WordInfo BEGINNING_OF_SENTENCE_WORD_INFO = new WordInfo();
+
+        // This is an empty char sequence when mIsBeginningOfSentence is true.
+        public final CharSequence mWord;
+        // TODO: Have sentence separator.
+        // Whether the current context is beginning of sentence or not. This is true when composing
+        // at the beginning of an input field or composing a word after a sentence separator.
+        public final boolean mIsBeginningOfSentence;
+
+        // Beginning of sentence.
+        private WordInfo() {
+            mWord = "";
+            mIsBeginningOfSentence = true;
+        }
+
+        public WordInfo(final CharSequence word) {
+            mWord = word;
+            mIsBeginningOfSentence = false;
+        }
+
+        public boolean isValid() {
+            return mWord != null;
+        }
+
+        @Override
+        public int hashCode() {
+            return Arrays.hashCode(new Object[] { mWord, mIsBeginningOfSentence } );
+        }
+
+        @Override
+        public boolean equals(Object o) {
+            if (this == o) return true;
+            if (!(o instanceof WordInfo)) return false;
+            final WordInfo wordInfo = (WordInfo)o;
+            if (mWord == null || wordInfo.mWord == null) {
+                return mWord == wordInfo.mWord
+                        && mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence;
+            }
+            return TextUtils.equals(mWord, wordInfo.mWord)
+                    && mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence;
+        }
+    }
+
+    // The words immediately before the considered word. EMPTY_WORD_INFO element means we don't
+    // have any context for that previous word including the "beginning of sentence context" - we
+    // just don't know what to predict using the information. An example of that is after a comma.
+    // For simplicity of implementation, elements may also be EMPTY_WORD_INFO transiently after the
+    // WordComposer was reset and before starting a new composing word, but we should never be
+    // calling getSuggetions* in this situation.
+    private final WordInfo[] mPrevWordsInfo;
+    private final int mPrevWordsCount;
+
+    private final int mMaxPrevWordCount;
+
+    // Construct from the previous word information.
+    public NgramContext(final WordInfo... prevWordsInfo) {
+        this(DecoderSpecificConstants.MAX_PREV_WORD_COUNT_FOR_N_GRAM, prevWordsInfo);
+    }
+
+    public NgramContext(final int maxPrevWordCount, final WordInfo... prevWordsInfo) {
+        mPrevWordsInfo = prevWordsInfo;
+        mPrevWordsCount = prevWordsInfo.length;
+        mMaxPrevWordCount = maxPrevWordCount;
+    }
+
+    /**
+     * Create next prevWordsInfo using current prevWordsInfo.
+     */
+    @Nonnull
+    public NgramContext getNextNgramContext(final WordInfo wordInfo) {
+        final int nextPrevWordCount = Math.min(mMaxPrevWordCount, mPrevWordsCount + 1);
+        final WordInfo[] prevWordsInfo = new WordInfo[nextPrevWordCount];
+        prevWordsInfo[0] = wordInfo;
+        System.arraycopy(mPrevWordsInfo, 0, prevWordsInfo, 1, nextPrevWordCount - 1);
+        return new NgramContext(mMaxPrevWordCount, prevWordsInfo);
+    }
+
+
+    /**
+     * Extracts the previous words context.
+     *
+     * @return a String with the previous words separated by white space.
+     */
+    public String extractPrevWordsContext() {
+        final ArrayList<String> terms = new ArrayList<>();
+        for (int i = mPrevWordsInfo.length - 1; i >= 0; --i) {
+            if (mPrevWordsInfo[i] != null && mPrevWordsInfo[i].isValid()) {
+                final NgramContext.WordInfo wordInfo = mPrevWordsInfo[i];
+                if (wordInfo.mIsBeginningOfSentence) {
+                    terms.add(BEGINNING_OF_SENTENCE_TAG);
+                } else {
+                    final String term = wordInfo.mWord.toString();
+                    if (!term.isEmpty()) {
+                        terms.add(term);
+                    }
+                }
+            }
+        }
+        return TextUtils.join(CONTEXT_SEPARATOR, terms);
+    }
+
+    /**
+     * Extracts the previous words context.
+     *
+     * @return a String array with the previous words.
+     */
+    public String[] extractPrevWordsContextArray() {
+        final ArrayList<String> prevTermList = new ArrayList<>();
+        for (int i = mPrevWordsInfo.length - 1; i >= 0; --i) {
+            if (mPrevWordsInfo[i] != null && mPrevWordsInfo[i].isValid()) {
+                final NgramContext.WordInfo wordInfo = mPrevWordsInfo[i];
+                if (wordInfo.mIsBeginningOfSentence) {
+                    prevTermList.add(BEGINNING_OF_SENTENCE_TAG);
+                } else {
+                    final String term = wordInfo.mWord.toString();
+                    if (!term.isEmpty()) {
+                        prevTermList.add(term);
+                    }
+                }
+            }
+        }
+        final String[] contextStringArray = prevTermList.toArray(new String[prevTermList.size()]);
+        return contextStringArray;
+    }
+
+    public boolean isValid() {
+        return mPrevWordsCount > 0 && mPrevWordsInfo[0].isValid();
+    }
+
+    public boolean isBeginningOfSentenceContext() {
+        return mPrevWordsCount > 0 && mPrevWordsInfo[0].mIsBeginningOfSentence;
+    }
+
+    // n is 1-indexed.
+    // TODO: Remove
+    public CharSequence getNthPrevWord(final int n) {
+        if (n <= 0 || n > mPrevWordsCount) {
+            return null;
+        }
+        return mPrevWordsInfo[n - 1].mWord;
+    }
+
+    // n is 1-indexed.
+    @UsedForTesting
+    public boolean isNthPrevWordBeginningOfSentence(final int n) {
+        if (n <= 0 || n > mPrevWordsCount) {
+            return false;
+        }
+        return mPrevWordsInfo[n - 1].mIsBeginningOfSentence;
+    }
+
+    public void outputToArray(final int[][] codePointArrays,
+            final boolean[] isBeginningOfSentenceArray) {
+        for (int i = 0; i < mPrevWordsCount; i++) {
+            final WordInfo wordInfo = mPrevWordsInfo[i];
+            if (wordInfo == null || !wordInfo.isValid()) {
+                codePointArrays[i] = new int[0];
+                isBeginningOfSentenceArray[i] = false;
+                continue;
+            }
+            codePointArrays[i] = StringUtils.toCodePointArray(wordInfo.mWord);
+            isBeginningOfSentenceArray[i] = wordInfo.mIsBeginningOfSentence;
+        }
+    }
+
+    public int getPrevWordCount() {
+        return mPrevWordsCount;
+    }
+
+    @Override
+    public int hashCode() {
+        int hashValue = 0;
+        for (final WordInfo wordInfo : mPrevWordsInfo) {
+            if (wordInfo == null || !WordInfo.EMPTY_WORD_INFO.equals(wordInfo)) {
+                break;
+            }
+            hashValue ^= wordInfo.hashCode();
+        }
+        return hashValue;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (!(o instanceof NgramContext)) return false;
+        final NgramContext prevWordsInfo = (NgramContext)o;
+
+        final int minLength = Math.min(mPrevWordsCount, prevWordsInfo.mPrevWordsCount);
+        for (int i = 0; i < minLength; i++) {
+            if (!mPrevWordsInfo[i].equals(prevWordsInfo.mPrevWordsInfo[i])) {
+                return false;
+            }
+        }
+        final WordInfo[] longerWordsInfo;
+        final int longerWordsInfoCount;
+        if (mPrevWordsCount > prevWordsInfo.mPrevWordsCount) {
+            longerWordsInfo = mPrevWordsInfo;
+            longerWordsInfoCount = mPrevWordsCount;
+        } else {
+            longerWordsInfo = prevWordsInfo.mPrevWordsInfo;
+            longerWordsInfoCount = prevWordsInfo.mPrevWordsCount;
+        }
+        for (int i = minLength; i < longerWordsInfoCount; i++) {
+            if (longerWordsInfo[i] != null
+                    && !WordInfo.EMPTY_WORD_INFO.equals(longerWordsInfo[i])) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    @Override
+    public String toString() {
+        final StringBuffer builder = new StringBuffer();
+        for (int i = 0; i < mPrevWordsCount; i++) {
+            final WordInfo wordInfo = mPrevWordsInfo[i];
+            builder.append("PrevWord[");
+            builder.append(i);
+            builder.append("]: ");
+            if (wordInfo == null) {
+                builder.append("null. ");
+                continue;
+            }
+            if (!wordInfo.isValid()) {
+                builder.append("Empty. ");
+                continue;
+            }
+            builder.append(wordInfo.mWord);
+            builder.append(", isBeginningOfSentence: ");
+            builder.append(wordInfo.mIsBeginningOfSentence);
+            builder.append(". ");
+        }
+        return builder.toString();
+    }
+}
author	Amin Bandali <bandali@kelar.org>	2024-12-16 21:45:41 -0500
committer	Amin Bandali <bandali@kelar.org>	2025-01-11 14:17:35 -0500
commit	e9a0e66716dab4dd3184d009d8920de1961efdfa (patch)
tree	02dcc096643d74645bf28459c2834c3d4a2ad7f2 /java/src/org/kelar/inputmethod/latin/NgramContext.java
parent	fb3b9360d70596d7e921de8bf7d3ca99564a077e (diff)
download	latinime-e9a0e66716dab4dd3184d009d8920de1961efdfa.tar.gz latinime-e9a0e66716dab4dd3184d009d8920de1961efdfa.tar.xz latinime-e9a0e66716dab4dd3184d009d8920de1961efdfa.zip