diff options
author | 2024-12-16 21:45:41 -0500 | |
---|---|---|
committer | 2025-01-11 14:17:35 -0500 | |
commit | e9a0e66716dab4dd3184d009d8920de1961efdfa (patch) | |
tree | 02dcc096643d74645bf28459c2834c3d4a2ad7f2 /java/src/org/kelar/inputmethod/latin/NgramContext.java | |
parent | fb3b9360d70596d7e921de8bf7d3ca99564a077e (diff) | |
download | latinime-e9a0e66716dab4dd3184d009d8920de1961efdfa.tar.gz latinime-e9a0e66716dab4dd3184d009d8920de1961efdfa.tar.xz latinime-e9a0e66716dab4dd3184d009d8920de1961efdfa.zip |
Rename to Kelar Keyboard (org.kelar.inputmethod.latin)
Diffstat (limited to 'java/src/org/kelar/inputmethod/latin/NgramContext.java')
-rw-r--r-- | java/src/org/kelar/inputmethod/latin/NgramContext.java | 291 |
1 files changed, 291 insertions, 0 deletions
diff --git a/java/src/org/kelar/inputmethod/latin/NgramContext.java b/java/src/org/kelar/inputmethod/latin/NgramContext.java new file mode 100644 index 000000000..3555ab9a2 --- /dev/null +++ b/java/src/org/kelar/inputmethod/latin/NgramContext.java @@ -0,0 +1,291 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.kelar.inputmethod.latin; + +import android.text.TextUtils; + +import org.kelar.inputmethod.annotations.UsedForTesting; +import org.kelar.inputmethod.latin.common.StringUtils; +import org.kelar.inputmethod.latin.define.DecoderSpecificConstants; + +import java.util.ArrayList; +import java.util.Arrays; + +import javax.annotation.Nonnull; + +/** + * Class to represent information of previous words. This class is used to add n-gram entries + * into binary dictionaries, to get predictions, and to get suggestions. + */ +public class NgramContext { + @Nonnull + public static final NgramContext EMPTY_PREV_WORDS_INFO = + new NgramContext(WordInfo.EMPTY_WORD_INFO); + @Nonnull + public static final NgramContext BEGINNING_OF_SENTENCE = + new NgramContext(WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO); + + public static final String BEGINNING_OF_SENTENCE_TAG = "<S>"; + + public static final String CONTEXT_SEPARATOR = " "; + + public static NgramContext getEmptyPrevWordsContext(int maxPrevWordCount) { + return new NgramContext(maxPrevWordCount, WordInfo.EMPTY_WORD_INFO); + } + + /** + * Word information used to represent previous words information. + */ + public static class WordInfo { + @Nonnull + public static final WordInfo EMPTY_WORD_INFO = new WordInfo(null); + @Nonnull + public static final WordInfo BEGINNING_OF_SENTENCE_WORD_INFO = new WordInfo(); + + // This is an empty char sequence when mIsBeginningOfSentence is true. + public final CharSequence mWord; + // TODO: Have sentence separator. + // Whether the current context is beginning of sentence or not. This is true when composing + // at the beginning of an input field or composing a word after a sentence separator. + public final boolean mIsBeginningOfSentence; + + // Beginning of sentence. + private WordInfo() { + mWord = ""; + mIsBeginningOfSentence = true; + } + + public WordInfo(final CharSequence word) { + mWord = word; + mIsBeginningOfSentence = false; + } + + public boolean isValid() { + return mWord != null; + } + + @Override + public int hashCode() { + return Arrays.hashCode(new Object[] { mWord, mIsBeginningOfSentence } ); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (!(o instanceof WordInfo)) return false; + final WordInfo wordInfo = (WordInfo)o; + if (mWord == null || wordInfo.mWord == null) { + return mWord == wordInfo.mWord + && mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence; + } + return TextUtils.equals(mWord, wordInfo.mWord) + && mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence; + } + } + + // The words immediately before the considered word. EMPTY_WORD_INFO element means we don't + // have any context for that previous word including the "beginning of sentence context" - we + // just don't know what to predict using the information. An example of that is after a comma. + // For simplicity of implementation, elements may also be EMPTY_WORD_INFO transiently after the + // WordComposer was reset and before starting a new composing word, but we should never be + // calling getSuggetions* in this situation. + private final WordInfo[] mPrevWordsInfo; + private final int mPrevWordsCount; + + private final int mMaxPrevWordCount; + + // Construct from the previous word information. + public NgramContext(final WordInfo... prevWordsInfo) { + this(DecoderSpecificConstants.MAX_PREV_WORD_COUNT_FOR_N_GRAM, prevWordsInfo); + } + + public NgramContext(final int maxPrevWordCount, final WordInfo... prevWordsInfo) { + mPrevWordsInfo = prevWordsInfo; + mPrevWordsCount = prevWordsInfo.length; + mMaxPrevWordCount = maxPrevWordCount; + } + + /** + * Create next prevWordsInfo using current prevWordsInfo. + */ + @Nonnull + public NgramContext getNextNgramContext(final WordInfo wordInfo) { + final int nextPrevWordCount = Math.min(mMaxPrevWordCount, mPrevWordsCount + 1); + final WordInfo[] prevWordsInfo = new WordInfo[nextPrevWordCount]; + prevWordsInfo[0] = wordInfo; + System.arraycopy(mPrevWordsInfo, 0, prevWordsInfo, 1, nextPrevWordCount - 1); + return new NgramContext(mMaxPrevWordCount, prevWordsInfo); + } + + + /** + * Extracts the previous words context. + * + * @return a String with the previous words separated by white space. + */ + public String extractPrevWordsContext() { + final ArrayList<String> terms = new ArrayList<>(); + for (int i = mPrevWordsInfo.length - 1; i >= 0; --i) { + if (mPrevWordsInfo[i] != null && mPrevWordsInfo[i].isValid()) { + final NgramContext.WordInfo wordInfo = mPrevWordsInfo[i]; + if (wordInfo.mIsBeginningOfSentence) { + terms.add(BEGINNING_OF_SENTENCE_TAG); + } else { + final String term = wordInfo.mWord.toString(); + if (!term.isEmpty()) { + terms.add(term); + } + } + } + } + return TextUtils.join(CONTEXT_SEPARATOR, terms); + } + + /** + * Extracts the previous words context. + * + * @return a String array with the previous words. + */ + public String[] extractPrevWordsContextArray() { + final ArrayList<String> prevTermList = new ArrayList<>(); + for (int i = mPrevWordsInfo.length - 1; i >= 0; --i) { + if (mPrevWordsInfo[i] != null && mPrevWordsInfo[i].isValid()) { + final NgramContext.WordInfo wordInfo = mPrevWordsInfo[i]; + if (wordInfo.mIsBeginningOfSentence) { + prevTermList.add(BEGINNING_OF_SENTENCE_TAG); + } else { + final String term = wordInfo.mWord.toString(); + if (!term.isEmpty()) { + prevTermList.add(term); + } + } + } + } + final String[] contextStringArray = prevTermList.toArray(new String[prevTermList.size()]); + return contextStringArray; + } + + public boolean isValid() { + return mPrevWordsCount > 0 && mPrevWordsInfo[0].isValid(); + } + + public boolean isBeginningOfSentenceContext() { + return mPrevWordsCount > 0 && mPrevWordsInfo[0].mIsBeginningOfSentence; + } + + // n is 1-indexed. + // TODO: Remove + public CharSequence getNthPrevWord(final int n) { + if (n <= 0 || n > mPrevWordsCount) { + return null; + } + return mPrevWordsInfo[n - 1].mWord; + } + + // n is 1-indexed. + @UsedForTesting + public boolean isNthPrevWordBeginningOfSentence(final int n) { + if (n <= 0 || n > mPrevWordsCount) { + return false; + } + return mPrevWordsInfo[n - 1].mIsBeginningOfSentence; + } + + public void outputToArray(final int[][] codePointArrays, + final boolean[] isBeginningOfSentenceArray) { + for (int i = 0; i < mPrevWordsCount; i++) { + final WordInfo wordInfo = mPrevWordsInfo[i]; + if (wordInfo == null || !wordInfo.isValid()) { + codePointArrays[i] = new int[0]; + isBeginningOfSentenceArray[i] = false; + continue; + } + codePointArrays[i] = StringUtils.toCodePointArray(wordInfo.mWord); + isBeginningOfSentenceArray[i] = wordInfo.mIsBeginningOfSentence; + } + } + + public int getPrevWordCount() { + return mPrevWordsCount; + } + + @Override + public int hashCode() { + int hashValue = 0; + for (final WordInfo wordInfo : mPrevWordsInfo) { + if (wordInfo == null || !WordInfo.EMPTY_WORD_INFO.equals(wordInfo)) { + break; + } + hashValue ^= wordInfo.hashCode(); + } + return hashValue; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (!(o instanceof NgramContext)) return false; + final NgramContext prevWordsInfo = (NgramContext)o; + + final int minLength = Math.min(mPrevWordsCount, prevWordsInfo.mPrevWordsCount); + for (int i = 0; i < minLength; i++) { + if (!mPrevWordsInfo[i].equals(prevWordsInfo.mPrevWordsInfo[i])) { + return false; + } + } + final WordInfo[] longerWordsInfo; + final int longerWordsInfoCount; + if (mPrevWordsCount > prevWordsInfo.mPrevWordsCount) { + longerWordsInfo = mPrevWordsInfo; + longerWordsInfoCount = mPrevWordsCount; + } else { + longerWordsInfo = prevWordsInfo.mPrevWordsInfo; + longerWordsInfoCount = prevWordsInfo.mPrevWordsCount; + } + for (int i = minLength; i < longerWordsInfoCount; i++) { + if (longerWordsInfo[i] != null + && !WordInfo.EMPTY_WORD_INFO.equals(longerWordsInfo[i])) { + return false; + } + } + return true; + } + + @Override + public String toString() { + final StringBuffer builder = new StringBuffer(); + for (int i = 0; i < mPrevWordsCount; i++) { + final WordInfo wordInfo = mPrevWordsInfo[i]; + builder.append("PrevWord["); + builder.append(i); + builder.append("]: "); + if (wordInfo == null) { + builder.append("null. "); + continue; + } + if (!wordInfo.isValid()) { + builder.append("Empty. "); + continue; + } + builder.append(wordInfo.mWord); + builder.append(", isBeginningOfSentence: "); + builder.append(wordInfo.mIsBeginningOfSentence); + builder.append(". "); + } + return builder.toString(); + } +} |