aboutsummaryrefslogtreecommitdiffstats
path: root/java/src/com/android/inputmethod/latin/NgramContext.java
diff options
context:
space:
mode:
author Keisuke Kuroyanagi <ksk@google.com> 2014-09-29 10:52:18 +0900
committer Keisuke Kuroyanagi <ksk@google.com> 2014-09-29 10:52:18 +0900
commit bb0eca57054758ef17b032d2654c1fc5f6b32101 (patch)
tree 25a22c28dad09d7e2bdbbb95098ca54de67aced7 /java/src/com/android/inputmethod/latin/NgramContext.java
parent bbd6a26be025bc419e342e32d86629c4ebd68dd8 (diff)
download latinime-bb0eca57054758ef17b032d2654c1fc5f6b32101.tar.gz
latinime-bb0eca57054758ef17b032d2654c1fc5f6b32101.tar.xz
latinime-bb0eca57054758ef17b032d2654c1fc5f6b32101.zip
Rename PrevWordsInfo to NgramContext.
Bug: 14425059 Change-Id: Id06a71681fa8b5e589e29fba10fe5c1cfed66984
Diffstat (limited to 'java/src/com/android/inputmethod/latin/NgramContext.java')
-rw-r--r-- java/src/com/android/inputmethod/latin/NgramContext.java 229
1 files changed, 229 insertions, 0 deletions
diff --git a/java/src/com/android/inputmethod/latin/NgramContext.java b/java/src/com/android/inputmethod/latin/NgramContext.java
new file mode 100644
index 000000000..c35c6e2c8
--- /dev/null
+++ b/java/src/com/android/inputmethod/latin/NgramContext.java
@@ -0,0 +1,229 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin;
+
+import android.text.TextUtils;
+
+import com.android.inputmethod.annotations.UsedForTesting;
+import com.android.inputmethod.latin.utils.StringUtils;
+
+import java.util.Arrays;
+
+/**
+ * Class to represent information of previous words, most recent word first. This class is used
+ * to add n-gram entries into binary dictionaries, to get predictions, and to get suggestions.
+ *
+ * Instances never modify their word array after construction. Contexts that differ only by
+ * trailing "no information" entries ({@link WordInfo#EMPTY_WORD_INFO} or null) are considered
+ * equal by {@link #equals(Object)}.
+ */
+public class NgramContext {
+    public static final NgramContext EMPTY_PREV_WORDS_INFO =
+            new NgramContext(WordInfo.EMPTY_WORD_INFO);
+    public static final NgramContext BEGINNING_OF_SENTENCE =
+            new NgramContext(WordInfo.BEGINNING_OF_SENTENCE);
+
+    /**
+     * Word information used to represent previous words information. An instance is either an
+     * actual word, the beginning-of-sentence marker, or "no information" (null word).
+     */
+    public static class WordInfo {
+        // Placeholder for "nothing is known about this word": mWord is null.
+        public static final WordInfo EMPTY_WORD_INFO = new WordInfo(null);
+        // Marker for the beginning of a sentence: mWord is empty and the flag is set.
+        public static final WordInfo BEGINNING_OF_SENTENCE = new WordInfo();
+
+        // This is an empty char sequence when mIsBeginningOfSentence is true.
+        public final CharSequence mWord;
+        // TODO: Have sentence separator.
+        // Whether the current context is beginning of sentence or not. This is true when composing
+        // at the beginning of an input field or composing a word after a sentence separator.
+        public final boolean mIsBeginningOfSentence;
+
+        // Beginning of sentence.
+        public WordInfo() {
+            mWord = "";
+            mIsBeginningOfSentence = true;
+        }
+
+        /**
+         * Creates the information of an ordinary (not beginning-of-sentence) word.
+         *
+         * @param word the word. Passing null creates an instance for which {@link #isValid()}
+         *        returns false.
+         */
+        public WordInfo(final CharSequence word) {
+            mWord = word;
+            mIsBeginningOfSentence = false;
+        }
+
+        /**
+         * @return whether this instance carries a word, i.e. whether mWord is non-null.
+         */
+        public boolean isValid() {
+            return mWord != null;
+        }
+
+        @Override
+        public int hashCode() {
+            // equals() compares mWord by content, so the content has to be hashed as well.
+            // Hashing the CharSequence object itself would give equal words of different
+            // CharSequence types (e.g. a String and a StringBuilder) different hash codes.
+            final String word = (mWord == null) ? null : mWord.toString();
+            return Arrays.hashCode(new Object[] { word, mIsBeginningOfSentence } );
+        }
+
+        @Override
+        public boolean equals(Object o) {
+            if (this == o) return true;
+            if (!(o instanceof WordInfo)) return false;
+            final WordInfo wordInfo = (WordInfo)o;
+            // TextUtils.equals() is null-safe: two nulls are equal, null and non-null are not.
+            return mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence
+                    && TextUtils.equals(mWord, wordInfo.mWord);
+        }
+    }
+
+    // The words immediately before the considered word. EMPTY_WORD_INFO element means we don't
+    // have any context for that previous word including the "beginning of sentence context" - we
+    // just don't know what to predict using the information. An example of that is after a comma.
+    // For simplicity of implementation, elements may also be EMPTY_WORD_INFO transiently after the
+    // WordComposer was reset and before starting a new composing word, but we should never be
+    // calling getSuggestions* in this situation.
+    private final WordInfo[] mPrevWordsInfo;
+    // Number of leading elements of mPrevWordsInfo that belong to this context. This can be
+    // smaller than mPrevWordsInfo.length for contexts created by getTrimmedNgramContext().
+    private final int mPrevWordsCount;
+
+    /**
+     * Construct from the previous word information, most recent word first.
+     *
+     * The array is stored without being copied, so the caller shouldn't change it after calling
+     * this constructor.
+     *
+     * @param prevWordsInfo the previous words; must not be a null array.
+     */
+    public NgramContext(final WordInfo... prevWordsInfo) {
+        mPrevWordsInfo = prevWordsInfo;
+        mPrevWordsCount = prevWordsInfo.length;
+    }
+
+    // Construct a view on the first prevWordsCount words of another context. The word array is
+    // shared with ngramContext, not copied.
+    private NgramContext(final NgramContext ngramContext, final int prevWordsCount) {
+        if (ngramContext.mPrevWordsCount < prevWordsCount) {
+            throw new IndexOutOfBoundsException("ngramContext.mPrevWordsCount ("
+                    + ngramContext.mPrevWordsCount + ") is smaller than prevWordsCount ("
+                    + prevWordsCount + ")");
+        }
+        mPrevWordsInfo = ngramContext.mPrevWordsInfo;
+        mPrevWordsCount = prevWordsCount;
+    }
+
+    // Returns the word information at the given 0-based index, or EMPTY_WORD_INFO when the index
+    // is outside of this context or the stored element is null. Funnelling every read through
+    // this method keeps null elements from causing NullPointerException.
+    private WordInfo getWordInfoOrEmpty(final int index) {
+        if (index < 0 || index >= mPrevWordsCount) {
+            return WordInfo.EMPTY_WORD_INFO;
+        }
+        final WordInfo wordInfo = mPrevWordsInfo[index];
+        return (wordInfo == null) ? WordInfo.EMPTY_WORD_INFO : wordInfo;
+    }
+
+    /**
+     * Create next prevWordsInfo using current prevWordsInfo.
+     *
+     * The new context has wordInfo as its most recent word followed by the words of this
+     * context. Its size is capped at Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM, so the oldest
+     * words are dropped when the cap is exceeded.
+     *
+     * @param wordInfo the word that becomes the most recent previous word.
+     * @return the new context; this instance is left unchanged.
+     */
+    public NgramContext getNextNgramContext(final WordInfo wordInfo) {
+        final int nextPrevWordCount = Math.min(Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM,
+                mPrevWordsCount + 1);
+        final WordInfo[] prevWordsInfo = new WordInfo[nextPrevWordCount];
+        prevWordsInfo[0] = wordInfo;
+        // Shift the current words by one position; only as many as still fit are copied.
+        System.arraycopy(mPrevWordsInfo, 0, prevWordsInfo, 1, nextPrevWordCount - 1);
+        return new NgramContext(prevWordsInfo);
+    }
+
+    /**
+     * @return whether there is at least one previous word and the most recent one is valid.
+     */
+    public boolean isValid() {
+        return getWordInfoOrEmpty(0).isValid();
+    }
+
+    /**
+     * @return whether the most recent previous word is a beginning-of-sentence marker.
+     */
+    public boolean isBeginningOfSentenceContext() {
+        return getWordInfoOrEmpty(0).mIsBeginningOfSentence;
+    }
+
+    // n is 1-indexed. Returns null when n is out of range or no word is known at that position.
+    // TODO: Remove
+    public CharSequence getNthPrevWord(final int n) {
+        if (n <= 0 || n > mPrevWordsCount) {
+            return null;
+        }
+        return getWordInfoOrEmpty(n - 1).mWord;
+    }
+
+    // n is 1-indexed. Returns false when n is out of range.
+    // The misspelled name ("Sontence") is kept as is so that existing callers keep working.
+    @UsedForTesting
+    public boolean isNthPrevWordBeginningOfSontence(final int n) {
+        if (n <= 0 || n > mPrevWordsCount) {
+            return false;
+        }
+        return getWordInfoOrEmpty(n - 1).mIsBeginningOfSentence;
+    }
+
+    /**
+     * Writes the previous words into the given parallel arrays, most recent word first.
+     *
+     * For each of the getPrevWordCount() words, element i of codePointArrays receives the result
+     * of StringUtils.toCodePointArray() for the word and element i of isBeginningOfSentenceArray
+     * receives its beginning-of-sentence flag. Null or invalid words are written as an empty
+     * array and false. Elements beyond getPrevWordCount() are not touched.
+     *
+     * @param codePointArrays output array; must have at least getPrevWordCount() elements.
+     * @param isBeginningOfSentenceArray output array; must have at least getPrevWordCount()
+     *        elements.
+     */
+    public void outputToArray(final int[][] codePointArrays,
+            final boolean[] isBeginningOfSentenceArray) {
+        for (int i = 0; i < mPrevWordsCount; i++) {
+            final WordInfo wordInfo = getWordInfoOrEmpty(i);
+            if (!wordInfo.isValid()) {
+                codePointArrays[i] = new int[0];
+                isBeginningOfSentenceArray[i] = false;
+                continue;
+            }
+            codePointArrays[i] = StringUtils.toCodePointArray(wordInfo.mWord);
+            isBeginningOfSentenceArray[i] = wordInfo.mIsBeginningOfSentence;
+        }
+    }
+
+    /**
+     * Returns a context limited to the most recent words of this context.
+     *
+     * @param maxPrevWordCount the maximum number of words to keep. Negative values are treated
+     *        as 0.
+     * @return a context holding min(maxPrevWordCount, getPrevWordCount()) words.
+     */
+    public NgramContext getTrimmedNgramContext(final int maxPrevWordCount) {
+        // Clamp at 0 so that a negative argument cannot produce a negative word count.
+        final int newSize = Math.max(0, Math.min(maxPrevWordCount, mPrevWordsCount));
+        return new NgramContext(this /* prevWordsInfo */, newSize);
+    }
+
+    public int getPrevWordCount() {
+        return mPrevWordsCount;
+    }
+
+    @Override
+    public int hashCode() {
+        // Just for having equals(). Only the most recent word is hashed. A missing or null word
+        // hashes like EMPTY_WORD_INFO because equals() treats those cases as equal.
+        return getWordInfoOrEmpty(0).hashCode();
+    }
+
+    /**
+     * Two contexts are equal when their words are equal position by position, where a missing
+     * word (beyond the word count) and a null word both count as EMPTY_WORD_INFO. Hence contexts
+     * of different sizes are equal if the longer one only has empty words in its tail.
+     */
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (!(o instanceof NgramContext)) return false;
+        final NgramContext prevWordsInfo = (NgramContext)o;
+
+        final int maxLength = Math.max(mPrevWordsCount, prevWordsInfo.mPrevWordsCount);
+        for (int i = 0; i < maxLength; i++) {
+            if (!getWordInfoOrEmpty(i).equals(prevWordsInfo.getWordInfoOrEmpty(i))) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    @Override
+    public String toString() {
+        // The builder is local to this call, so the unsynchronized StringBuilder is enough.
+        final StringBuilder builder = new StringBuilder();
+        for (int i = 0; i < mPrevWordsCount; i++) {
+            final WordInfo wordInfo = mPrevWordsInfo[i];
+            builder.append("PrevWord[");
+            builder.append(i);
+            builder.append("]: ");
+            if (wordInfo == null) {
+                builder.append("null. ");
+                continue;
+            }
+            if (!wordInfo.isValid()) {
+                builder.append("Empty. ");
+                continue;
+            }
+            builder.append(wordInfo.mWord);
+            builder.append(", isBeginningOfSentence: ");
+            builder.append(wordInfo.mIsBeginningOfSentence);
+            builder.append(". ");
+        }
+        return builder.toString();
+    }
+}