aboutsummaryrefslogtreecommitdiffstats
path: root/java/src/com/android/inputmethod/latin/utils
diff options
context:
space:
mode:
Diffstat (limited to 'java/src/com/android/inputmethod/latin/utils')
-rw-r--r--java/src/com/android/inputmethod/latin/utils/DistracterFilter.java11
-rw-r--r--java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java166
-rw-r--r--java/src/com/android/inputmethod/latin/utils/NgramContextUtils.java9
-rw-r--r--java/src/com/android/inputmethod/latin/utils/ResourceUtils.java2
-rw-r--r--java/src/com/android/inputmethod/latin/utils/StringUtils.java6
-rw-r--r--java/src/com/android/inputmethod/latin/utils/WordInputEventForPersonalization.java117
6 files changed, 134 insertions, 177 deletions
diff --git a/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java b/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java
index 355d00dac..525212c96 100644
--- a/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java
+++ b/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java
@@ -16,14 +16,16 @@
package com.android.inputmethod.latin.utils;
-import java.util.List;
-import java.util.Locale;
-
import android.view.inputmethod.InputMethodSubtype;
import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.NgramContext;
+import java.util.List;
+import java.util.Locale;
+
+import javax.annotation.Nonnull;
+
public interface DistracterFilter {
/**
* Determine whether a word is a distracter to words in dictionaries.
@@ -68,8 +70,9 @@ public interface DistracterFilter {
public static boolean shouldBeHandledAsOov(final int handlingType) {
return (handlingType & SHOULD_BE_HANDLED_AS_OOV) != 0;
}
- };
+ }
+ @Nonnull
public static final DistracterFilter EMPTY_DISTRACTER_FILTER = new DistracterFilter() {
@Override
public boolean isDistracterToWordsInDictionaries(NgramContext ngramContext,
diff --git a/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java b/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java
deleted file mode 100644
index 3e5cb33ca..000000000
--- a/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.inputmethod.latin.utils;
-
-import android.util.Log;
-
-import com.android.inputmethod.annotations.UsedForTesting;
-import com.android.inputmethod.latin.Dictionary;
-import com.android.inputmethod.latin.NgramContext;
-import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
-import com.android.inputmethod.latin.utils.DistracterFilter.HandlingType;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Locale;
-
-// Note: this class is used as a parameter type of a native method. You should be careful when you
-// rename this class or field name. See BinaryDictionary#addMultipleDictionaryEntriesNative().
-public final class LanguageModelParam {
- private static final String TAG = LanguageModelParam.class.getSimpleName();
- private static final boolean DEBUG = false;
- private static final boolean DEBUG_TOKEN = false;
-
- // For now, these probability values are being referred to only when we add new entries to
- // decaying dynamic binary dictionaries. When these are referred to, what matters is 0 or
- // non-0. Thus, it's not meaningful to compare 10, 100, and so on.
- // TODO: Revise the logic in ForgettingCurveUtils in native code.
- private static final int UNIGRAM_PROBABILITY_FOR_VALID_WORD = 100;
- private static final int UNIGRAM_PROBABILITY_FOR_OOV_WORD = Dictionary.NOT_A_PROBABILITY;
- private static final int BIGRAM_PROBABILITY_FOR_VALID_WORD = 10;
- private static final int BIGRAM_PROBABILITY_FOR_OOV_WORD = Dictionary.NOT_A_PROBABILITY;
-
- public final CharSequence mTargetWord;
- public final int[] mWord0;
- public final int[] mWord1;
- // TODO: this needs to be a list of shortcuts
- public final int[] mShortcutTarget;
- public final int mUnigramProbability;
- public final int mBigramProbability;
- public final int mShortcutProbability;
- public final boolean mIsNotAWord;
- public final boolean mIsPossiblyOffensive;
- // Time stamp in seconds.
- public final int mTimestamp;
-
- // Constructor for unigram. TODO: support shortcuts
- @UsedForTesting
- public LanguageModelParam(final CharSequence word, final int unigramProbability,
- final int timestamp) {
- this(null /* word0 */, word, unigramProbability, Dictionary.NOT_A_PROBABILITY, timestamp);
- }
-
- // Constructor for unigram and bigram.
- @UsedForTesting
- public LanguageModelParam(final CharSequence word0, final CharSequence word1,
- final int unigramProbability, final int bigramProbability,
- final int timestamp) {
- mTargetWord = word1;
- mWord0 = (word0 == null) ? null : StringUtils.toCodePointArray(word0);
- mWord1 = StringUtils.toCodePointArray(word1);
- mShortcutTarget = null;
- mUnigramProbability = unigramProbability;
- mBigramProbability = bigramProbability;
- mShortcutProbability = Dictionary.NOT_A_PROBABILITY;
- mIsNotAWord = false;
- mIsPossiblyOffensive = false;
- mTimestamp = timestamp;
- }
-
- // Process a list of words and return a list of {@link LanguageModelParam} objects.
- public static ArrayList<LanguageModelParam> createLanguageModelParamsFrom(
- final List<String> tokens, final int timestamp,
- final SpacingAndPunctuations spacingAndPunctuations, final Locale locale,
- final DistracterFilter distracterFilter) {
- final ArrayList<LanguageModelParam> languageModelParams = new ArrayList<>();
- final int N = tokens.size();
- NgramContext ngramContext = NgramContext.EMPTY_PREV_WORDS_INFO;
- for (int i = 0; i < N; ++i) {
- final String tempWord = tokens.get(i);
- if (StringUtils.isEmptyStringOrWhiteSpaces(tempWord)) {
- // just skip this token
- if (DEBUG_TOKEN) {
- Log.d(TAG, "--- isEmptyStringOrWhiteSpaces: \"" + tempWord + "\"");
- }
- continue;
- }
- if (!DictionaryInfoUtils.looksValidForDictionaryInsertion(
- tempWord, spacingAndPunctuations)) {
- if (DEBUG_TOKEN) {
- Log.d(TAG, "--- not looksValidForDictionaryInsertion: \""
- + tempWord + "\"");
- }
- // Sentence terminator found. Split.
- ngramContext = NgramContext.EMPTY_PREV_WORDS_INFO;
- continue;
- }
- if (DEBUG_TOKEN) {
- Log.d(TAG, "--- word: \"" + tempWord + "\"");
- }
- final LanguageModelParam languageModelParam =
- detectWhetherVaildWordOrNotAndGetLanguageModelParam(
- ngramContext, tempWord, timestamp, locale, distracterFilter);
- if (languageModelParam == null) {
- continue;
- }
- languageModelParams.add(languageModelParam);
- ngramContext = ngramContext.getNextNgramContext(
- new NgramContext.WordInfo(tempWord));
- }
- return languageModelParams;
- }
-
- private static LanguageModelParam detectWhetherVaildWordOrNotAndGetLanguageModelParam(
- final NgramContext ngramContext, final String targetWord, final int timestamp,
- final Locale locale, final DistracterFilter distracterFilter) {
- if (locale == null) {
- return null;
- }
- final int wordHandlingType = distracterFilter.getWordHandlingType(ngramContext,
- targetWord, locale);
- final String word = HandlingType.shouldBeLowerCased(wordHandlingType) ?
- targetWord.toLowerCase(locale) : targetWord;
- if (distracterFilter.isDistracterToWordsInDictionaries(ngramContext, targetWord, locale)) {
- // The word is a distracter.
- return null;
- }
- return createAndGetLanguageModelParamOfWord(ngramContext, word, timestamp,
- !HandlingType.shouldBeHandledAsOov(wordHandlingType));
- }
-
- private static LanguageModelParam createAndGetLanguageModelParamOfWord(
- final NgramContext ngramContext, final String word, final int timestamp,
- final boolean isValidWord) {
- final int unigramProbability = isValidWord ?
- UNIGRAM_PROBABILITY_FOR_VALID_WORD : UNIGRAM_PROBABILITY_FOR_OOV_WORD;
- if (!ngramContext.isValid()) {
- if (DEBUG) {
- Log.d(TAG, "--- add unigram: current("
- + (isValidWord ? "Valid" : "OOV") + ") = " + word);
- }
- return new LanguageModelParam(word, unigramProbability, timestamp);
- }
- if (DEBUG) {
- Log.d(TAG, "--- add bigram: prev = " + ngramContext + ", current("
- + (isValidWord ? "Valid" : "OOV") + ") = " + word);
- }
- final int bigramProbability = isValidWord ?
- BIGRAM_PROBABILITY_FOR_VALID_WORD : BIGRAM_PROBABILITY_FOR_OOV_WORD;
- return new LanguageModelParam(ngramContext.getNthPrevWord(1 /* n */), word,
- unigramProbability, bigramProbability, timestamp);
- }
-}
diff --git a/java/src/com/android/inputmethod/latin/utils/NgramContextUtils.java b/java/src/com/android/inputmethod/latin/utils/NgramContextUtils.java
index 95a1f0fb2..ba436777d 100644
--- a/java/src/com/android/inputmethod/latin/utils/NgramContextUtils.java
+++ b/java/src/com/android/inputmethod/latin/utils/NgramContextUtils.java
@@ -16,14 +16,16 @@
package com.android.inputmethod.latin.utils;
-import java.util.Arrays;
-import java.util.regex.Pattern;
-
import com.android.inputmethod.latin.Constants;
import com.android.inputmethod.latin.NgramContext;
import com.android.inputmethod.latin.NgramContext.WordInfo;
import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
+import java.util.Arrays;
+import java.util.regex.Pattern;
+
+import javax.annotation.Nonnull;
+
public final class NgramContextUtils {
private NgramContextUtils() {
// Intentional empty constructor for utility class.
@@ -52,6 +54,7 @@ public final class NgramContextUtils {
// (n = 2) "abc|" -> beginning-of-sentence
// (n = 2) "abc |" -> beginning-of-sentence
// (n = 2) "abc. def|" -> beginning-of-sentence
+ @Nonnull
public static NgramContext getNgramContextFromNthPreviousWord(final CharSequence prev,
final SpacingAndPunctuations spacingAndPunctuations, final int n) {
if (prev == null) return NgramContext.EMPTY_PREV_WORDS_INFO;
diff --git a/java/src/com/android/inputmethod/latin/utils/ResourceUtils.java b/java/src/com/android/inputmethod/latin/utils/ResourceUtils.java
index 093c5a6c1..d1fc642f3 100644
--- a/java/src/com/android/inputmethod/latin/utils/ResourceUtils.java
+++ b/java/src/com/android/inputmethod/latin/utils/ResourceUtils.java
@@ -110,7 +110,6 @@ public final class ResourceUtils {
* are true for the specified key value pairs.
*
* For example, "condition,constant" has the following format.
- * (See {@link ResourceUtilsTests#testFindConstantForKeyValuePairsRegexp()})
* - HARDWARE=mako,constantForNexus4
* - MODEL=Nexus 4:MANUFACTURER=LGE,constantForNexus4
* - ,defaultConstant
@@ -119,6 +118,7 @@ public final class ResourceUtils {
* @param conditionConstantArray an array of "condition,constant" elements to be searched.
* @return the constant part of the matched "condition,constant" element. Returns null if no
* condition matches.
+ * @see com.android.inputmethod.latin.utils.ResourceUtilsTests#testFindConstantForKeyValuePairsRegexp()
*/
@UsedForTesting
static String findConstantForKeyValuePairs(final HashMap<String, String> keyValuePairs,
diff --git a/java/src/com/android/inputmethod/latin/utils/StringUtils.java b/java/src/com/android/inputmethod/latin/utils/StringUtils.java
index bbcef990d..bc068ac53 100644
--- a/java/src/com/android/inputmethod/latin/utils/StringUtils.java
+++ b/java/src/com/android/inputmethod/latin/utils/StringUtils.java
@@ -521,12 +521,12 @@ public final class StringUtils {
* {@code charSequence.toString().split(regex, preserveTrailingEmptySegments ? -1 : 0)}
* except that the spans are preserved in the result array.
* </p>
- * @param input the character sequence to be split.
+ * @param charSequence the character sequence to be split.
* @param regex the regex pattern to be used as the separator.
* @param preserveTrailingEmptySegments {@code true} to preserve the trailing empty
* segments. Otherwise, trailing empty segments will be removed before being returned.
- * @return the array which contains the result. All the spans in the {@param input} is
- * preserved.
+ * @return the array which contains the result. All the spans in the <code>charSequence</code>
+ * is preserved.
*/
@UsedForTesting
public static CharSequence[] split(final CharSequence charSequence, final String regex,
diff --git a/java/src/com/android/inputmethod/latin/utils/WordInputEventForPersonalization.java b/java/src/com/android/inputmethod/latin/utils/WordInputEventForPersonalization.java
new file mode 100644
index 000000000..644fda57f
--- /dev/null
+++ b/java/src/com/android/inputmethod/latin/utils/WordInputEventForPersonalization.java
@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin.utils;
+
+import android.util.Log;
+
+import com.android.inputmethod.annotations.UsedForTesting;
+import com.android.inputmethod.latin.Constants;
+import com.android.inputmethod.latin.NgramContext;
+import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
+import com.android.inputmethod.latin.utils.DistracterFilter.HandlingType;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+
+// Note: this class is used as a parameter type of a native method. You should be careful when you
+// rename this class or field name. See BinaryDictionary#addMultipleDictionaryEntriesNative().
+public final class WordInputEventForPersonalization {
+ private static final String TAG = WordInputEventForPersonalization.class.getSimpleName();
+ private static final boolean DEBUG_TOKEN = false;
+
+ public final int[] mTargetWord;
+ public final int mPrevWordsCount;
+ public final int[][] mPrevWordArray = new int[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM][];
+ public final boolean[] mIsPrevWordBeginningOfSentenceArray =
+ new boolean[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+ public final boolean mIsValid;
+ // Time stamp in seconds.
+ public final int mTimestamp;
+
+ @UsedForTesting
+ public WordInputEventForPersonalization(final CharSequence targetWord,
+ final NgramContext ngramContext, final boolean isValid, final int timestamp) {
+ mTargetWord = StringUtils.toCodePointArray(targetWord);
+ mPrevWordsCount = ngramContext.getPrevWordCount();
+ ngramContext.outputToArray(mPrevWordArray, mIsPrevWordBeginningOfSentenceArray);
+ mIsValid = isValid;
+ mTimestamp = timestamp;
+ }
+
+ // Process a list of words and return a list of {@link WordInputEventForPersonalization}
+ // objects.
+ public static ArrayList<WordInputEventForPersonalization> createInputEventFrom(
+ final List<String> tokens, final int timestamp,
+ final SpacingAndPunctuations spacingAndPunctuations, final Locale locale,
+ final DistracterFilter distracterFilter) {
+ final ArrayList<WordInputEventForPersonalization> inputEvents = new ArrayList<>();
+ final int N = tokens.size();
+ NgramContext ngramContext = NgramContext.EMPTY_PREV_WORDS_INFO;
+ for (int i = 0; i < N; ++i) {
+ final String tempWord = tokens.get(i);
+ if (StringUtils.isEmptyStringOrWhiteSpaces(tempWord)) {
+ // just skip this token
+ if (DEBUG_TOKEN) {
+ Log.d(TAG, "--- isEmptyStringOrWhiteSpaces: \"" + tempWord + "\"");
+ }
+ continue;
+ }
+ if (!DictionaryInfoUtils.looksValidForDictionaryInsertion(
+ tempWord, spacingAndPunctuations)) {
+ if (DEBUG_TOKEN) {
+ Log.d(TAG, "--- not looksValidForDictionaryInsertion: \""
+ + tempWord + "\"");
+ }
+ // Sentence terminator found. Split.
+ // TODO: Detect whether the context is beginning-of-sentence.
+ ngramContext = NgramContext.EMPTY_PREV_WORDS_INFO;
+ continue;
+ }
+ if (DEBUG_TOKEN) {
+ Log.d(TAG, "--- word: \"" + tempWord + "\"");
+ }
+ final WordInputEventForPersonalization inputEvent =
+ detectWhetherVaildWordOrNotAndGetInputEvent(
+ ngramContext, tempWord, timestamp, locale, distracterFilter);
+ if (inputEvent == null) {
+ continue;
+ }
+ inputEvents.add(inputEvent);
+ ngramContext = ngramContext.getNextNgramContext(new NgramContext.WordInfo(tempWord));
+ }
+ return inputEvents;
+ }
+
+ private static WordInputEventForPersonalization detectWhetherVaildWordOrNotAndGetInputEvent(
+ final NgramContext ngramContext, final String targetWord, final int timestamp,
+ final Locale locale, final DistracterFilter distracterFilter) {
+ if (locale == null) {
+ return null;
+ }
+ final int wordHandlingType = distracterFilter.getWordHandlingType(ngramContext,
+ targetWord, locale);
+ final String word = HandlingType.shouldBeLowerCased(wordHandlingType) ?
+ targetWord.toLowerCase(locale) : targetWord;
+ if (distracterFilter.isDistracterToWordsInDictionaries(ngramContext, targetWord, locale)) {
+ // The word is a distracter.
+ return null;
+ }
+ return new WordInputEventForPersonalization(word, ngramContext,
+ !HandlingType.shouldBeHandledAsOov(wordHandlingType), timestamp);
+ }
+}