aboutsummaryrefslogtreecommitdiffstats
path: root/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java
diff options
context:
space:
mode:
Diffstat (limited to 'java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java')
-rw-r--r--java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java166
1 files changed, 0 insertions, 166 deletions
diff --git a/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java b/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java
deleted file mode 100644
index 3e5cb33ca..000000000
--- a/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.inputmethod.latin.utils;
-
-import android.util.Log;
-
-import com.android.inputmethod.annotations.UsedForTesting;
-import com.android.inputmethod.latin.Dictionary;
-import com.android.inputmethod.latin.NgramContext;
-import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
-import com.android.inputmethod.latin.utils.DistracterFilter.HandlingType;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Locale;
-
-// Note: this class is used as a parameter type of a native method. You should be careful when you
-// rename this class or field name. See BinaryDictionary#addMultipleDictionaryEntriesNative().
-public final class LanguageModelParam {
- private static final String TAG = LanguageModelParam.class.getSimpleName();
- private static final boolean DEBUG = false;
- private static final boolean DEBUG_TOKEN = false;
-
- // For now, these probability values are being referred to only when we add new entries to
- // decaying dynamic binary dictionaries. When these are referred to, what matters is 0 or
- // non-0. Thus, it's not meaningful to compare 10, 100, and so on.
- // TODO: Revise the logic in ForgettingCurveUtils in native code.
- private static final int UNIGRAM_PROBABILITY_FOR_VALID_WORD = 100;
- private static final int UNIGRAM_PROBABILITY_FOR_OOV_WORD = Dictionary.NOT_A_PROBABILITY;
- private static final int BIGRAM_PROBABILITY_FOR_VALID_WORD = 10;
- private static final int BIGRAM_PROBABILITY_FOR_OOV_WORD = Dictionary.NOT_A_PROBABILITY;
-
- public final CharSequence mTargetWord;
- public final int[] mWord0;
- public final int[] mWord1;
- // TODO: this needs to be a list of shortcuts
- public final int[] mShortcutTarget;
- public final int mUnigramProbability;
- public final int mBigramProbability;
- public final int mShortcutProbability;
- public final boolean mIsNotAWord;
- public final boolean mIsPossiblyOffensive;
- // Time stamp in seconds.
- public final int mTimestamp;
-
- // Constructor for unigram. TODO: support shortcuts
- @UsedForTesting
- public LanguageModelParam(final CharSequence word, final int unigramProbability,
- final int timestamp) {
- this(null /* word0 */, word, unigramProbability, Dictionary.NOT_A_PROBABILITY, timestamp);
- }
-
- // Constructor for unigram and bigram.
- @UsedForTesting
- public LanguageModelParam(final CharSequence word0, final CharSequence word1,
- final int unigramProbability, final int bigramProbability,
- final int timestamp) {
- mTargetWord = word1;
- mWord0 = (word0 == null) ? null : StringUtils.toCodePointArray(word0);
- mWord1 = StringUtils.toCodePointArray(word1);
- mShortcutTarget = null;
- mUnigramProbability = unigramProbability;
- mBigramProbability = bigramProbability;
- mShortcutProbability = Dictionary.NOT_A_PROBABILITY;
- mIsNotAWord = false;
- mIsPossiblyOffensive = false;
- mTimestamp = timestamp;
- }
-
- // Process a list of words and return a list of {@link LanguageModelParam} objects.
- public static ArrayList<LanguageModelParam> createLanguageModelParamsFrom(
- final List<String> tokens, final int timestamp,
- final SpacingAndPunctuations spacingAndPunctuations, final Locale locale,
- final DistracterFilter distracterFilter) {
- final ArrayList<LanguageModelParam> languageModelParams = new ArrayList<>();
- final int N = tokens.size();
- NgramContext ngramContext = NgramContext.EMPTY_PREV_WORDS_INFO;
- for (int i = 0; i < N; ++i) {
- final String tempWord = tokens.get(i);
- if (StringUtils.isEmptyStringOrWhiteSpaces(tempWord)) {
- // just skip this token
- if (DEBUG_TOKEN) {
- Log.d(TAG, "--- isEmptyStringOrWhiteSpaces: \"" + tempWord + "\"");
- }
- continue;
- }
- if (!DictionaryInfoUtils.looksValidForDictionaryInsertion(
- tempWord, spacingAndPunctuations)) {
- if (DEBUG_TOKEN) {
- Log.d(TAG, "--- not looksValidForDictionaryInsertion: \""
- + tempWord + "\"");
- }
- // Sentence terminator found. Split.
- ngramContext = NgramContext.EMPTY_PREV_WORDS_INFO;
- continue;
- }
- if (DEBUG_TOKEN) {
- Log.d(TAG, "--- word: \"" + tempWord + "\"");
- }
- final LanguageModelParam languageModelParam =
- detectWhetherVaildWordOrNotAndGetLanguageModelParam(
- ngramContext, tempWord, timestamp, locale, distracterFilter);
- if (languageModelParam == null) {
- continue;
- }
- languageModelParams.add(languageModelParam);
- ngramContext = ngramContext.getNextNgramContext(
- new NgramContext.WordInfo(tempWord));
- }
- return languageModelParams;
- }
-
- private static LanguageModelParam detectWhetherVaildWordOrNotAndGetLanguageModelParam(
- final NgramContext ngramContext, final String targetWord, final int timestamp,
- final Locale locale, final DistracterFilter distracterFilter) {
- if (locale == null) {
- return null;
- }
- final int wordHandlingType = distracterFilter.getWordHandlingType(ngramContext,
- targetWord, locale);
- final String word = HandlingType.shouldBeLowerCased(wordHandlingType) ?
- targetWord.toLowerCase(locale) : targetWord;
- if (distracterFilter.isDistracterToWordsInDictionaries(ngramContext, targetWord, locale)) {
- // The word is a distracter.
- return null;
- }
- return createAndGetLanguageModelParamOfWord(ngramContext, word, timestamp,
- !HandlingType.shouldBeHandledAsOov(wordHandlingType));
- }
-
- private static LanguageModelParam createAndGetLanguageModelParamOfWord(
- final NgramContext ngramContext, final String word, final int timestamp,
- final boolean isValidWord) {
- final int unigramProbability = isValidWord ?
- UNIGRAM_PROBABILITY_FOR_VALID_WORD : UNIGRAM_PROBABILITY_FOR_OOV_WORD;
- if (!ngramContext.isValid()) {
- if (DEBUG) {
- Log.d(TAG, "--- add unigram: current("
- + (isValidWord ? "Valid" : "OOV") + ") = " + word);
- }
- return new LanguageModelParam(word, unigramProbability, timestamp);
- }
- if (DEBUG) {
- Log.d(TAG, "--- add bigram: prev = " + ngramContext + ", current("
- + (isValidWord ? "Valid" : "OOV") + ") = " + word);
- }
- final int bigramProbability = isValidWord ?
- BIGRAM_PROBABILITY_FOR_VALID_WORD : BIGRAM_PROBABILITY_FOR_OOV_WORD;
- return new LanguageModelParam(ngramContext.getNthPrevWord(1 /* n */), word,
- unigramProbability, bigramProbability, timestamp);
- }
-}