diff options
Diffstat (limited to 'java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java')
-rw-r--r-- | java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java | 166 |
1 files changed, 0 insertions, 166 deletions
diff --git a/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java b/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java deleted file mode 100644 index 3e5cb33ca..000000000 --- a/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.android.inputmethod.latin.utils; - -import android.util.Log; - -import com.android.inputmethod.annotations.UsedForTesting; -import com.android.inputmethod.latin.Dictionary; -import com.android.inputmethod.latin.NgramContext; -import com.android.inputmethod.latin.settings.SpacingAndPunctuations; -import com.android.inputmethod.latin.utils.DistracterFilter.HandlingType; - -import java.util.ArrayList; -import java.util.List; -import java.util.Locale; - -// Note: this class is used as a parameter type of a native method. You should be careful when you -// rename this class or field name. See BinaryDictionary#addMultipleDictionaryEntriesNative(). -public final class LanguageModelParam { - private static final String TAG = LanguageModelParam.class.getSimpleName(); - private static final boolean DEBUG = false; - private static final boolean DEBUG_TOKEN = false; - - // For now, these probability values are being referred to only when we add new entries to - // decaying dynamic binary dictionaries. When these are referred to, what matters is 0 or - // non-0. Thus, it's not meaningful to compare 10, 100, and so on. - // TODO: Revise the logic in ForgettingCurveUtils in native code. - private static final int UNIGRAM_PROBABILITY_FOR_VALID_WORD = 100; - private static final int UNIGRAM_PROBABILITY_FOR_OOV_WORD = Dictionary.NOT_A_PROBABILITY; - private static final int BIGRAM_PROBABILITY_FOR_VALID_WORD = 10; - private static final int BIGRAM_PROBABILITY_FOR_OOV_WORD = Dictionary.NOT_A_PROBABILITY; - - public final CharSequence mTargetWord; - public final int[] mWord0; - public final int[] mWord1; - // TODO: this needs to be a list of shortcuts - public final int[] mShortcutTarget; - public final int mUnigramProbability; - public final int mBigramProbability; - public final int mShortcutProbability; - public final boolean mIsNotAWord; - public final boolean mIsPossiblyOffensive; - // Time stamp in seconds. - public final int mTimestamp; - - // Constructor for unigram. TODO: support shortcuts - @UsedForTesting - public LanguageModelParam(final CharSequence word, final int unigramProbability, - final int timestamp) { - this(null /* word0 */, word, unigramProbability, Dictionary.NOT_A_PROBABILITY, timestamp); - } - - // Constructor for unigram and bigram. - @UsedForTesting - public LanguageModelParam(final CharSequence word0, final CharSequence word1, - final int unigramProbability, final int bigramProbability, - final int timestamp) { - mTargetWord = word1; - mWord0 = (word0 == null) ? null : StringUtils.toCodePointArray(word0); - mWord1 = StringUtils.toCodePointArray(word1); - mShortcutTarget = null; - mUnigramProbability = unigramProbability; - mBigramProbability = bigramProbability; - mShortcutProbability = Dictionary.NOT_A_PROBABILITY; - mIsNotAWord = false; - mIsPossiblyOffensive = false; - mTimestamp = timestamp; - } - - // Process a list of words and return a list of {@link LanguageModelParam} objects. - public static ArrayList<LanguageModelParam> createLanguageModelParamsFrom( - final List<String> tokens, final int timestamp, - final SpacingAndPunctuations spacingAndPunctuations, final Locale locale, - final DistracterFilter distracterFilter) { - final ArrayList<LanguageModelParam> languageModelParams = new ArrayList<>(); - final int N = tokens.size(); - NgramContext ngramContext = NgramContext.EMPTY_PREV_WORDS_INFO; - for (int i = 0; i < N; ++i) { - final String tempWord = tokens.get(i); - if (StringUtils.isEmptyStringOrWhiteSpaces(tempWord)) { - // just skip this token - if (DEBUG_TOKEN) { - Log.d(TAG, "--- isEmptyStringOrWhiteSpaces: \"" + tempWord + "\""); - } - continue; - } - if (!DictionaryInfoUtils.looksValidForDictionaryInsertion( - tempWord, spacingAndPunctuations)) { - if (DEBUG_TOKEN) { - Log.d(TAG, "--- not looksValidForDictionaryInsertion: \"" - + tempWord + "\""); - } - // Sentence terminator found. Split. - ngramContext = NgramContext.EMPTY_PREV_WORDS_INFO; - continue; - } - if (DEBUG_TOKEN) { - Log.d(TAG, "--- word: \"" + tempWord + "\""); - } - final LanguageModelParam languageModelParam = - detectWhetherVaildWordOrNotAndGetLanguageModelParam( - ngramContext, tempWord, timestamp, locale, distracterFilter); - if (languageModelParam == null) { - continue; - } - languageModelParams.add(languageModelParam); - ngramContext = ngramContext.getNextNgramContext( - new NgramContext.WordInfo(tempWord)); - } - return languageModelParams; - } - - private static LanguageModelParam detectWhetherVaildWordOrNotAndGetLanguageModelParam( - final NgramContext ngramContext, final String targetWord, final int timestamp, - final Locale locale, final DistracterFilter distracterFilter) { - if (locale == null) { - return null; - } - final int wordHandlingType = distracterFilter.getWordHandlingType(ngramContext, - targetWord, locale); - final String word = HandlingType.shouldBeLowerCased(wordHandlingType) ? - targetWord.toLowerCase(locale) : targetWord; - if (distracterFilter.isDistracterToWordsInDictionaries(ngramContext, targetWord, locale)) { - // The word is a distracter. - return null; - } - return createAndGetLanguageModelParamOfWord(ngramContext, word, timestamp, - !HandlingType.shouldBeHandledAsOov(wordHandlingType)); - } - - private static LanguageModelParam createAndGetLanguageModelParamOfWord( - final NgramContext ngramContext, final String word, final int timestamp, - final boolean isValidWord) { - final int unigramProbability = isValidWord ? - UNIGRAM_PROBABILITY_FOR_VALID_WORD : UNIGRAM_PROBABILITY_FOR_OOV_WORD; - if (!ngramContext.isValid()) { - if (DEBUG) { - Log.d(TAG, "--- add unigram: current(" - + (isValidWord ? "Valid" : "OOV") + ") = " + word); - } - return new LanguageModelParam(word, unigramProbability, timestamp); - } - if (DEBUG) { - Log.d(TAG, "--- add bigram: prev = " + ngramContext + ", current(" - + (isValidWord ? "Valid" : "OOV") + ") = " + word); - } - final int bigramProbability = isValidWord ? - BIGRAM_PROBABILITY_FOR_VALID_WORD : BIGRAM_PROBABILITY_FOR_OOV_WORD; - return new LanguageModelParam(ngramContext.getNthPrevWord(1 /* n */), word, - unigramProbability, bigramProbability, timestamp); - } -} |