aboutsummaryrefslogtreecommitdiffstats
path: root/java/src
diff options
context:
space:
mode:
Diffstat (limited to 'java/src')
-rw-r--r--java/src/com/android/inputmethod/latin/BinaryDictionary.java46
-rw-r--r--java/src/com/android/inputmethod/latin/DictionaryFacilitatorForSuggest.java2
-rw-r--r--java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java5
-rw-r--r--java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java2
-rw-r--r--java/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryUpdateSession.java2
-rw-r--r--java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java171
6 files changed, 178 insertions, 50 deletions
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
index 708f75a06..2c7998688 100644
--- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
@@ -25,6 +25,7 @@ import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import com.android.inputmethod.latin.settings.NativeSuggestOptions;
import com.android.inputmethod.latin.utils.CollectionUtils;
import com.android.inputmethod.latin.utils.JniUtils;
+import com.android.inputmethod.latin.utils.LanguageModelParam;
import com.android.inputmethod.latin.utils.StringUtils;
import com.android.inputmethod.latin.utils.UnigramProperty;
@@ -364,51 +365,6 @@ public final class BinaryDictionary extends Dictionary {
removeBigramWordsNative(mNativeDict, codePoints0, codePoints1);
}
- public static class LanguageModelParam {
- public final String mTargetWord;
- public final int[] mWord0;
- public final int[] mWord1;
- // TODO: this needs to be a list of shortcuts
- public final int[] mShortcutTarget;
- public final int mUnigramProbability;
- public final int mBigramProbability;
- public final int mShortcutProbability;
- public final boolean mIsNotAWord;
- public final boolean mIsBlacklisted;
- public final int mTimestamp;
-
- // Constructor for unigram. TODO: support shortcuts
- public LanguageModelParam(final String word, final int unigramProbability,
- final int timestamp) {
- mTargetWord = word;
- mWord0 = null;
- mWord1 = StringUtils.toCodePointArray(word);
- mShortcutTarget = null;
- mUnigramProbability = unigramProbability;
- mBigramProbability = NOT_A_PROBABILITY;
- mShortcutProbability = NOT_A_PROBABILITY;
- mIsNotAWord = false;
- mIsBlacklisted = false;
- mTimestamp = timestamp;
- }
-
- // Constructor for unigram and bigram.
- public LanguageModelParam(final String word0, final String word1,
- final int unigramProbability, final int bigramProbability,
- final int timestamp) {
- mTargetWord = word1;
- mWord0 = StringUtils.toCodePointArray(word0);
- mWord1 = StringUtils.toCodePointArray(word1);
- mShortcutTarget = null;
- mUnigramProbability = unigramProbability;
- mBigramProbability = bigramProbability;
- mShortcutProbability = NOT_A_PROBABILITY;
- mIsNotAWord = false;
- mIsBlacklisted = false;
- mTimestamp = timestamp;
- }
- }
-
public void addMultipleDictionaryEntries(final LanguageModelParam[] languageModelParams) {
if (!isValidDictionary()) return;
int processedParamCount = 0;
diff --git a/java/src/com/android/inputmethod/latin/DictionaryFacilitatorForSuggest.java b/java/src/com/android/inputmethod/latin/DictionaryFacilitatorForSuggest.java
index 7e4f0e85c..5698384fc 100644
--- a/java/src/com/android/inputmethod/latin/DictionaryFacilitatorForSuggest.java
+++ b/java/src/com/android/inputmethod/latin/DictionaryFacilitatorForSuggest.java
@@ -44,7 +44,7 @@ public class DictionaryFacilitatorForSuggest {
public static final String TAG = DictionaryFacilitatorForSuggest.class.getSimpleName();
private final Context mContext;
- private final Locale mLocale;
+ public final Locale mLocale;
private final ConcurrentHashMap<String, Dictionary> mDictionaries =
CollectionUtils.newConcurrentHashMap();
diff --git a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
index 7757d2910..ecef20efc 100644
--- a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
@@ -21,12 +21,12 @@ import android.util.Log;
import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.keyboard.ProximityInfo;
-import com.android.inputmethod.latin.BinaryDictionary.LanguageModelParam;
import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import com.android.inputmethod.latin.utils.AsyncResultHolder;
import com.android.inputmethod.latin.utils.CollectionUtils;
import com.android.inputmethod.latin.utils.FileUtils;
+import com.android.inputmethod.latin.utils.LanguageModelParam;
import com.android.inputmethod.latin.utils.PrioritizedSerialExecutor;
import java.io.File;
@@ -58,6 +58,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
private static final boolean DBG_STRESS_TEST = false;
private static final int TIMEOUT_FOR_READ_OPS_IN_MILLISECONDS = 100;
+ private static final int TIMEOUT_FOR_READ_OPS_FOR_TESTS_IN_MILLISECONDS = 1000;
/**
* The maximum length of a word in this dictionary.
@@ -761,7 +762,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
}
}
});
- return holder.get(false, TIMEOUT_FOR_READ_OPS_IN_MILLISECONDS);
+ return holder.get(false, TIMEOUT_FOR_READ_OPS_FOR_TESTS_IN_MILLISECONDS);
}
@UsedForTesting
diff --git a/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java b/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java
index 701c29023..cc57d13dc 100644
--- a/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java
+++ b/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java
@@ -20,13 +20,13 @@ import android.content.Context;
import android.util.Log;
import com.android.inputmethod.annotations.UsedForTesting;
-import com.android.inputmethod.latin.BinaryDictionary.LanguageModelParam;
import com.android.inputmethod.latin.Constants;
import com.android.inputmethod.latin.Dictionary;
import com.android.inputmethod.latin.ExpandableBinaryDictionary;
import com.android.inputmethod.latin.makedict.DictDecoder;
import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
+import com.android.inputmethod.latin.utils.LanguageModelParam;
import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils;
import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils.OnAddWordListener;
diff --git a/java/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryUpdateSession.java b/java/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryUpdateSession.java
index 3eb8f35a9..bc11db289 100644
--- a/java/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryUpdateSession.java
+++ b/java/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryUpdateSession.java
@@ -18,8 +18,8 @@ package com.android.inputmethod.latin.personalization;
import android.content.Context;
-import com.android.inputmethod.latin.BinaryDictionary.LanguageModelParam;
import com.android.inputmethod.latin.ExpandableBinaryDictionary;
+import com.android.inputmethod.latin.utils.LanguageModelParam;
import java.lang.ref.WeakReference;
import java.util.ArrayList;
diff --git a/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java b/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java
new file mode 100644
index 000000000..a1d641508
--- /dev/null
+++ b/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java
@@ -0,0 +1,171 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin.utils;
+
+import android.util.Log;
+
+import com.android.inputmethod.latin.Dictionary;
+import com.android.inputmethod.latin.DictionaryFacilitatorForSuggest;
+import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
+
+import java.util.ArrayList;
+import java.util.Locale;
+
+// Note: this class is used as a parameter type of a native method. You should be careful when you
+// rename this class or field name. See BinaryDictionary#addMultipleDictionaryEntriesNative().
+public final class LanguageModelParam {
+ private static final String TAG = LanguageModelParam.class.getSimpleName();
+ private static final boolean DEBUG = false;
+ private static final boolean DEBUG_TOKEN = false;
+
+ // For now, these probability values are being referred to only when we add new entries to
+ // decaying dynamic binary dictionaries. When these are referred to, what matters is 0 or
+ // non-0. Thus, it's not meaningful to compare 10, 100, and so on.
+ // TODO: Revise the logic in ForgettingCurveUtils in native code.
+ private static final int UNIGRAM_PROBABILITY_FOR_VALID_WORD = 100;
+ private static final int UNIGRAM_PROBABILITY_FOR_OOV_WORD = 10;
+ private static final int BIGRAM_PROBABILITY_FOR_VALID_WORD = 0;
+ private static final int BIGRAM_PROBABILITY_FOR_OOV_WORD = 0;
+
+ public final String mTargetWord;
+ public final int[] mWord0;
+ public final int[] mWord1;
+ // TODO: this needs to be a list of shortcuts
+ public final int[] mShortcutTarget;
+ public final int mUnigramProbability;
+ public final int mBigramProbability;
+ public final int mShortcutProbability;
+ public final boolean mIsNotAWord;
+ public final boolean mIsBlacklisted;
+ // Time stamp in seconds.
+ public final int mTimestamp;
+
+ // Constructor for unigram. TODO: support shortcuts
+ public LanguageModelParam(final String word, final int unigramProbability,
+ final int timestamp) {
+ this(null /* word0 */, word, unigramProbability, Dictionary.NOT_A_PROBABILITY, timestamp);
+ }
+
+ // Constructor for unigram and bigram.
+ public LanguageModelParam(final String word0, final String word1,
+ final int unigramProbability, final int bigramProbability,
+ final int timestamp) {
+ mTargetWord = word1;
+ mWord0 = (word0 == null) ? null : StringUtils.toCodePointArray(word0);
+ mWord1 = StringUtils.toCodePointArray(word1);
+ mShortcutTarget = null;
+ mUnigramProbability = unigramProbability;
+ mBigramProbability = bigramProbability;
+ mShortcutProbability = Dictionary.NOT_A_PROBABILITY;
+ mIsNotAWord = false;
+ mIsBlacklisted = false;
+ mTimestamp = timestamp;
+ }
+
+ // Process a list of words and return a list of {@link LanguageModelParam} objects.
+ public static ArrayList<LanguageModelParam> createLanguageModelParamsFrom(
+ final ArrayList<String> tokens, final int timestamp,
+ final DictionaryFacilitatorForSuggest dictionaryFacilitator,
+ final SpacingAndPunctuations spacingAndPunctuations) {
+ final ArrayList<LanguageModelParam> languageModelParams =
+ CollectionUtils.newArrayList();
+ final int N = tokens.size();
+ String prevWord = null;
+ for (int i = 0; i < N; ++i) {
+ final String tempWord = tokens.get(i);
+ if (StringUtils.isEmptyStringOrWhiteSpaces(tempWord)) {
+ // just skip this token
+ if (DEBUG_TOKEN) {
+ Log.d(TAG, "--- isEmptyStringOrWhiteSpaces: \"" + tempWord + "\"");
+ }
+ continue;
+ }
+ if (!DictionaryInfoUtils.looksValidForDictionaryInsertion(
+ tempWord, spacingAndPunctuations)) {
+ if (DEBUG_TOKEN) {
+ Log.d(TAG, "--- not looksValidForDictionaryInsertion: \""
+ + tempWord + "\"");
+ }
+ // Sentence terminator found. Split.
+ prevWord = null;
+ continue;
+ }
+ if (DEBUG_TOKEN) {
+ Log.d(TAG, "--- word: \"" + tempWord + "\"");
+ }
+ final LanguageModelParam languageModelParam =
+ detectWhetherVaildWordOrNotAndGetLanguageModelParam(
+ prevWord, tempWord, timestamp, dictionaryFacilitator);
+ languageModelParams.add(languageModelParam);
+ prevWord = languageModelParam.mTargetWord;
+ }
+ return languageModelParams;
+ }
+
+ private static LanguageModelParam detectWhetherVaildWordOrNotAndGetLanguageModelParam(
+ final String prevWord, final String targetWord, final int timestamp,
+ final DictionaryFacilitatorForSuggest dictionaryFacilitator) {
+ final Locale locale = dictionaryFacilitator.mLocale;
+ if (!dictionaryFacilitator.isValidWord(targetWord, true /* ignoreCase */)) {
+ // OOV word.
+ return createAndGetLanguageModelParamOfWord(prevWord, targetWord, timestamp,
+ false /* isValidWord */, locale);
+ }
+ if (dictionaryFacilitator.isValidWord(targetWord, false /* ignoreCase */)) {
+ return createAndGetLanguageModelParamOfWord(prevWord, targetWord, timestamp,
+ true /* isValidWord */, locale);
+ }
+ final String lowerCaseTargetWord = targetWord.toLowerCase(locale);
+ if (dictionaryFacilitator.isValidWord(lowerCaseTargetWord, false /* ignoreCase */)) {
+ // Add the lower-cased word.
+ return createAndGetLanguageModelParamOfWord(prevWord, lowerCaseTargetWord,
+ timestamp, true /* isValidWord */, locale);
+ }
+ // Treat the word as an OOV word.
+ return createAndGetLanguageModelParamOfWord(prevWord, targetWord, timestamp,
+ false /* isValidWord */, locale);
+ }
+
+ private static LanguageModelParam createAndGetLanguageModelParamOfWord(
+ final String prevWord, final String targetWord, final int timestamp,
+ final boolean isValidWord, final Locale locale) {
+ final String word;
+ if (StringUtils.getCapitalizationType(targetWord) == StringUtils.CAPITALIZE_FIRST
+ && prevWord == null && !isValidWord) {
+ word = targetWord.toLowerCase(locale);
+ } else {
+ word = targetWord;
+ }
+ final int unigramProbability = isValidWord ?
+ UNIGRAM_PROBABILITY_FOR_VALID_WORD : UNIGRAM_PROBABILITY_FOR_OOV_WORD;
+ if (prevWord == null) {
+ if (DEBUG) {
+ Log.d(TAG, "--- add unigram: current("
+ + (isValidWord ? "Valid" : "OOV") + ") = " + word);
+ }
+ return new LanguageModelParam(word, unigramProbability, timestamp);
+ }
+ if (DEBUG) {
+ Log.d(TAG, "--- add bigram: prev = " + prevWord + ", current("
+ + (isValidWord ? "Valid" : "OOV") + ") = " + word);
+ }
+ final int bigramProbability = isValidWord ?
+ BIGRAM_PROBABILITY_FOR_VALID_WORD : BIGRAM_PROBABILITY_FOR_OOV_WORD;
+ return new LanguageModelParam(prevWord, word, unigramProbability,
+ bigramProbability, timestamp);
+ }
+}