diff options
Diffstat (limited to 'java/src/com/android/inputmethod/latin/personalization')
-rw-r--r-- | java/src/com/android/inputmethod/latin/personalization/PersonalizationDataChunk.java | 8 | ||||
-rw-r--r-- | java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionary.java | 35 |
2 files changed, 28 insertions, 15 deletions
diff --git a/java/src/com/android/inputmethod/latin/personalization/PersonalizationDataChunk.java b/java/src/com/android/inputmethod/latin/personalization/PersonalizationDataChunk.java index 9d72de8c5..734ed5583 100644 --- a/java/src/com/android/inputmethod/latin/personalization/PersonalizationDataChunk.java +++ b/java/src/com/android/inputmethod/latin/personalization/PersonalizationDataChunk.java @@ -18,20 +18,22 @@ package com.android.inputmethod.latin.personalization; import java.util.Collections; import java.util.List; -import java.util.Locale; public class PersonalizationDataChunk { + public static final String LANGUAGE_UNKNOWN = ""; + public final boolean mInputByUser; public final List<String> mTokens; public final int mTimestampInSeconds; public final String mPackageName; - public final Locale mlocale = null; + public final String mDetectedLanguage; public PersonalizationDataChunk(boolean inputByUser, final List<String> tokens, - final int timestampInSeconds, final String packageName) { + final int timestampInSeconds, final String packageName, final String detectedLanguage) { mInputByUser = inputByUser; mTokens = Collections.unmodifiableList(tokens); mTimestampInSeconds = timestampInSeconds; mPackageName = packageName; + mDetectedLanguage = detectedLanguage; } } diff --git a/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionary.java b/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionary.java index 34d4d4ed7..d1486f630 100644 --- a/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionary.java +++ b/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionary.java @@ -35,6 +35,7 @@ import java.util.Locale; */ public class UserHistoryDictionary extends DecayingExpandableBinaryDictionaryBase { /* package */ static final String NAME = UserHistoryDictionary.class.getSimpleName(); + private final static int SUPPORTED_NGRAM = 2; // TODO: 3 // TODO: Make this constructor private /* package */ UserHistoryDictionary(final Context context, final Locale locale) { @@ -61,9 +62,7 @@ public class UserHistoryDictionary extends DecayingExpandableBinaryDictionaryBas public static void addToDictionary(final ExpandableBinaryDictionary userHistoryDictionary, final PrevWordsInfo prevWordsInfo, final String word, final boolean isValid, final int timestamp, final DistracterFilter distracterFilter) { - final CharSequence prevWord = prevWordsInfo.mPrevWordsInfo[0].mWord; - if (word.length() > Constants.DICTIONARY_MAX_WORD_LENGTH || - (prevWord != null && prevWord.length() > Constants.DICTIONARY_MAX_WORD_LENGTH)) { + if (word.length() > Constants.DICTIONARY_MAX_WORD_LENGTH) { return; } final int frequency = isValid ? @@ -71,17 +70,29 @@ public class UserHistoryDictionary extends DecayingExpandableBinaryDictionaryBas userHistoryDictionary.addUnigramEntryWithCheckingDistracter(word, frequency, null /* shortcutTarget */, 0 /* shortcutFreq */, false /* isNotAWord */, false /* isBlacklisted */, timestamp, distracterFilter); - // Do not insert a word as a bigram of itself - if (TextUtils.equals(word, prevWord)) { - return; - } - if (null != prevWord) { - if (prevWordsInfo.mPrevWordsInfo[0].mIsBeginningOfSentence) { - // Beginning-of-Sentence n-gram entry is treated as a n-gram entry of invalid word. - userHistoryDictionary.addNgramEntry(prevWordsInfo, word, + + final boolean isBeginningOfSentenceContext = + prevWordsInfo.mPrevWordsInfo[0].mIsBeginningOfSentence; + final PrevWordsInfo prevWordsInfoToBeSaved = + prevWordsInfo.getTrimmedPrevWordsInfo(SUPPORTED_NGRAM - 1); + for (int i = 0; i < prevWordsInfoToBeSaved.getPrevWordCount(); i++) { + final CharSequence prevWord = prevWordsInfoToBeSaved.mPrevWordsInfo[i].mWord; + if (prevWord == null || (prevWord.length() > Constants.DICTIONARY_MAX_WORD_LENGTH)) { + return; + } + // Do not insert a word as a bigram of itself + if (i == 0 && TextUtils.equals(word, prevWord)) { + return; + } + if (isBeginningOfSentenceContext) { + // Beginning-of-Sentence n-gram entry is added as an n-gram entry of an OOV word. + userHistoryDictionary.addNgramEntry( + prevWordsInfoToBeSaved.getTrimmedPrevWordsInfo(i + 1), word, FREQUENCY_FOR_WORDS_NOT_IN_DICTS, timestamp); } else { - userHistoryDictionary.addNgramEntry(prevWordsInfo, word, frequency, timestamp); + userHistoryDictionary.addNgramEntry( + prevWordsInfoToBeSaved.getTrimmedPrevWordsInfo(i + 1), word, frequency, + timestamp); } } } |