about | summary | refs | log | tree | commit | diff | stats
path: root/java/src
diff options
context:
space:
mode:
author: Keisuke Kuroyanagi <ksk@google.com> 2014-09-23 10:18:38 +0000
committer: Android (Google) Code Review <android-gerrit@google.com> 2014-09-23 10:18:41 +0000
commit: 9997853d6fa5b17402c0a8a490fa3bafc0e58933 (patch)
tree: 98284a28675b8dcc0ad8b544e4894ad3291b5d4c /java/src
parent: e2e48eb9b341f4a0f62443cf031009b895c881e4 (diff)
parent: ddfaeff544c77b1d42857ebf34ca2545e8868e13 (diff)
download: latinime-9997853d6fa5b17402c0a8a490fa3bafc0e58933.tar.gz
latinime-9997853d6fa5b17402c0a8a490fa3bafc0e58933.tar.xz
latinime-9997853d6fa5b17402c0a8a490fa3bafc0e58933.zip
Merge "Prepare supporting n-gram for user history dictionary."
Diffstat (limited to 'java/src')
-rw-r--r-- java/src/com/android/inputmethod/latin/PrevWordsInfo.java 7
-rw-r--r-- java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionary.java 35
2 files changed, 30 insertions, 12 deletions
diff --git a/java/src/com/android/inputmethod/latin/PrevWordsInfo.java b/java/src/com/android/inputmethod/latin/PrevWordsInfo.java
index 76d4f57da..1b7e8f96d 100644
--- a/java/src/com/android/inputmethod/latin/PrevWordsInfo.java
+++ b/java/src/com/android/inputmethod/latin/PrevWordsInfo.java
@@ -126,6 +126,13 @@ public class PrevWordsInfo {
}
}
+ public PrevWordsInfo getTrimmedPrevWordsInfo(final int maxPrevWordCount) {
+ final int newSize = Math.min(maxPrevWordCount, mPrevWordsInfo.length);
+ // TODO: Quit creating a new array.
+ final WordInfo[] prevWordsInfo = Arrays.copyOf(mPrevWordsInfo, newSize);
+ return new PrevWordsInfo(prevWordsInfo);
+ }
+
public int getPrevWordCount() {
return mPrevWordsInfo.length;
}
diff --git a/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionary.java b/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionary.java
index 34d4d4ed7..d1486f630 100644
--- a/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionary.java
@@ -35,6 +35,7 @@ import java.util.Locale;
*/
public class UserHistoryDictionary extends DecayingExpandableBinaryDictionaryBase {
/* package */ static final String NAME = UserHistoryDictionary.class.getSimpleName();
+ private final static int SUPPORTED_NGRAM = 2; // TODO: 3
// TODO: Make this constructor private
/* package */ UserHistoryDictionary(final Context context, final Locale locale) {
@@ -61,9 +62,7 @@ public class UserHistoryDictionary extends DecayingExpandableBinaryDictionaryBas
public static void addToDictionary(final ExpandableBinaryDictionary userHistoryDictionary,
final PrevWordsInfo prevWordsInfo, final String word, final boolean isValid,
final int timestamp, final DistracterFilter distracterFilter) {
- final CharSequence prevWord = prevWordsInfo.mPrevWordsInfo[0].mWord;
- if (word.length() > Constants.DICTIONARY_MAX_WORD_LENGTH ||
- (prevWord != null && prevWord.length() > Constants.DICTIONARY_MAX_WORD_LENGTH)) {
+ if (word.length() > Constants.DICTIONARY_MAX_WORD_LENGTH) {
return;
}
final int frequency = isValid ?
@@ -71,17 +70,29 @@ public class UserHistoryDictionary extends DecayingExpandableBinaryDictionaryBas
userHistoryDictionary.addUnigramEntryWithCheckingDistracter(word, frequency,
null /* shortcutTarget */, 0 /* shortcutFreq */, false /* isNotAWord */,
false /* isBlacklisted */, timestamp, distracterFilter);
- // Do not insert a word as a bigram of itself
- if (TextUtils.equals(word, prevWord)) {
- return;
- }
- if (null != prevWord) {
- if (prevWordsInfo.mPrevWordsInfo[0].mIsBeginningOfSentence) {
- // Beginning-of-Sentence n-gram entry is treated as a n-gram entry of invalid word.
- userHistoryDictionary.addNgramEntry(prevWordsInfo, word,
+
+ final boolean isBeginningOfSentenceContext =
+ prevWordsInfo.mPrevWordsInfo[0].mIsBeginningOfSentence;
+ final PrevWordsInfo prevWordsInfoToBeSaved =
+ prevWordsInfo.getTrimmedPrevWordsInfo(SUPPORTED_NGRAM - 1);
+ for (int i = 0; i < prevWordsInfoToBeSaved.getPrevWordCount(); i++) {
+ final CharSequence prevWord = prevWordsInfoToBeSaved.mPrevWordsInfo[i].mWord;
+ if (prevWord == null || (prevWord.length() > Constants.DICTIONARY_MAX_WORD_LENGTH)) {
+ return;
+ }
+ // Do not insert a word as a bigram of itself
+ if (i == 0 && TextUtils.equals(word, prevWord)) {
+ return;
+ }
+ if (isBeginningOfSentenceContext) {
+ // Beginning-of-Sentence n-gram entry is added as an n-gram entry of an OOV word.
+ userHistoryDictionary.addNgramEntry(
+ prevWordsInfoToBeSaved.getTrimmedPrevWordsInfo(i + 1), word,
FREQUENCY_FOR_WORDS_NOT_IN_DICTS, timestamp);
} else {
- userHistoryDictionary.addNgramEntry(prevWordsInfo, word, frequency, timestamp);
+ userHistoryDictionary.addNgramEntry(
+ prevWordsInfoToBeSaved.getTrimmedPrevWordsInfo(i + 1), word, frequency,
+ timestamp);
}
}
}