aboutsummaryrefslogtreecommitdiffstats
path: root/java/src/com/android/inputmethod/research/MainLogBuffer.java
diff options
context:
space:
mode:
author: Kurt Partridge <kep@google.com> 2013-04-15 18:41:59 -0700
committer: Kurt Partridge <kep@google.com> 2013-04-19 08:55:39 -0700
commit: e92b5e145f74808ff778a42dc5ba979aa27343ca (patch)
tree: 629dd787ce4ffd4bbd763edad6d5252b9ca1ab17 /java/src/com/android/inputmethod/research/MainLogBuffer.java
parent: 56f35a10cde1beeea51d99427992d832fa2de2bb (diff)
download: latinime-e92b5e145f74808ff778a42dc5ba979aa27343ca.tar.gz
latinime-e92b5e145f74808ff778a42dc5ba979aa27343ca.tar.xz
latinime-e92b5e145f74808ff778a42dc5ba979aa27343ca.zip
Allow LogUnits to hold >1 word
LogUnits have been annotated with the autocorrected words, but until now each annotation was assumed to be a single word without spaces. Spaceless typing, however, can result in spaces in the LogUnit label.

With this change, the LogUnit inspects the autocorrected text to determine how many words were inserted, and counts them accurately.

This change corrects a privacy problem: if the word sampling algorithm chose a LogUnit that actually contained multiple words, then more than two successive words would be included in the log.

Change-Id: I7c01c3dd3ac33d7e96c00836256bae9c14b124ed
Diffstat (limited to 'java/src/com/android/inputmethod/research/MainLogBuffer.java')
-rw-r--r-- java/src/com/android/inputmethod/research/MainLogBuffer.java 44
1 files changed, 23 insertions, 21 deletions
diff --git a/java/src/com/android/inputmethod/research/MainLogBuffer.java b/java/src/com/android/inputmethod/research/MainLogBuffer.java
index cd4c1db6e..42ef5d3b6 100644
--- a/java/src/com/android/inputmethod/research/MainLogBuffer.java
+++ b/java/src/com/android/inputmethod/research/MainLogBuffer.java
@@ -126,10 +126,7 @@ public abstract class MainLogBuffer extends FixedLogBuffer {
final int length = logUnits.size();
for (int i = 0; i < length; i++) {
final LogUnit logUnit = logUnits.get(i);
- final String word = logUnit.getWord();
- if (word != null) {
- numWordsInLogUnitList++;
- }
+ numWordsInLogUnitList += logUnit.getNumWords();
}
return numWordsInLogUnitList >= minNGramSize;
}
@@ -153,29 +150,31 @@ public abstract class MainLogBuffer extends FixedLogBuffer {
// the complete buffer contents in detail.
int numWordsInLogUnitList = 0;
final int length = logUnits.size();
- for (int i = 0; i < length; i++) {
- final LogUnit logUnit = logUnits.get(i);
- if (!logUnit.hasWord()) {
+ for (final LogUnit logUnit : logUnits) {
+ if (!logUnit.hasOneOrMoreWords()) {
// Digits outside words are a privacy threat.
if (logUnit.mayContainDigit()) {
return false;
}
} else {
- numWordsInLogUnitList++;
- final String word = logUnit.getWord();
- // Words not in the dictionary are a privacy threat.
- if (ResearchLogger.hasLetters(word) && !(dictionary.isValidWord(word))) {
- if (DEBUG) {
- Log.d(TAG, "NOT SAFE!: hasLetters: " + ResearchLogger.hasLetters(word)
- + ", isValid: " + (dictionary.isValidWord(word)));
+ numWordsInLogUnitList += logUnit.getNumWords();
+ final String[] words = logUnit.getWordsAsStringArray();
+ for (final String word : words) {
+ // Words not in the dictionary are a privacy threat.
+ if (ResearchLogger.hasLetters(word) && !(dictionary.isValidWord(word))) {
+ if (DEBUG) {
+ Log.d(TAG, "\"" + word + "\" NOT SAFE!: hasLetters: "
+ + ResearchLogger.hasLetters(word)
+ + ", isValid: " + (dictionary.isValidWord(word)));
+ }
+ return false;
}
- return false;
}
}
}
- // Finally, only return true if the minNGramSize is met.
- return numWordsInLogUnitList >= minNGramSize;
+ // Finally, only return true if the ngram is the right size.
+ return numWordsInLogUnitList == minNGramSize;
}
public void shiftAndPublishAll() {
@@ -198,11 +197,14 @@ public abstract class MainLogBuffer extends FixedLogBuffer {
shiftOutWords(N_GRAM_SIZE);
mNumWordsUntilSafeToSample = mNumWordsBetweenNGrams;
} else {
- // No good n-gram at front, and buffer is full. Shift out the first word (or if there
- // is none, the existing logUnits).
- logUnits = peekAtFirstNWords(1);
+ // No good n-gram at front, and buffer is full. Shift out up through the first logUnit
+ // with associated words (or if there is none, all the existing logUnits).
+ logUnits.clear();
+ for (LogUnit logUnit = shiftOut(); logUnit != null && !logUnit.hasOneOrMoreWords();
+ logUnit = shiftOut()) {
+ logUnits.add(logUnit);
+ }
publish(logUnits, false /* canIncludePrivateData */);
- shiftOutWords(1);
}
}