diff options
author | 2013-04-15 18:41:59 -0700 | |
---|---|---|
committer | 2013-04-19 08:55:39 -0700 | |
commit | e92b5e145f74808ff778a42dc5ba979aa27343ca (patch) | |
tree | 629dd787ce4ffd4bbd763edad6d5252b9ca1ab17 /java/src/com/android/inputmethod/research/MainLogBuffer.java | |
parent | 56f35a10cde1beeea51d99427992d832fa2de2bb (diff) | |
download | latinime-e92b5e145f74808ff778a42dc5ba979aa27343ca.tar.gz latinime-e92b5e145f74808ff778a42dc5ba979aa27343ca.tar.xz latinime-e92b5e145f74808ff778a42dc5ba979aa27343ca.zip |
Allow LogUnits to hold >1 word
LogUnits have been annotated with the autocorrected words, but
until now this was assumed to be a single word without spaces.
But spaceless typing can result in spaces in the LogUnit label. With this
change, the LogUnit inspects the autocorrected text to determine how many
words were inserted, and counts them accurately.
This change corrects a privacy problem, which was that if the word sampling
algorithm chose a LogUnit that actually contained multiple words, then more
than two successive words would be included in the log.
Change-Id: I7c01c3dd3ac33d7e96c00836256bae9c14b124ed
Diffstat (limited to 'java/src/com/android/inputmethod/research/MainLogBuffer.java')
-rw-r--r-- | java/src/com/android/inputmethod/research/MainLogBuffer.java | 44 |
1 files changed, 23 insertions, 21 deletions
diff --git a/java/src/com/android/inputmethod/research/MainLogBuffer.java b/java/src/com/android/inputmethod/research/MainLogBuffer.java index cd4c1db6e..42ef5d3b6 100644 --- a/java/src/com/android/inputmethod/research/MainLogBuffer.java +++ b/java/src/com/android/inputmethod/research/MainLogBuffer.java @@ -126,10 +126,7 @@ public abstract class MainLogBuffer extends FixedLogBuffer { final int length = logUnits.size(); for (int i = 0; i < length; i++) { final LogUnit logUnit = logUnits.get(i); - final String word = logUnit.getWord(); - if (word != null) { - numWordsInLogUnitList++; - } + numWordsInLogUnitList += logUnit.getNumWords(); } return numWordsInLogUnitList >= minNGramSize; } @@ -153,29 +150,31 @@ public abstract class MainLogBuffer extends FixedLogBuffer { // the complete buffer contents in detail. int numWordsInLogUnitList = 0; final int length = logUnits.size(); - for (int i = 0; i < length; i++) { - final LogUnit logUnit = logUnits.get(i); - if (!logUnit.hasWord()) { + for (final LogUnit logUnit : logUnits) { + if (!logUnit.hasOneOrMoreWords()) { // Digits outside words are a privacy threat. if (logUnit.mayContainDigit()) { return false; } } else { - numWordsInLogUnitList++; - final String word = logUnit.getWord(); - // Words not in the dictionary are a privacy threat. - if (ResearchLogger.hasLetters(word) && !(dictionary.isValidWord(word))) { - if (DEBUG) { - Log.d(TAG, "NOT SAFE!: hasLetters: " + ResearchLogger.hasLetters(word) - + ", isValid: " + (dictionary.isValidWord(word))); + numWordsInLogUnitList += logUnit.getNumWords(); + final String[] words = logUnit.getWordsAsStringArray(); + for (final String word : words) { + // Words not in the dictionary are a privacy threat. + if (ResearchLogger.hasLetters(word) && !(dictionary.isValidWord(word))) { + if (DEBUG) { + Log.d(TAG, "\"" + word + "\" NOT SAFE!: hasLetters: " + + ResearchLogger.hasLetters(word) + + ", isValid: " + (dictionary.isValidWord(word))); + } + return false; } - return false; } } } - // Finally, only return true if the minNGramSize is met. - return numWordsInLogUnitList >= minNGramSize; + // Finally, only return true if the ngram is the right size. + return numWordsInLogUnitList == minNGramSize; } public void shiftAndPublishAll() { @@ -198,11 +197,14 @@ public abstract class MainLogBuffer extends FixedLogBuffer { shiftOutWords(N_GRAM_SIZE); mNumWordsUntilSafeToSample = mNumWordsBetweenNGrams; } else { - // No good n-gram at front, and buffer is full. Shift out the first word (or if there - // is none, the existing logUnits). - logUnits = peekAtFirstNWords(1); + // No good n-gram at front, and buffer is full. Shift out up through the first logUnit + // with associated words (or if there is none, all the existing logUnits). + logUnits.clear(); + for (LogUnit logUnit = shiftOut(); logUnit != null && !logUnit.hasOneOrMoreWords(); + logUnit = shiftOut()) { + logUnits.add(logUnit); + } publish(logUnits, false /* canIncludePrivateData */); - shiftOutWords(1); } } |