Allow LogUnits to hold >1 word

LogUnits have been annotated with the autocorrected words, but until now each annotation was assumed to be a single word without spaces. However, spaceless typing can result in spaces in the LogUnit label. With this change, the LogUnit inspects the autocorrected text to determine how many words were inserted, and counts them accurately.

This change corrects a privacy problem: if the word sampling algorithm chose a LogUnit that actually contained multiple words, more than two successive words would be included in the log.

Change-Id: I7c01c3dd3ac33d7e96c00836256bae9c14b124ed
author: Kurt Partridge <kep@google.com> 2013-04-15 18:41:59 -0700
committer: Kurt Partridge <kep@google.com> 2013-04-19 08:55:39 -0700
commit: e92b5e145f74808ff778a42dc5ba979aa27343ca (patch)
tree: 629dd787ce4ffd4bbd763edad6d5252b9ca1ab17 /java/src/com/android/inputmethod/research/MainLogBuffer.java
parent: 56f35a10cde1beeea51d99427992d832fa2de2bb (diff)
download: latinime-e92b5e145f74808ff778a42dc5ba979aa27343ca.tar.gz
latinime-e92b5e145f74808ff778a42dc5ba979aa27343ca.tar.xz
latinime-e92b5e145f74808ff778a42dc5ba979aa27343ca.zip
1 file changed, 23 insertions, 21 deletions
diff --git a/java/src/com/android/inputmethod/research/MainLogBuffer.java b/java/src/com/android/inputmethod/research/MainLogBuffer.java
index cd4c1db6e..42ef5d3b6 100644
--- a/java/src/com/android/inputmethod/research/MainLogBuffer.java
+++ b/java/src/com/android/inputmethod/research/MainLogBuffer.java
@@ -126,10 +126,7 @@ public abstract class MainLogBuffer extends FixedLogBuffer {
             final int length = logUnits.size();
             for (int i = 0; i < length; i++) {
                 final LogUnit logUnit = logUnits.get(i);
-                final String word = logUnit.getWord();
-                if (word != null) {
-                    numWordsInLogUnitList++;
-                }
+                numWordsInLogUnitList += logUnit.getNumWords();
             }
             return numWordsInLogUnitList >= minNGramSize;
         }
@@ -153,29 +150,31 @@ public abstract class MainLogBuffer extends FixedLogBuffer {
         // the complete buffer contents in detail.
         int numWordsInLogUnitList = 0;
         final int length = logUnits.size();
-        for (int i = 0; i < length; i++) {
-            final LogUnit logUnit = logUnits.get(i);
-            if (!logUnit.hasWord()) {
+        for (final LogUnit logUnit : logUnits) {
+            if (!logUnit.hasOneOrMoreWords()) {
                 // Digits outside words are a privacy threat.
                 if (logUnit.mayContainDigit()) {
                     return false;
                 }
             } else {
-                numWordsInLogUnitList++;
-                final String word = logUnit.getWord();
-                // Words not in the dictionary are a privacy threat.
-                if (ResearchLogger.hasLetters(word) && !(dictionary.isValidWord(word))) {
-                    if (DEBUG) {
-                        Log.d(TAG, "NOT SAFE!: hasLetters: " + ResearchLogger.hasLetters(word)
-                                + ", isValid: " + (dictionary.isValidWord(word)));
+                numWordsInLogUnitList += logUnit.getNumWords();
+                final String[] words = logUnit.getWordsAsStringArray();
+                for (final String word : words) {
+                    // Words not in the dictionary are a privacy threat.
+                    if (ResearchLogger.hasLetters(word) && !(dictionary.isValidWord(word))) {
+                        if (DEBUG) {
+                            Log.d(TAG, "\"" + word + "\" NOT SAFE!: hasLetters: "
+                                    + ResearchLogger.hasLetters(word)
+                                    + ", isValid: " + (dictionary.isValidWord(word)));
+                        }
+                        return false;
                     }
-                    return false;
                 }
             }
         }
 
-        // Finally, only return true if the minNGramSize is met.
-        return numWordsInLogUnitList >= minNGramSize;
+        // Finally, only return true if the ngram is the right size.
+        return numWordsInLogUnitList == minNGramSize;
     }
 
     public void shiftAndPublishAll() {
@@ -198,11 +197,14 @@ public abstract class MainLogBuffer extends FixedLogBuffer {
             shiftOutWords(N_GRAM_SIZE);
             mNumWordsUntilSafeToSample = mNumWordsBetweenNGrams;
         } else {
-            // No good n-gram at front, and buffer is full.  Shift out the first word (or if there
-            // is none, the existing logUnits).
-            logUnits = peekAtFirstNWords(1);
+            // No good n-gram at front, and buffer is full.  Shift out up through the first logUnit
+            // with associated words (or if there is none, all the existing logUnits).
+            logUnits.clear();
+            for (LogUnit logUnit = shiftOut(); logUnit != null && !logUnit.hasOneOrMoreWords();
+                    logUnit = shiftOut()) {
+                logUnits.add(logUnit);
+            }
             publish(logUnits, false /* canIncludePrivateData */);
-            shiftOutWords(1);
         }
     }
author	Kurt Partridge <kep@google.com>	2013-04-15 18:41:59 -0700
committer	Kurt Partridge <kep@google.com>	2013-04-19 08:55:39 -0700
commit	e92b5e145f74808ff778a42dc5ba979aa27343ca (patch)
tree	629dd787ce4ffd4bbd763edad6d5252b9ca1ab17 /java/src/com/android/inputmethod/research/MainLogBuffer.java
parent	56f35a10cde1beeea51d99427992d832fa2de2bb (diff)
download	latinime-e92b5e145f74808ff778a42dc5ba979aa27343ca.tar.gz latinime-e92b5e145f74808ff778a42dc5ba979aa27343ca.tar.xz latinime-e92b5e145f74808ff778a42dc5ba979aa27343ca.zip