Allow LogUnits to hold >1 word

LogUnits have been annotated with the autocorrected words, but until now each annotation was assumed to be a single word without spaces. However, spaceless typing can result in spaces in the LogUnit label. With this change, the LogUnit inspects the autocorrected text to determine how many words were inserted, and counts them accurately. This change also corrects a privacy problem: if the word sampling algorithm chose a LogUnit that actually contained multiple words, then more than two successive words would be included in the log.

Change-Id: I7c01c3dd3ac33d7e96c00836256bae9c14b124ed
author: Kurt Partridge <kep@google.com> 2013-04-15 18:41:59 -0700
committer: Kurt Partridge <kep@google.com> 2013-04-19 08:55:39 -0700
commit: e92b5e145f74808ff778a42dc5ba979aa27343ca (patch)
tree: 629dd787ce4ffd4bbd763edad6d5252b9ca1ab17 /java/src/com/android/inputmethod/research/FixedLogBuffer.java
parent: 56f35a10cde1beeea51d99427992d832fa2de2bb (diff)
download: latinime-e92b5e145f74808ff778a42dc5ba979aa27343ca.tar.gz
latinime-e92b5e145f74808ff778a42dc5ba979aa27343ca.tar.xz
latinime-e92b5e145f74808ff778a42dc5ba979aa27343ca.zip
1 files changed, 28 insertions, 23 deletions
diff --git a/java/src/com/android/inputmethod/research/FixedLogBuffer.java b/java/src/com/android/inputmethod/research/FixedLogBuffer.java
index 641bf7eae..4249af544 100644
--- a/java/src/com/android/inputmethod/research/FixedLogBuffer.java
+++ b/java/src/com/android/inputmethod/research/FixedLogBuffer.java
@@ -57,28 +57,29 @@ public class FixedLogBuffer extends LogBuffer {
      */
     @Override
     public void shiftIn(final LogUnit newLogUnit) {
-        if (!newLogUnit.hasWord()) {
-            // This LogUnit isn't a word, so it doesn't count toward the word-limit.
+        if (!newLogUnit.hasOneOrMoreWords()) {
+            // This LogUnit doesn't contain any word, so it doesn't count toward the word-limit.
             super.shiftIn(newLogUnit);
             return;
         }
+        final int numWordsIncoming = newLogUnit.getNumWords();
         if (mNumActualWords >= mWordCapacity) {
             // Give subclass a chance to handle the buffer full condition by shifting out logUnits.
             onBufferFull();
             // If still full, evict.
             if (mNumActualWords >= mWordCapacity) {
-                shiftOutWords(1);
+                shiftOutWords(numWordsIncoming);
             }
         }
         super.shiftIn(newLogUnit);
-        mNumActualWords++; // Must be a word, or we wouldn't be here.
+        mNumActualWords += numWordsIncoming;
     }
 
     @Override
     public LogUnit unshiftIn() {
         final LogUnit logUnit = super.unshiftIn();
-        if (logUnit != null && logUnit.hasWord()) {
-            mNumActualWords--;
+        if (logUnit != null && logUnit.hasOneOrMoreWords()) {
+            mNumActualWords -= logUnit.getNumWords();
         }
         return logUnit;
     }
@@ -109,8 +110,8 @@ public class FixedLogBuffer extends LogBuffer {
     @Override
     public LogUnit shiftOut() {
         final LogUnit logUnit = super.shiftOut();
-        if (logUnit != null && logUnit.hasWord()) {
-            mNumActualWords--;
+        if (logUnit != null && logUnit.hasOneOrMoreWords()) {
+            mNumActualWords -= logUnit.getNumWords();
         }
         return logUnit;
     }
@@ -121,15 +122,15 @@ public class FixedLogBuffer extends LogBuffer {
      * If there are less than {@code numWords} word-containing {@link LogUnit}s, shifts out
      * all {@code LogUnit}s in the buffer.
      *
-     * @param numWords the number of word-containing {@link LogUnit}s to shift out
+     * @param numWords the minimum number of word-containing {@link LogUnit}s to shift out
      * @return the number of actual {@code LogUnit}s shifted out
      */
     protected int shiftOutWords(final int numWords) {
         int numWordContainingLogUnitsShiftedOut = 0;
         for (LogUnit logUnit = shiftOut(); logUnit != null
                 && numWordContainingLogUnitsShiftedOut < numWords; logUnit = shiftOut()) {
-            if (logUnit.hasWord()) {
-                numWordContainingLogUnitsShiftedOut++;
+            if (logUnit.hasOneOrMoreWords()) {
+                numWordContainingLogUnitsShiftedOut += logUnit.getNumWords();
             }
         }
         return numWordContainingLogUnitsShiftedOut;
@@ -144,27 +145,31 @@ public class FixedLogBuffer extends LogBuffer {
     }
 
     /**
-     * Returns a list of {@link LogUnit}s at the front of the buffer that have associated words.  No
-     * more than {@code n} LogUnits will have words associated with them.  If there are not enough
-     * LogUnits in the buffer to meet the word requirement, returns the all LogUnits.
+     * Returns a list of {@link LogUnit}s at the front of the buffer that have words associated with
+     * them.
+     *
+     * There will be no more than {@code n} words in the returned list.  So if 2 words are
+     * requested, and the first LogUnit has 3 words, it is not returned.  If 2 words are requested,
+     * and the first LogUnit has only 1 word, and the next LogUnit 2 words, only the first LogUnit
+     * is returned.  If the first LogUnit has no words associated with it, and the second LogUnit
+     * has three words, then only the first LogUnit (which has no associated words) is returned.  If
+     * there are not enough LogUnits in the buffer to meet the word requirement, then all LogUnits
+     * will be returned.
      *
      * @param n The maximum number of {@link LogUnit}s with words to return.
      * @return The list of the {@link LogUnit}s containing the first n words
      */
     public ArrayList<LogUnit> peekAtFirstNWords(int n) {
         final LinkedList<LogUnit> logUnits = getLogUnits();
-        final int length = logUnits.size();
         // Allocate space for n*2 logUnits.  There will be at least n, one for each word, and
         // there may be additional for punctuation, between-word commands, etc.  This should be
         // enough that reallocation won't be necessary.
-        final ArrayList<LogUnit> list = new ArrayList<LogUnit>(n * 2);
-        for (int i = 0; i < length && n > 0; i++) {
-            final LogUnit logUnit = logUnits.get(i);
-            list.add(logUnit);
-            if (logUnit.hasWord()) {
-                n--;
-            }
+        final ArrayList<LogUnit> resultList = new ArrayList<LogUnit>(n * 2);
+        for (final LogUnit logUnit : logUnits) {
+            n -= logUnit.getNumWords();
+            if (n < 0) break;
+            resultList.add(logUnit);
         }
-        return list;
+        return resultList;
     }
 }
author	Kurt Partridge <kep@google.com>	2013-04-15 18:41:59 -0700
committer	Kurt Partridge <kep@google.com>	2013-04-19 08:55:39 -0700
commit	e92b5e145f74808ff778a42dc5ba979aa27343ca (patch)
tree	629dd787ce4ffd4bbd763edad6d5252b9ca1ab17 /java/src/com/android/inputmethod/research/FixedLogBuffer.java
parent	56f35a10cde1beeea51d99427992d832fa2de2bb (diff)
download	latinime-e92b5e145f74808ff778a42dc5ba979aa27343ca.tar.gz latinime-e92b5e145f74808ff778a42dc5ba979aa27343ca.tar.xz latinime-e92b5e145f74808ff778a42dc5ba979aa27343ca.zip