diff options
Diffstat (limited to 'java/src/com/android/inputmethod/research/MainLogBuffer.java')
-rw-r--r-- | java/src/com/android/inputmethod/research/MainLogBuffer.java | 89 |
1 files changed, 51 insertions, 38 deletions
diff --git a/java/src/com/android/inputmethod/research/MainLogBuffer.java b/java/src/com/android/inputmethod/research/MainLogBuffer.java
index 3303d2bdb..9aa349906 100644
--- a/java/src/com/android/inputmethod/research/MainLogBuffer.java
+++ b/java/src/com/android/inputmethod/research/MainLogBuffer.java
@@ -23,9 +23,9 @@ import com.android.inputmethod.latin.Dictionary;
 import com.android.inputmethod.latin.Suggest;
 import com.android.inputmethod.latin.define.ProductionFlag;
 
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.LinkedList;
-import java.util.Random;
 
 /**
  * MainLogBuffer is a FixedLogBuffer that tracks the state of LogUnits to make privacy guarantees.
@@ -100,10 +100,6 @@ public abstract class MainLogBuffer extends FixedLogBuffer {
         return mSuggest.getMainDictionary();
     }
 
-    public void resetWordCounter() {
-        mNumWordsUntilSafeToSample = mNumWordsBetweenNGrams;
-    }
-
     public void setIsStopping() {
         mIsStopping = true;
     }
@@ -131,10 +127,7 @@ public abstract class MainLogBuffer extends FixedLogBuffer {
         final int length = logUnits.size();
         for (int i = 0; i < length; i++) {
             final LogUnit logUnit = logUnits.get(i);
-            final String word = logUnit.getWord();
-            if (word != null) {
-                numWordsInLogUnitList++;
-            }
+            numWordsInLogUnitList += logUnit.getNumWords();
         }
         return numWordsInLogUnitList >= minNGramSize;
     }
@@ -158,32 +151,34 @@ public abstract class MainLogBuffer extends FixedLogBuffer {
         // the complete buffer contents in detail.
         int numWordsInLogUnitList = 0;
         final int length = logUnits.size();
-        for (int i = 0; i < length; i++) {
-            final LogUnit logUnit = logUnits.get(i);
-            if (!logUnit.hasWord()) {
+        for (final LogUnit logUnit : logUnits) {
+            if (!logUnit.hasOneOrMoreWords()) {
                 // Digits outside words are a privacy threat.
                 if (logUnit.mayContainDigit()) {
                     return false;
                 }
             } else {
-                numWordsInLogUnitList++;
-                final String word = logUnit.getWord();
-                // Words not in the dictionary are a privacy threat.
-                if (ResearchLogger.hasLetters(word) && !(dictionary.isValidWord(word))) {
-                    if (DEBUG) {
-                        Log.d(TAG, "NOT SAFE!: hasLetters: " + ResearchLogger.hasLetters(word)
-                                + ", isValid: " + (dictionary.isValidWord(word)));
+                numWordsInLogUnitList += logUnit.getNumWords();
+                final String[] words = logUnit.getWordsAsStringArray();
+                for (final String word : words) {
+                    // Words not in the dictionary are a privacy threat.
+                    if (ResearchLogger.hasLetters(word) && !(dictionary.isValidWord(word))) {
+                        if (DEBUG) {
+                            Log.d(TAG, "\"" + word + "\" NOT SAFE!: hasLetters: "
+                                    + ResearchLogger.hasLetters(word)
+                                    + ", isValid: " + (dictionary.isValidWord(word)));
+                        }
+                        return false;
                     }
-                    return false;
                 }
             }
         }
 
-        // Finally, only return true if the minNGramSize is met.
-        return numWordsInLogUnitList >= minNGramSize;
+        // Finally, only return true if the ngram is the right size.
+        return numWordsInLogUnitList == minNGramSize;
     }
 
-    public void shiftAndPublishAll() {
+    public void shiftAndPublishAll() throws IOException {
         final LinkedList<LogUnit> logUnits = getLogUnits();
         while (!logUnits.isEmpty()) {
             publishLogUnitsAtFrontOfBuffer();
@@ -192,23 +187,40 @@ public abstract class MainLogBuffer extends FixedLogBuffer {
 
     @Override
     protected final void onBufferFull() {
-        publishLogUnitsAtFrontOfBuffer();
+        try {
+            publishLogUnitsAtFrontOfBuffer();
+        } catch (final IOException e) {
+            if (DEBUG) {
+                Log.w(TAG, "IOException when publishing front of LogBuffer", e);
+            }
+        }
     }
 
-    protected final void publishLogUnitsAtFrontOfBuffer() {
+    protected final void publishLogUnitsAtFrontOfBuffer() throws IOException {
+        // TODO: Refactor this method to require fewer passes through the LogUnits. Should really
+        // require only one pass.
         ArrayList<LogUnit> logUnits = peekAtFirstNWords(N_GRAM_SIZE);
         if (isSafeNGram(logUnits, N_GRAM_SIZE)) {
             // Good n-gram at the front of the buffer. Publish it, disclosing details.
             publish(logUnits, true /* canIncludePrivateData */);
             shiftOutWords(N_GRAM_SIZE);
-            resetWordCounter();
-        } else {
-            // No good n-gram at front, and buffer is full. Shift out the first word (or if there
-            // is none, the existing logUnits).
-            logUnits = peekAtFirstNWords(1);
-            publish(logUnits, false /* canIncludePrivateData */);
-            shiftOutWords(1);
+            mNumWordsUntilSafeToSample = mNumWordsBetweenNGrams;
+            return;
         }
+        // No good n-gram at front, and buffer is full. Shift out up through the first logUnit
+        // with associated words (or if there is none, all the existing logUnits).
+        logUnits.clear();
+        LogUnit logUnit = shiftOut();
+        while (logUnit != null) {
+            logUnits.add(logUnit);
+            final int numWords = logUnit.getNumWords();
+            if (numWords > 0) {
+                mNumWordsUntilSafeToSample = Math.max(0, mNumWordsUntilSafeToSample - numWords);
+                break;
+            }
+            logUnit = shiftOut();
+        }
+        publish(logUnits, false /* canIncludePrivateData */);
     }
 
     /**
@@ -219,18 +231,19 @@ public abstract class MainLogBuffer extends FixedLogBuffer {
      * @param logUnits The list of logUnits to be published.
      * @param canIncludePrivateData Whether the private data in the logUnits can be included in
      *     publication.
+     *
+     * @throws IOException if publication to the log file is not possible
      */
     protected abstract void publish(final ArrayList<LogUnit> logUnits,
-            final boolean canIncludePrivateData);
+            final boolean canIncludePrivateData) throws IOException;
 
     @Override
-    protected void shiftOutWords(final int numWords) {
-        final int oldNumActualWords = getNumActualWords();
-        super.shiftOutWords(numWords);
-        final int numWordsShifted = oldNumActualWords - getNumActualWords();
-        mNumWordsUntilSafeToSample -= numWordsShifted;
+    protected int shiftOutWords(final int numWords) {
+        final int numWordsShiftedOut = super.shiftOutWords(numWords);
+        mNumWordsUntilSafeToSample = Math.max(0, mNumWordsUntilSafeToSample - numWordsShiftedOut);
         if (DEBUG) {
             Log.d(TAG, "wordsUntilSafeToSample now at " + mNumWordsUntilSafeToSample);
         }
+        return numWordsShiftedOut;
     }
 }