diff options
author | 2013-01-10 10:51:17 -0800 | |
---|---|---|
committer | 2013-01-10 10:51:17 -0800 | |
commit | 700ce8df07eb242ce93f4f5e3e0ceb78473938ab (patch) | |
tree | fb9f105a96578dd6d2118b0dca810fd91b87e82f /java/src/com/android/inputmethod/research/MainLogBuffer.java | |
parent | 4da2ed7a78c63284fa3869450a492ee7ae420ed9 (diff) | |
parent | 403c423940b197e56f4d203050341b7cd90ca0cd (diff) | |
download | latinime-700ce8df07eb242ce93f4f5e3e0ceb78473938ab.tar.gz latinime-700ce8df07eb242ce93f4f5e3e0ceb78473938ab.tar.xz latinime-700ce8df07eb242ce93f4f5e3e0ceb78473938ab.zip |
Merge "[Rlog56] Buffer words before pushing out LogUnit"
Diffstat (limited to 'java/src/com/android/inputmethod/research/MainLogBuffer.java')
-rw-r--r-- | java/src/com/android/inputmethod/research/MainLogBuffer.java | 111 |
1 files changed, 66 insertions, 45 deletions
diff --git a/java/src/com/android/inputmethod/research/MainLogBuffer.java b/java/src/com/android/inputmethod/research/MainLogBuffer.java index 898a042d6..a8f255a41 100644 --- a/java/src/com/android/inputmethod/research/MainLogBuffer.java +++ b/java/src/com/android/inputmethod/research/MainLogBuffer.java @@ -26,18 +26,42 @@ import java.util.LinkedList; import java.util.Random; /** - * Provide a log buffer of fixed length that enforces privacy restrictions. + * MainLogBuffer is a FixedLogBuffer that tracks the state of LogUnits to make privacy guarantees. * - * The privacy restrictions include making sure that no numbers are logged, that all logged words - * are in the dictionary, and that words are recorded infrequently enough that the user's meaning - * cannot be easily determined. + * There are three forms of privacy protection: 1) only words in the main dictionary are allowed to + * be logged in enough detail to determine their contents, 2) only a subset of words are logged + * in detail, such as 10%, and 3) no numbers are logged. + * + * This class maintains a list of LogUnits, each corresponding to a word. As the user completes + * words, they are added here. But if the user backs up over their current word to edit a word + * entered earlier, then it is pulled out of this LogBuffer, changes are then added to the end of + * the LogUnit, and it is pushed back in here when the user is done. Because words may be pulled + * back out even after they are pushed in, we must not publish the contents of this LogBuffer too + * quickly. However, we cannot let the contents pile up either, or it will limit the editing that + * a user can perform. + * + * To balance these requirements (keep history so user can edit, flush history so it does not pile + * up), the LogBuffer is considered "complete" when the user has entered enough words to form an + * n-gram, followed by enough additional non-detailed words (that are in the 90%, as per above). 
+ * Once complete, the n-gram may be published to flash storage (via the ResearchLog class). + * However, the additional non-detailed words are retained, in case the user backspaces to edit + * them. The MainLogBuffer then continues to add words, publishing individual non-detailed words + * as new words arrive. After enough non-detailed words have been pushed out to account for the + * 90% between words, the words at the front of the LogBuffer can be published as an n-gram again. + * + * If the words that would form the valid n-gram are not in the dictionary, then words are pushed + * through the LogBuffer one at a time until an n-gram is found that is entirely composed of + * dictionary words. + * + * If the user closes a session, then the entire LogBuffer is flushed, publishing any embedded + * n-gram containing dictionary words. */ public class MainLogBuffer extends FixedLogBuffer { private static final String TAG = MainLogBuffer.class.getSimpleName(); private static final boolean DEBUG = false && ProductionFlag.IS_EXPERIMENTAL_DEBUG; // The size of the n-grams logged. E.g. N_GRAM_SIZE = 2 means to sample bigrams. - private static final int N_GRAM_SIZE = 2; + public static final int N_GRAM_SIZE = 2; // The number of words between n-grams to omit from the log. If debugging, record 50% of all // words. Otherwise, only record 10%. private static final int DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES = @@ -46,49 +70,31 @@ public class MainLogBuffer extends FixedLogBuffer { private final ResearchLog mResearchLog; private Suggest mSuggest; - // The minimum periodicity with which n-grams can be sampled. E.g. mWinWordPeriod is 10 if - // every 10th bigram is sampled, i.e., words 1-8 are not, but the bigram at words 9 and 10, etc. - // for 11-18, and the bigram at words 19 and 20. If an n-gram is not safe (e.g. it contains a - // number in the middle or an out-of-vocabulary word), then sampling is delayed until a safe - // n-gram does appear. 
- /* package for test */ int mMinWordPeriod; + /* package for test */ int mNumWordsBetweenNGrams; // Counter for words left to suppress before an n-gram can be sampled. Reset to mMinWordPeriod // after a sample is taken. - /* package for test */ int mWordsUntilSafeToSample; + /* package for test */ int mNumWordsUntilSafeToSample; public MainLogBuffer(final ResearchLog researchLog) { - super(N_GRAM_SIZE); + super(N_GRAM_SIZE + DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES); mResearchLog = researchLog; - mMinWordPeriod = DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES + N_GRAM_SIZE; + mNumWordsBetweenNGrams = DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES; final Random random = new Random(); - mWordsUntilSafeToSample = random.nextInt(mMinWordPeriod); + mNumWordsUntilSafeToSample = DEBUG ? 0 : random.nextInt(mNumWordsBetweenNGrams + 1); } public void setSuggest(final Suggest suggest) { mSuggest = suggest; } - @Override - public void shiftIn(final LogUnit newLogUnit) { - super.shiftIn(newLogUnit); - if (newLogUnit.hasWord()) { - if (mWordsUntilSafeToSample > 0) { - mWordsUntilSafeToSample--; - } - } - if (DEBUG) { - Log.d(TAG, "shiftedIn " + (newLogUnit.hasWord() ? newLogUnit.getWord() : "")); - } - } - public void resetWordCounter() { - mWordsUntilSafeToSample = mMinWordPeriod; + mNumWordsUntilSafeToSample = mNumWordsBetweenNGrams; } /** - * Determines whether the content of the MainLogBuffer can be safely uploaded in its complete - * form and still protect the user's privacy. + * Determines whether uploading the n words at the front of the MainLogBuffer will not violate + * user privacy. * * The size of the MainLogBuffer is just enough to hold one n-gram, its corrections, and any * non-character data that is typed between words. The decision about privacy is made based on @@ -97,10 +103,10 @@ public class MainLogBuffer extends FixedLogBuffer { * the screen orientation and other characteristics about the device can be uploaded without * revealing much about the user. 
*/ - public boolean isSafeToLog() { + public boolean isNGramSafe() { // Check that we are not sampling too frequently. Having sampled recently might disclose // too much of the user's intended meaning. - if (mWordsUntilSafeToSample > 0) { + if (mNumWordsUntilSafeToSample > 0) { return false; } if (mSuggest == null || !mSuggest.hasMainDictionary()) { @@ -119,8 +125,8 @@ public class MainLogBuffer extends FixedLogBuffer { // complete buffer contents in detail. final LinkedList<LogUnit> logUnits = getLogUnits(); final int length = logUnits.size(); - int wordsFound = 0; - for (int i = 0; i < length; i++) { + int wordsNeeded = N_GRAM_SIZE; + for (int i = 0; i < length && wordsNeeded > 0; i++) { final LogUnit logUnit = logUnits.get(i); final String word = logUnit.getWord(); if (word == null) { @@ -136,26 +142,41 @@ public class MainLogBuffer extends FixedLogBuffer { + ", isValid: " + (dictionary.isValidWord(word))); } return false; - } else { - wordsFound++; } } } - if (wordsFound < N_GRAM_SIZE) { - // Not enough words. Not unsafe, but reject anyway. - if (DEBUG) { - Log.d(TAG, "not enough words"); - } - return false; - } // All checks have passed; this buffer's content can be safely uploaded. 
return true; } + public boolean isNGramComplete() { + final LinkedList<LogUnit> logUnits = getLogUnits(); + final int length = logUnits.size(); + int wordsNeeded = N_GRAM_SIZE; + for (int i = 0; i < length && wordsNeeded > 0; i++) { + final LogUnit logUnit = logUnits.get(i); + final String word = logUnit.getWord(); + if (word != null) { + wordsNeeded--; + } + } + return wordsNeeded == 0; + } + @Override protected void onShiftOut(final LogUnit logUnit) { if (mResearchLog != null) { - mResearchLog.publish(logUnit, false /* isIncludingPrivateData */); + mResearchLog.publish(logUnit, + ResearchLogger.IS_LOGGING_EVERYTHING /* isIncludingPrivateData */); + } + if (logUnit.hasWord()) { + if (mNumWordsUntilSafeToSample > 0) { + mNumWordsUntilSafeToSample--; + Log.d(TAG, "wordsUntilSafeToSample now at " + mNumWordsUntilSafeToSample); + } + } + if (DEBUG) { + Log.d(TAG, "shiftedOut " + (logUnit.hasWord() ? logUnit.getWord() : "")); } } } |