aboutsummaryrefslogtreecommitdiffstats
path: root/java/src
diff options
context:
space:
mode:
author Kurt Partridge <kep@google.com> 2013-01-10 10:51:17 -0800
committer Android (Google) Code Review <android-gerrit@google.com> 2013-01-10 10:51:17 -0800
commit 700ce8df07eb242ce93f4f5e3e0ceb78473938ab (patch)
tree fb9f105a96578dd6d2118b0dca810fd91b87e82f /java/src
parent 4da2ed7a78c63284fa3869450a492ee7ae420ed9 (diff)
parent 403c423940b197e56f4d203050341b7cd90ca0cd (diff)
download latinime-700ce8df07eb242ce93f4f5e3e0ceb78473938ab.tar.gz
latinime-700ce8df07eb242ce93f4f5e3e0ceb78473938ab.tar.xz
latinime-700ce8df07eb242ce93f4f5e3e0ceb78473938ab.zip
Merge "[Rlog56] Buffer words before pushing out LogUnit"
Diffstat (limited to 'java/src')
-rw-r--r-- java/src/com/android/inputmethod/research/FixedLogBuffer.java 2
-rw-r--r-- java/src/com/android/inputmethod/research/MainLogBuffer.java 111
-rw-r--r-- java/src/com/android/inputmethod/research/ResearchLogger.java 38
3 files changed, 96 insertions, 55 deletions
diff --git a/java/src/com/android/inputmethod/research/FixedLogBuffer.java b/java/src/com/android/inputmethod/research/FixedLogBuffer.java
index 9613c2db2..777111947 100644
--- a/java/src/com/android/inputmethod/research/FixedLogBuffer.java
+++ b/java/src/com/android/inputmethod/research/FixedLogBuffer.java
@@ -81,7 +81,7 @@ public class FixedLogBuffer extends LogBuffer {
return logUnit;
}
- private void shiftOutThroughFirstWord() {
+ public void shiftOutThroughFirstWord() {
final LinkedList<LogUnit> logUnits = getLogUnits();
while (!logUnits.isEmpty()) {
final LogUnit logUnit = logUnits.removeFirst();
diff --git a/java/src/com/android/inputmethod/research/MainLogBuffer.java b/java/src/com/android/inputmethod/research/MainLogBuffer.java
index 898a042d6..a8f255a41 100644
--- a/java/src/com/android/inputmethod/research/MainLogBuffer.java
+++ b/java/src/com/android/inputmethod/research/MainLogBuffer.java
@@ -26,18 +26,42 @@ import java.util.LinkedList;
import java.util.Random;
/**
- * Provide a log buffer of fixed length that enforces privacy restrictions.
+ * MainLogBuffer is a FixedLogBuffer that tracks the state of LogUnits to make privacy guarantees.
*
- * The privacy restrictions include making sure that no numbers are logged, that all logged words
- * are in the dictionary, and that words are recorded infrequently enough that the user's meaning
- * cannot be easily determined.
+ * There are three forms of privacy protection: 1) only words in the main dictionary are allowed to
+ * be logged in enough detail to determine their contents, 2) only a subset of words are logged
+ * in detail, such as 10%, and 3) no numbers are logged.
+ *
+ * This class maintains a list of LogUnits, each corresponding to a word. As the user completes
+ * words, they are added here. But if the user backs up over their current word to edit a word
+ * entered earlier, then it is pulled out of this LogBuffer, changes are then added to the end of
+ * the LogUnit, and it is pushed back in here when the user is done. Because words may be pulled
+ * back out even after they are pushed in, we must not publish the contents of this LogBuffer too
+ * quickly. However, we cannot let the contents pile up either, or it will limit the editing that
+ * a user can perform.
+ *
+ * To balance these requirements (keep history so user can edit, flush history so it does not pile
+ * up), the LogBuffer is considered "complete" when the user has entered enough words to form an
+ * n-gram, followed by enough additional non-detailed words (that are in the 90%, as per above).
+ * Once complete, the n-gram may be published to flash storage (via the ResearchLog class).
+ * However, the additional non-detailed words are retained, in case the user backspaces to edit
+ * them. The MainLogBuffer then continues to add words, publishing individual non-detailed words
+ * as new words arrive. After enough non-detailed words have been pushed out to account for the
+ * 90% between words, the words at the front of the LogBuffer can be published as an n-gram again.
+ *
+ * If the words that would form the valid n-gram are not in the dictionary, then words are pushed
+ * through the LogBuffer one at a time until an n-gram is found that is entirely composed of
+ * dictionary words.
+ *
+ * If the user closes a session, then the entire LogBuffer is flushed, publishing any embedded
+ * n-gram containing dictionary words.
*/
public class MainLogBuffer extends FixedLogBuffer {
private static final String TAG = MainLogBuffer.class.getSimpleName();
private static final boolean DEBUG = false && ProductionFlag.IS_EXPERIMENTAL_DEBUG;
// The size of the n-grams logged. E.g. N_GRAM_SIZE = 2 means to sample bigrams.
- private static final int N_GRAM_SIZE = 2;
+ public static final int N_GRAM_SIZE = 2;
// The number of words between n-grams to omit from the log. If debugging, record 50% of all
// words. Otherwise, only record 10%.
private static final int DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES =
@@ -46,49 +70,31 @@ public class MainLogBuffer extends FixedLogBuffer {
private final ResearchLog mResearchLog;
private Suggest mSuggest;
- // The minimum periodicity with which n-grams can be sampled. E.g. mWinWordPeriod is 10 if
- // every 10th bigram is sampled, i.e., words 1-8 are not, but the bigram at words 9 and 10, etc.
- // for 11-18, and the bigram at words 19 and 20. If an n-gram is not safe (e.g. it contains a
- // number in the middle or an out-of-vocabulary word), then sampling is delayed until a safe
- // n-gram does appear.
- /* package for test */ int mMinWordPeriod;
+ /* package for test */ int mNumWordsBetweenNGrams;
// Counter for words left to suppress before an n-gram can be sampled. Reset to mMinWordPeriod
// after a sample is taken.
- /* package for test */ int mWordsUntilSafeToSample;
+ /* package for test */ int mNumWordsUntilSafeToSample;
public MainLogBuffer(final ResearchLog researchLog) {
- super(N_GRAM_SIZE);
+ super(N_GRAM_SIZE + DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES);
mResearchLog = researchLog;
- mMinWordPeriod = DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES + N_GRAM_SIZE;
+ mNumWordsBetweenNGrams = DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES;
final Random random = new Random();
- mWordsUntilSafeToSample = random.nextInt(mMinWordPeriod);
+ mNumWordsUntilSafeToSample = DEBUG ? 0 : random.nextInt(mNumWordsBetweenNGrams + 1);
}
public void setSuggest(final Suggest suggest) {
mSuggest = suggest;
}
- @Override
- public void shiftIn(final LogUnit newLogUnit) {
- super.shiftIn(newLogUnit);
- if (newLogUnit.hasWord()) {
- if (mWordsUntilSafeToSample > 0) {
- mWordsUntilSafeToSample--;
- }
- }
- if (DEBUG) {
- Log.d(TAG, "shiftedIn " + (newLogUnit.hasWord() ? newLogUnit.getWord() : ""));
- }
- }
-
public void resetWordCounter() {
- mWordsUntilSafeToSample = mMinWordPeriod;
+ mNumWordsUntilSafeToSample = mNumWordsBetweenNGrams;
}
/**
- * Determines whether the content of the MainLogBuffer can be safely uploaded in its complete
- * form and still protect the user's privacy.
+ * Determines whether uploading the n words at the front of the MainLogBuffer will not
+ * violate user privacy.
*
* The size of the MainLogBuffer is just enough to hold one n-gram, its corrections, and any
* non-character data that is typed between words. The decision about privacy is made based on
@@ -97,10 +103,10 @@ public class MainLogBuffer extends FixedLogBuffer {
* the screen orientation and other characteristics about the device can be uploaded without
* revealing much about the user.
*/
- public boolean isSafeToLog() {
+ public boolean isNGramSafe() {
// Check that we are not sampling too frequently. Having sampled recently might disclose
// too much of the user's intended meaning.
- if (mWordsUntilSafeToSample > 0) {
+ if (mNumWordsUntilSafeToSample > 0) {
return false;
}
if (mSuggest == null || !mSuggest.hasMainDictionary()) {
@@ -119,8 +125,8 @@ public class MainLogBuffer extends FixedLogBuffer {
// complete buffer contents in detail.
final LinkedList<LogUnit> logUnits = getLogUnits();
final int length = logUnits.size();
- int wordsFound = 0;
- for (int i = 0; i < length; i++) {
+ int wordsNeeded = N_GRAM_SIZE;
+ for (int i = 0; i < length && wordsNeeded > 0; i++) {
final LogUnit logUnit = logUnits.get(i);
final String word = logUnit.getWord();
if (word == null) {
@@ -136,26 +142,41 @@ public class MainLogBuffer extends FixedLogBuffer {
+ ", isValid: " + (dictionary.isValidWord(word)));
}
return false;
- } else {
- wordsFound++;
}
}
}
- if (wordsFound < N_GRAM_SIZE) {
- // Not enough words. Not unsafe, but reject anyway.
- if (DEBUG) {
- Log.d(TAG, "not enough words");
- }
- return false;
- }
// All checks have passed; this buffer's content can be safely uploaded.
return true;
}
+ public boolean isNGramComplete() {
+ final LinkedList<LogUnit> logUnits = getLogUnits();
+ final int length = logUnits.size();
+ int wordsNeeded = N_GRAM_SIZE;
+ for (int i = 0; i < length && wordsNeeded > 0; i++) {
+ final LogUnit logUnit = logUnits.get(i);
+ final String word = logUnit.getWord();
+ if (word != null) {
+ wordsNeeded--;
+ }
+ }
+ return wordsNeeded == 0;
+ }
+
@Override
protected void onShiftOut(final LogUnit logUnit) {
if (mResearchLog != null) {
- mResearchLog.publish(logUnit, false /* isIncludingPrivateData */);
+ mResearchLog.publish(logUnit,
+ ResearchLogger.IS_LOGGING_EVERYTHING /* isIncludingPrivateData */);
+ }
+ if (logUnit.hasWord()) {
+ if (mNumWordsUntilSafeToSample > 0) {
+ mNumWordsUntilSafeToSample--;
+ Log.d(TAG, "wordsUntilSafeToSample now at " + mNumWordsUntilSafeToSample);
+ }
+ }
+ if (DEBUG) {
+ Log.d(TAG, "shiftedOut " + (logUnit.hasWord() ? logUnit.getWord() : ""));
}
}
}
diff --git a/java/src/com/android/inputmethod/research/ResearchLogger.java b/java/src/com/android/inputmethod/research/ResearchLogger.java
index b61db272c..f464facf4 100644
--- a/java/src/com/android/inputmethod/research/ResearchLogger.java
+++ b/java/src/com/android/inputmethod/research/ResearchLogger.java
@@ -85,7 +85,7 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
private static final String TAG = ResearchLogger.class.getSimpleName();
private static final boolean DEBUG = false && ProductionFlag.IS_EXPERIMENTAL_DEBUG;
// Whether all n-grams should be logged. true will disclose private info.
- private static final boolean IS_LOGGING_EVERYTHING = false
+ public static final boolean IS_LOGGING_EVERYTHING = false
&& ProductionFlag.IS_EXPERIMENTAL_DEBUG;
// Whether the TextView contents are logged at the end of the session. true will disclose
// private info.
@@ -394,8 +394,16 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
commitCurrentLogUnit();
if (mMainLogBuffer != null) {
- publishLogBuffer(mMainLogBuffer, mMainResearchLog,
- IS_LOGGING_EVERYTHING /* isIncludingPrivateData */);
+ while (!mMainLogBuffer.isEmpty()) {
+ if ((mMainLogBuffer.isNGramSafe() || IS_LOGGING_EVERYTHING) &&
+ mMainResearchLog != null) {
+ publishLogBuffer(mMainLogBuffer, mMainResearchLog,
+ true /* isIncludingPrivateData */);
+ mMainLogBuffer.resetWordCounter();
+ } else {
+ mMainLogBuffer.shiftOutThroughFirstWord();
+ }
+ }
mMainResearchLog.close(null /* callback */);
mMainLogBuffer = null;
}
@@ -702,8 +710,9 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
}
if (!mCurrentLogUnit.isEmpty()) {
if (mMainLogBuffer != null) {
- if ((mMainLogBuffer.isSafeToLog() || IS_LOGGING_EVERYTHING)
- && mMainResearchLog != null) {
+ if ((mMainLogBuffer.isNGramSafe() || IS_LOGGING_EVERYTHING) &&
+ mMainLogBuffer.isNGramComplete() &&
+ mMainResearchLog != null) {
publishLogBuffer(mMainLogBuffer, mMainResearchLog,
true /* isIncludingPrivateData */);
mMainLogBuffer.resetWordCounter();
@@ -714,6 +723,10 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
mFeedbackLogBuffer.shiftIn(mCurrentLogUnit);
}
mCurrentLogUnit = new LogUnit();
+ } else {
+ if (DEBUG) {
+ Log.d(TAG, "Warning: tried to commit empty log unit.");
+ }
}
}
@@ -756,8 +769,8 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
mFeedbackLogBuffer.unshiftIn();
}
if (DEBUG) {
- Log.d(TAG, "uncommitCurrentLogUnit back to " + (mCurrentLogUnit.hasWord()
- ? ": '" + mCurrentLogUnit.getWord() + "'" : ""));
+ Log.d(TAG, "uncommitCurrentLogUnit (dump=" + dumpCurrentLogUnit + ") back to "
+ + (mCurrentLogUnit.hasWord() ? ": '" + mCurrentLogUnit.getWord() + "'" : ""));
}
}
@@ -773,12 +786,16 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
isIncludingPrivateData);
researchLog.publish(openingLogUnit, true /* isIncludingPrivateData */);
LogUnit logUnit;
- while ((logUnit = logBuffer.shiftOut()) != null) {
+ int numWordsToPublish = MainLogBuffer.N_GRAM_SIZE;
+ while ((logUnit = logBuffer.shiftOut()) != null && numWordsToPublish > 0) {
if (DEBUG) {
Log.d(TAG, "publishLogBuffer: " + (logUnit.hasWord() ? logUnit.getWord()
: "<wordless>"));
}
researchLog.publish(logUnit, isIncludingPrivateData);
+ if (logUnit.getWord() != null) {
+ numWordsToPublish--;
+ }
}
final LogUnit closingLogUnit = new LogUnit();
closingLogUnit.addLogStatement(LOGSTATEMENT_LOG_SEGMENT_CLOSING,
@@ -1254,9 +1271,12 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
public static void latinIME_revertCommit(final String committedWord,
final String originallyTypedWord, final boolean isBatchMode) {
final ResearchLogger researchLogger = getInstance();
- final LogUnit logUnit = researchLogger.mMainLogBuffer.peekLastLogUnit();
+ // Assume that mCurrentLogUnit has been restored to contain the reverted word.
+ final LogUnit logUnit = researchLogger.mCurrentLogUnit;
if (originallyTypedWord.length() > 0 && hasLetters(originallyTypedWord)) {
if (logUnit != null) {
+ // Probably not necessary, but setting as a precaution in case the word isn't
+ // committed later.
logUnit.setWord(originallyTypedWord);
}
}