aboutsummaryrefslogtreecommitdiffstats
path: root/java/src/com/android/inputmethod/research/MainLogBuffer.java
diff options
context:
space:
mode:
author Tadashi G. Takaoka <takaoka@google.com> 2014-05-28 23:02:16 +0900
committer Tadashi G. Takaoka <takaoka@google.com> 2014-05-29 15:10:28 +0900
commit bbf556e8e1f2b522c555e05ec06c4efefed16e02 (patch)
tree 8e164529344673276bc7fb7066e4782f3dcb9b0b /java/src/com/android/inputmethod/research/MainLogBuffer.java
parent 7a4f9b67c0f4dbdbfd73af7678e720d520d869f1 (diff)
download latinime-bbf556e8e1f2b522c555e05ec06c4efefed16e02.tar.gz
latinime-bbf556e8e1f2b522c555e05ec06c4efefed16e02.tar.xz
latinime-bbf556e8e1f2b522c555e05ec06c4efefed16e02.zip
Remove researcher logger
This CL must be checked in together with I5cc76807e3.

Bug: 15318007
Change-Id: I61423c3377ddc299fb332e742d6626c2e47145bb
Diffstat (limited to 'java/src/com/android/inputmethod/research/MainLogBuffer.java')
-rw-r--r-- java/src/com/android/inputmethod/research/MainLogBuffer.java 287
1 files changed, 0 insertions, 287 deletions
diff --git a/java/src/com/android/inputmethod/research/MainLogBuffer.java b/java/src/com/android/inputmethod/research/MainLogBuffer.java
deleted file mode 100644
index 3806ac755..000000000
--- a/java/src/com/android/inputmethod/research/MainLogBuffer.java
+++ /dev/null
@@ -1,287 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.inputmethod.research;
-
-import android.util.Log;
-
-import com.android.inputmethod.annotations.UsedForTesting;
-import com.android.inputmethod.latin.Dictionary;
-import com.android.inputmethod.latin.DictionaryFacilitator;
-import com.android.inputmethod.latin.define.ProductionFlag;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.LinkedList;
-
-/**
- * MainLogBuffer is a FixedLogBuffer that tracks the state of LogUnits to make privacy guarantees.
- *
- * There are three forms of privacy protection: 1) only words in the main dictionary are allowed to
- * be logged in enough detail to determine their contents, 2) only a subset of words are logged
- * in detail, such as 10%, and 3) no numbers are logged.
- *
- * This class maintains a list of LogUnits, each corresponding to a word. As the user completes
- * words, they are added here. But if the user backs up over their current word to edit a word
- * entered earlier, then it is pulled out of this LogBuffer, changes are then added to the end of
- * the LogUnit, and it is pushed back in here when the user is done. Because words may be pulled
- * back out even after they are pushed in, we must not publish the contents of this LogBuffer too
- * quickly. However, we cannot let the contents pile up either, or it will limit the editing that
- * a user can perform.
- *
- * To balance these requirements (keep history so user can edit, flush history so it does not pile
- * up), the LogBuffer is considered "complete" when the user has entered enough words to form an
- * n-gram, followed by enough additional non-detailed words (that are in the 90%, as per above).
- * Once complete, the n-gram may be published to flash storage (via the ResearchLog class).
- * However, the additional non-detailed words are retained, in case the user backspaces to edit
- * them. The MainLogBuffer then continues to add words, publishing individual non-detailed words
- * as new words arrive. After enough non-detailed words have been pushed out to account for the
- * 90% between words, the words at the front of the LogBuffer can be published as an n-gram again.
- *
- * If the words that would form the valid n-gram are not in the dictionary, then words are pushed
- * through the LogBuffer one at a time until an n-gram is found that is entirely composed of
- * dictionary words.
- *
- * If the user closes a session, then the entire LogBuffer is flushed, publishing any embedded
- * n-gram containing dictionary words.
- */
-public abstract class MainLogBuffer extends FixedLogBuffer {
- private static final String TAG = MainLogBuffer.class.getSimpleName();
- private static final boolean DEBUG = false
- && ProductionFlag.USES_DEVELOPMENT_ONLY_DIAGNOSTICS_DEBUG;
-
- // Keep consistent with switch statement in Statistics.recordPublishabilityResultCode()
- public static final int PUBLISHABILITY_PUBLISHABLE = 0;
- public static final int PUBLISHABILITY_UNPUBLISHABLE_STOPPING = 1;
- public static final int PUBLISHABILITY_UNPUBLISHABLE_INCORRECT_WORD_COUNT = 2;
- public static final int PUBLISHABILITY_UNPUBLISHABLE_SAMPLED_TOO_RECENTLY = 3;
- public static final int PUBLISHABILITY_UNPUBLISHABLE_DICTIONARY_UNAVAILABLE = 4;
- public static final int PUBLISHABILITY_UNPUBLISHABLE_MAY_CONTAIN_DIGIT = 5;
- public static final int PUBLISHABILITY_UNPUBLISHABLE_NOT_IN_DICTIONARY = 6;
-
- // The size of the n-grams logged. E.g. N_GRAM_SIZE = 2 means to sample bigrams.
- public static final int N_GRAM_SIZE = 2;
-
- private final DictionaryFacilitator mDictionaryFacilitator;
- @UsedForTesting
- private Dictionary mDictionaryForTesting;
- private boolean mIsStopping = false;
-
- /* package for test */ int mNumWordsBetweenNGrams;
-
- // Counter for words left to suppress before an n-gram can be sampled. Reset to mMinWordPeriod
- // after a sample is taken.
- /* package for test */ int mNumWordsUntilSafeToSample;
-
- public MainLogBuffer(final int wordsBetweenSamples, final int numInitialWordsToIgnore,
- final DictionaryFacilitator dictionaryFacilitator) {
- super(N_GRAM_SIZE + wordsBetweenSamples);
- mNumWordsBetweenNGrams = wordsBetweenSamples;
- mNumWordsUntilSafeToSample = DEBUG ? 0 : numInitialWordsToIgnore;
- mDictionaryFacilitator = dictionaryFacilitator;
- }
-
- @UsedForTesting
- /* package for test */ void setDictionaryForTesting(final Dictionary dictionary) {
- mDictionaryForTesting = dictionary;
- }
-
- private boolean isValidDictWord(final String word) {
- if (mDictionaryForTesting != null) {
- return mDictionaryForTesting.isValidWord(word);
- }
- if (mDictionaryFacilitator != null) {
- return mDictionaryFacilitator.isValidMainDictWord(word);
- }
- return false;
- }
-
- public void setIsStopping() {
- mIsStopping = true;
- }
-
- /**
- * Determines whether the string determined by a series of LogUnits will not violate user
- * privacy if published.
- *
- * @param logUnits a LogUnit list to check for publishability
- * @param nGramSize the smallest n-gram acceptable to be published. if
- * {@link ResearchLogger#IS_LOGGING_EVERYTHING} is true, then publish if there are more than
- * {@code minNGramSize} words in the logUnits, otherwise wait. if {@link
- * ResearchLogger#IS_LOGGING_EVERYTHING} is false, then ensure that there are exactly nGramSize
- * words in the LogUnits.
- *
- * @return one of the {@code PUBLISHABILITY_*} result codes defined in this class.
- */
- private int getPublishabilityResultCode(final ArrayList<LogUnit> logUnits,
- final int nGramSize) {
- // Bypass privacy checks when debugging.
- if (ResearchLogger.IS_LOGGING_EVERYTHING) {
- if (mIsStopping) {
- return PUBLISHABILITY_UNPUBLISHABLE_STOPPING;
- }
- // Only check that it is the right length. If not, wait for later words to make
- // complete n-grams.
- int numWordsInLogUnitList = 0;
- final int length = logUnits.size();
- for (int i = 0; i < length; i++) {
- final LogUnit logUnit = logUnits.get(i);
- numWordsInLogUnitList += logUnit.getNumWords();
- }
- if (numWordsInLogUnitList >= nGramSize) {
- return PUBLISHABILITY_PUBLISHABLE;
- } else {
- return PUBLISHABILITY_UNPUBLISHABLE_INCORRECT_WORD_COUNT;
- }
- }
-
- // Check that we are not sampling too frequently. Having sampled recently might disclose
- // too much of the user's intended meaning.
- if (mNumWordsUntilSafeToSample > 0) {
- return PUBLISHABILITY_UNPUBLISHABLE_SAMPLED_TOO_RECENTLY;
- }
- // Reload the dictionary in case it has changed (e.g., because the user has changed
- // languages).
- if ((mDictionaryFacilitator == null
- || !mDictionaryFacilitator.hasInitializedMainDictionary())
- && mDictionaryForTesting == null) {
- // Main dictionary is unavailable. Since we cannot check it, we cannot tell if a
- // word is out-of-vocabulary or not. Therefore, we must judge the entire buffer
- // contents to potentially pose a privacy risk.
- return PUBLISHABILITY_UNPUBLISHABLE_DICTIONARY_UNAVAILABLE;
- }
-
- // Check each word in the buffer. If any word poses a privacy threat, we cannot upload
- // the complete buffer contents in detail.
- int numWordsInLogUnitList = 0;
- for (final LogUnit logUnit : logUnits) {
- if (!logUnit.hasOneOrMoreWords()) {
- // Digits outside words are a privacy threat.
- if (logUnit.mayContainDigit()) {
- return PUBLISHABILITY_UNPUBLISHABLE_MAY_CONTAIN_DIGIT;
- }
- } else {
- numWordsInLogUnitList += logUnit.getNumWords();
- final String[] words = logUnit.getWordsAsStringArray();
- for (final String word : words) {
- // Words not in the dictionary are a privacy threat.
- if (ResearchLogger.hasLetters(word) && !isValidDictWord(word)) {
- if (DEBUG) {
- Log.d(TAG, "\"" + word + "\" NOT SAFE!: hasLetters: "
- + ResearchLogger.hasLetters(word)
- + ", isValid: " + isValidDictWord(word));
- }
- return PUBLISHABILITY_UNPUBLISHABLE_NOT_IN_DICTIONARY;
- }
- }
- }
- }
-
- // Finally, only return true if the ngram is the right size.
- if (numWordsInLogUnitList == nGramSize) {
- return PUBLISHABILITY_PUBLISHABLE;
- } else {
- return PUBLISHABILITY_UNPUBLISHABLE_INCORRECT_WORD_COUNT;
- }
- }
-
- public void shiftAndPublishAll() throws IOException {
- final LinkedList<LogUnit> logUnits = getLogUnits();
- while (!logUnits.isEmpty()) {
- publishLogUnitsAtFrontOfBuffer();
- }
- }
-
- @Override
- protected final void onBufferFull() {
- try {
- publishLogUnitsAtFrontOfBuffer();
- } catch (final IOException e) {
- if (DEBUG) {
- Log.w(TAG, "IOException when publishing front of LogBuffer", e);
- }
- }
- }
-
- /**
- * If there is a safe n-gram at the front of this log buffer, publish it with all details, and
- * remove the LogUnits that constitute it.
- *
- * An n-gram might not be "safe" if it violates privacy controls. E.g., it might contain
- * numbers, an out-of-vocabulary word, or another n-gram may have been published recently. If
- * there is no safe n-gram, then the LogUnits up through the first word-containing LogUnit are
- * published, but without disclosing any privacy-related details, such as the word the LogUnit
- * generated, motion data, etc.
- *
- * Note that a LogUnit can hold more than one word if the user types without explicit spaces.
- * In this case, the words may be grouped together in such a way that pulling an n-gram off the
- * front would require splitting a LogUnit. Splitting a LogUnit is not possible, so this case
- * is treated just as the unsafe n-gram case. This may cause n-grams to be sampled at slightly
- * less than the target frequency.
- */
- protected final void publishLogUnitsAtFrontOfBuffer() throws IOException {
- // TODO: Refactor this method to require fewer passes through the LogUnits. Should really
- // require only one pass.
- ArrayList<LogUnit> logUnits = peekAtFirstNWords(N_GRAM_SIZE);
- final int publishabilityResultCode = getPublishabilityResultCode(logUnits, N_GRAM_SIZE);
- ResearchLogger.recordPublishabilityResultCode(publishabilityResultCode);
- if (publishabilityResultCode == MainLogBuffer.PUBLISHABILITY_PUBLISHABLE) {
- // Good n-gram at the front of the buffer. Publish it, disclosing details.
- publish(logUnits, true /* canIncludePrivateData */);
- shiftOutWords(N_GRAM_SIZE);
- mNumWordsUntilSafeToSample = mNumWordsBetweenNGrams;
- return;
- }
- // No good n-gram at front, and buffer is full. Shift out up through the first logUnit
- // with associated words (or if there is none, all the existing logUnits).
- logUnits.clear();
- LogUnit logUnit = shiftOut();
- while (logUnit != null) {
- logUnits.add(logUnit);
- final int numWords = logUnit.getNumWords();
- if (numWords > 0) {
- mNumWordsUntilSafeToSample = Math.max(0, mNumWordsUntilSafeToSample - numWords);
- break;
- }
- logUnit = shiftOut();
- }
- publish(logUnits, false /* canIncludePrivateData */);
- }
-
- /**
- * Called when a list of logUnits should be published.
- *
- * It is the subclass's responsibility to implement the publication.
- *
- * @param logUnits The list of logUnits to be published.
- * @param canIncludePrivateData Whether the private data in the logUnits can be included in
- * publication.
- *
- * @throws IOException if publication to the log file is not possible
- */
- protected abstract void publish(final ArrayList<LogUnit> logUnits,
- final boolean canIncludePrivateData) throws IOException;
-
- @Override
- protected int shiftOutWords(final int numWords) {
- final int numWordsShiftedOut = super.shiftOutWords(numWords);
- mNumWordsUntilSafeToSample = Math.max(0, mNumWordsUntilSafeToSample - numWordsShiftedOut);
- if (DEBUG) {
- Log.d(TAG, "wordsUntilSafeToSample now at " + mNumWordsUntilSafeToSample);
- }
- return numWordsShiftedOut;
- }
-}