aboutsummaryrefslogtreecommitdiffstats
path: root/java/src/com/android/inputmethod/research/MainLogBuffer.java
diff options
context:
space:
mode:
authorSatoshi Kataoka <satok@google.com>2012-08-06 16:04:07 +0900
committerSatoshi Kataoka <satok@google.com>2012-08-06 16:04:07 +0900
commit44456dcbf23c29b54e9372d932b79805957724cb (patch)
treea8167c70e9d7017f212066cf2364d1ad5b09fdca /java/src/com/android/inputmethod/research/MainLogBuffer.java
parent07e4f9eb530a795efd97986e28546c8c5e895c72 (diff)
parentd299bde24b4afd3955fa65c7ab372807a336c6df (diff)
downloadlatinime-44456dcbf23c29b54e9372d932b79805957724cb.tar.gz
latinime-44456dcbf23c29b54e9372d932b79805957724cb.tar.xz
latinime-44456dcbf23c29b54e9372d932b79805957724cb.zip
Merge remote-tracking branch 'goog/master' into mergescriptpackage
Conflicts: java/res/values-af/strings.xml java/res/values-cs/strings.xml java/res/values-et/strings.xml java/res/values-fi/strings.xml java/res/values-hi/strings.xml java/res/values-hr/strings.xml java/res/values-hu/strings.xml java/res/values-in/strings.xml java/res/values-ja/strings.xml java/res/values-ko/strings.xml java/res/values-lt/strings.xml java/res/values-lv/strings.xml java/res/values-ms/strings.xml java/res/values-pt/strings.xml java/res/values-ro/strings.xml java/res/values-ru/strings.xml java/res/values-sk/strings.xml java/res/values-sl/strings.xml java/res/values-sr/strings.xml java/res/values-sw/strings.xml java/res/values-tr/strings.xml java/res/values-vi/strings.xml java/res/values-zh-rCN/strings.xml java/res/values-zh-rTW/strings.xml Change-Id: Ib4141aca0b35148d62d22d4f32309f890c84303a
Diffstat (limited to 'java/src/com/android/inputmethod/research/MainLogBuffer.java')
-rw-r--r--java/src/com/android/inputmethod/research/MainLogBuffer.java127
1 files changed, 127 insertions, 0 deletions
diff --git a/java/src/com/android/inputmethod/research/MainLogBuffer.java b/java/src/com/android/inputmethod/research/MainLogBuffer.java
new file mode 100644
index 000000000..745768d35
--- /dev/null
+++ b/java/src/com/android/inputmethod/research/MainLogBuffer.java
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.android.inputmethod.research;
+
+import com.android.inputmethod.latin.Dictionary;
+import com.android.inputmethod.latin.Suggest;
+
+import java.util.Random;
+
+public class MainLogBuffer extends LogBuffer {
+ // The size of the n-grams logged. E.g. N_GRAM_SIZE = 2 means to sample bigrams.
+ private static final int N_GRAM_SIZE = 2;
+ // The number of words between n-grams to omit from the log.
+ private static final int DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES = 18;
+
+ private final ResearchLog mResearchLog;
+ private Suggest mSuggest;
+
+ // The minimum periodicity with which n-grams can be sampled. E.g. mWinWordPeriod is 10 if
+ // every 10th bigram is sampled, i.e., words 1-8 are not, but the bigram at words 9 and 10, etc.
+ // for 11-18, and the bigram at words 19 and 20. If an n-gram is not safe (e.g. it contains a
+ // number in the middle or an out-of-vocabulary word), then sampling is delayed until a safe
+ // n-gram does appear.
+ /* package for test */ int mMinWordPeriod;
+
+ // Counter for words left to suppress before an n-gram can be sampled. Reset to mMinWordPeriod
+ // after a sample is taken.
+ /* package for test */ int mWordsUntilSafeToSample;
+
+ public MainLogBuffer(final ResearchLog researchLog) {
+ super(N_GRAM_SIZE);
+ mResearchLog = researchLog;
+ mMinWordPeriod = DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES + N_GRAM_SIZE;
+ final Random random = new Random();
+ mWordsUntilSafeToSample = random.nextInt(mMinWordPeriod);
+ }
+
+ public void setSuggest(Suggest suggest) {
+ mSuggest = suggest;
+ }
+
+ @Override
+ public void shiftIn(final LogUnit newLogUnit) {
+ super.shiftIn(newLogUnit);
+ if (newLogUnit.hasWord()) {
+ if (mWordsUntilSafeToSample > 0) {
+ mWordsUntilSafeToSample--;
+ }
+ }
+ }
+
+ public void resetWordCounter() {
+ mWordsUntilSafeToSample = mMinWordPeriod;
+ }
+
+ /**
+ * Determines whether the content of the MainLogBuffer can be safely uploaded in its complete
+ * form and still protect the user's privacy.
+ *
+ * The size of the MainLogBuffer is just enough to hold one n-gram, its corrections, and any
+ * non-character data that is typed between words. The decision about privacy is made based on
+ * the buffer's entire content. If it is decided that the privacy risks are too great to upload
+ * the contents of this buffer, a censored version of the LogItems may still be uploaded. E.g.,
+ * the screen orientation and other characteristics about the device can be uploaded without
+ * revealing much about the user.
+ */
+ public boolean isSafeToLog() {
+ // Check that we are not sampling too frequently. Having sampled recently might disclose
+ // too much of the user's intended meaning.
+ if (mWordsUntilSafeToSample > 0) {
+ return false;
+ }
+ if (mSuggest == null || !mSuggest.hasMainDictionary()) {
+ // Main dictionary is unavailable. Since we cannot check it, we cannot tell if a word
+ // is out-of-vocabulary or not. Therefore, we must judge the entire buffer contents to
+ // potentially pose a privacy risk.
+ return false;
+ }
+ // Reload the dictionary in case it has changed (e.g., because the user has changed
+ // languages).
+ final Dictionary dictionary = mSuggest.getMainDictionary();
+ if (dictionary == null) {
+ return false;
+ }
+ // Check each word in the buffer. If any word poses a privacy threat, we cannot upload the
+ // complete buffer contents in detail.
+ final int length = mLogUnits.size();
+ for (int i = 0; i < length; i++) {
+ final LogUnit logUnit = mLogUnits.get(i);
+ final String word = logUnit.getWord();
+ if (word == null) {
+ // Digits outside words are a privacy threat.
+ if (logUnit.hasDigit()) {
+ return false;
+ }
+ } else {
+ // Words not in the dictionary are a privacy threat.
+ if (!(dictionary.isValidWord(word))) {
+ return false;
+ }
+ }
+ }
+ // All checks have passed; this buffer's content can be safely uploaded.
+ return true;
+ }
+
+ @Override
+ protected void onShiftOut(LogUnit logUnit) {
+ if (mResearchLog != null) {
+ mResearchLog.publish(logUnit, false /* isIncludingPrivateData */);
+ }
+ }
+}