aboutsummaryrefslogtreecommitdiffstats
path: root/java
diff options
context:
space:
mode:
Diffstat (limited to 'java')
-rw-r--r--java/src/com/android/inputmethod/research/FixedLogBuffer.java51
-rw-r--r--java/src/com/android/inputmethod/research/LogUnit.java99
-rw-r--r--java/src/com/android/inputmethod/research/MainLogBuffer.java44
-rw-r--r--java/src/com/android/inputmethod/research/ResearchLogger.java39
4 files changed, 147 insertions, 86 deletions
diff --git a/java/src/com/android/inputmethod/research/FixedLogBuffer.java b/java/src/com/android/inputmethod/research/FixedLogBuffer.java
index 641bf7eae..4249af544 100644
--- a/java/src/com/android/inputmethod/research/FixedLogBuffer.java
+++ b/java/src/com/android/inputmethod/research/FixedLogBuffer.java
@@ -57,28 +57,29 @@ public class FixedLogBuffer extends LogBuffer {
*/
@Override
public void shiftIn(final LogUnit newLogUnit) {
- if (!newLogUnit.hasWord()) {
- // This LogUnit isn't a word, so it doesn't count toward the word-limit.
+ if (!newLogUnit.hasOneOrMoreWords()) {
+ // This LogUnit doesn't contain any word, so it doesn't count toward the word-limit.
super.shiftIn(newLogUnit);
return;
}
+ final int numWordsIncoming = newLogUnit.getNumWords();
if (mNumActualWords >= mWordCapacity) {
// Give subclass a chance to handle the buffer full condition by shifting out logUnits.
onBufferFull();
// If still full, evict.
if (mNumActualWords >= mWordCapacity) {
- shiftOutWords(1);
+ shiftOutWords(numWordsIncoming);
}
}
super.shiftIn(newLogUnit);
- mNumActualWords++; // Must be a word, or we wouldn't be here.
+ mNumActualWords += numWordsIncoming;
}
@Override
public LogUnit unshiftIn() {
final LogUnit logUnit = super.unshiftIn();
- if (logUnit != null && logUnit.hasWord()) {
- mNumActualWords--;
+ if (logUnit != null && logUnit.hasOneOrMoreWords()) {
+ mNumActualWords -= logUnit.getNumWords();
}
return logUnit;
}
@@ -109,8 +110,8 @@ public class FixedLogBuffer extends LogBuffer {
@Override
public LogUnit shiftOut() {
final LogUnit logUnit = super.shiftOut();
- if (logUnit != null && logUnit.hasWord()) {
- mNumActualWords--;
+ if (logUnit != null && logUnit.hasOneOrMoreWords()) {
+ mNumActualWords -= logUnit.getNumWords();
}
return logUnit;
}
@@ -121,15 +122,15 @@ public class FixedLogBuffer extends LogBuffer {
* If there are less than {@code numWords} word-containing {@link LogUnit}s, shifts out
* all {@code LogUnit}s in the buffer.
*
- * @param numWords the number of word-containing {@link LogUnit}s to shift out
+ * @param numWords the minimum number of words to shift out
* @return the number of actual {@code LogUnit}s shifted out
*/
protected int shiftOutWords(final int numWords) {
int numWordContainingLogUnitsShiftedOut = 0;
for (LogUnit logUnit = shiftOut(); logUnit != null
&& numWordContainingLogUnitsShiftedOut < numWords; logUnit = shiftOut()) {
- if (logUnit.hasWord()) {
- numWordContainingLogUnitsShiftedOut++;
+ if (logUnit.hasOneOrMoreWords()) {
+ numWordContainingLogUnitsShiftedOut += logUnit.getNumWords();
}
}
return numWordContainingLogUnitsShiftedOut;
@@ -144,27 +145,31 @@ public class FixedLogBuffer extends LogBuffer {
}
/**
- * Returns a list of {@link LogUnit}s at the front of the buffer that have associated words. No
- * more than {@code n} LogUnits will have words associated with them. If there are not enough
- * LogUnits in the buffer to meet the word requirement, returns the all LogUnits.
+ * Returns a list of {@link LogUnit}s at the front of the buffer that have words associated with
+ * them.
+ *
+ * There will be no more than {@code n} words in the returned list. So if 2 words are
+ * requested, and the first LogUnit has 3 words, it is not returned. If 2 words are requested,
+ * and the first LogUnit has only 1 word, and the next LogUnit 2 words, only the first LogUnit
+ * is returned. If the first LogUnit has no words associated with it, and the second LogUnit
+ * has three words, then only the first LogUnit (which has no associated words) is returned. If
+ * there are not enough LogUnits in the buffer to meet the word requirement, then all LogUnits
+ * will be returned.
*
* @param n The maximum number of {@link LogUnit}s with words to return.
* @return The list of the {@link LogUnit}s containing the first n words
*/
public ArrayList<LogUnit> peekAtFirstNWords(int n) {
final LinkedList<LogUnit> logUnits = getLogUnits();
- final int length = logUnits.size();
// Allocate space for n*2 logUnits. There will be at least n, one for each word, and
// there may be additional for punctuation, between-word commands, etc. This should be
// enough that reallocation won't be necessary.
- final ArrayList<LogUnit> list = new ArrayList<LogUnit>(n * 2);
- for (int i = 0; i < length && n > 0; i++) {
- final LogUnit logUnit = logUnits.get(i);
- list.add(logUnit);
- if (logUnit.hasWord()) {
- n--;
- }
+ final ArrayList<LogUnit> resultList = new ArrayList<LogUnit>(n * 2);
+ for (final LogUnit logUnit : logUnits) {
+ n -= logUnit.getNumWords();
+ if (n < 0) break;
+ resultList.add(logUnit);
}
- return list;
+ return resultList;
}
}
diff --git a/java/src/com/android/inputmethod/research/LogUnit.java b/java/src/com/android/inputmethod/research/LogUnit.java
index 1c01675bd..4d60bda53 100644
--- a/java/src/com/android/inputmethod/research/LogUnit.java
+++ b/java/src/com/android/inputmethod/research/LogUnit.java
@@ -25,10 +25,10 @@ import com.android.inputmethod.latin.SuggestedWords;
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import com.android.inputmethod.latin.define.ProductionFlag;
-import java.io.IOException;
-import java.io.StringWriter;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.List;
+import java.util.regex.Pattern;
/**
* A group of log statements related to each other.
@@ -49,27 +49,45 @@ public class LogUnit {
private static final boolean DEBUG = false
&& ProductionFlag.USES_DEVELOPMENT_ONLY_DIAGNOSTICS_DEBUG;
+ private static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");
+ private static final String[] EMPTY_STRING_ARRAY = new String[0];
+
private final ArrayList<LogStatement> mLogStatementList;
private final ArrayList<Object[]> mValuesList;
// Assume that mTimeList is sorted in increasing order. Do not insert null values into
// mTimeList.
private final ArrayList<Long> mTimeList;
- // Word that this LogUnit generates. Should be null if the LogUnit does not generate a genuine
- // word (i.e. separators alone do not count as a word). Should never be empty.
- private String mWord;
+ // Words that this LogUnit generates. Should be null if the data in the LogUnit does not
+ // generate a genuine word (i.e. separators alone do not count as a word). Should never be
+ // empty. Note that if the user types spaces explicitly, then normally mWords should contain
+ // only a single word; it will only contain multiple space-separated words if the user does not
+ // enter a space, and the system enters one automatically.
+ private String mWords;
+ private String[] mWordArray = EMPTY_STRING_ARRAY;
private boolean mMayContainDigit;
private boolean mIsPartOfMegaword;
private boolean mContainsCorrection;
- // mCorrectionType indicates whether the word was corrected at all, and if so, whether it was
- // to a different word or just a "typo" correction. It is considered a "typo" if the final
- // word was listed in the suggestions available the first time the word was gestured or
- // tapped.
+ // mCorrectionType indicates whether the word was corrected at all, and if so, the nature of the
+ // correction.
private int mCorrectionType;
+ // LogUnits start in this state. If a word is entered without being corrected, it will have
+ // this CorrectionType.
public static final int CORRECTIONTYPE_NO_CORRECTION = 0;
+ // The LogUnit was corrected manually by the user in an unspecified way.
public static final int CORRECTIONTYPE_CORRECTION = 1;
+ // The LogUnit was corrected manually by the user to a word not in the list of suggestions of
+ // the first word typed here. (Note: this is a heuristic value, it may be incorrect, for
+ // example, if the user repositions the cursor).
public static final int CORRECTIONTYPE_DIFFERENT_WORD = 2;
+ // The LogUnit was corrected manually by the user to a word that was in the list of suggestions
+ // of the first word typed here. (Again, a heuristic). It is probably a typo correction.
public static final int CORRECTIONTYPE_TYPO = 3;
+ // TODO: Rather than just tracking the current state, keep a historical record of the LogUnit's
+ // state and statistics. This should include how many times it has been corrected, whether
+ // other LogUnit edits were done between edits to this LogUnit, etc. Also track when a LogUnit
+ // previously contained a word, but was corrected to empty (because it was deleted, and there is
+ // no known replacement).
private SuggestedWords mSuggestedWords;
@@ -166,7 +184,7 @@ public class LogUnit {
final LogStatement logStatement;
if (canIncludePrivateData) {
LOGSTATEMENT_LOG_UNIT_BEGIN_WITH_PRIVATE_DATA.outputToLocked(jsonWriter,
- SystemClock.uptimeMillis(), getWord(), getCorrectionType());
+ SystemClock.uptimeMillis(), getWordsAsString(), getCorrectionType());
} else {
LOGSTATEMENT_LOG_UNIT_BEGIN_WITHOUT_PRIVATE_DATA.outputToLocked(jsonWriter,
SystemClock.uptimeMillis());
@@ -181,22 +199,22 @@ public class LogUnit {
}
/**
- * Mark the current logUnit as containing data to generate {@code word}.
+ * Mark the current logUnit as containing data to generate {@code newWords}.
*
* If {@code setWord()} was previously called for this LogUnit, then the method will try to
* determine what kind of correction it is, and update its internal state of the correctionType
* accordingly.
*
- * @param word The word this LogUnit generates. Caller should not pass null or the empty
+ * @param newWords The words this LogUnit generates. Caller should not pass null or the empty
* string.
*/
- public void setWord(final String word) {
- if (hasWord()) {
+ public void setWords(final String newWords) {
+ if (hasOneOrMoreWords()) {
// The word was already set once, and it is now being changed. See if the new word
// is close to the old word. If so, then the change is probably a typo correction.
// If not, the user may have decided to enter a different word, so flag it.
if (mSuggestedWords != null) {
- if (isInSuggestedWords(word, mSuggestedWords)) {
+ if (isInSuggestedWords(newWords, mSuggestedWords)) {
mCorrectionType = CORRECTIONTYPE_TYPO;
} else {
mCorrectionType = CORRECTIONTYPE_DIFFERENT_WORD;
@@ -206,38 +224,71 @@ public class LogUnit {
// Mark it as a generic correction.
mCorrectionType = CORRECTIONTYPE_CORRECTION;
}
+ } else {
+ mCorrectionType = CORRECTIONTYPE_NO_CORRECTION;
+ }
+ mWords = newWords;
+
+ // Update mWordArray
+ mWordArray = (TextUtils.isEmpty(mWords)) ? EMPTY_STRING_ARRAY
+ : WHITESPACE_PATTERN.split(mWords);
+ if (mWordArray.length > 0 && TextUtils.isEmpty(mWordArray[0])) {
+ // Empty string at beginning of array. Must have been whitespace at the start of the
+ // word. Remove the empty string.
+ mWordArray = Arrays.copyOfRange(mWordArray, 1, mWordArray.length);
}
- mWord = word;
}
- public String getWord() {
- return mWord;
+ public String getWordsAsString() {
+ return mWords;
+ }
+
+ /**
+ * Returns the words generated by the data in this LogUnit.
+ *
+ * If the words string starts with whitespace, the empty string that splitting would leave at
+ * the front of the array is removed when the words are set, so it is not returned here.
+ *
+ * @return the array of words; an empty array if there are no words associated with this LogUnit.
+ */
+ public String[] getWordsAsStringArray() {
+ return mWordArray;
+ }
+
+ public boolean hasOneOrMoreWords() {
+ return mWordArray.length >= 1;
}
- public boolean hasWord() {
- return mWord != null && !TextUtils.isEmpty(mWord.trim());
+ public int getNumWords() {
+ return mWordArray.length;
}
+ // TODO: Refactor to eliminate getter/setters
public void setMayContainDigit() {
mMayContainDigit = true;
}
+ // TODO: Refactor to eliminate getter/setters
public boolean mayContainDigit() {
return mMayContainDigit;
}
+ // TODO: Refactor to eliminate getter/setters
public void setContainsCorrection() {
mContainsCorrection = true;
}
+ // TODO: Refactor to eliminate getter/setters
public boolean containsCorrection() {
return mContainsCorrection;
}
+ // TODO: Refactor to eliminate getter/setters
public void setCorrectionType(final int correctionType) {
mCorrectionType = correctionType;
}
+ // TODO: Refactor to eliminate getter/setters
public int getCorrectionType() {
return mCorrectionType;
}
@@ -267,7 +318,7 @@ public class LogUnit {
new ArrayList<Object[]>(laterValues),
new ArrayList<Long>(laterTimes),
true /* isPartOfMegaword */);
- newLogUnit.mWord = null;
+ newLogUnit.mWords = null;
newLogUnit.mMayContainDigit = mMayContainDigit;
newLogUnit.mContainsCorrection = mContainsCorrection;
@@ -287,9 +338,9 @@ public class LogUnit {
mLogStatementList.addAll(logUnit.mLogStatementList);
mValuesList.addAll(logUnit.mValuesList);
mTimeList.addAll(logUnit.mTimeList);
- mWord = null;
- if (logUnit.mWord != null) {
- setWord(logUnit.mWord);
+ mWords = null;
+ if (logUnit.mWords != null) {
+ setWords(logUnit.mWords);
}
mMayContainDigit = mMayContainDigit || logUnit.mMayContainDigit;
mContainsCorrection = mContainsCorrection || logUnit.mContainsCorrection;
diff --git a/java/src/com/android/inputmethod/research/MainLogBuffer.java b/java/src/com/android/inputmethod/research/MainLogBuffer.java
index cd4c1db6e..42ef5d3b6 100644
--- a/java/src/com/android/inputmethod/research/MainLogBuffer.java
+++ b/java/src/com/android/inputmethod/research/MainLogBuffer.java
@@ -126,10 +126,7 @@ public abstract class MainLogBuffer extends FixedLogBuffer {
final int length = logUnits.size();
for (int i = 0; i < length; i++) {
final LogUnit logUnit = logUnits.get(i);
- final String word = logUnit.getWord();
- if (word != null) {
- numWordsInLogUnitList++;
- }
+ numWordsInLogUnitList += logUnit.getNumWords();
}
return numWordsInLogUnitList >= minNGramSize;
}
@@ -153,29 +150,31 @@ public abstract class MainLogBuffer extends FixedLogBuffer {
// the complete buffer contents in detail.
int numWordsInLogUnitList = 0;
final int length = logUnits.size();
- for (int i = 0; i < length; i++) {
- final LogUnit logUnit = logUnits.get(i);
- if (!logUnit.hasWord()) {
+ for (final LogUnit logUnit : logUnits) {
+ if (!logUnit.hasOneOrMoreWords()) {
// Digits outside words are a privacy threat.
if (logUnit.mayContainDigit()) {
return false;
}
} else {
- numWordsInLogUnitList++;
- final String word = logUnit.getWord();
- // Words not in the dictionary are a privacy threat.
- if (ResearchLogger.hasLetters(word) && !(dictionary.isValidWord(word))) {
- if (DEBUG) {
- Log.d(TAG, "NOT SAFE!: hasLetters: " + ResearchLogger.hasLetters(word)
- + ", isValid: " + (dictionary.isValidWord(word)));
+ numWordsInLogUnitList += logUnit.getNumWords();
+ final String[] words = logUnit.getWordsAsStringArray();
+ for (final String word : words) {
+ // Words not in the dictionary are a privacy threat.
+ if (ResearchLogger.hasLetters(word) && !(dictionary.isValidWord(word))) {
+ if (DEBUG) {
+ Log.d(TAG, "\"" + word + "\" NOT SAFE!: hasLetters: "
+ + ResearchLogger.hasLetters(word)
+ + ", isValid: " + (dictionary.isValidWord(word)));
+ }
+ return false;
}
- return false;
}
}
}
- // Finally, only return true if the minNGramSize is met.
- return numWordsInLogUnitList >= minNGramSize;
+ // Finally, only return true if the ngram is the right size.
+ return numWordsInLogUnitList == minNGramSize;
}
public void shiftAndPublishAll() {
@@ -198,11 +197,14 @@ public abstract class MainLogBuffer extends FixedLogBuffer {
shiftOutWords(N_GRAM_SIZE);
mNumWordsUntilSafeToSample = mNumWordsBetweenNGrams;
} else {
- // No good n-gram at front, and buffer is full. Shift out the first word (or if there
- // is none, the existing logUnits).
- logUnits = peekAtFirstNWords(1);
+ // No good n-gram at front, and buffer is full. Shift out up through the first logUnit
+ // with associated words (or if there is none, all the existing logUnits).
+ logUnits.clear();
+ for (LogUnit logUnit = shiftOut(); logUnit != null && !logUnit.hasOneOrMoreWords();
+ logUnit = shiftOut()) {
+ logUnits.add(logUnit);
+ }
publish(logUnits, false /* canIncludePrivateData */);
- shiftOutWords(1);
}
}
diff --git a/java/src/com/android/inputmethod/research/ResearchLogger.java b/java/src/com/android/inputmethod/research/ResearchLogger.java
index cd18e3de6..1f6845c8b 100644
--- a/java/src/com/android/inputmethod/research/ResearchLogger.java
+++ b/java/src/com/android/inputmethod/research/ResearchLogger.java
@@ -397,13 +397,14 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
protected void publish(final ArrayList<LogUnit> logUnits,
boolean canIncludePrivateData) {
canIncludePrivateData |= IS_LOGGING_EVERYTHING;
- final int length = logUnits.size();
- for (int i = 0; i < length; i++) {
- final LogUnit logUnit = logUnits.get(i);
- final String word = logUnit.getWord();
- if (word != null && word.length() > 0 && hasLetters(word)) {
- Log.d(TAG, "onPublish: " + word + ", hc: "
- + logUnit.containsCorrection());
+ for (final LogUnit logUnit : logUnits) {
+ if (DEBUG) {
+ final String wordsString = logUnit.getWordsAsString();
+ Log.d(TAG, "onPublish: '" + wordsString
+ + "', hc: " + logUnit.containsCorrection()
+ + ", cipd: " + canIncludePrivateData);
+ }
+ for (final String word : logUnit.getWordsAsStringArray()) {
final Dictionary dictionary = getDictionary();
mStatistics.recordWordEntered(
dictionary != null && dictionary.isValidWord(word),
@@ -852,8 +853,8 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
/* package for test */ void commitCurrentLogUnit() {
if (DEBUG) {
- Log.d(TAG, "commitCurrentLogUnit" + (mCurrentLogUnit.hasWord() ?
- ": " + mCurrentLogUnit.getWord() : ""));
+ Log.d(TAG, "commitCurrentLogUnit" + (mCurrentLogUnit.hasOneOrMoreWords() ?
+ ": " + mCurrentLogUnit.getWordsAsString() : ""));
}
if (!mCurrentLogUnit.isEmpty()) {
if (mMainLogBuffer != null) {
@@ -893,8 +894,8 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
// Check that expected word matches.
if (oldLogUnit != null) {
- final String oldLogUnitWord = oldLogUnit.getWord();
- if (oldLogUnitWord != null && !oldLogUnitWord.equals(expectedWord)) {
+ final String oldLogUnitWords = oldLogUnit.getWordsAsString();
+ if (oldLogUnitWords != null && !oldLogUnitWords.equals(expectedWord)) {
return;
}
}
@@ -916,7 +917,8 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
enqueueEvent(LOGSTATEMENT_UNCOMMIT_CURRENT_LOGUNIT);
if (DEBUG) {
Log.d(TAG, "uncommitCurrentLogUnit (dump=" + dumpCurrentLogUnit + ") back to "
- + (mCurrentLogUnit.hasWord() ? ": '" + mCurrentLogUnit.getWord() + "'" : ""));
+ + (mCurrentLogUnit.hasOneOrMoreWords() ? ": '"
+ + mCurrentLogUnit.getWordsAsString() + "'" : ""));
}
}
@@ -950,8 +952,9 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
}
for (LogUnit logUnit : logUnits) {
if (DEBUG) {
- Log.d(TAG, "publishLogBuffer: " + (logUnit.hasWord() ? logUnit.getWord()
- : "<wordless>") + ", correction?: " + logUnit.containsCorrection());
+ Log.d(TAG, "publishLogBuffer: " + (logUnit.hasOneOrMoreWords()
+ ? logUnit.getWordsAsString() : "<wordless>")
+ + ", correction?: " + logUnit.containsCorrection());
}
researchLog.publish(logUnit, canIncludePrivateData);
}
@@ -986,7 +989,7 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
return;
}
if (word.length() > 0 && hasLetters(word)) {
- mCurrentLogUnit.setWord(word);
+ mCurrentLogUnit.setWords(word);
}
final LogUnit newLogUnit = mCurrentLogUnit.splitByTime(maxTime);
enqueueCommitText(word, isBatchMode);
@@ -1478,7 +1481,7 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
}
if (originallyTypedWord.length() > 0 && hasLetters(originallyTypedWord)) {
if (logUnit != null) {
- logUnit.setWord(originallyTypedWord);
+ logUnit.setWords(originallyTypedWord);
}
}
researchLogger.enqueueEvent(logUnit != null ? logUnit : researchLogger.mCurrentLogUnit,
@@ -1616,7 +1619,7 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
* Log a call to LatinIME.commitCurrentAutoCorrection().
*
* SystemResponse: The IME has committed an auto-correction. An auto-correction changes the raw
- * text input to another word that the user more likely desired to type.
+ * text input to another word (or words) that the user more likely desired to type.
*/
private static final LogStatement LOGSTATEMENT_LATINIME_COMMITCURRENTAUTOCORRECTION =
new LogStatement("LatinIMECommitCurrentAutoCorrection", true, true, "typedWord",
@@ -1827,7 +1830,7 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
final int enteredWordPos, final SuggestedWords suggestedWords) {
final ResearchLogger researchLogger = getInstance();
if (!TextUtils.isEmpty(enteredText) && hasLetters(enteredText.toString())) {
- researchLogger.mCurrentLogUnit.setWord(enteredText.toString());
+ researchLogger.mCurrentLogUnit.setWords(enteredText.toString());
}
researchLogger.enqueueEvent(LOGSTATEMENT_LATINIME_ONENDBATCHINPUT, enteredText,
enteredWordPos);