From 28d765ed901bfd1e736056db1cd807c13ef88c35 Mon Sep 17 00:00:00 2001 From: Jean Chalard Date: Mon, 10 Sep 2012 19:27:45 +0900 Subject: Make Latin IME aware of its surrounding text. This is a preparatory change for Bug: 4967874 Bug: 6617760 Bug: 6950087 Change-Id: I3abf8e45c0d02c42491421f108370220134b9602 --- .../inputmethod/latin/RichInputConnection.java | 179 ++++++++++++++++++++- 1 file changed, 178 insertions(+), 1 deletion(-) (limited to 'java/src/com/android/inputmethod/latin/RichInputConnection.java') diff --git a/java/src/com/android/inputmethod/latin/RichInputConnection.java b/java/src/com/android/inputmethod/latin/RichInputConnection.java index 41e59e92d..37e1dbb69 100644 --- a/java/src/com/android/inputmethod/latin/RichInputConnection.java +++ b/java/src/com/android/inputmethod/latin/RichInputConnection.java @@ -33,16 +33,47 @@ import com.android.inputmethod.research.ResearchLogger; import java.util.regex.Pattern; /** - * Wrapper for InputConnection to simplify interaction + * Enrichment class for InputConnection to simplify interaction and add functionality. + * + * This class serves as a wrapper to be able to simply add hooks to any calls to the underlying + * InputConnection. It also keeps track of a number of things to avoid having to call upon IPC + * all the time to find out what text is in the buffer, when we need it to determine caps mode + * for example. */ public class RichInputConnection { private static final String TAG = RichInputConnection.class.getSimpleName(); private static final boolean DBG = false; + private static final boolean DEBUG_PREVIOUS_TEXT = false; // Provision for a long word pair and a separator private static final int LOOKBACK_CHARACTER_NUM = BinaryDictionary.MAX_WORD_LENGTH * 2 + 1; private static final Pattern spaceRegex = Pattern.compile("\\s+"); private static final int INVALID_CURSOR_POSITION = -1; + /** + * This variable contains the value LatinIME thinks the cursor position should be at now. + * This is a few steps in advance of what the TextView thinks it is, because TextView will + * only know after the IPC calls gets through. + */ + private int mCurrentCursorPosition = INVALID_CURSOR_POSITION; // in chars, not code points + /** + * This contains the committed text immediately preceding the cursor and the composing + * text if any. It is refreshed when the cursor moves by calling upon the TextView. + */ + private StringBuilder mCommittedTextBeforeComposingText = new StringBuilder(); + /** + * This contains the currently composing text, as LatinIME thinks the TextView is seeing it. + */ + private StringBuilder mComposingText = new StringBuilder(); + /** + * This is a one-character string containing the character after the cursor. Since LatinIME + * never touches it directly, it's never modified by any means other than re-reading from the + * TextView when the cursor position is changed by the user. + */ + private CharSequence mCharAfterTheCursor = ""; + // A hint on how many characters to cache from the TextView. A good value of this is given by + // how many characters we need to be able to almost always find the caps mode. + private static final int DEFAULT_TEXT_CACHE_SIZE = 100; + private final InputMethodService mParent; InputConnection mIC; int mNestLevel; @@ -52,6 +83,37 @@ public class RichInputConnection { mNestLevel = 0; } + private void checkConsistencyForDebug() { + final ExtractedTextRequest r = new ExtractedTextRequest(); + r.hintMaxChars = 0; + r.hintMaxLines = 0; + r.token = 1; + r.flags = 0; + final ExtractedText et = mIC.getExtractedText(r, 0); + final CharSequence beforeCursor = getTextBeforeCursor(DEFAULT_TEXT_CACHE_SIZE, 0); + final StringBuilder internal = new StringBuilder().append(mCommittedTextBeforeComposingText) + .append(mComposingText); + if (null == et || null == beforeCursor) return; + final int actualLength = Math.min(beforeCursor.length(), internal.length()); + if (internal.length() > actualLength) { + internal.delete(0, internal.length() - actualLength); + } + final String reference = (beforeCursor.length() <= actualLength) ? beforeCursor.toString() + : beforeCursor.subSequence(beforeCursor.length() - actualLength, + beforeCursor.length()).toString(); + if (et.selectionStart != mCurrentCursorPosition + || !(reference.equals(internal.toString()))) { + final String context = "Expected cursor position = " + mCurrentCursorPosition + + "\nActual cursor position = " + et.selectionStart + + "\nExpected text = " + internal.length() + " " + internal + + "\nActual text = " + reference.length() + " " + reference; + ((LatinIME)mParent).debugDumpStateAndCrashWithException(context); + } else { + Log.e(TAG, Utils.getStackTrace(2)); + Log.e(TAG, "Exp <> Actual : " + mCurrentCursorPosition + " <> " + et.selectionStart); + } + } + public void beginBatchEdit() { if (++mNestLevel == 1) { mIC = mParent.getCurrentInputConnection(); @@ -65,12 +127,30 @@ public class RichInputConnection { Log.e(TAG, "Nest level too deep : " + mNestLevel); } } + checkBatchEdit(); + if (DEBUG_PREVIOUS_TEXT) checkConsistencyForDebug(); } + public void endBatchEdit() { if (mNestLevel <= 0) Log.e(TAG, "Batch edit not in progress!"); // TODO: exception instead if (--mNestLevel == 0 && null != mIC) { mIC.endBatchEdit(); } + if (DEBUG_PREVIOUS_TEXT) checkConsistencyForDebug(); + } + + public void resetCachesUponCursorMove(final int newCursorPosition) { + mCurrentCursorPosition = newCursorPosition; + mComposingText.setLength(0); + mCommittedTextBeforeComposingText.setLength(0); + mCommittedTextBeforeComposingText.append(getTextBeforeCursor(DEFAULT_TEXT_CACHE_SIZE, 0)); + mCharAfterTheCursor = getTextAfterCursor(1, 0); + if (null != mIC) { + mIC.finishComposingText(); + if (ProductionFlag.IS_EXPERIMENTAL) { + ResearchLogger.richInputConnection_finishComposingText(); + } + } } private void checkBatchEdit() { @@ -83,6 +163,10 @@ public class RichInputConnection { public void finishComposingText() { checkBatchEdit(); + if (DEBUG_PREVIOUS_TEXT) checkConsistencyForDebug(); + mCommittedTextBeforeComposingText.append(mComposingText); + mCurrentCursorPosition += mComposingText.length(); + mComposingText.setLength(0); if (null != mIC) { mIC.finishComposingText(); if (ProductionFlag.IS_EXPERIMENTAL) { @@ -93,6 +177,10 @@ public class RichInputConnection { public void commitText(final CharSequence text, final int i) { checkBatchEdit(); + if (DEBUG_PREVIOUS_TEXT) checkConsistencyForDebug(); + mCommittedTextBeforeComposingText.append(text); + mCurrentCursorPosition += text.length() - mComposingText.length(); + mComposingText.setLength(0); if (null != mIC) { mIC.commitText(text, i); if (ProductionFlag.IS_EXPERIMENTAL) { @@ -121,12 +209,28 @@ public class RichInputConnection { public void deleteSurroundingText(final int i, final int j) { checkBatchEdit(); + final int remainingChars = mComposingText.length() - i; + if (remainingChars >= 0) { + mComposingText.setLength(remainingChars); + } else { + mComposingText.setLength(0); + // Never cut under 0 + final int len = Math.max(mCommittedTextBeforeComposingText.length() + + remainingChars, 0); + mCommittedTextBeforeComposingText.setLength(len); + } + if (mCurrentCursorPosition > i) { + mCurrentCursorPosition -= i; + } else { + mCurrentCursorPosition = 0; + } if (null != mIC) { mIC.deleteSurroundingText(i, j); if (ProductionFlag.IS_EXPERIMENTAL) { ResearchLogger.richInputConnection_deleteSurroundingText(i, j); } } + if (DEBUG_PREVIOUS_TEXT) checkConsistencyForDebug(); } public void performEditorAction(final int actionId) { @@ -141,6 +245,44 @@ public class RichInputConnection { public void sendKeyEvent(final KeyEvent keyEvent) { checkBatchEdit(); + if (keyEvent.getAction() == KeyEvent.ACTION_DOWN) { + if (DEBUG_PREVIOUS_TEXT) checkConsistencyForDebug(); + // This method is only called for enter or backspace when speaking to old + // applications (target SDK <= 15), or for digits. + // When talking to new applications we never use this method because it's inherently + // racy and has unpredictable results, but for backward compatibility we continue + // sending the key events for only Enter and Backspace because some applications + // mistakenly catch them to do some stuff. + switch (keyEvent.getKeyCode()) { + case KeyEvent.KEYCODE_ENTER: + mCommittedTextBeforeComposingText.append("\n"); + mCurrentCursorPosition += 1; + break; + case KeyEvent.KEYCODE_DEL: + if (0 == mComposingText.length()) { + if (mCommittedTextBeforeComposingText.length() > 0) { + mCommittedTextBeforeComposingText.delete( + mCommittedTextBeforeComposingText.length() - 1, + mCommittedTextBeforeComposingText.length()); + } + } else { + mComposingText.delete(mComposingText.length() - 1, mComposingText.length()); + } + if (mCurrentCursorPosition > 0) mCurrentCursorPosition -= 1; + break; + case KeyEvent.KEYCODE_UNKNOWN: + if (null != keyEvent.getCharacters()) { + mCommittedTextBeforeComposingText.append(keyEvent.getCharacters()); + mCurrentCursorPosition += keyEvent.getCharacters().length(); + } + break; + default: + final String text = new String(new int[] { keyEvent.getUnicodeChar() }, 0, 1); + mCommittedTextBeforeComposingText.append(text); + mCurrentCursorPosition += text.length(); + break; + } + } if (null != mIC) { mIC.sendKeyEvent(keyEvent); if (ProductionFlag.IS_EXPERIMENTAL) { @@ -151,48 +293,83 @@ public class RichInputConnection { public void setComposingText(final CharSequence text, final int i) { checkBatchEdit(); + if (DEBUG_PREVIOUS_TEXT) checkConsistencyForDebug(); + mCurrentCursorPosition += text.length() - mComposingText.length(); + mComposingText.setLength(0); + mComposingText.append(text); + // TODO: support values of i != 1. At this time, this is never called with i != 1. if (null != mIC) { mIC.setComposingText(text, i); if (ProductionFlag.IS_EXPERIMENTAL) { ResearchLogger.richInputConnection_setComposingText(text, i); } } + if (DEBUG_PREVIOUS_TEXT) checkConsistencyForDebug(); } public void setSelection(final int from, final int to) { checkBatchEdit(); + if (DEBUG_PREVIOUS_TEXT) checkConsistencyForDebug(); if (null != mIC) { mIC.setSelection(from, to); if (ProductionFlag.IS_EXPERIMENTAL) { ResearchLogger.richInputConnection_setSelection(from, to); } } + mCurrentCursorPosition = from; + mCommittedTextBeforeComposingText.setLength(0); + mCommittedTextBeforeComposingText.append(getTextBeforeCursor(DEFAULT_TEXT_CACHE_SIZE, 0)); } public void commitCorrection(final CorrectionInfo correctionInfo) { checkBatchEdit(); + if (DEBUG_PREVIOUS_TEXT) checkConsistencyForDebug(); + // This has no effect on the text field and does not change its content. It only makes + // TextView flash the text for a second based on indices contained in the argument. if (null != mIC) { mIC.commitCorrection(correctionInfo); if (ProductionFlag.IS_EXPERIMENTAL) { ResearchLogger.richInputConnection_commitCorrection(correctionInfo); } } + if (DEBUG_PREVIOUS_TEXT) checkConsistencyForDebug(); } public void commitCompletion(final CompletionInfo completionInfo) { checkBatchEdit(); + if (DEBUG_PREVIOUS_TEXT) checkConsistencyForDebug(); + final CharSequence text = completionInfo.getText(); + mCommittedTextBeforeComposingText.append(text); + mCurrentCursorPosition += text.length() - mComposingText.length(); + mComposingText.setLength(0); if (null != mIC) { mIC.commitCompletion(completionInfo); if (ProductionFlag.IS_EXPERIMENTAL) { ResearchLogger.richInputConnection_commitCompletion(completionInfo); } } + if (DEBUG_PREVIOUS_TEXT) checkConsistencyForDebug(); } public CharSequence getNthPreviousWord(final String sentenceSeperators, final int n) { mIC = mParent.getCurrentInputConnection(); if (null == mIC) return null; final CharSequence prev = mIC.getTextBeforeCursor(LOOKBACK_CHARACTER_NUM, 0); + if (DEBUG_PREVIOUS_TEXT && null != prev) { + final int checkLength = LOOKBACK_CHARACTER_NUM - 1; + final String reference = prev.length() <= checkLength ? prev.toString() + : prev.subSequence(prev.length() - checkLength, prev.length()).toString(); + final StringBuilder internal = new StringBuilder() + .append(mCommittedTextBeforeComposingText).append(mComposingText); + if (internal.length() > checkLength) { + internal.delete(0, internal.length() - checkLength); + if (!(reference.equals(internal.toString()))) { + final String context = + "Expected text = " + internal + "\nActual text = " + reference; + ((LatinIME)mParent).debugDumpStateAndCrashWithException(context); + } + } + } return getNthPreviousWord(prev, sentenceSeperators, n); } -- cgit v1.2.3-83-g751a From 5ed88457bf9ef3305d4a5aa4ac05b513433ad0dd Mon Sep 17 00:00:00 2001 From: Jean Chalard Date: Wed, 12 Sep 2012 12:40:36 +0900 Subject: Make onUpdateSelection much more resilient to race conditions. This is pretty much as strong as it gets. It should be impossible to get false positives and nearly impossible to get true negatives with this new code. Bug: 6981089 Change-Id: Ia32ab62f89c5943f0be169b979abab652e67bf5b --- .../com/android/inputmethod/latin/LatinIME.java | 3 ++- .../inputmethod/latin/RichInputConnection.java | 30 ++++++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) (limited to 'java/src/com/android/inputmethod/latin/RichInputConnection.java') diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java index 78c65e0c7..b3f7e674d 100644 --- a/java/src/com/android/inputmethod/latin/LatinIME.java +++ b/java/src/com/android/inputmethod/latin/LatinIME.java @@ -825,7 +825,8 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen // we know for sure the cursor moved while we were composing and we should reset // the state. final boolean noComposingSpan = composingSpanStart == -1 && composingSpanEnd == -1; - if (!mExpectingUpdateSelection) { + if (!mExpectingUpdateSelection + && !mConnection.isBelatedExpectedUpdate(oldSelStart, newSelStart)) { // TAKE CARE: there is a race condition when we enter this test even when the user // did not explicitly move the cursor. This happens when typing fast, where two keys // turn this flag on in succession and both onUpdateSelection() calls arrive after diff --git a/java/src/com/android/inputmethod/latin/RichInputConnection.java b/java/src/com/android/inputmethod/latin/RichInputConnection.java index 37e1dbb69..ce7049f4f 100644 --- a/java/src/com/android/inputmethod/latin/RichInputConnection.java +++ b/java/src/com/android/inputmethod/latin/RichInputConnection.java @@ -629,4 +629,34 @@ public class RichInputConnection { commitText(" " + textBeforeCursor.subSequence(0, 1), 1); return true; } + + /** + * Heuristic to determine if this is an expected update of the cursor. + * + * Sometimes updates to the cursor position are late because of their asynchronous nature. + * This method tries to determine if this update is one, based on the values of the cursor + * position in the update, and the currently expected position of the cursor according to + * LatinIME's internal accounting. If this is not a belated expected update, then it should + * mean that the user moved the cursor explicitly. + * This is quite robust, but of course it's not perfect. In particular, it will fail in the + * case we get an update A, the user types in N characters so as to move the cursor to A+N but + * we don't get those, and then the user places the cursor between A and A+N, and we get only + * this update and not the ones in-between. This is almost impossible to achieve even trying + * very very hard. + * + * @param oldSelStart The value of the old cursor position in the update. + * @param newSelStart The value of the new cursor position in the update. + * @return whether this is a belated expected update or not. + */ + public boolean isBelatedExpectedUpdate(final int oldSelStart, final int newSelStart) { + // If this is an update that arrives at our expected position, it's a belated update. + if (newSelStart == mCurrentCursorPosition) return true; + // If this is an update that moves the cursor from our expected position, it must be + // an explicit move. + if (oldSelStart == mCurrentCursorPosition) return false; + // The following returns true if newSelStart is between oldSelStart and + // mCurrentCursorPosition. We assume that if the updated position is between the old + // position and the expected position, then it must be a belated update. + return (newSelStart - oldSelStart) * (mCurrentCursorPosition - newSelStart) >= 0; + } } -- cgit v1.2.3-83-g751a From 9d1c73ffd88cd1bfef3de048b0b3a9a7dfbcfa70 Mon Sep 17 00:00:00 2001 From: Jean Chalard Date: Mon, 10 Sep 2012 19:27:45 +0900 Subject: Import TextUtils.getCapsMode to fix it internally (A1) This should have on effect at all on behavior, except an increase in performance. Bug: 4967874 Bug: 6950087 Change-Id: Ie2b51efefe84ca767f5dc8e3b80bfef7e1faab3d --- .../com/android/inputmethod/latin/LatinIME.java | 4 +- .../inputmethod/latin/RichInputConnection.java | 15 +++- .../com/android/inputmethod/latin/StringUtils.java | 94 ++++++++++++++++++++++ 3 files changed, 110 insertions(+), 3 deletions(-) (limited to 'java/src/com/android/inputmethod/latin/RichInputConnection.java') diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java index 78c65e0c7..d8b1c292b 100644 --- a/java/src/com/android/inputmethod/latin/LatinIME.java +++ b/java/src/com/android/inputmethod/latin/LatinIME.java @@ -700,6 +700,8 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen } } + mConnection.resetCachesUponCursorMove(mLastSelectionStart); + if (isDifferentTextField) { mainKeyboardView.closing(); loadSettings(); @@ -733,8 +735,6 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen mainKeyboardView.setGesturePreviewMode(mCurrentSettings.mGesturePreviewTrailEnabled, mCurrentSettings.mGestureFloatingPreviewTextEnabled); - mConnection.resetCachesUponCursorMove(mLastSelectionStart); - if (TRACE) Debug.startMethodTracing("/data/trace/latinime"); } diff --git a/java/src/com/android/inputmethod/latin/RichInputConnection.java b/java/src/com/android/inputmethod/latin/RichInputConnection.java index 37e1dbb69..efda623e5 100644 --- a/java/src/com/android/inputmethod/latin/RichInputConnection.java +++ b/java/src/com/android/inputmethod/latin/RichInputConnection.java @@ -192,7 +192,20 @@ public class RichInputConnection { public int getCursorCapsMode(final int inputType) { mIC = mParent.getCurrentInputConnection(); if (null == mIC) return Constants.TextUtils.CAP_MODE_OFF; - return mIC.getCursorCapsMode(inputType); + if (!TextUtils.isEmpty(mComposingText)) return Constants.TextUtils.CAP_MODE_OFF; + // TODO: this will generally work, but there may be cases where the buffer contains SOME + // information but not enough to determine the caps mode accurately. This may happen after + // heavy pressing of delete, for example DEFAULT_TEXT_CACHE_SIZE - 5 times or so. + // getCapsMode should be updated to be able to return a "not enough info" result so that + // we can get more context only when needed. + if (TextUtils.isEmpty(mCommittedTextBeforeComposingText) && 0 != mCurrentCursorPosition) { + mCommittedTextBeforeComposingText.append( + getTextBeforeCursor(DEFAULT_TEXT_CACHE_SIZE, 0)); + } + // This never calls InputConnection#getCapsMode - in fact, it's a static method that + // never blocks or initiates IPC. + return StringUtils.getCapsMode(mCommittedTextBeforeComposingText, + mCommittedTextBeforeComposingText.length(), inputType); } public CharSequence getTextBeforeCursor(final int i, final int j) { diff --git a/java/src/com/android/inputmethod/latin/StringUtils.java b/java/src/com/android/inputmethod/latin/StringUtils.java index 9c47a38c2..d6509d6a6 100644 --- a/java/src/com/android/inputmethod/latin/StringUtils.java +++ b/java/src/com/android/inputmethod/latin/StringUtils.java @@ -197,4 +197,98 @@ public final class StringUtils { codePoints[dsti] = codePoint; return codePoints; } + + /** + * Determine what caps mode should be in effect at the current offset in + * the text. Only the mode bits set in reqModes will be + * checked. Note that the caps mode flags here are explicitly defined + * to match those in {@link InputType}. + * + * This code is a straight copy of TextUtils.getCapsMode (modulo namespace and formatting + * issues). This will change in the future as we simplify the code for our use and fix bugs. + * + * @param cs The text that should be checked for caps modes. + * @param off Location in the text at which to check. + * @param reqModes The modes to be checked: may be any combination of + * {@link #CAP_MODE_CHARACTERS}, {@link #CAP_MODE_WORDS}, and + * {@link #CAP_MODE_SENTENCES}. + * + * @return Returns the actual capitalization modes that can be in effect + * at the current position, which is any combination of + * {@link #CAP_MODE_CHARACTERS}, {@link #CAP_MODE_WORDS}, and + * {@link #CAP_MODE_SENTENCES}. + */ + public static int getCapsMode(CharSequence cs, int off, int reqModes) { + if (off < 0) { + return 0; + } + + int i; + char c; + int mode = 0; + + if ((reqModes & TextUtils.CAP_MODE_CHARACTERS) != 0) { + mode |= TextUtils.CAP_MODE_CHARACTERS; + } + if ((reqModes & (TextUtils.CAP_MODE_WORDS | TextUtils.CAP_MODE_SENTENCES)) == 0) { + return mode; + } + + // Back over allowed opening punctuation. + for (i = off; i > 0; i--) { + c = cs.charAt(i - 1); + if (c != '"' && c != '\'' && Character.getType(c) != Character.START_PUNCTUATION) { + break; + } + } + + // Start of paragraph, with optional whitespace. + int j = i; + while (j > 0 && ((c = cs.charAt(j - 1)) == ' ' || c == '\t')) { + j--; + } + if (j == 0 || cs.charAt(j - 1) == '\n') { + return mode | TextUtils.CAP_MODE_WORDS; + } + + // Or start of word if we are that style. + if ((reqModes & TextUtils.CAP_MODE_SENTENCES) == 0) { + if (i != j) mode |= TextUtils.CAP_MODE_WORDS; + return mode; + } + + // There must be a space if not the start of paragraph. + if (i == j) { + return mode; + } + + // Back over allowed closing punctuation. + for (; j > 0; j--) { + c = cs.charAt(j - 1); + if (c != '"' && c != '\'' && Character.getType(c) != Character.END_PUNCTUATION) { + break; + } + } + + if (j > 0) { + c = cs.charAt(j - 1); + if (c == '.' || c == '?' || c == '!') { + // Do not capitalize if the word ends with a period but + // also contains a period, in which case it is an abbreviation. + if (c == '.') { + for (int k = j - 2; k >= 0; k--) { + c = cs.charAt(k); + if (c == '.') { + return mode; + } + if (!Character.isLetter(c)) { + break; + } + } + } + return mode | TextUtils.CAP_MODE_SENTENCES; + } + } + return mode; + } } -- cgit v1.2.3-83-g751a From 3d54e1c1eccf58e184c065ebe78f0f467cd04606 Mon Sep 17 00:00:00 2001 From: Jean Chalard Date: Thu, 13 Sep 2012 14:56:56 +0900 Subject: Simplify a call, and add comments (A2) Since the function has to be modified heavily but does a lot of non-trivial work, add a wealth of comments explaining what it does and why so as to facilitate understanding the changes to come. Bug: 4967874 Change-Id: I6c21aea15f161d807035f279dfb7d1b98b3e9144 --- .../inputmethod/latin/RichInputConnection.java | 3 +- .../com/android/inputmethod/latin/StringUtils.java | 82 +++++++++++++++++----- 2 files changed, 67 insertions(+), 18 deletions(-) (limited to 'java/src/com/android/inputmethod/latin/RichInputConnection.java') diff --git a/java/src/com/android/inputmethod/latin/RichInputConnection.java b/java/src/com/android/inputmethod/latin/RichInputConnection.java index efda623e5..2ba274de1 100644 --- a/java/src/com/android/inputmethod/latin/RichInputConnection.java +++ b/java/src/com/android/inputmethod/latin/RichInputConnection.java @@ -204,8 +204,7 @@ public class RichInputConnection { } // This never calls InputConnection#getCapsMode - in fact, it's a static method that // never blocks or initiates IPC. - return StringUtils.getCapsMode(mCommittedTextBeforeComposingText, - mCommittedTextBeforeComposingText.length(), inputType); + return StringUtils.getCapsMode(mCommittedTextBeforeComposingText, inputType); } public CharSequence getTextBeforeCursor(final int i, final int j) { diff --git a/java/src/com/android/inputmethod/latin/StringUtils.java b/java/src/com/android/inputmethod/latin/StringUtils.java index d6509d6a6..4dec7881b 100644 --- a/java/src/com/android/inputmethod/latin/StringUtils.java +++ b/java/src/com/android/inputmethod/latin/StringUtils.java @@ -208,7 +208,6 @@ public final class StringUtils { * issues). This will change in the future as we simplify the code for our use and fix bugs. * * @param cs The text that should be checked for caps modes. - * @param off Location in the text at which to check. * @param reqModes The modes to be checked: may be any combination of * {@link #CAP_MODE_CHARACTERS}, {@link #CAP_MODE_WORDS}, and * {@link #CAP_MODE_SENTENCES}. @@ -218,52 +217,93 @@ public final class StringUtils { * {@link #CAP_MODE_CHARACTERS}, {@link #CAP_MODE_WORDS}, and * {@link #CAP_MODE_SENTENCES}. */ - public static int getCapsMode(CharSequence cs, int off, int reqModes) { - if (off < 0) { - return 0; - } - + public static int getCapsMode(CharSequence cs, int reqModes) { int i; char c; int mode = 0; + // Quick description of what we want to do: + // CAP_MODE_CHARACTERS is always on. + // CAP_MODE_WORDS is on if there is some whitespace before the cursor. + // CAP_MODE_SENTENCES is on if there is some whitespace before the cursor, and the end + // of a sentence just before that. + // We ignore opening parentheses and the like just before the cursor for purposes of + // finding whitespace for WORDS and SENTENCES modes. + // The end of a sentence ends with a period, question mark or exclamation mark. If it's + // a period, it also needs not to be an abbreviation, which means it also needs to either + // be immediately preceded by punctuation, or by a string of only letters with single + // periods interleaved. + + // Step 1 : check for cap mode characters. If it's looked for, it's always on. if ((reqModes & TextUtils.CAP_MODE_CHARACTERS) != 0) { mode |= TextUtils.CAP_MODE_CHARACTERS; } if ((reqModes & (TextUtils.CAP_MODE_WORDS | TextUtils.CAP_MODE_SENTENCES)) == 0) { + // Here we are not looking for words or sentences modes, so since we already evaluated + // mode characters, we can return. return mode; } - // Back over allowed opening punctuation. - for (i = off; i > 0; i--) { + // Step 2 : Skip (ignore at the end of input) any opening punctuation. This includes + // opening parentheses, brackets, opening quotes, everything that *opens* a span of + // text in the linguistic sense. In RTL languages, this is still an opening sign, although + // it may look like a right parenthesis for example. We also include double quote and + // single quote since they aren't start punctuation in the unicode sense, but should still + // be skipped for English. TODO: does this depend on the language? + for (i = cs.length(); i > 0; i--) { c = cs.charAt(i - 1); if (c != '"' && c != '\'' && Character.getType(c) != Character.START_PUNCTUATION) { break; } } - // Start of paragraph, with optional whitespace. + // We are now on the character that precedes any starting punctuation, so in the most + // frequent case this will be whitespace or a letter, although it may occasionally be a + // start of line, or some symbol. + + // Step 3 : Search for the start of a paragraph. From the starting point computed in step 2, + // we go back over any space or tab char sitting there. We find the start of a paragraph + // if the first char that's not a space or tab is a start of line (as in, either \n or + // start of text). int j = i; while (j > 0 && ((c = cs.charAt(j - 1)) == ' ' || c == '\t')) { j--; } if (j == 0 || cs.charAt(j - 1) == '\n') { + // Here we know we are at the start of a paragraph, so we turn on word mode. + // Note: I think this is entirely buggy. It will return mode words even if the app + // didn't request it, and it will fail to return sentence mode even if this is actually + // the start of a sentence. As it happens, Latin IME client code considers that mode + // word *implies* mode sentence and tests for non-zeroness, so it happens to work. return mode | TextUtils.CAP_MODE_WORDS; } - - // Or start of word if we are that style. if ((reqModes & TextUtils.CAP_MODE_SENTENCES) == 0) { + // If we don't have to check for mode sentence, then we know all we need to know + // already. Either we have whitespace immediately before index i and we are at the + // start of a word, or we don't and we aren't. But we just went over any whitespace + // just before i and in fact j points before any whitespace, so if i != j that means + // there is such whitespace. In this case, we have mode words. if (i != j) mode |= TextUtils.CAP_MODE_WORDS; return mode; } - - // There must be a space if not the start of paragraph. if (i == j) { + // Finally, if we don't have whitespace before index i, it means neither mode words + // nor mode sentences should be on so we can return right away. return mode; } + // Please note that because of the reqModes & CAP_MODE_SENTENCES test a few lines above, + // we know that mode sentences is being requested. - // Back over allowed closing punctuation. + // Step 4 : Search for sentence mode. for (; j > 0; j--) { + // Here we look to go over any closing punctuation. This is because in dominant variants + // of English, the final period is placed within double quotes and maybe other closing + // punctuation signs. + // TODO: this is wrong for almost everything except American typography rules for + // English. It's wrong for British typography rules for English, it's wrong for French, + // it's wrong for German, it's wrong for Spanish, and possibly everything else. + // (note that American rules and British rules have nothing to do with en_US and en_GB, + // as both rules are used in both countries - it's merely a name for the set of rules) c = cs.charAt(j - 1); if (c != '"' && c != '\'' && Character.getType(c) != Character.END_PUNCTUATION) { break; @@ -273,8 +313,18 @@ public final class StringUtils { if (j > 0) { c = cs.charAt(j - 1); if (c == '.' || c == '?' || c == '!') { - // Do not capitalize if the word ends with a period but - // also contains a period, in which case it is an abbreviation. + // Here we found a marker for sentence end (we consider these to be one of + // either . or ? or ! only). So this is probably the end of a sentence, but if we + // found a period, we still want to check the case where this is a abbreviation + // period rather than a full stop. To do this, we look for a period within a word + // before the period we just found; if any, we take that to mean it was an + // abbreviation. + // A typical example of the above is "In the U.S. ", where the last period is + // not a full stop and we should not capitalize. + // TODO: the rule below is broken. In particular it fails for runs of periods, + // whatever the reason. In the example "in the U.S..", the last period is a full + // stop following the abbreviation period, and we should capitalize but we don't. + // Likewise, "I don't know... " should capitalize, but fails to do so. if (c == '.') { for (int k = j - 2; k >= 0; k--) { c = cs.charAt(k); -- cgit v1.2.3-83-g751a From 252da38fcd1a40b8c308d6754d644064032094f9 Mon Sep 17 00:00:00 2001 From: Jean Chalard Date: Fri, 14 Sep 2012 16:27:04 +0900 Subject: Take locale into account for caps (A10) Bug: 4967874 Change-Id: Ic7ce7b2de088308fa00865c81246c84c605db1e5 --- .../com/android/inputmethod/latin/LatinIME.java | 2 +- .../inputmethod/latin/RichInputConnection.java | 5 +- .../com/android/inputmethod/latin/StringUtils.java | 34 +++++++----- .../inputmethod/latin/StringUtilsTests.java | 61 +++++++++++++--------- 4 files changed, 59 insertions(+), 43 deletions(-) (limited to 'java/src/com/android/inputmethod/latin/RichInputConnection.java') diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java index 03de03d25..db8f269eb 100644 --- a/java/src/com/android/inputmethod/latin/LatinIME.java +++ b/java/src/com/android/inputmethod/latin/LatinIME.java @@ -1118,7 +1118,7 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen // Note: getCursorCapsMode() returns the current capitalization mode that is any // combination of CAP_MODE_CHARACTERS, CAP_MODE_WORDS, and CAP_MODE_SENTENCES. 0 means none // of them. - return mConnection.getCursorCapsMode(inputType); + return mConnection.getCursorCapsMode(inputType, mSubtypeSwitcher.getCurrentSubtypeLocale()); } // Factor in auto-caps and manual caps and compute the current caps mode. diff --git a/java/src/com/android/inputmethod/latin/RichInputConnection.java b/java/src/com/android/inputmethod/latin/RichInputConnection.java index 43b9ba7a9..b85f9dcd7 100644 --- a/java/src/com/android/inputmethod/latin/RichInputConnection.java +++ b/java/src/com/android/inputmethod/latin/RichInputConnection.java @@ -30,6 +30,7 @@ import com.android.inputmethod.keyboard.Keyboard; import com.android.inputmethod.latin.define.ProductionFlag; import com.android.inputmethod.research.ResearchLogger; +import java.util.Locale; import java.util.regex.Pattern; /** @@ -189,7 +190,7 @@ public class RichInputConnection { } } - public int getCursorCapsMode(final int inputType) { + public int getCursorCapsMode(final int inputType, final Locale locale) { mIC = mParent.getCurrentInputConnection(); if (null == mIC) return Constants.TextUtils.CAP_MODE_OFF; if (!TextUtils.isEmpty(mComposingText)) return Constants.TextUtils.CAP_MODE_OFF; @@ -204,7 +205,7 @@ public class RichInputConnection { } // This never calls InputConnection#getCapsMode - in fact, it's a static method that // never blocks or initiates IPC. - return StringUtils.getCapsMode(mCommittedTextBeforeComposingText, inputType); + return StringUtils.getCapsMode(mCommittedTextBeforeComposingText, inputType, locale); } public CharSequence getTextBeforeCursor(final int i, final int j) { diff --git a/java/src/com/android/inputmethod/latin/StringUtils.java b/java/src/com/android/inputmethod/latin/StringUtils.java index 0fc6c32d7..6dc1ea807 100644 --- a/java/src/com/android/inputmethod/latin/StringUtils.java +++ b/java/src/com/android/inputmethod/latin/StringUtils.java @@ -196,13 +196,14 @@ public final class StringUtils { * @param reqModes The modes to be checked: may be any combination of * {@link TextUtils#CAP_MODE_CHARACTERS}, {@link TextUtils#CAP_MODE_WORDS}, and * {@link TextUtils#CAP_MODE_SENTENCES}. + * @param locale The locale to consider for capitalization rules * * @return Returns the actual capitalization modes that can be in effect * at the current position, which is any combination of * {@link TextUtils#CAP_MODE_CHARACTERS}, {@link TextUtils#CAP_MODE_WORDS}, and * {@link TextUtils#CAP_MODE_SENTENCES}. */ - public static int getCapsMode(final CharSequence cs, final int reqModes) { + public static int getCapsMode(final CharSequence cs, final int reqModes, final Locale locale) { // Quick description of what we want to do: // CAP_MODE_CHARACTERS is always on. // CAP_MODE_WORDS is on if there is some whitespace before the cursor. @@ -270,19 +271,24 @@ public final class StringUtils { // we know that MODE_SENTENCES is being requested. // Step 4 : Search for MODE_SENTENCES. - for (; j > 0; j--) { - // Here we look to go over any closing punctuation. This is because in dominant variants - // of English, the final period is placed within double quotes and maybe other closing - // punctuation signs. - // TODO: this is wrong for almost everything except American typography rules for - // English. It's wrong for British typography rules for English, it's wrong for French, - // it's wrong for German, it's wrong for Spanish, and possibly everything else. - // (note that American rules and British rules have nothing to do with en_US and en_GB, - // as both rules are used in both countries - it's merely a name for the set of rules) - final char c = cs.charAt(j - 1); - if (c != Keyboard.CODE_DOUBLE_QUOTE && c != Keyboard.CODE_SINGLE_QUOTE - && Character.getType(c) != Character.END_PUNCTUATION) { - break; + // English is a special case in that "American typography" rules, which are the most common + // in English, state that a sentence terminator immediately following a quotation mark + // should be swapped with it and de-duplicated (included in the quotation mark), + // e.g. <> + // No other language has such a rule as far as I know, instead putting inside the quotation + // mark as the exact thing quoted and handling the surrounding punctuation independently, + // e.g. <> + // Hence, specifically for English, we treat this special case here. + if (Locale.ENGLISH.getLanguage().equals(locale.getLanguage())) { + for (; j > 0; j--) { + // Here we look to go over any closing punctuation. This is because in dominant + // variants of English, the final period is placed within double quotes and maybe + // other closing punctuation signs. This is generally not true in other languages. + final char c = cs.charAt(j - 1); + if (c != Keyboard.CODE_DOUBLE_QUOTE && c != Keyboard.CODE_SINGLE_QUOTE + && Character.getType(c) != Character.END_PUNCTUATION) { + break; + } } } diff --git a/tests/src/com/android/inputmethod/latin/StringUtilsTests.java b/tests/src/com/android/inputmethod/latin/StringUtilsTests.java index c3d9c0616..00cca9d3b 100644 --- a/tests/src/com/android/inputmethod/latin/StringUtilsTests.java +++ b/tests/src/com/android/inputmethod/latin/StringUtilsTests.java @@ -19,6 +19,8 @@ package com.android.inputmethod.latin; import android.test.AndroidTestCase; import android.text.TextUtils; +import java.util.Locale; + public class StringUtilsTests extends AndroidTestCase { public void testContainsInArray() { assertFalse("empty array", StringUtils.containsInArray("key", new String[0])); @@ -90,43 +92,50 @@ public class StringUtilsTests extends AndroidTestCase { StringUtils.removeFromCsvIfExists("key", "key1,key,key3,key,key5")); } - private void onePathForCaps(final CharSequence cs, final int expectedResult, final int mask) { + private void onePathForCaps(final CharSequence cs, final int expectedResult, final int mask, + final Locale l) { int oneTimeResult = expectedResult & mask; - assertEquals("After >" + cs + "<", oneTimeResult, StringUtils.getCapsMode(cs, mask)); + assertEquals("After >" + cs + "<", oneTimeResult, StringUtils.getCapsMode(cs, mask, l)); } - private void allPathsForCaps(final CharSequence cs, final int expectedResult) { + private void allPathsForCaps(final CharSequence cs, final int expectedResult, final Locale l) { final int c = TextUtils.CAP_MODE_CHARACTERS; final int w = TextUtils.CAP_MODE_WORDS; final int s = TextUtils.CAP_MODE_SENTENCES; - onePathForCaps(cs, expectedResult, c | w | s); - onePathForCaps(cs, expectedResult, w | s); - onePathForCaps(cs, expectedResult, c | s); - onePathForCaps(cs, expectedResult, c | w); - onePathForCaps(cs, expectedResult, c); - onePathForCaps(cs, expectedResult, w); - onePathForCaps(cs, expectedResult, s); + onePathForCaps(cs, expectedResult, c | w | s, l); + onePathForCaps(cs, expectedResult, w | s, l); + onePathForCaps(cs, expectedResult, c | s, l); + onePathForCaps(cs, expectedResult, c | w, l); + onePathForCaps(cs, expectedResult, c, l); + onePathForCaps(cs, expectedResult, w, l); + onePathForCaps(cs, expectedResult, s, l); } public void testGetCapsMode() { final int c = TextUtils.CAP_MODE_CHARACTERS; final int w = TextUtils.CAP_MODE_WORDS; final int s = TextUtils.CAP_MODE_SENTENCES; - allPathsForCaps("", c | w | s); - allPathsForCaps("Word", c); - allPathsForCaps("Word.", c); - allPathsForCaps("Word ", c | w); - allPathsForCaps("Word. ", c | w | s); - allPathsForCaps("Word..", c); - allPathsForCaps("Word.. ", c | w | s); - allPathsForCaps("Word... ", c | w | s); - allPathsForCaps("Word ... ", c | w | s); - allPathsForCaps("Word . ", c | w); - allPathsForCaps("In the U.S ", c | w); - allPathsForCaps("In the U.S. ", c | w); - allPathsForCaps("Some stuff (e.g. ", c | w); - allPathsForCaps("In the U.S.. ", c | w | s); - allPathsForCaps("\"Word.\" ", c | w | s); - allPathsForCaps("\"Word\" ", c | w); + Locale l = Locale.ENGLISH; + allPathsForCaps("", c | w | s, l); + allPathsForCaps("Word", c, l); + allPathsForCaps("Word.", c, l); + allPathsForCaps("Word ", c | w, l); + allPathsForCaps("Word. ", c | w | s, l); + allPathsForCaps("Word..", c, l); + allPathsForCaps("Word.. ", c | w | s, l); + allPathsForCaps("Word... ", c | w | s, l); + allPathsForCaps("Word ... ", c | w | s, l); + allPathsForCaps("Word . ", c | w, l); + allPathsForCaps("In the U.S ", c | w, l); + allPathsForCaps("In the U.S. ", c | w, l); + allPathsForCaps("Some stuff (e.g. ", c | w, l); + allPathsForCaps("In the U.S.. ", c | w | s, l); + allPathsForCaps("\"Word.\" ", c | w | s, l); + allPathsForCaps("\"Word\". ", c | w | s, l); + allPathsForCaps("\"Word\" ", c | w, l); + l = Locale.FRENCH; + allPathsForCaps("\"Word.\" ", c | w, l); + allPathsForCaps("\"Word\". ", c | w | s, l); + allPathsForCaps("\"Word\" ", c | w, l); } } -- cgit v1.2.3-83-g751a