From 1e11c44d1b5f9ddf593c5407cb14c458be0056f2 Mon Sep 17 00:00:00 2001
From: "Tadashi G. Takaoka" <takaoka@google.com>
Date: Wed, 29 Aug 2012 17:26:00 +0900
Subject: Make utility classes final

Change-Id: Ic4f69b4c8da33c9fca50d6829724179310c2f875
---
 java/src/com/android/inputmethod/latin/StringUtils.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'java/src/com/android/inputmethod/latin/StringUtils.java')

diff --git a/java/src/com/android/inputmethod/latin/StringUtils.java b/java/src/com/android/inputmethod/latin/StringUtils.java
index 39c59b44c..9c47a38c2 100644
--- a/java/src/com/android/inputmethod/latin/StringUtils.java
+++ b/java/src/com/android/inputmethod/latin/StringUtils.java
@@ -21,7 +21,7 @@ import android.text.TextUtils;
 import java.util.ArrayList;
 import java.util.Locale;
 
-public class StringUtils {
+public final class StringUtils {
     private StringUtils() {
         // This utility class is not publicly instantiable.
     }
-- 
cgit v1.2.3-83-g751a


From 9d1c73ffd88cd1bfef3de048b0b3a9a7dfbcfa70 Mon Sep 17 00:00:00 2001
From: Jean Chalard <jchalard@google.com>
Date: Mon, 10 Sep 2012 19:27:45 +0900
Subject: Import TextUtils.getCapsMode to fix it internally (A1)

This should have no effect at all on behavior,
except an increase in performance.

Bug: 4967874
Bug: 6950087
Change-Id: Ie2b51efefe84ca767f5dc8e3b80bfef7e1faab3d
---
 .../com/android/inputmethod/latin/LatinIME.java    |  4 +-
 .../inputmethod/latin/RichInputConnection.java     | 15 +++-
 .../com/android/inputmethod/latin/StringUtils.java | 94 ++++++++++++++++++++++
 3 files changed, 110 insertions(+), 3 deletions(-)

(limited to 'java/src/com/android/inputmethod/latin/StringUtils.java')

diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java
index 78c65e0c7..d8b1c292b 100644
--- a/java/src/com/android/inputmethod/latin/LatinIME.java
+++ b/java/src/com/android/inputmethod/latin/LatinIME.java
@@ -700,6 +700,8 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen
             }
         }
 
+        mConnection.resetCachesUponCursorMove(mLastSelectionStart);
+
         if (isDifferentTextField) {
             mainKeyboardView.closing();
             loadSettings();
@@ -733,8 +735,6 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen
         mainKeyboardView.setGesturePreviewMode(mCurrentSettings.mGesturePreviewTrailEnabled,
                 mCurrentSettings.mGestureFloatingPreviewTextEnabled);
 
-        mConnection.resetCachesUponCursorMove(mLastSelectionStart);
-
         if (TRACE) Debug.startMethodTracing("/data/trace/latinime");
     }
 
diff --git a/java/src/com/android/inputmethod/latin/RichInputConnection.java b/java/src/com/android/inputmethod/latin/RichInputConnection.java
index 37e1dbb69..efda623e5 100644
--- a/java/src/com/android/inputmethod/latin/RichInputConnection.java
+++ b/java/src/com/android/inputmethod/latin/RichInputConnection.java
@@ -192,7 +192,20 @@ public class RichInputConnection {
     public int getCursorCapsMode(final int inputType) {
         mIC = mParent.getCurrentInputConnection();
         if (null == mIC) return Constants.TextUtils.CAP_MODE_OFF;
-        return mIC.getCursorCapsMode(inputType);
+        if (!TextUtils.isEmpty(mComposingText)) return Constants.TextUtils.CAP_MODE_OFF;
+        // TODO: this will generally work, but there may be cases where the buffer contains SOME
+        // information but not enough to determine the caps mode accurately. This may happen after
+        // heavy pressing of delete, for example DEFAULT_TEXT_CACHE_SIZE - 5 times or so.
+        // getCapsMode should be updated to be able to return a "not enough info" result so that
+        // we can get more context only when needed.
+        if (TextUtils.isEmpty(mCommittedTextBeforeComposingText) && 0 != mCurrentCursorPosition) {
+            mCommittedTextBeforeComposingText.append(
+                    getTextBeforeCursor(DEFAULT_TEXT_CACHE_SIZE, 0));
+        }
+        // This never calls InputConnection#getCapsMode - in fact, it's a static method that
+        // never blocks or initiates IPC.
+        return StringUtils.getCapsMode(mCommittedTextBeforeComposingText,
+                mCommittedTextBeforeComposingText.length(), inputType);
     }
 
     public CharSequence getTextBeforeCursor(final int i, final int j) {
diff --git a/java/src/com/android/inputmethod/latin/StringUtils.java b/java/src/com/android/inputmethod/latin/StringUtils.java
index 9c47a38c2..d6509d6a6 100644
--- a/java/src/com/android/inputmethod/latin/StringUtils.java
+++ b/java/src/com/android/inputmethod/latin/StringUtils.java
@@ -197,4 +197,98 @@ public final class StringUtils {
         codePoints[dsti] = codePoint;
         return codePoints;
     }
+
+    /**
+     * Determine what caps mode should be in effect at the current offset in
+     * the text. Only the mode bits set in <var>reqModes</var> will be
+     * checked. Note that the caps mode flags here are explicitly defined
+     * to match those in {@link InputType}.
+     *
+     * This code is a straight copy of TextUtils.getCapsMode (modulo namespace and formatting
+     * issues). This will change in the future as we simplify the code for our use and fix bugs.
+     *
+     * @param cs The text that should be checked for caps modes.
+     * @param off Location in the text at which to check.
+     * @param reqModes The modes to be checked: may be any combination of
+     * {@link #CAP_MODE_CHARACTERS}, {@link #CAP_MODE_WORDS}, and
+     * {@link #CAP_MODE_SENTENCES}.
+     *
+     * @return Returns the actual capitalization modes that can be in effect
+     * at the current position, which is any combination of
+     * {@link #CAP_MODE_CHARACTERS}, {@link #CAP_MODE_WORDS}, and
+     * {@link #CAP_MODE_SENTENCES}.
+     */
+    public static int getCapsMode(CharSequence cs, int off, int reqModes) {
+        if (off < 0) {
+            return 0;
+        }
+
+        int i;
+        char c;
+        int mode = 0;
+
+        if ((reqModes & TextUtils.CAP_MODE_CHARACTERS) != 0) {
+            mode |= TextUtils.CAP_MODE_CHARACTERS;
+        }
+        if ((reqModes & (TextUtils.CAP_MODE_WORDS | TextUtils.CAP_MODE_SENTENCES)) == 0) {
+            return mode;
+        }
+
+        // Back over allowed opening punctuation.
+        for (i = off; i > 0; i--) {
+            c = cs.charAt(i - 1);
+            if (c != '"' && c != '\'' && Character.getType(c) != Character.START_PUNCTUATION) {
+                break;
+            }
+        }
+
+        // Start of paragraph, with optional whitespace.
+        int j = i;
+        while (j > 0 && ((c = cs.charAt(j - 1)) == ' ' || c == '\t')) {
+            j--;
+        }
+        if (j == 0 || cs.charAt(j - 1) == '\n') {
+            return mode | TextUtils.CAP_MODE_WORDS;
+        }
+
+        // Or start of word if we are that style.
+        if ((reqModes & TextUtils.CAP_MODE_SENTENCES) == 0) {
+            if (i != j) mode |= TextUtils.CAP_MODE_WORDS;
+            return mode;
+        }
+
+        // There must be a space if not the start of paragraph.
+        if (i == j) {
+            return mode;
+        }
+
+        // Back over allowed closing punctuation.
+        for (; j > 0; j--) {
+            c = cs.charAt(j - 1);
+            if (c != '"' && c != '\'' && Character.getType(c) != Character.END_PUNCTUATION) {
+                break;
+            }
+        }
+
+        if (j > 0) {
+            c = cs.charAt(j - 1);
+            if (c == '.' || c == '?' || c == '!') {
+                // Do not capitalize if the word ends with a period but
+                // also contains a period, in which case it is an abbreviation.
+                if (c == '.') {
+                    for (int k = j - 2; k >= 0; k--) {
+                        c = cs.charAt(k);
+                        if (c == '.') {
+                            return mode;
+                        }
+                        if (!Character.isLetter(c)) {
+                            break;
+                        }
+                    }
+                }
+                return mode | TextUtils.CAP_MODE_SENTENCES;
+            }
+        }
+        return mode;
+    }
 }
-- 
cgit v1.2.3-83-g751a


From 3d54e1c1eccf58e184c065ebe78f0f467cd04606 Mon Sep 17 00:00:00 2001
From: Jean Chalard <jchalard@google.com>
Date: Thu, 13 Sep 2012 14:56:56 +0900
Subject: Simplify a call, and add comments (A2)

Since the function has to be modified heavily but does a lot
of non-trivial work, add a wealth of comments explaining what
it does and why so as to facilitate understanding the changes
to come.

Bug: 4967874
Change-Id: I6c21aea15f161d807035f279dfb7d1b98b3e9144
---
 .../inputmethod/latin/RichInputConnection.java     |  3 +-
 .../com/android/inputmethod/latin/StringUtils.java | 82 +++++++++++++++++-----
 2 files changed, 67 insertions(+), 18 deletions(-)

(limited to 'java/src/com/android/inputmethod/latin/StringUtils.java')

diff --git a/java/src/com/android/inputmethod/latin/RichInputConnection.java b/java/src/com/android/inputmethod/latin/RichInputConnection.java
index efda623e5..2ba274de1 100644
--- a/java/src/com/android/inputmethod/latin/RichInputConnection.java
+++ b/java/src/com/android/inputmethod/latin/RichInputConnection.java
@@ -204,8 +204,7 @@ public class RichInputConnection {
         }
         // This never calls InputConnection#getCapsMode - in fact, it's a static method that
         // never blocks or initiates IPC.
-        return StringUtils.getCapsMode(mCommittedTextBeforeComposingText,
-                mCommittedTextBeforeComposingText.length(), inputType);
+        return StringUtils.getCapsMode(mCommittedTextBeforeComposingText, inputType);
     }
 
     public CharSequence getTextBeforeCursor(final int i, final int j) {
diff --git a/java/src/com/android/inputmethod/latin/StringUtils.java b/java/src/com/android/inputmethod/latin/StringUtils.java
index d6509d6a6..4dec7881b 100644
--- a/java/src/com/android/inputmethod/latin/StringUtils.java
+++ b/java/src/com/android/inputmethod/latin/StringUtils.java
@@ -208,7 +208,6 @@ public final class StringUtils {
      * issues). This will change in the future as we simplify the code for our use and fix bugs.
      *
      * @param cs The text that should be checked for caps modes.
-     * @param off Location in the text at which to check.
      * @param reqModes The modes to be checked: may be any combination of
      * {@link #CAP_MODE_CHARACTERS}, {@link #CAP_MODE_WORDS}, and
      * {@link #CAP_MODE_SENTENCES}.
@@ -218,52 +217,93 @@ public final class StringUtils {
      * {@link #CAP_MODE_CHARACTERS}, {@link #CAP_MODE_WORDS}, and
      * {@link #CAP_MODE_SENTENCES}.
      */
-    public static int getCapsMode(CharSequence cs, int off, int reqModes) {
-        if (off < 0) {
-            return 0;
-        }
-
+    public static int getCapsMode(CharSequence cs, int reqModes) {
         int i;
         char c;
         int mode = 0;
 
+        // Quick description of what we want to do:
+        // CAP_MODE_CHARACTERS is always on.
+        // CAP_MODE_WORDS is on if there is some whitespace before the cursor.
+        // CAP_MODE_SENTENCES is on if there is some whitespace before the cursor, and the end
+        //   of a sentence just before that.
+        // We ignore opening parentheses and the like just before the cursor for purposes of
+        // finding whitespace for WORDS and SENTENCES modes.
+        // The end of a sentence ends with a period, question mark or exclamation mark. If it's
+        // a period, it also needs not to be an abbreviation, which means it also needs to either
+        // be immediately preceded by punctuation, or by a string of only letters with single
+        // periods interleaved.
+
+        // Step 1 : check for cap mode characters. If it's looked for, it's always on.
         if ((reqModes & TextUtils.CAP_MODE_CHARACTERS) != 0) {
             mode |= TextUtils.CAP_MODE_CHARACTERS;
         }
         if ((reqModes & (TextUtils.CAP_MODE_WORDS | TextUtils.CAP_MODE_SENTENCES)) == 0) {
+            // Here we are not looking for words or sentences modes, so since we already evaluated
+            // mode characters, we can return.
             return mode;
         }
 
-        // Back over allowed opening punctuation.
-        for (i = off; i > 0; i--) {
+        // Step 2 : Skip (ignore at the end of input) any opening punctuation. This includes
+        // opening parentheses, brackets, opening quotes, everything that *opens* a span of
+        // text in the linguistic sense. In RTL languages, this is still an opening sign, although
+        // it may look like a right parenthesis for example. We also include double quote and
+        // single quote since they aren't start punctuation in the unicode sense, but should still
+        // be skipped for English. TODO: does this depend on the language?
+        for (i = cs.length(); i > 0; i--) {
             c = cs.charAt(i - 1);
             if (c != '"' && c != '\'' && Character.getType(c) != Character.START_PUNCTUATION) {
                 break;
             }
         }
 
-        // Start of paragraph, with optional whitespace.
+        // We are now on the character that precedes any starting punctuation, so in the most
+        // frequent case this will be whitespace or a letter, although it may occasionally be a
+        // start of line, or some symbol.
+
+        // Step 3 : Search for the start of a paragraph. From the starting point computed in step 2,
+        // we go back over any space or tab char sitting there. We find the start of a paragraph
+        // if the first char that's not a space or tab is a start of line (as in, either \n or
+        // start of text).
         int j = i;
         while (j > 0 && ((c = cs.charAt(j - 1)) == ' ' || c == '\t')) {
             j--;
         }
         if (j == 0 || cs.charAt(j - 1) == '\n') {
+            // Here we know we are at the start of a paragraph, so we turn on word mode.
+            // Note: I think this is entirely buggy. It will return mode words even if the app
+            // didn't request it, and it will fail to return sentence mode even if this is actually
+            // the start of a sentence. As it happens, Latin IME client code considers that mode
+            // word *implies* mode sentence and tests for non-zeroness, so it happens to work.
             return mode | TextUtils.CAP_MODE_WORDS;
         }
-
-        // Or start of word if we are that style.
         if ((reqModes & TextUtils.CAP_MODE_SENTENCES) == 0) {
+            // If we don't have to check for mode sentence, then we know all we need to know
+            // already. Either we have whitespace immediately before index i and we are at the
+            // start of a word, or we don't and we aren't. But we just went over any whitespace
+            // just before i and in fact j points before any whitespace, so if i != j that means
+            // there is such whitespace. In this case, we have mode words.
             if (i != j) mode |= TextUtils.CAP_MODE_WORDS;
             return mode;
         }
-
-        // There must be a space if not the start of paragraph.
         if (i == j) {
+            // Finally, if we don't have whitespace before index i, it means neither mode words
+            // nor mode sentences should be on so we can return right away.
             return mode;
         }
+        // Please note that because of the reqModes & CAP_MODE_SENTENCES test a few lines above,
+        // we know that mode sentences is being requested.
 
-        // Back over allowed closing punctuation.
+        // Step 4 : Search for sentence mode.
         for (; j > 0; j--) {
+            // Here we look to go over any closing punctuation. This is because in dominant variants
+            // of English, the final period is placed within double quotes and maybe other closing
+            // punctuation signs.
+            // TODO: this is wrong for almost everything except American typography rules for
+            // English. It's wrong for British typography rules for English, it's wrong for French,
+            // it's wrong for German, it's wrong for Spanish, and possibly everything else.
+            // (note that American rules and British rules have nothing to do with en_US and en_GB,
+            // as both rules are used in both countries - it's merely a name for the set of rules)
             c = cs.charAt(j - 1);
             if (c != '"' && c != '\'' && Character.getType(c) != Character.END_PUNCTUATION) {
                 break;
@@ -273,8 +313,18 @@ public final class StringUtils {
         if (j > 0) {
             c = cs.charAt(j - 1);
             if (c == '.' || c == '?' || c == '!') {
-                // Do not capitalize if the word ends with a period but
-                // also contains a period, in which case it is an abbreviation.
+                // Here we found a marker for sentence end (we consider these to be one of
+                // either . or ? or ! only). So this is probably the end of a sentence, but if we
+                // found a period, we still want to check the case where this is a abbreviation
+                // period rather than a full stop. To do this, we look for a period within a word
+                // before the period we just found; if any, we take that to mean it was an
+                // abbreviation.
+                // A typical example of the above is "In the U.S. ", where the last period is
+                // not a full stop and we should not capitalize.
+                // TODO: the rule below is broken. In particular it fails for runs of periods,
+                // whatever the reason. In the example "in the U.S..", the last period is a full
+                // stop following the abbreviation period, and we should capitalize but we don't.
+                // Likewise, "I don't know... " should capitalize, but fails to do so.
                 if (c == '.') {
                     for (int k = j - 2; k >= 0; k--) {
                         c = cs.charAt(k);
-- 
cgit v1.2.3-83-g751a


From 5fbe3764ae074a7bb79dfa78d3ed67d437a49700 Mon Sep 17 00:00:00 2001
From: Jean Chalard <jchalard@google.com>
Date: Thu, 13 Sep 2012 16:19:21 +0900
Subject: Small simplification (A3)

Change-Id: I2ea9566ce5cb1fbccd33a42836244a899365de6a
---
 java/src/com/android/inputmethod/latin/StringUtils.java | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'java/src/com/android/inputmethod/latin/StringUtils.java')

diff --git a/java/src/com/android/inputmethod/latin/StringUtils.java b/java/src/com/android/inputmethod/latin/StringUtils.java
index 4dec7881b..10aa105e4 100644
--- a/java/src/com/android/inputmethod/latin/StringUtils.java
+++ b/java/src/com/android/inputmethod/latin/StringUtils.java
@@ -220,7 +220,7 @@ public final class StringUtils {
     public static int getCapsMode(CharSequence cs, int reqModes) {
         int i;
         char c;
-        int mode = 0;
+        int mode = TextUtils.CAP_MODE_CHARACTERS;
 
         // Quick description of what we want to do:
         // CAP_MODE_CHARACTERS is always on.
@@ -235,13 +235,10 @@ public final class StringUtils {
         // periods interleaved.
 
         // Step 1 : check for cap mode characters. If it's looked for, it's always on.
-        if ((reqModes & TextUtils.CAP_MODE_CHARACTERS) != 0) {
-            mode |= TextUtils.CAP_MODE_CHARACTERS;
-        }
         if ((reqModes & (TextUtils.CAP_MODE_WORDS | TextUtils.CAP_MODE_SENTENCES)) == 0) {
             // Here we are not looking for words or sentences modes, so since we already evaluated
             // mode characters, we can return.
-            return mode;
+            return mode & reqModes;
         }
 
         // Step 2 : Skip (ignore at the end of input) any opening punctuation. This includes
-- 
cgit v1.2.3-83-g751a


From 6f8dfd92b1b19eb3e3f3bfd2cd837c6b70b50c33 Mon Sep 17 00:00:00 2001
From: Jean Chalard <jchalard@google.com>
Date: Thu, 13 Sep 2012 17:31:29 +0900
Subject: Simplify the code some more (A4)

Change-Id: I1b1763ceefda6357dc698195928d0f16bc425830
---
 .../com/android/inputmethod/latin/StringUtils.java | 98 ++++++++++------------
 1 file changed, 45 insertions(+), 53 deletions(-)

(limited to 'java/src/com/android/inputmethod/latin/StringUtils.java')

diff --git a/java/src/com/android/inputmethod/latin/StringUtils.java b/java/src/com/android/inputmethod/latin/StringUtils.java
index 10aa105e4..1dfec7ee0 100644
--- a/java/src/com/android/inputmethod/latin/StringUtils.java
+++ b/java/src/com/android/inputmethod/latin/StringUtils.java
@@ -220,8 +220,6 @@ public final class StringUtils {
     public static int getCapsMode(CharSequence cs, int reqModes) {
         int i;
         char c;
-        int mode = TextUtils.CAP_MODE_CHARACTERS;
-
         // Quick description of what we want to do:
         // CAP_MODE_CHARACTERS is always on.
         // CAP_MODE_WORDS is on if there is some whitespace before the cursor.
@@ -234,11 +232,11 @@ public final class StringUtils {
         // be immediately preceded by punctuation, or by a string of only letters with single
         // periods interleaved.
 
-        // Step 1 : check for cap mode characters. If it's looked for, it's always on.
+        // Step 1 : check for cap MODE_CHARACTERS. If it's looked for, it's always on.
         if ((reqModes & (TextUtils.CAP_MODE_WORDS | TextUtils.CAP_MODE_SENTENCES)) == 0) {
-            // Here we are not looking for words or sentences modes, so since we already evaluated
-            // mode characters, we can return.
-            return mode & reqModes;
+            // Here we are not looking for MODE_WORDS or MODE_SENTENCES, so since we already
+            // evaluated MODE_CHARACTERS, we can return.
+            return TextUtils.CAP_MODE_CHARACTERS & reqModes;
         }
 
         // Step 2 : Skip (ignore at the end of input) any opening punctuation. This includes
@@ -263,35 +261,30 @@ public final class StringUtils {
         // if the first char that's not a space or tab is a start of line (as in, either \n or
         // start of text).
         int j = i;
-        while (j > 0 && ((c = cs.charAt(j - 1)) == ' ' || c == '\t')) {
+        while (j > 0 && Character.isWhitespace(cs.charAt(j - 1))) {
             j--;
         }
-        if (j == 0 || cs.charAt(j - 1) == '\n') {
-            // Here we know we are at the start of a paragraph, so we turn on word mode.
-            // Note: I think this is entirely buggy. It will return mode words even if the app
-            // didn't request it, and it will fail to return sentence mode even if this is actually
-            // the start of a sentence. As it happens, Latin IME client code considers that mode
-            // word *implies* mode sentence and tests for non-zeroness, so it happens to work.
-            return mode | TextUtils.CAP_MODE_WORDS;
-        }
-        if ((reqModes & TextUtils.CAP_MODE_SENTENCES) == 0) {
-            // If we don't have to check for mode sentence, then we know all we need to know
-            // already. Either we have whitespace immediately before index i and we are at the
-            // start of a word, or we don't and we aren't. But we just went over any whitespace
-            // just before i and in fact j points before any whitespace, so if i != j that means
-            // there is such whitespace. In this case, we have mode words.
-            if (i != j) mode |= TextUtils.CAP_MODE_WORDS;
-            return mode;
+        if (j == 0) {
+            // There is only whitespace between the start of the text and the cursor. Both
+            // MODE_WORDS and MODE_SENTENCES should be active.
+            return (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS
+                    | TextUtils.CAP_MODE_SENTENCES) & reqModes;
         }
         if (i == j) {
-            // Finally, if we don't have whitespace before index i, it means neither mode words
+            // If we don't have whitespace before index i, it means neither MODE_WORDS
             // nor mode sentences should be on so we can return right away.
-            return mode;
+            return TextUtils.CAP_MODE_CHARACTERS & reqModes;
+        }
+        if ((reqModes & TextUtils.CAP_MODE_SENTENCES) == 0) {
+            // Here we know we have whitespace before the cursor (if not, we returned in the above
+            // if i == j clause), so we need MODE_WORDS to be on. And we don't need to evaluate
+            // MODE_SENTENCES so we can return right away.
+            return (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS) & reqModes;
         }
         // Please note that because of the reqModes & CAP_MODE_SENTENCES test a few lines above,
-        // we know that mode sentences is being requested.
+        // we know that MODE_SENTENCES is being requested.
 
-        // Step 4 : Search for sentence mode.
+        // Step 4 : Search for MODE_SENTENCES.
         for (; j > 0; j--) {
             // Here we look to go over any closing punctuation. This is because in dominant variants
             // of English, the final period is placed within double quotes and maybe other closing
@@ -307,35 +300,34 @@ public final class StringUtils {
             }
         }
 
-        if (j > 0) {
-            c = cs.charAt(j - 1);
-            if (c == '.' || c == '?' || c == '!') {
-                // Here we found a marker for sentence end (we consider these to be one of
-                // either . or ? or ! only). So this is probably the end of a sentence, but if we
-                // found a period, we still want to check the case where this is a abbreviation
-                // period rather than a full stop. To do this, we look for a period within a word
-                // before the period we just found; if any, we take that to mean it was an
-                // abbreviation.
-                // A typical example of the above is "In the U.S. ", where the last period is
-                // not a full stop and we should not capitalize.
-                // TODO: the rule below is broken. In particular it fails for runs of periods,
-                // whatever the reason. In the example "in the U.S..", the last period is a full
-                // stop following the abbreviation period, and we should capitalize but we don't.
-                // Likewise, "I don't know... " should capitalize, but fails to do so.
-                if (c == '.') {
-                    for (int k = j - 2; k >= 0; k--) {
-                        c = cs.charAt(k);
-                        if (c == '.') {
-                            return mode;
-                        }
-                        if (!Character.isLetter(c)) {
-                            break;
-                        }
+        if (j <= 0) return TextUtils.CAP_MODE_CHARACTERS & reqModes;
+        c = cs.charAt(j - 1);
+        if (c == '.' || c == '?' || c == '!') {
+            // Here we found a marker for sentence end (we consider these to be one of
+            // either . or ? or ! only). So this is probably the end of a sentence, but if we
+            // found a period, we still want to check the case where this is a abbreviation
+            // period rather than a full stop. To do this, we look for a period within a word
+            // before the period we just found; if any, we take that to mean it was an
+            // abbreviation.
+            // A typical example of the above is "In the U.S. ", where the last period is
+            // not a full stop and we should not capitalize.
+            // TODO: the rule below is broken. In particular it fails for runs of periods,
+            // whatever the reason. In the example "in the U.S..", the last period is a full
+            // stop following the abbreviation period, and we should capitalize but we don't.
+            // Likewise, "I don't know... " should capitalize, but fails to do so.
+            if (c == '.') {
+                for (int k = j - 2; k >= 0; k--) {
+                    c = cs.charAt(k);
+                    if (c == '.') {
+                        return TextUtils.CAP_MODE_CHARACTERS & reqModes;
+                    }
+                    if (!Character.isLetter(c)) {
+                        break;
                     }
                 }
-                return mode | TextUtils.CAP_MODE_SENTENCES;
             }
+            return (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_SENTENCES) & reqModes;
         }
-        return mode;
+        return TextUtils.CAP_MODE_CHARACTERS & reqModes;
     }
 }
-- 
cgit v1.2.3-83-g751a


From e94c2766909df750cc4937a9fd3275f1471ec96e Mon Sep 17 00:00:00 2001
From: Jean Chalard <jchalard@google.com>
Date: Thu, 13 Sep 2012 18:33:45 +0900
Subject: Simplification again (A5)

Change-Id: Id3ba200e06244e6d13cf57d346001d61f85ef1a2
---
 java/src/com/android/inputmethod/latin/StringUtils.java | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'java/src/com/android/inputmethod/latin/StringUtils.java')

diff --git a/java/src/com/android/inputmethod/latin/StringUtils.java b/java/src/com/android/inputmethod/latin/StringUtils.java
index 1dfec7ee0..e38983fbd 100644
--- a/java/src/com/android/inputmethod/latin/StringUtils.java
+++ b/java/src/com/android/inputmethod/latin/StringUtils.java
@@ -218,8 +218,6 @@ public final class StringUtils {
      * {@link #CAP_MODE_SENTENCES}.
      */
     public static int getCapsMode(CharSequence cs, int reqModes) {
-        int i;
-        char c;
         // Quick description of what we want to do:
         // CAP_MODE_CHARACTERS is always on.
         // CAP_MODE_WORDS is on if there is some whitespace before the cursor.
@@ -245,8 +243,9 @@ public final class StringUtils {
         // it may look like a right parenthesis for example. We also include double quote and
         // single quote since they aren't start punctuation in the unicode sense, but should still
         // be skipped for English. TODO: does this depend on the language?
+        int i;
         for (i = cs.length(); i > 0; i--) {
-            c = cs.charAt(i - 1);
+            final char c = cs.charAt(i - 1);
             if (c != '"' && c != '\'' && Character.getType(c) != Character.START_PUNCTUATION) {
                 break;
             }
@@ -294,14 +293,14 @@ public final class StringUtils {
             // it's wrong for German, it's wrong for Spanish, and possibly everything else.
             // (note that American rules and British rules have nothing to do with en_US and en_GB,
             // as both rules are used in both countries - it's merely a name for the set of rules)
-            c = cs.charAt(j - 1);
+            final char c = cs.charAt(j - 1);
             if (c != '"' && c != '\'' && Character.getType(c) != Character.END_PUNCTUATION) {
                 break;
             }
         }
 
         if (j <= 0) return TextUtils.CAP_MODE_CHARACTERS & reqModes;
-        c = cs.charAt(j - 1);
+        char c = cs.charAt(j - 1);
         if (c == '.' || c == '?' || c == '!') {
             // Here we found a marker for sentence end (we consider these to be one of
             // either . or ? or ! only). So this is probably the end of a sentence, but if we
-- 
cgit v1.2.3-83-g751a


From 35dc8cf223f1e9c8939092139f7acba5c06a1ded Mon Sep 17 00:00:00 2001
From: Jean Chalard <jchalard@google.com>
Date: Thu, 13 Sep 2012 19:07:15 +0900
Subject: Replace immediate values with named constants for characters (A6)

Change-Id: I6248f9f3ff99003ab62be22eac673df0f7f5c015
---
 java/src/com/android/inputmethod/keyboard/Keyboard.java |  2 ++
 java/src/com/android/inputmethod/latin/StringUtils.java | 15 ++++++++++-----
 2 files changed, 12 insertions(+), 5 deletions(-)

(limited to 'java/src/com/android/inputmethod/latin/StringUtils.java')

diff --git a/java/src/com/android/inputmethod/keyboard/Keyboard.java b/java/src/com/android/inputmethod/keyboard/Keyboard.java
index 261d1eba7..b7c7f415d 100644
--- a/java/src/com/android/inputmethod/keyboard/Keyboard.java
+++ b/java/src/com/android/inputmethod/keyboard/Keyboard.java
@@ -57,6 +57,8 @@ public class Keyboard {
     public static final int CODE_DASH = '-';
     public static final int CODE_SINGLE_QUOTE = '\'';
     public static final int CODE_DOUBLE_QUOTE = '"';
+    public static final int CODE_QUESTION_MARK = '?';
+    public static final int CODE_EXCLAMATION_MARK = '!';
     // TODO: Check how this should work for right-to-left languages. It seems to stand
     // that for rtl languages, a closing parenthesis is a left parenthesis. Is this
     // managed by the font? Or is it a different char?
diff --git a/java/src/com/android/inputmethod/latin/StringUtils.java b/java/src/com/android/inputmethod/latin/StringUtils.java
index e38983fbd..8696a5caf 100644
--- a/java/src/com/android/inputmethod/latin/StringUtils.java
+++ b/java/src/com/android/inputmethod/latin/StringUtils.java
@@ -18,6 +18,8 @@ package com.android.inputmethod.latin;
 
 import android.text.TextUtils;
 
+import com.android.inputmethod.keyboard.Keyboard; // For character constants
+
 import java.util.ArrayList;
 import java.util.Locale;
 
@@ -246,7 +248,8 @@ public final class StringUtils {
         int i;
         for (i = cs.length(); i > 0; i--) {
             final char c = cs.charAt(i - 1);
-            if (c != '"' && c != '\'' && Character.getType(c) != Character.START_PUNCTUATION) {
+            if (c != Keyboard.CODE_DOUBLE_QUOTE && c != Keyboard.CODE_SINGLE_QUOTE
+                    && Character.getType(c) != Character.START_PUNCTUATION) {
                 break;
             }
         }
@@ -294,14 +297,16 @@ public final class StringUtils {
             // (note that American rules and British rules have nothing to do with en_US and en_GB,
             // as both rules are used in both countries - it's merely a name for the set of rules)
             final char c = cs.charAt(j - 1);
-            if (c != '"' && c != '\'' && Character.getType(c) != Character.END_PUNCTUATION) {
+            if (c != Keyboard.CODE_DOUBLE_QUOTE && c != Keyboard.CODE_SINGLE_QUOTE
+                    && Character.getType(c) != Character.END_PUNCTUATION) {
                 break;
             }
         }
 
         if (j <= 0) return TextUtils.CAP_MODE_CHARACTERS & reqModes;
         char c = cs.charAt(j - 1);
-        if (c == '.' || c == '?' || c == '!') {
+        if (c == Keyboard.CODE_PERIOD || c == Keyboard.CODE_QUESTION_MARK
+                || c == Keyboard.CODE_EXCLAMATION_MARK) {
             // Here we found a marker for sentence end (we consider these to be one of
             // either . or ? or ! only). So this is probably the end of a sentence, but if we
             // found a period, we still want to check the case where this is a abbreviation
@@ -314,10 +319,10 @@ public final class StringUtils {
             // whatever the reason. In the example "in the U.S..", the last period is a full
             // stop following the abbreviation period, and we should capitalize but we don't.
             // Likewise, "I don't know... " should capitalize, but fails to do so.
-            if (c == '.') {
+            if (c == Keyboard.CODE_PERIOD) {
                 for (int k = j - 2; k >= 0; k--) {
                     c = cs.charAt(k);
-                    if (c == '.') {
+                    if (c == Keyboard.CODE_PERIOD) {
                         return TextUtils.CAP_MODE_CHARACTERS & reqModes;
                     }
                     if (!Character.isLetter(c)) {
-- 
cgit v1.2.3-83-g751a


From 2c0d91628733ab502a47f6a60411b1f7e3d0b9a2 Mon Sep 17 00:00:00 2001
From: Jean Chalard <jchalard@google.com>
Date: Thu, 13 Sep 2012 20:09:52 +0900
Subject: Fix abbreviations processing for English (A7)

It's still incorrect for non-English languages.

Bug: 4967874
Change-Id: I3d4379265f72e1c0435eba90ab878a80bbcf55ce
---
 .../com/android/inputmethod/latin/StringUtils.java | 107 ++++++++++++++++-----
 .../inputmethod/latin/StringUtilsTests.java        |  41 ++++++++
 2 files changed, 122 insertions(+), 26 deletions(-)

(limited to 'java/src/com/android/inputmethod/latin/StringUtils.java')

diff --git a/java/src/com/android/inputmethod/latin/StringUtils.java b/java/src/com/android/inputmethod/latin/StringUtils.java
index 8696a5caf..b4f3b4bc9 100644
--- a/java/src/com/android/inputmethod/latin/StringUtils.java
+++ b/java/src/com/android/inputmethod/latin/StringUtils.java
@@ -304,34 +304,89 @@ public final class StringUtils {
         }
 
         if (j <= 0) return TextUtils.CAP_MODE_CHARACTERS & reqModes;
-        char c = cs.charAt(j - 1);
-        if (c == Keyboard.CODE_PERIOD || c == Keyboard.CODE_QUESTION_MARK
-                || c == Keyboard.CODE_EXCLAMATION_MARK) {
-            // Here we found a marker for sentence end (we consider these to be one of
-            // either . or ? or ! only). So this is probably the end of a sentence, but if we
-            // found a period, we still want to check the case where this is a abbreviation
-            // period rather than a full stop. To do this, we look for a period within a word
-            // before the period we just found; if any, we take that to mean it was an
-            // abbreviation.
-            // A typical example of the above is "In the U.S. ", where the last period is
-            // not a full stop and we should not capitalize.
-            // TODO: the rule below is broken. In particular it fails for runs of periods,
-            // whatever the reason. In the example "in the U.S..", the last period is a full
-            // stop following the abbreviation period, and we should capitalize but we don't.
-            // Likewise, "I don't know... " should capitalize, but fails to do so.
-            if (c == Keyboard.CODE_PERIOD) {
-                for (int k = j - 2; k >= 0; k--) {
-                    c = cs.charAt(k);
-                    if (c == Keyboard.CODE_PERIOD) {
-                        return TextUtils.CAP_MODE_CHARACTERS & reqModes;
-                    }
-                    if (!Character.isLetter(c)) {
-                        break;
-                    }
+        char c = cs.charAt(--j);
+
+        // We found the next interesting chunk of text; next we need to determine if it's the
+        // end of a sentence. If we have a question mark or an exclamation mark, it's the end of
+        // a sentence. If it's neither, the only remaining case is the period so we get the opposite
+        // case out of the way.
+        if (c == Keyboard.CODE_QUESTION_MARK || c == Keyboard.CODE_EXCLAMATION_MARK) {
+            return (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_SENTENCES) & reqModes;
+        }
+        if (c != Keyboard.CODE_PERIOD || j <= 0) {
+            return (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS) & reqModes;
+        }
+
+        // We found out that we have a period. We need to determine if this is a full stop or
+        // otherwise sentence-ending period, or an abbreviation like "e.g.". An abbreviation
+        // looks like (\w\.){2,}
+        // To find out, we will have a simple state machine with the following states:
+        // START, WORD, PERIOD, LETTER
+        // On START : (just before the first period)
+        //           letter => WORD
+        //           whitespace => end with no caps (it was a stand-alone period)
+        //           otherwise => end with caps (several periods/symbols in a row)
+        // On WORD : (within the word just before the first period)
+        //           letter => WORD
+        //           period => PERIOD
+        //           otherwise => end with caps (it was a word with a full stop at the end)
+        // On PERIOD : (period within a potential abbreviation)
+        //           letter => LETTER
+        //           otherwise => end with caps (it was not an abbreviation)
+        // On LETTER : (letter within a potential abbreviation)
+        //           letter => LETTER
+        //           period => PERIOD
+        //           otherwise => end with no caps (it was an abbreviation)
+        // "Not an abbreviation" in the above chart essentially covers cases like "...yes.". This
+        // should capitalize.
+
+        final int START = 0;
+        final int WORD = 1;
+        final int PERIOD = 2;
+        final int LETTER = 3;
+        final int caps = (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS
+                | TextUtils.CAP_MODE_SENTENCES) & reqModes;
+        final int noCaps = (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS) & reqModes;
+        int state = START;
+        while (j > 0) {
+            c = cs.charAt(--j);
+            switch (state) {
+            case START:
+                if (Character.isLetter(c)) {
+                    state = WORD;
+                } else if (Character.isWhitespace(c)) {
+                    return noCaps;
+                } else {
+                    return caps;
+                }
+                break;
+            case WORD:
+                if (Character.isLetter(c)) {
+                    state = WORD;
+                } else if (c == Keyboard.CODE_PERIOD) {
+                    state = PERIOD;
+                } else {
+                    return caps;
+                }
+                break;
+            case PERIOD:
+                if (Character.isLetter(c)) {
+                    state = LETTER;
+                } else {
+                    return caps;
+                }
+                break;
+            case LETTER:
+                if (Character.isLetter(c)) {
+                    state = LETTER;
+                } else if (c == Keyboard.CODE_PERIOD) {
+                    state = PERIOD;
+                } else {
+                    return noCaps;
                 }
             }
-            return (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_SENTENCES) & reqModes;
         }
-        return TextUtils.CAP_MODE_CHARACTERS & reqModes;
+        // Here we arrived at the start of the line. This should behave exactly like whitespace.
+        return (START == state || LETTER == state) ? noCaps : caps;
     }
 }
diff --git a/tests/src/com/android/inputmethod/latin/StringUtilsTests.java b/tests/src/com/android/inputmethod/latin/StringUtilsTests.java
index 5db06ef51..0cef28c74 100644
--- a/tests/src/com/android/inputmethod/latin/StringUtilsTests.java
+++ b/tests/src/com/android/inputmethod/latin/StringUtilsTests.java
@@ -17,6 +17,7 @@
 package com.android.inputmethod.latin;
 
 import android.test.AndroidTestCase;
+import android.text.TextUtils;
 
 public class StringUtilsTests extends AndroidTestCase {
     public void testContainsInArray() {
@@ -99,4 +100,44 @@ public class StringUtilsTests extends AndroidTestCase {
         assertFalse("lower-case string", StringUtils.hasUpperCase("string"));
         assertFalse("lower-case string with non-letters", StringUtils.hasUpperCase("he's"));
     }
+
+    private void onePathForCaps(final CharSequence cs, final int expectedResult, final int mask) {
+        int oneTimeResult = expectedResult & mask;
+        assertEquals("After >" + cs + "<", oneTimeResult, StringUtils.getCapsMode(cs, mask));
+    }
+
+    private void allPathsForCaps(final CharSequence cs, final int expectedResult) {
+        final int c = TextUtils.CAP_MODE_CHARACTERS;
+        final int w = TextUtils.CAP_MODE_WORDS;
+        final int s = TextUtils.CAP_MODE_SENTENCES;
+        onePathForCaps(cs, expectedResult, c | w | s);
+        onePathForCaps(cs, expectedResult, w | s);
+        onePathForCaps(cs, expectedResult, c | s);
+        onePathForCaps(cs, expectedResult, c | w);
+        onePathForCaps(cs, expectedResult, c);
+        onePathForCaps(cs, expectedResult, w);
+        onePathForCaps(cs, expectedResult, s);
+    }
+
+    public void testGetCapsMode() {
+        final int c = TextUtils.CAP_MODE_CHARACTERS;
+        final int w = TextUtils.CAP_MODE_WORDS;
+        final int s = TextUtils.CAP_MODE_SENTENCES;
+        allPathsForCaps("", c | w | s);
+        allPathsForCaps("Word", c);
+        allPathsForCaps("Word.", c);
+        allPathsForCaps("Word ", c | w);
+        allPathsForCaps("Word. ", c | w | s);
+        allPathsForCaps("Word..", c);
+        allPathsForCaps("Word.. ", c | w | s);
+        allPathsForCaps("Word... ", c | w | s);
+        allPathsForCaps("Word ... ", c | w | s);
+        allPathsForCaps("Word . ", c | w);
+        allPathsForCaps("In the U.S ", c | w);
+        allPathsForCaps("In the U.S. ", c | w);
+        allPathsForCaps("Some stuff (e.g. ", c | w);
+        allPathsForCaps("In the U.S.. ", c | w | s);
+        allPathsForCaps("\"Word.\" ", c | w | s);
+        allPathsForCaps("\"Word\" ", c | w);
+    }
 }
-- 
cgit v1.2.3-83-g751a


From 50e61b86287fbb4cf76c26c22ac05fe07a659fe2 Mon Sep 17 00:00:00 2001
From: Jean Chalard <jchalard@google.com>
Date: Fri, 14 Sep 2012 15:15:42 +0900
Subject: Remove an unused function (A8)

Change-Id: I0578068946d32192c4faf14b3e86624b7301c7d9
---
 java/src/com/android/inputmethod/latin/StringUtils.java | 17 -----------------
 .../com/android/inputmethod/latin/StringUtilsTests.java | 11 -----------
 2 files changed, 28 deletions(-)

(limited to 'java/src/com/android/inputmethod/latin/StringUtils.java')

diff --git a/java/src/com/android/inputmethod/latin/StringUtils.java b/java/src/com/android/inputmethod/latin/StringUtils.java
index b4f3b4bc9..52fda044a 100644
--- a/java/src/com/android/inputmethod/latin/StringUtils.java
+++ b/java/src/com/android/inputmethod/latin/StringUtils.java
@@ -124,23 +124,6 @@ public final class StringUtils {
         return true;
     }
 
-    /**
-     * Returns true if cs contains any upper case characters.
-     *
-     * @param cs the CharSequence to check
-     * @return {@code true} if cs contains any upper case characters, {@code false} otherwise.
-     */
-    public static boolean hasUpperCase(final CharSequence cs) {
-        final int length = cs.length();
-        for (int i = 0, cp = 0; i < length; i += Character.charCount(cp)) {
-            cp = Character.codePointAt(cs, i);
-            if (Character.isUpperCase(cp)) {
-                return true;
-            }
-        }
-        return false;
-    }
-
     /**
      * Remove duplicates from an array of strings.
      *
diff --git a/tests/src/com/android/inputmethod/latin/StringUtilsTests.java b/tests/src/com/android/inputmethod/latin/StringUtilsTests.java
index 0cef28c74..c3d9c0616 100644
--- a/tests/src/com/android/inputmethod/latin/StringUtilsTests.java
+++ b/tests/src/com/android/inputmethod/latin/StringUtilsTests.java
@@ -90,17 +90,6 @@ public class StringUtilsTests extends AndroidTestCase {
                 StringUtils.removeFromCsvIfExists("key", "key1,key,key3,key,key5"));
     }
 
-    public void testHasUpperCase() {
-        assertTrue("single upper-case string", StringUtils.hasUpperCase("String"));
-        assertTrue("multi upper-case string", StringUtils.hasUpperCase("stRInG"));
-        assertTrue("all upper-case string", StringUtils.hasUpperCase("STRING"));
-        assertTrue("upper-case string with non-letters", StringUtils.hasUpperCase("He's"));
-
-        assertFalse("empty string", StringUtils.hasUpperCase(""));
-        assertFalse("lower-case string", StringUtils.hasUpperCase("string"));
-        assertFalse("lower-case string with non-letters", StringUtils.hasUpperCase("he's"));
-    }
-
     private void onePathForCaps(final CharSequence cs, final int expectedResult, final int mask) {
         int oneTimeResult = expectedResult & mask;
         assertEquals("After >" + cs + "<", oneTimeResult, StringUtils.getCapsMode(cs, mask));
-- 
cgit v1.2.3-83-g751a


From 6e65ff80012048d7b85f16744f9c8e1c5ed1516d Mon Sep 17 00:00:00 2001
From: Jean Chalard <jchalard@google.com>
Date: Fri, 14 Sep 2012 15:20:52 +0900
Subject: Fix comment warnings (A9)

Thanks Eclipse

Change-Id: Ie2b83f1ec7ab38a76155f8c264e3944685ae934d
---
 java/src/com/android/inputmethod/latin/StringUtils.java | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'java/src/com/android/inputmethod/latin/StringUtils.java')

diff --git a/java/src/com/android/inputmethod/latin/StringUtils.java b/java/src/com/android/inputmethod/latin/StringUtils.java
index 52fda044a..0fc6c32d7 100644
--- a/java/src/com/android/inputmethod/latin/StringUtils.java
+++ b/java/src/com/android/inputmethod/latin/StringUtils.java
@@ -194,15 +194,15 @@ public final class StringUtils {
      *
      * @param cs The text that should be checked for caps modes.
      * @param reqModes The modes to be checked: may be any combination of
-     * {@link #CAP_MODE_CHARACTERS}, {@link #CAP_MODE_WORDS}, and
-     * {@link #CAP_MODE_SENTENCES}.
+     * {@link TextUtils#CAP_MODE_CHARACTERS}, {@link TextUtils#CAP_MODE_WORDS}, and
+     * {@link TextUtils#CAP_MODE_SENTENCES}.
      *
      * @return Returns the actual capitalization modes that can be in effect
      * at the current position, which is any combination of
-     * {@link #CAP_MODE_CHARACTERS}, {@link #CAP_MODE_WORDS}, and
-     * {@link #CAP_MODE_SENTENCES}.
+     * {@link TextUtils#CAP_MODE_CHARACTERS}, {@link TextUtils#CAP_MODE_WORDS}, and
+     * {@link TextUtils#CAP_MODE_SENTENCES}.
      */
-    public static int getCapsMode(CharSequence cs, int reqModes) {
+    public static int getCapsMode(final CharSequence cs, final int reqModes) {
         // Quick description of what we want to do:
         // CAP_MODE_CHARACTERS is always on.
         // CAP_MODE_WORDS is on if there is some whitespace before the cursor.
-- 
cgit v1.2.3-83-g751a


From 252da38fcd1a40b8c308d6754d644064032094f9 Mon Sep 17 00:00:00 2001
From: Jean Chalard <jchalard@google.com>
Date: Fri, 14 Sep 2012 16:27:04 +0900
Subject: Take locale into account for caps (A10)

Bug: 4967874
Change-Id: Ic7ce7b2de088308fa00865c81246c84c605db1e5
---
 .../com/android/inputmethod/latin/LatinIME.java    |  2 +-
 .../inputmethod/latin/RichInputConnection.java     |  5 +-
 .../com/android/inputmethod/latin/StringUtils.java | 34 +++++++-----
 .../inputmethod/latin/StringUtilsTests.java        | 61 +++++++++++++---------
 4 files changed, 59 insertions(+), 43 deletions(-)

(limited to 'java/src/com/android/inputmethod/latin/StringUtils.java')

diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java
index 03de03d25..db8f269eb 100644
--- a/java/src/com/android/inputmethod/latin/LatinIME.java
+++ b/java/src/com/android/inputmethod/latin/LatinIME.java
@@ -1118,7 +1118,7 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen
         // Note: getCursorCapsMode() returns the current capitalization mode that is any
         // combination of CAP_MODE_CHARACTERS, CAP_MODE_WORDS, and CAP_MODE_SENTENCES. 0 means none
         // of them.
-        return mConnection.getCursorCapsMode(inputType);
+        return mConnection.getCursorCapsMode(inputType, mSubtypeSwitcher.getCurrentSubtypeLocale());
     }
 
     // Factor in auto-caps and manual caps and compute the current caps mode.
diff --git a/java/src/com/android/inputmethod/latin/RichInputConnection.java b/java/src/com/android/inputmethod/latin/RichInputConnection.java
index 43b9ba7a9..b85f9dcd7 100644
--- a/java/src/com/android/inputmethod/latin/RichInputConnection.java
+++ b/java/src/com/android/inputmethod/latin/RichInputConnection.java
@@ -30,6 +30,7 @@ import com.android.inputmethod.keyboard.Keyboard;
 import com.android.inputmethod.latin.define.ProductionFlag;
 import com.android.inputmethod.research.ResearchLogger;
 
+import java.util.Locale;
 import java.util.regex.Pattern;
 
 /**
@@ -189,7 +190,7 @@ public class RichInputConnection {
         }
     }
 
-    public int getCursorCapsMode(final int inputType) {
+    public int getCursorCapsMode(final int inputType, final Locale locale) {
         mIC = mParent.getCurrentInputConnection();
         if (null == mIC) return Constants.TextUtils.CAP_MODE_OFF;
         if (!TextUtils.isEmpty(mComposingText)) return Constants.TextUtils.CAP_MODE_OFF;
@@ -204,7 +205,7 @@ public class RichInputConnection {
         }
         // This never calls InputConnection#getCapsMode - in fact, it's a static method that
         // never blocks or initiates IPC.
-        return StringUtils.getCapsMode(mCommittedTextBeforeComposingText, inputType);
+        return StringUtils.getCapsMode(mCommittedTextBeforeComposingText, inputType, locale);
     }
 
     public CharSequence getTextBeforeCursor(final int i, final int j) {
diff --git a/java/src/com/android/inputmethod/latin/StringUtils.java b/java/src/com/android/inputmethod/latin/StringUtils.java
index 0fc6c32d7..6dc1ea807 100644
--- a/java/src/com/android/inputmethod/latin/StringUtils.java
+++ b/java/src/com/android/inputmethod/latin/StringUtils.java
@@ -196,13 +196,14 @@ public final class StringUtils {
      * @param reqModes The modes to be checked: may be any combination of
      * {@link TextUtils#CAP_MODE_CHARACTERS}, {@link TextUtils#CAP_MODE_WORDS}, and
      * {@link TextUtils#CAP_MODE_SENTENCES}.
+     * @param locale The locale to consider for capitalization rules
      *
      * @return Returns the actual capitalization modes that can be in effect
      * at the current position, which is any combination of
      * {@link TextUtils#CAP_MODE_CHARACTERS}, {@link TextUtils#CAP_MODE_WORDS}, and
      * {@link TextUtils#CAP_MODE_SENTENCES}.
      */
-    public static int getCapsMode(final CharSequence cs, final int reqModes) {
+    public static int getCapsMode(final CharSequence cs, final int reqModes, final Locale locale) {
         // Quick description of what we want to do:
         // CAP_MODE_CHARACTERS is always on.
         // CAP_MODE_WORDS is on if there is some whitespace before the cursor.
@@ -270,19 +271,24 @@ public final class StringUtils {
         // we know that MODE_SENTENCES is being requested.
 
         // Step 4 : Search for MODE_SENTENCES.
-        for (; j > 0; j--) {
-            // Here we look to go over any closing punctuation. This is because in dominant variants
-            // of English, the final period is placed within double quotes and maybe other closing
-            // punctuation signs.
-            // TODO: this is wrong for almost everything except American typography rules for
-            // English. It's wrong for British typography rules for English, it's wrong for French,
-            // it's wrong for German, it's wrong for Spanish, and possibly everything else.
-            // (note that American rules and British rules have nothing to do with en_US and en_GB,
-            // as both rules are used in both countries - it's merely a name for the set of rules)
-            final char c = cs.charAt(j - 1);
-            if (c != Keyboard.CODE_DOUBLE_QUOTE && c != Keyboard.CODE_SINGLE_QUOTE
-                    && Character.getType(c) != Character.END_PUNCTUATION) {
-                break;
+        // English is a special case in that "American typography" rules, which are the most common
+        // in English, state that a sentence terminator immediately following a quotation mark
+        // should be swapped with it and de-duplicated (included in the quotation mark),
+        // e.g. <<Did he say, "let's go home?">>
+        // No other language has such a rule as far as I know; others put only the exact thing
+        // quoted inside the quotation marks and handle the surrounding punctuation independently,
+        // e.g. <<Did he say, "let's go home"?>>
+        // Hence, specifically for English, we treat this special case here.
+        if (Locale.ENGLISH.getLanguage().equals(locale.getLanguage())) {
+            for (; j > 0; j--) {
+                // Here we look to go over any closing punctuation. This is because in dominant
+                // variants of English, the final period is placed within double quotes and maybe
+                // other closing punctuation signs. This is generally not true in other languages.
+                final char c = cs.charAt(j - 1);
+                if (c != Keyboard.CODE_DOUBLE_QUOTE && c != Keyboard.CODE_SINGLE_QUOTE
+                        && Character.getType(c) != Character.END_PUNCTUATION) {
+                    break;
+                }
             }
         }
 
diff --git a/tests/src/com/android/inputmethod/latin/StringUtilsTests.java b/tests/src/com/android/inputmethod/latin/StringUtilsTests.java
index c3d9c0616..00cca9d3b 100644
--- a/tests/src/com/android/inputmethod/latin/StringUtilsTests.java
+++ b/tests/src/com/android/inputmethod/latin/StringUtilsTests.java
@@ -19,6 +19,8 @@ package com.android.inputmethod.latin;
 import android.test.AndroidTestCase;
 import android.text.TextUtils;
 
+import java.util.Locale;
+
 public class StringUtilsTests extends AndroidTestCase {
     public void testContainsInArray() {
         assertFalse("empty array", StringUtils.containsInArray("key", new String[0]));
@@ -90,43 +92,50 @@ public class StringUtilsTests extends AndroidTestCase {
                 StringUtils.removeFromCsvIfExists("key", "key1,key,key3,key,key5"));
     }
 
-    private void onePathForCaps(final CharSequence cs, final int expectedResult, final int mask) {
+    private void onePathForCaps(final CharSequence cs, final int expectedResult, final int mask,
+            final Locale l) {
         int oneTimeResult = expectedResult & mask;
-        assertEquals("After >" + cs + "<", oneTimeResult, StringUtils.getCapsMode(cs, mask));
+        assertEquals("After >" + cs + "<", oneTimeResult, StringUtils.getCapsMode(cs, mask, l));
     }
 
-    private void allPathsForCaps(final CharSequence cs, final int expectedResult) {
+    private void allPathsForCaps(final CharSequence cs, final int expectedResult, final Locale l) {
         final int c = TextUtils.CAP_MODE_CHARACTERS;
         final int w = TextUtils.CAP_MODE_WORDS;
         final int s = TextUtils.CAP_MODE_SENTENCES;
-        onePathForCaps(cs, expectedResult, c | w | s);
-        onePathForCaps(cs, expectedResult, w | s);
-        onePathForCaps(cs, expectedResult, c | s);
-        onePathForCaps(cs, expectedResult, c | w);
-        onePathForCaps(cs, expectedResult, c);
-        onePathForCaps(cs, expectedResult, w);
-        onePathForCaps(cs, expectedResult, s);
+        onePathForCaps(cs, expectedResult, c | w | s, l);
+        onePathForCaps(cs, expectedResult, w | s, l);
+        onePathForCaps(cs, expectedResult, c | s, l);
+        onePathForCaps(cs, expectedResult, c | w, l);
+        onePathForCaps(cs, expectedResult, c, l);
+        onePathForCaps(cs, expectedResult, w, l);
+        onePathForCaps(cs, expectedResult, s, l);
     }
 
     public void testGetCapsMode() {
         final int c = TextUtils.CAP_MODE_CHARACTERS;
         final int w = TextUtils.CAP_MODE_WORDS;
         final int s = TextUtils.CAP_MODE_SENTENCES;
-        allPathsForCaps("", c | w | s);
-        allPathsForCaps("Word", c);
-        allPathsForCaps("Word.", c);
-        allPathsForCaps("Word ", c | w);
-        allPathsForCaps("Word. ", c | w | s);
-        allPathsForCaps("Word..", c);
-        allPathsForCaps("Word.. ", c | w | s);
-        allPathsForCaps("Word... ", c | w | s);
-        allPathsForCaps("Word ... ", c | w | s);
-        allPathsForCaps("Word . ", c | w);
-        allPathsForCaps("In the U.S ", c | w);
-        allPathsForCaps("In the U.S. ", c | w);
-        allPathsForCaps("Some stuff (e.g. ", c | w);
-        allPathsForCaps("In the U.S.. ", c | w | s);
-        allPathsForCaps("\"Word.\" ", c | w | s);
-        allPathsForCaps("\"Word\" ", c | w);
+        Locale l = Locale.ENGLISH;
+        allPathsForCaps("", c | w | s, l);
+        allPathsForCaps("Word", c, l);
+        allPathsForCaps("Word.", c, l);
+        allPathsForCaps("Word ", c | w, l);
+        allPathsForCaps("Word. ", c | w | s, l);
+        allPathsForCaps("Word..", c, l);
+        allPathsForCaps("Word.. ", c | w | s, l);
+        allPathsForCaps("Word... ", c | w | s, l);
+        allPathsForCaps("Word ... ", c | w | s, l);
+        allPathsForCaps("Word . ", c | w, l);
+        allPathsForCaps("In the U.S ", c | w, l);
+        allPathsForCaps("In the U.S. ", c | w, l);
+        allPathsForCaps("Some stuff (e.g. ", c | w, l);
+        allPathsForCaps("In the U.S.. ", c | w | s, l);
+        allPathsForCaps("\"Word.\" ", c | w | s, l);
+        allPathsForCaps("\"Word\". ", c | w | s, l);
+        allPathsForCaps("\"Word\" ", c | w, l);
+        l = Locale.FRENCH;
+        allPathsForCaps("\"Word.\" ", c | w, l);
+        allPathsForCaps("\"Word\". ", c | w | s, l);
+        allPathsForCaps("\"Word\" ", c | w, l);
     }
 }
-- 
cgit v1.2.3-83-g751a