1 files changed, 137 insertions, 3 deletions
diff --git a/java/src/com/android/inputmethod/latin/StringUtils.java b/java/src/com/android/inputmethod/latin/StringUtils.java
index 59ad28fc9..d5ee58a63 100644
--- a/java/src/com/android/inputmethod/latin/StringUtils.java
+++ b/java/src/com/android/inputmethod/latin/StringUtils.java
@@ -106,10 +106,19 @@ public final class StringUtils {
         }
     }
 
-    public static String toTitleCase(final String s, final Locale locale) {
+    public static String capitalizeFirstCodePoint(final String s, final Locale locale) {
         if (s.length() <= 1) {
-            // TODO: is this really correct? Shouldn't this be s.toUpperCase()?
-            return s;
+            return s.toUpperCase(locale); // empty or one-char string: upper-case it as a whole
+        }
+        // Only the first code point is upper-cased; the rest of the string is kept as is.
+        // The TODO in {@link #capitalizeFirstAndDowncaseRest(String,Locale)} applies here as well.
+        final int cutoff = s.offsetByCodePoints(0, 1);
+        return s.substring(0, cutoff).toUpperCase(locale) + s.substring(cutoff);
+    }
+
+    public static String capitalizeFirstAndDowncaseRest(final String s, final Locale locale) {
+        if (s.length() <= 1) {
+            return s.toUpperCase(locale);
         }
         // TODO: fix the bugs below
         // - This does not work for Greek, because it returns upper case instead of title case.
@@ -213,4 +222,129 @@ public final class StringUtils {
         if (1 == capsCount) return CAPITALIZE_FIRST;
         return (letterCount == capsCount ? CAPITALIZE_ALL : CAPITALIZE_NONE);
     }
+
+    /** Returns whether every letter code point in the text is an upper case letter. */
+    public static boolean isIdenticalAfterUpcase(final String text) {
+        int index = 0;
+        while (index < text.length()) {
+            final int cp = text.codePointAt(index);
+            if (Character.isLetter(cp) && !Character.isUpperCase(cp)) return false;
+            index += Character.charCount(cp);
+        }
+        return true;
+    }
+
+    /** Returns whether every letter code point in the text is a lower case letter. */
+    public static boolean isIdenticalAfterDowncase(final String text) {
+        int index = 0;
+        while (index < text.length()) {
+            final int cp = text.codePointAt(index);
+            if (Character.isLetter(cp) && !Character.isLowerCase(cp)) return false;
+            index += Character.charCount(cp);
+        }
+        return true;
+    }
+
+    /**
+     * Checks that letters at the start of the text or right after a separator are upper case
+     * and that all other letters are lower case. Non-letter code points are not checked.
+     */
+    public static boolean isIdenticalAfterCapitalizeEachWord(final String text,
+            final String separators) {
+        boolean wantUpper = true;
+        int index = 0;
+        while (index < text.length()) {
+            final int cp = text.codePointAt(index);
+            final boolean ok = wantUpper ? Character.isUpperCase(cp) : Character.isLowerCase(cp);
+            if (Character.isLetter(cp) && !ok) return false;
+            wantUpper = separators.indexOf(cp) >= 0; // a separator starts a new word
+            index += Character.charCount(cp);
+        }
+        return true;
+    }
+
+    // TODO: like capitalizeFirst*, this does not work perfectly for Dutch because of the IJ digraph
+    // which should be capitalized together in *some* cases.
+    /**
+     * Upper-cases code points at the start of the text or after a separator; lower-cases others.
+     */
+    public static String capitalizeEachWord(final String text, final String separators,
+            final Locale locale) {
+        final StringBuilder result = new StringBuilder();
+        boolean atWordStart = true;
+        int start = 0;
+        while (start < text.length()) {
+            final int end = text.offsetByCodePoints(start, 1);
+            final String piece = text.substring(start, end);
+            result.append(atWordStart ? piece.toUpperCase(locale) : piece.toLowerCase(locale));
+            atWordStart = separators.indexOf(piece.codePointAt(0)) >= 0; // separator => new word
+            start = end;
+        }
+        return result.toString();
+    }
+
+    /**
+     * Approximates whether the text before the cursor looks like a URL.
+     *
+     * This is a heuristic and is not foolproof. It walks backward from the end of the text over
+     * code points between period and 'z' (ASCII letters, digits, common URL symbols) and stops at
+     * the first code point outside that range or at the start of the text. It returns true as
+     * soon as it sees two consecutive slashes, or if the run it walked over:
+     * - starts with at least three 'w' and contains a period
+     * - starts with a slash that is at start-of-string or preceded by whitespace
+     * - contains both a period and a slash
+     * Otherwise, it returns false. An empty text never looks like a URL.
+     *
+     * Note: this method is called quite often, and should be fast.
+     *
+     * TODO: This will return that "abc./def" and ".abc/def" look like URLs to keep down the
+     * code complexity, but ideally it should not. It's acceptable for now.
+     */
+    public static boolean lastPartLooksLikeURL(final CharSequence text) {
+        int i = text.length();
+        if (0 == i) return false;
+        int wCount = 0; // length of the current streak of consecutive 'w' (walking backward)
+        int slashCount = 0; // length of the current streak of consecutive slashes
+        boolean hasSlash = false;
+        boolean hasPeriod = false;
+        int codePoint = 0; // last code point examined; still read after the loop
+        while (i > 0) {
+            codePoint =  Character.codePointBefore(text, i);
+            if (codePoint < Constants.CODE_PERIOD || codePoint > 'z') {
+                // Handwavy heuristic to see if that's a URL character. Anything between period
+                // and z. This includes all lower- and upper-case ASCII letters, digits, period,
+                // slash, colon, underscore, at sign, question mark, equal sign. It excludes
+                // spaces, exclamation marks, double quotes...
+                // Anything that's not a URL-like character ends the backward walk; the run seen
+                // so far is then evaluated below.
+                break;
+            }
+            if (Constants.CODE_PERIOD == codePoint) {
+                hasPeriod = true;
+            }
+            if (Constants.CODE_SLASH == codePoint) {
+                hasSlash = true;
+                if (2 == ++slashCount) {
+                    return true;
+                }
+            } else {
+                slashCount = 0;
+            }
+            if ('w' == codePoint) {
+                ++wCount;
+            } else {
+                wCount = 0;
+            }
+            i = Character.offsetByCodePoints(text, i, -1);
+        }
+        // End of the text run: the streak counters now describe the start of the run.
+        // If it starts with www and includes a period, then it looks like a URL.
+        if (wCount >= 3 && hasPeriod) return true;
+        // A run starting with one slash at start-of-text or after whitespace looks like a URL.
+        if (1 == slashCount && (0 == i || Character.isWhitespace(codePoint))) return true;
+        // If it has both a period and a slash, it looks like a URL.
+        if (hasPeriod && hasSlash) return true;
+        // Otherwise, it doesn't look like a URL.
+        return false;
+    }
 }