aboutsummaryrefslogtreecommitdiffstats
path: root/java/src/com/android/inputmethod/latin/StringUtils.java
diff options
context:
space:
mode:
authorJean Chalard <jchalard@google.com>2013-04-15 17:33:48 +0900
committerJean Chalard <jchalard@google.com>2013-04-16 18:03:49 +0900
commit73ec85b8ad3102ce1c7e6013be73afe83475e589 (patch)
tree7d1545c756bf5ab4cd1f6a2e14acc11645808a58 /java/src/com/android/inputmethod/latin/StringUtils.java
parent252412d7eb4573f91588b06b0fe49ef9f0ac38ac (diff)
downloadlatinime-73ec85b8ad3102ce1c7e6013be73afe83475e589.tar.gz
latinime-73ec85b8ad3102ce1c7e6013be73afe83475e589.tar.xz
latinime-73ec85b8ad3102ce1c7e6013be73afe83475e589.zip
Don't insert automatic spaces when text looks like a URL
This is about as ad-hoc as it gets, but then again, what we want is probably as ad-hoc as it gets. All URL boxes I know of double as search bars, and not adding automatic spaces there sucks (e.g. in Chrome URL bar). And in other boxes actually you don't want to add a space if it looks like a URL. QSB isn't even a search box, and it behaves like this. So I think this is actually the right answer to the problem. Bug: 7062925 Change-Id: Ib09472b34644fd5bf2dc84bb97cedeeba28bcd02
Diffstat (limited to 'java/src/com/android/inputmethod/latin/StringUtils.java')
-rw-r--r--java/src/com/android/inputmethod/latin/StringUtils.java65
1 files changed, 65 insertions, 0 deletions
diff --git a/java/src/com/android/inputmethod/latin/StringUtils.java b/java/src/com/android/inputmethod/latin/StringUtils.java
index 7f1e7c619..d5ee58a63 100644
--- a/java/src/com/android/inputmethod/latin/StringUtils.java
+++ b/java/src/com/android/inputmethod/latin/StringUtils.java
@@ -282,4 +282,69 @@ public final class StringUtils {
}
return builder.toString();
}
+
+ /**
+ * Approximates whether the text before the cursor looks like a URL.
+ *
+ * This is not foolproof, but it should work well in practice.
+ * Essentially it walks backward from the cursor until it finds something that's not a letter,
+ * digit, or common URL symbol like underscore, then looks at the run of characters it
+ * walked over (a period alone is not enough, and is not always required).
+ * If that run:
+ * - starts with www and contains a period, or contains both a period and a slash
+ * - contains "//", or starts with a slash preceded by whitespace or start-of-string
+ * Then it looks like a URL and we return true. Otherwise, we return false.
+ *
+ * Note: this method is called quite often, and should be fast.
+ *
+ * TODO: This will return that "abc./def" and ".abc/def" look like URLs to keep down the
+ * code complexity, but ideally it should not. It's acceptable for now.
+ */
+ public static boolean lastPartLooksLikeURL(final CharSequence text) {
+ int i = text.length();
+ if (0 == i) return false;
+ int wCount = 0;
+ int slashCount = 0;
+ boolean hasSlash = false;
+ boolean hasPeriod = false;
+ int codePoint = 0;
+ while (i > 0) {
+ codePoint = Character.codePointBefore(text, i);
+ if (codePoint < Constants.CODE_PERIOD || codePoint > 'z') {
+ // Handwavy heuristic to see if that's a URL character: anything between period
+ // and 'z', inclusive. This includes all lower- and upper-case ASCII letters, digits,
+ // period, slash, colon, underscore, at sign, question mark, equal sign. It excludes
+ // spaces, exclamation marks, double quotes...
+ // Anything that's not a URL-like character ends the backward walk: we break from
+ // here and evaluate what has been seen so far.
+ break;
+ }
+ if (Constants.CODE_PERIOD == codePoint) {
+ hasPeriod = true;
+ }
+ if (Constants.CODE_SLASH == codePoint) {
+ hasSlash = true;
+ if (2 == ++slashCount) {
+ return true;
+ }
+ } else {
+ slashCount = 0;
+ }
+ if ('w' == codePoint) {
+ ++wCount;
+ } else {
+ wCount = 0;
+ }
+ i = Character.offsetByCodePoints(text, i, -1);
+ }
+ // End of the run. If the loop broke out early, codePoint holds the char that stopped it.
+ // If the run starts with www and includes a period, then it looks like a URL.
+ if (wCount >= 3 && hasPeriod) return true;
+ // If it starts with a slash at start-of-string or right after whitespace, it looks like a URL.
+ if (1 == slashCount && (0 == i || Character.isWhitespace(codePoint))) return true;
+ // If it has both a period and a slash, it looks like a URL.
+ if (hasPeriod && hasSlash) return true;
+ // Otherwise, it doesn't look like a URL.
+ return false;
+ }
}