diff options
Diffstat (limited to 'java/src/com/android/inputmethod/latin/StringUtils.java')
-rw-r--r-- | java/src/com/android/inputmethod/latin/StringUtils.java | 67 |
1 files changed, 59 insertions, 8 deletions
diff --git a/java/src/com/android/inputmethod/latin/StringUtils.java b/java/src/com/android/inputmethod/latin/StringUtils.java index 90c3fcdd2..11ef60dc9 100644 --- a/java/src/com/android/inputmethod/latin/StringUtils.java +++ b/java/src/com/android/inputmethod/latin/StringUtils.java @@ -22,6 +22,10 @@ import java.util.ArrayList; import java.util.Locale; public final class StringUtils { + public static final int CAPITALIZE_NONE = 0; // No caps, or mixed case + public static final int CAPITALIZE_FIRST = 1; // First only + public static final int CAPITALIZE_ALL = 2; // All caps + private StringUtils() { // This utility class is not publicly instantiable. } @@ -102,20 +106,30 @@ public final class StringUtils { } } - public static String toTitleCase(final String s, final Locale locale) { + public static String capitalizeFirstCodePoint(final String s, final Locale locale) { + if (s.length() <= 1) { + return s.toUpperCase(locale); + } + // Please refer to the comment below in + // {@link #capitalizeFirstAndDowncaseRest(String,Locale)} as this has the same shortcomings + final int cutoff = s.offsetByCodePoints(0, 1); + return s.substring(0, cutoff).toUpperCase(locale) + s.substring(cutoff); + } + + public static String capitalizeFirstAndDowncaseRest(final String s, final Locale locale) { if (s.length() <= 1) { - // TODO: is this really correct? Shouldn't this be s.toUpperCase()? - return s; + return s.toUpperCase(locale); } // TODO: fix the bugs below // - This does not work for Greek, because it returns upper case instead of title case. // - It does not work for Serbian, because it fails to account for the "lj" character, // which should be "Lj" in title case and "LJ" in upper case. - // - It does not work for Dutch, because it fails to account for the "ij" digraph, which - // are two different characters but both should be capitalized as "IJ" as if they were - // a single letter. - // - It also does not work with unicode surrogate code points. - return s.toUpperCase(locale).charAt(0) + s.substring(1); + // - It does not work for Dutch, because it fails to account for the "ij" digraph when it's + // written as two separate code points. They are two different characters but both should + // be capitalized as "IJ" as if they were a single letter in most words (not all). If the + // unicode char for the ligature is used however, it works. + final int cutoff = s.offsetByCodePoints(0, 1); + return s.substring(0, cutoff).toUpperCase(locale) + s.substring(cutoff).toLowerCase(locale); } private static final int[] EMPTY_CODEPOINTS = {}; @@ -171,4 +185,41 @@ public final class StringUtils { } return list.toArray(new String[list.size()]); } + + // This method assumes the text is not null. For the empty string, it returns CAPITALIZE_NONE. + public static int getCapitalizationType(final String text) { + // If the first char is not uppercase, then the word is either all lower case or + // camel case, and in either case we return CAPITALIZE_NONE. + final int len = text.length(); + int index = 0; + for (; index < len; index = text.offsetByCodePoints(index, 1)) { + if (Character.isLetter(text.codePointAt(index))) { + break; + } + } + if (index == len) return CAPITALIZE_NONE; + if (!Character.isUpperCase(text.codePointAt(index))) { + return CAPITALIZE_NONE; + } + int capsCount = 1; + int letterCount = 1; + for (index = text.offsetByCodePoints(index, 1); index < len; + index = text.offsetByCodePoints(index, 1)) { + if (1 != capsCount && letterCount != capsCount) break; + final int codePoint = text.codePointAt(index); + if (Character.isUpperCase(codePoint)) { + ++capsCount; + ++letterCount; + } else if (Character.isLetter(codePoint)) { + // We need to discount non-letters since they may not be upper-case, but may + // still be part of a word (e.g. single quote or dash, as in "IT'S" or "FULL-TIME") + ++letterCount; + } + } + // We know the first char is upper case. So we want to test if either every letter other + // than the first is lower case, or if they are all upper case. If the string is exactly + // one char long, then we will arrive here with letterCount 1, and this is correct, too. + if (1 == capsCount) return CAPITALIZE_FIRST; + return (letterCount == capsCount ? CAPITALIZE_ALL : CAPITALIZE_NONE); + } } |