diff options
author | 2013-04-05 12:07:16 +0900 | |
---|---|---|
committer | 2013-04-05 12:11:19 +0900 | |
commit | c87449497cb54c092cfd46037320fedf1c4504b6 (patch) | |
tree | 9c0aaad30f6cf71547b001745f61d455eadd8ef2 /java/src/com/android/inputmethod/latin/StringUtils.java | |
parent | 7d3836d63a2eb4b79c4ad93cdae4f1f61cdb518e (diff) | |
download | latinime-c87449497cb54c092cfd46037320fedf1c4504b6.tar.gz latinime-c87449497cb54c092cfd46037320fedf1c4504b6.tar.xz latinime-c87449497cb54c092cfd46037320fedf1c4504b6.zip |
Fix bugs and add tests
Change-Id: I6b56b91ace57f4a49584b5dceb71b145859f839e
Diffstat (limited to 'java/src/com/android/inputmethod/latin/StringUtils.java')
-rw-r--r-- | java/src/com/android/inputmethod/latin/StringUtils.java | 29 |
1 files changed, 20 insertions, 9 deletions
```diff
diff --git a/java/src/com/android/inputmethod/latin/StringUtils.java b/java/src/com/android/inputmethod/latin/StringUtils.java
index dcb514a5e..59ad28fc9 100644
--- a/java/src/com/android/inputmethod/latin/StringUtils.java
+++ b/java/src/com/android/inputmethod/latin/StringUtils.java
@@ -115,11 +115,12 @@ public final class StringUtils {
         // - This does not work for Greek, because it returns upper case instead of title case.
         // - It does not work for Serbian, because it fails to account for the "lj" character,
         // which should be "Lj" in title case and "LJ" in upper case.
-        // - It does not work for Dutch, because it fails to account for the "ij" digraph, which
-        // are two different characters but both should be capitalized as "IJ" as if they were
-        // a single letter.
-        // - It also does not work with unicode surrogate code points.
-        return s.toUpperCase(locale).charAt(0) + s.substring(1);
+        // - It does not work for Dutch, because it fails to account for the "ij" digraph when it's
+        // written as two separate code points. They are two different characters but both should
+        // be capitalized as "IJ" as if they were a single letter in most words (not all). If the
+        // unicode char for the ligature is used however, it works.
+        final int cutoff = s.offsetByCodePoints(0, 1);
+        return s.substring(0, cutoff).toUpperCase(locale) + s.substring(cutoff).toLowerCase(locale);
     }
 
     private static final int[] EMPTY_CODEPOINTS = {};
@@ -176,17 +177,27 @@ public final class StringUtils {
         return list.toArray(new String[list.size()]);
     }
 
-    // This method assumes the text is not empty or null.
+    // This method assumes the text is not null. For the empty string, it returns CAPITALIZE_NONE.
     public static int getCapitalizationType(final String text) {
         // If the first char is not uppercase, then the word is either all lower case or
         // camel case, and in either case we return CAPITALIZE_NONE.
-        if (!Character.isUpperCase(text.codePointAt(0))) return CAPITALIZE_NONE;
         final int len = text.length();
+        int index = 0;
+        for (; index < len; index = text.offsetByCodePoints(index, 1)) {
+            if (Character.isLetter(text.codePointAt(index))) {
+                break;
+            }
+        }
+        if (index == len) return CAPITALIZE_NONE;
+        if (!Character.isUpperCase(text.codePointAt(index))) {
+            return CAPITALIZE_NONE;
+        }
         int capsCount = 1;
         int letterCount = 1;
-        for (int i = 1; i < len; i = text.offsetByCodePoints(i, 1)) {
+        for (index = text.offsetByCodePoints(index, 1); index < len;
+                index = text.offsetByCodePoints(index, 1)) {
             if (1 != capsCount && letterCount != capsCount) break;
-            final int codePoint = text.codePointAt(i);
+            final int codePoint = text.codePointAt(index);
             if (Character.isUpperCase(codePoint)) {
                 ++capsCount;
                 ++letterCount;
```