13 files changed, 427 insertions, 276 deletions
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionaryFileDumper.java b/java/src/com/android/inputmethod/latin/BinaryDictionaryFileDumper.java
index d4cdc6c5c..165116ae0 100644
--- a/java/src/com/android/inputmethod/latin/BinaryDictionaryFileDumper.java
+++ b/java/src/com/android/inputmethod/latin/BinaryDictionaryFileDumper.java
@@ -16,14 +16,19 @@
 
 package com.android.inputmethod.latin;
 
+import android.content.ContentProviderClient;
 import android.content.ContentResolver;
+import android.content.ContentValues;
 import android.content.Context;
 import android.content.res.AssetFileDescriptor;
 import android.database.Cursor;
 import android.net.Uri;
+import android.os.RemoteException;
 import android.text.TextUtils;
 import android.util.Log;
 
+import com.android.inputmethod.latin.DictionaryInfoUtils.DictionaryInfo;
+
 import java.io.BufferedInputStream;
 import java.io.BufferedOutputStream;
 import java.io.File;
@@ -32,6 +37,7 @@ import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.Arrays;
+import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 import java.util.Locale;
@@ -68,6 +74,10 @@ public final class BinaryDictionaryFileDumper {
 
     // The path fragment to append after the client ID for dictionary info requests.
     private static final String QUERY_PATH_DICT_INFO = "dict";
+    // The path fragment to append after the client ID for updating the metadata URI.
+    private static final String QUERY_PATH_METADATA = "metadata";
+    private static final String INSERT_METADATA_CLIENT_ID_COLUMN = "clientid";
+    private static final String INSERT_METADATA_METADATA_URI_COLUMN = "uri";
 
     // Prevents this class to be accidentally instantiated.
     private BinaryDictionaryFileDumper() {
@@ -91,25 +101,31 @@ public final class BinaryDictionaryFileDumper {
      */
     private static List<WordListInfo> getWordListWordListInfos(final Locale locale,
             final Context context, final boolean hasDefaultWordList) {
-        final ContentResolver resolver = context.getContentResolver();
         final String clientId = context.getString(R.string.dictionary_pack_client_id);
         final Uri.Builder builder = getProviderUriBuilder(clientId);
         builder.appendPath(QUERY_PATH_DICT_INFO);
         builder.appendPath(locale.toString());
         builder.appendQueryParameter(QUERY_PARAMETER_PROTOCOL, QUERY_PARAMETER_PROTOCOL_VALUE);
         if (!hasDefaultWordList) {
-            builder.appendQueryParameter(QUERY_PARAMETER_MAY_PROMPT_USER, QUERY_PARAMETER_TRUE);
+            builder.appendQueryParameter(QUERY_PARAMETER_MAY_PROMPT_USER,
+                    QUERY_PARAMETER_TRUE);
         }
         final Uri dictionaryPackUri = builder.build();
 
-        final Cursor c = resolver.query(dictionaryPackUri, DICTIONARY_PROJECTION, null, null, null);
-        if (null == c) return Collections.<WordListInfo>emptyList();
-        if (c.getCount() <= 0 || !c.moveToFirst()) {
-            c.close();
-            return Collections.<WordListInfo>emptyList();
-        }
-
+        final ContentProviderClient client = context.getContentResolver().
+                acquireContentProviderClient(getProviderUriBuilder("").build());
+        if (null == client) return Collections.<WordListInfo>emptyList();
         try {
+            final Cursor c = client.query(dictionaryPackUri, DICTIONARY_PROJECTION, null, null,
+                    null);
+            if (null == c) {
+                reinitializeClientRecordInDictionaryContentProvider(context, client, clientId);
+                return Collections.<WordListInfo>emptyList();
+            }
+            if (c.getCount() <= 0 || !c.moveToFirst()) {
+                c.close();
+                return Collections.<WordListInfo>emptyList();
+            }
             final List<WordListInfo> list = CollectionUtils.newArrayList();
             do {
                 final String wordListId = c.getString(0);
@@ -119,11 +135,20 @@ public final class BinaryDictionaryFileDumper {
             } while (c.moveToNext());
             c.close();
             return list;
+        } catch (RemoteException e) {
+            // The documentation is unclear as to in which cases this may happen, but it probably
+            // happens when the content provider got suddenly killed because it crashed or because
+            // the user disabled it through Settings.
+            Log.e(TAG, "RemoteException: communication with the dictionary pack cut", e);
+            return Collections.<WordListInfo>emptyList();
         } catch (Exception e) {
-            // Just in case we hit a problem in communication with the dictionary pack.
-            // We don't want to die.
-            Log.e(TAG, "Exception communicating with the dictionary pack : " + e);
+            // A crash here is dangerous because it would brick any encrypted device -
+            // we need the keyboard to be up and working to enter the password, so we don't want
+            // to die no matter what. So let's be as safe as possible.
+            Log.e(TAG, "Unexpected exception communicating with the dictionary pack", e);
             return Collections.<WordListInfo>emptyList();
+        } finally {
+            client.release();
         }
     }
 
@@ -237,7 +262,7 @@ public final class BinaryDictionaryFileDumper {
                 return AssetFileAddress.makeFromFileName(finalFileName);
             } catch (Exception e) {
                 if (DEBUG) {
-                    Log.i(TAG, "Can't open word list in mode " + mode + " : " + e);
+                    Log.i(TAG, "Can't open word list in mode " + mode, e);
                 }
                 if (null != outputFile) {
                     // This may or may not fail. The file may not have been created if the
@@ -255,12 +280,12 @@ public final class BinaryDictionaryFileDumper {
                     if (null != decryptedStream) decryptedStream.close();
                     if (null != bufferedInputStream) bufferedInputStream.close();
                 } catch (Exception e) {
-                    Log.e(TAG, "Exception while closing a file descriptor : " + e);
+                    Log.e(TAG, "Exception while closing a file descriptor", e);
                 }
                 try {
                     if (null != bufferedOutputStream) bufferedOutputStream.close();
                 } catch (Exception e) {
-                    Log.e(TAG, "Exception while closing a file : " + e);
+                    Log.e(TAG, "Exception while closing a file", e);
                 }
             }
         }
@@ -335,4 +360,35 @@ public final class BinaryDictionaryFileDumper {
             output.write(buffer, 0, readBytes);
         input.close();
     }
+
+    private static void reinitializeClientRecordInDictionaryContentProvider(final Context context,
+            final ContentProviderClient client, final String clientId) throws RemoteException {
+        final String metadataFileUri = context.getString(R.string.dictionary_pack_metadata_uri);
+        if (TextUtils.isEmpty(metadataFileUri)) return;
+        // Tell the content provider to reset all information about this client id
+        final Uri metadataContentUri = getProviderUriBuilder(clientId)
+                .appendPath(QUERY_PATH_METADATA)
+                .appendQueryParameter(QUERY_PARAMETER_PROTOCOL, QUERY_PARAMETER_PROTOCOL_VALUE)
+                .build();
+        client.delete(metadataContentUri, null, null);
+        // Update the metadata URI
+        final ContentValues metadataValues = new ContentValues();
+        metadataValues.put(INSERT_METADATA_CLIENT_ID_COLUMN, clientId);
+        metadataValues.put(INSERT_METADATA_METADATA_URI_COLUMN, metadataFileUri);
+        client.insert(metadataContentUri, metadataValues);
+
+        // Update the dictionary list.
+        final Uri dictionaryContentUriBase = getProviderUriBuilder(clientId)
+                .appendPath(QUERY_PATH_DICT_INFO)
+                .appendQueryParameter(QUERY_PARAMETER_PROTOCOL, QUERY_PARAMETER_PROTOCOL_VALUE)
+                .build();
+        final ArrayList<DictionaryInfo> dictionaryList =
+                DictionaryInfoUtils.getCurrentDictionaryFileNameAndVersionInfo(context);
+        final int length = dictionaryList.size();
+        for (int i = 0; i < length; ++i) {
+            final DictionaryInfo info = dictionaryList.get(i);
+            client.insert(Uri.withAppendedPath(dictionaryContentUriBase, info.mId),
+                    info.toContentValues());
+        }
+    }
 }
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java b/java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java
index 1cdc3b564..a96738b3e 100644
--- a/java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java
+++ b/java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java
@@ -210,7 +210,7 @@ final class BinaryDictionaryGetter {
                 }
             }
         } catch (java.io.IOException e) {
-            Log.e(TAG, "IOException trying to cleanup files : " + e);
+            Log.e(TAG, "IOException trying to cleanup files", e);
         }
     }
 
diff --git a/java/src/com/android/inputmethod/latin/CapsModeUtils.java b/java/src/com/android/inputmethod/latin/CapsModeUtils.java
new file mode 100644
index 000000000..1012cd519
--- /dev/null
+++ b/java/src/com/android/inputmethod/latin/CapsModeUtils.java
@@ -0,0 +1,266 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin;
+
+import android.text.InputType;
+import android.text.TextUtils;
+
+import java.util.Locale;
+
+public final class CapsModeUtils {
+    private CapsModeUtils() {
+        // This utility class is not publicly instantiable.
+    }
+
+    /**
+     * Apply an auto-caps mode to a string.
+     *
+     * This intentionally does NOT apply manual caps mode. It only changes the capitalization if
+     * the mode is one of the auto-caps modes.
+     * @param s The string to capitalize.
+     * @param capitalizeMode The mode in which to capitalize.
+     * @param locale The locale for capitalizing.
+     * @return The capitalized string.
+     */
+    public static String applyAutoCapsMode(final String s, final int capitalizeMode,
+            final Locale locale) {
+        if (WordComposer.CAPS_MODE_AUTO_SHIFT_LOCKED == capitalizeMode) {
+            return s.toUpperCase(locale);
+        } else if (WordComposer.CAPS_MODE_AUTO_SHIFTED == capitalizeMode) {
+            return StringUtils.toTitleCase(s, locale);
+        } else {
+            return s;
+        }
+    }
+
+    /**
+     * Return whether a constant represents an auto-caps mode (either auto-shift or auto-shift-lock)
+     * @param mode The mode to test for
+     * @return true if this represents an auto-caps mode, false otherwise
+     */
+    public static boolean isAutoCapsMode(final int mode) {
+        return WordComposer.CAPS_MODE_AUTO_SHIFTED == mode
+                || WordComposer.CAPS_MODE_AUTO_SHIFT_LOCKED == mode;
+    }
+
+    /**
+     * Determine what caps mode should be in effect at the current offset in
+     * the text. Only the mode bits set in <var>reqModes</var> will be
+     * checked. Note that the caps mode flags here are explicitly defined
+     * to match those in {@link InputType}.
+     *
+     * This code is a straight copy of TextUtils.getCapsMode (modulo namespace and formatting
+     * issues). This will change in the future as we simplify the code for our use and fix bugs.
+     *
+     * @param cs The text that should be checked for caps modes.
+     * @param reqModes The modes to be checked: may be any combination of
+     * {@link TextUtils#CAP_MODE_CHARACTERS}, {@link TextUtils#CAP_MODE_WORDS}, and
+     * {@link TextUtils#CAP_MODE_SENTENCES}.
+     * @param locale The locale to consider for capitalization rules
+     * @param hasSpaceBefore Whether we should consider there is a space inserted at the end of cs
+     *
+     * @return Returns the actual capitalization modes that can be in effect
+     * at the current position, which is any combination of
+     * {@link TextUtils#CAP_MODE_CHARACTERS}, {@link TextUtils#CAP_MODE_WORDS}, and
+     * {@link TextUtils#CAP_MODE_SENTENCES}.
+     */
+    public static int getCapsMode(final CharSequence cs, final int reqModes, final Locale locale,
+            final boolean hasSpaceBefore) {
+        // Quick description of what we want to do:
+        // CAP_MODE_CHARACTERS is always on.
+        // CAP_MODE_WORDS is on if there is some whitespace before the cursor.
+        // CAP_MODE_SENTENCES is on if there is some whitespace before the cursor, and the end
+        //   of a sentence just before that.
+        // We ignore opening parentheses and the like just before the cursor for purposes of
+        // finding whitespace for WORDS and SENTENCES modes.
+        // A sentence ends with a period, a question mark or an exclamation mark. If it's
+        // a period, it also needs not to be an abbreviation, which means it also needs to either
+        // be immediately preceded by punctuation, or by a string of only letters with single
+        // periods interleaved.
+
+        // Step 1 : check for cap MODE_CHARACTERS. If it's looked for, it's always on.
+        if ((reqModes & (TextUtils.CAP_MODE_WORDS | TextUtils.CAP_MODE_SENTENCES)) == 0) {
+            // Here we are not looking for MODE_WORDS or MODE_SENTENCES, so since we already
+            // evaluated MODE_CHARACTERS, we can return.
+            return TextUtils.CAP_MODE_CHARACTERS & reqModes;
+        }
+
+        // Step 2 : Skip (ignore at the end of input) any opening punctuation. This includes
+        // opening parentheses, brackets, opening quotes, everything that *opens* a span of
+        // text in the linguistic sense. In RTL languages, this is still an opening sign, although
+        // it may look like a right parenthesis for example. We also include double quote and
+        // single quote since they aren't start punctuation in the unicode sense, but should still
+        // be skipped for English. TODO: does this depend on the language?
+        int i;
+        if (hasSpaceBefore) {
+            i = cs.length() + 1;
+        } else {
+            for (i = cs.length(); i > 0; i--) {
+                final char c = cs.charAt(i - 1);
+                if (c != Constants.CODE_DOUBLE_QUOTE && c != Constants.CODE_SINGLE_QUOTE
+                        && Character.getType(c) != Character.START_PUNCTUATION) {
+                    break;
+                }
+            }
+        }
+
+        // We are now on the character that precedes any starting punctuation, so in the most
+        // frequent case this will be whitespace or a letter, although it may occasionally be a
+        // start of line, or some symbol.
+
+        // Step 3 : Search for the start of a paragraph. From the starting point computed in step 2,
+        // we go back over any space or tab char sitting there. We find the start of a paragraph
+        // if the first char that's not a space or tab is a start of line (as in \n, start of text,
+        // or some other similar characters).
+        int j = i;
+        char prevChar = Constants.CODE_SPACE;
+        if (hasSpaceBefore) --j;
+        while (j > 0) {
+            prevChar = cs.charAt(j - 1);
+            if (!Character.isSpaceChar(prevChar) && prevChar != Constants.CODE_TAB) break;
+            j--;
+        }
+        if (j <= 0 || Character.isWhitespace(prevChar)) {
+            // There are only spacing chars between the start of the paragraph and the cursor.
+            // A paragraph starts at the start of the text or after an isWhitespace() char that is
+            // neither isSpaceChar() nor a tab. Both MODE_WORDS and MODE_SENTENCES should be on.
+            return (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS
+                    | TextUtils.CAP_MODE_SENTENCES) & reqModes;
+        }
+        if (i == j) {
+            // If we don't have whitespace before index i, it means neither MODE_WORDS
+            // nor MODE_SENTENCES should be on, so we can return right away.
+            return TextUtils.CAP_MODE_CHARACTERS & reqModes;
+        }
+        if ((reqModes & TextUtils.CAP_MODE_SENTENCES) == 0) {
+            // Here we know we have whitespace before the cursor (if not, we returned in the above
+            // if i == j clause), so we need MODE_WORDS to be on. And we don't need to evaluate
+            // MODE_SENTENCES so we can return right away.
+            return (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS) & reqModes;
+        }
+        // Please note that because of the reqModes & CAP_MODE_SENTENCES test a few lines above,
+        // we know that MODE_SENTENCES is being requested.
+
+        // Step 4 : Search for MODE_SENTENCES.
+        // English is a special case in that "American typography" rules, which are the most common
+        // in English, state that a sentence terminator immediately following a quotation mark
+        // should be swapped with it and de-duplicated (included in the quotation mark),
+        // e.g. <<Did he say, "let's go home?">>
+        // No other language has such a rule as far as I know, instead putting inside the quotation
+        // mark as the exact thing quoted and handling the surrounding punctuation independently,
+        // e.g. <<Did he say, "let's go home"?>>
+        // Hence, specifically for English, we treat this special case here.
+        if (Locale.ENGLISH.getLanguage().equals(locale.getLanguage())) {
+            for (; j > 0; j--) {
+                // Here we look to go over any closing punctuation. This is because in dominant
+                // variants of English, the final period is placed within double quotes and maybe
+                // other closing punctuation signs. This is generally not true in other languages.
+                final char c = cs.charAt(j - 1);
+                if (c != Constants.CODE_DOUBLE_QUOTE && c != Constants.CODE_SINGLE_QUOTE
+                        && Character.getType(c) != Character.END_PUNCTUATION) {
+                    break;
+                }
+            }
+        }
+
+        if (j <= 0) return TextUtils.CAP_MODE_CHARACTERS & reqModes;
+        char c = cs.charAt(--j);
+
+        // We found the next interesting chunk of text ; next we need to determine if it's the
+        // end of a sentence. If we have a question mark or an exclamation mark, it's the end of
+        // a sentence. If it's neither, the only remaining case is the period so we get the opposite
+        // case out of the way.
+        if (c == Constants.CODE_QUESTION_MARK || c == Constants.CODE_EXCLAMATION_MARK) {
+            return (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_SENTENCES) & reqModes;
+        }
+        if (c != Constants.CODE_PERIOD || j <= 0) {
+            return (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS) & reqModes;
+        }
+
+        // We found out that we have a period. We need to determine if this is a full stop or
+        // otherwise sentence-ending period, or an abbreviation like "e.g.". An abbreviation
+        // looks like (\w\.){2,}
+        // To find out, we will have a simple state machine with the following states :
+        // START, WORD, PERIOD, LETTER
+        // On START : (just before the first period)
+        //           letter => WORD
+        //           whitespace => end with no caps (it was a stand-alone period)
+        //           otherwise => end with caps (several periods/symbols in a row)
+        // On WORD : (within the word just before the first period)
+        //           letter => WORD
+        //           period => PERIOD
+        //           otherwise => end with caps (it was a word with a full stop at the end)
+        // On PERIOD : (period within a potential abbreviation)
+        //           letter => LETTER
+        //           otherwise => end with caps (it was not an abbreviation)
+        // On LETTER : (letter within a potential abbreviation)
+        //           letter => LETTER
+        //           period => PERIOD
+        //           otherwise => end with no caps (it was an abbreviation)
+        // "Not an abbreviation" in the above chart essentially covers cases like "...yes.". This
+        // should capitalize.
+
+        final int START = 0;
+        final int WORD = 1;
+        final int PERIOD = 2;
+        final int LETTER = 3;
+        final int caps = (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS
+                | TextUtils.CAP_MODE_SENTENCES) & reqModes;
+        final int noCaps = (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS) & reqModes;
+        int state = START;
+        while (j > 0) {
+            c = cs.charAt(--j);
+            switch (state) {
+            case START:
+                if (Character.isLetter(c)) {
+                    state = WORD;
+                } else if (Character.isWhitespace(c)) {
+                    return noCaps;
+                } else {
+                    return caps;
+                }
+                break;
+            case WORD:
+                if (Character.isLetter(c)) {
+                    state = WORD;
+                } else if (c == Constants.CODE_PERIOD) {
+                    state = PERIOD;
+                } else {
+                    return caps;
+                }
+                break;
+            case PERIOD:
+                if (Character.isLetter(c)) {
+                    state = LETTER;
+                } else {
+                    return caps;
+                }
+                break;
+            case LETTER:
+                if (Character.isLetter(c)) {
+                    state = LETTER;
+                } else if (c == Constants.CODE_PERIOD) {
+                    state = PERIOD;
+                } else {
+                    return noCaps;
+                }
+            }
+        }
+        // Here we arrived at the start of the line. This should behave exactly like whitespace.
+        return (START == state || LETTER == state) ? noCaps : caps;
+    }
+}
diff --git a/java/src/com/android/inputmethod/latin/Constants.java b/java/src/com/android/inputmethod/latin/Constants.java
index 85cc55232..422448edf 100644
--- a/java/src/com/android/inputmethod/latin/Constants.java
+++ b/java/src/com/android/inputmethod/latin/Constants.java
@@ -215,6 +215,10 @@ public final class Constants {
         }
     }
 
+    // Constants for CSV parsing.
+    public static final char CSV_SEPARATOR = ',';
+    public static final char CSV_ESCAPE = '\\';
+
     private Constants() {
         // This utility class is not publicly instantiable.
     }
diff --git a/java/src/com/android/inputmethod/latin/DictionaryInfoUtils.java b/java/src/com/android/inputmethod/latin/DictionaryInfoUtils.java
index 8f16a8e4d..d2a946bf5 100644
--- a/java/src/com/android/inputmethod/latin/DictionaryInfoUtils.java
+++ b/java/src/com/android/inputmethod/latin/DictionaryInfoUtils.java
@@ -16,9 +16,11 @@
 
 package com.android.inputmethod.latin;
 
+import android.content.ContentValues;
 import android.content.Context;
 import android.content.res.AssetManager;
 import android.content.res.Resources;
+import android.text.format.DateUtils;
 import android.util.Log;
 
 import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
@@ -39,19 +41,40 @@ public class DictionaryInfoUtils {
     private static final String RESOURCE_PACKAGE_NAME =
             DictionaryInfoUtils.class.getPackage().getName();
     private static final String DEFAULT_MAIN_DICT = "main";
+    private static final String ID_CATEGORY_SEPARATOR =
+            BinaryDictionaryGetter.ID_CATEGORY_SEPARATOR;
     private static final String MAIN_DICT_PREFIX = "main_";
     // 6 digits - unicode is limited to 21 bits
     private static final int MAX_HEX_DIGITS_FOR_CODEPOINT = 6;
 
     public static class DictionaryInfo {
+        private static final String LOCALE_COLUMN = "locale";
+        private static final String WORDLISTID_COLUMN = "id";
+        private static final String LOCAL_FILENAME_COLUMN = "filename";
+        private static final String DATE_COLUMN = "date";
+        private static final String FILESIZE_COLUMN = "filesize";
+        private static final String VERSION_COLUMN = "version";
         public final Locale mLocale;
         public final AssetFileAddress mFileAddress;
         public final int mVersion;
+        public final String mId;
         public DictionaryInfo(final Locale locale, final AssetFileAddress fileAddress,
                 final int version) {
             mLocale = locale;
             mFileAddress = fileAddress;
             mVersion = version;
+            mId = DEFAULT_MAIN_DICT + ID_CATEGORY_SEPARATOR + mLocale;
+        }
+        public ContentValues toContentValues() {
+            final ContentValues values = new ContentValues();
+            values.put(WORDLISTID_COLUMN, mId);
+            values.put(LOCALE_COLUMN, mLocale.toString());
+            values.put(LOCAL_FILENAME_COLUMN, mFileAddress.mFilename);
+            values.put(DATE_COLUMN,
+                    new File(mFileAddress.mFilename).lastModified() / DateUtils.SECOND_IN_MILLIS);
+            values.put(FILESIZE_COLUMN, mFileAddress.mLength);
+            values.put(VERSION_COLUMN, mVersion);
+            return values;
         }
     }
 
@@ -284,21 +307,23 @@ public class DictionaryInfoUtils {
 
         // Retrieve downloaded dictionaries
         final File[] directoryList = getCachedDirectoryList(context);
-        for (final File directory : directoryList) {
-            final String localeString = getWordListIdFromFileName(directory.getName());
-            File[] dicts = BinaryDictionaryGetter.getCachedWordLists(localeString, context);
-            for (final File dict : dicts) {
-                final String wordListId = getWordListIdFromFileName(dict.getName());
-                if (!DictionaryInfoUtils.isMainWordListId(wordListId)) continue;
-                final Locale locale = LocaleUtils.constructLocaleFromString(localeString);
-                final AssetFileAddress fileAddress = AssetFileAddress.makeFromFile(dict);
-                final DictionaryInfo dictionaryInfo =
-                        createDictionaryInfoFromFileAddress(fileAddress);
-                // Protect against cases of a less-specific dictionary being found, like an
-                // en dictionary being used for an en_US locale. In this case, the en dictionary
-                // should be used for en_US but discounted for listing purposes.
-                if (!dictionaryInfo.mLocale.equals(locale)) continue;
-                addOrUpdateDictInfo(dictList, dictionaryInfo);
+        if (null != directoryList) {
+            for (final File directory : directoryList) {
+                final String localeString = getWordListIdFromFileName(directory.getName());
+                File[] dicts = BinaryDictionaryGetter.getCachedWordLists(localeString, context);
+                for (final File dict : dicts) {
+                    final String wordListId = getWordListIdFromFileName(dict.getName());
+                    if (!DictionaryInfoUtils.isMainWordListId(wordListId)) continue;
+                    final Locale locale = LocaleUtils.constructLocaleFromString(localeString);
+                    final AssetFileAddress fileAddress = AssetFileAddress.makeFromFile(dict);
+                    final DictionaryInfo dictionaryInfo =
+                            createDictionaryInfoFromFileAddress(fileAddress);
+                    // Protect against cases of a less-specific dictionary being found, like an
+                    // en dictionary being used for an en_US locale. In this case, the en dictionary
+                    // should be used for en_US but discounted for listing purposes.
+                    if (!dictionaryInfo.mLocale.equals(locale)) continue;
+                    addOrUpdateDictInfo(dictList, dictionaryInfo);
+                }
             }
         }
 
diff --git a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
index 28ed88a73..97dc6a8ac 100644
--- a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
@@ -321,9 +321,9 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
             tempFile.renameTo(file);
             clearFusionDictionary();
         } catch (IOException e) {
-            Log.e(TAG, "IO exception while writing file: " + e);
+            Log.e(TAG, "IO exception while writing file", e);
         } catch (UnsupportedFormatException e) {
-            Log.e(TAG, "Unsupported format: " + e);
+            Log.e(TAG, "Unsupported format", e);
         } finally {
             if (out != null) {
                 try {
diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java
index 1c49bb0cc..73ace2bfa 100644
--- a/java/src/com/android/inputmethod/latin/LatinIME.java
+++ b/java/src/com/android/inputmethod/latin/LatinIME.java
@@ -1228,7 +1228,7 @@ public final class LatinIME extends InputMethodService implements KeyboardAction
             return;
         }
         final String wordToEdit;
-        if (StringUtils.isAutoCapsMode(mLastComposedWord.mCapitalizedMode)) {
+        if (CapsModeUtils.isAutoCapsMode(mLastComposedWord.mCapitalizedMode)) {
             wordToEdit = word.toLowerCase(mSubtypeSwitcher.getCurrentSubtypeLocale());
         } else {
             wordToEdit = word;
diff --git a/java/src/com/android/inputmethod/latin/PositionalInfoForUserDictPendingAddition.java b/java/src/com/android/inputmethod/latin/PositionalInfoForUserDictPendingAddition.java
index 9fdbf8703..a8800007a 100644
--- a/java/src/com/android/inputmethod/latin/PositionalInfoForUserDictPendingAddition.java
+++ b/java/src/com/android/inputmethod/latin/PositionalInfoForUserDictPendingAddition.java
@@ -96,7 +96,7 @@ public final class PositionalInfoForUserDictPendingAddition {
         if (currentCursorPosition != mCursorPos) return true;
         // We have made all the checks : do the replacement and report success
         // If this was auto-capitalized, we need to restore the case before committing
-        final String wordWithCaseFixed = StringUtils.applyAutoCapsMode(mActualWordBeingAdded,
+        final String wordWithCaseFixed = CapsModeUtils.applyAutoCapsMode(mActualWordBeingAdded,
                 mCapitalizedMode, locale);
         connection.setComposingRegion(currentCursorPosition - mOriginalWord.length(),
                 currentCursorPosition);
diff --git a/java/src/com/android/inputmethod/latin/RichInputConnection.java b/java/src/com/android/inputmethod/latin/RichInputConnection.java
index 521cea977..7300dbd23 100644
--- a/java/src/com/android/inputmethod/latin/RichInputConnection.java
+++ b/java/src/com/android/inputmethod/latin/RichInputConnection.java
@@ -221,7 +221,7 @@ public final class RichInputConnection {
         }
         // This never calls InputConnection#getCapsMode - in fact, it's a static method that
         // never blocks or initiates IPC.
-        return StringUtils.getCapsMode(mCommittedTextBeforeComposingText, inputType, locale,
+        return CapsModeUtils.getCapsMode(mCommittedTextBeforeComposingText, inputType, locale,
                 hasSpaceBefore);
     }
 
diff --git a/java/src/com/android/inputmethod/latin/SettingsValues.java b/java/src/com/android/inputmethod/latin/SettingsValues.java
index 728f6b281..d05868029 100644
--- a/java/src/com/android/inputmethod/latin/SettingsValues.java
+++ b/java/src/com/android/inputmethod/latin/SettingsValues.java
@@ -89,8 +89,8 @@ public final class SettingsValues {
         mWordConnectors =
                 StringUtils.toCodePointArray(res.getString(R.string.symbols_word_connectors));
         Arrays.sort(mWordConnectors);
-        final String[] suggestPuncsSpec = KeySpecParser.parseCsvString(
-                res.getString(R.string.suggested_punctuations), null);
+        final String[] suggestPuncsSpec = StringUtils.parseCsvString(res.getString(
+                R.string.suggested_punctuations));
         mSuggestPuncList = createSuggestPuncList(suggestPuncsSpec);
         mWordSeparators = res.getString(R.string.symbols_word_separators);
         mHintToSaveText = res.getText(R.string.hint_add_to_dictionary);
@@ -211,6 +211,7 @@ public final class SettingsValues {
         final ArrayList<SuggestedWordInfo> puncList = CollectionUtils.newArrayList();
         if (puncs != null) {
             for (final String puncSpec : puncs) {
+                // TODO: Stop using KeySpecParser.getLabel().
                 puncList.add(new SuggestedWordInfo(KeySpecParser.getLabel(puncSpec),
                         SuggestedWordInfo.MAX_SCORE, SuggestedWordInfo.KIND_HARDCODED,
                         Dictionary.TYPE_HARDCODED));
diff --git a/java/src/com/android/inputmethod/latin/StringUtils.java b/java/src/com/android/inputmethod/latin/StringUtils.java
index d00edbe92..90c3fcdd2 100644
--- a/java/src/com/android/inputmethod/latin/StringUtils.java
+++ b/java/src/com/android/inputmethod/latin/StringUtils.java
@@ -16,7 +16,6 @@
 
 package com.android.inputmethod.latin;
 
-import android.text.InputType;
 import android.text.TextUtils;
 
 import java.util.ArrayList;
@@ -103,37 +102,6 @@ public final class StringUtils {
         }
     }
 
-    /**
-     * Apply an auto-caps mode to a string.
-     *
-     * This intentionally does NOT apply manual caps mode. It only changes the capitalization if
-     * the mode is one of the auto-caps modes.
-     * @param s The string to capitalize.
-     * @param capitalizeMode The mode in which to capitalize.
-     * @param locale The locale for capitalizing.
-     * @return The capitalized string.
-     */
-    public static String applyAutoCapsMode(final String s, final int capitalizeMode,
-            final Locale locale) {
-        if (WordComposer.CAPS_MODE_AUTO_SHIFT_LOCKED == capitalizeMode) {
-            return s.toUpperCase(locale);
-        } else if (WordComposer.CAPS_MODE_AUTO_SHIFTED == capitalizeMode) {
-            return toTitleCase(s, locale);
-        } else {
-            return s;
-        }
-    }
-
-    /**
-     * Return whether a constant represents an auto-caps mode (either auto-shift or auto-shift-lock)
-     * @param mode The mode to test for
-     * @return true if this represents an auto-caps mode, false otherwise
-     */
-    public static boolean isAutoCapsMode(final int mode) {
-        return WordComposer.CAPS_MODE_AUTO_SHIFTED == mode
-                || WordComposer.CAPS_MODE_AUTO_SHIFT_LOCKED == mode;
-    }
-
     public static String toTitleCase(final String s, final Locale locale) {
         if (s.length() <= 1) {
             // TODO: is this really correct? Shouldn't this be s.toUpperCase()?
@@ -166,210 +134,41 @@ public final class StringUtils {
         return codePoints;
     }
 
-    /**
-     * Determine what caps mode should be in effect at the current offset in
-     * the text. Only the mode bits set in <var>reqModes</var> will be
-     * checked. Note that the caps mode flags here are explicitly defined
-     * to match those in {@link InputType}.
-     *
-     * This code is a straight copy of TextUtils.getCapsMode (modulo namespace and formatting
-     * issues). This will change in the future as we simplify the code for our use and fix bugs.
-     *
-     * @param cs The text that should be checked for caps modes.
-     * @param reqModes The modes to be checked: may be any combination of
-     * {@link TextUtils#CAP_MODE_CHARACTERS}, {@link TextUtils#CAP_MODE_WORDS}, and
-     * {@link TextUtils#CAP_MODE_SENTENCES}.
-     * @param locale The locale to consider for capitalization rules
-     * @param hasSpaceBefore Whether we should consider there is a space inserted at the end of cs
-     *
-     * @return Returns the actual capitalization modes that can be in effect
-     * at the current position, which is any combination of
-     * {@link TextUtils#CAP_MODE_CHARACTERS}, {@link TextUtils#CAP_MODE_WORDS}, and
-     * {@link TextUtils#CAP_MODE_SENTENCES}.
-     */
-    public static int getCapsMode(final CharSequence cs, final int reqModes, final Locale locale,
-            final boolean hasSpaceBefore) {
-        // Quick description of what we want to do:
-        // CAP_MODE_CHARACTERS is always on.
-        // CAP_MODE_WORDS is on if there is some whitespace before the cursor.
-        // CAP_MODE_SENTENCES is on if there is some whitespace before the cursor, and the end
-        //   of a sentence just before that.
-        // We ignore opening parentheses and the like just before the cursor for purposes of
-        // finding whitespace for WORDS and SENTENCES modes.
-        // The end of a sentence ends with a period, question mark or exclamation mark. If it's
-        // a period, it also needs not to be an abbreviation, which means it also needs to either
-        // be immediately preceded by punctuation, or by a string of only letters with single
-        // periods interleaved.
-
-        // Step 1 : check for cap MODE_CHARACTERS. If it's looked for, it's always on.
-        if ((reqModes & (TextUtils.CAP_MODE_WORDS | TextUtils.CAP_MODE_SENTENCES)) == 0) {
-            // Here we are not looking for MODE_WORDS or MODE_SENTENCES, so since we already
-            // evaluated MODE_CHARACTERS, we can return.
-            return TextUtils.CAP_MODE_CHARACTERS & reqModes;
-        }
-
-        // Step 2 : Skip (ignore at the end of input) any opening punctuation. This includes
-        // opening parentheses, brackets, opening quotes, everything that *opens* a span of
-        // text in the linguistic sense. In RTL languages, this is still an opening sign, although
-        // it may look like a right parenthesis for example. We also include double quote and
-        // single quote since they aren't start punctuation in the unicode sense, but should still
-        // be skipped for English. TODO: does this depend on the language?
-        int i;
-        if (hasSpaceBefore) {
-            i = cs.length() + 1;
-        } else {
-            for (i = cs.length(); i > 0; i--) {
-                final char c = cs.charAt(i - 1);
-                if (c != Constants.CODE_DOUBLE_QUOTE && c != Constants.CODE_SINGLE_QUOTE
-                        && Character.getType(c) != Character.START_PUNCTUATION) {
-                    break;
+    public static String[] parseCsvString(final String text) {
+        final int size = text.length();
+        if (size == 0) {
+            return null;
+        }
+        if (codePointCount(text) == 1) {
+            return text.codePointAt(0) == Constants.CSV_SEPARATOR ? null : new String[] { text };
+        }
+
+        ArrayList<String> list = null;
+        int start = 0;
+        for (int pos = 0; pos < size; pos++) {
+            final char c = text.charAt(pos);
+            if (c == Constants.CSV_SEPARATOR) {
+                // Skip empty entry.
+                if (pos - start > 0) {
+                    if (list == null) {
+                        list = CollectionUtils.newArrayList();
+                    }
+                    list.add(text.substring(start, pos));
                 }
+                // Skip comma
+                start = pos + 1;
+            } else if (c == Constants.CSV_ESCAPE) {
+                // Skip escape character and escaped character.
+                pos++;
             }
         }
-
-        // We are now on the character that precedes any starting punctuation, so in the most
-        // frequent case this will be whitespace or a letter, although it may occasionally be a
-        // start of line, or some symbol.
-
-        // Step 3 : Search for the start of a paragraph. From the starting point computed in step 2,
-        // we go back over any space or tab char sitting there. We find the start of a paragraph
-        // if the first char that's not a space or tab is a start of line (as in \n, start of text,
-        // or some other similar characters).
-        int j = i;
-        char prevChar = Constants.CODE_SPACE;
-        if (hasSpaceBefore) --j;
-        while (j > 0) {
-            prevChar = cs.charAt(j - 1);
-            if (!Character.isSpaceChar(prevChar) && prevChar != Constants.CODE_TAB) break;
-            j--;
-        }
-        if (j <= 0 || Character.isWhitespace(prevChar)) {
-            // There are only spacing chars between the start of the paragraph and the cursor,
-            // defined as a isWhitespace() char that is neither a isSpaceChar() nor a tab. Both
-            // MODE_WORDS and MODE_SENTENCES should be active.
-            return (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS
-                    | TextUtils.CAP_MODE_SENTENCES) & reqModes;
-        }
-        if (i == j) {
-            // If we don't have whitespace before index i, it means neither MODE_WORDS
-            // nor mode sentences should be on so we can return right away.
-            return TextUtils.CAP_MODE_CHARACTERS & reqModes;
-        }
-        if ((reqModes & TextUtils.CAP_MODE_SENTENCES) == 0) {
-            // Here we know we have whitespace before the cursor (if not, we returned in the above
-            // if i == j clause), so we need MODE_WORDS to be on. And we don't need to evaluate
-            // MODE_SENTENCES so we can return right away.
-            return (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS) & reqModes;
-        }
-        // Please note that because of the reqModes & CAP_MODE_SENTENCES test a few lines above,
-        // we know that MODE_SENTENCES is being requested.
-
-        // Step 4 : Search for MODE_SENTENCES.
-        // English is a special case in that "American typography" rules, which are the most common
-        // in English, state that a sentence terminator immediately following a quotation mark
-        // should be swapped with it and de-duplicated (included in the quotation mark),
-        // e.g. <<Did he say, "let's go home?">>
-        // No other language has such a rule as far as I know, instead putting inside the quotation
-        // mark as the exact thing quoted and handling the surrounding punctuation independently,
-        // e.g. <<Did he say, "let's go home"?>>
-        // Hence, specifically for English, we treat this special case here.
-        if (Locale.ENGLISH.getLanguage().equals(locale.getLanguage())) {
-            for (; j > 0; j--) {
-                // Here we look to go over any closing punctuation. This is because in dominant
-                // variants of English, the final period is placed within double quotes and maybe
-                // other closing punctuation signs. This is generally not true in other languages.
-                final char c = cs.charAt(j - 1);
-                if (c != Constants.CODE_DOUBLE_QUOTE && c != Constants.CODE_SINGLE_QUOTE
-                        && Character.getType(c) != Character.END_PUNCTUATION) {
-                    break;
-                }
-            }
-        }
-
-        if (j <= 0) return TextUtils.CAP_MODE_CHARACTERS & reqModes;
-        char c = cs.charAt(--j);
-
-        // We found the next interesting chunk of text ; next we need to determine if it's the
-        // end of a sentence. If we have a question mark or an exclamation mark, it's the end of
-        // a sentence. If it's neither, the only remaining case is the period so we get the opposite
-        // case out of the way.
-        if (c == Constants.CODE_QUESTION_MARK || c == Constants.CODE_EXCLAMATION_MARK) {
-            return (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_SENTENCES) & reqModes;
-        }
-        if (c != Constants.CODE_PERIOD || j <= 0) {
-            return (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS) & reqModes;
+        final String remain = (size - start > 0) ? text.substring(start) : null;
+        if (list == null) {
+            return remain != null ? new String[] { remain } : null;
         }
-
-        // We found out that we have a period. We need to determine if this is a full stop or
-        // otherwise sentence-ending period, or an abbreviation like "e.g.". An abbreviation
-        // looks like (\w\.){2,}
-        // To find out, we will have a simple state machine with the following states :
-        // START, WORD, PERIOD, ABBREVIATION
-        // On START : (just before the first period)
-        //           letter => WORD
-        //           whitespace => end with no caps (it was a stand-alone period)
-        //           otherwise => end with caps (several periods/symbols in a row)
-        // On WORD : (within the word just before the first period)
-        //           letter => WORD
-        //           period => PERIOD
-        //           otherwise => end with caps (it was a word with a full stop at the end)
-        // On PERIOD : (period within a potential abbreviation)
-        //           letter => LETTER
-        //           otherwise => end with caps (it was not an abbreviation)
-        // On LETTER : (letter within a potential abbreviation)
-        //           letter => LETTER
-        //           period => PERIOD
-        //           otherwise => end with no caps (it was an abbreviation)
-        // "Not an abbreviation" in the above chart essentially covers cases like "...yes.". This
-        // should capitalize.
-
-        final int START = 0;
-        final int WORD = 1;
-        final int PERIOD = 2;
-        final int LETTER = 3;
-        final int caps = (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS
-                | TextUtils.CAP_MODE_SENTENCES) & reqModes;
-        final int noCaps = (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS) & reqModes;
-        int state = START;
-        while (j > 0) {
-            c = cs.charAt(--j);
-            switch (state) {
-            case START:
-                if (Character.isLetter(c)) {
-                    state = WORD;
-                } else if (Character.isWhitespace(c)) {
-                    return noCaps;
-                } else {
-                    return caps;
-                }
-                break;
-            case WORD:
-                if (Character.isLetter(c)) {
-                    state = WORD;
-                } else if (c == Constants.CODE_PERIOD) {
-                    state = PERIOD;
-                } else {
-                    return caps;
-                }
-                break;
-            case PERIOD:
-                if (Character.isLetter(c)) {
-                    state = LETTER;
-                } else {
-                    return caps;
-                }
-                break;
-            case LETTER:
-                if (Character.isLetter(c)) {
-                    state = LETTER;
-                } else if (c == Constants.CODE_PERIOD) {
-                    state = PERIOD;
-                } else {
-                    return noCaps;
-                }
-            }
+        if (remain != null) {
+            list.add(remain);
         }
-        // Here we arrived at the start of the line. This should behave exactly like whitespace.
-        return (START == state || LETTER == state) ? noCaps : caps;
+        return list.toArray(new String[list.size()]);
     }
 }
diff --git a/java/src/com/android/inputmethod/latin/UserHistoryDictIOUtils.java b/java/src/com/android/inputmethod/latin/UserHistoryDictIOUtils.java
index eb5c387a8..62f2a9750 100644
--- a/java/src/com/android/inputmethod/latin/UserHistoryDictIOUtils.java
+++ b/java/src/com/android/inputmethod/latin/UserHistoryDictIOUtils.java
@@ -122,9 +122,9 @@ public final class UserHistoryDictIOUtils {
             BinaryDictInputOutput.writeDictionaryBinary(destination, fusionDict, formatOptions);
             Log.d(TAG, "end writing");
         } catch (IOException e) {
-            Log.e(TAG, "IO exception while writing file: " + e);
+            Log.e(TAG, "IO exception while writing file", e);
         } catch (UnsupportedFormatException e) {
-            Log.e(TAG, "Unsupported fomat: " + e);
+            Log.e(TAG, "Unsupported format", e);
         }
     }
 
@@ -184,11 +184,11 @@ public final class UserHistoryDictIOUtils {
             BinaryDictIOUtils.readUnigramsAndBigramsBinary(buffer, unigrams, frequencies,
                     bigrams);
         } catch (IOException e) {
-            Log.e(TAG, "IO exception while reading file: " + e);
+            Log.e(TAG, "IO exception while reading file", e);
         } catch (UnsupportedFormatException e) {
-            Log.e(TAG, "Unsupported format: " + e);
+            Log.e(TAG, "Unsupported format", e);
         } catch (ArrayIndexOutOfBoundsException e) {
-            Log.e(TAG, "ArrayIndexOutOfBoundsException while reading file: " + e);
+            Log.e(TAG, "ArrayIndexOutOfBoundsException while reading file", e);
         }
         addWordsFromWordMap(unigrams, frequencies, bigrams, dict);
     }
diff --git a/java/src/com/android/inputmethod/latin/spellcheck/AndroidWordLevelSpellCheckerSession.java b/java/src/com/android/inputmethod/latin/spellcheck/AndroidWordLevelSpellCheckerSession.java
index b0e471643..cd3f9e442 100644
--- a/java/src/com/android/inputmethod/latin/spellcheck/AndroidWordLevelSpellCheckerSession.java
+++ b/java/src/com/android/inputmethod/latin/spellcheck/AndroidWordLevelSpellCheckerSession.java
@@ -352,7 +352,7 @@ public abstract class AndroidWordLevelSpellCheckerSession extends Session {
             if (DBG) {
                 throw e;
             } else {
-                Log.e(TAG, "Exception while spellcheking: " + e);
+                Log.e(TAG, "Exception while spellcheking", e);
                 return AndroidSpellCheckerService.getNotInDictEmptySuggestions();
             }
         }