1 files changed, 328 insertions, 34 deletions
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java b/java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java
index 7ce92920d..063243e1b 100644
--- a/java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java
+++ b/java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java
@@ -17,13 +17,14 @@
 package com.android.inputmethod.latin;
 
 import android.content.Context;
+import android.content.SharedPreferences;
+import android.content.pm.PackageManager.NameNotFoundException;
 import android.content.res.AssetFileDescriptor;
 import android.util.Log;
 
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.List;
+import java.io.File;
+import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.Locale;
 
 /**
@@ -36,13 +37,127 @@ class BinaryDictionaryGetter {
      */
     private static final String TAG = BinaryDictionaryGetter.class.getSimpleName();
 
+    /**
+     * Used to return empty lists
+     */
+    private static final File[] EMPTY_FILE_ARRAY = new File[0];
+
+    /**
+     * Name of the shared preferences recording which word lists are on and which are off.
+     */
+    private static final String COMMON_PREFERENCES_NAME = "LatinImeDictPrefs";
+
+    // Name of the category for the main dictionary
+    private static final String MAIN_DICTIONARY_CATEGORY = "main";
+    public static final String ID_CATEGORY_SEPARATOR = ":";
+
     // Prevents this from being instantiated
     private BinaryDictionaryGetter() {}
 
     /**
+     * Returns whether we may want to use this character as part of a file name.
+     *
+     * This only accepts ASCII letters, ASCII digits and the underscore, and rejects the rest.
+     */
+    private static boolean isFileNameCharacter(int codePoint) {
+        if (codePoint >= 0x30 && codePoint <= 0x39) return true; // Digit
+        if (codePoint >= 0x41 && codePoint <= 0x5A) return true; // Uppercase
+        if (codePoint >= 0x61 && codePoint <= 0x7A) return true; // Lowercase
+        return codePoint == '_'; // Underscore
+    }
+
+    /**
+     * Escapes a string for any characters that may be suspicious for a file or directory name.
+     *
+     * Concretely this does a sort of URL-encoding except it will encode everything that's not
+     * ASCII alphanumeric or underscore, as '%' followed by the code point in 6 hex digits. (true
+     * URL-encoding leaves alone characters like '*', which we cannot allow here)
+     */
+    // TODO: create a unit test for this method
+    private static String replaceFileNameDangerousCharacters(final String name) {
+        // This assumes '%' is fully available as a non-separator, normal
+        // character in a file name. This is probably true for all file systems.
+        final StringBuilder sb = new StringBuilder();
+        final int nameLength = name.length();
+        for (int i = 0; i < nameLength; i = name.offsetByCodePoints(i, 1)) {
+            final int codePoint = name.codePointAt(i);
+            if (isFileNameCharacter(codePoint)) {
+                sb.appendCodePoint(codePoint);
+            } else {
+                // 6 digits - unicode is limited to 21 bits
+                sb.append(String.format((Locale)null, "%%%1$06x", codePoint));
+            }
+        }
+        return sb.toString();
+    }
+
+    /**
+     * Reverses the escaping done by {@link #replaceFileNameDangerousCharacters(String)}.
+     */
+    private static String getWordListIdFromFileName(final String fname) {
+        final StringBuilder sb = new StringBuilder();
+        final int fnameLength = fname.length();
+        for (int i = 0; i < fnameLength; i = fname.offsetByCodePoints(i, 1)) {
+            final int codePoint = fname.codePointAt(i);
+            if ('%' != codePoint) {
+                sb.appendCodePoint(codePoint);
+            } else {
+                final int encodedCodePoint = Integer.parseInt(fname.substring(i + 1, i + 7), 16);
+                i += 6; // now on the last hex digit; the loop increment steps past it
+                sb.appendCodePoint(encodedCodePoint);
+            }
+        }
+        return sb.toString();
+    }
+
+    /**
+     * Helper method to get the top level cache directory: "dicts" in the context's files dir.
+     */
+    private static String getWordListCacheDirectory(final Context context) {
+        return context.getFilesDir() + File.separator + "dicts";
+    }
+
+    /**
+     * Find out the cache directory associated with a specific locale.
+     *
+     * The directory is created if it does not exist yet; a failure to do so is only logged.
+     */
+    private static String getCacheDirectoryForLocale(final String locale, final Context context) {
+        final String relativeDirectoryName = replaceFileNameDangerousCharacters(locale);
+        final String absoluteDirectoryName = getWordListCacheDirectory(context) + File.separator
+                + relativeDirectoryName;
+        final File directory = new File(absoluteDirectoryName);
+        if (!directory.exists() && !directory.mkdirs()) {
+            Log.e(TAG, "Could not create the directory for locale " + locale);
+        }
+        return absoluteDirectoryName;
+    }
+
+    /**
+     * Generates a file name for the id and locale passed as an argument.
+     *
+     * In the current implementation the file name returned will always be unique for
+     * any id/locale pair, but please do not expect that the id can be the same for
+     * different dictionaries with different locales. An id should be unique for any
+     * dictionary.
+     * The file name is pretty much a URL-encoded version of the id inside a directory
+     * named like the locale, except it will also escape characters that look dangerous
+     * to some file systems.
+     * @param id the id of the dictionary for which to get a file name
+     * @param locale the locale for which to get the file name as a string
+     * @param context the context to use for getting the directory
+     * @return the name of the file to be created
+     */
+    public static String getCacheFileName(String id, String locale, Context context) {
+        final String fileName = replaceFileNameDangerousCharacters(id);
+        return getCacheDirectoryForLocale(locale, context) + File.separator + fileName;
+    }
+
+    /**
      * Returns a file address from a resource, or null if it cannot be opened.
      */
-    private static AssetFileAddress loadFallbackResource(Context context, int fallbackResId) {
+    private static AssetFileAddress loadFallbackResource(final Context context,
+            final int fallbackResId) {
         final AssetFileDescriptor afd = context.getResources().openRawResourceFd(fallbackResId);
         if (afd == null) {
             Log.e(TAG, "Found the resource but cannot read it. Is it compressed? resId="
@@ -53,45 +168,224 @@ class BinaryDictionaryGetter {
                 context.getApplicationInfo().sourceDir, afd.getStartOffset(), afd.getLength());
     }
 
+    static private class DictPackSettings {
+        final SharedPreferences mDictPreferences; // null when the dictionary pack is not found
+        public DictPackSettings(final Context context) {
+            Context dictPackContext = null;
+            try {
+                final String dictPackName =
+                        context.getString(R.string.dictionary_pack_package_name);
+                dictPackContext = context.createPackageContext(dictPackName, 0);
+            } catch (NameNotFoundException e) {
+                // The dictionary pack is not installed...
+                // TODO: fallback on the built-in dict, see the TODO above
+                Log.e(TAG, "Could not find a dictionary pack");
+            }
+            mDictPreferences = null == dictPackContext ? null
+                    : dictPackContext.getSharedPreferences(COMMON_PREFERENCES_NAME,
+                            Context.MODE_WORLD_READABLE | Context.MODE_MULTI_PROCESS);
+        }
+        public boolean isWordListActive(final String dictId) {
+            if (null == mDictPreferences) {
+                // If we don't have preferences it basically means we can't find the dictionary
+                // pack - either it's not installed, or it's disabled, or there is some strange
+                // bug. Either way, a word list with no settings should be on by default: default
+                // dictionaries in LatinIME are on if there are no settings at all, and if for some
+                // reason some dictionaries have been installed BUT the dictionary pack can't be
+                // found anymore it's safer to actually supply installed dictionaries.
+                return true;
+            } else {
+                // The default is true here for the same reasons as above. We got the dictionary
+                // pack but if we don't have any settings for it, it means the user has never been
+                // to the settings yet. So by default, the main dictionaries should be on.
+                return mDictPreferences.getBoolean(dictId, true);
+            }
+        }
+    }
+
+    /**
+     * Helper method to get the list of cache directories, one for each distinct locale.
+     */
+    private static File[] getCachedDirectoryList(final Context context) {
+        return new File(getWordListCacheDirectory(context)).listFiles(); // null if unlistable
+    }
+
+    /**
+     * Returns the category for a given file name.
+     *
+     * This un-escapes the file name back into an id, extracts the category, and returns it. See
+     * {@link #getMainDictId(Locale)} and {@link #isMainWordListId(String)}.
+     * @return The category as a string or null if it can't be found in the file name.
+     */
+    private static String getCategoryFromFileName(final String fileName) {
+        final String id = getWordListIdFromFileName(fileName);
+        final String[] idArray = id.split(ID_CATEGORY_SEPARATOR);
+        if (2 != idArray.length) return null; // id must split into exactly two parts
+        return idArray[0];
+    }
+
+    /**
+     * Utility class for the {@link #getCachedWordLists} method: a file and its match level.
+     */
+    private static class FileAndMatchLevel {
+        final File mFile;
+        final int mMatchLevel;
+        public FileAndMatchLevel(final File file, final int matchLevel) {
+            mFile = file;
+            mMatchLevel = matchLevel;
+        }
+    }
+
+    /**
+     * Returns the list of cached files for a specific locale, one for each category.
+     *
+     * This will return exactly one file for each word list category that matches
+     * the passed locale. If several files match the locale for any given category,
+     * this returns the file with the closest match to the locale. For example, if
+     * the passed locale is en_US, and for a category we have an en and an en_US
+     * word list available, we'll return only the en_US one.
+     * Thus, the list will contain as many files as there are categories.
+     *
+     * @param locale the locale to find the dictionary files for, as a string.
+     * @param context the context on which to open the files upon.
+     * @return an array of binary dictionary files, which may be empty but may not be null.
+     */
+    private static File[] getCachedWordLists(final String locale,
+            final Context context) {
+        final File[] directoryList = getCachedDirectoryList(context);
+        if (null == directoryList) return EMPTY_FILE_ARRAY;
+        final HashMap<String, FileAndMatchLevel> cacheFiles =
+                new HashMap<String, FileAndMatchLevel>();
+        for (File directory : directoryList) {
+            if (!directory.isDirectory()) continue;
+            final String dirLocale = getWordListIdFromFileName(directory.getName());
+            final int matchLevel = LocaleUtils.getMatchLevel(dirLocale, locale);
+            if (LocaleUtils.isMatch(matchLevel)) {
+                final File[] wordLists = directory.listFiles();
+                if (null != wordLists) {
+                    for (File wordList : wordLists) {
+                        final String category = getCategoryFromFileName(wordList.getName());
+                        final FileAndMatchLevel currentBestMatch = cacheFiles.get(category);
+                        if (null == currentBestMatch || currentBestMatch.mMatchLevel < matchLevel) {
+                            cacheFiles.put(category, new FileAndMatchLevel(wordList, matchLevel));
+                        }
+                    }
+                }
+            }
+        }
+        if (cacheFiles.isEmpty()) return EMPTY_FILE_ARRAY;
+        final File[] result = new File[cacheFiles.size()];
+        int index = 0;
+        for (final FileAndMatchLevel entry : cacheFiles.values()) {
+            result[index++] = entry.mFile;
+        }
+        return result;
+    }
+
+    /**
+     * Remove all files with the passed id, except the passed file.
+     *
+     * If a dictionary with a given ID has a metadata change that causes it to change
+     * path, we need to remove the old version. The only way to do this is to check all
+     * installed files for a matching ID in a different directory.
+     */
+    public static void removeFilesWithIdExcept(final Context context, final String id,
+            final File fileToKeep) {
+        try {
+            final File canonicalFileToKeep = fileToKeep.getCanonicalFile();
+            final File[] directoryList = getCachedDirectoryList(context);
+            if (null == directoryList) return;
+            for (File directory : directoryList) {
+                // There is one directory per locale. See #getCachedDirectoryList
+                if (!directory.isDirectory()) continue;
+                final File[] wordLists = directory.listFiles();
+                if (null == wordLists) continue;
+                for (File wordList : wordLists) {
+                    final String fileId = getWordListIdFromFileName(wordList.getName());
+                    if (fileId.equals(id)) {
+                        if (!canonicalFileToKeep.equals(wordList.getCanonicalFile())) {
+                            wordList.delete(); // best effort: the result is ignored
+                        }
+                    }
+                }
+            }
+        } catch (java.io.IOException e) {
+            Log.e(TAG, "IOException trying to cleanup files : " + e);
+        }
+    }
+
+
+    /**
+     * Returns the id associated with the main word list for a specified locale.
+     *
+     * Word lists stored in Android Keyboard's resources are referred to as the "main"
+     * word lists. Since they can be updated like any other list, we need to assign a
+     * unique ID to them. This ID is the "main" category, the category separator and the
+     * language code of the locale they are for, and this method returns this ID.
+     */
+    private static String getMainDictId(final Locale locale) {
+        // This works because we don't include by default different dictionaries for
+        // different countries. This actually needs to return the id that we would
+        // like to use for word lists included in resources, and the following is okay.
+        return MAIN_DICTIONARY_CATEGORY + ID_CATEGORY_SEPARATOR + locale.getLanguage().toString();
+    }
+
+    private static boolean isMainWordListId(final String id) {
+        final String[] idArray = id.split(ID_CATEGORY_SEPARATOR);
+        if (2 != idArray.length) return false; // id must split into exactly two parts
+        return MAIN_DICTIONARY_CATEGORY.equals(idArray[0]); // the category comes first
+    }
+
     /**
      * Returns a list of file addresses for a given locale, trying relevant methods in order.
      *
      * Tries to get binary dictionaries from various sources, in order:
-     * - Uses a private method of getting a private dictionaries, as implemented by the
-     *   PrivateBinaryDictionaryGetter class.
-     * If that fails:
      * - Uses a content provider to get a public dictionary set, as per the protocol described
      *   in BinaryDictionaryFileDumper.
      * If that fails:
-     * - Gets a file name from the fallback resource passed as an argument.
+     * - Gets a file name from the built-in dictionary for this locale, if any.
      * If that fails:
      * - Returns null.
-     * @return The address of a valid file, or null.
-     */
-    public static List<AssetFileAddress> getDictionaryFiles(Locale locale, Context context,
-            int fallbackResId) {
-        // Try first to query a private package signed the same way for private files.
-        final List<AssetFileAddress> privateFiles =
-                PrivateBinaryDictionaryGetter.getDictionaryFiles(locale, context);
-        if (null != privateFiles) {
-            return privateFiles;
-        } else {
-            try {
-                // If that was no-go, try to find a publicly exported dictionary.
-                List<AssetFileAddress> listFromContentProvider =
-                        BinaryDictionaryFileDumper.getDictSetFromContentProvider(locale, context);
-                if (null != listFromContentProvider) {
-                    return listFromContentProvider;
-                }
-                // If the list is null, fall through and return the fallback
-            } catch (FileNotFoundException e) {
-                Log.e(TAG, "Unable to create dictionary file from provider for locale "
-                        + locale.toString() + ": falling back to internal dictionary");
-            } catch (IOException e) {
-                Log.e(TAG, "Unable to read source data for locale "
-                        + locale.toString() + ": falling back to internal dictionary");
+     * @return The list of addresses of valid dictionary files, which may be empty but not null.
+     */
+    public static ArrayList<AssetFileAddress> getDictionaryFiles(final Locale locale,
+            final Context context) {
+
+        final boolean hasDefaultWordList = DictionaryFactory.isDictionaryAvailable(context, locale);
+        // cacheWordListsFromContentProvider returns the list of files it copied to local
+        // storage, but we don't really care about what was copied NOW: what we want is the
+        // list of everything we ever cached, so we ignore the return value.
+        BinaryDictionaryFileDumper.cacheWordListsFromContentProvider(locale, context,
+                hasDefaultWordList);
+        final File[] cachedWordLists = getCachedWordLists(locale.toString(), context);
+        final String mainDictId = getMainDictId(locale);
+        final DictPackSettings dictPackSettings = new DictPackSettings(context);
+
+        boolean foundMainDict = false;
+        final ArrayList<AssetFileAddress> fileList = new ArrayList<AssetFileAddress>();
+        // cachedWordLists may not be null, see doc for getCachedWordLists
+        for (final File f : cachedWordLists) {
+            final String wordListId = getWordListIdFromFileName(f.getName());
+            if (isMainWordListId(wordListId)) {
+                foundMainDict = true;
+            }
+            if (!dictPackSettings.isWordListActive(wordListId)) continue;
+            if (f.canRead()) {
+                fileList.add(AssetFileAddress.makeFromFileName(f.getPath()));
+            } else {
+                Log.e(TAG, "Found a cached dictionary file but cannot read it");
+            }
+        }
+
+        if (!foundMainDict && dictPackSettings.isWordListActive(mainDictId)) {
+            final int fallbackResId =
+                    DictionaryFactory.getMainDictionaryResourceId(context.getResources(), locale);
+            final AssetFileAddress fallbackAsset = loadFallbackResource(context, fallbackResId);
+            if (null != fallbackAsset) {
+                fileList.add(fallbackAsset);
             }
-            return Arrays.asList(loadFallbackResource(context, fallbackResId));
         }
+
+        return fileList;
     }
 }