Refactoring

This creates a new DictionaryInfoUtils class and moves a bunch of static methods there for later usage. Change-Id: Iecb0643e6029a7be36bd6cb36aa918c40e6d8c6a
author: Jean Chalard <jchalard@google.com> 2013-01-29 18:05:33 +0900
committer: Jean Chalard <jchalard@google.com> 2013-01-29 19:16:21 +0900
commit: d515f134f726c432c0bab5600e7b31ed989fb1b5 (patch)
tree: f11ed68872c7324e27d9144ab497a77615f0cc3a /java/src/com/android/inputmethod/latin/DictionaryInfoUtils.java
parent: 19e13df91f36cf131f5139c44d7b80fc1e8f6f21 (diff)
download: latinime-d515f134f726c432c0bab5600e7b31ed989fb1b5.tar.gz
latinime-d515f134f726c432c0bab5600e7b31ed989fb1b5.tar.xz
latinime-d515f134f726c432c0bab5600e7b31ed989fb1b5.zip
1 files changed, 245 insertions, 0 deletions
diff --git a/java/src/com/android/inputmethod/latin/DictionaryInfoUtils.java b/java/src/com/android/inputmethod/latin/DictionaryInfoUtils.java
new file mode 100644
index 000000000..c676bf1b9
--- /dev/null
+++ b/java/src/com/android/inputmethod/latin/DictionaryInfoUtils.java
@@ -0,0 +1,245 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin;
+
+import android.content.Context;
+import android.content.res.Resources;
+import android.util.Log;
+
+import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
+import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
+import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Locale;
+
+/**
+ * This class encapsulates the logic for the Latin-IME side of dictionary information management.
+ */
+public class DictionaryInfoUtils {
+    private static final String TAG = DictionaryInfoUtils.class.getSimpleName();
+    // This class must be located in the same package as LatinIME.java.
+    private static final String RESOURCE_PACKAGE_NAME =
+            DictionaryInfoUtils.class.getPackage().getName();
+    private static final String DEFAULT_MAIN_DICT = "main";
+    private static final String MAIN_DICT_PREFIX = "main_";
+    // 6 digits - unicode is limited to 21 bits
+    private static final int MAX_HEX_DIGITS_FOR_CODEPOINT = 6;
+
+    private DictionaryInfoUtils() {
+        // Private constructor to forbid instantiation of this helper class.
+    }
+
+    /**
+     * Returns whether we may want to use this character as part of a file name.
+     *
+     * This basically only accepts ascii letters and numbers, and rejects everything else.
+     */
+    private static boolean isFileNameCharacter(int codePoint) {
+        if (codePoint >= 0x30 && codePoint <= 0x39) return true; // Digit
+        if (codePoint >= 0x41 && codePoint <= 0x5A) return true; // Uppercase
+        if (codePoint >= 0x61 && codePoint <= 0x7A) return true; // Lowercase
+        return codePoint == '_'; // Underscore
+    }
+
+    /**
+     * Escapes a string for any characters that may be suspicious for a file or directory name.
+     *
+     * Concretely this does a sort of URL-encoding except it will encode everything that's not
+     * alphanumeric or underscore. (true URL-encoding leaves alone characters like '*', which
+     * we cannot allow here)
+     */
+    // TODO: create a unit test for this method
+    public static String replaceFileNameDangerousCharacters(final String name) {
+        // This assumes '%' is fully available as a non-separator, normal
+        // character in a file name. This is probably true for all file systems.
+        final StringBuilder sb = new StringBuilder();
+        final int nameLength = name.length();
+        for (int i = 0; i < nameLength; i = name.offsetByCodePoints(i, 1)) {
+            final int codePoint = name.codePointAt(i);
+            if (DictionaryInfoUtils.isFileNameCharacter(codePoint)) {
+                sb.appendCodePoint(codePoint);
+            } else {
+                sb.append(String.format((Locale)null, "%%%1$0" + MAX_HEX_DIGITS_FOR_CODEPOINT + "x",
+                        codePoint));
+            }
+        }
+        return sb.toString();
+    }
+
+    /**
+     * Helper method to get the top level cache directory.
+     */
+    private static String getWordListCacheDirectory(final Context context) {
+        return context.getFilesDir() + File.separator + "dicts";
+    }
+
+    /**
+     * Reverse escaping done by replaceFileNameDangerousCharacters.
+     */
+    public static String getWordListIdFromFileName(final String fname) {
+        final StringBuilder sb = new StringBuilder();
+        final int fnameLength = fname.length();
+        for (int i = 0; i < fnameLength; i = fname.offsetByCodePoints(i, 1)) {
+            final int codePoint = fname.codePointAt(i);
+            if ('%' != codePoint) {
+                sb.appendCodePoint(codePoint);
+            } else {
+                // + 1 to pass the % sign
+                final int encodedCodePoint = Integer.parseInt(
+                        fname.substring(i + 1, i + 1 + MAX_HEX_DIGITS_FOR_CODEPOINT), 16);
+                i += MAX_HEX_DIGITS_FOR_CODEPOINT;
+                sb.appendCodePoint(encodedCodePoint);
+            }
+        }
+        return sb.toString();
+    }
+
+    /** Helper method to get the list of cache directories, one for each distinct locale. */
+    public static File[] getCachedDirectoryList(final Context context) {
+        final String cacheRoot = DictionaryInfoUtils.getWordListCacheDirectory(context);
+        // File#listFiles() returns null when the path does not denote a directory.
+        return new File(cacheRoot).listFiles();
+    }
+
+    /**
+     * Extracts the category part of the word list id encoded in a file name.
+     *
+     * The file name is first unescaped into an id. See {@link #getMainDictId(Locale)}
+     * and {@link #isMainWordListId(String)}.
+     * @return The category as a string or null if it can't be found in the file name.
+     */
+    public static String getCategoryFromFileName(final String fileName) {
+        // A well-formed id reads category:locale, so cutting it on the separator must
+        // give exactly two parts, the first one being the category.
+        final String[] idParts = getWordListIdFromFileName(fileName)
+                .split(BinaryDictionaryGetter.ID_CATEGORY_SEPARATOR);
+        final boolean isWellFormed = 2 == idParts.length;
+        return isWellFormed ? idParts[0] : null;
+    }
+
+    /**
+     * Finds the cache directory for a locale, creating it on disk when missing.
+     *
+     * A failure to create the directory is only logged; the path is returned anyway.
+     */
+    private static String getCacheDirectoryForLocale(final String locale, final Context context) {
+        final String relativeDirectoryName = replaceFileNameDangerousCharacters(locale);
+        final String absoluteDirectoryName = getWordListCacheDirectory(context) + File.separator
+                + relativeDirectoryName;
+        final File directory = new File(absoluteDirectoryName);
+        if (!directory.exists() && !directory.mkdirs()) {
+            Log.e(TAG, "Could not create the directory for locale " + locale);
+        }
+        return absoluteDirectoryName;
+    }
+
+    /**
+     * Generates a file name for the id and locale passed as an argument.
+     *
+     * The result is the escaped id inside the cache directory of the locale, so with the
+     * current implementation it is unique for any id/locale pair. Do not rely on this to
+     * reuse an id across locales though: an id should be unique for any dictionary.
+     * Looking up the directory of the locale creates it when it does not exist yet.
+     * Escaping is similar to URL-encoding, except it also covers characters that look
+     * dangerous to some file systems.
+     * @param id the id of the dictionary for which to get a file name
+     * @param locale the locale for which to get the file name as a string
+     * @param context the context to use for getting the directory
+     * @return the name of the file to be created
+     */
+    public static String getCacheFileName(String id, String locale, Context context) {
+        final String escapedId = replaceFileNameDangerousCharacters(id);
+        final String localeDirectory = getCacheDirectoryForLocale(locale, context);
+        return localeDirectory + File.separator + escapedId;
+    }
+
+    /** Returns whether the id has the category:locale format with the main category. */
+    public static boolean isMainWordListId(final String id) {
+        // Splitting a well-formed id on the separator yields exactly category and locale.
+        final String[] idParts = id.split(BinaryDictionaryGetter.ID_CATEGORY_SEPARATOR);
+        return 2 == idParts.length
+                && BinaryDictionaryGetter.MAIN_DICTIONARY_CATEGORY.equals(idParts[0]);
+    }
+
+    /**
+     * Helper method to return a dictionary res id for a locale, or 0 if none.
+     *
+     * The language and country specific resource is tried first when the locale has
+     * a country, then the resource for the language alone.
+     * @param res the resources in which to look the dictionary up
+     * @param locale dictionary locale
+     * @return main dictionary resource id
+     */
+    public static int getMainDictionaryResourceIdIfAvailableForLocale(final Resources res,
+            final Locale locale) {
+        final boolean hasCountry = !locale.getCountry().isEmpty();
+        if (hasCountry) {
+            // Try to find main_language_country dictionary.
+            final String countryDictName =
+                    MAIN_DICT_PREFIX + locale.toString().toLowerCase(Locale.ROOT);
+            final int countryResId =
+                    res.getIdentifier(countryDictName, "raw", RESOURCE_PACKAGE_NAME);
+            if (0 != countryResId) {
+                return countryResId;
+            }
+        }
+
+        // Try to find main_language dictionary. A failed lookup already yields 0.
+        final String languageDictName = MAIN_DICT_PREFIX + locale.getLanguage();
+        return res.getIdentifier(languageDictName, "raw", RESOURCE_PACKAGE_NAME);
+    }
+
+    /**
+     * Returns a main dictionary resource id, using the default one if the locale has none.
+     * @param locale dictionary locale
+     * @return main dictionary resource id
+     */
+    public static int getMainDictionaryResourceId(final Resources res, final Locale locale) {
+        final int localeResId = getMainDictionaryResourceIdIfAvailableForLocale(res, locale);
+        return 0 == localeResId
+                ? res.getIdentifier(DEFAULT_MAIN_DICT, "raw", RESOURCE_PACKAGE_NAME) : localeResId;
+    }
+
+    /**
+     * Returns the id associated with the main word list for a specified locale.
+     *
+     * Word lists stored in Android Keyboard's resources are referred to as the "main"
+     * word lists. They can be updated like any other list, so they need a unique ID:
+     * the main category, the separator, then the language of the locale.
+     */
+    public static String getMainDictId(final Locale locale) {
+        // Only the language is used. This works because we don't include by default
+        // different dictionaries for different countries, and this method needs to
+        // return the id we would like to use for word lists included in resources.
+        final String language = locale.getLanguage();
+        return BinaryDictionaryGetter.MAIN_DICTIONARY_CATEGORY
+                + BinaryDictionaryGetter.ID_CATEGORY_SEPARATOR + language;
+    }
+
+    /** Reads the header of a dictionary file; null if it is unsupported or unreadable. */
+    public static FileHeader getDictionaryFileHeaderOrNull(final File file) {
+        try {
+            return BinaryDictIOUtils.getDictionaryFileHeader(file);
+        } catch (UnsupportedFormatException unsupportedFormat) {
+            return null;
+        } catch (IOException readFailure) {
+            return null;
+        }
+    }
+}
author	Jean Chalard <jchalard@google.com>	2013-01-29 18:05:33 +0900
committer	Jean Chalard <jchalard@google.com>	2013-01-29 19:16:21 +0900
commit	d515f134f726c432c0bab5600e7b31ed989fb1b5 (patch)
tree	f11ed68872c7324e27d9144ab497a77615f0cc3a /java/src/com/android/inputmethod/latin/DictionaryInfoUtils.java
parent	19e13df91f36cf131f5139c44d7b80fc1e8f6f21 (diff)
download	latinime-d515f134f726c432c0bab5600e7b31ed989fb1b5.tar.gz latinime-d515f134f726c432c0bab5600e7b31ed989fb1b5.tar.xz latinime-d515f134f726c432c0bab5600e7b31ed989fb1b5.zip