diff options
author | 2013-01-29 18:05:33 +0900 | |
---|---|---|
committer | 2013-01-29 19:16:21 +0900 | |
commit | d515f134f726c432c0bab5600e7b31ed989fb1b5 (patch) | |
tree | f11ed68872c7324e27d9144ab497a77615f0cc3a /java/src/com/android/inputmethod/latin/DictionaryInfoUtils.java | |
parent | 19e13df91f36cf131f5139c44d7b80fc1e8f6f21 (diff) | |
download | latinime-d515f134f726c432c0bab5600e7b31ed989fb1b5.tar.gz latinime-d515f134f726c432c0bab5600e7b31ed989fb1b5.tar.xz latinime-d515f134f726c432c0bab5600e7b31ed989fb1b5.zip |
Refactoring
This creates a new DictionaryInfoUtils class and moves a bunch
of static methods there for later usage.
Change-Id: Iecb0643e6029a7be36bd6cb36aa918c40e6d8c6a
Diffstat (limited to 'java/src/com/android/inputmethod/latin/DictionaryInfoUtils.java')
-rw-r--r-- | java/src/com/android/inputmethod/latin/DictionaryInfoUtils.java | 245 |
1 files changed, 245 insertions, 0 deletions
diff --git a/java/src/com/android/inputmethod/latin/DictionaryInfoUtils.java b/java/src/com/android/inputmethod/latin/DictionaryInfoUtils.java new file mode 100644 index 000000000..c676bf1b9 --- /dev/null +++ b/java/src/com/android/inputmethod/latin/DictionaryInfoUtils.java @@ -0,0 +1,245 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.inputmethod.latin; + +import android.content.Context; +import android.content.res.Resources; +import android.util.Log; + +import com.android.inputmethod.latin.makedict.BinaryDictIOUtils; +import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; +import com.android.inputmethod.latin.makedict.UnsupportedFormatException; + +import java.io.File; +import java.io.IOException; +import java.util.Locale; + +/** + * This class encapsulates the logic for the Latin-IME side of dictionary information management. + */ +public class DictionaryInfoUtils { + private static final String TAG = DictionaryInfoUtils.class.getSimpleName(); + // This class must be located in the same package as LatinIME.java. + private static final String RESOURCE_PACKAGE_NAME = + DictionaryInfoUtils.class.getPackage().getName(); + private static final String DEFAULT_MAIN_DICT = "main"; + private static final String MAIN_DICT_PREFIX = "main_"; + // 6 digits - unicode is limited to 21 bits + private static final int MAX_HEX_DIGITS_FOR_CODEPOINT = 6; + + private DictionaryInfoUtils() { + // Private constructor to forbid instantation of this helper class. + } + + /** + * Returns whether we may want to use this character as part of a file name. + * + * This basically only accepts ascii letters and numbers, and rejects everything else. + */ + private static boolean isFileNameCharacter(int codePoint) { + if (codePoint >= 0x30 && codePoint <= 0x39) return true; // Digit + if (codePoint >= 0x41 && codePoint <= 0x5A) return true; // Uppercase + if (codePoint >= 0x61 && codePoint <= 0x7A) return true; // Lowercase + return codePoint == '_'; // Underscore + } + + /** + * Escapes a string for any characters that may be suspicious for a file or directory name. + * + * Concretely this does a sort of URL-encoding except it will encode everything that's not + * alphanumeric or underscore. (true URL-encoding leaves alone characters like '*', which + * we cannot allow here) + */ + // TODO: create a unit test for this method + public static String replaceFileNameDangerousCharacters(final String name) { + // This assumes '%' is fully available as a non-separator, normal + // character in a file name. This is probably true for all file systems. + final StringBuilder sb = new StringBuilder(); + final int nameLength = name.length(); + for (int i = 0; i < nameLength; i = name.offsetByCodePoints(i, 1)) { + final int codePoint = name.codePointAt(i); + if (DictionaryInfoUtils.isFileNameCharacter(codePoint)) { + sb.appendCodePoint(codePoint); + } else { + sb.append(String.format((Locale)null, "%%%1$0" + MAX_HEX_DIGITS_FOR_CODEPOINT + "x", + codePoint)); + } + } + return sb.toString(); + } + + /** + * Helper method to get the top level cache directory. + */ + private static String getWordListCacheDirectory(final Context context) { + return context.getFilesDir() + File.separator + "dicts"; + } + + /** + * Reverse escaping done by replaceFileNameDangerousCharacters. + */ + public static String getWordListIdFromFileName(final String fname) { + final StringBuilder sb = new StringBuilder(); + final int fnameLength = fname.length(); + for (int i = 0; i < fnameLength; i = fname.offsetByCodePoints(i, 1)) { + final int codePoint = fname.codePointAt(i); + if ('%' != codePoint) { + sb.appendCodePoint(codePoint); + } else { + // + 1 to pass the % sign + final int encodedCodePoint = Integer.parseInt( + fname.substring(i + 1, i + 1 + MAX_HEX_DIGITS_FOR_CODEPOINT), 16); + i += MAX_HEX_DIGITS_FOR_CODEPOINT; + sb.appendCodePoint(encodedCodePoint); + } + } + return sb.toString(); + } + + /** + * Helper method to the list of cache directories, one for each distinct locale. + */ + public static File[] getCachedDirectoryList(final Context context) { + return new File(DictionaryInfoUtils.getWordListCacheDirectory(context)).listFiles(); + } + + /** + * Returns the category for a given file name. + * + * This parses the file name, extracts the category, and returns it. See + * {@link #getMainDictId(Locale)} and {@link #isMainWordListId(String)}. + * @return The category as a string or null if it can't be found in the file name. + */ + public static String getCategoryFromFileName(final String fileName) { + final String id = getWordListIdFromFileName(fileName); + final String[] idArray = id.split(BinaryDictionaryGetter.ID_CATEGORY_SEPARATOR); + // An id is supposed to be in format category:locale, so splitting on the separator + // should yield a 2-elements array + if (2 != idArray.length) return null; + return idArray[0]; + } + + /** + * Find out the cache directory associated with a specific locale. + */ + private static String getCacheDirectoryForLocale(final String locale, final Context context) { + final String relativeDirectoryName = replaceFileNameDangerousCharacters(locale); + final String absoluteDirectoryName = getWordListCacheDirectory(context) + File.separator + + relativeDirectoryName; + final File directory = new File(absoluteDirectoryName); + if (!directory.exists()) { + if (!directory.mkdirs()) { + Log.e(TAG, "Could not create the directory for locale" + locale); + } + } + return absoluteDirectoryName; + } + + /** + * Generates a file name for the id and locale passed as an argument. + * + * In the current implementation the file name returned will always be unique for + * any id/locale pair, but please do not expect that the id can be the same for + * different dictionaries with different locales. An id should be unique for any + * dictionary. + * The file name is pretty much an URL-encoded version of the id inside a directory + * named like the locale, except it will also escape characters that look dangerous + * to some file systems. + * @param id the id of the dictionary for which to get a file name + * @param locale the locale for which to get the file name as a string + * @param context the context to use for getting the directory + * @return the name of the file to be created + */ + public static String getCacheFileName(String id, String locale, Context context) { + final String fileName = replaceFileNameDangerousCharacters(id); + return getCacheDirectoryForLocale(locale, context) + File.separator + fileName; + } + + public static boolean isMainWordListId(final String id) { + final String[] idArray = id.split(BinaryDictionaryGetter.ID_CATEGORY_SEPARATOR); + // An id is supposed to be in format category:locale, so splitting on the separator + // should yield a 2-elements array + if (2 != idArray.length) return false; + return BinaryDictionaryGetter.MAIN_DICTIONARY_CATEGORY.equals(idArray[0]); + } + + /** + * Helper method to return a dictionary res id for a locale, or 0 if none. + * @param locale dictionary locale + * @return main dictionary resource id + */ + public static int getMainDictionaryResourceIdIfAvailableForLocale(final Resources res, + final Locale locale) { + int resId; + // Try to find main_language_country dictionary. + if (!locale.getCountry().isEmpty()) { + final String dictLanguageCountry = + MAIN_DICT_PREFIX + locale.toString().toLowerCase(Locale.ROOT); + if ((resId = res.getIdentifier( + dictLanguageCountry, "raw", RESOURCE_PACKAGE_NAME)) != 0) { + return resId; + } + } + + // Try to find main_language dictionary. + final String dictLanguage = MAIN_DICT_PREFIX + locale.getLanguage(); + if ((resId = res.getIdentifier(dictLanguage, "raw", RESOURCE_PACKAGE_NAME)) != 0) { + return resId; + } + + // Not found, return 0 + return 0; + } + + /** + * Returns a main dictionary resource id + * @param locale dictionary locale + * @return main dictionary resource id + */ + public static int getMainDictionaryResourceId(final Resources res, final Locale locale) { + int resourceId = getMainDictionaryResourceIdIfAvailableForLocale(res, locale); + if (0 != resourceId) return resourceId; + return res.getIdentifier(DEFAULT_MAIN_DICT, "raw", RESOURCE_PACKAGE_NAME); + } + + /** + * Returns the id associated with the main word list for a specified locale. + * + * Word lists stored in Android Keyboard's resources are referred to as the "main" + * word lists. Since they can be updated like any other list, we need to assign a + * unique ID to them. This ID is just the name of the language (locale-wise) they + * are for, and this method returns this ID. + */ + public static String getMainDictId(final Locale locale) { + // This works because we don't include by default different dictionaries for + // different countries. This actually needs to return the id that we would + // like to use for word lists included in resources, and the following is okay. + return BinaryDictionaryGetter.MAIN_DICTIONARY_CATEGORY + + BinaryDictionaryGetter.ID_CATEGORY_SEPARATOR + locale.getLanguage().toString(); + } + + public static FileHeader getDictionaryFileHeaderOrNull(final File file) { + try { + final FileHeader header = BinaryDictIOUtils.getDictionaryFileHeader(file); + return header; + } catch (UnsupportedFormatException e) { + return null; + } catch (IOException e) { + return null; + } + } +} |