diff options
58 files changed, 1752 insertions, 293 deletions
diff --git a/common/Android.mk b/common/Android.mk index 085543f75..132a22358 100644 --- a/common/Android.mk +++ b/common/Android.mk @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -LOCAL_PATH:= $(call my-dir) +LOCAL_PATH := $(call my-dir) include $(CLEAR_VARS) LOCAL_MODULE := latinime-common LOCAL_SRC_FILES := $(call all-java-files-under, src) diff --git a/common/src/com/android/inputmethod/latin/common/LocaleUtils.java b/common/src/com/android/inputmethod/latin/common/LocaleUtils.java index 14b3d220d..7f2333be5 100644 --- a/common/src/com/android/inputmethod/latin/common/LocaleUtils.java +++ b/common/src/com/android/inputmethod/latin/common/LocaleUtils.java @@ -17,8 +17,12 @@ package com.android.inputmethod.latin.common; import java.util.HashMap; +import java.util.HashSet; import java.util.Locale; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + /** * A class to help with handling Locales in string form. * @@ -160,26 +164,49 @@ public final class LocaleUtils { /** * Creates a locale from a string specification. + * @param localeString a string specification of a locale, in a format of "ll_cc_variant" where + * "ll" is a language code, "cc" is a country code. 
*/ - public static Locale constructLocaleFromString(final String localeStr) { - if (localeStr == null) + @Nullable + public static Locale constructLocaleFromString(@Nullable final String localeString) { + if (localeString == null) { return null; + } synchronized (sLocaleCache) { - if (sLocaleCache.containsKey(localeStr)) - return sLocaleCache.get(localeStr); - Locale retval = null; - String[] localeParams = localeStr.split("_", 3); - if (localeParams.length == 1) { - retval = new Locale(localeParams[0]); - } else if (localeParams.length == 2) { - retval = new Locale(localeParams[0], localeParams[1]); - } else if (localeParams.length == 3) { - retval = new Locale(localeParams[0], localeParams[1], localeParams[2]); + if (sLocaleCache.containsKey(localeString)) { + return sLocaleCache.get(localeString); } - if (retval != null) { - sLocaleCache.put(localeStr, retval); + final String[] elements = localeString.split("_", 3); + final Locale locale; + if (elements.length == 1) { + locale = new Locale(elements[0] /* language */); + } else if (elements.length == 2) { + locale = new Locale(elements[0] /* language */, elements[1] /* country */); + } else { // elements.length == 3 + locale = new Locale(elements[0] /* language */, elements[1] /* country */, + elements[2] /* variant */); } - return retval; + sLocaleCache.put(localeString, locale); + return locale; } } + + // TODO: Get this information from the framework instead of maintaining here by ourselves. + private static final HashSet<String> sRtlLanguageCodes = new HashSet<>(); + static { + // List of known Right-To-Left language codes. 
+ sRtlLanguageCodes.add("ar"); // Arabic + sRtlLanguageCodes.add("fa"); // Persian + sRtlLanguageCodes.add("iw"); // Hebrew + sRtlLanguageCodes.add("ku"); // Kurdish + sRtlLanguageCodes.add("ps"); // Pashto + sRtlLanguageCodes.add("sd"); // Sindhi + sRtlLanguageCodes.add("ug"); // Uyghur + sRtlLanguageCodes.add("ur"); // Urdu + sRtlLanguageCodes.add("yi"); // Yiddish + } + + public static boolean isRtlLanguage(@Nonnull final Locale locale) { + return sRtlLanguageCodes.contains(locale.getLanguage()); + } } diff --git a/java/Android.mk b/java/Android.mk index a2c5697d3..b84b3471e 100644 --- a/java/Android.mk +++ b/java/Android.mk @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -LOCAL_PATH:= $(call my-dir) +LOCAL_PATH := $(call my-dir) include $(CLEAR_VARS) LOCAL_MODULE_TAGS := optional diff --git a/java/res/values-eu-rES/strings-emoji-descriptions.xml b/java/res/values-eu-rES/strings-emoji-descriptions.xml index 2faec968a..c774ae1b8 100644 --- a/java/res/values-eu-rES/strings-emoji-descriptions.xml +++ b/java/res/values-eu-rES/strings-emoji-descriptions.xml @@ -846,6 +846,6 @@ <string name="spoken_emoji_1F6C1" msgid="2845056048320031158">"Bainuontzia"</string> <string name="spoken_emoji_1F6C2" msgid="8117262514698011877">"Pasaporte-kontrola"</string> <string name="spoken_emoji_1F6C3" msgid="1176342001834630675">"Aduana"</string> - <string name="spoken_emoji_1F6C4" msgid="1477622834179978886">"Maleta-erreklamazioa"</string> + <string name="spoken_emoji_1F6C4" msgid="1477622834179978886">"Ekipaje-erreklamazioa"</string> <string name="spoken_emoji_1F6C5" msgid="2495834050856617451">"Ahaztutako maletak"</string> </resources> diff --git a/java/res/values-km-rKH/strings-emoji-descriptions.xml b/java/res/values-km-rKH/strings-emoji-descriptions.xml index 757df50e7..e9b8780a5 100644 --- a/java/res/values-km-rKH/strings-emoji-descriptions.xml +++ b/java/res/values-km-rKH/strings-emoji-descriptions.xml @@ 
-267,7 +267,7 @@ <string name="spoken_emoji_1F36A" msgid="2726271795913042295">"ខូគី"</string> <string name="spoken_emoji_1F36B" msgid="6342163604299875931">"សូកូឡា"</string> <string name="spoken_emoji_1F36C" msgid="2168934753998218790">"ស្ករគ្រាប់"</string> - <string name="spoken_emoji_1F36D" msgid="3671507903799975792">"ស្ករគ្រាប់មានដងកាន់"</string> + <string name="spoken_emoji_1F36D" msgid="3671507903799975792">"Lollipop"</string> <string name="spoken_emoji_1F36E" msgid="4630541402785165902">"សង់ខ្យា"</string> <string name="spoken_emoji_1F36F" msgid="5577915387425169439">"ថូ"</string> <string name="spoken_emoji_1F370" msgid="7243244547866114951">"នំខេក"</string> diff --git a/java/res/values-pt/strings.xml b/java/res/values-pt/strings.xml index 2e0cd3b55..2cc86e12f 100644 --- a/java/res/values-pt/strings.xml +++ b/java/res/values-pt/strings.xml @@ -102,8 +102,7 @@ <string name="subtype_with_layout_hi_ZZ" msgid="6827402953860547044">"Híndi-inglês (<xliff:g id="KEYBOARD_LAYOUT">%s</xliff:g>)"</string> <string name="subtype_with_layout_sr_ZZ" msgid="2859024772719772407">"Sérvio (<xliff:g id="KEYBOARD_LAYOUT">%s</xliff:g>)"</string> <string name="subtype_generic_traditional" msgid="8584594350973800586">"<xliff:g id="LANGUAGE_NAME">%s</xliff:g> (tradicional)"</string> - <!-- no translation found for subtype_generic_compact (3353673321203202922) --> - <skip /> + <string name="subtype_generic_compact" msgid="3353673321203202922">"<xliff:g id="LANGUAGE_NAME">%s</xliff:g> (compacto)"</string> <string name="subtype_no_language" msgid="7137390094240139495">"Nenhum idioma (alfabeto)"</string> <string name="subtype_no_language_qwerty" msgid="244337630616742604">"Alfabeto (QWERTY)"</string> <string name="subtype_no_language_qwertz" msgid="443066912507547976">"Alfabeto (QWERTZ)"</string> diff --git a/java/src/com/android/inputmethod/keyboard/KeyboardLayoutSet.java b/java/src/com/android/inputmethod/keyboard/KeyboardLayoutSet.java index 51f37fdc6..b1051385d 100644 --- 
a/java/src/com/android/inputmethod/keyboard/KeyboardLayoutSet.java +++ b/java/src/com/android/inputmethod/keyboard/KeyboardLayoutSet.java @@ -294,7 +294,7 @@ public final class KeyboardLayoutSet { : subtype; mParams.mSubtype = keyboardSubtype; mParams.mKeyboardLayoutSetName = KEYBOARD_LAYOUT_SET_RESOURCE_PREFIX - + SubtypeLocaleUtils.getKeyboardLayoutSetName(keyboardSubtype); + + keyboardSubtype.getKeyboardLayoutSetName(); return this; } diff --git a/java/src/com/android/inputmethod/keyboard/internal/KeyboardBuilder.java b/java/src/com/android/inputmethod/keyboard/internal/KeyboardBuilder.java index c739bf3e0..51f89c122 100644 --- a/java/src/com/android/inputmethod/keyboard/internal/KeyboardBuilder.java +++ b/java/src/com/android/inputmethod/keyboard/internal/KeyboardBuilder.java @@ -36,7 +36,6 @@ import com.android.inputmethod.latin.R; import com.android.inputmethod.latin.common.Constants; import com.android.inputmethod.latin.common.StringUtils; import com.android.inputmethod.latin.utils.ResourceUtils; -import com.android.inputmethod.latin.utils.SubtypeLocaleUtils; import com.android.inputmethod.latin.utils.XmlParseUtils; import com.android.inputmethod.latin.utils.XmlParseUtils.ParseException; @@ -648,7 +647,7 @@ public class KeyboardBuilder<KP extends KeyboardParams> { try { final boolean keyboardLayoutSetMatched = matchString(caseAttr, R.styleable.Keyboard_Case_keyboardLayoutSet, - SubtypeLocaleUtils.getKeyboardLayoutSetName(id.mSubtype)); + id.mSubtype.getKeyboardLayoutSetName()); final boolean keyboardLayoutSetElementMatched = matchTypedValue(caseAttr, R.styleable.Keyboard_Case_keyboardLayoutSetElement, id.mElementId, KeyboardId.elementIdToName(id.mElementId)); diff --git a/java/src/com/android/inputmethod/keyboard/internal/LanguageOnSpacebarHelper.java b/java/src/com/android/inputmethod/keyboard/internal/LanguageOnSpacebarHelper.java index 21eaed950..8ed80107a 100644 --- a/java/src/com/android/inputmethod/keyboard/internal/LanguageOnSpacebarHelper.java +++ 
b/java/src/com/android/inputmethod/keyboard/internal/LanguageOnSpacebarHelper.java @@ -25,6 +25,8 @@ import java.util.Collections; import java.util.List; import java.util.Locale; +import javax.annotation.Nonnull; + /** * This class determines that the language name on the spacebar should be displayed in what format. */ @@ -37,7 +39,7 @@ public final class LanguageOnSpacebarHelper { private List<InputMethodSubtype> mEnabledSubtypes = Collections.emptyList(); private boolean mIsSystemLanguageSameAsInputLanguage; - public int getLanguageOnSpacebarFormatType(final RichInputMethodSubtype subtype) { + public int getLanguageOnSpacebarFormatType(@Nonnull final RichInputMethodSubtype subtype) { if (subtype.isNoLanguage()) { return FORMAT_TYPE_FULL_LOCALE; } @@ -50,7 +52,7 @@ public final class LanguageOnSpacebarHelper { return FORMAT_TYPE_MULTIPLE; } final String keyboardLanguage = locales[0].getLanguage(); - final String keyboardLayout = SubtypeLocaleUtils.getKeyboardLayoutSetName(subtype); + final String keyboardLayout = subtype.getKeyboardLayoutSetName(); int sameLanguageAndLayoutCount = 0; for (final InputMethodSubtype ims : mEnabledSubtypes) { final String language = SubtypeLocaleUtils.getSubtypeLocale(ims).getLanguage(); @@ -65,11 +67,30 @@ public final class LanguageOnSpacebarHelper { : FORMAT_TYPE_LANGUAGE_ONLY; } - public void updateEnabledSubtypes(final List<InputMethodSubtype> enabledSubtypes) { + public void onUpdateEnabledSubtypes(@Nonnull final List<InputMethodSubtype> enabledSubtypes) { mEnabledSubtypes = enabledSubtypes; } - public void updateIsSystemLanguageSameAsInputLanguage(final boolean isSame) { - mIsSystemLanguageSameAsInputLanguage = isSame; + public void onSubtypeChanged(@Nonnull final RichInputMethodSubtype subtype, + final boolean implicitlyEnabledSubtype, @Nonnull final Locale systemLocale) { + final Locale[] newLocales = subtype.getLocales(); + if (newLocales.length > 1) { + // In multi-locales mode, the system language is never the same as the 
input language + // because there is no single input language. + mIsSystemLanguageSameAsInputLanguage = false; + return; + } + final Locale newLocale = newLocales[0]; + if (systemLocale.equals(newLocale)) { + mIsSystemLanguageSameAsInputLanguage = true; + return; + } + if (!systemLocale.getLanguage().equals(newLocale.getLanguage())) { + mIsSystemLanguageSameAsInputLanguage = false; + return; + } + // If the subtype is enabled explicitly, the language name should be displayed even when + // the keyboard language and the system language are equal. + mIsSystemLanguageSameAsInputLanguage = implicitlyEnabledSubtype; } } diff --git a/java/src/com/android/inputmethod/latin/RichInputMethodSubtype.java b/java/src/com/android/inputmethod/latin/RichInputMethodSubtype.java index 03f6d60ab..ea8d4a210 100644 --- a/java/src/com/android/inputmethod/latin/RichInputMethodSubtype.java +++ b/java/src/com/android/inputmethod/latin/RichInputMethodSubtype.java @@ -135,13 +135,18 @@ public final class RichInputMethodSubtype { public boolean isRtlSubtype() { // The subtype is considered RTL if the language of the main subtype is RTL. - return SubtypeLocaleUtils.isRtlLanguage(mLocales[0]); + return LocaleUtils.isRtlLanguage(mLocales[0]); } // TODO: remove this method @Nonnull public InputMethodSubtype getRawSubtype() { return mSubtype; } + @Nonnull + public String getKeyboardLayoutSetName() { + return SubtypeLocaleUtils.getKeyboardLayoutSetName(mSubtype); + } + // Dummy no language QWERTY subtype. See {@link R.xml.method}. 
private static final int SUBTYPE_ID_OF_DUMMY_NO_LANGUAGE_SUBTYPE = 0xdde0bfd3; private static final String EXTRA_VALUE_OF_DUMMY_NO_LANGUAGE_SUBTYPE = diff --git a/java/src/com/android/inputmethod/latin/SubtypeSwitcher.java b/java/src/com/android/inputmethod/latin/SubtypeSwitcher.java index b2766e27a..23e348bff 100644 --- a/java/src/com/android/inputmethod/latin/SubtypeSwitcher.java +++ b/java/src/com/android/inputmethod/latin/SubtypeSwitcher.java @@ -24,7 +24,6 @@ import com.android.inputmethod.keyboard.internal.LanguageOnSpacebarHelper; import com.android.inputmethod.latin.utils.SubtypeLocaleUtils; import java.util.List; -import java.util.Locale; import javax.annotation.Nonnull; @@ -69,28 +68,17 @@ public final class SubtypeSwitcher { public void updateParametersOnStartInputView() { final List<InputMethodSubtype> enabledSubtypesOfThisIme = mRichImm.getMyEnabledInputMethodSubtypeList(true); - mLanguageOnSpacebarHelper.updateEnabledSubtypes(enabledSubtypesOfThisIme); + mLanguageOnSpacebarHelper.onUpdateEnabledSubtypes(enabledSubtypesOfThisIme); mRichImm.updateShortcutIME(); } // Update the current subtype. LatinIME.onCurrentInputMethodSubtypeChanged calls this function. public void onSubtypeChanged(@Nonnull final InputMethodSubtype newSubtype) { final RichInputMethodSubtype richSubtype = mRichImm.onSubtypeChanged(newSubtype); - final Locale[] newLocales = richSubtype.getLocales(); - if (newLocales.length > 1) { - // In multi-locales mode, the system language is never the same as the input language - // because there is no single input language. 
- mLanguageOnSpacebarHelper.updateIsSystemLanguageSameAsInputLanguage(false); - } else { - final Locale newLocale = newLocales[0]; - final Locale systemLocale = mResources.getConfiguration().locale; - final boolean sameLocale = systemLocale.equals(newLocale); - final boolean sameLanguage = systemLocale.getLanguage().equals(newLocale.getLanguage()); - final boolean implicitlyEnabled = mRichImm - .checkIfSubtypeBelongsToThisImeAndImplicitlyEnabled(newSubtype); - mLanguageOnSpacebarHelper.updateIsSystemLanguageSameAsInputLanguage( - sameLocale || (sameLanguage && implicitlyEnabled)); - } + final boolean implicitlyEnabledSubtype = mRichImm + .checkIfSubtypeBelongsToThisImeAndImplicitlyEnabled(newSubtype); + mLanguageOnSpacebarHelper.onSubtypeChanged( + richSubtype, implicitlyEnabledSubtype, mResources.getConfiguration().locale); mRichImm.updateShortcutIME(); } diff --git a/java/src/com/android/inputmethod/latin/settings/CustomInputStylePreference.java b/java/src/com/android/inputmethod/latin/settings/CustomInputStylePreference.java index 01398f467..b749aa51a 100644 --- a/java/src/com/android/inputmethod/latin/settings/CustomInputStylePreference.java +++ b/java/src/com/android/inputmethod/latin/settings/CustomInputStylePreference.java @@ -346,8 +346,10 @@ final class CustomInputStylePreference extends DialogPreference super(context, android.R.layout.simple_spinner_item); setDropDownViewResource(android.R.layout.simple_spinner_dropdown_item); + final String[] predefinedKeyboardLayoutSet = context.getResources().getStringArray( + R.array.predefined_layouts); // TODO: Should filter out already existing combinations of locale and layout. - for (final String layout : SubtypeLocaleUtils.getPredefinedKeyboardLayoutSet()) { + for (final String layout : predefinedKeyboardLayoutSet) { // This is a dummy subtype with NO_LANGUAGE, only for display. 
final InputMethodSubtype subtype = AdditionalSubtypeUtils.createDummyAdditionalSubtype( diff --git a/java/src/com/android/inputmethod/latin/suggestions/SuggestionStripLayoutHelper.java b/java/src/com/android/inputmethod/latin/suggestions/SuggestionStripLayoutHelper.java index 27a0f62ff..7991a2473 100644 --- a/java/src/com/android/inputmethod/latin/suggestions/SuggestionStripLayoutHelper.java +++ b/java/src/com/android/inputmethod/latin/suggestions/SuggestionStripLayoutHelper.java @@ -50,10 +50,10 @@ import com.android.inputmethod.latin.PunctuationSuggestions; import com.android.inputmethod.latin.R; import com.android.inputmethod.latin.SuggestedWords; import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; +import com.android.inputmethod.latin.common.LocaleUtils; import com.android.inputmethod.latin.settings.Settings; import com.android.inputmethod.latin.settings.SettingsValues; import com.android.inputmethod.latin.utils.ResourceUtils; -import com.android.inputmethod.latin.utils.SubtypeLocaleUtils; import com.android.inputmethod.latin.utils.ViewLayoutUtils; import java.util.ArrayList; @@ -570,8 +570,7 @@ final class SuggestionStripLayoutHelper { final boolean isRtlLanguage = (ViewCompat.getLayoutDirection(addToDictionaryStrip) == ViewCompat.LAYOUT_DIRECTION_RTL); final String arrow = isRtlLanguage ? RIGHTWARDS_ARROW : LEFTWARDS_ARROW; - final boolean isRtlSystem = SubtypeLocaleUtils.isRtlLanguage( - res.getConfiguration().locale); + final boolean isRtlSystem = LocaleUtils.isRtlLanguage(res.getConfiguration().locale); final CharSequence hint = res.getText(R.string.hint_add_to_dictionary); hintText = (isRtlLanguage == isRtlSystem) ? 
(arrow + hint) : (hint + arrow); hintWidth = width - wordWidth; diff --git a/java/src/com/android/inputmethod/latin/utils/SubtypeLocaleUtils.java b/java/src/com/android/inputmethod/latin/utils/SubtypeLocaleUtils.java index b36168b6c..013f024c0 100644 --- a/java/src/com/android/inputmethod/latin/utils/SubtypeLocaleUtils.java +++ b/java/src/com/android/inputmethod/latin/utils/SubtypeLocaleUtils.java @@ -27,11 +27,9 @@ import android.util.Log; import android.view.inputmethod.InputMethodSubtype; import com.android.inputmethod.latin.R; -import com.android.inputmethod.latin.RichInputMethodSubtype; import com.android.inputmethod.latin.common.LocaleUtils; import com.android.inputmethod.latin.common.StringUtils; -import java.util.Arrays; import java.util.HashMap; import java.util.Locale; @@ -56,7 +54,6 @@ public final class SubtypeLocaleUtils { private static volatile boolean sInitialized = false; private static final Object sInitializeLock = new Object(); private static Resources sResources; - private static String[] sPredefinedKeyboardLayoutSet; // Keyboard layout to its display name map. private static final HashMap<String, String> sKeyboardLayoutToDisplayNameMap = new HashMap<>(); // Keyboard layout to subtype name resource id map. 
@@ -103,7 +100,6 @@ public final class SubtypeLocaleUtils { sResources = res; final String[] predefinedLayoutSet = res.getStringArray(R.array.predefined_layouts); - sPredefinedKeyboardLayoutSet = predefinedLayoutSet; final String[] layoutDisplayNames = res.getStringArray( R.array.predefined_layout_display_names); for (int i = 0; i < predefinedLayoutSet.length; i++) { @@ -152,10 +148,6 @@ public final class SubtypeLocaleUtils { } } - public static String[] getPredefinedKeyboardLayoutSet() { - return sPredefinedKeyboardLayoutSet; - } - public static boolean isExceptionalLocale(final String localeString) { return sExceptionalLocaleToNameIdsMap.containsKey(localeString); } @@ -334,10 +326,6 @@ public final class SubtypeLocaleUtils { } @Nonnull - public static String getKeyboardLayoutSetName(@Nonnull final RichInputMethodSubtype subtype) { - return getKeyboardLayoutSetName(subtype.getRawSubtype()); - } - public static String getKeyboardLayoutSetName(final InputMethodSubtype subtype) { String keyboardLayoutSet = subtype.getExtraValueOf(KEYBOARD_LAYOUT_SET); if (keyboardLayoutSet == null) { @@ -357,22 +345,6 @@ public final class SubtypeLocaleUtils { return keyboardLayoutSet; } - // TODO: Get this information from the framework instead of maintaining here by ourselves. - // Sorted list of known Right-To-Left language codes. 
- private static final String[] SORTED_RTL_LANGUAGES = { - "ar", // Arabic - "fa", // Persian - "iw", // Hebrew - }; - static { - Arrays.sort(SORTED_RTL_LANGUAGES); - } - - public static boolean isRtlLanguage(final Locale locale) { - final String language = locale.getLanguage(); - return Arrays.binarySearch(SORTED_RTL_LANGUAGES, language) >= 0; - } - public static String getCombiningRulesExtraValue(final InputMethodSubtype subtype) { return subtype.getExtraValueOf(COMBINING_RULES); } diff --git a/native/dicttoolkit/NativeFileList.mk b/native/dicttoolkit/NativeFileList.mk index b6be9c541..1c004f73a 100644 --- a/native/dicttoolkit/NativeFileList.mk +++ b/native/dicttoolkit/NativeFileList.mk @@ -15,7 +15,23 @@ LATIN_IME_DICT_TOOLKIT_MAIN_SRC_FILES := \ dict_toolkit_main.cpp -LATIN_IME_DICT_TOOLKIT_SRC_FILES := +LATIN_IME_DICT_TOOLKIT_SRC_FILES := \ + $(addprefix command_executors/, \ + diff_executor.cpp \ + header_executor.cpp \ + help_executor.cpp \ + info_executor.cpp \ + makedict_executor.cpp) \ + $(addprefix offdevice_intermediate_dict/, \ + offdevice_intermediate_dict.cpp) \ + $(addprefix utils/, \ + command_utils.cpp \ + utf8_utils.cpp) LATIN_IME_DICT_TOOLKIT_TEST_FILES := \ - dict_toolkit_defines_test.cpp + dict_toolkit_defines_test.cpp \ + $(addprefix offdevice_intermediate_dict/, \ + offdevice_intermediate_dict_test.cpp) \ + $(addprefix utils/, \ + command_utils_test.cpp \ + utf8_utils_test.cpp) diff --git a/native/dicttoolkit/UnitTests.mk b/native/dicttoolkit/UnitTests.mk index d568db44a..96e28730e 100644 --- a/native/dicttoolkit/UnitTests.mk +++ b/native/dicttoolkit/UnitTests.mk @@ -46,6 +46,7 @@ include $(BUILD_HOST_STATIC_LIBRARY) include $(CLEAR_VARS) # TODO: Remove -std=c++11 once it is set by default on host build. 
+LOCAL_ADDRESS_SANITIZER := true LOCAL_CFLAGS += -std=c++11 -Wno-unused-parameter -Wno-unused-function LOCAL_CLANG := true LOCAL_CXX_STL := libc++ diff --git a/native/dicttoolkit/dict_toolkit_main.cpp b/native/dicttoolkit/dict_toolkit_main.cpp index d71b50eb4..53cc5e915 100644 --- a/native/dicttoolkit/dict_toolkit_main.cpp +++ b/native/dicttoolkit/dict_toolkit_main.cpp @@ -16,8 +16,24 @@ #include <cstdio> +#include "dict_toolkit_defines.h" +#include "utils/command_utils.h" + +void usage(int argc, char **argv) { + fprintf(stderr, "Usage: %s <command> [arguments]\n", argc > 0 ? argv[0] : "dicttoolkit"); +} + int main(int argc, char **argv) { - // TODO: Implement. - printf("%s\n", argv[0]); - return 0; + if (argc < MIN_ARG_COUNT) { + usage(argc, argv); + return 1; + } + using namespace latinime::dicttoolkit; + const CommandType commandType = CommandUtils::getCommandType(argv[1]); + if (commandType == CommandType::Unknown) { + CommandUtils::printCommandUnknownMessage(argv[0], argv[1]); + return 1; + } + const auto executor = CommandUtils::getCommandExecutor(commandType); + return executor(argc - 1, argv + 1); } diff --git a/native/dicttoolkit/src/command_executors/diff_executor.cpp b/native/dicttoolkit/src/command_executors/diff_executor.cpp new file mode 100644 index 000000000..077a40090 --- /dev/null +++ b/native/dicttoolkit/src/command_executors/diff_executor.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "command_executors/diff_executor.h" + +#include <cstdio> + +namespace latinime { +namespace dicttoolkit { + +const char *const DiffExecutor::COMMAND_NAME = "diff"; + +/* static */ int DiffExecutor::run(const int argc, char **argv) { + fprintf(stderr, "Command '%s' has not been implemented yet.\n", COMMAND_NAME); + return 0; +} + +/* static */ void DiffExecutor::printUsage() { + printf("*** %s\n", COMMAND_NAME); + printf("Usage: %s\n", COMMAND_NAME); + printf("Shows differences between two dictionaries.\n\n"); +} + +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/src/command_executors/diff_executor.h b/native/dicttoolkit/src/command_executors/diff_executor.h new file mode 100644 index 000000000..fc8dc0d8f --- /dev/null +++ b/native/dicttoolkit/src/command_executors/diff_executor.h @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LATINIME_DICT_TOOLKIT_DIFF_EXECUTOR_H +#define LATINIME_DICT_TOOLKIT_DIFF_EXECUTOR_H + +#include "dict_toolkit_defines.h" + +namespace latinime { +namespace dicttoolkit { + +class DiffExecutor final { + public: + static const char *const COMMAND_NAME; + + static int run(const int argc, char **argv); + static void printUsage(); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(DiffExecutor); +}; + +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_DIFF_EXECUTOR_H diff --git a/native/dicttoolkit/src/command_executors/header_executor.cpp b/native/dicttoolkit/src/command_executors/header_executor.cpp new file mode 100644 index 000000000..068a62c31 --- /dev/null +++ b/native/dicttoolkit/src/command_executors/header_executor.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "command_executors/header_executor.h" + +#include <cstdio> + +namespace latinime { +namespace dicttoolkit { + +const char *const HeaderExecutor::COMMAND_NAME = "header"; + +/* static */ int HeaderExecutor::run(const int argc, char **argv) { + fprintf(stderr, "Command '%s' has not been implemented yet.\n", COMMAND_NAME); + return 0; +} + +/* static */ void HeaderExecutor::printUsage() { + printf("*** %s\n", COMMAND_NAME); + printf("Usage: %s\n", COMMAND_NAME); + printf("Prints the header contents of a dictionary file.\n\n"); +} + +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/src/command_executors/header_executor.h b/native/dicttoolkit/src/command_executors/header_executor.h new file mode 100644 index 000000000..4cdeb1a99 --- /dev/null +++ b/native/dicttoolkit/src/command_executors/header_executor.h @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LATINIME_DICT_TOOLKIT_HEADER_EXECUTOR_H +#define LATINIME_DICT_TOOLKIT_HEADER_EXECUTOR_H + +#include "dict_toolkit_defines.h" + +namespace latinime { +namespace dicttoolkit { + +class HeaderExecutor final { + public: + static const char *const COMMAND_NAME; + + static int run(const int argc, char **argv); + static void printUsage(); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderExecutor); +}; + +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_HEADER_EXECUTOR_H diff --git a/native/dicttoolkit/src/command_executors/help_executor.cpp b/native/dicttoolkit/src/command_executors/help_executor.cpp new file mode 100644 index 000000000..bd29a5b16 --- /dev/null +++ b/native/dicttoolkit/src/command_executors/help_executor.cpp @@ -0,0 +1,52 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "command_executors/help_executor.h" + +#include <cstdio> +#include <functional> +#include <vector> + +#include "command_executors/diff_executor.h" +#include "command_executors/header_executor.h" +#include "command_executors/info_executor.h" +#include "command_executors/makedict_executor.h" +#include "utils/command_utils.h" + +namespace latinime { +namespace dicttoolkit { + +const char *const HelpExecutor::COMMAND_NAME = "help"; + +/* static */ int HelpExecutor::run(const int argc, char **argv) { + printf("Available commands:\n\n"); + const std::vector<std::function<void(void)>> printUsageMethods = {DiffExecutor::printUsage, + HeaderExecutor::printUsage, InfoExecutor::printUsage, MakedictExecutor::printUsage, + printUsage}; + for (const auto &printUsageMethod : printUsageMethods) { + printUsageMethod(); + } + return 0; +} + +/* static */ void HelpExecutor::printUsage() { + printf("*** %s\n", COMMAND_NAME); + printf("Usage: %s\n", COMMAND_NAME); + printf("Show this help list.\n\n"); +} + +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/src/command_executors/help_executor.h b/native/dicttoolkit/src/command_executors/help_executor.h new file mode 100644 index 000000000..280610eb9 --- /dev/null +++ b/native/dicttoolkit/src/command_executors/help_executor.h @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LATINIME_DICT_TOOLKIT_HELP_EXECUTOR_H +#define LATINIME_DICT_TOOLKIT_HELP_EXECUTOR_H + +#include "dict_toolkit_defines.h" + +namespace latinime { +namespace dicttoolkit { + +class HelpExecutor final { + public: + static const char *const COMMAND_NAME; + + static int run(const int argc, char **argv); + static void printUsage(); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(HelpExecutor); +}; + +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_HELP_EXECUTOR_H diff --git a/native/dicttoolkit/src/command_executors/info_executor.cpp b/native/dicttoolkit/src/command_executors/info_executor.cpp new file mode 100644 index 000000000..c4d84cab3 --- /dev/null +++ b/native/dicttoolkit/src/command_executors/info_executor.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "command_executors/info_executor.h" + +#include <cstdio> + +namespace latinime { +namespace dicttoolkit { + +const char *const InfoExecutor::COMMAND_NAME = "info"; + +/* static */ int InfoExecutor::run(const int argc, char **argv) { + fprintf(stderr, "Command '%s' has not been implemented yet.\n", COMMAND_NAME); + return 0; +} + +/* static */ void InfoExecutor::printUsage() { + printf("*** %s\n", COMMAND_NAME); + printf("Usage: %s\n", COMMAND_NAME); + printf("Prints various information about a dictionary file.\n\n"); +} + +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/src/command_executors/info_executor.h b/native/dicttoolkit/src/command_executors/info_executor.h new file mode 100644 index 000000000..4ffa74fb0 --- /dev/null +++ b/native/dicttoolkit/src/command_executors/info_executor.h @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LATINIME_DICT_TOOLKIT_INFO_EXECUTOR_H +#define LATINIME_DICT_TOOLKIT_INFO_EXECUTOR_H + +#include "dict_toolkit_defines.h" + +namespace latinime { +namespace dicttoolkit { + +class InfoExecutor final { + public: + static const char *const COMMAND_NAME; + + static int run(const int argc, char **argv); + static void printUsage(); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(InfoExecutor); +}; + +} // namepsace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_INFO_EXECUTOR_H diff --git a/native/dicttoolkit/src/command_executors/makedict_executor.cpp b/native/dicttoolkit/src/command_executors/makedict_executor.cpp new file mode 100644 index 000000000..ea62e3c37 --- /dev/null +++ b/native/dicttoolkit/src/command_executors/makedict_executor.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "command_executors/makedict_executor.h" + +#include <cstdio> + +namespace latinime { +namespace dicttoolkit { + +const char *const MakedictExecutor::COMMAND_NAME = "makedict"; + +/* static */ int MakedictExecutor::run(const int argc, char **argv) { + fprintf(stderr, "Command '%s' has not been implemented yet.\n", COMMAND_NAME); + return 0; +} + +/* static */ void MakedictExecutor::printUsage() { + printf("*** %s\n", COMMAND_NAME); + printf("Usage: %s\n", COMMAND_NAME); + printf("Converts a source dictionary file to one or several outputs.\n" + "Source can be a binary dictionary file or a combined format file.\n" + "Binary version 2 (Jelly Bean), 4, and combined format outputs are supported.\n\n"); +} + +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/src/command_executors/makedict_executor.h b/native/dicttoolkit/src/command_executors/makedict_executor.h new file mode 100644 index 000000000..ae1309f60 --- /dev/null +++ b/native/dicttoolkit/src/command_executors/makedict_executor.h @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LATINIME_DICT_TOOLKIT_MAKEDICT_EXECUTOR_H +#define LATINIME_DICT_TOOLKIT_MAKEDICT_EXECUTOR_H + +#include "dict_toolkit_defines.h" + +namespace latinime { +namespace dicttoolkit { + +class MakedictExecutor final { + public: + static const char *const COMMAND_NAME; + + static int run(const int argc, char **argv); + static void printUsage(); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(MakedictExecutor); +}; + +} // namespace dicttoolkit +} // namepsace latinime +#endif // LATINIME_DICT_TOOLKIT_MAKEDICT_EXECUTOR_H diff --git a/native/dicttoolkit/src/dict_toolkit_defines.h b/native/dicttoolkit/src/dict_toolkit_defines.h index 2a2104e26..dbaae0ca0 100644 --- a/native/dicttoolkit/src/dict_toolkit_defines.h +++ b/native/dicttoolkit/src/dict_toolkit_defines.h @@ -19,4 +19,6 @@ #include "defines.h" +#define MIN_ARG_COUNT 2 + #endif // LATINIME_DICT_TOOLKIT_DEFINES_H diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.cpp b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.cpp new file mode 100644 index 000000000..af28131cf --- /dev/null +++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.cpp @@ -0,0 +1,126 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "offdevice_intermediate_dict/offdevice_intermediate_dict.h" + +#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h" + +namespace latinime { +namespace dicttoolkit { + +bool OffdeviceIntermediateDict::addWord(const WordProperty &wordProperty) { + const CodePointArrayView codePoints = wordProperty.getCodePoints(); + if (codePoints.empty() || codePoints.size() > MAX_WORD_LENGTH) { + return false; + } + return addWordInner(codePoints, wordProperty, mRootPtNodeArray); +} + +bool OffdeviceIntermediateDict::addWordInner(const CodePointArrayView codePoints, + const WordProperty &wordProperty, OffdeviceIntermediateDictPtNodeArray &ptNodeArray) { + auto ptNodeList = ptNodeArray.getMutablePtNodeList(); + auto ptNodeIt = ptNodeList->begin(); + for (; ptNodeIt != ptNodeList->end(); ++ptNodeIt) { + const auto &ptNode = *ptNodeIt; + const CodePointArrayView ptNodeCodePoints = ptNode->getPtNodeCodePoints(); + if (codePoints[0] < ptNodeCodePoints[0]) { + continue; + } + if (codePoints[0] > ptNodeCodePoints[0]) { + break; + } + size_t i = 1; + for (; i < codePoints.size(); ++i) { + if (i >= ptNodeCodePoints.size()) { + // Add new child. + return addWordInner(codePoints.skip(i), wordProperty, + ptNode->getChildrenPtNodeArray()); + } + if (codePoints[i] != ptNodeCodePoints[i]) { + break; + } + } + if (codePoints.size() == i && codePoints.size() == ptNodeCodePoints.size()) { + // All code points matched. + if (ptNode->getWordProperty()) { + // Adding the same word multiple times is not supported. + return false; + } + ptNodeList->insert(ptNodeIt, + std::make_shared<OffdeviceIntermediateDictPtNode>(wordProperty, *ptNode)); + ptNodeList->erase(ptNodeIt); + return true; + } + // The (i+1)-th elements are different. + // Create and Add new parent ptNode for the common part. + auto newPtNode = codePoints.size() == i + ? 
std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints, wordProperty) + : std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints.limit(i)); + ptNodeList->insert(ptNodeIt, newPtNode); + OffdeviceIntermediateDictPtNodeArray &childrenPtNodeArray = + newPtNode->getChildrenPtNodeArray(); + // Add new child for the existing ptNode. + childrenPtNodeArray.getMutablePtNodeList()->push_back( + std::make_shared<OffdeviceIntermediateDictPtNode>( + ptNodeCodePoints.skip(i), *ptNode)); + ptNodeList->erase(ptNodeIt); + if (codePoints.size() != i) { + // Add a child for the new word. + return addWordInner(codePoints.skip(i), wordProperty, childrenPtNodeArray); + } + return true; + } + ptNodeList->insert(ptNodeIt, + std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints, wordProperty)); + return true; +} + +const WordProperty *OffdeviceIntermediateDict::getWordProperty( + const CodePointArrayView codePoints) const { + const OffdeviceIntermediateDictPtNodeArray *ptNodeArray = &mRootPtNodeArray; + for (size_t i = 0; i < codePoints.size();) { + bool foundNext = false; + for (const auto ptNode : ptNodeArray->getPtNodeList()) { + const CodePointArrayView ptNodeCodePoints = ptNode->getPtNodeCodePoints(); + if (codePoints[i] < ptNodeCodePoints[0]) { + continue; + } + if (codePoints[i] > ptNodeCodePoints[0] + || codePoints.size() < ptNodeCodePoints.size()) { + return nullptr; + } + for (size_t j = 1; j < ptNodeCodePoints.size(); ++j) { + if (codePoints[i + j] != ptNodeCodePoints[j]) { + return nullptr; + } + } + i += ptNodeCodePoints.size(); + if (i == codePoints.size()) { + return ptNode->getWordProperty(); + } + ptNodeArray = &ptNode->getChildrenPtNodeArray(); + foundNext = true; + break; + } + if (!foundNext) { + break; + } + } + return nullptr; +} + +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.h 
b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.h new file mode 100644 index 000000000..13d26ba91 --- /dev/null +++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.h @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_H +#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_H + +#include "dict_toolkit_defines.h" +#include "offdevice_intermediate_dict/offdevice_intermediate_dict_header.h" +#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h" +#include "suggest/core/dictionary/property/word_property.h" +#include "utils/int_array_view.h" + +namespace latinime { +namespace dicttoolkit { + +/** + * On memory patricia trie to represent a dictionary. + */ +class OffdeviceIntermediateDict final { + public: + OffdeviceIntermediateDict(const OffdeviceIntermediateDictHeader &header) + : mHeader(header), mRootPtNodeArray() {} + + bool addWord(const WordProperty &wordProperty); + // The returned value will be invalid after modifying the dictionary. e.g. calling addWord(). 
+ const WordProperty *getWordProperty(const CodePointArrayView codePoints) const; + const OffdeviceIntermediateDictHeader &getHeader() const { return mHeader; } + + private: + DISALLOW_ASSIGNMENT_OPERATOR(OffdeviceIntermediateDict); + + const OffdeviceIntermediateDictHeader mHeader; + OffdeviceIntermediateDictPtNodeArray mRootPtNodeArray; + + bool addWordInner(const CodePointArrayView codePoints, const WordProperty &wordProperty, + OffdeviceIntermediateDictPtNodeArray &ptNodeArray); +}; + +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_H diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_header.h b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_header.h new file mode 100644 index 000000000..440627a79 --- /dev/null +++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_header.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_HEADER_H +#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_HEADER_H + +#include <map> +#include <vector> + +#include "dict_toolkit_defines.h" + +namespace latinime { +namespace dicttoolkit { + +class OffdeviceIntermediateDictHeader final { + public: + using AttributeMap = std::map<std::vector<int>, std::vector<int>>; + + OffdeviceIntermediateDictHeader(const AttributeMap &attributesMap) + : mAttributeMap(attributesMap) {} + + private: + DISALLOW_DEFAULT_CONSTRUCTOR(OffdeviceIntermediateDictHeader); + DISALLOW_ASSIGNMENT_OPERATOR(OffdeviceIntermediateDictHeader); + + const AttributeMap mAttributeMap; +}; + +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_HEADER_H diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h new file mode 100644 index 000000000..721ccd778 --- /dev/null +++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_H +#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_H + +#include <memory> + +#include "dict_toolkit_defines.h" +#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h" +#include "suggest/core/dictionary/property/word_property.h" +#include "utils/int_array_view.h" + +namespace latinime { +namespace dicttoolkit { + +class OffdeviceIntermediateDictPtNode final { + public: + // Non-terminal + OffdeviceIntermediateDictPtNode(const CodePointArrayView ptNodeCodePoints) + : mPtNodeCodePoints(ptNodeCodePoints.toVector()), mChildrenPtNodeArray(), + mWortProperty(nullptr) {} + + // Terminal + OffdeviceIntermediateDictPtNode(const CodePointArrayView ptNodeCodePoints, + const WordProperty &wordProperty) + : mPtNodeCodePoints(ptNodeCodePoints.toVector()), mChildrenPtNodeArray(), + mWortProperty(new WordProperty(wordProperty)) {} + + // Replacing PtNodeCodePoints. + OffdeviceIntermediateDictPtNode(const CodePointArrayView ptNodeCodePoints, + const OffdeviceIntermediateDictPtNode &ptNode) + : mPtNodeCodePoints(ptNodeCodePoints.toVector()), + mChildrenPtNodeArray(ptNode.mChildrenPtNodeArray), + mWortProperty(new WordProperty(*ptNode.mWortProperty)) {} + + // Replacing WordProperty. 
+ OffdeviceIntermediateDictPtNode(const WordProperty &wordProperty, + const OffdeviceIntermediateDictPtNode &ptNode) + : mPtNodeCodePoints(ptNode.mPtNodeCodePoints), + mChildrenPtNodeArray(ptNode.mChildrenPtNodeArray), + mWortProperty(new WordProperty(wordProperty)) {} + + const WordProperty *getWordProperty() const { + return mWortProperty.get(); + } + + const CodePointArrayView getPtNodeCodePoints() const { + return CodePointArrayView(mPtNodeCodePoints); + } + + OffdeviceIntermediateDictPtNodeArray &getChildrenPtNodeArray() { + return mChildrenPtNodeArray; + } + + private: + DISALLOW_COPY_AND_ASSIGN(OffdeviceIntermediateDictPtNode); + + const std::vector<int> mPtNodeCodePoints; + OffdeviceIntermediateDictPtNodeArray mChildrenPtNodeArray; + const std::unique_ptr<WordProperty> mWortProperty; +}; + +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_H diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h new file mode 100644 index 000000000..f87456ce0 --- /dev/null +++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h @@ -0,0 +1,48 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_ARRAY_H +#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_ARRAY_H + +#include <list> +#include <memory> + +#include "dict_toolkit_defines.h" + +namespace latinime { +namespace dicttoolkit { + +class OffdeviceIntermediateDictPtNode; + +class OffdeviceIntermediateDictPtNodeArray final { + public: + const std::list<std::shared_ptr<OffdeviceIntermediateDictPtNode>> &getPtNodeList() const { + return mPtNodes; + } + + std::list<std::shared_ptr<OffdeviceIntermediateDictPtNode>> *getMutablePtNodeList() { + return &mPtNodes; + } + + private: + DISALLOW_ASSIGNMENT_OPERATOR(OffdeviceIntermediateDictPtNodeArray); + + std::list<std::shared_ptr<OffdeviceIntermediateDictPtNode>> mPtNodes; +}; + +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_ARRAY_H diff --git a/native/dicttoolkit/src/utils/command_utils.cpp b/native/dicttoolkit/src/utils/command_utils.cpp new file mode 100644 index 000000000..34196425e --- /dev/null +++ b/native/dicttoolkit/src/utils/command_utils.cpp @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "utils/command_utils.h" + +#include <cstdio> + +#include "command_executors/diff_executor.h" +#include "command_executors/header_executor.h" +#include "command_executors/help_executor.h" +#include "command_executors/info_executor.h" +#include "command_executors/makedict_executor.h" + +namespace latinime { +namespace dicttoolkit { + +/* static */ CommandType CommandUtils::getCommandType(const std::string &commandName) { + if (commandName == InfoExecutor::COMMAND_NAME) { + return CommandType::Info; + } else if (commandName == DiffExecutor::COMMAND_NAME) { + return CommandType::Diff; + } else if (commandName == MakedictExecutor::COMMAND_NAME) { + return CommandType::Makedict; + } else if (commandName == HeaderExecutor::COMMAND_NAME) { + return CommandType::Header; + } else if (commandName == HelpExecutor::COMMAND_NAME) { + return CommandType::Help; + } else { + return CommandType::Unknown; + } +} + +/* static */ void CommandUtils::printCommandUnknownMessage(const std::string &programName, + const std::string &commandName) { + fprintf(stderr, "Command '%s' is unknown. 
Try '%s %s' for more information.\n", + commandName.c_str(), programName.c_str(), HelpExecutor::COMMAND_NAME); +} + +/* static */ std::function<int(int, char **)> CommandUtils::getCommandExecutor( + const CommandType commandType) { + switch (commandType) { + case CommandType::Info: + return InfoExecutor::run; + case CommandType::Diff: + return DiffExecutor::run; + case CommandType::Makedict: + return MakedictExecutor::run; + case CommandType::Header: + return HeaderExecutor::run; + case CommandType::Help: + return HelpExecutor::run; + default: + return [] (int, char **) -> int { + printf("Command executor not found."); + return 1; + }; + } +} + +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/src/utils/command_utils.h b/native/dicttoolkit/src/utils/command_utils.h new file mode 100644 index 000000000..4a181f194 --- /dev/null +++ b/native/dicttoolkit/src/utils/command_utils.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LATINIME_DICT_TOOLKIT_COMMAND_UTILS_H +#define LATINIME_DICT_TOOLKIT_COMMAND_UTILS_H + +#include <functional> +#include <memory> +#include <string> + +#include "dict_toolkit_defines.h" + +namespace latinime { +namespace dicttoolkit { + +enum class CommandType : int { + Info, + Diff, + Makedict, + Header, + Help, + Unknown +}; + +class CommandUtils { +public: + static CommandType getCommandType(const std::string &commandName); + static void printCommandUnknownMessage(const std::string &programName, + const std::string &commandName); + static std::function<int(int, char **)> getCommandExecutor(const CommandType commandType); + +private: + DISALLOW_IMPLICIT_CONSTRUCTORS(CommandUtils); +}; +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_COMMAND_UTILS_H diff --git a/native/dicttoolkit/src/utils/utf8_utils.cpp b/native/dicttoolkit/src/utils/utf8_utils.cpp new file mode 100644 index 000000000..0f349f512 --- /dev/null +++ b/native/dicttoolkit/src/utils/utf8_utils.cpp @@ -0,0 +1,119 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "utils/utf8_utils.h" + +#include "utils/char_utils.h" + +namespace latinime { +namespace dicttoolkit { + +const size_t Utf8Utils::MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT = 4; +const uint8_t Utf8Utils::FIRST_BYTE_MARKER_MASKS[] = {0, 0x80, 0xE0, 0xF0, 0xF8}; +const uint8_t Utf8Utils::FIRST_BYTE_MARKERS[] = {0, 0x00, 0xC0, 0xE0, 0xF0}; +const uint8_t Utf8Utils::FIRST_BYTE_CODE_POINT_BITS_MASKS[] = {0, 0x7F, 0x1F, 0x0F, 0x03}; +const int Utf8Utils::MAX_ENCODED_CODE_POINT_VALUES[] = {-1, 0x7F, 0x7FF, 0xFFFF, 0x10FFFF}; + +const uint8_t Utf8Utils::TRAILING_BYTE_CODE_POINT_BITS_MASK = 0x3F; +const uint8_t Utf8Utils::TRAILING_BYTE_MARKER = 0x80; +const size_t Utf8Utils::CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE = 6; + +/* static */ std::vector<int> Utf8Utils::getCodePoints(const std::string &utf8Str) { + std::vector<int> codePoints; + int remainingByteCountForCurrentCodePoint = 0; + int currentCodePointSequenceSize = 0; + int codePoint = 0; + for (const char c : utf8Str) { + if (remainingByteCountForCurrentCodePoint == 0) { + currentCodePointSequenceSize = getSequenceSizeByCheckingFirstByte(c); + if (currentCodePointSequenceSize <= 0) { + AKLOGE("%x is an invalid utf8 first byte value.", c); + return std::vector<int>(); + } + remainingByteCountForCurrentCodePoint = currentCodePointSequenceSize; + codePoint = maskFirstByte(c, remainingByteCountForCurrentCodePoint); + } else { + codePoint <<= CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE; + codePoint += maskTrailingByte(c); + } + remainingByteCountForCurrentCodePoint--; + if (remainingByteCountForCurrentCodePoint == 0) { + if (codePoint <= MAX_ENCODED_CODE_POINT_VALUES[currentCodePointSequenceSize - 1]) { + AKLOGE("%d bytes encode for codePoint(%x) is a redundant UTF-8 sequence.", + currentCodePointSequenceSize, codePoint); + return std::vector<int>(); + } + codePoints.push_back(codePoint); + } + } + return codePoints; +} + +/* static */ int Utf8Utils::getSequenceSizeByCheckingFirstByte(const uint8_t firstByte) { + for 
(size_t i = 1; i <= MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT; ++i) { + if ((firstByte & FIRST_BYTE_MARKER_MASKS[i]) == FIRST_BYTE_MARKERS[i]) { + return i; + } + } + // Not a valid utf8 char first byte. + return -1; +} + +/* static */ AK_FORCE_INLINE int Utf8Utils::maskFirstByte(const uint8_t firstByte, + const int sequenceSize) { + return firstByte & FIRST_BYTE_CODE_POINT_BITS_MASKS[sequenceSize]; +} + +/* static */ AK_FORCE_INLINE int Utf8Utils::maskTrailingByte(const uint8_t secondOrLaterByte) { + return secondOrLaterByte & TRAILING_BYTE_CODE_POINT_BITS_MASK; +} + +/* static */ std::string Utf8Utils::getUtf8String(const CodePointArrayView codePoints) { + std::string utf8String; + for (const int codePoint : codePoints) { + const int sequenceSize = getSequenceSizeToEncodeCodePoint(codePoint); + if (sequenceSize <= 0) { + AKLOGE("Cannot encode code point (%d).", codePoint); + return std::string(); + } + const int trailingByteCount = sequenceSize - 1; + // Output first byte. + const int value = codePoint >> (trailingByteCount * CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE); + utf8String.push_back(static_cast<char>(value | FIRST_BYTE_MARKERS[sequenceSize])); + // Output second and later bytes. 
+ for (int i = 1; i < sequenceSize; ++i) { + const int shiftAmount = (trailingByteCount - i) * CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE; + const int value = (codePoint >> shiftAmount) & TRAILING_BYTE_CODE_POINT_BITS_MASK; + utf8String.push_back(static_cast<char>(value | TRAILING_BYTE_MARKER)); + } + } + return utf8String; +} + +/* static */ int Utf8Utils::getSequenceSizeToEncodeCodePoint(const int codePoint) { + if (codePoint < 0) { + return -1; + } + for (size_t i = 1; i <= MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT; ++i) { + if (codePoint <= MAX_ENCODED_CODE_POINT_VALUES[i]) { + return i; + } + } + return -1; +} + +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/src/utils/utf8_utils.h b/native/dicttoolkit/src/utils/utf8_utils.h new file mode 100644 index 000000000..35818e56c --- /dev/null +++ b/native/dicttoolkit/src/utils/utf8_utils.h @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LATINIME_DICT_TOOLKIT_UTF8_UTILS_H +#define LATINIME_DICT_TOOLKIT_UTF8_UTILS_H + +#include <cstdint> +#include <string> +#include <vector> + +#include "dict_toolkit_defines.h" +#include "utils/int_array_view.h" + +namespace latinime { +namespace dicttoolkit { + +class Utf8Utils { +public: + static std::vector<int> getCodePoints(const std::string &utf8Str); + static std::string getUtf8String(const CodePointArrayView codePoints); + +private: + DISALLOW_IMPLICIT_CONSTRUCTORS(Utf8Utils); + + // Values indexed by sequence size. + static const size_t MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT; + static const uint8_t FIRST_BYTE_MARKER_MASKS[]; + static const uint8_t FIRST_BYTE_MARKERS[]; + static const uint8_t FIRST_BYTE_CODE_POINT_BITS_MASKS[]; + static const int MAX_ENCODED_CODE_POINT_VALUES[]; + + static const uint8_t TRAILING_BYTE_CODE_POINT_BITS_MASK; + static const uint8_t TRAILING_BYTE_MARKER; + static const size_t CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE; + + static int getSequenceSizeByCheckingFirstByte(const uint8_t firstByte); + static int maskFirstByte(const uint8_t firstByte, const int encodeSize); + static int maskTrailingByte(const uint8_t secondOrLaterByte); + static int getSequenceSizeToEncodeCodePoint(const int codePoint); +}; +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_UTF8_UTILS_H diff --git a/native/dicttoolkit/tests/offdevice_intermediate_dict/offdevice_intermediate_dict_test.cpp b/native/dicttoolkit/tests/offdevice_intermediate_dict/offdevice_intermediate_dict_test.cpp new file mode 100644 index 000000000..f2e24ab5f --- /dev/null +++ b/native/dicttoolkit/tests/offdevice_intermediate_dict/offdevice_intermediate_dict_test.cpp @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "offdevice_intermediate_dict/offdevice_intermediate_dict.h" + +#include <gtest/gtest.h> + +#include <vector> + +#include "suggest/core/dictionary/property/word_property.h" +#include "utils/int_array_view.h" + +namespace latinime { +namespace dicttoolkit { +namespace { + +const std::vector<int> getCodePointVector(const char *str) { + std::vector<int> codePoints; + while (*str) { + codePoints.push_back(*str); + ++str; + } + return codePoints; +} + +const WordProperty getDummpWordProperty(const std::vector<int> &&codePoints) { + return WordProperty(std::move(codePoints), UnigramProperty(), std::vector<NgramProperty>()); +} + +TEST(OffdeviceIntermediateDictTest, TestAddWordProperties) { + OffdeviceIntermediateDict dict = OffdeviceIntermediateDict( + OffdeviceIntermediateDictHeader(OffdeviceIntermediateDictHeader::AttributeMap())); + EXPECT_EQ(nullptr, dict.getWordProperty(CodePointArrayView())); + + const WordProperty wordProperty0 = getDummpWordProperty(getCodePointVector("abcd")); + EXPECT_TRUE(dict.addWord(wordProperty0)); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty0.getCodePoints())); + + const WordProperty wordProperty1 = getDummpWordProperty(getCodePointVector("efgh")); + EXPECT_TRUE(dict.addWord(wordProperty1)); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty1.getCodePoints())); + + const WordProperty wordProperty2 = getDummpWordProperty(getCodePointVector("ab")); + EXPECT_TRUE(dict.addWord(wordProperty2)); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty2.getCodePoints())); + + const WordProperty 
wordProperty3 = getDummpWordProperty(getCodePointVector("abcdefg")); + EXPECT_TRUE(dict.addWord(wordProperty3)); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty3.getCodePoints())); + + const WordProperty wordProperty4 = getDummpWordProperty(getCodePointVector("efef")); + EXPECT_TRUE(dict.addWord(wordProperty4)); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty4.getCodePoints())); + + const WordProperty wordProperty5 = getDummpWordProperty(getCodePointVector("ef")); + EXPECT_TRUE(dict.addWord(wordProperty5)); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty5.getCodePoints())); + + const WordProperty wordProperty6 = getDummpWordProperty(getCodePointVector("abcd")); + EXPECT_FALSE(dict.addWord(wordProperty6)) << "Adding the same word multiple times should fail."; + + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty0.getCodePoints())); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty1.getCodePoints())); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty2.getCodePoints())); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty3.getCodePoints())); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty4.getCodePoints())); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty5.getCodePoints())); +} + +} // namespace +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/tests/utils/command_utils_test.cpp b/native/dicttoolkit/tests/utils/command_utils_test.cpp new file mode 100644 index 000000000..9d79c9dd9 --- /dev/null +++ b/native/dicttoolkit/tests/utils/command_utils_test.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "utils/command_utils.h" + +#include <gtest/gtest.h> + +namespace latinime { +namespace dicttoolkit { +namespace { + +TEST(CommandUtilsTests, TestGetCommandType) { + EXPECT_EQ(CommandUtils::getCommandType(""), CommandType::Unknown); + EXPECT_EQ(CommandUtils::getCommandType("abc"), CommandType::Unknown); + EXPECT_EQ(CommandUtils::getCommandType("info"), CommandType::Info); + EXPECT_EQ(CommandUtils::getCommandType("diff"), CommandType::Diff); + EXPECT_EQ(CommandUtils::getCommandType("makedict"), CommandType::Makedict); + EXPECT_EQ(CommandUtils::getCommandType("header"), CommandType::Header); + EXPECT_EQ(CommandUtils::getCommandType("help"), CommandType::Help); +} + +} // namespace +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/tests/utils/utf8_utils_test.cpp b/native/dicttoolkit/tests/utils/utf8_utils_test.cpp new file mode 100644 index 000000000..9c59a8b05 --- /dev/null +++ b/native/dicttoolkit/tests/utils/utf8_utils_test.cpp @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "utils/utf8_utils.h" + +#include <gtest/gtest.h> + +#include <vector> + +#include "utils/int_array_view.h" + +namespace latinime { +namespace dicttoolkit { +namespace { + +TEST(Utf8UtilsTests, TestGetCodePoints) { + { + const std::vector<int> codePoints = Utf8Utils::getCodePoints(""); + EXPECT_EQ(0u, codePoints.size()); + } + { + const std::vector<int> codePoints = Utf8Utils::getCodePoints("test"); + EXPECT_EQ(4u, codePoints.size()); + EXPECT_EQ('t', codePoints[0]); + EXPECT_EQ('e', codePoints[1]); + EXPECT_EQ('s', codePoints[2]); + EXPECT_EQ('t', codePoints[3]); + } + { + const std::vector<int> codePoints = Utf8Utils::getCodePoints(u8"\u3042a\u03C2\u0410"); + EXPECT_EQ(4u, codePoints.size()); + EXPECT_EQ(0x3042, codePoints[0]); // HIRAGANA LETTER A + EXPECT_EQ('a', codePoints[1]); + EXPECT_EQ(0x03C2, codePoints[2]); // GREEK SMALL LETTER FINAL SIGMA + EXPECT_EQ(0x0410, codePoints[3]); // CYRILLIC CAPITAL LETTER A + } + { + const std::vector<int> codePoints = Utf8Utils::getCodePoints(u8"\U0001F36A?\U0001F752"); + EXPECT_EQ(3u, codePoints.size()); + EXPECT_EQ(0x1F36A, codePoints[0]); // COOKIE + EXPECT_EQ('?', codePoints[1]); + EXPECT_EQ(0x1F752, codePoints[2]); // ALCHEMICAL SYMBOL FOR STARRED TRIDENT + } + + // Redundant UTF-8 sequences must be rejected.
+ EXPECT_TRUE(Utf8Utils::getCodePoints("\xC0\xAF").empty()); + EXPECT_TRUE(Utf8Utils::getCodePoints("\xE0\x80\xAF").empty()); + EXPECT_TRUE(Utf8Utils::getCodePoints("\xF0\x80\x80\xAF").empty()); +} + +TEST(Utf8UtilsTests, TestGetUtf8String) { + { + const std::vector<int> codePoints = {'t', 'e', 's', 't'}; + EXPECT_EQ("test", Utf8Utils::getUtf8String(CodePointArrayView(codePoints))); + } + { + const std::vector<int> codePoints = { + 0x00E0 /* LATIN SMALL LETTER A WITH GRAVE */, + 0x03C2 /* GREEK SMALL LETTER FINAL SIGMA */, + 0x0430 /* CYRILLIC SMALL LETTER A */, + 0x3042 /* HIRAGANA LETTER A */, + 0x1F36A /* COOKIE */, + 0x1F752 /* ALCHEMICAL SYMBOL FOR STARRED TRIDENT */ + }; + EXPECT_EQ(u8"\u00E0\u03C2\u0430\u3042\U0001F36A\U0001F752", + Utf8Utils::getUtf8String(CodePointArrayView(codePoints))); + } +} + +} // namespace +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index 118f600bb..9c065e0d1 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -35,6 +35,7 @@ #include "utils/int_array_view.h" #include "utils/jni_data_utils.h" #include "utils/log_utils.h" +#include "utils/profiler.h" #include "utils/time_keeper.h" namespace latinime { @@ -43,8 +44,8 @@ class ProximityInfo; static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring sourceDir, jlong dictOffset, jlong dictSize, jboolean isUpdatable) { - PROF_OPEN; - PROF_START(66); + PROF_INIT; + PROF_TIMER_START(66); const jsize sourceDirUtf8Length = env->GetStringUTFLength(sourceDir); if (sourceDirUtf8Length <= 0) { AKLOGE("DICT: Can't get sourceDir string"); @@ -63,8 +64,7 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring s Dictionary *const dictionary = new Dictionary(env, std::move(dictionaryStructureWithBufferPolicy)); - 
PROF_END(66); - PROF_CLOSE; + PROF_TIMER_END(66); return reinterpret_cast<jlong>(dictionary); } @@ -586,7 +586,7 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j } if (!dictionaryStructureWithBufferPolicy->addUnigramEntry( CodePointArrayView(wordCodePoints, wordCodePointCount), - wordProperty.getUnigramProperty())) { + &wordProperty.getUnigramProperty())) { LogUtils::logToJava(env, "Cannot add unigram to the new dict."); return false; } @@ -605,7 +605,7 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j return false; } } - for (const NgramProperty &ngramProperty : *wordProperty.getNgramProperties()) { + for (const NgramProperty &ngramProperty : wordProperty.getNgramProperties()) { if (!dictionaryStructureWithBufferPolicy->addNgramEntry(&ngramProperty)) { LogUtils::logToJava(env, "Cannot add ngram to the new dict."); return false; diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h index 885118524..0e67b4d5a 100644 --- a/native/jni/src/defines.h +++ b/native/jni/src/defines.h @@ -23,10 +23,10 @@ #define AK_FORCE_INLINE inline #endif // __GNUC__ -#if defined(FLAG_DO_PROFILE) || defined(FLAG_DBG) +#if defined(FLAG_DBG) #undef AK_FORCE_INLINE #define AK_FORCE_INLINE inline -#endif // defined(FLAG_DO_PROFILE) || defined(FLAG_DBG) +#endif // defined(FLAG_DBG) // Must be equal to Constants.Dictionary.MAX_WORD_LENGTH in Java #define MAX_WORD_LENGTH 48 @@ -172,69 +172,6 @@ static inline void showStackTrace() { #define INTS_TO_CHARS(input, length, output) #endif // defined(FLAG_DO_PROFILE) || defined(FLAG_DBG) -#ifdef FLAG_DO_PROFILE -// Profiler -#include <time.h> - -#define PROF_BUF_SIZE 100 -static float profile_buf[PROF_BUF_SIZE]; -static float profile_old[PROF_BUF_SIZE]; -static unsigned int profile_counter[PROF_BUF_SIZE]; - -#define PROF_RESET prof_reset() -#define PROF_COUNT(prof_buf_id) ++profile_counter[prof_buf_id] -#define PROF_OPEN do { PROF_RESET; PROF_START(PROF_BUF_SIZE - 1); } 
while (0) -#define PROF_START(prof_buf_id) do { \ - PROF_COUNT(prof_buf_id); profile_old[prof_buf_id] = (clock()); } while (0) -#define PROF_CLOSE do { PROF_END(PROF_BUF_SIZE - 1); PROF_OUTALL; } while (0) -#define PROF_END(prof_buf_id) profile_buf[prof_buf_id] += ((clock()) - profile_old[prof_buf_id]) -#define PROF_CLOCKOUT(prof_buf_id) \ - AKLOGI("%s : clock is %f", __FUNCTION__, (clock() - profile_old[prof_buf_id])) -#define PROF_OUTALL do { AKLOGI("--- %s ---", __FUNCTION__); prof_out(); } while (0) - -static inline void prof_reset(void) { - for (int i = 0; i < PROF_BUF_SIZE; ++i) { - profile_buf[i] = 0; - profile_old[i] = 0; - profile_counter[i] = 0; - } -} - -static inline void prof_out(void) { - if (profile_counter[PROF_BUF_SIZE - 1] != 1) { - AKLOGI("Error: You must call PROF_OPEN before PROF_CLOSE."); - } - AKLOGI("Total time is %6.3f ms.", - profile_buf[PROF_BUF_SIZE - 1] * 1000.0f / static_cast<float>(CLOCKS_PER_SEC)); - float all = 0.0f; - for (int i = 0; i < PROF_BUF_SIZE - 1; ++i) { - all += profile_buf[i]; - } - if (all < 1.0f) all = 1.0f; - for (int i = 0; i < PROF_BUF_SIZE - 1; ++i) { - if (profile_buf[i] > 0.0f) { - AKLOGI("(%d): Used %4.2f%%, %8.4f ms. 
Called %d times.", - i, (profile_buf[i] * 100.0f / all), - profile_buf[i] * 1000.0f / static_cast<float>(CLOCKS_PER_SEC), - profile_counter[i]); - } - } -} - -#else // FLAG_DO_PROFILE -#define PROF_BUF_SIZE 0 -#define PROF_RESET -#define PROF_COUNT(prof_buf_id) -#define PROF_OPEN -#define PROF_START(prof_buf_id) -#define PROF_CLOSE -#define PROF_END(prof_buf_id) -#define PROF_CLOCK_OUT(prof_buf_id) -#define PROF_CLOCKOUT(prof_buf_id) -#define PROF_OUTALL - -#endif // FLAG_DO_PROFILE - #ifdef FLAG_DBG #define DEBUG_DICT true #define DEBUG_DICT_FULL false diff --git a/native/jni/src/suggest/core/dictionary/property/word_property.h b/native/jni/src/suggest/core/dictionary/property/word_property.h index b5314faaa..d4db3f09f 100644 --- a/native/jni/src/suggest/core/dictionary/property/word_property.h +++ b/native/jni/src/suggest/core/dictionary/property/word_property.h @@ -23,6 +23,7 @@ #include "jni.h" #include "suggest/core/dictionary/property/ngram_property.h" #include "suggest/core/dictionary/property/unigram_property.h" +#include "utils/int_array_view.h" namespace latinime { @@ -33,10 +34,10 @@ class WordProperty { WordProperty() : mCodePoints(), mUnigramProperty(), mNgrams() {} - WordProperty(const std::vector<int> &&codePoints, const UnigramProperty *const unigramProperty, - const std::vector<NgramProperty> *const ngrams) - : mCodePoints(std::move(codePoints)), mUnigramProperty(*unigramProperty), - mNgrams(*ngrams) {} + WordProperty(const std::vector<int> &&codePoints, const UnigramProperty &unigramProperty, + const std::vector<NgramProperty> &ngrams) + : mCodePoints(std::move(codePoints)), mUnigramProperty(unigramProperty), + mNgrams(ngrams) {} void outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags, jintArray outProbabilityInfo, jobject outNgramPrevWordsArray, @@ -44,12 +45,16 @@ class WordProperty { jobject outNgramProbabilities, jobject outShortcutTargets, jobject outShortcutProbabilities) const; - const UnigramProperty 
*getUnigramProperty() const { - return &mUnigramProperty; + const CodePointArrayView getCodePoints() const { + return CodePointArrayView(mCodePoints); } - const std::vector<NgramProperty> *getNgramProperties() const { - return &mNgrams; + const UnigramProperty &getUnigramProperty() const { + return mUnigramProperty; + } + + const std::vector<NgramProperty> &getNgramProperties() const { + return mNgrams; } private: diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp index c372d668b..e5e9b46bf 100644 --- a/native/jni/src/suggest/core/suggest.cpp +++ b/native/jni/src/suggest/core/suggest.cpp @@ -29,6 +29,7 @@ #include "suggest/core/result/suggestions_output_utils.h" #include "suggest/core/session/dic_traverse_session.h" #include "suggest/core/suggest_options.h" +#include "utils/profiler.h" namespace latinime { @@ -48,8 +49,8 @@ void Suggest::getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs, int *times, int *pointerIds, int *inputCodePoints, int inputSize, const float weightOfLangModelVsSpatialModel, SuggestionResults *const outSuggestionResults) const { - PROF_OPEN; - PROF_START(0); + PROF_INIT; + PROF_TIMER_START(0); const float maxSpatialDistance = TRAVERSAL->getMaxSpatialDistance(); DicTraverseSession *tSession = static_cast<DicTraverseSession *>(traverseSession); tSession->setupForGetSuggestions(pInfo, inputCodePoints, inputSize, inputXs, inputYs, times, @@ -57,8 +58,8 @@ void Suggest::getSuggestions(ProximityInfo *pInfo, void *traverseSession, // TODO: Add the way to evaluate cache initializeSearch(tSession); - PROF_END(0); - PROF_START(1); + PROF_TIMER_END(0); + PROF_TIMER_START(1); // keep expanding search dicNodes until all have terminated. 
while (tSession->getDicTraverseCache()->activeSize() > 0) { @@ -66,12 +67,11 @@ void Suggest::getSuggestions(ProximityInfo *pInfo, void *traverseSession, tSession->getDicTraverseCache()->advanceActiveDicNodes(); tSession->getDicTraverseCache()->advanceInputIndex(inputSize); } - PROF_END(1); - PROF_START(2); + PROF_TIMER_END(1); + PROF_TIMER_START(2); SuggestionsOutputUtils::outputSuggestions( SCORING, tSession, weightOfLangModelVsSpatialModel, outSuggestionResults); - PROF_END(2); - PROF_CLOSE; + PROF_TIMER_END(2); } /** diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp index 9455222dd..ca7d93b0e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp @@ -614,7 +614,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty( const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(), ptNodeParams.isNotAWord(), ptNodeParams.isPossiblyOffensive(), ptNodeParams.getProbability(), *historicalInfo, std::move(shortcuts)); - return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams); + return WordProperty(wordCodePoints.toVector(), unigramProperty, ngrams); } int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp index 59873612a..1a51acad5 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp @@ -478,7 +478,7 @@ const WordProperty PatriciaTriePolicy::getWordProperty( 
const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(), ptNodeParams.isNotAWord(), ptNodeParams.isPossiblyOffensive(), ptNodeParams.getProbability(), HistoricalInfo(), std::move(shortcuts)); - return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams); + return WordProperty(wordCodePoints.toVector(), unigramProperty, ngrams); } int PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index 1992d4a5a..7449cd02b 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -560,7 +560,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty( wordAttributes.isNotAWord(), wordAttributes.isBlacklisted(), wordAttributes.isPossiblyOffensive(), wordAttributes.getProbability(), *historicalInfo, std::move(shortcuts)); - return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams); + return WordProperty(wordCodePoints.toVector(), unigramProperty, ngrams); } int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints, diff --git a/native/jni/src/utils/int_array_view.h b/native/jni/src/utils/int_array_view.h index 408373176..e0f671056 100644 --- a/native/jni/src/utils/int_array_view.h +++ b/native/jni/src/utils/int_array_view.h @@ -133,6 +133,29 @@ class IntArrayView { return std::vector<int>(begin(), end()); } + std::vector<IntArrayView> split(const int separator, const int limit = S_INT_MAX) const { + if (limit <= 0) { + return std::vector<IntArrayView>(); + } + std::vector<IntArrayView> result; + if (limit == 1) { + result.emplace_back(mPtr, mSize); + return result; + } + size_t startIndex = 0; + for (size_t i = 0; i < mSize; 
++i) { + if (mPtr[i] == separator) { + result.emplace_back(mPtr + startIndex, i - startIndex); + startIndex = i + 1; + if (result.size() >= static_cast<size_t>(limit - 1)) { + break; + } + } + } + result.emplace_back(mPtr + startIndex, mSize - startIndex); + return result; + } + private: DISALLOW_ASSIGNMENT_OPERATOR(IntArrayView); diff --git a/native/jni/src/utils/profiler.h b/native/jni/src/utils/profiler.h new file mode 100644 index 000000000..5f107fed3 --- /dev/null +++ b/native/jni/src/utils/profiler.h @@ -0,0 +1,86 @@ +/* + * Copyright (C) 2014, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_PROFILER_H +#define LATINIME_PROFILER_H + +#ifdef FLAG_DO_PROFILE + +#include "defines.h" + +#include <ctime> +#include <unordered_map> + +namespace latinime { + +class Profiler final { + public: + Profiler(const clockid_t clockId) + : mClockId(clockId), mStartTime(getTimeInMicroSec()), mStartTimes(), mTimes(), + mCounters() {} + + ~Profiler() { + const float totalTime = + static_cast<float>(getTimeInMicroSec() - mStartTime) / 1000.f; + AKLOGI("Total time is %6.3f ms.", totalTime); + for (const auto &time : mTimes) { + AKLOGI("(%d): Used %4.2f%%, %8.4f ms. 
Called %d times.", time.first, + time.second / totalTime * 100.0f, time.second, mCounters[time.first]); + } + } + + void startTimer(const int id) { + mStartTimes[id] = getTimeInMicroSec(); + } + + void endTimer(const int id) { + mTimes[id] += static_cast<float>(getTimeInMicroSec() - mStartTimes[id]) / 1000.0f; + mCounters[id]++; + } + + operator bool() const { return false; } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(Profiler); + + const clockid_t mClockId; + int64_t mStartTime; + std::unordered_map<int, int64_t> mStartTimes; + std::unordered_map<int, float> mTimes; + std::unordered_map<int, int> mCounters; + + int64_t getTimeInMicroSec() { + timespec time; + clock_gettime(mClockId, &time); + return static_cast<int64_t>(time.tv_sec) * 1000000 + + static_cast<int64_t>(time.tv_nsec) / 1000; + } +}; +} // namespace latinime + +#define PROF_INIT Profiler __LATINIME__PROFILER__(CLOCK_THREAD_CPUTIME_ID) +#define PROF_TIMER_START(timer_id) __LATINIME__PROFILER__.startTimer(timer_id) +#define PROF_TIMER_END(timer_id) __LATINIME__PROFILER__.endTimer(timer_id) + +#else // FLAG_DO_PROFILE + +#define PROF_INIT +#define PROF_TIMER_START(timer_id) +#define PROF_TIMER_END(timer_id) + +#endif // FLAG_DO_PROFILE + +#endif /* LATINIME_PROFILER_H */ diff --git a/native/jni/tests/utils/int_array_view_test.cpp b/native/jni/tests/utils/int_array_view_test.cpp index 4757a416b..2fce633f5 100644 --- a/native/jni/tests/utils/int_array_view_test.cpp +++ b/native/jni/tests/utils/int_array_view_test.cpp @@ -151,5 +151,52 @@ TEST(IntArrayViewTest, TestToVector) { EXPECT_EQ(std::vector<int>(), CodePointArrayView().toVector()); } +TEST(IntArrayViewTest, TestSplit) { + EXPECT_TRUE(IntArrayView().split(0, 0).empty()); + { + const auto intArrayViews = IntArrayView().split(0, 1); + EXPECT_EQ(1u, intArrayViews.size()); + EXPECT_TRUE(intArrayViews[0].empty()); + } + { + const auto intArrayViews = IntArrayView().split(0, 100); + EXPECT_EQ(1u, intArrayViews.size()); + 
EXPECT_TRUE(intArrayViews[0].empty()); + } + + const std::vector<int> intVector = {1, 2, 3, 3, 2, 3}; + const IntArrayView intArrayView(intVector); + { + const auto intArrayViews = intArrayView.split(2); + EXPECT_EQ(3u, intArrayViews.size()); + EXPECT_EQ(std::vector<int>({1}), intArrayViews[0].toVector()); + EXPECT_EQ(std::vector<int>({3, 3}), intArrayViews[1].toVector()); + EXPECT_EQ(std::vector<int>({3}), intArrayViews[2].toVector()); + } + { + const auto intArrayViews = intArrayView.split(2, 2); + EXPECT_EQ(2u, intArrayViews.size()); + EXPECT_EQ(std::vector<int>({1}), intArrayViews[0].toVector()); + EXPECT_EQ(std::vector<int>({3, 3, 2, 3}), intArrayViews[1].toVector()); + } + { + const auto intArrayViews = intArrayView.split(2, 1); + EXPECT_EQ(1u, intArrayViews.size()); + EXPECT_EQ(intVector, intArrayViews[0].toVector()); + } + { + const auto intArrayViews = intArrayView.split(2, 0); + EXPECT_EQ(0u, intArrayViews.size()); + } + { + const auto intArrayViews = intArrayView.split(3); + EXPECT_EQ(4u, intArrayViews.size()); + EXPECT_EQ(std::vector<int>({1, 2}), intArrayViews[0].toVector()); + EXPECT_EQ(std::vector<int>(), intArrayViews[1].toVector()); + EXPECT_EQ(std::vector<int>({2}), intArrayViews[2].toVector()); + EXPECT_EQ(std::vector<int>(), intArrayViews[3].toVector()); + } +} + } // namespace } // namespace latinime diff --git a/tests/Android.mk b/tests/Android.mk index 7cdabf249..0b5449143 100644 --- a/tests/Android.mk +++ b/tests/Android.mk @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -LOCAL_PATH:= $(call my-dir) +LOCAL_PATH := $(call my-dir) include $(CLEAR_VARS) # We only want this apk build for tests. 
diff --git a/tests/src/com/android/inputmethod/keyboard/internal/LanguageOnSpacebarHelperTests.java b/tests/src/com/android/inputmethod/keyboard/internal/LanguageOnSpacebarHelperTests.java index e6198015a..6bb255b01 100644 --- a/tests/src/com/android/inputmethod/keyboard/internal/LanguageOnSpacebarHelperTests.java +++ b/tests/src/com/android/inputmethod/keyboard/internal/LanguageOnSpacebarHelperTests.java @@ -30,10 +30,11 @@ import com.android.inputmethod.latin.RichInputMethodSubtype; import com.android.inputmethod.latin.utils.AdditionalSubtypeUtils; import com.android.inputmethod.latin.utils.SubtypeLocaleUtils; -import java.util.Arrays; -import java.util.List; +import java.util.ArrayList; import java.util.Locale; +import javax.annotation.Nonnull; + @SmallTest public class LanguageOnSpacebarHelperTests extends AndroidTestCase { private final LanguageOnSpacebarHelper mLanguageOnSpacebarHelper = @@ -48,6 +49,7 @@ public class LanguageOnSpacebarHelperTests extends AndroidTestCase { RichInputMethodSubtype FR_CH_SWISS; RichInputMethodSubtype FR_CH_QWERTY; RichInputMethodSubtype FR_CH_QWERTZ; + RichInputMethodSubtype IW_HEBREW; RichInputMethodSubtype ZZ_QWERTY; @Override @@ -56,116 +58,160 @@ public class LanguageOnSpacebarHelperTests extends AndroidTestCase { final Context context = getContext(); RichInputMethodManager.init(context); mRichImm = RichInputMethodManager.getInstance(); - SubtypeLocaleUtils.init(context); - - EN_US_QWERTY = new RichInputMethodSubtype(mRichImm.findSubtypeByLocaleAndKeyboardLayoutSet( - Locale.US.toString(), "qwerty")); - EN_GB_QWERTY = new RichInputMethodSubtype(mRichImm.findSubtypeByLocaleAndKeyboardLayoutSet( - Locale.UK.toString(), "qwerty")); - FR_AZERTY = new RichInputMethodSubtype(mRichImm.findSubtypeByLocaleAndKeyboardLayoutSet( - Locale.FRENCH.toString(), "azerty")); - FR_CA_QWERTY = new RichInputMethodSubtype(mRichImm.findSubtypeByLocaleAndKeyboardLayoutSet( - Locale.CANADA_FRENCH.toString(), "qwerty")); - FR_CH_SWISS = new 
RichInputMethodSubtype(mRichImm.findSubtypeByLocaleAndKeyboardLayoutSet( - "fr_CH", "swiss")); + + EN_US_QWERTY = findSubtypeOf(Locale.US.toString(), "qwerty"); + EN_GB_QWERTY = findSubtypeOf(Locale.UK.toString(), "qwerty"); + FR_AZERTY = findSubtypeOf(Locale.FRENCH.toString(), "azerty"); + FR_CA_QWERTY = findSubtypeOf(Locale.CANADA_FRENCH.toString(), "qwerty"); + FR_CH_SWISS = findSubtypeOf("fr_CH", "swiss"); FR_CH_QWERTZ = new RichInputMethodSubtype( AdditionalSubtypeUtils.createAsciiEmojiCapableAdditionalSubtype("fr_CH", "qwertz")); FR_CH_QWERTY = new RichInputMethodSubtype( AdditionalSubtypeUtils.createAsciiEmojiCapableAdditionalSubtype("fr_CH", "qwerty")); - ZZ_QWERTY = new RichInputMethodSubtype(mRichImm.findSubtypeByLocaleAndKeyboardLayoutSet( - SubtypeLocaleUtils.NO_LANGUAGE, "qwerty")); + IW_HEBREW = findSubtypeOf("iw", "hebrew"); + ZZ_QWERTY = findSubtypeOf(SubtypeLocaleUtils.NO_LANGUAGE, "qwerty"); + } + + @Nonnull + private RichInputMethodSubtype findSubtypeOf(final String localeString, + final String keyboardLayoutSetName) { + final InputMethodSubtype subtype = mRichImm.findSubtypeByLocaleAndKeyboardLayoutSet( + localeString, keyboardLayoutSetName); + if (subtype == null) { + throw new RuntimeException("Can't find subtype of " + localeString + " with " + + keyboardLayoutSetName); + } + return new RichInputMethodSubtype(subtype); + } + + private void enableSubtypes(final RichInputMethodSubtype ... 
subtypes) { + final ArrayList<InputMethodSubtype> enabledSubtypes = new ArrayList<>(); + for (final RichInputMethodSubtype subtype : subtypes) { + enabledSubtypes.add(subtype.getRawSubtype()); + } + mLanguageOnSpacebarHelper.onUpdateEnabledSubtypes(enabledSubtypes); + } + + private void assertFormatType(final RichInputMethodSubtype subtype, + final boolean implicitlyEnabledSubtype, final Locale systemLocale, + final int expectedFormat) { + mLanguageOnSpacebarHelper.onSubtypeChanged(subtype, implicitlyEnabledSubtype, systemLocale); + assertEquals(subtype.getLocales()[0] + " implicitly=" + implicitlyEnabledSubtype + + " in " + systemLocale, expectedFormat, + mLanguageOnSpacebarHelper.getLanguageOnSpacebarFormatType(subtype)); + } + + public void testOneSubtypeImplicitlyEnabled() { + enableSubtypes(EN_US_QWERTY); + assertFormatType(EN_US_QWERTY, true, Locale.US, FORMAT_TYPE_NONE); + + enableSubtypes(EN_GB_QWERTY); + assertFormatType(EN_GB_QWERTY, true, Locale.UK, FORMAT_TYPE_NONE); + + enableSubtypes(FR_AZERTY); + assertFormatType(FR_AZERTY, true, Locale.FRANCE, FORMAT_TYPE_NONE); + + enableSubtypes(FR_CA_QWERTY); + assertFormatType(FR_CA_QWERTY, true, Locale.CANADA_FRENCH, FORMAT_TYPE_NONE); + } + + public void testOneSubtypeExplicitlyEnabled() { + enableSubtypes(EN_US_QWERTY); + assertFormatType(EN_US_QWERTY, false, Locale.UK, FORMAT_TYPE_LANGUAGE_ONLY); + assertFormatType(EN_US_QWERTY, false, Locale.FRANCE, FORMAT_TYPE_LANGUAGE_ONLY); + + enableSubtypes(EN_GB_QWERTY); + assertFormatType(EN_GB_QWERTY, false, Locale.US, FORMAT_TYPE_LANGUAGE_ONLY); + assertFormatType(EN_GB_QWERTY, false, Locale.FRANCE, FORMAT_TYPE_LANGUAGE_ONLY); + + enableSubtypes(FR_AZERTY); + assertFormatType(FR_AZERTY, false, Locale.US, FORMAT_TYPE_LANGUAGE_ONLY); + assertFormatType(FR_AZERTY, false, Locale.CANADA_FRENCH, FORMAT_TYPE_LANGUAGE_ONLY); + + enableSubtypes(FR_CA_QWERTY); + assertFormatType(FR_CA_QWERTY, false, Locale.US, FORMAT_TYPE_LANGUAGE_ONLY); + assertFormatType(FR_CA_QWERTY, 
false, Locale.FRANCE, FORMAT_TYPE_LANGUAGE_ONLY); } - private static List<InputMethodSubtype> asList(final InputMethodSubtype ... subtypes) { - return Arrays.asList(subtypes); + public void testOneSubtypeImplicitlyEnabledWithNoLanguageSubtype() { + final Locale Locale_IW = new Locale("iw"); + enableSubtypes(IW_HEBREW, ZZ_QWERTY); + // TODO: Should this be FORMAT_TYPE_NONE? + assertFormatType(IW_HEBREW, true, Locale_IW, FORMAT_TYPE_LANGUAGE_ONLY); + // TODO: Should this be FORMAT_TYPE_NONE? + assertFormatType(ZZ_QWERTY, true, Locale_IW, FORMAT_TYPE_FULL_LOCALE); } - public void testOneSubtype() { - mLanguageOnSpacebarHelper.updateEnabledSubtypes(asList(EN_US_QWERTY.getRawSubtype())); - mLanguageOnSpacebarHelper.updateIsSystemLanguageSameAsInputLanguage(true /* isSame */); - assertEquals("one same English (US)", FORMAT_TYPE_NONE, - mLanguageOnSpacebarHelper.getLanguageOnSpacebarFormatType(EN_US_QWERTY)); - assertEquals("one same NoLanguage", FORMAT_TYPE_FULL_LOCALE, - mLanguageOnSpacebarHelper.getLanguageOnSpacebarFormatType(ZZ_QWERTY)); - - mLanguageOnSpacebarHelper.updateEnabledSubtypes(asList(FR_AZERTY.getRawSubtype())); - mLanguageOnSpacebarHelper.updateIsSystemLanguageSameAsInputLanguage(false /* isSame */); - assertEquals("one diff English (US)", FORMAT_TYPE_LANGUAGE_ONLY, - mLanguageOnSpacebarHelper.getLanguageOnSpacebarFormatType(EN_US_QWERTY)); - assertEquals("one diff NoLanguage", FORMAT_TYPE_FULL_LOCALE, - mLanguageOnSpacebarHelper.getLanguageOnSpacebarFormatType(ZZ_QWERTY)); + public void testTwoSubtypesExplicitlyEnabled() { + enableSubtypes(EN_US_QWERTY, FR_AZERTY); + assertFormatType(EN_US_QWERTY, false, Locale.US, FORMAT_TYPE_LANGUAGE_ONLY); + assertFormatType(FR_AZERTY, false, Locale.US, FORMAT_TYPE_LANGUAGE_ONLY); + assertFormatType(EN_US_QWERTY, false, Locale.FRANCE, FORMAT_TYPE_LANGUAGE_ONLY); + assertFormatType(FR_AZERTY, false, Locale.FRANCE, FORMAT_TYPE_LANGUAGE_ONLY); + assertFormatType(EN_US_QWERTY, false, Locale.JAPAN, 
FORMAT_TYPE_LANGUAGE_ONLY); + assertFormatType(FR_AZERTY, false, Locale.JAPAN, FORMAT_TYPE_LANGUAGE_ONLY); + + enableSubtypes(EN_US_QWERTY, ZZ_QWERTY); + assertFormatType(EN_US_QWERTY, false, Locale.US, FORMAT_TYPE_LANGUAGE_ONLY); + assertFormatType(ZZ_QWERTY, false, Locale.US, FORMAT_TYPE_FULL_LOCALE); + assertFormatType(EN_US_QWERTY, false, Locale.FRANCE, FORMAT_TYPE_LANGUAGE_ONLY); + assertFormatType(ZZ_QWERTY, false, Locale.FRANCE, FORMAT_TYPE_FULL_LOCALE); + } - public void testTwoSubtypes() { - mLanguageOnSpacebarHelper.updateEnabledSubtypes(asList(EN_US_QWERTY.getRawSubtype(), - FR_AZERTY.getRawSubtype())); - mLanguageOnSpacebarHelper.updateIsSystemLanguageSameAsInputLanguage(true /* isSame */); - assertEquals("two same English (US)", FORMAT_TYPE_LANGUAGE_ONLY, - mLanguageOnSpacebarHelper.getLanguageOnSpacebarFormatType(EN_US_QWERTY)); - assertEquals("two same French)", FORMAT_TYPE_LANGUAGE_ONLY, - mLanguageOnSpacebarHelper.getLanguageOnSpacebarFormatType(FR_AZERTY)); - assertEquals("two same NoLanguage", FORMAT_TYPE_FULL_LOCALE, - mLanguageOnSpacebarHelper.getLanguageOnSpacebarFormatType(ZZ_QWERTY)); - - mLanguageOnSpacebarHelper.updateIsSystemLanguageSameAsInputLanguage(false /* isSame */); - assertEquals("two diff English (US)", FORMAT_TYPE_LANGUAGE_ONLY, - mLanguageOnSpacebarHelper.getLanguageOnSpacebarFormatType(EN_US_QWERTY)); - assertEquals("two diff French", FORMAT_TYPE_LANGUAGE_ONLY, - mLanguageOnSpacebarHelper.getLanguageOnSpacebarFormatType(FR_AZERTY)); - assertEquals("two diff NoLanguage", FORMAT_TYPE_FULL_LOCALE, - mLanguageOnSpacebarHelper.getLanguageOnSpacebarFormatType(ZZ_QWERTY)); + public void testMultiSubtypeWithSameLanuageAndSameLayout() { + // Explicitly enable en_US, en_GB, fr_CA, and no language keyboards.
+ enableSubtypes(EN_US_QWERTY, EN_GB_QWERTY, FR_CA_QWERTY, ZZ_QWERTY); + + assertFormatType(EN_US_QWERTY, false, Locale.US, FORMAT_TYPE_FULL_LOCALE); + assertFormatType(EN_GB_QWERTY, false, Locale.US, FORMAT_TYPE_FULL_LOCALE); + assertFormatType(FR_CA_QWERTY, false, Locale.US, FORMAT_TYPE_LANGUAGE_ONLY); + assertFormatType(ZZ_QWERTY, false, Locale.US, FORMAT_TYPE_FULL_LOCALE); + + assertFormatType(EN_US_QWERTY, false, Locale.JAPAN, FORMAT_TYPE_FULL_LOCALE); + assertFormatType(EN_GB_QWERTY, false, Locale.JAPAN, FORMAT_TYPE_FULL_LOCALE); + assertFormatType(FR_CA_QWERTY, false, Locale.JAPAN, FORMAT_TYPE_LANGUAGE_ONLY); + assertFormatType(ZZ_QWERTY, false, Locale.JAPAN, FORMAT_TYPE_FULL_LOCALE); } - public void testSameLanuageSubtypes() { - mLanguageOnSpacebarHelper.updateEnabledSubtypes( - asList(EN_US_QWERTY.getRawSubtype(), EN_GB_QWERTY.getRawSubtype(), - FR_AZERTY.getRawSubtype(), ZZ_QWERTY.getRawSubtype())); - - mLanguageOnSpacebarHelper.updateIsSystemLanguageSameAsInputLanguage(true /* isSame */); - assertEquals("two same English (US)", FORMAT_TYPE_FULL_LOCALE, - mLanguageOnSpacebarHelper.getLanguageOnSpacebarFormatType(EN_US_QWERTY)); - assertEquals("two same English (UK)", FORMAT_TYPE_FULL_LOCALE, - mLanguageOnSpacebarHelper.getLanguageOnSpacebarFormatType(EN_GB_QWERTY)); - assertEquals("two same NoLanguage", FORMAT_TYPE_FULL_LOCALE, - mLanguageOnSpacebarHelper.getLanguageOnSpacebarFormatType(ZZ_QWERTY)); - - mLanguageOnSpacebarHelper.updateIsSystemLanguageSameAsInputLanguage(false /* isSame */); - assertEquals("two diff English (US)", FORMAT_TYPE_FULL_LOCALE, - mLanguageOnSpacebarHelper.getLanguageOnSpacebarFormatType(EN_US_QWERTY)); - assertEquals("two diff English (UK)", FORMAT_TYPE_FULL_LOCALE, - mLanguageOnSpacebarHelper.getLanguageOnSpacebarFormatType(EN_GB_QWERTY)); - assertEquals("two diff NoLanguage", FORMAT_TYPE_FULL_LOCALE, - mLanguageOnSpacebarHelper.getLanguageOnSpacebarFormatType(ZZ_QWERTY)); + public void 
testMultiSubtypesWithSameLanguageButHaveDifferentLayout() { + enableSubtypes(FR_AZERTY, FR_CA_QWERTY, FR_CH_SWISS, FR_CH_QWERTZ); + + assertFormatType(FR_AZERTY, false, Locale.FRANCE, FORMAT_TYPE_LANGUAGE_ONLY); + assertFormatType(FR_CA_QWERTY, false, Locale.FRANCE, FORMAT_TYPE_LANGUAGE_ONLY); + assertFormatType(FR_CH_SWISS, false, Locale.FRANCE, FORMAT_TYPE_LANGUAGE_ONLY); + assertFormatType(FR_CH_QWERTZ, false, Locale.FRANCE, FORMAT_TYPE_LANGUAGE_ONLY); + + assertFormatType(FR_AZERTY, false, Locale.CANADA_FRENCH, FORMAT_TYPE_LANGUAGE_ONLY); + assertFormatType(FR_CA_QWERTY, false, Locale.CANADA_FRENCH, FORMAT_TYPE_LANGUAGE_ONLY); + assertFormatType(FR_CH_SWISS, false, Locale.CANADA_FRENCH, FORMAT_TYPE_LANGUAGE_ONLY); + assertFormatType(FR_CH_QWERTZ, false, Locale.CANADA_FRENCH, FORMAT_TYPE_LANGUAGE_ONLY); + + assertFormatType(FR_AZERTY, false, Locale.JAPAN, FORMAT_TYPE_LANGUAGE_ONLY); + assertFormatType(FR_CA_QWERTY, false, Locale.JAPAN, FORMAT_TYPE_LANGUAGE_ONLY); + assertFormatType(FR_CH_SWISS, false, Locale.JAPAN, FORMAT_TYPE_LANGUAGE_ONLY); + assertFormatType(FR_CH_QWERTZ, false, Locale.JAPAN, FORMAT_TYPE_LANGUAGE_ONLY); } - public void testMultiSameLanuageSubtypes() { - mLanguageOnSpacebarHelper.updateEnabledSubtypes( - asList(FR_AZERTY.getRawSubtype(), FR_CA_QWERTY.getRawSubtype(), - FR_CH_SWISS.getRawSubtype(), FR_CH_QWERTY.getRawSubtype(), - FR_CH_QWERTZ.getRawSubtype())); - - mLanguageOnSpacebarHelper.updateIsSystemLanguageSameAsInputLanguage(true /* isSame */); - assertEquals("multi same French", FORMAT_TYPE_LANGUAGE_ONLY, - mLanguageOnSpacebarHelper.getLanguageOnSpacebarFormatType(FR_AZERTY)); - assertEquals("multi same French (CA)", FORMAT_TYPE_FULL_LOCALE, - mLanguageOnSpacebarHelper.getLanguageOnSpacebarFormatType(FR_CA_QWERTY)); - assertEquals("multi same French (CH)", FORMAT_TYPE_LANGUAGE_ONLY, - mLanguageOnSpacebarHelper.getLanguageOnSpacebarFormatType(FR_CH_SWISS)); - assertEquals("multi same French (CH) (QWERTY)", FORMAT_TYPE_FULL_LOCALE, - 
mLanguageOnSpacebarHelper.getLanguageOnSpacebarFormatType(FR_CH_QWERTY)); - assertEquals("multi same French (CH) (QWERTZ)", FORMAT_TYPE_LANGUAGE_ONLY, - mLanguageOnSpacebarHelper.getLanguageOnSpacebarFormatType(FR_CH_QWERTZ)); - - mLanguageOnSpacebarHelper.updateIsSystemLanguageSameAsInputLanguage(false /* isSame */); - assertEquals("multi diff French", FORMAT_TYPE_LANGUAGE_ONLY, - mLanguageOnSpacebarHelper.getLanguageOnSpacebarFormatType(FR_AZERTY)); - assertEquals("multi diff French (CA)", FORMAT_TYPE_FULL_LOCALE, - mLanguageOnSpacebarHelper.getLanguageOnSpacebarFormatType(FR_CA_QWERTY)); - assertEquals("multi diff French (CH)", FORMAT_TYPE_LANGUAGE_ONLY, - mLanguageOnSpacebarHelper.getLanguageOnSpacebarFormatType(FR_CH_SWISS)); - assertEquals("multi diff French (CH) (QWERTY)", FORMAT_TYPE_FULL_LOCALE, - mLanguageOnSpacebarHelper.getLanguageOnSpacebarFormatType(FR_CH_QWERTY)); - assertEquals("multi diff French (CH) (QWERTZ)", FORMAT_TYPE_LANGUAGE_ONLY, - mLanguageOnSpacebarHelper.getLanguageOnSpacebarFormatType(FR_CH_QWERTZ)); + public void testMultiSubtypesWithSameLanguageAndMayHaveSameLayout() { + enableSubtypes(FR_AZERTY, FR_CA_QWERTY, FR_CH_SWISS, FR_CH_QWERTY, FR_CH_QWERTZ); + + assertFormatType(FR_AZERTY, false, Locale.FRANCE, FORMAT_TYPE_LANGUAGE_ONLY); + assertFormatType(FR_CA_QWERTY, false, Locale.FRANCE, FORMAT_TYPE_FULL_LOCALE); + assertFormatType(FR_CH_SWISS, false, Locale.FRANCE, FORMAT_TYPE_LANGUAGE_ONLY); + assertFormatType(FR_CH_QWERTY, false, Locale.FRANCE, FORMAT_TYPE_FULL_LOCALE); + assertFormatType(FR_CH_QWERTZ, false, Locale.FRANCE, FORMAT_TYPE_LANGUAGE_ONLY); + + assertFormatType(FR_AZERTY, false, Locale.CANADA_FRENCH, FORMAT_TYPE_LANGUAGE_ONLY); + assertFormatType(FR_CA_QWERTY, false, Locale.CANADA_FRENCH, FORMAT_TYPE_FULL_LOCALE); + assertFormatType(FR_CH_SWISS, false, Locale.CANADA_FRENCH, FORMAT_TYPE_LANGUAGE_ONLY); + assertFormatType(FR_CH_QWERTY, false, Locale.CANADA_FRENCH, FORMAT_TYPE_FULL_LOCALE); + assertFormatType(FR_CH_QWERTZ, 
false, Locale.CANADA_FRENCH, FORMAT_TYPE_LANGUAGE_ONLY); + + assertFormatType(FR_AZERTY, false, Locale.JAPAN, FORMAT_TYPE_LANGUAGE_ONLY); + assertFormatType(FR_CA_QWERTY, false, Locale.JAPAN, FORMAT_TYPE_FULL_LOCALE); + assertFormatType(FR_CH_SWISS, false, Locale.JAPAN, FORMAT_TYPE_LANGUAGE_ONLY); + assertFormatType(FR_CH_QWERTY, false, Locale.JAPAN, FORMAT_TYPE_FULL_LOCALE); + assertFormatType(FR_CH_QWERTZ, false, Locale.JAPAN, FORMAT_TYPE_LANGUAGE_ONLY); } } diff --git a/tests/src/com/android/inputmethod/latin/utils/SpacebarLanguageUtilsTests.java b/tests/src/com/android/inputmethod/latin/RichInputMethodSubtypeTests.java index 83afd782d..aed7d6ad6 100644 --- a/tests/src/com/android/inputmethod/latin/utils/SpacebarLanguageUtilsTests.java +++ b/tests/src/com/android/inputmethod/latin/RichInputMethodSubtypeTests.java @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011 The Android Open Source Project + * Copyright (C) 2014 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ * limitations under the License. */ -package com.android.inputmethod.latin.utils; +package com.android.inputmethod.latin; import android.content.Context; import android.content.res.Resources; @@ -26,12 +26,15 @@ import android.view.inputmethod.InputMethodSubtype; import com.android.inputmethod.latin.R; import com.android.inputmethod.latin.RichInputMethodManager; import com.android.inputmethod.latin.RichInputMethodSubtype; +import com.android.inputmethod.latin.utils.AdditionalSubtypeUtils; +import com.android.inputmethod.latin.utils.RunInLocale; +import com.android.inputmethod.latin.utils.SubtypeLocaleUtils; import java.util.ArrayList; import java.util.Locale; @SmallTest -public class SpacebarLanguageUtilsTests extends AndroidTestCase { +public class RichInputMethodSubtypeTests extends AndroidTestCase { // All input method subtypes of LatinIME. 
private final ArrayList<RichInputMethodSubtype> mSubtypesList = new ArrayList<>(); diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java index 6c60fdc0c..d833b9736 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java @@ -26,7 +26,6 @@ import com.android.inputmethod.latin.BinaryDictionary; import com.android.inputmethod.latin.common.CodePointUtils; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; -import com.android.inputmethod.latin.makedict.FormatSpec.DictionaryOptions; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; diff --git a/tests/src/com/android/inputmethod/latin/utils/SubtypeLocaleUtilsTests.java b/tests/src/com/android/inputmethod/latin/utils/SubtypeLocaleUtilsTests.java index 54f478f5a..03dcdfc78 100644 --- a/tests/src/com/android/inputmethod/latin/utils/SubtypeLocaleUtilsTests.java +++ b/tests/src/com/android/inputmethod/latin/utils/SubtypeLocaleUtilsTests.java @@ -434,8 +434,8 @@ public class SubtypeLocaleUtilsTests extends AndroidTestCase { // locale layout | display name // ------ -------------- - ---------------------- // sr south_slavic F Српски - // sr_ZZ serbian_qwertz F српски (латиница) - // sr_ZZ qwerty T српски (QWERTY) + // sr_ZZ serbian_qwertz F Српски (латиница) + // sr_ZZ qwerty T Српски (QWERTY) public void testSerbianLatinSubtypesInSerbianSystemLocale() { final RunInLocale<Void> tests = new RunInLocale<Void>() { @@ -445,12 +445,10 @@ public class SubtypeLocaleUtilsTests extends AndroidTestCase { 
SubtypeLocaleUtils.getSubtypeDisplayNameInSystemLocale(SR)); // These are preliminary subtypes and may not exist. if (SR_LATN != null) { - // TODO: Uncommented because of the current translation of these strings - // in Seriban are described in Latin script. -// assertEquals("sr_ZZ", "српски (латиница)", -// SubtypeLocaleUtils.getSubtypeDisplayNameInSystemLocale(SR_LATN)); -// assertEquals("sr_ZZ", "српски (QWERTY)", -// SubtypeLocaleUtils.getSubtypeDisplayNameInSystemLocale(SR_LATN_QWERTY)); + assertEquals("sr_ZZ", "Српски (латиница)", + SubtypeLocaleUtils.getSubtypeDisplayNameInSystemLocale(SR_LATN)); + assertEquals("sr_ZZ", "Српски (QWERTY)", + SubtypeLocaleUtils.getSubtypeDisplayNameInSystemLocale(SR_LATN_QWERTY)); } return null; } diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java index 3ec28f313..84c3956f7 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java @@ -186,9 +186,7 @@ public final class BinaryDictOffdeviceUtils { throw new UnsupportedFormatException("Header too large"); } final byte[] headerBuffer = new byte[totalHeaderSize - tmpBuffer.length]; - if (headerBuffer.length != input.read(headerBuffer)) { - throw new UnsupportedFormatException("File shorter than specified in the header"); - } + readStreamExhaustively(input, headerBuffer); final HashMap<String, String> attributes = BinaryDictDecoderUtils.decodeHeaderAttributes(headerBuffer); return new DictionaryHeader(totalHeaderSize, new DictionaryOptions(attributes), @@ -196,6 +194,20 @@ public final class BinaryDictOffdeviceUtils { } } + private static void readStreamExhaustively(final InputStream inputStream, + final byte[] outBuffer) throws IOException, UnsupportedFormatException { + int readBytes = 0; + int 
readBytesLastCycle = -1; + while (readBytes != outBuffer.length) { + readBytesLastCycle = inputStream.read(outBuffer, readBytes, + outBuffer.length - readBytes); + if (readBytesLastCycle == -1) + throw new UnsupportedFormatException("File shorter than specified in the header" + + " (expected " + outBuffer.length + ", read " + readBytes + ")"); + readBytes += readBytesLastCycle; + } + } + public static void copy(final InputStream input, final OutputStream output) throws IOException { final byte[] buffer = new byte[COPY_BUFFER_SIZE]; for (int readBytes = input.read(buffer); readBytes >= 0; readBytes = input.read(buffer)) { diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Header.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Header.java index 51efdec33..ba96c0aeb 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Header.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Header.java @@ -62,6 +62,7 @@ public class Header extends Dicttool.Command { System.out.println("Dictionary : " + dictFile.getAbsolutePath()); System.out.println("Size : " + dictFile.length() + " bytes"); System.out.println("Format : Binary dictionary format"); + System.out.println("Format version : " + header.mFormatOptions.mVersion); System.out.println("Packaging : " + spec.describeChain()); System.out.println("Header attributes :"); System.out.print(header.mDictionaryOptions.toString(2 /* indentCount */, plumbing)); diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java index 4e5c0742e..3efa10a80 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java @@ -16,6 +16,8 @@ package com.android.inputmethod.latin.dicttool; +import com.android.inputmethod.latin.makedict.DictionaryHeader; + import java.io.BufferedOutputStream; 
import java.io.File; import java.io.FileNotFoundException; @@ -77,16 +79,16 @@ public class Package { if (mArgs.length != 2) { throw new RuntimeException("Too many/too few arguments for command " + COMMAND); } - final BinaryDictOffdeviceUtils.DecoderChainSpec<File> decodedSpec = - BinaryDictOffdeviceUtils.getRawDictionaryOrNull(new File(mArgs[0])); + final BinaryDictOffdeviceUtils.DecoderChainSpec<DictionaryHeader> decodedSpec = + BinaryDictOffdeviceUtils.decodeDictionaryForProcess(new File(mArgs[0]), + new BinaryDictOffdeviceUtils.HeaderReaderProcessor()); if (null == decodedSpec) { System.out.println(mArgs[0] + " does not seem to be a dictionary"); return; } System.out.println("Packaging : " + decodedSpec.describeChain()); - System.out.println("Uncompressed size : " + decodedSpec.mResult.length()); try ( - final InputStream input = getFileInputStream(decodedSpec.mResult); + final InputStream input = decodedSpec.getStream(new File(mArgs[0])); final OutputStream output = new BufferedOutputStream( getFileOutputStreamOrStdOut(mArgs[1])) ) { |