diff options
41 files changed, 3369 insertions, 373 deletions
diff --git a/java/Android.mk b/java/Android.mk index 4bb8986ee..b6bbbf8e7 100755 --- a/java/Android.mk +++ b/java/Android.mk @@ -15,7 +15,7 @@ LOCAL_STATIC_JAVA_LIBRARIES := android-common #LOCAL_AAPT_FLAGS := -0 .dict -LOCAL_SDK_VERSION := current +LOCAL_SDK_VERSION := 8 LOCAL_PROGUARD_FLAGS := -include $(LOCAL_PATH)/proguard.flags diff --git a/java/proguard.flags b/java/proguard.flags index 0a5d2dda9..829a096c0 100644 --- a/java/proguard.flags +++ b/java/proguard.flags @@ -1,3 +1,8 @@ -keep class com.android.inputmethod.latin.BinaryDictionary { int mDictLength; + <init>(...); +} + +-keep class com.android.inputmethod.latin.Suggest { + <init>(...); } diff --git a/java/res/layout-xlarge/input.xml b/java/res/layout-xlarge/input.xml new file mode 100644 index 000000000..6b0bb12a6 --- /dev/null +++ b/java/res/layout-xlarge/input.xml @@ -0,0 +1,31 @@ +<?xml version="1.0" encoding="utf-8"?> +<!-- +/* +** +** Copyright 2010, The Android Open Source Project +** +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. +** You may obtain a copy of the License at +** +** http://www.apache.org/licenses/LICENSE-2.0 +** +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. 
+*/ +--> + +<com.android.inputmethod.latin.LatinKeyboardView + xmlns:android="http://schemas.android.com/apk/res/android" + android:id="@android:id/keyboardView" + android:layout_alignParentBottom="true" + android:layout_width="match_parent" + android:layout_height="wrap_content" + android:background="@drawable/keyboard_background" + android:keyBackground="@drawable/btn_keyboard_key" + android:keyTextSize="32sp" + android:verticalCorrection="0dip" + /> diff --git a/java/res/layout-xlarge/input_trans.xml b/java/res/layout-xlarge/input_trans.xml new file mode 100755 index 000000000..1dfdc1555 --- /dev/null +++ b/java/res/layout-xlarge/input_trans.xml @@ -0,0 +1,31 @@ +<?xml version="1.0" encoding="utf-8"?> +<!-- +/* +** +** Copyright 2010, The Android Open Source Project +** +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. +** You may obtain a copy of the License at +** +** http://www.apache.org/licenses/LICENSE-2.0 +** +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. 
+*/ +--> + +<com.android.inputmethod.latin.LatinKeyboardView + xmlns:android="http://schemas.android.com/apk/res/android" + android:id="@android:id/keyboardView" + android:layout_alignParentBottom="true" + android:layout_width="match_parent" + android:layout_height="wrap_content" + android:background="@color/latinkeyboard_extension_background" + android:verticalCorrection="0dip" + android:keyTextSize="32sp" + android:keyBackground="@drawable/btn_keyboard_key_fulltrans" + /> diff --git a/java/res/layout/input_trans.xml b/java/res/layout/input_trans.xml index 94806f7e3..f66e8cd5c 100755 --- a/java/res/layout/input_trans.xml +++ b/java/res/layout/input_trans.xml @@ -24,7 +24,7 @@ android:layout_alignParentBottom="true" android:layout_width="match_parent" android:layout_height="wrap_content" - android:background="#A0000000" + android:background="@color/latinkeyboard_extension_background" android:verticalCorrection="0dip" android:keyBackground="@drawable/btn_keyboard_key_fulltrans" /> diff --git a/java/res/values-sr/strings.xml b/java/res/values-sr/strings.xml new file mode 100644 index 000000000..64f5af1f1 --- /dev/null +++ b/java/res/values-sr/strings.xml @@ -0,0 +1,313 @@ +<?xml version="1.0" encoding="utf-8"?> +<!-- +/* +** +** Copyright 2008, The Android Open Source Project +** +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. +** You may obtain a copy of the License at +** +** http://www.apache.org/licenses/LICENSE-2.0 +** +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. 
+*/ +--> +<resources xmlns:xliff="urn:oasis:names:tc:xliff:document:1.2"> + <!-- Title for Latin keyboard --> + <string name="english_ime_name">Андроидова тастатура</string> + <!-- Title for Latin keyboard settings activity / dialog --> + <string name="english_ime_settings">Подешавања андроидове тастатуре</string> + + <!-- Option to provide vibrate/haptic feedback on keypress --> + <string name="vibrate_on_keypress">Вибрације при притиску</string> + <!-- Option to play back sound on keypress in soft keyboard --> + <string name="sound_on_keypress">Звук при притиску</string> + + <!-- Option to enable using nearby keys when correcting/predicting --> + <string name="hit_correction">Исправљање грешака</string> + + <!-- Description for hit_correction --> + <string name="hit_correction_summary">Исправљање грешака при уносу</string> + + <!-- Option to enable using nearby keys when correcting/predicting in landscape--> + <string name="hit_correction_land">Грешке при водоравној оријентацији</string> + + <!-- Description for hit_correction in landscape --> + <string name="hit_correction_land_summary">Исправљање грешака при уносу при + водоравном положају</string> + + <!-- Option to automatically correct word on hitting space --> + <string name="auto_correction">Предлози речи</string> + + <!-- Description for auto_correction --> + <string name="auto_correction_summary">Аутоматска исправка претходно унесене речи</string> + + <!-- Option to enable text prediction --> + <string name="prediction">Предлози речи</string> + <!-- Category title for text prediction --> + <string name="prediction_category">Подешавања за предлоге речи</string> + <!-- Description for text prediction --> + <string name="prediction_summary">Укључи аутоматске наставке при уносу</string> + + <!-- Dialog title for auto complete choices --> + <string name="auto_complete_dialog_title">Аутоматски наставци</string> + + <!-- Option to enable text prediction in landscape --> + <string 
name="prediction_landscape">Увећан поље за унос текста</string> + <!-- Description for text prediction --> + <string name="prediction_landscape_summary">Сакриј предлоге речи при водоравном положају</string> + + <!-- Option to enable auto capitalization of sentences --> + <string name="auto_cap">Аутоматска величина слова</string> + <!-- Description for auto cap --> + <string name="auto_cap_summary">Велико слово на почетку реченице</string> + <!-- Option to enable auto punctuate --> + <string name="auto_punctuate">Аутоматска интерпункција</string> + <!-- Description for auto punctuate --> + <string name="auto_punctuate_summary">Аутоматско постављање интерпункцијских знака при уносу.</string> + + <!-- Option to enable quick fixes --> + <string name="quick_fixes">Брзе исправке</string> + <!-- Description for quick fixes --> + <string name="quick_fixes_summary">Аутоматска исправка честих грешака</string> + + <!-- Option to enable showing suggestions --> + <string name="show_suggestions">Приказ предлога</string> + <!-- Description for show suggestions --> + <string name="show_suggestions_summary">Приказује предлоге речи током уноса</string> + + <!-- Option to enable auto completion --> + <string name="auto_complete">Аутоматска допуна</string> + <!-- Description for auto completion --> + <string name="auto_complete_summary">Размакница и интерпункција аутоматски убацују означену реч.</string> + + <!-- Array of prediction modes --> + <string-array name="prediction_modes"> + <item>Искључено</item> + <item>Основно</item> + <item>Напредно</item> + </string-array> + + <!-- Don't translate --> + <string name="prediction_none" translatable="false">0</string> + <!-- Don't translate --> + <string name="prediction_basic" translatable="false">1</string> + <!-- Don't translate --> + <string name="prediction_full" translatable="false">2</string> + + <string-array name="prediction_modes_values" translatable="false"> + <item>@string/prediction_none</item> + 
<item>@string/prediction_basic</item> + <item>@string/prediction_full</item> + </string-array> + + <!-- Indicates that a word has been added to the dictionary --> + <string name="added_word"><xliff:g id="word">%s</xliff:g> : Saved</string> + <!-- Tip to long press on keys --> + <string name="tip_long_press">Дуги притисак на тастере открива проширене знаке (ø, ö, итд.)</string> + <!-- Tip to dismiss keyboard --> + <string name="tip_dismiss">Притисните тастер за назад \u21B6 како бисте затворили тастатуру</string> + <!-- Tip to press ?123 to access numbers and symbols --> + <string name="tip_access_symbols">Приступ бројевима и симболима</string> + <!-- Tip to long press on typed word to add to dictionary --> + <string name="tip_add_to_dictionary">Притисните и држите притиснуту реч са крајње леве стране + како бисте је додали у речник</string> + + <!-- Instruction to touch the bubble to continue --> + <string name="touch_to_continue">Притисните овај подсетник да наставите »</string> + + <!-- Instruction to touch the bubble to start typing --> + <string name="touch_to_finish">Притисните овде да бисте затворили подсетник и наставили унос!</string> + + <!-- Tutorial tip 1 - The keyboard opens any time you touch a text field --> + <string name="tip_to_open_keyboard"><b>Тастатура се отвара кад год је потребно да унесете текст</b></string> + + <!-- Tutorial tip 2 - Touch and hold a key to view accents (examples) --> + <string name="tip_to_view_accents"><b>Притисните и држите тастер како бисте видели проширене + знаке\n(„, ‟, итд.)</b> + </string> + + <!-- Tutorial tip 3 - How to switch to number/symbol keyboard --> + <string name="tip_to_open_symbols"><b>Пребаците се на бројеве и симболе притиском на овај тастер + </b></string> + + <!-- Tutorial tip 4 - How to switch back to alphabet keyboard --> + <string name="tip_to_close_symbols"><b>Вратите се назад на слова притиском на овај тастер</b></string> + + <!-- Tutorial tip 5 - How to launch keyboard settings --> + <string 
name="tip_to_launch_settings"><b>Притисните и држите притиснут овај тастер да бисте променили + подешавања тастатуре, попут аутоматског настављања</b></string> + + <!-- Tutorial tip 6 - Done with the tutorial --> + <string name="tip_to_start_typing"><b>Пробајте сами!</b></string> + + + <!-- Label for soft enter key when it performs GO action. Must be short to fit on key! --> + <string name="label_go_key">Иди</string> + <!-- Label for soft enter key when it performs NEXT action. Must be short to fit on key! --> + <string name="label_next_key">Даље</string> + <!-- Label for soft enter key when it performs DONE action. Must be short to fit on key! --> + <string name="label_done_key">Крај</string> + <!-- Label for soft enter key when it performs SEND action. Must be short to fit on key! --> + <string name="label_send_key">Шаљи</string> + <!-- Label for "switch to symbols" key. Must be short to fit on key! --> + <string name="label_symbol_key">\?123</string> + <!-- Label for "switch to numeric" key. Must be short to fit on key! --> + <string name="label_phone_key">123</string> + <!-- Label for "switch to alphabetic" key. Must be short to fit on key! --> + <string name="label_alpha_key">АБВ</string> + <!-- Label for ALT modifier key. Must be short to fit on key! --> + <string name="label_alt_key">ALT</string> + + <!-- Voice related labels --> + + <!-- Title of the warning dialog that shows when a user initiates voice input for + the first time. --> + <string name="voice_warning_title">Говорни унос</string> + + <!-- Message that gets put at the top of the warning dialog if the user is attempting to use + voice input in a currently unsupported locale. Voice input will work for such a user, + but it will only recognize them in English. 
--> + <string name="voice_warning_locale_not_supported">Говорни унос није тренутно подржан на Вашем језику, + али ради на енглеском.</string> + + <!-- Message of the warning dialog that shows when a user initiates voice input for + the first time, or turns it on in settings. --> + <string name="voice_warning_may_not_understand">Говорни унос је експериментална могућност која користи + Google-ово мрежно препознавање говора.</string> + + <!-- An additional part of the warning dialog for voice input that only shows when the user + actually initiates voice input, rather than just turning it on in settings. --> + <string name="voice_warning_how_to_turn_off">Како бисте искључили говорни унос, изаберите подешавања + тастатуре.</string> + + <!-- Message to show when user clicks the swiping hint (which says + "Swipe across keyboard to speak"). Also shown when enabling settings. --> + <string name="voice_hint_dialog_message">Како бисте укључили говорни унос, притисните дугме са сличицом + микрофона или превуците прстом преко целе дужине тастатуре.</string> + + <!-- Short message to tell the user the system is ready for them to speak. --> + <string name="voice_listening">Говорите сада</string> + + <!-- Short message shown after the user finishes speaking. --> + <string name="voice_working">Обрада је у току</string> + + <!-- Short message shown before the user should speak. --> + <string name="voice_initializing"></string> + + <!-- Short message shown when a generic error occurs. --> + <string name="voice_error">Грешка. Молимо пробајте поново.</string> + + <!-- Short message shown for a network error. --> + <string name="voice_network_error">Повезивање није успело</string> + + <!-- Short message shown for a network error where the utterance was really long, + in which case we should suggest that the user speak less. --> + <string name="voice_too_much_speech">Грешка, говор је предугачак.</string> + + <!-- Short message shown for an audio error. 
--> + <string name="voice_audio_error">Проблем са звуком</string> + + <!-- Short message shown for an error with the voice server. --> + <string name="voice_server_error">Грешка на серверу</string> + + <!-- Short message shown when no speech is heard. --> + <string name="voice_speech_timeout">Говор није снимљен</string> + + <!-- Short message shown when the server couldn't parse any speech. --> + <string name="voice_no_match">Нема погодака</string> + + <!-- Short message shown when the user initiates voice and voice + search is not installed. --> + <string name="voice_not_installed">Говорна претрага није инсталирана</string> + + <!-- Short hint shown in candidate view to explain voice input. --> + <string name="voice_swipe_hint"><b>Савет:</b> Превуците прстом преко тастатуре а онда говорите.</string> + + <!-- Short hint shown in candidate view to explain that user can speak punctuation. --> + <string name="voice_punctuation_hint"><b>Савет:</b> Следећи пут, изговорите назив интерпункције, + попут „тачка“, „запета“ или „знак питања“.</string> + + <!-- Label on button to stop recognition. Must be short to fit on button. 
--> + <string name="cancel">Откажи</string> + + <!-- Label on button when an error occurs --> + <string name="ok">У реду</string> + + <!-- Preferences item for enabling speech input --> + <string name="voice_input">Говорни унос</string> + + <!-- Array of Voice Input modes --> + <string-array name="voice_input_modes"> + <item>На главној тастатури</item> + <item>На симболичкој тастатури</item> + <item>Искључен</item> + </string-array> + + <!-- Don't translate --> + <string name="voice_mode_main" translatable="false">0</string> + <!-- Don't translate --> + <string name="voice_mode_symbols" translatable="false">1</string> + <!-- Don't translate --> + <string name="voice_mode_off" translatable="false">2</string> + + <string-array name="voice_input_modes_values" translatable="false"> + <item>@string/voice_mode_main</item> + <item>@string/voice_mode_symbols</item> + <item>@string/voice_mode_off</item> + </string-array> + + <!-- Array of Voice Input modes summary --> + <string-array name="voice_input_modes_summary"> + <item>Микрофон на главној тастатури</item> + <item>Микрофон на симболичкој тастатури</item> + <item>Говорни унос је искључен</item> + </string-array> + + <!-- Press the "enter" key after the user speaks. Option on settings.--> + <string name="auto_submit">Аутоматско слање по говорном уносу</string> + + <!-- Press the "enter" key after the user speaks. 
Summary of option in settings.--> + <string name="auto_submit_summary">Дугме за претрагу се аутоматски притиска при претрази или преласку + на следеће поље за унос.</string> + + <!-- IME Tutorial screen (ROMAN) --><skip /> + <!-- appears above image showing the user to click on a TextView to show the IME --> + <string name="open_the_keyboard"><font size="17"><b>Отварање тастатуре\n</b></font><font size="3">\n</font>Touch any text field.</string> + + <!-- appears above the image showing the back button used to close the keyboard --> + <string name="close_the_keyboard"><font size="17"><b>Затварање тастатуре\n</b></font><font size="3">\n</font>Press the Back key.</string> + + <!-- appears above image showing how to use touch and hold --> + <string name="touch_and_hold"><font size="17"><b>Притисните \u0026 и држите пристиснут тастер за опције\n</b></font><font size="3">\n</font>Приступ акцентима и интерпункцији.</string> + + <!-- appears above image showing how to access keyboard settings --> + <string name="keyboard_settings"><font size="17"><b>Подешавање тастатуре\n</b></font><font size="3">\n</font>Притисните \u0026 и држите тастер <b>\?123\</b>.</string> + + <!-- popular web domains for the locale - most popular, displayed on the keyboard --> + <string name="popular_domain_0">".rs"</string> + <!-- popular web domains for the locale - item 1, displayed in the popup --> + <string name="popular_domain_1">".com"</string> + <!-- popular web domains for the locale - item 2, displayed in the popup --> + <string name="popular_domain_2">".net"</string> + <!-- popular web domains for the locale - item 3, displayed in the popup --> + <string name="popular_domain_3">".org"</string> + <!-- popular web domains for the locale - item 4, displayed in the popup --> + <string name="popular_domain_4">".edu"</string> + + <!-- Menu item for launching Input method switcher --> + <string name="inputMethod">Метод за унос</string> + + <!-- Title for input language selection screen --> + 
<string name="language_selection_title">Језици за унос</string> + <!-- Title summary for input language selection screen --> + <string name="language_selection_summary">Превуците прстом по размакници за промену језика</string> + + <!-- Add to dictionary hint --> + <string name="hint_add_to_dictionary">\u2190 Притисните опет да бисте сачували</string> +</resources> diff --git a/java/res/values-xlarge/dimens.xml b/java/res/values-xlarge/dimens.xml new file mode 100644 index 000000000..433176274 --- /dev/null +++ b/java/res/values-xlarge/dimens.xml @@ -0,0 +1,25 @@ +<?xml version="1.0" encoding="utf-8"?> +<!-- +/* +** +** Copyright 2010, The Android Open Source Project +** +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. +** You may obtain a copy of the License at +** +** http://www.apache.org/licenses/LICENSE-2.0 +** +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. 
+*/ +--> + +<resources> + <dimen name="key_height">72dip</dimen> + <dimen name="candidate_strip_height">46dip</dimen> + <dimen name="spacebar_vertical_correction">0dip</dimen> +</resources> diff --git a/java/res/values/colors.xml b/java/res/values/colors.xml index c90d9f6af..d4bb13829 100644 --- a/java/res/values/colors.xml +++ b/java/res/values/colors.xml @@ -21,4 +21,9 @@ <color name="candidate_normal">#FF000000</color> <color name="candidate_recommended">#FFE35900</color> <color name="candidate_other">#ff808080</color> + <color name="latinkeyboard_transparent">#00000000</color> + <color name="latinkeyboard_bar_language_shadow">#80000000</color> + <color name="latinkeyboard_bar_language_text">#FF808080</color> + <color name="latinkeyboard_extension_background">#A0000000</color> + <color name="latinkeyboard_text_color">#FF000000</color> </resources>
\ No newline at end of file diff --git a/java/res/values/dimens.xml b/java/res/values/dimens.xml index 5b2095c0e..f057c16a7 100644 --- a/java/res/values/dimens.xml +++ b/java/res/values/dimens.xml @@ -23,4 +23,7 @@ <dimen name="bubble_pointer_offset">22dip</dimen> <dimen name="candidate_strip_height">42dip</dimen> <dimen name="spacebar_vertical_correction">4dip</dimen> + <!-- If the screen height in landscape is larger than the below value, then the keyboard + will not go into extract (fullscreen) mode. --> + <dimen name="max_height_for_fullscreen">2.5in</dimen> </resources>
\ No newline at end of file diff --git a/java/res/values/strings.xml b/java/res/values/strings.xml index 35dd3e089..70a5b7e2e 100644 --- a/java/res/values/strings.xml +++ b/java/res/values/strings.xml @@ -85,6 +85,11 @@ <!-- Description for auto completion --> <string name="auto_complete_summary">Spacebar and punctuation automatically insert highlighted word</string> + <!-- Option to enable bigram completion --> + <string name="bigram_suggestion">Bigram Suggestions</string> + <!-- Description for bigram completion --> + <string name="bigram_suggestion_summary">Use previous word to improve suggestion</string> + <!-- Array of prediction modes --> <string-array name="prediction_modes"> <item>None</item> diff --git a/java/res/xml-iw/kbd_qwerty.xml b/java/res/xml-iw/kbd_qwerty.xml new file mode 100755 index 000000000..b893f1a62 --- /dev/null +++ b/java/res/xml-iw/kbd_qwerty.xml @@ -0,0 +1,164 @@ +<?xml version="1.0" encoding="utf-8"?> +<!-- +/* +** +** Copyright 2010, The Android Open Source Project +** +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. +** You may obtain a copy of the License at +** +** http://www.apache.org/licenses/LICENSE-2.0 +** +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. 
+*/ +--> + +<Keyboard xmlns:android="http://schemas.android.com/apk/res/android" + android:keyWidth="10%p" + android:horizontalGap="0px" + android:verticalGap="0px" + android:keyHeight="@dimen/key_height" + > + + <Row> + <Key android:keyLabel="ק" + android:horizontalGap="5%p" + android:keyEdgeFlags="left"/> + <Key android:keyLabel="ר"/> + <Key android:keyLabel="א"/> + <Key android:keyLabel="ט"/> + <Key android:keyLabel="ו"/> + <Key android:keyLabel="ן"/> + <Key android:keyLabel="ם"/> + <Key android:keyLabel="פ"/> + <Key android:codes="-5" + android:horizontalGap="1.25%p" + android:keyIcon="@drawable/sym_keyboard_delete" + android:keyWidth="13.75%p" android:keyEdgeFlags="right" + android:iconPreview="@drawable/sym_keyboard_feedback_delete" + android:isRepeatable="true"/> + </Row> + + <Row> + <Key android:keyLabel="ש" android:keyEdgeFlags="left"/> + <Key android:keyLabel="ד"/> + <Key android:keyLabel="ג"/> + <Key android:keyLabel="כ"/> + <Key android:keyLabel="ע"/> + <Key android:keyLabel="י"/> + <Key android:keyLabel="ח"/> + <Key android:keyLabel="ל"/> + <Key android:keyLabel="ך"/> + <Key android:keyLabel="ף" android:keyEdgeFlags="right"/> + </Row> + + <Row> + <Key android:keyLabel="ז" android:horizontalGap="5%p" android:keyEdgeFlags="left"/> + <Key android:keyLabel="ס"/> + <Key android:keyLabel="ב"/> + <Key android:keyLabel="ה"/> + <Key android:keyLabel="נ"/> + <Key android:keyLabel="מ"/> + <Key android:keyLabel="צ"/> + <Key android:keyLabel="ת"/> + <Key android:keyLabel="ץ" android:keyEdgeFlags="right"/> + </Row> + + <Row android:keyboardMode="@+id/mode_normal" android:rowEdgeFlags="bottom"> + <Key android:codes="-2" android:keyLabel="@string/label_symbol_key" + android:popupKeyboard="@xml/kbd_popup_template" + android:popupCharacters="_" + android:keyWidth="20%p" android:keyEdgeFlags="left"/> + <Key android:codes="@integer/key_f1" android:keyIcon="@drawable/sym_keyboard_globe" + android:keyWidth="10%p"/> + <Key android:codes="32" 
android:keyIcon="@drawable/sym_keyboard_space" + android:iconPreview="@drawable/sym_keyboard_feedback_space" + android:keyWidth="40%p" android:isRepeatable="true"/> + <Key android:codes="46" android:keyLabel="." android:popupKeyboard="@xml/popup_punctuation" + android:keyWidth="10%p"/> + <Key android:codes="10" android:keyIcon="@drawable/sym_keyboard_return" + android:iconPreview="@drawable/sym_keyboard_feedback_return" + android:keyWidth="20%p" android:keyEdgeFlags="right"/> + </Row> + + <Row android:keyboardMode="@+id/mode_url" android:rowEdgeFlags="bottom"> + <Key android:codes="-2" android:keyLabel="@string/label_symbol_key" + android:popupKeyboard="@xml/kbd_popup_template" + android:popupCharacters="_" + android:keyWidth="20%p" android:keyEdgeFlags="left"/> + <Key android:keyLabel="/" android:keyWidth="10%p"/> + <Key android:codes="32" android:keyIcon="@drawable/sym_keyboard_space" + android:iconPreview="@drawable/sym_keyboard_feedback_space" + android:keyWidth="40%p" android:isRepeatable="true"/> + <!--Key android:keyLabel="@string/popular_domain_0" + android:keyOutputText="@string/popular_domain_0" + android:popupKeyboard="@xml/popup_domains" + android:keyWidth="20%p"/--> + <Key android:keyLabel="." 
android:popupKeyboard="@xml/popup_punctuation" + android:keyWidth="10%p"/> + <Key android:codes="10" android:keyIcon="@drawable/sym_keyboard_return" + android:iconPreview="@drawable/sym_keyboard_feedback_return" + android:keyWidth="20%p" android:keyEdgeFlags="right"/> + </Row> + + <Row android:keyboardMode="@+id/mode_email" android:rowEdgeFlags="bottom"> + <Key android:codes="-2" android:keyLabel="@string/label_symbol_key" + android:popupKeyboard="@xml/kbd_popup_template" + android:popupCharacters="_" + android:keyWidth="20%p" android:keyEdgeFlags="left"/> + <Key android:keyLabel="\@" android:keyWidth="10%p"/> + <Key android:codes="32" android:keyIcon="@drawable/sym_keyboard_space" + android:iconPreview="@drawable/sym_keyboard_feedback_space" + android:keyWidth="40%p" android:isRepeatable="true"/> + <!--Key android:keyLabel="@string/popular_domain_0" + android:keyOutputText="@string/popular_domain_0" + android:popupKeyboard="@xml/popup_domains" + android:keyWidth="20%p"/--> + <Key android:keyLabel="." android:popupKeyboard="@xml/popup_punctuation"/> + <Key android:codes="10" android:keyIcon="@drawable/sym_keyboard_return" + android:iconPreview="@drawable/sym_keyboard_feedback_return" + android:keyWidth="20%p" android:keyEdgeFlags="right"/> + </Row> + + <Row android:keyboardMode="@+id/mode_im" android:rowEdgeFlags="bottom"> + <Key android:codes="-2" android:keyLabel="@string/label_symbol_key" + android:popupKeyboard="@xml/kbd_popup_template" + android:popupCharacters="_" + android:keyWidth="20%p" android:keyEdgeFlags="left"/> + <Key android:codes="@integer/key_f1" android:keyIcon="@drawable/sym_keyboard_globe" + android:keyWidth="10%p"/> + <Key android:codes="32" android:keyIcon="@drawable/sym_keyboard_space" + android:iconPreview="@drawable/sym_keyboard_feedback_space" + android:keyWidth="40%p" android:isRepeatable="true"/> + <Key android:codes="46" android:keyLabel="." 
android:popupKeyboard="@xml/popup_punctuation" + android:keyWidth="10%p"/> + <Key android:keyLabel=":-)" android:keyOutputText=":-) " + android:popupKeyboard="@xml/popup_smileys" + android:keyWidth="20%p" android:keyEdgeFlags="right"/> + </Row> + + <Row android:keyboardMode="@+id/mode_webentry" android:rowEdgeFlags="bottom"> + <Key android:codes="-2" android:keyLabel="@string/label_symbol_key" + android:popupKeyboard="@xml/kbd_popup_template" + android:popupCharacters="_" + android:keyWidth="20%p" android:keyEdgeFlags="left"/> + <Key android:codes="@integer/key_f1" android:keyIcon="@drawable/sym_keyboard_globe" + android:keyWidth="10%p"/> + <Key android:codes="32" android:keyIcon="@drawable/sym_keyboard_space" + android:iconPreview="@drawable/sym_keyboard_feedback_space" + android:keyWidth="20%p" android:isRepeatable="true"/> + <Key android:codes="9" android:keyIcon="@drawable/sym_keyboard_tab" + android:iconPreview="@drawable/sym_keyboard_feedback_tab" + android:keyWidth="20%p"/> + <Key android:keyLabel="." android:popupKeyboard="@xml/popup_punctuation"/> + <Key android:codes="10" android:keyIcon="@drawable/sym_keyboard_return" + android:iconPreview="@drawable/sym_keyboard_feedback_return" + android:keyWidth="20%p" android:keyEdgeFlags="right"/> + </Row> +</Keyboard> + diff --git a/java/res/xml-sr/kbd_qwerty.xml b/java/res/xml-sr/kbd_qwerty.xml new file mode 100644 index 000000000..e4884a8a6 --- /dev/null +++ b/java/res/xml-sr/kbd_qwerty.xml @@ -0,0 +1,171 @@ +<?xml version="1.0" encoding="utf-8"?> +<!-- +/* +** +** Copyright 2008, The Android Open Source Project +** +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. 
+** You may obtain a copy of the License at +** +** http://www.apache.org/licenses/LICENSE-2.0 +** +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. +*/ +--> + +<!-- Serbian keyboard layout, based on the X11 layout for Serbian --> +<Keyboard xmlns:android="http://schemas.android.com/apk/res/android" + android:keyWidth="9.09%p" + android:horizontalGap="0px" + android:verticalGap="0px" + android:keyHeight="@dimen/key_height" + > + + <Row> + <Key android:keyLabel="љ" + android:keyEdgeFlags="left" /> + <Key android:keyLabel="њ" /> + <Key android:keyLabel="е" /> + <Key android:keyLabel="р" /> + <Key android:keyLabel="т" /> + <Key android:keyLabel="з" /> + <Key android:keyLabel="у" /> + <Key android:keyLabel="и" /> + <Key android:keyLabel="о" /> + <Key android:keyLabel="п" /> + <Key android:keyLabel="ш" + android:keyEdgeFlags="right" /> + </Row> + + <Row> + <Key android:keyLabel="а" + android:keyEdgeFlags="left" /> + <Key android:keyLabel="с" /> + <Key android:keyLabel="д" /> + <Key android:keyLabel="ф" /> + <Key android:keyLabel="г" /> + <Key android:keyLabel="х" /> + <Key android:keyLabel="ј" /> + <Key android:keyLabel="к" /> + <Key android:keyLabel="л" /> + <Key android:keyLabel="ч" /> + <Key android:keyLabel="ћ" /> + <Key android:keyLabel="ђ" + android:keyEdgeFlags="right" /> + </Row> + + <Row android:keyWidth="8.5%p"> + <Key android:codes="-1" android:keyIcon="@drawable/sym_keyboard_shift" + android:keyWidth="11.75%p" android:isModifier="true" + android:iconPreview="@drawable/sym_keyboard_feedback_shift" + android:isSticky="true" android:keyEdgeFlags="left"/> + <Key android:keyLabel="ж" /> + <Key android:keyLabel="џ" /> + <Key android:keyLabel="ц" /> + <Key android:keyLabel="в" /> + <Key 
android:keyLabel="б" /> + <Key android:keyLabel="н" /> + <Key android:keyLabel="м" /> + <Key android:codes="-5" android:keyIcon="@drawable/sym_keyboard_delete" + android:keyWidth="11.75%p" android:keyEdgeFlags="right" + android:iconPreview="@drawable/sym_keyboard_feedback_delete" + android:isRepeatable="true"/> + </Row> + + <Row android:keyboardMode="@+id/mode_normal" android:rowEdgeFlags="bottom"> + <Key android:codes="-2" android:keyLabel="@string/label_symbol_key" + android:popupKeyboard="@xml/kbd_popup_template" + android:popupCharacters="_" + android:keyWidth="20%p" android:keyEdgeFlags="left"/> + <Key android:codes="@integer/key_f1" android:keyIcon="@drawable/sym_keyboard_globe" + android:keyWidth="10%p"/> + <Key android:codes="32" android:keyIcon="@drawable/sym_keyboard_space" + android:iconPreview="@drawable/sym_keyboard_feedback_space" + android:keyWidth="40%p" android:isRepeatable="true"/> + <Key android:codes="46" android:keyLabel="." android:popupKeyboard="@xml/popup_punctuation" + android:keyWidth="10%p"/> + <Key android:codes="10" android:keyIcon="@drawable/sym_keyboard_return" + android:iconPreview="@drawable/sym_keyboard_feedback_return" + android:keyWidth="20%p" android:keyEdgeFlags="right"/> + </Row> + + <Row android:keyboardMode="@+id/mode_url" android:rowEdgeFlags="bottom"> + <Key android:codes="-2" android:keyLabel="@string/label_symbol_key" + android:popupKeyboard="@xml/kbd_popup_template" + android:popupCharacters="_" + android:keyWidth="20%p" android:keyEdgeFlags="left"/> + <Key android:keyLabel="/" android:keyWidth="10%p"/> + <Key android:codes="32" android:keyIcon="@drawable/sym_keyboard_space" + android:iconPreview="@drawable/sym_keyboard_feedback_space" + android:keyWidth="20%p" android:isRepeatable="true"/> + <Key android:keyLabel="@string/popular_domain_0" + android:keyOutputText="@string/popular_domain_0" + android:popupKeyboard="@xml/popup_domains" + android:keyWidth="20%p"/> + <Key android:keyLabel="." 
android:popupKeyboard="@xml/popup_punctuation" + android:keyWidth="10%p"/> + <Key android:codes="10" android:keyIcon="@drawable/sym_keyboard_return" + android:iconPreview="@drawable/sym_keyboard_feedback_return" + android:keyWidth="20%p" android:keyEdgeFlags="right"/> + </Row> + + <Row android:keyboardMode="@+id/mode_email" android:rowEdgeFlags="bottom"> + <Key android:codes="-2" android:keyLabel="@string/label_symbol_key" + android:popupKeyboard="@xml/kbd_popup_template" + android:popupCharacters="_" + android:keyWidth="20%p" android:keyEdgeFlags="left"/> + <Key android:keyLabel="\@" android:keyWidth="10%p"/> + <Key android:codes="32" android:keyIcon="@drawable/sym_keyboard_space" + android:iconPreview="@drawable/sym_keyboard_feedback_space" + android:keyWidth="20%p" android:isRepeatable="true"/> + <Key android:keyLabel="@string/popular_domain_0" + android:keyOutputText="@string/popular_domain_0" + android:popupKeyboard="@xml/popup_domains" + android:keyWidth="20%p"/> + <Key android:keyLabel="." android:popupKeyboard="@xml/popup_punctuation"/> + <Key android:codes="10" android:keyIcon="@drawable/sym_keyboard_return" + android:iconPreview="@drawable/sym_keyboard_feedback_return" + android:keyWidth="20%p" android:keyEdgeFlags="right"/> + </Row> + + <Row android:keyboardMode="@+id/mode_im" android:rowEdgeFlags="bottom"> + <Key android:codes="-2" android:keyLabel="@string/label_symbol_key" + android:popupKeyboard="@xml/kbd_popup_template" + android:popupCharacters="_" + android:keyWidth="20%p" android:keyEdgeFlags="left"/> + <Key android:codes="@integer/key_f1" android:keyIcon="@drawable/sym_keyboard_globe" + android:keyWidth="10%p"/> + <Key android:codes="32" android:keyIcon="@drawable/sym_keyboard_space" + android:iconPreview="@drawable/sym_keyboard_feedback_space" + android:keyWidth="40%p" android:isRepeatable="true"/> + <Key android:codes="46" android:keyLabel="." 
android:popupKeyboard="@xml/popup_punctuation" + android:keyWidth="10%p"/> + <Key android:keyLabel=":-)" android:keyOutputText=":-) " + android:popupKeyboard="@xml/popup_smileys" + android:keyWidth="20%p" android:keyEdgeFlags="right"/> + </Row> + + <Row android:keyboardMode="@+id/mode_webentry" android:rowEdgeFlags="bottom"> + <Key android:codes="-2" android:keyLabel="@string/label_symbol_key" + android:popupKeyboard="@xml/kbd_popup_template" + android:popupCharacters="_" + android:keyWidth="20%p" android:keyEdgeFlags="left"/> + <Key android:codes="@integer/key_f1" android:keyIcon="@drawable/sym_keyboard_globe" + android:keyWidth="10%p"/> + <Key android:codes="32" android:keyIcon="@drawable/sym_keyboard_space" + android:iconPreview="@drawable/sym_keyboard_feedback_space" + android:keyWidth="20%p" android:isRepeatable="true"/> + <Key android:codes="9" android:keyIcon="@drawable/sym_keyboard_tab" + android:iconPreview="@drawable/sym_keyboard_feedback_tab" + android:keyWidth="20%p"/> + <Key android:keyLabel="." 
android:popupKeyboard="@xml/popup_punctuation"/> + <Key android:codes="10" android:keyIcon="@drawable/sym_keyboard_return" + android:iconPreview="@drawable/sym_keyboard_feedback_return" + android:keyWidth="20%p" android:keyEdgeFlags="right"/> + </Row> +</Keyboard> diff --git a/java/res/xml/prefs.xml b/java/res/xml/prefs.xml index 535b63f3b..c93fe0ac3 100644 --- a/java/res/xml/prefs.xml +++ b/java/res/xml/prefs.xml @@ -81,6 +81,14 @@ android:defaultValue="@bool/enable_autocorrect" android:dependency="show_suggestions" /> - + + <CheckBoxPreference + android:key="bigram_suggestion" + android:title="@string/bigram_suggestion" + android:summary="@string/bigram_suggestion_summary" + android:persistent="true" + android:defaultValue="true" + android:dependency="auto_complete" + /> </PreferenceCategory> </PreferenceScreen> diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java index 87de94b76..8d2363012 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java @@ -16,6 +16,11 @@ package com.android.inputmethod.latin; +import java.io.InputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.channels.Channels; import java.util.Arrays; import android.content.Context; @@ -27,18 +32,24 @@ import android.util.Log; */ public class BinaryDictionary extends Dictionary { - public static final int MAX_WORD_LENGTH = 48; + private static final String TAG = "BinaryDictionary"; private static final int MAX_ALTERNATIVES = 16; private static final int MAX_WORDS = 16; + private static final int MAX_BIGRAMS = 255; // TODO Probably don't need all 255 private static final int TYPED_LETTER_MULTIPLIER = 2; private static final boolean ENABLE_MISSED_CHARACTERS = true; private int mNativeDict; - private int mDictLength; // This value is set from native code, don't change the 
name!!!! + private int mDictLength; private int[] mInputCodes = new int[MAX_WORD_LENGTH * MAX_ALTERNATIVES]; private char[] mOutputChars = new char[MAX_WORD_LENGTH * MAX_WORDS]; + private char[] mOutputChars_bigrams = new char[MAX_WORD_LENGTH * MAX_BIGRAMS]; private int[] mFrequencies = new int[MAX_WORDS]; + private int[] mFrequencies_bigrams = new int[MAX_BIGRAMS]; + // Keep a reference to the native dict direct buffer in Java to avoid + // unexpected deallocation of the direct buffer. + private ByteBuffer mNativeDictDirectBuffer; static { try { @@ -59,19 +70,83 @@ public class BinaryDictionary extends Dictionary { } } - private native int openNative(AssetManager am, String resourcePath, int typedLetterMultiplier, - int fullWordMultiplier); + /** + * Create a dictionary from a byte buffer. This is used for testing. + * @param context application context for reading resources + * @param byteBuffer a ByteBuffer containing the binary dictionary + */ + public BinaryDictionary(Context context, ByteBuffer byteBuffer) { + if (byteBuffer != null) { + if (byteBuffer.isDirect()) { + mNativeDictDirectBuffer = byteBuffer; + } else { + mNativeDictDirectBuffer = ByteBuffer.allocateDirect(byteBuffer.capacity()); + byteBuffer.rewind(); + mNativeDictDirectBuffer.put(byteBuffer); + } + mDictLength = byteBuffer.capacity(); + mNativeDict = openNative(mNativeDictDirectBuffer, + TYPED_LETTER_MULTIPLIER, FULL_WORD_FREQ_MULTIPLIER); + } + } + + private native int openNative(ByteBuffer bb, int typedLetterMultiplier, int fullWordMultiplier); private native void closeNative(int dict); private native boolean isValidWordNative(int nativeData, char[] word, int wordLength); private native int getSuggestionsNative(int dict, int[] inputCodes, int codesSize, char[] outputChars, int[] frequencies, int maxWordLength, int maxWords, int maxAlternatives, int skipPos, int[] nextLettersFrequencies, int nextLettersSize); + private native int getBigramsNative(int nativeData, char[] prevWord, int 
prevWordLength, + char[] outputChars, int[] frequencies, int maxWordLength, int maxBigrams); private final void loadDictionary(Context context, int resId) { - AssetManager am = context.getResources().getAssets(); - String assetName = context.getResources().getString(resId); - mNativeDict = openNative(am, assetName, TYPED_LETTER_MULTIPLIER, FULL_WORD_FREQ_MULTIPLIER); + InputStream is = context.getResources().openRawResource(resId); + try { + int avail = is.available(); + mNativeDictDirectBuffer = + ByteBuffer.allocateDirect(avail).order(ByteOrder.nativeOrder()); + int got = Channels.newChannel(is).read(mNativeDictDirectBuffer); + if (got != avail) { + Log.e(TAG, "Read " + got + " bytes, expected " + avail); + } else { + mNativeDict = openNative(mNativeDictDirectBuffer, + TYPED_LETTER_MULTIPLIER, FULL_WORD_FREQ_MULTIPLIER); + mDictLength = avail; + } + } catch (IOException e) { + Log.w(TAG, "No available size for binary dictionary"); + } finally { + try { + is.close(); + } catch (IOException e) { + Log.w(TAG, "Failed to close input stream"); + } + } + } + + @Override + public void getBigrams(final WordComposer composer, final CharSequence previousWord, + final WordCallback callback, int[] nextLettersFrequencies) { + + char[] chars = previousWord.toString().toCharArray(); + Arrays.fill(mOutputChars_bigrams, (char) 0); + Arrays.fill(mFrequencies_bigrams, 0); + + int count = getBigramsNative(mNativeDict, chars, chars.length, mOutputChars_bigrams, + mFrequencies_bigrams, MAX_WORD_LENGTH, MAX_BIGRAMS); + for (int j = 0; j < count; j++) { + if (mFrequencies_bigrams[j] < 1) break; + int start = j * MAX_WORD_LENGTH; + int len = 0; + while (mOutputChars_bigrams[start + len] != 0) { + len++; + } + if (len > 0) { + callback.addWord(mOutputChars_bigrams, start, len, mFrequencies_bigrams[j], + DataType.BIGRAM); + } + } } @Override @@ -119,7 +194,7 @@ public class BinaryDictionary extends Dictionary { len++; } if (len > 0) { - callback.addWord(mOutputChars, start, len, 
mFrequencies[j]); + callback.addWord(mOutputChars, start, len, mFrequencies[j], DataType.UNIGRAM); } } } diff --git a/java/src/com/android/inputmethod/latin/CandidateView.java b/java/src/com/android/inputmethod/latin/CandidateView.java index 3a199bbaf..ae45001b8 100755 --- a/java/src/com/android/inputmethod/latin/CandidateView.java +++ b/java/src/com/android/inputmethod/latin/CandidateView.java @@ -219,7 +219,7 @@ public class CandidateView extends View { mDivider.getIntrinsicHeight()); } int x = 0; - final int count = mSuggestions.size(); + final int count = Math.min(mSuggestions.size(), MAX_SUGGESTIONS); final int width = getWidth(); final Rect bgPadding = mBgPadding; final Paint paint = mPaint; @@ -335,7 +335,7 @@ public class CandidateView extends View { public void scrollPrev() { int i = 0; - final int count = mSuggestions.size(); + final int count = Math.min(mSuggestions.size(), MAX_SUGGESTIONS); int firstItem = 0; // Actually just before the first item, if at the boundary while (i < count) { if (mWordX[i] < getScrollX() @@ -354,7 +354,7 @@ public class CandidateView extends View { int i = 0; int scrollX = getScrollX(); int targetX = scrollX; - final int count = mSuggestions.size(); + final int count = Math.min(mSuggestions.size(), MAX_SUGGESTIONS); int rightEdge = scrollX + getWidth(); while (i < count) { if (mWordX[i] <= rightEdge && @@ -447,25 +447,6 @@ public class CandidateView extends View { } return true; } - - /** - * For flick through from keyboard, call this method with the x coordinate of the flick - * gesture. 
- * @param x - */ - public void takeSuggestionAt(float x) { - mTouchX = (int) x; - // To detect candidate - onDraw(null); - if (mSelectedString != null) { - if (!mShowingCompletions) { - TextEntryState.acceptedSuggestion(mSuggestions.get(0), mSelectedString); - } - mService.pickSuggestionManually(mSelectedIndex, mSelectedString); - } - invalidate(); - mHandler.sendMessageDelayed(mHandler.obtainMessage(MSG_REMOVE_THROUGH_PREVIEW), 200); - } private void hidePreview() { mCurrentWordIndex = OUT_OF_BOUNDS; diff --git a/java/src/com/android/inputmethod/latin/Dictionary.java b/java/src/com/android/inputmethod/latin/Dictionary.java index e7b526663..54317c861 100644 --- a/java/src/com/android/inputmethod/latin/Dictionary.java +++ b/java/src/com/android/inputmethod/latin/Dictionary.java @@ -21,7 +21,9 @@ package com.android.inputmethod.latin; * strokes. */ abstract public class Dictionary { - + + protected static final int MAX_WORD_LENGTH = 48; + /** * Whether or not to replicate the typed word in the suggested list, even if it's valid. */ @@ -31,7 +33,11 @@ abstract public class Dictionary { * The weight to give to a word if it's length is the same as the number of typed characters. */ protected static final int FULL_WORD_FREQ_MULTIPLIER = 2; - + + public static enum DataType { + UNIGRAM, BIGRAM + } + /** * Interface to be implemented by classes requesting words to be fetched from the dictionary. * @see #getWords(WordComposer, WordCallback) @@ -45,9 +51,11 @@ abstract public class Dictionary { * @param wordLength length of valid characters in the character array * @param frequency the frequency of occurence. 
This is normalized between 1 and 255, but * can exceed those limits + * @param dataType tells type of this data * @return true if the word was added, false if no more words are required */ - boolean addWord(char[] word, int wordOffset, int wordLength, int frequency); + boolean addWord(char[] word, int wordOffset, int wordLength, int frequency, + DataType dataType); } /** @@ -65,6 +73,21 @@ abstract public class Dictionary { int[] nextLettersFrequencies); /** + * Searches for pairs in the bigram dictionary that matches the previous word and all the + * possible words following are added through the callback object. + * @param composer the key sequence to match + * @param callback the callback object to send possible word following previous word + * @param nextLettersFrequencies array of frequencies of next letters that could follow the + * word so far. For instance, "bracke" can be followed by "t", so array['t'] will have + * a non-zero value on returning from this method. + * Pass in null if you don't want the dictionary to look up next letters. + */ + public void getBigrams(final WordComposer composer, final CharSequence previousWord, + final WordCallback callback, int[] nextLettersFrequencies) { + // empty base implementation + } + + /** * Checks if the given word occurs in the dictionary * @param word the word to search for. The search should be case-insensitive. * @return true if the word exists, false otherwise diff --git a/java/src/com/android/inputmethod/voice/EditingUtil.java b/java/src/com/android/inputmethod/latin/EditingUtil.java index 6316d8ccf..5133c60ca 100644 --- a/java/src/com/android/inputmethod/voice/EditingUtil.java +++ b/java/src/com/android/inputmethod/latin/EditingUtil.java @@ -14,7 +14,9 @@ * the License. 
*/ -package com.android.inputmethod.voice; +package com.android.inputmethod.latin; + +import java.util.regex.Pattern; import android.view.inputmethod.ExtractedText; import android.view.inputmethod.ExtractedTextRequest; @@ -24,6 +26,11 @@ import android.view.inputmethod.InputConnection; * Utility methods to deal with editing text through an InputConnection. */ public class EditingUtil { + /** + * Number of characters we want to look back in order to identify the previous word + */ + public static final int LOOKBACK_CHARACTER_NUM = 15; + private EditingUtil() {}; /** @@ -75,9 +82,21 @@ public class EditingUtil { * represents the cursor, then "hello " will be returned. */ public static String getWordAtCursor( - InputConnection connection, String separators) { - Range range = getWordRangeAtCursor(connection, separators); - return (range == null) ? null : range.word; + InputConnection connection, String separators) { + return getWordAtCursor(connection, separators, null); + } + + /** + * @param connection connection to the current text field. + * @param sep characters which may separate words + * @return the word that surrounds the cursor, including up to one trailing + * separator. For example, if the field contains "he|llo world", where | + * represents the cursor, then "hello " will be returned. + */ + public static String getWordAtCursor( + InputConnection connection, String separators, Range range) { + Range r = getWordRangeAtCursor(connection, separators, range); + return (r == null) ? null : r.word; } /** @@ -87,7 +106,7 @@ public class EditingUtil { public static void deleteWordAtCursor( InputConnection connection, String separators) { - Range range = getWordRangeAtCursor(connection, separators); + Range range = getWordRangeAtCursor(connection, separators, null); if (range == null) return; connection.finishComposingText(); @@ -101,18 +120,20 @@ public class EditingUtil { /** * Represents a range of text, relative to the current cursor position. 
*/ - private static class Range { + public static class Range { /** Characters before selection start */ - int charsBefore; + public int charsBefore; /** * Characters after selection start, including one trailing word * separator. */ - int charsAfter; + public int charsAfter; /** The actual characters that make up a word */ - String word; + public String word; + + public Range() {} public Range(int charsBefore, int charsAfter, String word) { if (charsBefore < 0 || charsAfter < 0) { @@ -125,7 +146,7 @@ public class EditingUtil { } private static Range getWordRangeAtCursor( - InputConnection connection, String sep) { + InputConnection connection, String sep, Range range) { if (connection == null || sep == null) { return null; } @@ -137,20 +158,22 @@ public class EditingUtil { // Find first word separator before the cursor int start = before.length(); - while (--start > 0 && !isWhitespace(before.charAt(start - 1), sep)); + while (start > 0 && !isWhitespace(before.charAt(start - 1), sep)) start--; // Find last word separator after the cursor int end = -1; while (++end < after.length() && !isWhitespace(after.charAt(end), sep)); - if (end < after.length() - 1) { - end++; // Include trailing space, if it exists, in word - } int cursor = getCursorPosition(connection); if (start >= 0 && cursor + end <= after.length() + before.length()) { String word = before.toString().substring(start, before.length()) - + after.toString().substring(0, end); - return new Range(before.length() - start, end, word); + + after.toString().substring(0, end); + + Range returnRange = range != null? 
range : new Range(); + returnRange.charsBefore = before.length() - start; + returnRange.charsAfter = end; + returnRange.word = word; + return returnRange; } return null; @@ -159,4 +182,13 @@ public class EditingUtil { private static boolean isWhitespace(int code, String whitespace) { return whitespace.contains(String.valueOf((char) code)); } + + private static final Pattern spaceRegex = Pattern.compile("\\s+"); + + public static CharSequence getPreviousWord(InputConnection connection) { + //TODO: Should fix this. This could be slow! + CharSequence prev = connection.getTextBeforeCursor(LOOKBACK_CHARACTER_NUM, 0); + String[] w = spaceRegex.split(prev); + return (w.length >= 2) ? w[w.length-2] : null; + } } diff --git a/java/src/com/android/inputmethod/latin/ExpandableDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableDictionary.java index 46bc41c42..6f4d925ee 100644 --- a/java/src/com/android/inputmethod/latin/ExpandableDictionary.java +++ b/java/src/com/android/inputmethod/latin/ExpandableDictionary.java @@ -267,7 +267,7 @@ public class ExpandableDictionary extends Dictionary { if (completion) { word[depth] = c; if (terminal) { - if (!callback.addWord(word, 0, depth + 1, freq * snr)) { + if (!callback.addWord(word, 0, depth + 1, freq * snr, DataType.UNIGRAM)) { return; } // Add to frequency of next letters for predictive correction @@ -305,7 +305,8 @@ public class ExpandableDictionary extends Dictionary { || !same(word, depth + 1, codes.getTypedWord())) { int finalFreq = freq * snr * addedAttenuation; if (skipPos < 0) finalFreq *= FULL_WORD_FREQ_MULTIPLIER; - callback.addWord(word, 0, depth + 1, finalFreq); + callback.addWord(word, 0, depth + 1, finalFreq, + DataType.UNIGRAM); } } if (children != null) { diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java index b4ed80c1f..783805e2b 100644 --- a/java/src/com/android/inputmethod/latin/LatinIME.java +++ 
b/java/src/com/android/inputmethod/latin/LatinIME.java @@ -16,7 +16,6 @@ package com.android.inputmethod.latin; -import com.android.inputmethod.voice.EditingUtil; import com.android.inputmethod.voice.FieldContext; import com.android.inputmethod.voice.SettingsUtil; import com.android.inputmethod.voice.VoiceInput; @@ -40,9 +39,9 @@ import android.os.Message; import android.os.SystemClock; import android.preference.PreferenceManager; import android.speech.SpeechRecognizer; -import android.text.AutoText; import android.text.ClipboardManager; import android.text.TextUtils; +import android.util.DisplayMetrics; import android.util.Log; import android.util.PrintWriterPrinter; import android.util.Printer; @@ -50,8 +49,8 @@ import android.view.HapticFeedbackConstants; import android.view.KeyEvent; import android.view.LayoutInflater; import android.view.View; -import android.view.ViewParent; import android.view.ViewGroup; +import android.view.ViewParent; import android.view.Window; import android.view.WindowManager; import android.view.inputmethod.CompletionInfo; @@ -78,10 +77,12 @@ public class LatinIME extends InputMethodService VoiceInput.UiListener, SharedPreferences.OnSharedPreferenceChangeListener { private static final String TAG = "LatinIME"; + private static final boolean PERF_DEBUG = false; static final boolean DEBUG = false; static final boolean TRACE = false; static final boolean VOICE_INSTALLED = true; static final boolean ENABLE_VOICE_BUTTON = true; + private static final boolean MODIFY_TEXT_FOR_CORRECTION = false; private static final String PREF_VIBRATE_ON = "vibrate_on"; private static final String PREF_SOUND_ON = "sound_on"; @@ -89,6 +90,7 @@ public class LatinIME extends InputMethodService private static final String PREF_QUICK_FIXES = "quick_fixes"; private static final String PREF_SHOW_SUGGESTIONS = "show_suggestions"; private static final String PREF_AUTO_COMPLETE = "auto_complete"; + private static final String PREF_BIGRAM_SUGGESTIONS = 
"bigram_suggestion"; private static final String PREF_VOICE_MODE = "voice_mode"; // Whether or not the user has used voice input before (and thus, whether to show the @@ -127,6 +129,7 @@ public class LatinIME extends InputMethodService private static final int MSG_UPDATE_SHIFT_STATE = 2; private static final int MSG_VOICE_RESULTS = 3; private static final int MSG_START_LISTENING_AFTER_SWIPE = 4; + private static final int MSG_UPDATE_OLD_SUGGESTIONS = 5; // If we detect a swipe gesture within N ms of typing, then swipe is // ignored, since it may in fact be two key presses in quick succession. @@ -186,10 +189,10 @@ public class LatinIME extends InputMethodService private boolean mAutoSpace; private boolean mJustAddedAutoSpace; private boolean mAutoCorrectEnabled; + private boolean mBigramSuggestionEnabled; private boolean mAutoCorrectOn; private boolean mCapsLock; private boolean mPasswordText; - private boolean mEmailText; private boolean mVibrateOn; private boolean mSoundOn; private boolean mAutoCap; @@ -205,6 +208,12 @@ public class LatinIME extends InputMethodService private boolean mVoiceOnPrimary; private int mOrientation; private List<CharSequence> mSuggestPuncList; + // Keep track of the last selection range to decide if we need to show word alternatives + private int mLastSelectionStart; + private int mLastSelectionEnd; + + // Input type is such that we should not auto-correct + private boolean mInputTypeNoAutoCorrect; // Indicates whether the suggestion strip is to be on in landscape private boolean mJustAccepted; @@ -228,17 +237,66 @@ public class LatinIME extends InputMethodService // Keeps track of most recently inserted text (multi-character key) for reverting private CharSequence mEnteredText; + private boolean mRefreshKeyboardRequired; // For each word, a list of potential replacements, usually from voice. 
private Map<String, List<CharSequence>> mWordToSuggestions = new HashMap<String, List<CharSequence>>(); + private ArrayList<WordAlternatives> mWordHistory = new ArrayList<WordAlternatives>(); + private class VoiceResults { List<String> candidates; Map<String, List<CharSequence>> alternatives; } + + public abstract static class WordAlternatives { + protected CharSequence mChosenWord; - private boolean mRefreshKeyboardRequired; + public WordAlternatives() { + // Nothing + } + + public WordAlternatives(CharSequence chosenWord) { + mChosenWord = chosenWord; + } + + @Override + public int hashCode() { + return mChosenWord.hashCode(); + } + + public abstract CharSequence getOriginalWord(); + + public CharSequence getChosenWord() { + return mChosenWord; + } + + public abstract List<CharSequence> getAlternatives(); + } + + public class TypedWordAlternatives extends WordAlternatives { + private WordComposer word; + + public TypedWordAlternatives() { + // Nothing + } + + public TypedWordAlternatives(CharSequence chosenWord, WordComposer wordComposer) { + super(chosenWord); + word = wordComposer; + } + + @Override + public CharSequence getOriginalWord() { + return word.getTypedWord(); + } + + @Override + public List<CharSequence> getAlternatives() { + return getTypedSuggestions(word); + } + } Handler mHandler = new Handler() { @Override @@ -247,6 +305,9 @@ public class LatinIME extends InputMethodService case MSG_UPDATE_SUGGESTIONS: updateSuggestions(); break; + case MSG_UPDATE_OLD_SUGGESTIONS: + setOldSuggestions(); + break; case MSG_START_TUTORIAL: if (mTutorial == null) { if (mInputView.isShown()) { @@ -450,13 +511,11 @@ public class LatinIME extends InputMethodService mShowingVoiceSuggestions = false; mImmediatelyAfterVoiceSuggestions = false; mVoiceInputHighlighted = false; - mWordToSuggestions.clear(); mInputTypeNoAutoCorrect = false; mPredictionOn = false; mCompletionOn = false; mCompletions = null; mCapsLock = false; - mEmailText = false; mEnteredText = null; switch 
(attribute.inputType & EditorInfo.TYPE_MASK_CLASS) { @@ -479,9 +538,6 @@ public class LatinIME extends InputMethodService variation == EditorInfo.TYPE_TEXT_VARIATION_VISIBLE_PASSWORD ) { mPredictionOn = false; } - if (variation == EditorInfo.TYPE_TEXT_VARIATION_EMAIL_ADDRESS) { - mEmailText = true; - } if (variation == EditorInfo.TYPE_TEXT_VARIATION_EMAIL_ADDRESS || variation == EditorInfo.TYPE_TEXT_VARIATION_PERSON_NAME) { mAutoSpace = false; @@ -610,10 +666,11 @@ public class LatinIME extends InputMethodService // clear whatever candidate text we have. if ((((mComposing.length() > 0 && mPredicting) || mVoiceInputHighlighted) && (newSelStart != candidatesEnd - || newSelEnd != candidatesEnd))) { + || newSelEnd != candidatesEnd) + && mLastSelectionStart != newSelStart)) { mComposing.setLength(0); mPredicting = false; - updateSuggestions(); + postUpdateSuggestions(); TextEntryState.reset(); InputConnection ic = getCurrentInputConnection(); if (ic != null) { @@ -633,26 +690,20 @@ public class LatinIME extends InputMethodService mJustAccepted = false; postUpdateShiftKeyState(); - if (VOICE_INSTALLED) { - if (mShowingVoiceSuggestions) { - if (mImmediatelyAfterVoiceSuggestions) { - mImmediatelyAfterVoiceSuggestions = false; - } else { - updateSuggestions(); - mShowingVoiceSuggestions = false; - } - } - if (VoiceInput.ENABLE_WORD_CORRECTIONS) { - // If we have alternatives for the current word, then show them. 
- String word = EditingUtil.getWordAtCursor( - getCurrentInputConnection(), getWordSeparators()); - if (word != null && mWordToSuggestions.containsKey(word.trim())) { - mSuggestionShouldReplaceCurrentWord = true; - final List<CharSequence> suggestions = mWordToSuggestions.get(word.trim()); + // Make a note of the cursor position + mLastSelectionStart = newSelStart; + mLastSelectionEnd = newSelEnd; - setSuggestions(suggestions, false, true, true); - setCandidatesViewShown(true); - } + + // If a word is selected + if (isPredictionOn() && mJustRevertedSeparator == null + && (candidatesStart == candidatesEnd || newSelStart != oldSelStart) + && (newSelStart < newSelEnd - 1 || (!mPredicting)) + && !mVoiceInputHighlighted) { + if (isCursorTouchingWord() || mLastSelectionStart < mLastSelectionEnd) { + postUpdateOldSuggestions(); + } else { + abortCorrection(false); } } } @@ -675,13 +726,15 @@ public class LatinIME extends InputMethodService mVoiceInput.cancel(); } } + mWordToSuggestions.clear(); + mWordHistory.clear(); super.hideWindow(); TextEntryState.endSession(); } @Override public void onDisplayCompletions(CompletionInfo[] completions) { - if (false) { + if (DEBUG) { Log.i("foo", "Received completions:"); for (int i=0; i<(completions != null ? 
completions.length : 0); i++) { Log.i("foo", " #" + i + ": " + completions[i]); @@ -699,7 +752,6 @@ public class LatinIME extends InputMethodService CompletionInfo ci = completions[i]; if (ci != null) stringList.add(ci.getText()); } - //CharSequence typedWord = mWord.getTypedWord(); setSuggestions(stringList, true, true, true); mBestWord = null; setCandidatesViewShown(isCandidateStripVisible() || mCompletionOn); @@ -724,6 +776,19 @@ public class LatinIME extends InputMethodService } @Override + public boolean onEvaluateFullscreenMode() { + DisplayMetrics dm = getResources().getDisplayMetrics(); + float displayHeight = dm.heightPixels; + // If the display is more than X inches high, don't go to fullscreen mode + float dimen = getResources().getDimension(R.dimen.max_height_for_fullscreen); + if (displayHeight > dimen) { + return false; + } else { + return super.onEvaluateFullscreenMode(); + } + } + + @Override public boolean onKeyDown(int keyCode, KeyEvent event) { switch (keyCode) { case KeyEvent.KEYCODE_BACK: @@ -998,6 +1063,7 @@ public class LatinIME extends InputMethodService } InputConnection ic = getCurrentInputConnection(); if (ic == null) return; + abortCorrection(false); ic.beginBatchEdit(); if (mPredicting) { commitTyped(ic); @@ -1022,6 +1088,8 @@ public class LatinIME extends InputMethodService InputConnection ic = getCurrentInputConnection(); if (ic == null) return; + ic.beginBatchEdit(); + if (mAfterVoiceInput) { // Don't log delete if the user is pressing delete at // the beginning of the text box (hence not deleting anything) @@ -1054,6 +1122,7 @@ public class LatinIME extends InputMethodService TextEntryState.backspace(); if (TextEntryState.getState() == TextEntryState.STATE_UNDO_COMMIT) { revertLastWord(deleteChar); + ic.endBatchEdit(); return; } else if (mEnteredText != null && sameAsTextBeforeCursor(ic, mEnteredText)) { ic.deleteSurroundingText(mEnteredText.length(), 0); @@ -1064,6 +1133,7 @@ public class LatinIME extends InputMethodService } } 
mJustRevertedSeparator = null; + ic.endBatchEdit(); } private void handleShift() { @@ -1077,6 +1147,13 @@ public class LatinIME extends InputMethodService } } + private void abortCorrection(boolean force) { + if (force || TextEntryState.isCorrecting()) { + getCurrentInputConnection().finishComposingText(); + setSuggestions(null, false, false, false); + } + } + private void handleCharacter(int primaryCode, int[] keyCodes) { if (VOICE_INSTALLED && mVoiceInputHighlighted) { commitVoiceInput(); @@ -1086,11 +1163,13 @@ public class LatinIME extends InputMethodService // Assume input length is 1. This assumption fails for smiley face insertions. mVoiceInput.incrementTextModificationInsertCount(1); } + abortCorrection(false); if (isAlphabet(primaryCode) && isPredictionOn() && !isCursorTouchingWord()) { if (!mPredicting) { mPredicting = true; mComposing.setLength(0); + saveWordInHistory(mBestWord); mWord.reset(); } } @@ -1122,7 +1201,7 @@ public class LatinIME extends InputMethodService sendKeyChar((char)primaryCode); } updateShiftKeyState(getCurrentInputEditorInfo()); - measureCps(); + if (LatinIME.PERF_DEBUG) measureCps(); TextEntryState.typedCharacter((char) primaryCode, isWordSeparator(primaryCode)); } @@ -1141,6 +1220,7 @@ public class LatinIME extends InputMethodService InputConnection ic = getCurrentInputConnection(); if (ic != null) { ic.beginBatchEdit(); + abortCorrection(false); } if (mPredicting) { // In certain languages where single quote is a separator, it's better @@ -1180,7 +1260,6 @@ public class LatinIME extends InputMethodService && primaryCode != KEYCODE_ENTER) { swapPunctuationAndSpace(); } else if (isPredictionOn() && primaryCode == KEYCODE_SPACE) { - //else if (TextEntryState.STATE_SPACE_AFTER_ACCEPTED) { doubleSpace(); } if (pickedDefault && mBestWord != null) { @@ -1202,6 +1281,18 @@ public class LatinIME extends InputMethodService TextEntryState.endSession(); } + private void saveWordInHistory(CharSequence result) { + if (mWord.size() <= 1) { + 
mWord.reset(); + return; + } + // Make a copy of the CharSequence, since it is/could be a mutable CharSequence + final String resultCopy = result.toString(); + TypedWordAlternatives entry = new TypedWordAlternatives(resultCopy, + new WordComposer(mWord)); + mWordHistory.add(entry); + } + private void checkToggleCapsLock() { if (mInputView.getKeyboard().isShifted()) { toggleCapsLock(); @@ -1220,6 +1311,11 @@ public class LatinIME extends InputMethodService mHandler.sendMessageDelayed(mHandler.obtainMessage(MSG_UPDATE_SUGGESTIONS), 100); } + private void postUpdateOldSuggestions() { + mHandler.removeMessages(MSG_UPDATE_OLD_SUGGESTIONS); + mHandler.sendMessageDelayed(mHandler.obtainMessage(MSG_UPDATE_OLD_SUGGESTIONS), 300); + } + private boolean isPredictionOn() { boolean predictionOn = mPredictionOn; return predictionOn; @@ -1403,9 +1499,6 @@ public class LatinIME extends InputMethodService // Show N-Best alternates, if there is more than one choice. if (nBest.size() > 1) { mImmediatelyAfterVoiceSuggestions = true; - mShowingVoiceSuggestions = true; - setSuggestions(nBest.subList(1, nBest.size()), false, true, true); - setCandidatesViewShown(true); } mVoiceInputHighlighted = true; mWordToSuggestions.putAll(mVoiceResults.alternatives); @@ -1444,24 +1537,52 @@ public class LatinIME extends InputMethodService setNextSuggestions(); return; } + showSuggestions(mWord); + } + + private List<CharSequence> getTypedSuggestions(WordComposer word) { + List<CharSequence> stringList = mSuggest.getSuggestions(mInputView, word, false, null); + return stringList; + } + + private void showCorrections(WordAlternatives alternatives) { + List<CharSequence> stringList = alternatives.getAlternatives(); + ((LatinKeyboard) mInputView.getKeyboard()).setPreferredLetters(null); + showSuggestions(stringList, alternatives.getOriginalWord(), false, false); + } + + private void showSuggestions(WordComposer word) { + //long startTime = System.currentTimeMillis(); // TIME MEASUREMENT! 
+ // TODO Maybe need better way of retrieving previous word + CharSequence prevWord = EditingUtil.getPreviousWord(getCurrentInputConnection()); + List<CharSequence> stringList = mSuggest.getSuggestions(mInputView, word, false, + prevWord); + //long stopTime = System.currentTimeMillis(); // TIME MEASUREMENT! + //Log.d("LatinIME","Suggest Total Time - " + (stopTime - startTime)); - List<CharSequence> stringList = mSuggest.getSuggestions(mInputView, mWord, false); int[] nextLettersFrequencies = mSuggest.getNextLettersFrequencies(); ((LatinKeyboard) mInputView.getKeyboard()).setPreferredLetters(nextLettersFrequencies); boolean correctionAvailable = !mInputTypeNoAutoCorrect && mSuggest.hasMinimalCorrection(); //|| mCorrectionMode == mSuggest.CORRECTION_FULL; - CharSequence typedWord = mWord.getTypedWord(); + CharSequence typedWord = word.getTypedWord(); // If we're in basic correct boolean typedWordValid = mSuggest.isValidWord(typedWord) || - (preferCapitalization() && mSuggest.isValidWord(typedWord.toString().toLowerCase())); + (preferCapitalization() + && mSuggest.isValidWord(typedWord.toString().toLowerCase())); if (mCorrectionMode == Suggest.CORRECTION_FULL) { correctionAvailable |= typedWordValid; } // Don't auto-correct words with multiple capital letter - correctionAvailable &= !mWord.isMostlyCaps(); + correctionAvailable &= !word.isMostlyCaps(); + correctionAvailable &= !TextEntryState.isCorrecting(); + + showSuggestions(stringList, typedWord, typedWordValid, correctionAvailable); + } + private void showSuggestions(List<CharSequence> stringList, CharSequence typedWord, + boolean typedWordValid, boolean correctionAvailable) { setSuggestions(stringList, false, typedWordValid, correctionAvailable); if (stringList.size() > 0) { if (correctionAvailable && !typedWordValid && stringList.size() > 1) { @@ -1484,7 +1605,7 @@ public class LatinIME extends InputMethodService if (mBestWord != null && mBestWord.length() > 0) { 
TextEntryState.acceptedDefault(mWord.getTypedWord(), mBestWord); mJustAccepted = true; - pickSuggestion(mBestWord); + pickSuggestion(mBestWord, false); // Add the word to the auto dictionary if it's not a known word checkAddToDictionary(mBestWord, AutoDictionary.FREQUENCY_FOR_TYPED); } @@ -1499,6 +1620,7 @@ public class LatinIME extends InputMethodService mVoiceInput.logTextModifiedByChooseSuggestion(suggestion.length()); } + final boolean correcting = TextEntryState.isCorrecting(); InputConnection ic = getCurrentInputConnection(); if (ic != null) { ic.beginBatchEdit(); @@ -1529,20 +1651,22 @@ public class LatinIME extends InputMethodService return; } mJustAccepted = true; - pickSuggestion(suggestion); + pickSuggestion(suggestion, correcting); // Add the word to the auto dictionary if it's not a known word if (index == 0) { checkAddToDictionary(suggestion, AutoDictionary.FREQUENCY_FOR_PICKED); } TextEntryState.acceptedSuggestion(mComposing.toString(), suggestion); // Follow it with a space - if (mAutoSpace) { + if (mAutoSpace && !correcting) { sendSpace(); mJustAddedAutoSpace = true; } + // Fool the state watcher so that a subsequent backspace will not do a revert TextEntryState.typedCharacter((char) KEYCODE_SPACE, true); - if (index == 0 && mCorrectionMode > 0 && !mSuggest.isValidWord(suggestion)) { + if (index == 0 && mCorrectionMode > 0 && !mSuggest.isValidWord(suggestion) + && !mSuggest.isValidWord(suggestion.toString().toLowerCase())) { mCandidateView.showAddToDictionaryHint(suggestion); } if (ic != null) { @@ -1550,7 +1674,38 @@ public class LatinIME extends InputMethodService } } - private void pickSuggestion(CharSequence suggestion) { + private void rememberReplacedWord(CharSequence suggestion) { + if (mShowingVoiceSuggestions) { + // Retain the replaced word in the alternatives array. 
+ InputConnection ic = getCurrentInputConnection(); + EditingUtil.Range range = new EditingUtil.Range(); + String wordToBeReplaced = EditingUtil.getWordAtCursor(getCurrentInputConnection(), + mWordSeparators, range); + if (!mWordToSuggestions.containsKey(wordToBeReplaced)) { + wordToBeReplaced = wordToBeReplaced.toLowerCase(); + } + if (mWordToSuggestions.containsKey(wordToBeReplaced)) { + List<CharSequence> suggestions = mWordToSuggestions.get(wordToBeReplaced); + if (suggestions.contains(suggestion)) { + suggestions.remove(suggestion); + } + suggestions.add(wordToBeReplaced); + mWordToSuggestions.remove(wordToBeReplaced); + mWordToSuggestions.put(suggestion.toString(), suggestions); + } + } + // TODO: implement rememberReplacedWord for typed words + } + + /** + * Commits the chosen word to the text field and saves it for later + * retrieval. + * @param suggestion the suggestion picked by the user to be committed to + * the text field + * @param correcting whether this is due to a correction of an existing + * word. 
+ */ + private void pickSuggestion(CharSequence suggestion, boolean correcting) { if (mCapsLock) { suggestion = suggestion.toString().toUpperCase(); } else if (preferCapitalization() @@ -1560,13 +1715,20 @@ public class LatinIME extends InputMethodService } InputConnection ic = getCurrentInputConnection(); if (ic != null) { - if (mSuggestionShouldReplaceCurrentWord) { + rememberReplacedWord(suggestion); + // If text is in correction mode and we're not using composing + // text to underline, then the word at the cursor position needs + // to be removed before committing the correction + if (correcting && !MODIFY_TEXT_FOR_CORRECTION) { + if (mLastSelectionStart < mLastSelectionEnd) { + ic.setSelection(mLastSelectionStart, mLastSelectionStart); + } EditingUtil.deleteWordAtCursor(ic, getWordSeparators()); } - if (!VoiceInput.DELETE_SYMBOL.equals(suggestion)) { - ic.commitText(suggestion, 1); - } + + ic.commitText(suggestion, 1); } + saveWordInHistory(suggestion); mPredicting = false; mCommittedLength = suggestion.length(); ((LatinKeyboard) mInputView.getKeyboard()).setPreferredLetters(null); @@ -1574,16 +1736,130 @@ public class LatinIME extends InputMethodService updateShiftKeyState(getCurrentInputEditorInfo()); } + private void setOldSuggestions() { + // TODO: Inefficient to check if touching word and then get the touching word. Do it + // in one go. + mShowingVoiceSuggestions = false; + InputConnection ic = getCurrentInputConnection(); + if (ic == null) return; + ic.beginBatchEdit(); + // If there is a selection, then undo the selection first. Unfortunately this causes + // a flicker. TODO: Add getSelectionText() to InputConnection API. 
+ if (mLastSelectionStart < mLastSelectionEnd) { + ic.setSelection(mLastSelectionStart, mLastSelectionStart); + } + if (!mPredicting && isCursorTouchingWord()) { + EditingUtil.Range range = new EditingUtil.Range(); + CharSequence touching = EditingUtil.getWordAtCursor(getCurrentInputConnection(), + mWordSeparators, range); + if (touching != null && touching.length() > 1) { + if (mWordSeparators.indexOf(touching.charAt(touching.length() - 1)) > 0) { + touching = touching.toString().substring(0, touching.length() - 1); + } + + // Search for result in spoken word alternatives + // TODO: possibly combine the spoken suggestions with the typed suggestions. + String selectedWord = touching.toString().trim(); + if (!mWordToSuggestions.containsKey(selectedWord)){ + selectedWord = selectedWord.toLowerCase(); + } + if (mWordToSuggestions.containsKey(selectedWord)){ + mShowingVoiceSuggestions = true; + mSuggestionShouldReplaceCurrentWord = true; + underlineWord(touching, range.charsBefore, range.charsAfter); + List<CharSequence> suggestions = mWordToSuggestions.get(selectedWord); + // If the first letter of touching is capitalized, make all the suggestions + // start with a capital letter. 
+ if (Character.isUpperCase((char) touching.charAt(0))) { + for (int i=0; i< suggestions.size(); i++) { + String origSugg = (String) suggestions.get(i); + String capsSugg = origSugg.toUpperCase().charAt(0) + + origSugg.subSequence(1, origSugg.length()).toString(); + suggestions.set(i,capsSugg); + } + } + setSuggestions(suggestions, false, true, true); + setCandidatesViewShown(true); + TextEntryState.selectedForCorrection(); + ic.endBatchEdit(); + return; + } + // If we didn't find a match, search for result in word history + WordComposer foundWord = null; + WordAlternatives alternatives = null; + for (WordAlternatives entry : mWordHistory) { + if (TextUtils.equals(entry.getChosenWord(), touching)) { + if (entry instanceof TypedWordAlternatives) { + foundWord = ((TypedWordAlternatives)entry).word; + } + alternatives = entry; + break; + } + } + // If we didn't find a match, at least suggest completions + if (foundWord == null && mSuggest.isValidWord(touching)) { + foundWord = new WordComposer(); + for (int i = 0; i < touching.length(); i++) { + foundWord.add(touching.charAt(i), new int[] { touching.charAt(i) }); + } + } + // Found a match, show suggestions + if (foundWord != null || alternatives != null) { + mSuggestionShouldReplaceCurrentWord = true; + underlineWord(touching, range.charsBefore, range.charsAfter); + TextEntryState.selectedForCorrection(); + if (alternatives == null) alternatives = new TypedWordAlternatives(touching, + foundWord); + showCorrections(alternatives); + if (foundWord != null) { + mWord = new WordComposer(foundWord); + } else { + mWord.reset(); + } + // Revert the selection + if (mLastSelectionStart < mLastSelectionEnd) { + ic.setSelection(mLastSelectionStart, mLastSelectionEnd); + } + ic.endBatchEdit(); + return; + } + abortCorrection(true); + } else { + abortCorrection(true); + setNextSuggestions(); + } + } else { + abortCorrection(true); + } + // Revert the selection + if (mLastSelectionStart < mLastSelectionEnd) { + 
ic.setSelection(mLastSelectionStart, mLastSelectionEnd); + } + ic.endBatchEdit(); + } + private void setNextSuggestions() { setSuggestions(mSuggestPuncList, false, false, false); } + private void underlineWord(CharSequence word, int left, int right) { + InputConnection ic = getCurrentInputConnection(); + if (ic == null) return; + if (MODIFY_TEXT_FOR_CORRECTION) { + ic.finishComposingText(); + ic.deleteSurroundingText(left, right); + ic.setComposingText(word, 1); + } + ic.setSelection(mLastSelectionStart, mLastSelectionStart); + } + private void checkAddToDictionary(CharSequence suggestion, int frequencyDelta) { + if (suggestion == null || suggestion.length() < 1) return; // Only auto-add to dictionary if auto-correct is ON. Otherwise we'll be // adding words in situations where the user or application really didn't // want corrections enabled or learned. if (!(mCorrectionMode == Suggest.CORRECTION_FULL)) return; - if (mAutoDictionary.isValidWord(suggestion) + if (suggestion != null && mAutoDictionary.isValidWord(suggestion) || (!mSuggest.isValidWord(suggestion.toString()) && !mSuggest.isValidWord(suggestion.toString().toLowerCase()))) { mAutoDictionary.addWord(suggestion.toString(), frequencyDelta); @@ -1616,7 +1892,6 @@ public class LatinIME extends InputMethodService if (!mPredicting && length > 0) { final InputConnection ic = getCurrentInputConnection(); mPredicting = true; - ic.beginBatchEdit(); mJustRevertedSeparator = ic.getTextBeforeCursor(1, 0); if (deleteChar) ic.deleteSurroundingText(1, 0); int toDelete = mCommittedLength; @@ -1628,7 +1903,6 @@ public class LatinIME extends InputMethodService ic.deleteSurroundingText(toDelete, 0); ic.setComposingText(mComposing, 1); TextEntryState.backspace(); - ic.endBatchEdit(); postUpdateSuggestions(); } else { sendDownUpKeyEvents(KeyEvent.KEYCODE_DEL); @@ -1842,6 +2116,8 @@ public class LatinIME extends InputMethodService mCorrectionMode = (mAutoCorrectOn && mAutoCorrectEnabled) ? 
Suggest.CORRECTION_FULL : (mAutoCorrectOn ? Suggest.CORRECTION_BASIC : Suggest.CORRECTION_NONE); + mCorrectionMode = (mBigramSuggestionEnabled && mAutoCorrectOn && mAutoCorrectEnabled) + ? Suggest.CORRECTION_FULL_BIGRAM : mCorrectionMode; if (mSuggest != null) { mSuggest.setCorrectionMode(mCorrectionMode); } @@ -1858,7 +2134,7 @@ public class LatinIME extends InputMethodService launchSettings(LatinIMESettings.class); } - protected void launchSettings(Class settingsClass) { + protected void launchSettings(Class<LatinIMESettings> settingsClass) { handleClose(); Intent intent = new Intent(); intent.setClass(LatinIME.this, settingsClass); @@ -1908,6 +2184,7 @@ public class LatinIME extends InputMethodService } mAutoCorrectEnabled = sp.getBoolean(PREF_AUTO_COMPLETE, mResources.getBoolean(R.bool.enable_autocorrect)) & mShowSuggestions; + mBigramSuggestionEnabled = sp.getBoolean(PREF_BIGRAM_SUGGESTIONS, true) & mShowSuggestions; updateCorrectionMode(); updateAutoTextEnabled(mResources.getConfiguration().locale); mLanguageSwitcher.loadLocales(sp); @@ -1995,15 +2272,12 @@ public class LatinIME extends InputMethodService // Characters per second measurement - private static final boolean PERF_DEBUG = false; private long mLastCpsTime; private static final int CPS_BUFFER_SIZE = 16; private long[] mCpsIntervals = new long[CPS_BUFFER_SIZE]; private int mCpsIndex; - private boolean mInputTypeNoAutoCorrect; private void measureCps() { - if (!LatinIME.PERF_DEBUG) return; long now = System.currentTimeMillis(); if (mLastCpsTime == 0) mLastCpsTime = now - 100; // Initial mCpsIntervals[mCpsIndex] = now - mLastCpsTime; diff --git a/java/src/com/android/inputmethod/latin/LatinKeyboard.java b/java/src/com/android/inputmethod/latin/LatinKeyboard.java index 6aea5d13a..ea6b74e1b 100644 --- a/java/src/com/android/inputmethod/latin/LatinKeyboard.java +++ b/java/src/com/android/inputmethod/latin/LatinKeyboard.java @@ -47,7 +47,6 @@ public class LatinKeyboard extends Keyboard { private Drawable 
mShiftLockIcon; private Drawable mShiftLockPreviewIcon; private Drawable mOldShiftIcon; - private Drawable mOldShiftPreviewIcon; private Drawable mSpaceIcon; private Drawable mSpacePreviewIcon; private Drawable mMicIcon; @@ -68,7 +67,6 @@ public class LatinKeyboard extends Keyboard { private LanguageSwitcher mLanguageSwitcher; private Resources mRes; private Context mContext; - private int mMode; // Whether this keyboard has voice icon on it private boolean mHasVoiceButton; // Whether voice icon is enabled at all @@ -77,9 +75,7 @@ public class LatinKeyboard extends Keyboard { private CharSequence m123Label; private boolean mCurrentlyInSpace; private SlidingLocaleDrawable mSlidingLocaleIcon; - private Rect mBounds = new Rect(); private int[] mPrefLetterFrequencies; - private boolean mPreemptiveCorrection; private int mPrefLetter; private int mPrefLetterX; private int mPrefLetterY; @@ -107,7 +103,6 @@ public class LatinKeyboard extends Keyboard { super(context, xmlLayoutResId, mode); final Resources res = context.getResources(); mContext = context; - mMode = mode; mRes = res; mShiftLockIcon = res.getDrawable(R.drawable.sym_keyboard_shift_locked); mShiftLockPreviewIcon = res.getDrawable(R.drawable.sym_keyboard_feedback_shift_locked); @@ -224,7 +219,6 @@ public class LatinKeyboard extends Keyboard { ((LatinKey)mShiftKey).enableShiftLock(); } mOldShiftIcon = mShiftKey.icon; - mOldShiftPreviewIcon = mShiftKey.iconPreview; } } @@ -338,21 +332,19 @@ public class LatinKeyboard extends Keyboard { } private void drawSpaceBar(Canvas canvas, int width, int height, int opacity) { - canvas.drawColor(0x00000000, PorterDuff.Mode.CLEAR); + canvas.drawColor(mRes.getColor(R.color.latinkeyboard_transparent), PorterDuff.Mode.CLEAR); Paint paint = new Paint(); paint.setAntiAlias(true); paint.setAlpha(opacity); // Get the text size from the theme paint.setTextSize(getTextSizeFromTheme(android.R.style.TextAppearance_Small, 14)); paint.setTextAlign(Align.CENTER); - //// Draw a drop shadow 
for the text - //paint.setShadowLayer(2f, 0, 0, 0xFF000000); final String language = getInputLanguage(mSpaceKey.width, paint); final int ascent = (int) -paint.ascent(); - paint.setColor(0x80000000); + paint.setColor(mRes.getColor(R.color.latinkeyboard_bar_language_shadow)); canvas.drawText(language, width / 2, ascent - 1, paint); - paint.setColor(0xFF808080); + paint.setColor(mRes.getColor(R.color.latinkeyboard_bar_language_text)); canvas.drawText(language, width / 2, ascent, paint); // Put arrows on either side of the text @@ -503,9 +495,10 @@ public class LatinKeyboard extends Keyboard { // Handle preferred next letter final int[] pref = mPrefLetterFrequencies; if (mPrefLetter > 0) { - if (DEBUG_PREFERRED_LETTER && mPrefLetter == code - && !key.isInsideSuper(x, y)) { - Log.d(TAG, "CORRECTED !!!!!!"); + if (DEBUG_PREFERRED_LETTER) { + if (mPrefLetter == code && !key.isInsideSuper(x, y)) { + Log.d(TAG, "CORRECTED !!!!!!"); + } } return mPrefLetter == code; } else { @@ -718,7 +711,7 @@ public class LatinKeyboard extends Keyboard { public void draw(Canvas canvas) { canvas.save(); if (mHitThreshold) { - mTextPaint.setColor(0xFF000000); + mTextPaint.setColor(mRes.getColor(R.color.latinkeyboard_text_color)); canvas.clipRect(0, 0, mWidth, mHeight); if (mCurrentLanguage == null) { mCurrentLanguage = getInputLanguage(mWidth, mTextPaint); diff --git a/java/src/com/android/inputmethod/latin/LatinKeyboardView.java b/java/src/com/android/inputmethod/latin/LatinKeyboardView.java index 74fc475e6..323f4bf6b 100644 --- a/java/src/com/android/inputmethod/latin/LatinKeyboardView.java +++ b/java/src/com/android/inputmethod/latin/LatinKeyboardView.java @@ -23,7 +23,6 @@ import android.graphics.Canvas; import android.graphics.Paint; import android.inputmethodservice.Keyboard; import android.inputmethodservice.KeyboardView; -import android.inputmethodservice.KeyboardView.OnKeyboardActionListener; import android.inputmethodservice.Keyboard.Key; import android.os.Handler; import 
android.os.Message; @@ -452,27 +451,30 @@ public class LatinKeyboardView extends KeyboardView { } } } - - void startPlaying(String s) { - if (!DEBUG_AUTO_PLAY) return; - if (s == null) return; - mStringToPlay = s.toLowerCase(); - mPlaying = true; - mDownDelivered = false; - mStringIndex = 0; - mHandler2.sendEmptyMessageDelayed(MSG_TOUCH_DOWN, 10); + + public void startPlaying(String s) { + if (DEBUG_AUTO_PLAY) { + if (s == null) return; + mStringToPlay = s.toLowerCase(); + mPlaying = true; + mDownDelivered = false; + mStringIndex = 0; + mHandler2.sendEmptyMessageDelayed(MSG_TOUCH_DOWN, 10); + } } @Override public void draw(Canvas c) { super.draw(c); - if (DEBUG_AUTO_PLAY && mPlaying) { - mHandler2.removeMessages(MSG_TOUCH_DOWN); - mHandler2.removeMessages(MSG_TOUCH_UP); - if (mDownDelivered) { - mHandler2.sendEmptyMessageDelayed(MSG_TOUCH_UP, 20); - } else { - mHandler2.sendEmptyMessageDelayed(MSG_TOUCH_DOWN, 20); + if (DEBUG_AUTO_PLAY) { + if (mPlaying) { + mHandler2.removeMessages(MSG_TOUCH_DOWN); + mHandler2.removeMessages(MSG_TOUCH_UP); + if (mDownDelivered) { + mHandler2.sendEmptyMessageDelayed(MSG_TOUCH_UP, 20); + } else { + mHandler2.sendEmptyMessageDelayed(MSG_TOUCH_DOWN, 20); + } } } if (DEBUG_LINE) { diff --git a/java/src/com/android/inputmethod/latin/Suggest.java b/java/src/com/android/inputmethod/latin/Suggest.java index a70bea003..3e6090c72 100755 --- a/java/src/com/android/inputmethod/latin/Suggest.java +++ b/java/src/com/android/inputmethod/latin/Suggest.java @@ -16,18 +16,17 @@ package com.android.inputmethod.latin; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + import android.content.Context; import android.text.AutoText; import android.text.TextUtils; import android.util.Log; import android.view.View; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -import com.android.inputmethod.latin.WordComposer; - /** * This class loads a dictionary and provides a 
list of suggestions for a given sequence of * characters. This includes corrections and completions. @@ -38,6 +37,21 @@ public class Suggest implements Dictionary.WordCallback { public static final int CORRECTION_NONE = 0; public static final int CORRECTION_BASIC = 1; public static final int CORRECTION_FULL = 2; + public static final int CORRECTION_FULL_BIGRAM = 3; + + /** + * Words that appear in both bigram and unigram data gets multiplier ranging from + * BIGRAM_MULTIPLIER_MIN to BIGRAM_MULTIPLIER_MAX depending on the frequency score from + * bigram data. + */ + public static final double BIGRAM_MULTIPLIER_MIN = 1.2; + public static final double BIGRAM_MULTIPLIER_MAX = 1.5; + + /** + * Maximum possible bigram frequency. Will depend on how many bits are being used in data + * structure. Maximum bigram freqeuncy will get the BIGRAM_MULTIPLIER_MAX as the multiplier. + */ + public static final int MAXIMUM_BIGRAM_FREQUENCY = 127; static final int LARGE_DICTIONARY_THRESHOLD = 200 * 1000; @@ -50,10 +64,13 @@ public class Suggest implements Dictionary.WordCallback { private Dictionary mContactsDictionary; private int mPrefMaxSuggestions = 12; + private int mPrefMaxBigrams = 255; private boolean mAutoTextEnabled; private int[] mPriorities = new int[mPrefMaxSuggestions]; + private int[] mBigramPriorities = new int[mPrefMaxBigrams]; + // Handle predictive correction for only the first 1280 characters for performance reasons // If we support scripts that need latin characters beyond that, we should probably use some // kind of a sparse array or language specific list with a mapping lookup table. @@ -61,6 +78,7 @@ public class Suggest implements Dictionary.WordCallback { // latin characters. 
private int[] mNextLettersFrequencies = new int[1280]; private ArrayList<CharSequence> mSuggestions = new ArrayList<CharSequence>(); + private ArrayList<CharSequence> mBigramSuggestions = new ArrayList<CharSequence>(); private ArrayList<CharSequence> mStringPool = new ArrayList<CharSequence>(); private boolean mHaveCorrection; private CharSequence mOriginalWord; @@ -69,11 +87,19 @@ public class Suggest implements Dictionary.WordCallback { private int mCorrectionMode = CORRECTION_BASIC; - public Suggest(Context context, int dictionaryResId) { mMainDict = new BinaryDictionary(context, dictionaryResId); + initPool(); + } + + public Suggest(Context context, ByteBuffer byteBuffer) { + mMainDict = new BinaryDictionary(context, byteBuffer); + initPool(); + } + + private void initPool() { for (int i = 0; i < mPrefMaxSuggestions; i++) { - StringBuilder sb = new StringBuilder(32); + StringBuilder sb = new StringBuilder(Dictionary.MAX_WORD_LENGTH); mStringPool.add(sb); } } @@ -125,9 +151,10 @@ public class Suggest implements Dictionary.WordCallback { } mPrefMaxSuggestions = maxSuggestions; mPriorities = new int[mPrefMaxSuggestions]; - collectGarbage(); + mBigramPriorities = new int[mPrefMaxBigrams]; + collectGarbage(mSuggestions, mPrefMaxSuggestions); while (mStringPool.size() < mPrefMaxSuggestions) { - StringBuilder sb = new StringBuilder(32); + StringBuilder sb = new StringBuilder(Dictionary.MAX_WORD_LENGTH); mStringPool.add(sb); } } @@ -162,17 +189,16 @@ public class Suggest implements Dictionary.WordCallback { /** * Returns a list of words that match the list of character codes passed in. * This list will be overwritten the next time this function is called. - * @param a view for retrieving the context for AutoText - * @param codes the list of codes. Each list item contains an array of character codes - * in order of probability where the character at index 0 in the array has the highest - * probability. 
+ * @param view a view for retrieving the context for AutoText + * @param wordComposer contains what is currently being typed + * @param prevWordForBigram previous word (used only for bigram) * @return list of suggestions. */ public List<CharSequence> getSuggestions(View view, WordComposer wordComposer, - boolean includeTypedWordIfValid) { + boolean includeTypedWordIfValid, CharSequence prevWordForBigram) { mHaveCorrection = false; mCapitalize = wordComposer.isCapitalized(); - collectGarbage(); + collectGarbage(mSuggestions, mPrefMaxSuggestions); Arrays.fill(mPriorities, 0); Arrays.fill(mNextLettersFrequencies, 0); @@ -184,8 +210,39 @@ public class Suggest implements Dictionary.WordCallback { } else { mLowerOriginalWord = ""; } - // Search the dictionary only if there are at least 2 characters - if (wordComposer.size() > 1) { + + if (wordComposer.size() == 1 && (mCorrectionMode == CORRECTION_FULL_BIGRAM + || mCorrectionMode == CORRECTION_BASIC)) { + // At first character, just get the bigrams + Arrays.fill(mBigramPriorities, 0); + collectGarbage(mBigramSuggestions, mPrefMaxBigrams); + + if (!TextUtils.isEmpty(prevWordForBigram)) { + CharSequence lowerPrevWord = prevWordForBigram.toString().toLowerCase(); + if (mMainDict.isValidWord(lowerPrevWord)) { + prevWordForBigram = lowerPrevWord; + } + mMainDict.getBigrams(wordComposer, prevWordForBigram, this, + mNextLettersFrequencies); + char currentChar = wordComposer.getTypedWord().charAt(0); + int count = 0; + int bigramSuggestionSize = mBigramSuggestions.size(); + for (int i = 0; i < bigramSuggestionSize; i++) { + if (mBigramSuggestions.get(i).charAt(0) == currentChar) { + int poolSize = mStringPool.size(); + StringBuilder sb = poolSize > 0 ? 
+ (StringBuilder) mStringPool.remove(poolSize - 1) + : new StringBuilder(Dictionary.MAX_WORD_LENGTH); + sb.setLength(0); + sb.append(mBigramSuggestions.get(i)); + mSuggestions.add(count++, sb); + if (count > mPrefMaxSuggestions) break; + } + } + } + + } else if (wordComposer.size() > 1) { + // Search the dictionary only if there are at least 2 characters if (mUserDictionary != null || mContactsDictionary != null) { if (mUserDictionary != null) { mUserDictionary.getWords(wordComposer, this, mNextLettersFrequencies); @@ -195,21 +252,26 @@ public class Suggest implements Dictionary.WordCallback { } if (mSuggestions.size() > 0 && isValidWord(mOriginalWord) - && mCorrectionMode == CORRECTION_FULL) { + && (mCorrectionMode == CORRECTION_FULL + || mCorrectionMode == CORRECTION_FULL_BIGRAM)) { mHaveCorrection = true; } } mMainDict.getWords(wordComposer, this, mNextLettersFrequencies); - if (mCorrectionMode == CORRECTION_FULL && mSuggestions.size() > 0) { + if ((mCorrectionMode == CORRECTION_FULL || mCorrectionMode == CORRECTION_FULL_BIGRAM) + && mSuggestions.size() > 0) { mHaveCorrection = true; } } + if (mOriginalWord != null) { mSuggestions.add(0, mOriginalWord.toString()); } - + // Check if the first suggestion has a minimum number of characters in common - if (mCorrectionMode == CORRECTION_FULL && mSuggestions.size() > 1) { + if (wordComposer.size() > 1 && mSuggestions.size() > 1 + && (mCorrectionMode == CORRECTION_FULL + || mCorrectionMode == CORRECTION_FULL_BIGRAM)) { if (!haveSufficientCommonality(mLowerOriginalWord, mSuggestions.get(1))) { mHaveCorrection = false; } @@ -240,7 +302,6 @@ public class Suggest implements Dictionary.WordCallback { i++; } } - removeDupes(); return mSuggestions; } @@ -294,20 +355,50 @@ public class Suggest implements Dictionary.WordCallback { return false; } - public boolean addWord(final char[] word, final int offset, final int length, final int freq) { + public boolean addWord(final char[] word, final int offset, final int length, int 
freq, + final Dictionary.DataType dataType) { + ArrayList<CharSequence> suggestions; + int[] priorities; + int prefMaxSuggestions; + if(dataType == Dictionary.DataType.BIGRAM) { + suggestions = mBigramSuggestions; + priorities = mBigramPriorities; + prefMaxSuggestions = mPrefMaxBigrams; + } else { + suggestions = mSuggestions; + priorities = mPriorities; + prefMaxSuggestions = mPrefMaxSuggestions; + } + int pos = 0; - final int[] priorities = mPriorities; - final int prefMaxSuggestions = mPrefMaxSuggestions; + // Check if it's the same word, only caps are different if (compareCaseInsensitive(mLowerOriginalWord, word, offset, length)) { pos = 0; } else { + if (dataType == Dictionary.DataType.UNIGRAM) { + // Check if the word was already added before (by bigram data) + int bigramSuggestion = searchBigramSuggestion(word,offset,length); + if(bigramSuggestion >= 0) { + // turn freq from bigram into multiplier specified above + double multiplier = (((double) mBigramPriorities[bigramSuggestion]) + / MAXIMUM_BIGRAM_FREQUENCY) + * (BIGRAM_MULTIPLIER_MAX - BIGRAM_MULTIPLIER_MIN) + + BIGRAM_MULTIPLIER_MIN; + /* Log.d("Suggest","bigram num: " + bigramSuggestion + + " wordB: " + mBigramSuggestions.get(bigramSuggestion).toString() + + " currentPriority: " + freq + " bigramPriority: " + + mBigramPriorities[bigramSuggestion] + + " multiplier: " + multiplier); */ + freq = (int)Math.round((freq * multiplier)); + } + } + // Check the last one's priority and bail if (priorities[prefMaxSuggestions - 1] >= freq) return true; while (pos < prefMaxSuggestions) { if (priorities[pos] < freq - || (priorities[pos] == freq && length < mSuggestions - .get(pos).length())) { + || (priorities[pos] == freq && length < suggestions.get(pos).length())) { break; } pos++; @@ -317,12 +408,13 @@ public class Suggest implements Dictionary.WordCallback { if (pos >= prefMaxSuggestions) { return true; } + System.arraycopy(priorities, pos, priorities, pos + 1, prefMaxSuggestions - pos - 1); priorities[pos] = 
freq; int poolSize = mStringPool.size(); StringBuilder sb = poolSize > 0 ? (StringBuilder) mStringPool.remove(poolSize - 1) - : new StringBuilder(32); + : new StringBuilder(Dictionary.MAX_WORD_LENGTH); sb.setLength(0); if (mCapitalize) { sb.append(Character.toUpperCase(word[offset])); @@ -332,9 +424,9 @@ public class Suggest implements Dictionary.WordCallback { } else { sb.append(word, offset, length); } - mSuggestions.add(pos, sb); - if (mSuggestions.size() > prefMaxSuggestions) { - CharSequence garbage = mSuggestions.remove(prefMaxSuggestions); + suggestions.add(pos, sb); + if (suggestions.size() > prefMaxSuggestions) { + CharSequence garbage = suggestions.remove(prefMaxSuggestions); if (garbage instanceof StringBuilder) { mStringPool.add(garbage); } @@ -342,6 +434,26 @@ public class Suggest implements Dictionary.WordCallback { return true; } + private int searchBigramSuggestion(final char[] word, final int offset, final int length) { + // TODO This is almost O(n^2). Might need fix. + // search whether the word appeared in bigram data + int bigramSuggestSize = mBigramSuggestions.size(); + for(int i = 0; i < bigramSuggestSize; i++) { + if(mBigramSuggestions.get(i).length() == length) { + boolean chk = true; + for(int j = 0; j < length; j++) { + if(mBigramSuggestions.get(i).charAt(j) != word[offset+j]) { + chk = false; + break; + } + } + if(chk) return i; + } + } + + return -1; + } + public boolean isValidWord(final CharSequence word) { if (word == null || word.length() == 0) { return false; @@ -352,21 +464,21 @@ public class Suggest implements Dictionary.WordCallback { || (mContactsDictionary != null && mContactsDictionary.isValidWord(word)); } - private void collectGarbage() { + private void collectGarbage(ArrayList<CharSequence> suggestions, int prefMaxSuggestions) { int poolSize = mStringPool.size(); - int garbageSize = mSuggestions.size(); - while (poolSize < mPrefMaxSuggestions && garbageSize > 0) { - CharSequence garbage = mSuggestions.get(garbageSize - 1); 
+ int garbageSize = suggestions.size(); + while (poolSize < prefMaxSuggestions && garbageSize > 0) { + CharSequence garbage = suggestions.get(garbageSize - 1); if (garbage != null && garbage instanceof StringBuilder) { mStringPool.add(garbage); poolSize++; } garbageSize--; } - if (poolSize == mPrefMaxSuggestions + 1) { + if (poolSize == prefMaxSuggestions + 1) { Log.w("Suggest", "String pool got too big: " + poolSize); } - mSuggestions.clear(); + suggestions.clear(); } public void close() { diff --git a/java/src/com/android/inputmethod/latin/TextEntryState.java b/java/src/com/android/inputmethod/latin/TextEntryState.java index d056ceb16..8fd9b7129 100644 --- a/java/src/com/android/inputmethod/latin/TextEntryState.java +++ b/java/src/com/android/inputmethod/latin/TextEntryState.java @@ -69,9 +69,11 @@ public class TextEntryState { public static final int STATE_SPACE_AFTER_ACCEPTED = 7; public static final int STATE_SPACE_AFTER_PICKED = 8; public static final int STATE_UNDO_COMMIT = 9; - + public static final int STATE_CORRECTING = 10; + public static final int STATE_PICKED_CORRECTION = 11; + private static int sState = STATE_UNKNOWN; - + private static FileOutputStream sKeyLocationFile; private static FileOutputStream sUserActionFile; @@ -139,12 +141,17 @@ public class TextEntryState { public static void acceptedSuggestion(CharSequence typedWord, CharSequence actualWord) { sManualSuggestCount++; + int oldState = sState; if (typedWord.equals(actualWord)) { acceptedTyped(typedWord); } - sState = STATE_PICKED_SUGGESTION; + sState = oldState == STATE_CORRECTING ? 
STATE_PICKED_CORRECTION : STATE_PICKED_SUGGESTION; } - + + public static void selectedForCorrection() { + sState = STATE_CORRECTING; + } + public static void typedCharacter(char c, boolean isSeparator) { boolean isSpace = c == ' '; switch (sState) { @@ -166,6 +173,7 @@ public class TextEntryState { } break; case STATE_PICKED_SUGGESTION: + case STATE_PICKED_CORRECTION: if (isSpace) { sState = STATE_SPACE_AFTER_PICKED; } else if (isSeparator) { @@ -192,6 +200,10 @@ public class TextEntryState { } else { sState = STATE_IN_WORD; } + break; + case STATE_CORRECTING: + sState = STATE_START; + break; } } @@ -212,7 +224,11 @@ public class TextEntryState { public static int getState() { return sState; } - + + public static boolean isCorrecting() { + return sState == STATE_CORRECTING || sState == STATE_PICKED_CORRECTION; + } + public static void keyPressedAt(Key key, int x, int y) { if (LOGGING && sKeyLocationFile != null && key.codes[0] >= 32) { String out = diff --git a/java/src/com/android/inputmethod/latin/WordComposer.java b/java/src/com/android/inputmethod/latin/WordComposer.java index 19f714ae7..1ea74847a 100644 --- a/java/src/com/android/inputmethod/latin/WordComposer.java +++ b/java/src/com/android/inputmethod/latin/WordComposer.java @@ -44,11 +44,20 @@ public class WordComposer { */ private boolean mIsCapitalized; - WordComposer() { + public WordComposer() { mCodes = new ArrayList<int[]>(12); mTypedWord = new StringBuilder(20); } + WordComposer(WordComposer copy) { + mCodes = (ArrayList<int[]>) copy.mCodes.clone(); + mPreferredWord = copy.mPreferredWord; + mTypedWord = new StringBuilder(copy.mTypedWord); + mCapsCount = copy.mCapsCount; + mAutoCapitalized = copy.mAutoCapitalized; + mIsCapitalized = copy.mIsCapitalized; + } + /** * Clear out the keys registered so far. 
*/ diff --git a/java/src/com/android/inputmethod/voice/LatinIMEWithVoice.java b/java/src/com/android/inputmethod/voice/LatinIMEWithVoice.java deleted file mode 100644 index ccbf5b6bc..000000000 --- a/java/src/com/android/inputmethod/voice/LatinIMEWithVoice.java +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (C) 2009 Google Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ - -package com.android.inputmethod.voice; - -import android.content.Intent; - -import com.android.inputmethod.latin.LatinIME; - -public class LatinIMEWithVoice extends LatinIME { - @Override - protected void launchSettings() { - launchSettings(LatinIMEWithVoiceSettings.class); - } -} diff --git a/java/src/com/android/inputmethod/voice/LatinIMEWithVoiceSettings.java b/java/src/com/android/inputmethod/voice/LatinIMEWithVoiceSettings.java deleted file mode 100644 index 13a58e14d..000000000 --- a/java/src/com/android/inputmethod/voice/LatinIMEWithVoiceSettings.java +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (C) 2009 Google Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the - * License for the specific language governing permissions and limitations under - * the License. - */ - -package com.android.inputmethod.voice; - -import com.android.inputmethod.latin.LatinIMESettings; - -public class LatinIMEWithVoiceSettings extends LatinIMESettings {} diff --git a/java/src/com/android/inputmethod/voice/VoiceInput.java b/java/src/com/android/inputmethod/voice/VoiceInput.java index ac06ab50d..354670969 100644 --- a/java/src/com/android/inputmethod/voice/VoiceInput.java +++ b/java/src/com/android/inputmethod/voice/VoiceInput.java @@ -63,7 +63,7 @@ public class VoiceInput implements OnClickListener { // WARNING! Before enabling this, fix the problem with calling getExtractedText() in // landscape view. It causes Extracted text updates to be rejected due to a token mismatch - public static boolean ENABLE_WORD_CORRECTIONS = false; + public static boolean ENABLE_WORD_CORRECTIONS = true; // Dummy word suggestion which means "delete current word" public static final String DELETE_SYMBOL = " \u00D7 "; // times symbol @@ -308,7 +308,7 @@ public class VoiceInput implements OnClickListener { SettingsUtil.getSettingsInt( mContext.getContentResolver(), SettingsUtil.LATIN_IME_MAX_VOICE_RESULTS, - 1)); + 10)); // Get endpointer params from Gservices. // TODO: Consider caching these values for improved performance on slower devices. diff --git a/java/src/com/google/android/voicesearch/LatinIMEWithVoice.java b/java/src/com/google/android/voicesearch/LatinIMEWithVoice.java deleted file mode 100644 index 8a339d14a..000000000 --- a/java/src/com/google/android/voicesearch/LatinIMEWithVoice.java +++ /dev/null @@ -1,29 +0,0 @@ -/* - * - * Copyright (C) 2009 Google Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. 
You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -* License for the specific language governing permissions and limitations under -* the License. -*/ - -package com.google.android.voicesearch; - -import android.content.Intent; - -import com.android.inputmethod.latin.LatinIME; - -public class LatinIMEWithVoice extends LatinIME { - @Override - protected void launchSettings() { - launchSettings(LatinIMEWithVoiceSettings.class); - } -} diff --git a/java/src/com/google/android/voicesearch/LatinIMEWithVoiceSettings.java b/java/src/com/google/android/voicesearch/LatinIMEWithVoiceSettings.java deleted file mode 100644 index a53cebfd9..000000000 --- a/java/src/com/google/android/voicesearch/LatinIMEWithVoiceSettings.java +++ /dev/null @@ -1,5 +0,0 @@ -package com.google.android.voicesearch; - -import com.android.inputmethod.latin.LatinIMESettings; - -public class LatinIMEWithVoiceSettings extends LatinIMESettings {} diff --git a/native/Android.mk b/native/Android.mk index 12b6964b9..6bad9d638 100644 --- a/native/Android.mk +++ b/native/Android.mk @@ -5,19 +5,16 @@ LOCAL_C_INCLUDES += $(LOCAL_PATH)/src LOCAL_SRC_FILES := \ jni/com_android_inputmethod_latin_BinaryDictionary.cpp \ - src/dictionary.cpp - -LOCAL_C_INCLUDES += \ - external/icu4c/common \ - $(JNI_H_INCLUDE) - -LOCAL_LDLIBS := -lm - -LOCAL_SHARED_LIBRARIES := \ - libandroid_runtime \ - libcutils \ - libutils \ - libicuuc + src/dictionary.cpp \ + src/char_utils.cpp + +# NDK does not support sim build. 
+ifneq ($(TARGET_SIMULATOR),true) + LOCAL_NDK_VERSION := 4 + LOCAL_SDK_VERSION := 8 +else + LOCAL_C_INCLUDES += $(JNI_H_INCLUDE) +endif LOCAL_MODULE := libjni_latinime diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index d068f3faf..4fe80da69 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -15,31 +15,18 @@ ** limitations under the License. */ -#define LOG_TAG "BinaryDictionary" -#include "utils/Log.h" - #include <stdio.h> #include <assert.h> #include <unistd.h> #include <fcntl.h> -#include <nativehelper/jni.h> -#include "utils/AssetManager.h" -#include "utils/Asset.h" - +#include <jni.h> #include "dictionary.h" // ---------------------------------------------------------------------------- using namespace latinime; -using namespace android; - -static jfieldID sDescriptorField; -static jfieldID sAssetManagerNativeField; -static jmethodID sAddWordMethod; -static jfieldID sDictLength; - // // helper function to throw an exception // @@ -54,35 +41,15 @@ static void throwException(JNIEnv *env, const char* ex, const char* fmt, int dat } static jint latinime_BinaryDictionary_open - (JNIEnv *env, jobject object, jobject assetManager, jstring resourceString, + (JNIEnv *env, jobject object, jobject dictDirectBuffer, jint typedLetterMultiplier, jint fullWordMultiplier) { - // Get the native file descriptor from the FileDescriptor object - AssetManager *am = (AssetManager*) env->GetIntField(assetManager, sAssetManagerNativeField); - if (!am) { - LOGE("DICT: Couldn't get AssetManager native peer\n"); - return 0; - } - const char *resourcePath = env->GetStringUTFChars(resourceString, NULL); - - Asset *dictAsset = am->openNonAsset(resourcePath, Asset::ACCESS_BUFFER); - if (dictAsset == NULL) { - LOGE("DICT: Couldn't get asset %s\n", resourcePath); - env->ReleaseStringUTFChars(resourceString, 
resourcePath); - return 0; - } - - void *dict = (void*) dictAsset->getBuffer(false); + void *dict = env->GetDirectBufferAddress(dictDirectBuffer); if (dict == NULL) { - LOGE("DICT: Dictionary buffer is null\n"); - env->ReleaseStringUTFChars(resourceString, resourcePath); + fprintf(stderr, "DICT: Dictionary buffer is null\n"); return 0; } Dictionary *dictionary = new Dictionary(dict, typedLetterMultiplier, fullWordMultiplier); - dictionary->setAsset(dictAsset); - env->SetIntField(object, sDictLength, (jint) dictAsset->getLength()); - - env->ReleaseStringUTFChars(resourceString, resourcePath); return (jint) dictionary; } @@ -92,8 +59,7 @@ static int latinime_BinaryDictionary_getSuggestions( jint maxAlternatives, jint skipPos, jintArray nextLettersArray, jint nextLettersSize) { Dictionary *dictionary = (Dictionary*) dict; - if (dictionary == NULL) - return 0; + if (dictionary == NULL) return 0; int *frequencies = env->GetIntArrayElements(frequencyArray, NULL); int *inputCodes = env->GetIntArrayElements(inputArray, NULL); @@ -114,6 +80,28 @@ static int latinime_BinaryDictionary_getSuggestions( return count; } +static int latinime_BinaryDictionary_getBigrams + (JNIEnv *env, jobject object, jint dict, jcharArray wordArray, jint wordLength, + jcharArray outputArray, jintArray frequencyArray, jint maxWordLength, jint maxBigrams) +{ + Dictionary *dictionary = (Dictionary*) dict; + if (dictionary == NULL) return 0; + + jchar *word = env->GetCharArrayElements(wordArray, NULL); + jchar *outputChars = env->GetCharArrayElements(outputArray, NULL); + int *frequencies = env->GetIntArrayElements(frequencyArray, NULL); + + int count = dictionary->getBigrams((unsigned short*) word, wordLength, + (unsigned short*) outputChars, frequencies, maxWordLength, maxBigrams); + + env->ReleaseCharArrayElements(wordArray, word, JNI_ABORT); + env->ReleaseCharArrayElements(outputArray, outputChars, 0); + env->ReleaseIntArrayElements(frequencyArray, frequencies, 0); + + return count; +} + + static 
jboolean latinime_BinaryDictionary_isValidWord (JNIEnv *env, jobject object, jint dict, jcharArray wordArray, jint wordLength) { @@ -131,18 +119,18 @@ static void latinime_BinaryDictionary_close (JNIEnv *env, jobject object, jint dict) { Dictionary *dictionary = (Dictionary*) dict; - ((Asset*) dictionary->getAsset())->close(); delete (Dictionary*) dict; } // ---------------------------------------------------------------------------- static JNINativeMethod gMethods[] = { - {"openNative", "(Landroid/content/res/AssetManager;Ljava/lang/String;II)I", + {"openNative", "(Ljava/nio/ByteBuffer;II)I", (void*)latinime_BinaryDictionary_open}, {"closeNative", "(I)V", (void*)latinime_BinaryDictionary_close}, {"getSuggestionsNative", "(I[II[C[IIIII[II)I", (void*)latinime_BinaryDictionary_getSuggestions}, - {"isValidWordNative", "(I[CI)Z", (void*)latinime_BinaryDictionary_isValidWord} + {"isValidWordNative", "(I[CI)Z", (void*)latinime_BinaryDictionary_isValidWord}, + {"getBigramsNative", "(I[CI[C[III)I", (void*)latinime_BinaryDictionary_getBigrams} }; static int registerNativeMethods(JNIEnv* env, const char* className, @@ -167,30 +155,6 @@ static int registerNativeMethods(JNIEnv* env, const char* className, static int registerNatives(JNIEnv *env) { const char* const kClassPathName = "com/android/inputmethod/latin/BinaryDictionary"; - jclass clazz; - - clazz = env->FindClass("java/io/FileDescriptor"); - if (clazz == NULL) { - LOGE("Can't find %s", "java/io/FileDescriptor"); - return -1; - } - sDescriptorField = env->GetFieldID(clazz, "descriptor", "I"); - - clazz = env->FindClass("android/content/res/AssetManager"); - if (clazz == NULL) { - LOGE("Can't find %s", "java/io/FileDescriptor"); - return -1; - } - sAssetManagerNativeField = env->GetFieldID(clazz, "mObject", "I"); - - // Get the field pointer for the dictionary length - clazz = env->FindClass(kClassPathName); - if (clazz == NULL) { - LOGE("Can't find %s", kClassPathName); - return -1; - } - sDictLength = 
env->GetFieldID(clazz, "mDictLength", "I"); - return registerNativeMethods(env, kClassPathName, gMethods, sizeof(gMethods) / sizeof(gMethods[0])); } diff --git a/native/src/char_utils.cpp b/native/src/char_utils.cpp new file mode 100644 index 000000000..a31a0632c --- /dev/null +++ b/native/src/char_utils.cpp @@ -0,0 +1,899 @@ +/* + * Copyright (C) 2010 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <stdlib.h> + +namespace latinime { + +struct LatinCapitalSmallPair { + unsigned short capital; + unsigned short small; +}; + +// Generated from http://unicode.org/Public/UNIDATA/UnicodeData.txt +// +// 1. Run the following code. Bascially taken from +// Dictionary::toLowerCase(unsigned short c) in dictionary.cpp. +// Then, get the list of chars where cc != ccc. 
+// +// unsigned short c, cc, ccc, ccc2; +// for (c = 0; c < 0xFFFF ; c++) { +// if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) { +// cc = BASE_CHARS[c]; +// } else { +// cc = c; +// } +// +// // tolower +// int isBase = 0; +// if (cc >='A' && cc <= 'Z') { +// ccc = (cc | 0x20); +// ccc2 = ccc; +// isBase = 1; +// } else if (cc > 0x7F) { +// ccc = u_tolower(cc); +// ccc2 = latin_tolower(cc); +// } else { +// ccc = cc; +// ccc2 = ccc; +// } +// if (!isBase && cc != ccc) { +// wprintf(L" 0x%04X => 0x%04X => 0x%04X %lc => %lc => %lc \n", +// c, cc, ccc, c, cc, ccc); +// //assert(ccc == ccc2); +// } +// } +// +// Initially, started with an empty latin_tolower() as below. +// +// unsigned short latin_tolower(unsigned short c) { +// return c; +// } +// +// +// 2. Process the list obtained by 1 by the following perl script and apply +// 'sort -u' as well. Get the SORTED_CHAR_MAP[]. +// Note that '$1' in the perl script is 'cc' in the above C code. +// +// while(<>) { +// / 0x\w* => 0x(\w*) =/; +// open(HDL, "grep -iw ^" . $1 . " UnicodeData.txt | "); +// $line = <HDL>; +// chomp $line; +// @cols = split(/;/, $line); +// print " { 0x$1, 0x$cols[13] }, // $cols[1]\n"; +// } +// +// +// 3. Update the latin_tolower() function above with SORTED_CHAR_MAP. Enable +// the assert(ccc == ccc2) above and confirm the function exits successfully. 
+// +static const struct LatinCapitalSmallPair SORTED_CHAR_MAP[] = { + { 0x00C4, 0x00E4 }, // LATIN CAPITAL LETTER A WITH DIAERESIS + { 0x00C5, 0x00E5 }, // LATIN CAPITAL LETTER A WITH RING ABOVE + { 0x00C6, 0x00E6 }, // LATIN CAPITAL LETTER AE + { 0x00D0, 0x00F0 }, // LATIN CAPITAL LETTER ETH + { 0x00D5, 0x00F5 }, // LATIN CAPITAL LETTER O WITH TILDE + { 0x00D6, 0x00F6 }, // LATIN CAPITAL LETTER O WITH DIAERESIS + { 0x00D8, 0x00F8 }, // LATIN CAPITAL LETTER O WITH STROKE + { 0x00DC, 0x00FC }, // LATIN CAPITAL LETTER U WITH DIAERESIS + { 0x00DE, 0x00FE }, // LATIN CAPITAL LETTER THORN + { 0x0110, 0x0111 }, // LATIN CAPITAL LETTER D WITH STROKE + { 0x0126, 0x0127 }, // LATIN CAPITAL LETTER H WITH STROKE + { 0x0141, 0x0142 }, // LATIN CAPITAL LETTER L WITH STROKE + { 0x014A, 0x014B }, // LATIN CAPITAL LETTER ENG + { 0x0152, 0x0153 }, // LATIN CAPITAL LIGATURE OE + { 0x0166, 0x0167 }, // LATIN CAPITAL LETTER T WITH STROKE + { 0x0181, 0x0253 }, // LATIN CAPITAL LETTER B WITH HOOK + { 0x0182, 0x0183 }, // LATIN CAPITAL LETTER B WITH TOPBAR + { 0x0184, 0x0185 }, // LATIN CAPITAL LETTER TONE SIX + { 0x0186, 0x0254 }, // LATIN CAPITAL LETTER OPEN O + { 0x0187, 0x0188 }, // LATIN CAPITAL LETTER C WITH HOOK + { 0x0189, 0x0256 }, // LATIN CAPITAL LETTER AFRICAN D + { 0x018A, 0x0257 }, // LATIN CAPITAL LETTER D WITH HOOK + { 0x018B, 0x018C }, // LATIN CAPITAL LETTER D WITH TOPBAR + { 0x018E, 0x01DD }, // LATIN CAPITAL LETTER REVERSED E + { 0x018F, 0x0259 }, // LATIN CAPITAL LETTER SCHWA + { 0x0190, 0x025B }, // LATIN CAPITAL LETTER OPEN E + { 0x0191, 0x0192 }, // LATIN CAPITAL LETTER F WITH HOOK + { 0x0193, 0x0260 }, // LATIN CAPITAL LETTER G WITH HOOK + { 0x0194, 0x0263 }, // LATIN CAPITAL LETTER GAMMA + { 0x0196, 0x0269 }, // LATIN CAPITAL LETTER IOTA + { 0x0197, 0x0268 }, // LATIN CAPITAL LETTER I WITH STROKE + { 0x0198, 0x0199 }, // LATIN CAPITAL LETTER K WITH HOOK + { 0x019C, 0x026F }, // LATIN CAPITAL LETTER TURNED M + { 0x019D, 0x0272 }, // LATIN CAPITAL LETTER N WITH 
LEFT HOOK + { 0x019F, 0x0275 }, // LATIN CAPITAL LETTER O WITH MIDDLE TILDE + { 0x01A2, 0x01A3 }, // LATIN CAPITAL LETTER OI + { 0x01A4, 0x01A5 }, // LATIN CAPITAL LETTER P WITH HOOK + { 0x01A6, 0x0280 }, // LATIN LETTER YR + { 0x01A7, 0x01A8 }, // LATIN CAPITAL LETTER TONE TWO + { 0x01A9, 0x0283 }, // LATIN CAPITAL LETTER ESH + { 0x01AC, 0x01AD }, // LATIN CAPITAL LETTER T WITH HOOK + { 0x01AE, 0x0288 }, // LATIN CAPITAL LETTER T WITH RETROFLEX HOOK + { 0x01B1, 0x028A }, // LATIN CAPITAL LETTER UPSILON + { 0x01B2, 0x028B }, // LATIN CAPITAL LETTER V WITH HOOK + { 0x01B3, 0x01B4 }, // LATIN CAPITAL LETTER Y WITH HOOK + { 0x01B5, 0x01B6 }, // LATIN CAPITAL LETTER Z WITH STROKE + { 0x01B7, 0x0292 }, // LATIN CAPITAL LETTER EZH + { 0x01B8, 0x01B9 }, // LATIN CAPITAL LETTER EZH REVERSED + { 0x01BC, 0x01BD }, // LATIN CAPITAL LETTER TONE FIVE + { 0x01E4, 0x01E5 }, // LATIN CAPITAL LETTER G WITH STROKE + { 0x01EA, 0x01EB }, // LATIN CAPITAL LETTER O WITH OGONEK + { 0x01F6, 0x0195 }, // LATIN CAPITAL LETTER HWAIR + { 0x01F7, 0x01BF }, // LATIN CAPITAL LETTER WYNN + { 0x021C, 0x021D }, // LATIN CAPITAL LETTER YOGH + { 0x0220, 0x019E }, // LATIN CAPITAL LETTER N WITH LONG RIGHT LEG + { 0x0222, 0x0223 }, // LATIN CAPITAL LETTER OU + { 0x0224, 0x0225 }, // LATIN CAPITAL LETTER Z WITH HOOK + { 0x0226, 0x0227 }, // LATIN CAPITAL LETTER A WITH DOT ABOVE + { 0x022E, 0x022F }, // LATIN CAPITAL LETTER O WITH DOT ABOVE + { 0x023A, 0x2C65 }, // LATIN CAPITAL LETTER A WITH STROKE + { 0x023B, 0x023C }, // LATIN CAPITAL LETTER C WITH STROKE + { 0x023D, 0x019A }, // LATIN CAPITAL LETTER L WITH BAR + { 0x023E, 0x2C66 }, // LATIN CAPITAL LETTER T WITH DIAGONAL STROKE + { 0x0241, 0x0242 }, // LATIN CAPITAL LETTER GLOTTAL STOP + { 0x0243, 0x0180 }, // LATIN CAPITAL LETTER B WITH STROKE + { 0x0244, 0x0289 }, // LATIN CAPITAL LETTER U BAR + { 0x0245, 0x028C }, // LATIN CAPITAL LETTER TURNED V + { 0x0246, 0x0247 }, // LATIN CAPITAL LETTER E WITH STROKE + { 0x0248, 0x0249 }, // LATIN CAPITAL 
LETTER J WITH STROKE + { 0x024A, 0x024B }, // LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL + { 0x024C, 0x024D }, // LATIN CAPITAL LETTER R WITH STROKE + { 0x024E, 0x024F }, // LATIN CAPITAL LETTER Y WITH STROKE + { 0x0370, 0x0371 }, // GREEK CAPITAL LETTER HETA + { 0x0372, 0x0373 }, // GREEK CAPITAL LETTER ARCHAIC SAMPI + { 0x0376, 0x0377 }, // GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA + { 0x0391, 0x03B1 }, // GREEK CAPITAL LETTER ALPHA + { 0x0392, 0x03B2 }, // GREEK CAPITAL LETTER BETA + { 0x0393, 0x03B3 }, // GREEK CAPITAL LETTER GAMMA + { 0x0394, 0x03B4 }, // GREEK CAPITAL LETTER DELTA + { 0x0395, 0x03B5 }, // GREEK CAPITAL LETTER EPSILON + { 0x0396, 0x03B6 }, // GREEK CAPITAL LETTER ZETA + { 0x0397, 0x03B7 }, // GREEK CAPITAL LETTER ETA + { 0x0398, 0x03B8 }, // GREEK CAPITAL LETTER THETA + { 0x0399, 0x03B9 }, // GREEK CAPITAL LETTER IOTA + { 0x039A, 0x03BA }, // GREEK CAPITAL LETTER KAPPA + { 0x039B, 0x03BB }, // GREEK CAPITAL LETTER LAMDA + { 0x039C, 0x03BC }, // GREEK CAPITAL LETTER MU + { 0x039D, 0x03BD }, // GREEK CAPITAL LETTER NU + { 0x039E, 0x03BE }, // GREEK CAPITAL LETTER XI + { 0x039F, 0x03BF }, // GREEK CAPITAL LETTER OMICRON + { 0x03A0, 0x03C0 }, // GREEK CAPITAL LETTER PI + { 0x03A1, 0x03C1 }, // GREEK CAPITAL LETTER RHO + { 0x03A3, 0x03C3 }, // GREEK CAPITAL LETTER SIGMA + { 0x03A4, 0x03C4 }, // GREEK CAPITAL LETTER TAU + { 0x03A5, 0x03C5 }, // GREEK CAPITAL LETTER UPSILON + { 0x03A6, 0x03C6 }, // GREEK CAPITAL LETTER PHI + { 0x03A7, 0x03C7 }, // GREEK CAPITAL LETTER CHI + { 0x03A8, 0x03C8 }, // GREEK CAPITAL LETTER PSI + { 0x03A9, 0x03C9 }, // GREEK CAPITAL LETTER OMEGA + { 0x03CF, 0x03D7 }, // GREEK CAPITAL KAI SYMBOL + { 0x03D8, 0x03D9 }, // GREEK LETTER ARCHAIC KOPPA + { 0x03DA, 0x03DB }, // GREEK LETTER STIGMA + { 0x03DC, 0x03DD }, // GREEK LETTER DIGAMMA + { 0x03DE, 0x03DF }, // GREEK LETTER KOPPA + { 0x03E0, 0x03E1 }, // GREEK LETTER SAMPI + { 0x03E2, 0x03E3 }, // COPTIC CAPITAL LETTER SHEI + { 0x03E4, 0x03E5 }, // COPTIC CAPITAL LETTER FEI + 
{ 0x03E6, 0x03E7 }, // COPTIC CAPITAL LETTER KHEI + { 0x03E8, 0x03E9 }, // COPTIC CAPITAL LETTER HORI + { 0x03EA, 0x03EB }, // COPTIC CAPITAL LETTER GANGIA + { 0x03EC, 0x03ED }, // COPTIC CAPITAL LETTER SHIMA + { 0x03EE, 0x03EF }, // COPTIC CAPITAL LETTER DEI + { 0x03F7, 0x03F8 }, // GREEK CAPITAL LETTER SHO + { 0x03FA, 0x03FB }, // GREEK CAPITAL LETTER SAN + { 0x03FD, 0x037B }, // GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL + { 0x03FE, 0x037C }, // GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL + { 0x03FF, 0x037D }, // GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL + { 0x0402, 0x0452 }, // CYRILLIC CAPITAL LETTER DJE + { 0x0404, 0x0454 }, // CYRILLIC CAPITAL LETTER UKRAINIAN IE + { 0x0405, 0x0455 }, // CYRILLIC CAPITAL LETTER DZE + { 0x0406, 0x0456 }, // CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + { 0x0408, 0x0458 }, // CYRILLIC CAPITAL LETTER JE + { 0x0409, 0x0459 }, // CYRILLIC CAPITAL LETTER LJE + { 0x040A, 0x045A }, // CYRILLIC CAPITAL LETTER NJE + { 0x040B, 0x045B }, // CYRILLIC CAPITAL LETTER TSHE + { 0x040F, 0x045F }, // CYRILLIC CAPITAL LETTER DZHE + { 0x0410, 0x0430 }, // CYRILLIC CAPITAL LETTER A + { 0x0411, 0x0431 }, // CYRILLIC CAPITAL LETTER BE + { 0x0412, 0x0432 }, // CYRILLIC CAPITAL LETTER VE + { 0x0413, 0x0433 }, // CYRILLIC CAPITAL LETTER GHE + { 0x0414, 0x0434 }, // CYRILLIC CAPITAL LETTER DE + { 0x0415, 0x0435 }, // CYRILLIC CAPITAL LETTER IE + { 0x0416, 0x0436 }, // CYRILLIC CAPITAL LETTER ZHE + { 0x0417, 0x0437 }, // CYRILLIC CAPITAL LETTER ZE + { 0x0418, 0x0438 }, // CYRILLIC CAPITAL LETTER I + { 0x041A, 0x043A }, // CYRILLIC CAPITAL LETTER KA + { 0x041B, 0x043B }, // CYRILLIC CAPITAL LETTER EL + { 0x041C, 0x043C }, // CYRILLIC CAPITAL LETTER EM + { 0x041D, 0x043D }, // CYRILLIC CAPITAL LETTER EN + { 0x041E, 0x043E }, // CYRILLIC CAPITAL LETTER O + { 0x041F, 0x043F }, // CYRILLIC CAPITAL LETTER PE + { 0x0420, 0x0440 }, // CYRILLIC CAPITAL LETTER ER + { 0x0421, 0x0441 }, // CYRILLIC CAPITAL LETTER ES + { 0x0422, 0x0442 }, // CYRILLIC 
CAPITAL LETTER TE + { 0x0423, 0x0443 }, // CYRILLIC CAPITAL LETTER U + { 0x0424, 0x0444 }, // CYRILLIC CAPITAL LETTER EF + { 0x0425, 0x0445 }, // CYRILLIC CAPITAL LETTER HA + { 0x0426, 0x0446 }, // CYRILLIC CAPITAL LETTER TSE + { 0x0427, 0x0447 }, // CYRILLIC CAPITAL LETTER CHE + { 0x0428, 0x0448 }, // CYRILLIC CAPITAL LETTER SHA + { 0x0429, 0x0449 }, // CYRILLIC CAPITAL LETTER SHCHA + { 0x042A, 0x044A }, // CYRILLIC CAPITAL LETTER HARD SIGN + { 0x042B, 0x044B }, // CYRILLIC CAPITAL LETTER YERU + { 0x042C, 0x044C }, // CYRILLIC CAPITAL LETTER SOFT SIGN + { 0x042D, 0x044D }, // CYRILLIC CAPITAL LETTER E + { 0x042E, 0x044E }, // CYRILLIC CAPITAL LETTER YU + { 0x042F, 0x044F }, // CYRILLIC CAPITAL LETTER YA + { 0x0460, 0x0461 }, // CYRILLIC CAPITAL LETTER OMEGA + { 0x0462, 0x0463 }, // CYRILLIC CAPITAL LETTER YAT + { 0x0464, 0x0465 }, // CYRILLIC CAPITAL LETTER IOTIFIED E + { 0x0466, 0x0467 }, // CYRILLIC CAPITAL LETTER LITTLE YUS + { 0x0468, 0x0469 }, // CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS + { 0x046A, 0x046B }, // CYRILLIC CAPITAL LETTER BIG YUS + { 0x046C, 0x046D }, // CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS + { 0x046E, 0x046F }, // CYRILLIC CAPITAL LETTER KSI + { 0x0470, 0x0471 }, // CYRILLIC CAPITAL LETTER PSI + { 0x0472, 0x0473 }, // CYRILLIC CAPITAL LETTER FITA + { 0x0474, 0x0475 }, // CYRILLIC CAPITAL LETTER IZHITSA + { 0x0478, 0x0479 }, // CYRILLIC CAPITAL LETTER UK + { 0x047A, 0x047B }, // CYRILLIC CAPITAL LETTER ROUND OMEGA + { 0x047C, 0x047D }, // CYRILLIC CAPITAL LETTER OMEGA WITH TITLO + { 0x047E, 0x047F }, // CYRILLIC CAPITAL LETTER OT + { 0x0480, 0x0481 }, // CYRILLIC CAPITAL LETTER KOPPA + { 0x048A, 0x048B }, // CYRILLIC CAPITAL LETTER SHORT I WITH TAIL + { 0x048C, 0x048D }, // CYRILLIC CAPITAL LETTER SEMISOFT SIGN + { 0x048E, 0x048F }, // CYRILLIC CAPITAL LETTER ER WITH TICK + { 0x0490, 0x0491 }, // CYRILLIC CAPITAL LETTER GHE WITH UPTURN + { 0x0492, 0x0493 }, // CYRILLIC CAPITAL LETTER GHE WITH STROKE + { 0x0494, 0x0495 }, // CYRILLIC 
CAPITAL LETTER GHE WITH MIDDLE HOOK + { 0x0496, 0x0497 }, // CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER + { 0x0498, 0x0499 }, // CYRILLIC CAPITAL LETTER ZE WITH DESCENDER + { 0x049A, 0x049B }, // CYRILLIC CAPITAL LETTER KA WITH DESCENDER + { 0x049C, 0x049D }, // CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE + { 0x049E, 0x049F }, // CYRILLIC CAPITAL LETTER KA WITH STROKE + { 0x04A0, 0x04A1 }, // CYRILLIC CAPITAL LETTER BASHKIR KA + { 0x04A2, 0x04A3 }, // CYRILLIC CAPITAL LETTER EN WITH DESCENDER + { 0x04A4, 0x04A5 }, // CYRILLIC CAPITAL LIGATURE EN GHE + { 0x04A6, 0x04A7 }, // CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK + { 0x04A8, 0x04A9 }, // CYRILLIC CAPITAL LETTER ABKHASIAN HA + { 0x04AA, 0x04AB }, // CYRILLIC CAPITAL LETTER ES WITH DESCENDER + { 0x04AC, 0x04AD }, // CYRILLIC CAPITAL LETTER TE WITH DESCENDER + { 0x04AE, 0x04AF }, // CYRILLIC CAPITAL LETTER STRAIGHT U + { 0x04B0, 0x04B1 }, // CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE + { 0x04B2, 0x04B3 }, // CYRILLIC CAPITAL LETTER HA WITH DESCENDER + { 0x04B4, 0x04B5 }, // CYRILLIC CAPITAL LIGATURE TE TSE + { 0x04B6, 0x04B7 }, // CYRILLIC CAPITAL LETTER CHE WITH DESCENDER + { 0x04B8, 0x04B9 }, // CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE + { 0x04BA, 0x04BB }, // CYRILLIC CAPITAL LETTER SHHA + { 0x04BC, 0x04BD }, // CYRILLIC CAPITAL LETTER ABKHASIAN CHE + { 0x04BE, 0x04BF }, // CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER + { 0x04C0, 0x04CF }, // CYRILLIC LETTER PALOCHKA + { 0x04C3, 0x04C4 }, // CYRILLIC CAPITAL LETTER KA WITH HOOK + { 0x04C5, 0x04C6 }, // CYRILLIC CAPITAL LETTER EL WITH TAIL + { 0x04C7, 0x04C8 }, // CYRILLIC CAPITAL LETTER EN WITH HOOK + { 0x04C9, 0x04CA }, // CYRILLIC CAPITAL LETTER EN WITH TAIL + { 0x04CB, 0x04CC }, // CYRILLIC CAPITAL LETTER KHAKASSIAN CHE + { 0x04CD, 0x04CE }, // CYRILLIC CAPITAL LETTER EM WITH TAIL + { 0x04D4, 0x04D5 }, // CYRILLIC CAPITAL LIGATURE A IE + { 0x04D8, 0x04D9 }, // CYRILLIC CAPITAL LETTER SCHWA + { 0x04E0, 0x04E1 }, // CYRILLIC CAPITAL 
LETTER ABKHASIAN DZE + { 0x04E8, 0x04E9 }, // CYRILLIC CAPITAL LETTER BARRED O + { 0x04F6, 0x04F7 }, // CYRILLIC CAPITAL LETTER GHE WITH DESCENDER + { 0x04FA, 0x04FB }, // CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK + { 0x04FC, 0x04FD }, // CYRILLIC CAPITAL LETTER HA WITH HOOK + { 0x04FE, 0x04FF }, // CYRILLIC CAPITAL LETTER HA WITH STROKE + { 0x0500, 0x0501 }, // CYRILLIC CAPITAL LETTER KOMI DE + { 0x0502, 0x0503 }, // CYRILLIC CAPITAL LETTER KOMI DJE + { 0x0504, 0x0505 }, // CYRILLIC CAPITAL LETTER KOMI ZJE + { 0x0506, 0x0507 }, // CYRILLIC CAPITAL LETTER KOMI DZJE + { 0x0508, 0x0509 }, // CYRILLIC CAPITAL LETTER KOMI LJE + { 0x050A, 0x050B }, // CYRILLIC CAPITAL LETTER KOMI NJE + { 0x050C, 0x050D }, // CYRILLIC CAPITAL LETTER KOMI SJE + { 0x050E, 0x050F }, // CYRILLIC CAPITAL LETTER KOMI TJE + { 0x0510, 0x0511 }, // CYRILLIC CAPITAL LETTER REVERSED ZE + { 0x0512, 0x0513 }, // CYRILLIC CAPITAL LETTER EL WITH HOOK + { 0x0514, 0x0515 }, // CYRILLIC CAPITAL LETTER LHA + { 0x0516, 0x0517 }, // CYRILLIC CAPITAL LETTER RHA + { 0x0518, 0x0519 }, // CYRILLIC CAPITAL LETTER YAE + { 0x051A, 0x051B }, // CYRILLIC CAPITAL LETTER QA + { 0x051C, 0x051D }, // CYRILLIC CAPITAL LETTER WE + { 0x051E, 0x051F }, // CYRILLIC CAPITAL LETTER ALEUT KA + { 0x0520, 0x0521 }, // CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK + { 0x0522, 0x0523 }, // CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK + { 0x0524, 0x0525 }, // CYRILLIC CAPITAL LETTER PE WITH DESCENDER + { 0x0531, 0x0561 }, // ARMENIAN CAPITAL LETTER AYB + { 0x0532, 0x0562 }, // ARMENIAN CAPITAL LETTER BEN + { 0x0533, 0x0563 }, // ARMENIAN CAPITAL LETTER GIM + { 0x0534, 0x0564 }, // ARMENIAN CAPITAL LETTER DA + { 0x0535, 0x0565 }, // ARMENIAN CAPITAL LETTER ECH + { 0x0536, 0x0566 }, // ARMENIAN CAPITAL LETTER ZA + { 0x0537, 0x0567 }, // ARMENIAN CAPITAL LETTER EH + { 0x0538, 0x0568 }, // ARMENIAN CAPITAL LETTER ET + { 0x0539, 0x0569 }, // ARMENIAN CAPITAL LETTER TO + { 0x053A, 0x056A }, // ARMENIAN CAPITAL LETTER ZHE + { 0x053B, 
0x056B }, // ARMENIAN CAPITAL LETTER INI + { 0x053C, 0x056C }, // ARMENIAN CAPITAL LETTER LIWN + { 0x053D, 0x056D }, // ARMENIAN CAPITAL LETTER XEH + { 0x053E, 0x056E }, // ARMENIAN CAPITAL LETTER CA + { 0x053F, 0x056F }, // ARMENIAN CAPITAL LETTER KEN + { 0x0540, 0x0570 }, // ARMENIAN CAPITAL LETTER HO + { 0x0541, 0x0571 }, // ARMENIAN CAPITAL LETTER JA + { 0x0542, 0x0572 }, // ARMENIAN CAPITAL LETTER GHAD + { 0x0543, 0x0573 }, // ARMENIAN CAPITAL LETTER CHEH + { 0x0544, 0x0574 }, // ARMENIAN CAPITAL LETTER MEN + { 0x0545, 0x0575 }, // ARMENIAN CAPITAL LETTER YI + { 0x0546, 0x0576 }, // ARMENIAN CAPITAL LETTER NOW + { 0x0547, 0x0577 }, // ARMENIAN CAPITAL LETTER SHA + { 0x0548, 0x0578 }, // ARMENIAN CAPITAL LETTER VO + { 0x0549, 0x0579 }, // ARMENIAN CAPITAL LETTER CHA + { 0x054A, 0x057A }, // ARMENIAN CAPITAL LETTER PEH + { 0x054B, 0x057B }, // ARMENIAN CAPITAL LETTER JHEH + { 0x054C, 0x057C }, // ARMENIAN CAPITAL LETTER RA + { 0x054D, 0x057D }, // ARMENIAN CAPITAL LETTER SEH + { 0x054E, 0x057E }, // ARMENIAN CAPITAL LETTER VEW + { 0x054F, 0x057F }, // ARMENIAN CAPITAL LETTER TIWN + { 0x0550, 0x0580 }, // ARMENIAN CAPITAL LETTER REH + { 0x0551, 0x0581 }, // ARMENIAN CAPITAL LETTER CO + { 0x0552, 0x0582 }, // ARMENIAN CAPITAL LETTER YIWN + { 0x0553, 0x0583 }, // ARMENIAN CAPITAL LETTER PIWR + { 0x0554, 0x0584 }, // ARMENIAN CAPITAL LETTER KEH + { 0x0555, 0x0585 }, // ARMENIAN CAPITAL LETTER OH + { 0x0556, 0x0586 }, // ARMENIAN CAPITAL LETTER FEH + { 0x10A0, 0x2D00 }, // GEORGIAN CAPITAL LETTER AN + { 0x10A1, 0x2D01 }, // GEORGIAN CAPITAL LETTER BAN + { 0x10A2, 0x2D02 }, // GEORGIAN CAPITAL LETTER GAN + { 0x10A3, 0x2D03 }, // GEORGIAN CAPITAL LETTER DON + { 0x10A4, 0x2D04 }, // GEORGIAN CAPITAL LETTER EN + { 0x10A5, 0x2D05 }, // GEORGIAN CAPITAL LETTER VIN + { 0x10A6, 0x2D06 }, // GEORGIAN CAPITAL LETTER ZEN + { 0x10A7, 0x2D07 }, // GEORGIAN CAPITAL LETTER TAN + { 0x10A8, 0x2D08 }, // GEORGIAN CAPITAL LETTER IN + { 0x10A9, 0x2D09 }, // GEORGIAN CAPITAL LETTER KAN + 
{ 0x10AA, 0x2D0A }, // GEORGIAN CAPITAL LETTER LAS + { 0x10AB, 0x2D0B }, // GEORGIAN CAPITAL LETTER MAN + { 0x10AC, 0x2D0C }, // GEORGIAN CAPITAL LETTER NAR + { 0x10AD, 0x2D0D }, // GEORGIAN CAPITAL LETTER ON + { 0x10AE, 0x2D0E }, // GEORGIAN CAPITAL LETTER PAR + { 0x10AF, 0x2D0F }, // GEORGIAN CAPITAL LETTER ZHAR + { 0x10B0, 0x2D10 }, // GEORGIAN CAPITAL LETTER RAE + { 0x10B1, 0x2D11 }, // GEORGIAN CAPITAL LETTER SAN + { 0x10B2, 0x2D12 }, // GEORGIAN CAPITAL LETTER TAR + { 0x10B3, 0x2D13 }, // GEORGIAN CAPITAL LETTER UN + { 0x10B4, 0x2D14 }, // GEORGIAN CAPITAL LETTER PHAR + { 0x10B5, 0x2D15 }, // GEORGIAN CAPITAL LETTER KHAR + { 0x10B6, 0x2D16 }, // GEORGIAN CAPITAL LETTER GHAN + { 0x10B7, 0x2D17 }, // GEORGIAN CAPITAL LETTER QAR + { 0x10B8, 0x2D18 }, // GEORGIAN CAPITAL LETTER SHIN + { 0x10B9, 0x2D19 }, // GEORGIAN CAPITAL LETTER CHIN + { 0x10BA, 0x2D1A }, // GEORGIAN CAPITAL LETTER CAN + { 0x10BB, 0x2D1B }, // GEORGIAN CAPITAL LETTER JIL + { 0x10BC, 0x2D1C }, // GEORGIAN CAPITAL LETTER CIL + { 0x10BD, 0x2D1D }, // GEORGIAN CAPITAL LETTER CHAR + { 0x10BE, 0x2D1E }, // GEORGIAN CAPITAL LETTER XAN + { 0x10BF, 0x2D1F }, // GEORGIAN CAPITAL LETTER JHAN + { 0x10C0, 0x2D20 }, // GEORGIAN CAPITAL LETTER HAE + { 0x10C1, 0x2D21 }, // GEORGIAN CAPITAL LETTER HE + { 0x10C2, 0x2D22 }, // GEORGIAN CAPITAL LETTER HIE + { 0x10C3, 0x2D23 }, // GEORGIAN CAPITAL LETTER WE + { 0x10C4, 0x2D24 }, // GEORGIAN CAPITAL LETTER HAR + { 0x10C5, 0x2D25 }, // GEORGIAN CAPITAL LETTER HOE + { 0x1E00, 0x1E01 }, // LATIN CAPITAL LETTER A WITH RING BELOW + { 0x1E02, 0x1E03 }, // LATIN CAPITAL LETTER B WITH DOT ABOVE + { 0x1E04, 0x1E05 }, // LATIN CAPITAL LETTER B WITH DOT BELOW + { 0x1E06, 0x1E07 }, // LATIN CAPITAL LETTER B WITH LINE BELOW + { 0x1E08, 0x1E09 }, // LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE + { 0x1E0A, 0x1E0B }, // LATIN CAPITAL LETTER D WITH DOT ABOVE + { 0x1E0C, 0x1E0D }, // LATIN CAPITAL LETTER D WITH DOT BELOW + { 0x1E0E, 0x1E0F }, // LATIN CAPITAL LETTER D WITH LINE 
BELOW + { 0x1E10, 0x1E11 }, // LATIN CAPITAL LETTER D WITH CEDILLA + { 0x1E12, 0x1E13 }, // LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW + { 0x1E14, 0x1E15 }, // LATIN CAPITAL LETTER E WITH MACRON AND GRAVE + { 0x1E16, 0x1E17 }, // LATIN CAPITAL LETTER E WITH MACRON AND ACUTE + { 0x1E18, 0x1E19 }, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW + { 0x1E1A, 0x1E1B }, // LATIN CAPITAL LETTER E WITH TILDE BELOW + { 0x1E1C, 0x1E1D }, // LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE + { 0x1E1E, 0x1E1F }, // LATIN CAPITAL LETTER F WITH DOT ABOVE + { 0x1E20, 0x1E21 }, // LATIN CAPITAL LETTER G WITH MACRON + { 0x1E22, 0x1E23 }, // LATIN CAPITAL LETTER H WITH DOT ABOVE + { 0x1E24, 0x1E25 }, // LATIN CAPITAL LETTER H WITH DOT BELOW + { 0x1E26, 0x1E27 }, // LATIN CAPITAL LETTER H WITH DIAERESIS + { 0x1E28, 0x1E29 }, // LATIN CAPITAL LETTER H WITH CEDILLA + { 0x1E2A, 0x1E2B }, // LATIN CAPITAL LETTER H WITH BREVE BELOW + { 0x1E2C, 0x1E2D }, // LATIN CAPITAL LETTER I WITH TILDE BELOW + { 0x1E2E, 0x1E2F }, // LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE + { 0x1E30, 0x1E31 }, // LATIN CAPITAL LETTER K WITH ACUTE + { 0x1E32, 0x1E33 }, // LATIN CAPITAL LETTER K WITH DOT BELOW + { 0x1E34, 0x1E35 }, // LATIN CAPITAL LETTER K WITH LINE BELOW + { 0x1E36, 0x1E37 }, // LATIN CAPITAL LETTER L WITH DOT BELOW + { 0x1E38, 0x1E39 }, // LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON + { 0x1E3A, 0x1E3B }, // LATIN CAPITAL LETTER L WITH LINE BELOW + { 0x1E3C, 0x1E3D }, // LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW + { 0x1E3E, 0x1E3F }, // LATIN CAPITAL LETTER M WITH ACUTE + { 0x1E40, 0x1E41 }, // LATIN CAPITAL LETTER M WITH DOT ABOVE + { 0x1E42, 0x1E43 }, // LATIN CAPITAL LETTER M WITH DOT BELOW + { 0x1E44, 0x1E45 }, // LATIN CAPITAL LETTER N WITH DOT ABOVE + { 0x1E46, 0x1E47 }, // LATIN CAPITAL LETTER N WITH DOT BELOW + { 0x1E48, 0x1E49 }, // LATIN CAPITAL LETTER N WITH LINE BELOW + { 0x1E4A, 0x1E4B }, // LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW + { 0x1E4C, 0x1E4D }, // LATIN CAPITAL 
LETTER O WITH TILDE AND ACUTE + { 0x1E4E, 0x1E4F }, // LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS + { 0x1E50, 0x1E51 }, // LATIN CAPITAL LETTER O WITH MACRON AND GRAVE + { 0x1E52, 0x1E53 }, // LATIN CAPITAL LETTER O WITH MACRON AND ACUTE + { 0x1E54, 0x1E55 }, // LATIN CAPITAL LETTER P WITH ACUTE + { 0x1E56, 0x1E57 }, // LATIN CAPITAL LETTER P WITH DOT ABOVE + { 0x1E58, 0x1E59 }, // LATIN CAPITAL LETTER R WITH DOT ABOVE + { 0x1E5A, 0x1E5B }, // LATIN CAPITAL LETTER R WITH DOT BELOW + { 0x1E5C, 0x1E5D }, // LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON + { 0x1E5E, 0x1E5F }, // LATIN CAPITAL LETTER R WITH LINE BELOW + { 0x1E60, 0x1E61 }, // LATIN CAPITAL LETTER S WITH DOT ABOVE + { 0x1E62, 0x1E63 }, // LATIN CAPITAL LETTER S WITH DOT BELOW + { 0x1E64, 0x1E65 }, // LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE + { 0x1E66, 0x1E67 }, // LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE + { 0x1E68, 0x1E69 }, // LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE + { 0x1E6A, 0x1E6B }, // LATIN CAPITAL LETTER T WITH DOT ABOVE + { 0x1E6C, 0x1E6D }, // LATIN CAPITAL LETTER T WITH DOT BELOW + { 0x1E6E, 0x1E6F }, // LATIN CAPITAL LETTER T WITH LINE BELOW + { 0x1E70, 0x1E71 }, // LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW + { 0x1E72, 0x1E73 }, // LATIN CAPITAL LETTER U WITH DIAERESIS BELOW + { 0x1E74, 0x1E75 }, // LATIN CAPITAL LETTER U WITH TILDE BELOW + { 0x1E76, 0x1E77 }, // LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW + { 0x1E78, 0x1E79 }, // LATIN CAPITAL LETTER U WITH TILDE AND ACUTE + { 0x1E7A, 0x1E7B }, // LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS + { 0x1E7C, 0x1E7D }, // LATIN CAPITAL LETTER V WITH TILDE + { 0x1E7E, 0x1E7F }, // LATIN CAPITAL LETTER V WITH DOT BELOW + { 0x1E80, 0x1E81 }, // LATIN CAPITAL LETTER W WITH GRAVE + { 0x1E82, 0x1E83 }, // LATIN CAPITAL LETTER W WITH ACUTE + { 0x1E84, 0x1E85 }, // LATIN CAPITAL LETTER W WITH DIAERESIS + { 0x1E86, 0x1E87 }, // LATIN CAPITAL LETTER W WITH DOT ABOVE + { 0x1E88, 0x1E89 }, // LATIN CAPITAL LETTER W 
WITH DOT BELOW + { 0x1E8A, 0x1E8B }, // LATIN CAPITAL LETTER X WITH DOT ABOVE + { 0x1E8C, 0x1E8D }, // LATIN CAPITAL LETTER X WITH DIAERESIS + { 0x1E8E, 0x1E8F }, // LATIN CAPITAL LETTER Y WITH DOT ABOVE + { 0x1E90, 0x1E91 }, // LATIN CAPITAL LETTER Z WITH CIRCUMFLEX + { 0x1E92, 0x1E93 }, // LATIN CAPITAL LETTER Z WITH DOT BELOW + { 0x1E94, 0x1E95 }, // LATIN CAPITAL LETTER Z WITH LINE BELOW + { 0x1E9E, 0x00DF }, // LATIN CAPITAL LETTER SHARP S + { 0x1EA0, 0x1EA1 }, // LATIN CAPITAL LETTER A WITH DOT BELOW + { 0x1EA2, 0x1EA3 }, // LATIN CAPITAL LETTER A WITH HOOK ABOVE + { 0x1EA4, 0x1EA5 }, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE + { 0x1EA6, 0x1EA7 }, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE + { 0x1EA8, 0x1EA9 }, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE + { 0x1EAA, 0x1EAB }, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE + { 0x1EAC, 0x1EAD }, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW + { 0x1EAE, 0x1EAF }, // LATIN CAPITAL LETTER A WITH BREVE AND ACUTE + { 0x1EB0, 0x1EB1 }, // LATIN CAPITAL LETTER A WITH BREVE AND GRAVE + { 0x1EB2, 0x1EB3 }, // LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE + { 0x1EB4, 0x1EB5 }, // LATIN CAPITAL LETTER A WITH BREVE AND TILDE + { 0x1EB6, 0x1EB7 }, // LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW + { 0x1EB8, 0x1EB9 }, // LATIN CAPITAL LETTER E WITH DOT BELOW + { 0x1EBA, 0x1EBB }, // LATIN CAPITAL LETTER E WITH HOOK ABOVE + { 0x1EBC, 0x1EBD }, // LATIN CAPITAL LETTER E WITH TILDE + { 0x1EBE, 0x1EBF }, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE + { 0x1EC0, 0x1EC1 }, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE + { 0x1EC2, 0x1EC3 }, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE + { 0x1EC4, 0x1EC5 }, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE + { 0x1EC6, 0x1EC7 }, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW + { 0x1EC8, 0x1EC9 }, // LATIN CAPITAL LETTER I WITH HOOK ABOVE + { 0x1ECA, 0x1ECB }, // LATIN CAPITAL LETTER I WITH DOT 
BELOW + { 0x1ECC, 0x1ECD }, // LATIN CAPITAL LETTER O WITH DOT BELOW + { 0x1ECE, 0x1ECF }, // LATIN CAPITAL LETTER O WITH HOOK ABOVE + { 0x1ED0, 0x1ED1 }, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE + { 0x1ED2, 0x1ED3 }, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE + { 0x1ED4, 0x1ED5 }, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE + { 0x1ED6, 0x1ED7 }, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE + { 0x1ED8, 0x1ED9 }, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW + { 0x1EDA, 0x1EDB }, // LATIN CAPITAL LETTER O WITH HORN AND ACUTE + { 0x1EDC, 0x1EDD }, // LATIN CAPITAL LETTER O WITH HORN AND GRAVE + { 0x1EDE, 0x1EDF }, // LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE + { 0x1EE0, 0x1EE1 }, // LATIN CAPITAL LETTER O WITH HORN AND TILDE + { 0x1EE2, 0x1EE3 }, // LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW + { 0x1EE4, 0x1EE5 }, // LATIN CAPITAL LETTER U WITH DOT BELOW + { 0x1EE6, 0x1EE7 }, // LATIN CAPITAL LETTER U WITH HOOK ABOVE + { 0x1EE8, 0x1EE9 }, // LATIN CAPITAL LETTER U WITH HORN AND ACUTE + { 0x1EEA, 0x1EEB }, // LATIN CAPITAL LETTER U WITH HORN AND GRAVE + { 0x1EEC, 0x1EED }, // LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE + { 0x1EEE, 0x1EEF }, // LATIN CAPITAL LETTER U WITH HORN AND TILDE + { 0x1EF0, 0x1EF1 }, // LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW + { 0x1EF2, 0x1EF3 }, // LATIN CAPITAL LETTER Y WITH GRAVE + { 0x1EF4, 0x1EF5 }, // LATIN CAPITAL LETTER Y WITH DOT BELOW + { 0x1EF6, 0x1EF7 }, // LATIN CAPITAL LETTER Y WITH HOOK ABOVE + { 0x1EF8, 0x1EF9 }, // LATIN CAPITAL LETTER Y WITH TILDE + { 0x1EFA, 0x1EFB }, // LATIN CAPITAL LETTER MIDDLE-WELSH LL + { 0x1EFC, 0x1EFD }, // LATIN CAPITAL LETTER MIDDLE-WELSH V + { 0x1EFE, 0x1EFF }, // LATIN CAPITAL LETTER Y WITH LOOP + { 0x1F08, 0x1F00 }, // GREEK CAPITAL LETTER ALPHA WITH PSILI + { 0x1F09, 0x1F01 }, // GREEK CAPITAL LETTER ALPHA WITH DASIA + { 0x1F0A, 0x1F02 }, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA + { 0x1F0B, 0x1F03 }, // GREEK 
CAPITAL LETTER ALPHA WITH DASIA AND VARIA + { 0x1F0C, 0x1F04 }, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA + { 0x1F0D, 0x1F05 }, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA + { 0x1F0E, 0x1F06 }, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI + { 0x1F0F, 0x1F07 }, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI + { 0x1F18, 0x1F10 }, // GREEK CAPITAL LETTER EPSILON WITH PSILI + { 0x1F19, 0x1F11 }, // GREEK CAPITAL LETTER EPSILON WITH DASIA + { 0x1F1A, 0x1F12 }, // GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA + { 0x1F1B, 0x1F13 }, // GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA + { 0x1F1C, 0x1F14 }, // GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA + { 0x1F1D, 0x1F15 }, // GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA + { 0x1F28, 0x1F20 }, // GREEK CAPITAL LETTER ETA WITH PSILI + { 0x1F29, 0x1F21 }, // GREEK CAPITAL LETTER ETA WITH DASIA + { 0x1F2A, 0x1F22 }, // GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA + { 0x1F2B, 0x1F23 }, // GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA + { 0x1F2C, 0x1F24 }, // GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA + { 0x1F2D, 0x1F25 }, // GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA + { 0x1F2E, 0x1F26 }, // GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI + { 0x1F2F, 0x1F27 }, // GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI + { 0x1F38, 0x1F30 }, // GREEK CAPITAL LETTER IOTA WITH PSILI + { 0x1F39, 0x1F31 }, // GREEK CAPITAL LETTER IOTA WITH DASIA + { 0x1F3A, 0x1F32 }, // GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA + { 0x1F3B, 0x1F33 }, // GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA + { 0x1F3C, 0x1F34 }, // GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA + { 0x1F3D, 0x1F35 }, // GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA + { 0x1F3E, 0x1F36 }, // GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI + { 0x1F3F, 0x1F37 }, // GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI + { 0x1F48, 0x1F40 }, // GREEK CAPITAL LETTER OMICRON WITH PSILI + { 0x1F49, 0x1F41 }, // GREEK CAPITAL 
LETTER OMICRON WITH DASIA + { 0x1F4A, 0x1F42 }, // GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA + { 0x1F4B, 0x1F43 }, // GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA + { 0x1F4C, 0x1F44 }, // GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA + { 0x1F4D, 0x1F45 }, // GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA + { 0x1F59, 0x1F51 }, // GREEK CAPITAL LETTER UPSILON WITH DASIA + { 0x1F5B, 0x1F53 }, // GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA + { 0x1F5D, 0x1F55 }, // GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA + { 0x1F5F, 0x1F57 }, // GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI + { 0x1F68, 0x1F60 }, // GREEK CAPITAL LETTER OMEGA WITH PSILI + { 0x1F69, 0x1F61 }, // GREEK CAPITAL LETTER OMEGA WITH DASIA + { 0x1F6A, 0x1F62 }, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA + { 0x1F6B, 0x1F63 }, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA + { 0x1F6C, 0x1F64 }, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA + { 0x1F6D, 0x1F65 }, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA + { 0x1F6E, 0x1F66 }, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI + { 0x1F6F, 0x1F67 }, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI + { 0x1F88, 0x1F80 }, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI + { 0x1F89, 0x1F81 }, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI + { 0x1F8A, 0x1F82 }, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI + { 0x1F8B, 0x1F83 }, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI + { 0x1F8C, 0x1F84 }, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI + { 0x1F8D, 0x1F85 }, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI + { 0x1F8E, 0x1F86 }, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI + { 0x1F8F, 0x1F87 }, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI + { 0x1F98, 0x1F90 }, // GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI 
+ { 0x1F99, 0x1F91 }, // GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI + { 0x1F9A, 0x1F92 }, // GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI + { 0x1F9B, 0x1F93 }, // GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI + { 0x1F9C, 0x1F94 }, // GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI + { 0x1F9D, 0x1F95 }, // GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI + { 0x1F9E, 0x1F96 }, // GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI + { 0x1F9F, 0x1F97 }, // GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI + { 0x1FA8, 0x1FA0 }, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI + { 0x1FA9, 0x1FA1 }, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI + { 0x1FAA, 0x1FA2 }, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI + { 0x1FAB, 0x1FA3 }, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI + { 0x1FAC, 0x1FA4 }, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI + { 0x1FAD, 0x1FA5 }, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI + { 0x1FAE, 0x1FA6 }, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI + { 0x1FAF, 0x1FA7 }, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI + { 0x1FB8, 0x1FB0 }, // GREEK CAPITAL LETTER ALPHA WITH VRACHY + { 0x1FB9, 0x1FB1 }, // GREEK CAPITAL LETTER ALPHA WITH MACRON + { 0x1FBA, 0x1F70 }, // GREEK CAPITAL LETTER ALPHA WITH VARIA + { 0x1FBB, 0x1F71 }, // GREEK CAPITAL LETTER ALPHA WITH OXIA + { 0x1FBC, 0x1FB3 }, // GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI + { 0x1FC8, 0x1F72 }, // GREEK CAPITAL LETTER EPSILON WITH VARIA + { 0x1FC9, 0x1F73 }, // GREEK CAPITAL LETTER EPSILON WITH OXIA + { 0x1FCA, 0x1F74 }, // GREEK CAPITAL LETTER ETA WITH VARIA + { 0x1FCB, 0x1F75 }, // GREEK CAPITAL LETTER ETA WITH OXIA + { 0x1FCC, 0x1FC3 }, // GREEK CAPITAL LETTER ETA WITH 
PROSGEGRAMMENI + { 0x1FD8, 0x1FD0 }, // GREEK CAPITAL LETTER IOTA WITH VRACHY + { 0x1FD9, 0x1FD1 }, // GREEK CAPITAL LETTER IOTA WITH MACRON + { 0x1FDA, 0x1F76 }, // GREEK CAPITAL LETTER IOTA WITH VARIA + { 0x1FDB, 0x1F77 }, // GREEK CAPITAL LETTER IOTA WITH OXIA + { 0x1FE8, 0x1FE0 }, // GREEK CAPITAL LETTER UPSILON WITH VRACHY + { 0x1FE9, 0x1FE1 }, // GREEK CAPITAL LETTER UPSILON WITH MACRON + { 0x1FEA, 0x1F7A }, // GREEK CAPITAL LETTER UPSILON WITH VARIA + { 0x1FEB, 0x1F7B }, // GREEK CAPITAL LETTER UPSILON WITH OXIA + { 0x1FEC, 0x1FE5 }, // GREEK CAPITAL LETTER RHO WITH DASIA + { 0x1FF8, 0x1F78 }, // GREEK CAPITAL LETTER OMICRON WITH VARIA + { 0x1FF9, 0x1F79 }, // GREEK CAPITAL LETTER OMICRON WITH OXIA + { 0x1FFA, 0x1F7C }, // GREEK CAPITAL LETTER OMEGA WITH VARIA + { 0x1FFB, 0x1F7D }, // GREEK CAPITAL LETTER OMEGA WITH OXIA + { 0x1FFC, 0x1FF3 }, // GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI + { 0x2126, 0x03C9 }, // OHM SIGN + { 0x212A, 0x006B }, // KELVIN SIGN + { 0x212B, 0x00E5 }, // ANGSTROM SIGN + { 0x2132, 0x214E }, // TURNED CAPITAL F + { 0x2160, 0x2170 }, // ROMAN NUMERAL ONE + { 0x2161, 0x2171 }, // ROMAN NUMERAL TWO + { 0x2162, 0x2172 }, // ROMAN NUMERAL THREE + { 0x2163, 0x2173 }, // ROMAN NUMERAL FOUR + { 0x2164, 0x2174 }, // ROMAN NUMERAL FIVE + { 0x2165, 0x2175 }, // ROMAN NUMERAL SIX + { 0x2166, 0x2176 }, // ROMAN NUMERAL SEVEN + { 0x2167, 0x2177 }, // ROMAN NUMERAL EIGHT + { 0x2168, 0x2178 }, // ROMAN NUMERAL NINE + { 0x2169, 0x2179 }, // ROMAN NUMERAL TEN + { 0x216A, 0x217A }, // ROMAN NUMERAL ELEVEN + { 0x216B, 0x217B }, // ROMAN NUMERAL TWELVE + { 0x216C, 0x217C }, // ROMAN NUMERAL FIFTY + { 0x216D, 0x217D }, // ROMAN NUMERAL ONE HUNDRED + { 0x216E, 0x217E }, // ROMAN NUMERAL FIVE HUNDRED + { 0x216F, 0x217F }, // ROMAN NUMERAL ONE THOUSAND + { 0x2183, 0x2184 }, // ROMAN NUMERAL REVERSED ONE HUNDRED + { 0x24B6, 0x24D0 }, // CIRCLED LATIN CAPITAL LETTER A + { 0x24B7, 0x24D1 }, // CIRCLED LATIN CAPITAL LETTER B + { 0x24B8, 0x24D2 }, // CIRCLED 
LATIN CAPITAL LETTER C + { 0x24B9, 0x24D3 }, // CIRCLED LATIN CAPITAL LETTER D + { 0x24BA, 0x24D4 }, // CIRCLED LATIN CAPITAL LETTER E + { 0x24BB, 0x24D5 }, // CIRCLED LATIN CAPITAL LETTER F + { 0x24BC, 0x24D6 }, // CIRCLED LATIN CAPITAL LETTER G + { 0x24BD, 0x24D7 }, // CIRCLED LATIN CAPITAL LETTER H + { 0x24BE, 0x24D8 }, // CIRCLED LATIN CAPITAL LETTER I + { 0x24BF, 0x24D9 }, // CIRCLED LATIN CAPITAL LETTER J + { 0x24C0, 0x24DA }, // CIRCLED LATIN CAPITAL LETTER K + { 0x24C1, 0x24DB }, // CIRCLED LATIN CAPITAL LETTER L + { 0x24C2, 0x24DC }, // CIRCLED LATIN CAPITAL LETTER M + { 0x24C3, 0x24DD }, // CIRCLED LATIN CAPITAL LETTER N + { 0x24C4, 0x24DE }, // CIRCLED LATIN CAPITAL LETTER O + { 0x24C5, 0x24DF }, // CIRCLED LATIN CAPITAL LETTER P + { 0x24C6, 0x24E0 }, // CIRCLED LATIN CAPITAL LETTER Q + { 0x24C7, 0x24E1 }, // CIRCLED LATIN CAPITAL LETTER R + { 0x24C8, 0x24E2 }, // CIRCLED LATIN CAPITAL LETTER S + { 0x24C9, 0x24E3 }, // CIRCLED LATIN CAPITAL LETTER T + { 0x24CA, 0x24E4 }, // CIRCLED LATIN CAPITAL LETTER U + { 0x24CB, 0x24E5 }, // CIRCLED LATIN CAPITAL LETTER V + { 0x24CC, 0x24E6 }, // CIRCLED LATIN CAPITAL LETTER W + { 0x24CD, 0x24E7 }, // CIRCLED LATIN CAPITAL LETTER X + { 0x24CE, 0x24E8 }, // CIRCLED LATIN CAPITAL LETTER Y + { 0x24CF, 0x24E9 }, // CIRCLED LATIN CAPITAL LETTER Z + { 0x2C00, 0x2C30 }, // GLAGOLITIC CAPITAL LETTER AZU + { 0x2C01, 0x2C31 }, // GLAGOLITIC CAPITAL LETTER BUKY + { 0x2C02, 0x2C32 }, // GLAGOLITIC CAPITAL LETTER VEDE + { 0x2C03, 0x2C33 }, // GLAGOLITIC CAPITAL LETTER GLAGOLI + { 0x2C04, 0x2C34 }, // GLAGOLITIC CAPITAL LETTER DOBRO + { 0x2C05, 0x2C35 }, // GLAGOLITIC CAPITAL LETTER YESTU + { 0x2C06, 0x2C36 }, // GLAGOLITIC CAPITAL LETTER ZHIVETE + { 0x2C07, 0x2C37 }, // GLAGOLITIC CAPITAL LETTER DZELO + { 0x2C08, 0x2C38 }, // GLAGOLITIC CAPITAL LETTER ZEMLJA + { 0x2C09, 0x2C39 }, // GLAGOLITIC CAPITAL LETTER IZHE + { 0x2C0A, 0x2C3A }, // GLAGOLITIC CAPITAL LETTER INITIAL IZHE + { 0x2C0B, 0x2C3B }, // GLAGOLITIC CAPITAL LETTER I + 
{ 0x2C0C, 0x2C3C }, // GLAGOLITIC CAPITAL LETTER DJERVI + { 0x2C0D, 0x2C3D }, // GLAGOLITIC CAPITAL LETTER KAKO + { 0x2C0E, 0x2C3E }, // GLAGOLITIC CAPITAL LETTER LJUDIJE + { 0x2C0F, 0x2C3F }, // GLAGOLITIC CAPITAL LETTER MYSLITE + { 0x2C10, 0x2C40 }, // GLAGOLITIC CAPITAL LETTER NASHI + { 0x2C11, 0x2C41 }, // GLAGOLITIC CAPITAL LETTER ONU + { 0x2C12, 0x2C42 }, // GLAGOLITIC CAPITAL LETTER POKOJI + { 0x2C13, 0x2C43 }, // GLAGOLITIC CAPITAL LETTER RITSI + { 0x2C14, 0x2C44 }, // GLAGOLITIC CAPITAL LETTER SLOVO + { 0x2C15, 0x2C45 }, // GLAGOLITIC CAPITAL LETTER TVRIDO + { 0x2C16, 0x2C46 }, // GLAGOLITIC CAPITAL LETTER UKU + { 0x2C17, 0x2C47 }, // GLAGOLITIC CAPITAL LETTER FRITU + { 0x2C18, 0x2C48 }, // GLAGOLITIC CAPITAL LETTER HERU + { 0x2C19, 0x2C49 }, // GLAGOLITIC CAPITAL LETTER OTU + { 0x2C1A, 0x2C4A }, // GLAGOLITIC CAPITAL LETTER PE + { 0x2C1B, 0x2C4B }, // GLAGOLITIC CAPITAL LETTER SHTA + { 0x2C1C, 0x2C4C }, // GLAGOLITIC CAPITAL LETTER TSI + { 0x2C1D, 0x2C4D }, // GLAGOLITIC CAPITAL LETTER CHRIVI + { 0x2C1E, 0x2C4E }, // GLAGOLITIC CAPITAL LETTER SHA + { 0x2C1F, 0x2C4F }, // GLAGOLITIC CAPITAL LETTER YERU + { 0x2C20, 0x2C50 }, // GLAGOLITIC CAPITAL LETTER YERI + { 0x2C21, 0x2C51 }, // GLAGOLITIC CAPITAL LETTER YATI + { 0x2C22, 0x2C52 }, // GLAGOLITIC CAPITAL LETTER SPIDERY HA + { 0x2C23, 0x2C53 }, // GLAGOLITIC CAPITAL LETTER YU + { 0x2C24, 0x2C54 }, // GLAGOLITIC CAPITAL LETTER SMALL YUS + { 0x2C25, 0x2C55 }, // GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL + { 0x2C26, 0x2C56 }, // GLAGOLITIC CAPITAL LETTER YO + { 0x2C27, 0x2C57 }, // GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS + { 0x2C28, 0x2C58 }, // GLAGOLITIC CAPITAL LETTER BIG YUS + { 0x2C29, 0x2C59 }, // GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS + { 0x2C2A, 0x2C5A }, // GLAGOLITIC CAPITAL LETTER FITA + { 0x2C2B, 0x2C5B }, // GLAGOLITIC CAPITAL LETTER IZHITSA + { 0x2C2C, 0x2C5C }, // GLAGOLITIC CAPITAL LETTER SHTAPIC + { 0x2C2D, 0x2C5D }, // GLAGOLITIC CAPITAL LETTER TROKUTASTI A + { 0x2C2E, 0x2C5E }, 
// GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE + { 0x2C60, 0x2C61 }, // LATIN CAPITAL LETTER L WITH DOUBLE BAR + { 0x2C62, 0x026B }, // LATIN CAPITAL LETTER L WITH MIDDLE TILDE + { 0x2C63, 0x1D7D }, // LATIN CAPITAL LETTER P WITH STROKE + { 0x2C64, 0x027D }, // LATIN CAPITAL LETTER R WITH TAIL + { 0x2C67, 0x2C68 }, // LATIN CAPITAL LETTER H WITH DESCENDER + { 0x2C69, 0x2C6A }, // LATIN CAPITAL LETTER K WITH DESCENDER + { 0x2C6B, 0x2C6C }, // LATIN CAPITAL LETTER Z WITH DESCENDER + { 0x2C6D, 0x0251 }, // LATIN CAPITAL LETTER ALPHA + { 0x2C6E, 0x0271 }, // LATIN CAPITAL LETTER M WITH HOOK + { 0x2C6F, 0x0250 }, // LATIN CAPITAL LETTER TURNED A + { 0x2C70, 0x0252 }, // LATIN CAPITAL LETTER TURNED ALPHA + { 0x2C72, 0x2C73 }, // LATIN CAPITAL LETTER W WITH HOOK + { 0x2C75, 0x2C76 }, // LATIN CAPITAL LETTER HALF H + { 0x2C7E, 0x023F }, // LATIN CAPITAL LETTER S WITH SWASH TAIL + { 0x2C7F, 0x0240 }, // LATIN CAPITAL LETTER Z WITH SWASH TAIL + { 0x2C80, 0x2C81 }, // COPTIC CAPITAL LETTER ALFA + { 0x2C82, 0x2C83 }, // COPTIC CAPITAL LETTER VIDA + { 0x2C84, 0x2C85 }, // COPTIC CAPITAL LETTER GAMMA + { 0x2C86, 0x2C87 }, // COPTIC CAPITAL LETTER DALDA + { 0x2C88, 0x2C89 }, // COPTIC CAPITAL LETTER EIE + { 0x2C8A, 0x2C8B }, // COPTIC CAPITAL LETTER SOU + { 0x2C8C, 0x2C8D }, // COPTIC CAPITAL LETTER ZATA + { 0x2C8E, 0x2C8F }, // COPTIC CAPITAL LETTER HATE + { 0x2C90, 0x2C91 }, // COPTIC CAPITAL LETTER THETHE + { 0x2C92, 0x2C93 }, // COPTIC CAPITAL LETTER IAUDA + { 0x2C94, 0x2C95 }, // COPTIC CAPITAL LETTER KAPA + { 0x2C96, 0x2C97 }, // COPTIC CAPITAL LETTER LAULA + { 0x2C98, 0x2C99 }, // COPTIC CAPITAL LETTER MI + { 0x2C9A, 0x2C9B }, // COPTIC CAPITAL LETTER NI + { 0x2C9C, 0x2C9D }, // COPTIC CAPITAL LETTER KSI + { 0x2C9E, 0x2C9F }, // COPTIC CAPITAL LETTER O + { 0x2CA0, 0x2CA1 }, // COPTIC CAPITAL LETTER PI + { 0x2CA2, 0x2CA3 }, // COPTIC CAPITAL LETTER RO + { 0x2CA4, 0x2CA5 }, // COPTIC CAPITAL LETTER SIMA + { 0x2CA6, 0x2CA7 }, // COPTIC CAPITAL LETTER TAU + { 0x2CA8, 0x2CA9 }, 
// COPTIC CAPITAL LETTER UA + { 0x2CAA, 0x2CAB }, // COPTIC CAPITAL LETTER FI + { 0x2CAC, 0x2CAD }, // COPTIC CAPITAL LETTER KHI + { 0x2CAE, 0x2CAF }, // COPTIC CAPITAL LETTER PSI + { 0x2CB0, 0x2CB1 }, // COPTIC CAPITAL LETTER OOU + { 0x2CB2, 0x2CB3 }, // COPTIC CAPITAL LETTER DIALECT-P ALEF + { 0x2CB4, 0x2CB5 }, // COPTIC CAPITAL LETTER OLD COPTIC AIN + { 0x2CB6, 0x2CB7 }, // COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE + { 0x2CB8, 0x2CB9 }, // COPTIC CAPITAL LETTER DIALECT-P KAPA + { 0x2CBA, 0x2CBB }, // COPTIC CAPITAL LETTER DIALECT-P NI + { 0x2CBC, 0x2CBD }, // COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI + { 0x2CBE, 0x2CBF }, // COPTIC CAPITAL LETTER OLD COPTIC OOU + { 0x2CC0, 0x2CC1 }, // COPTIC CAPITAL LETTER SAMPI + { 0x2CC2, 0x2CC3 }, // COPTIC CAPITAL LETTER CROSSED SHEI + { 0x2CC4, 0x2CC5 }, // COPTIC CAPITAL LETTER OLD COPTIC SHEI + { 0x2CC6, 0x2CC7 }, // COPTIC CAPITAL LETTER OLD COPTIC ESH + { 0x2CC8, 0x2CC9 }, // COPTIC CAPITAL LETTER AKHMIMIC KHEI + { 0x2CCA, 0x2CCB }, // COPTIC CAPITAL LETTER DIALECT-P HORI + { 0x2CCC, 0x2CCD }, // COPTIC CAPITAL LETTER OLD COPTIC HORI + { 0x2CCE, 0x2CCF }, // COPTIC CAPITAL LETTER OLD COPTIC HA + { 0x2CD0, 0x2CD1 }, // COPTIC CAPITAL LETTER L-SHAPED HA + { 0x2CD2, 0x2CD3 }, // COPTIC CAPITAL LETTER OLD COPTIC HEI + { 0x2CD4, 0x2CD5 }, // COPTIC CAPITAL LETTER OLD COPTIC HAT + { 0x2CD6, 0x2CD7 }, // COPTIC CAPITAL LETTER OLD COPTIC GANGIA + { 0x2CD8, 0x2CD9 }, // COPTIC CAPITAL LETTER OLD COPTIC DJA + { 0x2CDA, 0x2CDB }, // COPTIC CAPITAL LETTER OLD COPTIC SHIMA + { 0x2CDC, 0x2CDD }, // COPTIC CAPITAL LETTER OLD NUBIAN SHIMA + { 0x2CDE, 0x2CDF }, // COPTIC CAPITAL LETTER OLD NUBIAN NGI + { 0x2CE0, 0x2CE1 }, // COPTIC CAPITAL LETTER OLD NUBIAN NYI + { 0x2CE2, 0x2CE3 }, // COPTIC CAPITAL LETTER OLD NUBIAN WAU + { 0x2CEB, 0x2CEC }, // COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI + { 0x2CED, 0x2CEE }, // COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA + { 0xA640, 0xA641 }, // CYRILLIC CAPITAL LETTER ZEMLYA + { 0xA642, 0xA643 }, // 
CYRILLIC CAPITAL LETTER DZELO + { 0xA644, 0xA645 }, // CYRILLIC CAPITAL LETTER REVERSED DZE + { 0xA646, 0xA647 }, // CYRILLIC CAPITAL LETTER IOTA + { 0xA648, 0xA649 }, // CYRILLIC CAPITAL LETTER DJERV + { 0xA64A, 0xA64B }, // CYRILLIC CAPITAL LETTER MONOGRAPH UK + { 0xA64C, 0xA64D }, // CYRILLIC CAPITAL LETTER BROAD OMEGA + { 0xA64E, 0xA64F }, // CYRILLIC CAPITAL LETTER NEUTRAL YER + { 0xA650, 0xA651 }, // CYRILLIC CAPITAL LETTER YERU WITH BACK YER + { 0xA652, 0xA653 }, // CYRILLIC CAPITAL LETTER IOTIFIED YAT + { 0xA654, 0xA655 }, // CYRILLIC CAPITAL LETTER REVERSED YU + { 0xA656, 0xA657 }, // CYRILLIC CAPITAL LETTER IOTIFIED A + { 0xA658, 0xA659 }, // CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS + { 0xA65A, 0xA65B }, // CYRILLIC CAPITAL LETTER BLENDED YUS + { 0xA65C, 0xA65D }, // CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS + { 0xA65E, 0xA65F }, // CYRILLIC CAPITAL LETTER YN + { 0xA662, 0xA663 }, // CYRILLIC CAPITAL LETTER SOFT DE + { 0xA664, 0xA665 }, // CYRILLIC CAPITAL LETTER SOFT EL + { 0xA666, 0xA667 }, // CYRILLIC CAPITAL LETTER SOFT EM + { 0xA668, 0xA669 }, // CYRILLIC CAPITAL LETTER MONOCULAR O + { 0xA66A, 0xA66B }, // CYRILLIC CAPITAL LETTER BINOCULAR O + { 0xA66C, 0xA66D }, // CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O + { 0xA680, 0xA681 }, // CYRILLIC CAPITAL LETTER DWE + { 0xA682, 0xA683 }, // CYRILLIC CAPITAL LETTER DZWE + { 0xA684, 0xA685 }, // CYRILLIC CAPITAL LETTER ZHWE + { 0xA686, 0xA687 }, // CYRILLIC CAPITAL LETTER CCHE + { 0xA688, 0xA689 }, // CYRILLIC CAPITAL LETTER DZZE + { 0xA68A, 0xA68B }, // CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK + { 0xA68C, 0xA68D }, // CYRILLIC CAPITAL LETTER TWE + { 0xA68E, 0xA68F }, // CYRILLIC CAPITAL LETTER TSWE + { 0xA690, 0xA691 }, // CYRILLIC CAPITAL LETTER TSSE + { 0xA692, 0xA693 }, // CYRILLIC CAPITAL LETTER TCHE + { 0xA694, 0xA695 }, // CYRILLIC CAPITAL LETTER HWE + { 0xA696, 0xA697 }, // CYRILLIC CAPITAL LETTER SHWE + { 0xA722, 0xA723 }, // LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF + { 0xA724, 0xA725 }, 
// LATIN CAPITAL LETTER EGYPTOLOGICAL AIN + { 0xA726, 0xA727 }, // LATIN CAPITAL LETTER HENG + { 0xA728, 0xA729 }, // LATIN CAPITAL LETTER TZ + { 0xA72A, 0xA72B }, // LATIN CAPITAL LETTER TRESILLO + { 0xA72C, 0xA72D }, // LATIN CAPITAL LETTER CUATRILLO + { 0xA72E, 0xA72F }, // LATIN CAPITAL LETTER CUATRILLO WITH COMMA + { 0xA732, 0xA733 }, // LATIN CAPITAL LETTER AA + { 0xA734, 0xA735 }, // LATIN CAPITAL LETTER AO + { 0xA736, 0xA737 }, // LATIN CAPITAL LETTER AU + { 0xA738, 0xA739 }, // LATIN CAPITAL LETTER AV + { 0xA73A, 0xA73B }, // LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR + { 0xA73C, 0xA73D }, // LATIN CAPITAL LETTER AY + { 0xA73E, 0xA73F }, // LATIN CAPITAL LETTER REVERSED C WITH DOT + { 0xA740, 0xA741 }, // LATIN CAPITAL LETTER K WITH STROKE + { 0xA742, 0xA743 }, // LATIN CAPITAL LETTER K WITH DIAGONAL STROKE + { 0xA744, 0xA745 }, // LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE + { 0xA746, 0xA747 }, // LATIN CAPITAL LETTER BROKEN L + { 0xA748, 0xA749 }, // LATIN CAPITAL LETTER L WITH HIGH STROKE + { 0xA74A, 0xA74B }, // LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY + { 0xA74C, 0xA74D }, // LATIN CAPITAL LETTER O WITH LOOP + { 0xA74E, 0xA74F }, // LATIN CAPITAL LETTER OO + { 0xA750, 0xA751 }, // LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER + { 0xA752, 0xA753 }, // LATIN CAPITAL LETTER P WITH FLOURISH + { 0xA754, 0xA755 }, // LATIN CAPITAL LETTER P WITH SQUIRREL TAIL + { 0xA756, 0xA757 }, // LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER + { 0xA758, 0xA759 }, // LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE + { 0xA75A, 0xA75B }, // LATIN CAPITAL LETTER R ROTUNDA + { 0xA75C, 0xA75D }, // LATIN CAPITAL LETTER RUM ROTUNDA + { 0xA75E, 0xA75F }, // LATIN CAPITAL LETTER V WITH DIAGONAL STROKE + { 0xA760, 0xA761 }, // LATIN CAPITAL LETTER VY + { 0xA762, 0xA763 }, // LATIN CAPITAL LETTER VISIGOTHIC Z + { 0xA764, 0xA765 }, // LATIN CAPITAL LETTER THORN WITH STROKE + { 0xA766, 0xA767 }, // LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER 
+ { 0xA768, 0xA769 }, // LATIN CAPITAL LETTER VEND + { 0xA76A, 0xA76B }, // LATIN CAPITAL LETTER ET + { 0xA76C, 0xA76D }, // LATIN CAPITAL LETTER IS + { 0xA76E, 0xA76F }, // LATIN CAPITAL LETTER CON + { 0xA779, 0xA77A }, // LATIN CAPITAL LETTER INSULAR D + { 0xA77B, 0xA77C }, // LATIN CAPITAL LETTER INSULAR F + { 0xA77D, 0x1D79 }, // LATIN CAPITAL LETTER INSULAR G + { 0xA77E, 0xA77F }, // LATIN CAPITAL LETTER TURNED INSULAR G + { 0xA780, 0xA781 }, // LATIN CAPITAL LETTER TURNED L + { 0xA782, 0xA783 }, // LATIN CAPITAL LETTER INSULAR R + { 0xA784, 0xA785 }, // LATIN CAPITAL LETTER INSULAR S + { 0xA786, 0xA787 }, // LATIN CAPITAL LETTER INSULAR T + { 0xA78B, 0xA78C }, // LATIN CAPITAL LETTER SALTILLO + { 0xFF21, 0xFF41 }, // FULLWIDTH LATIN CAPITAL LETTER A + { 0xFF22, 0xFF42 }, // FULLWIDTH LATIN CAPITAL LETTER B + { 0xFF23, 0xFF43 }, // FULLWIDTH LATIN CAPITAL LETTER C + { 0xFF24, 0xFF44 }, // FULLWIDTH LATIN CAPITAL LETTER D + { 0xFF25, 0xFF45 }, // FULLWIDTH LATIN CAPITAL LETTER E + { 0xFF26, 0xFF46 }, // FULLWIDTH LATIN CAPITAL LETTER F + { 0xFF27, 0xFF47 }, // FULLWIDTH LATIN CAPITAL LETTER G + { 0xFF28, 0xFF48 }, // FULLWIDTH LATIN CAPITAL LETTER H + { 0xFF29, 0xFF49 }, // FULLWIDTH LATIN CAPITAL LETTER I + { 0xFF2A, 0xFF4A }, // FULLWIDTH LATIN CAPITAL LETTER J + { 0xFF2B, 0xFF4B }, // FULLWIDTH LATIN CAPITAL LETTER K + { 0xFF2C, 0xFF4C }, // FULLWIDTH LATIN CAPITAL LETTER L + { 0xFF2D, 0xFF4D }, // FULLWIDTH LATIN CAPITAL LETTER M + { 0xFF2E, 0xFF4E }, // FULLWIDTH LATIN CAPITAL LETTER N + { 0xFF2F, 0xFF4F }, // FULLWIDTH LATIN CAPITAL LETTER O + { 0xFF30, 0xFF50 }, // FULLWIDTH LATIN CAPITAL LETTER P + { 0xFF31, 0xFF51 }, // FULLWIDTH LATIN CAPITAL LETTER Q + { 0xFF32, 0xFF52 }, // FULLWIDTH LATIN CAPITAL LETTER R + { 0xFF33, 0xFF53 }, // FULLWIDTH LATIN CAPITAL LETTER S + { 0xFF34, 0xFF54 }, // FULLWIDTH LATIN CAPITAL LETTER T + { 0xFF35, 0xFF55 }, // FULLWIDTH LATIN CAPITAL LETTER U + { 0xFF36, 0xFF56 }, // FULLWIDTH LATIN CAPITAL LETTER V + { 0xFF37, 
0xFF57 }, // FULLWIDTH LATIN CAPITAL LETTER W + { 0xFF38, 0xFF58 }, // FULLWIDTH LATIN CAPITAL LETTER X + { 0xFF39, 0xFF59 }, // FULLWIDTH LATIN CAPITAL LETTER Y + { 0xFF3A, 0xFF5A } // FULLWIDTH LATIN CAPITAL LETTER Z +}; + +static int compare_pair_capital(const void *a, const void *b) { + return (int)(*(unsigned short *)a) + - (int)((struct LatinCapitalSmallPair*)b)->capital; +} + +unsigned short latin_tolower(unsigned short c) { + struct LatinCapitalSmallPair *p = + (struct LatinCapitalSmallPair *)bsearch(&c, SORTED_CHAR_MAP, + sizeof(SORTED_CHAR_MAP) / sizeof(SORTED_CHAR_MAP[0]), + sizeof(SORTED_CHAR_MAP[0]), + compare_pair_capital); + return p ? p->small : c; +} + +} // namespace latinime diff --git a/native/src/char_utils.h b/native/src/char_utils.h new file mode 100644 index 000000000..921ecb4a5 --- /dev/null +++ b/native/src/char_utils.h @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2010 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LATINIME_CHAR_UTILS_H +#define LATINIME_CHAR_UTILS_H + +namespace latinime { + +unsigned short latin_tolower(unsigned short c); + +}; // namespace latinime + +#endif // LATINIME_CHAR_UTILS_H diff --git a/native/src/dictionary.cpp b/native/src/dictionary.cpp index 6e6f44182..a1a632faa 100644 --- a/native/src/dictionary.cpp +++ b/native/src/dictionary.cpp @@ -19,21 +19,18 @@ #include <fcntl.h> #include <sys/mman.h> #include <string.h> -#include <cutils/log.h> - -#include <unicode/uchar.h> - -//#define USE_ASSET_MANAGER - -#ifdef USE_ASSET_MANAGER -#include <utils/AssetManager.h> -#include <utils/Asset.h> -#endif +//#define LOG_TAG "dictionary.cpp" +//#include <cutils/log.h> +#define LOGI #include "dictionary.h" #include "basechars.h" +#include "char_utils.h" #define DEBUG_DICT 0 +#define DICTIONARY_VERSION_MIN 200 +#define DICTIONARY_HEADER_SIZE 2 +#define NOT_VALID_WORD -99 namespace latinime { @@ -42,6 +39,7 @@ Dictionary::Dictionary(void *dict, int typedLetterMultiplier, int fullWordMultip mDict = (unsigned char*) dict; mTypedLetterMultiplier = typedLetterMultiplier; mFullWordMultiplier = fullWordMultiplier; + getVersionNumber(); } Dictionary::~Dictionary() @@ -65,7 +63,11 @@ int Dictionary::getSuggestions(int *codes, int codesSize, unsigned short *outWor mNextLettersFrequencies = nextLetters; mNextLettersSize = nextLettersSize; - getWordsRec(0, 0, mInputLength * 3, false, 1, 0, 0); + if (checkIfDictVersionIsLatest()) { + getWordsRec(DICTIONARY_HEADER_SIZE, 0, mInputLength * 3, false, 1, 0, 0); + } else { + getWordsRec(0, 0, mInputLength * 3, false, 1, 0, 0); + } // Get the word count suggWords = 0; @@ -92,6 +94,21 @@ Dictionary::registerNextLetter(unsigned short c) } } +void +Dictionary::getVersionNumber() +{ + mVersion = (mDict[0] & 0xFF); + mBigram = (mDict[1] & 0xFF); + LOGI("IN NATIVE SUGGEST Version: %d Bigram : %d \n", mVersion, mBigram); +} + +// Checks whether it has the latest dictionary or the old dictionary +bool 
+Dictionary::checkIfDictVersionIsLatest() +{ + return (mVersion >= DICTIONARY_VERSION_MIN) && (mBigram == 1 || mBigram == 0); +} + unsigned short Dictionary::getChar(int *pos) { @@ -120,6 +137,28 @@ Dictionary::getAddress(int *pos) } int +Dictionary::getFreq(int *pos) +{ + int freq = mDict[(*pos)++] & 0xFF; + + if (checkIfDictVersionIsLatest()) { + // skipping bigram + int bigramExist = (mDict[*pos] & FLAG_BIGRAM_READ); + if (bigramExist > 0) { + int nextBigramExist = 1; + while (nextBigramExist > 0) { + (*pos) += 3; + nextBigramExist = (mDict[(*pos)++] & FLAG_BIGRAM_CONTINUED); + } + } else { + (*pos)++; + } + } + + return freq; +} + +int Dictionary::wideStrLen(unsigned short *str) { if (!str) return 0; @@ -168,6 +207,46 @@ Dictionary::addWord(unsigned short *word, int length, int frequency) return false; } +bool +Dictionary::addWordBigram(unsigned short *word, int length, int frequency) +{ + word[length] = 0; + if (DEBUG_DICT) { + char s[length + 1]; + for (int i = 0; i <= length; i++) s[i] = word[i]; + LOGI("Bigram: Found word = %s, freq = %d : \n", s, frequency); + } + + // Find the right insertion point + int insertAt = 0; + while (insertAt < mMaxBigrams) { + if (frequency > mBigramFreq[insertAt] + || (mBigramFreq[insertAt] == frequency + && length < wideStrLen(mBigramChars + insertAt * mMaxWordLength))) { + break; + } + insertAt++; + } + LOGI("Bigram: InsertAt -> %d maxBigrams: %d\n", insertAt, mMaxBigrams); + if (insertAt < mMaxBigrams) { + memmove((char*) mBigramFreq + (insertAt + 1) * sizeof(mBigramFreq[0]), + (char*) mBigramFreq + insertAt * sizeof(mBigramFreq[0]), + (mMaxBigrams - insertAt - 1) * sizeof(mBigramFreq[0])); + mBigramFreq[insertAt] = frequency; + memmove((char*) mBigramChars + (insertAt + 1) * mMaxWordLength * sizeof(short), + (char*) mBigramChars + (insertAt ) * mMaxWordLength * sizeof(short), + (mMaxBigrams - insertAt - 1) * sizeof(short) * mMaxWordLength); + unsigned short *dest = mBigramChars + (insertAt ) * mMaxWordLength; + while 
(length--) { + *dest++ = *word++; + } + *dest = 0; // NULL terminate + if (DEBUG_DICT) LOGI("Bigram: Added word at %d\n", insertAt); + return true; + } + return false; +} + unsigned short Dictionary::toLowerCase(unsigned short c) { if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) { @@ -176,7 +255,7 @@ Dictionary::toLowerCase(unsigned short c) { if (c >='A' && c <= 'Z') { c |= 32; } else if (c > 127) { - c = u_tolower(c); + c = latin_tolower(c); } return c; } @@ -220,12 +299,17 @@ Dictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion, int s } for (int i = 0; i < count; i++) { + // -- at char unsigned short c = getChar(&pos); + // -- at flag/add unsigned short lowerC = toLowerCase(c); bool terminal = getTerminal(&pos); int childrenAddress = getAddress(&pos); + // -- after address or flag int freq = 1; if (terminal) freq = getFreq(&pos); + // -- after add or freq + // If we are only doing completions, no need to look at the typed characters. if (completion) { mWord[depth] = c; @@ -239,7 +323,7 @@ Dictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion, int s getWordsRec(childrenAddress, depth + 1, maxDepth, completion, snr, inputIndex, diffs); } - } else if (c == QUOTE && currentChars[0] != QUOTE || mSkipPos == depth) { + } else if ((c == QUOTE && currentChars[0] != QUOTE) || mSkipPos == depth) { // Skip the ' or other letter and continue deeper mWord[depth] = c; if (childrenAddress != 0) { @@ -277,14 +361,185 @@ Dictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion, int s } } +int +Dictionary::getBigramAddress(int *pos, bool advance) +{ + int address = 0; + + address += (mDict[*pos] & 0x3F) << 16; + address += (mDict[*pos + 1] & 0xFF) << 8; + address += (mDict[*pos + 2] & 0xFF); + + if (advance) { + *pos += 3; + } + + return address; +} + +int +Dictionary::getBigramFreq(int *pos) +{ + int freq = mDict[(*pos)++] & FLAG_BIGRAM_FREQ; + + return freq; +} + + +int +Dictionary::getBigrams(unsigned short 
*prevWord, int prevWordLength, unsigned short *bigramChars, + int *bigramFreq, int maxWordLength, int maxBigrams) +{ + mBigramFreq = bigramFreq; + mBigramChars = bigramChars; + mMaxWordLength = maxWordLength; + mMaxBigrams = maxBigrams; + + if (mBigram == 1 && checkIfDictVersionIsLatest()) { + int pos = isValidWordRec(DICTIONARY_HEADER_SIZE, prevWord, 0, prevWordLength); + LOGI("Pos -> %d\n", pos); + if (pos < 0) { + return 0; + } + + int bigramCount = 0; + int bigramExist = (mDict[pos] & FLAG_BIGRAM_READ); + if (bigramExist > 0) { + int nextBigramExist = 1; + while (nextBigramExist > 0) { + int bigramAddress = getBigramAddress(&pos, true); + int frequency = (FLAG_BIGRAM_FREQ & mDict[pos]); + // search for all bigrams and store them + searchForTerminalNode(bigramAddress, frequency); + nextBigramExist = (mDict[pos++] & FLAG_BIGRAM_CONTINUED); + bigramCount++; + } + } + + return bigramCount; + } + return 0; +} + +void +Dictionary::searchForTerminalNode(int addressLookingFor, int frequency) +{ + // track word with such address and store it in an array + unsigned short word[mMaxWordLength]; + + int pos; + int followDownBranchAddress = DICTIONARY_HEADER_SIZE; + bool found = false; + char followingChar = ' '; + int depth = -1; + + while(!found) { + bool followDownAddressSearchStop = false; + bool firstAddress = true; + bool haveToSearchAll = true; + + if (depth >= 0) { + word[depth] = (unsigned short) followingChar; + } + pos = followDownBranchAddress; // pos start at count + int count = mDict[pos] & 0xFF; + LOGI("count - %d\n",count); + pos++; + for (int i = 0; i < count; i++) { + // pos at data + pos++; + // pos now at flag + if (!getFirstBitOfByte(&pos)) { // non-terminal + if (!followDownAddressSearchStop) { + int addr = getBigramAddress(&pos, false); + if (addr > addressLookingFor) { + followDownAddressSearchStop = true; + if (firstAddress) { + firstAddress = false; + haveToSearchAll = true; + } else if (!haveToSearchAll) { + break; + } + } else { + 
followDownBranchAddress = addr; + followingChar = (char)(0xFF & mDict[pos-1]); + if (firstAddress) { + firstAddress = false; + haveToSearchAll = false; + } + } + } + pos += 3; + } else if (getFirstBitOfByte(&pos)) { // terminal + if (addressLookingFor == (pos-1)) { // found !! + depth++; + word[depth] = (0xFF & mDict[pos-1]); + found = true; + break; + } + if (getSecondBitOfByte(&pos)) { // address + freq (4 byte) + if (!followDownAddressSearchStop) { + int addr = getBigramAddress(&pos, false); + if (addr > addressLookingFor) { + followDownAddressSearchStop = true; + if (firstAddress) { + firstAddress = false; + haveToSearchAll = true; + } else if (!haveToSearchAll) { + break; + } + } else { + followDownBranchAddress = addr; + followingChar = (char)(0xFF & mDict[pos-1]); + if (firstAddress) { + firstAddress = false; + haveToSearchAll = true; + } + } + } + pos += 4; + } else { // freq only (2 byte) + pos += 2; + } + + // skipping bigram + int bigramExist = (mDict[pos] & FLAG_BIGRAM_READ); + if (bigramExist > 0) { + int nextBigramExist = 1; + while (nextBigramExist > 0) { + pos += 3; + nextBigramExist = (mDict[pos++] & FLAG_BIGRAM_CONTINUED); + } + } else { + pos++; + } + } + } + depth++; + if (followDownBranchAddress == 0) { + LOGI("ERROR!!! 
Cannot find bigram!!"); + break; + } + } + + addWordBigram(word, depth, frequency); +} + bool Dictionary::isValidWord(unsigned short *word, int length) { - return isValidWordRec(0, word, 0, length); + if (checkIfDictVersionIsLatest()) { + return (isValidWordRec(DICTIONARY_HEADER_SIZE, word, 0, length) != NOT_VALID_WORD); + } else { + return (isValidWordRec(0, word, 0, length) != NOT_VALID_WORD); + } } -bool +int Dictionary::isValidWordRec(int pos, unsigned short *word, int offset, int length) { + // returns address of bigram data of that word + // return -99 if not found + int count = getCount(&pos); unsigned short currentChar = (unsigned short) word[offset]; for (int j = 0; j < count; j++) { @@ -294,12 +549,13 @@ Dictionary::isValidWordRec(int pos, unsigned short *word, int offset, int length if (c == currentChar) { if (offset == length - 1) { if (terminal) { - return true; + return (pos+1); } } else { if (childPos != 0) { - if (isValidWordRec(childPos, word, offset + 1, length)) { - return true; + int t = isValidWordRec(childPos, word, offset + 1, length); + if (t > 0) { + return t; } } } @@ -310,7 +566,7 @@ Dictionary::isValidWordRec(int pos, unsigned short *word, int offset, int length // There could be two instances of each alphabet - upper and lower case. So continue // looking ... } - return false; + return NOT_VALID_WORD; } diff --git a/native/src/dictionary.h b/native/src/dictionary.h index 3749f3d88..2c574290f 100644 --- a/native/src/dictionary.h +++ b/native/src/dictionary.h @@ -28,12 +28,19 @@ namespace latinime { // if the word has other endings. 
#define FLAG_TERMINAL_MASK 0x80 +#define FLAG_BIGRAM_READ 0x80 +#define FLAG_BIGRAM_CHILDEXIST 0x40 +#define FLAG_BIGRAM_CONTINUED 0x80 +#define FLAG_BIGRAM_FREQ 0x7F + class Dictionary { public: Dictionary(void *dict, int typedLetterMultipler, int fullWordMultiplier); int getSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies, int maxWordLength, int maxWords, int maxAlternatives, int skipPos, int *nextLetters, int nextLettersSize); + int getBigrams(unsigned short *word, int length, unsigned short *outWords, int *frequencies, + int maxWordLength, int maxBigrams); bool isValidWord(unsigned short *word, int length); void setAsset(void *asset) { mAsset = asset; } void *getAsset() { return mAsset; } @@ -41,28 +48,40 @@ public: private: + void getVersionNumber(); + bool checkIfDictVersionIsLatest(); int getAddress(int *pos); + int getBigramAddress(int *pos, bool advance); + int getFreq(int *pos); + int getBigramFreq(int *pos); + void searchForTerminalNode(int address, int frequency); + + bool getFirstBitOfByte(int *pos) { return (mDict[*pos] & 0x80) > 0; } + bool getSecondBitOfByte(int *pos) { return (mDict[*pos] & 0x40) > 0; } bool getTerminal(int *pos) { return (mDict[*pos] & FLAG_TERMINAL_MASK) > 0; } - int getFreq(int *pos) { return mDict[(*pos)++] & 0xFF; } int getCount(int *pos) { return mDict[(*pos)++] & 0xFF; } unsigned short getChar(int *pos); int wideStrLen(unsigned short *str); bool sameAsTyped(unsigned short *word, int length); bool addWord(unsigned short *word, int length, int frequency); + bool addWordBigram(unsigned short *word, int length, int frequency); unsigned short toLowerCase(unsigned short c); void getWordsRec(int pos, int depth, int maxDepth, bool completion, int frequency, int inputIndex, int diffs); - bool isValidWordRec(int pos, unsigned short *word, int offset, int length); + int isValidWordRec(int pos, unsigned short *word, int offset, int length); void registerNextLetter(unsigned short c); unsigned char *mDict; 
void *mAsset; int *mFrequencies; + int *mBigramFreq; int mMaxWords; + int mMaxBigrams; int mMaxWordLength; unsigned short *mOutputChars; + unsigned short *mBigramChars; int *mInputCodes; int mInputLength; int mMaxAlternatives; @@ -74,6 +93,8 @@ private: int mTypedLetterMultiplier; int *mNextLettersFrequencies; int mNextLettersSize; + int mVersion; + int mBigram; }; // ---------------------------------------------------------------------------- diff --git a/tests/Android.mk b/tests/Android.mk new file mode 100644 index 000000000..fba7a8d74 --- /dev/null +++ b/tests/Android.mk @@ -0,0 +1,17 @@ +LOCAL_PATH:= $(call my-dir) +include $(CLEAR_VARS) + +# We only want this apk build for tests. +LOCAL_MODULE_TAGS := tests +LOCAL_CERTIFICATE := shared + +LOCAL_JAVA_LIBRARIES := android.test.runner + +# Include all test java files. +LOCAL_SRC_FILES := $(call all-java-files-under, src) + +LOCAL_PACKAGE_NAME := LatinIMETests + +LOCAL_INSTRUMENTATION_FOR := LatinIME + +include $(BUILD_PACKAGE) diff --git a/tests/AndroidManifest.xml b/tests/AndroidManifest.xml new file mode 100644 index 000000000..210e81489 --- /dev/null +++ b/tests/AndroidManifest.xml @@ -0,0 +1,33 @@ +<?xml version="1.0" encoding="utf-8"?> +<!-- Copyright (C) 2010 The Android Open Source Project + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+--> + +<manifest xmlns:android="http://schemas.android.com/apk/res/android" + package="com.android.inputmethod.latin.tests"> + + <uses-permission android:name="android.permission.READ_CONTACTS" /> + + <application> + <uses-library android:name="android.test.runner" /> + <!-- meta-data android:name="com.android.contacts.iconset" android:resource="@xml/iconset" /--> + <uses-permission android:name="android.permission.READ_CONTACTS" /> + + </application> + + <instrumentation android:name="android.test.InstrumentationTestRunner" + android:targetPackage="com.android.inputmethod.latin" + android:label="LatinIME tests"> + </instrumentation> +</manifest> diff --git a/tests/data/bigramlist.xml b/tests/data/bigramlist.xml new file mode 100644 index 000000000..dd3f2916e --- /dev/null +++ b/tests/data/bigramlist.xml @@ -0,0 +1,36 @@ +<?xml version="1.0" encoding="utf-8"?> +<!-- +/* +** +** Copyright 2010, The Android Open Source Project +** +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. +** You may obtain a copy of the License at +** +** http://www.apache.org/licenses/LICENSE-2.0 +** +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. 
+*/ +--> + +<bigrams> + <bi w1="I'm" count="1"> + <w w2="about" p="100" /> + </bi> + <bi w1="about" count="3"> + <w w2="part" p="117" /> + <w w2="business" p="100" /> + <w w2="being" p="10" /> + </bi> + <bi w1="business" count="1"> + <w w2="people" p="100" /> + </bi> + <bi w1="from" count="1"> + <w w2="same" p="117" /> + </bi> +</bigrams> diff --git a/tests/data/wordlist.xml b/tests/data/wordlist.xml new file mode 100644 index 000000000..b870eb2a3 --- /dev/null +++ b/tests/data/wordlist.xml @@ -0,0 +1,244 @@ +<wordlist> + <w f="255">the</w> + <w f="246">and</w> + <w f="245">of</w> + <w f="242">to</w> + <w f="231">in</w> + <w f="230">that</w> + <w f="229">for</w> + <w f="224">with</w> + <w f="224">on</w> + <w f="224">it</w> + <w f="223">this</w> + <w f="222">you</w> + <w f="219">is</w> + <w f="219">was</w> + <w f="219">by</w> + <w f="219">or</w> + <w f="218">from</w> + <w f="217">but</w> + <w f="216">be</w> + <w f="216">Sunday</w> + <w f="215">are</w> + <w f="215">he</w> + <w f="214">so</w> + <w f="214">not</w> + <w f="213">have</w> + <w f="213">as</w> + <w f="211">all</w> + <w f="211">his</w> + <w f="210">my</w> + <w f="210">if</w> + <w f="210">which</w> + <w f="210">they</w> + <w f="209">at</w> + <w f="207">it's</w> + <w f="207">an</w> + <w f="207">your</w> + <w f="206">will</w> + <w f="206">about</w> + <w f="206">I'm</w> + <w f="205">there</w> + <w f="205">had</w> + <w f="205">has</w> + <w f="204">when</w> + <w f="203">no</w> + <w f="203">were</w> + <w f="203">what</w> + <w f="203">more</w> + <w f="203">out</w> + <w f="203">just</w> + <w f="202">their</w> + <w f="202">up</w> + <w f="202">would</w> + <w f="202">here</w> + <w f="202">can</w> + <w f="201">who</w> + <w f="200">her</w> + <w f="200">me</w> + <w f="200">now</w> + <w f="200">our</w> + <w f="200">do</w> + <w f="200">some</w> + <w f="199">been</w> + <w f="199">two</w> + <w f="199">like</w> + <w f="199">them</w> + <w f="199">new</w> + <w f="198">time</w> + <w f="198">we</w> + <w f="198">she</w> + <w 
f="197">one</w> + <w f="197">over</w> + <w f="197">may</w> + <w f="197">any</w> + <w f="197">him</w> + <w f="197">calling</w> + <w f="196">other</w> + <w f="196">how</w> + <w f="196">see</w> + <w f="195">because</w> + <w f="195">then</w> + <w f="195">right</w> + <w f="195">into</w> + <w f="195">well</w> + <w f="195">very</w> + <w f="195">said</w> + <w f="195">people</w> + <w f="194">these</w> + <w f="194">than</w> + <w f="193">only</w> + <w f="193">back</w> + <w f="193">first</w> + <w f="193">dot</w> + <w f="193">after</w> + <w f="193">where</w> + <w f="192">please</w> + <w f="192">could</w> + <w f="192">its</w> + <w f="192">before</w> + <w f="192">us</w> + <w f="192">again</w> + <w f="192">home</w> + <w f="191">also</w> + <w f="191">that's</w> + <w f="191">think</w> + <w f="191">three</w> + <w f="191">good</w> + <w f="191">get</w> + <w f="190">know</w> + <w f="190">thank</w> + <w f="190">should</w> + <w f="190">going</w> + <w f="190">down</w> + <w f="189">last</w> + <w f="189">today</w> + <w f="189">those</w> + <w f="189">go</w> + <w f="189">through</w> + <w f="189">such</w> + <w f="189">don't</w> + <w f="189">did</w> + <w f="188">most</w> + <w f="188">day</w> + <w f="188">man</w> + <w f="188">number</w> + <w f="188">work</w> + <w f="187">too</w> + <w f="187">show</w> + <w f="187">made</w> + <w f="187">even</w> + <w f="187">being</w> + <w f="187">make</w> + <w f="187">give</w> + <w f="186">off</w> + <w f="186">com</w> + <w f="186">much</w> + <w f="186">great</w> + <w f="186">take</w> + <w f="186">call</w> + <w f="186">way</w> + <w f="186">four</w> + <w f="186">say</w> + <w f="185">information</w> + <w f="185">under</w> + <w f="185">page</w> + <w f="185">many</w> + <w f="185">little</w> + <w f="185">thanks</w> + <w f="185">okay</w> + <w f="185">five</w> + <w f="185">we're</w> + <w f="185">between</w> + <w f="184">use</w> + <w f="184">come</w> + <w f="184">years</w> + <w f="184">office</w> + <w f="184">house</w> + <w f="184">search</w> + <w f="184">free</w> + <w 
f="183">next</w> + <w f="183">without</w> + <w f="183">still</w> + <w f="183">around</w> + <w f="183">I've</w> + <w f="183">business</w> + <w f="183">part</w> + <w f="183">every</w> + <w f="183">bye</w> + <w f="183">upon</w> + <w f="183">you're</w> + <w f="183">state</w> + <w f="183">life</w> + <w f="183">year</w> + <w f="182">thing</w> + <w f="182">since</w> + <w f="182">things</w> + <w f="182">something</w> + <w f="182">long</w> + <w f="182">got</w> + <w f="182">while</w> + <w f="182">I'll</w> + <w f="182">help</w> + <w f="182">service</w> + <w f="182">really</w> + <w f="182">must</w> + <w f="182">does</w> + <w f="182">name</w> + <w f="181">both</w> + <w f="181">six</w> + <w f="181">want</w> + <w f="181">same</w> + <w f="181">each</w> + <w f="181">yet</w> + <w f="181">let</w> + <w f="181">view</w> + <w f="181">place</w> + <w f="181">another</w> + <w f="181">company</w> + <w f="181">talk</w> + <w f="181">might</w> + <w f="181">am</w> + <w f="181">though</w> + <w f="181">find</w> + <w f="180">details</w> + <w f="180">look</w> + <w f="180">world</w> + <w f="180">old</w> + <w f="180">called</w> + <w f="180">case</w> + <w f="180">system</w> + <w f="180">news</w> + <w f="179">used</w> + <w f="179">contact</w> + <w f="179">never</w> + <w f="179">seven</w> + <w f="179">city</w> + <w f="179">until</w> + <w f="179">during</w> + <w f="179">set</w> + <w f="179">why</w> + <w f="179">point</w> + <w f="179">twenty</w> + <w f="179">high</w> + <w f="179">love</w> + <w f="179">services</w> + <w f="170">niño</w> + <w f="170">María</w> + <w f="70">car</w> + <w f="0">hmmm</w> + <w f="0">hon</w> + <w f="0">tty</w> + <w f="0">ttyl</w> + <w f="0">txt</w> + <w f="0">ur</w> + <w f="0">wah</w> + <w f="0">whatcha</w> + <w f="0">woah</w> + <w f="0">ya</w> + <w f="0">yea</w> + <w f="0">yeh</w> + <w f="0">yessir</w> + <w f="0">yikes</w> + <w f="0">yrs</w> +</wordlist> diff --git a/tests/res/raw/test.dict b/tests/res/raw/test.dict Binary files differnew file mode 100644 index 
000000000..6a5d6d794 --- /dev/null +++ b/tests/res/raw/test.dict diff --git a/tests/src/com/android/inputmethod/latin/tests/SuggestTests.java b/tests/src/com/android/inputmethod/latin/tests/SuggestTests.java new file mode 100644 index 000000000..59720640a --- /dev/null +++ b/tests/src/com/android/inputmethod/latin/tests/SuggestTests.java @@ -0,0 +1,307 @@ +package com.android.inputmethod.latin.tests; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.channels.Channels; +import java.util.List; + +import android.content.Context; +import android.test.AndroidTestCase; +import android.text.TextUtils; +import android.util.Log; + +import com.android.inputmethod.latin.Suggest; +import com.android.inputmethod.latin.WordComposer; + +public class SuggestTests extends AndroidTestCase { + private static final String TAG = "SuggestTests"; + + private Suggest mSuggest; + + int[][] adjacents = { + {'a','s','w','q',-1}, + {'b','h','v','n','g','j',-1}, + {'c','v','f','x','g',}, + {'d','f','r','e','s','x',-1}, + {'e','w','r','s','d',-1}, + {'f','g','d','c','t','r',-1}, + {'g','h','f','y','t','v',-1}, + {'h','j','u','g','b','y',-1}, + {'i','o','u','k',-1}, + {'j','k','i','h','u','n',-1}, + {'k','l','o','j','i','m',-1}, + {'l','k','o','p',-1}, + {'m','k','n','l',-1}, + {'n','m','j','k','b',-1}, + {'o','p','i','l',-1}, + {'p','o',-1}, + {'q','w',-1}, + {'r','t','e','f',-1}, + {'s','d','e','w','a','z',-1}, + {'t','y','r',-1}, + {'u','y','i','h','j',-1}, + {'v','b','g','c','h',-1}, + {'w','e','q',-1}, + {'x','c','d','z','f',-1}, + {'y','u','t','h','g',-1}, + {'z','s','x','a','d',-1}, + }; + + @Override + protected void setUp() { + final Context context = getTestContext(); + InputStream is = context.getResources().openRawResource(R.raw.test); + Log.i(TAG, "Stream type is " + is); + try { + int avail = is.available(); + if (avail > 0) { + ByteBuffer byteBuffer = + 
ByteBuffer.allocateDirect(avail).order(ByteOrder.nativeOrder()); + int got = Channels.newChannel(is).read(byteBuffer); + if (got != avail) { + Log.e(TAG, "Read " + got + " bytes, expected " + avail); + } else { + mSuggest = new Suggest(context, byteBuffer); + Log.i(TAG, "Created mSuggest " + avail + " bytes"); + } + } + } catch (IOException ioe) { + Log.w(TAG, "No available size for binary dictionary"); + } + mSuggest.setAutoTextEnabled(false); + mSuggest.setCorrectionMode(Suggest.CORRECTION_FULL_BIGRAM); + } + + /************************** Helper functions ************************/ + + private WordComposer createWordComposer(CharSequence s) { + WordComposer word = new WordComposer(); + for (int i = 0; i < s.length(); i++) { + final char c = s.charAt(i); + int[] codes; + // If it's not a lowercase letter, don't find adjacent letters + if (c < 'a' || c > 'z') { + codes = new int[] { c }; + } else { + codes = adjacents[c - 'a']; + } + word.add(c, codes); + } + return word; + } + + private void showList(String title, List<CharSequence> suggestions) { + Log.i(TAG, title); + for (int i = 0; i < suggestions.size(); i++) { + Log.i(title, suggestions.get(i) + ", "); + } + } + + private boolean isDefaultSuggestion(List<CharSequence> suggestions, CharSequence word) { + // Check if either the word is what you typed or the first alternative + return suggestions.size() > 0 && + (/*TextUtils.equals(suggestions.get(0), word) || */ + (suggestions.size() > 1 && TextUtils.equals(suggestions.get(1), word))); + } + + private boolean isDefaultSuggestion(CharSequence typed, CharSequence expected) { + WordComposer word = createWordComposer(typed); + List<CharSequence> suggestions = mSuggest.getSuggestions(null, word, false, null); + return isDefaultSuggestion(suggestions, expected); + } + + private void getBigramSuggestions(CharSequence previous, CharSequence typed) { + if(!TextUtils.isEmpty(previous) && (typed.length() > 1)) { + WordComposer firstChar = 
createWordComposer(typed.charAt(0) + ""); + mSuggest.getSuggestions(null, firstChar, false, previous); + } + } + + private boolean isDefaultNextSuggestion(CharSequence previous, CharSequence typed, + CharSequence expected) { + WordComposer word = createWordComposer(typed); + getBigramSuggestions(previous, typed); + List<CharSequence> suggestions = mSuggest.getSuggestions(null, word, false, previous); + return isDefaultSuggestion(suggestions, expected); + } + + private boolean isDefaultCorrection(CharSequence typed, CharSequence expected) { + WordComposer word = createWordComposer(typed); + List<CharSequence> suggestions = mSuggest.getSuggestions(null, word, false, null); + return isDefaultSuggestion(suggestions, expected) && mSuggest.hasMinimalCorrection(); + } + + private boolean isDefaultNextCorrection(CharSequence previous, CharSequence typed, + CharSequence expected) { + WordComposer word = createWordComposer(typed); + getBigramSuggestions(previous, typed); + List<CharSequence> suggestions = mSuggest.getSuggestions(null, word, false, previous); + for(int i=0;i<suggestions.size();i++) { + Log.i(TAG,i+" "+suggestions.get(i)); + } + return isDefaultSuggestion(suggestions, expected) && mSuggest.hasMinimalCorrection(); + } + + private boolean isASuggestion(CharSequence typed, CharSequence expected) { + WordComposer word = createWordComposer(typed); + List<CharSequence> suggestions = mSuggest.getSuggestions(null, word, false, null); + for (int i = 1; i < suggestions.size(); i++) { + if (TextUtils.equals(suggestions.get(i), expected)) return true; + } + return false; + } + + private boolean isASuggestion(CharSequence previous, CharSequence typed, + CharSequence expected) { + WordComposer word = createWordComposer(typed); + getBigramSuggestions(previous, typed); + List<CharSequence> suggestions = mSuggest.getSuggestions(null, word, false, previous); + for (int i = 1; i < suggestions.size(); i++) { + if (TextUtils.equals(suggestions.get(i), expected)) return true; + } + 
return false; + } + + private boolean isValid(CharSequence typed) { + return mSuggest.isValidWord(typed); + } + + /************************** Tests ************************/ + + /** + * Tests for simple completions of one character. + */ + public void testCompletion1char() { + assertTrue(isDefaultSuggestion("peopl", "people")); + assertTrue(isDefaultSuggestion("abou", "about")); + assertTrue(isDefaultSuggestion("thei", "their")); + } + + /** + * Tests for simple completions of two characters. + */ + public void testCompletion2char() { + assertTrue(isDefaultSuggestion("peop", "people")); + assertTrue(isDefaultSuggestion("calli", "calling")); + assertTrue(isDefaultSuggestion("busine", "business")); + } + + /** + * Tests for proximity errors. + */ + public void testProximityPositive() { + assertTrue(isDefaultSuggestion("peiple", "people")); + assertTrue(isDefaultSuggestion("peoole", "people")); + assertTrue(isDefaultSuggestion("pwpple", "people")); + } + + /** + * Tests for proximity errors - negative, when the error key is not near. + */ + public void testProximityNegative() { + assertFalse(isDefaultSuggestion("arout", "about")); + assertFalse(isDefaultSuggestion("ire", "are")); + } + + /** + * Tests for checking if apostrophes are added automatically. + */ + public void testApostropheInsertion() { + assertTrue(isDefaultSuggestion("im", "I'm")); + assertTrue(isDefaultSuggestion("dont", "don't")); + } + + /** + * Test to make sure apostrophed word is not suggested for an apostrophed word. + */ + public void testApostrophe() { + assertFalse(isDefaultSuggestion("don't", "don't")); + } + + /** + * Tests for suggestion of capitalized version of a word. + */ + public void testCapitalization() { + assertTrue(isDefaultSuggestion("i'm", "I'm")); + assertTrue(isDefaultSuggestion("sunday", "Sunday")); + assertTrue(isDefaultSuggestion("sundat", "Sunday")); + } + + /** + * Tests to see if more than one completion is provided for certain prefixes. 
+ */ + public void testMultipleCompletions() { + assertTrue(isASuggestion("com", "come")); + assertTrue(isASuggestion("com", "company")); + assertTrue(isASuggestion("th", "the")); + assertTrue(isASuggestion("th", "that")); + assertTrue(isASuggestion("th", "this")); + assertTrue(isASuggestion("th", "they")); + } + + /** + * Does the suggestion engine recognize zero frequency words as valid words. + */ + public void testZeroFrequencyAccepted() { + assertTrue(isValid("yikes")); + assertFalse(isValid("yike")); + } + + /** + * Tests to make sure that zero frequency words are not suggested as completions. + */ + public void testZeroFrequencySuggestionsNegative() { + assertFalse(isASuggestion("yike", "yikes")); + assertFalse(isASuggestion("what", "whatcha")); + } + + /** + * Tests to ensure that words with large edit distances are not suggested, in some cases + * and not considered corrections, in some cases. + */ + public void testTooLargeEditDistance() { + assertFalse(isASuggestion("sniyr", "about")); + assertFalse(isDefaultCorrection("rjw", "the")); + } + + /** + * Make sure isValid is case-sensitive. + */ + public void testValidityCaseSensitivity() { + assertTrue(isValid("Sunday")); + assertFalse(isValid("sunday")); + } + + /** + * Are accented forms of words suggested as corrections? 
+ */ + public void testAccents() { + assertTrue(isDefaultCorrection("nino", "ni\u00F1o")); // niño + assertTrue(isDefaultCorrection("nimo", "ni\u00F1o")); // niño + assertTrue(isDefaultCorrection("maria", "Mar\u00EDa")); // María + } + + /** + * Make sure bigrams are showing when first character is typed + * and don't show any when there aren't any + */ + public void testBigramsAtFirstChar() { + assertTrue(isDefaultNextCorrection("about", "p", "part")); + assertTrue(isDefaultNextCorrection("I'm", "a", "about")); + assertTrue(isDefaultNextCorrection("about", "b", "business")); + assertTrue(isASuggestion("about", "b", "being")); + assertFalse(isDefaultNextSuggestion("about", "p", "business")); + } + + /** + * Make sure bigrams score affects the original score + */ + public void testBigramsScoreEffect() { + assertTrue(isDefaultCorrection("pa", "page")); + assertTrue(isDefaultNextCorrection("about", "pa", "part")); + assertTrue(isDefaultCorrection("sa", "said")); + assertTrue(isDefaultNextCorrection("from", "sa", "same")); + } +} |