diff options
31 files changed, 328 insertions, 258 deletions
diff --git a/java/res/values-km-rKH/strings.xml b/java/res/values-km-rKH/strings.xml index 4d7de93c8..519aa44d0 100644 --- a/java/res/values-km-rKH/strings.xml +++ b/java/res/values-km-rKH/strings.xml @@ -29,7 +29,7 @@ <string name="popup_on_keypress" msgid="123894815723512944">"លេចឡើងនៅពេលចុចគ្រាប់ចុច"</string> <string name="general_category" msgid="1859088467017573195">"ទូទៅ"</string> <string name="correction_category" msgid="2236750915056607613">"ការកែអត្ថបទ"</string> - <string name="gesture_typing_category" msgid="497263612130532630">"បញ្ចូលដោយប្រើកាយវិការ"</string> + <string name="gesture_typing_category" msgid="497263612130532630">"បញ្ចូលដោយប្រើកាយវិការ"</string> <string name="misc_category" msgid="6894192814868233453">"ជម្រើសផ្សេងទៀត"</string> <string name="advanced_settings" msgid="362895144495591463">"ការកំណត់កម្រិតខ្ពស់"</string> <string name="advanced_settings_summary" msgid="4487980456152830271">"ជម្រើសសម្រាប់អ្នកជំនាញ"</string> @@ -39,7 +39,7 @@ <string name="show_language_switch_key_summary" msgid="7343403647474265713">"បង្ហាញនៅពេលដែលបើកភាសាបញ្ចូលច្រើន"</string> <string name="sliding_key_input_preview" msgid="6604262359510068370">"បង្ហាញទ្រនិចបង្ហាញស្លាយ"</string> <string name="sliding_key_input_preview_summary" msgid="6340524345729093886">"បង្ហាញសញ្ញាមើលឃើញខណៈពេលដែលរុញពីឆ្វេង ឬគ្រាប់ចុចនិមិត្តសញ្ញា"</string> - <string name="key_preview_popup_dismiss_delay" msgid="6213164897443068248">"សោលេចឡើងបោះបង់ការពន្យារពេល"</string> + <string name="key_preview_popup_dismiss_delay" msgid="6213164897443068248">"សោលេចឡើងបោះបង់ការពន្យារពេល"</string> <string name="key_preview_popup_dismiss_no_delay" msgid="2096123151571458064">"គ្មានការពន្យារពេល"</string> <string name="key_preview_popup_dismiss_default_delay" msgid="2166964333903906734">"លំនាំដើម"</string> <string name="abbreviation_unit_milliseconds" msgid="8700286094028323363">"<xliff:g id="MILLISECONDS">%s</xliff:g> មិល្លីវិនាទី"</string> @@ -49,7 +49,7 @@ <string name="use_personalized_dicts" msgid="5167396352105467626">"ការស្នើផ្ទាល់ខ្លួន"</string> <string name="use_double_space_period" msgid="8781529969425082860">"រយៈពេលចុចដកឃ្លាពីរដង"</string> <string name="use_double_space_period_summary" msgid="6532892187247952799">"ប៉ះដកឃ្លាពីរដងបញ្ចូលរយៈពេលដែលអនុវត្តតាមដកឃ្លា"</string> - <string name="auto_cap" msgid="1719746674854628252">"ការសរសេរជាអក្សរធំស្វ័យប្រវត្តិ"</string> + <string name="auto_cap" msgid="1719746674854628252">"ការសរសេរជាអក្សរធំស្វ័យប្រវត្តិ"</string> <string name="auto_cap_summary" msgid="7934452761022946874">"សរសេរពាក្យដំបូងជាអក្សរធំនៃប្រយោគនីមួយៗ"</string> <string name="edit_personal_dictionary" msgid="3996910038952940420">"វចនានុក្រមផ្ទាល់ខ្លួន"</string> <string name="configure_dictionaries_title" msgid="4238652338556902049">"ផ្នែកបន្ថែមវចនានុក្រម"</string> @@ -60,7 +60,7 @@ <string name="prefs_suggestion_visibility_show_only_portrait_name" msgid="3859783767435239118">"បង្ហាញនៅក្នុងរបៀបបញ្ឈរ"</string> <string name="prefs_suggestion_visibility_hide_name" msgid="6309143926422234673">"លាក់ជានិច្ច"</string> <string name="prefs_block_potentially_offensive_title" msgid="5078480071057408934">"ទប់ស្កាត់ពាក្យបំពាន"</string> - <string name="prefs_block_potentially_offensive_summary" msgid="2371835479734991364">"កុំស្នើឲ្យពាក្យបំពានមានសក្ដានុពល"</string> + <string name="prefs_block_potentially_offensive_summary" msgid="2371835479734991364">"កុំស្នើឲ្យពាក្យបំពានមានសក្ដានុពល"</string> <string name="auto_correction" msgid="7630720885194996950">"ការកែស្វ័យប្រវត្តិ"</string> <string name="auto_correction_summary" msgid="5625751551134658006">"ចន្លោះមិនឃើញ និងសញ្ញាវណ្ណយុត្តកែពាក្យដែលបានវាយខុសស្វ័យប្រវត្តិ"</string> <string name="auto_correction_threshold_mode_off" msgid="8470882665417944026">"បិទ"</string> @@ -123,7 +123,7 @@ <string name="prefs_keypress_vibration_duration_settings" msgid="7918341459947439226">"ថិរវេលាញ័រពេលចុចគ្រាប់ចុច"</string> <string name="prefs_keypress_sound_volume_settings" msgid="6027007337036891623">"កម្រិតសំឡេងពេលចុចគ្រាប់ចុច"</string> <string name="prefs_read_external_dictionary" msgid="2588931418575013067">"អានឯកសារវចនានុក្រមខាងក្រៅ"</string> - <string name="read_external_dictionary_no_files_message" msgid="4947420942224623792">"គ្មានឯកសារវចនានុក្រមនៅក្នុងថតទាញយក"</string> + <string name="read_external_dictionary_no_files_message" msgid="4947420942224623792">"គ្មានឯកសារវចនានុក្រមនៅក្នុងថតទាញយក"</string> <string name="read_external_dictionary_multiple_files_title" msgid="7637749044265808628">"ជ្រើសឯកសារវចនានុក្រម ដើម្បីដំឡើង"</string> <string name="read_external_dictionary_confirm_install_message" msgid="4782116251651288054">"ពិតជាដំឡើងឯកសារនេះសម្រាប់ <xliff:g id="LANGUAGE_NAME">%s</xliff:g>?"</string> <string name="error" msgid="8940763624668513648">"មានកំហុស"</string> @@ -154,7 +154,7 @@ <string name="dictionary_provider_name" msgid="3027315045397363079">"កម្មវិធីផ្ដល់វចនានុក្រម"</string> <string name="dictionary_service_name" msgid="6237472350693511448">"សេវាកម្មវចនានុក្រម"</string> <string name="download_description" msgid="6014835283119198591">"ព័ត៌មានបច្ចុប្បន្នភាពវចនានុក្រម"</string> - <string name="dictionary_settings_title" msgid="8091417676045693313">"ផ្នែកបន្ថែមវចនានុក្រម"</string> + <string name="dictionary_settings_title" msgid="8091417676045693313">"ផ្នែកបន្ថែមវចនានុក្រម"</string> <string name="dictionary_install_over_metered_network_prompt" msgid="3587517870006332980">"វចនានុក្រមអាចប្រើបាន"</string> <string name="dictionary_settings_summary" msgid="5305694987799824349">"ការកំណត់សម្រាប់វចនានុក្រម"</string> <string name="user_dictionaries" msgid="3582332055892252845">"វចនានុក្រមអ្នកប្រើ"</string> @@ -170,10 +170,10 @@ <string name="message_updating" msgid="4457761393932375219">"ពិនិត្យមើលបច្ចុប្បន្នភាព"</string> <string name="message_loading" msgid="5638680861387748936">"កំពុងផ្ទុក..."</string> <string name="main_dict_description" msgid="3072821352793492143">"វចនានុក្រមចម្បង"</string> - <string name="cancel" msgid="6830980399865683324">"បោះបង់"</string> + <string name="cancel" msgid="6830980399865683324">"បោះបង់"</string> <string name="go_to_settings" msgid="3876892339342569259">"ការកំណត់"</string> <string name="install_dict" msgid="180852772562189365">"ដំឡើង"</string> - <string name="cancel_download_dict" msgid="7843340278507019303">"បោះបង់"</string> + <string name="cancel_download_dict" msgid="7843340278507019303">"បោះបង់"</string> <string name="delete_dict" msgid="756853268088330054">"លុប"</string> <string name="should_download_over_metered_prompt" msgid="1583881200688185508">"ភាសាដែលបានជ្រើសនៅលើឧបករណ៍ចល័តមានវចនានុក្រមអាចប្រើបាន។<br/> យើងផ្ដល់អនុសាសន៍ឲ្យ <b>ទាញយក</b> វចនានុក្រមភាសា <xliff:g id="LANGUAGE_NAME">%1$s</xliff:g> ដើម្បីបង្កើនបទពិសោធន៍វាយបញ្ចូលរបស់អ្នក។<br/> <br/> ការទាញយកអាចចំណាយពេលប្រហែលពីរនាទីនៅតាម 3G។ ការគិតថ្លៃអាចអនុវត្តប្រសិនបើអ្នកមិនប្រើ <b>ផែនការទិន្នន័យគ្មានដែនកំណត់</b>.<br/> បើអ្នកមិនប្រាកដថាផែនការណាមួយដែលអ្នកមាន យើងផ្ដល់អនុសាសន៍ឲ្យភ្ជាប់វ៉ាយហ្វាយ ដើម្បីចាប់ផ្ដើមទាញយកដោយស្វ័យប្រវត្តិ។<br/> <br/> ជំនួយ៖ អ្នកអាចទាញយក និងលុបវចនានុក្រមដោយចូលទៅ <b>ភាសា & ការបញ្ចូល</b> នៅក្នុងម៉ឺនុយ <b>ការកំណត់</b> សម្រាប់ឧបករណ៍ចល័ត។"</string> <string name="download_over_metered" msgid="1643065851159409546">"ទាញយកឥឡូវនេះ (<xliff:g id="SIZE_IN_MEGABYTES">%1$.1f</xliff:g> មេកាបៃ)"</string> @@ -191,7 +191,7 @@ <string name="user_dict_settings_add_word_option_name" msgid="6665558053408962865">"ពាក្យ៖"</string> <string name="user_dict_settings_add_shortcut_option_name" msgid="3094731590655523777">"ផ្លូវកាត់៖"</string> <string name="user_dict_settings_add_locale_option_name" msgid="4738643440987277705">"ភាសា៖"</string> - <string name="user_dict_settings_add_word_hint" msgid="4902434148985906707">"វាយបញ្ចូលពាក្យ"</string> + <string name="user_dict_settings_add_word_hint" msgid="4902434148985906707">"វាយបញ្ចូលពាក្យ"</string> <string name="user_dict_settings_add_shortcut_hint" msgid="2265453012555060178">"ផ្លូវកាត់ជាជម្រើស"</string> <string name="user_dict_settings_edit_dialog_title" msgid="3765774633869590352">"កែពាក្យ"</string> <string name="user_dict_settings_context_menu_edit_title" msgid="6812255903472456302">"កែ"</string> diff --git a/java/src/com/android/inputmethod/keyboard/internal/KeyboardTextsTable.java b/java/src/com/android/inputmethod/keyboard/internal/KeyboardTextsTable.java index 1a27be95b..7e6181a4e 100644 --- a/java/src/com/android/inputmethod/keyboard/internal/KeyboardTextsTable.java +++ b/java/src/com/android/inputmethod/keyboard/internal/KeyboardTextsTable.java @@ -3588,33 +3588,33 @@ public final class KeyboardTextsTable { // U+00E5: "å" LATIN SMALL LETTER A WITH RING ABOVE // U+0101: "ā" LATIN SMALL LETTER A WITH MACRON /* morekeys_a */ "\u00E0,\u00E1,\u00E2,\u00E4,\u00E6,\u00E3,\u00E5,\u0101", + // U+00F3: "ó" LATIN SMALL LETTER O WITH ACUTE // U+00F4: "ô" LATIN SMALL LETTER O WITH CIRCUMFLEX // U+00F6: "ö" LATIN SMALL LETTER O WITH DIAERESIS // U+00F2: "ò" LATIN SMALL LETTER O WITH GRAVE - // U+00F3: "ó" LATIN SMALL LETTER O WITH ACUTE // U+0153: "œ" LATIN SMALL LIGATURE OE // U+00F8: "ø" LATIN SMALL LETTER O WITH STROKE // U+014D: "ō" LATIN SMALL LETTER O WITH MACRON // U+00F5: "õ" LATIN SMALL LETTER O WITH TILDE - /* morekeys_o */ "\u00F4,\u00F6,\u00F2,\u00F3,\u0153,\u00F8,\u014D,\u00F5", + /* morekeys_o */ "\u00F3,\u00F4,\u00F6,\u00F2,\u0153,\u00F8,\u014D,\u00F5", + // U+00FA: "ú" LATIN SMALL LETTER U WITH ACUTE // U+00FB: "û" LATIN SMALL LETTER U WITH CIRCUMFLEX // U+00FC: "ü" LATIN SMALL LETTER U WITH DIAERESIS // U+00F9: "ù" LATIN SMALL LETTER U WITH GRAVE - // U+00FA: "ú" LATIN SMALL LETTER U WITH ACUTE // U+016B: "ū" LATIN SMALL LETTER U WITH MACRON - /* morekeys_u */ "\u00FB,\u00FC,\u00F9,\u00FA,\u016B", - // U+00E8: "è" LATIN SMALL LETTER E WITH GRAVE + /* morekeys_u */ "\u00FA,\u00FB,\u00FC,\u00F9,\u016B", // U+00E9: "é" LATIN SMALL LETTER E WITH ACUTE + // U+00E8: "è" LATIN SMALL LETTER E WITH GRAVE // U+00EA: "ê" LATIN SMALL LETTER E WITH CIRCUMFLEX // U+00EB: "ë" LATIN SMALL LETTER E WITH DIAERESIS // U+0113: "ē" LATIN SMALL LETTER E WITH MACRON - /* morekeys_e */ "\u00E8,\u00E9,\u00EA,\u00EB,\u0113", + /* morekeys_e */ "\u00E9,\u00E8,\u00EA,\u00EB,\u0113", + // U+00ED: "í" LATIN SMALL LETTER I WITH ACUTE // U+00EE: "î" LATIN SMALL LETTER I WITH CIRCUMFLEX // U+00EF: "ï" LATIN SMALL LETTER I WITH DIAERESIS - // U+00ED: "í" LATIN SMALL LETTER I WITH ACUTE // U+012B: "ī" LATIN SMALL LETTER I WITH MACRON // U+00EC: "ì" LATIN SMALL LETTER I WITH GRAVE - /* morekeys_i */ "\u00EE,\u00EF,\u00ED,\u012B,\u00EC", + /* morekeys_i */ "\u00ED,\u00EE,\u00EF,\u012B,\u00EC", // U+00E7: "ç" LATIN SMALL LETTER C WITH CEDILLA /* morekeys_c */ "\u00E7", /* double_quotes */ null, diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java index 83ee982b1..94a1e3658 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java @@ -373,8 +373,7 @@ public final class BinaryDictionary extends Dictionary { return getProbabilityNative(mNativeDict, codePoints); } - // TODO: Add a batch process version (isValidBigramMultiple?) to avoid excessive numbers of jni - // calls when checking for changes in an entire dictionary. + @UsedForTesting public boolean isValidBigram(final String word0, final String word1) { return getBigramProbability(word0, word1) != NOT_A_PROBABILITY; } @@ -541,7 +540,9 @@ public final class BinaryDictionary extends Dictionary { close(); final File dictFile = new File(mDictFilePath); final File tmpDictFile = new File(tmpDictFilePath); - FileUtils.deleteRecursively(dictFile); + if (!FileUtils.deleteRecursively(dictFile)) { + return false; + } if (!BinaryDictionaryUtils.renameDict(tmpDictFile, dictFile)) { return false; } diff --git a/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java index 9bc01a2b1..e04fcda27 100644 --- a/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java @@ -31,9 +31,12 @@ import android.util.Log; import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.personalization.AccountUtils; +import com.android.inputmethod.latin.utils.CollectionUtils; +import com.android.inputmethod.latin.utils.ExecutorUtils; import com.android.inputmethod.latin.utils.StringUtils; import java.io.File; +import java.util.ArrayList; import java.util.List; import java.util.Locale; @@ -60,7 +63,10 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary { private static final int INDEX_NAME = 1; /** The number of contacts in the most recent dictionary rebuild. */ - static private int sContactCountAtLastRebuild = 0; + private int mContactCountAtLastRebuild = 0; + + /** The hash code of ArrayList of contacts names in the most recent dictionary rebuild. */ + private int mHashCodeAtLastRebuild = 0; private ContentObserver mObserver; @@ -96,7 +102,14 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary { new ContentObserver(null) { @Override public void onChange(boolean self) { - setNeedsToReload(); + ExecutorUtils.getExecutor("Check Contacts").execute(new Runnable() { + @Override + public void run() { + if (haveContentsChanged()) { + setNeedsToRecreate(); + } + } + }); } }); } @@ -143,7 +156,7 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary { return; } if (cursor.moveToFirst()) { - sContactCountAtLastRebuild = getContactCount(); + mContactCountAtLastRebuild = getContactCount(); addWordsLocked(cursor); } } catch (final SQLiteException e) { @@ -167,9 +180,11 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary { private void addWordsLocked(final Cursor cursor) { int count = 0; + final ArrayList<String> names = CollectionUtils.newArrayList(); while (!cursor.isAfterLast() && count < MAX_CONTACT_COUNT) { String name = cursor.getString(INDEX_NAME); if (isValidName(name)) { + names.add(name); addNameLocked(name); ++count; } else { @@ -179,6 +194,7 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary { } cursor.moveToNext(); } + mHashCodeAtLastRebuild = names.hashCode(); } private int getContactCount() { @@ -258,8 +274,7 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary { return end; } - @Override - protected boolean haveContentsChanged() { + private boolean haveContentsChanged() { final long startTime = SystemClock.uptimeMillis(); final int contactCount = getContactCount(); if (contactCount > MAX_CONTACT_COUNT) { @@ -268,9 +283,9 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary { // TODO: Sort and check only the MAX_CONTACT_COUNT most recent contacts? return false; } - if (contactCount != sContactCountAtLastRebuild) { + if (contactCount != mContactCountAtLastRebuild) { if (DEBUG) { - Log.d(TAG, "Contact count changed: " + sContactCountAtLastRebuild + " to " + Log.d(TAG, "Contact count changed: " + mContactCountAtLastRebuild + " to " + contactCount); } return true; @@ -283,20 +298,20 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary { if (null == cursor) { return false; } + final ArrayList<String> names = CollectionUtils.newArrayList(); try { if (cursor.moveToFirst()) { while (!cursor.isAfterLast()) { String name = cursor.getString(INDEX_NAME); - if (isValidName(name) && !isNameInDictionaryLocked(name)) { - if (DEBUG) { - Log.d(TAG, "Contact name missing: " + name + " (runtime = " - + (SystemClock.uptimeMillis() - startTime) + " ms)"); - } - return true; + if (isValidName(name)) { + names.add(name); } cursor.moveToNext(); } } + if (names.hashCode() != mHashCodeAtLastRebuild) { + return true; + } } finally { cursor.close(); } @@ -313,33 +328,4 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary { } return false; } - - /** - * Checks if the words in a name are in the current binary dictionary. - */ - private boolean isNameInDictionaryLocked(final String name) { - int len = StringUtils.codePointCount(name); - String prevWord = null; - for (int i = 0; i < len; i++) { - if (Character.isLetter(name.codePointAt(i))) { - int end = getWordEndPosition(name, len, i); - String word = name.substring(i, end); - i = end - 1; - final int wordLen = StringUtils.codePointCount(word); - if (wordLen < MAX_WORD_LENGTH && wordLen > 1) { - if (!TextUtils.isEmpty(prevWord) && mUseFirstLastBigrams) { - if (!isValidBigramLocked(prevWord, word)) { - return false; - } - } else { - if (!isValidWordLocked(word)) { - return false; - } - } - prevWord = word; - } - } - } - return true; - } } diff --git a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java index e3bed318e..6818c156e 100644 --- a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java @@ -92,8 +92,8 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { /** Indicates whether a task for reloading the dictionary has been scheduled. */ private final AtomicBoolean mIsReloading; - /** Indicates whether the current dictionary needs to be reloaded. */ - private boolean mNeedsToReload; + /** Indicates whether the current dictionary needs to be recreated. */ + private boolean mNeedsToRecreate; private final ReentrantReadWriteLock mLock; @@ -107,20 +107,14 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { */ protected abstract void loadInitialContentsLocked(); - /** - * Indicates that the source dictionary contents have changed and a rebuild of the binary file - * is required. If it returns false, the next reload will only read the current binary - * dictionary from file. - */ - protected abstract boolean haveContentsChanged(); - private boolean matchesExpectedBinaryDictFormatVersionForThisType(final int formatVersion) { return formatVersion == FormatSpec.VERSION4; } private boolean needsToMigrateDictionary(final int formatVersion) { - // TODO: Check version. - return false; + // When we bump up the dictionary format version, the old version should be added to here + // for supporting migration. Note that native code has to support reading such formats. + return formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING; } public boolean isValidDictionaryLocked() { @@ -147,7 +141,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { mDictFile = getDictFile(context, dictName, dictFile); mBinaryDictionary = null; mIsReloading = new AtomicBoolean(); - mNeedsToReload = false; + mNeedsToRecreate = false; mLock = new ReentrantReadWriteLock(); } @@ -489,11 +483,11 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { } /** - * Marks that the dictionary needs to be reloaded. + * Marks that the dictionary needs to be recreated. * */ - protected void setNeedsToReload() { - mNeedsToReload = true; + protected void setNeedsToRecreate() { + mNeedsToRecreate = true; } /** @@ -511,7 +505,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { * Returns whether a dictionary reload is required. */ private boolean isReloadRequired() { - return mBinaryDictionary == null || mNeedsToReload; + return mBinaryDictionary == null || mNeedsToRecreate; } /** @@ -523,8 +517,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { @Override public void run() { try { - // TODO: Quit checking contents in ExpandableBinaryDictionary. - if (!mDictFile.exists() || (mNeedsToReload && haveContentsChanged())) { + if (!mDictFile.exists() || mNeedsToRecreate) { // If the dictionary file does not exist or contents have been updated, // generate a new one. createNewDictionaryLocked(); @@ -536,12 +529,12 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { && matchesExpectedBinaryDictFormatVersionForThisType( mBinaryDictionary.getFormatVersion()))) { // Binary dictionary or its format version is not valid. Regenerate - // the dictionary file. writeBinaryDictionary will remove the + // the dictionary file. createNewDictionaryLocked will remove the // existing files if appropriate. createNewDictionaryLocked(); } } - mNeedsToReload = false; + mNeedsToRecreate = false; } finally { mIsReloading.set(false); } diff --git a/java/src/com/android/inputmethod/latin/UserBinaryDictionary.java b/java/src/com/android/inputmethod/latin/UserBinaryDictionary.java index e1cda696c..c8ffbe443 100644 --- a/java/src/com/android/inputmethod/latin/UserBinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/UserBinaryDictionary.java @@ -98,7 +98,7 @@ public class UserBinaryDictionary extends ExpandableBinaryDictionary { // devices. On older versions of the platform, the hook above will be called instead. @Override public void onChange(final boolean self, final Uri uri) { - setNeedsToReload(); + setNeedsToRecreate(); } }; cres.registerContentObserver(Words.CONTENT_URI, true, mObserver); @@ -272,9 +272,4 @@ public class UserBinaryDictionary extends ExpandableBinaryDictionary { } } } - - @Override - protected boolean haveContentsChanged() { - return true; - } } diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java index f25503488..613ff2ba4 100644 --- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java +++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java @@ -186,7 +186,12 @@ public final class FormatSpec { // From version 4 on, we use version * 100 + revision as a version number. That allows // us to change the format during development while having testing devices remove // older files with each upgrade, while still having a readable versioning scheme. + // When we bump up the dictionary format version, we should update + // ExpandableDictionary.needsToMigrateDictionary() and + // ExpandableDictionary.matchesExpectedBinaryDictFormatVersionForThisType(). public static final int VERSION2 = 2; + // Dictionary version used for testing. + public static final int VERSION4_ONLY_FOR_TESTING = 399; public static final int VERSION4 = 401; static final int MINIMUM_SUPPORTED_VERSION = VERSION2; static final int MAXIMUM_SUPPORTED_VERSION = VERSION4; diff --git a/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java b/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java index 38c28a734..06bdba054 100644 --- a/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java +++ b/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java @@ -74,11 +74,6 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB } @Override - protected boolean haveContentsChanged() { - return false; - } - - @Override protected void loadInitialContentsLocked() { // No initial contents. } diff --git a/java/src/com/android/inputmethod/latin/personalization/DictionaryDecayBroadcastReciever.java b/java/src/com/android/inputmethod/latin/personalization/DictionaryDecayBroadcastReciever.java index de2744f29..221bb9a8f 100644 --- a/java/src/com/android/inputmethod/latin/personalization/DictionaryDecayBroadcastReciever.java +++ b/java/src/com/android/inputmethod/latin/personalization/DictionaryDecayBroadcastReciever.java @@ -61,6 +61,7 @@ public class DictionaryDecayBroadcastReciever extends BroadcastReceiver { final String action = intent.getAction(); if (action.equals(DICTIONARY_DECAY_INTENT_ACTION)) { PersonalizationHelper.runGCOnAllOpenedUserHistoryDictionaries(); + PersonalizationHelper.runGCOnAllOpenedPersonalizationDictionaries(); } } } diff --git a/java/src/com/android/inputmethod/latin/personalization/PersonalizationHelper.java b/java/src/com/android/inputmethod/latin/personalization/PersonalizationHelper.java index 7c43182bc..afacd085b 100644 --- a/java/src/com/android/inputmethod/latin/personalization/PersonalizationHelper.java +++ b/java/src/com/android/inputmethod/latin/personalization/PersonalizationHelper.java @@ -16,7 +16,6 @@ package com.android.inputmethod.latin.personalization; -import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.utils.CollectionUtils; import com.android.inputmethod.latin.utils.FileUtils; @@ -66,8 +65,8 @@ public class PersonalizationHelper { if (TimeUnit.MILLISECONDS.toSeconds( DictionaryDecayBroadcastReciever.DICTIONARY_DECAY_INTERVAL) < currentTimestamp - sCurrentTimestampForTesting) { - // TODO: Run GC for both PersonalizationDictionary and UserHistoryDictionary. runGCOnAllOpenedUserHistoryDictionaries(); + runGCOnAllOpenedPersonalizationDictionaries(); } } @@ -75,7 +74,6 @@ public class PersonalizationHelper { runGCOnAllDictionariesIfRequired(sLangUserHistoryDictCache); } - @UsedForTesting public static void runGCOnAllOpenedPersonalizationDictionaries() { runGCOnAllDictionariesIfRequired(sLangPersonalizationDictCache); } diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp index 04e768fbd..4975512ff 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp @@ -50,12 +50,18 @@ void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const out bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId, const int newProbability, const int timestamp, bool *const outAddedNewEntry) { + // 1. The word has no bigrams yet. + // 2. The word has bigrams, and there is the target in the list. + // 3. The word has bigrams, and there is an invalid entry that can be reclaimed. + // 4. The word has bigrams. We have to append new bigram entry to the list. + // 5. Same as 4, but the list is the last entry of the content file. + if (outAddedNewEntry) { *outAddedNewEntry = false; } const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId); if (bigramListPos == NOT_A_DICT_POS) { - // Updating PtNode that doesn't have a bigram list. + // Case 1. PtNode that doesn't have a bigram list. // Create new bigram list. if (!mBigramDictContent->createNewBigramList(terminalId)) { return false; @@ -75,50 +81,55 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget return true; } - const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos); - if (entryPosToUpdate != NOT_A_DICT_POS) { - // Overwrite existing entry. - const BigramEntry originalBigramEntry = - mBigramDictContent->getBigramEntry(entryPosToUpdate); - if (!originalBigramEntry.isValid()) { - // Reuse invalid entry. - if (outAddedNewEntry) { - *outAddedNewEntry = true; + int tailEntryPos = NOT_A_DICT_POS; + const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos, + &tailEntryPos); + if (tailEntryPos != NOT_A_DICT_POS || entryPosToUpdate == NOT_A_DICT_POS) { + // Case 4, 5. + // Add new entry to the bigram list. + if (tailEntryPos == NOT_A_DICT_POS) { + // Case 4. Create new bigram list. + if (!mBigramDictContent->createNewBigramList(terminalId)) { + return false; + } + const int destPos = mBigramDictContent->getBigramListHeadPos(terminalId); + // Copy existing bigram list. + if (!mBigramDictContent->copyBigramList(bigramListPos, destPos, &tailEntryPos)) { + return false; } } - const BigramEntry updatedBigramEntry = - originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId); + // Write new entry at the tail position of the bigram content. + const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY, + newTargetTerminalId); const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom( - &updatedBigramEntry, newProbability, timestamp); - return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate); + &newBigramEntry, newProbability, timestamp); + if (!mBigramDictContent->writeBigramEntryAtTail(&bigramEntryToWrite)) { + return false; + } + // Update has next flag of the tail entry. + if (!updateHasNextFlag(true /* hasNext */, tailEntryPos)) { + return false; + } + if (outAddedNewEntry) { + *outAddedNewEntry = true; + } + return true; } - // Add new entry to the bigram list. - // Create new bigram list. - if (!mBigramDictContent->createNewBigramList(terminalId)) { - return false; - } - int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId); - int tailEntryPos = NOT_A_DICT_POS; - // Copy existing bigram list. - if (!mBigramDictContent->copyBigramList(bigramListPos, writingPos, &tailEntryPos)) { - return false; + // Case 2. Overwrite the existing entry. Case 3. Reclaim and reuse the existing invalid entry. + const BigramEntry originalBigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate); + if (!originalBigramEntry.isValid()) { + // Case 3. Reuse the existing invalid entry. outAddedNewEntry is false when an existing + // entry is updated. + if (outAddedNewEntry) { + *outAddedNewEntry = true; + } } - // Write new entry at the tail position of the bigram content. - const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY, newTargetTerminalId); + const BigramEntry updatedBigramEntry = + originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId); const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom( - &newBigramEntry, newProbability, timestamp); - if (!mBigramDictContent->writeBigramEntryAtTail(&bigramEntryToWrite)) { - return false; - } - // Update has next flag of the tail entry. - if (!updateHasNextFlag(true /* hasNext */, tailEntryPos)) { - return false; - } - if (outAddedNewEntry) { - *outAddedNewEntry = true; - } - return true; + &updatedBigramEntry, newProbability, timestamp); + return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate); } bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) { @@ -127,7 +138,8 @@ bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTer // Bigram list doesn't exist. return false; } - const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos); + const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos, + nullptr /* outTailEntryPos */); if (entryPosToUpdate == NOT_A_DICT_POS) { // Bigram entry doesn't exist. return false; @@ -212,7 +224,10 @@ int Ver4BigramListPolicy::getBigramEntryConut(const int terminalId) { } int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind, - const int bigramListPos) const { + const int bigramListPos, int *const outTailEntryPos) const { + if (outTailEntryPos) { + *outTailEntryPos = NOT_A_DICT_POS; + } bool hasNext = true; int invalidEntryPos = NOT_A_DICT_POS; int readingPos = bigramListPos; @@ -228,6 +243,11 @@ int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind, // Invalid entry that can be reused is found. invalidEntryPos = entryPos; } + if (!hasNext && mBigramDictContent->isContentTailPos(readingPos)) { + if (outTailEntryPos) { + *outTailEntryPos = entryPos; + } + } } return invalidEntryPos; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h index d8f7be631..c1f33359b 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h @@ -56,7 +56,8 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy { private: DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy); - int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos) const; + int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos, + int *const outTailEntryPos) const; const BigramEntry createUpdatedBigramEntryFrom(const BigramEntry *const originalBigramEntry, const int newProbability, const int timestamp) const; diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h index e4a6dc594..da24302c2 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h @@ -139,6 +139,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { switch (mDictFormatVersion) { case FormatUtils::VERSION_2: return FormatUtils::VERSION_2; + case FormatUtils::VERSION_4_ONLY_FOR_TESTING: + return FormatUtils::VERSION_4_ONLY_FOR_TESTING; case FormatUtils::VERSION_4: return FormatUtils::VERSION_4; default: diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp index 5608e27d4..2a9028a9e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp @@ -98,8 +98,9 @@ typedef DictionaryHeaderStructurePolicy::AttributeMap AttributeMap; case FormatUtils::VERSION_2: // Version 2 dictionary writing is not supported. return false; + case FormatUtils::VERSION_4_ONLY_FOR_TESTING: case FormatUtils::VERSION_4: - return buffer->writeUintAndAdvancePosition(FormatUtils::VERSION_4 /* data */, + return buffer->writeUintAndAdvancePosition(version /* data */, HEADER_DICTIONARY_VERSION_SIZE, writingPos); default: return false; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp index be7e43b98..c4d18608c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp @@ -52,9 +52,11 @@ namespace latinime { DictionaryStructureWithBufferPolicyFactory:: newPolicyForOnMemoryDict( const int formatVersion, const std::vector<int> &locale, const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap) { - switch (formatVersion) { + FormatUtils::FORMAT_VERSION dictFormatVersion = FormatUtils::getFormatVersion(formatVersion); + switch (dictFormatVersion) { + case FormatUtils::VERSION_4_ONLY_FOR_TESTING: case FormatUtils::VERSION_4: { - HeaderPolicy headerPolicy(FormatUtils::VERSION_4, locale, attributeMap); + HeaderPolicy headerPolicy(dictFormatVersion, locale, attributeMap); Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers = Ver4DictBuffers::createVer4DictBuffers(&headerPolicy, Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE); @@ -87,11 +89,13 @@ namespace latinime { if (!mmappedBuffer) { return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(nullptr); } - switch (FormatUtils::detectFormatVersion(mmappedBuffer->getBuffer(), - mmappedBuffer->getBufferSize())) { + const FormatUtils::FORMAT_VERSION formatVersion = FormatUtils::detectFormatVersion( + mmappedBuffer->getBuffer(), mmappedBuffer->getBufferSize()); + switch (formatVersion) { case FormatUtils::VERSION_2: AKLOGE("Given path is a directory but the format is version 2. path: %s", path); break; + case FormatUtils::VERSION_4_ONLY_FOR_TESTING: case FormatUtils::VERSION_4: { const int dictDirPathBufSize = strlen(headerFilePath) + 1 /* terminator */; char dictPath[dictDirPathBufSize]; @@ -102,7 +106,8 @@ namespace latinime { return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(nullptr); } Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers( - Ver4DictBuffers::openVer4DictBuffers(dictPath, std::move(mmappedBuffer))); + Ver4DictBuffers::openVer4DictBuffers(dictPath, std::move(mmappedBuffer), + formatVersion)); if (!dictBuffers || !dictBuffers->isValid()) { AKLOGE("DICT: The dictionary doesn't satisfy ver4 format requirements. path: %s", path); @@ -135,6 +140,7 @@ namespace latinime { case FormatUtils::VERSION_2: return DictionaryStructureWithBufferPolicy::StructurePolicyPtr( new PatriciaTriePolicy(std::move(mmappedBuffer))); + case FormatUtils::VERSION_4_ONLY_FOR_TESTING: case FormatUtils::VERSION_4: AKLOGE("Given path is a file but the format is version 4. path: %s", path); break; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp index e02dd5550..a527f03bd 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp @@ -16,6 +16,7 @@ #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h" +#include "suggest/core/dictionary/property/unigram_property.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h" #include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h" @@ -29,9 +30,8 @@ const int DynamicPtUpdatingHelper::CHILDREN_POSITION_FIELD_SIZE = 3; bool DynamicPtUpdatingHelper::addUnigramWord( DynamicPtReadingHelper *const readingHelper, - const int *const wordCodePoints, const int codePointCount, const int probability, - const bool isNotAWord, const bool isBlacklisted, const int timestamp, - bool *const outAddedNewUnigram) { + const int *const wordCodePoints, const int codePointCount, + const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram) { int parentPos = NOT_A_DICT_POS; while (!readingHelper->isEnd()) { const PtNodeParams ptNodeParams(readingHelper->getPtNodeParams()); @@ -53,20 +53,18 @@ bool DynamicPtUpdatingHelper::addUnigramWord( if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(ptNodeParams, j, wordCodePoints[matchedCodePointCount + j])) { *outAddedNewUnigram = true; - return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, isNotAWord, isBlacklisted, - probability, timestamp, wordCodePoints + matchedCodePointCount, + return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, unigramProperty, + wordCodePoints + matchedCodePointCount, codePointCount - matchedCodePointCount); } } // All characters are matched. if (codePointCount == readingHelper->getTotalCodePointCount(ptNodeParams)) { - return setPtNodeProbability(&ptNodeParams, isNotAWord, isBlacklisted, probability, - timestamp, outAddedNewUnigram); + return setPtNodeProbability(&ptNodeParams, unigramProperty, outAddedNewUnigram); } if (!ptNodeParams.hasChildren()) { *outAddedNewUnigram = true; - return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams, - isNotAWord, isBlacklisted, probability, timestamp, + return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams, unigramProperty, wordCodePoints + readingHelper->getTotalCodePointCount(ptNodeParams), codePointCount - readingHelper->getTotalCodePointCount(ptNodeParams)); } @@ -83,7 +81,7 @@ bool DynamicPtUpdatingHelper::addUnigramWord( return createAndInsertNodeIntoPtNodeArray(parentPos, wordCodePoints + readingHelper->getPrevTotalCodePointCount(), codePointCount - readingHelper->getPrevTotalCodePointCount(), - isNotAWord, isBlacklisted, probability, timestamp, &pos); + unigramProperty, &pos); } bool DynamicPtUpdatingHelper::addBigramWords(const int word0Pos, const int word1Pos, @@ -115,36 +113,34 @@ bool DynamicPtUpdatingHelper::addShortcutTarget(const int wordPos, bool DynamicPtUpdatingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints, const int nodeCodePointCount, - const bool isNotAWord, const bool isBlacklisted, const int probability, - const int timestamp, int *const forwardLinkFieldPos) { + const UnigramProperty *const unigramProperty, int *const forwardLinkFieldPos) { const int newPtNodeArrayPos = mBuffer->getTailPosition(); if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer, newPtNodeArrayPos, forwardLinkFieldPos)) { return false; } return createNewPtNodeArrayWithAChildPtNode(parentPos, nodeCodePoints, nodeCodePointCount, - isNotAWord, isBlacklisted, probability, timestamp); + unigramProperty); } -bool DynamicPtUpdatingHelper::setPtNodeProbability( - const PtNodeParams *const originalPtNodeParams, const bool isNotAWord, - const bool isBlacklisted, const int probability, const int timestamp, - bool *const outAddedNewUnigram) { +bool DynamicPtUpdatingHelper::setPtNodeProbability(const PtNodeParams *const originalPtNodeParams, + const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram) { if (originalPtNodeParams->isTerminal()) { // Overwrites the probability. *outAddedNewUnigram = false; - return mPtNodeWriter->updatePtNodeProbability(originalPtNodeParams, probability, timestamp); + return mPtNodeWriter->updatePtNodeUnigramProperty(originalPtNodeParams, unigramProperty); } else { // Make the node terminal and write the probability. *outAddedNewUnigram = true; const int movedPos = mBuffer->getTailPosition(); int writingPos = movedPos; const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(originalPtNodeParams, - isNotAWord, isBlacklisted, true /* isTerminal */, - originalPtNodeParams->getParentPos(), originalPtNodeParams->getCodePointCount(), - originalPtNodeParams->getCodePoints(), probability)); + unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(), + true /* isTerminal */, originalPtNodeParams->getParentPos(), + originalPtNodeParams->getCodePointCount(), originalPtNodeParams->getCodePoints(), + unigramProperty->getProbability())); if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, - timestamp, &writingPos)) { + unigramProperty, &writingPos)) { return false; } if (!mPtNodeWriter->markPtNodeAsMoved(originalPtNodeParams, movedPos, movedPos)) { @@ -155,31 +151,30 @@ bool DynamicPtUpdatingHelper::setPtNodeProbability( } bool DynamicPtUpdatingHelper::createChildrenPtNodeArrayAndAChildPtNode( - const PtNodeParams *const parentPtNodeParams, const bool isNotAWord, - const bool isBlacklisted, const int probability, const int timestamp, + const PtNodeParams *const parentPtNodeParams, const UnigramProperty *const unigramProperty, const int *const codePoints, const int codePointCount) { const int newPtNodeArrayPos = mBuffer->getTailPosition(); if (!mPtNodeWriter->updateChildrenPosition(parentPtNodeParams, newPtNodeArrayPos)) { return false; } return createNewPtNodeArrayWithAChildPtNode(parentPtNodeParams->getHeadPos(), codePoints, - codePointCount, isNotAWord, isBlacklisted, probability, timestamp); + codePointCount, unigramProperty); } bool DynamicPtUpdatingHelper::createNewPtNodeArrayWithAChildPtNode( const int parentPtNodePos, const int *const nodeCodePoints, const int nodeCodePointCount, - const bool isNotAWord, const bool isBlacklisted, const int probability, - const int timestamp) { + const UnigramProperty *const unigramProperty) { int writingPos = mBuffer->getTailPosition(); if (!DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer, 1 /* arraySize */, &writingPos)) { return false; } const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode( - isNotAWord, isBlacklisted, true /* isTerminal */, - parentPtNodePos, nodeCodePointCount, nodeCodePoints, probability)); - if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, timestamp, - &writingPos)) { + unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(), true /* isTerminal */, + parentPtNodePos, nodeCodePointCount, nodeCodePoints, + unigramProperty->getProbability())); + if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, + unigramProperty, &writingPos)) { return false; } if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer, @@ -192,13 +187,13 @@ bool DynamicPtUpdatingHelper::createNewPtNodeArrayWithAChildPtNode( // Returns whether the dictionary updating was succeeded or not. bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes( const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount, - const bool isNotAWord, const bool isBlacklisted, const int probabilityOfNewPtNode, - const int timestamp, const int *const newNodeCodePoints, const int newNodeCodePointCount) { + const UnigramProperty *const unigramProperty, const int *const newNodeCodePoints, + const int newNodeCodePointCount) { // When addsExtraChild is true, split the reallocating PtNode and add new child. // Reallocating PtNode: abcde, newNode: abcxy. // abc (1st, not terminal) __ de (2nd) // \_ xy (extra child, terminal) - // Otherwise, this method makes 1st part terminal and write probabilityOfNewPtNode. + // Otherwise, this method makes 1st part terminal and write information in unigramProperty. // Reallocating PtNode: abcde, newNode: abc. // abc (1st, terminal) __ de (2nd) const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount; @@ -216,11 +211,12 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes( } } else { const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode( - isNotAWord, isBlacklisted, true /* isTerminal */, - reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount, - reallocatingPtNodeParams->getCodePoints(), probabilityOfNewPtNode)); + unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(), + true /* isTerminal */, reallocatingPtNodeParams->getParentPos(), + overlappingCodePointCount, reallocatingPtNodeParams->getCodePoints(), + unigramProperty->getProbability())); if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, - timestamp, &writingPos)) { + unigramProperty, &writingPos)) { return false; } } @@ -244,11 +240,12 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes( } if (addsExtraChild) { const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode( - isNotAWord, isBlacklisted, true /* isTerminal */, - firstPartOfReallocatedPtNodePos, newNodeCodePointCount - overlappingCodePointCount, - newNodeCodePoints + overlappingCodePointCount, probabilityOfNewPtNode)); + unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(), + true /* isTerminal */, firstPartOfReallocatedPtNodePos, + newNodeCodePointCount - overlappingCodePointCount, + newNodeCodePoints + overlappingCodePointCount, unigramProperty->getProbability())); if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&extraChildPtNodeParams, - timestamp, &writingPos)) { + unigramProperty, &writingPos)) { return false; } } @@ -269,8 +266,8 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes( } const PtNodeParams DynamicPtUpdatingHelper::getUpdatedPtNodeParams( - const PtNodeParams *const originalPtNodeParams, const bool isNotAWord, - const bool isBlacklisted, const bool isTerminal, const int parentPos, + const PtNodeParams *const originalPtNodeParams, + const bool isNotAWord, const bool isBlacklisted, const bool isTerminal, const int parentPos, const int codePointCount, const int *const codePoints, const int probability) const { const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags( isBlacklisted, isNotAWord, isTerminal, originalPtNodeParams->hasShortcutTargets(), diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h index 9b2815263..44914fe4c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h @@ -26,6 +26,7 @@ class BufferWithExtendableBuffer; class DynamicPtReadingHelper; class PtNodeReader; class PtNodeWriter; +class UnigramProperty; class DynamicPtUpdatingHelper { public: @@ -37,9 +38,8 @@ class DynamicPtUpdatingHelper { // Add a word to the dictionary. If the word already exists, update the probability. bool addUnigramWord(DynamicPtReadingHelper *const readingHelper, - const int *const wordCodePoints, const int codePointCount, const int probability, - const bool isNotAWord, const bool isBlacklisted, const int timestamp, - bool *const outAddedNewUnigram); + const int *const wordCodePoints, const int codePointCount, + const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram); // Add a bigram relation from word0Pos to word1Pos. bool addBigramWords(const int word0Pos, const int word1Pos, const int probability, @@ -62,25 +62,22 @@ class DynamicPtUpdatingHelper { PtNodeWriter *const mPtNodeWriter; bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints, - const int nodeCodePointCount, const bool isNotAWord, const bool isBlacklisted, - const int probability, const int timestamp, int *const forwardLinkFieldPos); + const int nodeCodePointCount, const UnigramProperty *const unigramProperty, + int *const forwardLinkFieldPos); - bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams, const bool isNotAWord, - const bool isBlacklisted, const int probability, const int timestamp, - bool *const outAddedNewUnigram); + bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams, + const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram); bool createChildrenPtNodeArrayAndAChildPtNode(const PtNodeParams *const parentPtNodeParams, - const bool isNotAWord, const bool isBlacklisted, const int probability, - const int timestamp, const int *const codePoints, const int codePointCount); + const UnigramProperty *const unigramProperty, const int *const codePoints, + const int codePointCount); bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints, - const int nodeCodePointCount, const bool isNotAWord, const bool isBlacklisted, - const int probability, const int timestamp); + const int nodeCodePointCount, const UnigramProperty *const unigramProperty); bool reallocatePtNodeAndAddNewPtNodes( const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount, - const bool isNotAWord, const bool isBlacklisted, const int probabilityOfNewPtNode, - const int timestamp, const int *const newNodeCodePoints, + const UnigramProperty *const unigramProperty, const int *const newNodeCodePoints, const int newNodeCodePointCount); const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h index e843f074a..cbca3fe35 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h @@ -24,6 +24,8 @@ namespace latinime { +class UnigramProperty; + // Interface class used to write PtNode information. class PtNodeWriter { public: @@ -51,8 +53,8 @@ class PtNodeWriter { virtual bool markPtNodeAsWillBecomeNonTerminal( const PtNodeParams *const toBeUpdatedPtNodeParams) = 0; - virtual bool updatePtNodeProbability(const PtNodeParams *const toBeUpdatedPtNodeParams, - const int probability, const int timestamp) = 0; + virtual bool updatePtNodeUnigramProperty(const PtNodeParams *const toBeUpdatedPtNodeParams, + const UnigramProperty *const unigramProperty) = 0; virtual bool updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC( const PtNodeParams *const toBeUpdatedPtNodeParams, @@ -65,7 +67,7 @@ class PtNodeWriter { int *const ptNodeWritingPos) = 0; virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams, - const int timestamp, int *const ptNodeWritingPos) = 0; + const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos) = 0; virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h index 40ece7636..944e0f9e2 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h @@ -88,6 +88,10 @@ class BigramDictContent : public SparseTableDictContent { const BigramDictContent *const originalBigramDictContent, int *const outBigramEntryCount); + bool isContentTailPos(const int pos) const { + return pos == getContentBuffer()->getTailPosition(); + } + private: DISALLOW_COPY_AND_ASSIGN(BigramDictContent); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp index 95f654498..77ed38b89 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp @@ -27,7 +27,8 @@ namespace latinime { /* static */ Ver4DictBuffers::Ver4DictBuffersPtr Ver4DictBuffers::openVer4DictBuffers( - const char *const dictPath, MmappedBuffer::MmappedBufferPtr headerBuffer) { + const char *const dictPath, MmappedBuffer::MmappedBufferPtr headerBuffer, + const FormatUtils::FORMAT_VERSION formatVersion) { if (!headerBuffer) { ASSERT(false); AKLOGE("The header buffer must be valid to open ver4 dict buffers."); @@ -35,7 +36,8 @@ namespace latinime { } // TODO: take only dictDirPath, and open both header and trie files in the constructor below const bool isUpdatable = headerBuffer->isUpdatable(); - return Ver4DictBuffersPtr(new Ver4DictBuffers(dictPath, std::move(headerBuffer), isUpdatable)); + return Ver4DictBuffersPtr(new Ver4DictBuffers(dictPath, std::move(headerBuffer), isUpdatable, + formatVersion)); } bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath, @@ -113,11 +115,12 @@ bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath, } Ver4DictBuffers::Ver4DictBuffers(const char *const dictPath, - MmappedBuffer::MmappedBufferPtr headerBuffer, const bool isUpdatable) + MmappedBuffer::MmappedBufferPtr headerBuffer, const bool isUpdatable, + const FormatUtils::FORMAT_VERSION formatVersion) : mHeaderBuffer(std::move(headerBuffer)), mDictBuffer(MmappedBuffer::openBuffer(dictPath, Ver4DictConstants::TRIE_FILE_EXTENSION, isUpdatable)), - mHeaderPolicy(mHeaderBuffer->getBuffer(), FormatUtils::VERSION_4), + mHeaderPolicy(mHeaderBuffer->getBuffer(), formatVersion), mExpandableHeaderBuffer(mHeaderBuffer ? mHeaderBuffer->getBuffer() : nullptr, mHeaderPolicy.getSize(), BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h index fc41432f4..df177c14a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h @@ -36,7 +36,8 @@ class Ver4DictBuffers { typedef std::unique_ptr<Ver4DictBuffers> Ver4DictBuffersPtr; static Ver4DictBuffersPtr openVer4DictBuffers(const char *const dictDirPath, - MmappedBuffer::MmappedBufferPtr headerBuffer); + MmappedBuffer::MmappedBufferPtr headerBuffer, + const FormatUtils::FORMAT_VERSION formatVersion); static AK_FORCE_INLINE Ver4DictBuffersPtr createVer4DictBuffers( const HeaderPolicy *const headerPolicy, const int maxTrieSize) { @@ -120,7 +121,8 @@ class Ver4DictBuffers { DISALLOW_COPY_AND_ASSIGN(Ver4DictBuffers); Ver4DictBuffers(const char *const dictDirPath, - const MmappedBuffer::MmappedBufferPtr headerBuffer, const bool isUpdatable); + const MmappedBuffer::MmappedBufferPtr headerBuffer, const bool isUpdatable, + const FormatUtils::FORMAT_VERSION formatVersion); Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp index 38ff42fee..50a3e56e3 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp @@ -16,6 +16,7 @@ #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h" +#include "suggest/core/dictionary/property/unigram_property.h" #include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h" @@ -133,9 +134,11 @@ bool Ver4PatriciaTrieNodeWriter::markPtNodeAsWillBecomeNonTerminal( &writingPos); } -bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbability( - const PtNodeParams *const toBeUpdatedPtNodeParams, const int newProbability, - const int timestamp) { +bool Ver4PatriciaTrieNodeWriter::updatePtNodeUnigramProperty( + const PtNodeParams *const toBeUpdatedPtNodeParams, + const UnigramProperty *const unigramProperty) { + // Update probability and historical information. + // TODO: Update other information in the unigram property. if (!toBeUpdatedPtNodeParams->isTerminal()) { return false; } @@ -143,7 +146,7 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbability( mBuffers->getProbabilityDictContent()->getProbabilityEntry( toBeUpdatedPtNodeParams->getTerminalId()); const ProbabilityEntry probabilityEntry = createUpdatedEntryFrom(&originalProbabilityEntry, - newProbability, timestamp); + unigramProperty); return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry( toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry); } @@ -204,7 +207,8 @@ bool Ver4PatriciaTrieNodeWriter::writePtNodeAndAdvancePosition( bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition( - const PtNodeParams *const ptNodeParams, const int timestamp, int *const ptNodeWritingPos) { + const PtNodeParams *const ptNodeParams, const UnigramProperty *const unigramProperty, + int *const ptNodeWritingPos) { int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; if (!writePtNodeAndGetTerminalIdAndAdvancePosition(ptNodeParams, &terminalId, ptNodeWritingPos)) { @@ -213,7 +217,7 @@ bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition( // Write probability. ProbabilityEntry newProbabilityEntry; const ProbabilityEntry probabilityEntryToWrite = createUpdatedEntryFrom( - &newProbabilityEntry, ptNodeParams->getProbability(), timestamp); + &newProbabilityEntry, unigramProperty); return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(terminalId, &probabilityEntryToWrite); } @@ -379,18 +383,20 @@ bool Ver4PatriciaTrieNodeWriter::writePtNodeAndGetTerminalIdAndAdvancePosition( } const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom( - const ProbabilityEntry *const originalProbabilityEntry, const int newProbability, - const int timestamp) const { + const ProbabilityEntry *const originalProbabilityEntry, + const UnigramProperty *const unigramProperty) const { // TODO: Consolidate historical info and probability. if (mHeaderPolicy->hasHistoricalInfoOfWords()) { const HistoricalInfo updatedHistoricalInfo = ForgettingCurveUtils::createUpdatedHistoricalInfo( - originalProbabilityEntry->getHistoricalInfo(), newProbability, timestamp, + originalProbabilityEntry->getHistoricalInfo(), + unigramProperty->getProbability(), unigramProperty->getTimestamp(), mHeaderPolicy); return originalProbabilityEntry->createEntryWithUpdatedHistoricalInfo( &updatedHistoricalInfo); } else { - return originalProbabilityEntry->createEntryWithUpdatedProbability(newProbability); + return originalProbabilityEntry->createEntryWithUpdatedProbability( + unigramProperty->getProbability()); } } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h index b2b0504a1..f20d3a241 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h @@ -57,8 +57,8 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter { virtual bool markPtNodeAsWillBecomeNonTerminal( const PtNodeParams *const toBeUpdatedPtNodeParams); - virtual bool updatePtNodeProbability(const PtNodeParams *const toBeUpdatedPtNodeParams, - const int newProbability, const int timestamp); + virtual bool updatePtNodeUnigramProperty(const PtNodeParams *const toBeUpdatedPtNodeParams, + const UnigramProperty *const unigramProperty); virtual bool updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC( const PtNodeParams *const toBeUpdatedPtNodeParams, bool *const outNeedsToKeepPtNode); @@ -73,7 +73,7 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter { int *const ptNodeWritingPos); virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams, - const int timestamp, int *const ptNodeWritingPos); + const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos); virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp, @@ -102,11 +102,12 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter { const PtNodeParams *const ptNodeParams, int *const outTerminalId, int *const ptNodeWritingPos); - // Create updated probability entry using given probability and timestamp. In addition to the + // Create updated probability entry using given unigram property. In addition to the // probability, this method updates historical information if needed. + // TODO: Update flags belonging to the unigram property. const ProbabilityEntry createUpdatedEntryFrom( - const ProbabilityEntry *const originalProbabilityEntry, const int newProbability, - const int timestamp) const; + const ProbabilityEntry *const originalProbabilityEntry, + const UnigramProperty *const unigramProperty) const; bool updatePtNodeFlags(const int ptNodePos, const bool isBlacklisted, const bool isNotAWord, const bool isTerminal, const bool hasShortcutTargets, const bool hasBigrams, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index bbfd22e59..2584fe5b7 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -179,9 +179,7 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len readingHelper.initWithPtNodeArrayPos(getRootPosition()); bool addedNewUnigram = false; if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, - unigramProperty->getProbability(), unigramProperty->isNotAWord(), - unigramProperty->isBlacklisted(), unigramProperty->getTimestamp(), - &addedNewUnigram)) { + unigramProperty, &addedNewUnigram)) { if (addedNewUnigram) { mUnigramCount++; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp index 87fa5994c..7bc7b0a48 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp @@ -34,9 +34,12 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = const int dictVersion, const std::vector<int> localeAsCodePointVector, const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap) { TimeKeeper::setCurrentTime(); - switch (dictVersion) { + const FormatUtils::FORMAT_VERSION formatVersion = FormatUtils::getFormatVersion(dictVersion); + switch (formatVersion) { + case FormatUtils::VERSION_4_ONLY_FOR_TESTING: case FormatUtils::VERSION_4: - return createEmptyV4DictFile(filePath, localeAsCodePointVector, attributeMap); + return createEmptyV4DictFile(filePath, localeAsCodePointVector, attributeMap, + formatVersion); default: AKLOGE("Cannot create dictionary %s because format version %d is not supported.", filePath, dictVersion); @@ -46,8 +49,9 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = /* static */ bool DictFileWritingUtils::createEmptyV4DictFile(const char *const dirPath, const std::vector<int> localeAsCodePointVector, - const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap) { - HeaderPolicy headerPolicy(FormatUtils::VERSION_4, localeAsCodePointVector, attributeMap); + const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap, + const FormatUtils::FORMAT_VERSION formatVersion) { + HeaderPolicy headerPolicy(formatVersion, localeAsCodePointVector, attributeMap); Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers( Ver4DictBuffers::createVer4DictBuffers(&headerPolicy, Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE)); diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h index 54ec651f7..a822989db 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h @@ -21,6 +21,7 @@ #include "defines.h" #include "suggest/policyimpl/dictionary/header/header_read_write_utils.h" +#include "suggest/policyimpl/dictionary/utils/format_utils.h" namespace latinime { @@ -46,7 +47,8 @@ class DictFileWritingUtils { static bool createEmptyV4DictFile(const char *const filePath, const std::vector<int> localeAsCodePointVector, - const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap); + const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap, + const FormatUtils::FORMAT_VERSION formatVersion); static bool flushBufferToFile(const char *const filePath, const BufferWithExtendableBuffer *const buffer); diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp index cd3c403fa..a8518cdca 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp @@ -25,6 +25,18 @@ const uint32_t FormatUtils::MAGIC_NUMBER = 0x9BC13AFE; // Magic number (4 bytes), version (2 bytes), flags (2 bytes), header size (4 bytes) = 12 const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 12; +/* static */ FormatUtils::FORMAT_VERSION FormatUtils::getFormatVersion(const int formatVersion) { + switch (formatVersion) { + case VERSION_2: + return VERSION_2; + case VERSION_4_ONLY_FOR_TESTING: + return VERSION_4_ONLY_FOR_TESTING; + case VERSION_4: + return VERSION_4; + default: + return UNKNOWN_VERSION; + } +} /* static */ FormatUtils::FORMAT_VERSION FormatUtils::detectFormatVersion( const uint8_t *const dict, const int dictSize) { // The magic number is stored big-endian. @@ -46,6 +58,8 @@ const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 12; // same so we use them for both here. if (ByteArrayUtils::readUint16(dict, 4) == VERSION_2) { return VERSION_2; + } else if (ByteArrayUtils::readUint16(dict, 4) == VERSION_4_ONLY_FOR_TESTING) { + return VERSION_4_ONLY_FOR_TESTING; } else if (ByteArrayUtils::readUint16(dict, 4) == VERSION_4) { return VERSION_4; } else { diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h index 759b1c9b2..20dfb9d8c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h @@ -31,6 +31,7 @@ class FormatUtils { enum FORMAT_VERSION { // These MUST have the same values as the relevant constants in FormatSpec.java. VERSION_2 = 2, + VERSION_4_ONLY_FOR_TESTING = 399, VERSION_4 = 401, UNKNOWN_VERSION = -1 }; @@ -39,6 +40,7 @@ class FormatUtils { // unsupported or obsolete dictionary formats. static const uint32_t MAGIC_NUMBER; + static FORMAT_VERSION getFormatVersion(const int formatVersion); static FORMAT_VERSION detectFormatVersion(const uint8_t *const dict, const int dictSize); private: diff --git a/tests/src/com/android/inputmethod/keyboard/layout/tests/EnglishCustomizer.java b/tests/src/com/android/inputmethod/keyboard/layout/tests/EnglishCustomizer.java index 29264ff3b..3e82f65bf 100644 --- a/tests/src/com/android/inputmethod/keyboard/layout/tests/EnglishCustomizer.java +++ b/tests/src/com/android/inputmethod/keyboard/layout/tests/EnglishCustomizer.java @@ -27,34 +27,34 @@ class EnglishCustomizer extends LayoutCustomizer { @Override public ExpectedKeyboardBuilder setAccentedLetters(final ExpectedKeyboardBuilder builder) { return builder - // U+00E8: "è" LATIN SMALL LETTER E WITH GRAVE // U+00E9: "é" LATIN SMALL LETTER E WITH ACUTE + // U+00E8: "è" LATIN SMALL LETTER E WITH GRAVE // U+00EA: "ê" LATIN SMALL LETTER E WITH CIRCUMFLEX // U+00EB: "ë" LATIN SMALL LETTER E WITH DIAERESIS // U+0113: "ē" LATIN SMALL LETTER E WITH MACRON - .setMoreKeysOf("e", "\u00E8", "\u00E9", "\u00EA", "\u00EB", "\u0113") + .setMoreKeysOf("e", "\u00E9", "\u00E8", "\u00EA", "\u00EB", "\u0113") + // U+00FA: "ú" LATIN SMALL LETTER U WITH ACUTE // U+00FB: "û" LATIN SMALL LETTER U WITH CIRCUMFLEX // U+00FC: "ü" LATIN SMALL LETTER U WITH DIAERESIS // U+00F9: "ù" LATIN SMALL LETTER U WITH GRAVE - // U+00FA: "ú" LATIN SMALL LETTER U WITH ACUTE // U+016B: "ū" LATIN SMALL LETTER U WITH MACRON - .setMoreKeysOf("u", "\u00FB", "\u00FC", "\u00F9", "\u00FA", "\u016B") + .setMoreKeysOf("u", "\u00FA", "\u00FB", "\u00FC", "\u00F9", "\u016B") + // U+00ED: "í" LATIN SMALL LETTER I WITH ACUTE // U+00EE: "î" LATIN SMALL LETTER I WITH CIRCUMFLEX // U+00EF: "ï" LATIN SMALL LETTER I WITH DIAERESIS - // U+00ED: "í" LATIN SMALL LETTER I WITH ACUTE // U+012B: "ī" LATIN SMALL LETTER I WITH MACRON // U+00EC: "ì" LATIN SMALL LETTER I WITH GRAVE - .setMoreKeysOf("i", "\u00EE", "\u00EF", "\u00ED", "\u012B", "\u00EC") + .setMoreKeysOf("i", "\u00ED", "\u00EE", "\u00EF", "\u012B", "\u00EC") + // U+00F3: "ó" LATIN SMALL LETTER O WITH ACUTE // U+00F4: "ô" LATIN SMALL LETTER O WITH CIRCUMFLEX // U+00F6: "ö" LATIN SMALL LETTER O WITH DIAERESIS // U+00F2: "ò" LATIN SMALL LETTER O WITH GRAVE - // U+00F3: "ó" LATIN SMALL LETTER O WITH ACUTE // U+0153: "œ" LATIN SMALL LIGATURE OE // U+00F8: "ø" LATIN SMALL LETTER O WITH STROKE // U+014D: "ō" LATIN SMALL LETTER O WITH MACRON // U+00F5: "õ" LATIN SMALL LETTER O WITH TILDE .setMoreKeysOf("o", - "\u00F4", "\u00F6", "\u00F2", "\u00F3", "\u0153", "\u00F8", "\u014D", + "\u00F3", "\u00F4", "\u00F6", "\u00F2", "\u0153", "\u00F8", "\u014D", "\u00F5") // U+00E1: "á" LATIN SMALL LETTER A WITH ACUTE // U+00E2: "â" LATIN SMALL LETTER A WITH CIRCUMFLEX diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java index 0fb0fa587..9ceafa705 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java @@ -46,21 +46,23 @@ public class BinaryDictionaryTests extends AndroidTestCase { private File createEmptyDictionaryAndGetFile(final String dictId, final int formatVersion) throws IOException { - if (formatVersion == FormatSpec.VERSION4) { - return createEmptyVer4DictionaryAndGetFile(dictId); + if (formatVersion == FormatSpec.VERSION4 + || formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING) { + return createEmptyVer4DictionaryAndGetFile(dictId, formatVersion); } else { throw new IOException("Dictionary format version " + formatVersion + " is not supported."); } } - private File createEmptyVer4DictionaryAndGetFile(final String dictId) throws IOException { + private File createEmptyVer4DictionaryAndGetFile(final String dictId, + final int formatVersion) throws IOException { final File file = File.createTempFile(dictId, TEST_DICT_FILE_EXTENSION, getContext().getCacheDir()); file.delete(); file.mkdir(); Map<String, String> attributeMap = new HashMap<String, String>(); - if (BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), FormatSpec.VERSION4, + if (BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), formatVersion, Locale.ENGLISH, attributeMap)) { return file; } else { @@ -1223,4 +1225,36 @@ public class BinaryDictionaryTests extends AndroidTestCase { } } } + + public void testDictMigration() { + testDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, FormatSpec.VERSION4); + } + + private void testDictMigration(final int fromFormatVersion, final int toFormatVersion) { + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", fromFormatVersion); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + final int unigramProbability = 100; + addUnigramWord(binaryDictionary, "aaa", unigramProbability); + addUnigramWord(binaryDictionary, "bbb", unigramProbability); + final int bigramProbability = 10; + addBigramWords(binaryDictionary, "aaa", "bbb", bigramProbability); + assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa")); + assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb")); + assertTrue(binaryDictionary.isValidBigram("aaa", "bbb")); + assertEquals(fromFormatVersion, binaryDictionary.getFormatVersion()); + assertTrue(binaryDictionary.migrateTo(toFormatVersion)); + assertTrue(binaryDictionary.isValidDictionary()); + assertEquals(toFormatVersion, binaryDictionary.getFormatVersion()); + assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa")); + assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb")); + // TODO: Add tests for bigram frequency when the implementation gets ready. + assertTrue(binaryDictionary.isValidBigram("aaa", "bbb")); + } } diff --git a/tools/make-keyboard-text/res/values-zu/donottranslate-more-keys.xml b/tools/make-keyboard-text/res/values-zu/donottranslate-more-keys.xml index f9150f366..2c5df0c81 100644 --- a/tools/make-keyboard-text/res/values-zu/donottranslate-more-keys.xml +++ b/tools/make-keyboard-text/res/values-zu/donottranslate-more-keys.xml @@ -28,33 +28,33 @@ U+00E5: "å" LATIN SMALL LETTER A WITH RING ABOVE U+0101: "ā" LATIN SMALL LETTER A WITH MACRON --> <string name="morekeys_a">à,á,â,ä,æ,ã,å,ā</string> - <!-- U+00E8: "è" LATIN SMALL LETTER E WITH GRAVE - U+00E9: "é" LATIN SMALL LETTER E WITH ACUTE + <!-- U+00E9: "é" LATIN SMALL LETTER E WITH ACUTE + U+00E8: "è" LATIN SMALL LETTER E WITH GRAVE U+00EA: "ê" LATIN SMALL LETTER E WITH CIRCUMFLEX U+00EB: "ë" LATIN SMALL LETTER E WITH DIAERESIS U+0113: "ē" LATIN SMALL LETTER E WITH MACRON --> - <string name="morekeys_e">è,é,ê,ë,ē</string> - <!-- U+00EE: "î" LATIN SMALL LETTER I WITH CIRCUMFLEX + <string name="morekeys_e">é,è,ê,ë,ē</string> + <!-- U+00ED: "í" LATIN SMALL LETTER I WITH ACUTE + U+00EE: "î" LATIN SMALL LETTER I WITH CIRCUMFLEX U+00EF: "ï" LATIN SMALL LETTER I WITH DIAERESIS - U+00ED: "í" LATIN SMALL LETTER I WITH ACUTE U+012B: "ī" LATIN SMALL LETTER I WITH MACRON U+00EC: "ì" LATIN SMALL LETTER I WITH GRAVE --> - <string name="morekeys_i">î,ï,í,ī,ì</string> - <!-- U+00F4: "ô" LATIN SMALL LETTER O WITH CIRCUMFLEX + <string name="morekeys_i">í,î,ï,ī,ì</string> + <!-- U+00F3: "ó" LATIN SMALL LETTER O WITH ACUTE + U+00F4: "ô" LATIN SMALL LETTER O WITH CIRCUMFLEX U+00F6: "ö" LATIN SMALL LETTER O WITH DIAERESIS U+00F2: "ò" LATIN SMALL LETTER O WITH GRAVE - U+00F3: "ó" LATIN SMALL LETTER O WITH ACUTE U+0153: "œ" LATIN SMALL LIGATURE OE U+00F8: "ø" LATIN SMALL LETTER O WITH STROKE U+014D: "ō" LATIN SMALL LETTER O WITH MACRON U+00F5: "õ" LATIN SMALL LETTER O WITH TILDE --> - <string name="morekeys_o">ô,ö,ò,ó,œ,ø,ō,õ</string> - <!-- U+00FB: "û" LATIN SMALL LETTER U WITH CIRCUMFLEX + <string name="morekeys_o">ó,ô,ö,ò,œ,ø,ō,õ</string> + <!-- U+00FA: "ú" LATIN SMALL LETTER U WITH ACUTE + U+00FB: "û" LATIN SMALL LETTER U WITH CIRCUMFLEX U+00FC: "ü" LATIN SMALL LETTER U WITH DIAERESIS U+00F9: "ù" LATIN SMALL LETTER U WITH GRAVE - U+00FA: "ú" LATIN SMALL LETTER U WITH ACUTE U+016B: "ū" LATIN SMALL LETTER U WITH MACRON --> - <string name="morekeys_u">û,ü,ù,ú,ū</string> + <string name="morekeys_u">ú,û,ü,ù,ū</string> <!-- U+00DF: "ß" LATIN SMALL LETTER SHARP S --> <string name="morekeys_s">ß</string> <!-- U+00F1: "ñ" LATIN SMALL LETTER N WITH TILDE --> |