diff options
16 files changed, 108 insertions, 42 deletions
diff --git a/java/res/values-fr-rCA/strings.xml b/java/res/values-fr-rCA/strings.xml index 20597fb8a..8d8fedd9a 100644 --- a/java/res/values-fr-rCA/strings.xml +++ b/java/res/values-fr-rCA/strings.xml @@ -34,7 +34,7 @@ <string name="advanced_settings" msgid="362895144495591463">"Paramètres avancés"</string> <string name="advanced_settings_summary" msgid="4487980456152830271">"Options destinées aux experts"</string> <string name="include_other_imes_in_language_switch_list" msgid="4533689960308565519">"Autres modes de saisie"</string> - <string name="include_other_imes_in_language_switch_list_summary" msgid="840637129103317635">"La touche de sélection de langue couvre d\'autres modes de saisie."</string> + <string name="include_other_imes_in_language_switch_list_summary" msgid="840637129103317635">"La touche de sélection de langue couvre d\'autres modes de saisie"</string> <string name="show_language_switch_key" msgid="5915478828318774384">"Touche de sélection de langue"</string> <string name="show_language_switch_key_summary" msgid="7343403647474265713">"Afficher lorsque plusieurs langues de saisie sont activées"</string> <string name="sliding_key_input_preview" msgid="6604262359510068370">"Aff. 
indicateur saisie gestuelle"</string> @@ -47,7 +47,7 @@ <string name="use_contacts_dict" msgid="4435317977804180815">"Proposer noms de contacts"</string> <string name="use_contacts_dict_summary" msgid="6599983334507879959">"Utiliser des noms de contacts pour les suggestions et corrections"</string> <string name="use_double_space_period" msgid="8781529969425082860">"Point et espace"</string> - <string name="use_double_space_period_summary" msgid="6532892187247952799">"Appuyez deux fois sur la barre d\'espace pour insérer un point et un espace."</string> + <string name="use_double_space_period_summary" msgid="6532892187247952799">"Appuyez deux fois sur la barre d\'espace pour insérer un point et une espace"</string> <string name="auto_cap" msgid="1719746674854628252">"Majuscules automatiques"</string> <string name="auto_cap_summary" msgid="7934452761022946874">"Majuscule au premier mot de chaque phrase"</string> <string name="edit_personal_dictionary" msgid="3996910038952940420">"Dictionnaire personnel"</string> diff --git a/java/res/values-hi/strings.xml b/java/res/values-hi/strings.xml index 13bdc113f..dfe9a5554 100644 --- a/java/res/values-hi/strings.xml +++ b/java/res/values-hi/strings.xml @@ -50,8 +50,8 @@ <string name="use_double_space_period_summary" msgid="6532892187247952799">"स्पेसबार पर डबल टैप करने से पीरियड शामिल हो जाता है जिसके बाद एक रिक्ति होती है"</string> <string name="auto_cap" msgid="1719746674854628252">"स्वत: अक्षर बड़े करना"</string> <string name="auto_cap_summary" msgid="7934452761022946874">"प्रत्येक वाक्य के पहले शब्द को बड़ा लिखें"</string> - <string name="edit_personal_dictionary" msgid="3996910038952940420">"व्यक्तिगत डिक्शनरी"</string> - <string name="configure_dictionaries_title" msgid="4238652338556902049">"एड-ऑन डिक्शनरी"</string> + <string name="edit_personal_dictionary" msgid="3996910038952940420">"व्यक्तिगत शब्दकोश"</string> + <string name="configure_dictionaries_title" msgid="4238652338556902049">"एड-ऑन शब्दकोश"</string> <string 
name="main_dictionary" msgid="4798763781818361168">"मुख्य डिक्शनरी"</string> <string name="prefs_show_suggestions" msgid="8026799663445531637">"सुधार सुझाव दिखाएं"</string> <string name="prefs_show_suggestions_summary" msgid="1583132279498502825">"लिखते समय सुझाए गए शब्द दिखाएं"</string> @@ -167,7 +167,7 @@ <string name="prefs_key_longpress_timeout_settings" msgid="6102240298932897873">"कुंजी को देर तक दबाने का विलंब"</string> <string name="prefs_keypress_vibration_duration_settings" msgid="7918341459947439226">"कुंजी-स्पर्श कंपन अवधि"</string> <string name="prefs_keypress_sound_volume_settings" msgid="6027007337036891623">"कुंजी-स्पर्श ध्वनि वॉल्यूम"</string> - <string name="prefs_read_external_dictionary" msgid="2588931418575013067">"बाहरी डिक्शनरी फ़ाइल पढ़ें"</string> + <string name="prefs_read_external_dictionary" msgid="2588931418575013067">"बाहरी शब्दकोश फ़ाइल पढ़ें"</string> <string name="read_external_dictionary_no_files_message" msgid="4947420942224623792">"डाउनलोड फ़ोल्डर में कोई शब्दकोश फ़ाइल नहीं है"</string> <string name="read_external_dictionary_multiple_files_title" msgid="7637749044265808628">"इंस्टॉल करने के लिए कोई शब्दकोश फ़ाइल चुनें"</string> <string name="read_external_dictionary_confirm_install_message" msgid="6898610163768980870">"<xliff:g id="LOCALE_NAME">%s</xliff:g> के लिए वास्तव में यह फ़ाइल इंस्टॉल करें?"</string> @@ -179,8 +179,8 @@ <string name="setup_next_action" msgid="371821437915144603">"अगला चरण"</string> <string name="setup_steps_title" msgid="6400373034871816182">"<xliff:g id="APPLICATION_NAME">%s</xliff:g> सेट करना"</string> <string name="setup_step1_title" msgid="3147967630253462315">"<xliff:g id="APPLICATION_NAME">%s</xliff:g> को सक्षम करें"</string> - <string name="setup_step1_instruction" msgid="2578631936624637241">"कृपया अपनी भाषा और इनपुट सेटिंग में \"<xliff:g id="APPLICATION_NAME">%s</xliff:g>\" को चेक करें. 
इससे वह आपके उपकरण पर चलने के लिए अधिकृत हो जाएगा."</string> - <string name="setup_step1_finished_instruction" msgid="10761482004957994">"<xliff:g id="APPLICATION_NAME">%s</xliff:g> आपकी भाषा और इनपुट सेटिंग में पहले से सक्षम है, इसलिए यह चरण पूर्ण हो गया है. अगले चरण पर जाएं!"</string> + <string name="setup_step1_instruction" msgid="2578631936624637241">"कृपया अपनी भाषा और अक्षर सेटिंग में \"<xliff:g id="APPLICATION_NAME">%s</xliff:g>\" को चेक करें. इससे वह आपके उपकरण पर चलने के लिए अधिकृत हो जाएगा."</string> + <string name="setup_step1_finished_instruction" msgid="10761482004957994">"<xliff:g id="APPLICATION_NAME">%s</xliff:g> आपकी भाषा और अक्षर सेटिंग में पहले से सक्षम है, इसलिए यह चरण पूर्ण हो गया है. अगले चरण पर जाएं!"</string> <string name="setup_step1_action" msgid="4366513534999901728">"सेटिंग में सक्षम करें"</string> <string name="setup_step2_title" msgid="6860725447906690594">"<xliff:g id="APPLICATION_NAME">%s</xliff:g> पर स्विच करें"</string> <string name="setup_step2_instruction" msgid="9141481964870023336">"इसके बाद, \"<xliff:g id="APPLICATION_NAME">%s</xliff:g>\" को अपनी सक्रिय पाठ-इनपुट पद्धति के रूप में चुनें."</string> @@ -195,16 +195,16 @@ <string name="dictionary_provider_name" msgid="3027315045397363079">"डिक्शनरी प्रदाता"</string> <string name="dictionary_service_name" msgid="6237472350693511448">"डिक्शनरी सेवा"</string> <string name="download_description" msgid="6014835283119198591">"डिक्शनरी अपडेट जानकारी"</string> - <string name="dictionary_settings_title" msgid="8091417676045693313">"एड-ऑन डिक्शनरी"</string> - <string name="dictionary_install_over_metered_network_prompt" msgid="3587517870006332980">"डिक्शनरी उपलब्ध"</string> + <string name="dictionary_settings_title" msgid="8091417676045693313">"एड-ऑन शब्दकोश"</string> + <string name="dictionary_install_over_metered_network_prompt" msgid="3587517870006332980">"शब्दकोश उपलब्ध"</string> <string name="dictionary_settings_summary" msgid="5305694987799824349">"डिक्शनरी की सेटिंग"</string> - 
<string name="user_dictionaries" msgid="3582332055892252845">"उपयोगकर्ता डिक्शनरी"</string> - <string name="default_user_dict_pref_name" msgid="1625055720489280530">"उपयोगकर्ता डिक्शनरी"</string> - <string name="dictionary_available" msgid="4728975345815214218">"डिक्शनरी उपलब्ध"</string> + <string name="user_dictionaries" msgid="3582332055892252845">"उपयोगकर्ता शब्दकोश"</string> + <string name="default_user_dict_pref_name" msgid="1625055720489280530">"उपयोगकर्ता शब्दकोश"</string> + <string name="dictionary_available" msgid="4728975345815214218">"शब्दकोश उपलब्ध"</string> <string name="dictionary_downloading" msgid="2982650524622620983">"वर्तमान में डाउनलोड हो रही है"</string> <string name="dictionary_installed" msgid="8081558343559342962">"इंस्टॉल है"</string> <string name="dictionary_disabled" msgid="8950383219564621762">"इंस्टॉल है, अक्षम है"</string> - <string name="cannot_connect_to_dict_service" msgid="9216933695765732398">"डिक्शनरी सेवा से कनेक्ट करने में समस्या"</string> + <string name="cannot_connect_to_dict_service" msgid="9216933695765732398">"शब्दकोश सेवा से कनेक्ट करने में समस्या"</string> <string name="no_dictionaries_available" msgid="8039920716566132611">"डिक्शनरी अनुपलब्ध"</string> <string name="check_for_updates_now" msgid="8087688440916388581">"रीफ़्रेश करें"</string> <string name="last_update" msgid="730467549913588780">"अंतिम अपडेट"</string> @@ -215,7 +215,7 @@ <string name="install_dict" msgid="180852772562189365">"इंस्टॉल करें"</string> <string name="cancel_download_dict" msgid="7843340278507019303">"रद्द करें"</string> <string name="delete_dict" msgid="756853268088330054">"हटाएं"</string> - <string name="should_download_over_metered_prompt" msgid="2878629598667658845">"आपके मोबाइल उपकरण पर चयनित भाषा में डिक्शनरी उपलब्ध है.<br/> आपके लेखन अनुभव को बेहतर बनाने के लिए हम <xliff:g id="LANGUAGE">%1$s</xliff:g> डिक्शनरी को <b>डाउनलोड करने</b> की अनुशंसा करते हैं.<br/> <br/> 3G पर डाउनलोड होने में एक या दो मिनट लग सकते हैं. 
यदि आपके पास <b>असीमित डेटा प्लान</b> नहीं है, तो शुल्क लग सकते हैं.<br/> यदि आप अपने डेटा प्लान के बारे में सुनिश्चित नहीं हैं, तो हम अपने आप डाउनलोड प्रारंभ करने के लिए Wi-Fi कनेक्शन ढूंढने की अनुशंसा करते हैं.<br/> <br/> युक्ति: आप अपने मोबाइल उपकरण पर <b>सेटिंग</b> मेनू में <b>भाषा और इनपुट</b> पर जाकर डिक्शनरी डाउनलोड कर सकते हैं और निकाल सकते हैं."</string> + <string name="should_download_over_metered_prompt" msgid="2878629598667658845">"आपके मोबाइल उपकरण पर चयनित भाषा में डिक्शनरी उपलब्ध है.<br/> आपके लेखन अनुभव को बेहतर बनाने के लिए हम <xliff:g id="LANGUAGE">%1$s</xliff:g> डिक्शनरी को <b>डाउनलोड करने</b> की अनुशंसा करते हैं.<br/> <br/> 3G पर डाउनलोड होने में एक या दो मिनट लग सकते हैं. यदि आपके पास <b>असीमित डेटा प्लान</b> नहीं है, तो शुल्क लग सकते हैं.<br/> यदि आप अपने डेटा प्लान के बारे में सुनिश्चित नहीं हैं, तो हम अपने आप डाउनलोड प्रारंभ करने के लिए Wi-Fi कनेक्शन ढूंढने की अनुशंसा करते हैं.<br/> <br/> युक्ति: आप अपने मोबाइल उपकरण पर <b>सेटिंग</b> मेनू में <b>भाषा और अक्षर</b> पर जाकर डिक्शनरी डाउनलोड कर सकते हैं और निकाल सकते हैं."</string> <string name="download_over_metered" msgid="1643065851159409546">"अभी डाउनलोड करें (<xliff:g id="SIZE_IN_MEGABYTES">%1$.1f</xliff:g>MB)"</string> <string name="do_not_download_over_metered" msgid="2176209579313941583">"Wi-Fi से डाउनलोड करें"</string> <string name="dict_available_notification_title" msgid="6514288591959117288">"<xliff:g id="LANGUAGE">%1$s</xliff:g> के लिए डिक्शनरी उपलब्ध है"</string> diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp index bb54e608e..e81591992 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp +++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp @@ -21,7 +21,6 @@ #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_vector.h" #include "suggest/core/dictionary/multi_bigram_map.h" -#include "suggest/core/dictionary/probability_utils.h" #include 
"suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "utils/char_utils.h" @@ -93,13 +92,15 @@ namespace latinime { if (NOT_A_VALID_WORD_POS == wordPos || NOT_A_VALID_WORD_POS == prevWordPos) { // Note: Normally wordPos comes from the dictionary and should never equal // NOT_A_VALID_WORD_POS. - return ProbabilityUtils::backoff(unigramProbability); + return dictionaryStructurePolicy->getProbability(unigramProbability, + NOT_A_PROBABILITY); } if (multiBigramMap) { return multiBigramMap->getBigramProbability(dictionaryStructurePolicy, prevWordPos, wordPos, unigramProbability); } - return ProbabilityUtils::backoff(unigramProbability); + return dictionaryStructurePolicy->getProbability(unigramProbability, + NOT_A_PROBABILITY); } //////////////// diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h index f437c95f6..9bc96877e 100644 --- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h +++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h @@ -116,10 +116,6 @@ class DicNodeStatePrevWord { return mPrevWordStart; } - int16_t getPrevWordProbability() const { - return mPrevWordProbability; - } - int getPrevWordNodePos() const { return mPrevWordNodePos; } diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp index e74a1dbc8..cf1cd8815 100644 --- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp @@ -23,7 +23,6 @@ #include "defines.h" #include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h" #include "suggest/core/dictionary/dictionary.h" -#include "suggest/core/dictionary/probability_utils.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "utils/char_utils.h" @@ -131,7 +130,7 @@ int 
BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng // resulting probability is 8 - although in the practice it's never bigger than 3 or 4 // in very bad cases. This means that sometimes, we'll see some bigrams interverted // here, but it can't get too bad. - const int probability = ProbabilityUtils::computeProbabilityForBigram( + const int probability = mDictionaryStructurePolicy->getProbability( unigramProbability, bigramsIt.getProbability()); addWordBigram(bigramBuffer, codePointCount, probability, outBigramProbability, outBigramCodePoints, outputTypes); diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index 8418a608a..02ece639c 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -90,7 +90,7 @@ int Dictionary::getProbability(const int *word, int length) const { if (NOT_A_VALID_WORD_POS == pos) { return NOT_A_PROBABILITY; } - return getDictionaryStructurePolicy()->getUnigramProbability(pos); + return getDictionaryStructurePolicy()->getUnigramProbabilityOfPtNode(pos); } bool Dictionary::isValidBigram(const int *word0, int length0, const int *word1, int length1) const { diff --git a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h index fb4a80083..9efe5f6f9 100644 --- a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h +++ b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h @@ -22,7 +22,6 @@ #include "defines.h" #include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h" #include "suggest/core/dictionary/bloom_filter.h" -#include "suggest/core/dictionary/probability_utils.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "utils/hash_map_compat.h" @@ -43,11 +42,12 @@ class MultiBigramMap { hash_map_compat<int, BigramMap>::const_iterator mapPosition = 
mBigramMaps.find(wordPosition); if (mapPosition != mBigramMaps.end()) { - return mapPosition->second.getBigramProbability(nextWordPosition, unigramProbability); + return mapPosition->second.getBigramProbability(structurePolicy, nextWordPosition, + unigramProbability); } if (mBigramMaps.size() < MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP) { addBigramsForWordPosition(structurePolicy, wordPosition); - return mBigramMaps[wordPosition].getBigramProbability( + return mBigramMaps[wordPosition].getBigramProbability(structurePolicy, nextWordPosition, unigramProbability); } return readBigramProbabilityFromBinaryDictionary(structurePolicy, wordPosition, @@ -82,17 +82,17 @@ class MultiBigramMap { } AK_FORCE_INLINE int getBigramProbability( + const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nextWordPosition, const int unigramProbability) const { + int bigramProbability = NOT_A_PROBABILITY; if (mBloomFilter.isInFilter(nextWordPosition)) { const hash_map_compat<int, int>::const_iterator bigramProbabilityIt = mBigramMap.find(nextWordPosition); if (bigramProbabilityIt != mBigramMap.end()) { - const int bigramProbability = bigramProbabilityIt->second; - return ProbabilityUtils::computeProbabilityForBigram( - unigramProbability, bigramProbability); + bigramProbability = bigramProbabilityIt->second; } } - return ProbabilityUtils::backoff(unigramProbability); + return structurePolicy->getProbability(unigramProbability, bigramProbability); } private: @@ -111,17 +111,18 @@ class MultiBigramMap { AK_FORCE_INLINE int readBigramProbabilityFromBinaryDictionary( const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos, const int nextWordPosition, const int unigramProbability) { + int bigramProbability = NOT_A_PROBABILITY; const int bigramsListPos = structurePolicy->getBigramsPositionOfNode(nodePos); BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), bigramsListPos); while (bigramsIt.hasNext()) { 
bigramsIt.next(); if (bigramsIt.getBigramPos() == nextWordPosition) { - return ProbabilityUtils::computeProbabilityForBigram( - unigramProbability, bigramsIt.getProbability()); + bigramProbability = bigramsIt.getProbability(); + break; } } - return ProbabilityUtils::backoff(unigramProbability); + return structurePolicy->getProbability(unigramProbability, bigramProbability); } static const size_t MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP; diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index 532411509..c8cbbcfdf 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -47,7 +47,10 @@ class DictionaryStructureWithBufferPolicy { virtual int getTerminalNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const = 0; - virtual int getUnigramProbability(const int nodePos) const = 0; + virtual int getProbability(const int unigramProbability, + const int bigramProbability) const = 0; + + virtual int getUnigramProbabilityOfPtNode(const int nodePos) const = 0; virtual int getShortcutPositionOfNode(const int nodePos) const = 0; diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp index 7d8dd21c5..e788e914a 100644 --- a/native/jni/src/suggest/core/suggest.cpp +++ b/native/jni/src/suggest/core/suggest.cpp @@ -171,7 +171,9 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen terminalIndex, doubleLetterTerminalIndex, doubleLetterLevel); const float compoundDistance = terminalDicNode->getCompoundDistance(languageWeight) + doubleLetterCost; - const bool isPossiblyOffensiveWord = terminalDicNode->getProbability() <= 0; + const bool isPossiblyOffensiveWord = + traverseSession->getDictionaryStructurePolicy()->getProbability( + 
terminalDicNode->getProbability(), NOT_A_PROBABILITY) <= 0; const bool isExactMatch = terminalDicNode->isExactMatch(); const bool isFirstCharUppercase = terminalDicNode->isFirstCharUppercase(); // Heuristic: We exclude freq=0 first-char-uppercase words from exact match. diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp index 3b9878b82..945677b50 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp @@ -24,6 +24,7 @@ #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" #include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/utils/probability_utils.h" namespace latinime { @@ -134,7 +135,20 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in return NOT_A_VALID_WORD_POS; } -int DynamicPatriciaTriePolicy::getUnigramProbability(const int nodePos) const { +int DynamicPatriciaTriePolicy::getProbability(const int unigramProbability, + const int bigramProbability) const { + // TODO: check mHeaderPolicy.usesForgettingCurve(); + if (unigramProbability == NOT_A_PROBABILITY) { + return NOT_A_PROBABILITY; + } else if (bigramProbability == NOT_A_PROBABILITY) { + return ProbabilityUtils::backoff(unigramProbability); + } else { + return ProbabilityUtils::computeProbabilityForBigram(unigramProbability, + bigramProbability); + } +} + +int DynamicPatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int nodePos) const { if (nodePos == NOT_A_VALID_WORD_POS) { return NOT_A_PROBABILITY; } @@ -144,7 +158,7 @@ int DynamicPatriciaTriePolicy::getUnigramProbability(const int nodePos) const { if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || 
nodeReader.isNotAWord()) { return NOT_A_PROBABILITY; } - return nodeReader.getProbability(); + return getProbability(nodeReader.getProbability(), NOT_A_PROBABILITY); } int DynamicPatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) const { diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h index 5873d3d65..cdab0e16a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h @@ -57,7 +57,9 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int getTerminalNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const; - int getUnigramProbability(const int nodePos) const; + int getProbability(const int unigramProbability, const int bigramProbability) const; + + int getUnigramProbabilityOfPtNode(const int nodePos) const; int getShortcutPositionOfNode(const int nodePos) const; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h index a6cb46d39..62d73bb02 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h @@ -56,6 +56,15 @@ class DynamicPatriciaTrieReadingUtils { return FLAG_IS_DELETED == (MASK_MOVED & flags); } + static AK_FORCE_INLINE NodeFlags updateAndGetFlags(const NodeFlags originalFlags, + const bool isMoved, const bool isDeleted) { + NodeFlags flags = originalFlags; /* Clear the MASK_MOVED bits with bitwise ~ before OR-ing in the new state; logical ! yields only 0 or 1 and would drop the other flag bits. */ + flags = isMoved ? ((flags & (~MASK_MOVED)) | FLAG_IS_MOVED) : flags; + flags = isDeleted ? ((flags & (~MASK_MOVED)) | FLAG_IS_DELETED) : flags; + flags = (!isMoved && !isDeleted) ? 
((flags & (~MASK_MOVED)) | FLAG_IS_NOT_MOVED) : flags; + return flags; + } + private: DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieReadingUtils); diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp index adcf2dbdf..d5a83a938 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp @@ -21,6 +21,7 @@ #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_vector.h" #include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/utils/probability_utils.h" namespace latinime { @@ -306,7 +307,19 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, } } -int PatriciaTriePolicy::getUnigramProbability(const int nodePos) const { +int PatriciaTriePolicy::getProbability(const int unigramProbability, + const int bigramProbability) const { + if (unigramProbability == NOT_A_PROBABILITY) { + return NOT_A_PROBABILITY; + } else if (bigramProbability == NOT_A_PROBABILITY) { + return ProbabilityUtils::backoff(unigramProbability); + } else { + return ProbabilityUtils::computeProbabilityForBigram(unigramProbability, + bigramProbability); + } +} + +int PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int nodePos) const { if (nodePos == NOT_A_VALID_WORD_POS) { return NOT_A_PROBABILITY; } @@ -324,7 +337,8 @@ int PatriciaTriePolicy::getUnigramProbability(const int nodePos) const { return NOT_A_PROBABILITY; } PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos); - return PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos); + return getProbability(PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition( + mDictRoot, &pos), NOT_A_PROBABILITY); } int PatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) const { 
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h index d0567fd85..75d976205 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h @@ -56,7 +56,9 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int getTerminalNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const; - int getUnigramProbability(const int nodePos) const; + int getProbability(const int unigramProbability, const int bigramProbability) const; + + int getUnigramProbabilityOfPtNode(const int nodePos) const; int getShortcutPositionOfNode(const int nodePos) const; diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h index f76c38751..2b0646db2 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h @@ -119,6 +119,29 @@ class PatriciaTrieReadingUtils { return FLAG_CHILDREN_POSITION_TYPE_NOPOSITION != (MASK_CHILDREN_POSITION_TYPE & flags); } + static AK_FORCE_INLINE NodeFlags createAndGetFlags(const bool isBlacklisted, + const bool isNotAWord, const bool isTerminal, const bool hasShortcutTargets, + const bool hasBigrams, const bool hasMultipleChars, + const int childrenPositionFieldSize) { + NodeFlags nodeFlags = 0; + nodeFlags = isBlacklisted ? (nodeFlags | FLAG_IS_BLACKLISTED) : nodeFlags; + nodeFlags = isNotAWord ? (nodeFlags | FLAG_IS_NOT_A_WORD) : nodeFlags; + nodeFlags = isTerminal ? (nodeFlags | FLAG_IS_TERMINAL) : nodeFlags; + nodeFlags = hasShortcutTargets ? (nodeFlags | FLAG_HAS_SHORTCUT_TARGETS) : nodeFlags; + nodeFlags = hasBigrams ? (nodeFlags | FLAG_HAS_BIGRAMS) : nodeFlags; + nodeFlags = hasMultipleChars ? 
(nodeFlags | FLAG_HAS_MULTIPLE_CHARS) : nodeFlags; + if (childrenPositionFieldSize == 1) { + nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_ONEBYTE; + } else if (childrenPositionFieldSize == 2) { + nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_TWOBYTES; + } else if (childrenPositionFieldSize == 3) { + nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_THREEBYTES; + } else { + nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_NOPOSITION; + } + return nodeFlags; + } + private: DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTrieReadingUtils); diff --git a/native/jni/src/suggest/core/dictionary/probability_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/probability_utils.h index 21fe355b8..21fe355b8 100644 --- a/native/jni/src/suggest/core/dictionary/probability_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/probability_utils.h |