aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/src
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni/src')
-rw-r--r--native/jni/src/defines.h87
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node.cpp5
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node.h317
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_pool.h87
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_priority_queue.h150
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_utils.cpp74
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_utils.h16
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_vector.h29
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_nodes_cache.cpp33
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_nodes_cache.h30
-rw-r--r--native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h111
-rw-r--r--native/jni/src/suggest/core/dicnode/internal/dic_node_state.h46
-rw-r--r--native/jni/src/suggest/core/dicnode/internal/dic_node_state_input.h14
-rw-r--r--native/jni/src/suggest/core/dicnode/internal/dic_node_state_output.h118
-rw-r--r--native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h154
-rw-r--r--native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h69
-rw-r--r--native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp176
-rw-r--r--native/jni/src/suggest/core/dictionary/bigram_dictionary.h46
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h5
-rw-r--r--native/jni/src/suggest/core/dictionary/bloom_filter.h39
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.cpp181
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.h73
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary_utils.cpp96
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary_utils.h (renamed from native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h)28
-rw-r--r--native/jni/src/suggest/core/dictionary/digraph_utils.cpp22
-rw-r--r--native/jni/src/suggest/core/dictionary/digraph_utils.h2
-rw-r--r--native/jni/src/suggest/core/dictionary/error_type_utils.cpp38
-rw-r--r--native/jni/src/suggest/core/dictionary/error_type_utils.h78
-rw-r--r--native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp72
-rw-r--r--native/jni/src/suggest/core/dictionary/multi_bigram_map.h73
-rw-r--r--native/jni/src/suggest/core/dictionary/property/bigram_property.h66
-rw-r--r--native/jni/src/suggest/core/dictionary/property/unigram_property.h114
-rw-r--r--native/jni/src/suggest/core/dictionary/property/word_property.cpp84
-rw-r--r--native/jni/src/suggest/core/dictionary/property/word_property.h62
-rw-r--r--native/jni/src/suggest/core/dictionary/shortcut_utils.h64
-rw-r--r--native/jni/src/suggest/core/layout/normal_distribution.h49
-rw-r--r--native/jni/src/suggest/core/layout/normal_distribution_2d.h59
-rw-r--r--native/jni/src/suggest/core/layout/proximity_info.cpp22
-rw-r--r--native/jni/src/suggest/core/layout/proximity_info.h21
-rw-r--r--native/jni/src/suggest/core/layout/proximity_info_params.cpp16
-rw-r--r--native/jni/src/suggest/core/layout/proximity_info_params.h12
-rw-r--r--native/jni/src/suggest/core/layout/proximity_info_state.cpp32
-rw-r--r--native/jni/src/suggest/core/layout/proximity_info_state.h26
-rw-r--r--native/jni/src/suggest/core/layout/proximity_info_state_utils.cpp210
-rw-r--r--native/jni/src/suggest/core/layout/proximity_info_state_utils.h21
-rw-r--r--native/jni/src/suggest/core/layout/proximity_info_utils.h43
-rw-r--r--native/jni/src/suggest/core/layout/touch_position_correction_utils.h4
-rw-r--r--native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h21
-rw-r--r--native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h48
-rw-r--r--native/jni/src/suggest/core/policy/scoring.h22
-rw-r--r--native/jni/src/suggest/core/policy/traversal.h4
-rw-r--r--native/jni/src/suggest/core/policy/weighting.cpp5
-rw-r--r--native/jni/src/suggest/core/policy/weighting.h3
-rw-r--r--native/jni/src/suggest/core/result/suggested_word.h83
-rw-r--r--native/jni/src/suggest/core/result/suggestion_results.cpp106
-rw-r--r--native/jni/src/suggest/core/result/suggestion_results.h63
-rw-r--r--native/jni/src/suggest/core/result/suggestions_output_utils.cpp235
-rw-r--r--native/jni/src/suggest/core/result/suggestions_output_utils.h56
-rw-r--r--native/jni/src/suggest/core/session/dic_traverse_session.cpp21
-rw-r--r--native/jni/src/suggest/core/session/dic_traverse_session.h49
-rw-r--r--native/jni/src/suggest/core/session/prev_words_info.h211
-rw-r--r--native/jni/src/suggest/core/suggest.cpp264
-rw-r--r--native/jni/src/suggest/core/suggest.h28
-rw-r--r--native/jni/src/suggest/core/suggest_interface.h6
-rw-r--r--native/jni/src/suggest/core/suggest_options.h7
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp182
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp391
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h92
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp53
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp191
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp124
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h163
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp380
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h121
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp239
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h289
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp558
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h138
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp102
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h224
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp68
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h75
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h123
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/Readme.txt1
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.cpp290
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h93
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.cpp224
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.h135
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_entry.h110
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/dict_content.h47
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.cpp171
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.h74
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h90
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/shortcut_dict_content.cpp199
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/shortcut_dict_content.h101
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/single_dict_content.h86
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.cpp50
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.h122
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.cpp111
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h73
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/shortcut/ver4_shortcut_list_policy.h118
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.cpp156
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.h152
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.cpp81
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h84
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.cpp109
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h79
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp429
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h145
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp513
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h174
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_reading_utils.cpp39
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_reading_utils.h52
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.cpp304
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.h140
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.cpp90
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.h57
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp203
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h64
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.cpp87
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h (renamed from native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h)37
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp144
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h (renamed from native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h)89
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp326
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h284
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp)26
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h (renamed from native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h)24
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp291
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h94
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp)63
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h (renamed from native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h)25
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp)32
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h (renamed from native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h)12
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h45
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h255
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h40
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h98
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp)4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h (renamed from native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h)2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h (renamed from native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_policy.h)4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp)322
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h (renamed from native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h)79
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/shortcut/shortcut_list_policy.h (renamed from native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h)4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp52
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h50
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.cpp54
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h44
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp282
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h71
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp219
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h131
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h99
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dict_content.h (renamed from native/jni/src/suggest/core/dictionary/bloom_filter.cpp)19
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp160
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h63
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h79
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.cpp188
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h90
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h75
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.cpp39
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h111
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.cpp100
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h62
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h106
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp145
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h141
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp70
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h73
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp98
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h60
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp390
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h123
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp522
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h151
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp28
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h37
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp294
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h125
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.cpp79
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h42
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp52
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h26
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h20
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp99
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h26
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.cpp171
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h60
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp266
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h93
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp27
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h12
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/historical_info.h58
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.cpp98
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h62
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/probability_utils.h3
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp101
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h61
-rw-r--r--native/jni/src/suggest/policyimpl/typing/scoring_params.cpp23
-rw-r--r--native/jni/src/suggest/policyimpl/typing/scoring_params.h10
-rw-r--r--native/jni/src/suggest/policyimpl/typing/typing_scoring.h60
-rw-r--r--native/jni/src/suggest/policyimpl/typing/typing_traversal.h23
-rw-r--r--native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp47
-rw-r--r--native/jni/src/suggest/policyimpl/typing/typing_weighting.h12
-rw-r--r--native/jni/src/suggest/policyimpl/utils/edit_distance.h12
-rw-r--r--native/jni/src/utils/autocorrection_threshold_utils.cpp3
-rw-r--r--native/jni/src/utils/char_utils.cpp8
-rw-r--r--native/jni/src/utils/char_utils.h27
-rw-r--r--native/jni/src/utils/hash_map_compat.h34
-rw-r--r--native/jni/src/utils/jni_data_utils.cpp (renamed from native/jni/src/suggest/core/dicnode/dic_node_release_listener.h)19
-rw-r--r--native/jni/src/utils/jni_data_utils.h151
-rw-r--r--native/jni/src/utils/time_keeper.cpp41
-rw-r--r--native/jni/src/utils/time_keeper.h41
212 files changed, 15733 insertions, 5757 deletions
diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h
index 742e388e4..24d04e51f 100644
--- a/native/jni/src/defines.h
+++ b/native/jni/src/defines.h
@@ -35,7 +35,13 @@
// Must be equal to ProximityInfo.MAX_PROXIMITY_CHARS_SIZE in Java
#define MAX_PROXIMITY_CHARS_SIZE 16
#define ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE 2
-#define NELEMS(x) (sizeof(x) / sizeof((x)[0]))
+
+// TODO: Use size_t instead of int.
+// Disclaimer: You will see a compile error if you use this macro against a variable-length array.
+// Sorry for the inconvenience. It isn't supported.
+template <typename T, int N>
+char (&ArraySizeHelper(T (&array)[N]))[N];
+#define NELEMS(x) (sizeof(ArraySizeHelper(x)))
AK_FORCE_INLINE static int intArrayToCharArray(const int *const source, const int sourceSize,
char *dest, const int destSize) {
@@ -87,14 +93,24 @@ AK_FORCE_INLINE static int intArrayToCharArray(const int *const source, const in
}
#if defined(FLAG_DO_PROFILE) || defined(FLAG_DBG)
+#if defined(__ANDROID__)
#include <android/log.h>
+#endif // defined(__ANDROID__)
#ifndef LOG_TAG
#define LOG_TAG "LatinIME: "
#endif // LOG_TAG
+
+#if defined(HOST_TOOL)
+#include <stdio.h>
+#define AKLOGE(fmt, ...) printf(fmt "\n", ##__VA_ARGS__)
+#define AKLOGI(fmt, ...) printf(fmt "\n", ##__VA_ARGS__)
+#else // defined(HOST_TOOL)
#define AKLOGE(fmt, ...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, fmt, ##__VA_ARGS__)
#define AKLOGI(fmt, ...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, fmt, ##__VA_ARGS__)
+#endif // defined(HOST_TOOL)
-#define DUMP_RESULT(words, frequencies) do { dumpResult(words, frequencies); } while (0)
+#define DUMP_SUGGESTION(words, frequencies, index, score) \
+ do { dumpWordInfo(words, frequencies, index, score); } while (0)
#define DUMP_WORD(word, length) do { dumpWord(word, length); } while (0)
#define INTS_TO_CHARS(input, length, output, outlength) do { \
intArrayToCharArray(input, length, output, outlength); } while (0)
@@ -108,14 +124,6 @@ static inline void dumpWordInfo(const int *word, const int length, const int ran
}
}
-static inline void dumpResult(const int *outWords, const int *frequencies) {
- AKLOGI("--- DUMP RESULT ---------");
- for (int i = 0; i < MAX_RESULTS; ++i) {
- dumpWordInfo(&outWords[i * MAX_WORD_LENGTH], MAX_WORD_LENGTH, i, frequencies[i]);
- }
- AKLOGI("-------------------------");
-}
-
static AK_FORCE_INLINE void dumpWord(const int *word, const int length) {
static char charBuf[50];
const int N = intArrayToCharArray(word, length, charBuf, NELEMS(charBuf));
@@ -156,7 +164,7 @@ static inline void showStackTrace() {
#else // defined(FLAG_DO_PROFILE) || defined(FLAG_DBG)
#define AKLOGE(fmt, ...)
#define AKLOGI(fmt, ...)
-#define DUMP_RESULT(words, frequencies)
+#define DUMP_SUGGESTION(words, frequencies, index, score)
#define DUMP_WORD(word, length)
#undef DO_ASSERT_TEST
#define ASSERT(success)
@@ -285,23 +293,18 @@ static inline void prof_out(void) {
#define M_PI_F 3.14159265f
#define MAX_PERCENTILE 100
-// Number of base-10 digits in the largest integer + 1 to leave room for a zero terminator.
-// As such, this is the maximum number of characters will be needed to represent an int as a
-// string, including the terminator; this is used as the size of a string buffer large enough to
-// hold any value that is intended to fit in an integer, e.g. in the code that reads the header
-// of the binary dictionary where a {key,value} string pair scheme is used.
-#define LARGEST_INT_DIGIT_COUNT 11
-
#define NOT_A_CODE_POINT (-1)
#define NOT_A_DISTANCE (-1)
#define NOT_A_COORDINATE (-1)
#define NOT_AN_INDEX (-1)
#define NOT_A_PROBABILITY (-1)
#define NOT_A_DICT_POS (S_INT_MIN)
+#define NOT_A_TIMESTAMP (-1)
+#define NOT_A_LANGUAGE_WEIGHT (-1.0f)
// A special value to mean the first word confidence makes no sense in this case,
// e.g. this is not a multi-word suggestion.
-#define NOT_A_FIRST_WORD_CONFIDENCE (S_INT_MAX)
+#define NOT_A_FIRST_WORD_CONFIDENCE (S_INT_MIN)
// How high the confidence needs to be for us to auto-commit. Arbitrary.
// This needs to be the same as CONFIDENCE_FOR_AUTO_COMMIT in BinaryDictionary.java
#define CONFIDENCE_FOR_AUTO_COMMIT (1000000)
@@ -315,14 +318,13 @@ static inline void prof_out(void) {
#define KEYCODE_SPACE ' '
#define KEYCODE_SINGLE_QUOTE '\''
#define KEYCODE_HYPHEN_MINUS '-'
+// Code point to indicate beginning-of-sentence. This is not in the code point space of unicode.
+#define CODE_POINT_BEGINNING_OF_SENTENCE 0x110000
#define SUGGEST_INTERFACE_OUTPUT_SCALE 1000000.0f
#define MAX_PROBABILITY 255
#define MAX_BIGRAM_ENCODED_PROBABILITY 15
-// Assuming locale strings such as en_US, sr-Latn etc.
-#define MAX_LOCALE_STRING_LENGTH 10
-
// Max value for length, distance and probability which are used in weighting
// TODO: Remove
#define MAX_VALUE_FOR_WEIGHTING 10000000
@@ -334,19 +336,24 @@ static inline void prof_out(void) {
#define MAX_POINTER_COUNT 1
#define MAX_POINTER_COUNT_G 2
-template<typename T> AK_FORCE_INLINE const T &min(const T &a, const T &b) { return a < b ? a : b; }
-template<typename T> AK_FORCE_INLINE const T &max(const T &a, const T &b) { return a > b ? a : b; }
+// (MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1)-gram is supported.
+#define MAX_PREV_WORD_COUNT_FOR_N_GRAM 1
+
+#define DISALLOW_DEFAULT_CONSTRUCTOR(TypeName) \
+ TypeName() = delete
+
+#define DISALLOW_COPY_CONSTRUCTOR(TypeName) \
+ TypeName(const TypeName&) = delete
-// DEBUG
-#define INPUTLENGTH_FOR_DEBUG (-1)
-#define MIN_OUTPUT_INDEX_FOR_DEBUG (-1)
+#define DISALLOW_ASSIGNMENT_OPERATOR(TypeName) \
+ void operator=(const TypeName&) = delete
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
- TypeName(const TypeName&); \
- void operator=(const TypeName&)
+ DISALLOW_COPY_CONSTRUCTOR(TypeName); \
+ DISALLOW_ASSIGNMENT_OPERATOR(TypeName)
#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
- TypeName(); \
+ DISALLOW_DEFAULT_CONSTRUCTOR(TypeName); \
DISALLOW_COPY_AND_ASSIGN(TypeName)
// Used as a return value for character comparison
@@ -392,24 +399,4 @@ typedef enum {
// Create new word with space substitution
CT_NEW_WORD_SPACE_SUBSTITUTION,
} CorrectionType;
-
-// ErrorType is mainly decided by CorrectionType but it is also depending on if
-// the correction has really been performed or not.
-typedef enum {
- // Substitution, omission and transposition
- ET_EDIT_CORRECTION,
- // Proximity error
- ET_PROXIMITY_CORRECTION,
- // Completion
- ET_COMPLETION,
- // New word
- // TODO: Remove.
- // A new word error should be an edit correction error or a proximity correction error.
- ET_NEW_WORD,
- // Treat error as an intentional omission when the CorrectionType is omission and the node can
- // be intentional omission.
- ET_INTENTIONAL_OMISSION,
- // Not treated as an error. Tracked for checking exact match
- ET_NOT_AN_ERROR
-} ErrorType;
#endif // LATINIME_DEFINES_H
diff --git a/native/jni/src/suggest/core/dicnode/dic_node.cpp b/native/jni/src/suggest/core/dicnode/dic_node.cpp
index de088c7d0..414dc3b1e 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node.cpp
+++ b/native/jni/src/suggest/core/dicnode/dic_node.cpp
@@ -24,8 +24,7 @@ DicNode::DicNode(const DicNode &dicNode)
mProfiler(dicNode.mProfiler),
#endif
mDicNodeProperties(dicNode.mDicNodeProperties), mDicNodeState(dicNode.mDicNodeState),
- mIsCachedForNextSuggestion(dicNode.mIsCachedForNextSuggestion), mIsUsed(dicNode.mIsUsed),
- mReleaseListener(0) {
+ mIsCachedForNextSuggestion(dicNode.mIsCachedForNextSuggestion) {
/* empty */
}
@@ -36,8 +35,6 @@ DicNode &DicNode::operator=(const DicNode &dicNode) {
mDicNodeProperties = dicNode.mDicNodeProperties;
mDicNodeState = dicNode.mDicNodeState;
mIsCachedForNextSuggestion = dicNode.mIsCachedForNextSuggestion;
- mIsUsed = dicNode.mIsUsed;
- mReleaseListener = dicNode.mReleaseListener;
return *this;
}
diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h
index 49cfdecac..92f39ea25 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node.h
@@ -19,29 +19,35 @@
#include "defines.h"
#include "suggest/core/dicnode/dic_node_profiler.h"
-#include "suggest/core/dicnode/dic_node_release_listener.h"
+#include "suggest/core/dicnode/dic_node_utils.h"
#include "suggest/core/dicnode/internal/dic_node_state.h"
#include "suggest/core/dicnode/internal/dic_node_properties.h"
#include "suggest/core/dictionary/digraph_utils.h"
+#include "suggest/core/dictionary/error_type_utils.h"
+#include "suggest/core/layout/proximity_info_state.h"
#include "utils/char_utils.h"
#if DEBUG_DICT
#define LOGI_SHOW_ADD_COST_PROP \
- do { char charBuf[50]; \
- INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf, NELEMS(charBuf)); \
- AKLOGI("%20s, \"%c\", size = %03d, total = %03d, index(0) = %02d, dist = %.4f, %s,,", \
- __FUNCTION__, getNodeCodePoint(), inputSize, getTotalInputIndex(), \
- getInputIndex(0), getNormalizedCompoundDistance(), charBuf); } while (0)
+ do { \
+ char charBuf[50]; \
+ INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf, NELEMS(charBuf)); \
+ AKLOGI("%20s, \"%c\", size = %03d, total = %03d, index(0) = %02d, dist = %.4f, %s,,", \
+ __FUNCTION__, getNodeCodePoint(), inputSize, getTotalInputIndex(), \
+ getInputIndex(0), getNormalizedCompoundDistance(), charBuf); \
+ } while (0)
#define DUMP_WORD_AND_SCORE(header) \
- do { char charBuf[50]; char prevWordCharBuf[50]; \
- INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf, NELEMS(charBuf)); \
- INTS_TO_CHARS(mDicNodeState.mDicNodeStatePrevWord.mPrevWord, \
- mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(), prevWordCharBuf, \
- NELEMS(prevWordCharBuf)); \
- AKLOGI("#%8s, %5f, %5f, %5f, %5f, %s, %s, %d, %5f,", header, \
- getSpatialDistanceForScoring(), getLanguageDistanceForScoring(), \
- getNormalizedCompoundDistance(), getRawLength(), prevWordCharBuf, charBuf, \
- getInputIndex(0), getNormalizedCompoundDistanceAfterFirstWord()); \
+ do { \
+ char charBuf[50]; \
+ INTS_TO_CHARS(getOutputWordBuf(), \
+ getNodeCodePointCount() \
+ + mDicNodeState.mDicNodeStateOutput.getPrevWordsLength(), \
+ charBuf, NELEMS(charBuf)); \
+ AKLOGI("#%8s, %5f, %5f, %5f, %5f, %s, %d, %5f,", header, \
+ getSpatialDistanceForScoring(), \
+ mDicNodeState.mDicNodeStateScoring.getLanguageDistance(), \
+ getNormalizedCompoundDistance(), getRawLength(), charBuf, \
+ getInputIndex(0), getNormalizedCompoundDistanceAfterFirstWord()); \
} while (0)
#else
#define LOGI_SHOW_ADD_COST_PROP
@@ -77,113 +83,74 @@ class DicNode {
#if DEBUG_DICT
DicNodeProfiler mProfiler;
#endif
- //////////////////
- // Memory utils //
- //////////////////
- AK_FORCE_INLINE static void managedDelete(DicNode *node) {
- node->remove();
- }
- // end
- /////////////////
AK_FORCE_INLINE DicNode()
:
#if DEBUG_DICT
mProfiler(),
#endif
- mDicNodeProperties(), mDicNodeState(), mIsCachedForNextSuggestion(false),
- mIsUsed(false), mReleaseListener(0) {}
+ mDicNodeProperties(), mDicNodeState(), mIsCachedForNextSuggestion(false) {}
DicNode(const DicNode &dicNode);
DicNode &operator=(const DicNode &dicNode);
- virtual ~DicNode() {}
+ ~DicNode() {}
// Init for copy
- void initByCopy(const DicNode *dicNode) {
- mIsUsed = true;
+ void initByCopy(const DicNode *const dicNode) {
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
- mDicNodeProperties.init(&dicNode->mDicNodeProperties);
- mDicNodeState.init(&dicNode->mDicNodeState);
+ mDicNodeProperties.initByCopy(&dicNode->mDicNodeProperties);
+ mDicNodeState.initByCopy(&dicNode->mDicNodeState);
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
}
- // Init for root with prevWordNodePos which is used for bigram
- void initAsRoot(const int rootGroupPos, const int prevWordNodePos) {
- mIsUsed = true;
+ // Init for root with prevWordsPtNodePos which is used for n-gram
+ void initAsRoot(const int rootPtNodeArrayPos, const int *const prevWordsPtNodePos) {
mIsCachedForNextSuggestion = false;
- mDicNodeProperties.init(
- NOT_A_DICT_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */,
- NOT_A_PROBABILITY /* probability */, false /* isTerminal */,
- true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */,
- 0 /* terminalDepth */);
- mDicNodeState.init(prevWordNodePos);
+ mDicNodeProperties.init(rootPtNodeArrayPos, prevWordsPtNodePos);
+ mDicNodeState.init();
PROF_NODE_RESET(mProfiler);
}
// Init for root with previous word
- void initAsRootWithPreviousWord(DicNode *dicNode, const int rootGroupPos) {
- mIsUsed = true;
+ void initAsRootWithPreviousWord(const DicNode *const dicNode, const int rootPtNodeArrayPos) {
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
- mDicNodeProperties.init(
- NOT_A_DICT_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */,
- NOT_A_PROBABILITY /* probability */, false /* isTerminal */,
- true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */,
- 0 /* terminalDepth */);
- // TODO: Move to dicNodeState?
- mDicNodeState.mDicNodeStateOutput.init(); // reset for next word
- mDicNodeState.mDicNodeStateInput.init(
- &dicNode->mDicNodeState.mDicNodeStateInput, true /* resetTerminalDiffCost */);
- mDicNodeState.mDicNodeStateScoring.init(
- &dicNode->mDicNodeState.mDicNodeStateScoring);
- mDicNodeState.mDicNodeStatePrevWord.init(
- dicNode->mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() + 1,
- dicNode->mDicNodeProperties.getProbability(),
- dicNode->mDicNodeProperties.getPos(),
- dicNode->mDicNodeState.mDicNodeStatePrevWord.mPrevWord,
- dicNode->mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(),
- dicNode->getOutputWordBuf(),
- dicNode->mDicNodeProperties.getDepth(),
- dicNode->mDicNodeState.mDicNodeStatePrevWord.getSecondWordFirstInputIndex(),
- mDicNodeState.mDicNodeStateInput.getInputIndex(0) /* lastInputIndex */);
+ int newPrevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+ newPrevWordsPtNodePos[0] = dicNode->mDicNodeProperties.getPtNodePos();
+ for (size_t i = 1; i < NELEMS(newPrevWordsPtNodePos); ++i) {
+ newPrevWordsPtNodePos[i] = dicNode->getNthPrevWordTerminalPtNodePos(i);
+ }
+ mDicNodeProperties.init(rootPtNodeArrayPos, newPrevWordsPtNodePos);
+ mDicNodeState.initAsRootWithPreviousWord(&dicNode->mDicNodeState,
+ dicNode->mDicNodeProperties.getDepth());
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
}
- void initAsPassingChild(DicNode *parentNode) {
- mIsUsed = true;
- mIsCachedForNextSuggestion = parentNode->mIsCachedForNextSuggestion;
- const int c = parentNode->getNodeTypedCodePoint();
- mDicNodeProperties.init(&parentNode->mDicNodeProperties, c);
- mDicNodeState.init(&parentNode->mDicNodeState);
- PROF_NODE_COPY(&parentNode->mProfiler, mProfiler);
+ void initAsPassingChild(const DicNode *parentDicNode) {
+ mIsCachedForNextSuggestion = parentDicNode->mIsCachedForNextSuggestion;
+ const int codePoint =
+ parentDicNode->mDicNodeState.mDicNodeStateOutput.getCurrentWordCodePointAt(
+ parentDicNode->getNodeCodePointCount());
+ mDicNodeProperties.init(&parentDicNode->mDicNodeProperties, codePoint);
+ mDicNodeState.initByCopy(&parentDicNode->mDicNodeState);
+ PROF_NODE_COPY(&parentDicNode->mProfiler, mProfiler);
}
- void initAsChild(const DicNode *const dicNode, const int pos, const int childrenPos,
- const int probability, const bool isTerminal, const bool hasChildren,
- const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount,
- const int *const mergedNodeCodePoints) {
- mIsUsed = true;
+ void initAsChild(const DicNode *const dicNode, const int ptNodePos,
+ const int childrenPtNodeArrayPos, const int probability, const bool isTerminal,
+ const bool hasChildren, const bool isBlacklistedOrNotAWord,
+ const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1);
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
const uint16_t newLeavingDepth = static_cast<uint16_t>(
dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount);
- mDicNodeProperties.init(pos, childrenPos, mergedNodeCodePoints[0], probability,
- isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth, newLeavingDepth);
+ mDicNodeProperties.init(ptNodePos, childrenPtNodeArrayPos, mergedNodeCodePoints[0],
+ probability, isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth,
+ newLeavingDepth, dicNode->mDicNodeProperties.getPrevWordsTerminalPtNodePos());
mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount,
mergedNodeCodePoints);
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
}
- AK_FORCE_INLINE void remove() {
- mIsUsed = false;
- if (mReleaseListener) {
- mReleaseListener->onReleased(this);
- }
- }
-
- bool isUsed() const {
- return mIsUsed;
- }
-
bool isRoot() const {
return getNodeCodePointCount() == 0;
}
@@ -209,11 +176,6 @@ class DicNode {
mIsCachedForNextSuggestion = true;
}
- // Used to expand the node in DicNodeUtils
- int getNodeTypedCodePoint() const {
- return mDicNodeState.mDicNodeStateOutput.getCodePointAt(getNodeCodePointCount());
- }
-
// Check if the current word and the previous word can be considered as a valid multiple word
// suggestion.
bool isValidMultipleWordSuggestion() const {
@@ -222,21 +184,17 @@ class DicNode {
}
// Treat suggestion as invalid if the current and the previous word are single character
// words.
- const int prevWordLen = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength()
- - mDicNodeState.mDicNodeStatePrevWord.getPrevWordStart() - 1;
+ const int prevWordLen = mDicNodeState.mDicNodeStateOutput.getPrevWordsLength()
+ - mDicNodeState.mDicNodeStateOutput.getPrevWordStart() - 1;
const int currentWordLen = getNodeCodePointCount();
return (prevWordLen != 1 || currentWordLen != 1);
}
bool isFirstCharUppercase() const {
- const int c = getOutputWordBuf()[0];
+ const int c = mDicNodeState.mDicNodeStateOutput.getCurrentWordCodePointAt(0);
return CharUtils::isAsciiUpper(c);
}
- bool isFirstWord() const {
- return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos() == NOT_A_DICT_POS;
- }
-
bool isCompletion(const int inputSize) const {
return mDicNodeState.mDicNodeStateInput.getInputIndex(0) >= inputSize;
}
@@ -245,94 +203,76 @@ class DicNode {
return mDicNodeState.mDicNodeStateInput.getInputIndex(0) < inputSize - 1;
}
- // Used to get bigram probability in DicNodeUtils
- int getPos() const {
- return mDicNodeProperties.getPos();
+ // Used to get n-gram probability in DicNodeUtils.
+ int getPtNodePos() const {
+ return mDicNodeProperties.getPtNodePos();
}
- // Used to get bigram probability in DicNodeUtils
- int getPrevWordPos() const {
- return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos();
+ // Used to get n-gram probability in DicNodeUtils. n is 1-indexed.
+ int getNthPrevWordTerminalPtNodePos(const int n) const {
+ if (n <= 0 || n > MAX_PREV_WORD_COUNT_FOR_N_GRAM) {
+ return NOT_A_DICT_POS;
+ }
+ return mDicNodeProperties.getPrevWordsTerminalPtNodePos()[n - 1];
}
// Used in DicNodeUtils
- int getChildrenPos() const {
- return mDicNodeProperties.getChildrenPos();
+ int getChildrenPtNodeArrayPos() const {
+ return mDicNodeProperties.getChildrenPtNodeArrayPos();
}
int getProbability() const {
return mDicNodeProperties.getProbability();
}
- AK_FORCE_INLINE bool isTerminalWordNode() const {
- const bool isTerminalNodes = mDicNodeProperties.isTerminal();
- const int currentNodeDepth = getNodeCodePointCount();
- const int terminalNodeDepth = mDicNodeProperties.getLeavingDepth();
- return isTerminalNodes && currentNodeDepth > 0 && currentNodeDepth == terminalNodeDepth;
+ AK_FORCE_INLINE bool isTerminalDicNode() const {
+ const bool isTerminalPtNode = mDicNodeProperties.isTerminal();
+ const int currentDicNodeDepth = getNodeCodePointCount();
+ const int terminalDicNodeDepth = mDicNodeProperties.getLeavingDepth();
+ return isTerminalPtNode && currentDicNodeDepth > 0
+ && currentDicNodeDepth == terminalDicNodeDepth;
}
bool shouldBeFilteredBySafetyNetForBigram() const {
const uint16_t currentDepth = getNodeCodePointCount();
- const int prevWordLen = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength()
- - mDicNodeState.mDicNodeStatePrevWord.getPrevWordStart() - 1;
+ const int prevWordLen = mDicNodeState.mDicNodeStateOutput.getPrevWordsLength()
+ - mDicNodeState.mDicNodeStateOutput.getPrevWordStart() - 1;
return !(currentDepth > 0 && (currentDepth != 1 || prevWordLen != 1));
}
+ bool hasMatchedOrProximityCodePoints() const {
+ // This DicNode does not have matched or proximity code points when all code points have
+ // been handled as edit corrections or completion so far.
+ const int editCorrectionCount = mDicNodeState.mDicNodeStateScoring.getEditCorrectionCount();
+ const int completionCount = mDicNodeState.mDicNodeStateScoring.getCompletionCount();
+ return (editCorrectionCount + completionCount) < getNodeCodePointCount();
+ }
+
bool isTotalInputSizeExceedingLimit() const {
- const int prevWordsLen = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength();
- const int currentWordDepth = getNodeCodePointCount();
// TODO: 3 can be 2? Needs to be investigated.
// TODO: Have a const variable for 3 (or 2)
- return prevWordsLen + currentWordDepth > MAX_WORD_LENGTH - 3;
- }
-
- // TODO: This may be defective. Needs to be revised.
- bool truncateNode(const DicNode *const topNode, const int inputCommitPoint) {
- const int prevWordLenOfTop = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength();
- int newPrevWordStartIndex = inputCommitPoint;
- int charCount = 0;
- // Find new word start index
- for (int i = 0; i < prevWordLenOfTop; ++i) {
- const int c = mDicNodeState.mDicNodeStatePrevWord.getPrevWordCodePointAt(i);
- // TODO: Check other separators.
- if (c != KEYCODE_SPACE && c != KEYCODE_SINGLE_QUOTE) {
- if (charCount == inputCommitPoint) {
- newPrevWordStartIndex = i;
- break;
- }
- ++charCount;
- }
- }
- if (!mDicNodeState.mDicNodeStatePrevWord.startsWith(
- &topNode->mDicNodeState.mDicNodeStatePrevWord, newPrevWordStartIndex - 1)) {
- // Node mismatch.
- return false;
- }
- mDicNodeState.mDicNodeStateInput.truncate(inputCommitPoint);
- mDicNodeState.mDicNodeStatePrevWord.truncate(newPrevWordStartIndex);
- return true;
+ return getTotalNodeCodePointCount() > MAX_WORD_LENGTH - 3;
}
void outputResult(int *dest) const {
- const uint16_t prevWordLength = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength();
- const uint16_t currentDepth = getNodeCodePointCount();
- DicNodeUtils::appendTwoWords(mDicNodeState.mDicNodeStatePrevWord.mPrevWord,
- prevWordLength, getOutputWordBuf(), currentDepth, dest);
+ memmove(dest, getOutputWordBuf(), getTotalNodeCodePointCount() * sizeof(dest[0]));
DUMP_WORD_AND_SCORE("OUTPUT");
}
// "Total" in this context (and other methods in this class) means the whole suggestion. When
// this represents a multi-word suggestion, the referenced PtNode (in mDicNodeState) is only
// the one that corresponds to the last word of the suggestion, and all the previous words
- // are concatenated together in mPrevWord - which contains a space at the end.
+ // are concatenated together in mDicNodeStateOutput.
int getTotalNodeSpaceCount() const {
- if (isFirstWord()) return 0;
- return CharUtils::getSpaceCount(mDicNodeState.mDicNodeStatePrevWord.mPrevWord,
- mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength());
+ if (!hasMultipleWords()) {
+ return 0;
+ }
+ return CharUtils::getSpaceCount(mDicNodeState.mDicNodeStateOutput.getCodePointBuf(),
+ mDicNodeState.mDicNodeStateOutput.getPrevWordsLength());
}
int getSecondWordFirstInputIndex(const ProximityInfoState *const pInfoState) const {
- const int inputIndex = mDicNodeState.mDicNodeStatePrevWord.getSecondWordFirstInputIndex();
+ const int inputIndex = mDicNodeState.mDicNodeStateOutput.getSecondWordFirstInputIndex();
if (inputIndex == NOT_AN_INDEX) {
return NOT_AN_INDEX;
} else {
@@ -341,7 +281,7 @@ class DicNode {
}
bool hasMultipleWords() const {
- return mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() > 0;
+ return mDicNodeState.mDicNodeStateOutput.getPrevWordCount() > 0;
}
int getProximityCorrectionCount() const {
@@ -373,13 +313,8 @@ class DicNode {
return mDicNodeState.mDicNodeStateScoring.getCompoundDistance(languageWeight);
}
- // Used to commit input partially
- int getPrevWordNodePos() const {
- return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos();
- }
-
AK_FORCE_INLINE const int *getOutputWordBuf() const {
- return mDicNodeState.mDicNodeStateOutput.mCodePointsBuf;
+ return mDicNodeState.mDicNodeStateOutput.getCodePointBuf();
}
int getPrevCodePointG(int pointerId) const {
@@ -410,7 +345,7 @@ class DicNode {
// TODO: Remove once touch path is merged into ProximityInfoState
// Note: Returned codepoint may be a digraph codepoint if the node is in a composite glyph.
int getNodeCodePoint() const {
- const int codePoint = mDicNodeProperties.getNodeCodePoint();
+ const int codePoint = mDicNodeProperties.getDicNodeCodePoint();
const DigraphUtils::DigraphCodePointIndex digraphIndex =
mDicNodeState.mDicNodeStateScoring.getDigraphIndex();
if (digraphIndex == DigraphUtils::NOT_A_DIGRAPH_INDEX) {
@@ -423,8 +358,8 @@ class DicNode {
// Utils for cost calculation //
////////////////////////////////
AK_FORCE_INLINE bool isSameNodeCodePoint(const DicNode *const dicNode) const {
- return mDicNodeProperties.getNodeCodePoint()
- == dicNode->mDicNodeProperties.getNodeCodePoint();
+ return mDicNodeProperties.getDicNodeCodePoint()
+ == dicNode->mDicNodeProperties.getDicNodeCodePoint();
}
// TODO: remove
@@ -440,10 +375,6 @@ class DicNode {
return mDicNodeState.mDicNodeStateScoring.getSpatialDistance();
}
- float getLanguageDistanceForScoring() const {
- return mDicNodeState.mDicNodeStateScoring.getLanguageDistance();
- }
-
// For space-aware gestures, we store the normalized distance at the char index
// that ends the first word of the suggestion. We call this the distance after
// first word.
@@ -451,22 +382,10 @@ class DicNode {
return mDicNodeState.mDicNodeStateScoring.getNormalizedCompoundDistanceAfterFirstWord();
}
- float getLanguageDistanceRatePerWordForScoring() const {
- const float langDist = getLanguageDistanceForScoring();
- const float totalWordCount =
- static_cast<float>(mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() + 1);
- return langDist / totalWordCount;
- }
-
float getRawLength() const {
return mDicNodeState.mDicNodeStateScoring.getRawLength();
}
- bool isLessThanOneErrorForScoring() const {
- return mDicNodeState.mDicNodeStateScoring.getEditCorrectionCount()
- + mDicNodeState.mDicNodeStateScoring.getProximityCorrectionCount() <= 1;
- }
-
DoubleLetterLevel getDoubleLetterLevel() const {
return mDicNodeState.mDicNodeStateScoring.getDoubleLetterLevel();
}
@@ -484,8 +403,8 @@ class DicNode {
mDicNodeState.mDicNodeStateScoring.advanceDigraphIndex();
}
- bool isExactMatch() const {
- return mDicNodeState.mDicNodeStateScoring.isExactMatch();
+ ErrorTypeUtils::ErrorType getContainedErrorTypes() const {
+ return mDicNodeState.mDicNodeStateScoring.getContainedErrorTypes();
}
bool isBlacklistedOrNotAWord() const {
@@ -498,7 +417,7 @@ class DicNode {
// Returns code point count including spaces
inline uint16_t getTotalNodeCodePointCount() const {
- return getNodeCodePointCount() + mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength();
+ return getNodeCodePointCount() + mDicNodeState.mDicNodeStateOutput.getPrevWordsLength();
}
AK_FORCE_INLINE void dump(const char *tag) const {
@@ -510,24 +429,10 @@ class DicNode {
#endif
}
- void setReleaseListener(DicNodeReleaseListener *releaseListener) {
- mReleaseListener = releaseListener;
- }
-
- AK_FORCE_INLINE bool compare(const DicNode *right) {
- if (!isUsed() && !right->isUsed()) {
- // Compare pointer values here for stable comparison
- return this > right;
- }
- if (!isUsed()) {
- return true;
- }
- if (!right->isUsed()) {
- return false;
- }
+ AK_FORCE_INLINE bool compare(const DicNode *right) const {
// Promote exact matches to prevent them from being pruned.
- const bool leftExactMatch = isExactMatch();
- const bool rightExactMatch = right->isExactMatch();
+ const bool leftExactMatch = ErrorTypeUtils::isExactMatch(getContainedErrorTypes());
+ const bool rightExactMatch = ErrorTypeUtils::isExactMatch(right->getContainedErrorTypes());
if (leftExactMatch != rightExactMatch) {
return leftExactMatch;
}
@@ -545,8 +450,9 @@ class DicNode {
return depthDiff > 0;
}
for (int i = 0; i < depth; ++i) {
- const int codePoint = mDicNodeState.mDicNodeStateOutput.getCodePointAt(i);
- const int rightCodePoint = right->mDicNodeState.mDicNodeStateOutput.getCodePointAt(i);
+ const int codePoint = mDicNodeState.mDicNodeStateOutput.getCurrentWordCodePointAt(i);
+ const int rightCodePoint =
+ right->mDicNodeState.mDicNodeStateOutput.getCurrentWordCodePointAt(i);
if (codePoint != rightCodePoint) {
return rightCodePoint > codePoint;
}
@@ -560,8 +466,6 @@ class DicNode {
DicNodeState mDicNodeState;
// TODO: Remove
bool mIsCachedForNextSuggestion;
- bool mIsUsed;
- DicNodeReleaseListener *mReleaseListener;
AK_FORCE_INLINE int getTotalInputIndex() const {
int index = 0;
@@ -574,7 +478,8 @@ class DicNode {
// Caveat: Must not be called outside Weighting
// This restriction is guaranteed by "friend"
AK_FORCE_INLINE void addCost(const float spatialCost, const float languageCost,
- const bool doNormalization, const int inputSize, const ErrorType errorType) {
+ const bool doNormalization, const int inputSize,
+ const ErrorTypeUtils::ErrorType errorType) {
if (DEBUG_GEO_FULL) {
LOGI_SHOW_ADD_COST_PROP;
}
@@ -602,8 +507,8 @@ class DicNode {
}
AK_FORCE_INLINE void updateInputIndexG(const DicNode_InputStateG *const inputStateG) {
- if (mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() == 1 && isFirstLetter()) {
- mDicNodeState.mDicNodeStatePrevWord.setSecondWordFirstInputIndex(
+ if (mDicNodeState.mDicNodeStateOutput.getPrevWordCount() == 1 && isFirstLetter()) {
+ mDicNodeState.mDicNodeStateOutput.setSecondWordFirstInputIndex(
inputStateG->mInputIndex);
}
mDicNodeState.mDicNodeStateInput.updateInputIndexG(inputStateG->mPointerId,
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_pool.h b/native/jni/src/suggest/core/dicnode/dic_node_pool.h
new file mode 100644
index 000000000..a660b744f
--- /dev/null
+++ b/native/jni/src/suggest/core/dicnode/dic_node_pool.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DIC_NODE_POOL_H
+#define LATINIME_DIC_NODE_POOL_H
+
+#include <deque>
+#include <unordered_set>
+#include <vector>
+
+#include "defines.h"
+#include "suggest/core/dicnode/dic_node.h"
+
+namespace latinime {
+
+class DicNodePool {
+ public:
+ explicit DicNodePool(const int capacity) : mDicNodes(), mPooledDicNodes() {
+ reset(capacity);
+ }
+
+ void reset(const int capacity) {
+ if (capacity == static_cast<int>(mDicNodes.size())
+ && capacity == static_cast<int>(mPooledDicNodes.size())) {
+ // No need to reset.
+ return;
+ }
+ mDicNodes.resize(capacity);
+ mDicNodes.shrink_to_fit();
+ mPooledDicNodes.clear();
+ for (auto &dicNode : mDicNodes) {
+ mPooledDicNodes.emplace_back(&dicNode);
+ }
+ }
+
+ // Get a DicNode instance from the pool. The instance has to be returned by placeBackInstance().
+ DicNode *getInstance() {
+ if (mPooledDicNodes.empty()) {
+ return nullptr;
+ }
+ DicNode *const dicNode = mPooledDicNodes.back();
+ mPooledDicNodes.pop_back();
+ return dicNode;
+ }
+
+ // Place an instance obtained from getInstance() back into the pool. After that, the
+ // instance must not be used until it is obtained again via getInstance().
+ void placeBackInstance(DicNode *dicNode) {
+ mPooledDicNodes.emplace_back(dicNode);
+ }
+
+ void dump() const {
+ AKLOGI("\n\n\n\n\n===========================");
+ std::unordered_set<const DicNode*> usedDicNodes;
+ for (const auto &dicNode : mDicNodes) {
+ usedDicNodes.insert(&dicNode);
+ }
+ for (const auto &dicNodePtr : mPooledDicNodes) {
+ usedDicNodes.erase(dicNodePtr);
+ }
+ for (const auto &usedDicNodePtr : usedDicNodes) {
+ usedDicNodePtr->dump("DIC_NODE_POOL: ");
+ }
+ AKLOGI("===========================\n\n\n\n\n");
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DicNodePool);
+
+ std::vector<DicNode> mDicNodes;
+ std::deque<DicNode*> mPooledDicNodes;
+};
+} // namespace latinime
+#endif // LATINIME_DIC_NODE_POOL_H
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_priority_queue.h b/native/jni/src/suggest/core/dicnode/dic_node_priority_queue.h
index 7461f0cc6..7b753f2e4 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_priority_queue.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node_priority_queue.h
@@ -17,43 +17,36 @@
#ifndef LATINIME_DIC_NODE_PRIORITY_QUEUE_H
#define LATINIME_DIC_NODE_PRIORITY_QUEUE_H
+#include <algorithm>
#include <queue>
#include <vector>
#include "defines.h"
#include "suggest/core/dicnode/dic_node.h"
-#include "suggest/core/dicnode/dic_node_release_listener.h"
+#include "suggest/core/dicnode/dic_node_pool.h"
namespace latinime {
-class DicNodePriorityQueue : public DicNodeReleaseListener {
+class DicNodePriorityQueue {
public:
AK_FORCE_INLINE explicit DicNodePriorityQueue(const int capacity)
- : mCapacity(capacity), mMaxSize(capacity), mDicNodesBuf(),
- mUnusedNodeIndices(), mNextUnusedNodeId(0), mDicNodesQueue() {
- mDicNodesBuf.resize(mCapacity + 1);
- mUnusedNodeIndices.resize(mCapacity + 1);
- clearAndResizeToCapacity();
+ : mMaxSize(capacity), mDicNodesQueue(), mDicNodePool(capacity) {
+ clear();
}
// Non virtual inline destructor -- never inherit this class
AK_FORCE_INLINE ~DicNodePriorityQueue() {}
- int getSize() const {
+ AK_FORCE_INLINE int getSize() const {
return static_cast<int>(mDicNodesQueue.size());
}
- int getMaxSize() const {
+ AK_FORCE_INLINE int getMaxSize() const {
return mMaxSize;
}
AK_FORCE_INLINE void setMaxSize(const int maxSize) {
- ASSERT(maxSize <= mCapacity);
- mMaxSize = min(maxSize, mCapacity);
- }
-
- AK_FORCE_INLINE void clearAndResizeToCapacity() {
- clearAndResize(mCapacity);
+ mMaxSize = maxSize;
}
AK_FORCE_INLINE void clear() {
@@ -61,25 +54,32 @@ class DicNodePriorityQueue : public DicNodeReleaseListener {
}
AK_FORCE_INLINE void clearAndResize(const int maxSize) {
- ASSERT(maxSize <= mCapacity);
+ mMaxSize = maxSize;
while (!mDicNodesQueue.empty()) {
mDicNodesQueue.pop();
}
- setMaxSize(maxSize);
- for (int i = 0; i < mCapacity + 1; ++i) {
- mDicNodesBuf[i].remove();
- mDicNodesBuf[i].setReleaseListener(this);
- mUnusedNodeIndices[i] = i == mCapacity ? NOT_A_NODE_ID : static_cast<int>(i) + 1;
- }
- mNextUnusedNodeId = 0;
+ mDicNodePool.reset(mMaxSize + 1);
}
- // Copy
- AK_FORCE_INLINE DicNode *copyPush(DicNode *dicNode) {
- return copyPush(dicNode, mMaxSize);
+ AK_FORCE_INLINE void copyPush(const DicNode *const dicNode) {
+ DicNode *const pooledDicNode = newDicNode(dicNode);
+ if (!pooledDicNode) {
+ return;
+ }
+ if (getSize() < mMaxSize) {
+ mDicNodesQueue.push(pooledDicNode);
+ return;
+ }
+ if (betterThanWorstDicNode(pooledDicNode)) {
+ mDicNodePool.placeBackInstance(mDicNodesQueue.top());
+ mDicNodesQueue.pop();
+ mDicNodesQueue.push(pooledDicNode);
+ return;
+ }
+ mDicNodePool.placeBackInstance(pooledDicNode);
}
- AK_FORCE_INLINE void copyPop(DicNode *dest) {
+ AK_FORCE_INLINE void copyPop(DicNode *const dest) {
if (mDicNodesQueue.empty()) {
ASSERT(false);
return;
@@ -88,62 +88,34 @@ class DicNodePriorityQueue : public DicNodeReleaseListener {
if (dest) {
DicNodeUtils::initByCopy(node, dest);
}
- node->remove();
+ mDicNodePool.placeBackInstance(node);
mDicNodesQueue.pop();
}
- void onReleased(DicNode *dicNode) {
- const int index = static_cast<int>(dicNode - &mDicNodesBuf[0]);
- if (mUnusedNodeIndices[index] != NOT_A_NODE_ID) {
- // it's already released
- return;
- }
- mUnusedNodeIndices[index] = mNextUnusedNodeId;
- mNextUnusedNodeId = index;
- ASSERT(index >= 0 && index < (mCapacity + 1));
- }
-
- AK_FORCE_INLINE void dump() const {
- AKLOGI("\n\n\n\n\n===========================");
- for (int i = 0; i < mCapacity + 1; ++i) {
- if (mDicNodesBuf[i].isUsed()) {
- mDicNodesBuf[i].dump("QUEUE: ");
- }
- }
- AKLOGI("===========================\n\n\n\n\n");
+ AK_FORCE_INLINE void dump() {
+ mDicNodePool.dump();
}
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DicNodePriorityQueue);
- static const int NOT_A_NODE_ID = -1;
- AK_FORCE_INLINE static bool compareDicNode(DicNode *left, DicNode *right) {
+ AK_FORCE_INLINE static bool compareDicNode(const DicNode *const left,
+ const DicNode *const right) {
return left->compare(right);
}
struct DicNodeComparator {
- bool operator ()(DicNode *left, DicNode *right) {
+ bool operator ()(const DicNode *left, const DicNode *right) const {
return compareDicNode(left, right);
}
};
typedef std::priority_queue<DicNode *, std::vector<DicNode *>, DicNodeComparator> DicNodesQueue;
- const int mCapacity;
int mMaxSize;
- std::vector<DicNode> mDicNodesBuf; // of each element of mDicNodesBuf respectively
- std::vector<int> mUnusedNodeIndices;
- int mNextUnusedNodeId;
DicNodesQueue mDicNodesQueue;
+ DicNodePool mDicNodePool;
- inline bool isFull(const int maxSize) const {
- return getSize() >= maxSize;
- }
-
- AK_FORCE_INLINE void pop() {
- copyPop(0);
- }
-
- AK_FORCE_INLINE bool betterThanWorstDicNode(DicNode *dicNode) const {
+ AK_FORCE_INLINE bool betterThanWorstDicNode(const DicNode *const dicNode) const {
DicNode *worstNode = mDicNodesQueue.top();
if (!worstNode) {
return true;
@@ -151,61 +123,13 @@ class DicNodePriorityQueue : public DicNodeReleaseListener {
return compareDicNode(dicNode, worstNode);
}
- AK_FORCE_INLINE DicNode *searchEmptyDicNode() {
- if (mCapacity == 0) {
- return 0;
- }
- if (mNextUnusedNodeId == NOT_A_NODE_ID) {
- AKLOGI("No unused node found.");
- for (int i = 0; i < mCapacity + 1; ++i) {
- AKLOGI("Dump node availability, %d, %d, %d",
- i, mDicNodesBuf[i].isUsed(), mUnusedNodeIndices[i]);
- }
- ASSERT(false);
- return 0;
- }
- DicNode *dicNode = &mDicNodesBuf[mNextUnusedNodeId];
- markNodeAsUsed(dicNode);
- return dicNode;
- }
-
- AK_FORCE_INLINE void markNodeAsUsed(DicNode *dicNode) {
- const int index = static_cast<int>(dicNode - &mDicNodesBuf[0]);
- mNextUnusedNodeId = mUnusedNodeIndices[index];
- mUnusedNodeIndices[index] = NOT_A_NODE_ID;
- ASSERT(index >= 0 && index < (mCapacity + 1));
- }
-
- AK_FORCE_INLINE DicNode *pushPoolNodeWithMaxSize(DicNode *dicNode, const int maxSize) {
- if (!dicNode) {
- return 0;
- }
- if (!isFull(maxSize)) {
- mDicNodesQueue.push(dicNode);
- return dicNode;
- }
- if (betterThanWorstDicNode(dicNode)) {
- pop();
- mDicNodesQueue.push(dicNode);
- return dicNode;
- }
- dicNode->remove();
- return 0;
- }
-
- // Copy
- AK_FORCE_INLINE DicNode *copyPush(DicNode *dicNode, const int maxSize) {
- return pushPoolNodeWithMaxSize(newDicNode(dicNode), maxSize);
- }
-
- AK_FORCE_INLINE DicNode *newDicNode(DicNode *dicNode) {
- DicNode *newNode = searchEmptyDicNode();
+ AK_FORCE_INLINE DicNode *newDicNode(const DicNode *const dicNode) {
+ DicNode *newNode = mDicNodePool.getInstance();
if (newNode) {
DicNodeUtils::initByCopy(dicNode, newNode);
}
return newNode;
}
-
};
} // namespace latinime
#endif // LATINIME_DIC_NODE_PRIORITY_QUEUE_H
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
index ec65114c7..4445f4aaf 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
+++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
@@ -16,13 +16,10 @@
#include "suggest/core/dicnode/dic_node_utils.h"
-#include <cstring>
-
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_vector.h"
#include "suggest/core/dictionary/multi_bigram_map.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
-#include "utils/char_utils.h"
namespace latinime {
@@ -32,34 +29,35 @@ namespace latinime {
/* static */ void DicNodeUtils::initAsRoot(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- const int prevWordNodePos, DicNode *const newRootNode) {
- newRootNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordNodePos);
+ const int *const prevWordsPtNodePos, DicNode *const newRootDicNode) {
+ newRootDicNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordsPtNodePos);
}
/*static */ void DicNodeUtils::initAsRootWithPreviousWord(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- DicNode *const prevWordLastNode, DicNode *const newRootNode) {
- newRootNode->initAsRootWithPreviousWord(
- prevWordLastNode, dictionaryStructurePolicy->getRootPosition());
+ const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode) {
+ newRootDicNode->initAsRootWithPreviousWord(
+ prevWordLastDicNode, dictionaryStructurePolicy->getRootPosition());
}
-/* static */ void DicNodeUtils::initByCopy(DicNode *srcNode, DicNode *destNode) {
- destNode->initByCopy(srcNode);
+/* static */ void DicNodeUtils::initByCopy(const DicNode *const srcDicNode,
+ DicNode *const destDicNode) {
+ destDicNode->initByCopy(srcDicNode);
}
///////////////////////////////////
// Traverse node expansion utils //
///////////////////////////////////
-/* static */ void DicNodeUtils::getAllChildDicNodes(DicNode *dicNode,
+/* static */ void DicNodeUtils::getAllChildDicNodes(const DicNode *dicNode,
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- DicNodeVector *childDicNodes) {
+ DicNodeVector *const childDicNodes) {
if (dicNode->isTotalInputSizeExceedingLimit()) {
return;
}
if (!dicNode->isLeavingNode()) {
childDicNodes->pushPassingChild(dicNode);
} else {
- dictionaryStructurePolicy->createAndGetAllChildNodes(dicNode, childDicNodes);
+ dictionaryStructurePolicy->createAndGetAllChildDicNodes(dicNode, childDicNodes);
}
}
@@ -71,11 +69,11 @@ namespace latinime {
*/
/* static */ float DicNodeUtils::getBigramNodeImprobability(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- const DicNode *const node, MultiBigramMap *multiBigramMap) {
- if (node->hasMultipleWords() && !node->isValidMultipleWordSuggestion()) {
+ const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) {
+ if (dicNode->hasMultipleWords() && !dicNode->isValidMultipleWordSuggestion()) {
return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
}
- const int probability = getBigramNodeProbability(dictionaryStructurePolicy, node,
+ const int probability = getBigramNodeProbability(dictionaryStructurePolicy, dicNode,
multiBigramMap);
// TODO: This equation to calculate the improbability looks unreasonable. Investigate this.
const float cost = static_cast<float>(MAX_PROBABILITY - probability)
@@ -85,52 +83,22 @@ namespace latinime {
/* static */ int DicNodeUtils::getBigramNodeProbability(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- const DicNode *const node, MultiBigramMap *multiBigramMap) {
- const int unigramProbability = node->getProbability();
- const int wordPos = node->getPos();
- const int prevWordPos = node->getPrevWordPos();
- if (NOT_A_DICT_POS == wordPos || NOT_A_DICT_POS == prevWordPos) {
+ const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) {
+ const int unigramProbability = dicNode->getProbability();
+ const int ptNodePos = dicNode->getPtNodePos();
+ const int prevWordTerminalPtNodePos = dicNode->getNthPrevWordTerminalPtNodePos(1 /* n */);
+ if (NOT_A_DICT_POS == ptNodePos || NOT_A_DICT_POS == prevWordTerminalPtNodePos) {
// Note: Normally ptNodePos comes from the dictionary and should never equal
// NOT_A_DICT_POS.
return dictionaryStructurePolicy->getProbability(unigramProbability,
NOT_A_PROBABILITY);
}
if (multiBigramMap) {
- return multiBigramMap->getBigramProbability(dictionaryStructurePolicy, prevWordPos,
- wordPos, unigramProbability);
+ return multiBigramMap->getBigramProbability(dictionaryStructurePolicy,
+ prevWordTerminalPtNodePos, ptNodePos, unigramProbability);
}
return dictionaryStructurePolicy->getProbability(unigramProbability,
NOT_A_PROBABILITY);
}
-////////////////
-// Char utils //
-////////////////
-
-// TODO: Move to char_utils?
-/* static */ int DicNodeUtils::appendTwoWords(const int *const src0, const int16_t length0,
- const int *const src1, const int16_t length1, int *dest) {
- int actualLength0 = 0;
- for (int i = 0; i < length0; ++i) {
- if (src0[i] == 0) {
- break;
- }
- actualLength0 = i + 1;
- }
- actualLength0 = min(actualLength0, MAX_WORD_LENGTH);
- memcpy(dest, src0, actualLength0 * sizeof(dest[0]));
- if (!src1 || length1 == 0) {
- return actualLength0;
- }
- int actualLength1 = 0;
- for (int i = 0; i < length1; ++i) {
- if (src1[i] == 0) {
- break;
- }
- actualLength1 = i + 1;
- }
- actualLength1 = min(actualLength1, MAX_WORD_LENGTH - actualLength0);
- memcpy(&dest[actualLength0], src1, actualLength1 * sizeof(dest[0]));
- return actualLength0 + actualLength1;
-}
} // namespace latinime
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.h b/native/jni/src/suggest/core/dicnode/dic_node_utils.h
index 3fb351a61..00e80c604 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_utils.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.h
@@ -17,8 +17,6 @@
#ifndef LATINIME_DIC_NODE_UTILS_H
#define LATINIME_DIC_NODE_UTILS_H
-#include <stdint.h>
-
#include "defines.h"
namespace latinime {
@@ -30,21 +28,19 @@ class MultiBigramMap;
class DicNodeUtils {
public:
- static int appendTwoWords(const int *src0, const int16_t length0, const int *src1,
- const int16_t length1, int *dest);
static void initAsRoot(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- const int prevWordNodePos, DicNode *newRootNode);
+ const int *const prevWordPtNodePos, DicNode *const newRootDicNode);
static void initAsRootWithPreviousWord(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- DicNode *prevWordLastNode, DicNode *newRootNode);
- static void initByCopy(DicNode *srcNode, DicNode *destNode);
- static void getAllChildDicNodes(DicNode *dicNode,
+ const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode);
+ static void initByCopy(const DicNode *const srcDicNode, DicNode *const destDicNode);
+ static void getAllChildDicNodes(const DicNode *dicNode,
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
DicNodeVector *childDicNodes);
static float getBigramNodeImprobability(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- const DicNode *const node, MultiBigramMap *const multiBigramMap);
+ const DicNode *const dicNode, MultiBigramMap *const multiBigramMap);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DicNodeUtils);
@@ -53,7 +49,7 @@ class DicNodeUtils {
static int getBigramNodeProbability(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- const DicNode *const node, MultiBigramMap *multiBigramMap);
+ const DicNode *const dicNode, MultiBigramMap *const multiBigramMap);
};
} // namespace latinime
#endif // LATINIME_DIC_NODE_UTILS_H
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_vector.h b/native/jni/src/suggest/core/dicnode/dic_node_vector.h
index 42addae8d..54cde1988 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_vector.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node_vector.h
@@ -32,10 +32,10 @@ class DicNodeVector {
#else
static const int DEFAULT_NODES_SIZE_FOR_OPTIMIZATION = 60;
#endif
- AK_FORCE_INLINE DicNodeVector() : mDicNodes(0), mLock(false), mEmptyNode() {}
+ AK_FORCE_INLINE DicNodeVector() : mDicNodes(), mLock(false) {}
// Specify the capacity of the vector
- AK_FORCE_INLINE DicNodeVector(const int size) : mDicNodes(0), mLock(false), mEmptyNode() {
+ AK_FORCE_INLINE DicNodeVector(const int size) : mDicNodes(), mLock(false) {
mDicNodes.reserve(size);
}
@@ -52,24 +52,20 @@ class DicNodeVector {
return static_cast<int>(mDicNodes.size());
}
- bool exceeds(const size_t limit) const {
- return mDicNodes.size() >= limit;
- }
-
- void pushPassingChild(DicNode *dicNode) {
+ void pushPassingChild(const DicNode *dicNode) {
ASSERT(!mLock);
- mDicNodes.push_back(mEmptyNode);
+ mDicNodes.emplace_back();
mDicNodes.back().initAsPassingChild(dicNode);
}
- void pushLeavingChild(const DicNode *const dicNode, const int pos, const int childrenPos,
- const int probability, const bool isTerminal, const bool hasChildren,
- const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount,
- const int *const mergedNodeCodePoints) {
+ void pushLeavingChild(const DicNode *const dicNode, const int ptNodePos,
+ const int childrenPtNodeArrayPos, const int probability, const bool isTerminal,
+ const bool hasChildren, const bool isBlacklistedOrNotAWord,
+ const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
ASSERT(!mLock);
- mDicNodes.push_back(mEmptyNode);
- mDicNodes.back().initAsChild(dicNode, pos, childrenPos, probability, isTerminal,
- hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount,
+ mDicNodes.emplace_back();
+ mDicNodes.back().initAsChild(dicNode, ptNodePos, childrenPtNodeArrayPos, probability,
+ isTerminal, hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount,
mergedNodeCodePoints);
}
@@ -80,14 +76,13 @@ class DicNodeVector {
DicNode *front() {
ASSERT(1 <= static_cast<int>(mDicNodes.size()));
- return &mDicNodes[0];
+ return &mDicNodes.front();
}
private:
DISALLOW_COPY_AND_ASSIGN(DicNodeVector);
std::vector<DicNode> mDicNodes;
bool mLock;
- DicNode mEmptyNode;
};
} // namespace latinime
#endif // LATINIME_DIC_NODE_VECTOR_H
diff --git a/native/jni/src/suggest/core/dicnode/dic_nodes_cache.cpp b/native/jni/src/suggest/core/dicnode/dic_nodes_cache.cpp
index b6be47e90..ef4a6b5d8 100644
--- a/native/jni/src/suggest/core/dicnode/dic_nodes_cache.cpp
+++ b/native/jni/src/suggest/core/dicnode/dic_nodes_cache.cpp
@@ -28,37 +28,4 @@ const int DicNodesCache::LARGE_PRIORITY_QUEUE_CAPACITY = 310;
// Capacity for reducing memory footprint.
const int DicNodesCache::SMALL_PRIORITY_QUEUE_CAPACITY = 100;
-/**
- * Truncates all of the dicNodes so that they start at the given commit point.
- * Only called for multi-word typing input.
- */
-DicNode *DicNodesCache::setCommitPoint(int commitPoint) {
- std::list<DicNode> dicNodesList;
- while (mCachedDicNodesForContinuousSuggestion->getSize() > 0) {
- DicNode dicNode;
- mCachedDicNodesForContinuousSuggestion->copyPop(&dicNode);
- dicNodesList.push_front(dicNode);
- }
-
- // Get the starting words of the top scoring dicNode (last dicNode popped from priority queue)
- // up to the commit point. These words have already been committed to the text view.
- DicNode *topDicNode = &dicNodesList.front();
- DicNode topDicNodeCopy;
- DicNodeUtils::initByCopy(topDicNode, &topDicNodeCopy);
-
- // Keep only those dicNodes that match the same starting words.
- std::list<DicNode>::iterator iter;
- for (iter = dicNodesList.begin(); iter != dicNodesList.end(); iter++) {
- DicNode *dicNode = &*iter;
- if (dicNode->truncateNode(&topDicNodeCopy, commitPoint)) {
- mCachedDicNodesForContinuousSuggestion->copyPush(dicNode);
- } else {
- // Top dicNode should be reprocessed.
- ASSERT(dicNode != topDicNode);
- DicNode::managedDelete(dicNode);
- }
- }
- mInputIndex -= commitPoint;
- return topDicNode;
-}
} // namespace latinime
diff --git a/native/jni/src/suggest/core/dicnode/dic_nodes_cache.h b/native/jni/src/suggest/core/dicnode/dic_nodes_cache.h
index 8493b6a8b..fb76c731f 100644
--- a/native/jni/src/suggest/core/dicnode/dic_nodes_cache.h
+++ b/native/jni/src/suggest/core/dicnode/dic_nodes_cache.h
@@ -17,7 +17,7 @@
#ifndef LATINIME_DIC_NODES_CACHE_H
#define LATINIME_DIC_NODES_CACHE_H
-#include <stdint.h>
+#include <algorithm>
#include "defines.h"
#include "suggest/core/dicnode/dic_node_priority_queue.h"
@@ -48,15 +48,14 @@ class DicNodesCache {
AK_FORCE_INLINE void reset(const int nextActiveSize, const int terminalSize) {
mInputIndex = 0;
mLastCachedInputIndex = 0;
- // We want to use the max capacity for the current active dic node queue.
- mActiveDicNodes->clearAndResizeToCapacity();
- // nextActiveSize is used to limit the next iteration's active dic node size.
- const int nextActiveSizeFittingToTheCapacity = min(nextActiveSize, getCacheCapacity());
+ // The size of the current active DicNode queue doesn't have to be changed.
+ mActiveDicNodes->clear();
+ // nextActiveSize is used to limit the next iteration's active DicNode size.
+ const int nextActiveSizeFittingToTheCapacity = std::min(nextActiveSize, getCacheCapacity());
mNextActiveDicNodes->clearAndResize(nextActiveSizeFittingToTheCapacity);
mTerminalDicNodes->clearAndResize(terminalSize);
- // We want to use the max capacity for the cached dic nodes that will be used for the
- // continuous suggestion.
- mCachedDicNodesForContinuousSuggestion->clearAndResizeToCapacity();
+ // The size of the cached DicNode queue doesn't have to be changed.
+ mCachedDicNodesForContinuousSuggestion->clear();
}
AK_FORCE_INLINE void continueSearch() {
@@ -75,8 +74,6 @@ class DicNodesCache {
moveNodesAndReturnReusableEmptyQueue(mNextActiveDicNodes, &mActiveDicNodes);
}
- DicNode *setCommitPoint(int commitPoint);
-
int activeSize() const { return mActiveDicNodes->getSize(); }
int terminalSize() const { return mTerminalDicNodes->getSize(); }
bool isLookAheadCorrectionInputIndex(const int inputIndex) const {
@@ -96,19 +93,12 @@ class DicNodesCache {
mActiveDicNodes->copyPush(dicNode);
}
- AK_FORCE_INLINE bool copyPushContinue(DicNode *dicNode) {
- return mCachedDicNodesForContinuousSuggestion->copyPush(dicNode);
+ AK_FORCE_INLINE void copyPushContinue(DicNode *dicNode) {
+ mCachedDicNodesForContinuousSuggestion->copyPush(dicNode);
}
AK_FORCE_INLINE void copyPushNextActive(DicNode *dicNode) {
- DicNode *pushedDicNode = mNextActiveDicNodes->copyPush(dicNode);
- if (!pushedDicNode) {
- if (dicNode->isCached()) {
- dicNode->remove();
- }
- // We simply drop any dic node that was not cached, ignoring the slim chance
- // that one of its children represents what the user really wanted.
- }
+ mNextActiveDicNodes->copyPush(dicNode);
}
void popTerminal(DicNode *dest) {
diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h
index 9e0f62ceb..8202176f7 100644
--- a/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h
+++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h
@@ -17,80 +17,98 @@
#ifndef LATINIME_DIC_NODE_PROPERTIES_H
#define LATINIME_DIC_NODE_PROPERTIES_H
-#include <stdint.h>
+#include <cstdint>
#include "defines.h"
namespace latinime {
/**
- * Node for traversing the lexicon trie.
+ * PtNode information related to the DicNode from the lexicon trie.
*/
-// TODO: Introduce a dictionary node class which has attribute members required to understand the
-// dictionary structure.
class DicNodeProperties {
public:
AK_FORCE_INLINE DicNodeProperties()
- : mPos(0), mChildrenPos(0), mProbability(0), mNodeCodePoint(0), mIsTerminal(false),
- mHasChildren(false), mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0) {}
+ : mPtNodePos(NOT_A_DICT_POS), mChildrenPtNodeArrayPos(NOT_A_DICT_POS),
+ mProbability(NOT_A_PROBABILITY), mDicNodeCodePoint(NOT_A_CODE_POINT),
+ mIsTerminal(false), mHasChildrenPtNodes(false),
+ mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0) {}
- virtual ~DicNodeProperties() {}
+ ~DicNodeProperties() {}
// Should be called only once per DicNode initialization.
void init(const int pos, const int childrenPos, const int nodeCodePoint, const int probability,
const bool isTerminal, const bool hasChildren, const bool isBlacklistedOrNotAWord,
- const uint16_t depth, const uint16_t leavingDepth) {
- mPos = pos;
- mChildrenPos = childrenPos;
- mNodeCodePoint = nodeCodePoint;
+ const uint16_t depth, const uint16_t leavingDepth, const int *const prevWordsNodePos) {
+ mPtNodePos = pos;
+ mChildrenPtNodeArrayPos = childrenPos;
+ mDicNodeCodePoint = nodeCodePoint;
mProbability = probability;
mIsTerminal = isTerminal;
- mHasChildren = hasChildren;
+ mHasChildrenPtNodes = hasChildren;
mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord;
mDepth = depth;
mLeavingDepth = leavingDepth;
+ memmove(mPrevWordsTerminalPtNodePos, prevWordsNodePos, sizeof(mPrevWordsTerminalPtNodePos));
}
- // Init for copy
- void init(const DicNodeProperties *const nodeProp) {
- mPos = nodeProp->mPos;
- mChildrenPos = nodeProp->mChildrenPos;
- mNodeCodePoint = nodeProp->mNodeCodePoint;
- mProbability = nodeProp->mProbability;
- mIsTerminal = nodeProp->mIsTerminal;
- mHasChildren = nodeProp->mHasChildren;
- mIsBlacklistedOrNotAWord = nodeProp->mIsBlacklistedOrNotAWord;
- mDepth = nodeProp->mDepth;
- mLeavingDepth = nodeProp->mLeavingDepth;
+ // Init for root with prevWordsNodePos, which is used for n-gram
+ void init(const int rootPtNodeArrayPos, const int *const prevWordsNodePos) {
+ mPtNodePos = NOT_A_DICT_POS;
+ mChildrenPtNodeArrayPos = rootPtNodeArrayPos;
+ mDicNodeCodePoint = NOT_A_CODE_POINT;
+ mProbability = NOT_A_PROBABILITY;
+ mIsTerminal = false;
+ mHasChildrenPtNodes = true;
+ mIsBlacklistedOrNotAWord = false;
+ mDepth = 0;
+ mLeavingDepth = 0;
+ memmove(mPrevWordsTerminalPtNodePos, prevWordsNodePos, sizeof(mPrevWordsTerminalPtNodePos));
+ }
+
+ void initByCopy(const DicNodeProperties *const dicNodeProp) {
+ mPtNodePos = dicNodeProp->mPtNodePos;
+ mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos;
+ mDicNodeCodePoint = dicNodeProp->mDicNodeCodePoint;
+ mProbability = dicNodeProp->mProbability;
+ mIsTerminal = dicNodeProp->mIsTerminal;
+ mHasChildrenPtNodes = dicNodeProp->mHasChildrenPtNodes;
+ mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
+ mDepth = dicNodeProp->mDepth;
+ mLeavingDepth = dicNodeProp->mLeavingDepth;
+ memmove(mPrevWordsTerminalPtNodePos, dicNodeProp->mPrevWordsTerminalPtNodePos,
+ sizeof(mPrevWordsTerminalPtNodePos));
}
// Init as passing child
- void init(const DicNodeProperties *const nodeProp, const int codePoint) {
- mPos = nodeProp->mPos;
- mChildrenPos = nodeProp->mChildrenPos;
- mNodeCodePoint = codePoint; // Overwrite the node char of a passing child
- mProbability = nodeProp->mProbability;
- mIsTerminal = nodeProp->mIsTerminal;
- mHasChildren = nodeProp->mHasChildren;
- mIsBlacklistedOrNotAWord = nodeProp->mIsBlacklistedOrNotAWord;
- mDepth = nodeProp->mDepth + 1; // Increment the depth of a passing child
- mLeavingDepth = nodeProp->mLeavingDepth;
+ void init(const DicNodeProperties *const dicNodeProp, const int codePoint) {
+ mPtNodePos = dicNodeProp->mPtNodePos;
+ mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos;
+ mDicNodeCodePoint = codePoint; // Overwrite the node char of a passing child
+ mProbability = dicNodeProp->mProbability;
+ mIsTerminal = dicNodeProp->mIsTerminal;
+ mHasChildrenPtNodes = dicNodeProp->mHasChildrenPtNodes;
+ mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
+ mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child
+ mLeavingDepth = dicNodeProp->mLeavingDepth;
+ memmove(mPrevWordsTerminalPtNodePos, dicNodeProp->mPrevWordsTerminalPtNodePos,
+ sizeof(mPrevWordsTerminalPtNodePos));
}
- int getPos() const {
- return mPos;
+ int getPtNodePos() const {
+ return mPtNodePos;
}
- int getChildrenPos() const {
- return mChildrenPos;
+ int getChildrenPtNodeArrayPos() const {
+ return mChildrenPtNodeArrayPos;
}
int getProbability() const {
return mProbability;
}
- int getNodeCodePoint() const {
- return mNodeCodePoint;
+ int getDicNodeCodePoint() const {
+ return mDicNodeCodePoint;
}
uint16_t getDepth() const {
@@ -107,26 +125,31 @@ class DicNodeProperties {
}
bool hasChildren() const {
- return mHasChildren || mDepth != mLeavingDepth;
+ return mHasChildrenPtNodes || mDepth != mLeavingDepth;
}
bool isBlacklistedOrNotAWord() const {
return mIsBlacklistedOrNotAWord;
}
+ const int *getPrevWordsTerminalPtNodePos() const {
+ return mPrevWordsTerminalPtNodePos;
+ }
+
private:
// Caution!!!
// Use a default copy constructor and an assign operator because shallow copies are ok
// for this class
- int mPos;
- int mChildrenPos;
+ int mPtNodePos;
+ int mChildrenPtNodeArrayPos;
int mProbability;
- int mNodeCodePoint;
+ int mDicNodeCodePoint;
bool mIsTerminal;
- bool mHasChildren;
+ bool mHasChildrenPtNodes;
bool mIsBlacklistedOrNotAWord;
uint16_t mDepth;
uint16_t mLeavingDepth;
+ int mPrevWordsTerminalPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
};
} // namespace latinime
#endif // LATINIME_DIC_NODE_PROPERTIES_H
diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state.h
index b0fddb724..badb1f5f2 100644
--- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state.h
+++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_state.h
@@ -20,7 +20,6 @@
#include "defines.h"
#include "suggest/core/dicnode/internal/dic_node_state_input.h"
#include "suggest/core/dicnode/internal/dic_node_state_output.h"
-#include "suggest/core/dicnode/internal/dic_node_state_prevword.h"
#include "suggest/core/dicnode/internal/dic_node_state_scoring.h"
namespace latinime {
@@ -29,44 +28,53 @@ class DicNodeState {
public:
DicNodeStateInput mDicNodeStateInput;
DicNodeStateOutput mDicNodeStateOutput;
- DicNodeStatePrevWord mDicNodeStatePrevWord;
DicNodeStateScoring mDicNodeStateScoring;
AK_FORCE_INLINE DicNodeState()
- : mDicNodeStateInput(), mDicNodeStateOutput(), mDicNodeStatePrevWord(),
- mDicNodeStateScoring() {
+ : mDicNodeStateInput(), mDicNodeStateOutput(), mDicNodeStateScoring() {}
+
+ ~DicNodeState() {}
+
+ DicNodeState &operator=(const DicNodeState& src) {
+ initByCopy(&src);
+ return *this;
}
- virtual ~DicNodeState() {}
+ DicNodeState(const DicNodeState& src)
+ : mDicNodeStateInput(), mDicNodeStateOutput(), mDicNodeStateScoring() {
+ initByCopy(&src);
+ }
- // Init with prevWordPos
- void init(const int prevWordPos) {
+ // Init for root
+ void init() {
mDicNodeStateInput.init();
mDicNodeStateOutput.init();
- mDicNodeStatePrevWord.init(prevWordPos);
mDicNodeStateScoring.init();
}
+ // Init with previous word.
+ void initAsRootWithPreviousWord(const DicNodeState *prevWordDicNodeState,
+ const int prevWordCodePointCount) {
+ mDicNodeStateOutput.init(&prevWordDicNodeState->mDicNodeStateOutput);
+ mDicNodeStateInput.init(
+ &prevWordDicNodeState->mDicNodeStateInput, true /* resetTerminalDiffCost */);
+ mDicNodeStateScoring.initByCopy(&prevWordDicNodeState->mDicNodeStateScoring);
+ }
+
// Init by copy
- AK_FORCE_INLINE void init(const DicNodeState *const src) {
- mDicNodeStateInput.init(&src->mDicNodeStateInput);
- mDicNodeStateOutput.init(&src->mDicNodeStateOutput);
- mDicNodeStatePrevWord.init(&src->mDicNodeStatePrevWord);
- mDicNodeStateScoring.init(&src->mDicNodeStateScoring);
+ AK_FORCE_INLINE void initByCopy(const DicNodeState *const src) {
+ mDicNodeStateInput.initByCopy(&src->mDicNodeStateInput);
+ mDicNodeStateOutput.initByCopy(&src->mDicNodeStateOutput);
+ mDicNodeStateScoring.initByCopy(&src->mDicNodeStateScoring);
}
// Init by copy and adding merged node code points.
void init(const DicNodeState *const src, const uint16_t mergedNodeCodePointCount,
const int *const mergedNodeCodePoints) {
- init(src);
+ initByCopy(src);
mDicNodeStateOutput.addMergedNodeCodePoints(
mergedNodeCodePointCount, mergedNodeCodePoints);
}
-
- private:
- // Caution!!!
- // Use a default copy constructor and an assign operator because shallow copies are ok
- // for this class
};
} // namespace latinime
#endif // LATINIME_DIC_NODE_STATE_H
diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_input.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_input.h
index bbd9435b5..50a37ba3e 100644
--- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_input.h
+++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_input.h
@@ -25,12 +25,7 @@ namespace latinime {
class DicNodeStateInput {
public:
DicNodeStateInput() {}
- virtual ~DicNodeStateInput() {}
-
- // TODO: Merge into DicNodeStatePrevWord::truncate
- void truncate(const int commitPoint) {
- mInputIndex[0] -= commitPoint;
- }
+ ~DicNodeStateInput() {}
void init() {
for (int i = 0; i < MAX_POINTER_COUNT_G; i++) {
@@ -58,7 +53,7 @@ class DicNodeStateInput {
mTerminalDiffCost[pointerId] = terminalDiffCost;
}
- void init(const DicNodeStateInput *const src) {
+ void initByCopy(const DicNodeStateInput *const src) {
init(src, false);
}
@@ -89,9 +84,8 @@ class DicNodeStateInput {
}
private:
- // Caution!!!
- // Use a default copy constructor and an assign operator because shallow copies are ok
- // for this class
+ DISALLOW_COPY_AND_ASSIGN(DicNodeStateInput);
+
int mInputIndex[MAX_POINTER_COUNT_G];
int mPrevCodePoint[MAX_POINTER_COUNT_G];
float mTerminalDiffCost[MAX_POINTER_COUNT_G];
diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_output.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_output.h
index 74eb5dfe7..69a886f55 100644
--- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_output.h
+++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_output.h
@@ -17,63 +17,135 @@
#ifndef LATINIME_DIC_NODE_STATE_OUTPUT_H
#define LATINIME_DIC_NODE_STATE_OUTPUT_H
-#include <cstring> // for memcpy()
-#include <stdint.h>
+#include <algorithm>
+#include <cstdint>
+#include <cstring> // for memmove()
#include "defines.h"
namespace latinime {
+// Class that holds the information to be output. This can contain previous words when the
+// suggestion is a multi-word suggestion.
class DicNodeStateOutput {
public:
- DicNodeStateOutput() : mOutputtedCodePointCount(0) {
- init();
- }
+ DicNodeStateOutput()
+ : mOutputtedCodePointCount(0), mCurrentWordStart(0), mPrevWordCount(0),
+ mPrevWordsLength(0), mPrevWordStart(0), mSecondWordFirstInputIndex(NOT_AN_INDEX) {}
- virtual ~DicNodeStateOutput() {}
+ ~DicNodeStateOutput() {}
+ // Init for root
void init() {
mOutputtedCodePointCount = 0;
- mCodePointsBuf[0] = 0;
+ mCurrentWordStart = 0;
+ mOutputCodePoints[0] = 0;
+ mPrevWordCount = 0;
+ mPrevWordsLength = 0;
+ mPrevWordStart = 0;
+ mSecondWordFirstInputIndex = NOT_AN_INDEX;
}
+ // Init for next word.
void init(const DicNodeStateOutput *const stateOutput) {
- memcpy(mCodePointsBuf, stateOutput->mCodePointsBuf,
- stateOutput->mOutputtedCodePointCount * sizeof(mCodePointsBuf[0]));
+ mOutputtedCodePointCount = stateOutput->mOutputtedCodePointCount + 1;
+ memmove(mOutputCodePoints, stateOutput->mOutputCodePoints,
+ stateOutput->mOutputtedCodePointCount * sizeof(mOutputCodePoints[0]));
+ mOutputCodePoints[stateOutput->mOutputtedCodePointCount] = KEYCODE_SPACE;
+ mCurrentWordStart = stateOutput->mOutputtedCodePointCount + 1;
+ mPrevWordCount = std::min(static_cast<int16_t>(stateOutput->mPrevWordCount + 1),
+ static_cast<int16_t>(MAX_RESULTS));
+ mPrevWordsLength = stateOutput->mOutputtedCodePointCount + 1;
+ mPrevWordStart = stateOutput->mCurrentWordStart;
+ mSecondWordFirstInputIndex = stateOutput->mSecondWordFirstInputIndex;
+ }
+
+ void initByCopy(const DicNodeStateOutput *const stateOutput) {
+ memmove(mOutputCodePoints, stateOutput->mOutputCodePoints,
+ stateOutput->mOutputtedCodePointCount * sizeof(mOutputCodePoints[0]));
mOutputtedCodePointCount = stateOutput->mOutputtedCodePointCount;
if (mOutputtedCodePointCount < MAX_WORD_LENGTH) {
- mCodePointsBuf[mOutputtedCodePointCount] = 0;
+ mOutputCodePoints[mOutputtedCodePointCount] = 0;
}
+ mCurrentWordStart = stateOutput->mCurrentWordStart;
+ mPrevWordCount = stateOutput->mPrevWordCount;
+ mPrevWordsLength = stateOutput->mPrevWordsLength;
+ mPrevWordStart = stateOutput->mPrevWordStart;
+ mSecondWordFirstInputIndex = stateOutput->mSecondWordFirstInputIndex;
}
void addMergedNodeCodePoints(const uint16_t mergedNodeCodePointCount,
const int *const mergedNodeCodePoints) {
if (mergedNodeCodePoints) {
- const int additionalCodePointCount = min(static_cast<int>(mergedNodeCodePointCount),
+ const int additionalCodePointCount = std::min(
+ static_cast<int>(mergedNodeCodePointCount),
MAX_WORD_LENGTH - mOutputtedCodePointCount);
- memcpy(&mCodePointsBuf[mOutputtedCodePointCount], mergedNodeCodePoints,
- additionalCodePointCount * sizeof(mCodePointsBuf[0]));
+ memmove(&mOutputCodePoints[mOutputtedCodePointCount], mergedNodeCodePoints,
+ additionalCodePointCount * sizeof(mOutputCodePoints[0]));
mOutputtedCodePointCount = static_cast<uint16_t>(
- mOutputtedCodePointCount + mergedNodeCodePointCount);
+ mOutputtedCodePointCount + additionalCodePointCount);
if (mOutputtedCodePointCount < MAX_WORD_LENGTH) {
- mCodePointsBuf[mOutputtedCodePointCount] = 0;
+ mOutputCodePoints[mOutputtedCodePointCount] = 0;
}
}
}
- // TODO: Remove
- int getCodePointAt(const int index) const {
- return mCodePointsBuf[index];
+ int getCurrentWordCodePointAt(const int index) const {
+ return mOutputCodePoints[mCurrentWordStart + index];
+ }
+
+ const int *getCodePointBuf() const {
+ return mOutputCodePoints;
+ }
+
+ void setSecondWordFirstInputIndex(const int inputIndex) {
+ mSecondWordFirstInputIndex = inputIndex;
+ }
+
+ int getSecondWordFirstInputIndex() const {
+ return mSecondWordFirstInputIndex;
}
- // TODO: Move to private
- int mCodePointsBuf[MAX_WORD_LENGTH];
+ // TODO: remove
+ int16_t getPrevWordsLength() const {
+ return mPrevWordsLength;
+ }
+
+ int16_t getPrevWordCount() const {
+ return mPrevWordCount;
+ }
+
+ int16_t getPrevWordStart() const {
+ return mPrevWordStart;
+ }
+
+ int getOutputCodePointAt(const int id) const {
+ return mOutputCodePoints[id];
+ }
private:
- // Caution!!!
- // Use a default copy constructor and an assign operator because shallow copies are ok
- // for this class
+ DISALLOW_COPY_AND_ASSIGN(DicNodeStateOutput);
+
+ // When the DicNode represents "this is a pen":
+ // mOutputtedCodePointCount is 13, which is the total code point count of "this is a pen"
+ // including spaces.
+ // mCurrentWordStart indicates the head of "pen", thus it is 10.
+ // This contains 3 previous words, "this", "is" and "a"; thus, mPrevWordCount is 3.
+ // mPrevWordsLength is the length of "this is a ", which is 10.
+ // mPrevWordStart is the start index of "a"; thus, it is 8.
+ // mSecondWordFirstInputIndex is the first input index of "is".
+
uint16_t mOutputtedCodePointCount;
+ int mOutputCodePoints[MAX_WORD_LENGTH];
+ int16_t mCurrentWordStart;
+ // Previous word count in mOutputCodePoints.
+ int16_t mPrevWordCount;
+ // Total length of previous words in mOutputCodePoints. This is being used by the algorithm
+ // that may want to look at the previous word information.
+ int16_t mPrevWordsLength;
+ // Start index of the previous word in mOutputCodePoints. This is being used for auto commit.
+ int16_t mPrevWordStart;
+ int mSecondWordFirstInputIndex;
};
} // namespace latinime
#endif // LATINIME_DIC_NODE_STATE_OUTPUT_H
diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h
deleted file mode 100644
index b8986203d..000000000
--- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_DIC_NODE_STATE_PREVWORD_H
-#define LATINIME_DIC_NODE_STATE_PREVWORD_H
-
-#include <cstring> // for memset()
-#include <stdint.h>
-
-#include "defines.h"
-#include "suggest/core/dicnode/dic_node_utils.h"
-#include "suggest/core/layout/proximity_info_state.h"
-
-namespace latinime {
-
-class DicNodeStatePrevWord {
- public:
- AK_FORCE_INLINE DicNodeStatePrevWord()
- : mPrevWordCount(0), mPrevWordLength(0), mPrevWordStart(0), mPrevWordProbability(0),
- mPrevWordNodePos(NOT_A_DICT_POS), mSecondWordFirstInputIndex(NOT_AN_INDEX) {
- memset(mPrevWord, 0, sizeof(mPrevWord));
- }
-
- virtual ~DicNodeStatePrevWord() {}
-
- void init() {
- mPrevWordLength = 0;
- mPrevWordCount = 0;
- mPrevWordStart = 0;
- mPrevWordProbability = -1;
- mPrevWordNodePos = NOT_A_DICT_POS;
- mSecondWordFirstInputIndex = NOT_AN_INDEX;
- }
-
- void init(const int prevWordNodePos) {
- mPrevWordLength = 0;
- mPrevWordCount = 0;
- mPrevWordStart = 0;
- mPrevWordProbability = -1;
- mPrevWordNodePos = prevWordNodePos;
- mSecondWordFirstInputIndex = NOT_AN_INDEX;
- }
-
- // Init by copy
- AK_FORCE_INLINE void init(const DicNodeStatePrevWord *const prevWord) {
- mPrevWordLength = prevWord->mPrevWordLength;
- mPrevWordCount = prevWord->mPrevWordCount;
- mPrevWordStart = prevWord->mPrevWordStart;
- mPrevWordProbability = prevWord->mPrevWordProbability;
- mPrevWordNodePos = prevWord->mPrevWordNodePos;
- mSecondWordFirstInputIndex = prevWord->mSecondWordFirstInputIndex;
- memcpy(mPrevWord, prevWord->mPrevWord, prevWord->mPrevWordLength * sizeof(mPrevWord[0]));
- }
-
- void init(const int16_t prevWordCount, const int16_t prevWordProbability,
- const int prevWordNodePos, const int *const src0, const int16_t length0,
- const int *const src1, const int16_t length1,
- const int prevWordSecondWordFirstInputIndex, const int lastInputIndex) {
- mPrevWordCount = min(prevWordCount, static_cast<int16_t>(MAX_RESULTS));
- mPrevWordProbability = prevWordProbability;
- mPrevWordNodePos = prevWordNodePos;
- int twoWordsLen =
- DicNodeUtils::appendTwoWords(src0, length0, src1, length1, mPrevWord);
- if (twoWordsLen >= MAX_WORD_LENGTH) {
- twoWordsLen = MAX_WORD_LENGTH - 1;
- }
- mPrevWord[twoWordsLen] = KEYCODE_SPACE;
- mPrevWordStart = length0;
- mPrevWordLength = static_cast<int16_t>(twoWordsLen + 1);
- mSecondWordFirstInputIndex = prevWordSecondWordFirstInputIndex;
- }
-
- void truncate(const int offset) {
- // TODO: memmove
- if (mPrevWordLength < offset) {
- memset(mPrevWord, 0, sizeof(mPrevWord));
- mPrevWordLength = 0;
- return;
- }
- const int newPrevWordLength = mPrevWordLength - offset;
- memmove(mPrevWord, &mPrevWord[offset], newPrevWordLength * sizeof(mPrevWord[0]));
- mPrevWordLength = newPrevWordLength;
- }
-
- void setSecondWordFirstInputIndex(const int inputIndex) {
- mSecondWordFirstInputIndex = inputIndex;
- }
-
- int getSecondWordFirstInputIndex() const {
- return mSecondWordFirstInputIndex;
- }
-
- // TODO: remove
- int16_t getPrevWordLength() const {
- return mPrevWordLength;
- }
-
- int16_t getPrevWordCount() const {
- return mPrevWordCount;
- }
-
- int16_t getPrevWordStart() const {
- return mPrevWordStart;
- }
-
- int getPrevWordNodePos() const {
- return mPrevWordNodePos;
- }
-
- int getPrevWordCodePointAt(const int id) const {
- return mPrevWord[id];
- }
-
- bool startsWith(const DicNodeStatePrevWord *const prefix, const int prefixLen) const {
- if (prefixLen > mPrevWordLength) {
- return false;
- }
- for (int i = 0; i < prefixLen; ++i) {
- if (mPrevWord[i] != prefix->mPrevWord[i]) {
- return false;
- }
- }
- return true;
- }
-
- // TODO: Move to private
- int mPrevWord[MAX_WORD_LENGTH];
-
- private:
- // Caution!!!
- // Use a default copy constructor and an assign operator because shallow copies are ok
- // for this class
- int16_t mPrevWordCount;
- int16_t mPrevWordLength;
- int16_t mPrevWordStart;
- int16_t mPrevWordProbability;
- int mPrevWordNodePos;
- int mSecondWordFirstInputIndex;
-};
-} // namespace latinime
-#endif // LATINIME_DIC_NODE_STATE_PREVWORD_H
diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h
index 3c85d0e9d..c19d48eb9 100644
--- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h
+++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h
@@ -17,10 +17,12 @@
#ifndef LATINIME_DIC_NODE_STATE_SCORING_H
#define LATINIME_DIC_NODE_STATE_SCORING_H
-#include <stdint.h>
+#include <algorithm>
+#include <cstdint>
#include "defines.h"
#include "suggest/core/dictionary/digraph_utils.h"
+#include "suggest/core/dictionary/error_type_utils.h"
namespace latinime {
@@ -29,17 +31,18 @@ class DicNodeStateScoring {
AK_FORCE_INLINE DicNodeStateScoring()
: mDoubleLetterLevel(NOT_A_DOUBLE_LETTER),
mDigraphIndex(DigraphUtils::NOT_A_DIGRAPH_INDEX),
- mEditCorrectionCount(0), mProximityCorrectionCount(0),
+ mEditCorrectionCount(0), mProximityCorrectionCount(0), mCompletionCount(0),
mNormalizedCompoundDistance(0.0f), mSpatialDistance(0.0f), mLanguageDistance(0.0f),
- mRawLength(0.0f), mExactMatch(true),
+ mRawLength(0.0f), mContainedErrorTypes(ErrorTypeUtils::NOT_AN_ERROR),
mNormalizedCompoundDistanceAfterFirstWord(MAX_VALUE_FOR_WEIGHTING) {
}
- virtual ~DicNodeStateScoring() {}
+ ~DicNodeStateScoring() {}
void init() {
mEditCorrectionCount = 0;
mProximityCorrectionCount = 0;
+ mCompletionCount = 0;
mNormalizedCompoundDistance = 0.0f;
mSpatialDistance = 0.0f;
mLanguageDistance = 0.0f;
@@ -47,46 +50,37 @@ class DicNodeStateScoring {
mDoubleLetterLevel = NOT_A_DOUBLE_LETTER;
mDigraphIndex = DigraphUtils::NOT_A_DIGRAPH_INDEX;
mNormalizedCompoundDistanceAfterFirstWord = MAX_VALUE_FOR_WEIGHTING;
- mExactMatch = true;
+ mContainedErrorTypes = ErrorTypeUtils::NOT_AN_ERROR;
}
- AK_FORCE_INLINE void init(const DicNodeStateScoring *const scoring) {
+ AK_FORCE_INLINE void initByCopy(const DicNodeStateScoring *const scoring) {
mEditCorrectionCount = scoring->mEditCorrectionCount;
mProximityCorrectionCount = scoring->mProximityCorrectionCount;
+ mCompletionCount = scoring->mCompletionCount;
mNormalizedCompoundDistance = scoring->mNormalizedCompoundDistance;
mSpatialDistance = scoring->mSpatialDistance;
mLanguageDistance = scoring->mLanguageDistance;
mRawLength = scoring->mRawLength;
mDoubleLetterLevel = scoring->mDoubleLetterLevel;
mDigraphIndex = scoring->mDigraphIndex;
- mExactMatch = scoring->mExactMatch;
+ mContainedErrorTypes = scoring->mContainedErrorTypes;
mNormalizedCompoundDistanceAfterFirstWord =
scoring->mNormalizedCompoundDistanceAfterFirstWord;
}
void addCost(const float spatialCost, const float languageCost, const bool doNormalization,
- const int inputSize, const int totalInputIndex, const ErrorType errorType) {
+ const int inputSize, const int totalInputIndex,
+ const ErrorTypeUtils::ErrorType errorType) {
addDistance(spatialCost, languageCost, doNormalization, inputSize, totalInputIndex);
- switch (errorType) {
- case ET_EDIT_CORRECTION:
- ++mEditCorrectionCount;
- mExactMatch = false;
- break;
- case ET_PROXIMITY_CORRECTION:
- ++mProximityCorrectionCount;
- mExactMatch = false;
- break;
- case ET_COMPLETION:
- mExactMatch = false;
- break;
- case ET_NEW_WORD:
- mExactMatch = false;
- break;
- case ET_INTENTIONAL_OMISSION:
- mExactMatch = false;
- break;
- case ET_NOT_AN_ERROR:
- break;
+ mContainedErrorTypes = mContainedErrorTypes | errorType;
+ if (ErrorTypeUtils::isEditCorrectionError(errorType)) {
+ ++mEditCorrectionCount;
+ }
+ if (ErrorTypeUtils::isProximityCorrectionError(errorType)) {
+ ++mProximityCorrectionCount;
+ }
+ if (ErrorTypeUtils::isCompletion(errorType)) {
+ ++mCompletionCount;
}
}
@@ -140,6 +134,10 @@ class DicNodeStateScoring {
return mProximityCorrectionCount;
}
+ int16_t getCompletionCount() const {
+ return mCompletionCount;
+ }
+
float getRawLength() const {
return mRawLength;
}
@@ -181,25 +179,26 @@ class DicNodeStateScoring {
}
}
- bool isExactMatch() const {
- return mExactMatch;
+ ErrorTypeUtils::ErrorType getContainedErrorTypes() const {
+ return mContainedErrorTypes;
}
private:
- // Caution!!!
- // Use a default copy constructor and an assign operator because shallow copies are ok
- // for this class
+ DISALLOW_COPY_AND_ASSIGN(DicNodeStateScoring);
+
DoubleLetterLevel mDoubleLetterLevel;
DigraphUtils::DigraphCodePointIndex mDigraphIndex;
int16_t mEditCorrectionCount;
int16_t mProximityCorrectionCount;
+ int16_t mCompletionCount;
float mNormalizedCompoundDistance;
float mSpatialDistance;
float mLanguageDistance;
float mRawLength;
- bool mExactMatch;
+ // All accumulated error types so far
+ ErrorTypeUtils::ErrorType mContainedErrorTypes;
float mNormalizedCompoundDistanceAfterFirstWord;
AK_FORCE_INLINE void addDistance(float spatialDistance, float languageDistance,
@@ -210,7 +209,7 @@ class DicNodeStateScoring {
mNormalizedCompoundDistance = mSpatialDistance + mLanguageDistance;
} else {
mNormalizedCompoundDistance = (mSpatialDistance + mLanguageDistance)
- / static_cast<float>(max(1, totalInputIndex));
+ / static_cast<float>(std::max(1, totalInputIndex));
}
}
};
diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
deleted file mode 100644
index 71f4ef6ea..000000000
--- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * Copyright (C) 2010, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cstring>
-
-#define LOG_TAG "LatinIME: bigram_dictionary.cpp"
-
-#include "bigram_dictionary.h"
-
-#include "defines.h"
-#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
-#include "suggest/core/dictionary/dictionary.h"
-#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
-#include "utils/char_utils.h"
-
-namespace latinime {
-
-BigramDictionary::BigramDictionary(
- const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy)
- : mDictionaryStructurePolicy(dictionaryStructurePolicy) {
- if (DEBUG_DICT) {
- AKLOGI("BigramDictionary - constructor");
- }
-}
-
-BigramDictionary::~BigramDictionary() {
-}
-
-void BigramDictionary::addWordBigram(int *word, int length, int probability, int *bigramProbability,
- int *bigramCodePoints, int *outputTypes) const {
- word[length] = 0;
- if (DEBUG_DICT_FULL) {
-#ifdef FLAG_DBG
- char s[length + 1];
- for (int i = 0; i <= length; i++) s[i] = static_cast<char>(word[i]);
- AKLOGI("Bigram: Found word = %s, freq = %d :", s, probability);
-#endif
- }
-
- // Find the right insertion point
- int insertAt = 0;
- while (insertAt < MAX_RESULTS) {
- if (probability > bigramProbability[insertAt] || (bigramProbability[insertAt] == probability
- && length < CharUtils::getCodePointCount(MAX_WORD_LENGTH,
- bigramCodePoints + insertAt * MAX_WORD_LENGTH))) {
- break;
- }
- insertAt++;
- }
- if (DEBUG_DICT_FULL) {
- AKLOGI("Bigram: InsertAt -> %d MAX_RESULTS: %d", insertAt, MAX_RESULTS);
- }
- if (insertAt >= MAX_RESULTS) {
- return;
- }
- memmove(bigramProbability + (insertAt + 1),
- bigramProbability + insertAt,
- (MAX_RESULTS - insertAt - 1) * sizeof(bigramProbability[0]));
- bigramProbability[insertAt] = probability;
- outputTypes[insertAt] = Dictionary::KIND_PREDICTION;
- memmove(bigramCodePoints + (insertAt + 1) * MAX_WORD_LENGTH,
- bigramCodePoints + insertAt * MAX_WORD_LENGTH,
- (MAX_RESULTS - insertAt - 1) * sizeof(bigramCodePoints[0]) * MAX_WORD_LENGTH);
- int *dest = bigramCodePoints + insertAt * MAX_WORD_LENGTH;
- while (length--) {
- *dest++ = *word++;
- }
- *dest = 0; // NULL terminate
- if (DEBUG_DICT_FULL) {
- AKLOGI("Bigram: Added word at %d", insertAt);
- }
-}
-
-/* Parameters :
- * prevWord: the word before, the one for which we need to look up bigrams.
- * prevWordLength: its length.
- * outBigramCodePoints: an array for output, at the same format as outwords for getSuggestions.
- * outBigramProbability: an array to output frequencies.
- * outputTypes: an array to output types.
- * This method returns the number of bigrams this word has, for backward compatibility.
- */
-int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLength,
- int *const outBigramCodePoints, int *const outBigramProbability,
- int *const outputTypes) const {
- // TODO: remove unused arguments, and refrain from storing stuff in members of this class
- // TODO: have "in" arguments before "out" ones, and make out args explicit in the name
-
- int pos = getBigramListPositionForWord(prevWord, prevWordLength,
- false /* forceLowerCaseSearch */);
- // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
- if (NOT_A_DICT_POS == pos) {
- // If no bigrams for this exact word, search again in lower case.
- pos = getBigramListPositionForWord(prevWord, prevWordLength,
- true /* forceLowerCaseSearch */);
- }
- // If still no bigrams, we really don't have them!
- if (NOT_A_DICT_POS == pos) return 0;
-
- int bigramCount = 0;
- int unigramProbability = 0;
- int bigramBuffer[MAX_WORD_LENGTH];
- BinaryDictionaryBigramsIterator bigramsIt(
- mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos);
- while (bigramsIt.hasNext()) {
- bigramsIt.next();
- if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) {
- continue;
- }
- const int codePointCount = mDictionaryStructurePolicy->
- getCodePointsAndProbabilityAndReturnCodePointCount(bigramsIt.getBigramPos(),
- MAX_WORD_LENGTH, bigramBuffer, &unigramProbability);
- if (codePointCount <= 0) {
- continue;
- }
- // Due to space constraints, the probability for bigrams is approximate - the lower the
- // unigram probability, the worse the precision. The theoritical maximum error in
- // resulting probability is 8 - although in the practice it's never bigger than 3 or 4
- // in very bad cases. This means that sometimes, we'll see some bigrams interverted
- // here, but it can't get too bad.
- const int probability = mDictionaryStructurePolicy->getProbability(
- unigramProbability, bigramsIt.getProbability());
- addWordBigram(bigramBuffer, codePointCount, probability, outBigramProbability,
- outBigramCodePoints, outputTypes);
- ++bigramCount;
- }
- return min(bigramCount, MAX_RESULTS);
-}
-
-// Returns a pointer to the start of the bigram list.
-// If the word is not found or has no bigrams, this function returns NOT_A_DICT_POS.
-int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
- const bool forceLowerCaseSearch) const {
- if (0 >= prevWordLength) return NOT_A_DICT_POS;
- int pos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(prevWord, prevWordLength,
- forceLowerCaseSearch);
- if (NOT_A_DICT_POS == pos) return NOT_A_DICT_POS;
- return mDictionaryStructurePolicy->getBigramsPositionOfPtNode(pos);
-}
-
-int BigramDictionary::getBigramProbability(const int *word0, int length0, const int *word1,
- int length1) const {
- int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */);
- // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
- if (NOT_A_DICT_POS == pos) return NOT_A_PROBABILITY;
- int nextWordPos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(word1, length1,
- false /* forceLowerCaseSearch */);
- if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY;
-
- BinaryDictionaryBigramsIterator bigramsIt(
- mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos);
- while (bigramsIt.hasNext()) {
- bigramsIt.next();
- if (bigramsIt.getBigramPos() == nextWordPos) {
- return mDictionaryStructurePolicy->getProbability(
- mDictionaryStructurePolicy->getUnigramProbabilityOfPtNode(nextWordPos),
- bigramsIt.getProbability());
- }
- }
- return NOT_A_PROBABILITY;
-}
-
-// TODO: Move functions related to bigram to here
-} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h b/native/jni/src/suggest/core/dictionary/bigram_dictionary.h
deleted file mode 100644
index 8af7ee75d..000000000
--- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (C) 2010 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_BIGRAM_DICTIONARY_H
-#define LATINIME_BIGRAM_DICTIONARY_H
-
-#include "defines.h"
-
-namespace latinime {
-
-class DictionaryStructureWithBufferPolicy;
-
-class BigramDictionary {
- public:
- BigramDictionary(const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy);
-
- int getPredictions(const int *word, int length, int *outBigramCodePoints,
- int *outBigramProbability, int *outputTypes) const;
- int getBigramProbability(const int *word1, int length1, const int *word2, int length2) const;
- ~BigramDictionary();
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(BigramDictionary);
-
- void addWordBigram(int *word, int length, int probability, int *bigramProbability,
- int *bigramCodePoints, int *outputTypes) const;
- int getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
- const bool forceLowerCaseSearch) const;
-
- const DictionaryStructureWithBufferPolicy *const mDictionaryStructurePolicy;
-};
-} // namespace latinime
-#endif // LATINIME_BIGRAM_DICTIONARY_H
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h
index d16ac47fe..bc9d57671 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h
@@ -30,6 +30,11 @@ class BinaryDictionaryBigramsIterator {
mBigramPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY),
mHasNext(pos != NOT_A_DICT_POS) {}
+ BinaryDictionaryBigramsIterator(BinaryDictionaryBigramsIterator &&bigramsIterator)
+ : mBigramsStructurePolicy(bigramsIterator.mBigramsStructurePolicy),
+ mPos(bigramsIterator.mPos), mBigramPos(bigramsIterator.mBigramPos),
+ mProbability(bigramsIterator.mProbability), mHasNext(bigramsIterator.mHasNext) {}
+
AK_FORCE_INLINE bool hasNext() const {
return mHasNext;
}
diff --git a/native/jni/src/suggest/core/dictionary/bloom_filter.h b/native/jni/src/suggest/core/dictionary/bloom_filter.h
index 5205456a8..1e60f49ed 100644
--- a/native/jni/src/suggest/core/dictionary/bloom_filter.h
+++ b/native/jni/src/suggest/core/dictionary/bloom_filter.h
@@ -17,7 +17,7 @@
#ifndef LATINIME_BLOOM_FILTER_H
#define LATINIME_BLOOM_FILTER_H
-#include <stdint.h>
+#include <bitset>
#include "defines.h"
@@ -33,38 +33,37 @@ namespace latinime {
// Total 148603.14 (sum of others 148579.90)
class BloomFilter {
public:
- BloomFilter() {
- ASSERT(BIGRAM_FILTER_BYTE_SIZE * 8 >= BIGRAM_FILTER_MODULO);
- }
+ BloomFilter() : mFilter() {}
- // TODO: uint32_t position
- AK_FORCE_INLINE void setInFilter(const int32_t position) {
- const uint32_t bucket = static_cast<uint32_t>(position % BIGRAM_FILTER_MODULO);
- mFilter[bucket >> 3] |= static_cast<uint8_t>(1 << (bucket & 0x7));
+ AK_FORCE_INLINE void setInFilter(const int position) {
+ mFilter.set(getIndex(position));
}
- // TODO: uint32_t position
- AK_FORCE_INLINE bool isInFilter(const int32_t position) const {
- const uint32_t bucket = static_cast<uint32_t>(position % BIGRAM_FILTER_MODULO);
- return (mFilter[bucket >> 3] & static_cast<uint8_t>(1 << (bucket & 0x7))) != 0;
+ AK_FORCE_INLINE bool isInFilter(const int position) const {
+ return mFilter.test(getIndex(position));
}
private:
- // Size, in bytes, of the bloom filter index for bigrams
- // 128 gives us 1024 buckets. The probability of false positive is (1 - e ** (-kn/m))**k,
+ DISALLOW_ASSIGNMENT_OPERATOR(BloomFilter);
+
+ AK_FORCE_INLINE size_t getIndex(const int position) const {
+ return static_cast<size_t>(position) % BIGRAM_FILTER_MODULO;
+ }
+
+ // Size, in bits, of the bloom filter index for bigrams
+ // The probability of false positive is (1 - e ** (-kn/m))**k,
// where k is the number of hash functions, n the number of bigrams, and m the number of
// bits we can test.
- // At the moment 100 is the maximum number of bigrams for a word with the current
+ // At the moment 100 is the maximum number of bigrams for a word with the current main
// dictionaries, so n = 100. 1024 buckets give us m = 1024.
// With 1 hash function, our false positive rate is about 9.3%, which should be enough for
// our uses since we are only using this to increase average performance. For the record,
// k = 2 gives 3.1% and k = 3 gives 1.6%. With k = 1, making m = 2048 gives 4.8%,
// and m = 4096 gives 2.4%.
- // This is assigned here because it is used for array size.
- static const int BIGRAM_FILTER_BYTE_SIZE = 128;
- static const int BIGRAM_FILTER_MODULO;
-
- uint8_t mFilter[BIGRAM_FILTER_BYTE_SIZE];
+ // This is assigned here because it is used for bitset size.
+ // 1021 is the largest prime under 1024.
+ static const size_t BIGRAM_FILTER_MODULO = 1021;
+ std::bitset<BIGRAM_FILTER_MODULO> mFilter;
};
} // namespace latinime
#endif // LATINIME_BLOOM_FILTER_H
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index 59ead1894..fb25f757c 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -18,77 +18,78 @@
#include "suggest/core/dictionary/dictionary.h"
-#include <stdint.h>
-
#include "defines.h"
-#include "suggest/core/dictionary/bigram_dictionary.h"
+#include "suggest/core/dictionary/dictionary_utils.h"
#include "suggest/core/policy/dictionary_header_structure_policy.h"
-#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+#include "suggest/core/result/suggestion_results.h"
#include "suggest/core/session/dic_traverse_session.h"
+#include "suggest/core/session/prev_words_info.h"
#include "suggest/core/suggest.h"
#include "suggest/core/suggest_options.h"
#include "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h"
#include "suggest/policyimpl/typing/typing_suggest_policy_factory.h"
#include "utils/log_utils.h"
+#include "utils/time_keeper.h"
namespace latinime {
const int Dictionary::HEADER_ATTRIBUTE_BUFFER_SIZE = 32;
-Dictionary::Dictionary(JNIEnv *env,
- DictionaryStructureWithBufferPolicy *const dictionaryStructureWithBufferPolicy)
- : mDictionaryStructureWithBufferPolicy(dictionaryStructureWithBufferPolicy),
- mBigramDictionary(new BigramDictionary(mDictionaryStructureWithBufferPolicy)),
+Dictionary::Dictionary(JNIEnv *env, DictionaryStructureWithBufferPolicy::StructurePolicyPtr
+ dictionaryStructureWithBufferPolicy)
+ : mDictionaryStructureWithBufferPolicy(std::move(dictionaryStructureWithBufferPolicy)),
mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())),
mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) {
logDictionaryInfo(env);
}
-Dictionary::~Dictionary() {
- delete mBigramDictionary;
- delete mGestureSuggest;
- delete mTypingSuggest;
- delete mDictionaryStructureWithBufferPolicy;
+void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
+ int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
+ int inputSize, const PrevWordsInfo *const prevWordsInfo,
+ const SuggestOptions *const suggestOptions, const float languageWeight,
+ SuggestionResults *const outSuggestionResults) const {
+ TimeKeeper::setCurrentTime();
+ traverseSession->init(this, prevWordsInfo, suggestOptions);
+ const auto &suggest = suggestOptions->isGesture() ? mGestureSuggest : mTypingSuggest;
+ suggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
+ ycoordinates, times, pointerIds, inputCodePoints, inputSize,
+ languageWeight, outSuggestionResults);
+ if (DEBUG_DICT) {
+ outSuggestionResults->dumpSuggestions();
+ }
}
-int Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
- int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
- int inputSize, int *prevWordCodePoints, int prevWordLength, int commitPoint,
- const SuggestOptions *const suggestOptions, int *outWords, int *frequencies,
- int *spaceIndices, int *outputTypes, int *outputAutoCommitFirstWordConfidence) const {
- int result = 0;
- if (suggestOptions->isGesture()) {
- DicTraverseSession::initSessionInstance(
- traverseSession, this, prevWordCodePoints, prevWordLength, suggestOptions);
- result = mGestureSuggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
- ycoordinates, times, pointerIds, inputCodePoints, inputSize, commitPoint, outWords,
- frequencies, spaceIndices, outputTypes, outputAutoCommitFirstWordConfidence);
- if (DEBUG_DICT) {
- DUMP_RESULT(outWords, frequencies);
+void Dictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
+ SuggestionResults *const outSuggestionResults) const {
+ TimeKeeper::setCurrentTime();
+ int unigramProbability = 0;
+ int bigramCodePoints[MAX_WORD_LENGTH];
+ BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction(
+ mDictionaryStructureWithBufferPolicy.get());
+ while (bigramsIt.hasNext()) {
+ bigramsIt.next();
+ if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) {
+ continue;
+ }
+ if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)
+ && bigramsIt.getProbability() == NOT_A_PROBABILITY) {
+ continue;
}
- return result;
- } else {
- DicTraverseSession::initSessionInstance(
- traverseSession, this, prevWordCodePoints, prevWordLength, suggestOptions);
- result = mTypingSuggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
- ycoordinates, times, pointerIds, inputCodePoints, inputSize, commitPoint,
- outWords, frequencies, spaceIndices, outputTypes,
- outputAutoCommitFirstWordConfidence);
- if (DEBUG_DICT) {
- DUMP_RESULT(outWords, frequencies);
+ const int codePointCount = mDictionaryStructureWithBufferPolicy->
+ getCodePointsAndProbabilityAndReturnCodePointCount(bigramsIt.getBigramPos(),
+ MAX_WORD_LENGTH, bigramCodePoints, &unigramProbability);
+ if (codePointCount <= 0) {
+ continue;
}
- return result;
+ const int probability = mDictionaryStructureWithBufferPolicy->getProbability(
+ unigramProbability, bigramsIt.getProbability());
+ outSuggestionResults->addPrediction(bigramCodePoints, codePointCount, probability);
}
}
-int Dictionary::getBigrams(const int *word, int length, int *outWords, int *frequencies,
- int *outputTypes) const {
- if (length <= 0) return 0;
- return mBigramDictionary->getPredictions(word, length, outWords, frequencies, outputTypes);
-}
-
int Dictionary::getProbability(const int *word, int length) const {
- int pos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(word, length,
+ TimeKeeper::setCurrentTime();
+ int pos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(word, length,
false /* forceLowerCaseSearch */);
if (NOT_A_DICT_POS == pos) {
return NOT_A_PROBABILITY;
@@ -96,41 +97,95 @@ int Dictionary::getProbability(const int *word, int length) const {
return getDictionaryStructurePolicy()->getUnigramProbabilityOfPtNode(pos);
}
-int Dictionary::getBigramProbability(const int *word0, int length0, const int *word1,
- int length1) const {
- return mBigramDictionary->getBigramProbability(word0, length0, word1, length1);
+int Dictionary::getMaxProbabilityOfExactMatches(const int *word, int length) const {
+ TimeKeeper::setCurrentTime();
+ return DictionaryUtils::getMaxProbabilityOfExactMatches(
+ mDictionaryStructureWithBufferPolicy.get(), word, length);
+}
+
+int Dictionary::getNgramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word,
+ int length) const {
+ TimeKeeper::setCurrentTime();
+ int nextWordPos = mDictionaryStructureWithBufferPolicy->getTerminalPtNodePositionOfWord(word,
+ length, false /* forceLowerCaseSearch */);
+ if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY;
+ BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction(
+ mDictionaryStructureWithBufferPolicy.get());
+ while (bigramsIt.hasNext()) {
+ bigramsIt.next();
+ if (bigramsIt.getBigramPos() == nextWordPos
+ && bigramsIt.getProbability() != NOT_A_PROBABILITY) {
+ return mDictionaryStructureWithBufferPolicy->getProbability(
+ mDictionaryStructureWithBufferPolicy->getUnigramProbabilityOfPtNode(
+ nextWordPos), bigramsIt.getProbability());
+ }
+ }
+ return NOT_A_PROBABILITY;
+}
+
+bool Dictionary::addUnigramEntry(const int *const word, const int length,
+ const UnigramProperty *const unigramProperty) {
+ if (unigramProperty->representsBeginningOfSentence()
+ && !mDictionaryStructureWithBufferPolicy->getHeaderStructurePolicy()
+ ->supportsBeginningOfSentence()) {
+ AKLOGE("The dictionary doesn't support Beginning-of-Sentence.");
+ return false;
+ }
+ TimeKeeper::setCurrentTime();
+ return mDictionaryStructureWithBufferPolicy->addUnigramEntry(word, length, unigramProperty);
}
-void Dictionary::addUnigramWord(const int *const word, const int length, const int probability) {
- mDictionaryStructureWithBufferPolicy->addUnigramWord(word, length, probability);
+bool Dictionary::removeUnigramEntry(const int *const codePoints, const int codePointCount) {
+ TimeKeeper::setCurrentTime();
+ return mDictionaryStructureWithBufferPolicy->removeUnigramEntry(codePoints, codePointCount);
}
-void Dictionary::addBigramWords(const int *const word0, const int length0, const int *const word1,
- const int length1, const int probability) {
- mDictionaryStructureWithBufferPolicy->addBigramWords(word0, length0, word1, length1,
- probability);
+bool Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+ const BigramProperty *const bigramProperty) {
+ TimeKeeper::setCurrentTime();
+ return mDictionaryStructureWithBufferPolicy->addNgramEntry(prevWordsInfo, bigramProperty);
}
-void Dictionary::removeBigramWords(const int *const word0, const int length0,
- const int *const word1, const int length1) {
- mDictionaryStructureWithBufferPolicy->removeBigramWords(word0, length0, word1, length1);
+bool Dictionary::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+ const int *const word, const int length) {
+ TimeKeeper::setCurrentTime();
+ return mDictionaryStructureWithBufferPolicy->removeNgramEntry(prevWordsInfo, word, length);
}
-void Dictionary::flush(const char *const filePath) {
- mDictionaryStructureWithBufferPolicy->flush(filePath);
+bool Dictionary::flush(const char *const filePath) {
+ TimeKeeper::setCurrentTime();
+ return mDictionaryStructureWithBufferPolicy->flush(filePath);
}
-void Dictionary::flushWithGC(const char *const filePath) {
- mDictionaryStructureWithBufferPolicy->flushWithGC(filePath);
+bool Dictionary::flushWithGC(const char *const filePath) {
+ TimeKeeper::setCurrentTime();
+ return mDictionaryStructureWithBufferPolicy->flushWithGC(filePath);
}
bool Dictionary::needsToRunGC(const bool mindsBlockByGC) {
+ TimeKeeper::setCurrentTime();
return mDictionaryStructureWithBufferPolicy->needsToRunGC(mindsBlockByGC);
}
-void Dictionary::getProperty(const char *const query, char *const outResult,
+void Dictionary::getProperty(const char *const query, const int queryLength, char *const outResult,
const int maxResultLength) {
- return mDictionaryStructureWithBufferPolicy->getProperty(query, outResult, maxResultLength);
+ TimeKeeper::setCurrentTime();
+ return mDictionaryStructureWithBufferPolicy->getProperty(query, queryLength, outResult,
+ maxResultLength);
+}
+
+const WordProperty Dictionary::getWordProperty(const int *const codePoints,
+ const int codePointCount) {
+ TimeKeeper::setCurrentTime();
+ return mDictionaryStructureWithBufferPolicy->getWordProperty(
+ codePoints, codePointCount);
+}
+
+int Dictionary::getNextWordAndNextToken(const int token, int *const outCodePoints,
+ int *const outCodePointCount) {
+ TimeKeeper::setCurrentTime();
+ return mDictionaryStructureWithBufferPolicy->getNextWordAndNextToken(
+ token, outCodePoints, outCodePointCount);
}
void Dictionary::logDictionaryInfo(JNIEnv *const env) const {
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h
index 0195d5bf0..3b41088fe 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h
@@ -17,18 +17,22 @@
#ifndef LATINIME_DICTIONARY_H
#define LATINIME_DICTIONARY_H
-#include <stdint.h>
+#include <memory>
#include "defines.h"
#include "jni.h"
+#include "suggest/core/dictionary/property/word_property.h"
+#include "suggest/core/policy/dictionary_header_structure_policy.h"
+#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+#include "suggest/core/suggest_interface.h"
namespace latinime {
-class BigramDictionary;
class DictionaryStructureWithBufferPolicy;
class DicTraverseSession;
+class PrevWordsInfo;
class ProximityInfo;
-class SuggestInterface;
+class SuggestionResults;
class SuggestOptions;
class Dictionary {
@@ -52,55 +56,70 @@ class Dictionary {
static const int KIND_MASK_FLAGS = 0xFFFFFF00; // Mask to get the flags
static const int KIND_FLAG_POSSIBLY_OFFENSIVE = 0x80000000;
static const int KIND_FLAG_EXACT_MATCH = 0x40000000;
+ static const int KIND_FLAG_EXACT_MATCH_WITH_INTENTIONAL_OMISSION = 0x20000000;
- Dictionary(JNIEnv *env,
- DictionaryStructureWithBufferPolicy *const dictionaryStructureWithBufferPoilcy);
+ Dictionary(JNIEnv *env, DictionaryStructureWithBufferPolicy::StructurePolicyPtr
+ dictionaryStructureWithBufferPolicy);
- int getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
+ void getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
- int inputSize, int *prevWordCodePoints, int prevWordLength, int commitPoint,
- const SuggestOptions *const suggestOptions, int *outWords, int *frequencies,
- int *spaceIndices, int *outputTypes, int *outputAutoCommitFirstWordConfidence) const;
+ int inputSize, const PrevWordsInfo *const prevWordsInfo,
+ const SuggestOptions *const suggestOptions, const float languageWeight,
+ SuggestionResults *const outSuggestionResults) const;
- int getBigrams(const int *word, int length, int *outWords, int *frequencies,
- int *outputTypes) const;
+ void getPredictions(const PrevWordsInfo *const prevWordsInfo,
+ SuggestionResults *const outSuggestionResults) const;
int getProbability(const int *word, int length) const;
- int getBigramProbability(const int *word0, int length0, const int *word1, int length1) const;
+ int getMaxProbabilityOfExactMatches(const int *word, int length) const;
- void addUnigramWord(const int *const word, const int length, const int probability);
+ int getNgramProbability(const PrevWordsInfo *const prevWordsInfo,
+ const int *word, int length) const;
- void addBigramWords(const int *const word0, const int length0, const int *const word1,
- const int length1, const int probability);
+ bool addUnigramEntry(const int *const codePoints, const int codePointCount,
+ const UnigramProperty *const unigramProperty);
- void removeBigramWords(const int *const word0, const int length0, const int *const word1,
- const int length1);
+ bool removeUnigramEntry(const int *const codePoints, const int codePointCount);
- void flush(const char *const filePath);
+ bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+ const BigramProperty *const bigramProperty);
- void flushWithGC(const char *const filePath);
+ bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word,
+ const int length);
+
+ bool flush(const char *const filePath);
+
+ bool flushWithGC(const char *const filePath);
bool needsToRunGC(const bool mindsBlockByGC);
- void getProperty(const char *const query, char *const outResult,
+ void getProperty(const char *const query, const int queryLength, char *const outResult,
const int maxResultLength);
+ const WordProperty getWordProperty(const int *const codePoints, const int codePointCount);
+
+ // Iterates over all words in the dictionary.
+ // Pass the returned token to the next call to get the next word. If token is 0, this method
+ // starts a new iteration over the dictionary.
+ int getNextWordAndNextToken(const int token, int *const outCodePoints,
+ int *const outCodePointCount);
+
const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const {
- return mDictionaryStructureWithBufferPolicy;
+ return mDictionaryStructureWithBufferPolicy.get();
}
- virtual ~Dictionary();
-
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Dictionary);
+ typedef std::unique_ptr<SuggestInterface> SuggestInterfacePtr;
+
static const int HEADER_ATTRIBUTE_BUFFER_SIZE;
- DictionaryStructureWithBufferPolicy *const mDictionaryStructureWithBufferPolicy;
- const BigramDictionary *const mBigramDictionary;
- const SuggestInterface *const mGestureSuggest;
- const SuggestInterface *const mTypingSuggest;
+ const DictionaryStructureWithBufferPolicy::StructurePolicyPtr
+ mDictionaryStructureWithBufferPolicy;
+ const SuggestInterfacePtr mGestureSuggest;
+ const SuggestInterfacePtr mTypingSuggest;
void logDictionaryInfo(JNIEnv *const env) const;
};
diff --git a/native/jni/src/suggest/core/dictionary/dictionary_utils.cpp b/native/jni/src/suggest/core/dictionary/dictionary_utils.cpp
new file mode 100644
index 000000000..b94966cbe
--- /dev/null
+++ b/native/jni/src/suggest/core/dictionary/dictionary_utils.cpp
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/core/dictionary/dictionary_utils.h"
+
+#include "suggest/core/dicnode/dic_node.h"
+#include "suggest/core/dicnode/dic_node_priority_queue.h"
+#include "suggest/core/dicnode/dic_node_vector.h"
+#include "suggest/core/dictionary/dictionary.h"
+#include "suggest/core/dictionary/digraph_utils.h"
+#include "suggest/core/session/prev_words_info.h"
+#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+
+namespace latinime {
+
+/* static */ int DictionaryUtils::getMaxProbabilityOfExactMatches(
+ const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
+ const int *const codePoints, const int codePointCount) {
+ std::vector<DicNode> current;
+ std::vector<DicNode> next;
+
+ // No prev words information.
+ PrevWordsInfo emptyPrevWordsInfo;
+ int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+ emptyPrevWordsInfo.getPrevWordsTerminalPtNodePos(dictionaryStructurePolicy,
+ prevWordsPtNodePos, false /* tryLowerCaseSearch */);
+ current.emplace_back();
+ DicNodeUtils::initAsRoot(dictionaryStructurePolicy, prevWordsPtNodePos, &current.front());
+ for (int i = 0; i < codePointCount; ++i) {
+ // The base-lower input is used to ignore case errors and accent errors.
+ const int codePoint = CharUtils::toBaseLowerCase(codePoints[i]);
+ for (const DicNode &dicNode : current) {
+ if (dicNode.isInDigraph() && dicNode.getNodeCodePoint() == codePoint) {
+ next.emplace_back(dicNode);
+ next.back().advanceDigraphIndex();
+ continue;
+ }
+ processChildDicNodes(dictionaryStructurePolicy, codePoint, &dicNode, &next);
+ }
+ current.clear();
+ current.swap(next);
+ }
+
+ int maxProbability = NOT_A_PROBABILITY;
+ for (const DicNode &dicNode : current) {
+ if (!dicNode.isTerminalDicNode()) {
+ continue;
+ }
+ // dicNode can contain case errors, accent errors, intentional omissions or digraphs.
+ maxProbability = std::max(maxProbability, dicNode.getProbability());
+ }
+ return maxProbability;
+}
+
+/* static */ void DictionaryUtils::processChildDicNodes(
+ const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
+ const int inputCodePoint, const DicNode *const parentDicNode,
+ std::vector<DicNode> *const outDicNodes) {
+ DicNodeVector childDicNodes;
+ DicNodeUtils::getAllChildDicNodes(parentDicNode, dictionaryStructurePolicy, &childDicNodes);
+ for (int childIndex = 0; childIndex < childDicNodes.getSizeAndLock(); ++childIndex) {
+ DicNode *const childDicNode = childDicNodes[childIndex];
+ const int codePoint = CharUtils::toBaseLowerCase(childDicNode->getNodeCodePoint());
+ if (inputCodePoint == codePoint) {
+ outDicNodes->emplace_back(*childDicNode);
+ }
+ if (childDicNode->canBeIntentionalOmission()) {
+ processChildDicNodes(dictionaryStructurePolicy, inputCodePoint, childDicNode,
+ outDicNodes);
+ }
+ if (DigraphUtils::hasDigraphForCodePoint(
+ dictionaryStructurePolicy->getHeaderStructurePolicy(),
+ childDicNode->getNodeCodePoint())) {
+ childDicNode->advanceDigraphIndex();
+ if (childDicNode->getNodeCodePoint() == codePoint) {
+ childDicNode->advanceDigraphIndex();
+ outDicNodes->emplace_back(*childDicNode);
+ }
+ }
+ }
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h b/native/jni/src/suggest/core/dictionary/dictionary_utils.h
index 8cebc3b16..358ebf674 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary_utils.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2013 The Android Open Source Project
+ * Copyright (C) 2014 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,23 +14,31 @@
* limitations under the License.
*/
-#ifndef LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H
-#define LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H
+#ifndef LATINIME_DICTIONARY_UTILS_H
+#define LATINIME_DICTIONARY_UTILS_H
-#include <stdint.h>
+#include <vector>
#include "defines.h"
-#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
namespace latinime {
-class DictionaryStructureWithBufferPolicyFactory {
+class DictionaryStructureWithBufferPolicy;
+class DicNode;
+
+class DictionaryUtils {
public:
- static DictionaryStructureWithBufferPolicy *newDictionaryStructureWithBufferPolicy(
- const char *const path, const int bufOffset, const int size, const bool isUpdatable);
+ static int getMaxProbabilityOfExactMatches(
+ const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
+ const int *const codePoints, const int codePointCount);
private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryStructureWithBufferPolicyFactory);
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryUtils);
+
+ static void processChildDicNodes(
+ const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
+ const int inputCodePoint, const DicNode *const parentDicNode,
+ std::vector<DicNode> *const outDicNodes);
};
} // namespace latinime
-#endif // LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H
+#endif // LATINIME_DICTIONARY_UTILS_H
diff --git a/native/jni/src/suggest/core/dictionary/digraph_utils.cpp b/native/jni/src/suggest/core/dictionary/digraph_utils.cpp
index 3271c1bfb..bb2ce5012 100644
--- a/native/jni/src/suggest/core/dictionary/digraph_utils.cpp
+++ b/native/jni/src/suggest/core/dictionary/digraph_utils.cpp
@@ -28,11 +28,8 @@ const DigraphUtils::digraph_t DigraphUtils::GERMAN_UMLAUT_DIGRAPHS[] =
{ { 'a', 'e', 0x00E4 }, // U+00E4 : LATIN SMALL LETTER A WITH DIAERESIS
{ 'o', 'e', 0x00F6 }, // U+00F6 : LATIN SMALL LETTER O WITH DIAERESIS
{ 'u', 'e', 0x00FC } }; // U+00FC : LATIN SMALL LETTER U WITH DIAERESIS
-const DigraphUtils::digraph_t DigraphUtils::FRENCH_LIGATURES_DIGRAPHS[] =
- { { 'a', 'e', 0x00E6 }, // U+00E6 : LATIN SMALL LETTER AE
- { 'o', 'e', 0x0153 } }; // U+0153 : LATIN SMALL LIGATURE OE
const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] =
- { DIGRAPH_TYPE_GERMAN_UMLAUT, DIGRAPH_TYPE_FRENCH_LIGATURES };
+ { DIGRAPH_TYPE_GERMAN_UMLAUT };
/* static */ bool DigraphUtils::hasDigraphForCodePoint(
const DictionaryHeaderStructurePolicy *const headerPolicy,
@@ -50,9 +47,6 @@ const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] =
if (headerPolicy->requiresGermanUmlautProcessing()) {
return DIGRAPH_TYPE_GERMAN_UMLAUT;
}
- if (headerPolicy->requiresFrenchLigatureProcessing()) {
- return DIGRAPH_TYPE_FRENCH_LIGATURES;
- }
return DIGRAPH_TYPE_NONE;
}
@@ -86,15 +80,11 @@ const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] =
*digraphs = GERMAN_UMLAUT_DIGRAPHS;
return NELEMS(GERMAN_UMLAUT_DIGRAPHS);
}
- if (digraphType == DIGRAPH_TYPE_FRENCH_LIGATURES) {
- *digraphs = FRENCH_LIGATURES_DIGRAPHS;
- return NELEMS(FRENCH_LIGATURES_DIGRAPHS);
- }
return 0;
}
/**
- * Returns the digraph for the input composite glyph codepoint, or 0 if none exists.
+ * Returns the digraph for the input composite glyph codepoint, or nullptr if none exists.
* compositeGlyphCodePoint: the method returns the digraph corresponding to this codepoint.
*/
/* static */ const DigraphUtils::digraph_t *DigraphUtils::getDigraphForCodePoint(
@@ -106,17 +96,17 @@ const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] =
return digraph;
}
}
- return 0;
+ return nullptr;
}
/**
- * Returns the digraph for the input composite glyph codepoint, or 0 if none exists.
+ * Returns the digraph for the input composite glyph codepoint, or nullptr if none exists.
* digraphType: the type of digraphs supported.
* compositeGlyphCodePoint: the method returns the digraph corresponding to this codepoint.
*/
/* static */ const DigraphUtils::digraph_t *DigraphUtils::getDigraphForDigraphTypeAndCodePoint(
const DigraphUtils::DigraphType digraphType, const int compositeGlyphCodePoint) {
- const DigraphUtils::digraph_t *digraphs = 0;
+ const DigraphUtils::digraph_t *digraphs = nullptr;
const int compositeGlyphLowerCodePoint = CharUtils::toLowerCase(compositeGlyphCodePoint);
const int digraphsSize =
DigraphUtils::getAllDigraphsForDigraphTypeAndReturnSize(digraphType, &digraphs);
@@ -125,7 +115,7 @@ const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] =
return &digraphs[i];
}
}
- return 0;
+ return nullptr;
}
} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/digraph_utils.h b/native/jni/src/suggest/core/dictionary/digraph_utils.h
index 6ae16e390..bec2cd6e2 100644
--- a/native/jni/src/suggest/core/dictionary/digraph_utils.h
+++ b/native/jni/src/suggest/core/dictionary/digraph_utils.h
@@ -34,7 +34,6 @@ class DigraphUtils {
typedef enum {
DIGRAPH_TYPE_NONE,
DIGRAPH_TYPE_GERMAN_UMLAUT,
- DIGRAPH_TYPE_FRENCH_LIGATURES
} DigraphType;
typedef struct { int first; int second; int compositeGlyph; } digraph_t;
@@ -55,7 +54,6 @@ class DigraphUtils {
const DigraphType digraphType, const int compositeGlyphCodePoint);
static const digraph_t GERMAN_UMLAUT_DIGRAPHS[];
- static const digraph_t FRENCH_LIGATURES_DIGRAPHS[];
static const DigraphType USED_DIGRAPH_TYPES[];
};
} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/error_type_utils.cpp b/native/jni/src/suggest/core/dictionary/error_type_utils.cpp
new file mode 100644
index 000000000..b6bf7a98c
--- /dev/null
+++ b/native/jni/src/suggest/core/dictionary/error_type_utils.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/core/dictionary/error_type_utils.h"
+
+namespace latinime {
+
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::NOT_AN_ERROR = 0x0;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_CASE_ERROR = 0x1;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR = 0x2;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_DIGRAPH = 0x4;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::INTENTIONAL_OMISSION = 0x8;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::EDIT_CORRECTION = 0x10;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::PROXIMITY_CORRECTION = 0x20;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::COMPLETION = 0x40;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::NEW_WORD = 0x80;
+
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::ERRORS_TREATED_AS_AN_EXACT_MATCH =
+ NOT_AN_ERROR | MATCH_WITH_CASE_ERROR | MATCH_WITH_ACCENT_ERROR | MATCH_WITH_DIGRAPH;
+
+const ErrorTypeUtils::ErrorType
+ ErrorTypeUtils::ERRORS_TREATED_AS_AN_EXACT_MATCH_WITH_INTENTIONAL_OMISSION =
+ ERRORS_TREATED_AS_AN_EXACT_MATCH | INTENTIONAL_OMISSION;
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/error_type_utils.h b/native/jni/src/suggest/core/dictionary/error_type_utils.h
new file mode 100644
index 000000000..e3e76b238
--- /dev/null
+++ b/native/jni/src/suggest/core/dictionary/error_type_utils.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_ERROR_TYPE_UTILS_H
+#define LATINIME_ERROR_TYPE_UTILS_H
+
+#include <cstdint>
+
+#include "defines.h"
+
+namespace latinime {
+
+class ErrorTypeUtils {
+ public:
+ // ErrorType is mainly determined by CorrectionType, but it also depends on whether the
+ // correction has actually been performed.
+ typedef uint32_t ErrorType;
+
+ static const ErrorType NOT_AN_ERROR;
+ static const ErrorType MATCH_WITH_CASE_ERROR;
+ static const ErrorType MATCH_WITH_ACCENT_ERROR;
+ static const ErrorType MATCH_WITH_DIGRAPH;
+ // Treat the error as an intentional omission when the CorrectionType is omission and the node
+ // can be an intentional omission.
+ static const ErrorType INTENTIONAL_OMISSION;
+ // Substitution, omission and transposition
+ static const ErrorType EDIT_CORRECTION;
+ // Proximity error
+ static const ErrorType PROXIMITY_CORRECTION;
+ // Completion
+ static const ErrorType COMPLETION;
+ // New word
+ // TODO: Remove.
+ // A new word error should be an edit correction error or a proximity correction error.
+ static const ErrorType NEW_WORD;
+
+ static bool isExactMatch(const ErrorType containedErrorTypes) {
+ return (containedErrorTypes & ~ERRORS_TREATED_AS_AN_EXACT_MATCH) == 0;
+ }
+
+ static bool isExactMatchWithIntentionalOmission(const ErrorType containedErrorTypes) {
+ return (containedErrorTypes
+ & ~ERRORS_TREATED_AS_AN_EXACT_MATCH_WITH_INTENTIONAL_OMISSION) == 0;
+ }
+
+ static bool isEditCorrectionError(const ErrorType errorType) {
+ return (errorType & EDIT_CORRECTION) != 0;
+ }
+
+ static bool isProximityCorrectionError(const ErrorType errorType) {
+ return (errorType & PROXIMITY_CORRECTION) != 0;
+ }
+
+ static bool isCompletion(const ErrorType errorType) {
+ return (errorType & COMPLETION) != 0;
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(ErrorTypeUtils);
+
+ static const ErrorType ERRORS_TREATED_AS_AN_EXACT_MATCH;
+ static const ErrorType ERRORS_TREATED_AS_AN_EXACT_MATCH_WITH_INTENTIONAL_OMISSION;
+};
+} // namespace latinime
+#endif // LATINIME_ERROR_TYPE_UTILS_H
diff --git a/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp b/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp
index b1d2f4b4d..105224126 100644
--- a/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp
+++ b/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp
@@ -17,6 +17,7 @@
#include "suggest/core/dictionary/multi_bigram_map.h"
#include <cstddef>
+#include <unordered_map>
namespace latinime {
@@ -30,4 +31,75 @@ const size_t MultiBigramMap::MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP = 25;
// Most common previous word contexts currently have 100 bigrams
const int MultiBigramMap::BigramMap::DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP = 100;
+// Look up the bigram probability for the given word pair from the cached bigram maps.
+// Also caches the bigrams if there is space remaining and they have not been cached already.
+int MultiBigramMap::getBigramProbability(
+ const DictionaryStructureWithBufferPolicy *const structurePolicy,
+ const int wordPosition, const int nextWordPosition, const int unigramProbability) {
+ std::unordered_map<int, BigramMap>::const_iterator mapPosition =
+ mBigramMaps.find(wordPosition);
+ if (mapPosition != mBigramMaps.end()) {
+ return mapPosition->second.getBigramProbability(structurePolicy, nextWordPosition,
+ unigramProbability);
+ }
+ if (mBigramMaps.size() < MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP) {
+ addBigramsForWordPosition(structurePolicy, wordPosition);
+ return mBigramMaps[wordPosition].getBigramProbability(structurePolicy,
+ nextWordPosition, unigramProbability);
+ }
+ return readBigramProbabilityFromBinaryDictionary(structurePolicy, wordPosition,
+ nextWordPosition, unigramProbability);
+}
+
+void MultiBigramMap::BigramMap::init(
+ const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos) {
+ const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos);
+ BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
+ bigramsListPos);
+ while (bigramsIt.hasNext()) {
+ bigramsIt.next();
+ if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) {
+ continue;
+ }
+ mBigramMap[bigramsIt.getBigramPos()] = bigramsIt.getProbability();
+ mBloomFilter.setInFilter(bigramsIt.getBigramPos());
+ }
+}
+
+int MultiBigramMap::BigramMap::getBigramProbability(
+ const DictionaryStructureWithBufferPolicy *const structurePolicy,
+ const int nextWordPosition, const int unigramProbability) const {
+ int bigramProbability = NOT_A_PROBABILITY;
+ if (mBloomFilter.isInFilter(nextWordPosition)) {
+ const std::unordered_map<int, int>::const_iterator bigramProbabilityIt =
+ mBigramMap.find(nextWordPosition);
+ if (bigramProbabilityIt != mBigramMap.end()) {
+ bigramProbability = bigramProbabilityIt->second;
+ }
+ }
+ return structurePolicy->getProbability(unigramProbability, bigramProbability);
+}
+
+void MultiBigramMap::addBigramsForWordPosition(
+ const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position) {
+ mBigramMaps[position].init(structurePolicy, position);
+}
+
+int MultiBigramMap::readBigramProbabilityFromBinaryDictionary(
+ const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos,
+ const int nextWordPosition, const int unigramProbability) {
+ int bigramProbability = NOT_A_PROBABILITY;
+ const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos);
+ BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
+ bigramsListPos);
+ while (bigramsIt.hasNext()) {
+ bigramsIt.next();
+ if (bigramsIt.getBigramPos() == nextWordPosition) {
+ bigramProbability = bigramsIt.getProbability();
+ break;
+ }
+ }
+ return structurePolicy->getProbability(unigramProbability, bigramProbability);
+}
+
} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h
index 4633c07b0..195b5e22f 100644
--- a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h
+++ b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h
@@ -18,12 +18,12 @@
#define LATINIME_MULTI_BIGRAM_MAP_H
#include <cstddef>
+#include <unordered_map>
#include "defines.h"
#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
#include "suggest/core/dictionary/bloom_filter.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
-#include "utils/hash_map_compat.h"
namespace latinime {
@@ -38,21 +38,7 @@ class MultiBigramMap {
// Look up the bigram probability for the given word pair from the cached bigram maps.
// Also caches the bigrams if there is space remaining and they have not been cached already.
int getBigramProbability(const DictionaryStructureWithBufferPolicy *const structurePolicy,
- const int wordPosition, const int nextWordPosition, const int unigramProbability) {
- hash_map_compat<int, BigramMap>::const_iterator mapPosition =
- mBigramMaps.find(wordPosition);
- if (mapPosition != mBigramMaps.end()) {
- return mapPosition->second.getBigramProbability(structurePolicy, nextWordPosition,
- unigramProbability);
- }
- if (mBigramMaps.size() < MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP) {
- addBigramsForWordPosition(structurePolicy, wordPosition);
- return mBigramMaps[wordPosition].getBigramProbability(structurePolicy,
- nextWordPosition, unigramProbability);
- }
- return readBigramProbabilityFromBinaryDictionary(structurePolicy, wordPosition,
- nextWordPosition, unigramProbability);
- }
+ const int wordPosition, const int nextWordPosition, const int unigramProbability);
void clear() {
mBigramMaps.clear();
@@ -67,66 +53,29 @@ class MultiBigramMap {
~BigramMap() {}
void init(const DictionaryStructureWithBufferPolicy *const structurePolicy,
- const int nodePos) {
- const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos);
- BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
- bigramsListPos);
- while (bigramsIt.hasNext()) {
- bigramsIt.next();
- if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) {
- continue;
- }
- mBigramMap[bigramsIt.getBigramPos()] = bigramsIt.getProbability();
- mBloomFilter.setInFilter(bigramsIt.getBigramPos());
- }
- }
+ const int nodePos);
- AK_FORCE_INLINE int getBigramProbability(
+ int getBigramProbability(
const DictionaryStructureWithBufferPolicy *const structurePolicy,
- const int nextWordPosition, const int unigramProbability) const {
- int bigramProbability = NOT_A_PROBABILITY;
- if (mBloomFilter.isInFilter(nextWordPosition)) {
- const hash_map_compat<int, int>::const_iterator bigramProbabilityIt =
- mBigramMap.find(nextWordPosition);
- if (bigramProbabilityIt != mBigramMap.end()) {
- bigramProbability = bigramProbabilityIt->second;
- }
- }
- return structurePolicy->getProbability(unigramProbability, bigramProbability);
- }
+ const int nextWordPosition, const int unigramProbability) const;
private:
// NOTE: The BigramMap class doesn't use DISALLOW_COPY_AND_ASSIGN() because its default
// copy constructor is needed for use in hash_map.
static const int DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP;
- hash_map_compat<int, int> mBigramMap;
+ std::unordered_map<int, int> mBigramMap;
BloomFilter mBloomFilter;
};
- AK_FORCE_INLINE void addBigramsForWordPosition(
- const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position) {
- mBigramMaps[position].init(structurePolicy, position);
- }
+ void addBigramsForWordPosition(
+ const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position);
- AK_FORCE_INLINE int readBigramProbabilityFromBinaryDictionary(
+ int readBigramProbabilityFromBinaryDictionary(
const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos,
- const int nextWordPosition, const int unigramProbability) {
- int bigramProbability = NOT_A_PROBABILITY;
- const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos);
- BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
- bigramsListPos);
- while (bigramsIt.hasNext()) {
- bigramsIt.next();
- if (bigramsIt.getBigramPos() == nextWordPosition) {
- bigramProbability = bigramsIt.getProbability();
- break;
- }
- }
- return structurePolicy->getProbability(unigramProbability, bigramProbability);
- }
+ const int nextWordPosition, const int unigramProbability);
static const size_t MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP;
- hash_map_compat<int, BigramMap> mBigramMaps;
+ std::unordered_map<int, BigramMap> mBigramMaps;
};
} // namespace latinime
#endif // LATINIME_MULTI_BIGRAM_MAP_H
diff --git a/native/jni/src/suggest/core/dictionary/property/bigram_property.h b/native/jni/src/suggest/core/dictionary/property/bigram_property.h
new file mode 100644
index 000000000..343af143c
--- /dev/null
+++ b/native/jni/src/suggest/core/dictionary/property/bigram_property.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_BIGRAM_PROPERTY_H
+#define LATINIME_BIGRAM_PROPERTY_H
+
+#include <vector>
+
+#include "defines.h"
+
+namespace latinime {
+
+// TODO: Change to NgramProperty.
+class BigramProperty {
+ public:
+ BigramProperty(const std::vector<int> *const targetCodePoints,
+ const int probability, const int timestamp, const int level, const int count)
+ : mTargetCodePoints(*targetCodePoints), mProbability(probability),
+ mTimestamp(timestamp), mLevel(level), mCount(count) {}
+
+ const std::vector<int> *getTargetCodePoints() const {
+ return &mTargetCodePoints;
+ }
+
+ int getProbability() const {
+ return mProbability;
+ }
+
+ int getTimestamp() const {
+ return mTimestamp;
+ }
+
+ int getLevel() const {
+ return mLevel;
+ }
+
+ int getCount() const {
+ return mCount;
+ }
+
+ private:
+ // Default copy constructor and assignment operator are kept for use in std::vector.
+ DISALLOW_DEFAULT_CONSTRUCTOR(BigramProperty);
+
+ // TODO: Make members const.
+ std::vector<int> mTargetCodePoints;
+ int mProbability;
+ int mTimestamp;
+ int mLevel;
+ int mCount;
+};
+} // namespace latinime
+#endif // LATINIME_BIGRAM_PROPERTY_H
diff --git a/native/jni/src/suggest/core/dictionary/property/unigram_property.h b/native/jni/src/suggest/core/dictionary/property/unigram_property.h
new file mode 100644
index 000000000..902eb000f
--- /dev/null
+++ b/native/jni/src/suggest/core/dictionary/property/unigram_property.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_UNIGRAM_PROPERTY_H
+#define LATINIME_UNIGRAM_PROPERTY_H
+
+#include <vector>
+
+#include "defines.h"
+
+namespace latinime {
+
+class UnigramProperty {
+ public:
+ class ShortcutProperty {
+ public:
+ ShortcutProperty(const std::vector<int> *const targetCodePoints, const int probability)
+ : mTargetCodePoints(*targetCodePoints), mProbability(probability) {}
+
+ const std::vector<int> *getTargetCodePoints() const {
+ return &mTargetCodePoints;
+ }
+
+ int getProbability() const {
+ return mProbability;
+ }
+
+ private:
+ // Default copy constructor and assignment operator are kept for use in std::vector.
+ DISALLOW_DEFAULT_CONSTRUCTOR(ShortcutProperty);
+
+ // TODO: Make members const.
+ std::vector<int> mTargetCodePoints;
+ int mProbability;
+ };
+
+ UnigramProperty()
+ : mRepresentsBeginningOfSentence(false), mIsNotAWord(false), mIsBlacklisted(false),
+ mProbability(NOT_A_PROBABILITY), mTimestamp(NOT_A_TIMESTAMP), mLevel(0), mCount(0),
+ mShortcuts() {}
+
+ UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord,
+ const bool isBlacklisted, const int probability, const int timestamp, const int level,
+ const int count, const std::vector<ShortcutProperty> *const shortcuts)
+ : mRepresentsBeginningOfSentence(representsBeginningOfSentence),
+ mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability),
+ mTimestamp(timestamp), mLevel(level), mCount(count), mShortcuts(*shortcuts) {}
+
+ bool representsBeginningOfSentence() const {
+ return mRepresentsBeginningOfSentence;
+ }
+
+ bool isNotAWord() const {
+ return mIsNotAWord;
+ }
+
+ bool isBlacklisted() const {
+ return mIsBlacklisted;
+ }
+
+ bool hasShortcuts() const {
+ return !mShortcuts.empty();
+ }
+
+ int getProbability() const {
+ return mProbability;
+ }
+
+ int getTimestamp() const {
+ return mTimestamp;
+ }
+
+ int getLevel() const {
+ return mLevel;
+ }
+
+ int getCount() const {
+ return mCount;
+ }
+
+ const std::vector<ShortcutProperty> &getShortcuts() const {
+ return mShortcuts;
+ }
+
+ private:
+ // Default copy constructor is kept so that instances can be returned by value.
+ DISALLOW_ASSIGNMENT_OPERATOR(UnigramProperty);
+
+ // TODO: Make members const.
+ bool mRepresentsBeginningOfSentence;
+ bool mIsNotAWord;
+ bool mIsBlacklisted;
+ int mProbability;
+ // Historical information
+ int mTimestamp;
+ int mLevel;
+ int mCount;
+ std::vector<ShortcutProperty> mShortcuts;
+};
+} // namespace latinime
+#endif // LATINIME_UNIGRAM_PROPERTY_H
diff --git a/native/jni/src/suggest/core/dictionary/property/word_property.cpp b/native/jni/src/suggest/core/dictionary/property/word_property.cpp
new file mode 100644
index 000000000..5bdd5606b
--- /dev/null
+++ b/native/jni/src/suggest/core/dictionary/property/word_property.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/core/dictionary/property/word_property.h"
+
+#include "utils/jni_data_utils.h"
+
+namespace latinime {
+
+// Writes this word's properties into the Java-side containers supplied by the caller.
+//
+// - outCodePoints receives the word's code points (at most MAX_WORD_LENGTH).
+// - outFlags receives, in order: isNotAWord, isBlacklisted, hasBigrams, hasShortcuts,
+//   representsBeginningOfSentence.
+// - outProbabilityInfo receives, in order: probability, timestamp, level, count.
+// - For each bigram, an int[] of target code points is added to outBigramTargets and an int[]
+//   {probability, timestamp, level, count} is added to outBigramProbabilities.
+// - For each shortcut, an int[] of target code points is added to outShortcutTargets and an
+//   Integer probability is added to outShortcutProbabilities.
+//
+// The four list arguments are called through the add(Object) method looked up on
+// java.util.ArrayList.
+void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
+        jbooleanArray outFlags, jintArray outProbabilityInfo, jobject outBigramTargets,
+        jobject outBigramProbabilities, jobject outShortcutTargets,
+        jobject outShortcutProbabilities) const {
+    JniDataUtils::outputCodePoints(env, outCodePoints, 0 /* start */,
+            MAX_WORD_LENGTH /* maxLength */, mCodePoints.data(), mCodePoints.size(),
+            false /* needsNullTermination */);
+    jboolean flags[] = {mUnigramProperty.isNotAWord(), mUnigramProperty.isBlacklisted(),
+            !mBigrams.empty(), mUnigramProperty.hasShortcuts(),
+            mUnigramProperty.representsBeginningOfSentence()};
+    env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags);
+    int probabilityInfo[] = {mUnigramProperty.getProbability(), mUnigramProperty.getTimestamp(),
+            mUnigramProperty.getLevel(), mUnigramProperty.getCount()};
+    env->SetIntArrayRegion(outProbabilityInfo, 0 /* start */, NELEMS(probabilityInfo),
+            probabilityInfo);
+
+    jclass integerClass = env->FindClass("java/lang/Integer");
+    jmethodID intToIntegerConstructorId = env->GetMethodID(integerClass, "<init>", "(I)V");
+    jclass arrayListClass = env->FindClass("java/util/ArrayList");
+    jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z");
+
+    // Output bigrams.
+    for (const auto &bigramProperty : mBigrams) {
+        const std::vector<int> *const word1CodePoints = bigramProperty.getTargetCodePoints();
+        jintArray bigramWord1CodePointArray = env->NewIntArray(word1CodePoints->size());
+        JniDataUtils::outputCodePoints(env, bigramWord1CodePointArray, 0 /* start */,
+                word1CodePoints->size(), word1CodePoints->data(), word1CodePoints->size(),
+                false /* needsNullTermination */);
+        env->CallBooleanMethod(outBigramTargets, addMethodId, bigramWord1CodePointArray);
+        // Local references are released eagerly because one is created per loop iteration.
+        env->DeleteLocalRef(bigramWord1CodePointArray);
+
+        int bigramProbabilityInfo[] = {bigramProperty.getProbability(),
+                bigramProperty.getTimestamp(), bigramProperty.getLevel(),
+                bigramProperty.getCount()};
+        jintArray bigramProbabilityInfoArray = env->NewIntArray(NELEMS(bigramProbabilityInfo));
+        env->SetIntArrayRegion(bigramProbabilityInfoArray, 0 /* start */,
+                NELEMS(bigramProbabilityInfo), bigramProbabilityInfo);
+        env->CallBooleanMethod(outBigramProbabilities, addMethodId, bigramProbabilityInfoArray);
+        env->DeleteLocalRef(bigramProbabilityInfoArray);
+    }
+
+    // Output shortcuts.
+    for (const auto &shortcut : mUnigramProperty.getShortcuts()) {
+        const std::vector<int> *const targetCodePoints = shortcut.getTargetCodePoints();
+        jintArray shortcutTargetCodePointArray = env->NewIntArray(targetCodePoints->size());
+        // The array is filled only through outputCodePoints(), exactly like the bigram targets
+        // above; a separate raw SetIntArrayRegion() copy beforehand would be redundant.
+        JniDataUtils::outputCodePoints(env, shortcutTargetCodePointArray, 0 /* start */,
+                targetCodePoints->size(), targetCodePoints->data(), targetCodePoints->size(),
+                false /* needsNullTermination */);
+        env->CallBooleanMethod(outShortcutTargets, addMethodId, shortcutTargetCodePointArray);
+        env->DeleteLocalRef(shortcutTargetCodePointArray);
+        jobject integerProbability = env->NewObject(integerClass, intToIntegerConstructorId,
+                shortcut.getProbability());
+        env->CallBooleanMethod(outShortcutProbabilities, addMethodId, integerProbability);
+        env->DeleteLocalRef(integerProbability);
+    }
+    env->DeleteLocalRef(integerClass);
+    env->DeleteLocalRef(arrayListClass);
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/property/word_property.h b/native/jni/src/suggest/core/dictionary/property/word_property.h
new file mode 100644
index 000000000..aa3e0b68a
--- /dev/null
+++ b/native/jni/src/suggest/core/dictionary/property/word_property.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_WORD_PROPERTY_H
+#define LATINIME_WORD_PROPERTY_H
+
+#include <vector>
+
+#include "defines.h"
+#include "jni.h"
+#include "suggest/core/dictionary/property/bigram_property.h"
+#include "suggest/core/dictionary/property/unigram_property.h"
+
+namespace latinime {
+
+// Value object that bundles what is known about one word (its code points, its unigram
+// property and its bigrams) so that the information can be returned to the Java side.
+class WordProperty {
+ public:
+    // Builds an empty instance; the default constructor is used to indicate an invalid word.
+    WordProperty() : mCodePoints(), mUnigramProperty(), mBigrams() {}
+
+    // Copies the pointed-to data into this object. All three pointers are dereferenced
+    // unconditionally.
+    WordProperty(const std::vector<int> *const codePoints,
+            const UnigramProperty *const unigramProperty,
+            const std::vector<BigramProperty> *const bigrams)
+            : mCodePoints(*codePoints),
+              mUnigramProperty(*unigramProperty),
+              mBigrams(*bigrams) {}
+
+    // Writes the held properties into the given Java arrays and lists.
+    void outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags,
+            jintArray outProbabilityInfo, jobject outBigramTargets,
+            jobject outBigramProbabilities, jobject outShortcutTargets,
+            jobject outShortcutProbabilities) const;
+
+    // Pointer to the unigram property stored inside this object.
+    const UnigramProperty *getUnigramProperty() const { return &mUnigramProperty; }
+
+    // Pointer to the bigram list stored inside this object.
+    const std::vector<BigramProperty> *getBigramProperties() const { return &mBigrams; }
+
+ private:
+    // The default copy constructor stays available so instances can be returned by value.
+    DISALLOW_ASSIGNMENT_OPERATOR(WordProperty);
+
+    const std::vector<int> mCodePoints;
+    const UnigramProperty mUnigramProperty;
+    const std::vector<BigramProperty> mBigrams;
+};
+} // namespace latinime
+#endif // LATINIME_WORD_PROPERTY_H
diff --git a/native/jni/src/suggest/core/dictionary/shortcut_utils.h b/native/jni/src/suggest/core/dictionary/shortcut_utils.h
deleted file mode 100644
index 9ccef020f..000000000
--- a/native/jni/src/suggest/core/dictionary/shortcut_utils.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_SHORTCUT_UTILS
-#define LATINIME_SHORTCUT_UTILS
-
-#include "defines.h"
-#include "suggest/core/dicnode/dic_node_utils.h"
-#include "suggest/core/dictionary/binary_dictionary_shortcut_iterator.h"
-
-namespace latinime {
-
-class ShortcutUtils {
- public:
- static int outputShortcuts(BinaryDictionaryShortcutIterator *const shortcutIt,
- int outputWordIndex, const int finalScore, int *const outputCodePoints,
- int *const frequencies, int *const outputTypes, const bool sameAsTyped) {
- int shortcutTarget[MAX_WORD_LENGTH];
- while (shortcutIt->hasNextShortcutTarget() && outputWordIndex < MAX_RESULTS) {
- bool isWhilelist;
- int shortcutTargetStringLength;
- shortcutIt->nextShortcutTarget(MAX_WORD_LENGTH, shortcutTarget,
- &shortcutTargetStringLength, &isWhilelist);
- int shortcutScore;
- int kind;
- if (isWhilelist && sameAsTyped) {
- shortcutScore = S_INT_MAX;
- kind = Dictionary::KIND_WHITELIST;
- } else {
- // shortcut entry's score == its base entry's score - 1
- shortcutScore = finalScore;
- // Protection against int underflow
- shortcutScore = max(S_INT_MIN + 1, shortcutScore) - 1;
- kind = Dictionary::KIND_SHORTCUT;
- }
- outputTypes[outputWordIndex] = kind;
- frequencies[outputWordIndex] = shortcutScore;
- frequencies[outputWordIndex] = max(S_INT_MIN + 1, shortcutScore) - 1;
- const int startIndex2 = outputWordIndex * MAX_WORD_LENGTH;
- DicNodeUtils::appendTwoWords(0, 0, shortcutTarget, shortcutTargetStringLength,
- &outputCodePoints[startIndex2]);
- ++outputWordIndex;
- }
- return outputWordIndex;
- }
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(ShortcutUtils);
-};
-} // namespace latinime
-#endif // LATINIME_SHORTCUT_UTILS
diff --git a/native/jni/src/suggest/core/layout/normal_distribution.h b/native/jni/src/suggest/core/layout/normal_distribution.h
new file mode 100644
index 000000000..5f21a59c0
--- /dev/null
+++ b/native/jni/src/suggest/core/layout/normal_distribution.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_NORMAL_DISTRIBUTION_H
+#define LATINIME_NORMAL_DISTRIBUTION_H
+
+#include <cmath>
+
+#include "defines.h"
+#include "suggest/core/layout/geometry_utils.h"
+
+namespace latinime {
+
+// One-dimensional normal (Gaussian) distribution N(u, sigma^2).
+class NormalDistribution {
+ public:
+    // The two sigma-dependent factors of the density function are evaluated once here, so a
+    // density query only has to shift, square, and call expf() once.
+    NormalDistribution(const float u, const float sigma)
+            : mU(u),
+              mPreComputedNonExpPart(
+                      1.0f / sqrtf(2.0f * M_PI_F * GeometryUtils::SQUARE_FLOAT(sigma))),
+              mPreComputedExponentPart(-1.0f / (2.0f * GeometryUtils::SQUARE_FLOAT(sigma))) {}
+
+    // Returns the value of the probability density function at x.
+    float getProbabilityDensity(const float x) const {
+        const float deviation = x - mU;
+        const float exponent = mPreComputedExponentPart * GeometryUtils::SQUARE_FLOAT(deviation);
+        return mPreComputedNonExpPart * expf(exponent);
+    }
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(NormalDistribution);
+
+    // Mean value.
+    const float mU;
+    // = 1 / sqrt(2 * PI * sigma^2)
+    const float mPreComputedNonExpPart;
+    // = -1 / (2 * sigma^2)
+    const float mPreComputedExponentPart;
+};
+} // namespace latinime
+#endif // LATINIME_NORMAL_DISTRIBUTION_H
diff --git a/native/jni/src/suggest/core/layout/normal_distribution_2d.h b/native/jni/src/suggest/core/layout/normal_distribution_2d.h
new file mode 100644
index 000000000..3bc0a0153
--- /dev/null
+++ b/native/jni/src/suggest/core/layout/normal_distribution_2d.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_NORMAL_DISTRIBUTION_2D_H
+#define LATINIME_NORMAL_DISTRIBUTION_2D_H
+
+#include <cmath>
+
+#include "defines.h"
+#include "suggest/core/layout/geometry_utils.h"
+#include "suggest/core/layout/normal_distribution.h"
+
+namespace latinime {
+
+// Normal distribution over a 2D plane. The covariance between its two axes is always zero,
+// but the distribution as a whole can be rotated by an angle theta.
+class NormalDistribution2D {
+ public:
+    // Both per-axis distributions are centered at zero; the mean (uX, uY) is applied as a shift
+    // when a density is queried.
+    NormalDistribution2D(const float uX, const float sigmaX, const float uY, const float sigmaY,
+            const float theta)
+            : mXDistribution(0.0f, sigmaX),
+              mYDistribution(0.0f, sigmaY),
+              mUX(uX),
+              mUY(uY),
+              mSinTheta(sinf(theta)),
+              mCosTheta(cosf(theta)) {}
+
+    // Returns the probability density at the point (x, y).
+    float getProbabilityDensity(const float x, const float y) const {
+        // Translate so that the mean becomes the origin.
+        const float dx = x - mUX;
+        const float dy = y - mUY;
+        // Rotate into the frame of the two per-axis distributions.
+        const float alongX = mCosTheta * dx + mSinTheta * dy;
+        const float alongY = -mSinTheta * dx + mCosTheta * dy;
+        // The density is the product of the two per-axis densities.
+        const float densityX = mXDistribution.getProbabilityDensity(alongX);
+        const float densityY = mYDistribution.getProbabilityDensity(alongY);
+        return densityX * densityY;
+    }
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(NormalDistribution2D);
+
+    const NormalDistribution mXDistribution;
+    const NormalDistribution mYDistribution;
+    const float mUX;
+    const float mUY;
+    const float mSinTheta;
+    const float mCosTheta;
+};
+} // namespace latinime
+#endif // LATINIME_NORMAL_DISTRIBUTION_2D_H
diff --git a/native/jni/src/suggest/core/layout/proximity_info.cpp b/native/jni/src/suggest/core/layout/proximity_info.cpp
index e64476d82..4c75a188e 100644
--- a/native/jni/src/suggest/core/layout/proximity_info.cpp
+++ b/native/jni/src/suggest/core/layout/proximity_info.cpp
@@ -18,6 +18,7 @@
#include "suggest/core/layout/proximity_info.h"
+#include <algorithm>
#include <cstring>
#include <cmath>
@@ -57,13 +58,12 @@ ProximityInfo::ProximityInfo(JNIEnv *env, const jstring localeJStr,
const jfloatArray sweetSpotCenterYs, const jfloatArray sweetSpotRadii)
: GRID_WIDTH(gridWidth), GRID_HEIGHT(gridHeight), MOST_COMMON_KEY_WIDTH(mostCommonKeyWidth),
MOST_COMMON_KEY_WIDTH_SQUARE(mostCommonKeyWidth * mostCommonKeyWidth),
- MOST_COMMON_KEY_HEIGHT(mostCommonKeyHeight),
NORMALIZED_SQUARED_MOST_COMMON_KEY_HYPOTENUSE(1.0f +
GeometryUtils::SQUARE_FLOAT(static_cast<float>(mostCommonKeyHeight) /
static_cast<float>(mostCommonKeyWidth))),
CELL_WIDTH((keyboardWidth + gridWidth - 1) / gridWidth),
CELL_HEIGHT((keyboardHeight + gridHeight - 1) / gridHeight),
- KEY_COUNT(min(keyCount, MAX_KEY_COUNT_IN_A_KEYBOARD)),
+ KEY_COUNT(std::min(keyCount, MAX_KEY_COUNT_IN_A_KEYBOARD)),
KEYBOARD_WIDTH(keyboardWidth), KEYBOARD_HEIGHT(keyboardHeight),
KEYBOARD_HYPOTENUSE(hypotf(KEYBOARD_WIDTH, KEYBOARD_HEIGHT)),
HAS_TOUCH_POSITION_CORRECTION_DATA(keyCount > 0 && keyXCoordinates && keyYCoordinates
@@ -71,7 +71,7 @@ ProximityInfo::ProximityInfo(JNIEnv *env, const jstring localeJStr,
&& sweetSpotCenterYs && sweetSpotRadii),
mProximityCharsArray(new int[GRID_WIDTH * GRID_HEIGHT * MAX_PROXIMITY_CHARS_SIZE
/* proximityCharsLength */]),
- mCodeToKeyMap() {
+ mLowerCodePointToKeyMap() {
/* Let's check the input array length here to make sure */
const jsize proximityCharsLength = env->GetArrayLength(proximityChars);
if (proximityCharsLength != GRID_WIDTH * GRID_HEIGHT * MAX_PROXIMITY_CHARS_SIZE) {
@@ -147,7 +147,14 @@ int ProximityInfo::getCodePointOf(const int keyIndex) const {
if (keyIndex < 0 || keyIndex >= KEY_COUNT) {
return NOT_A_CODE_POINT;
}
- return mKeyIndexToCodePointG[keyIndex];
+ return mKeyIndexToLowerCodePointG[keyIndex];
+}
+
+// Returns the code point recorded in mKeyIndexToOriginalCodePoint for keyIndex, or
+// NOT_A_CODE_POINT when keyIndex is outside [0, KEY_COUNT).
+int ProximityInfo::getOriginalCodePointOf(const int keyIndex) const {
+ if (keyIndex < 0 || keyIndex >= KEY_COUNT) {
+ return NOT_A_CODE_POINT;
+ }
+ return mKeyIndexToOriginalCodePoint[keyIndex];
}
void ProximityInfo::initializeG() {
@@ -164,8 +171,9 @@ void ProximityInfo::initializeG() {
const float gapY = sweetSpotCenterY - mCenterYsG[i];
mSweetSpotCenterYsG[i] = static_cast<int>(mCenterYsG[i] + gapY * verticalScale);
}
- mCodeToKeyMap[lowerCode] = i;
- mKeyIndexToCodePointG[i] = lowerCode;
+ mLowerCodePointToKeyMap[lowerCode] = i;
+ mKeyIndexToOriginalCodePoint[i] = code;
+ mKeyIndexToLowerCodePointG[i] = lowerCode;
}
for (int i = 0; i < KEY_COUNT; i++) {
mKeyKeyDistancesG[i][i] = 0;
@@ -218,7 +226,7 @@ int ProximityInfo::getKeyCenterXOfKeyIdG(
// When the referencePointY is NOT_A_COORDINATE, this method calculates the return value without
// using the line segment.
int ProximityInfo::getKeyCenterYOfKeyIdG(
- const int keyId, const int referencePointY, const bool isGeometric) const {
+ const int keyId, const int referencePointY, const bool isGeometric) const {
// TODO: Remove "isGeometric" and have separate "proximity_info"s for gesture and typing.
if (keyId < 0) {
return 0;
diff --git a/native/jni/src/suggest/core/layout/proximity_info.h b/native/jni/src/suggest/core/layout/proximity_info.h
index f25949001..d4e453736 100644
--- a/native/jni/src/suggest/core/layout/proximity_info.h
+++ b/native/jni/src/suggest/core/layout/proximity_info.h
@@ -17,10 +17,11 @@
#ifndef LATINIME_PROXIMITY_INFO_H
#define LATINIME_PROXIMITY_INFO_H
+#include <unordered_map>
+
#include "defines.h"
#include "jni.h"
#include "suggest/core/layout/proximity_info_utils.h"
-#include "utils/hash_map_compat.h"
namespace latinime {
@@ -35,10 +36,10 @@ class ProximityInfo {
const jfloatArray sweetSpotCenterYs, const jfloatArray sweetSpotRadii);
~ProximityInfo();
bool hasSpaceProximity(const int x, const int y) const;
- int getNormalizedSquaredDistance(const int inputIndex, const int proximityIndex) const;
float getNormalizedSquaredDistanceFromCenterFloatG(
const int keyId, const int x, const int y, const bool isGeometric) const;
int getCodePointOf(const int keyIndex) const;
+ int getOriginalCodePointOf(const int keyIndex) const;
bool hasSweetSpotData(const int keyIndex) const {
// When there are no calibration data for a key,
// the radius of the key is assigned to zero.
@@ -47,8 +48,6 @@ class ProximityInfo {
float getSweetSpotRadiiAt(int keyIndex) const { return mSweetSpotRadii[keyIndex]; }
float getSweetSpotCenterXAt(int keyIndex) const { return mSweetSpotCenterXs[keyIndex]; }
float getSweetSpotCenterYAt(int keyIndex) const { return mSweetSpotCenterYs[keyIndex]; }
- void calculateNearbyKeyCodes(
- const int x, const int y, const int primaryKey, int *inputCodes) const;
bool hasTouchPositionCorrectionData() const { return HAS_TOUCH_POSITION_CORRECTION_DATA; }
int getMostCommonKeyWidth() const { return MOST_COMMON_KEY_WIDTH; }
int getMostCommonKeyWidthSquare() const { return MOST_COMMON_KEY_WIDTH_SQUARE; }
@@ -76,11 +75,11 @@ class ProximityInfo {
ProximityInfoUtils::initializeProximities(inputCodes, inputXCoordinates, inputYCoordinates,
inputSize, mKeyXCoordinates, mKeyYCoordinates, mKeyWidths, mKeyHeights,
mProximityCharsArray, CELL_HEIGHT, CELL_WIDTH, GRID_WIDTH, MOST_COMMON_KEY_WIDTH,
- KEY_COUNT, mLocaleStr, &mCodeToKeyMap, allInputCodes);
+ KEY_COUNT, mLocaleStr, &mLowerCodePointToKeyMap, allInputCodes);
}
AK_FORCE_INLINE int getKeyIndexOf(const int c) const {
- return ProximityInfoUtils::getKeyIndexOf(KEY_COUNT, c, &mCodeToKeyMap);
+ return ProximityInfoUtils::getKeyIndexOf(KEY_COUNT, c, &mLowerCodePointToKeyMap);
}
AK_FORCE_INLINE bool isCodePointOnKeyboard(const int codePoint) const {
@@ -96,7 +95,6 @@ class ProximityInfo {
const int GRID_HEIGHT;
const int MOST_COMMON_KEY_WIDTH;
const int MOST_COMMON_KEY_WIDTH_SQUARE;
- const int MOST_COMMON_KEY_HEIGHT;
const float NORMALIZED_SQUARED_MOST_COMMON_KEY_HYPOTENUSE;
const int CELL_WIDTH;
const int CELL_HEIGHT;
@@ -105,6 +103,8 @@ class ProximityInfo {
const int KEYBOARD_HEIGHT;
const float KEYBOARD_HYPOTENUSE;
const bool HAS_TOUCH_POSITION_CORRECTION_DATA;
+ // Assuming locale strings such as en_US, sr-Latn etc.
+ static const int MAX_LOCALE_STRING_LENGTH = 10;
char mLocaleStr[MAX_LOCALE_STRING_LENGTH];
int *mProximityCharsArray;
int mKeyXCoordinates[MAX_KEY_COUNT_IN_A_KEYBOARD];
@@ -117,13 +117,12 @@ class ProximityInfo {
// Sweet spots for geometric input. Note that we have extra sweet spots only for Y coordinates.
float mSweetSpotCenterYsG[MAX_KEY_COUNT_IN_A_KEYBOARD];
float mSweetSpotRadii[MAX_KEY_COUNT_IN_A_KEYBOARD];
- hash_map_compat<int, int> mCodeToKeyMap;
-
- int mKeyIndexToCodePointG[MAX_KEY_COUNT_IN_A_KEYBOARD];
+ std::unordered_map<int, int> mLowerCodePointToKeyMap;
+ int mKeyIndexToOriginalCodePoint[MAX_KEY_COUNT_IN_A_KEYBOARD];
+ int mKeyIndexToLowerCodePointG[MAX_KEY_COUNT_IN_A_KEYBOARD];
int mCenterXsG[MAX_KEY_COUNT_IN_A_KEYBOARD];
int mCenterYsG[MAX_KEY_COUNT_IN_A_KEYBOARD];
int mKeyKeyDistancesG[MAX_KEY_COUNT_IN_A_KEYBOARD][MAX_KEY_COUNT_IN_A_KEYBOARD];
- // TODO: move to correction.h
};
} // namespace latinime
#endif // LATINIME_PROXIMITY_INFO_H
diff --git a/native/jni/src/suggest/core/layout/proximity_info_params.cpp b/native/jni/src/suggest/core/layout/proximity_info_params.cpp
index 49df10301..68bb0ae9d 100644
--- a/native/jni/src/suggest/core/layout/proximity_info_params.cpp
+++ b/native/jni/src/suggest/core/layout/proximity_info_params.cpp
@@ -24,9 +24,6 @@ const float ProximityInfoParams::VERTICAL_SWEET_SPOT_SCALE = 1.0f;
const float ProximityInfoParams::VERTICAL_SWEET_SPOT_SCALE_G = 0.5f;
/* Per method constants */
-// Used by ProximityInfoStateUtils::initGeometricDistanceInfos()
-const float ProximityInfoParams::NEAR_KEY_NORMALIZED_SQUARED_THRESHOLD = 4.0f;
-
// Used by ProximityInfoStateUtils::updateNearKeysDistances()
const float ProximityInfoParams::NEAR_KEY_THRESHOLD_FOR_DISTANCE = 2.0f;
@@ -50,7 +47,7 @@ const int ProximityInfoParams::NUM_POINTS_FOR_SPEED_CALCULATION = 2;
const int ProximityInfoParams::LAST_POINT_SKIP_DISTANCE_SCALE = 4;
// Used by ProximityInfoStateUtils::updateAlignPointProbabilities()
-const float ProximityInfoParams::MIN_PROBABILITY = 0.000001f;
+const float ProximityInfoParams::MIN_PROBABILITY = 0.000005f;
const float ProximityInfoParams::MAX_SKIP_PROBABILITY = 0.95f;
const float ProximityInfoParams::SKIP_FIRST_POINT_PROBABILITY = 0.01f;
const float ProximityInfoParams::SKIP_LAST_POINT_PROBABILITY = 0.1f;
@@ -76,8 +73,12 @@ const float ProximityInfoParams::MAX_SPEEDxANGLE_RATE_FOR_STANDARD_DEVIATION = 0
const float ProximityInfoParams::SPEEDxNEAREST_WEIGHT_FOR_STANDARD_DEVIATION = 0.5f;
const float ProximityInfoParams::MAX_SPEEDxNEAREST_RATE_FOR_STANDARD_DEVIATION = 0.15f;
const float ProximityInfoParams::MIN_STANDARD_DEVIATION = 0.37f;
-const float ProximityInfoParams::PREV_DISTANCE_WEIGHT = 0.5f;
-const float ProximityInfoParams::NEXT_DISTANCE_WEIGHT = 0.6f;
+const float ProximityInfoParams::STANDARD_DEVIATION_X_WEIGHT_FOR_FIRST = 1.25f;
+const float ProximityInfoParams::STANDARD_DEVIATION_Y_WEIGHT_FOR_FIRST = 0.85f;
+const float ProximityInfoParams::STANDARD_DEVIATION_X_WEIGHT_FOR_LAST = 1.4f;
+const float ProximityInfoParams::STANDARD_DEVIATION_Y_WEIGHT_FOR_LAST = 0.95f;
+const float ProximityInfoParams::STANDARD_DEVIATION_X_WEIGHT = 1.1f;
+const float ProximityInfoParams::STANDARD_DEVIATION_Y_WEIGHT = 0.95f;
// Used by ProximityInfoStateUtils::suppressCharProbabilities()
const float ProximityInfoParams::SUPPRESSION_LENGTH_WEIGHT = 1.5f;
@@ -98,7 +99,4 @@ const int ProximityInfoParams::LOOKUP_RADIUS_PERCENTILE = 50;
const int ProximityInfoParams::FIRST_POINT_TIME_OFFSET_MILLIS = 150;
const int ProximityInfoParams::STRONG_DOUBLE_LETTER_TIME_MILLIS = 600;
-// Used by ProximityInfoStateUtils::calculateNormalizedSquaredDistance()
-const int ProximityInfoParams::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR = 1 << 10;
-
} // namespace latinime
diff --git a/native/jni/src/suggest/core/layout/proximity_info_params.h b/native/jni/src/suggest/core/layout/proximity_info_params.h
index ae1f82c22..d9515c837 100644
--- a/native/jni/src/suggest/core/layout/proximity_info_params.h
+++ b/native/jni/src/suggest/core/layout/proximity_info_params.h
@@ -28,9 +28,6 @@ class ProximityInfoParams {
static const float VERTICAL_SWEET_SPOT_SCALE;
static const float VERTICAL_SWEET_SPOT_SCALE_G;
- // Used by ProximityInfoStateUtils::initGeometricDistanceInfos()
- static const float NEAR_KEY_NORMALIZED_SQUARED_THRESHOLD;
-
// Used by ProximityInfoStateUtils::updateNearKeysDistances()
static const float NEAR_KEY_THRESHOLD_FOR_DISTANCE;
@@ -78,8 +75,13 @@ class ProximityInfoParams {
static const float SPEEDxNEAREST_WEIGHT_FOR_STANDARD_DEVIATION;
static const float MAX_SPEEDxNEAREST_RATE_FOR_STANDARD_DEVIATION;
static const float MIN_STANDARD_DEVIATION;
- static const float PREV_DISTANCE_WEIGHT;
- static const float NEXT_DISTANCE_WEIGHT;
+ // X means gesture's direction. Y means gesture's orthogonal direction.
+ static const float STANDARD_DEVIATION_X_WEIGHT_FOR_FIRST;
+ static const float STANDARD_DEVIATION_Y_WEIGHT_FOR_FIRST;
+ static const float STANDARD_DEVIATION_X_WEIGHT_FOR_LAST;
+ static const float STANDARD_DEVIATION_Y_WEIGHT_FOR_LAST;
+ static const float STANDARD_DEVIATION_X_WEIGHT;
+ static const float STANDARD_DEVIATION_Y_WEIGHT;
// Used by ProximityInfoStateUtils::suppressCharProbabilities()
static const float SUPPRESSION_LENGTH_WEIGHT;
diff --git a/native/jni/src/suggest/core/layout/proximity_info_state.cpp b/native/jni/src/suggest/core/layout/proximity_info_state.cpp
index fbabd92f2..91469e26d 100644
--- a/native/jni/src/suggest/core/layout/proximity_info_state.cpp
+++ b/native/jni/src/suggest/core/layout/proximity_info_state.cpp
@@ -18,8 +18,10 @@
#include "suggest/core/layout/proximity_info_state.h"
-#include <cstring> // for memset() and memcpy()
+#include <algorithm>
+#include <cstring> // for memset() and memmove()
#include <sstream> // for debug prints
+#include <unordered_map>
#include <vector>
#include "defines.h"
@@ -30,6 +32,12 @@
namespace latinime {
+// Looks up the key for the primary code point of the input at index and returns the code point
+// that the ProximityInfo reports as that key's original code point.
+int ProximityInfoState::getPrimaryOriginalCodePointAt(const int index) const {
+    const int keyIndexOfPrimary = mProximityInfo->getKeyIndexOf(getPrimaryCodePointAt(index));
+    return mProximityInfo->getOriginalCodePointOf(keyIndexOfPrimary);
+}
+
// TODO: Remove the dependency of "isGeometric"
void ProximityInfoState::initInputParams(const int pointerId, const float maxPointToKeyLength,
const ProximityInfo *proximityInfo, const int *const inputCodes, const int inputSize,
@@ -84,7 +92,6 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi
mSampledInputIndice.clear();
mSampledLengthCache.clear();
mSampledNormalizedSquaredLengthCache.clear();
- mSampledNearKeySets.clear();
mSampledSearchKeySets.clear();
mSpeedRates.clear();
mBeelineSpeedPercentiles.clear();
@@ -119,18 +126,17 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi
if (mSampledInputSize > 0) {
ProximityInfoStateUtils::initGeometricDistanceInfos(mProximityInfo, mSampledInputSize,
lastSavedInputSize, isGeometric, &mSampledInputXs, &mSampledInputYs,
- &mSampledNearKeySets, &mSampledNormalizedSquaredLengthCache);
+ &mSampledNormalizedSquaredLengthCache);
if (isGeometric) {
// updates probabilities of skipping or mapping each key for all points.
ProximityInfoStateUtils::updateAlignPointProbabilities(
mMaxPointToKeyLength, mProximityInfo->getMostCommonKeyWidth(),
mProximityInfo->getKeyCount(), lastSavedInputSize, mSampledInputSize,
&mSampledInputXs, &mSampledInputYs, &mSpeedRates, &mSampledLengthCache,
- &mSampledNormalizedSquaredLengthCache, &mSampledNearKeySets,
- &mCharProbabilities);
+ &mSampledNormalizedSquaredLengthCache, mProximityInfo, &mCharProbabilities);
ProximityInfoStateUtils::updateSampledSearchKeySets(mProximityInfo,
mSampledInputSize, lastSavedInputSize, &mSampledLengthCache,
- &mSampledNearKeySets, &mSampledSearchKeySets,
+ &mCharProbabilities, &mSampledSearchKeySets,
&mSampledSearchKeyVectors);
mMostProbableStringProbability = ProximityInfoStateUtils::getMostProbableString(
mProximityInfo, mSampledInputSize, &mCharProbabilities, mMostProbableString);
@@ -165,7 +171,7 @@ float ProximityInfoState::getPointToKeyLength(
const int keyId = mProximityInfo->getKeyIndexOf(codePoint);
if (keyId != NOT_AN_INDEX) {
const int index = inputIndex * mProximityInfo->getKeyCount() + keyId;
- return min(mSampledNormalizedSquaredLengthCache[index], mMaxPointToKeyLength);
+ return std::min(mSampledNormalizedSquaredLengthCache[index], mMaxPointToKeyLength);
}
if (CharUtils::isIntentionalOmissionCodePoint(codePoint)) {
return 0.0f;
@@ -249,6 +255,14 @@ ProximityType ProximityInfoState::getProximityTypeG(const int index, const int c
if (!isUsed()) {
return UNRELATED_CHAR;
}
+ const int sampledSearchKeyVectorsSize = static_cast<int>(mSampledSearchKeyVectors.size());
+ if (index < 0 || index >= sampledSearchKeyVectorsSize) {
+ AKLOGE("getProximityTypeG() is called with an invalid index(%d). "
+ "mSampledSearchKeyVectors.size() = %d, codePoint = %x.", index,
+ sampledSearchKeyVectorsSize, codePoint);
+ ASSERT(false);
+ return UNRELATED_CHAR;
+ }
const int lowerCodePoint = CharUtils::toLowerCase(codePoint);
const int baseLowerCodePoint = CharUtils::toBaseCodePoint(lowerCodePoint);
for (int i = 0; i < static_cast<int>(mSampledSearchKeyVectors[index].size()); ++i) {
@@ -271,7 +285,7 @@ float ProximityInfoState::getDirection(const int index0, const int index1) const
}
float ProximityInfoState::getMostProbableString(int *const codePointBuf) const {
- memcpy(codePointBuf, mMostProbableString, sizeof(mMostProbableString));
+ memmove(codePointBuf, mMostProbableString, sizeof(mMostProbableString));
return mMostProbableStringProbability;
}
@@ -283,7 +297,7 @@ bool ProximityInfoState::hasSpaceProximity(const int index) const {
// Returns a probability of mapping index to keyIndex.
float ProximityInfoState::getProbability(const int index, const int keyIndex) const {
ASSERT(0 <= index && index < mSampledInputSize);
- hash_map_compat<int, float>::const_iterator it = mCharProbabilities[index].find(keyIndex);
+ std::unordered_map<int, float>::const_iterator it = mCharProbabilities[index].find(keyIndex);
if (it != mCharProbabilities[index].end()) {
return it->second;
}
diff --git a/native/jni/src/suggest/core/layout/proximity_info_state.h b/native/jni/src/suggest/core/layout/proximity_info_state.h
index c94060fa9..6b1a319aa 100644
--- a/native/jni/src/suggest/core/layout/proximity_info_state.h
+++ b/native/jni/src/suggest/core/layout/proximity_info_state.h
@@ -18,12 +18,12 @@
#define LATINIME_PROXIMITY_INFO_STATE_H
#include <cstring> // for memset()
+#include <unordered_map>
#include <vector>
#include "defines.h"
#include "suggest/core/layout/proximity_info_params.h"
#include "suggest/core/layout/proximity_info_state_utils.h"
-#include "utils/hash_map_compat.h"
namespace latinime {
@@ -43,16 +43,16 @@ class ProximityInfoState {
// Defined here //
/////////////////////////////////////////
AK_FORCE_INLINE ProximityInfoState()
- : mProximityInfo(0), mMaxPointToKeyLength(0.0f), mAverageSpeed(0.0f),
+ : mProximityInfo(nullptr), mMaxPointToKeyLength(0.0f), mAverageSpeed(0.0f),
mHasTouchPositionCorrectionData(false), mMostCommonKeyWidthSquare(0),
mKeyCount(0), mCellHeight(0), mCellWidth(0), mGridHeight(0), mGridWidth(0),
mIsContinuousSuggestionPossible(false), mHasBeenUpdatedByGeometricInput(false),
mSampledInputXs(), mSampledInputYs(), mSampledTimes(), mSampledInputIndice(),
mSampledLengthCache(), mBeelineSpeedPercentiles(),
mSampledNormalizedSquaredLengthCache(), mSpeedRates(), mDirections(),
- mCharProbabilities(), mSampledNearKeySets(), mSampledSearchKeySets(),
- mSampledSearchKeyVectors(), mTouchPositionCorrectionEnabled(false),
- mSampledInputSize(0), mMostProbableStringProbability(0.0f) {
+ mCharProbabilities(), mSampledSearchKeySets(), mSampledSearchKeyVectors(),
+ mTouchPositionCorrectionEnabled(false), mSampledInputSize(0),
+ mMostProbableStringProbability(0.0f) {
memset(mInputProximities, 0, sizeof(mInputProximities));
memset(mPrimaryInputWord, 0, sizeof(mPrimaryInputWord));
memset(mMostProbableString, 0, sizeof(mMostProbableString));
@@ -65,6 +65,8 @@ class ProximityInfoState {
return getProximityCodePointsAt(index)[0];
}
+ int getPrimaryOriginalCodePointAt(const int index) const;
+
inline bool sameAsTyped(const int *word, int length) const {
if (length != mSampledInputSize) {
return false;
@@ -106,10 +108,6 @@ class ProximityInfoState {
return false;
}
- inline const int *getPrimaryInputWord() const {
- return mPrimaryInputWord;
- }
-
inline bool touchPositionCorrectionEnabled() const {
return mTouchPositionCorrectionEnabled;
}
@@ -154,10 +152,6 @@ class ProximityInfoState {
ProximityType getProximityTypeG(const int index, const int codePoint) const;
- const std::vector<int> *getSearchKeyVector(const int index) const {
- return &mSampledSearchKeyVectors[index];
- }
-
float getSpeedRate(const int index) const {
return mSpeedRates[index];
}
@@ -221,11 +215,7 @@ class ProximityInfoState {
std::vector<float> mSpeedRates;
std::vector<float> mDirections;
// probabilities of skipping or mapping to a key for each point.
- std::vector<hash_map_compat<int, float> > mCharProbabilities;
- // The vector for the key code set which holds nearby keys for each sampled input point
- // 1. Used to calculate the probability of the key
- // 2. Used to calculate mSampledSearchKeySets
- std::vector<ProximityInfoStateUtils::NearKeycodesSet> mSampledNearKeySets;
+ std::vector<std::unordered_map<int, float> > mCharProbabilities;
// The vector for the key code set which holds nearby keys of some trailing sampled input points
// for each sampled input point. These nearby keys contain the next characters which can be in
// the dictionary. Specifically, currently we are looking for keys nearby trailing sampled
diff --git a/native/jni/src/suggest/core/layout/proximity_info_state_utils.cpp b/native/jni/src/suggest/core/layout/proximity_info_state_utils.cpp
index e1b35340b..ea3b02216 100644
--- a/native/jni/src/suggest/core/layout/proximity_info_state_utils.cpp
+++ b/native/jni/src/suggest/core/layout/proximity_info_state_utils.cpp
@@ -16,13 +16,16 @@
#include "suggest/core/layout/proximity_info_state_utils.h"
+#include <algorithm>
#include <cmath>
#include <cstring> // for memset()
#include <sstream> // for debug prints
+#include <unordered_map>
#include <vector>
#include "defines.h"
#include "suggest/core/layout/geometry_utils.h"
+#include "suggest/core/layout/normal_distribution_2d.h"
#include "suggest/core/layout/proximity_info.h"
#include "suggest/core/layout/proximity_info_params.h"
@@ -186,13 +189,10 @@ namespace latinime {
const int lastSavedInputSize, const bool isGeometric,
const std::vector<int> *const sampledInputXs,
const std::vector<int> *const sampledInputYs,
- std::vector<NearKeycodesSet> *sampledNearKeySets,
std::vector<float> *sampledNormalizedSquaredLengthCache) {
- sampledNearKeySets->resize(sampledInputSize);
const int keyCount = proximityInfo->getKeyCount();
sampledNormalizedSquaredLengthCache->resize(sampledInputSize * keyCount);
for (int i = lastSavedInputSize; i < sampledInputSize; ++i) {
- (*sampledNearKeySets)[i].reset();
for (int k = 0; k < keyCount; ++k) {
const int index = i * keyCount + k;
const int x = (*sampledInputXs)[i];
@@ -201,10 +201,6 @@ namespace latinime {
proximityInfo->getNormalizedSquaredDistanceFromCenterFloatG(
k, x, y, isGeometric);
(*sampledNormalizedSquaredLengthCache)[index] = normalizedSquaredDistance;
- if (normalizedSquaredDistance
- < ProximityInfoParams::NEAR_KEY_NORMALIZED_SQUARED_THRESHOLD) {
- (*sampledNearKeySets)[i][k] = true;
- }
}
}
}
@@ -240,7 +236,7 @@ namespace latinime {
// Calculate velocity by using distances and durations of
// ProximityInfoParams::NUM_POINTS_FOR_SPEED_CALCULATION points for both forward and
// backward.
- const int forwardNumPoints = min(inputSize - 1,
+ const int forwardNumPoints = std::min(inputSize - 1,
index + ProximityInfoParams::NUM_POINTS_FOR_SPEED_CALCULATION);
for (int j = index; j < forwardNumPoints; ++j) {
if (i < sampledInputSize - 1 && j >= (*sampledInputIndice)[i + 1]) {
@@ -250,7 +246,7 @@ namespace latinime {
xCoordinates[j + 1], yCoordinates[j + 1]);
duration += times[j + 1] - times[j];
}
- const int backwardNumPoints = max(0,
+ const int backwardNumPoints = std::max(0,
index - ProximityInfoParams::NUM_POINTS_FOR_SPEED_CALCULATION);
for (int j = index - 1; j >= backwardNumPoints; --j) {
if (i > 0 && j < (*sampledInputIndice)[i - 1]) {
@@ -272,7 +268,7 @@ namespace latinime {
// Direction calculation.
sampledDirections->resize(sampledInputSize - 1);
- for (int i = max(0, lastSavedInputSize - 1); i < sampledInputSize - 1; ++i) {
+ for (int i = std::max(0, lastSavedInputSize - 1); i < sampledInputSize - 1; ++i) {
(*sampledDirections)[i] = getDirection(sampledInputXs, sampledInputYs, i, i + 1);
}
return averageSpeed;
@@ -609,7 +605,7 @@ namespace latinime {
const int inputIndex, const int keyId) {
if (keyId != NOT_AN_INDEX) {
const int index = inputIndex * keyCount + keyId;
- return min((*sampledNormalizedSquaredLengthCache)[index], maxPointToKeyLength);
+ return std::min((*sampledNormalizedSquaredLengthCache)[index], maxPointToKeyLength);
}
// If the char is not a key on the keyboard then return the max length.
return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
@@ -624,8 +620,8 @@ namespace latinime {
const std::vector<float> *const sampledSpeedRates,
const std::vector<int> *const sampledLengthCache,
const std::vector<float> *const sampledNormalizedSquaredLengthCache,
- std::vector<NearKeycodesSet> *sampledNearKeySets,
- std::vector<hash_map_compat<int, float> > *charProbabilities) {
+ const ProximityInfo *const proximityInfo,
+ std::vector<std::unordered_map<int, float> > *charProbabilities) {
charProbabilities->resize(sampledInputSize);
// Calculates probabilities of using a point as a correlated point with the character
// for each point.
@@ -640,23 +636,21 @@ namespace latinime {
float nearestKeyDistance = static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
for (int j = 0; j < keyCount; ++j) {
- if ((*sampledNearKeySets)[i].test(j)) {
- const float distance = getPointToKeyByIdLength(
- maxPointToKeyLength, sampledNormalizedSquaredLengthCache, keyCount, i, j);
- if (distance < nearestKeyDistance) {
- nearestKeyDistance = distance;
- }
+ const float distance = getPointToKeyByIdLength(
+ maxPointToKeyLength, sampledNormalizedSquaredLengthCache, keyCount, i, j);
+ if (distance < nearestKeyDistance) {
+ nearestKeyDistance = distance;
}
}
if (i == 0) {
- skipProbability *= min(1.0f,
+ skipProbability *= std::min(1.0f,
nearestKeyDistance * ProximityInfoParams::NEAREST_DISTANCE_WEIGHT
+ ProximityInfoParams::NEAREST_DISTANCE_BIAS);
// Promote the first point
skipProbability *= ProximityInfoParams::SKIP_FIRST_POINT_PROBABILITY;
} else if (i == sampledInputSize - 1) {
- skipProbability *= min(1.0f,
+ skipProbability *= std::min(1.0f,
nearestKeyDistance * ProximityInfoParams::NEAREST_DISTANCE_WEIGHT_FOR_LAST
+ ProximityInfoParams::NEAREST_DISTANCE_BIAS_FOR_LAST);
// Promote the last point
@@ -667,17 +661,17 @@ namespace latinime {
&& speedRate
< (*sampledSpeedRates)[i + 1] - ProximityInfoParams::SPEED_MARGIN) {
if (currentAngle < ProximityInfoParams::CORNER_ANGLE_THRESHOLD) {
- skipProbability *= min(1.0f, speedRate
+ skipProbability *= std::min(1.0f, speedRate
* ProximityInfoParams::SLOW_STRAIGHT_WEIGHT_FOR_SKIP_PROBABILITY);
} else {
// If the angle is small enough, we promote this point more. (e.g. pit vs put)
- skipProbability *= min(1.0f,
+ skipProbability *= std::min(1.0f,
speedRate * ProximityInfoParams::SPEED_WEIGHT_FOR_SKIP_PROBABILITY
+ ProximityInfoParams::MIN_SPEED_RATE_FOR_SKIP_PROBABILITY);
}
}
- skipProbability *= min(1.0f,
+ skipProbability *= std::min(1.0f,
speedRate * nearestKeyDistance * ProximityInfoParams::NEAREST_DISTANCE_WEIGHT
+ ProximityInfoParams::NEAREST_DISTANCE_BIAS);
@@ -707,93 +701,57 @@ namespace latinime {
// (1.0f - skipProbability).
const float inputCharProbability = 1.0f - skipProbability;
- const float speedxAngleRate = min(speedRate * currentAngle / M_PI_F
+ const float speedMultipliedByAngleRate = std::min(speedRate * currentAngle / M_PI_F
* ProximityInfoParams::SPEEDxANGLE_WEIGHT_FOR_STANDARD_DEVIATION,
ProximityInfoParams::MAX_SPEEDxANGLE_RATE_FOR_STANDARD_DEVIATION);
- const float speedxNearestKeyDistanceRate = min(speedRate * nearestKeyDistance
- * ProximityInfoParams::SPEEDxNEAREST_WEIGHT_FOR_STANDARD_DEVIATION,
- ProximityInfoParams::MAX_SPEEDxNEAREST_RATE_FOR_STANDARD_DEVIATION);
- const float sigma = speedxAngleRate + speedxNearestKeyDistanceRate
- + ProximityInfoParams::MIN_STANDARD_DEVIATION;
-
- ProximityInfoUtils::NormalDistribution
- distribution(ProximityInfoParams::CENTER_VALUE_OF_NORMALIZED_DISTRIBUTION, sigma);
+ const float speedMultipliedByNearestKeyDistanceRate = std::min(
+ speedRate * nearestKeyDistance
+ * ProximityInfoParams::SPEEDxNEAREST_WEIGHT_FOR_STANDARD_DEVIATION,
+ ProximityInfoParams::MAX_SPEEDxNEAREST_RATE_FOR_STANDARD_DEVIATION);
+ const float sigma = (speedMultipliedByAngleRate + speedMultipliedByNearestKeyDistanceRate
+ + ProximityInfoParams::MIN_STANDARD_DEVIATION) * mostCommonKeyWidth;
+ float theta = 0.0f;
+ // TODO: Use different metrics to compute sigmas.
+ float sigmaX = sigma;
+ float sigmaY = sigma;
+ if (i == 0 && i != sampledInputSize - 1) {
+ // First point
+ theta = getDirection(sampledInputXs, sampledInputYs, i + 1, i);
+ sigmaX *= ProximityInfoParams::STANDARD_DEVIATION_X_WEIGHT_FOR_FIRST;
+ sigmaY *= ProximityInfoParams::STANDARD_DEVIATION_Y_WEIGHT_FOR_FIRST;
+ } else {
+ if (i == sampledInputSize - 1) {
+ // Last point
+ sigmaX *= ProximityInfoParams::STANDARD_DEVIATION_X_WEIGHT_FOR_LAST;
+ sigmaY *= ProximityInfoParams::STANDARD_DEVIATION_Y_WEIGHT_FOR_LAST;
+ } else {
+ sigmaX *= ProximityInfoParams::STANDARD_DEVIATION_X_WEIGHT;
+ sigmaY *= ProximityInfoParams::STANDARD_DEVIATION_Y_WEIGHT;
+ }
+ theta = getDirection(sampledInputXs, sampledInputYs, i, i - 1);
+ }
+ NormalDistribution2D distribution((*sampledInputXs)[i], sigmaX, (*sampledInputYs)[i],
+ sigmaY, theta);
// Summing up probability densities of all near keys.
float sumOfProbabilityDensities = 0.0f;
for (int j = 0; j < keyCount; ++j) {
- if ((*sampledNearKeySets)[i].test(j)) {
- float distance = sqrtf(getPointToKeyByIdLength(
- maxPointToKeyLength, sampledNormalizedSquaredLengthCache, keyCount, i, j));
- if (i == 0 && i != sampledInputSize - 1) {
- // For the first point, weighted average of distances from first point and the
- // next point to the key is used as a point to key distance.
- const float nextDistance = sqrtf(getPointToKeyByIdLength(
- maxPointToKeyLength, sampledNormalizedSquaredLengthCache, keyCount,
- i + 1, j));
- if (nextDistance < distance) {
- // The distance of the first point tends to bigger than continuing
- // points because the first touch by the user can be sloppy.
- // So we promote the first point if the distance of that point is larger
- // than the distance of the next point.
- distance = (distance
- + nextDistance * ProximityInfoParams::NEXT_DISTANCE_WEIGHT)
- / (1.0f + ProximityInfoParams::NEXT_DISTANCE_WEIGHT);
- }
- } else if (i != 0 && i == sampledInputSize - 1) {
- // For the first point, weighted average of distances from last point and
- // the previous point to the key is used as a point to key distance.
- const float previousDistance = sqrtf(getPointToKeyByIdLength(
- maxPointToKeyLength, sampledNormalizedSquaredLengthCache, keyCount,
- i - 1, j));
- if (previousDistance < distance) {
- // The distance of the last point tends to bigger than continuing points
- // because the last touch by the user can be sloppy. So we promote the
- // last point if the distance of that point is larger than the distance of
- // the previous point.
- distance = (distance
- + previousDistance * ProximityInfoParams::PREV_DISTANCE_WEIGHT)
- / (1.0f + ProximityInfoParams::PREV_DISTANCE_WEIGHT);
- }
- }
- // TODO: Promote the first point when the extended line from the next input is near
- // from a key. Also, promote the last point as well.
- sumOfProbabilityDensities += distribution.getProbabilityDensity(distance);
- }
+ sumOfProbabilityDensities += distribution.getProbabilityDensity(
+ proximityInfo->getKeyCenterXOfKeyIdG(j,
+ NOT_A_COORDINATE /* referencePointX */, true /* isGeometric */),
+ proximityInfo->getKeyCenterYOfKeyIdG(j,
+ NOT_A_COORDINATE /* referencePointY */, true /* isGeometric */));
}
// Split the probability of an input point to keys that are close to the input point.
for (int j = 0; j < keyCount; ++j) {
- if ((*sampledNearKeySets)[i].test(j)) {
- float distance = sqrtf(getPointToKeyByIdLength(
- maxPointToKeyLength, sampledNormalizedSquaredLengthCache, keyCount, i, j));
- if (i == 0 && i != sampledInputSize - 1) {
- // For the first point, weighted average of distances from the first point and
- // the next point to the key is used as a point to key distance.
- const float prevDistance = sqrtf(getPointToKeyByIdLength(
- maxPointToKeyLength, sampledNormalizedSquaredLengthCache, keyCount,
- i + 1, j));
- if (prevDistance < distance) {
- distance = (distance
- + prevDistance * ProximityInfoParams::NEXT_DISTANCE_WEIGHT)
- / (1.0f + ProximityInfoParams::NEXT_DISTANCE_WEIGHT);
- }
- } else if (i != 0 && i == sampledInputSize - 1) {
- // For the first point, weighted average of distances from last point and
- // the previous point to the key is used as a point to key distance.
- const float prevDistance = sqrtf(getPointToKeyByIdLength(
- maxPointToKeyLength, sampledNormalizedSquaredLengthCache, keyCount,
- i - 1, j));
- if (prevDistance < distance) {
- distance = (distance
- + prevDistance * ProximityInfoParams::PREV_DISTANCE_WEIGHT)
- / (1.0f + ProximityInfoParams::PREV_DISTANCE_WEIGHT);
- }
- }
- const float probabilityDensity = distribution.getProbabilityDensity(distance);
- const float probability = inputCharProbability * probabilityDensity
- / sumOfProbabilityDensities;
- (*charProbabilities)[i][j] = probability;
- }
+ const float probabilityDensity = distribution.getProbabilityDensity(
+ proximityInfo->getKeyCenterXOfKeyIdG(j,
+ NOT_A_COORDINATE /* referencePointX */, true /* isGeometric */),
+ proximityInfo->getKeyCenterYOfKeyIdG(j,
+ NOT_A_COORDINATE /* referencePointY */, true /* isGeometric */));
+ const float probability = inputCharProbability * probabilityDensity
+ / sumOfProbabilityDensities;
+ (*charProbabilities)[i][j] = probability;
}
}
@@ -805,7 +763,7 @@ namespace latinime {
sstream << "Speed: "<< (*sampledSpeedRates)[i] << ", ";
sstream << "Angle: "<< getPointAngle(sampledInputXs, sampledInputYs, i) << ", \n";
- for (hash_map_compat<int, float>::iterator it = (*charProbabilities)[i].begin();
+ for (std::unordered_map<int, float>::iterator it = (*charProbabilities)[i].begin();
it != (*charProbabilities)[i].end(); ++it) {
if (it->first == NOT_AN_INDEX) {
sstream << it->first
@@ -827,7 +785,7 @@ namespace latinime {
// Decrease key probabilities of points which don't have the highest probability of that key
// among nearby points. Probabilities of the first point and the last point are not suppressed.
- for (int i = max(start, 1); i < sampledInputSize; ++i) {
+ for (int i = std::max(start, 1); i < sampledInputSize; ++i) {
for (int j = i + 1; j < sampledInputSize; ++j) {
if (!suppressCharProbabilities(
mostCommonKeyWidth, sampledInputSize, sampledLengthCache, i, j,
@@ -835,7 +793,7 @@ namespace latinime {
break;
}
}
- for (int j = i - 1; j >= max(start, 0); --j) {
+ for (int j = i - 1; j >= std::max(start, 0); --j) {
if (!suppressCharProbabilities(
mostCommonKeyWidth, sampledInputSize, sampledLengthCache, i, j,
charProbabilities)) {
@@ -847,12 +805,11 @@ namespace latinime {
// Converting from raw probabilities to log probabilities to calculate spatial distance.
for (int i = start; i < sampledInputSize; ++i) {
for (int j = 0; j < keyCount; ++j) {
- hash_map_compat<int, float>::iterator it = (*charProbabilities)[i].find(j);
+ std::unordered_map<int, float>::iterator it = (*charProbabilities)[i].find(j);
if (it == (*charProbabilities)[i].end()){
- (*sampledNearKeySets)[i].reset(j);
+ continue;
} else if(it->second < ProximityInfoParams::MIN_PROBABILITY) {
// Erases from near keys vector because it has very low probability.
- (*sampledNearKeySets)[i].reset(j);
(*charProbabilities)[i].erase(j);
} else {
it->second = -logf(it->second);
@@ -864,9 +821,8 @@ namespace latinime {
/* static */ void ProximityInfoStateUtils::updateSampledSearchKeySets(
const ProximityInfo *const proximityInfo, const int sampledInputSize,
- const int lastSavedInputSize,
- const std::vector<int> *const sampledLengthCache,
- const std::vector<NearKeycodesSet> *const sampledNearKeySets,
+ const int lastSavedInputSize, const std::vector<int> *const sampledLengthCache,
+ const std::vector<std::unordered_map<int, float> > *const charProbabilities,
std::vector<NearKeycodesSet> *sampledSearchKeySets,
std::vector<std::vector<int> > *sampledSearchKeyVectors) {
sampledSearchKeySets->resize(sampledInputSize);
@@ -878,12 +834,17 @@ namespace latinime {
if (i >= lastSavedInputSize) {
(*sampledSearchKeySets)[i].reset();
}
- for (int j = max(i, lastSavedInputSize); j < sampledInputSize; ++j) {
+ for (int j = std::max(i, lastSavedInputSize); j < sampledInputSize; ++j) {
// TODO: Investigate if this is required. This may not fail.
if ((*sampledLengthCache)[j] - (*sampledLengthCache)[i] >= readForwordLength) {
break;
}
- (*sampledSearchKeySets)[i] |= (*sampledNearKeySets)[j];
+ for(const auto& charProbability : charProbabilities->at(j)) {
+ if (charProbability.first == NOT_AN_INDEX) {
+ continue;
+ }
+ (*sampledSearchKeySets)[i].set(charProbability.first);
+ }
}
}
const int keyCount = proximityInfo->getKeyCount();
@@ -907,7 +868,7 @@ namespace latinime {
/* static */ bool ProximityInfoStateUtils::suppressCharProbabilities(const int mostCommonKeyWidth,
const int sampledInputSize, const std::vector<int> *const lengthCache,
const int index0, const int index1,
- std::vector<hash_map_compat<int, float> > *charProbabilities) {
+ std::vector<std::unordered_map<int, float> > *charProbabilities) {
ASSERT(0 <= index0 && index0 < sampledInputSize);
ASSERT(0 <= index1 && index1 < sampledInputSize);
const float keyWidthFloat = static_cast<float>(mostCommonKeyWidth);
@@ -918,9 +879,9 @@ namespace latinime {
const float suppressionRate = ProximityInfoParams::MIN_SUPPRESSION_RATE
+ diff / keyWidthFloat / ProximityInfoParams::SUPPRESSION_LENGTH_WEIGHT
* ProximityInfoParams::SUPPRESSION_WEIGHT;
- for (hash_map_compat<int, float>::iterator it = (*charProbabilities)[index0].begin();
+ for (std::unordered_map<int, float>::iterator it = (*charProbabilities)[index0].begin();
it != (*charProbabilities)[index0].end(); ++it) {
- hash_map_compat<int, float>::iterator it2 = (*charProbabilities)[index1].find(it->first);
+ std::unordered_map<int, float>::iterator it2 = (*charProbabilities)[index1].find(it->first);
if (it2 != (*charProbabilities)[index1].end() && it->second < it2->second) {
const float newProbability = it->second * suppressionRate;
const float suppression = it->second - newProbability;
@@ -929,7 +890,7 @@ namespace latinime {
(*charProbabilities)[index0][NOT_AN_INDEX] += suppression;
// Add the probability of the same key nearby index1
- const float probabilityGain = min(suppression
+ const float probabilityGain = std::min(suppression
* ProximityInfoParams::SUPPRESSION_WEIGHT_FOR_PROBABILITY_GAIN,
(*charProbabilities)[index1][NOT_AN_INDEX]
* ProximityInfoParams::SKIP_PROBABALITY_WEIGHT_FOR_PROBABILITY_GAIN);
@@ -972,7 +933,7 @@ namespace latinime {
// returns probability of generating the word.
/* static */ float ProximityInfoStateUtils::getMostProbableString(
const ProximityInfo *const proximityInfo, const int sampledInputSize,
- const std::vector<hash_map_compat<int, float> > *const charProbabilities,
+ const std::vector<std::unordered_map<int, float> > *const charProbabilities,
int *const codePointBuf) {
ASSERT(sampledInputSize >= 0);
memset(codePointBuf, 0, sizeof(codePointBuf[0]) * MAX_WORD_LENGTH);
@@ -982,7 +943,7 @@ namespace latinime {
for (int i = 0; i < sampledInputSize && index < MAX_WORD_LENGTH - 1; ++i) {
float minLogProbability = static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
int character = NOT_AN_INDEX;
- for (hash_map_compat<int, float>::const_iterator it = (*charProbabilities)[i].begin();
+ for (std::unordered_map<int, float>::const_iterator it = (*charProbabilities)[i].begin();
it != (*charProbabilities)[i].end(); ++it) {
const float logProbability = (it->first != NOT_AN_INDEX)
? it->second + ProximityInfoParams::DEMOTION_LOG_PROBABILITY : it->second;
@@ -992,7 +953,16 @@ namespace latinime {
}
}
if (character != NOT_AN_INDEX) {
- codePointBuf[index] = proximityInfo->getCodePointOf(character);
+ const int codePoint = proximityInfo->getCodePointOf(character);
+ if (codePoint == NOT_A_CODE_POINT) {
+ AKLOGE("Key index(%d) is not found. Cannot construct most probable string",
+ character);
+ ASSERT(false);
+ // Make the length zero, which means most probable string won't be used.
+ index = 0;
+ break;
+ }
+ codePointBuf[index] = codePoint;
index++;
}
sumLogProbability += minLogProbability;
diff --git a/native/jni/src/suggest/core/layout/proximity_info_state_utils.h b/native/jni/src/suggest/core/layout/proximity_info_state_utils.h
index 6de970033..211a79737 100644
--- a/native/jni/src/suggest/core/layout/proximity_info_state_utils.h
+++ b/native/jni/src/suggest/core/layout/proximity_info_state_utils.h
@@ -18,10 +18,10 @@
#define LATINIME_PROXIMITY_INFO_STATE_UTILS_H
#include <bitset>
+#include <unordered_map>
#include <vector>
#include "defines.h"
-#include "utils/hash_map_compat.h"
namespace latinime {
class ProximityInfo;
@@ -29,7 +29,7 @@ class ProximityInfoParams;
class ProximityInfoStateUtils {
public:
- typedef hash_map_compat<int, float> NearKeysDistanceMap;
+ typedef std::unordered_map<int, float> NearKeysDistanceMap;
typedef std::bitset<MAX_KEY_COUNT_IN_A_KEYBOARD> NearKeycodesSet;
static int trimLastTwoTouchPoints(std::vector<int> *sampledInputXs,
@@ -56,7 +56,7 @@ class ProximityInfoStateUtils {
const std::vector<int> *const sampledLengthCache,
const std::vector<int> *const sampledInputIndice,
std::vector<float> *sampledSpeedRates, std::vector<float> *sampledDirections);
- static void refreshBeelineSpeedRates(const int mostCommonKeyWidth, const float averageSpeed,
+ static void refreshBeelineSpeedRates(const int mostCommonKeyWidth, const float averageSpeed,
const int inputSize, const int *const xCoordinates, const int *const yCoordinates,
const int *times, const int sampledInputSize,
const std::vector<int> *const sampledInputXs,
@@ -71,12 +71,12 @@ class ProximityInfoStateUtils {
const std::vector<float> *const sampledSpeedRates,
const std::vector<int> *const sampledLengthCache,
const std::vector<float> *const sampledNormalizedSquaredLengthCache,
- std::vector<NearKeycodesSet> *sampledNearKeySets,
- std::vector<hash_map_compat<int, float> > *charProbabilities);
+ const ProximityInfo *const proximityInfo,
+ std::vector<std::unordered_map<int, float> > *charProbabilities);
static void updateSampledSearchKeySets(const ProximityInfo *const proximityInfo,
const int sampledInputSize, const int lastSavedInputSize,
const std::vector<int> *const sampledLengthCache,
- const std::vector<NearKeycodesSet> *const sampledNearKeySets,
+ const std::vector<std::unordered_map<int, float> > *const charProbabilities,
std::vector<NearKeycodesSet> *sampledSearchKeySets,
std::vector<std::vector<int> > *sampledSearchKeyVectors);
static float getPointToKeyByIdLength(const float maxPointToKeyLength,
@@ -86,14 +86,9 @@ class ProximityInfoStateUtils {
const int sampledInputSize, const int lastSavedInputSize, const bool isGeometric,
const std::vector<int> *const sampledInputXs,
const std::vector<int> *const sampledInputYs,
- std::vector<NearKeycodesSet> *sampledNearKeySets,
std::vector<float> *sampledNormalizedSquaredLengthCache);
static void initPrimaryInputWord(const int inputSize, const int *const inputProximities,
int *primaryInputWord);
- static void initNormalizedSquaredDistances(const ProximityInfo *const proximityInfo,
- const int inputSize, const int *inputXCoordinates, const int *inputYCoordinates,
- const int *const inputProximities, const std::vector<int> *const sampledInputXs,
- const std::vector<int> *const sampledInputYs, int *normalizedSquaredDistances);
static void dump(const bool isGeometric, const int inputSize,
const int *const inputXCoordinates, const int *const inputYCoordinates,
const int sampledInputSize, const std::vector<int> *const sampledInputXs,
@@ -110,7 +105,7 @@ class ProximityInfoStateUtils {
// TODO: Move to most_probable_string_utils.h
static float getMostProbableString(const ProximityInfo *const proximityInfo,
const int sampledInputSize,
- const std::vector<hash_map_compat<int, float> > *const charProbabilities,
+ const std::vector<std::unordered_map<int, float> > *const charProbabilities,
int *const codePointBuf);
private:
@@ -152,7 +147,7 @@ class ProximityInfoStateUtils {
const int index2);
static bool suppressCharProbabilities(const int mostCommonKeyWidth,
const int sampledInputSize, const std::vector<int> *const lengthCache, const int index0,
- const int index1, std::vector<hash_map_compat<int, float> > *charProbabilities);
+ const int index1, std::vector<std::unordered_map<int, float> > *charProbabilities);
static float calculateSquaredDistanceFromSweetSpotCenter(
const ProximityInfo *const proximityInfo, const std::vector<int> *const sampledInputXs,
const std::vector<int> *const sampledInputYs, const int keyIndex,
diff --git a/native/jni/src/suggest/core/layout/proximity_info_utils.h b/native/jni/src/suggest/core/layout/proximity_info_utils.h
index 0e28560fc..178aada2d 100644
--- a/native/jni/src/suggest/core/layout/proximity_info_utils.h
+++ b/native/jni/src/suggest/core/layout/proximity_info_utils.h
@@ -18,18 +18,18 @@
#define LATINIME_PROXIMITY_INFO_UTILS_H
#include <cmath>
+#include <unordered_map>
#include "defines.h"
#include "suggest/core/layout/additional_proximity_chars.h"
#include "suggest/core/layout/geometry_utils.h"
#include "utils/char_utils.h"
-#include "utils/hash_map_compat.h"
namespace latinime {
class ProximityInfoUtils {
public:
static AK_FORCE_INLINE int getKeyIndexOf(const int keyCount, const int c,
- const hash_map_compat<int, int> *const codeToKeyMap) {
+ const std::unordered_map<int, int> *const codeToKeyMap) {
if (keyCount == 0) {
// We do not have the coordinate data
return NOT_AN_INDEX;
@@ -38,7 +38,7 @@ class ProximityInfoUtils {
return NOT_AN_INDEX;
}
const int lowerCode = CharUtils::toLowerCase(c);
- hash_map_compat<int, int>::const_iterator mapPos = codeToKeyMap->find(lowerCode);
+ std::unordered_map<int, int>::const_iterator mapPos = codeToKeyMap->find(lowerCode);
if (mapPos != codeToKeyMap->end()) {
return mapPos->second;
}
@@ -52,7 +52,7 @@ class ProximityInfoUtils {
const int *const proximityCharsArray, const int cellHeight, const int cellWidth,
const int gridWidth, const int mostCommonKeyWidth, const int keyCount,
const char *const localeStr,
- const hash_map_compat<int, int> *const codeToKeyMap, int *inputProximities) {
+ const std::unordered_map<int, int> *const codeToKeyMap, int *inputProximities) {
// Initialize
// - mInputCodes
// - mNormalizedSquaredDistances
@@ -100,6 +100,10 @@ class ProximityInfoUtils {
const float dotProduct = ray1x * ray2x + ray1y * ray2y;
const float lineLengthSqr = GeometryUtils::SQUARE_FLOAT(ray2x)
+ GeometryUtils::SQUARE_FLOAT(ray2y);
+ if (lineLengthSqr <= 0.0f) {
+ // Return point to the point distance.
+ return getSquaredDistanceFloat(x, y, x1, y1);
+ }
const float projectionLengthSqr = dotProduct / lineLengthSqr;
float projectionX;
@@ -121,29 +125,6 @@ class ProximityInfoUtils {
return type == MATCH_CHAR || type == PROXIMITY_CHAR || type == ADDITIONAL_PROXIMITY_CHAR;
}
- // Normal distribution N(u, sigma^2).
- struct NormalDistribution {
- public:
- NormalDistribution(const float u, const float sigma)
- : mU(u), mSigma(sigma),
- mPreComputedNonExpPart(1.0f / sqrtf(2.0f * M_PI_F
- * GeometryUtils::SQUARE_FLOAT(sigma))),
- mPreComputedExponentPart(-1.0f / (2.0f * GeometryUtils::SQUARE_FLOAT(sigma))) {}
-
- float getProbabilityDensity(const float x) const {
- const float shiftedX = x - mU;
- return mPreComputedNonExpPart
- * expf(mPreComputedExponentPart * GeometryUtils::SQUARE_FLOAT(shiftedX));
- }
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(NormalDistribution);
- const float mU; // mean value
- const float mSigma; // standard deviation
- const float mPreComputedNonExpPart; // = 1 / sqrt(2 * PI * sigma^2)
- const float mPreComputedExponentPart; // = -1 / (2 * sigma^2)
- }; // struct NormalDistribution
-
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(ProximityInfoUtils);
@@ -163,10 +144,16 @@ class ProximityInfoUtils {
const int *const proximityCharsArray, const int cellHeight, const int cellWidth,
const int gridWidth, const int mostCommonKeyWidth, const int keyCount,
const int x, const int y, const int primaryKey, const char *const localeStr,
- const hash_map_compat<int, int> *const codeToKeyMap, int *proximities) {
+ const std::unordered_map<int, int> *const codeToKeyMap, int *proximities) {
const int mostCommonKeyWidthSquare = mostCommonKeyWidth * mostCommonKeyWidth;
int insertPos = 0;
proximities[insertPos++] = primaryKey;
+ if (x == NOT_A_COORDINATE || y == NOT_A_COORDINATE) {
+ for (int i = insertPos; i < MAX_PROXIMITY_CHARS_SIZE; ++i) {
+ proximities[i] = NOT_A_CODE_POINT;
+ }
+ return;
+ }
const int startIndex = getStartIndexFromCoordinates(x, y, cellHeight, cellWidth, gridWidth);
if (startIndex >= 0) {
for (int i = 0; i < MAX_PROXIMITY_CHARS_SIZE; ++i) {
diff --git a/native/jni/src/suggest/core/layout/touch_position_correction_utils.h b/native/jni/src/suggest/core/layout/touch_position_correction_utils.h
index 9130e87d3..14074c13d 100644
--- a/native/jni/src/suggest/core/layout/touch_position_correction_utils.h
+++ b/native/jni/src/suggest/core/layout/touch_position_correction_utils.h
@@ -17,6 +17,8 @@
#ifndef LATINIME_TOUCH_POSITION_CORRECTION_UTILS_H
#define LATINIME_TOUCH_POSITION_CORRECTION_UTILS_H
+#include <algorithm>
+
#include "defines.h"
#include "suggest/core/layout/proximity_info_params.h"
@@ -34,7 +36,7 @@ class TouchPositionCorrectionUtils {
static const float R2 = 1.0f;
const float x = normalizedSquaredDistance;
if (!isTouchPositionCorrectionEnabled) {
- return min(C, x);
+ return std::min(C, x);
}
// factor is a piecewise linear function like:
diff --git a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
index 5492c6070..a61227626 100644
--- a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
@@ -17,6 +17,9 @@
#ifndef LATINIME_DICTIONARY_HEADER_STRUCTURE_POLICY_H
#define LATINIME_DICTIONARY_HEADER_STRUCTURE_POLICY_H
+#include <map>
+#include <vector>
+
#include "defines.h"
namespace latinime {
@@ -27,21 +30,29 @@ namespace latinime {
*/
class DictionaryHeaderStructurePolicy {
public:
+ typedef std::map<std::vector<int>, std::vector<int> > AttributeMap;
+
virtual ~DictionaryHeaderStructurePolicy() {}
- virtual bool supportsDynamicUpdate() const = 0;
+ virtual int getFormatVersionNumber() const = 0;
- virtual bool requiresGermanUmlautProcessing() const = 0;
+ virtual int getSize() const = 0;
- virtual bool requiresFrenchLigatureProcessing() const = 0;
+ virtual const AttributeMap *getAttributeMap() const = 0;
- virtual float getMultiWordCostMultiplier() const = 0;
+ virtual bool requiresGermanUmlautProcessing() const = 0;
- virtual int getLastDecayedTime() const = 0;
+ virtual float getMultiWordCostMultiplier() const = 0;
virtual void readHeaderValueOrQuestionMark(const char *const key, int *outValue,
int outValueSize) const = 0;
+ virtual bool shouldBoostExactMatches() const = 0;
+
+ virtual const std::vector<int> *getLocale() const = 0;
+
+ virtual bool supportsBeginningOfSentence() const = 0;
+
protected:
DictionaryHeaderStructurePolicy() {}
diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
index 41f82049f..b72601109 100644
--- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
@@ -17,7 +17,10 @@
#ifndef LATINIME_DICTIONARY_STRUCTURE_POLICY_H
#define LATINIME_DICTIONARY_STRUCTURE_POLICY_H
+#include <memory>
+
#include "defines.h"
+#include "suggest/core/dictionary/property/word_property.h"
namespace latinime {
@@ -26,25 +29,29 @@ class DicNodeVector;
class DictionaryBigramsStructurePolicy;
class DictionaryHeaderStructurePolicy;
class DictionaryShortcutsStructurePolicy;
+class PrevWordsInfo;
+class UnigramProperty;
/*
- * This class abstracts structure of dictionaries.
+ * This class abstracts the structure of dictionaries.
* Implement this policy to support additional dictionaries.
*/
class DictionaryStructureWithBufferPolicy {
public:
+ typedef std::unique_ptr<DictionaryStructureWithBufferPolicy> StructurePolicyPtr;
+
virtual ~DictionaryStructureWithBufferPolicy() {}
virtual int getRootPosition() const = 0;
- virtual void createAndGetAllChildNodes(const DicNode *const dicNode,
+ virtual void createAndGetAllChildDicNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const = 0;
virtual int getCodePointsAndProbabilityAndReturnCodePointCount(
const int nodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const = 0;
- virtual int getTerminalNodePositionOfWord(const int *const inWord,
+ virtual int getTerminalPtNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const = 0;
virtual int getProbability(const int unigramProbability,
@@ -63,28 +70,45 @@ class DictionaryStructureWithBufferPolicy {
virtual const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const = 0;
// Returns whether the update was success or not.
- virtual bool addUnigramWord(const int *const word, const int length,
- const int probability) = 0;
+ virtual bool addUnigramEntry(const int *const word, const int length,
+ const UnigramProperty *const unigramProperty) = 0;
+
+ // Returns whether the update was success or not.
+ virtual bool removeUnigramEntry(const int *const word, const int length) = 0;
// Returns whether the update was success or not.
- virtual bool addBigramWords(const int *const word0, const int length0, const int *const word1,
- const int length1, const int probability) = 0;
+ virtual bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+ const BigramProperty *const bigramProperty) = 0;
// Returns whether the update was success or not.
- virtual bool removeBigramWords(const int *const word0, const int length0,
- const int *const word1, const int length1) = 0;
+ virtual bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+ const int *const word, const int length) = 0;
- virtual void flush(const char *const filePath) = 0;
+ // Returns whether the flush succeeded or not.
+ virtual bool flush(const char *const filePath) = 0;
- virtual void flushWithGC(const char *const filePath) = 0;
+ // Returns whether the GC and flush succeeded or not.
+ virtual bool flushWithGC(const char *const filePath) = 0;
virtual bool needsToRunGC(const bool mindsBlockByGC) const = 0;
// Currently, this method is used only for testing. You may want to consider creating new
// dedicated method instead of this if you want to use this in the production.
- virtual void getProperty(const char *const query, char *const outResult,
+ virtual void getProperty(const char *const query, const int queryLength, char *const outResult,
const int maxResultLength) = 0;
+ // Used for testing.
+ virtual const WordProperty getWordProperty(const int *const codePonts,
+ const int codePointCount) const = 0;
+
+ // Method to iterate all words in the dictionary.
+ // The returned token has to be used to get the next word. If token is 0, this method newly
+ // starts iterating the dictionary.
+ virtual int getNextWordAndNextToken(const int token, int *const outCodePoints,
+ int *const outCodePointCount) = 0;
+
+ virtual bool isCorrupted() const = 0;
+
protected:
DictionaryStructureWithBufferPolicy() {}
diff --git a/native/jni/src/suggest/core/policy/scoring.h b/native/jni/src/suggest/core/policy/scoring.h
index 102e856f5..292194bf2 100644
--- a/native/jni/src/suggest/core/policy/scoring.h
+++ b/native/jni/src/suggest/core/policy/scoring.h
@@ -23,26 +23,24 @@ namespace latinime {
class DicNode;
class DicTraverseSession;
+class SuggestionResults;
// This class basically tweaks suggestions and distances apart from CompoundDistance
class Scoring {
public:
virtual int calculateFinalScore(const float compoundDistance, const int inputSize,
- const bool forceCommit) const = 0;
- virtual bool getMostProbableString(const DicTraverseSession *const traverseSession,
- const int terminalSize, const float languageWeight, int *const outputCodePoints,
- int *const type, int *const freq) const = 0;
- virtual void safetyNetForMostProbableString(const int terminalSize,
- const int maxScore, int *const outputCodePoints, int *const frequencies) const = 0;
- // TODO: Make more generic
- virtual void searchWordWithDoubleLetter(DicNode *terminals, const int terminalSize,
- int *doubleLetterTerminalIndex, DoubleLetterLevel *doubleLetterLevel) const = 0;
+ const ErrorTypeUtils::ErrorType containedErrorTypes, const bool forceCommit,
+ const bool boostExactMatches) const = 0;
+ virtual void getMostProbableString(const DicTraverseSession *const traverseSession,
+ const float languageWeight, SuggestionResults *const outSuggestionResults) const = 0;
virtual float getAdjustedLanguageWeight(DicTraverseSession *const traverseSession,
DicNode *const terminals, const int size) const = 0;
- virtual float getDoubleLetterDemotionDistanceCost(const int terminalIndex,
- const int doubleLetterTerminalIndex,
- const DoubleLetterLevel doubleLetterLevel) const = 0;
+ virtual float getDoubleLetterDemotionDistanceCost(
+ const DicNode *const terminalDicNode) const = 0;
virtual bool doesAutoCorrectValidWord() const = 0;
+ virtual bool autoCorrectsToMultiWordSuggestionIfTop() const = 0;
+ virtual bool sameAsTyped(const DicTraverseSession *const traverseSession,
+ const DicNode *const dicNode) const = 0;
protected:
Scoring() {}
diff --git a/native/jni/src/suggest/core/policy/traversal.h b/native/jni/src/suggest/core/policy/traversal.h
index e935533f2..8ddaa0514 100644
--- a/native/jni/src/suggest/core/policy/traversal.h
+++ b/native/jni/src/suggest/core/policy/traversal.h
@@ -41,13 +41,11 @@ class Traversal {
const DicNode *const dicNode) const = 0;
virtual ProximityType getProximityType(const DicTraverseSession *const traverseSession,
const DicNode *const dicNode, const DicNode *const childDicNode) const = 0;
- virtual bool sameAsTyped(const DicTraverseSession *const traverseSession,
- const DicNode *const dicNode) const = 0;
virtual bool needsToTraverseAllUserInput() const = 0;
virtual float getMaxSpatialDistance() const = 0;
- virtual bool autoCorrectsToMultiWordSuggestionIfTop() const = 0;
virtual int getDefaultExpandDicNodeSize() const = 0;
virtual int getMaxCacheSize(const int inputSize) const = 0;
+ virtual int getTerminalCacheSize() const = 0;
virtual bool isPossibleOmissionChildNode(const DicTraverseSession *const traverseSession,
const DicNode *const parentDicNode, const DicNode *const dicNode) const = 0;
virtual bool isGoodToTraverseNextWord(const DicNode *const dicNode) const = 0;
diff --git a/native/jni/src/suggest/core/policy/weighting.cpp b/native/jni/src/suggest/core/policy/weighting.cpp
index 0c4016893..c202b81fe 100644
--- a/native/jni/src/suggest/core/policy/weighting.cpp
+++ b/native/jni/src/suggest/core/policy/weighting.cpp
@@ -20,6 +20,7 @@
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_profiler.h"
#include "suggest/core/dicnode/dic_node_utils.h"
+#include "suggest/core/dictionary/error_type_utils.h"
#include "suggest/core/session/dic_traverse_session.h"
namespace latinime {
@@ -82,8 +83,8 @@ static inline void profile(const CorrectionType correctionType, DicNode *const n
traverseSession, parentDicNode, dicNode, &inputStateG);
const float languageCost = Weighting::getLanguageCost(weighting, correctionType,
traverseSession, parentDicNode, dicNode, multiBigramMap);
- const ErrorType errorType = weighting->getErrorType(correctionType, traverseSession,
- parentDicNode, dicNode);
+ const ErrorTypeUtils::ErrorType errorType = weighting->getErrorType(correctionType,
+ traverseSession, parentDicNode, dicNode);
profile(correctionType, dicNode);
if (inputStateG.mNeedsToUpdateInputStateG) {
dicNode->updateInputIndexG(&inputStateG);
diff --git a/native/jni/src/suggest/core/policy/weighting.h b/native/jni/src/suggest/core/policy/weighting.h
index 2d49e98a6..bd6b3cf41 100644
--- a/native/jni/src/suggest/core/policy/weighting.h
+++ b/native/jni/src/suggest/core/policy/weighting.h
@@ -18,6 +18,7 @@
#define LATINIME_WEIGHTING_H
#include "defines.h"
+#include "suggest/core/dictionary/error_type_utils.h"
namespace latinime {
@@ -84,7 +85,7 @@ class Weighting {
virtual float getSpaceSubstitutionCost(const DicTraverseSession *const traverseSession,
const DicNode *const dicNode) const = 0;
- virtual ErrorType getErrorType(const CorrectionType correctionType,
+ virtual ErrorTypeUtils::ErrorType getErrorType(const CorrectionType correctionType,
const DicTraverseSession *const traverseSession,
const DicNode *const parentDicNode, const DicNode *const dicNode) const = 0;
diff --git a/native/jni/src/suggest/core/result/suggested_word.h b/native/jni/src/suggest/core/result/suggested_word.h
new file mode 100644
index 000000000..258a40eeb
--- /dev/null
+++ b/native/jni/src/suggest/core/result/suggested_word.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SUGGESTED_WORD_H
+#define LATINIME_SUGGESTED_WORD_H
+
+#include <vector>
+
+#include "defines.h"
+#include "suggest/core/dictionary/dictionary.h"
+
+namespace latinime {
+
+class SuggestedWord {
+ public:
+ class Comparator {
+ public:
+ bool operator()(const SuggestedWord &left, const SuggestedWord &right) {
+ if (left.getScore() != right.getScore()) {
+ return left.getScore() > right.getScore();
+ }
+ return left.getCodePointCount() < right.getCodePointCount();
+ }
+
+ private:
+ DISALLOW_ASSIGNMENT_OPERATOR(Comparator);
+ };
+
+ SuggestedWord(const int *const codePoints, const int codePointCount,
+ const int score, const int type, const int indexToPartialCommit,
+ const int autoCommitFirstWordConfidence)
+ : mCodePoints(codePoints, codePoints + codePointCount), mScore(score),
+ mType(type), mIndexToPartialCommit(indexToPartialCommit),
+ mAutoCommitFirstWordConfidence(autoCommitFirstWordConfidence) {}
+
+ const int *getCodePoint() const {
+ return &mCodePoints.at(0);
+ }
+
+ int getCodePointCount() const {
+ return mCodePoints.size();
+ }
+
+ int getScore() const {
+ return mScore;
+ }
+
+ int getType() const {
+ return mType;
+ }
+
+ int getIndexToPartialCommit() const {
+ return mIndexToPartialCommit;
+ }
+
+ int getAutoCommitFirstWordConfidence() const {
+ return mAutoCommitFirstWordConfidence;
+ }
+
+ private:
+ DISALLOW_DEFAULT_CONSTRUCTOR(SuggestedWord);
+
+ std::vector<int> mCodePoints;
+ int mScore;
+ int mType;
+ int mIndexToPartialCommit;
+ int mAutoCommitFirstWordConfidence;
+};
+} // namespace latinime
+#endif /* LATINIME_SUGGESTED_WORD_H */
diff --git a/native/jni/src/suggest/core/result/suggestion_results.cpp b/native/jni/src/suggest/core/result/suggestion_results.cpp
new file mode 100644
index 000000000..4c10bd08a
--- /dev/null
+++ b/native/jni/src/suggest/core/result/suggestion_results.cpp
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/core/result/suggestion_results.h"
+
+#include "utils/jni_data_utils.h"
+
+namespace latinime {
+
+void SuggestionResults::outputSuggestions(JNIEnv *env, jintArray outSuggestionCount,
+ jintArray outputCodePointsArray, jintArray outScoresArray, jintArray outSpaceIndicesArray,
+ jintArray outTypesArray, jintArray outAutoCommitFirstWordConfidenceArray,
+ jfloatArray outLanguageWeight) {
+ int outputIndex = 0;
+ while (!mSuggestedWords.empty()) {
+ const SuggestedWord &suggestedWord = mSuggestedWords.top();
+ suggestedWord.getCodePointCount();
+ const int start = outputIndex * MAX_WORD_LENGTH;
+ JniDataUtils::outputCodePoints(env, outputCodePointsArray, start,
+ MAX_WORD_LENGTH /* maxLength */, suggestedWord.getCodePoint(),
+ suggestedWord.getCodePointCount(), true /* needsNullTermination */);
+ JniDataUtils::putIntToArray(env, outScoresArray, outputIndex, suggestedWord.getScore());
+ JniDataUtils::putIntToArray(env, outSpaceIndicesArray, outputIndex,
+ suggestedWord.getIndexToPartialCommit());
+ JniDataUtils::putIntToArray(env, outTypesArray, outputIndex, suggestedWord.getType());
+ if (mSuggestedWords.size() == 1) {
+ JniDataUtils::putIntToArray(env, outAutoCommitFirstWordConfidenceArray, 0 /* index */,
+ suggestedWord.getAutoCommitFirstWordConfidence());
+ }
+ ++outputIndex;
+ mSuggestedWords.pop();
+ }
+ JniDataUtils::putIntToArray(env, outSuggestionCount, 0 /* index */, outputIndex);
+ JniDataUtils::putFloatToArray(env, outLanguageWeight, 0 /* index */, mLanguageWeight);
+}
+
+void SuggestionResults::addPrediction(const int *const codePoints, const int codePointCount,
+ const int probability) {
+ if (probability == NOT_A_PROBABILITY) {
+ // Invalid word.
+ return;
+ }
+ addSuggestion(codePoints, codePointCount, probability, Dictionary::KIND_PREDICTION,
+ NOT_AN_INDEX, NOT_A_FIRST_WORD_CONFIDENCE);
+}
+
+void SuggestionResults::addSuggestion(const int *const codePoints, const int codePointCount,
+ const int score, const int type, const int indexToPartialCommit,
+ const int autocimmitFirstWordConfindence) {
+ if (codePointCount <= 0 || codePointCount > MAX_WORD_LENGTH) {
+ // Invalid word.
+ AKLOGE("Invalid word is added to the suggestion results. codePointCount: %d",
+ codePointCount);
+ return;
+ }
+ if (getSuggestionCount() >= mMaxSuggestionCount) {
+ const SuggestedWord &mWorstSuggestion = mSuggestedWords.top();
+ if (score > mWorstSuggestion.getScore() || (score == mWorstSuggestion.getScore()
+ && codePointCount < mWorstSuggestion.getCodePointCount())) {
+ mSuggestedWords.pop();
+ } else {
+ return;
+ }
+ }
+ mSuggestedWords.push(SuggestedWord(codePoints, codePointCount, score, type,
+ indexToPartialCommit, autocimmitFirstWordConfindence));
+}
+
+void SuggestionResults::getSortedScores(int *const outScores) const {
+ auto copyOfSuggestedWords = mSuggestedWords;
+ while (!copyOfSuggestedWords.empty()) {
+ const SuggestedWord &suggestedWord = copyOfSuggestedWords.top();
+ outScores[copyOfSuggestedWords.size() - 1] = suggestedWord.getScore();
+ copyOfSuggestedWords.pop();
+ }
+}
+
+void SuggestionResults::dumpSuggestions() const {
+ AKLOGE("language weight: %f", mLanguageWeight);
+ std::vector<SuggestedWord> suggestedWords;
+ auto copyOfSuggestedWords = mSuggestedWords;
+ while (!copyOfSuggestedWords.empty()) {
+ suggestedWords.push_back(copyOfSuggestedWords.top());
+ copyOfSuggestedWords.pop();
+ }
+ int index = 0;
+ for (auto it = suggestedWords.rbegin(); it != suggestedWords.rend(); ++it) {
+ DUMP_SUGGESTION(it->getCodePoint(), it->getCodePointCount(), index, it->getScore());
+ index++;
+ }
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/core/result/suggestion_results.h b/native/jni/src/suggest/core/result/suggestion_results.h
new file mode 100644
index 000000000..8e845e2d3
--- /dev/null
+++ b/native/jni/src/suggest/core/result/suggestion_results.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SUGGESTION_RESULTS_H
+#define LATINIME_SUGGESTION_RESULTS_H
+
+#include <queue>
+#include <vector>
+
+#include "defines.h"
+#include "jni.h"
+#include "suggest/core/result/suggested_word.h"
+
+namespace latinime {
+
+class SuggestionResults {
+ public:
+ explicit SuggestionResults(const int maxSuggestionCount)
+ : mMaxSuggestionCount(maxSuggestionCount), mLanguageWeight(NOT_A_LANGUAGE_WEIGHT),
+ mSuggestedWords() {}
+
+ // Outputs suggestions to the given arrays; the count is written to outSuggestionCount[0].
+ void outputSuggestions(JNIEnv *env, jintArray outSuggestionCount, jintArray outCodePointsArray,
+ jintArray outScoresArray, jintArray outSpaceIndicesArray, jintArray outTypesArray,
+ jintArray outAutoCommitFirstWordConfidenceArray, jfloatArray outLanguageWeight);
+ void addPrediction(const int *const codePoints, const int codePointCount, const int score);
+ void addSuggestion(const int *const codePoints, const int codePointCount,
+ const int score, const int type, const int indexToPartialCommit,
+ const int autocimmitFirstWordConfindence);
+ void getSortedScores(int *const outScores) const;
+ void dumpSuggestions() const;
+
+ void setLanguageWeight(const float languageWeight) {
+ mLanguageWeight = languageWeight;
+ }
+
+ int getSuggestionCount() const {
+ return mSuggestedWords.size();
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(SuggestionResults);
+
+ const int mMaxSuggestionCount;
+ float mLanguageWeight;
+ std::priority_queue<
+ SuggestedWord, std::vector<SuggestedWord>, SuggestedWord::Comparator> mSuggestedWords;
+};
+} // namespace latinime
+#endif // LATINIME_SUGGESTION_RESULTS_H
diff --git a/native/jni/src/suggest/core/result/suggestions_output_utils.cpp b/native/jni/src/suggest/core/result/suggestions_output_utils.cpp
new file mode 100644
index 000000000..7b0e7e1b4
--- /dev/null
+++ b/native/jni/src/suggest/core/result/suggestions_output_utils.cpp
@@ -0,0 +1,235 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/core/result/suggestions_output_utils.h"
+
+#include <algorithm>
+#include <vector>
+
+#include "suggest/core/dicnode/dic_node.h"
+#include "suggest/core/dicnode/dic_node_utils.h"
+#include "suggest/core/dictionary/binary_dictionary_shortcut_iterator.h"
+#include "suggest/core/dictionary/error_type_utils.h"
+#include "suggest/core/policy/scoring.h"
+#include "suggest/core/result/suggestion_results.h"
+#include "suggest/core/session/dic_traverse_session.h"
+#include "suggest/core/suggest_options.h"
+
+namespace latinime {
+
+const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
+
+/* static */ void SuggestionsOutputUtils::outputSuggestions(
+ const Scoring *const scoringPolicy, DicTraverseSession *traverseSession,
+ const float languageWeight, SuggestionResults *const outSuggestionResults) {
+#if DEBUG_EVALUATE_MOST_PROBABLE_STRING
+ const int terminalSize = 0;
+#else
+ const int terminalSize = traverseSession->getDicTraverseCache()->terminalSize();
+#endif
+ std::vector<DicNode> terminals(terminalSize);
+ for (int index = terminalSize - 1; index >= 0; --index) {
+ traverseSession->getDicTraverseCache()->popTerminal(&terminals[index]);
+ }
+ // Compute a language weight when an invalid language weight is passed.
+ // NOT_A_LANGUAGE_WEIGHT (-1) is assumed as an invalid language weight.
+ const float languageWeightToOutputSuggestions = (languageWeight < 0.0f) ?
+ scoringPolicy->getAdjustedLanguageWeight(
+ traverseSession, terminals.data(), terminalSize) : languageWeight;
+ outSuggestionResults->setLanguageWeight(languageWeightToOutputSuggestions);
+ // Force autocorrection for obvious long multi-word suggestions when the top suggestion is
+ // a long multiple words suggestion.
+ // TODO: Implement a smarter auto-commit method for handling multi-word suggestions.
+ const bool forceCommitMultiWords = scoringPolicy->autoCorrectsToMultiWordSuggestionIfTop()
+ && (traverseSession->getInputSize() >= MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT
+ && !terminals.empty() && terminals.front().hasMultipleWords());
+ // TODO: have partial commit work even with multiple pointers.
+ const bool outputSecondWordFirstLetterInputIndex =
+ traverseSession->isOnlyOnePointerUsed(0 /* pointerId */);
+ const bool boostExactMatches = traverseSession->getDictionaryStructurePolicy()->
+ getHeaderStructurePolicy()->shouldBoostExactMatches();
+
+ // Output suggestion results here
+ for (auto &terminalDicNode : terminals) {
+ outputSuggestionsOfDicNode(scoringPolicy, traverseSession, &terminalDicNode,
+ languageWeightToOutputSuggestions, boostExactMatches, forceCommitMultiWords,
+ outputSecondWordFirstLetterInputIndex, outSuggestionResults);
+ }
+ scoringPolicy->getMostProbableString(traverseSession, languageWeightToOutputSuggestions,
+ outSuggestionResults);
+}
+
+/* static */ void SuggestionsOutputUtils::outputSuggestionsOfDicNode(
+ const Scoring *const scoringPolicy, DicTraverseSession *traverseSession,
+ const DicNode *const terminalDicNode, const float languageWeight,
+ const bool boostExactMatches, const bool forceCommitMultiWords,
+ const bool outputSecondWordFirstLetterInputIndex,
+ SuggestionResults *const outSuggestionResults) {
+ if (DEBUG_GEO_FULL) {
+ terminalDicNode->dump("OUT:");
+ }
+ const float doubleLetterCost =
+ scoringPolicy->getDoubleLetterDemotionDistanceCost(terminalDicNode);
+ const float compoundDistance = terminalDicNode->getCompoundDistance(languageWeight)
+ + doubleLetterCost;
+ const bool isPossiblyOffensiveWord =
+ traverseSession->getDictionaryStructurePolicy()->getProbability(
+ terminalDicNode->getProbability(), NOT_A_PROBABILITY) <= 0;
+ const bool isExactMatch =
+ ErrorTypeUtils::isExactMatch(terminalDicNode->getContainedErrorTypes());
+ const bool isExactMatchWithIntentionalOmission =
+ ErrorTypeUtils::isExactMatchWithIntentionalOmission(
+ terminalDicNode->getContainedErrorTypes());
+ const bool isFirstCharUppercase = terminalDicNode->isFirstCharUppercase();
+ // Heuristic: We exclude probability=0 first-char-uppercase words from exact match.
+ // (e.g. "AMD" and "and")
+ const bool isSafeExactMatch = isExactMatch
+ && !(isPossiblyOffensiveWord && isFirstCharUppercase);
+ const int outputTypeFlags =
+ (isPossiblyOffensiveWord ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0)
+ | ((isSafeExactMatch && boostExactMatches) ? Dictionary::KIND_FLAG_EXACT_MATCH : 0)
+ | (isExactMatchWithIntentionalOmission ?
+ Dictionary::KIND_FLAG_EXACT_MATCH_WITH_INTENTIONAL_OMISSION : 0);
+
+ // Entries that are blacklisted or do not represent a word should not be output.
+ const bool isValidWord = !terminalDicNode->isBlacklistedOrNotAWord();
+ // When we have to block offensive words, non-exact matched offensive words should not be
+ // output.
+ const bool blockOffensiveWords = traverseSession->getSuggestOptions()->blockOffensiveWords();
+ const bool isBlockedOffensiveWord = blockOffensiveWords && isPossiblyOffensiveWord
+ && !isSafeExactMatch;
+
+ // Increase output score of top typing suggestion to ensure autocorrection.
+ // TODO: Better integration with java side autocorrection logic.
+ const int finalScore = scoringPolicy->calculateFinalScore(
+ compoundDistance, traverseSession->getInputSize(),
+ terminalDicNode->getContainedErrorTypes(),
+ (forceCommitMultiWords && terminalDicNode->hasMultipleWords())
+ || (isValidWord && scoringPolicy->doesAutoCorrectValidWord()),
+ boostExactMatches);
+
+ // Don't output invalid or blocked offensive words. However, we still need to submit their
+ // shortcuts if any.
+ if (isValidWord && !isBlockedOffensiveWord) {
+ int codePoints[MAX_WORD_LENGTH];
+ terminalDicNode->outputResult(codePoints);
+ const int indexToPartialCommit = outputSecondWordFirstLetterInputIndex ?
+ terminalDicNode->getSecondWordFirstInputIndex(
+ traverseSession->getProximityInfoState(0)) :
+ NOT_AN_INDEX;
+ outSuggestionResults->addSuggestion(codePoints,
+ terminalDicNode->getTotalNodeCodePointCount(),
+ finalScore, Dictionary::KIND_CORRECTION | outputTypeFlags,
+ indexToPartialCommit, computeFirstWordConfidence(terminalDicNode));
+ }
+
+ // Output shortcuts.
+ // Shortcut is not supported for multiple words suggestions.
+ // TODO: Check shortcuts during traversal for multiple words suggestions.
+ if (!terminalDicNode->hasMultipleWords()) {
+ BinaryDictionaryShortcutIterator shortcutIt(
+ traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(),
+ traverseSession->getDictionaryStructurePolicy()
+ ->getShortcutPositionOfPtNode(terminalDicNode->getPtNodePos()));
+ const bool sameAsTyped = scoringPolicy->sameAsTyped(traverseSession, terminalDicNode);
+ const int shortcutBaseScore = scoringPolicy->doesAutoCorrectValidWord() ?
+ scoringPolicy->calculateFinalScore(compoundDistance,
+ traverseSession->getInputSize(),
+ terminalDicNode->getContainedErrorTypes(),
+ true /* forceCommit */, boostExactMatches) : finalScore;
+ outputShortcuts(&shortcutIt, shortcutBaseScore, sameAsTyped, outSuggestionResults);
+ }
+}
+
+/* static */ int SuggestionsOutputUtils::computeFirstWordConfidence(
+ const DicNode *const terminalDicNode) {
+ // Get the number of spaces in the first suggestion
+ const int spaceCount = terminalDicNode->getTotalNodeSpaceCount();
+ // Get the number of characters in the first suggestion
+ const int length = terminalDicNode->getTotalNodeCodePointCount();
+ // Get the distance for the first word of the suggestion
+ const float distance = terminalDicNode->getNormalizedCompoundDistanceAfterFirstWord();
+
+ // Arbitrarily, we give a score whose useful values range from 0 to 1,000,000.
+ // 1,000,000 will be the cutoff to auto-commit. It's fine if the number is under 0 or
+ // above 1,000,000 : under 0 just means it's very bad to commit, and above 1,000,000 means
+ // we are very confident.
+ // Expected space count is 1 ~ 5
+ static const int MIN_EXPECTED_SPACE_COUNT = 1;
+ static const int MAX_EXPECTED_SPACE_COUNT = 5;
+ // Expected length is about 4 ~ 30
+ static const int MIN_EXPECTED_LENGTH = 4;
+ static const int MAX_EXPECTED_LENGTH = 30;
+ // Expected distance is about 0.2 ~ 2.0, but consider 0.0 ~ 2.0
+ static const float MIN_EXPECTED_DISTANCE = 0.0;
+ static const float MAX_EXPECTED_DISTANCE = 2.0;
+ // This is not strict: it's where most stuff will be falling, but it's still fine if it's
+ // outside these values. We want to output a value that reflects all of these. Each factor
+ // contributes a bit.
+
+ // We need at least a space.
+ if (spaceCount < 1) return NOT_A_FIRST_WORD_CONFIDENCE;
+
+ // The smaller the edit distance, the higher the contribution. MAX_EXPECTED_DISTANCE means 0
+ // contribution, while MIN_EXPECTED_DISTANCE means full contribution according to the
+ // weight of the distance. Clamp to avoid overflows.
+ const float clampedDistance = distance < MIN_EXPECTED_DISTANCE ? MIN_EXPECTED_DISTANCE
+ : distance > MAX_EXPECTED_DISTANCE ? MAX_EXPECTED_DISTANCE : distance;
+ const int distanceContribution = DISTANCE_WEIGHT_FOR_AUTO_COMMIT
+ * (MAX_EXPECTED_DISTANCE - clampedDistance)
+ / (MAX_EXPECTED_DISTANCE - MIN_EXPECTED_DISTANCE);
+ // The larger the suggestion length, the larger the contribution. MIN_EXPECTED_LENGTH is no
+ // contribution, MAX_EXPECTED_LENGTH is full contribution according to the weight of the
+ // length. Length is guaranteed to be between 1 and 48, so we don't need to clamp.
+ const int lengthContribution = LENGTH_WEIGHT_FOR_AUTO_COMMIT
+ * (length - MIN_EXPECTED_LENGTH) / (MAX_EXPECTED_LENGTH - MIN_EXPECTED_LENGTH);
+ // The more spaces, the larger the contribution. MIN_EXPECTED_SPACE_COUNT space is no
+ // contribution, MAX_EXPECTED_SPACE_COUNT spaces is full contribution according to the
+ // weight of the space count.
+ const int spaceContribution = SPACE_COUNT_WEIGHT_FOR_AUTO_COMMIT
+ * (spaceCount - MIN_EXPECTED_SPACE_COUNT)
+ / (MAX_EXPECTED_SPACE_COUNT - MIN_EXPECTED_SPACE_COUNT);
+
+ return distanceContribution + lengthContribution + spaceContribution;
+}
+
+/* static */ void SuggestionsOutputUtils::outputShortcuts(
+ BinaryDictionaryShortcutIterator *const shortcutIt, const int finalScore,
+ const bool sameAsTyped, SuggestionResults *const outSuggestionResults) {
+ int shortcutTarget[MAX_WORD_LENGTH];
+ while (shortcutIt->hasNextShortcutTarget()) {
+ bool isWhilelist;
+ int shortcutTargetStringLength;
+ shortcutIt->nextShortcutTarget(MAX_WORD_LENGTH, shortcutTarget,
+ &shortcutTargetStringLength, &isWhilelist);
+ int shortcutScore;
+ int kind;
+ if (isWhilelist && sameAsTyped) {
+ shortcutScore = S_INT_MAX;
+ kind = Dictionary::KIND_WHITELIST;
+ } else {
+ // Start from the base entry's score - 1 (the addSuggestion() call below subtracts 1 more).
+ shortcutScore = finalScore;
+ // Protection against int underflow
+ shortcutScore = std::max(S_INT_MIN + 1, shortcutScore) - 1;
+ kind = Dictionary::KIND_SHORTCUT;
+ }
+ outSuggestionResults->addSuggestion(shortcutTarget, shortcutTargetStringLength,
+ std::max(S_INT_MIN + 1, shortcutScore) - 1, kind, NOT_AN_INDEX,
+ NOT_A_FIRST_WORD_CONFIDENCE);
+ }
+}
+} // namespace latinime
diff --git a/native/jni/src/suggest/core/result/suggestions_output_utils.h b/native/jni/src/suggest/core/result/suggestions_output_utils.h
new file mode 100644
index 000000000..b099b4776
--- /dev/null
+++ b/native/jni/src/suggest/core/result/suggestions_output_utils.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SUGGESTIONS_OUTPUT_UTILS
+#define LATINIME_SUGGESTIONS_OUTPUT_UTILS
+
+#include "defines.h"
+
+namespace latinime {
+
+class BinaryDictionaryShortcutIterator;
+class DicNode;
+class DicTraverseSession;
+class Scoring;
+class SuggestionResults;
+
+class SuggestionsOutputUtils {
+ public:
+ /**
+ * Outputs the final list of suggestions (i.e., terminal nodes).
+ */
+ static void outputSuggestions(const Scoring *const scoringPolicy,
+ DicTraverseSession *traverseSession, const float languageWeight,
+ SuggestionResults *const outSuggestionResults);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(SuggestionsOutputUtils);
+
+ // Inputs at least this long will autocorrect if the top suggestion is multi-word
+ static const int MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT;
+
+ static void outputSuggestionsOfDicNode(const Scoring *const scoringPolicy,
+ DicTraverseSession *traverseSession, const DicNode *const terminalDicNode,
+ const float languageWeight, const bool boostExactMatches,
+ const bool forceCommitMultiWords, const bool outputSecondWordFirstLetterInputIndex,
+ SuggestionResults *const outSuggestionResults);
+ static void outputShortcuts(BinaryDictionaryShortcutIterator *const shortcutIt,
+ const int finalScore, const bool sameAsTyped,
+ SuggestionResults *const outSuggestionResults);
+ static int computeFirstWordConfidence(const DicNode *const terminalDicNode);
+};
+} // namespace latinime
+#endif // LATINIME_SUGGESTIONS_OUTPUT_UTILS
diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.cpp b/native/jni/src/suggest/core/session/dic_traverse_session.cpp
index 50f2bbd8d..f1e411f38 100644
--- a/native/jni/src/suggest/core/session/dic_traverse_session.cpp
+++ b/native/jni/src/suggest/core/session/dic_traverse_session.cpp
@@ -20,6 +20,7 @@
#include "suggest/core/dictionary/dictionary.h"
#include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+#include "suggest/core/session/prev_words_info.h"
namespace latinime {
@@ -28,25 +29,14 @@ namespace latinime {
const int DicTraverseSession::DICTIONARY_SIZE_THRESHOLD_TO_USE_LARGE_CACHE_FOR_SUGGESTION =
256 * 1024;
-void DicTraverseSession::init(const Dictionary *const dictionary, const int *prevWord,
- int prevWordLength, const SuggestOptions *const suggestOptions) {
+void DicTraverseSession::init(const Dictionary *const dictionary,
+ const PrevWordsInfo *const prevWordsInfo, const SuggestOptions *const suggestOptions) {
mDictionary = dictionary;
mMultiWordCostMultiplier = getDictionaryStructurePolicy()->getHeaderStructurePolicy()
->getMultiWordCostMultiplier();
mSuggestOptions = suggestOptions;
- if (!prevWord) {
- mPrevWordPos = NOT_A_DICT_POS;
- return;
- }
- // TODO: merge following similar calls to getTerminalPosition into one case-insensitive call.
- mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(
- prevWord, prevWordLength, false /* forceLowerCaseSearch */);
- if (mPrevWordPos == NOT_A_DICT_POS) {
- // Check bigrams for lower-cased previous word if original was not found. Useful for
- // auto-capitalized words like "The [current_word]".
- mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(
- prevWord, prevWordLength, true /* forceLowerCaseSearch */);
- }
+ prevWordsInfo->getPrevWordsTerminalPtNodePos(
+ getDictionaryStructurePolicy(), mPrevWordsPtNodePos, true /* tryLowerCaseSearch */);
}
void DicTraverseSession::setupForGetSuggestions(const ProximityInfo *pInfo,
@@ -68,7 +58,6 @@ void DicTraverseSession::resetCache(const int thresholdForNextActiveDicNodes, co
mDicNodesCache.reset(thresholdForNextActiveDicNodes /* nextActiveSize */,
maxWords /* terminalSize */);
mMultiBigramMap.clear();
- mPartiallyCommited = false;
}
void DicTraverseSession::initializeProximityInfoStates(const int *const inputCodePoints,
diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.h b/native/jni/src/suggest/core/session/dic_traverse_session.h
index e0b1c67d9..5a51a112d 100644
--- a/native/jni/src/suggest/core/session/dic_traverse_session.h
+++ b/native/jni/src/suggest/core/session/dic_traverse_session.h
@@ -17,7 +17,6 @@
#ifndef LATINIME_DIC_TRAVERSE_SESSION_H
#define LATINIME_DIC_TRAVERSE_SESSION_H
-#include <stdint.h>
#include <vector>
#include "defines.h"
@@ -30,6 +29,7 @@ namespace latinime {
class Dictionary;
class DictionaryStructureWithBufferPolicy;
+class PrevWordsInfo;
class ProximityInfo;
class SuggestOptions;
@@ -45,32 +45,25 @@ class DicTraverseSession {
dictSize >= DICTIONARY_SIZE_THRESHOLD_TO_USE_LARGE_CACHE_FOR_SUGGESTION);
}
- static AK_FORCE_INLINE void initSessionInstance(DicTraverseSession *traverseSession,
- const Dictionary *const dictionary, const int *prevWord, const int prevWordLength,
- const SuggestOptions *const suggestOptions) {
- if (traverseSession) {
- DicTraverseSession *tSession = static_cast<DicTraverseSession *>(traverseSession);
- tSession->init(dictionary, prevWord, prevWordLength, suggestOptions);
- }
- }
-
static AK_FORCE_INLINE void releaseSessionInstance(DicTraverseSession *traverseSession) {
delete traverseSession;
}
AK_FORCE_INLINE DicTraverseSession(JNIEnv *env, jstring localeStr, bool usesLargeCache)
- : mPrevWordPos(NOT_A_DICT_POS), mProximityInfo(0),
- mDictionary(0), mSuggestOptions(0), mDicNodesCache(usesLargeCache),
- mMultiBigramMap(), mInputSize(0), mPartiallyCommited(false), mMaxPointerCount(1),
+ : mProximityInfo(nullptr), mDictionary(nullptr), mSuggestOptions(nullptr),
+ mDicNodesCache(usesLargeCache), mMultiBigramMap(), mInputSize(0), mMaxPointerCount(1),
mMultiWordCostMultiplier(1.0f) {
// NOTE: mProximityInfoStates is an array of instances.
// No need to initialize it explicitly here.
+ for (size_t i = 0; i < NELEMS(mPrevWordsPtNodePos); ++i) {
+ mPrevWordsPtNodePos[i] = NOT_A_DICT_POS;
+ }
}
// Non virtual inline destructor -- never inherit this class
AK_FORCE_INLINE ~DicTraverseSession() {}
- void init(const Dictionary *dictionary, const int *prevWord, int prevWordLength,
+ void init(const Dictionary *dictionary, const PrevWordsInfo *const prevWordsInfo,
const SuggestOptions *const suggestOptions);
// TODO: Remove and merge into init
void setupForGetSuggestions(const ProximityInfo *pInfo, const int *inputCodePoints,
@@ -86,19 +79,13 @@ class DicTraverseSession {
//--------------------
const ProximityInfo *getProximityInfo() const { return mProximityInfo; }
const SuggestOptions *getSuggestOptions() const { return mSuggestOptions; }
- int getPrevWordPos() const { return mPrevWordPos; }
- // TODO: REMOVE
- void setPrevWordPos(int pos) { mPrevWordPos = pos; }
- // TODO: Use proper parameter when changed
- int getDicRootPos() const { return 0; }
+ const int *getPrevWordsPtNodePos() const { return mPrevWordsPtNodePos; }
DicNodesCache *getDicTraverseCache() { return &mDicNodesCache; }
MultiBigramMap *getMultiBigramMap() { return &mMultiBigramMap; }
const ProximityInfoState *getProximityInfoState(int id) const {
return &mProximityInfoStates[id];
}
int getInputSize() const { return mInputSize; }
- void setPartiallyCommited() { mPartiallyCommited = true; }
- bool isPartiallyCommited() const { return mPartiallyCommited; }
bool isOnlyOnePointerUsed(int *pointerId) const {
// Not in the dictionary word
@@ -119,26 +106,13 @@ class DicTraverseSession {
return true;
}
- void getSearchKeys(const DicNode *node, std::vector<int> *const outputSearchKeyVector) const {
- for (int i = 0; i < MAX_POINTER_COUNT_G; ++i) {
- if (!mProximityInfoStates[i].isUsed()) {
- continue;
- }
- const int pointerId = node->getInputIndex(i);
- const std::vector<int> *const searchKeyVector =
- mProximityInfoStates[i].getSearchKeyVector(pointerId);
- outputSearchKeyVector->insert(outputSearchKeyVector->end(), searchKeyVector->begin(),
- searchKeyVector->end());
- }
- }
-
- ProximityType getProximityTypeG(const DicNode *const node, const int childCodePoint) const {
+ ProximityType getProximityTypeG(const DicNode *const dicNode, const int childCodePoint) const {
ProximityType proximityType = UNRELATED_CHAR;
for (int i = 0; i < MAX_POINTER_COUNT_G; ++i) {
if (!mProximityInfoStates[i].isUsed()) {
continue;
}
- const int pointerId = node->getInputIndex(i);
+ const int pointerId = dicNode->getInputIndex(i);
proximityType = mProximityInfoStates[i].getProximityTypeG(pointerId, childCodePoint);
ASSERT(proximityType == UNRELATED_CHAR || proximityType == MATCH_CHAR);
// TODO: Make this more generic
@@ -192,7 +166,7 @@ class DicTraverseSession {
const int *const inputYs, const int *const times, const int *const pointerIds,
const int inputSize, const float maxSpatialDistance, const int maxPointerCount);
- int mPrevWordPos;
+ int mPrevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
const ProximityInfo *mProximityInfo;
const Dictionary *mDictionary;
const SuggestOptions *mSuggestOptions;
@@ -203,7 +177,6 @@ class DicTraverseSession {
ProximityInfoState mProximityInfoStates[MAX_POINTER_COUNT_G];
int mInputSize;
- bool mPartiallyCommited;
int mMaxPointerCount;
/////////////////////////////////
diff --git a/native/jni/src/suggest/core/session/prev_words_info.h b/native/jni/src/suggest/core/session/prev_words_info.h
new file mode 100644
index 000000000..e350c6996
--- /dev/null
+++ b/native/jni/src/suggest/core/session/prev_words_info.h
@@ -0,0 +1,211 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PREV_WORDS_INFO_H
+#define LATINIME_PREV_WORDS_INFO_H
+
+#include "defines.h"
+#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
+#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+#include "utils/char_utils.h"
+
+namespace latinime {
+
+// TODO: Support n-gram.
+class PrevWordsInfo {
+ public:
+    // Builds an empty instance: every slot has zero code points and is not flagged as the
+    // beginning of a sentence.
+    PrevWordsInfo() { clear(); }
+
+    // Move constructor. The object only holds fixed-size arrays, so the "move" is an
+    // element-wise copy and the source object is not modified. For each slot only the first
+    // mPrevWordCodePointCount[i] code points are copied.
+    PrevWordsInfo(PrevWordsInfo &&prevWordsInfo) {
+        const size_t slotCount = NELEMS(mPrevWordCodePoints);
+        for (size_t slot = 0; slot < slotCount; ++slot) {
+            mPrevWordCodePointCount[slot] = prevWordsInfo.mPrevWordCodePointCount[slot];
+            const size_t bytesToCopy =
+                    sizeof(mPrevWordCodePoints[slot][0]) * mPrevWordCodePointCount[slot];
+            memmove(mPrevWordCodePoints[slot], prevWordsInfo.mPrevWordCodePoints[slot],
+                    bytesToCopy);
+            mIsBeginningOfSentence[slot] = prevWordsInfo.mIsBeginningOfSentence[slot];
+        }
+    }
+
+    // Builds the instance from several previous words. Index i of each input array describes
+    // the same word. At most NELEMS(mPrevWordCodePoints) words are taken; a word whose code
+    // point count is negative or larger than MAX_WORD_LENGTH is skipped and its slot stays
+    // empty.
+    PrevWordsInfo(const int prevWordCodePoints[][MAX_WORD_LENGTH],
+            const int *const prevWordCodePointCount, const bool *const isBeginningOfSentence,
+            const size_t prevWordCount) {
+        clear();
+        const size_t wordCountToCopy = std::min(NELEMS(mPrevWordCodePoints), prevWordCount);
+        for (size_t wordIndex = 0; wordIndex < wordCountToCopy; ++wordIndex) {
+            const bool hasValidLength = prevWordCodePointCount[wordIndex] >= 0
+                    && prevWordCodePointCount[wordIndex] <= MAX_WORD_LENGTH;
+            if (hasValidLength) {
+                memmove(mPrevWordCodePoints[wordIndex], prevWordCodePoints[wordIndex],
+                        sizeof(mPrevWordCodePoints[wordIndex][0])
+                                * prevWordCodePointCount[wordIndex]);
+                mPrevWordCodePointCount[wordIndex] = prevWordCodePointCount[wordIndex];
+                mIsBeginningOfSentence[wordIndex] = isBeginningOfSentence[wordIndex];
+            }
+        }
+    }
+
+    // Construct from a previous word.
+    // The instance is left empty (the state produced by clear()) when prevWordCodePoints is
+    // null or prevWordCodePointCount is outside [0, MAX_WORD_LENGTH]; in that case
+    // isBeginningOfSentence is ignored as well.
+    PrevWordsInfo(const int *const prevWordCodePoints, const int prevWordCodePointCount,
+            const bool isBeginningOfSentence) {
+        clear();
+        // A negative count has to be rejected too: it would be converted to a huge size_t in
+        // the size computation below, making memmove() run far past both buffers, and it would
+        // be stored as the word length. The multi-word constructor applies the same range check.
+        if (!prevWordCodePoints || prevWordCodePointCount < 0
+                || prevWordCodePointCount > MAX_WORD_LENGTH) {
+            return;
+        }
+        memmove(mPrevWordCodePoints[0], prevWordCodePoints,
+                sizeof(mPrevWordCodePoints[0][0]) * prevWordCodePointCount);
+        mPrevWordCodePointCount[0] = prevWordCodePointCount;
+        mIsBeginningOfSentence[0] = isBeginningOfSentence;
+    }
+
+    // Returns true when the first slot (index 0) holds at least one code point or is flagged
+    // as the beginning of a sentence. The other slots are not inspected.
+    bool isValid() const {
+        return mPrevWordCodePointCount[0] > 0 || mIsBeginningOfSentence[0];
+    }
+
+    // Looks up every stored slot with getTerminalPtNodePosOfWord() and stores each result in
+    // outPrevWordsTerminalPtNodePos. The output array is written at indices
+    // [0, NELEMS(mPrevWordCodePoints)) and must be large enough for that.
+    void getPrevWordsTerminalPtNodePos(
+            const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
+            int *const outPrevWordsTerminalPtNodePos, const bool tryLowerCaseSearch) const {
+        const size_t slotCount = NELEMS(mPrevWordCodePoints);
+        for (size_t slot = 0; slot < slotCount; ++slot) {
+            const int ptNodePos = getTerminalPtNodePosOfWord(dictStructurePolicy,
+                    mPrevWordCodePoints[slot], mPrevWordCodePointCount[slot],
+                    mIsBeginningOfSentence[slot], tryLowerCaseSearch);
+            outPrevWordsTerminalPtNodePos[slot] = ptNodePos;
+        }
+    }
+
+    // Returns an iterator positioned at the bigram list of the word stored in the first slot
+    // (index 0). The list position comes from
+    // getBigramListPositionForWordWithTryingLowerCaseSearch().
+    // dictStructurePolicy is dereferenced unconditionally here, so it must not be null.
+    BinaryDictionaryBigramsIterator getBigramsIteratorForPrediction(
+            const DictionaryStructureWithBufferPolicy *const dictStructurePolicy) const {
+        const int *const firstWordCodePoints = mPrevWordCodePoints[0];
+        const int firstWordCodePointCount = mPrevWordCodePointCount[0];
+        const int listPos = getBigramListPositionForWordWithTryingLowerCaseSearch(
+                dictStructurePolicy, firstWordCodePoints, firstWordCodePointCount,
+                mIsBeginningOfSentence[0]);
+        return BinaryDictionaryBigramsIterator(dictStructurePolicy->getBigramsStructurePolicy(),
+                listPos);
+    }
+
+    // Returns the code point buffer of the n-th previous word, where n is 1-indexed.
+    // Returns nullptr when n is outside [1, MAX_PREV_WORD_COUNT_FOR_N_GRAM].
+    const int *getNthPrevWordCodePoints(const int n) const {
+        if (n >= 1 && n <= MAX_PREV_WORD_COUNT_FOR_N_GRAM) {
+            return mPrevWordCodePoints[n - 1];
+        }
+        return nullptr;
+    }
+
+    // Returns the number of code points of the n-th previous word, where n is 1-indexed.
+    // Returns 0 when n is outside [1, MAX_PREV_WORD_COUNT_FOR_N_GRAM].
+    int getNthPrevWordCodePointCount(const int n) const {
+        const bool isInRange = n >= 1 && n <= MAX_PREV_WORD_COUNT_FOR_N_GRAM;
+        return isInRange ? mPrevWordCodePointCount[n - 1] : 0;
+    }
+
+    // Returns the beginning-of-sentence flag of the n-th previous word, where n is 1-indexed.
+    // Returns false when n is outside [1, MAX_PREV_WORD_COUNT_FOR_N_GRAM].
+    bool isNthPrevWordBeginningOfSentence(const int n) const {
+        const bool isInRange = n >= 1 && n <= MAX_PREV_WORD_COUNT_FOR_N_GRAM;
+        // The short-circuit keeps the array access inside the valid range.
+        return isInRange && mIsBeginningOfSentence[n - 1];
+    }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(PrevWordsInfo);
+
+    // Looks up the terminal PtNode position of a word through dictStructurePolicy.
+    // Returns NOT_A_DICT_POS when dictStructurePolicy or wordCodePoints is null, when
+    // wordCodePointCount is outside [0, MAX_WORD_LENGTH], or when
+    // CharUtils::attachBeginningOfSentenceMarker() reports a non-positive length.
+    // The word is copied to a local buffer first; when isBeginningOfSentence is set, the marker
+    // is attached to that copy, never to the caller's array.
+    // When tryLowerCaseSearch is set and the exact lookup yields NOT_A_DICT_POS, the lookup is
+    // repeated with forceLowerCaseSearch enabled and that result is returned.
+    static int getTerminalPtNodePosOfWord(
+            const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
+            const int *const wordCodePoints, const int wordCodePointCount,
+            const bool isBeginningOfSentence, const bool tryLowerCaseSearch) {
+        // Negative counts are rejected as well: they would become a huge size_t in the
+        // memmove() size below and overflow the local buffer.
+        if (!dictStructurePolicy || !wordCodePoints || wordCodePointCount < 0
+                || wordCodePointCount > MAX_WORD_LENGTH) {
+            return NOT_A_DICT_POS;
+        }
+        int codePoints[MAX_WORD_LENGTH];
+        int codePointCount = wordCodePointCount;
+        memmove(codePoints, wordCodePoints, sizeof(int) * codePointCount);
+        if (isBeginningOfSentence) {
+            codePointCount = CharUtils::attachBeginningOfSentenceMarker(codePoints,
+                    codePointCount, MAX_WORD_LENGTH);
+            if (codePointCount <= 0) {
+                return NOT_A_DICT_POS;
+            }
+        }
+        const int wordPtNodePos = dictStructurePolicy->getTerminalPtNodePositionOfWord(
+                codePoints, codePointCount, false /* forceLowerCaseSearch */);
+        if (wordPtNodePos != NOT_A_DICT_POS || !tryLowerCaseSearch) {
+            // Return the position when the word was found or when the lower case search is
+            // not requested.
+            return wordPtNodePos;
+        }
+        // Check bigrams for lower-cased previous word if original was not found. Useful for
+        // auto-capitalized words like "The [current_word]".
+        return dictStructurePolicy->getTerminalPtNodePositionOfWord(
+                codePoints, codePointCount, true /* forceLowerCaseSearch */);
+    }
+
+    // Returns the bigram list position of a word, retrying the lookup in lower case when the
+    // exact lookup yields NOT_A_DICT_POS.
+    // Returns NOT_A_DICT_POS when dictStructurePolicy or wordCodePoints is null, when
+    // wordCodePointCount is outside [0, MAX_WORD_LENGTH], or when
+    // CharUtils::attachBeginningOfSentenceMarker() reports a non-positive length.
+    // The word is copied to a local buffer first; when isBeginningOfSentence is set, the marker
+    // is attached to that copy, never to the caller's array.
+    static int getBigramListPositionForWordWithTryingLowerCaseSearch(
+            const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
+            const int *const wordCodePoints, const int wordCodePointCount,
+            const bool isBeginningOfSentence) {
+        // Negative counts are rejected as well: they would become a huge size_t in the
+        // memmove() size below and overflow the local buffer.
+        if (!dictStructurePolicy || !wordCodePoints || wordCodePointCount < 0
+                || wordCodePointCount > MAX_WORD_LENGTH) {
+            return NOT_A_DICT_POS;
+        }
+        int codePoints[MAX_WORD_LENGTH];
+        int codePointCount = wordCodePointCount;
+        memmove(codePoints, wordCodePoints, sizeof(int) * codePointCount);
+        if (isBeginningOfSentence) {
+            codePointCount = CharUtils::attachBeginningOfSentenceMarker(codePoints,
+                    codePointCount, MAX_WORD_LENGTH);
+            if (codePointCount <= 0) {
+                return NOT_A_DICT_POS;
+            }
+        }
+        int pos = getBigramListPositionForWord(dictStructurePolicy, codePoints,
+                codePointCount, false /* forceLowerCaseSearch */);
+        // getBigramListPositionForWord returns NOT_A_DICT_POS if this word isn't in the
+        // dictionary or has no bigrams
+        if (NOT_A_DICT_POS == pos) {
+            // If no bigrams for this exact word, search again in lower case.
+            pos = getBigramListPositionForWord(dictStructurePolicy, codePoints,
+                    codePointCount, true /* forceLowerCaseSearch */);
+        }
+        return pos;
+    }
+
+    // Resolves the word to its terminal PtNode position and returns the bigrams position that
+    // dictStructurePolicy reports for that PtNode.
+    // Returns NOT_A_DICT_POS for a null or empty word and when the terminal PtNode lookup
+    // yields NOT_A_DICT_POS. dictStructurePolicy is dereferenced unconditionally.
+    static int getBigramListPositionForWord(
+            const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
+            const int *wordCodePoints, const int wordCodePointCount,
+            const bool forceLowerCaseSearch) {
+        const bool hasWord = wordCodePoints && wordCodePointCount > 0;
+        if (!hasWord) {
+            return NOT_A_DICT_POS;
+        }
+        const int ptNodePos = dictStructurePolicy->getTerminalPtNodePositionOfWord(
+                wordCodePoints, wordCodePointCount, forceLowerCaseSearch);
+        if (ptNodePos != NOT_A_DICT_POS) {
+            return dictStructurePolicy->getBigramsPositionOfPtNode(ptNodePos);
+        }
+        return NOT_A_DICT_POS;
+    }
+
+    // Marks every slot as empty: zero code points and not the beginning of a sentence.
+    // The code point buffers themselves are not touched.
+    void clear() {
+        const size_t slotCount = NELEMS(mPrevWordCodePoints);
+        for (size_t slot = 0; slot < slotCount; ++slot) {
+            mIsBeginningOfSentence[slot] = false;
+            mPrevWordCodePointCount[slot] = 0;
+        }
+    }
+
+ int mPrevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH];
+ int mPrevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+ bool mIsBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+};
+} // namespace latinime
+#endif // LATINIME_PREV_WORDS_INFO_H
diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp
index 73ccebc88..0cd305f5a 100644
--- a/native/jni/src/suggest/core/suggest.cpp
+++ b/native/jni/src/suggest/core/suggest.cpp
@@ -19,23 +19,19 @@
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_priority_queue.h"
#include "suggest/core/dicnode/dic_node_vector.h"
-#include "suggest/core/dictionary/binary_dictionary_shortcut_iterator.h"
#include "suggest/core/dictionary/dictionary.h"
#include "suggest/core/dictionary/digraph_utils.h"
-#include "suggest/core/dictionary/shortcut_utils.h"
#include "suggest/core/layout/proximity_info.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
-#include "suggest/core/policy/scoring.h"
#include "suggest/core/policy/traversal.h"
#include "suggest/core/policy/weighting.h"
+#include "suggest/core/result/suggestions_output_utils.h"
#include "suggest/core/session/dic_traverse_session.h"
namespace latinime {
// Initialization of class constants.
-const int Suggest::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
const int Suggest::MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE = 2;
-const float Suggest::AUTOCORRECT_CLASSIFICATION_THRESHOLD = 0.33f;
/**
* Returns a set of suggestions for the given input touch points. The commitPoint argument indicates
@@ -46,10 +42,10 @@ const float Suggest::AUTOCORRECT_CLASSIFICATION_THRESHOLD = 0.33f;
* automatically activated for sequential calls that share the same starting input.
* TODO: Stop detecting continuous suggestion. Start using traverseSession instead.
*/
-int Suggest::getSuggestions(ProximityInfo *pInfo, void *traverseSession,
+void Suggest::getSuggestions(ProximityInfo *pInfo, void *traverseSession,
int *inputXs, int *inputYs, int *times, int *pointerIds, int *inputCodePoints,
- int inputSize, int commitPoint, int *outWords, int *frequencies, int *outputIndices,
- int *outputTypes, int *outputAutoCommitFirstWordConfidence) const {
+ int inputSize, const float languageWeight,
+ SuggestionResults *const outSuggestionResults) const {
PROF_OPEN;
PROF_START(0);
const float maxSpatialDistance = TRAVERSAL->getMaxSpatialDistance();
@@ -58,7 +54,7 @@ int Suggest::getSuggestions(ProximityInfo *pInfo, void *traverseSession,
pointerIds, maxSpatialDistance, TRAVERSAL->getMaxPointerCount());
// TODO: Add the way to evaluate cache
- initializeSearch(tSession, commitPoint);
+ initializeSearch(tSession);
PROF_END(0);
PROF_START(1);
@@ -70,247 +66,38 @@ int Suggest::getSuggestions(ProximityInfo *pInfo, void *traverseSession,
}
PROF_END(1);
PROF_START(2);
- const int size = outputSuggestions(tSession, frequencies, outWords, outputIndices, outputTypes,
- outputAutoCommitFirstWordConfidence);
+ SuggestionsOutputUtils::outputSuggestions(
+ SCORING, tSession, languageWeight, outSuggestionResults);
PROF_END(2);
PROF_CLOSE;
- return size;
}
/**
* Initializes the search at the root of the lexicon trie. Note that when possible the search will
* continue suggestion from where it left off during the last call.
*/
-void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPoint) const {
+void Suggest::initializeSearch(DicTraverseSession *traverseSession) const {
if (!traverseSession->getProximityInfoState(0)->isUsed()) {
return;
}
- // Never auto partial commit for now.
- commitPoint = 0;
-
if (traverseSession->getInputSize() > MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE
&& traverseSession->isContinuousSuggestionPossible()) {
- if (commitPoint == 0) {
- // Continue suggestion
- traverseSession->getDicTraverseCache()->continueSearch();
- } else {
- // Continue suggestion after partial commit.
- DicNode *topDicNode =
- traverseSession->getDicTraverseCache()->setCommitPoint(commitPoint);
- traverseSession->setPrevWordPos(topDicNode->getPrevWordNodePos());
- traverseSession->getDicTraverseCache()->continueSearch();
- traverseSession->setPartiallyCommited();
- }
+ // Continue suggestion
+ traverseSession->getDicTraverseCache()->continueSearch();
} else {
// Restart recognition at the root.
traverseSession->resetCache(TRAVERSAL->getMaxCacheSize(traverseSession->getInputSize()),
- MAX_RESULTS);
+ TRAVERSAL->getTerminalCacheSize());
// Create a new dic node here
DicNode rootNode;
DicNodeUtils::initAsRoot(traverseSession->getDictionaryStructurePolicy(),
- traverseSession->getPrevWordPos(), &rootNode);
+ traverseSession->getPrevWordsPtNodePos(), &rootNode);
traverseSession->getDicTraverseCache()->copyPushActive(&rootNode);
}
}
/**
- * Outputs the final list of suggestions (i.e., terminal nodes).
- */
-int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequencies,
- int *outputCodePoints, int *outputIndicesToPartialCommit, int *outputTypes,
- int *outputAutoCommitFirstWordConfidence) const {
-#if DEBUG_EVALUATE_MOST_PROBABLE_STRING
- const int terminalSize = 0;
-#else
- const int terminalSize = min(MAX_RESULTS,
- static_cast<int>(traverseSession->getDicTraverseCache()->terminalSize()));
-#endif
- DicNode terminals[MAX_RESULTS]; // Avoiding non-POD variable length array
-
- for (int index = terminalSize - 1; index >= 0; --index) {
- traverseSession->getDicTraverseCache()->popTerminal(&terminals[index]);
- }
-
- const float languageWeight = SCORING->getAdjustedLanguageWeight(
- traverseSession, terminals, terminalSize);
-
- int outputWordIndex = 0;
- // Insert most probable word at index == 0 as long as there is one terminal at least
- const bool hasMostProbableString =
- SCORING->getMostProbableString(traverseSession, terminalSize, languageWeight,
- &outputCodePoints[0], &outputTypes[0], &frequencies[0]);
- if (hasMostProbableString) {
- outputIndicesToPartialCommit[outputWordIndex] = NOT_AN_INDEX;
- ++outputWordIndex;
- }
-
- // Initial value of the loop index for terminal nodes (words)
- int doubleLetterTerminalIndex = -1;
- DoubleLetterLevel doubleLetterLevel = NOT_A_DOUBLE_LETTER;
- SCORING->searchWordWithDoubleLetter(terminals, terminalSize,
- &doubleLetterTerminalIndex, &doubleLetterLevel);
-
- int maxScore = S_INT_MIN;
- // Force autocorrection for obvious long multi-word suggestions when the top suggestion is
- // a long multiple words suggestion.
- // TODO: Implement a smarter auto-commit method for handling multi-word suggestions.
- // traverseSession->isPartiallyCommited() always returns false because we never auto partial
- // commit for now.
- const bool forceCommitMultiWords = (terminalSize > 0) ?
- TRAVERSAL->autoCorrectsToMultiWordSuggestionIfTop()
- && (traverseSession->isPartiallyCommited()
- || (traverseSession->getInputSize()
- >= MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT
- && terminals[0].hasMultipleWords())) : false;
- // TODO: have partial commit work even with multiple pointers.
- const bool outputSecondWordFirstLetterInputIndex =
- traverseSession->isOnlyOnePointerUsed(0 /* pointerId */);
- if (terminalSize > 0) {
- // If we have no suggestions, don't write this
- outputAutoCommitFirstWordConfidence[0] =
- computeFirstWordConfidence(&terminals[0]);
- }
-
- // Output suggestion results here
- for (int terminalIndex = 0; terminalIndex < terminalSize && outputWordIndex < MAX_RESULTS;
- ++terminalIndex) {
- DicNode *terminalDicNode = &terminals[terminalIndex];
- if (DEBUG_GEO_FULL) {
- terminalDicNode->dump("OUT:");
- }
- const float doubleLetterCost = SCORING->getDoubleLetterDemotionDistanceCost(
- terminalIndex, doubleLetterTerminalIndex, doubleLetterLevel);
- const float compoundDistance = terminalDicNode->getCompoundDistance(languageWeight)
- + doubleLetterCost;
- const bool isPossiblyOffensiveWord =
- traverseSession->getDictionaryStructurePolicy()->getProbability(
- terminalDicNode->getProbability(), NOT_A_PROBABILITY) <= 0;
- const bool isExactMatch = terminalDicNode->isExactMatch();
- const bool isFirstCharUppercase = terminalDicNode->isFirstCharUppercase();
- // Heuristic: We exclude freq=0 first-char-uppercase words from exact match.
- // (e.g. "AMD" and "and")
- const bool isSafeExactMatch = isExactMatch
- && !(isPossiblyOffensiveWord && isFirstCharUppercase);
- const int outputTypeFlags =
- (isPossiblyOffensiveWord ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0)
- | (isSafeExactMatch ? Dictionary::KIND_FLAG_EXACT_MATCH : 0);
-
- // Entries that are blacklisted or do not represent a word should not be output.
- const bool isValidWord = !terminalDicNode->isBlacklistedOrNotAWord();
-
- // Increase output score of top typing suggestion to ensure autocorrection.
- // TODO: Better integration with java side autocorrection logic.
- const int finalScore = SCORING->calculateFinalScore(
- compoundDistance, traverseSession->getInputSize(),
- terminalDicNode->isExactMatch()
- || (forceCommitMultiWords && terminalDicNode->hasMultipleWords())
- || (isValidWord && SCORING->doesAutoCorrectValidWord()));
- if (maxScore < finalScore && isValidWord) {
- maxScore = finalScore;
- }
-
- // Don't output invalid words. However, we still need to submit their shortcuts if any.
- if (isValidWord) {
- outputTypes[outputWordIndex] = Dictionary::KIND_CORRECTION | outputTypeFlags;
- frequencies[outputWordIndex] = finalScore;
- if (outputSecondWordFirstLetterInputIndex) {
- outputIndicesToPartialCommit[outputWordIndex] =
- terminalDicNode->getSecondWordFirstInputIndex(
- traverseSession->getProximityInfoState(0));
- } else {
- outputIndicesToPartialCommit[outputWordIndex] = NOT_AN_INDEX;
- }
- // Populate the outputChars array with the suggested word.
- const int startIndex = outputWordIndex * MAX_WORD_LENGTH;
- terminalDicNode->outputResult(&outputCodePoints[startIndex]);
- ++outputWordIndex;
- }
-
- if (!terminalDicNode->hasMultipleWords()) {
- BinaryDictionaryShortcutIterator shortcutIt(
- traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(),
- traverseSession->getDictionaryStructurePolicy()
- ->getShortcutPositionOfPtNode(terminalDicNode->getPos()));
- // Shortcut is not supported for multiple words suggestions.
- // TODO: Check shortcuts during traversal for multiple words suggestions.
- const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode);
- const int updatedOutputWordIndex = ShortcutUtils::outputShortcuts(&shortcutIt,
- outputWordIndex, finalScore, outputCodePoints, frequencies, outputTypes,
- sameAsTyped);
- const int secondWordFirstInputIndex = terminalDicNode->getSecondWordFirstInputIndex(
- traverseSession->getProximityInfoState(0));
- for (int i = outputWordIndex; i < updatedOutputWordIndex; ++i) {
- if (outputSecondWordFirstLetterInputIndex) {
- outputIndicesToPartialCommit[i] = secondWordFirstInputIndex;
- } else {
- outputIndicesToPartialCommit[i] = NOT_AN_INDEX;
- }
- }
- outputWordIndex = updatedOutputWordIndex;
- }
- DicNode::managedDelete(terminalDicNode);
- }
-
- if (hasMostProbableString) {
- SCORING->safetyNetForMostProbableString(terminalSize, maxScore,
- &outputCodePoints[0], &frequencies[0]);
- }
- return outputWordIndex;
-}
-
-int Suggest::computeFirstWordConfidence(const DicNode *const terminalDicNode) const {
- // Get the number of spaces in the first suggestion
- const int spaceCount = terminalDicNode->getTotalNodeSpaceCount();
- // Get the number of characters in the first suggestion
- const int length = terminalDicNode->getTotalNodeCodePointCount();
- // Get the distance for the first word of the suggestion
- const float distance = terminalDicNode->getNormalizedCompoundDistanceAfterFirstWord();
-
- // Arbitrarily, we give a score whose useful values range from 0 to 1,000,000.
- // 1,000,000 will be the cutoff to auto-commit. It's fine if the number is under 0 or
- // above 1,000,000 : under 0 just means it's very bad to commit, and above 1,000,000 means
- // we are very confident.
- // Expected space count is 1 ~ 5
- static const int MIN_EXPECTED_SPACE_COUNT = 1;
- static const int MAX_EXPECTED_SPACE_COUNT = 5;
- // Expected length is about 4 ~ 30
- static const int MIN_EXPECTED_LENGTH = 4;
- static const int MAX_EXPECTED_LENGTH = 30;
- // Expected distance is about 0.2 ~ 2.0, but consider 0.0 ~ 2.0
- static const float MIN_EXPECTED_DISTANCE = 0.0;
- static const float MAX_EXPECTED_DISTANCE = 2.0;
- // This is not strict: it's where most stuff will be falling, but it's still fine if it's
- // outside these values. We want to output a value that reflects all of these. Each factor
- // contributes a bit.
-
- // We need at least a space.
- if (spaceCount < 1) return NOT_A_FIRST_WORD_CONFIDENCE;
-
- // The smaller the edit distance, the higher the contribution. MIN_EXPECTED_DISTANCE means 0
- // contribution, while MAX_EXPECTED_DISTANCE means full contribution according to the
- // weight of the distance. Clamp to avoid overflows.
- const float clampedDistance = distance < MIN_EXPECTED_DISTANCE ? MIN_EXPECTED_DISTANCE
- : distance > MAX_EXPECTED_DISTANCE ? MAX_EXPECTED_DISTANCE : distance;
- const int distanceContribution = DISTANCE_WEIGHT_FOR_AUTO_COMMIT
- * (MAX_EXPECTED_DISTANCE - clampedDistance)
- / (MAX_EXPECTED_DISTANCE - MIN_EXPECTED_DISTANCE);
- // The larger the suggestion length, the larger the contribution. MIN_EXPECTED_LENGTH is no
- // contribution, MAX_EXPECTED_LENGTH is full contribution according to the weight of the
- // length. Length is guaranteed to be between 1 and 48, so we don't need to clamp.
- const int lengthContribution = LENGTH_WEIGHT_FOR_AUTO_COMMIT
- * (length - MIN_EXPECTED_LENGTH) / (MAX_EXPECTED_LENGTH - MIN_EXPECTED_LENGTH);
- // The more spaces, the larger the contribution. MIN_EXPECTED_SPACE_COUNT space is no
- // contribution, MAX_EXPECTED_SPACE_COUNT spaces is full contribution according to the
- // weight of the space count.
- const int spaceContribution = SPACE_COUNT_WEIGHT_FOR_AUTO_COMMIT
- * (spaceCount - MIN_EXPECTED_SPACE_COUNT)
- / (MAX_EXPECTED_SPACE_COUNT - MIN_EXPECTED_SPACE_COUNT);
-
- return distanceContribution + lengthContribution + spaceContribution;
-}
-
-/**
* Expands the dicNodes in the current search priority queue by advancing to the possible child
* nodes based on the next touch point(s) (or no touch points for lookahead)
*/
@@ -421,15 +208,15 @@ void Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const {
}
break;
case UNRELATED_CHAR:
- // Just drop this node and do nothing.
+ // Just drop this dicNode and do nothing.
break;
default:
- // Just drop this node and do nothing.
+ // Just drop this dicNode and do nothing.
break;
}
}
- // Push the node for look-ahead correction
+ // Push the dicNode for look-ahead correction
if (allowsErrorCorrections && canDoLookAheadCorrection) {
traverseSession->getDicTraverseCache()->copyPushNextActive(&dicNode);
}
@@ -442,15 +229,17 @@ void Suggest::processTerminalDicNode(
if (dicNode->getCompoundDistance() >= static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) {
return;
}
- if (!dicNode->isTerminalWordNode()) {
+ if (!dicNode->isTerminalDicNode()) {
return;
}
if (dicNode->shouldBeFilteredBySafetyNetForBigram()) {
return;
}
+ if (!dicNode->hasMatchedOrProximityCodePoints()) {
+ return;
+ }
// Create a non-cached node here.
- DicNode terminalDicNode;
- DicNodeUtils::initByCopy(dicNode, &terminalDicNode);
+ DicNode terminalDicNode(*dicNode);
if (TRAVERSAL->needsToTraverseAllUserInput()
&& dicNode->getInputIndex(0) < traverseSession->getInputSize()) {
Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_TERMINAL_INSERTION, traverseSession, 0,
@@ -463,7 +252,7 @@ void Suggest::processTerminalDicNode(
/**
* Adds the expanded dicNode to the next search priority queue. Also creates an additional next word
- * (by the space omission error correction) search path if input dicNode is on a terminal node.
+ * (by the space omission error correction) search path if input dicNode is on a terminal.
*/
void Suggest::processExpandedDicNode(
DicTraverseSession *traverseSession, DicNode *dicNode) const {
@@ -478,7 +267,6 @@ void Suggest::processExpandedDicNode(
traverseSession->getDicTraverseCache()->copyPushNextActive(dicNode);
}
}
- DicNode::managedDelete(dicNode);
}
void Suggest::processDicNodeAsMatch(DicTraverseSession *traverseSession,
@@ -505,7 +293,7 @@ void Suggest::processDicNodeAsSubstitution(DicTraverseSession *traverseSession,
processExpandedDicNode(traverseSession, childDicNode);
}
-// Process the node codepoint as a digraph. This means that composite glyphs like the German
+// Process the DicNode codepoint as a digraph. This means that composite glyphs like the German
// u-umlaut is expanded to the transliteration "ue". Note that this happens in parallel with
// the normal non-digraph traversal, so both "uber" and "ueber" can be corrected to "[u-umlaut]ber".
void Suggest::processDicNodeAsDigraph(DicTraverseSession *traverseSession,
@@ -518,7 +306,7 @@ void Suggest::processDicNodeAsDigraph(DicTraverseSession *traverseSession,
/**
* Handle the dicNode as an omission error (e.g., ths => this). Skip the current letter and consider
* matches for all possible next letters. Note that just skipping the current letter without any
- * other conditions tends to flood the search dic nodes cache with omission nodes. Instead, check
+ * other conditions tends to flood the search DicNodes cache with omission DicNodes. Instead, check
* the possible *next* letters after the omission to better limit search to plausible omissions.
* Note that apostrophes are handled as omissions.
*/
@@ -572,6 +360,7 @@ void Suggest::processDicNodeAsTransposition(DicTraverseSession *traverseSession,
DicNode *dicNode) const {
const int16_t pointIndex = dicNode->getInputIndex(0);
DicNodeVector childDicNodes1;
+ DicNodeVector childDicNodes2;
DicNodeUtils::getAllChildDicNodes(dicNode, traverseSession->getDictionaryStructurePolicy(),
&childDicNodes1);
const int childSize1 = childDicNodes1.getSizeAndLock();
@@ -583,7 +372,7 @@ void Suggest::processDicNodeAsTransposition(DicTraverseSession *traverseSession,
continue;
}
if (childDicNodes1[i]->hasChildren()) {
- DicNodeVector childDicNodes2;
+ childDicNodes2.clear();
DicNodeUtils::getAllChildDicNodes(childDicNodes1[i],
traverseSession->getDictionaryStructurePolicy(), &childDicNodes2);
const int childSize2 = childDicNodes2.getSizeAndLock();
@@ -600,12 +389,11 @@ void Suggest::processDicNodeAsTransposition(DicTraverseSession *traverseSession,
processExpandedDicNode(traverseSession, childDicNode2);
}
}
- DicNode::managedDelete(childDicNodes1[i]);
}
}
/**
- * Weight child node by aligning it to the key
+ * Weight child dicNode by aligning it to the key
*/
void Suggest::weightChildNode(DicTraverseSession *traverseSession, DicNode *dicNode) const {
const int inputSize = traverseSession->getInputSize();
diff --git a/native/jni/src/suggest/core/suggest.h b/native/jni/src/suggest/core/suggest.h
index b20343d29..788e0314b 100644
--- a/native/jni/src/suggest/core/suggest.h
+++ b/native/jni/src/suggest/core/suggest.h
@@ -36,37 +36,30 @@ class DicNode;
class DicTraverseSession;
class ProximityInfo;
class Scoring;
+class SuggestionResults;
class Traversal;
class Weighting;
class Suggest : public SuggestInterface {
public:
AK_FORCE_INLINE Suggest(const SuggestPolicy *const suggestPolicy)
- : TRAVERSAL(suggestPolicy ? suggestPolicy->getTraversal() : 0),
- SCORING(suggestPolicy ? suggestPolicy->getScoring() : 0),
- WEIGHTING(suggestPolicy ? suggestPolicy->getWeighting() : 0) {}
+ : TRAVERSAL(suggestPolicy ? suggestPolicy->getTraversal() : nullptr),
+ SCORING(suggestPolicy ? suggestPolicy->getScoring() : nullptr),
+ WEIGHTING(suggestPolicy ? suggestPolicy->getWeighting() : nullptr) {}
AK_FORCE_INLINE virtual ~Suggest() {}
- int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs,
- int *times, int *pointerIds, int *inputCodePoints, int inputSize, int commitPoint,
- int *outWords, int *frequencies, int *outputIndices, int *outputTypes,
- int *outputAutoCommitFirstWordConfidence) const;
+ void getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs,
+ int *times, int *pointerIds, int *inputCodePoints, int inputSize,
+ const float languageWeight, SuggestionResults *const outSuggestionResults) const;
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Suggest);
void createNextWordDicNode(DicTraverseSession *traverseSession, DicNode *dicNode,
const bool spaceSubstitution) const;
- int outputSuggestions(DicTraverseSession *traverseSession, int *frequencies,
- int *outputCodePoints, int *outputIndicesToPartialCommit, int *outputTypes,
- int *outputAutoCommitFirstWordConfidence) const;
- int computeFirstWordConfidence(const DicNode *const terminalDicNode) const;
- void initializeSearch(DicTraverseSession *traverseSession, int commitPoint) const;
+ void initializeSearch(DicTraverseSession *traverseSession) const;
void expandCurrentDicNodes(DicTraverseSession *traverseSession) const;
void processTerminalDicNode(DicTraverseSession *traverseSession, DicNode *dicNode) const;
void processExpandedDicNode(DicTraverseSession *traverseSession, DicNode *dicNode) const;
void weightChildNode(DicTraverseSession *traverseSession, DicNode *dicNode) const;
- float getAutocorrectScore(DicTraverseSession *traverseSession, DicNode *dicNode) const;
- void generateFeatures(
- DicTraverseSession *traverseSession, DicNode *dicNode, float *features) const;
void processDicNodeAsOmission(DicTraverseSession *traverseSession, DicNode *dicNode) const;
void processDicNodeAsDigraph(DicTraverseSession *traverseSession, DicNode *dicNode) const;
void processDicNodeAsTransposition(DicTraverseSession *traverseSession,
@@ -79,13 +72,8 @@ class Suggest : public SuggestInterface {
void processDicNodeAsMatch(DicTraverseSession *traverseSession,
DicNode *childDicNode) const;
- // Inputs longer than this will autocorrect if the suggestion is multi-word
- static const int MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT;
static const int MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE;
- // Threshold for autocorrection classifier
- static const float AUTOCORRECT_CLASSIFICATION_THRESHOLD;
-
const Traversal *const TRAVERSAL;
const Scoring *const SCORING;
const Weighting *const WEIGHTING;
diff --git a/native/jni/src/suggest/core/suggest_interface.h b/native/jni/src/suggest/core/suggest_interface.h
index 4deb4d924..a6e5aefae 100644
--- a/native/jni/src/suggest/core/suggest_interface.h
+++ b/native/jni/src/suggest/core/suggest_interface.h
@@ -22,13 +22,13 @@
namespace latinime {
class ProximityInfo;
+class SuggestionResults;
class SuggestInterface {
public:
- virtual int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs,
+ virtual void getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs,
int *inputYs, int *times, int *pointerIds, int *inputCodePoints, int inputSize,
- int commitPoint, int *outWords, int *frequencies, int *outputIndices,
- int *outputTypes, int *outputAutoCommitFirstWordConfidence) const = 0;
+ const float languageWeight, SuggestionResults *const suggestionResults) const = 0;
SuggestInterface() {}
virtual ~SuggestInterface() {}
private:
diff --git a/native/jni/src/suggest/core/suggest_options.h b/native/jni/src/suggest/core/suggest_options.h
index 1b21aafcf..2e22a7ac3 100644
--- a/native/jni/src/suggest/core/suggest_options.h
+++ b/native/jni/src/suggest/core/suggest_options.h
@@ -34,6 +34,10 @@ class SuggestOptions{
return getBoolOption(USE_FULL_EDIT_DISTANCE);
}
+ AK_FORCE_INLINE bool blockOffensiveWords() const {
+ return getBoolOption(BLOCK_OFFENSIVE_WORDS);
+ }
+
AK_FORCE_INLINE bool getAdditionalFeaturesBoolOption(const int key) const {
return getBoolOption(key + ADDITIONAL_FEATURES_OPTIONS);
}
@@ -45,9 +49,10 @@ class SuggestOptions{
// reorder options.
static const int IS_GESTURE = 0;
static const int USE_FULL_EDIT_DISTANCE = 1;
+ static const int BLOCK_OFFENSIVE_WORDS = 2;
// Additional features options are stored after the other options and used as setting values of
// experimental features.
- static const int ADDITIONAL_FEATURES_OPTIONS = 2;
+ static const int ADDITIONAL_FEATURES_OPTIONS = 3;
const int *const mOptions;
const int mLength;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp
deleted file mode 100644
index 1926b9831..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
-
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
-#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
-
-namespace latinime {
-
-const BigramListReadWriteUtils::BigramFlags BigramListReadWriteUtils::MASK_ATTRIBUTE_ADDRESS_TYPE =
- 0x30;
-const BigramListReadWriteUtils::BigramFlags
- BigramListReadWriteUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10;
-const BigramListReadWriteUtils::BigramFlags
- BigramListReadWriteUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20;
-const BigramListReadWriteUtils::BigramFlags
- BigramListReadWriteUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30;
-const BigramListReadWriteUtils::BigramFlags
- BigramListReadWriteUtils::FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
-// Flag for presence of more attributes
-const BigramListReadWriteUtils::BigramFlags BigramListReadWriteUtils::FLAG_ATTRIBUTE_HAS_NEXT =
- 0x80;
-// Mask for attribute probability, stored on 4 bits inside the flags byte.
-const BigramListReadWriteUtils::BigramFlags
- BigramListReadWriteUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
-const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
-
-/* static */ void BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
- const uint8_t *const bigramsBuf, BigramFlags *const outBigramFlags,
- int *const outTargetPtNodePos, int *const bigramEntryPos) {
- const BigramFlags bigramFlags = ByteArrayUtils::readUint8AndAdvancePosition(bigramsBuf,
- bigramEntryPos);
- if (outBigramFlags) {
- *outBigramFlags = bigramFlags;
- }
- const int targetPos = getBigramAddressAndAdvancePosition(bigramsBuf, bigramFlags,
- bigramEntryPos);
- if (outTargetPtNodePos) {
- *outTargetPtNodePos = targetPos;
- }
-}
-
-/* static */ void BigramListReadWriteUtils::skipExistingBigrams(const uint8_t *const bigramsBuf,
- int *const bigramListPos) {
- BigramFlags flags;
- do {
- getBigramEntryPropertiesAndAdvancePosition(bigramsBuf, &flags, 0 /* outTargetPtNodePos */,
- bigramListPos);
- } while(hasNext(flags));
-}
-
-/* static */ int BigramListReadWriteUtils::getBigramAddressAndAdvancePosition(
- const uint8_t *const bigramsBuf, const BigramFlags flags, int *const pos) {
- int offset = 0;
- const int origin = *pos;
- switch (MASK_ATTRIBUTE_ADDRESS_TYPE & flags) {
- case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
- offset = ByteArrayUtils::readUint8AndAdvancePosition(bigramsBuf, pos);
- break;
- case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
- offset = ByteArrayUtils::readUint16AndAdvancePosition(bigramsBuf, pos);
- break;
- case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
- offset = ByteArrayUtils::readUint24AndAdvancePosition(bigramsBuf, pos);
- break;
- }
- if (offset == DynamicPatriciaTrieReadingUtils::DICT_OFFSET_INVALID) {
- return NOT_A_DICT_POS;
- } else if (offset == DynamicPatriciaTrieReadingUtils::DICT_OFFSET_ZERO_OFFSET) {
- return origin;
- }
- if (isOffsetNegative(flags)) {
- return origin - offset;
- } else {
- return origin + offset;
- }
-}
-
-/* static */ bool BigramListReadWriteUtils::setHasNextFlag(
- BufferWithExtendableBuffer *const buffer, const bool hasNext, const int entryPos) {
- const bool usesAdditionalBuffer = buffer->isInAdditionalBuffer(entryPos);
- int readingPos = entryPos;
- if (usesAdditionalBuffer) {
- readingPos -= buffer->getOriginalBufferSize();
- }
- BigramFlags bigramFlags = ByteArrayUtils::readUint8AndAdvancePosition(
- buffer->getBuffer(usesAdditionalBuffer), &readingPos);
- if (hasNext) {
- bigramFlags = bigramFlags | FLAG_ATTRIBUTE_HAS_NEXT;
- } else {
- bigramFlags = bigramFlags & (~FLAG_ATTRIBUTE_HAS_NEXT);
- }
- int writingPos = entryPos;
- return buffer->writeUintAndAdvancePosition(bigramFlags, 1 /* size */, &writingPos);
-}
-
-/* static */ bool BigramListReadWriteUtils::createAndWriteBigramEntry(
- BufferWithExtendableBuffer *const buffer, const int targetPos, const int probability,
- const bool hasNext, int *const writingPos) {
- BigramFlags flags;
- if (!createAndGetBigramFlags(*writingPos, targetPos, probability, hasNext, &flags)) {
- return false;
- }
- return writeBigramEntry(buffer, flags, targetPos, writingPos);
-}
-
-/* static */ bool BigramListReadWriteUtils::writeBigramEntry(
- BufferWithExtendableBuffer *const bufferToWrite, const BigramFlags flags,
- const int targetPtNodePos, int *const writingPos) {
- const int offset = getBigramTargetOffset(targetPtNodePos, *writingPos);
- const BigramFlags flagsToWrite = (offset < 0) ?
- (flags | FLAG_ATTRIBUTE_OFFSET_NEGATIVE) : (flags & ~FLAG_ATTRIBUTE_OFFSET_NEGATIVE);
- if (!bufferToWrite->writeUintAndAdvancePosition(flagsToWrite, 1 /* size */, writingPos)) {
- return false;
- }
- const uint32_t absOffest = abs(offset);
- const int bigramTargetFieldSize = attributeAddressSize(flags);
- return bufferToWrite->writeUintAndAdvancePosition(absOffest, bigramTargetFieldSize,
- writingPos);
-}
-
-// Returns true if the bigram entry is valid and put entry flags into out*.
-/* static */ bool BigramListReadWriteUtils::createAndGetBigramFlags(const int entryPos,
- const int targetPtNodePos, const int probability, const bool hasNext,
- BigramFlags *const outBigramFlags) {
- BigramFlags flags = probability & MASK_ATTRIBUTE_PROBABILITY;
- if (hasNext) {
- flags |= FLAG_ATTRIBUTE_HAS_NEXT;
- }
- const int offset = getBigramTargetOffset(targetPtNodePos, entryPos);
- if (offset < 0) {
- flags |= FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
- }
- const uint32_t absOffest = abs(offset);
- if ((absOffest >> 24) != 0) {
- // Offset is too large.
- return false;
- } else if ((absOffest >> 16) != 0) {
- flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
- } else if ((absOffest >> 8) != 0) {
- flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES;
- } else {
- flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE;
- }
- // Currently, all newly written bigram position fields are 3 bytes to simplify dictionary
- // writing.
- // TODO: Remove following 2 lines and optimize memory space.
- flags = (flags & (~MASK_ATTRIBUTE_ADDRESS_TYPE)) | FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
- *outBigramFlags = flags;
- return true;
-}
-
-/* static */ int BigramListReadWriteUtils::getBigramTargetOffset(const int targetPtNodePos,
- const int entryPos) {
- if (targetPtNodePos == NOT_A_DICT_POS) {
- return DynamicPatriciaTrieReadingUtils::DICT_OFFSET_INVALID;
- } else {
- const int offset = targetPtNodePos - (entryPos + 1 /* bigramFlagsField */);
- if (offset == 0) {
- return DynamicPatriciaTrieReadingUtils::DICT_OFFSET_ZERO_OFFSET;
- } else {
- return offset;
- }
- }
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
deleted file mode 100644
index b1170e251..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
+++ /dev/null
@@ -1,391 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
-
-#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
-#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
-#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
-
-namespace latinime {
-
-const int DynamicBigramListPolicy::CONTINUING_BIGRAM_LINK_COUNT_LIMIT = 10000;
-const int DynamicBigramListPolicy::BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT = 100000;
-
-void DynamicBigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
- bool *const outHasNext, int *const bigramEntryPos) const {
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramEntryPos);
- const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
- if (usesAdditionalBuffer) {
- *bigramEntryPos -= mBuffer->getOriginalBufferSize();
- }
- BigramListReadWriteUtils::BigramFlags bigramFlags;
- int originalBigramPos;
- BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(buffer, &bigramFlags,
- &originalBigramPos, bigramEntryPos);
- if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
- originalBigramPos += mBuffer->getOriginalBufferSize();
- }
- *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags);
- *outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags);
- if (mIsDecayingDict && !ForgettingCurveUtils::isValidEncodedProbability(*outProbability)) {
- // This bigram is too weak to output.
- *outBigramPos = NOT_A_DICT_POS;
- } else {
- *outBigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
- }
- if (usesAdditionalBuffer) {
- *bigramEntryPos += mBuffer->getOriginalBufferSize();
- }
-}
-
-void DynamicBigramListPolicy::skipAllBigrams(int *const bigramListPos) const {
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
- const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
- if (usesAdditionalBuffer) {
- *bigramListPos -= mBuffer->getOriginalBufferSize();
- }
- BigramListReadWriteUtils::skipExistingBigrams(buffer, bigramListPos);
- if (usesAdditionalBuffer) {
- *bigramListPos += mBuffer->getOriginalBufferSize();
- }
-}
-
-bool DynamicBigramListPolicy::copyAllBigrams(BufferWithExtendableBuffer *const bufferToWrite,
- int *const fromPos, int *const toPos, int *const outBigramsCount) const {
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*fromPos);
- if (usesAdditionalBuffer) {
- *fromPos -= mBuffer->getOriginalBufferSize();
- }
- *outBigramsCount = 0;
- BigramListReadWriteUtils::BigramFlags bigramFlags;
- int bigramEntryCount = 0;
- int lastWrittenEntryPos = NOT_A_DICT_POS;
- do {
- if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
- AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
- bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
- ASSERT(false);
- return false;
- }
- // The buffer address can be changed after calling buffer writing methods.
- int originalBigramPos;
- BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
- mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos,
- fromPos);
- if (originalBigramPos == NOT_A_DICT_POS) {
- // skip invalid bigram entry.
- continue;
- }
- if (usesAdditionalBuffer) {
- originalBigramPos += mBuffer->getOriginalBufferSize();
- }
- const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
- if (bigramPos == NOT_A_DICT_POS) {
- // Target PtNode has been invalidated.
- continue;
- }
- lastWrittenEntryPos = *toPos;
- if (!BigramListReadWriteUtils::createAndWriteBigramEntry(bufferToWrite, bigramPos,
- BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags),
- BigramListReadWriteUtils::hasNext(bigramFlags), toPos)) {
- return false;
- }
- (*outBigramsCount)++;
- } while(BigramListReadWriteUtils::hasNext(bigramFlags));
- // Makes the last entry the terminal of the list. Updates the flags.
- if (lastWrittenEntryPos != NOT_A_DICT_POS) {
- if (!BigramListReadWriteUtils::setHasNextFlag(bufferToWrite, false /* hasNext */,
- lastWrittenEntryPos)) {
- return false;
- }
- }
- if (usesAdditionalBuffer) {
- *fromPos += mBuffer->getOriginalBufferSize();
- }
- return true;
-}
-
-// Finding useless bigram entries and remove them. Bigram entry is useless when the target PtNode
-// has been deleted or is not a valid terminal.
-bool DynamicBigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(
- int *const bigramListPos, int *const outValidBigramEntryCount) {
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
- if (usesAdditionalBuffer) {
- *bigramListPos -= mBuffer->getOriginalBufferSize();
- }
- DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy);
- BigramListReadWriteUtils::BigramFlags bigramFlags;
- int bigramEntryCount = 0;
- do {
- if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
- AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
- bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
- ASSERT(false);
- return false;
- }
- int bigramEntryPos = *bigramListPos;
- int originalBigramPos;
- // The buffer address can be changed after calling buffer writing methods.
- BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
- mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos,
- bigramListPos);
- if (usesAdditionalBuffer) {
- bigramEntryPos += mBuffer->getOriginalBufferSize();
- }
- if (originalBigramPos == NOT_A_DICT_POS) {
- // This entry has already been removed.
- continue;
- }
- if (usesAdditionalBuffer) {
- originalBigramPos += mBuffer->getOriginalBufferSize();
- }
- const int bigramTargetNodePos =
- followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
- nodeReader.fetchNodeInfoInBufferFromPtNodePos(bigramTargetNodePos);
- if (nodeReader.isDeleted() || !nodeReader.isTerminal()
- || bigramTargetNodePos == NOT_A_DICT_POS) {
- // The target is no longer valid terminal. Invalidate the current bigram entry.
- if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
- NOT_A_DICT_POS /* targetPtNodePos */, &bigramEntryPos)) {
- return false;
- }
- continue;
- }
- bool isRemoved = false;
- if (!updateProbabilityForDecay(bigramFlags, bigramTargetNodePos, &bigramEntryPos,
- &isRemoved)) {
- return false;
- }
- if (!isRemoved) {
- (*outValidBigramEntryCount) += 1;
- }
- } while(BigramListReadWriteUtils::hasNext(bigramFlags));
- return true;
-}
-
-// Updates bigram target PtNode positions in the list after the placing step in GC.
-bool DynamicBigramListPolicy::updateAllBigramTargetPtNodePositions(int *const bigramListPos,
- const DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap *const
- ptNodePositionRelocationMap, int *const outBigramEntryCount) {
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
- if (usesAdditionalBuffer) {
- *bigramListPos -= mBuffer->getOriginalBufferSize();
- }
- BigramListReadWriteUtils::BigramFlags bigramFlags;
- int bigramEntryCount = 0;
- do {
- if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
- AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
- bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
- ASSERT(false);
- return false;
- }
- int bigramEntryPos = *bigramListPos;
- if (usesAdditionalBuffer) {
- bigramEntryPos += mBuffer->getOriginalBufferSize();
- }
- int bigramTargetPtNodePos;
- // The buffer address can be changed after calling buffer writing methods.
- BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
- mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &bigramTargetPtNodePos,
- bigramListPos);
- if (bigramTargetPtNodePos == NOT_A_DICT_POS) {
- continue;
- }
- if (usesAdditionalBuffer) {
- bigramTargetPtNodePos += mBuffer->getOriginalBufferSize();
- }
-
- DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::const_iterator it =
- ptNodePositionRelocationMap->find(bigramTargetPtNodePos);
- if (it != ptNodePositionRelocationMap->end()) {
- bigramTargetPtNodePos = it->second;
- } else {
- bigramTargetPtNodePos = NOT_A_DICT_POS;
- }
- if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
- bigramTargetPtNodePos, &bigramEntryPos)) {
- return false;
- }
- } while(BigramListReadWriteUtils::hasNext(bigramFlags));
- (*outBigramEntryCount) = bigramEntryCount;
- return true;
-}
-
-bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTargetPos,
- const int probability, int *const bigramListPos, bool *const outAddedNewBigram) {
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
- if (usesAdditionalBuffer) {
- *bigramListPos -= mBuffer->getOriginalBufferSize();
- }
- BigramListReadWriteUtils::BigramFlags bigramFlags;
- int bigramEntryCount = 0;
- do {
- if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
- AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
- bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
- ASSERT(false);
- return false;
- }
- int entryPos = *bigramListPos;
- if (usesAdditionalBuffer) {
- entryPos += mBuffer->getOriginalBufferSize();
- }
- int originalBigramPos;
- // The buffer address can be changed after calling buffer writing methods.
- BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
- mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos,
- bigramListPos);
- if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
- originalBigramPos += mBuffer->getOriginalBufferSize();
- }
- if (followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos) == bigramTargetPos) {
- // Update this bigram entry.
- *outAddedNewBigram = false;
- const int originalProbability = BigramListReadWriteUtils::getProbabilityFromFlags(
- bigramFlags);
- const int probabilityToWrite = mIsDecayingDict ?
- ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
- probability) : probability;
- const BigramListReadWriteUtils::BigramFlags updatedFlags =
- BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags,
- probabilityToWrite);
- return BigramListReadWriteUtils::writeBigramEntry(mBuffer, updatedFlags,
- originalBigramPos, &entryPos);
- }
- if (BigramListReadWriteUtils::hasNext(bigramFlags)) {
- continue;
- }
- // The current last entry is found.
- // First, update the flags of the last entry.
- if (!BigramListReadWriteUtils::setHasNextFlag(mBuffer, true /* hasNext */, entryPos)) {
- *outAddedNewBigram = false;
- return false;
- }
- if (usesAdditionalBuffer) {
- *bigramListPos += mBuffer->getOriginalBufferSize();
- }
- // Then, add a new entry after the last entry.
- *outAddedNewBigram = true;
- return writeNewBigramEntry(bigramTargetPos, probability, bigramListPos);
- } while(BigramListReadWriteUtils::hasNext(bigramFlags));
- // We return directly from the while loop.
- ASSERT(false);
- return false;
-}
-
-bool DynamicBigramListPolicy::writeNewBigramEntry(const int bigramTargetPos, const int probability,
- int *const writingPos) {
- // hasNext is false because we are adding a new bigram entry at the end of the bigram list.
- const int probabilityToWrite = mIsDecayingDict ?
- ForgettingCurveUtils::getUpdatedEncodedProbability(NOT_A_PROBABILITY, probability) :
- probability;
- return BigramListReadWriteUtils::createAndWriteBigramEntry(mBuffer, bigramTargetPos,
- probabilityToWrite, false /* hasNext */, writingPos);
-}
-
-bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int bigramTargetPos) {
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(bigramListPos);
- int pos = bigramListPos;
- if (usesAdditionalBuffer) {
- pos -= mBuffer->getOriginalBufferSize();
- }
- BigramListReadWriteUtils::BigramFlags bigramFlags;
- int bigramEntryCount = 0;
- do {
- if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
- AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
- bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
- ASSERT(false);
- return false;
- }
- int bigramEntryPos = pos;
- int originalBigramPos;
- // The buffer address can be changed after calling buffer writing methods.
- BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
- mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos, &pos);
- if (usesAdditionalBuffer) {
- bigramEntryPos += mBuffer->getOriginalBufferSize();
- }
- if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
- originalBigramPos += mBuffer->getOriginalBufferSize();
- }
- const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
- if (bigramPos != bigramTargetPos) {
- continue;
- }
- // Target entry is found. Write an invalid target position to mark the bigram invalid.
- return BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
- NOT_A_DICT_POS /* targetOffset */, &bigramEntryPos);
- } while(BigramListReadWriteUtils::hasNext(bigramFlags));
- return false;
-}
-
-int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos(
- const int originalBigramPos) const {
- if (originalBigramPos == NOT_A_DICT_POS) {
- return NOT_A_DICT_POS;
- }
- int currentPos = originalBigramPos;
- DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy);
- nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos);
- int bigramLinkCount = 0;
- while (nodeReader.getBigramLinkedNodePos() != NOT_A_DICT_POS) {
- currentPos = nodeReader.getBigramLinkedNodePos();
- nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos);
- bigramLinkCount++;
- if (bigramLinkCount > CONTINUING_BIGRAM_LINK_COUNT_LIMIT) {
- AKLOGE("Bigram link is invalid. start position: %d", originalBigramPos);
- ASSERT(false);
- return NOT_A_DICT_POS;
- }
- }
- return currentPos;
-}
-
-bool DynamicBigramListPolicy::updateProbabilityForDecay(
- const BigramListReadWriteUtils::BigramFlags bigramFlags, const int targetPtNodePos,
- int *const bigramEntryPos, bool *const outRemoved) const {
- *outRemoved = false;
- if (mIsDecayingDict) {
- // Update bigram probability for decaying.
- const int newProbability = ForgettingCurveUtils::getEncodedProbabilityToSave(
- BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags), mHeaderPolicy);
- if (ForgettingCurveUtils::isValidEncodedProbability(newProbability)) {
- // Write new probability.
- const BigramListReadWriteUtils::BigramFlags updatedBigramFlags =
- BigramListReadWriteUtils::setProbabilityInFlags(
- bigramFlags, newProbability);
- if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, updatedBigramFlags,
- targetPtNodePos, bigramEntryPos)) {
- return false;
- }
- } else {
- // Remove current bigram entry.
- *outRemoved = true;
- if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
- NOT_A_DICT_POS /* targetPtNodePos */, bigramEntryPos)) {
- return false;
- }
- }
- }
- return true;
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
deleted file mode 100644
index 0504b59d5..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H
-#define LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H
-
-#include <stdint.h>
-
-#include "defines.h"
-#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
-#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
-
-namespace latinime {
-
-class BufferWithExtendableBuffer;
-class DictionaryHeaderStructurePolicy;
-class DictionaryShortcutsStructurePolicy;
-
-/*
- * This is a dynamic version of BigramListPolicy and supports an additional buffer.
- */
-class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
- public:
- DynamicBigramListPolicy(const DictionaryHeaderStructurePolicy *const headerPolicy,
- BufferWithExtendableBuffer *const buffer,
- const DictionaryShortcutsStructurePolicy *const shortcutPolicy,
- const bool isDecayingDict)
- : mHeaderPolicy(headerPolicy), mBuffer(buffer), mShortcutPolicy(shortcutPolicy),
- mIsDecayingDict(isDecayingDict) {}
-
- ~DynamicBigramListPolicy() {}
-
- void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
- int *const bigramEntryPos) const;
-
- void skipAllBigrams(int *const bigramListPos) const;
-
- // Copy bigrams from the bigram list that starts at fromPos in mBuffer to toPos in
- // bufferToWrite and advance these positions after bigram lists. This method skips invalid
- // bigram entries and write the valid bigram entry count to outBigramsCount.
- bool copyAllBigrams(BufferWithExtendableBuffer *const bufferToWrite, int *const fromPos,
- int *const toPos, int *const outBigramsCount) const;
-
- bool updateAllBigramEntriesAndDeleteUselessEntries(int *const bigramListPos,
- int *const outBigramEntryCount);
-
- bool updateAllBigramTargetPtNodePositions(int *const bigramListPos,
- const DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap *const
- ptNodePositionRelocationMap, int *const outValidBigramEntryCount);
-
- bool addNewBigramEntryToBigramList(const int bigramTargetPos, const int probability,
- int *const bigramListPos, bool *const outAddedNewBigram);
-
- bool writeNewBigramEntry(const int bigramTargetPos, const int probability,
- int *const writingPos);
-
- // Return whether or not targetBigramPos is found.
- bool removeBigram(const int bigramListPos, const int bigramTargetPos);
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicBigramListPolicy);
-
- static const int CONTINUING_BIGRAM_LINK_COUNT_LIMIT;
- static const int BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT;
-
- const DictionaryHeaderStructurePolicy *const mHeaderPolicy;
- BufferWithExtendableBuffer *const mBuffer;
- const DictionaryShortcutsStructurePolicy *const mShortcutPolicy;
- const bool mIsDecayingDict;
-
- // Follow bigram link and return the position of bigram target PtNode that is currently valid.
- int followBigramLinkAndGetCurrentBigramPtNodePos(const int originalBigramPos) const;
-
- bool updateProbabilityForDecay(const BigramListReadWriteUtils::BigramFlags bigramFlags,
- const int targetPtNodePos, int *const bigramEntryPos, bool *const outRemoved) const;
-};
-} // namespace latinime
-#endif // LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp
deleted file mode 100644
index ff80dd2f6..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h"
-
-#include <stdint.h>
-
-#include "defines.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h"
-#include "suggest/policyimpl/dictionary/patricia_trie_policy.h"
-#include "suggest/policyimpl/dictionary/utils/format_utils.h"
-#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
-
-namespace latinime {
-
-/* static */ DictionaryStructureWithBufferPolicy *DictionaryStructureWithBufferPolicyFactory
- ::newDictionaryStructureWithBufferPolicy(const char *const path, const int bufOffset,
- const int size, const bool isUpdatable) {
- // Allocated buffer in MmapedBuffer::openBuffer() will be freed in the destructor of
- // impl classes of DictionaryStructureWithBufferPolicy.
- const MmappedBuffer *const mmapedBuffer = MmappedBuffer::openBuffer(path, bufOffset, size,
- isUpdatable);
- if (!mmapedBuffer) {
- return 0;
- }
- switch (FormatUtils::detectFormatVersion(mmapedBuffer->getBuffer(),
- mmapedBuffer->getBufferSize())) {
- case FormatUtils::VERSION_2:
- return new PatriciaTriePolicy(mmapedBuffer);
- case FormatUtils::VERSION_3:
- return new DynamicPatriciaTriePolicy(mmapedBuffer);
- default:
- AKLOGE("DICT: dictionary format is unknown, bad magic number");
- delete mmapedBuffer;
- ASSERT(false);
- return 0;
- }
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
deleted file mode 100644
index 5724c5d88..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h"
-
-#include "suggest/core/policy/dictionary_header_structure_policy.h"
-#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
-
-namespace latinime {
-
-bool DynamicPatriciaTrieGcEventListeners
- ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
- ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
- const int *const nodeCodePoints) {
- // PtNode is useless when the PtNode is not a terminal and doesn't have any not useless
- // children.
- bool isUselessPtNode = !node->isTerminal();
- if (node->isTerminal() && mIsDecayingDict) {
- const int newProbability =
- ForgettingCurveUtils::getEncodedProbabilityToSave(node->getProbability(),
- mHeaderPolicy);
- int writingPos = node->getProbabilityFieldPos();
- // Update probability.
- if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(
- mBuffer, newProbability, &writingPos)) {
- return false;
- }
- if (!ForgettingCurveUtils::isValidEncodedProbability(newProbability)) {
- isUselessPtNode = true;
- }
- }
- if (mChildrenValue > 0) {
- isUselessPtNode = false;
- } else if (node->isTerminal()) {
- // Remove children as all children are useless.
- int writingPos = node->getChildrenPosFieldPos();
- if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
- mBuffer, NOT_A_DICT_POS /* childrenPosition */, &writingPos)) {
- return false;
- }
- }
- if (isUselessPtNode) {
- // Current PtNode is no longer needed. Mark it as deleted.
- if (!mWritingHelper->markNodeAsDeleted(node)) {
- return false;
- }
- } else {
- mValueStack.back() += 1;
- if (node->isTerminal()) {
- mValidUnigramCount += 1;
- }
- }
- return true;
-}
-
-bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability
- ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
- const int *const nodeCodePoints) {
- if (!node->isDeleted()) {
- int pos = node->getBigramsPos();
- if (pos != NOT_A_DICT_POS) {
- int bigramEntryCount = 0;
- if (!mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(&pos,
- &bigramEntryCount)) {
- return false;
- }
- mValidBigramEntryCount += bigramEntryCount;
- }
- }
- return true;
-}
-
-// Writes dummy PtNode array size when the head of PtNode array is read.
-bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
- ::onDescend(const int ptNodeArrayPos) {
- mValidPtNodeCount = 0;
- int writingPos = mBufferToWrite->getTailPosition();
- mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.insert(
- DynamicPatriciaTrieWritingHelper::PtNodeArrayPositionRelocationMap::value_type(
- ptNodeArrayPos, writingPos));
- // Writes dummy PtNode array size because arrays can have a forward link or needles PtNodes.
- // This field will be updated later in onReadingPtNodeArrayTail() with actual PtNode count.
- mPtNodeArraySizeFieldPos = writingPos;
- return DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(
- mBufferToWrite, 0 /* arraySize */, &writingPos);
-}
-
-// Write PtNode array terminal and actual PtNode array size.
-bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
- ::onReadingPtNodeArrayTail() {
- int writingPos = mBufferToWrite->getTailPosition();
- // Write PtNode array terminal.
- if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(
- mBufferToWrite, NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
- return false;
- }
- // Write actual PtNode array size.
- if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(
- mBufferToWrite, mValidPtNodeCount, &mPtNodeArraySizeFieldPos)) {
- return false;
- }
- return true;
-}
-
-// Write valid PtNode to buffer and memorize mapping from the old position to the new position.
-bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
- ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
- const int *const nodeCodePoints) {
- if (node->isDeleted()) {
- // Current PtNode is not written in new buffer because it has been deleted.
- mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert(
- DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::value_type(
- node->getHeadPos(), NOT_A_DICT_POS));
- return true;
- }
- int writingPos = mBufferToWrite->getTailPosition();
- mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert(
- DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::value_type(
- node->getHeadPos(), writingPos));
- mValidPtNodeCount++;
- // Writes current PtNode.
- return mWritingHelper->writePtNodeToBufferByCopyingPtNodeInfo(mBufferToWrite, node,
- node->getParentPos(), nodeCodePoints, node->getCodePointCount(),
- node->getProbability(), &writingPos);
-}
-
-bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
- ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
- const int *const nodeCodePoints) {
- // Updates parent position.
- int parentPos = node->getParentPos();
- if (parentPos != NOT_A_DICT_POS) {
- DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::const_iterator it =
- mDictPositionRelocationMap->mPtNodePositionRelocationMap.find(parentPos);
- if (it != mDictPositionRelocationMap->mPtNodePositionRelocationMap.end()) {
- parentPos = it->second;
- }
- }
- int writingPos = node->getHeadPos() + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
- // Write updated parent offset.
- if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mBufferToWrite,
- parentPos, node->getHeadPos(), &writingPos)) {
- return false;
- }
-
- // Updates children position.
- int childrenPos = node->getChildrenPos();
- if (childrenPos != NOT_A_DICT_POS) {
- DynamicPatriciaTrieWritingHelper::PtNodeArrayPositionRelocationMap::const_iterator it =
- mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.find(childrenPos);
- if (it != mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.end()) {
- childrenPos = it->second;
- }
- }
- writingPos = node->getChildrenPosFieldPos();
- if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBufferToWrite,
- childrenPos, &writingPos)) {
- return false;
- }
-
- // Updates bigram target PtNode positions in the bigram list.
- int bigramsPos = node->getBigramsPos();
- if (bigramsPos != NOT_A_DICT_POS) {
- int bigramEntryCount;
- if (!mBigramPolicy->updateAllBigramTargetPtNodePositions(&bigramsPos,
- &mDictPositionRelocationMap->mPtNodePositionRelocationMap, &bigramEntryCount)) {
- return false;
- }
- mBigramCount += bigramEntryCount;
- }
- if (node->isTerminal()) {
- mUnigramCount++;
- }
-
- return true;
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp
deleted file mode 100644
index 2fa3111d3..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
-
-#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
-#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
-
-namespace latinime {
-
-void DynamicPatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode(
- const int ptNodePos, const int maxCodePointCount, int *const outCodePoints) {
- if (ptNodePos < 0 || ptNodePos >= mBuffer->getTailPosition()) {
- // Reading invalid position because of bug or broken dictionary.
- AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d",
- ptNodePos, mBuffer->getTailPosition());
- ASSERT(false);
- invalidatePtNodeInfo();
- return;
- }
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodePos);
- const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
- int pos = ptNodePos;
- mHeadPos = ptNodePos;
- if (usesAdditionalBuffer) {
- pos -= mBuffer->getOriginalBufferSize();
- }
- mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
- const int parentPosOffset =
- DynamicPatriciaTrieReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(dictBuf,
- &pos);
- mParentPos = DynamicPatriciaTrieReadingUtils::getParentPtNodePos(parentPosOffset, mHeadPos);
- if (outCodePoints != 0) {
- mCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
- dictBuf, mFlags, maxCodePointCount, outCodePoints, &pos);
- } else {
- mCodePointCount = PatriciaTrieReadingUtils::skipCharacters(
- dictBuf, mFlags, MAX_WORD_LENGTH, &pos);
- }
- if (isTerminal()) {
- mProbabilityFieldPos = pos;
- if (usesAdditionalBuffer) {
- mProbabilityFieldPos += mBuffer->getOriginalBufferSize();
- }
- mProbability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictBuf, &pos);
- } else {
- mProbabilityFieldPos = NOT_A_DICT_POS;
- mProbability = NOT_A_PROBABILITY;
- }
- mChildrenPosFieldPos = pos;
- if (usesAdditionalBuffer) {
- mChildrenPosFieldPos += mBuffer->getOriginalBufferSize();
- }
- mChildrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
- dictBuf, &pos);
- if (usesAdditionalBuffer && mChildrenPos != NOT_A_DICT_POS) {
- mChildrenPos += mBuffer->getOriginalBufferSize();
- }
- if (mSiblingPos == NOT_A_DICT_POS) {
- if (DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) {
- mBigramLinkedNodePos = mChildrenPos;
- } else {
- mBigramLinkedNodePos = NOT_A_DICT_POS;
- }
- }
- if (usesAdditionalBuffer) {
- pos += mBuffer->getOriginalBufferSize();
- }
- if (PatriciaTrieReadingUtils::hasShortcutTargets(mFlags)) {
- mShortcutPos = pos;
- mShortcutsPolicy->skipAllShortcuts(&pos);
- } else {
- mShortcutPos = NOT_A_DICT_POS;
- }
- if (PatriciaTrieReadingUtils::hasBigrams(mFlags)) {
- mBigramPos = pos;
- mBigramsPolicy->skipAllBigrams(&pos);
- } else {
- mBigramPos = NOT_A_DICT_POS;
- }
- // Update siblingPos if needed.
- if (mSiblingPos == NOT_A_DICT_POS) {
- // Sibling position is the tail position of current node.
- mSiblingPos = pos;
- }
- // Read destination node if the read node is a moved node.
- if (DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) {
- // The destination position is stored at the same place as the parent position.
- fetchPtNodeInfoFromBufferAndProcessMovedPtNode(mParentPos, maxCodePointCount,
- outCodePoints);
- }
-}
-
-void DynamicPatriciaTrieNodeReader::invalidatePtNodeInfo() {
- mHeadPos = NOT_A_DICT_POS;
- mFlags = 0;
- mParentPos = NOT_A_DICT_POS;
- mCodePointCount = 0;
- mProbabilityFieldPos = NOT_A_DICT_POS;
- mProbability = NOT_A_PROBABILITY;
- mChildrenPosFieldPos = NOT_A_DICT_POS;
- mChildrenPos = NOT_A_DICT_POS;
- mBigramLinkedNodePos = NOT_A_DICT_POS;
- mShortcutPos = NOT_A_DICT_POS;
- mBigramPos = NOT_A_DICT_POS;
- mSiblingPos = NOT_A_DICT_POS;
-}
-
-}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h
deleted file mode 100644
index 3b36d425f..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H
-#define LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H
-
-#include <stdint.h>
-
-#include "defines.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
-
-namespace latinime {
-
-class BufferWithExtendableBuffer;
-class DictionaryBigramsStructurePolicy;
-class DictionaryShortcutsStructurePolicy;
-
-/*
- * This class is used for helping to read nodes of dynamic patricia trie. This class handles moved
- * node and reads node attributes.
- */
-class DynamicPatriciaTrieNodeReader {
- public:
- DynamicPatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer,
- const DictionaryBigramsStructurePolicy *const bigramsPolicy,
- const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
- : mBuffer(buffer), mBigramsPolicy(bigramsPolicy),
- mShortcutsPolicy(shortcutsPolicy), mHeadPos(NOT_A_DICT_POS), mFlags(0),
- mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbabilityFieldPos(NOT_A_DICT_POS),
- mProbability(NOT_A_PROBABILITY), mChildrenPosFieldPos(NOT_A_DICT_POS),
- mChildrenPos(NOT_A_DICT_POS), mBigramLinkedNodePos(NOT_A_DICT_POS),
- mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS),
- mSiblingPos(NOT_A_DICT_POS) {}
-
- ~DynamicPatriciaTrieNodeReader() {}
-
- // Reads PtNode information from dictionary buffer and updates members with the information.
- AK_FORCE_INLINE void fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) {
- fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(ptNodePos ,
- 0 /* maxCodePointCount */, 0 /* outCodePoints */);
- }
-
- AK_FORCE_INLINE void fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(
- const int ptNodePos, const int maxCodePointCount, int *const outCodePoints) {
- mSiblingPos = NOT_A_DICT_POS;
- mBigramLinkedNodePos = NOT_A_DICT_POS;
- fetchPtNodeInfoFromBufferAndProcessMovedPtNode(ptNodePos, maxCodePointCount, outCodePoints);
- }
-
- // HeadPos is different from NodePos when the current PtNode is a moved PtNode.
- AK_FORCE_INLINE int getHeadPos() const {
- return mHeadPos;
- }
-
- // Flags
- AK_FORCE_INLINE bool isDeleted() const {
- return DynamicPatriciaTrieReadingUtils::isDeleted(mFlags);
- }
-
- AK_FORCE_INLINE bool hasChildren() const {
- return mChildrenPos != NOT_A_DICT_POS;
- }
-
- AK_FORCE_INLINE bool isTerminal() const {
- return PatriciaTrieReadingUtils::isTerminal(mFlags);
- }
-
- AK_FORCE_INLINE bool isBlacklisted() const {
- return PatriciaTrieReadingUtils::isBlacklisted(mFlags);
- }
-
- AK_FORCE_INLINE bool isNotAWord() const {
- return PatriciaTrieReadingUtils::isNotAWord(mFlags);
- }
-
- // Parent node position
- AK_FORCE_INLINE int getParentPos() const {
- return mParentPos;
- }
-
- // Number of code points
- AK_FORCE_INLINE uint8_t getCodePointCount() const {
- return mCodePointCount;
- }
-
- // Probability
- AK_FORCE_INLINE int getProbabilityFieldPos() const {
- return mProbabilityFieldPos;
- }
-
- AK_FORCE_INLINE int getProbability() const {
- return mProbability;
- }
-
- // Children PtNode array position
- AK_FORCE_INLINE int getChildrenPosFieldPos() const {
- return mChildrenPosFieldPos;
- }
-
- AK_FORCE_INLINE int getChildrenPos() const {
- return mChildrenPos;
- }
-
- // Bigram linked node position.
- AK_FORCE_INLINE int getBigramLinkedNodePos() const {
- return mBigramLinkedNodePos;
- }
-
- // Shortcutlist position
- AK_FORCE_INLINE int getShortcutPos() const {
- return mShortcutPos;
- }
-
- // Bigrams position
- AK_FORCE_INLINE int getBigramsPos() const {
- return mBigramPos;
- }
-
- // Sibling node position
- AK_FORCE_INLINE int getSiblingNodePos() const {
- return mSiblingPos;
- }
-
- private:
- DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieNodeReader);
-
- const BufferWithExtendableBuffer *const mBuffer;
- const DictionaryBigramsStructurePolicy *const mBigramsPolicy;
- const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy;
- int mHeadPos;
- DynamicPatriciaTrieReadingUtils::NodeFlags mFlags;
- int mParentPos;
- uint8_t mCodePointCount;
- int mProbabilityFieldPos;
- int mProbability;
- int mChildrenPosFieldPos;
- int mChildrenPos;
- int mBigramLinkedNodePos;
- int mShortcutPos;
- int mBigramPos;
- int mSiblingPos;
-
- void fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
- const int maxCodePointCount, int *const outCodePoints);
-
- void invalidatePtNodeInfo();
-};
-} // namespace latinime
-#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
deleted file mode 100644
index 495b146c2..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
+++ /dev/null
@@ -1,380 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h"
-
-#include <cstdio>
-#include <cstring>
-#include <ctime>
-
-#include "defines.h"
-#include "suggest/core/dicnode/dic_node.h"
-#include "suggest/core/dicnode/dic_node_vector.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
-#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
-#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
-
-namespace latinime {
-
-// Note that these are corresponding definitions in Java side in BinaryDictionaryTests and
-// BinaryDictionaryDecayingTests.
-const char *const DynamicPatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
-const char *const DynamicPatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
-const char *const DynamicPatriciaTriePolicy::MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT";
-const char *const DynamicPatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT";
-const char *const DynamicPatriciaTriePolicy::SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY =
- "SET_NEEDS_TO_DECAY_FOR_TESTING";
-const int DynamicPatriciaTriePolicy::MAX_DICT_EXTENDED_REGION_SIZE = 1024 * 1024;
-const int DynamicPatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
- DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - 1024;
-
-void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
- DicNodeVector *const childDicNodes) const {
- if (!dicNode->hasChildren()) {
- return;
- }
- DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
- getBigramsStructurePolicy(), getShortcutsStructurePolicy());
- readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPos());
- const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
- while (!readingHelper.isEnd()) {
- bool isTerminal = nodeReader->isTerminal() && !nodeReader->isDeleted();
- if (isTerminal && mHeaderPolicy.isDecayingDict()) {
- // A DecayingDict may have a terminal PtNode that has a terminal DicNode whose
- // probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a
- // valid terminal DicNode.
- isTerminal = getProbability(nodeReader->getProbability(), NOT_A_PROBABILITY)
- != NOT_A_PROBABILITY;
- }
- childDicNodes->pushLeavingChild(dicNode, nodeReader->getHeadPos(),
- nodeReader->getChildrenPos(), nodeReader->getProbability(), isTerminal,
- nodeReader->hasChildren(), nodeReader->isBlacklisted() || nodeReader->isNotAWord(),
- nodeReader->getCodePointCount(), readingHelper.getMergedNodeCodePoints());
- readingHelper.readNextSiblingNode();
- }
-}
-
-int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
- const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
- int *const outUnigramProbability) const {
- // This method traverses parent nodes from the terminal by following parent pointers; thus,
- // node code points are stored in the buffer in the reverse order.
- int reverseCodePoints[maxCodePointCount];
- DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
- getBigramsStructurePolicy(), getShortcutsStructurePolicy());
- // First, read the terminal node and get its probability.
- readingHelper.initWithPtNodePos(ptNodePos);
- if (!readingHelper.isValidTerminalNode()) {
- // Node at the ptNodePos is not a valid terminal node.
- *outUnigramProbability = NOT_A_PROBABILITY;
- return 0;
- }
- // Store terminal node probability.
- *outUnigramProbability = readingHelper.getNodeReader()->getProbability();
- // Then, following parent node link to the dictionary root and fetch node code points.
- while (!readingHelper.isEnd()) {
- if (readingHelper.getTotalCodePointCount() > maxCodePointCount) {
- // The ptNodePos is not a valid terminal node position in the dictionary.
- *outUnigramProbability = NOT_A_PROBABILITY;
- return 0;
- }
- // Store node code points to buffer in the reverse order.
- readingHelper.fetchMergedNodeCodePointsInReverseOrder(
- readingHelper.getPrevTotalCodePointCount(), reverseCodePoints);
- // Follow parent node toward the root node.
- readingHelper.readParentNode();
- }
- if (readingHelper.isError()) {
- // The node position or the dictionary is invalid.
- *outUnigramProbability = NOT_A_PROBABILITY;
- return 0;
- }
- // Reverse the stored code points to output them.
- const int codePointCount = readingHelper.getTotalCodePointCount();
- for (int i = 0; i < codePointCount; ++i) {
- outCodePoints[i] = reverseCodePoints[codePointCount - i - 1];
- }
- return codePointCount;
-}
-
-int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
- const int length, const bool forceLowerCaseSearch) const {
- int searchCodePoints[length];
- for (int i = 0; i < length; ++i) {
- searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i];
- }
- DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
- getBigramsStructurePolicy(), getShortcutsStructurePolicy());
- readingHelper.initWithPtNodeArrayPos(getRootPosition());
- const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
- while (!readingHelper.isEnd()) {
- const int matchedCodePointCount = readingHelper.getPrevTotalCodePointCount();
- if (readingHelper.getTotalCodePointCount() > length
- || !readingHelper.isMatchedCodePoint(0 /* index */,
- searchCodePoints[matchedCodePointCount])) {
- // Current node has too many code points or its first code point is different from
- // target code point. Skip this node and read the next sibling node.
- readingHelper.readNextSiblingNode();
- continue;
- }
- // Check following merged node code points.
- const int nodeCodePointCount = nodeReader->getCodePointCount();
- for (int j = 1; j < nodeCodePointCount; ++j) {
- if (!readingHelper.isMatchedCodePoint(
- j, searchCodePoints[matchedCodePointCount + j])) {
- // Different code point is found. The given word is not included in the dictionary.
- return NOT_A_DICT_POS;
- }
- }
- // All characters are matched.
- if (length == readingHelper.getTotalCodePointCount()) {
- // Terminal position is found.
- return nodeReader->getHeadPos();
- }
- if (!nodeReader->hasChildren()) {
- return NOT_A_DICT_POS;
- }
- // Advance to the children nodes.
- readingHelper.readChildNode();
- }
- // If we already traversed the tree further than the word is long, there means
- // there was no match (or we would have found it).
- return NOT_A_DICT_POS;
-}
-
-int DynamicPatriciaTriePolicy::getProbability(const int unigramProbability,
- const int bigramProbability) const {
- if (mHeaderPolicy.isDecayingDict()) {
- return ForgettingCurveUtils::getProbability(unigramProbability, bigramProbability);
- } else {
- if (unigramProbability == NOT_A_PROBABILITY) {
- return NOT_A_PROBABILITY;
- } else if (bigramProbability == NOT_A_PROBABILITY) {
- return ProbabilityUtils::backoff(unigramProbability);
- } else {
- return ProbabilityUtils::computeProbabilityForBigram(unigramProbability,
- bigramProbability);
- }
- }
-}
-
-int DynamicPatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const {
- if (ptNodePos == NOT_A_DICT_POS) {
- return NOT_A_PROBABILITY;
- }
- DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
- getBigramsStructurePolicy(), getShortcutsStructurePolicy());
- nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
- if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) {
- return NOT_A_PROBABILITY;
- }
- return getProbability(nodeReader.getProbability(), NOT_A_PROBABILITY);
-}
-
-int DynamicPatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
- if (ptNodePos == NOT_A_DICT_POS) {
- return NOT_A_DICT_POS;
- }
- DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
- getBigramsStructurePolicy(), getShortcutsStructurePolicy());
- nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
- if (nodeReader.isDeleted()) {
- return NOT_A_DICT_POS;
- }
- return nodeReader.getShortcutPos();
-}
-
-int DynamicPatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
- if (ptNodePos == NOT_A_DICT_POS) {
- return NOT_A_DICT_POS;
- }
- DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
- getBigramsStructurePolicy(), getShortcutsStructurePolicy());
- nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
- if (nodeReader.isDeleted()) {
- return NOT_A_DICT_POS;
- }
- return nodeReader.getBigramsPos();
-}
-
-bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int length,
- const int probability) {
- if (!mBuffer->isUpdatable()) {
- AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
- return false;
- }
- if (mBufferWithExtendableBuffer.getTailPosition()
- >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
- AKLOGE("The dictionary is too large to dynamically update.");
- return false;
- }
- DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
- getBigramsStructurePolicy(), getShortcutsStructurePolicy());
- readingHelper.initWithPtNodeArrayPos(getRootPosition());
- DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
- &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict());
- bool addedNewUnigram = false;
- if (writingHelper.addUnigramWord(&readingHelper, word, length, probability,
- &addedNewUnigram)) {
- if (addedNewUnigram) {
- mUnigramCount++;
- }
- return true;
- } else {
- return false;
- }
-}
-
-bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int length0,
- const int *const word1, const int length1, const int probability) {
- if (!mBuffer->isUpdatable()) {
- AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
- return false;
- }
- if (mBufferWithExtendableBuffer.getTailPosition()
- >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
- AKLOGE("The dictionary is too large to dynamically update.");
- return false;
- }
- const int word0Pos = getTerminalNodePositionOfWord(word0, length0,
- false /* forceLowerCaseSearch */);
- if (word0Pos == NOT_A_DICT_POS) {
- return false;
- }
- const int word1Pos = getTerminalNodePositionOfWord(word1, length1,
- false /* forceLowerCaseSearch */);
- if (word1Pos == NOT_A_DICT_POS) {
- return false;
- }
- DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
- &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict());
- bool addedNewBigram = false;
- if (writingHelper.addBigramWords(word0Pos, word1Pos, probability, &addedNewBigram)) {
- if (addedNewBigram) {
- mBigramCount++;
- }
- return true;
- } else {
- return false;
- }
-}
-
-bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0,
- const int *const word1, const int length1) {
- if (!mBuffer->isUpdatable()) {
- AKLOGI("Warning: removeBigramWords() is called for non-updatable dictionary.");
- return false;
- }
- if (mBufferWithExtendableBuffer.getTailPosition()
- >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
- AKLOGE("The dictionary is too large to dynamically update.");
- return false;
- }
- const int word0Pos = getTerminalNodePositionOfWord(word0, length0,
- false /* forceLowerCaseSearch */);
- if (word0Pos == NOT_A_DICT_POS) {
- return false;
- }
- const int word1Pos = getTerminalNodePositionOfWord(word1, length1,
- false /* forceLowerCaseSearch */);
- if (word1Pos == NOT_A_DICT_POS) {
- return false;
- }
- DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
- &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict());
- if (writingHelper.removeBigramWords(word0Pos, word1Pos)) {
- mBigramCount--;
- return true;
- } else {
- return false;
- }
-}
-
-void DynamicPatriciaTriePolicy::flush(const char *const filePath) {
- if (!mBuffer->isUpdatable()) {
- AKLOGI("Warning: flush() is called for non-updatable dictionary.");
- return;
- }
- DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
- &mBigramListPolicy, &mShortcutListPolicy, false /* needsToDecay */);
- writingHelper.writeToDictFile(filePath, &mHeaderPolicy, mUnigramCount, mBigramCount);
-}
-
-void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) {
- if (!mBuffer->isUpdatable()) {
- AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
- return;
- }
- const bool needsToDecay = mHeaderPolicy.isDecayingDict()
- && (mNeedsToDecayForTesting || ForgettingCurveUtils::needsToDecay(
- false /* mindsBlockByDecay */, mUnigramCount, mBigramCount, &mHeaderPolicy));
- DynamicBigramListPolicy bigramListPolicyForGC(&mHeaderPolicy, &mBufferWithExtendableBuffer,
- &mShortcutListPolicy, needsToDecay);
- DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
- &bigramListPolicyForGC, &mShortcutListPolicy, needsToDecay);
- writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy);
- mNeedsToDecayForTesting = false;
-}
-
-bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
- if (!mBuffer->isUpdatable()) {
- AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
- return false;
- }
- if (mBufferWithExtendableBuffer.isNearSizeLimit()) {
- // Additional buffer size is near the limit.
- return true;
- } else if (mHeaderPolicy.getExtendedRegionSize()
- + mBufferWithExtendableBuffer.getUsedAdditionalBufferSize()
- > MAX_DICT_EXTENDED_REGION_SIZE) {
- // Total extended region size exceeds the limit.
- return true;
- } else if (mBufferWithExtendableBuffer.getTailPosition()
- >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS
- && mBufferWithExtendableBuffer.getUsedAdditionalBufferSize() > 0) {
- // Needs to reduce dictionary size.
- return true;
- } else if (mHeaderPolicy.isDecayingDict()) {
- return mNeedsToDecayForTesting || ForgettingCurveUtils::needsToDecay(
- mindsBlockByGC, mUnigramCount, mBigramCount, &mHeaderPolicy);
- }
- return false;
-}
-
-void DynamicPatriciaTriePolicy::getProperty(const char *const query, char *const outResult,
- const int maxResultLength) {
- if (strncmp(query, UNIGRAM_COUNT_QUERY, maxResultLength) == 0) {
- snprintf(outResult, maxResultLength, "%d", mUnigramCount);
- } else if (strncmp(query, BIGRAM_COUNT_QUERY, maxResultLength) == 0) {
- snprintf(outResult, maxResultLength, "%d", mBigramCount);
- } else if (strncmp(query, MAX_UNIGRAM_COUNT_QUERY, maxResultLength) == 0) {
- snprintf(outResult, maxResultLength, "%d",
- mHeaderPolicy.isDecayingDict() ? ForgettingCurveUtils::MAX_UNIGRAM_COUNT :
- static_cast<int>(DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE));
- } else if (strncmp(query, MAX_BIGRAM_COUNT_QUERY, maxResultLength) == 0) {
- snprintf(outResult, maxResultLength, "%d",
- mHeaderPolicy.isDecayingDict() ? ForgettingCurveUtils::MAX_BIGRAM_COUNT :
- static_cast<int>(DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE));
- } else if (strncmp(query, SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY, maxResultLength) == 0) {
- mNeedsToDecayForTesting = true;
- }
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
deleted file mode 100644
index be97ee1a5..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
-#define LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
-
-#include "defines.h"
-#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
-#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
-#include "suggest/policyimpl/dictionary/header/header_policy.h"
-#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
-#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
-#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
-
-namespace latinime {
-
-class DicNode;
-class DicNodeVector;
-
-class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
- public:
- DynamicPatriciaTriePolicy(const MmappedBuffer *const buffer)
- : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), buffer->getBufferSize()),
- mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(),
- mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
- mShortcutListPolicy(&mBufferWithExtendableBuffer),
- mBigramListPolicy(&mHeaderPolicy, &mBufferWithExtendableBuffer, &mShortcutListPolicy,
- mHeaderPolicy.isDecayingDict()),
- mUnigramCount(mHeaderPolicy.getUnigramCount()),
- mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {}
-
- ~DynamicPatriciaTriePolicy() {
- delete mBuffer;
- }
-
- AK_FORCE_INLINE int getRootPosition() const {
- return 0;
- }
-
- void createAndGetAllChildNodes(const DicNode *const dicNode,
- DicNodeVector *const childDicNodes) const;
-
- int getCodePointsAndProbabilityAndReturnCodePointCount(
- const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints,
- int *const outUnigramProbability) const;
-
- int getTerminalNodePositionOfWord(const int *const inWord,
- const int length, const bool forceLowerCaseSearch) const;
-
- int getProbability(const int unigramProbability, const int bigramProbability) const;
-
- int getUnigramProbabilityOfPtNode(const int ptNodePos) const;
-
- int getShortcutPositionOfPtNode(const int ptNodePos) const;
-
- int getBigramsPositionOfPtNode(const int ptNodePos) const;
-
- const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
- return &mHeaderPolicy;
- }
-
- const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const {
- return &mBigramListPolicy;
- }
-
- const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
- return &mShortcutListPolicy;
- }
-
- bool addUnigramWord(const int *const word, const int length, const int probability);
-
- bool addBigramWords(const int *const word0, const int length0, const int *const word1,
- const int length1, const int probability);
-
- bool removeBigramWords(const int *const word0, const int length0, const int *const word1,
- const int length1);
-
- void flush(const char *const filePath);
-
- void flushWithGC(const char *const filePath);
-
- bool needsToRunGC(const bool mindsBlockByGC) const;
-
- void getProperty(const char *const query, char *const outResult,
- const int maxResultLength);
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy);
-
- static const char *const UNIGRAM_COUNT_QUERY;
- static const char *const BIGRAM_COUNT_QUERY;
- static const char *const MAX_UNIGRAM_COUNT_QUERY;
- static const char *const MAX_BIGRAM_COUNT_QUERY;
- static const char *const SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY;
- static const int MAX_DICT_EXTENDED_REGION_SIZE;
- static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
-
- const MmappedBuffer *const mBuffer;
- const HeaderPolicy mHeaderPolicy;
- BufferWithExtendableBuffer mBufferWithExtendableBuffer;
- DynamicShortcutListPolicy mShortcutListPolicy;
- DynamicBigramListPolicy mBigramListPolicy;
- int mUnigramCount;
- int mBigramCount;
- int mNeedsToDecayForTesting;
-};
-} // namespace latinime
-#endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp
deleted file mode 100644
index f108c219f..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp
+++ /dev/null
@@ -1,239 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
-
-#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
-
-namespace latinime {
-
-// To avoid infinite loop caused by invalid or malicious forward links.
-const int DynamicPatriciaTrieReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
-const int DynamicPatriciaTrieReadingHelper::MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
-const size_t DynamicPatriciaTrieReadingHelper::MAX_READING_STATE_STACK_SIZE = MAX_WORD_LENGTH;
-
-// Visits all PtNodes in post-order depth first manner.
-// For example, visits c -> b -> y -> x -> a for the following dictionary:
-// a _ b _ c
-// \ x _ y
-bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPostorderDepthFirstManner(
- TraversingEventListener *const listener) {
- bool alreadyVisitedChildren = false;
- // Descend from the root to the root PtNode array.
- if (!listener->onDescend(getPosOfLastPtNodeArrayHead())) {
- return false;
- }
- while (!isEnd()) {
- if (!alreadyVisitedChildren) {
- if (mNodeReader.hasChildren()) {
- // Move to the first child.
- if (!listener->onDescend(mNodeReader.getChildrenPos())) {
- return false;
- }
- pushReadingStateToStack();
- readChildNode();
- } else {
- alreadyVisitedChildren = true;
- }
- } else {
- if (!listener->onVisitingPtNode(&mNodeReader, mMergedNodeCodePoints)) {
- return false;
- }
- readNextSiblingNode();
- if (isEnd()) {
- // All PtNodes in current linked PtNode arrays have been visited.
- // Return to the parent.
- if (!listener->onReadingPtNodeArrayTail()) {
- return false;
- }
- if (mReadingStateStack.size() <= 0) {
- break;
- }
- if (!listener->onAscend()) {
- return false;
- }
- popReadingStateFromStack();
- alreadyVisitedChildren = true;
- } else {
- // Process sibling PtNode.
- alreadyVisitedChildren = false;
- }
- }
- }
- // Ascend from the root PtNode array to the root.
- if (!listener->onAscend()) {
- return false;
- }
- return !isError();
-}
-
-// Visits all PtNodes in PtNode array level pre-order depth first manner, which is the same order
-// that PtNodes are written in the dictionary buffer.
-// For example, visits a -> b -> x -> c -> y for the following dictionary:
-// a _ b _ c
-// \ x _ y
-bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
- TraversingEventListener *const listener) {
- bool alreadyVisitedAllPtNodesInArray = false;
- bool alreadyVisitedChildren = false;
- // Descend from the root to the root PtNode array.
- if (!listener->onDescend(getPosOfLastPtNodeArrayHead())) {
- return false;
- }
- if (isEnd()) {
- // Empty dictionary. Needs to notify the listener of the tail of empty PtNode array.
- if (!listener->onReadingPtNodeArrayTail()) {
- return false;
- }
- }
- pushReadingStateToStack();
- while (!isEnd()) {
- if (alreadyVisitedAllPtNodesInArray) {
- if (alreadyVisitedChildren) {
- // Move to next sibling PtNode's children.
- readNextSiblingNode();
- if (isEnd()) {
- // Return to the parent PTNode.
- if (!listener->onAscend()) {
- return false;
- }
- if (mReadingStateStack.size() <= 0) {
- break;
- }
- popReadingStateFromStack();
- alreadyVisitedChildren = true;
- alreadyVisitedAllPtNodesInArray = true;
- } else {
- alreadyVisitedChildren = false;
- }
- } else {
- if (mNodeReader.hasChildren()) {
- // Move to the first child.
- if (!listener->onDescend(mNodeReader.getChildrenPos())) {
- return false;
- }
- pushReadingStateToStack();
- readChildNode();
- // Push state to return the head of PtNode array.
- pushReadingStateToStack();
- alreadyVisitedAllPtNodesInArray = false;
- alreadyVisitedChildren = false;
- } else {
- alreadyVisitedChildren = true;
- }
- }
- } else {
- if (!listener->onVisitingPtNode(&mNodeReader, mMergedNodeCodePoints)) {
- return false;
- }
- readNextSiblingNode();
- if (isEnd()) {
- if (!listener->onReadingPtNodeArrayTail()) {
- return false;
- }
- // Return to the head of current PtNode array.
- popReadingStateFromStack();
- alreadyVisitedAllPtNodesInArray = true;
- }
- }
- }
- popReadingStateFromStack();
- // Ascend from the root PtNode array to the root.
- if (!listener->onAscend()) {
- return false;
- }
- return !isError();
-}
-
-// Read node array size and process empty node arrays. Nodes and arrays are counted up in this
-// method to avoid an infinite loop.
-void DynamicPatriciaTrieReadingHelper::nextPtNodeArray() {
- if (mReadingState.mPos < 0 || mReadingState.mPos >= mBuffer->getTailPosition()) {
- // Reading invalid position because of a bug or a broken dictionary.
- AKLOGE("Reading PtNode array info from invalid dictionary position: %d, dict size: %d",
- mReadingState.mPos, mBuffer->getTailPosition());
- ASSERT(false);
- mIsError = true;
- mReadingState.mPos = NOT_A_DICT_POS;
- return;
- }
- mReadingState.mPosOfLastPtNodeArrayHead = mReadingState.mPos;
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos);
- const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
- if (usesAdditionalBuffer) {
- mReadingState.mPos -= mBuffer->getOriginalBufferSize();
- }
- mReadingState.mNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
- dictBuf, &mReadingState.mPos);
- if (usesAdditionalBuffer) {
- mReadingState.mPos += mBuffer->getOriginalBufferSize();
- }
- // Count up nodes and node arrays to avoid infinite loop.
- mReadingState.mTotalNodeCount += mReadingState.mNodeCount;
- mReadingState.mNodeArrayCount++;
- if (mReadingState.mNodeCount < 0
- || mReadingState.mTotalNodeCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP
- || mReadingState.mNodeArrayCount > MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) {
- // Invalid dictionary.
- AKLOGI("Invalid dictionary. nodeCount: %d, totalNodeCount: %d, MAX_CHILD_COUNT: %d"
- "nodeArrayCount: %d, MAX_NODE_ARRAY_COUNT: %d",
- mReadingState.mNodeCount, mReadingState.mTotalNodeCount,
- MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP, mReadingState.mNodeArrayCount,
- MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP);
- ASSERT(false);
- mIsError = true;
- mReadingState.mPos = NOT_A_DICT_POS;
- return;
- }
- if (mReadingState.mNodeCount == 0) {
- // Empty node array. Try following forward link.
- followForwardLink();
- }
-}
-
-// Follow the forward link and read the next node array if exists.
-void DynamicPatriciaTrieReadingHelper::followForwardLink() {
- if (mReadingState.mPos < 0 || mReadingState.mPos >= mBuffer->getTailPosition()) {
- // Reading invalid position because of bug or broken dictionary.
- AKLOGE("Reading forward link from invalid dictionary position: %d, dict size: %d",
- mReadingState.mPos, mBuffer->getTailPosition());
- ASSERT(false);
- mIsError = true;
- mReadingState.mPos = NOT_A_DICT_POS;
- return;
- }
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos);
- const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
- if (usesAdditionalBuffer) {
- mReadingState.mPos -= mBuffer->getOriginalBufferSize();
- }
- const int forwardLinkPosition =
- DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(dictBuf, mReadingState.mPos);
- if (usesAdditionalBuffer) {
- mReadingState.mPos += mBuffer->getOriginalBufferSize();
- }
- mReadingState.mPosOfLastForwardLinkField = mReadingState.mPos;
- if (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(forwardLinkPosition)) {
- // Follow the forward link.
- mReadingState.mPos += forwardLinkPosition;
- nextPtNodeArray();
- } else {
- // All node arrays have been read.
- mReadingState.mPos = NOT_A_DICT_POS;
- }
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h
deleted file mode 100644
index a71c06971..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h
+++ /dev/null
@@ -1,289 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H
-#define LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H
-
-#include <cstddef>
-#include <vector>
-
-#include "defines.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
-
-namespace latinime {
-
-class BufferWithExtendableBuffer;
-class DictionaryBigramsStructurePolicy;
-class DictionaryShortcutsStructurePolicy;
-
-/*
- * This class is used for traversing dynamic patricia trie. This class supports iterating nodes and
- * dealing with additional buffer. This class counts nodes and node arrays to avoid infinite loop.
- */
-class DynamicPatriciaTrieReadingHelper {
- public:
- class TraversingEventListener {
- public:
- virtual ~TraversingEventListener() {};
-
- // Returns whether the event handling was succeeded or not.
- virtual bool onAscend() = 0;
-
- // Returns whether the event handling was succeeded or not.
- virtual bool onDescend(const int ptNodeArrayPos) = 0;
-
- // Returns whether the event handling was succeeded or not.
- virtual bool onReadingPtNodeArrayTail() = 0;
-
- // Returns whether the event handling was succeeded or not.
- virtual bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
- const int *const nodeCodePoints) = 0;
-
- protected:
- TraversingEventListener() {};
-
- private:
- DISALLOW_COPY_AND_ASSIGN(TraversingEventListener);
- };
-
- DynamicPatriciaTrieReadingHelper(const BufferWithExtendableBuffer *const buffer,
- const DictionaryBigramsStructurePolicy *const bigramsPolicy,
- const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
- : mIsError(false), mReadingState(), mBuffer(buffer),
- mNodeReader(mBuffer, bigramsPolicy, shortcutsPolicy), mReadingStateStack() {}
-
- ~DynamicPatriciaTrieReadingHelper() {}
-
- AK_FORCE_INLINE bool isError() const {
- return mIsError;
- }
-
- AK_FORCE_INLINE bool isEnd() const {
- return mReadingState.mPos == NOT_A_DICT_POS;
- }
-
- // Initialize reading state with the head position of a PtNode array.
- AK_FORCE_INLINE void initWithPtNodeArrayPos(const int ptNodeArrayPos) {
- if (ptNodeArrayPos == NOT_A_DICT_POS) {
- mReadingState.mPos = NOT_A_DICT_POS;
- } else {
- mIsError = false;
- mReadingState.mPos = ptNodeArrayPos;
- mReadingState.mPrevTotalCodePointCount = 0;
- mReadingState.mTotalNodeCount = 0;
- mReadingState.mNodeArrayCount = 0;
- mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
- mReadingStateStack.clear();
- nextPtNodeArray();
- if (!isEnd()) {
- fetchPtNodeInfo();
- }
- }
- }
-
- // Initialize reading state with the head position of a node.
- AK_FORCE_INLINE void initWithPtNodePos(const int ptNodePos) {
- if (ptNodePos == NOT_A_DICT_POS) {
- mReadingState.mPos = NOT_A_DICT_POS;
- } else {
- mIsError = false;
- mReadingState.mPos = ptNodePos;
- mReadingState.mNodeCount = 1;
- mReadingState.mPrevTotalCodePointCount = 0;
- mReadingState.mTotalNodeCount = 1;
- mReadingState.mNodeArrayCount = 1;
- mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
- mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS;
- mReadingStateStack.clear();
- fetchPtNodeInfo();
- }
- }
-
- AK_FORCE_INLINE const DynamicPatriciaTrieNodeReader* getNodeReader() const {
- return &mNodeReader;
- }
-
- AK_FORCE_INLINE bool isValidTerminalNode() const {
- return !isEnd() && !mNodeReader.isDeleted() && mNodeReader.isTerminal();
- }
-
- AK_FORCE_INLINE bool isMatchedCodePoint(const int index, const int codePoint) const {
- return mMergedNodeCodePoints[index] == codePoint;
- }
-
- // Return code point count exclude the last read node's code points.
- AK_FORCE_INLINE int getPrevTotalCodePointCount() const {
- return mReadingState.mPrevTotalCodePointCount;
- }
-
- // Return code point count include the last read node's code points.
- AK_FORCE_INLINE int getTotalCodePointCount() const {
- return mReadingState.mPrevTotalCodePointCount + mNodeReader.getCodePointCount();
- }
-
- AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder(
- const int index, int *const outCodePoints) const {
- const int nodeCodePointCount = mNodeReader.getCodePointCount();
- for (int i = 0; i < nodeCodePointCount; ++i) {
- outCodePoints[index + i] = mMergedNodeCodePoints[nodeCodePointCount - 1 - i];
- }
- }
-
- AK_FORCE_INLINE const int *getMergedNodeCodePoints() const {
- return mMergedNodeCodePoints;
- }
-
- AK_FORCE_INLINE void readNextSiblingNode() {
- mReadingState.mNodeCount -= 1;
- mReadingState.mPos = mNodeReader.getSiblingNodePos();
- if (mReadingState.mNodeCount <= 0) {
- // All nodes in the current node array have been read.
- followForwardLink();
- if (!isEnd()) {
- fetchPtNodeInfo();
- }
- } else {
- fetchPtNodeInfo();
- }
- }
-
- // Read the first child node of the current node.
- AK_FORCE_INLINE void readChildNode() {
- if (mNodeReader.hasChildren()) {
- mReadingState.mPrevTotalCodePointCount += mNodeReader.getCodePointCount();
- mReadingState.mTotalNodeCount = 0;
- mReadingState.mNodeArrayCount = 0;
- mReadingState.mPos = mNodeReader.getChildrenPos();
- mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
- // Read children node array.
- nextPtNodeArray();
- if (!isEnd()) {
- fetchPtNodeInfo();
- }
- } else {
- mReadingState.mPos = NOT_A_DICT_POS;
- }
- }
-
- // Read the parent node of the current node.
- AK_FORCE_INLINE void readParentNode() {
- if (mNodeReader.getParentPos() != NOT_A_DICT_POS) {
- mReadingState.mPrevTotalCodePointCount += mNodeReader.getCodePointCount();
- mReadingState.mTotalNodeCount = 1;
- mReadingState.mNodeArrayCount = 1;
- mReadingState.mNodeCount = 1;
- mReadingState.mPos = mNodeReader.getParentPos();
- mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
- mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS;
- fetchPtNodeInfo();
- } else {
- mReadingState.mPos = NOT_A_DICT_POS;
- }
- }
-
- AK_FORCE_INLINE int getPosOfLastForwardLinkField() const {
- return mReadingState.mPosOfLastForwardLinkField;
- }
-
- AK_FORCE_INLINE int getPosOfLastPtNodeArrayHead() const {
- return mReadingState.mPosOfLastPtNodeArrayHead;
- }
-
- AK_FORCE_INLINE void reloadCurrentPtNodeInfo() {
- if (!isEnd()) {
- fetchPtNodeInfo();
- }
- }
-
- bool traverseAllPtNodesInPostorderDepthFirstManner(TraversingEventListener *const listener);
-
- bool traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
- TraversingEventListener *const listener);
-
- private:
- DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieReadingHelper);
-
- class ReadingState {
- public:
- // Note that copy constructor and assignment operator are used for this class to use
- // std::vector.
- ReadingState() : mPos(NOT_A_DICT_POS), mNodeCount(0), mPrevTotalCodePointCount(0),
- mTotalNodeCount(0), mNodeArrayCount(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS),
- mPosOfLastPtNodeArrayHead(NOT_A_DICT_POS) {}
-
- int mPos;
- // Node count of a node array.
- int mNodeCount;
- int mPrevTotalCodePointCount;
- int mTotalNodeCount;
- int mNodeArrayCount;
- int mPosOfLastForwardLinkField;
- int mPosOfLastPtNodeArrayHead;
- };
-
- static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP;
- static const int MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP;
- static const size_t MAX_READING_STATE_STACK_SIZE;
-
- // TODO: Introduce error code to track what caused the error.
- bool mIsError;
- ReadingState mReadingState;
- const BufferWithExtendableBuffer *const mBuffer;
- DynamicPatriciaTrieNodeReader mNodeReader;
- int mMergedNodeCodePoints[MAX_WORD_LENGTH];
- std::vector<ReadingState> mReadingStateStack;
-
- void nextPtNodeArray();
-
- void followForwardLink();
-
- AK_FORCE_INLINE void fetchPtNodeInfo() {
- mNodeReader.fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(mReadingState.mPos,
- MAX_WORD_LENGTH, mMergedNodeCodePoints);
- if (mNodeReader.getCodePointCount() <= 0) {
- // Empty node is not allowed.
- mIsError = true;
- mReadingState.mPos = NOT_A_DICT_POS;
- }
- }
-
- AK_FORCE_INLINE void pushReadingStateToStack() {
- if (mReadingStateStack.size() > MAX_READING_STATE_STACK_SIZE) {
- AKLOGI("Reading state stack overflow. Max size: %zd", MAX_READING_STATE_STACK_SIZE);
- ASSERT(false);
- mIsError = true;
- mReadingState.mPos = NOT_A_DICT_POS;
- } else {
- mReadingStateStack.push_back(mReadingState);
- }
- }
-
- AK_FORCE_INLINE void popReadingStateFromStack() {
- if (mReadingStateStack.empty()) {
- mReadingState.mPos = NOT_A_DICT_POS;
- } else {
- mReadingState = mReadingStateStack.back();
- mReadingStateStack.pop_back();
- if (!isEnd()) {
- fetchPtNodeInfo();
- }
- }
- }
-};
-} // namespace latinime
-#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
deleted file mode 100644
index 052558bfc..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
+++ /dev/null
@@ -1,558 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
-
-#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
-#include "suggest/policyimpl/dictionary/header/header_policy.h"
-#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
-#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
-#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
-#include "utils/hash_map_compat.h"
-
-namespace latinime {
-
-const int DynamicPatriciaTrieWritingHelper::CHILDREN_POSITION_FIELD_SIZE = 3;
-// TODO: Make MAX_DICTIONARY_SIZE 8MB.
-const size_t DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE = 2 * 1024 * 1024;
-
-bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
- DynamicPatriciaTrieReadingHelper *const readingHelper,
- const int *const wordCodePoints, const int codePointCount, const int probability,
- bool *const outAddedNewUnigram) {
- int parentPos = NOT_A_DICT_POS;
- while (!readingHelper->isEnd()) {
- const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount();
- if (!readingHelper->isMatchedCodePoint(0 /* index */,
- wordCodePoints[matchedCodePointCount])) {
- // The first code point is different from target code point. Skip this node and read
- // the next sibling node.
- readingHelper->readNextSiblingNode();
- continue;
- }
- // Check following merged node code points.
- const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper->getNodeReader();
- const int nodeCodePointCount = nodeReader->getCodePointCount();
- for (int j = 1; j < nodeCodePointCount; ++j) {
- const int nextIndex = matchedCodePointCount + j;
- if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(j,
- wordCodePoints[matchedCodePointCount + j])) {
- *outAddedNewUnigram = true;
- return reallocatePtNodeAndAddNewPtNodes(nodeReader,
- readingHelper->getMergedNodeCodePoints(), j,
- getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */,
- probability),
- wordCodePoints + matchedCodePointCount,
- codePointCount - matchedCodePointCount);
- }
- }
- // All characters are matched.
- if (codePointCount == readingHelper->getTotalCodePointCount()) {
- return setPtNodeProbability(nodeReader, probability,
- readingHelper->getMergedNodeCodePoints(), outAddedNewUnigram);
- }
- if (!nodeReader->hasChildren()) {
- *outAddedNewUnigram = true;
- return createChildrenPtNodeArrayAndAChildPtNode(nodeReader,
- getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability),
- wordCodePoints + readingHelper->getTotalCodePointCount(),
- codePointCount - readingHelper->getTotalCodePointCount());
- }
- // Advance to the children nodes.
- parentPos = nodeReader->getHeadPos();
- readingHelper->readChildNode();
- }
- if (readingHelper->isError()) {
- // The dictionary is invalid.
- return false;
- }
- int pos = readingHelper->getPosOfLastForwardLinkField();
- *outAddedNewUnigram = true;
- return createAndInsertNodeIntoPtNodeArray(parentPos,
- wordCodePoints + readingHelper->getPrevTotalCodePointCount(),
- codePointCount - readingHelper->getPrevTotalCodePointCount(),
- getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), &pos);
-}
-
-bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos,
- const int probability, bool *const outAddedNewBigram) {
- int mMergedNodeCodePoints[MAX_WORD_LENGTH];
- DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
- nodeReader.fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(word0Pos, MAX_WORD_LENGTH,
- mMergedNodeCodePoints);
- // Move node to add bigram entry.
- const int newNodePos = mBuffer->getTailPosition();
- if (!markNodeAsMovedAndSetPosition(&nodeReader, newNodePos, newNodePos)) {
- return false;
- }
- int writingPos = newNodePos;
- // Write a new PtNode using original PtNode's info to the tail of the dictionary in mBuffer.
- if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, &nodeReader, nodeReader.getParentPos(),
- mMergedNodeCodePoints, nodeReader.getCodePointCount(), nodeReader.getProbability(),
- &writingPos)) {
- return false;
- }
- nodeReader.fetchNodeInfoInBufferFromPtNodePos(newNodePos);
- if (nodeReader.getBigramsPos() != NOT_A_DICT_POS) {
- // Insert a new bigram entry into the existing bigram list.
- int bigramListPos = nodeReader.getBigramsPos();
- return mBigramPolicy->addNewBigramEntryToBigramList(word1Pos, probability, &bigramListPos,
- outAddedNewBigram);
- } else {
- // The PtNode doesn't have a bigram list.
- *outAddedNewBigram = true;
- // First, Write a bigram entry at the tail position of the PtNode.
- if (!mBigramPolicy->writeNewBigramEntry(word1Pos, probability, &writingPos)) {
- return false;
- }
- // Then, Mark as the PtNode having bigram list in the flags.
- const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
- PatriciaTrieReadingUtils::createAndGetFlags(nodeReader.isBlacklisted(),
- nodeReader.isNotAWord(), nodeReader.getProbability() != NOT_A_PROBABILITY,
- nodeReader.getShortcutPos() != NOT_A_DICT_POS, true /* hasBigrams */,
- nodeReader.getCodePointCount() > 1, CHILDREN_POSITION_FIELD_SIZE);
- writingPos = newNodePos;
- // Write updated flags into the moved PtNode's flags field.
- return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
- &writingPos);
- }
-}
-
-// Remove a bigram relation from word0Pos to word1Pos.
-bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) {
- DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
- nodeReader.fetchNodeInfoInBufferFromPtNodePos(word0Pos);
- if (nodeReader.getBigramsPos() == NOT_A_DICT_POS) {
- return false;
- }
- return mBigramPolicy->removeBigram(nodeReader.getBigramsPos(), word1Pos);
-}
-
-void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileName,
- const HeaderPolicy *const headerPolicy, const int unigramCount, const int bigramCount) {
- BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
- const int extendedRegionSize = headerPolicy->getExtendedRegionSize() +
- mBuffer->getUsedAdditionalBufferSize();
- if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */,
- false /* updatesLastDecayedTime */, unigramCount, bigramCount, extendedRegionSize)) {
- return;
- }
- DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, mBuffer);
-}
-
-void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos,
- const char *const fileName, const HeaderPolicy *const headerPolicy) {
- BufferWithExtendableBuffer newDictBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */,
- MAX_DICTIONARY_SIZE);
- int unigramCount = 0;
- int bigramCount = 0;
- if (mNeedsToDecay) {
- ForgettingCurveUtils::sTimeKeeper.setCurrentTime();
- }
- if (!runGC(rootPtNodeArrayPos, headerPolicy, &newDictBuffer, &unigramCount, &bigramCount)) {
- return;
- }
- BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
- if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */,
- mNeedsToDecay, unigramCount, bigramCount, 0 /* extendedRegionSize */)) {
- return;
- }
- DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, &newDictBuffer);
-}
-
-bool DynamicPatriciaTrieWritingHelper::markNodeAsDeleted(
- const DynamicPatriciaTrieNodeReader *const nodeToUpdate) {
- int pos = nodeToUpdate->getHeadPos();
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
- const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
- if (usesAdditionalBuffer) {
- pos -= mBuffer->getOriginalBufferSize();
- }
- // Read original flags
- const PatriciaTrieReadingUtils::NodeFlags originalFlags =
- PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
- const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
- DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */,
- true /* isDeleted */);
- int writingPos = nodeToUpdate->getHeadPos();
- // Update flags.
- return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
- &writingPos);
-}
-
-bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
- const DynamicPatriciaTrieNodeReader *const originalNode, const int movedPos,
- const int bigramLinkedNodePos) {
- int pos = originalNode->getHeadPos();
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
- const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
- if (usesAdditionalBuffer) {
- pos -= mBuffer->getOriginalBufferSize();
- }
- // Read original flags
- const PatriciaTrieReadingUtils::NodeFlags originalFlags =
- PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
- const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
- DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */,
- false /* isDeleted */);
- int writingPos = originalNode->getHeadPos();
- // Update flags.
- if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
- &writingPos)) {
- return false;
- }
- // Update moved position, which is stored in the parent offset field.
- if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
- mBuffer, movedPos, originalNode->getHeadPos(), &writingPos)) {
- return false;
- }
- // Update bigram linked node position, which is stored in the children position field.
- int childrenPosFieldPos = originalNode->getChildrenPosFieldPos();
- if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
- mBuffer, bigramLinkedNodePos, &childrenPosFieldPos)) {
- return false;
- }
- if (originalNode->hasChildren()) {
- // Update children's parent position.
- DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
- const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
- readingHelper.initWithPtNodeArrayPos(originalNode->getChildrenPos());
- while (!readingHelper.isEnd()) {
- int parentOffsetFieldPos = nodeReader->getHeadPos()
- + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
- if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
- mBuffer, bigramLinkedNodePos, nodeReader->getHeadPos(),
- &parentOffsetFieldPos)) {
- // Parent offset cannot be written because of a bug or a broken dictionary; thus,
- // we give up to update dictionary.
- return false;
- }
- readingHelper.readNextSiblingNode();
- }
- }
- return true;
-}
-
-// Write new PtNode at writingPos.
-bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(
- BufferWithExtendableBuffer *const bufferToWrite, const bool isBlacklisted,
- const bool isNotAWord, const int parentPos, const int *const codePoints,
- const int codePointCount, const int probability, const int childrenPos,
- const int originalBigramListPos, const int originalShortcutListPos,
- int *const writingPos) {
- const int nodePos = *writingPos;
- // Write dummy flags. The Node flags are updated with appropriate flags at the last step of the
- // PtNode writing.
- if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(bufferToWrite,
- 0 /* nodeFlags */, writingPos)) {
- return false;
- }
- // Calculate a parent offset and write the offset.
- if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(bufferToWrite,
- parentPos, nodePos, writingPos)) {
- return false;
- }
- // Write code points
- if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(bufferToWrite,
- codePoints, codePointCount, writingPos)) {
- return false;
- }
- // Write probability when the probability is a valid probability, which means this node is
- // terminal.
- if (probability != NOT_A_PROBABILITY) {
- if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(bufferToWrite,
- probability, writingPos)) {
- return false;
- }
- }
- // Write children position
- if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(bufferToWrite,
- childrenPos, writingPos)) {
- return false;
- }
- // Copy shortcut list when the originalShortcutListPos is valid dictionary position.
- if (originalShortcutListPos != NOT_A_DICT_POS) {
- int fromPos = originalShortcutListPos;
- if (!mShortcutPolicy->copyAllShortcutsAndReturnIfSucceededOrNot(bufferToWrite, &fromPos,
- writingPos)) {
- return false;
- }
- }
- // Copy bigram list when the originalBigramListPos is valid dictionary position.
- int bigramCount = 0;
- if (originalBigramListPos != NOT_A_DICT_POS) {
- int fromPos = originalBigramListPos;
- if (!mBigramPolicy->copyAllBigrams(bufferToWrite, &fromPos, writingPos, &bigramCount)) {
- return false;
- }
- }
- // Create node flags and write them.
- PatriciaTrieReadingUtils::NodeFlags nodeFlags =
- PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord,
- probability != NOT_A_PROBABILITY /* isTerminal */,
- originalShortcutListPos != NOT_A_DICT_POS /* hasShortcutTargets */,
- bigramCount > 0 /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */,
- CHILDREN_POSITION_FIELD_SIZE);
- int flagsFieldPos = nodePos;
- if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(bufferToWrite, nodeFlags,
- &flagsFieldPos)) {
- return false;
- }
- return true;
-}
-
-bool DynamicPatriciaTrieWritingHelper::writePtNodeToBuffer(
- BufferWithExtendableBuffer *const bufferToWrite, const int parentPos,
- const int *const codePoints, const int codePointCount, const int probability,
- int *const writingPos) {
- return writePtNodeWithFullInfoToBuffer(bufferToWrite, false /* isBlacklisted */,
- false /* isNotAWord */, parentPos, codePoints, codePointCount, probability,
- NOT_A_DICT_POS /* childrenPos */, NOT_A_DICT_POS /* originalBigramsPos */,
- NOT_A_DICT_POS /* originalShortcutPos */, writingPos);
-}
-
-bool DynamicPatriciaTrieWritingHelper::writePtNodeToBufferByCopyingPtNodeInfo(
- BufferWithExtendableBuffer *const bufferToWrite,
- const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos,
- const int *const codePoints, const int codePointCount, const int probability,
- int *const writingPos) {
- return writePtNodeWithFullInfoToBuffer(bufferToWrite, originalNode->isBlacklisted(),
- originalNode->isNotAWord(), parentPos, codePoints, codePointCount, probability,
- originalNode->getChildrenPos(), originalNode->getBigramsPos(),
- originalNode->getShortcutPos(), writingPos);
-}
-
-bool DynamicPatriciaTrieWritingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos,
- const int *const nodeCodePoints, const int nodeCodePointCount, const int probability,
- int *const forwardLinkFieldPos) {
- const int newPtNodeArrayPos = mBuffer->getTailPosition();
- if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
- newPtNodeArrayPos, forwardLinkFieldPos)) {
- return false;
- }
- return createNewPtNodeArrayWithAChildPtNode(parentPos, nodeCodePoints, nodeCodePointCount,
- probability);
-}
-
-bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability(
- const DynamicPatriciaTrieNodeReader *const originalPtNode, const int probability,
- const int *const codePoints, bool *const outAddedNewUnigram) {
- if (originalPtNode->isTerminal()) {
- // Overwrites the probability.
- *outAddedNewUnigram = false;
- const int probabilityToWrite = getUpdatedProbability(originalPtNode->getProbability(),
- probability);
- int probabilityFieldPos = originalPtNode->getProbabilityFieldPos();
- if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
- probabilityToWrite, &probabilityFieldPos)) {
- return false;
- }
- } else {
- // Make the node terminal and write the probability.
- *outAddedNewUnigram = true;
- int movedPos = mBuffer->getTailPosition();
- if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos, movedPos)) {
- return false;
- }
- if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, originalPtNode,
- originalPtNode->getParentPos(), codePoints, originalPtNode->getCodePointCount(),
- getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability),
- &movedPos)) {
- return false;
- }
- }
- return true;
-}
-
-bool DynamicPatriciaTrieWritingHelper::createChildrenPtNodeArrayAndAChildPtNode(
- const DynamicPatriciaTrieNodeReader *const parentNode, const int probability,
- const int *const codePoints, const int codePointCount) {
- const int newPtNodeArrayPos = mBuffer->getTailPosition();
- int childrenPosFieldPos = parentNode->getChildrenPosFieldPos();
- if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
- newPtNodeArrayPos, &childrenPosFieldPos)) {
- return false;
- }
- return createNewPtNodeArrayWithAChildPtNode(parentNode->getHeadPos(), codePoints,
- codePointCount, probability);
-}
-
-bool DynamicPatriciaTrieWritingHelper::createNewPtNodeArrayWithAChildPtNode(
- const int parentPtNodePos, const int *const nodeCodePoints, const int nodeCodePointCount,
- const int probability) {
- int writingPos = mBuffer->getTailPosition();
- if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
- 1 /* arraySize */, &writingPos)) {
- return false;
- }
- if (!writePtNodeToBuffer(mBuffer, parentPtNodePos, nodeCodePoints, nodeCodePointCount,
- probability, &writingPos)) {
- return false;
- }
- if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
- NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
- return false;
- }
- return true;
-}
-
-// Returns whether the dictionary updating was succeeded or not.
-bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
- const DynamicPatriciaTrieNodeReader *const reallocatingPtNode,
- const int *const reallocatingPtNodeCodePoints, const int overlappingCodePointCount,
- const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
- const int newNodeCodePointCount) {
- // When addsExtraChild is true, split the reallocating PtNode and add new child.
- // Reallocating PtNode: abcde, newNode: abcxy.
- // abc (1st, not terminal) __ de (2nd)
- // \_ xy (extra child, terminal)
- // Otherwise, this method makes 1st part terminal and write probabilityOfNewPtNode.
- // Reallocating PtNode: abcde, newNode: abc.
- // abc (1st, terminal) __ de (2nd)
- const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount;
- const int firstPartOfReallocatedPtNodePos = mBuffer->getTailPosition();
- int writingPos = firstPartOfReallocatedPtNodePos;
- // Write the 1st part of the reallocating node. The children position will be updated later
- // with actual children position.
- const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode;
- if (!writePtNodeToBuffer(mBuffer, reallocatingPtNode->getParentPos(),
- reallocatingPtNodeCodePoints, overlappingCodePointCount, newProbability,
- &writingPos)) {
- return false;
- }
- const int actualChildrenPos = writingPos;
- // Create new children PtNode array.
- const size_t newPtNodeCount = addsExtraChild ? 2 : 1;
- if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
- newPtNodeCount, &writingPos)) {
- return false;
- }
- // Write the 2nd part of the reallocating node.
- const int secondPartOfReallocatedPtNodePos = writingPos;
- if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, reallocatingPtNode,
- firstPartOfReallocatedPtNodePos,
- reallocatingPtNodeCodePoints + overlappingCodePointCount,
- reallocatingPtNode->getCodePointCount() - overlappingCodePointCount,
- reallocatingPtNode->getProbability(), &writingPos)) {
- return false;
- }
- if (addsExtraChild) {
- if (!writePtNodeToBuffer(mBuffer, firstPartOfReallocatedPtNodePos,
- newNodeCodePoints + overlappingCodePointCount,
- newNodeCodePointCount - overlappingCodePointCount, probabilityOfNewPtNode,
- &writingPos)) {
- return false;
- }
- }
- if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
- NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
- return false;
- }
- // Update original reallocatingPtNode as moved.
- if (!markNodeAsMovedAndSetPosition(reallocatingPtNode, firstPartOfReallocatedPtNodePos,
- secondPartOfReallocatedPtNodePos)) {
- return false;
- }
- // Load node info. Information of the 1st part will be fetched.
- DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
- nodeReader.fetchNodeInfoInBufferFromPtNodePos(firstPartOfReallocatedPtNodePos);
- // Update children position.
- int childrenPosFieldPos = nodeReader.getChildrenPosFieldPos();
- if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
- actualChildrenPos, &childrenPosFieldPos)) {
- return false;
- }
- return true;
-}
-
-bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
- const HeaderPolicy *const headerPolicy, BufferWithExtendableBuffer *const bufferToWrite,
- int *const outUnigramCount, int *const outBigramCount) {
- DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
- readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
- DynamicPatriciaTrieGcEventListeners
- ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
- traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
- headerPolicy, this, mBuffer, mNeedsToDecay);
- if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
- &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
- return false;
- }
- if (mNeedsToDecay && traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
- .getValidUnigramCount() > ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC) {
- // TODO: Remove more unigrams.
- }
-
- readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
- DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability
- traversePolicyToUpdateBigramProbability(mBigramPolicy);
- if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
- &traversePolicyToUpdateBigramProbability)) {
- return false;
- }
- if (mNeedsToDecay && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount()
- > ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC) {
- // TODO: Remove more bigrams.
- }
-
- // Mapping from positions in mBuffer to positions in bufferToWrite.
- DictPositionRelocationMap dictPositionRelocationMap;
- readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
- DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
- traversePolicyToPlaceAndWriteValidPtNodesToBuffer(this, bufferToWrite,
- &dictPositionRelocationMap);
- if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
- &traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) {
- return false;
- }
-
- // Create policy instance for the GCed dictionary.
- DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite);
- DynamicBigramListPolicy newDictBigramPolicy(headerPolicy, bufferToWrite, &newDictShortcutPolicy,
- mNeedsToDecay);
- // Create reading helper for the GCed dictionary.
- DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictBigramPolicy,
- &newDictShortcutPolicy);
- newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
- DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
- traversePolicyToUpdateAllPositionFields(this, &newDictBigramPolicy, bufferToWrite,
- &dictPositionRelocationMap);
- if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
- &traversePolicyToUpdateAllPositionFields)) {
- return false;
- }
- *outUnigramCount = traversePolicyToUpdateAllPositionFields.getUnigramCount();
- *outBigramCount = traversePolicyToUpdateAllPositionFields.getBigramCount();
- return true;
-}
-
-int DynamicPatriciaTrieWritingHelper::getUpdatedProbability(const int originalProbability,
- const int newProbability) {
- if (mNeedsToDecay) {
- return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
- newProbability);
- } else {
- return newProbability;
- }
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
deleted file mode 100644
index ca8664729..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H
-#define LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H
-
-#include <stdint.h>
-
-#include "defines.h"
-#include "utils/hash_map_compat.h"
-
-namespace latinime {
-
-class BufferWithExtendableBuffer;
-class DynamicBigramListPolicy;
-class DynamicPatriciaTrieNodeReader;
-class DynamicPatriciaTrieReadingHelper;
-class DynamicShortcutListPolicy;
-class HeaderPolicy;
-
-class DynamicPatriciaTrieWritingHelper {
- public:
- typedef hash_map_compat<int, int> PtNodeArrayPositionRelocationMap;
- typedef hash_map_compat<int, int> PtNodePositionRelocationMap;
- struct DictPositionRelocationMap {
- public:
- DictPositionRelocationMap()
- : mPtNodeArrayPositionRelocationMap(), mPtNodePositionRelocationMap() {}
-
- PtNodeArrayPositionRelocationMap mPtNodeArrayPositionRelocationMap;
- PtNodePositionRelocationMap mPtNodePositionRelocationMap;
-
- private:
- DISALLOW_COPY_AND_ASSIGN(DictPositionRelocationMap);
- };
-
- static const size_t MAX_DICTIONARY_SIZE;
-
- DynamicPatriciaTrieWritingHelper(BufferWithExtendableBuffer *const buffer,
- DynamicBigramListPolicy *const bigramPolicy,
- DynamicShortcutListPolicy *const shortcutPolicy, const bool needsToDecay)
- : mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy),
- mNeedsToDecay(needsToDecay) {}
-
- ~DynamicPatriciaTrieWritingHelper() {}
-
- // Add a word to the dictionary. If the word already exists, update the probability.
- bool addUnigramWord(DynamicPatriciaTrieReadingHelper *const readingHelper,
- const int *const wordCodePoints, const int codePointCount, const int probability,
- bool *const outAddedNewUnigram);
-
- // Add a bigram relation from word0Pos to word1Pos.
- bool addBigramWords(const int word0Pos, const int word1Pos, const int probability,
- bool *const outAddedNewBigram);
-
- // Remove a bigram relation from word0Pos to word1Pos.
- bool removeBigramWords(const int word0Pos, const int word1Pos);
-
- void writeToDictFile(const char *const fileName, const HeaderPolicy *const headerPolicy,
- const int unigramCount, const int bigramCount);
-
- void writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const fileName,
- const HeaderPolicy *const headerPolicy);
-
- // CAVEAT: This method must be called only from inner classes of
- // DynamicPatriciaTrieGcEventListeners.
- bool markNodeAsDeleted(const DynamicPatriciaTrieNodeReader *const nodeToUpdate);
-
- // CAVEAT: This method must be called only from this class or inner classes of
- // DynamicPatriciaTrieGcEventListeners.
- bool writePtNodeToBufferByCopyingPtNodeInfo(BufferWithExtendableBuffer *const bufferToWrite,
- const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos,
- const int *const codePoints, const int codePointCount, const int probability,
- int *const writingPos);
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper);
-
- static const int CHILDREN_POSITION_FIELD_SIZE;
-
- BufferWithExtendableBuffer *const mBuffer;
- DynamicBigramListPolicy *const mBigramPolicy;
- DynamicShortcutListPolicy *const mShortcutPolicy;
- const bool mNeedsToDecay;
-
- bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate,
- const int movedPos, const int bigramLinkedNodePos);
-
- bool writePtNodeWithFullInfoToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
- const bool isBlacklisted, const bool isNotAWord,
- const int parentPos, const int *const codePoints, const int codePointCount,
- const int probability, const int childrenPos, const int originalBigramListPos,
- const int originalShortcutListPos, int *const writingPos);
-
- bool writePtNodeToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
- const int parentPos, const int *const codePoints, const int codePointCount,
- const int probability, int *const writingPos);
-
- bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
- const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos);
-
- bool setPtNodeProbability(const DynamicPatriciaTrieNodeReader *const originalNode,
- const int probability, const int *const codePoints, bool *const outAddedNewUnigram);
-
- bool createChildrenPtNodeArrayAndAChildPtNode(
- const DynamicPatriciaTrieNodeReader *const parentNode, const int probability,
- const int *const codePoints, const int codePointCount);
-
- bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints,
- const int nodeCodePointCount, const int probability);
-
- bool reallocatePtNodeAndAddNewPtNodes(
- const DynamicPatriciaTrieNodeReader *const reallocatingPtNode,
- const int *const reallocatingPtNodeCodePoints, const int overlappingCodePointCount,
- const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
- const int newNodeCodePointCount);
-
- bool runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy,
- BufferWithExtendableBuffer *const bufferToWrite, int *const outUnigramCount,
- int *const outBigramCount);
-
- int getUpdatedProbability(const int originalProbability, const int newProbability);
-};
-} // namespace latinime
-#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
index eb072fbaf..6ed65d921 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
@@ -16,19 +16,45 @@
#include "suggest/policyimpl/dictionary/header/header_policy.h"
+#include <algorithm>
+
namespace latinime {
-// Note that these are corresponding definitions in Java side in FormatSpec.FileHeader.
+// Note that these correspond to definitions on the Java side in DictionaryHeader.
const char *const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY = "MULTIPLE_WORDS_DEMOTION_RATE";
+const char *const HeaderPolicy::REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY =
+ "REQUIRES_GERMAN_UMLAUT_PROCESSING";
// TODO: Change attribute string to "IS_DECAYING_DICT".
const char *const HeaderPolicy::IS_DECAYING_DICT_KEY = "USES_FORGETTING_CURVE";
-const char *const HeaderPolicy::LAST_UPDATED_TIME_KEY = "date";
+const char *const HeaderPolicy::DATE_KEY = "date";
const char *const HeaderPolicy::LAST_DECAYED_TIME_KEY = "LAST_DECAYED_TIME";
const char *const HeaderPolicy::UNIGRAM_COUNT_KEY = "UNIGRAM_COUNT";
const char *const HeaderPolicy::BIGRAM_COUNT_KEY = "BIGRAM_COUNT";
const char *const HeaderPolicy::EXTENDED_REGION_SIZE_KEY = "EXTENDED_REGION_SIZE";
+// Historical info is information that is needed to support decaying such as timestamp, level and
+// count.
+const char *const HeaderPolicy::HAS_HISTORICAL_INFO_KEY = "HAS_HISTORICAL_INFO";
+const char *const HeaderPolicy::LOCALE_KEY = "locale"; // match Java declaration
+const char *const HeaderPolicy::FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP_KEY =
+ "FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP";
+const char *const HeaderPolicy::FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY =
+ "FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID";
+const char *const HeaderPolicy::FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS_KEY =
+ "FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS";
+
+const char *const HeaderPolicy::MAX_UNIGRAM_COUNT_KEY = "MAX_UNIGRAM_COUNT";
+const char *const HeaderPolicy::MAX_BIGRAM_COUNT_KEY = "MAX_BIGRAM_COUNT";
+
const int HeaderPolicy::DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE = 100;
const float HeaderPolicy::MULTIPLE_WORD_COST_MULTIPLIER_SCALE = 100.0f;
+const int HeaderPolicy::DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP = 2;
+const int HeaderPolicy::DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID = 3;
+// 30 days
+const int HeaderPolicy::DEFAULT_FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS =
+ 30 * 24 * 60 * 60;
+
+const int HeaderPolicy::DEFAULT_MAX_UNIGRAM_COUNT = 10000;
+const int HeaderPolicy::DEFAULT_MAX_BIGRAM_COUNT = 10000;
// Used for logging. Question mark is used to indicate that the key is not found.
void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *outValue,
@@ -40,20 +66,25 @@ void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *out
}
std::vector<int> keyCodePointVector;
HeaderReadWriteUtils::insertCharactersIntoVector(key, &keyCodePointVector);
- HeaderReadWriteUtils::AttributeMap::const_iterator it = mAttributeMap.find(keyCodePointVector);
+ DictionaryHeaderStructurePolicy::AttributeMap::const_iterator it =
+ mAttributeMap.find(keyCodePointVector);
if (it == mAttributeMap.end()) {
// The key was not found.
outValue[0] = '?';
outValue[1] = '\0';
return;
}
- const int terminalIndex = min(static_cast<int>(it->second.size()), outValueSize - 1);
+ const int terminalIndex = std::min(static_cast<int>(it->second.size()), outValueSize - 1);
for (int i = 0; i < terminalIndex; ++i) {
outValue[i] = it->second[i];
}
outValue[terminalIndex] = '\0';
}
+const std::vector<int> HeaderPolicy::readLocale() const {
+ return HeaderReadWriteUtils::readCodePointVectorAttributeValue(&mAttributeMap, LOCALE_KEY);
+}
+
float HeaderPolicy::readMultipleWordCostMultiplier() const {
const int demotionRate = HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
MULTIPLE_WORDS_DEMOTION_RATE_KEY, DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE);
@@ -63,54 +94,65 @@ float HeaderPolicy::readMultipleWordCostMultiplier() const {
return MULTIPLE_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(demotionRate);
}
-bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
- const bool updatesLastUpdatedTime, const bool updatesLastDecayedTime,
- const int unigramCount, const int bigramCount, const int extendedRegionSize) const {
+bool HeaderPolicy::readRequiresGermanUmlautProcessing() const {
+ return HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap,
+ REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY, false);
+}
+
+bool HeaderPolicy::fillInAndWriteHeaderToBuffer(const bool updatesLastDecayedTime,
+ const int unigramCount, const int bigramCount,
+ const int extendedRegionSize, BufferWithExtendableBuffer *const outBuffer) const {
int writingPos = 0;
- if (!HeaderReadWriteUtils::writeDictionaryVersion(bufferToWrite, mDictFormatVersion,
+ DictionaryHeaderStructurePolicy::AttributeMap attributeMapToWrite(mAttributeMap);
+ fillInHeader(updatesLastDecayedTime, unigramCount, bigramCount,
+ extendedRegionSize, &attributeMapToWrite);
+ if (!HeaderReadWriteUtils::writeDictionaryVersion(outBuffer, mDictFormatVersion,
&writingPos)) {
return false;
}
- if (!HeaderReadWriteUtils::writeDictionaryFlags(bufferToWrite, mDictionaryFlags,
+ if (!HeaderReadWriteUtils::writeDictionaryFlags(outBuffer, mDictionaryFlags,
&writingPos)) {
return false;
}
// Temporarily writes a dummy header size.
int headerSizeFieldPos = writingPos;
- if (!HeaderReadWriteUtils::writeDictionaryHeaderSize(bufferToWrite, 0 /* size */,
+ if (!HeaderReadWriteUtils::writeDictionaryHeaderSize(outBuffer, 0 /* size */,
&writingPos)) {
return false;
}
- HeaderReadWriteUtils::AttributeMap attributeMapTowrite(mAttributeMap);
- HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, UNIGRAM_COUNT_KEY, unigramCount);
- HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, BIGRAM_COUNT_KEY, bigramCount);
- HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, EXTENDED_REGION_SIZE_KEY,
- extendedRegionSize);
- if (updatesLastUpdatedTime) {
- // Set current time as a last updated time.
- HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, LAST_UPDATED_TIME_KEY,
- time(0));
- }
- if (updatesLastDecayedTime) {
- // Set current time as a last updated time.
- HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, LAST_DECAYED_TIME_KEY,
- time(0));
- }
- if (!HeaderReadWriteUtils::writeHeaderAttributes(bufferToWrite, &attributeMapTowrite,
+ if (!HeaderReadWriteUtils::writeHeaderAttributes(outBuffer, &attributeMapToWrite,
&writingPos)) {
return false;
}
- // Writes an actual header size.
- if (!HeaderReadWriteUtils::writeDictionaryHeaderSize(bufferToWrite, writingPos,
+ // Writes the actual header size.
+ if (!HeaderReadWriteUtils::writeDictionaryHeaderSize(outBuffer, writingPos,
&headerSizeFieldPos)) {
return false;
}
return true;
}
-/* static */ HeaderReadWriteUtils::AttributeMap
+void HeaderPolicy::fillInHeader(const bool updatesLastDecayedTime, const int unigramCount,
+ const int bigramCount, const int extendedRegionSize,
+ DictionaryHeaderStructurePolicy::AttributeMap *outAttributeMap) const {
+ HeaderReadWriteUtils::setIntAttribute(outAttributeMap, UNIGRAM_COUNT_KEY, unigramCount);
+ HeaderReadWriteUtils::setIntAttribute(outAttributeMap, BIGRAM_COUNT_KEY, bigramCount);
+ HeaderReadWriteUtils::setIntAttribute(outAttributeMap, EXTENDED_REGION_SIZE_KEY,
+ extendedRegionSize);
+ // Set the current time as the generation time.
+ HeaderReadWriteUtils::setIntAttribute(outAttributeMap, DATE_KEY,
+ TimeKeeper::peekCurrentTime());
+ HeaderReadWriteUtils::setCodePointVectorAttribute(outAttributeMap, LOCALE_KEY, mLocale);
+ if (updatesLastDecayedTime) {
+ // Set the current time as the last decayed time.
+ HeaderReadWriteUtils::setIntAttribute(outAttributeMap, LAST_DECAYED_TIME_KEY,
+ TimeKeeper::peekCurrentTime());
+ }
+}
+
+/* static */ DictionaryHeaderStructurePolicy::AttributeMap
HeaderPolicy::createAttributeMapAndReadAllAttributes(const uint8_t *const dictBuf) {
- HeaderReadWriteUtils::AttributeMap attributeMap;
+ DictionaryHeaderStructurePolicy::AttributeMap attributeMap;
HeaderReadWriteUtils::fetchAllHeaderAttributes(dictBuf, &attributeMap);
return attributeMap;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
index a9c7805a8..87cf0cd3b 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
@@ -17,69 +17,153 @@
#ifndef LATINIME_HEADER_POLICY_H
#define LATINIME_HEADER_POLICY_H
-#include <ctime>
-#include <stdint.h>
+#include <cstdint>
#include "defines.h"
#include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h"
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
+#include "utils/char_utils.h"
+#include "utils/time_keeper.h"
namespace latinime {
class HeaderPolicy : public DictionaryHeaderStructurePolicy {
public:
// Reads information from existing dictionary buffer.
- HeaderPolicy(const uint8_t *const dictBuf, const int dictSize)
- : mDictFormatVersion(FormatUtils::detectFormatVersion(dictBuf, dictSize)),
+ HeaderPolicy(const uint8_t *const dictBuf, const FormatUtils::FORMAT_VERSION formatVersion)
+ : mDictFormatVersion(formatVersion),
mDictionaryFlags(HeaderReadWriteUtils::getFlags(dictBuf)),
mSize(HeaderReadWriteUtils::getHeaderSize(dictBuf)),
mAttributeMap(createAttributeMapAndReadAllAttributes(dictBuf)),
+ mLocale(readLocale()),
mMultiWordCostMultiplier(readMultipleWordCostMultiplier()),
+ mRequiresGermanUmlautProcessing(readRequiresGermanUmlautProcessing()),
mIsDecayingDict(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap,
IS_DECAYING_DICT_KEY, false /* defaultValue */)),
- mLastUpdatedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
- LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)),
+ mDate(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
+ DATE_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)),
mLastDecayedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
- LAST_DECAYED_TIME_KEY, time(0) /* defaultValue */)),
+ LAST_DECAYED_TIME_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)),
mUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
UNIGRAM_COUNT_KEY, 0 /* defaultValue */)),
mBigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
BIGRAM_COUNT_KEY, 0 /* defaultValue */)),
mExtendedRegionSize(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
- EXTENDED_REGION_SIZE_KEY, 0 /* defaultValue */)) {}
+ EXTENDED_REGION_SIZE_KEY, 0 /* defaultValue */)),
+ mHasHistoricalInfoOfWords(HeaderReadWriteUtils::readBoolAttributeValue(
+ &mAttributeMap, HAS_HISTORICAL_INFO_KEY, false /* defaultValue */)),
+ mForgettingCurveOccurrencesToLevelUp(HeaderReadWriteUtils::readIntAttributeValue(
+ &mAttributeMap, FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP_KEY,
+ DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP)),
+ mForgettingCurveProbabilityValuesTableId(HeaderReadWriteUtils::readIntAttributeValue(
+ &mAttributeMap, FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY,
+ DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID)),
+ mForgettingCurveDurationToLevelDown(HeaderReadWriteUtils::readIntAttributeValue(
+ &mAttributeMap, FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS_KEY,
+ DEFAULT_FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS)),
+ mMaxUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(
+ &mAttributeMap, MAX_UNIGRAM_COUNT_KEY, DEFAULT_MAX_UNIGRAM_COUNT)),
+ mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue(
+ &mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)) {}
// Constructs header information using an attribute map.
HeaderPolicy(const FormatUtils::FORMAT_VERSION dictFormatVersion,
- const HeaderReadWriteUtils::AttributeMap *const attributeMap)
+ const std::vector<int> &locale,
+ const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap)
: mDictFormatVersion(dictFormatVersion),
mDictionaryFlags(HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap(
- attributeMap)), mSize(0), mAttributeMap(*attributeMap),
+ attributeMap)), mSize(0), mAttributeMap(*attributeMap), mLocale(locale),
mMultiWordCostMultiplier(readMultipleWordCostMultiplier()),
+ mRequiresGermanUmlautProcessing(readRequiresGermanUmlautProcessing()),
mIsDecayingDict(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap,
IS_DECAYING_DICT_KEY, false /* defaultValue */)),
- mLastUpdatedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
- LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)),
+ mDate(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
+ DATE_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)),
mLastDecayedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
- LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)),
- mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0) {}
+ DATE_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)),
+ mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0),
+ mHasHistoricalInfoOfWords(HeaderReadWriteUtils::readBoolAttributeValue(
+ &mAttributeMap, HAS_HISTORICAL_INFO_KEY, false /* defaultValue */)),
+ mForgettingCurveOccurrencesToLevelUp(HeaderReadWriteUtils::readIntAttributeValue(
+ &mAttributeMap, FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP_KEY,
+ DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP)),
+ mForgettingCurveProbabilityValuesTableId(HeaderReadWriteUtils::readIntAttributeValue(
+ &mAttributeMap, FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY,
+ DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID)),
+ mForgettingCurveDurationToLevelDown(HeaderReadWriteUtils::readIntAttributeValue(
+ &mAttributeMap, FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS_KEY,
+ DEFAULT_FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS)),
+ mMaxUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(
+ &mAttributeMap, MAX_UNIGRAM_COUNT_KEY, DEFAULT_MAX_UNIGRAM_COUNT)),
+ mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue(
+ &mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)) {}
- ~HeaderPolicy() {}
+ // Copy header information
+ HeaderPolicy(const HeaderPolicy *const headerPolicy)
+ : mDictFormatVersion(headerPolicy->mDictFormatVersion),
+ mDictionaryFlags(headerPolicy->mDictionaryFlags), mSize(headerPolicy->mSize),
+ mAttributeMap(headerPolicy->mAttributeMap), mLocale(headerPolicy->mLocale),
+ mMultiWordCostMultiplier(headerPolicy->mMultiWordCostMultiplier),
+ mRequiresGermanUmlautProcessing(headerPolicy->mRequiresGermanUmlautProcessing),
+ mIsDecayingDict(headerPolicy->mIsDecayingDict),
+ mDate(headerPolicy->mDate), mLastDecayedTime(headerPolicy->mLastDecayedTime),
+ mUnigramCount(headerPolicy->mUnigramCount), mBigramCount(headerPolicy->mBigramCount),
+ mExtendedRegionSize(headerPolicy->mExtendedRegionSize),
+ mHasHistoricalInfoOfWords(headerPolicy->mHasHistoricalInfoOfWords),
+ mForgettingCurveOccurrencesToLevelUp(
+ headerPolicy->mForgettingCurveOccurrencesToLevelUp),
+ mForgettingCurveProbabilityValuesTableId(
+ headerPolicy->mForgettingCurveProbabilityValuesTableId),
+ mForgettingCurveDurationToLevelDown(
+ headerPolicy->mForgettingCurveDurationToLevelDown),
+ mMaxUnigramCount(headerPolicy->mMaxUnigramCount),
+ mMaxBigramCount(headerPolicy->mMaxBigramCount) {}
- AK_FORCE_INLINE int getSize() const {
- return mSize;
- }
+ // Temporary dummy header.
+ HeaderPolicy()
+ : mDictFormatVersion(FormatUtils::UNKNOWN_VERSION), mDictionaryFlags(0), mSize(0),
+ mAttributeMap(), mLocale(CharUtils::EMPTY_STRING), mMultiWordCostMultiplier(0.0f),
+ mRequiresGermanUmlautProcessing(false), mIsDecayingDict(false),
+ mDate(0), mLastDecayedTime(0), mUnigramCount(0), mBigramCount(0),
+ mExtendedRegionSize(0), mHasHistoricalInfoOfWords(false),
+ mForgettingCurveOccurrencesToLevelUp(0), mForgettingCurveProbabilityValuesTableId(0),
+ mForgettingCurveDurationToLevelDown(0), mMaxUnigramCount(0), mMaxBigramCount(0) {}
- AK_FORCE_INLINE bool supportsDynamicUpdate() const {
- return HeaderReadWriteUtils::supportsDynamicUpdate(mDictionaryFlags);
+ ~HeaderPolicy() {}
+
+ virtual int getFormatVersionNumber() const {
+ // Conceptually this converts the symbolic value we use in the code into the
+ // hardcoded value of the bytes in the file. But we want the constants to be the
+ // same so we use them for both here.
+ switch (mDictFormatVersion) {
+ case FormatUtils::VERSION_2:
+ return FormatUtils::VERSION_2;
+ case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
+ return FormatUtils::VERSION_4_ONLY_FOR_TESTING;
+ case FormatUtils::VERSION_4:
+ return FormatUtils::VERSION_4;
+ case FormatUtils::VERSION_4_DEV:
+ return FormatUtils::VERSION_4_DEV;
+ default:
+ return FormatUtils::UNKNOWN_VERSION;
+ }
}
- AK_FORCE_INLINE bool requiresGermanUmlautProcessing() const {
- return HeaderReadWriteUtils::requiresGermanUmlautProcessing(mDictionaryFlags);
+ AK_FORCE_INLINE bool isValid() const {
+ // Decaying dictionary must have historical information.
+ if (!mIsDecayingDict) {
+ return true;
+ }
+ if (mHasHistoricalInfoOfWords) {
+ return true;
+ } else {
+ return false;
+ }
}
- AK_FORCE_INLINE bool requiresFrenchLigatureProcessing() const {
- return HeaderReadWriteUtils::requiresFrenchLigatureProcessing(mDictionaryFlags);
+ AK_FORCE_INLINE int getSize() const {
+ return mSize;
}
AK_FORCE_INLINE float getMultiWordCostMultiplier() const {
@@ -90,8 +174,12 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
return mIsDecayingDict;
}
- AK_FORCE_INLINE int getLastUpdatedTime() const {
- return mLastUpdatedTime;
+ AK_FORCE_INLINE bool requiresGermanUmlautProcessing() const {
+ return mRequiresGermanUmlautProcessing;
+ }
+
+ AK_FORCE_INLINE int getDate() const {
+ return mDate;
}
AK_FORCE_INLINE int getLastDecayedTime() const {
@@ -110,41 +198,109 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
return mExtendedRegionSize;
}
+ AK_FORCE_INLINE bool hasHistoricalInfoOfWords() const {
+ return mHasHistoricalInfoOfWords;
+ }
+
+ AK_FORCE_INLINE bool shouldBoostExactMatches() const {
+ // TODO: Investigate better ways to handle exact matches for personalized dictionaries.
+ return !isDecayingDict();
+ }
+
+ const DictionaryHeaderStructurePolicy::AttributeMap *getAttributeMap() const {
+ return &mAttributeMap;
+ }
+
+ AK_FORCE_INLINE int getForgettingCurveOccurrencesToLevelUp() const {
+ return mForgettingCurveOccurrencesToLevelUp;
+ }
+
+ AK_FORCE_INLINE int getForgettingCurveProbabilityValuesTableId() const {
+ return mForgettingCurveProbabilityValuesTableId;
+ }
+
+ AK_FORCE_INLINE int getForgettingCurveDurationToLevelDown() const {
+ return mForgettingCurveDurationToLevelDown;
+ }
+
+ AK_FORCE_INLINE int getMaxUnigramCount() const {
+ return mMaxUnigramCount;
+ }
+
+ AK_FORCE_INLINE int getMaxBigramCount() const {
+ return mMaxBigramCount;
+ }
+
void readHeaderValueOrQuestionMark(const char *const key,
int *outValue, int outValueSize) const;
- bool writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
- const bool updatesLastUpdatedTime, const bool updatesLastDecayedTime,
- const int unigramCount, const int bigramCount, const int extendedRegionSize) const;
+ bool fillInAndWriteHeaderToBuffer(const bool updatesLastDecayedTime,
+ const int unigramCount, const int bigramCount,
+ const int extendedRegionSize, BufferWithExtendableBuffer *const outBuffer) const;
+
+ void fillInHeader(const bool updatesLastDecayedTime,
+ const int unigramCount, const int bigramCount, const int extendedRegionSize,
+ DictionaryHeaderStructurePolicy::AttributeMap *outAttributeMap) const;
+
+ AK_FORCE_INLINE const std::vector<int> *getLocale() const {
+ return &mLocale;
+ }
+
+ bool supportsBeginningOfSentence() const {
+ return mDictFormatVersion >= FormatUtils::VERSION_4;
+ }
private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderPolicy);
+ DISALLOW_COPY_AND_ASSIGN(HeaderPolicy);
static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY;
+ static const char *const REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY;
static const char *const IS_DECAYING_DICT_KEY;
- static const char *const LAST_UPDATED_TIME_KEY;
+ static const char *const DATE_KEY;
static const char *const LAST_DECAYED_TIME_KEY;
static const char *const UNIGRAM_COUNT_KEY;
static const char *const BIGRAM_COUNT_KEY;
static const char *const EXTENDED_REGION_SIZE_KEY;
+ static const char *const HAS_HISTORICAL_INFO_KEY;
+ static const char *const LOCALE_KEY;
+ static const char *const FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP_KEY;
+ static const char *const FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY;
+ static const char *const FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS_KEY;
+ static const char *const MAX_UNIGRAM_COUNT_KEY;
+ static const char *const MAX_BIGRAM_COUNT_KEY;
static const int DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE;
static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE;
+ static const int DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP;
+ static const int DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID;
+ static const int DEFAULT_FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS;
+ static const int DEFAULT_MAX_UNIGRAM_COUNT;
+ static const int DEFAULT_MAX_BIGRAM_COUNT;
const FormatUtils::FORMAT_VERSION mDictFormatVersion;
const HeaderReadWriteUtils::DictionaryFlags mDictionaryFlags;
const int mSize;
- HeaderReadWriteUtils::AttributeMap mAttributeMap;
+ DictionaryHeaderStructurePolicy::AttributeMap mAttributeMap;
+ const std::vector<int> mLocale;
const float mMultiWordCostMultiplier;
+ const bool mRequiresGermanUmlautProcessing;
const bool mIsDecayingDict;
- const int mLastUpdatedTime;
+ const int mDate;
const int mLastDecayedTime;
const int mUnigramCount;
const int mBigramCount;
const int mExtendedRegionSize;
+ const bool mHasHistoricalInfoOfWords;
+ const int mForgettingCurveOccurrencesToLevelUp;
+ const int mForgettingCurveProbabilityValuesTableId;
+ const int mForgettingCurveDurationToLevelDown;
+ const int mMaxUnigramCount;
+ const int mMaxBigramCount;
+ const std::vector<int> readLocale() const;
float readMultipleWordCostMultiplier() const;
+ bool readRequiresGermanUmlautProcessing() const;
- static HeaderReadWriteUtils::AttributeMap createAttributeMapAndReadAllAttributes(
+ static DictionaryHeaderStructurePolicy::AttributeMap createAttributeMapAndReadAllAttributes(
const uint8_t *const dictBuf);
};
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
index 5ded8f6a1..a8f8f284b 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
@@ -26,6 +26,13 @@
namespace latinime {
+// Number of base-10 digits in the largest integer + 1 to leave room for a zero terminator.
+// As such, this is the maximum number of characters that will be needed to represent an int as a
+// string, including the terminator; this is used as the size of a string buffer large enough to
+// hold any value that is intended to fit in an integer, e.g. in the code that reads the header
+// of the binary dictionary where a {key,value} string pair scheme is used.
+const int HeaderReadWriteUtils::LARGEST_INT_DIGIT_COUNT = 11;
+
const int HeaderReadWriteUtils::MAX_ATTRIBUTE_KEY_LENGTH = 256;
const int HeaderReadWriteUtils::MAX_ATTRIBUTE_VALUE_LENGTH = 256;
@@ -35,22 +42,8 @@ const int HeaderReadWriteUtils::HEADER_FLAG_SIZE = 2;
const int HeaderReadWriteUtils::HEADER_SIZE_FIELD_SIZE = 4;
const HeaderReadWriteUtils::DictionaryFlags HeaderReadWriteUtils::NO_FLAGS = 0;
-// Flags for special processing
-// Those *must* match the flags in makedict (FormatSpec#*_PROCESSING_FLAG) or
-// something very bad (like, the apocalypse) will happen. Please update both at the same time.
-const HeaderReadWriteUtils::DictionaryFlags
- HeaderReadWriteUtils::GERMAN_UMLAUT_PROCESSING_FLAG = 0x1;
-const HeaderReadWriteUtils::DictionaryFlags
- HeaderReadWriteUtils::SUPPORTS_DYNAMIC_UPDATE_FLAG = 0x2;
-const HeaderReadWriteUtils::DictionaryFlags
- HeaderReadWriteUtils::FRENCH_LIGATURE_PROCESSING_FLAG = 0x4;
-
-// Note that these are corresponding definitions in Java side in FormatSpec.FileHeader.
-const char *const HeaderReadWriteUtils::SUPPORTS_DYNAMIC_UPDATE_KEY = "SUPPORTS_DYNAMIC_UPDATE";
-const char *const HeaderReadWriteUtils::REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY =
- "REQUIRES_GERMAN_UMLAUT_PROCESSING";
-const char *const HeaderReadWriteUtils::REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY =
- "REQUIRES_FRENCH_LIGATURE_PROCESSING";
+
+typedef DictionaryHeaderStructurePolicy::AttributeMap AttributeMap;
/* static */ int HeaderReadWriteUtils::getHeaderSize(const uint8_t *const dictBuf) {
// See the format of the header in the comment in
@@ -67,18 +60,8 @@ const char *const HeaderReadWriteUtils::REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY
/* static */ HeaderReadWriteUtils::DictionaryFlags
HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap(
- const HeaderReadWriteUtils::AttributeMap *const attributeMap) {
- const bool requiresGermanUmlautProcessing = readBoolAttributeValue(attributeMap,
- REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY, false /* defaultValue */);
- const bool requiresFrenchLigatureProcessing = readBoolAttributeValue(attributeMap,
- REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY, false /* defaultValue */);
- const bool supportsDynamicUpdate = readBoolAttributeValue(attributeMap,
- SUPPORTS_DYNAMIC_UPDATE_KEY, false /* defaultValue */);
- DictionaryFlags dictflags = NO_FLAGS;
- dictflags |= requiresGermanUmlautProcessing ? GERMAN_UMLAUT_PROCESSING_FLAG : 0;
- dictflags |= requiresFrenchLigatureProcessing ? FRENCH_LIGATURE_PROCESSING_FLAG : 0;
- dictflags |= supportsDynamicUpdate ? SUPPORTS_DYNAMIC_UPDATE_FLAG : 0;
- return dictflags;
+ const AttributeMap *const attributeMap) {
+ return NO_FLAGS;
}
/* static */ void HeaderReadWriteUtils::fetchAllHeaderAttributes(const uint8_t *const dictBuf,
@@ -115,8 +98,10 @@ const char *const HeaderReadWriteUtils::REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY
case FormatUtils::VERSION_2:
// Version 2 dictionary writing is not supported.
return false;
- case FormatUtils::VERSION_3:
- return buffer->writeUintAndAdvancePosition(3 /* data */,
+ case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
+ case FormatUtils::VERSION_4:
+ case FormatUtils::VERSION_4_DEV:
+ return buffer->writeUintAndAdvancePosition(version /* data */,
HEADER_DICTIONARY_VERSION_SIZE, writingPos);
default:
return false;
@@ -156,6 +141,13 @@ const char *const HeaderReadWriteUtils::REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY
return true;
}
+/* static */ void HeaderReadWriteUtils::setCodePointVectorAttribute(
+ AttributeMap *const headerAttributes, const char *const key, const std::vector<int> value) { // Stores value under key, replacing any existing mapping.
+ AttributeMap::key_type keyVector;
+ insertCharactersIntoVector(key, &keyVector); // Build the map key from the C string key.
+ (*headerAttributes)[keyVector] = value; // NOTE(review): value is passed by value and copied again here; a const reference would avoid one copy.
+}
+
/* static */ void HeaderReadWriteUtils::setBoolAttribute(AttributeMap *const headerAttributes,
const char *const key, const bool value) {
setIntAttribute(headerAttributes, key, value ? 1 : 0);
@@ -171,12 +163,24 @@ const char *const HeaderReadWriteUtils::REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY
/* static */ void HeaderReadWriteUtils::setIntAttributeInner(AttributeMap *const headerAttributes,
const AttributeMap::key_type *const key, const int value) {
AttributeMap::mapped_type valueVector;
- char charBuf[LARGEST_INT_DIGIT_COUNT + 1];
- snprintf(charBuf, LARGEST_INT_DIGIT_COUNT + 1, "%d", value);
+ char charBuf[LARGEST_INT_DIGIT_COUNT]; // NOTE(review): the "+ 1" was dropped; confirm LARGEST_INT_DIGIT_COUNT leaves room for a '-' sign and the terminating NUL.
+ snprintf(charBuf, sizeof(charBuf), "%d", value); // snprintf writes at most sizeof(charBuf) - 1 characters plus a NUL, so a too-small buffer truncates instead of overflowing.
insertCharactersIntoVector(charBuf, &valueVector);
(*headerAttributes)[*key] = valueVector;
}
+/* static */ const std::vector<int> HeaderReadWriteUtils::readCodePointVectorAttributeValue(
+ const AttributeMap *const headerAttributes, const char *const key) { // Returns a copy of the value stored under key, or an empty vector if key is absent.
+ AttributeMap::key_type keyVector;
+ insertCharactersIntoVector(key, &keyVector); // Build the lookup key from the C string key.
+ AttributeMap::const_iterator it = headerAttributes->find(keyVector);
+ if (it == headerAttributes->end()) {
+ return std::vector<int>(); // Missing attribute: an empty vector is the only "not found" signal.
+ } else {
+ return it->second;
+ }
+}
+
/* static */ bool HeaderReadWriteUtils::readBoolAttributeValue(
const AttributeMap *const headerAttributes, const char *const key,
const bool defaultValue) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h
index 225968323..9b90488fc 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h
@@ -17,11 +17,10 @@
#ifndef LATINIME_HEADER_READ_WRITE_UTILS_H
#define LATINIME_HEADER_READ_WRITE_UTILS_H
-#include <map>
-#include <stdint.h>
-#include <vector>
+#include <cstdint>
#include "defines.h"
+#include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
namespace latinime {
@@ -31,34 +30,21 @@ class BufferWithExtendableBuffer;
class HeaderReadWriteUtils {
public:
typedef uint16_t DictionaryFlags;
- typedef std::map<std::vector<int>, std::vector<int> > AttributeMap;
static int getHeaderSize(const uint8_t *const dictBuf);
static DictionaryFlags getFlags(const uint8_t *const dictBuf);
- static AK_FORCE_INLINE bool supportsDynamicUpdate(const DictionaryFlags flags) {
- return (flags & SUPPORTS_DYNAMIC_UPDATE_FLAG) != 0;
- }
-
- static AK_FORCE_INLINE bool requiresGermanUmlautProcessing(const DictionaryFlags flags) {
- return (flags & GERMAN_UMLAUT_PROCESSING_FLAG) != 0;
- }
-
- static AK_FORCE_INLINE bool requiresFrenchLigatureProcessing(const DictionaryFlags flags) {
- return (flags & FRENCH_LIGATURE_PROCESSING_FLAG) != 0;
- }
-
static AK_FORCE_INLINE int getHeaderOptionsPosition() {
return HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE + HEADER_FLAG_SIZE
+ HEADER_SIZE_FIELD_SIZE;
}
static DictionaryFlags createAndGetDictionaryFlagsUsingAttributeMap(
- const HeaderReadWriteUtils::AttributeMap *const attributeMap);
+ const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap);
static void fetchAllHeaderAttributes(const uint8_t *const dictBuf,
- AttributeMap *const headerAttributes);
+ DictionaryHeaderStructurePolicy::AttributeMap *const headerAttributes);
static bool writeDictionaryVersion(BufferWithExtendableBuffer *const buffer,
const FormatUtils::FORMAT_VERSION version, int *const writingPos);
@@ -70,29 +56,43 @@ class HeaderReadWriteUtils {
const int size, int *const writingPos);
static bool writeHeaderAttributes(BufferWithExtendableBuffer *const buffer,
- const AttributeMap *const headerAttributes, int *const writingPos);
+ const DictionaryHeaderStructurePolicy::AttributeMap *const headerAttributes,
+ int *const writingPos);
/**
* Methods for header attributes.
*/
- static void setBoolAttribute(AttributeMap *const headerAttributes,
+ static void setCodePointVectorAttribute(
+ DictionaryHeaderStructurePolicy::AttributeMap *const headerAttributes,
+ const char *const key, const std::vector<int> value);
+
+ static void setBoolAttribute(
+ DictionaryHeaderStructurePolicy::AttributeMap *const headerAttributes,
const char *const key, const bool value);
- static void setIntAttribute(AttributeMap *const headerAttributes,
+ static void setIntAttribute(
+ DictionaryHeaderStructurePolicy::AttributeMap *const headerAttributes,
const char *const key, const int value);
- static bool readBoolAttributeValue(const AttributeMap *const headerAttributes,
+ static const std::vector<int> readCodePointVectorAttributeValue(
+ const DictionaryHeaderStructurePolicy::AttributeMap *const headerAttributes,
+ const char *const key);
+
+ static bool readBoolAttributeValue(
+ const DictionaryHeaderStructurePolicy::AttributeMap *const headerAttributes,
const char *const key, const bool defaultValue);
- static int readIntAttributeValue(const AttributeMap *const headerAttributes,
+ static int readIntAttributeValue(
+ const DictionaryHeaderStructurePolicy::AttributeMap *const headerAttributes,
const char *const key, const int defaultValue);
static void insertCharactersIntoVector(const char *const characters,
- AttributeMap::key_type *const key);
+ DictionaryHeaderStructurePolicy::AttributeMap::key_type *const key);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderReadWriteUtils);
+ static const int LARGEST_INT_DIGIT_COUNT;
static const int MAX_ATTRIBUTE_KEY_LENGTH;
static const int MAX_ATTRIBUTE_VALUE_LENGTH;
@@ -101,23 +101,18 @@ class HeaderReadWriteUtils {
static const int HEADER_FLAG_SIZE;
static const int HEADER_SIZE_FIELD_SIZE;
+ // Value for the "flags" field. It's unused at the moment.
static const DictionaryFlags NO_FLAGS;
- // Flags for special processing
- // Those *must* match the flags in makedict (FormatSpec#*_PROCESSING_FLAGS) or
- // something very bad (like, the apocalypse) will happen. Please update both at the same time.
- static const DictionaryFlags GERMAN_UMLAUT_PROCESSING_FLAG;
- static const DictionaryFlags SUPPORTS_DYNAMIC_UPDATE_FLAG;
- static const DictionaryFlags FRENCH_LIGATURE_PROCESSING_FLAG;
-
- static const char *const SUPPORTS_DYNAMIC_UPDATE_KEY;
- static const char *const REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY;
- static const char *const REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY;
-
- static void setIntAttributeInner(AttributeMap *const headerAttributes,
- const AttributeMap::key_type *const key, const int value);
-
- static int readIntAttributeValueInner(const AttributeMap *const headerAttributes,
- const AttributeMap::key_type *const key, const int defaultValue);
+
+ static void setIntAttributeInner(
+ DictionaryHeaderStructurePolicy::AttributeMap *const headerAttributes,
+ const DictionaryHeaderStructurePolicy::AttributeMap::key_type *const key,
+ const int value);
+
+ static int readIntAttributeValueInner(
+ const DictionaryHeaderStructurePolicy::AttributeMap *const headerAttributes,
+ const DictionaryHeaderStructurePolicy::AttributeMap::key_type *const key,
+ const int defaultValue);
};
}
#endif /* LATINIME_HEADER_READ_WRITE_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h
deleted file mode 100644
index bd3211f6a..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_DYNAMIC_SHORTCUT_LIST_POLICY_H
-#define LATINIME_DYNAMIC_SHORTCUT_LIST_POLICY_H
-
-#include <stdint.h>
-
-#include "defines.h"
-#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
-#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h"
-#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
-
-namespace latinime {
-
-/*
- * This is a dynamic version of ShortcutListPolicy and supports an additional buffer.
- */
-class DynamicShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
- public:
- explicit DynamicShortcutListPolicy(const BufferWithExtendableBuffer *const buffer)
- : mBuffer(buffer) {}
-
- ~DynamicShortcutListPolicy() {}
-
- int getStartPos(const int pos) const {
- if (pos == NOT_A_DICT_POS) {
- return NOT_A_DICT_POS;
- }
- return pos + ShortcutListReadingUtils::getShortcutListSizeFieldSize();
- }
-
- void getNextShortcut(const int maxCodePointCount, int *const outCodePoint,
- int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext,
- int *const pos) const {
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
- const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
- if (usesAdditionalBuffer) {
- *pos -= mBuffer->getOriginalBufferSize();
- }
- const ShortcutListReadingUtils::ShortcutFlags flags =
- ShortcutListReadingUtils::getFlagsAndForwardPointer(buffer, pos);
- if (outHasNext) {
- *outHasNext = ShortcutListReadingUtils::hasNext(flags);
- }
- if (outIsWhitelist) {
- *outIsWhitelist = ShortcutListReadingUtils::isWhitelist(flags);
- }
- if (outCodePoint) {
- *outCodePointCount = ShortcutListReadingUtils::readShortcutTarget(
- buffer, maxCodePointCount, outCodePoint, pos);
- }
- if (usesAdditionalBuffer) {
- *pos += mBuffer->getOriginalBufferSize();
- }
- }
-
- void skipAllShortcuts(int *const pos) const {
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
- const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
- if (usesAdditionalBuffer) {
- *pos -= mBuffer->getOriginalBufferSize();
- }
- const int shortcutListSize = ShortcutListReadingUtils
- ::getShortcutListSizeAndForwardPointer(buffer, pos);
- *pos += shortcutListSize;
- if (usesAdditionalBuffer) {
- *pos += mBuffer->getOriginalBufferSize();
- }
- }
-
- // Copy shortcuts from the shortcut list that starts at fromPos in mBuffer to toPos in
- // bufferToWrite and advance these positions after the shortcut lists. This returns whether
- // the copy was succeeded or not.
- bool copyAllShortcutsAndReturnIfSucceededOrNot(BufferWithExtendableBuffer *const bufferToWrite,
- int *const fromPos, int *const toPos) const {
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*fromPos);
- if (usesAdditionalBuffer) {
- *fromPos -= mBuffer->getOriginalBufferSize();
- }
- const int shortcutListSize = ShortcutListReadingUtils
- ::getShortcutListSizeAndForwardPointer(mBuffer->getBuffer(usesAdditionalBuffer),
- fromPos);
- // Copy shortcut list size.
- if (!bufferToWrite->writeUintAndAdvancePosition(
- shortcutListSize + ShortcutListReadingUtils::getShortcutListSizeFieldSize(),
- ShortcutListReadingUtils::getShortcutListSizeFieldSize(), toPos)) {
- return false;
- }
- // Copy shortcut list.
- for (int i = 0; i < shortcutListSize; ++i) {
- const uint8_t data = ByteArrayUtils::readUint8AndAdvancePosition(
- mBuffer->getBuffer(usesAdditionalBuffer), fromPos);
- if (!bufferToWrite->writeUintAndAdvancePosition(data, 1 /* size */, toPos)) {
- return false;
- }
- }
- if (usesAdditionalBuffer) {
- *fromPos += mBuffer->getOriginalBufferSize();
- }
- return true;
- }
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicShortcutListPolicy);
-
- const BufferWithExtendableBuffer *const mBuffer;
-};
-} // namespace latinime
-#endif // LATINIME_DYNAMIC_SHORTCUT_LIST_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/Readme.txt b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/Readme.txt
new file mode 100644
index 000000000..9e29e836c
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/Readme.txt
@@ -0,0 +1 @@
+Files under this directory have been auto generated.
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.cpp
new file mode 100644
index 000000000..3e8e059f2
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.cpp
@@ -0,0 +1,290 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!!
+ * Do not edit this file other than updating policy's interface.
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp
+ */
+
+#include "suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h"
+
+#include "suggest/core/dictionary/property/bigram_property.h"
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
+ bool *const outHasNext, int *const bigramEntryPos) const { // Reads one bigram entry; each output pointer is optional and skipped when null.
+ const BigramEntry bigramEntry =
+ mBigramDictContent->getBigramEntryAndAdvancePosition(bigramEntryPos); // Also advances *bigramEntryPos as it reads.
+ if (outBigramPos) {
+ // Look up the target PtNode position from the entry's target terminal id.
+ *outBigramPos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
+ bigramEntry.getTargetTerminalId());
+ }
+ if (outProbability) {
+ if (bigramEntry.hasHistoricalInfo()) { // With historical info, the probability is decoded from it.
+ *outProbability =
+ ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo(),
+ mHeaderPolicy);
+ } else { // Otherwise the probability stored in the entry is used as is.
+ *outProbability = bigramEntry.getProbability();
+ }
+ }
+ if (outHasNext) {
+ *outHasNext = bigramEntry.hasNext();
+ }
+}
+
+bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId,
+ const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) { // Adds or updates the bigram terminalId -> newTargetTerminalId; returns false when a content write fails.
+ // 1. The word has no bigrams yet.
+ // 2. The word has bigrams, and there is the target in the list.
+ // 3. The word has bigrams, and there is an invalid entry that can be reclaimed.
+ // 4. The word has bigrams. We have to append new bigram entry to the list.
+ // 5. Same as 4, but the list is the last entry of the content file.
+ if (outAddedNewEntry) { // outAddedNewEntry is optional; it defaults to false until an entry is actually added.
+ *outAddedNewEntry = false;
+ }
+ const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+ if (bigramListPos == NOT_A_DICT_POS) {
+ // Case 1. PtNode that doesn't have a bigram list.
+ // Create new bigram list.
+ if (!mBigramDictContent->createNewBigramList(terminalId)) {
+ return false;
+ }
+ const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
+ newTargetTerminalId);
+ const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(&newBigramEntry,
+ bigramProperty);
+ // Write an entry.
+ const int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId); // Re-read the head position now that the list exists.
+ if (!mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, writingPos)) {
+ return false;
+ }
+ if (outAddedNewEntry) {
+ *outAddedNewEntry = true;
+ }
+ return true;
+ }
+
+ int tailEntryPos = NOT_A_DICT_POS;
+ const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos,
+ &tailEntryPos);
+ if (tailEntryPos != NOT_A_DICT_POS || entryPosToUpdate == NOT_A_DICT_POS) { // Append path: the list ends at the content tail, or no entry can be updated in place.
+ // Case 4, 5.
+ // Add new entry to the bigram list.
+ if (tailEntryPos == NOT_A_DICT_POS) {
+ // Case 4. Create new bigram list.
+ if (!mBigramDictContent->createNewBigramList(terminalId)) {
+ return false;
+ }
+ const int destPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+ // Copy existing bigram list.
+ if (!mBigramDictContent->copyBigramList(bigramListPos, destPos, &tailEntryPos)) { // tailEntryPos is passed as an output; presumably set to the copied list's last entry - confirm in BigramDictContent.
+ return false;
+ }
+ }
+ // Write new entry at the tail position of the bigram content.
+ const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
+ newTargetTerminalId);
+ const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
+ &newBigramEntry, bigramProperty);
+ if (!mBigramDictContent->writeBigramEntryAtTail(&bigramEntryToWrite)) {
+ return false;
+ }
+ // Update has next flag of the tail entry.
+ if (!updateHasNextFlag(true /* hasNext */, tailEntryPos)) { // The former last entry must now chain to the entry just appended.
+ return false;
+ }
+ if (outAddedNewEntry) {
+ *outAddedNewEntry = true;
+ }
+ return true;
+ }
+
+ // Case 2. Overwrite the existing entry. Case 3. Reclaim and reuse the existing invalid entry.
+ const BigramEntry originalBigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate);
+ if (!originalBigramEntry.isValid()) {
+ // Case 3. Reuse the existing invalid entry. outAddedNewEntry stays false only when a
+ // valid existing entry is updated (case 2).
+ if (outAddedNewEntry) {
+ *outAddedNewEntry = true;
+ }
+ }
+ const BigramEntry updatedBigramEntry =
+ originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId);
+ const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
+ &updatedBigramEntry, bigramProperty);
+ return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate); // Overwrite in place; the result of the write is the result of the call.
+}
+
+bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) { // Invalidates the bigram terminalId -> targetTerminalId; returns false if it is absent or the write fails.
+ const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+ if (bigramListPos == NOT_A_DICT_POS) {
+ // Bigram list doesn't exist.
+ return false;
+ }
+ const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos,
+ nullptr /* outTailEntryPos */);
+ if (entryPosToUpdate == NOT_A_DICT_POS) {
+ // Bigram entry doesn't exist.
+ return false;
+ }
+ const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate);
+ if (targetTerminalId != bigramEntry.getTargetTerminalId()) { // getEntryPosToUpdate can return a reusable invalid entry instead of a match, so re-check the target.
+ // Bigram entry doesn't exist.
+ return false;
+ }
+ // Remove bigram entry by marking it as invalid entry and overwriting the original entry.
+ const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
+ return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPosToUpdate);
+}
+
+bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,
+ int *const outBigramCount) { // Walks terminalId's bigram list, invalidating unusable entries and adding the number kept to *outBigramCount.
+ const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+ if (bigramListPos == NOT_A_DICT_POS) {
+ // Bigram list doesn't exist.
+ return true;
+ }
+ bool hasNext = true;
+ int readingPos = bigramListPos;
+ while (hasNext) {
+ const int entryPos = readingPos; // Remember where this entry starts; readingPos is advanced by the read below.
+ const BigramEntry bigramEntry =
+ mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
+ hasNext = bigramEntry.hasNext();
+ if (!bigramEntry.isValid()) { // Already-invalid entries are skipped and not counted.
+ continue;
+ }
+ const int targetPtNodePos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
+ bigramEntry.getTargetTerminalId());
+ if (targetPtNodePos == NOT_A_DICT_POS) { // The target terminal id no longer resolves to a PtNode position.
+ // Invalidate bigram entry.
+ const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
+ if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
+ return false;
+ }
+ } else if (bigramEntry.hasHistoricalInfo()) {
+ const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
+ bigramEntry.getHistoricalInfo(), mHeaderPolicy);
+ if (ForgettingCurveUtils::needsToKeep(&historicalInfo, mHeaderPolicy)) {
+ const BigramEntry updatedBigramEntry =
+ bigramEntry.updateHistoricalInfoAndGetEntry(&historicalInfo);
+ if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
+ return false;
+ }
+ *outBigramCount += 1; // NOTE(review): accumulates into the caller's value and, unlike other outputs in this file, is dereferenced without a null check.
+ } else {
+ // Remove entry.
+ const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
+ if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
+ return false;
+ }
+ }
+ } else { // Valid entry without historical info: kept unchanged and counted.
+ *outBigramCount += 1;
+ }
+ }
+ return true;
+}
+
+int Ver4BigramListPolicy::getBigramEntryConut(const int terminalId) { // Counts the valid entries in terminalId's bigram list. NOTE(review): "Conut" is a typo for "Count" but is the declared name.
+ const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+ if (bigramListPos == NOT_A_DICT_POS) {
+ // Bigram list doesn't exist.
+ return 0;
+ }
+ int bigramCount = 0;
+ bool hasNext = true;
+ int readingPos = bigramListPos;
+ while (hasNext) { // Follow the has-next chain until the last entry of the list.
+ const BigramEntry bigramEntry =
+ mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
+ hasNext = bigramEntry.hasNext();
+ if (bigramEntry.isValid()) { // Invalidated (removed) entries stay in the list but are not counted.
+ bigramCount++;
+ }
+ }
+ return bigramCount;
+}
+
+int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
+ const int bigramListPos, int *const outTailEntryPos) const { // Returns the entry matching the target, else the last invalid entry seen, else NOT_A_DICT_POS.
+ if (outTailEntryPos) { // outTailEntryPos is optional; it is reset before the scan.
+ *outTailEntryPos = NOT_A_DICT_POS;
+ }
+ bool hasNext = true;
+ int invalidEntryPos = NOT_A_DICT_POS;
+ int readingPos = bigramListPos;
+ while (hasNext) {
+ const int entryPos = readingPos;
+ const BigramEntry bigramEntry =
+ mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
+ hasNext = bigramEntry.hasNext();
+ if (bigramEntry.getTargetTerminalId() == targetTerminalIdToFind) {
+ // Entry with same target is found.
+ return entryPos; // Early return: *outTailEntryPos stays NOT_A_DICT_POS when a match is found.
+ } else if (!bigramEntry.isValid()) {
+ // Invalid entry that can be reused is found.
+ invalidEntryPos = entryPos; // Overwritten on each hit, so the last invalid entry in the list wins.
+ }
+ if (!hasNext && mBigramDictContent->isContentTailPos(readingPos)) { // The last entry is reported only when the list ends exactly at the content tail.
+ if (outTailEntryPos) {
+ *outTailEntryPos = entryPos;
+ }
+ }
+ }
+ return invalidEntryPos;
+}
+
+const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
+ const BigramEntry *const originalBigramEntry,
+ const BigramProperty *const bigramProperty) const { // Returns a copy of originalBigramEntry updated from bigramProperty; the original is not modified.
+ // TODO: Consolidate historical info and probability.
+ if (mHeaderPolicy->hasHistoricalInfoOfWords()) { // The header decides whether entries carry historical info or a plain probability.
+ const HistoricalInfo historicalInfoForUpdate(bigramProperty->getTimestamp(),
+ bigramProperty->getLevel(), bigramProperty->getCount());
+ const HistoricalInfo updatedHistoricalInfo =
+ ForgettingCurveUtils::createUpdatedHistoricalInfo(
+ originalBigramEntry->getHistoricalInfo(), bigramProperty->getProbability(),
+ &historicalInfoForUpdate, mHeaderPolicy);
+ return originalBigramEntry->updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo);
+ } else {
+ return originalBigramEntry->updateProbabilityAndGetEntry(bigramProperty->getProbability());
+ }
+}
+
+bool Ver4BigramListPolicy::updateHasNextFlag(const bool hasNext, const int bigramEntryPos) { // Rewrites the entry at bigramEntryPos with its has-next flag set to hasNext.
+ const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(bigramEntryPos);
+ const BigramEntry updatedBigramEntry = bigramEntry.updateHasNextAndGetEntry(hasNext);
+ return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, bigramEntryPos); // Returns the result of the write.
+}
+
+} // namespace v402
+} // namespace backward
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h
new file mode 100644
index 000000000..61623468e
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!!
+ * Do not edit this file other than updating policy's interface.
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h
+ */
+
+#ifndef LATINIME_BACKWARD_V402_VER4_BIGRAM_LIST_POLICY_H
+#define LATINIME_BACKWARD_V402_VER4_BIGRAM_LIST_POLICY_H
+
+#include "defines.h"
+#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_entry.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+class BigramDictContent;
+} // namespace v402
+} // namespace backward
+class BigramProperty;
+namespace backward {
+namespace v402 {
+} // namespace v402
+} // namespace backward
+class HeaderPolicy;
+namespace backward {
+namespace v402 {
+class TerminalPositionLookupTable;
+
+class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy { // Bigram list policy of the backward::v402 (backward-compatibility) dictionary structure.
+ public:
+ Ver4BigramListPolicy(BigramDictContent *const bigramDictContent,
+ const TerminalPositionLookupTable *const terminalPositionLookupTable,
+ const HeaderPolicy *const headerPolicy) // The pointers are stored as is; nothing here deletes them.
+ : mBigramDictContent(bigramDictContent),
+ mTerminalPositionLookupTable(terminalPositionLookupTable),
+ mHeaderPolicy(headerPolicy) {}
+
+ void getNextBigram(int *const outBigramPos, int *const outProbability,
+ bool *const outHasNext, int *const bigramEntryPos) const; // Reads one entry and advances *bigramEntryPos; null outputs are skipped.
+
+ void skipAllBigrams(int *const pos) const {
+ // Do nothing because we don't need to skip bigram lists in ver4 dictionaries.
+ }
+
+ bool addNewEntry(const int terminalId, const int newTargetTerminalId,
+ const BigramProperty *const bigramProperty, bool *const outAddedNewEntry); // Adds or updates a bigram; *outAddedNewEntry (optional) reports whether an entry was added.
+
+ bool removeEntry(const int terminalId, const int targetTerminalId); // Invalidates the matching entry; false if it does not exist or the write fails.
+
+ bool updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,
+ int *const outBigramCount); // Adds the number of entries kept to *outBigramCount.
+
+ int getBigramEntryConut(const int terminalId); // Number of valid entries. NOTE(review): "Conut" is a typo for "Count"; renaming would change the interface.
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy);
+
+ int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos,
+ int *const outTailEntryPos) const; // Finds the entry to overwrite; may return a reusable invalid entry or NOT_A_DICT_POS.
+
+ const BigramEntry createUpdatedBigramEntryFrom(const BigramEntry *const originalBigramEntry,
+ const BigramProperty *const bigramProperty) const; // Builds the entry to write from the original entry and the new property.
+
+ bool updateHasNextFlag(const bool hasNext, const int bigramEntryPos); // Rewrites the has-next flag of the entry at bigramEntryPos.
+
+ BigramDictContent *const mBigramDictContent; // Non-const pointee: the policy writes bigram entries through it.
+ const TerminalPositionLookupTable *const mTerminalPositionLookupTable; // Maps terminal ids to PtNode positions.
+ const HeaderPolicy *const mHeaderPolicy; // Passed to ForgettingCurveUtils and queried for hasHistoricalInfoOfWords().
+};
+} // namespace v402
+} // namespace backward
+} // namespace latinime
+#endif /* LATINIME_BACKWARD_V402_VER4_BIGRAM_LIST_POLICY_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.cpp
new file mode 100644
index 000000000..e2dd93c5e
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.cpp
@@ -0,0 +1,224 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
+ */
+
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.h"
+
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// Reads one bigram entry starting at *bigramEntryPos and moves *bigramEntryPos past every
+// field that was read. If the entry would not fit inside the content buffer, an error is
+// logged, the position is left unchanged and an invalid entry (no next entry,
+// NOT_A_PROBABILITY, NOT_A_TERMINAL_ID) is returned.
+const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
+        int *const bigramEntryPos) const {
+    const BufferWithExtendableBuffer *const buffer = getContentBuffer();
+    const int entryTailPos = (*bigramEntryPos) + getBigramEntrySize();
+    if (*bigramEntryPos < 0 || entryTailPos > buffer->getTailPosition()) {
+        AKLOGE("Invalid bigram entry position. bigramEntryPos: %d, bigramEntryTailPos: %d, "
+                "bufSize: %d", *bigramEntryPos, entryTailPos, buffer->getTailPosition());
+        ASSERT(false);
+        return BigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
+                Ver4DictConstants::NOT_A_TERMINAL_ID);
+    }
+    const int flags = buffer->readUintAndAdvancePosition(
+            Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos);
+    const bool hasNext = (flags & Ver4DictConstants::BIGRAM_HAS_NEXT_MASK) != 0;
+    if (!mHasHistoricalInfo) {
+        // Layout without historical info: flags, probability, target terminal id.
+        const int probability = buffer->readUintAndAdvancePosition(
+                Ver4DictConstants::PROBABILITY_SIZE, bigramEntryPos);
+        const int storedTargetId = buffer->readUintAndAdvancePosition(
+                Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos);
+        // The stored "invalid target" marker is mapped back to NOT_A_TERMINAL_ID.
+        if (storedTargetId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) {
+            return BigramEntry(hasNext, probability, Ver4DictConstants::NOT_A_TERMINAL_ID);
+        }
+        return BigramEntry(hasNext, probability, storedTargetId);
+    }
+    // Layout with historical info: flags, timestamp, level, count, target terminal id.
+    const int timestamp = buffer->readUintAndAdvancePosition(
+            Ver4DictConstants::TIME_STAMP_FIELD_SIZE, bigramEntryPos);
+    const int level = buffer->readUintAndAdvancePosition(
+            Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, bigramEntryPos);
+    const int count = buffer->readUintAndAdvancePosition(
+            Ver4DictConstants::WORD_COUNT_FIELD_SIZE, bigramEntryPos);
+    const int storedTargetId = buffer->readUintAndAdvancePosition(
+            Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos);
+    int targetTerminalId = storedTargetId;
+    if (storedTargetId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) {
+        targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
+    }
+    const HistoricalInfo historicalInfo(timestamp, level, count);
+    // No probability is stored in this layout, so NOT_A_PROBABILITY is reported.
+    return BigramEntry(hasNext, NOT_A_PROBABILITY, &historicalInfo, targetTerminalId);
+}
+
+// Serializes bigramEntryToWrite at *entryWritingPos, advancing *entryWritingPos past each
+// field that is written. Field order: flags, then either the probability or the historical
+// info (timestamp, level, count) depending on mHasHistoricalInfo, then the target terminal id.
+// Returns false, after logging, as soon as one field cannot be written.
+bool BigramDictContent::writeBigramEntryAndAdvancePosition(
+        const BigramEntry *const bigramEntryToWrite, int *const entryWritingPos) {
+    BufferWithExtendableBuffer *const buffer = getWritableContentBuffer();
+    const int flags = createAndGetBigramFlags(bigramEntryToWrite->hasNext());
+    if (!buffer->writeUintAndAdvancePosition(flags,
+            Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) {
+        AKLOGE("Cannot write bigram flags. pos: %d, flags: %x", *entryWritingPos, flags);
+        return false;
+    }
+    if (!mHasHistoricalInfo) {
+        if (!buffer->writeUintAndAdvancePosition(bigramEntryToWrite->getProbability(),
+                Ver4DictConstants::PROBABILITY_SIZE, entryWritingPos)) {
+            AKLOGE("Cannot write bigram probability. pos: %d, probability: %d", *entryWritingPos,
+                    bigramEntryToWrite->getProbability());
+            return false;
+        }
+    } else {
+        const HistoricalInfo *const info = bigramEntryToWrite->getHistoricalInfo();
+        if (!buffer->writeUintAndAdvancePosition(info->getTimeStamp(),
+                Ver4DictConstants::TIME_STAMP_FIELD_SIZE, entryWritingPos)) {
+            AKLOGE("Cannot write bigram timestamps. pos: %d, timestamp: %d", *entryWritingPos,
+                    info->getTimeStamp());
+            return false;
+        }
+        if (!buffer->writeUintAndAdvancePosition(info->getLevel(),
+                Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, entryWritingPos)) {
+            AKLOGE("Cannot write bigram level. pos: %d, level: %d", *entryWritingPos,
+                    info->getLevel());
+            return false;
+        }
+        if (!buffer->writeUintAndAdvancePosition(info->getCount(),
+                Ver4DictConstants::WORD_COUNT_FIELD_SIZE, entryWritingPos)) {
+            AKLOGE("Cannot write bigram count. pos: %d, count: %d", *entryWritingPos,
+                    info->getCount());
+            return false;
+        }
+    }
+    // NOT_A_TERMINAL_ID is stored as the dedicated "invalid target" marker.
+    const int targetTerminalId = bigramEntryToWrite->getTargetTerminalId();
+    int encodedTargetTerminalId = targetTerminalId;
+    if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) {
+        encodedTargetTerminalId = Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID;
+    }
+    if (!buffer->writeUintAndAdvancePosition(encodedTargetTerminalId,
+            Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos)) {
+        AKLOGE("Cannot write bigram target terminal id. pos: %d, target terminal id: %d",
+                *entryWritingPos, targetTerminalId);
+        return false;
+    }
+    return true;
+}
+
+// Copies the bigram list that starts at bigramListPos to toPos, entry by entry, until an
+// entry without the hasNext flag has been copied. *outTailEntryPos receives the position
+// at which that final entry is written. Returns false if an entry cannot be written.
+bool BigramDictContent::copyBigramList(const int bigramListPos, const int toPos,
+        int *const outTailEntryPos) {
+    int sourcePos = bigramListPos;
+    int destinationPos = toPos;
+    bool moreEntries = false;
+    do {
+        const BigramEntry entry = getBigramEntryAndAdvancePosition(&sourcePos);
+        moreEntries = entry.hasNext();
+        if (!moreEntries) {
+            // Remember where the tail entry lands before the write advances the position.
+            *outTailEntryPos = destinationPos;
+        }
+        if (!writeBigramEntryAndAdvancePosition(&entry, &destinationPos)) {
+            AKLOGE("Cannot write bigram entry to copy. pos: %d", destinationPos);
+            return false;
+        }
+    } while (moreEntries);
+    return true;
+}
+
+// Rebuilds the bigram lists of this content from originalBigramDictContent.
+// For each pair in terminalIdMap, the key is used to look up the list in the original
+// content and the value is the terminal id under which the rewritten list is registered
+// here. Terminals without a list, or whose list ends up empty, get no entry in the lookup
+// table. The number of written entries is added to *outBigramEntryCount (it is only
+// incremented here, never reset). Returns false on the first failure.
+bool BigramDictContent::runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+        const BigramDictContent *const originalBigramDictContent,
+        int *const outBigramEntryCount) {
+    typedef TerminalPositionLookupTable::TerminalIdMap::const_iterator TerminalIdIterator;
+    const TerminalIdIterator mapEnd = terminalIdMap->end();
+    for (TerminalIdIterator entry = terminalIdMap->begin(); entry != mapEnd; ++entry) {
+        const int sourceListPos = originalBigramDictContent->getBigramListHeadPos(entry->first);
+        if (sourceListPos == NOT_A_DICT_POS) {
+            // This terminal does not have a bigram list.
+            continue;
+        }
+        // The rewritten list is appended at the current tail of this content.
+        const int newListPos = getContentBuffer()->getTailPosition();
+        int writtenEntryCount = 0;
+        if (!runGCBigramList(sourceListPos, originalBigramDictContent, newListPos,
+                terminalIdMap, &writtenEntryCount)) {
+            AKLOGE("Cannot complete GC for the bigram list. original pos: %d, pos: %d",
+                    sourceListPos, newListPos);
+            return false;
+        }
+        if (writtenEntryCount == 0) {
+            // All bigram entries are useless. This terminal does not have a bigram list.
+            continue;
+        }
+        *outBigramEntryCount += writtenEntryCount;
+        // Register the position of the new list in the lookup table.
+        if (!getUpdatableAddressLookupTable()->set(entry->second, newListPos)) {
+            AKLOGE("Cannot set bigram list position. terminal id: %d, pos: %d",
+                    entry->second, newListPos);
+            return false;
+        }
+    }
+    return true;
+}
+
+// Copies the bigram list starting at bigramListPos in sourceBigramDictContent to toPos in
+// this content, dropping entries that are invalidated or whose target terminal id is not
+// present in terminalIdMap, and rewriting the remaining target ids to their mapped values.
+// *outEntryCount is incremented once per entry written.
+// Returns whether GC for the bigram list succeeded.
+bool BigramDictContent::runGCBigramList(const int bigramListPos,
+        const BigramDictContent *const sourceBigramDictContent, const int toPos,
+        const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+        int *const outEntryCount) {
+    bool hasNext = true;
+    int readingPos = bigramListPos;
+    int writingPos = toPos;
+    // Position of the most recently written entry if it was written with its hasNext flag
+    // set; NOT_A_DICT_POS when nothing was written or the last written entry already
+    // terminates the list.
+    int lastEntryPos = NOT_A_DICT_POS;
+    while (hasNext) {
+        const BigramEntry originalBigramEntry =
+                sourceBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
+        hasNext = originalBigramEntry.hasNext();
+        if (originalBigramEntry.getTargetTerminalId() == Ver4DictConstants::NOT_A_TERMINAL_ID) {
+            // Invalidated entry; nothing to copy.
+            continue;
+        }
+        TerminalPositionLookupTable::TerminalIdMap::const_iterator it =
+                terminalIdMap->find(originalBigramEntry.getTargetTerminalId());
+        if (it == terminalIdMap->end()) {
+            // Target word has been removed.
+            continue;
+        }
+        lastEntryPos = hasNext ? writingPos : NOT_A_DICT_POS;
+        const BigramEntry updatedBigramEntry =
+                originalBigramEntry.updateTargetTerminalIdAndGetEntry(it->second);
+        if (!writeBigramEntryAndAdvancePosition(&updatedBigramEntry, &writingPos)) {
+            AKLOGE("Cannot write bigram entry to run GC. pos: %d", writingPos);
+            return false;
+        }
+        ++(*outEntryCount);
+    }
+    if (lastEntryPos != NOT_A_DICT_POS) {
+        // Every entry after the last written one was dropped, so the last written entry
+        // still claims to have a successor. Clear its hasNext flag.
+        const BigramEntry bigramEntry = getBigramEntry(lastEntryPos).updateHasNextAndGetEntry(
+                false /* hasNext */);
+        if (!writeBigramEntry(&bigramEntry, lastEntryPos)) {
+            // Report the position that was actually being rewritten.
+            AKLOGE("Cannot write bigram entry to set hasNext flag after GC. pos: %d",
+                    lastEntryPos);
+            return false;
+        }
+    }
+    return true;
+}
+
+} // namespace v402
+} // namespace backward
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.h
new file mode 100644
index 000000000..b554e5676
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
+ */
+
+#ifndef LATINIME_BACKWARD_V402_BIGRAM_DICT_CONTENT_H
+#define LATINIME_BACKWARD_V402_BIGRAM_DICT_CONTENT_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_entry.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// Bigram list content built on SparseTableDictContent. Bigram entries are read from and
+// written to the content buffer; the address lookup table maps a terminal id to the head
+// position of its bigram list. mHasHistoricalInfo selects which of the two entry layouts
+// (historical info or plain probability) is used.
+class BigramDictContent : public SparseTableDictContent {
+ public:
+    // Forwards dictPath, the bigram file extensions, isUpdatable and the bigram address
+    // table sizes to SparseTableDictContent.
+    BigramDictContent(const char *const dictPath, const bool hasHistoricalInfo,
+            const bool isUpdatable)
+            : SparseTableDictContent(dictPath,
+                      Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION,
+                      Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION,
+                      Ver4DictConstants::BIGRAM_FILE_EXTENSION, isUpdatable,
+                      Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
+                      Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
+              mHasHistoricalInfo(hasHistoricalInfo) {}
+
+    // Constructs a content from the bigram address table sizes only (no dictionary path).
+    BigramDictContent(const bool hasHistoricalInfo)
+            : SparseTableDictContent(Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
+                      Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
+              mHasHistoricalInfo(hasHistoricalInfo) {}
+
+    // Reads the entry at bigramEntryPos without exposing the advanced position.
+    const BigramEntry getBigramEntry(const int bigramEntryPos) const {
+        int pos = bigramEntryPos;
+        return getBigramEntryAndAdvancePosition(&pos);
+    }
+
+    const BigramEntry getBigramEntryAndAdvancePosition(int *const bigramEntryPos) const;
+
+    // Returns head position of bigram list for a PtNode specified by terminalId, or
+    // NOT_A_DICT_POS when the lookup table has no entry for it.
+    int getBigramListHeadPos(const int terminalId) const {
+        const SparseTable *const lookupTable = getAddressLookupTable();
+        return lookupTable->contains(terminalId) ?
+                lookupTable->get(terminalId) : NOT_A_DICT_POS;
+    }
+
+    // Writes the entry at the current tail position of the content buffer.
+    bool writeBigramEntryAtTail(const BigramEntry *const bigramEntryToWrite) {
+        int pos = getContentBuffer()->getTailPosition();
+        return writeBigramEntryAndAdvancePosition(bigramEntryToWrite, &pos);
+    }
+
+    // Writes the entry at entryWritingPos without exposing the advanced position.
+    bool writeBigramEntry(const BigramEntry *const bigramEntryToWrite, const int entryWritingPos) {
+        int pos = entryWritingPos;
+        return writeBigramEntryAndAdvancePosition(bigramEntryToWrite, &pos);
+    }
+
+    bool writeBigramEntryAndAdvancePosition(const BigramEntry *const bigramEntryToWrite,
+            int *const entryWritingPos);
+
+    // Registers the current tail position of the content buffer as the bigram list
+    // position of terminalId.
+    bool createNewBigramList(const int terminalId) {
+        const int tailPos = getContentBuffer()->getTailPosition();
+        return getUpdatableAddressLookupTable()->set(terminalId, tailPos);
+    }
+
+    bool copyBigramList(const int bigramListPos, const int toPos, int *const outTailEntryPos);
+
+    // Flushes this content under dictPath using the bigram file extensions.
+    bool flushToFile(const char *const dictPath) const {
+        return flush(dictPath, Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION,
+                Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION,
+                Ver4DictConstants::BIGRAM_FILE_EXTENSION);
+    }
+
+    bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+            const BigramDictContent *const originalBigramDictContent,
+            int *const outBigramEntryCount);
+
+    // Tells whether pos is exactly the tail position of the content buffer.
+    bool isContentTailPos(const int pos) const {
+        const int tailPos = getContentBuffer()->getTailPosition();
+        return tailPos == pos;
+    }
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(BigramDictContent);
+
+    // Builds the flags field: only the has-next bit is ever set.
+    int createAndGetBigramFlags(const bool hasNext) const {
+        if (hasNext) {
+            return Ver4DictConstants::BIGRAM_HAS_NEXT_MASK;
+        }
+        return 0;
+    }
+
+    // Size of one serialized entry for the layout selected by mHasHistoricalInfo.
+    int getBigramEntrySize() const {
+        // Fields shared by both layouts.
+        const int sharedFieldsSize = Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE
+                + Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
+        if (!mHasHistoricalInfo) {
+            return sharedFieldsSize + Ver4DictConstants::PROBABILITY_SIZE;
+        }
+        return sharedFieldsSize
+                + Ver4DictConstants::TIME_STAMP_FIELD_SIZE
+                + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
+                + Ver4DictConstants::WORD_COUNT_FIELD_SIZE;
+    }
+
+    bool runGCBigramList(const int bigramListPos,
+            const BigramDictContent *const sourceBigramDictContent, const int toPos,
+            const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+            int *const outEntryCount);
+
+    bool mHasHistoricalInfo;
+};
+} // namespace v402
+} // namespace backward
+} // namespace latinime
+#endif /* LATINIME_BACKWARD_V402_BIGRAM_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_entry.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_entry.h
new file mode 100644
index 000000000..40968b4d8
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_entry.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h
+ */
+
+#ifndef LATINIME_BACKWARD_V402_BIGRAM_ENTRY_H
+#define LATINIME_BACKWARD_V402_BIGRAM_ENTRY_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/utils/historical_info.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// Immutable value describing one entry of a bigram list: whether another entry follows,
+// a probability, historical information and the terminal id of the target word.
+// The update*AndGetEntry() methods return a modified copy and leave this entry unchanged.
+class BigramEntry {
+ public:
+    // Copies every field. The historical information must be copied as well; otherwise a
+    // copy that the compiler does not elide would silently lose timestamp, level and count.
+    BigramEntry(const BigramEntry& bigramEntry)
+            : mHasNext(bigramEntry.mHasNext), mProbability(bigramEntry.mProbability),
+              mHistoricalInfo(bigramEntry.mHistoricalInfo),
+              mTargetTerminalId(bigramEntry.mTargetTerminalId) {}
+
+    // Entry without historical information (mHistoricalInfo is default-constructed).
+    BigramEntry(const bool hasNext, const int probability, const int targetTerminalId)
+            : mHasNext(hasNext), mProbability(probability), mHistoricalInfo(),
+              mTargetTerminalId(targetTerminalId) {}
+
+    // Entry with historical information. historicalInfo is dereferenced and copied, so it
+    // must not be null.
+    BigramEntry(const bool hasNext, const int probability,
+            const HistoricalInfo *const historicalInfo, const int targetTerminalId)
+            : mHasNext(hasNext), mProbability(probability), mHistoricalInfo(*historicalInfo),
+              mTargetTerminalId(targetTerminalId) {}
+
+    // Returns a copy whose target terminal id is NOT_A_TERMINAL_ID, i.e. !isValid().
+    const BigramEntry getInvalidatedEntry() const {
+        return updateTargetTerminalIdAndGetEntry(Ver4DictConstants::NOT_A_TERMINAL_ID);
+    }
+
+    // Returns a copy with the has-next flag replaced.
+    const BigramEntry updateHasNextAndGetEntry(const bool hasNext) const {
+        return BigramEntry(hasNext, mProbability, &mHistoricalInfo, mTargetTerminalId);
+    }
+
+    // Returns a copy with the target terminal id replaced.
+    const BigramEntry updateTargetTerminalIdAndGetEntry(const int newTargetTerminalId) const {
+        return BigramEntry(mHasNext, mProbability, &mHistoricalInfo, newTargetTerminalId);
+    }
+
+    // Returns a copy with the probability replaced.
+    const BigramEntry updateProbabilityAndGetEntry(const int probability) const {
+        return BigramEntry(mHasNext, probability, &mHistoricalInfo, mTargetTerminalId);
+    }
+
+    // Returns a copy with the historical information replaced. historicalInfo must not be
+    // null because the constructor dereferences it.
+    const BigramEntry updateHistoricalInfoAndGetEntry(
+            const HistoricalInfo *const historicalInfo) const {
+        return BigramEntry(mHasNext, mProbability, historicalInfo, mTargetTerminalId);
+    }
+
+    // An entry is valid when it points to a target terminal.
+    bool isValid() const {
+        return mTargetTerminalId != Ver4DictConstants::NOT_A_TERMINAL_ID;
+    }
+
+    bool hasNext() const {
+        return mHasNext;
+    }
+
+    int getProbability() const {
+        return mProbability;
+    }
+
+    // Reports whether the stored historical information is valid.
+    bool hasHistoricalInfo() const {
+        return mHistoricalInfo.isValid();
+    }
+
+    // The returned pointer refers to a member of this entry and is only usable while this
+    // entry is alive.
+    const HistoricalInfo *getHistoricalInfo() const {
+        return &mHistoricalInfo;
+    }
+
+    int getTargetTerminalId() const {
+        return mTargetTerminalId;
+    }
+
+ private:
+    // Copy constructor is public to use this class as a type of return value.
+    DISALLOW_DEFAULT_CONSTRUCTOR(BigramEntry);
+    DISALLOW_ASSIGNMENT_OPERATOR(BigramEntry);
+
+    const bool mHasNext;
+    const int mProbability;
+    const HistoricalInfo mHistoricalInfo;
+    const int mTargetTerminalId;
+};
+} // namespace v402
+} // namespace backward
+} // namespace latinime
+#endif /* LATINIME_BACKWARD_V402_BIGRAM_ENTRY_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/dict_content.h
new file mode 100644
index 000000000..0f2f25534
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/dict_content.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/content/dict_content.h
+ */
+
+#ifndef LATINIME_BACKWARD_V402_DICT_CONTENT_H
+#define LATINIME_BACKWARD_V402_DICT_CONTENT_H
+
+#include "defines.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// Interface for dictionary content objects. It declares a virtual destructor and a pure
+// virtual isValid() query; construction is restricted to derived classes and copying is
+// disallowed.
+class DictContent {
+ public:
+    virtual ~DictContent() {
+    }
+
+    // Implemented by each concrete content class.
+    virtual bool isValid() const = 0;
+
+ protected:
+    // Only derived classes may construct a DictContent.
+    DictContent() {
+    }
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(DictContent);
+};
+} // namespace v402
+} // namespace backward
+} // namespace latinime
+#endif /* LATINIME_BACKWARD_V402_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.cpp
new file mode 100644
index 000000000..c671647d4
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.cpp
@@ -0,0 +1,171 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp
+ */
+
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.h"
+
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// Returns the probability entry stored for terminalId. A terminal id outside [0, mSize)
+// yields an entry with zero flags and NOT_A_PROBABILITY. Stored field order: flags,
+// probability and, when mHasHistoricalInfo is set, timestamp, level and count.
+const ProbabilityEntry ProbabilityDictContent::getProbabilityEntry(const int terminalId) const {
+    const bool isStoredTerminalId = (terminalId >= 0) && (terminalId < mSize);
+    if (!isStoredTerminalId) {
+        // This method can be called with invalid terminal id during GC.
+        return ProbabilityEntry(0 /* flags */, NOT_A_PROBABILITY);
+    }
+    const BufferWithExtendableBuffer *const contentBuffer = getBuffer();
+    int readingPos = getEntryPos(terminalId);
+    const int flags = contentBuffer->readUintAndAdvancePosition(
+            Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &readingPos);
+    const int probability = contentBuffer->readUintAndAdvancePosition(
+            Ver4DictConstants::PROBABILITY_SIZE, &readingPos);
+    if (!mHasHistoricalInfo) {
+        return ProbabilityEntry(flags, probability);
+    }
+    const int timestamp = contentBuffer->readUintAndAdvancePosition(
+            Ver4DictConstants::TIME_STAMP_FIELD_SIZE, &readingPos);
+    const int level = contentBuffer->readUintAndAdvancePosition(
+            Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, &readingPos);
+    const int count = contentBuffer->readUintAndAdvancePosition(
+            Ver4DictConstants::WORD_COUNT_FIELD_SIZE, &readingPos);
+    const HistoricalInfo historicalInfo(timestamp, level, count);
+    return ProbabilityEntry(flags, probability, &historicalInfo);
+}
+
+// Stores probabilityEntry at the slot of terminalId. When terminalId is not below mSize,
+// dummy entries are first written from the buffer tail up to and including that slot, and
+// mSize is incremented once per dummy entry. Returns false for a negative terminalId or
+// when a write fails.
+bool ProbabilityDictContent::setProbabilityEntry(const int terminalId,
+        const ProbabilityEntry *const probabilityEntry) {
+    if (terminalId < 0) {
+        return false;
+    }
+    const int targetPos = getEntryPos(terminalId);
+    if (terminalId >= mSize) {
+        const ProbabilityEntry fillerEntry;
+        // Pad with dummy entries, starting at the tail, until the target slot is covered.
+        for (int fillPos = getBuffer()->getTailPosition(); fillPos <= targetPos;
+                fillPos += getEntrySize()) {
+            if (!writeEntry(&fillerEntry, fillPos)) {
+                AKLOGE("Cannot write dummy entry. pos: %d, mSize: %d", fillPos, mSize);
+                return false;
+            }
+            mSize++;
+        }
+    }
+    return writeEntry(probabilityEntry, targetPos);
+}
+
+// Flushes the probability content under dictPath with the FREQ file extension.
+// When the buffer extends beyond the first mSize entries, those mSize entries are first
+// copied into a temporary content and the temporary content is flushed instead.
+bool ProbabilityDictContent::flushToFile(const char *const dictPath) const {
+    if (getEntryPos(mSize) >= getBuffer()->getTailPosition()) {
+        // Nothing beyond the first mSize entries; flush the buffer as it is.
+        return flush(dictPath, Ver4DictConstants::FREQ_FILE_EXTENSION);
+    }
+    ProbabilityDictContent trimmedContent(mHasHistoricalInfo);
+    for (int terminalId = 0; terminalId < mSize; ++terminalId) {
+        const ProbabilityEntry entry = getProbabilityEntry(terminalId);
+        if (!trimmedContent.setProbabilityEntry(terminalId, &entry)) {
+            AKLOGE("Cannot set probability entry in flushToFile. terminalId: %d", terminalId);
+            return false;
+        }
+    }
+    return trimmedContent.flush(dictPath, Ver4DictConstants::FREQ_FILE_EXTENSION);
+}
+
+// Refills this content from originalProbabilityDictContent. For each pair in
+// terminalIdMap, the entry read with the key from the original content is stored here
+// under the value. mSize is reset to 0 first and incremented once per stored entry.
+// NOTE(review): setProbabilityEntry() also increments mSize whenever it appends dummy
+// entries, so mSize may end up larger than the number of mapped terminals -- confirm
+// that this is intended.
+bool ProbabilityDictContent::runGC(
+        const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+        const ProbabilityDictContent *const originalProbabilityDictContent) {
+    typedef TerminalPositionLookupTable::TerminalIdMap::const_iterator TerminalIdIterator;
+    mSize = 0;
+    const TerminalIdIterator mapEnd = terminalIdMap->end();
+    for (TerminalIdIterator entry = terminalIdMap->begin(); entry != mapEnd; ++entry) {
+        const ProbabilityEntry sourceEntry =
+                originalProbabilityDictContent->getProbabilityEntry(entry->first);
+        if (!setProbabilityEntry(entry->second, &sourceEntry)) {
+            AKLOGE("Cannot set probability entry in runGC. terminalId: %d", entry->second);
+            return false;
+        }
+        mSize++;
+    }
+    return true;
+}
+
+// Size of one serialized probability entry: flags and probability, plus timestamp, level
+// and count when mHasHistoricalInfo is set.
+int ProbabilityDictContent::getEntrySize() const {
+    const int baseSize = Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE
+            + Ver4DictConstants::PROBABILITY_SIZE;
+    if (!mHasHistoricalInfo) {
+        return baseSize;
+    }
+    return baseSize
+            + Ver4DictConstants::TIME_STAMP_FIELD_SIZE
+            + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
+            + Ver4DictConstants::WORD_COUNT_FIELD_SIZE;
+}
+
+// Entries are fixed-size, so the slot of terminalId starts at terminalId times the entry
+// size.
+int ProbabilityDictContent::getEntryPos(const int terminalId) const {
+    const int entrySize = getEntrySize();
+    return terminalId * entrySize;
+}
+
+// Serializes probabilityEntry starting at entryPos: flags, probability and, when
+// mHasHistoricalInfo is set, timestamp, level and count. Returns false, after logging, as
+// soon as one field cannot be written.
+bool ProbabilityDictContent::writeEntry(const ProbabilityEntry *const probabilityEntry,
+        const int entryPos) {
+    BufferWithExtendableBuffer *const buffer = getWritableBuffer();
+    int pos = entryPos;
+    if (!buffer->writeUintAndAdvancePosition(probabilityEntry->getFlags(),
+            Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &pos)) {
+        AKLOGE("Cannot write flags in probability dict content. pos: %d", pos);
+        return false;
+    }
+    if (!buffer->writeUintAndAdvancePosition(probabilityEntry->getProbability(),
+            Ver4DictConstants::PROBABILITY_SIZE, &pos)) {
+        AKLOGE("Cannot write probability in probability dict content. pos: %d", pos);
+        return false;
+    }
+    if (!mHasHistoricalInfo) {
+        // The plain layout ends after the probability field.
+        return true;
+    }
+    const HistoricalInfo *const info = probabilityEntry->getHistoricalInfo();
+    if (!buffer->writeUintAndAdvancePosition(info->getTimeStamp(),
+            Ver4DictConstants::TIME_STAMP_FIELD_SIZE, &pos)) {
+        AKLOGE("Cannot write timestamp in probability dict content. pos: %d", pos);
+        return false;
+    }
+    if (!buffer->writeUintAndAdvancePosition(info->getLevel(),
+            Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, &pos)) {
+        AKLOGE("Cannot write level in probability dict content. pos: %d", pos);
+        return false;
+    }
+    if (!buffer->writeUintAndAdvancePosition(info->getCount(),
+            Ver4DictConstants::WORD_COUNT_FIELD_SIZE, &pos)) {
+        AKLOGE("Cannot write count in probability dict content. pos: %d", pos);
+        return false;
+    }
+    return true;
+}
+
+} // namespace v402
+} // namespace backward
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.h
new file mode 100644
index 000000000..3734797d4
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h
+ */
+
+#ifndef LATINIME_BACKWARD_V402_PROBABILITY_DICT_CONTENT_H
+#define LATINIME_BACKWARD_V402_PROBABILITY_DICT_CONTENT_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/single_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+class ProbabilityEntry;
+
+// Probability content built on SingleDictContent. Entries are fixed-size and addressed by
+// terminal id; mSize tracks how many entries this content considers stored.
+class ProbabilityDictContent : public SingleDictContent {
+ public:
+ // Forwards dictPath, the FREQ file extension and isUpdatable to SingleDictContent and
+ // derives mSize from the buffer tail position divided by the entry size.
+ // mHasHistoricalInfo is declared before mSize, so it is already initialized when
+ // getEntrySize() reads it here.
+ ProbabilityDictContent(const char *const dictPath, const bool hasHistoricalInfo,
+ const bool isUpdatable)
+ : SingleDictContent(dictPath, Ver4DictConstants::FREQ_FILE_EXTENSION, isUpdatable),
+ mHasHistoricalInfo(hasHistoricalInfo),
+ mSize(getBuffer()->getTailPosition() / getEntrySize()) {}
+
+ // Constructs a content with mSize 0 (no dictionary path is given).
+ ProbabilityDictContent(const bool hasHistoricalInfo)
+ : mHasHistoricalInfo(hasHistoricalInfo), mSize(0) {}
+
+ // Returns the entry stored for terminalId.
+ const ProbabilityEntry getProbabilityEntry(const int terminalId) const;
+
+ // Stores probabilityEntry for terminalId; returns false on failure.
+ bool setProbabilityEntry(const int terminalId, const ProbabilityEntry *const probabilityEntry);
+
+ // Flushes this content under dictPath; returns false on failure.
+ bool flushToFile(const char *const dictPath) const;
+
+ // Refills this content from originalProbabilityDictContent using terminalIdMap.
+ bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+ const ProbabilityDictContent *const originalProbabilityDictContent);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ProbabilityDictContent);
+
+ // Size of one serialized entry; depends on mHasHistoricalInfo.
+ int getEntrySize() const;
+
+ // Buffer position of the entry for terminalId.
+ int getEntryPos(const int terminalId) const;
+
+ // Serializes probabilityEntry at entryPos; returns false on failure.
+ bool writeEntry(const ProbabilityEntry *const probabilityEntry, const int entryPos);
+
+ // Whether entries carry timestamp, level and count fields.
+ bool mHasHistoricalInfo;
+ // Number of entries this content considers stored.
+ int mSize;
+};
+} // namespace v402
+} // namespace backward
+} // namespace latinime
+#endif /* LATINIME_BACKWARD_V402_PROBABILITY_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h
new file mode 100644
index 000000000..8ccfa33dc
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h
+ */
+
+#ifndef LATINIME_BACKWARD_V402_PROBABILITY_ENTRY_H
+#define LATINIME_BACKWARD_V402_PROBABILITY_ENTRY_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/utils/historical_info.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// Immutable value object bundling the flags, the probability and the historical information
+// of a dictionary entry. New values are obtained through the createEntryWith...() factories
+// rather than by mutating an existing instance.
+class ProbabilityEntry {
+ public:
+    // Builds a dummy entry: no flags, NOT_A_PROBABILITY and default historical info.
+    ProbabilityEntry()
+            : mFlags(0), mProbability(NOT_A_PROBABILITY), mHistoricalInfo() {}
+
+    // Builds an entry that carries no historical information (default HistoricalInfo).
+    ProbabilityEntry(const int flags, const int probability)
+            : mFlags(flags), mProbability(probability), mHistoricalInfo() {}
+
+    // Builds an entry with historical information. historicalInfo is dereferenced
+    // unconditionally and copied, so it must not be null.
+    ProbabilityEntry(const int flags, const int probability,
+            const HistoricalInfo *const historicalInfo)
+            : mFlags(flags), mProbability(probability), mHistoricalInfo(*historicalInfo) {}
+
+    // The copy constructor stays public so that this class can be returned by value.
+    ProbabilityEntry(const ProbabilityEntry &other)
+            : mFlags(other.mFlags), mProbability(other.mProbability),
+              mHistoricalInfo(other.mHistoricalInfo) {}
+
+    // Returns a copy of this entry whose probability is replaced by the given one.
+    const ProbabilityEntry createEntryWithUpdatedProbability(const int probability) const {
+        return ProbabilityEntry(mFlags, probability, &mHistoricalInfo);
+    }
+
+    // Returns a copy of this entry whose historical info is replaced by the given one.
+    const ProbabilityEntry createEntryWithUpdatedHistoricalInfo(
+            const HistoricalInfo *const historicalInfo) const {
+        return ProbabilityEntry(mFlags, mProbability, historicalInfo);
+    }
+
+    // True when the stored historical info reports itself as valid.
+    bool hasHistoricalInfo() const { return mHistoricalInfo.isValid(); }
+
+    int getFlags() const { return mFlags; }
+
+    int getProbability() const { return mProbability; }
+
+    // Returns a pointer to the internally stored historical info.
+    const HistoricalInfo *getHistoricalInfo() const { return &mHistoricalInfo; }
+
+ private:
+    // Only assignment is disallowed; copy construction is intentionally available (see above).
+    DISALLOW_ASSIGNMENT_OPERATOR(ProbabilityEntry);
+
+    const int mFlags;
+    const int mProbability;
+    const HistoricalInfo mHistoricalInfo;
+};
+} // namespace v402
+} // namespace backward
+} // namespace latinime
+#endif /* LATINIME_BACKWARD_V402_PROBABILITY_ENTRY_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/shortcut_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/shortcut_dict_content.cpp
new file mode 100644
index 000000000..56bc8b98d
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/shortcut_dict_content.cpp
@@ -0,0 +1,199 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.cpp
+ */
+
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/shortcut_dict_content.h"
+
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// Reads the shortcut entry located at *shortcutEntryPos and advances *shortcutEntryPos by the
+// amount of data that was read. outProbability, outhasNext, outCodePoint and outCodePointCount
+// are optional (may be null); the target code points are read only when both outCodePoint and
+// outCodePointCount are supplied. For an out-of-range position an error is logged and an
+// empty entry without successor is reported.
+void ShortcutDictContent::getShortcutEntryAndAdvancePosition(const int maxCodePointCount,
+        int *const outCodePoint, int *const outCodePointCount, int *const outProbability,
+        bool *const outhasNext, int *const shortcutEntryPos) const {
+    const BufferWithExtendableBuffer *const buffer = getContentBuffer();
+    const bool isPosInsideBuffer =
+            *shortcutEntryPos >= 0 && *shortcutEntryPos < buffer->getTailPosition();
+    if (!isPosInsideBuffer) {
+        AKLOGE("Invalid shortcut entry position. shortcutEntryPos: %d, bufSize: %d",
+                *shortcutEntryPos, buffer->getTailPosition());
+        ASSERT(false);
+        // Make callers that walk the list terminate: no next entry, no code points.
+        if (outhasNext) {
+            *outhasNext = false;
+        }
+        if (outCodePointCount) {
+            *outCodePointCount = 0;
+        }
+        return;
+    }
+
+    // The flags field packs the probability together with the has-next bit.
+    const int flags = buffer->readUintAndAdvancePosition(
+            Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos);
+    if (outProbability) {
+        *outProbability = flags & Ver4DictConstants::SHORTCUT_PROBABILITY_MASK;
+    }
+    if (outhasNext) {
+        *outhasNext = (flags & Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK) != 0;
+    }
+    if (outCodePoint && outCodePointCount) {
+        buffer->readCodePointsAndAdvancePosition(
+                maxCodePointCount, outCodePoint, outCodePointCount, shortcutEntryPos);
+    }
+}
+
+// Looks up the head position of the shortcut list registered for terminalId in the address
+// lookup table. Returns NOT_A_DICT_POS when the table has no entry for that id.
+int ShortcutDictContent::getShortcutListHeadPos(const int terminalId) const {
+    const SparseTable *const lookupTable = getAddressLookupTable();
+    if (lookupTable->contains(terminalId)) {
+        return lookupTable->get(terminalId);
+    }
+    return NOT_A_DICT_POS;
+}
+
+// Writes the lookup table, the address (content) table and the shortcut content to files
+// derived from dictPath, using the shortcut-specific file extensions. Returns the result of
+// the underlying flush().
+bool ShortcutDictContent::flushToFile(const char *const dictPath) const {
+    const char *const lookupTableExtension =
+            Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION;
+    const char *const addressTableExtension =
+            Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION;
+    const char *const contentExtension = Ver4DictConstants::SHORTCUT_FILE_EXTENSION;
+    return flush(dictPath, lookupTableExtension, addressTableExtension, contentExtension);
+}
+
+// Rebuilds this content from originalShortcutDictContent. For each (old terminal id ->
+// new terminal id) pair in terminalIdMap that has a shortcut list in the original content, the
+// list is appended at the tail of this content buffer and registered under the new id.
+// Returns false as soon as a copy or a lookup table update fails.
+bool ShortcutDictContent::runGC(
+        const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+        const ShortcutDictContent *const originalShortcutDictContent) {
+    for (const auto &idMapping : *terminalIdMap) {
+        const int sourceListPos =
+                originalShortcutDictContent->getShortcutListHeadPos(idMapping.first);
+        if (sourceListPos == NOT_A_DICT_POS) {
+            // The old terminal had no shortcut list; nothing to carry over.
+            continue;
+        }
+        // The copied list is appended at the current tail of this content buffer.
+        const int destinationListPos = getContentBuffer()->getTailPosition();
+        const bool isCopied = copyShortcutListFromDictContent(sourceListPos,
+                originalShortcutDictContent, destinationListPos);
+        if (!isCopied) {
+            AKLOGE("Cannot copy shortcut list during GC. original pos: %d, pos: %d",
+                    sourceListPos, destinationListPos);
+            return false;
+        }
+        // Register the copied list under the new terminal id.
+        const bool isRegistered =
+                getUpdatableAddressLookupTable()->set(idMapping.second, destinationListPos);
+        if (!isRegistered) {
+            AKLOGE("Cannot set shortcut list position. terminal id: %d, pos: %d",
+                    idMapping.second, destinationListPos);
+            return false;
+        }
+    }
+    return true;
+}
+
+// Registers the current tail of the content buffer as the shortcut list position for
+// terminalId. Nothing is written to the content buffer here; returns the result of the
+// lookup table update.
+bool ShortcutDictContent::createNewShortcutList(const int terminalId) {
+    const int newListPos = getContentBuffer()->getTailPosition();
+    SparseTable *const lookupTable = getUpdatableAddressLookupTable();
+    return lookupTable->set(terminalId, newListPos);
+}
+
+// Copies the shortcut list starting at shortcutListPos to toPos within this same content.
+bool ShortcutDictContent::copyShortcutList(const int shortcutListPos, const int toPos) {
+    const ShortcutDictContent *const source = this;
+    return copyShortcutListFromDictContent(shortcutListPos, source, toPos);
+}
+
+// Copies a whole shortcut list, entry by entry, from sourceShortcutDictContent (starting at
+// shortcutListPos) into this content (starting at toPos). Copying stops after the entry whose
+// has-next flag is not set. Returns false when an entry cannot be written.
+bool ShortcutDictContent::copyShortcutListFromDictContent(const int shortcutListPos,
+        const ShortcutDictContent *const sourceShortcutDictContent, const int toPos) {
+    int sourcePos = shortcutListPos;
+    int destinationPos = toPos;
+    int targetCodePoints[MAX_WORD_LENGTH];
+    bool hasNext = true;
+    // At least one entry is always processed, hence the do-while.
+    do {
+        int probability = 0;
+        int targetCodePointCount = 0;
+        sourceShortcutDictContent->getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH,
+                targetCodePoints, &targetCodePointCount, &probability, &hasNext, &sourcePos);
+        const bool isWritten = writeShortcutEntryAndAdvancePosition(targetCodePoints,
+                targetCodePointCount, probability, hasNext, &destinationPos);
+        if (!isWritten) {
+            AKLOGE("Cannot write shortcut entry to copy. pos: %d", destinationPos);
+            return false;
+        }
+    } while (hasNext);
+    return true;
+}
+
+// Replaces the probability stored in the flags field of the entry at shortcutEntryPos while
+// keeping the entry's has-next bit. Returns whether the flags could be written back.
+bool ShortcutDictContent::setProbability(const int probability, const int shortcutEntryPos) {
+    BufferWithExtendableBuffer *const buffer = getWritableContentBuffer();
+    // Read the current flags only to recover the has-next bit.
+    const int currentFlags = buffer->readUint(
+            Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos);
+    const bool hasNext = currentFlags & Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK;
+    const int updatedFlags = createAndGetShortcutFlags(probability, hasNext);
+    return buffer->writeUint(updatedFlags,
+            Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos);
+}
+
+// Writes one shortcut entry (flags followed by the terminated target code points) at
+// *shortcutEntryPos and advances *shortcutEntryPos accordingly. Returns false, after logging,
+// when either part cannot be written.
+bool ShortcutDictContent::writeShortcutEntryAndAdvancePosition(const int *const codePoint,
+        const int codePointCount, const int probability, const bool hasNext,
+        int *const shortcutEntryPos) {
+    BufferWithExtendableBuffer *const buffer = getWritableContentBuffer();
+    const int flags = createAndGetShortcutFlags(probability, hasNext);
+    const bool areFlagsWritten = buffer->writeUintAndAdvancePosition(flags,
+            Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos);
+    if (!areFlagsWritten) {
+        AKLOGE("Cannot write shortcut flags. flags; %x, pos: %d", flags, *shortcutEntryPos);
+        return false;
+    }
+    const bool areCodePointsWritten = buffer->writeCodePointsAndAdvancePosition(codePoint,
+            codePointCount, true /* writesTerminator */, shortcutEntryPos);
+    if (!areCodePointsWritten) {
+        AKLOGE("Cannot write shortcut target code points. pos: %d", *shortcutEntryPos);
+        return false;
+    }
+    return true;
+}
+
+// Walks the shortcut list that starts at shortcutListPos and returns the position of the first
+// entry whose target equals targetCodePointsToFind (codePointCount code points). Returns
+// NOT_A_DICT_POS when the list holds no such entry.
+int ShortcutDictContent::findShortcutEntryAndGetPos(const int shortcutListPos,
+        const int *const targetCodePointsToFind, const int codePointCount) const {
+    int readingPos = shortcutListPos;
+    int entryCodePoints[MAX_WORD_LENGTH];
+    bool hasNext = true;
+    do {
+        // Remember where this entry starts; readingPos moves on while reading it.
+        const int entryPos = readingPos;
+        int probability = 0;
+        int entryCodePointCount = 0;
+        getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH, entryCodePoints,
+                &entryCodePointCount, &probability, &hasNext, &readingPos);
+        if (entryCodePointCount == codePointCount) {
+            // Count the leading code points that agree with the requested target.
+            int matchedCount = 0;
+            while (matchedCount < codePointCount
+                    && targetCodePointsToFind[matchedCount] == entryCodePoints[matchedCount]) {
+                ++matchedCount;
+            }
+            if (matchedCount >= codePointCount) {
+                return entryPos;
+            }
+        }
+    } while (hasNext);
+    return NOT_A_DICT_POS;
+}
+
+// Packs a probability (masked with SHORTCUT_PROBABILITY_MASK) and the has-next bit into the
+// value stored in the flags field of a shortcut entry.
+int ShortcutDictContent::createAndGetShortcutFlags(const int probability,
+        const bool hasNext) const {
+    int flags = probability & Ver4DictConstants::SHORTCUT_PROBABILITY_MASK;
+    if (hasNext) {
+        flags |= Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK;
+    }
+    return flags;
+}
+
+} // namespace v402
+} // namespace backward
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/shortcut_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/shortcut_dict_content.h
new file mode 100644
index 000000000..179cec5bb
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/shortcut_dict_content.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h
+ */
+
+#ifndef LATINIME_BACKWARD_V402_SHORTCUT_DICT_CONTENT_H
+#define LATINIME_BACKWARD_V402_SHORTCUT_DICT_CONTENT_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// Dictionary content that stores shortcut lists. The head position of the list belonging to a
+// terminal is found through the sparse address lookup table of the base class; the entries
+// themselves live in the content buffer.
+class ShortcutDictContent : public SparseTableDictContent {
+ public:
+    // Opens the three shortcut files (lookup table, address table, content) under dictPath.
+    ShortcutDictContent(const char *const dictPath, const bool isUpdatable)
+            : SparseTableDictContent(dictPath,
+                      Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION,
+                      Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION,
+                      Ver4DictConstants::SHORTCUT_FILE_EXTENSION, isUpdatable,
+                      Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
+                      Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
+
+    // Creates an empty content that is not backed by files.
+    ShortcutDictContent()
+            : SparseTableDictContent(Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
+                      Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
+
+    // Reads the shortcut entry at shortcutEntryPos without exposing the advanced position.
+    // See getShortcutEntryAndAdvancePosition() for the meaning of the output parameters.
+    // This only reads, so it is const and usable through a pointer to const.
+    void getShortcutEntry(const int maxCodePointCount, int *const outCodePoint,
+            int *const outCodePointCount, int *const outProbability, bool *const outhasNext,
+            const int shortcutEntryPos) const {
+        int readingPos = shortcutEntryPos;
+        getShortcutEntryAndAdvancePosition(maxCodePointCount, outCodePoint,
+                outCodePointCount, outProbability, outhasNext, &readingPos);
+    }
+
+    // Reads the shortcut entry at *shortcutEntryPos and advances *shortcutEntryPos.
+    void getShortcutEntryAndAdvancePosition(const int maxCodePointCount,
+            int *const outCodePoint, int *const outCodePointCount, int *const outProbability,
+            bool *const outhasNext, int *const shortcutEntryPos) const;
+
+    // Returns head position of shortcut list for a PtNode specified by terminalId.
+    int getShortcutListHeadPos(const int terminalId) const;
+
+    // Writes the lookup table, address table and content to files derived from dictPath.
+    bool flushToFile(const char *const dictPath) const;
+
+    // Rebuilds this content from originalShortcutDictContent, re-keying the shortcut lists
+    // with terminalIdMap.
+    bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+            const ShortcutDictContent *const originalShortcutDictContent);
+
+    // Registers a shortcut list position for terminalId in the address lookup table.
+    bool createNewShortcutList(const int terminalId);
+
+    // Copies the shortcut list at shortcutListPos to toPos within this content.
+    bool copyShortcutList(const int shortcutListPos, const int toPos);
+
+    // Updates the probability of the entry located at shortcutEntryPos.
+    bool setProbability(const int probability, const int shortcutEntryPos);
+
+    // Writes one shortcut entry at shortcutEntryPos without exposing the advanced position.
+    bool writeShortcutEntry(const int *const codePoint, const int codePointCount,
+            const int probability, const bool hasNext, const int shortcutEntryPos) {
+        int writingPos = shortcutEntryPos;
+        return writeShortcutEntryAndAdvancePosition(codePoint, codePointCount, probability,
+                hasNext, &writingPos);
+    }
+
+    // Writes one shortcut entry at *shortcutEntryPos and advances *shortcutEntryPos.
+    bool writeShortcutEntryAndAdvancePosition(const int *const codePoint,
+            const int codePointCount, const int probability, const bool hasNext,
+            int *const shortcutEntryPos);
+
+    // Returns the position of the entry in the list at shortcutListPos whose target matches
+    // targetCodePointsToFind.
+    int findShortcutEntryAndGetPos(const int shortcutListPos,
+            const int *const targetCodePointsToFind, const int codePointCount) const;
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(ShortcutDictContent);
+
+    // Copies a shortcut list from sourceShortcutDictContent into this content at toPos.
+    bool copyShortcutListFromDictContent(const int shortcutListPos,
+            const ShortcutDictContent *const sourceShortcutDictContent, const int toPos);
+
+    // Packs a probability and the has-next bit into a flags value.
+    int createAndGetShortcutFlags(const int probability, const bool hasNext) const;
+};
+} // namespace v402
+} // namespace backward
+} // namespace latinime
+#endif /* LATINIME_BACKWARD_V402_SHORTCUT_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/single_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/single_dict_content.h
new file mode 100644
index 000000000..6433650b0
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/single_dict_content.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h
+ */
+
+#ifndef LATINIME_BACKWARD_V402_SINGLE_DICT_CONTENT_H
+#define LATINIME_BACKWARD_V402_SINGLE_DICT_CONTENT_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
+#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// Base class for dictionary contents that consist of exactly one buffer, optionally backed by
+// a memory-mapped file.
+class SingleDictContent : public DictContent {
+ public:
+ // Opens contentFileName under dictPath. When opening fails (null mMmappedBuffer) the
+ // expandable buffer is built over a null/zero-sized original buffer and the content is
+ // marked invalid. The initializers rely on mMmappedBuffer being declared, and hence
+ // initialized, before mExpandableContentBuffer and mIsValid.
+ SingleDictContent(const char *const dictPath, const char *const contentFileName,
+ const bool isUpdatable)
+ : mMmappedBuffer(MmappedBuffer::openBuffer(dictPath, contentFileName, isUpdatable)),
+ mExpandableContentBuffer(mMmappedBuffer ? mMmappedBuffer->getBuffer() : nullptr,
+ mMmappedBuffer ? mMmappedBuffer->getBufferSize() : 0,
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
+ mIsValid(mMmappedBuffer) {}
+
+ // Creates a content without any backing file; it is always reported as valid. The buffer is
+ // constructed with Ver4DictConstants::MAX_DICTIONARY_SIZE.
+ SingleDictContent()
+ : mMmappedBuffer(nullptr),
+ mExpandableContentBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE), mIsValid(true) {}
+
+ virtual ~SingleDictContent() {}
+
+ // Whether the content was set up successfully (see the constructors).
+ virtual bool isValid() const {
+ return mIsValid;
+ }
+
+ // Forwards to the buffer's own size-limit check.
+ bool isNearSizeLimit() const {
+ return mExpandableContentBuffer.isNearSizeLimit();
+ }
+
+ protected:
+ // Mutable access to the content buffer for subclasses.
+ BufferWithExtendableBuffer *getWritableBuffer() {
+ return &mExpandableContentBuffer;
+ }
+
+ // Read-only access to the content buffer for subclasses.
+ const BufferWithExtendableBuffer *getBuffer() const {
+ return &mExpandableContentBuffer;
+ }
+
+ // Writes the content buffer to the file identified by dictPath and contentFileNameSuffix.
+ bool flush(const char *const dictPath, const char *const contentFileNameSuffix) const {
+ return DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath,
+ contentFileNameSuffix, &mExpandableContentBuffer);
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(SingleDictContent);
+
+ // Declaration order matters: the members below are initialized from mMmappedBuffer.
+ const MmappedBuffer::MmappedBufferPtr mMmappedBuffer;
+ BufferWithExtendableBuffer mExpandableContentBuffer;
+ const bool mIsValid;
+};
+} // namespace v402
+} // namespace backward
+} // namespace latinime
+#endif /* LATINIME_BACKWARD_V402_SINGLE_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.cpp
new file mode 100644
index 000000000..7c9b4967a
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.cpp
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.cpp
+ */
+
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// Writes the lookup table buffer, the address table buffer and the content buffer, in that
+// order, to files derived from dictPath and the respective suffix. Stops at the first failure
+// and returns false; returns true only when all three files were written.
+bool SparseTableDictContent::flush(const char *const dictPath,
+        const char *const lookupTableFileNameSuffix, const char *const addressTableFileNameSuffix,
+        const char *const contentFileNameSuffix) const {
+    // && short-circuits, so later buffers are not written after an earlier failure.
+    return DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath,
+                   lookupTableFileNameSuffix, &mExpandableLookupTableBuffer)
+            && DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath,
+                   addressTableFileNameSuffix, &mExpandableAddressTableBuffer)
+            && DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath,
+                   contentFileNameSuffix, &mExpandableContentBuffer);
+}
+
+} // namespace v402
+} // namespace backward
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.h
new file mode 100644
index 000000000..c7233edd3
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h
+ */
+
+#ifndef LATINIME_BACKWARD_V402_SPARSE_TABLE_DICT_CONTENT_H
+#define LATINIME_BACKWARD_V402_SPARSE_TABLE_DICT_CONTENT_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
+#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/sparse_table.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// Base class for dictionary contents made of a content buffer plus a SparseTable (lookup
+// table buffer + address table buffer) that is used to address into the content.
+// TODO: Support multiple contents.
+class SparseTableDictContent : public DictContent {
+ public:
+ // Opens the three files under dictPath and wraps each one in an expandable buffer. A file
+ // that cannot be opened yields a null/zero-sized original buffer, and the content is valid
+ // only when all three files were opened. The initializer list relies on the member
+ // declaration order: the mmapped buffers first, then the expandable buffers, then
+ // mAddressLookupTable, which receives the addresses of two expandable buffers.
+ AK_FORCE_INLINE SparseTableDictContent(const char *const dictPath,
+ const char *const lookupTableFileName, const char *const addressTableFileName,
+ const char *const contentFileName, const bool isUpdatable,
+ const int sparseTableBlockSize, const int sparseTableDataSize)
+ : mLookupTableBuffer(
+ MmappedBuffer::openBuffer(dictPath, lookupTableFileName, isUpdatable)),
+ mAddressTableBuffer(
+ MmappedBuffer::openBuffer(dictPath, addressTableFileName, isUpdatable)),
+ mContentBuffer(
+ MmappedBuffer::openBuffer(dictPath, contentFileName, isUpdatable)),
+ mExpandableLookupTableBuffer(
+ mLookupTableBuffer ? mLookupTableBuffer->getBuffer() : nullptr,
+ mLookupTableBuffer ? mLookupTableBuffer->getBufferSize() : 0,
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
+ mExpandableAddressTableBuffer(
+ mAddressTableBuffer ? mAddressTableBuffer->getBuffer() : nullptr,
+ mAddressTableBuffer ? mAddressTableBuffer->getBufferSize() : 0,
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
+ mExpandableContentBuffer(mContentBuffer ? mContentBuffer->getBuffer() : nullptr,
+ mContentBuffer ? mContentBuffer->getBufferSize() : 0,
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
+ mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer,
+ sparseTableBlockSize, sparseTableDataSize),
+ mIsValid(mLookupTableBuffer && mAddressTableBuffer && mContentBuffer) {}
+
+ // Creates a content without backing files; it is always reported as valid. Each buffer is
+ // constructed with Ver4DictConstants::MAX_DICTIONARY_SIZE.
+ SparseTableDictContent(const int sparseTableBlockSize, const int sparseTableDataSize)
+ : mLookupTableBuffer(), mAddressTableBuffer(), mContentBuffer(),
+ mExpandableLookupTableBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
+ mExpandableAddressTableBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
+ mExpandableContentBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
+ mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer,
+ sparseTableBlockSize, sparseTableDataSize), mIsValid(true) {}
+
+ virtual ~SparseTableDictContent() {}
+
+ // Whether the content was set up successfully (see the constructors).
+ virtual bool isValid() const {
+ return mIsValid;
+ }
+
+ // True when any of the three buffers reports that it is near its size limit.
+ bool isNearSizeLimit() const {
+ return mExpandableLookupTableBuffer.isNearSizeLimit()
+ || mExpandableAddressTableBuffer.isNearSizeLimit()
+ || mExpandableContentBuffer.isNearSizeLimit();
+ }
+
+ protected:
+ // Mutable access to the address lookup table for subclasses.
+ SparseTable *getUpdatableAddressLookupTable() {
+ return &mAddressLookupTable;
+ }
+
+ // Read-only access to the address lookup table for subclasses.
+ const SparseTable *getAddressLookupTable() const {
+ return &mAddressLookupTable;
+ }
+
+ // Mutable access to the content buffer for subclasses.
+ BufferWithExtendableBuffer *getWritableContentBuffer() {
+ return &mExpandableContentBuffer;
+ }
+
+ // Read-only access to the content buffer for subclasses.
+ const BufferWithExtendableBuffer *getContentBuffer() const {
+ return &mExpandableContentBuffer;
+ }
+
+ // Writes the lookup table, the address table and the content to files. Returns whether all
+ // writes succeeded.
+ bool flush(const char *const dictDirPath, const char *const lookupTableFileName,
+ const char *const addressTableFileName, const char *const contentFileName) const;
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTableDictContent);
+
+ // Declaration order matters: later members are initialized from earlier ones.
+ const MmappedBuffer::MmappedBufferPtr mLookupTableBuffer;
+ const MmappedBuffer::MmappedBufferPtr mAddressTableBuffer;
+ const MmappedBuffer::MmappedBufferPtr mContentBuffer;
+ BufferWithExtendableBuffer mExpandableLookupTableBuffer;
+ BufferWithExtendableBuffer mExpandableAddressTableBuffer;
+ BufferWithExtendableBuffer mExpandableContentBuffer;
+ SparseTable mAddressLookupTable;
+ const bool mIsValid;
+};
+} // namespace v402
+} // namespace backward
+} // namespace latinime
+#endif /* LATINIME_BACKWARD_V402_SPARSE_TABLE_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.cpp
new file mode 100644
index 000000000..a9f841779
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.cpp
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.cpp
+ */
+
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h"
+
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// Returns the PtNode position stored for terminalId, or NOT_A_DICT_POS when terminalId is
+// outside [0, mSize) or when its slot holds NOT_A_TERMINAL_ADDRESS.
+int TerminalPositionLookupTable::getTerminalPtNodePosition(const int terminalId) const {
+    const bool isKnownTerminalId = terminalId >= 0 && terminalId < mSize;
+    if (!isKnownTerminalId) {
+        return NOT_A_DICT_POS;
+    }
+    const int storedAddress = getBuffer()->readUint(
+            Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(terminalId));
+    if (storedAddress == Ver4DictConstants::NOT_A_TERMINAL_ADDRESS) {
+        // The slot exists but does not point at a terminal.
+        return NOT_A_DICT_POS;
+    }
+    return storedAddress;
+}
+
+// Stores terminalPtNodePos as the PtNode position of terminalId. When terminalId is beyond
+// the current table size, the table is first extended with NOT_A_TERMINAL_ADDRESS slots up to
+// and including terminalId. NOT_A_DICT_POS is stored as NOT_A_TERMINAL_ADDRESS.
+// Returns false for a negative terminalId or when a buffer write fails, true otherwise.
+bool TerminalPositionLookupTable::setTerminalPtNodePosition(
+        const int terminalId, const int terminalPtNodePos) {
+    if (terminalId < 0) {
+        // This function reports success as a bool. Returning the position sentinel
+        // NOT_A_DICT_POS here would implicitly convert to true for any non-zero sentinel value
+        // and report success for an invalid terminal id.
+        return false;
+    }
+    while (terminalId >= mSize) {
+        // Write new entry.
+        if (!getWritableBuffer()->writeUint(Ver4DictConstants::NOT_A_TERMINAL_ADDRESS,
+                Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(mSize))) {
+            return false;
+        }
+        mSize++;
+    }
+    const int terminalPos = (terminalPtNodePos != NOT_A_DICT_POS) ?
+            terminalPtNodePos : Ver4DictConstants::NOT_A_TERMINAL_ADDRESS;
+    return getWritableBuffer()->writeUint(terminalPos,
+            Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(terminalId));
+}
+
+// Writes the lookup table to the terminal address table file derived from dictPath. When the
+// buffer holds more data than the mSize entries in use, a fresh table containing only the
+// entries in use is built and written instead of this one.
+bool TerminalPositionLookupTable::flushToFile(const char *const dictPath) const {
+    const bool isBufferFullyUsed = getEntryPos(mSize) >= getBuffer()->getTailPosition();
+    if (isBufferFullyUsed) {
+        // The buffer size matches the used size, so this table can be written as it is.
+        return flush(dictPath, Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
+    }
+    // The buffer is larger than what is in use: regenerate a table with the used entries only.
+    TerminalPositionLookupTable compactedTable;
+    for (int terminalId = 0; terminalId < mSize; ++terminalId) {
+        const int ptNodePos = getTerminalPtNodePosition(terminalId);
+        if (!compactedTable.setTerminalPtNodePosition(terminalId, ptNodePos)) {
+            AKLOGE("Cannot set terminal position to lookupTableToWrite."
+                    " terminalId: %d, position: %d", terminalId, ptNodePos);
+            return false;
+        }
+    }
+    return compactedTable.flush(dictPath,
+            Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
+}
+
+// Compacts the table in place: slots holding NOT_A_TERMINAL_ADDRESS are dropped and the
+// remaining entries are moved to the front, receiving consecutive new terminal ids starting
+// from 0. Each (old id -> new id) pair is inserted into terminalIdMap and mSize is set to
+// the number of surviving entries. Returns false when a buffer write fails; in that case
+// mSize is left unchanged.
+bool TerminalPositionLookupTable::runGCTerminalIds(TerminalIdMap *const terminalIdMap) {
+    int nextNewTerminalId = 0;
+    for (int i = 0; i < mSize; ++i) {
+        const int terminalPos = getBuffer()->readUint(
+                Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(i));
+        if (terminalPos == Ver4DictConstants::NOT_A_TERMINAL_ADDRESS) {
+            // This entry is garbage; it does not get a new terminal id.
+            continue;
+        }
+        // Give a new terminal id to the entry. nextNewTerminalId never exceeds i, so only slots
+        // that have already been read are overwritten.
+        if (!getWritableBuffer()->writeUint(terminalPos,
+                Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE,
+                getEntryPos(nextNewTerminalId))) {
+            return false;
+        }
+        // Memorize the mapping from the old terminal id to the new terminal id.
+        terminalIdMap->insert(TerminalIdMap::value_type(i, nextNewTerminalId));
+        nextNewTerminalId++;
+    }
+    mSize = nextNewTerminalId;
+    return true;
+}
+
+} // namespace v402
+} // namespace backward
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h
new file mode 100644
index 000000000..eadfe0faa
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h
+ */
+
+#ifndef LATINIME_BACKWARD_V402_TERMINAL_POSITION_LOOKUP_TABLE_H
+#define LATINIME_BACKWARD_V402_TERMINAL_POSITION_LOOKUP_TABLE_H
+
+#include <unordered_map>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/single_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// Lookup table from terminal id to the position of the corresponding terminal PtNode.
+// Entry i is stored at byte offset i * Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE
+// of the underlying content buffer (see getEntryPos()).
+class TerminalPositionLookupTable : public SingleDictContent {
+ public:
+ // Mapping from an old terminal id to the id assigned by runGCTerminalIds().
+ typedef std::unordered_map<int, int> TerminalIdMap;
+
+ // Opens the table content for dictPath using the terminal address table file extension.
+ // The entry count is the buffer's tail position divided by the per-entry size.
+ TerminalPositionLookupTable(const char *const dictPath, const bool isUpdatable)
+ : SingleDictContent(dictPath,
+ Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION, isUpdatable),
+ mSize(getBuffer()->getTailPosition()
+ / Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE) {}
+
+ // Creates a table that holds no entries.
+ TerminalPositionLookupTable() : mSize(0) {}
+
+ // Returns the PtNode position recorded for terminalId (implemented in the .cpp file).
+ int getTerminalPtNodePosition(const int terminalId) const;
+
+ // Records terminalPtNodePos for terminalId. Returns false on failure (implemented in the
+ // .cpp file).
+ bool setTerminalPtNodePosition(const int terminalId, const int terminalPtNodePos);
+
+ // The next unused terminal id, which equals the current number of entries.
+ int getNextTerminalId() const {
+ return mSize;
+ }
+
+ // Writes the table to a file derived from dictPath and the terminal address table file
+ // extension. Returns false if writing fails.
+ bool flushToFile(const char *const dictPath) const;
+
+ // Drops unused entries, renumbers the remaining terminal ids consecutively from 0 and
+ // reports every (old id, new id) pair through terminalIdMap. Returns false on write failure.
+ bool runGCTerminalIds(TerminalIdMap *const terminalIdMap);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(TerminalPositionLookupTable);
+
+ // Byte offset of the entry for terminalId inside the content buffer.
+ int getEntryPos(const int terminalId) const {
+ return terminalId * Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE;
+ }
+
+ // Number of entries currently held by the table.
+ int mSize;
+};
+} // namespace v402
+} // namespace backward
+} // namespace latinime
+#endif // LATINIME_BACKWARD_V402_TERMINAL_POSITION_LOOKUP_TABLE_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/shortcut/ver4_shortcut_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/shortcut/ver4_shortcut_list_policy.h
new file mode 100644
index 000000000..941fda748
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/shortcut/ver4_shortcut_list_policy.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!!
+ * Do not edit this file other than updating policy's interface.
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h
+ */
+
+#ifndef LATINIME_BACKWARD_V402_VER4_SHORTCUT_LIST_POLICY_H
+#define LATINIME_BACKWARD_V402_VER4_SHORTCUT_LIST_POLICY_H
+
+#include "defines.h"
+#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/shortcut_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// Shortcut list policy for ver4 (v402) dictionaries. All reads and writes are delegated to the
+// ShortcutDictContent supplied at construction time; this class does not own it.
+class Ver4ShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
+ public:
+    // terminalPositionLookupTable is part of the constructor's interface but is not stored.
+    Ver4ShortcutListPolicy(ShortcutDictContent *const shortcutDictContent,
+            const TerminalPositionLookupTable *const terminalPositionLookupTable)
+            : mShortcutDictContent(shortcutDictContent) {}
+
+    ~Ver4ShortcutListPolicy() {}
+
+    // The head position of a shortcut list is also the position of its first entry.
+    int getStartPos(const int pos) const {
+        return pos;
+    }
+
+    // Reads the shortcut entry at *pos and advances *pos past it. outIsWhitelist may be null,
+    // in which case the whitelist flag is not reported.
+    void getNextShortcut(const int maxCodePointCount, int *const outCodePoint,
+            int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext,
+            int *const pos) const {
+        int shortcutProbability = 0;
+        mShortcutDictContent->getShortcutEntryAndAdvancePosition(maxCodePointCount,
+                outCodePoint, outCodePointCount, &shortcutProbability, outHasNext, pos);
+        if (!outIsWhitelist) {
+            return;
+        }
+        *outIsWhitelist = ShortcutListReadingUtils::isWhitelist(shortcutProbability);
+    }
+
+    void skipAllShortcuts(int *const pos) const {
+        // Intentionally empty: ver4 dictionaries never need to skip over shortcut lists.
+    }
+
+    // Adds a shortcut (codePoints, probability) to the terminal identified by terminalId, or
+    // overwrites the entry that already has the same code points. Returns false on failure.
+    bool addNewShortcut(const int terminalId, const int *const codePoints, const int codePointCount,
+            const int probability) {
+        const int listHeadPos = mShortcutDictContent->getShortcutListHeadPos(terminalId);
+        if (listHeadPos == NOT_A_DICT_POS) {
+            // The terminal has no shortcut list yet: create one holding just this entry.
+            if (!mShortcutDictContent->createNewShortcutList(terminalId)) {
+                AKLOGE("Cannot create new shortcut list. terminal id: %d", terminalId);
+                return false;
+            }
+            return mShortcutDictContent->writeShortcutEntry(codePoints, codePointCount,
+                    probability, false /* hasNext */,
+                    mShortcutDictContent->getShortcutListHeadPos(terminalId));
+        }
+        const int existingEntryPos = mShortcutDictContent->findShortcutEntryAndGetPos(
+                listHeadPos, codePoints, codePointCount);
+        if (existingEntryPos != NOT_A_DICT_POS) {
+            // Same shortcut already exists: overwrite it, keeping its hasNext flag.
+            bool hasNext = false;
+            mShortcutDictContent->getShortcutEntry(MAX_WORD_LENGTH, 0 /* outCodePoint */,
+                    0 /* outCodePointCount */ , 0 /* probability */, &hasNext, existingEntryPos);
+            if (mShortcutDictContent->writeShortcutEntry(codePoints, codePointCount, probability,
+                    hasNext, existingEntryPos)) {
+                return true;
+            }
+            AKLOGE("Cannot overwrite shortcut entry. terminal id: %d, pos: %d", terminalId,
+                    existingEntryPos);
+            return false;
+        }
+        // The shortcut is new for this terminal: start a fresh list with the new entry at its
+        // head, then copy the old list behind it.
+        if (!mShortcutDictContent->createNewShortcutList(terminalId)) {
+            AKLOGE("Cannot create new shortcut list. terminal id: %d", terminalId);
+            return false;
+        }
+        int writingPos = mShortcutDictContent->getShortcutListHeadPos(terminalId);
+        if (!mShortcutDictContent->writeShortcutEntryAndAdvancePosition(codePoints,
+                codePointCount, probability, true /* hasNext */, &writingPos)) {
+            AKLOGE("Cannot write shortcut entry. terminal id: %d, pos: %d", terminalId,
+                    writingPos);
+            return false;
+        }
+        return mShortcutDictContent->copyShortcutList(listHeadPos, writingPos);
+    }
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4ShortcutListPolicy);
+
+    ShortcutDictContent *const mShortcutDictContent;
+};
+} // namespace v402
+} // namespace backward
+} // namespace latinime
+#endif // LATINIME_BACKWARD_V402_VER4_SHORTCUT_LIST_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.cpp
new file mode 100644
index 000000000..93f192976
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.cpp
@@ -0,0 +1,156 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
+ */
+
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.h"
+
+#include <cerrno>
+#include <cstring>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
+#include "suggest/policyimpl/dictionary/utils/file_utils.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// Creates the buffers for an existing ver4 dictionary located at dictPath.
+//
+// headerBuffer must already be opened by the caller; ownership is transferred to the returned
+// object. Returns a null Ver4DictBuffersPtr when headerBuffer is null.
+/* static */ Ver4DictBuffers::Ver4DictBuffersPtr Ver4DictBuffers::openVer4DictBuffers(
+        const char *const dictPath, MmappedBuffer::MmappedBufferPtr headerBuffer,
+        const FormatUtils::FORMAT_VERSION formatVersion) {
+    if (!headerBuffer) {
+        // Log first, like the other error paths in this file, so that the message is still
+        // emitted in builds where ASSERT(false) stops execution.
+        AKLOGE("The header buffer must be valid to open ver4 dict buffers.");
+        ASSERT(false);
+        return Ver4DictBuffersPtr(nullptr);
+    }
+    // TODO: take only dictDirPath, and open both header and trie files in the constructor below
+    // isUpdatable has to be read before headerBuffer is moved from.
+    const bool isUpdatable = headerBuffer->isUpdatable();
+    return Ver4DictBuffersPtr(new Ver4DictBuffers(dictPath, std::move(headerBuffer), isUpdatable,
+            formatVersion));
+}
+
+// Writes headerBuffer, the trie buffer and all dictionary contents held by this instance to
+// dictDirPath, replacing the directory that currently exists there.
+//
+// All files are first written into a temporary directory whose path is dictDirPath plus
+// DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE. Only after every file has been
+// written is the existing dictionary directory removed and the temporary directory renamed to
+// dictDirPath. Returns false on the first failure; the temporary directory is not cleaned up on
+// that path, but a stale one is removed at the beginning of the next call.
+bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath,
+        const BufferWithExtendableBuffer *const headerBuffer) const {
+    // Create temporary directory.
+    const int tmpDirPathBufSize = FileUtils::getFilePathWithSuffixBufSize(dictDirPath,
+            DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE);
+    char tmpDirPath[tmpDirPathBufSize];
+    FileUtils::getFilePathWithSuffix(dictDirPath,
+            DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE, tmpDirPathBufSize,
+            tmpDirPath);
+    if (FileUtils::existsDir(tmpDirPath)) {
+        if (!FileUtils::removeDirAndFiles(tmpDirPath)) {
+            AKLOGE("Existing directory %s cannot be removed.", tmpDirPath);
+            ASSERT(false);
+            return false;
+        }
+    }
+    // NOTE(review): umask() changes the mask of the whole process and it is not restored
+    // before this function returns.
+    umask(S_IWGRP | S_IWOTH);
+    if (mkdir(tmpDirPath, S_IRWXU) == -1) {
+        AKLOGE("Cannot create directory: %s. errno: %d.", tmpDirPath, errno);
+        return false;
+    }
+    // Get dictionary base path: <tmpDirPath>/<basename of dictDirPath>. Every file written below
+    // is named by appending an extension to this path.
+    const int dictNameBufSize = strlen(dictDirPath) + 1 /* terminator */;
+    char dictName[dictNameBufSize];
+    FileUtils::getBasename(dictDirPath, dictNameBufSize, dictName);
+    const int dictPathBufSize = FileUtils::getFilePathBufSize(tmpDirPath, dictName);
+    char dictPath[dictPathBufSize];
+    FileUtils::getFilePath(tmpDirPath, dictName, dictPathBufSize, dictPath);
+
+    // Write header file.
+    if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath,
+            Ver4DictConstants::HEADER_FILE_EXTENSION, headerBuffer)) {
+        // Report the path that was actually written to (dictPath + extension).
+        AKLOGE("Dictionary header file %s%s cannot be written.", dictPath,
+                Ver4DictConstants::HEADER_FILE_EXTENSION);
+        return false;
+    }
+    // Write trie file.
+    if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath,
+            Ver4DictConstants::TRIE_FILE_EXTENSION, &mExpandableTrieBuffer)) {
+        AKLOGE("Dictionary trie file %s%s cannot be written.", dictPath,
+                Ver4DictConstants::TRIE_FILE_EXTENSION);
+        return false;
+    }
+    // Write dictionary contents.
+    if (!mTerminalPositionLookupTable.flushToFile(dictPath)) {
+        AKLOGE("Terminal position lookup table cannot be written. %s", tmpDirPath);
+        return false;
+    }
+    if (!mProbabilityDictContent.flushToFile(dictPath)) {
+        AKLOGE("Probability dict content cannot be written. %s", tmpDirPath);
+        return false;
+    }
+    if (!mBigramDictContent.flushToFile(dictPath)) {
+        AKLOGE("Bigram dict content cannot be written. %s", tmpDirPath);
+        return false;
+    }
+    if (!mShortcutDictContent.flushToFile(dictPath)) {
+        AKLOGE("Shortcut dict content cannot be written. %s", tmpDirPath);
+        return false;
+    }
+    // Remove existing dictionary.
+    // NOTE(review): between this removal and the rename below no dictionary exists at
+    // dictDirPath, so a failure in that window leaves only the temporary directory behind.
+    if (!FileUtils::removeDirAndFiles(dictDirPath)) {
+        AKLOGE("Existing directory %s cannot be removed.", dictDirPath);
+        ASSERT(false);
+        return false;
+    }
+    // Rename temporary directory.
+    if (rename(tmpDirPath, dictDirPath) != 0) {
+        AKLOGE("%s cannot be renamed to %s", tmpDirPath, dictDirPath);
+        ASSERT(false);
+        return false;
+    }
+    return true;
+}
+
+// Constructs the buffers for an existing dictionary at dictPath.
+//
+// headerBuffer must be non-null: it is dereferenced unconditionally when mHeaderPolicy is
+// initialized (openVer4DictBuffers() checks this before calling). The trie file is opened here
+// with the TRIE_FILE_EXTENSION suffix; when mDictBuffer ends up null, the expandable trie buffer
+// is built over a null pointer with size 0.
+Ver4DictBuffers::Ver4DictBuffers(const char *const dictPath,
+ MmappedBuffer::MmappedBufferPtr headerBuffer, const bool isUpdatable,
+ const FormatUtils::FORMAT_VERSION formatVersion)
+ : mHeaderBuffer(std::move(headerBuffer)),
+ mDictBuffer(MmappedBuffer::openBuffer(dictPath,
+ Ver4DictConstants::TRIE_FILE_EXTENSION, isUpdatable)),
+ // From here on the member mHeaderBuffer is used; the parameter has been moved from.
+ mHeaderPolicy(mHeaderBuffer->getBuffer(), formatVersion),
+ mExpandableHeaderBuffer(mHeaderBuffer ? mHeaderBuffer->getBuffer() : nullptr,
+ mHeaderPolicy.getSize(),
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
+ mExpandableTrieBuffer(mDictBuffer ? mDictBuffer->getBuffer() : nullptr,
+ mDictBuffer ? mDictBuffer->getBufferSize() : 0,
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
+ mTerminalPositionLookupTable(dictPath, isUpdatable),
+ // Probability and bigram contents are told whether words carry historical info.
+ mProbabilityDictContent(dictPath, mHeaderPolicy.hasHistoricalInfoOfWords(), isUpdatable),
+ mBigramDictContent(dictPath, mHeaderPolicy.hasHistoricalInfoOfWords(), isUpdatable),
+ mShortcutDictContent(dictPath, isUpdatable),
+ mIsUpdatable(isUpdatable) {}
+
+// Constructs buffers for a new dictionary that is not backed by files: mHeaderBuffer and
+// mDictBuffer stay null, the header policy is initialized from headerPolicy, the expandable
+// buffers are created from size limits only, and all contents are default-constructed.
+// The resulting instance is always updatable.
+Ver4DictBuffers::Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize)
+ : mHeaderBuffer(nullptr), mDictBuffer(nullptr), mHeaderPolicy(headerPolicy),
+ mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
+ mExpandableTrieBuffer(maxTrieSize), mTerminalPositionLookupTable(),
+ mProbabilityDictContent(headerPolicy->hasHistoricalInfoOfWords()),
+ mBigramDictContent(headerPolicy->hasHistoricalInfoOfWords()), mShortcutDictContent(),
+ mIsUpdatable(true) {}
+
+} // namespace v402
+} // namespace backward
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.h
new file mode 100644
index 000000000..e775be52e
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.h
@@ -0,0 +1,152 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
+ */
+
+#ifndef LATINIME_BACKWARD_V402_VER4_DICT_BUFFER_H
+#define LATINIME_BACKWARD_V402_VER4_DICT_BUFFER_H
+
+#include <memory>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/shortcut_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// Owns every buffer and content table that makes up one ver4 (v402) dictionary: the header,
+// the trie, the terminal position lookup table and the probability, bigram and shortcut
+// contents. Instances are created through openVer4DictBuffers() (existing dictionary) or
+// createVer4DictBuffers() (new dictionary).
+class Ver4DictBuffers {
+ public:
+    typedef std::unique_ptr<Ver4DictBuffers> Ver4DictBuffersPtr;
+
+    // Opens the buffers of an existing dictionary. Takes ownership of headerBuffer and returns
+    // a null pointer when headerBuffer is null.
+    static Ver4DictBuffersPtr openVer4DictBuffers(const char *const dictDirPath,
+            MmappedBuffer::MmappedBufferPtr headerBuffer,
+            const FormatUtils::FORMAT_VERSION formatVersion);
+
+    // Creates buffers for a new dictionary described by headerPolicy.
+    static AK_FORCE_INLINE Ver4DictBuffersPtr createVer4DictBuffers(
+            const HeaderPolicy *const headerPolicy, const int maxTrieSize) {
+        return Ver4DictBuffersPtr(new Ver4DictBuffers(headerPolicy, maxTrieSize));
+    }
+
+    // True only when both mmapped buffers are present and the header policy and every content
+    // report themselves as valid.
+    AK_FORCE_INLINE bool isValid() const {
+        return mHeaderBuffer && mDictBuffer && mHeaderPolicy.isValid()
+                && mProbabilityDictContent.isValid() && mTerminalPositionLookupTable.isValid()
+                && mBigramDictContent.isValid() && mShortcutDictContent.isValid();
+    }
+
+    // True when the trie buffer or any content is near its size limit. The header buffer is
+    // not part of this check.
+    AK_FORCE_INLINE bool isNearSizeLimit() const {
+        return mExpandableTrieBuffer.isNearSizeLimit()
+                || mTerminalPositionLookupTable.isNearSizeLimit()
+                || mProbabilityDictContent.isNearSizeLimit()
+                || mBigramDictContent.isNearSizeLimit()
+                || mShortcutDictContent.isNearSizeLimit();
+    }
+
+    AK_FORCE_INLINE const HeaderPolicy *getHeaderPolicy() const {
+        return &mHeaderPolicy;
+    }
+
+    AK_FORCE_INLINE BufferWithExtendableBuffer *getWritableHeaderBuffer() {
+        return &mExpandableHeaderBuffer;
+    }
+
+    AK_FORCE_INLINE BufferWithExtendableBuffer *getWritableTrieBuffer() {
+        return &mExpandableTrieBuffer;
+    }
+
+    AK_FORCE_INLINE const BufferWithExtendableBuffer *getTrieBuffer() const {
+        return &mExpandableTrieBuffer;
+    }
+
+    AK_FORCE_INLINE TerminalPositionLookupTable *getMutableTerminalPositionLookupTable() {
+        return &mTerminalPositionLookupTable;
+    }
+
+    AK_FORCE_INLINE const TerminalPositionLookupTable *getTerminalPositionLookupTable() const {
+        return &mTerminalPositionLookupTable;
+    }
+
+    AK_FORCE_INLINE ProbabilityDictContent *getMutableProbabilityDictContent() {
+        return &mProbabilityDictContent;
+    }
+
+    AK_FORCE_INLINE const ProbabilityDictContent *getProbabilityDictContent() const {
+        return &mProbabilityDictContent;
+    }
+
+    AK_FORCE_INLINE BigramDictContent *getMutableBigramDictContent() {
+        return &mBigramDictContent;
+    }
+
+    AK_FORCE_INLINE const BigramDictContent *getBigramDictContent() const {
+        return &mBigramDictContent;
+    }
+
+    AK_FORCE_INLINE ShortcutDictContent *getMutableShortcutDictContent() {
+        return &mShortcutDictContent;
+    }
+
+    AK_FORCE_INLINE const ShortcutDictContent *getShortcutDictContent() const {
+        return &mShortcutDictContent;
+    }
+
+    AK_FORCE_INLINE bool isUpdatable() const {
+        return mIsUpdatable;
+    }
+
+    // Writes this instance's own header buffer and all dictionary buffers to dictDirPath.
+    bool flush(const char *const dictDirPath) const {
+        return flushHeaderAndDictBuffers(dictDirPath, &mExpandableHeaderBuffer);
+    }
+
+    // Same as flush(), but writes the caller-supplied headerBuffer as the header file.
+    bool flushHeaderAndDictBuffers(const char *const dictDirPath,
+            const BufferWithExtendableBuffer *const headerBuffer) const;
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(Ver4DictBuffers);
+
+    // headerBuffer is taken by value and moved into mHeaderBuffer by the definition, so it is
+    // declared without a top-level const to match that definition.
+    Ver4DictBuffers(const char *const dictDirPath,
+            MmappedBuffer::MmappedBufferPtr headerBuffer, const bool isUpdatable,
+            const FormatUtils::FORMAT_VERSION formatVersion);
+
+    Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize);
+
+    // The constructors' initializer lists rely on this declaration order: mHeaderPolicy reads
+    // mHeaderBuffer, and the members after it read mHeaderPolicy and mDictBuffer.
+    const MmappedBuffer::MmappedBufferPtr mHeaderBuffer;
+    const MmappedBuffer::MmappedBufferPtr mDictBuffer;
+    const HeaderPolicy mHeaderPolicy;
+    BufferWithExtendableBuffer mExpandableHeaderBuffer;
+    BufferWithExtendableBuffer mExpandableTrieBuffer;
+    TerminalPositionLookupTable mTerminalPositionLookupTable;
+    ProbabilityDictContent mProbabilityDictContent;
+    BigramDictContent mBigramDictContent;
+    ShortcutDictContent mShortcutDictContent;
+    // Initialized from a bool and returned as a bool by isUpdatable().
+    const bool mIsUpdatable;
+};
+} // namespace v402
+} // namespace backward
+} // namespace latinime
+#endif /* LATINIME_BACKWARD_V402_VER4_DICT_BUFFER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.cpp
new file mode 100644
index 000000000..81d85f495
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
+ */
+
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// These values MUST match the definitions in FormatSpec.java.
+// Each extension below is appended to the dictionary base path to name one dictionary file.
+const char *const Ver4DictConstants::TRIE_FILE_EXTENSION = ".trie";
+const char *const Ver4DictConstants::HEADER_FILE_EXTENSION = ".header";
+const char *const Ver4DictConstants::FREQ_FILE_EXTENSION = ".freq";
+// tat = Terminal Address Table
+const char *const Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION = ".tat";
+const char *const Ver4DictConstants::BIGRAM_FILE_EXTENSION = ".bigram_freq";
+const char *const Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION = ".bigram_lookup";
+const char *const Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION = ".bigram_index_freq";
+const char *const Ver4DictConstants::SHORTCUT_FILE_EXTENSION = ".shortcut_shortcut";
+const char *const Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION = ".shortcut_lookup";
+const char *const Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION =
+ ".shortcut_index_shortcut";
+
+// Version 4 dictionary size is implicitly limited to 8MB due to 3-byte offsets.
+const int Ver4DictConstants::MAX_DICTIONARY_SIZE = 8 * 1024 * 1024;
+// The extended region (the region of the dict file that has not been GCed plus the additional
+// buffer) is limited to 1MB to prevent inefficient traversal.
+const int Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE = 1 * 1024 * 1024;
+
+// Field sizes below are in bytes.
+const int Ver4DictConstants::NOT_A_TERMINAL_ID = -1;
+const int Ver4DictConstants::PROBABILITY_SIZE = 1;
+const int Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE = 1;
+const int Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3;
+// Terminal address table entries holding this value are treated as garbage by
+// TerminalPositionLookupTable::runGCTerminalIds().
+const int Ver4DictConstants::NOT_A_TERMINAL_ADDRESS = 0;
+const int Ver4DictConstants::TERMINAL_ID_FIELD_SIZE = 4;
+const int Ver4DictConstants::TIME_STAMP_FIELD_SIZE = 4;
+const int Ver4DictConstants::WORD_LEVEL_FIELD_SIZE = 1;
+const int Ver4DictConstants::WORD_COUNT_FIELD_SIZE = 1;
+
+const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 16;
+const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE = 4;
+const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 64;
+const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4;
+
+const int Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE = 3;
+// The maximum unsigned value that fits in BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE bytes
+// (0xFFFFFF) is used for representing an invalid terminal ID in bigram lists.
+const int Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID =
+ (1 << (BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE * 8)) - 1;
+const int Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE = 1;
+const int Ver4DictConstants::BIGRAM_PROBABILITY_MASK = 0x0F;
+const int Ver4DictConstants::BIGRAM_HAS_NEXT_MASK = 0x80;
+const int Ver4DictConstants::BIGRAM_LARGE_PROBABILITY_FIELD_SIZE = 1;
+
+const int Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE = 1;
+const int Ver4DictConstants::SHORTCUT_PROBABILITY_MASK = 0x0F;
+const int Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK = 0x80;
+
+} // namespace v402
+} // namespace backward
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h
new file mode 100644
index 000000000..88ebd6a75
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
+ */
+
+#ifndef LATINIME_BACKWARD_V402_VER4_DICT_CONSTANTS_H
+#define LATINIME_BACKWARD_V402_VER4_DICT_CONSTANTS_H
+
+#include "defines.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// TODO: Create PtConstants under the pt_common and move some constant values there.
+// Note that there are corresponding definitions in FormatSpec.java.
+// Holder for the constants of the ver4 (v402) dictionary format. The class only has static
+// members; the values are defined in ver4_dict_constants.cpp.
+class Ver4DictConstants {
+ public:
+ // File name extensions of the files that make up a dictionary.
+ static const char *const TRIE_FILE_EXTENSION;
+ static const char *const HEADER_FILE_EXTENSION;
+ static const char *const FREQ_FILE_EXTENSION;
+ static const char *const TERMINAL_ADDRESS_TABLE_FILE_EXTENSION;
+ static const char *const BIGRAM_FILE_EXTENSION;
+ static const char *const BIGRAM_LOOKUP_TABLE_FILE_EXTENSION;
+ static const char *const BIGRAM_CONTENT_TABLE_FILE_EXTENSION;
+ static const char *const SHORTCUT_FILE_EXTENSION;
+ static const char *const SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION;
+ static const char *const SHORTCUT_CONTENT_TABLE_FILE_EXTENSION;
+
+ // Size limits.
+ static const int MAX_DICTIONARY_SIZE;
+ static const int MAX_DICT_EXTENDED_REGION_SIZE;
+
+ // Sentinel values and field sizes.
+ static const int NOT_A_TERMINAL_ID;
+ static const int PROBABILITY_SIZE;
+ static const int FLAGS_IN_PROBABILITY_FILE_SIZE;
+ static const int TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE;
+ static const int NOT_A_TERMINAL_ADDRESS;
+ static const int TERMINAL_ID_FIELD_SIZE;
+ static const int TIME_STAMP_FIELD_SIZE;
+ static const int WORD_LEVEL_FIELD_SIZE;
+ static const int WORD_COUNT_FIELD_SIZE;
+
+ // Parameters of the bigram and shortcut address tables.
+ static const int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE;
+ static const int BIGRAM_ADDRESS_TABLE_DATA_SIZE;
+ static const int SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE;
+ static const int SHORTCUT_ADDRESS_TABLE_DATA_SIZE;
+
+ // Bigram entry fields and masks.
+ static const int BIGRAM_FLAGS_FIELD_SIZE;
+ static const int BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
+ static const int INVALID_BIGRAM_TARGET_TERMINAL_ID;
+ static const int BIGRAM_PROBABILITY_MASK;
+ static const int BIGRAM_HAS_NEXT_MASK;
+ // Used when bigram list has time stamp.
+ static const int BIGRAM_LARGE_PROBABILITY_FIELD_SIZE;
+
+ // Shortcut entry fields and masks.
+ static const int SHORTCUT_FLAGS_FIELD_SIZE;
+ static const int SHORTCUT_PROBABILITY_MASK;
+ static const int SHORTCUT_HAS_NEXT_MASK;
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictConstants);
+};
+} // namespace v402
+} // namespace backward
+} // namespace latinime
+#endif /* LATINIME_BACKWARD_V402_VER4_DICT_CONSTANTS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.cpp
new file mode 100644
index 000000000..82399f190
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.cpp
@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
+ */
+
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h"
+
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// Reads the PtNode located at ptNodePos and returns its parameters. When the node is flagged
+// as moved, the destination node is read instead, while the sibling position keeps pointing to
+// the end of the first (original) node that was read.
+//
+// siblingNodePos is NOT_A_DICT_POS on the initial call; a recursive call passes the sibling
+// position determined for the original node. Returns a default-constructed PtNodeParams when
+// ptNodePos lies outside the buffer.
+const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode(
+        const int ptNodePos, const int siblingNodePos) const {
+    if (ptNodePos < 0 || ptNodePos >= mBuffer->getTailPosition()) {
+        // Reading invalid position because of bug or broken dictionary.
+        AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d",
+                ptNodePos, mBuffer->getTailPosition());
+        ASSERT(false);
+        return PtNodeParams();
+    }
+    const bool inAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodePos);
+    const uint8_t *const nodeBuf = mBuffer->getBuffer(inAdditionalBuffer);
+    // Positions handed to the reading utils are relative to nodeBuf. For the additional buffer
+    // they are shifted by the original buffer size relative to dictionary positions.
+    const int bufferOffset = inAdditionalBuffer ? mBuffer->getOriginalBufferSize() : 0;
+    int readingPos = ptNodePos - bufferOffset;
+
+    const PatriciaTrieReadingUtils::NodeFlags flags =
+            PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(nodeBuf, &readingPos);
+    const int parentPosOffset =
+            DynamicPtReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(
+                    nodeBuf, &readingPos);
+    const int parentPos =
+            DynamicPtReadingUtils::getParentPtNodePos(parentPosOffset, ptNodePos);
+    int codePoints[MAX_WORD_LENGTH];
+    const int codePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
+            nodeBuf, flags, MAX_WORD_LENGTH, codePoints, &readingPos);
+
+    int terminalIdFieldPos = NOT_A_DICT_POS;
+    int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
+    int probability = NOT_A_PROBABILITY;
+    if (PatriciaTrieReadingUtils::isTerminal(flags)) {
+        terminalIdFieldPos = readingPos + bufferOffset;
+        terminalId = Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(
+                nodeBuf, &readingPos);
+        const ProbabilityEntry entry = mProbabilityDictContent->getProbabilityEntry(terminalId);
+        if (entry.hasHistoricalInfo()) {
+            probability = ForgettingCurveUtils::decodeProbability(
+                    entry.getHistoricalInfo(), mHeaderPolicy);
+        } else {
+            probability = entry.getProbability();
+        }
+    }
+
+    int childrenPosFieldPos = readingPos + bufferOffset;
+    int childrenPos = DynamicPtReadingUtils::readChildrenPositionAndAdvancePosition(
+            nodeBuf, &readingPos);
+    if (childrenPos != NOT_A_DICT_POS) {
+        // bufferOffset is 0 for the original buffer, so this is a no-op there.
+        childrenPos += bufferOffset;
+    }
+    // Sibling position is the tail position of original PtNode.
+    const int tailPos = readingPos + bufferOffset;
+    int newSiblingNodePos = (siblingNodePos == NOT_A_DICT_POS) ? tailPos : siblingNodePos;
+    if (!DynamicPtReadingUtils::isMoved(flags)) {
+        return PtNodeParams(ptNodePos, flags, parentPos, codePointCount, codePoints,
+                terminalIdFieldPos, terminalId, probability, childrenPosFieldPos, childrenPos,
+                newSiblingNodePos);
+    }
+    // Read destination node if the read node is a moved node. The destination position is
+    // stored at the same place as the parent position.
+    return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(parentPos, newSiblingNodePos);
+}
+
+} // namespace v402
+} // namespace backward
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h
new file mode 100644
index 000000000..1999a51a6
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h
+ */
+
+#ifndef LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_NODE_READER_H
+#define LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_NODE_READER_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
+
+namespace latinime {
+
+// Declared directly in latinime; only pointers to these types are used in this header.
+class BufferWithExtendableBuffer;
+class HeaderPolicy;
+
+namespace backward {
+namespace v402 {
+
+class ProbabilityDictContent;
+
+/*
+ * This class is used for helping to read nodes of ver4 patricia trie. This class handles moved
+ * node and reads node attributes including probability from probabilityBuffer.
+ */
+class Ver4PatriciaTrieNodeReader : public PtNodeReader {
+ public:
+    // Stores the given pointers as they are. The destructor is empty, so none of the pointed-to
+    // objects are released by this class.
+    Ver4PatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer,
+            const ProbabilityDictContent *const probabilityDictContent,
+            const HeaderPolicy *const headerPolicy)
+            : mBuffer(buffer), mProbabilityDictContent(probabilityDictContent),
+              mHeaderPolicy(headerPolicy) {}
+
+    // Spelled virtual to match Ver4PatriciaTrieNodeWriter in the same directory.
+    virtual ~Ver4PatriciaTrieNodeReader() {}
+
+    // Returns the params of the PtNode at ptNodePos. The work is delegated to
+    // fetchPtNodeInfoFromBufferAndProcessMovedPtNode() with no known sibling position.
+    virtual const PtNodeParams fetchPtNodeParamsInBufferFromPtNodePos(const int ptNodePos) const {
+        return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(ptNodePos,
+                NOT_A_DICT_POS /* siblingNodePos */);
+    }
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeReader);
+
+    const BufferWithExtendableBuffer *const mBuffer;
+    const ProbabilityDictContent *const mProbabilityDictContent;
+    const HeaderPolicy *const mHeaderPolicy;
+
+    // Reads the PtNode at ptNodePos. siblingNodePos is NOT_A_DICT_POS when the caller has no
+    // sibling position to pass on.
+    const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
+            const int siblingNodePos) const;
+};
+} // namespace v402
+} // namespace backward
+} // namespace latinime
+#endif /* LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_NODE_READER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp
new file mode 100644
index 000000000..4220a9561
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp
@@ -0,0 +1,429 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
+ */
+
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h"
+
+#include "suggest/core/dictionary/property/unigram_property.h"
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/shortcut/ver4_shortcut_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// Children position field size handed to PatriciaTrieReadingUtils::createAndGetFlags() whenever
+// this writer builds PtNode flags.
+const int Ver4PatriciaTrieNodeWriter::CHILDREN_POSITION_FIELD_SIZE = 3;
+
+// Sets the "deleted" state in the flags of the given PtNode. When the PtNode is a terminal, its
+// entry in the terminal position lookup table is also reset to NOT_A_DICT_POS.
+// Returns false if any of the writes fails.
+bool Ver4PatriciaTrieNodeWriter::markPtNodeAsDeleted(
+        const PtNodeParams *const toBeUpdatedPtNodeParams) {
+    const int headPos = toBeUpdatedPtNodeParams->getHeadPos();
+    // Reading is done on the raw buffer that holds the PtNode; for the additional buffer the
+    // position has to be made relative by subtracting the original buffer size.
+    const bool inAdditionalBuffer = mTrieBuffer->isInAdditionalBuffer(headPos);
+    const uint8_t *const buffer = mTrieBuffer->getBuffer(inAdditionalBuffer);
+    int readingPos = headPos;
+    if (inAdditionalBuffer) {
+        readingPos -= mTrieBuffer->getOriginalBufferSize();
+    }
+    const PatriciaTrieReadingUtils::NodeFlags currentFlags =
+            PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(buffer, &readingPos);
+    const PatriciaTrieReadingUtils::NodeFlags deletedFlags =
+            DynamicPtReadingUtils::updateAndGetFlags(currentFlags, false /* isMoved */,
+                    true /* isDeleted */, false /* willBecomeNonTerminal */);
+    // Writing goes through mTrieBuffer, so the unadjusted head position is used here.
+    int flagsWritingPos = headPos;
+    if (!DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, deletedFlags,
+            &flagsWritingPos)) {
+        return false;
+    }
+    if (!toBeUpdatedPtNodeParams->isTerminal()) {
+        // Nothing is registered in the lookup table for non-terminal PtNodes.
+        return true;
+    }
+    // The PtNode is a terminal. Delete entry from the terminal position lookup table.
+    return mBuffers->getMutableTerminalPositionLookupTable()->setTerminalPtNodePosition(
+            toBeUpdatedPtNodeParams->getTerminalId(), NOT_A_DICT_POS /* ptNodePos */);
+}
+
+// Sets the "moved" state in the flags of the given PtNode, stores movedPos in its parent offset
+// field and rewrites the parent offset of every child so that it refers to bigramLinkedNodePos.
+// Returns false as soon as one of the writes fails.
+bool Ver4PatriciaTrieNodeWriter::markPtNodeAsMoved(
+        const PtNodeParams *const toBeUpdatedPtNodeParams,
+        const int movedPos, const int bigramLinkedNodePos) {
+    const int headPos = toBeUpdatedPtNodeParams->getHeadPos();
+    const bool inAdditionalBuffer = mTrieBuffer->isInAdditionalBuffer(headPos);
+    const uint8_t *const buffer = mTrieBuffer->getBuffer(inAdditionalBuffer);
+    // Positions inside the additional buffer are relative to the end of the original buffer.
+    int readingPos = headPos;
+    if (inAdditionalBuffer) {
+        readingPos -= mTrieBuffer->getOriginalBufferSize();
+    }
+    const PatriciaTrieReadingUtils::NodeFlags currentFlags =
+            PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(buffer, &readingPos);
+    const PatriciaTrieReadingUtils::NodeFlags movedFlags =
+            DynamicPtReadingUtils::updateAndGetFlags(currentFlags, true /* isMoved */,
+                    false /* isDeleted */, false /* willBecomeNonTerminal */);
+    int writingPos = headPos;
+    // Update flags.
+    if (!DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, movedFlags,
+            &writingPos)) {
+        return false;
+    }
+    // The moved position is stored in the parent offset field, which follows the flags.
+    if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(
+            mTrieBuffer, movedPos, headPos, &writingPos)) {
+        return false;
+    }
+    if (!toBeUpdatedPtNodeParams->hasChildren()) {
+        return true;
+    }
+    // Walk the children PtNode array and update each child's parent position.
+    mReadingHelper.initWithPtNodeArrayPos(toBeUpdatedPtNodeParams->getChildrenPos());
+    while (!mReadingHelper.isEnd()) {
+        const PtNodeParams childParams(mReadingHelper.getPtNodeParams());
+        const int childHeadPos = childParams.getHeadPos();
+        int childParentFieldPos = childHeadPos + DynamicPtWritingUtils::NODE_FLAG_FIELD_SIZE;
+        if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(
+                mTrieBuffer, bigramLinkedNodePos, childHeadPos, &childParentFieldPos)) {
+            // Parent offset cannot be written because of a bug or a broken dictionary; thus,
+            // we give up to update dictionary.
+            return false;
+        }
+        mReadingHelper.readNextSiblingNode(childParams);
+    }
+    return true;
+}
+
+// Resets the lookup table entry of the PtNode's terminal id to NOT_A_DICT_POS and then sets the
+// "will become non-terminal" state in the PtNode flags. Returns false if either update fails.
+bool Ver4PatriciaTrieNodeWriter::markPtNodeAsWillBecomeNonTerminal(
+        const PtNodeParams *const toBeUpdatedPtNodeParams) {
+    const int headPos = toBeUpdatedPtNodeParams->getHeadPos();
+    const bool inAdditionalBuffer = mTrieBuffer->isInAdditionalBuffer(headPos);
+    const uint8_t *const buffer = mTrieBuffer->getBuffer(inAdditionalBuffer);
+    // Positions inside the additional buffer are relative to the end of the original buffer.
+    int readingPos = headPos;
+    if (inAdditionalBuffer) {
+        readingPos -= mTrieBuffer->getOriginalBufferSize();
+    }
+    const PatriciaTrieReadingUtils::NodeFlags currentFlags =
+            PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(buffer, &readingPos);
+    const PatriciaTrieReadingUtils::NodeFlags nonTerminalFlags =
+            DynamicPtReadingUtils::updateAndGetFlags(currentFlags, false /* isMoved */,
+                    false /* isDeleted */, true /* willBecomeNonTerminal */);
+    // The lookup table is updated first; the flags are only written when that succeeded.
+    const bool lookupTableUpdated =
+            mBuffers->getMutableTerminalPositionLookupTable()->setTerminalPtNodePosition(
+                    toBeUpdatedPtNodeParams->getTerminalId(), NOT_A_DICT_POS /* ptNodePos */);
+    if (!lookupTableUpdated) {
+        AKLOGE("Cannot update terminal position lookup table. terminal id: %d",
+                toBeUpdatedPtNodeParams->getTerminalId());
+        return false;
+    }
+    int flagsWritingPos = headPos;
+    return DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, nonTerminalFlags,
+            &flagsWritingPos);
+}
+
+// Rewrites the probability entry of a terminal PtNode from the given unigram property.
+// Returns false for a non-terminal PtNode or when the entry cannot be stored.
+bool Ver4PatriciaTrieNodeWriter::updatePtNodeUnigramProperty(
+        const PtNodeParams *const toBeUpdatedPtNodeParams,
+        const UnigramProperty *const unigramProperty) {
+    // Update probability and historical information.
+    // TODO: Update other information in the unigram property.
+    if (!toBeUpdatedPtNodeParams->isTerminal()) {
+        return false;
+    }
+    const int terminalId = toBeUpdatedPtNodeParams->getTerminalId();
+    const ProbabilityEntry currentEntry =
+            mBuffers->getProbabilityDictContent()->getProbabilityEntry(terminalId);
+    const ProbabilityEntry updatedEntry = createUpdatedEntryFrom(&currentEntry, unigramProperty);
+    return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(terminalId,
+            &updatedEntry);
+}
+
+// Refreshes the historical info stored for a terminal PtNode and reports through
+// outNeedsToKeepPtNode whether the PtNode is still needed.
+//
+// When the probability entry has no historical info nothing is written and the PtNode is always
+// reported as needed. Otherwise the entry is rewritten with the info produced by
+// ForgettingCurveUtils::createHistoricalInfoToSave(); if ForgettingCurveUtils::needsToKeep()
+// rejects that info, the PtNode is additionally marked as "will become non-terminal".
+//
+// Returns false for a non-terminal PtNode or when a write fails. In those cases
+// outNeedsToKeepPtNode is left untouched.
+bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(
+        const PtNodeParams *const toBeUpdatedPtNodeParams, bool *const outNeedsToKeepPtNode) {
+    if (!toBeUpdatedPtNodeParams->isTerminal()) {
+        // The message names this method; the previous text referred to a method name that
+        // does not exist in this class.
+        AKLOGE("updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC is called for "
+                "non-terminal PtNode.");
+        return false;
+    }
+    const ProbabilityEntry originalProbabilityEntry =
+            mBuffers->getProbabilityDictContent()->getProbabilityEntry(
+                    toBeUpdatedPtNodeParams->getTerminalId());
+    if (!originalProbabilityEntry.hasHistoricalInfo()) {
+        // No need to update probability.
+        *outNeedsToKeepPtNode = true;
+        return true;
+    }
+    const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
+            originalProbabilityEntry.getHistoricalInfo(), mHeaderPolicy);
+    const ProbabilityEntry probabilityEntry =
+            originalProbabilityEntry.createEntryWithUpdatedHistoricalInfo(&historicalInfo);
+    if (!mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
+            toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry)) {
+        AKLOGE("Cannot write updated probability entry. terminalId: %d",
+                toBeUpdatedPtNodeParams->getTerminalId());
+        return false;
+    }
+    const bool isValid = ForgettingCurveUtils::needsToKeep(&historicalInfo, mHeaderPolicy);
+    if (!isValid && !markPtNodeAsWillBecomeNonTerminal(toBeUpdatedPtNodeParams)) {
+        AKLOGE("Cannot mark PtNode as willBecomeNonTerminal.");
+        return false;
+    }
+    *outNeedsToKeepPtNode = isValid;
+    return true;
+}
+
+// Overwrites the children position field of the given PtNode with newChildrenPosition.
+// Returns the result of the underlying write.
+bool Ver4PatriciaTrieNodeWriter::updateChildrenPosition(
+        const PtNodeParams *const toBeUpdatedPtNodeParams, const int newChildrenPosition) {
+    // The writing helper advances the position it is given, so work on a local copy.
+    int fieldPos = toBeUpdatedPtNodeParams->getChildrenPosFieldPos();
+    const bool written = DynamicPtWritingUtils::writeChildrenPositionAndAdvancePosition(
+            mTrieBuffer, newChildrenPosition, &fieldPos);
+    return written;
+}
+
+// Overwrites the terminal id field of the given PtNode with newTerminalId.
+// Returns the result of the underlying write.
+bool Ver4PatriciaTrieNodeWriter::updateTerminalId(const PtNodeParams *const toBeUpdatedPtNodeParams,
+        const int newTerminalId) {
+    const int terminalIdFieldPos = toBeUpdatedPtNodeParams->getTerminalIdFieldPos();
+    return mTrieBuffer->writeUint(newTerminalId, Ver4DictConstants::TERMINAL_ID_FIELD_SIZE,
+            terminalIdFieldPos);
+}
+
+// Writes the given PtNode at *ptNodeWritingPos for callers that are not interested in the
+// terminal id that was used.
+bool Ver4PatriciaTrieNodeWriter::writePtNodeAndAdvancePosition(
+        const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) {
+    // A null output pointer tells the helper not to report the terminal id.
+    int *const unusedTerminalIdOutput = 0;
+    return writePtNodeAndGetTerminalIdAndAdvancePosition(ptNodeParams, unusedTerminalIdOutput,
+            ptNodeWritingPos);
+}
+
+
+// Writes the given PtNode at *ptNodeWritingPos and then stores a probability entry, built from
+// unigramProperty on top of a default-constructed entry, under the terminal id reported by the
+// node write. Returns false if the node or the probability entry cannot be written.
+bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
+        const PtNodeParams *const ptNodeParams, const UnigramProperty *const unigramProperty,
+        int *const ptNodeWritingPos) {
+    int writtenTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
+    const bool nodeWritten = writePtNodeAndGetTerminalIdAndAdvancePosition(ptNodeParams,
+            &writtenTerminalId, ptNodeWritingPos);
+    if (!nodeWritten) {
+        return false;
+    }
+    // Write probability.
+    ProbabilityEntry emptyEntry;
+    const ProbabilityEntry entryToStore = createUpdatedEntryFrom(&emptyEntry, unigramProperty);
+    return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(writtenTerminalId,
+            &entryToStore);
+}
+
+// Adds a bigram entry from the source PtNode's terminal id to the target PtNode's terminal id
+// through the bigram policy. If the source PtNode was not yet flagged as having bigrams, its
+// flags are rewritten with the "has bigrams" flag set.
+bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry(
+        const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam,
+        const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) {
+    const bool entryAdded = mBigramPolicy->addNewEntry(sourcePtNodeParams->getTerminalId(),
+            targetPtNodeParam->getTerminalId(), bigramProperty, outAddedNewBigram);
+    if (!entryAdded) {
+        AKLOGE("Cannot add new bigram entry. terminalId: %d, targetTerminalId: %d",
+                sourcePtNodeParams->getTerminalId(), targetPtNodeParam->getTerminalId());
+        return false;
+    }
+    if (sourcePtNodeParams->hasBigrams()) {
+        // The flag is already set; nothing else to write.
+        return true;
+    }
+    // Update has bigrams flag.
+    return updatePtNodeFlags(sourcePtNodeParams->getHeadPos(),
+            sourcePtNodeParams->isBlacklisted(), sourcePtNodeParams->isNotAWord(),
+            sourcePtNodeParams->isTerminal(), sourcePtNodeParams->hasShortcutTargets(),
+            true /* hasBigrams */,
+            sourcePtNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
+}
+
+// Asks the bigram policy to remove the entry that links the source PtNode's terminal id to the
+// target PtNode's terminal id and returns the policy's result.
+bool Ver4PatriciaTrieNodeWriter::removeBigramEntry(
+        const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam) {
+    const int sourceTerminalId = sourcePtNodeParams->getTerminalId();
+    const int targetTerminalId = targetPtNodeParam->getTerminalId();
+    return mBigramPolicy->removeEntry(sourceTerminalId, targetTerminalId);
+}
+
+// Forwards to the bigram policy's method of the same name for the source PtNode's terminal id
+// and returns its result.
+bool Ver4PatriciaTrieNodeWriter::updateAllBigramEntriesAndDeleteUselessEntries(
+        const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount) {
+    const int sourceTerminalId = sourcePtNodeParams->getTerminalId();
+    return mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(sourceTerminalId,
+            outBigramEntryCount);
+}
+
+// Rewrites the parent offset and the children position of the given PtNode. Each position is
+// replaced by its mapped value when the relocation map contains it and is written back unchanged
+// otherwise. When outBigramEntryCount is non-null it receives the bigram entry count that the
+// bigram policy reports for the PtNode's terminal id.
+// Returns false if one of the position writes fails.
+bool Ver4PatriciaTrieNodeWriter::updateAllPositionFields(
+        const PtNodeParams *const toBeUpdatedPtNodeParams,
+        const DictPositionRelocationMap *const dictPositionRelocationMap,
+        int *const outBigramEntryCount) {
+    const int headPos = toBeUpdatedPtNodeParams->getHeadPos();
+
+    // Resolve the parent position.
+    int relocatedParentPos = toBeUpdatedPtNodeParams->getParentPos();
+    if (relocatedParentPos != NOT_A_DICT_POS) {
+        const PtNodeWriter::PtNodePositionRelocationMap::const_iterator parentIt =
+                dictPositionRelocationMap->mPtNodePositionRelocationMap.find(relocatedParentPos);
+        if (parentIt != dictPositionRelocationMap->mPtNodePositionRelocationMap.end()) {
+            relocatedParentPos = parentIt->second;
+        }
+    }
+    // Write updated parent offset. The parent offset field directly follows the flags.
+    int parentOffsetFieldPos = headPos + DynamicPtWritingUtils::NODE_FLAG_FIELD_SIZE;
+    if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(mTrieBuffer,
+            relocatedParentPos, headPos, &parentOffsetFieldPos)) {
+        return false;
+    }
+
+    // Resolve and write the children position.
+    int relocatedChildrenPos = toBeUpdatedPtNodeParams->getChildrenPos();
+    if (relocatedChildrenPos != NOT_A_DICT_POS) {
+        const PtNodeWriter::PtNodeArrayPositionRelocationMap::const_iterator childrenIt =
+                dictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.find(
+                        relocatedChildrenPos);
+        if (childrenIt != dictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.end()) {
+            relocatedChildrenPos = childrenIt->second;
+        }
+    }
+    if (!updateChildrenPosition(toBeUpdatedPtNodeParams, relocatedChildrenPos)) {
+        return false;
+    }
+
+    // Counts bigram entries.
+    if (outBigramEntryCount) {
+        *outBigramEntryCount = mBigramPolicy->getBigramEntryConut(
+                toBeUpdatedPtNodeParams->getTerminalId());
+    }
+    return true;
+}
+
+// Adds a shortcut target for the PtNode's terminal id through the shortcut policy. If the PtNode
+// was not yet flagged as having shortcut targets, its flags are rewritten with that flag set.
+// Returns false when the shortcut cannot be added or the flags cannot be written.
+bool Ver4PatriciaTrieNodeWriter::addShortcutTarget(const PtNodeParams *const ptNodeParams,
+        const int *const targetCodePoints, const int targetCodePointCount,
+        const int shortcutProbability) {
+    if (!mShortcutPolicy->addNewShortcut(ptNodeParams->getTerminalId(),
+            targetCodePoints, targetCodePointCount, shortcutProbability)) {
+        // Spelling of "shortcut" corrected so the message can be found when searching logs.
+        AKLOGE("Cannot add new shortcut entry. terminalId: %d", ptNodeParams->getTerminalId());
+        return false;
+    }
+    if (ptNodeParams->hasShortcutTargets()) {
+        // The flag is already set; nothing else to write.
+        return true;
+    }
+    // Update has shortcut targets flag.
+    return updatePtNodeFlags(ptNodeParams->getHeadPos(),
+            ptNodeParams->isBlacklisted(), ptNodeParams->isNotAWord(),
+            ptNodeParams->isTerminal(), true /* hasShortcutTargets */,
+            ptNodeParams->hasBigrams(),
+            ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
+}
+
+// Recomputes the "has bigrams" and "has shortcut targets" flags of the PtNode from the bigram and
+// shortcut contents (a list head position other than NOT_A_DICT_POS counts as present) and
+// rewrites the PtNode flags. The remaining flags are taken from ptNodeParams.
+bool Ver4PatriciaTrieNodeWriter::updatePtNodeHasBigramsAndShortcutTargetsFlags(
+        const PtNodeParams *const ptNodeParams) {
+    const int bigramListHeadPos = mBuffers->getBigramDictContent()->getBigramListHeadPos(
+            ptNodeParams->getTerminalId());
+    const bool bigramsPresent = bigramListHeadPos != NOT_A_DICT_POS;
+    const int shortcutListHeadPos = mBuffers->getShortcutDictContent()->getShortcutListHeadPos(
+            ptNodeParams->getTerminalId());
+    const bool shortcutTargetsPresent = shortcutListHeadPos != NOT_A_DICT_POS;
+    return updatePtNodeFlags(ptNodeParams->getHeadPos(), ptNodeParams->isBlacklisted(),
+            ptNodeParams->isNotAWord(), ptNodeParams->isTerminal(), shortcutTargetsPresent,
+            bigramsPresent, ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
+}
+
+// Writes a complete PtNode at *ptNodeWritingPos and advances the position past it.
+//
+// Fields are written in this order: placeholder flags, parent offset, code points, terminal id
+// (terminal PtNodes only) and children position. The real flags are written last, at the head
+// position of the PtNode.
+//
+// The terminal id is chosen as follows unless ptNodeParams is marked as "will become
+// non-terminal": an id already present in ptNodeParams is reused; otherwise a terminal PtNode
+// gets the next id from the terminal position lookup table. When an id is used, the lookup table
+// entry for it is set to the head position of the written PtNode.
+//
+// outTerminalId may be null. It is only assigned when a terminal id was written.
+// Returns false as soon as one of the writes fails.
+bool Ver4PatriciaTrieNodeWriter::writePtNodeAndGetTerminalIdAndAdvancePosition(
+        const PtNodeParams *const ptNodeParams, int *const outTerminalId,
+        int *const ptNodeWritingPos) {
+    const int nodePos = *ptNodeWritingPos;
+    // Write dummy flags. The Node flags are updated with appropriate flags at the last step of the
+    // PtNode writing.
+    if (!DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer,
+            0 /* nodeFlags */, ptNodeWritingPos)) {
+        return false;
+    }
+    // Calculate a parent offset and write the offset.
+    if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(mTrieBuffer,
+            ptNodeParams->getParentPos(), nodePos, ptNodeWritingPos)) {
+        return false;
+    }
+    // Write code points
+    if (!DynamicPtWritingUtils::writeCodePointsAndAdvancePosition(mTrieBuffer,
+            ptNodeParams->getCodePoints(), ptNodeParams->getCodePointCount(), ptNodeWritingPos)) {
+        return false;
+    }
+    int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
+    if (!ptNodeParams->willBecomeNonTerminal()) {
+        if (ptNodeParams->getTerminalId() != Ver4DictConstants::NOT_A_TERMINAL_ID) {
+            terminalId = ptNodeParams->getTerminalId();
+        } else if (ptNodeParams->isTerminal()) {
+            // Write terminal information using a new terminal id.
+            // Get a new unused terminal id.
+            terminalId = mBuffers->getTerminalPositionLookupTable()->getNextTerminalId();
+        }
+    }
+    // This is a truth value that is passed on as a bool, so declare it as one.
+    const bool isTerminal = terminalId != Ver4DictConstants::NOT_A_TERMINAL_ID;
+    if (isTerminal) {
+        // Update the lookup table.
+        if (!mBuffers->getMutableTerminalPositionLookupTable()->setTerminalPtNodePosition(
+                terminalId, nodePos)) {
+            return false;
+        }
+        // Write terminal Id.
+        if (!mTrieBuffer->writeUintAndAdvancePosition(terminalId,
+                Ver4DictConstants::TERMINAL_ID_FIELD_SIZE, ptNodeWritingPos)) {
+            return false;
+        }
+        if (outTerminalId) {
+            *outTerminalId = terminalId;
+        }
+    }
+    // Write children position
+    if (!DynamicPtWritingUtils::writeChildrenPositionAndAdvancePosition(mTrieBuffer,
+            ptNodeParams->getChildrenPos(), ptNodeWritingPos)) {
+        return false;
+    }
+    return updatePtNodeFlags(nodePos, ptNodeParams->isBlacklisted(), ptNodeParams->isNotAWord(),
+            isTerminal, ptNodeParams->hasShortcutTargets(), ptNodeParams->hasBigrams(),
+            ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
+}
+
+// Builds a new probability entry from originalProbabilityEntry and unigramProperty.
+// When the header policy reports historical info for words, the entry's historical info is
+// updated via ForgettingCurveUtils; otherwise only the probability is replaced.
+const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom(
+        const ProbabilityEntry *const originalProbabilityEntry,
+        const UnigramProperty *const unigramProperty) const {
+    // TODO: Consolidate historical info and probability.
+    if (!mHeaderPolicy->hasHistoricalInfoOfWords()) {
+        return originalProbabilityEntry->createEntryWithUpdatedProbability(
+                unigramProperty->getProbability());
+    }
+    const HistoricalInfo infoFromProperty(unigramProperty->getTimestamp(),
+            unigramProperty->getLevel(), unigramProperty->getCount());
+    const HistoricalInfo mergedInfo = ForgettingCurveUtils::createUpdatedHistoricalInfo(
+            originalProbabilityEntry->getHistoricalInfo(), unigramProperty->getProbability(),
+            &infoFromProperty, mHeaderPolicy);
+    return originalProbabilityEntry->createEntryWithUpdatedHistoricalInfo(&mergedInfo);
+}
+
+// Builds PtNode flags from the given attributes and writes them at ptNodePos.
+// Logs an error and returns false if the flags cannot be written.
+bool Ver4PatriciaTrieNodeWriter::updatePtNodeFlags(const int ptNodePos,
+        const bool isBlacklisted, const bool isNotAWord, const bool isTerminal,
+        const bool hasShortcutTargets, const bool hasBigrams, const bool hasMultipleChars) {
+    // Create node flags and write them.
+    const PatriciaTrieReadingUtils::NodeFlags flagsToWrite =
+            PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord, isTerminal,
+                    hasShortcutTargets, hasBigrams, hasMultipleChars,
+                    CHILDREN_POSITION_FIELD_SIZE);
+    if (DynamicPtWritingUtils::writeFlags(mTrieBuffer, flagsToWrite, ptNodePos)) {
+        return true;
+    }
+    AKLOGE("Cannot write PtNode flags. flags: %x, pos: %d", flagsToWrite, ptNodePos);
+    return false;
+}
+
+} // namespace v402
+} // namespace backward
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h
new file mode 100644
index 000000000..08226ea26
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h
@@ -0,0 +1,145 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
+ */
+
+#ifndef LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_NODE_WRITER_H
+#define LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_NODE_WRITER_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h"
+
+namespace latinime {
+
+// Declared directly in latinime; only pointers to these types are used in this header.
+class BufferWithExtendableBuffer;
+class HeaderPolicy;
+
+namespace backward {
+namespace v402 {
+
+class Ver4BigramListPolicy;
+class Ver4DictBuffers;
+class Ver4PatriciaTrieNodeReader;
+class Ver4PtNodeArrayReader;
+class Ver4ShortcutListPolicy;
+
+/*
+ * This class is used for helping to write nodes of ver4 patricia trie.
+ */
+class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
+ public:
+    // Stores the given pointers as they are and builds the reading helper from the two readers.
+    // The destructor is empty, so none of the pointed-to objects are released by this class.
+    Ver4PatriciaTrieNodeWriter(BufferWithExtendableBuffer *const trieBuffer,
+            Ver4DictBuffers *const buffers, const HeaderPolicy *const headerPolicy,
+            const PtNodeReader *const ptNodeReader,
+            const PtNodeArrayReader *const ptNodeArrayReader,
+            Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy)
+            : mTrieBuffer(trieBuffer), mBuffers(buffers), mHeaderPolicy(headerPolicy),
+              mReadingHelper(ptNodeReader, ptNodeArrayReader), mBigramPolicy(bigramPolicy),
+              mShortcutPolicy(shortcutPolicy) {}
+
+    virtual ~Ver4PatriciaTrieNodeWriter() {}
+
+    // Each of the following three methods sets one state in the flags of an existing PtNode.
+    virtual bool markPtNodeAsDeleted(const PtNodeParams *const toBeUpdatedPtNodeParams);
+
+    virtual bool markPtNodeAsMoved(const PtNodeParams *const toBeUpdatedPtNodeParams,
+            const int movedPos, const int bigramLinkedNodePos);
+
+    virtual bool markPtNodeAsWillBecomeNonTerminal(
+            const PtNodeParams *const toBeUpdatedPtNodeParams);
+
+    // Updates the probability entry of a terminal PtNode from the given unigram property.
+    virtual bool updatePtNodeUnigramProperty(const PtNodeParams *const toBeUpdatedPtNodeParams,
+            const UnigramProperty *const unigramProperty);
+
+    // Refreshes historical info and reports whether the PtNode is still needed.
+    virtual bool updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(
+            const PtNodeParams *const toBeUpdatedPtNodeParams, bool *const outNeedsToKeepPtNode);
+
+    virtual bool updateChildrenPosition(const PtNodeParams *const toBeUpdatedPtNodeParams,
+            const int newChildrenPosition);
+
+    // Not virtual: declared by this class only.
+    bool updateTerminalId(const PtNodeParams *const toBeUpdatedPtNodeParams,
+            const int newTerminalId);
+
+    // The two methods below write a whole PtNode and advance *ptNodeWritingPos.
+    virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
+            int *const ptNodeWritingPos);
+
+    virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
+            const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos);
+
+    // Bigram operations; they are carried out through the bigram policy.
+    virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
+            const PtNodeParams *const targetPtNodeParam, const BigramProperty *const bigramProperty,
+            bool *const outAddedNewBigram);
+
+    virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,
+            const PtNodeParams *const targetPtNodeParam);
+
+    virtual bool updateAllBigramEntriesAndDeleteUselessEntries(
+            const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount);
+
+    // Rewrites parent and children positions using the given relocation map.
+    virtual bool updateAllPositionFields(const PtNodeParams *const toBeUpdatedPtNodeParams,
+            const DictPositionRelocationMap *const dictPositionRelocationMap,
+            int *const outBigramEntryCount);
+
+    virtual bool addShortcutTarget(const PtNodeParams *const ptNodeParams,
+            const int *const targetCodePoints, const int targetCodePointCount,
+            const int shortcutProbability);
+
+    // Not virtual: declared by this class only.
+    bool updatePtNodeHasBigramsAndShortcutTargetsFlags(const PtNodeParams *const ptNodeParams);
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeWriter);
+
+    // Shared implementation of the PtNode writing methods. outTerminalId may be null.
+    bool writePtNodeAndGetTerminalIdAndAdvancePosition(
+            const PtNodeParams *const ptNodeParams, int *const outTerminalId,
+            int *const ptNodeWritingPos);
+
+    // Create updated probability entry using given unigram property. In addition to the
+    // probability, this method updates historical information if needed.
+    // TODO: Update flags belonging to the unigram property.
+    const ProbabilityEntry createUpdatedEntryFrom(
+            const ProbabilityEntry *const originalProbabilityEntry,
+            const UnigramProperty *const unigramProperty) const;
+
+    // Builds PtNode flags from the given attributes and writes them at ptNodePos.
+    bool updatePtNodeFlags(const int ptNodePos, const bool isBlacklisted, const bool isNotAWord,
+            const bool isTerminal, const bool hasShortcutTargets, const bool hasBigrams,
+            const bool hasMultipleChars);
+
+    static const int CHILDREN_POSITION_FIELD_SIZE;
+
+    BufferWithExtendableBuffer *const mTrieBuffer;
+    Ver4DictBuffers *const mBuffers;
+    const HeaderPolicy *const mHeaderPolicy;
+    DynamicPtReadingHelper mReadingHelper;
+    Ver4BigramListPolicy *const mBigramPolicy;
+    Ver4ShortcutListPolicy *const mShortcutPolicy;
+};
+} // namespace v402
+} // namespace backward
+} // namespace latinime
+#endif /* LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_NODE_WRITER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
new file mode 100644
index 000000000..9780ae048
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
@@ -0,0 +1,513 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!!
+ * Do not edit this file other than updating policy's interface.
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+ */
+
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h"
+
+#include <vector>
+
+#include "suggest/core/dicnode/dic_node.h"
+#include "suggest/core/dicnode/dic_node_vector.h"
+#include "suggest/core/dictionary/property/bigram_property.h"
+#include "suggest/core/dictionary/property/unigram_property.h"
+#include "suggest/core/dictionary/property/word_property.h"
+#include "suggest/core/session/prev_words_info.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// Note that there are corresponding definitions in Java side in BinaryDictionaryTests and
+// BinaryDictionaryDecayingTests.
+// Query keys that getProperty() matches against to choose which counter or limit to report.
+const char *const Ver4PatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
+const char *const Ver4PatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
+const char *const Ver4PatriciaTriePolicy::MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT";
+const char *const Ver4PatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT";
+// Safety margin subtracted from the maximum dictionary size to obtain the threshold below.
+const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024;
+// Once the trie buffer's tail position reaches this value, the add/remove entry methods refuse
+// to modify the dictionary.
+const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
+ Ver4DictConstants::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
+
+// Walks the PtNode array holding dicNode's children and pushes one child DicNode per PtNode
+// into childDicNodes, skipping PtNodes that carry non-word information. Marks the dictionary
+// as corrupted when the reading helper reports an error.
+void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
+        DicNodeVector *const childDicNodes) const {
+    if (!dicNode->hasChildren()) {
+        return;
+    }
+    DynamicPtReadingHelper helper(&mNodeReader, &mPtNodeArrayReader);
+    helper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos());
+    for (; !helper.isEnd(); ) {
+        const PtNodeParams params = helper.getPtNodeParams();
+        if (!params.isValid()) {
+            break;
+        }
+        const bool isLiveTerminal = params.isTerminal() && !params.isDeleted();
+        // A DecayingDict may have a terminal PtNode whose probability is NOT_A_PROBABILITY.
+        // Such a PtNode must not be treated as a valid terminal DicNode.
+        const bool isTerminal = (isLiveTerminal && mHeaderPolicy->isDecayingDict())
+                ? (params.getProbability() != NOT_A_PROBABILITY) : isLiveTerminal;
+        helper.readNextSiblingNode(params);
+        if (!params.representsNonWordInfo()) {
+            const bool isBlacklistedOrNotAWord = params.isBlacklisted() || params.isNotAWord();
+            childDicNodes->pushLeavingChild(dicNode, params.getHeadPos(),
+                    params.getChildrenPos(), params.getProbability(), isTerminal,
+                    params.hasChildren(), isBlacklistedOrNotAWord,
+                    params.getCodePointCount(), params.getCodePoints());
+        }
+        // PtNodes that represent non-word information are simply skipped.
+    }
+    if (helper.isError()) {
+        mIsCorrupted = true;
+        AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes().");
+    }
+}
+
+// Delegates to the reading helper, starting at ptNodePos, to fill outCodePoints and
+// outUnigramProbability, and returns the code point count the helper reports. Marks the
+// dictionary as corrupted when the helper reports a reading error.
+int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
+        const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
+        int *const outUnigramProbability) const {
+    DynamicPtReadingHelper helper(&mNodeReader, &mPtNodeArrayReader);
+    helper.initWithPtNodePos(ptNodePos);
+    const int readCodePointCount = helper.getCodePointsAndProbabilityAndReturnCodePointCount(
+            maxCodePointCount, outCodePoints, outUnigramProbability);
+    if (!helper.isError()) {
+        return readCodePointCount;
+    }
+    mIsCorrupted = true;
+    AKLOGE("Dictionary reading error in getCodePointsAndProbabilityAndReturnCodePointCount().");
+    return readCodePointCount;
+}
+
+// Searches for inWord (length code points) starting from the root PtNode array and returns the
+// terminal PtNode position reported by the reading helper. forceLowerCaseSearch is forwarded
+// to the helper unchanged. Marks the dictionary as corrupted when the helper reports a
+// reading error.
+int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
+        const int length, const bool forceLowerCaseSearch) const {
+    DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
+    readingHelper.initWithPtNodeArrayPos(getRootPosition());
+    const int ptNodePos =
+            readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
+    if (readingHelper.isError()) {
+        mIsCorrupted = true;
+        // The log names this method so that the failing call site can be identified.
+        AKLOGE("Dictionary reading error in getTerminalPtNodePositionOfWord().");
+    }
+    return ptNodePos;
+}
+
+// Combines a unigram and a bigram probability into a single probability value.
+// For decaying dictionaries the work is delegated to ForgettingCurveUtils; otherwise the
+// bigram probability wins when present, and the unigram probability is backed off when not.
+int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability,
+        const int bigramProbability) const {
+    if (mHeaderPolicy->isDecayingDict()) {
+        // Both probabilities are encoded. Decode them and get probability.
+        return ForgettingCurveUtils::getProbability(unigramProbability, bigramProbability);
+    }
+    if (unigramProbability == NOT_A_PROBABILITY) {
+        return NOT_A_PROBABILITY;
+    }
+    if (bigramProbability != NOT_A_PROBABILITY) {
+        return bigramProbability;
+    }
+    return ProbabilityUtils::backoff(unigramProbability);
+}
+
+// Returns the unigram probability of the PtNode at ptNodePos, or NOT_A_PROBABILITY when the
+// position is invalid or the PtNode is deleted, blacklisted or not a word.
+int Ver4PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const {
+    if (ptNodePos != NOT_A_DICT_POS) {
+        const PtNodeParams params(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
+        if (!params.isDeleted() && !params.isBlacklisted() && !params.isNotAWord()) {
+            return getProbability(params.getProbability(), NOT_A_PROBABILITY);
+        }
+    }
+    return NOT_A_PROBABILITY;
+}
+
+// Returns the head position of the shortcut list belonging to the PtNode at ptNodePos, or
+// NOT_A_DICT_POS when the position is invalid or the PtNode is deleted.
+int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
+    if (ptNodePos != NOT_A_DICT_POS) {
+        const PtNodeParams params(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
+        if (!params.isDeleted()) {
+            return mBuffers->getShortcutDictContent()->getShortcutListHeadPos(
+                    params.getTerminalId());
+        }
+    }
+    return NOT_A_DICT_POS;
+}
+
+// Returns the head position of the bigram list belonging to the PtNode at ptNodePos, or
+// NOT_A_DICT_POS when the position is invalid or the PtNode is deleted.
+int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
+    if (ptNodePos != NOT_A_DICT_POS) {
+        const PtNodeParams params(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
+        if (!params.isDeleted()) {
+            return mBuffers->getBigramDictContent()->getBigramListHeadPos(
+                    params.getTerminalId());
+        }
+    }
+    return NOT_A_DICT_POS;
+}
+
+// Adds or updates the unigram entry for word (length code points) using unigramProperty, then
+// registers every shortcut target the property carries.
+// Returns false when the dictionary is not updatable, is too close to its maximum size, the
+// word or one of its shortcut targets is longer than MAX_WORD_LENGTH, or any of the underlying
+// updating-helper operations fails.
+bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int length,
+        const UnigramProperty *const unigramProperty) {
+    if (!mBuffers->isUpdatable()) {
+        AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary.");
+        return false;
+    }
+    if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
+        AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
+                mDictBuffer->getTailPosition());
+        return false;
+    }
+    if (length > MAX_WORD_LENGTH) {
+        AKLOGE("The word is too long to insert to the dictionary, length: %d", length);
+        return false;
+    }
+    for (const auto &shortcut : unigramProperty->getShortcuts()) {
+        if (shortcut.getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
+            // size() yields a size_t, so it is printed with %zu; %d would mismatch the
+            // argument type passed through the variadic logger.
+            AKLOGE("One of shortcut targets is too long to insert to the dictionary, "
+                    "length: %zu", shortcut.getTargetCodePoints()->size());
+            return false;
+        }
+    }
+    DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
+    readingHelper.initWithPtNodeArrayPos(getRootPosition());
+    bool addedNewUnigram = false;
+    // Work on a local copy because the beginning-of-sentence marker may be attached to it.
+    int codePointsToAdd[MAX_WORD_LENGTH];
+    int codePointCountToAdd = length;
+    memmove(codePointsToAdd, word, sizeof(int) * length);
+    if (unigramProperty->representsBeginningOfSentence()) {
+        codePointCountToAdd = CharUtils::attachBeginningOfSentenceMarker(codePointsToAdd,
+                codePointCountToAdd, MAX_WORD_LENGTH);
+    }
+    if (codePointCountToAdd <= 0) {
+        return false;
+    }
+    if (!mUpdatingHelper.addUnigramWord(&readingHelper, codePointsToAdd, codePointCountToAdd,
+            unigramProperty, &addedNewUnigram)) {
+        return false;
+    }
+    // Beginning-of-sentence entries are not counted as unigrams.
+    if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) {
+        mUnigramCount++;
+    }
+    if (unigramProperty->getShortcuts().size() > 0) {
+        // Add shortcut target.
+        const int wordPos = getTerminalPtNodePositionOfWord(word, length,
+                false /* forceLowerCaseSearch */);
+        if (wordPos == NOT_A_DICT_POS) {
+            AKLOGE("Cannot find terminal PtNode position to add shortcut target.");
+            return false;
+        }
+        for (const auto &shortcut : unigramProperty->getShortcuts()) {
+            if (!mUpdatingHelper.addShortcutTarget(wordPos,
+                    shortcut.getTargetCodePoints()->data(),
+                    shortcut.getTargetCodePoints()->size(), shortcut.getProbability())) {
+                AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %zu, "
+                        "probability: %d", wordPos, shortcut.getTargetCodePoints()->size(),
+                        shortcut.getProbability());
+                return false;
+            }
+        }
+    }
+    return true;
+}
+
+// Adds a bigram from the previous word described by prevWordsInfo to the target word of
+// bigramProperty. If the previous word is a beginning-of-sentence marker that is not in the
+// dictionary yet, a unigram entry for it is created first.
+// Returns false when the dictionary is not updatable, is too close to its maximum size, the
+// arguments are invalid, either word cannot be found, or the update itself fails.
+bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+        const BigramProperty *const bigramProperty) {
+    if (!mBuffers->isUpdatable()) {
+        AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
+        return false;
+    }
+    if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
+        AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
+                mDictBuffer->getTailPosition());
+        return false;
+    }
+    if (!prevWordsInfo->isValid()) {
+        AKLOGE("prev words info is not valid for adding n-gram entry to the dictionary.");
+        return false;
+    }
+    if (bigramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
+        // size() yields a size_t, so it is printed with %zu; %d would mismatch the argument
+        // type passed through the variadic logger.
+        AKLOGE("The word is too long to insert the ngram to the dictionary. "
+                "length: %zu", bigramProperty->getTargetCodePoints()->size());
+        return false;
+    }
+    int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+    prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
+            false /* tryLowerCaseSearch */);
+    // TODO: Support N-gram.
+    if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
+        if (!prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)) {
+            return false;
+        }
+        // The previous word is a beginning-of-sentence marker that has no entry yet; add one.
+        const std::vector<UnigramProperty::ShortcutProperty> shortcuts;
+        const UnigramProperty beginningOfSentenceUnigramProperty(
+                true /* representsBeginningOfSentence */, true /* isNotAWord */,
+                false /* isBlacklisted */, MAX_PROBABILITY /* probability */,
+                NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
+        if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
+                prevWordsInfo->getNthPrevWordCodePointCount(1 /* n */),
+                &beginningOfSentenceUnigramProperty)) {
+            AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
+            return false;
+        }
+        // Refresh Terminal PtNode positions.
+        prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
+                false /* tryLowerCaseSearch */);
+    }
+    const int word1Pos = getTerminalPtNodePositionOfWord(
+            bigramProperty->getTargetCodePoints()->data(),
+            bigramProperty->getTargetCodePoints()->size(), false /* forceLowerCaseSearch */);
+    if (word1Pos == NOT_A_DICT_POS) {
+        return false;
+    }
+    bool addedNewBigram = false;
+    if (!mUpdatingHelper.addBigramWords(prevWordsPtNodePos[0], word1Pos, bigramProperty,
+            &addedNewBigram)) {
+        return false;
+    }
+    if (addedNewBigram) {
+        mBigramCount++;
+    }
+    return true;
+}
+
+// Removes the bigram from the previous word described by prevWordsInfo to word (length code
+// points) and decrements the bigram count on success.
+// Returns false when the dictionary is not updatable, is too close to its maximum size, the
+// arguments are invalid, either word cannot be found, or the removal itself fails.
+bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+        const int *const word, const int length) {
+    if (!mBuffers->isUpdatable()) {
+        AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
+        return false;
+    }
+    if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
+        AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
+                mDictBuffer->getTailPosition());
+        return false;
+    }
+    if (!prevWordsInfo->isValid()) {
+        AKLOGE("prev words info is not valid for removing n-gram entry form the dictionary.");
+        return false;
+    }
+    if (length > MAX_WORD_LENGTH) {
+        AKLOGE("word is too long to remove n-gram entry form the dictionary. length: %d", length);
+        // Bail out like the other argument checks do; an over-long word cannot be looked up.
+        // NOTE(review): the v4 file this one was generated from likely needs the same fix.
+        return false;
+    }
+    int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+    prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
+            false /* tryLowerCaseSearch */);
+    // TODO: Support N-gram.
+    if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
+        return false;
+    }
+    const int wordPos = getTerminalPtNodePositionOfWord(word, length,
+            false /* forceLowerCaseSearch */);
+    if (wordPos == NOT_A_DICT_POS) {
+        return false;
+    }
+    if (!mUpdatingHelper.removeBigramWords(prevWordsPtNodePos[0], wordPos)) {
+        return false;
+    }
+    mBigramCount--;
+    return true;
+}
+
+// Writes the dictionary to filePath together with the current unigram and bigram counts.
+// Returns false for a non-updatable dictionary; a failed write additionally marks the
+// dictionary as corrupted.
+bool Ver4PatriciaTriePolicy::flush(const char *const filePath) {
+    if (!mBuffers->isUpdatable()) {
+        AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath);
+        return false;
+    }
+    if (mWritingHelper.writeToDictFile(filePath, mUnigramCount, mBigramCount)) {
+        return true;
+    }
+    AKLOGE("Cannot flush the dictionary to file.");
+    mIsCorrupted = true;
+    return false;
+}
+
+// Writes the dictionary to filePath through the GC-performing writer, starting from the root
+// position. Returns false for a non-updatable dictionary; a failed write additionally marks
+// the dictionary as corrupted.
+bool Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {
+    if (!mBuffers->isUpdatable()) {
+        AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
+        return false;
+    }
+    if (mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath)) {
+        return true;
+    }
+    AKLOGE("Cannot flush the dictionary to file with GC.");
+    mIsCorrupted = true;
+    return false;
+}
+
+// Reports whether garbage collection should be run. The checks are evaluated in order:
+// buffers near their size limit, extended region over its limit, dictionary at the refusal
+// threshold while using additional buffer, and finally the decay check for decaying
+// dictionaries. Always false for a non-updatable dictionary.
+bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
+    if (!mBuffers->isUpdatable()) {
+        AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
+        return false;
+    }
+    // Additional buffer size is near the limit.
+    if (mBuffers->isNearSizeLimit()) {
+        return true;
+    }
+    // Total extended region size of the trie exceeds the limit.
+    if (mHeaderPolicy->getExtendedRegionSize() + mDictBuffer->getUsedAdditionalBufferSize()
+            > Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE) {
+        return true;
+    }
+    // Needs to reduce dictionary size.
+    if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS
+            && mDictBuffer->getUsedAdditionalBufferSize() > 0) {
+        return true;
+    }
+    if (!mHeaderPolicy->isDecayingDict()) {
+        return false;
+    }
+    return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mUnigramCount, mBigramCount,
+            mHeaderPolicy);
+}
+
+// Writes the decimal value selected by query into outResult (at most maxResultLength bytes).
+// Recognized queries are the *_QUERY constants of this class; for any other query outResult
+// is left untouched.
+void Ver4PatriciaTriePolicy::getProperty(const char *const query, const int queryLength,
+        char *const outResult, const int maxResultLength) {
+    const int compareLength = queryLength + 1 /* terminator */;
+    const auto isQuery = [query, compareLength](const char *const key) {
+        return strncmp(query, key, compareLength) == 0;
+    };
+    if (isQuery(UNIGRAM_COUNT_QUERY)) {
+        snprintf(outResult, maxResultLength, "%d", mUnigramCount);
+        return;
+    }
+    if (isQuery(BIGRAM_COUNT_QUERY)) {
+        snprintf(outResult, maxResultLength, "%d", mBigramCount);
+        return;
+    }
+    if (isQuery(MAX_UNIGRAM_COUNT_QUERY)) {
+        // Decaying dictionaries use the forgetting-curve hard limit; others report the
+        // maximum dictionary size.
+        snprintf(outResult, maxResultLength, "%d",
+                mHeaderPolicy->isDecayingDict() ?
+                        ForgettingCurveUtils::getUnigramCountHardLimit(
+                                mHeaderPolicy->getMaxUnigramCount()) :
+                        static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
+        return;
+    }
+    if (isQuery(MAX_BIGRAM_COUNT_QUERY)) {
+        snprintf(outResult, maxResultLength, "%d",
+                mHeaderPolicy->isDecayingDict() ?
+                        ForgettingCurveUtils::getBigramCountHardLimit(
+                                mHeaderPolicy->getMaxBigramCount()) :
+                        static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
+    }
+}
+
+// Builds a WordProperty for the given word: its code points, its unigram property (flags,
+// probability and historical info), every readable bigram target and every shortcut target.
+// Returns a default-constructed WordProperty when the word has no terminal PtNode.
+const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const codePoints,
+        const int codePointCount) const {
+    const int ptNodePos = getTerminalPtNodePositionOfWord(codePoints, codePointCount,
+            false /* forceLowerCaseSearch */);
+    if (ptNodePos == NOT_A_DICT_POS) {
+        AKLOGE("getWordProperty is called for invalid word.");
+        return WordProperty();
+    }
+    const PtNodeParams ptNodeParams = mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
+    std::vector<int> wordCodePoints(ptNodeParams.getCodePoints(),
+            ptNodeParams.getCodePoints() + ptNodeParams.getCodePointCount());
+    const ProbabilityEntry probabilityEntry =
+            mBuffers->getProbabilityDictContent()->getProbabilityEntry(
+                    ptNodeParams.getTerminalId());
+    const HistoricalInfo *const unigramHistoricalInfo = probabilityEntry.getHistoricalInfo();
+
+    // Fetch bigram information.
+    std::vector<BigramProperty> bigrams;
+    int bigramReadingPos = getBigramsPositionOfPtNode(ptNodePos);
+    if (bigramReadingPos != NOT_A_DICT_POS) {
+        int targetCodePoints[MAX_WORD_LENGTH];
+        const BigramDictContent *const bigramDictContent = mBuffers->getBigramDictContent();
+        const TerminalPositionLookupTable *const lookupTable =
+                mBuffers->getTerminalPositionLookupTable();
+        for (bool hasNext = true; hasNext; ) {
+            const BigramEntry bigramEntry =
+                    bigramDictContent->getBigramEntryAndAdvancePosition(&bigramReadingPos);
+            hasNext = bigramEntry.hasNext();
+            const int targetPtNodePos = lookupTable->getTerminalPtNodePosition(
+                    bigramEntry.getTargetTerminalId());
+            if (targetPtNodePos == NOT_A_DICT_POS) {
+                // The target has no terminal PtNode position; skip this entry.
+                continue;
+            }
+            // Word (unigram) probability
+            int targetUnigramProbability = NOT_A_PROBABILITY;
+            const int targetCodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
+                    targetPtNodePos, MAX_WORD_LENGTH, targetCodePoints,
+                    &targetUnigramProbability);
+            const std::vector<int> target(targetCodePoints,
+                    targetCodePoints + targetCodePointCount);
+            const HistoricalInfo *const bigramHistoricalInfo = bigramEntry.getHistoricalInfo();
+            // Entries with historical info store an encoded probability that must be decoded.
+            const int bigramProbability = bigramEntry.hasHistoricalInfo() ?
+                    ForgettingCurveUtils::decodeProbability(
+                            bigramHistoricalInfo, mHeaderPolicy) :
+                    bigramEntry.getProbability();
+            bigrams.emplace_back(&target, bigramProbability,
+                    bigramHistoricalInfo->getTimeStamp(), bigramHistoricalInfo->getLevel(),
+                    bigramHistoricalInfo->getCount());
+        }
+    }
+
+    // Fetch shortcut information.
+    std::vector<UnigramProperty::ShortcutProperty> shortcuts;
+    int shortcutPos = getShortcutPositionOfPtNode(ptNodePos);
+    if (shortcutPos != NOT_A_DICT_POS) {
+        int shortcutTarget[MAX_WORD_LENGTH];
+        const ShortcutDictContent *const shortcutDictContent =
+                mBuffers->getShortcutDictContent();
+        bool hasNext = true;
+        do {
+            int shortcutTargetLength = 0;
+            int shortcutProbability = NOT_A_PROBABILITY;
+            shortcutDictContent->getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH, shortcutTarget,
+                    &shortcutTargetLength, &shortcutProbability, &hasNext, &shortcutPos);
+            const std::vector<int> target(shortcutTarget, shortcutTarget + shortcutTargetLength);
+            shortcuts.emplace_back(&target, shortcutProbability);
+        } while (hasNext);
+    }
+
+    const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
+            ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
+            unigramHistoricalInfo->getTimeStamp(), unigramHistoricalInfo->getLevel(),
+            unigramHistoricalInfo->getCount(), &shortcuts);
+    return WordProperty(&wordCodePoints, &unigramProperty, &bigrams);
+}
+
+// Iterates over the words of the dictionary. token is an index into a cached list of terminal
+// PtNode positions that is rebuilt whenever token is 0. The word at that index is written to
+// outCodePoints / outCodePointCount, and the return value is the token for the next call, or 0
+// when the iteration is finished or the token is invalid.
+int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints,
+        int *const outCodePointCount) {
+    *outCodePointCount = 0;
+    std::vector<int> &terminalPositions = mTerminalPtNodePositionsForIteratingWords;
+    if (token == 0) {
+        // Start of a new iteration: collect all terminal PtNode positions from the root.
+        terminalPositions.clear();
+        DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy(
+                &terminalPositions);
+        DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
+        readingHelper.initWithPtNodeArrayPos(getRootPosition());
+        readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(&traversePolicy);
+    }
+    const int wordCount = static_cast<int>(terminalPositions.size());
+    const bool isTokenInRange = token >= 0 && token < wordCount;
+    if (!isTokenInRange) {
+        AKLOGE("Given token %d is invalid.", token);
+        return 0;
+    }
+    int unigramProbability = NOT_A_PROBABILITY;
+    *outCodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
+            terminalPositions[token], MAX_WORD_LENGTH, outCodePoints, &unigramProbability);
+    if (token + 1 < wordCount) {
+        return token + 1;
+    }
+    // All words have been iterated.
+    terminalPositions.clear();
+    return 0;
+}
+
+} // namespace v402
+} // namespace backward
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
new file mode 100644
index 000000000..16b1bd2c1
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
@@ -0,0 +1,174 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!!
+ * Do not edit this file other than updating policy's interface.
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
+ */
+
+#ifndef LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_POLICY_H
+#define LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_POLICY_H
+
+#include <vector>
+
+#include "defines.h"
+#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/shortcut/ver4_shortcut_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+} // namespace v402
+} // namespace backward
+class DicNode;
+namespace backward {
+namespace v402 {
+} // namespace v402
+} // namespace backward
+class DicNodeVector;
+namespace backward {
+namespace v402 {
+
+// Dictionary structure policy for the backward-compatible v402 flavor of the version 4
+// dictionary format. It owns the dictionary buffers and wires the node reader, node writer and
+// updating/writing helpers on top of them.
+class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
+ public:
+ // Takes ownership of buffers. The remaining members are initialized from mBuffers, so
+ // mBuffers has to stay the first declared member of this class.
+ Ver4PatriciaTriePolicy(Ver4DictBuffers::Ver4DictBuffersPtr buffers)
+ : mBuffers(std::move(buffers)), mHeaderPolicy(mBuffers->getHeaderPolicy()),
+ mDictBuffer(mBuffers->getWritableTrieBuffer()),
+ mBigramPolicy(mBuffers->getMutableBigramDictContent(),
+ mBuffers->getTerminalPositionLookupTable(), mHeaderPolicy),
+ mShortcutPolicy(mBuffers->getMutableShortcutDictContent(),
+ mBuffers->getTerminalPositionLookupTable()),
+ mNodeReader(mDictBuffer, mBuffers->getProbabilityDictContent(), mHeaderPolicy),
+ mPtNodeArrayReader(mDictBuffer),
+ mNodeWriter(mDictBuffer, mBuffers.get(), mHeaderPolicy, &mNodeReader,
+ &mPtNodeArrayReader, &mBigramPolicy, &mShortcutPolicy),
+ mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter),
+ mWritingHelper(mBuffers.get()),
+ mUnigramCount(mHeaderPolicy->getUnigramCount()),
+ mBigramCount(mHeaderPolicy->getBigramCount()),
+ mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {};
+
+ // The root PtNode array is always located at position 0.
+ AK_FORCE_INLINE int getRootPosition() const {
+ return 0;
+ }
+
+ void createAndGetAllChildDicNodes(const DicNode *const dicNode,
+ DicNodeVector *const childDicNodes) const;
+
+ int getCodePointsAndProbabilityAndReturnCodePointCount(
+ const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints,
+ int *const outUnigramProbability) const;
+
+ int getTerminalPtNodePositionOfWord(const int *const inWord,
+ const int length, const bool forceLowerCaseSearch) const;
+
+ int getProbability(const int unigramProbability, const int bigramProbability) const;
+
+ int getUnigramProbabilityOfPtNode(const int ptNodePos) const;
+
+ int getShortcutPositionOfPtNode(const int ptNodePos) const;
+
+ int getBigramsPositionOfPtNode(const int ptNodePos) const;
+
+ const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
+ return mHeaderPolicy;
+ }
+
+ const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const {
+ return &mBigramPolicy;
+ }
+
+ const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
+ return &mShortcutPolicy;
+ }
+
+ bool addUnigramEntry(const int *const word, const int length,
+ const UnigramProperty *const unigramProperty);
+
+ // Always fails: this policy cannot remove unigram entries.
+ bool removeUnigramEntry(const int *const word, const int length) {
+ // Removing unigram entry is not supported.
+ return false;
+ }
+
+ bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+ const BigramProperty *const bigramProperty);
+
+ bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word1,
+ const int length1);
+
+ bool flush(const char *const filePath);
+
+ bool flushWithGC(const char *const filePath);
+
+ bool needsToRunGC(const bool mindsBlockByGC) const;
+
+ void getProperty(const char *const query, const int queryLength, char *const outResult,
+ const int maxResultLength);
+
+ const WordProperty getWordProperty(const int *const codePoints,
+ const int codePointCount) const;
+
+ int getNextWordAndNextToken(const int token, int *const outCodePoints,
+ int *const outCodePointCount);
+
+ // True once a dictionary read error or a failed flush has been recorded.
+ bool isCorrupted() const {
+ return mIsCorrupted;
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy);
+
+ // Query keys accepted by getProperty().
+ static const char *const UNIGRAM_COUNT_QUERY;
+ static const char *const BIGRAM_COUNT_QUERY;
+ static const char *const MAX_UNIGRAM_COUNT_QUERY;
+ static const char *const MAX_BIGRAM_COUNT_QUERY;
+ // When the dictionary size is near the maximum size, we have to refuse dynamic operations to
+ // prevent the dictionary from overflowing.
+ static const int MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
+ static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
+
+ // Owning pointer to the dictionary buffers; must be declared before the members below
+ // because their initializers read from it.
+ const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers;
+ const HeaderPolicy *const mHeaderPolicy;
+ BufferWithExtendableBuffer *const mDictBuffer;
+ Ver4BigramListPolicy mBigramPolicy;
+ Ver4ShortcutListPolicy mShortcutPolicy;
+ Ver4PatriciaTrieNodeReader mNodeReader;
+ Ver4PtNodeArrayReader mPtNodeArrayReader;
+ Ver4PatriciaTrieNodeWriter mNodeWriter;
+ DynamicPtUpdatingHelper mUpdatingHelper;
+ Ver4PatriciaTrieWritingHelper mWritingHelper;
+ // Running counts, seeded from the header and adjusted as entries are added or removed.
+ int mUnigramCount;
+ int mBigramCount;
+ // Cache of terminal PtNode positions used by getNextWordAndNextToken().
+ std::vector<int> mTerminalPtNodePositionsForIteratingWords;
+ // mutable so that const member functions can record corruption as well.
+ mutable bool mIsCorrupted;
+};
+} // namespace v402
+} // namespace backward
+} // namespace latinime
+#endif // LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_reading_utils.cpp
new file mode 100644
index 000000000..80d531198
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_reading_utils.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp
+ */
+
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_reading_utils.h"
+
+#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// Reads a 32-bit unsigned value from buffer at *pos, advances *pos past it and returns the
+// value as the terminal id. The parameter list mirrors the declaration in the header.
+/* static */ int Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(
+        const uint8_t *const buffer, int *const pos) {
+    return ByteArrayUtils::readUint32AndAdvancePosition(buffer, pos);
+}
+
+} // namespace v402
+} // namespace backward
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_reading_utils.h
new file mode 100644
index 000000000..3579c26d6
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_reading_utils.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h
+ */
+
+#ifndef LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_READING_UTILS_H
+#define LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_READING_UTILS_H
+
+#include <cstdint>
+
+#include "defines.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+} // namespace v402
+} // namespace backward
+class BufferWithExtendableBuffer;
+namespace backward {
+namespace v402 {
+
+// Static-only helper for reading fields of the v402 patricia trie; not meant to be instantiated.
+class Ver4PatriciaTrieReadingUtils {
+ public:
+ // Reads the terminal id stored at *pos in buffer and advances *pos past it.
+ static int getTerminalIdAndAdvancePosition(const uint8_t *const buffer,
+ int *const pos);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTrieReadingUtils);
+};
+} // namespace v402
+} // namespace backward
+} // namespace latinime
+#endif /* LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_READING_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.cpp
new file mode 100644
index 000000000..3fb4caa08
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.cpp
@@ -0,0 +1,304 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
+ */
+
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.h"
+
+#include <cstring>
+#include <queue>
+
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/shortcut/ver4_shortcut_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/file_utils.h"
+#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// Writes the dictionary held by mBuffers to |dictDirPath| without running GC.
+//
+// A fresh header is generated with the given unigram / bigram counts, with
+// updatesLastDecayedTime = false, and with an extended region size equal to the header policy's
+// current extended region size plus the additional trie buffer size used so far.
+//
+// Returns false if the header cannot be written to its buffer; otherwise returns the result of
+// flushing the header and dictionary buffers.
+bool Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const dictDirPath,
+        const int unigramCount, const int bigramCount) const {
+    const HeaderPolicy *const policy = mBuffers->getHeaderPolicy();
+    BufferWithExtendableBuffer newHeaderBuffer(
+            BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
+    const int extendedRegionSize = policy->getExtendedRegionSize()
+            + mBuffers->getTrieBuffer()->getUsedAdditionalBufferSize();
+    const bool isHeaderWritten = policy->fillInAndWriteHeaderToBuffer(
+            false /* updatesLastDecayedTime */, unigramCount, bigramCount, extendedRegionSize,
+            &newHeaderBuffer);
+    if (isHeaderWritten) {
+        return mBuffers->flushHeaderAndDictBuffers(dictDirPath, &newHeaderBuffer);
+    }
+    AKLOGE("Cannot write header structure to buffer. "
+            "updatesLastDecayedTime: %d, unigramCount: %d, bigramCount: %d, "
+            "extendedRegionSize: %d", false, unigramCount, bigramCount,
+            extendedRegionSize);
+    return false;
+}
+
+// Runs GC into newly created buffers and writes the result to |dictDirPath|.
+//
+// The header of the written dictionary is generated with the unigram / bigram counts reported
+// by runGC(), with updatesLastDecayedTime = true and with an extended region size of 0.
+// Returns false if GC or header generation fails; otherwise returns the result of flushing the
+// new buffers.
+bool Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos,
+        const char *const dictDirPath) {
+    const HeaderPolicy *const policy = mBuffers->getHeaderPolicy();
+    Ver4DictBuffers::Ver4DictBuffersPtr gcBuffers(Ver4DictBuffers::createVer4DictBuffers(
+            policy, Ver4DictConstants::MAX_DICTIONARY_SIZE));
+    int gcUnigramCount = 0;
+    int gcBigramCount = 0;
+    const bool isGcDone = runGC(rootPtNodeArrayPos, policy, gcBuffers.get(), &gcUnigramCount,
+            &gcBigramCount);
+    if (!isGcDone) {
+        return false;
+    }
+    BufferWithExtendableBuffer newHeaderBuffer(
+            BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
+    const bool isHeaderWritten = policy->fillInAndWriteHeaderToBuffer(
+            true /* updatesLastDecayedTime */, gcUnigramCount, gcBigramCount,
+            0 /* extendedRegionSize */, &newHeaderBuffer);
+    if (!isHeaderWritten) {
+        return false;
+    }
+    return gcBuffers->flushHeaderAndDictBuffers(dictDirPath, &newHeaderBuffer);
+}
+
+// Garbage-collects the dictionary in mBuffers into |buffersToWrite|.
+//
+// Steps, in order:
+//  1. Post-order walk of the original trie with the unigram GC listener; for decaying
+//     dictionaries that exceed the unigram limit, truncateUnigrams() is run afterwards.
+//  2. Post-order walk of the original trie with the bigram GC listener; for decaying
+//     dictionaries that exceed the bigram limit, truncateBigrams() is run afterwards.
+//  3. Walk that places and writes valid PtNodes into |buffersToWrite|, recording relocations.
+//  4. GC of terminal ids and of the probability, bigram and shortcut contents.
+//  5. Two walks of the new trie that update position fields, then flags and terminal ids.
+//
+// On success |*outUnigramCount| is set from the position-updating listener and
+// |*outBigramCount| is filled in by the bigram content GC. Returns false as soon as any step
+// fails.
+bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
+        const HeaderPolicy *const headerPolicy, Ver4DictBuffers *const buffersToWrite,
+        int *const outUnigramCount, int *const outBigramCount) {
+    // Readers, list policies and writer working on the original buffers.
+    Ver4PatriciaTrieNodeReader srcNodeReader(mBuffers->getTrieBuffer(),
+            mBuffers->getProbabilityDictContent(), headerPolicy);
+    Ver4PtNodeArrayReader srcArrayReader(mBuffers->getTrieBuffer());
+    Ver4BigramListPolicy srcBigramPolicy(mBuffers->getMutableBigramDictContent(),
+            mBuffers->getTerminalPositionLookupTable(), headerPolicy);
+    Ver4ShortcutListPolicy srcShortcutPolicy(mBuffers->getMutableShortcutDictContent(),
+            mBuffers->getTerminalPositionLookupTable());
+    Ver4PatriciaTrieNodeWriter srcNodeWriter(mBuffers->getWritableTrieBuffer(),
+            mBuffers, headerPolicy, &srcNodeReader, &srcArrayReader, &srcBigramPolicy,
+            &srcShortcutPolicy);
+
+    // Step 1: unigram pass over the original trie.
+    DynamicPtReadingHelper srcTraverser(&srcNodeReader, &srcArrayReader);
+    srcTraverser.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+    DynamicPtGcEventListeners
+            ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
+                    unigramListener(&srcNodeWriter);
+    if (!srcTraverser.traverseAllPtNodesInPostorderDepthFirstManner(&unigramListener)) {
+        return false;
+    }
+    const int validUnigramCount = unigramListener.getValidUnigramCount();
+    const int unigramLimit = headerPolicy->getMaxUnigramCount();
+    const bool hasTooManyUnigrams =
+            headerPolicy->isDecayingDict() && validUnigramCount > unigramLimit;
+    if (hasTooManyUnigrams
+            && !truncateUnigrams(&srcNodeReader, &srcNodeWriter, unigramLimit)) {
+        AKLOGE("Cannot remove unigrams. current: %d, max: %d", validUnigramCount,
+                unigramLimit);
+        return false;
+    }
+
+    // Step 2: bigram pass over the original trie.
+    srcTraverser.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+    DynamicPtGcEventListeners::TraversePolicyToUpdateBigramProbability
+            bigramListener(&srcNodeWriter);
+    if (!srcTraverser.traverseAllPtNodesInPostorderDepthFirstManner(&bigramListener)) {
+        return false;
+    }
+    const int validBigramCount = bigramListener.getValidBigramEntryCount();
+    const int bigramLimit = headerPolicy->getMaxBigramCount();
+    const bool hasTooManyBigrams =
+            headerPolicy->isDecayingDict() && validBigramCount > bigramLimit;
+    if (hasTooManyBigrams && !truncateBigrams(bigramLimit)) {
+        AKLOGE("Cannot remove bigrams. current: %d, max: %d", validBigramCount, bigramLimit);
+        return false;
+    }
+
+    // Step 3: copy valid PtNodes into the new buffers. The map records where each position in
+    // mBuffers ended up in buffersToWrite.
+    PtNodeWriter::DictPositionRelocationMap relocationMap;
+    srcTraverser.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+    Ver4PatriciaTrieNodeWriter placingNodeWriter(buffersToWrite->getWritableTrieBuffer(),
+            buffersToWrite, headerPolicy, &srcNodeReader, &srcArrayReader, &srcBigramPolicy,
+            &srcShortcutPolicy);
+    DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
+            placingListener(&placingNodeWriter, buffersToWrite->getWritableTrieBuffer(),
+                    &relocationMap);
+    if (!srcTraverser.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
+            &placingListener)) {
+        return false;
+    }
+
+    // Readers, list policies and writer working on the GCed buffers.
+    Ver4PatriciaTrieNodeReader dstNodeReader(buffersToWrite->getTrieBuffer(),
+            buffersToWrite->getProbabilityDictContent(), headerPolicy);
+    Ver4PtNodeArrayReader dstArrayReader(buffersToWrite->getTrieBuffer());
+    Ver4BigramListPolicy dstBigramPolicy(buffersToWrite->getMutableBigramDictContent(),
+            buffersToWrite->getTerminalPositionLookupTable(), headerPolicy);
+    Ver4ShortcutListPolicy dstShortcutPolicy(buffersToWrite->getMutableShortcutDictContent(),
+            buffersToWrite->getTerminalPositionLookupTable());
+    Ver4PatriciaTrieNodeWriter dstNodeWriter(buffersToWrite->getWritableTrieBuffer(),
+            buffersToWrite, headerPolicy, &dstNodeReader, &dstArrayReader, &dstBigramPolicy,
+            &dstShortcutPolicy);
+
+    // Step 4: GC terminal ids first; the resulting id map drives the content GCs below.
+    TerminalPositionLookupTable::TerminalIdMap terminalIdMap;
+    if (!buffersToWrite->getMutableTerminalPositionLookupTable()->runGCTerminalIds(
+            &terminalIdMap)) {
+        return false;
+    }
+    if (!buffersToWrite->getMutableProbabilityDictContent()->runGC(&terminalIdMap,
+            mBuffers->getProbabilityDictContent())) {
+        return false;
+    }
+    if (!buffersToWrite->getMutableBigramDictContent()->runGC(&terminalIdMap,
+            mBuffers->getBigramDictContent(), outBigramCount)) {
+        return false;
+    }
+    if (!buffersToWrite->getMutableShortcutDictContent()->runGC(&terminalIdMap,
+            mBuffers->getShortcutDictContent())) {
+        return false;
+    }
+
+    // Step 5: fix up the new trie, positions first, then flags and terminal ids.
+    DynamicPtReadingHelper dstTraverser(&dstNodeReader, &dstArrayReader);
+    dstTraverser.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+    DynamicPtGcEventListeners::TraversePolicyToUpdateAllPositionFields
+            positionListener(&dstNodeWriter, &relocationMap);
+    if (!dstTraverser.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
+            &positionListener)) {
+        return false;
+    }
+    dstTraverser.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+    TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds
+            flagsAndIdsListener(&dstNodeWriter, &terminalIdMap);
+    if (!dstTraverser.traverseAllPtNodesInPostorderDepthFirstManner(&flagsAndIdsListener)) {
+        return false;
+    }
+    *outUnigramCount = positionListener.getUnigramCount();
+    return true;
+}
+
+// Trims unigrams down to |maxUnigramCount| candidates.
+//
+// Every terminal id that has a PtNode position is ranked by DictProbabilityComparator. The
+// probability used for ranking is decoded from the historical info when the entry has it, and
+// is the stored probability otherwise. Entries are then popped until at most |maxUnigramCount|
+// remain; each popped PtNode is marked as "will become non-terminal", except PtNodes that
+// represent non-word info, which are popped but left unmarked.
+//
+// Returns false if a PtNode cannot be marked.
+bool Ver4PatriciaTrieWritingHelper::truncateUnigrams(
+        const Ver4PatriciaTrieNodeReader *const ptNodeReader,
+        Ver4PatriciaTrieNodeWriter *const ptNodeWriter, const int maxUnigramCount) {
+    const TerminalPositionLookupTable *const lookupTable =
+            mBuffers->getTerminalPositionLookupTable();
+    const int terminalIdLimit = lookupTable->getNextTerminalId();
+    std::priority_queue<DictProbability, std::vector<DictProbability>, DictProbabilityComparator>
+            candidates;
+    for (int terminalId = 0; terminalId < terminalIdLimit; ++terminalId) {
+        const int terminalPtNodePos = lookupTable->getTerminalPtNodePosition(terminalId);
+        if (terminalPtNodePos != NOT_A_DICT_POS) {
+            const ProbabilityEntry entry =
+                    mBuffers->getProbabilityDictContent()->getProbabilityEntry(terminalId);
+            int rankingProbability;
+            if (entry.hasHistoricalInfo()) {
+                rankingProbability = ForgettingCurveUtils::decodeProbability(
+                        entry.getHistoricalInfo(), mBuffers->getHeaderPolicy());
+            } else {
+                rankingProbability = entry.getProbability();
+            }
+            candidates.push(DictProbability(terminalPtNodePos, rankingProbability,
+                    entry.getHistoricalInfo()->getTimeStamp()));
+        }
+    }
+
+    // Drop candidates from the top of the queue until the limit is met.
+    while (static_cast<int>(candidates.size()) > maxUnigramCount) {
+        const int victimPos = candidates.top().getDictPos();
+        candidates.pop();
+        const PtNodeParams victimParams =
+                ptNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(victimPos);
+        if (!victimParams.representsNonWordInfo()
+                && !ptNodeWriter->markPtNodeAsWillBecomeNonTerminal(&victimParams)) {
+            AKLOGE("Cannot mark PtNode as willBecomeNonterminal. PtNode pos: %d", victimPos);
+            return false;
+        }
+    }
+    return true;
+}
+
+// Trims bigram entries down to |maxBigramCount|.
+//
+// Walks the bigram list of every terminal id, ranks each valid entry by
+// DictProbabilityComparator (probability decoded from the historical info when present, stored
+// probability otherwise), then overwrites entries from the top of the queue with their
+// invalidated form until at most |maxBigramCount| remain.
+//
+// Returns false if an invalidated entry cannot be written back.
+bool Ver4PatriciaTrieWritingHelper::truncateBigrams(const int maxBigramCount) {
+    const TerminalPositionLookupTable *const lookupTable =
+            mBuffers->getTerminalPositionLookupTable();
+    const int terminalIdLimit = lookupTable->getNextTerminalId();
+    std::priority_queue<DictProbability, std::vector<DictProbability>, DictProbabilityComparator>
+            candidates;
+    BigramDictContent *const bigramContent = mBuffers->getMutableBigramDictContent();
+    for (int terminalId = 0; terminalId < terminalIdLimit; ++terminalId) {
+        int readingPos = bigramContent->getBigramListHeadPos(terminalId);
+        if (readingPos == NOT_A_DICT_POS) {
+            continue;
+        }
+        // A list that exists has at least one entry, so read first and test afterwards.
+        bool hasMoreEntries;
+        do {
+            const int entryPos = readingPos;
+            const BigramEntry entry =
+                    bigramContent->getBigramEntryAndAdvancePosition(&readingPos);
+            hasMoreEntries = entry.hasNext();
+            if (entry.isValid()) {
+                int rankingProbability;
+                if (entry.hasHistoricalInfo()) {
+                    rankingProbability = ForgettingCurveUtils::decodeProbability(
+                            entry.getHistoricalInfo(), mBuffers->getHeaderPolicy());
+                } else {
+                    rankingProbability = entry.getProbability();
+                }
+                candidates.push(DictProbability(entryPos, rankingProbability,
+                        entry.getHistoricalInfo()->getTimeStamp()));
+            }
+        } while (hasMoreEntries);
+    }
+
+    // Invalidate entries from the top of the queue until the limit is met.
+    while (static_cast<int>(candidates.size()) > maxBigramCount) {
+        const int victimPos = candidates.top().getDictPos();
+        const BigramEntry victim = bigramContent->getBigramEntry(victimPos);
+        const BigramEntry invalidatedVictim = victim.getInvalidatedEntry();
+        if (!bigramContent->writeBigramEntry(&invalidatedVictim, victimPos)) {
+            AKLOGE("Cannot write bigram entry to remove. pos: %d", victimPos);
+            return false;
+        }
+        candidates.pop();
+    }
+    return true;
+}
+
+// Visits one PtNode of the trie being rewritten.
+//
+// Non-terminal PtNodes are accepted unchanged. For a terminal PtNode, the terminal id is
+// replaced by the id it maps to in mTerminalIdMap, and then the has-bigrams /
+// has-shortcut-targets flags are refreshed through the PtNode writer.
+//
+// Returns false when the terminal id has no entry in the map or when either writer update
+// fails.
+bool Ver4PatriciaTrieWritingHelper::TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds
+        ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
+    if (!ptNodeParams->isTerminal()) {
+        return true;
+    }
+    TerminalPositionLookupTable::TerminalIdMap::const_iterator it =
+            mTerminalIdMap->find(ptNodeParams->getTerminalId());
+    if (it == mTerminalIdMap->end()) {
+        // The map size is an unsigned size type, hence %zu rather than %zd.
+        AKLOGE("terminal Id %d is not in the terminal position map. map size: %zu",
+                ptNodeParams->getTerminalId(), mTerminalIdMap->size());
+        return false;
+    }
+    if (!mPtNodeWriter->updateTerminalId(ptNodeParams, it->second)) {
+        AKLOGE("Cannot update terminal id. %d -> %d", it->first, it->second);
+        // Report the failure like every other step does; carrying on would leave this PtNode
+        // with its old terminal id while the caller believes the update succeeded.
+        return false;
+    }
+    return mPtNodeWriter->updatePtNodeHasBigramsAndShortcutTargetsFlags(ptNodeParams);
+}
+
+} // namespace v402
+} // namespace backward
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.h
new file mode 100644
index 000000000..9034ee656
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h
+ */
+
+#ifndef LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_WRITING_HELPER_H
+#define LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_WRITING_HELPER_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+} // namespace v402
+} // namespace backward
+class HeaderPolicy;
+namespace backward {
+namespace v402 {
+class Ver4DictBuffers;
+class Ver4PatriciaTrieNodeReader;
+class Ver4PatriciaTrieNodeWriter;
+
+// Writes the v402 (backward-compatible version 4) dictionary held in a Ver4DictBuffers to a
+// dictionary directory, either as is or after running GC.
+class Ver4PatriciaTrieWritingHelper {
+ public:
+    // |buffers| is kept as a raw pointer. This class declares no destructor, so the caller
+    // remains responsible for the buffers.
+    Ver4PatriciaTrieWritingHelper(Ver4DictBuffers *const buffers)
+            : mBuffers(buffers) {}
+
+    // Writes the current buffers to |dictDirPath| with a header regenerated from the given
+    // counts. Returns false on failure.
+    bool writeToDictFile(const char *const dictDirPath, const int unigramCount,
+            const int bigramCount) const;
+
+    // Runs GC starting from |rootPtNodeArrayPos| and writes the result to |dictDirPath|.
+    // This method cannot be const because the original dictionary buffer will be updated to
+    // detect useless PtNodes during GC.
+    bool writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const dictDirPath);
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTrieWritingHelper);
+
+    // Traversal listener used on the GCed trie: for terminal PtNodes it rewrites the terminal
+    // id according to |terminalIdMap| and refreshes the bigram / shortcut flags.
+    class TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds
+            : public DynamicPtReadingHelper::TraversingEventListener {
+     public:
+        // Both pointers are stored as is; neither object is owned by the listener.
+        TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds(
+                Ver4PatriciaTrieNodeWriter *const ptNodeWriter,
+                const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap)
+                : mPtNodeWriter(ptNodeWriter), mTerminalIdMap(terminalIdMap) {}
+
+        // The three events below need no work here and always return true.
+        bool onAscend() { return true; }
+
+        bool onDescend(const int ptNodeArrayPos) { return true; }
+
+        bool onReadingPtNodeArrayTail() { return true; }
+
+        // Defined in the .cpp file; returns false when an update fails.
+        bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
+
+     private:
+        DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds);
+
+        Ver4PatriciaTrieNodeWriter *const mPtNodeWriter;
+        const TerminalPositionLookupTable::TerminalIdMap *const mTerminalIdMap;
+    };
+
+    // For truncateUnigrams() and truncateBigrams().
+    // Value type that pairs a dictionary position with the probability and timestamp used to
+    // rank it.
+    class DictProbability {
+     public:
+        DictProbability(const int dictPos, const int probability, const int timestamp)
+                : mDictPos(dictPos), mProbability(probability), mTimestamp(timestamp) {}
+
+        int getDictPos() const {
+            return mDictPos;
+        }
+
+        int getProbability() const {
+            return mProbability;
+        }
+
+        int getTimestamp() const {
+            return mTimestamp;
+        }
+
+     private:
+        DISALLOW_DEFAULT_CONSTRUCTOR(DictProbability);
+
+        int mDictPos;
+        int mProbability;
+        int mTimestamp;
+    };
+
+    // For truncateUnigrams() and truncateBigrams().
+    // Ordering for std::priority_queue<DictProbability>. With this comparator top() is the
+    // entry with the lowest probability; among equal probabilities the one with the largest
+    // timestamp; among equal timestamps the one with the smallest dictionary position.
+    class DictProbabilityComparator {
+     public:
+        // const-qualified so the comparator can also be invoked through a const object; it
+        // reads nothing but its two arguments.
+        bool operator()(const DictProbability &left, const DictProbability &right) const {
+            if (left.getProbability() != right.getProbability()) {
+                return left.getProbability() > right.getProbability();
+            }
+            if (left.getTimestamp() != right.getTimestamp()) {
+                return left.getTimestamp() < right.getTimestamp();
+            }
+            return left.getDictPos() > right.getDictPos();
+        }
+
+     private:
+        DISALLOW_ASSIGNMENT_OPERATOR(DictProbabilityComparator);
+    };
+
+    // Compacts mBuffers into |buffersToWrite| and reports the resulting unigram / bigram
+    // counts through the out parameters. Returns false on failure.
+    bool runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy,
+            Ver4DictBuffers *const buffersToWrite, int *const outUnigramCount,
+            int *const outBigramCount);
+
+    // Marks the lowest-ranked unigrams for removal until at most |maxUnigramCount| remain.
+    bool truncateUnigrams(const Ver4PatriciaTrieNodeReader *const ptNodeReader,
+            Ver4PatriciaTrieNodeWriter *const ptNodeWriter, const int maxUnigramCount);
+
+    // Invalidates the lowest-ranked bigram entries until at most |maxBigramCount| remain.
+    bool truncateBigrams(const int maxBigramCount);
+
+    Ver4DictBuffers *const mBuffers;
+};
+} // namespace v402
+} // namespace backward
+} // namespace latinime
+
+#endif /* LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_WRITING_HELPER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.cpp
new file mode 100644
index 000000000..537a6d420
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.cpp
+ */
+
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.h"
+
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+// Reads the header of the PtNode array located at |ptNodeArrayPos|.
+//
+// On success |*outPtNodeCount| receives the number of PtNodes in the array and
+// |*outFirstPtNodePos| the position right after the size field, and true is returned.
+// Returns false, leaving the out parameters untouched, when the position lies outside
+// [0, tail position) or the decoded count is negative.
+bool Ver4PtNodeArrayReader::readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos,
+        int *const outPtNodeCount, int *const outFirstPtNodePos) const {
+    const bool isOutOfRange =
+            ptNodeArrayPos < 0 || ptNodeArrayPos >= mBuffer->getTailPosition();
+    if (isOutOfRange) {
+        // Reading invalid position because of a bug or a broken dictionary.
+        AKLOGE("Reading PtNode array info from invalid dictionary position: %d, dict size: %d",
+                ptNodeArrayPos, mBuffer->getTailPosition());
+        ASSERT(false);
+        return false;
+    }
+    const bool inAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodeArrayPos);
+    const uint8_t *const rawBuffer = mBuffer->getBuffer(inAdditionalBuffer);
+    // Positions in the additional buffer are offset by the original buffer size; translate to
+    // a buffer-local position for reading and back again for the result.
+    const int bufferBase = inAdditionalBuffer ? mBuffer->getOriginalBufferSize() : 0;
+    int localPos = ptNodeArrayPos - bufferBase;
+    const int decodedPtNodeCount =
+            PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(rawBuffer, &localPos);
+    if (decodedPtNodeCount < 0) {
+        AKLOGE("Invalid PtNode count in an array: %d.", decodedPtNodeCount);
+        return false;
+    }
+    *outPtNodeCount = decodedPtNodeCount;
+    *outFirstPtNodePos = localPos + bufferBase;
+    return true;
+}
+
+// Reads the forward link stored at |forwordLinkPos|.
+//
+// On success returns true and sets |*outNextPtNodeArrayPos| to |forwordLinkPos| plus the stored
+// offset when the offset is a valid forward link, or to NOT_A_DICT_POS otherwise.
+// Returns false, leaving the out parameter untouched, when the position lies outside
+// [0, tail position).
+bool Ver4PtNodeArrayReader::readForwardLinkAndReturnIfValid(const int forwordLinkPos,
+        int *const outNextPtNodeArrayPos) const {
+    const bool isOutOfRange =
+            forwordLinkPos < 0 || forwordLinkPos >= mBuffer->getTailPosition();
+    if (isOutOfRange) {
+        // Reading invalid position because of bug or broken dictionary.
+        AKLOGE("Reading forward link from invalid dictionary position: %d, dict size: %d",
+                forwordLinkPos, mBuffer->getTailPosition());
+        ASSERT(false);
+        return false;
+    }
+    const bool inAdditionalBuffer = mBuffer->isInAdditionalBuffer(forwordLinkPos);
+    const uint8_t *const rawBuffer = mBuffer->getBuffer(inAdditionalBuffer);
+    // Translate to a buffer-local position when the link lives in the additional buffer.
+    const int localPos = inAdditionalBuffer
+            ? forwordLinkPos - mBuffer->getOriginalBufferSize() : forwordLinkPos;
+    const int linkOffset = DynamicPtReadingUtils::getForwardLinkPosition(rawBuffer, localPos);
+    // The offset is relative to the link's own (absolute) position.
+    *outNextPtNodeArrayPos = DynamicPtReadingUtils::isValidForwardLinkPosition(linkOffset)
+            ? forwordLinkPos + linkOffset : NOT_A_DICT_POS;
+    return true;
+}
+
+} // namespace v402
+} // namespace backward
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.h
new file mode 100644
index 000000000..4f8056801
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h
+ */
+
+#ifndef LATINIME_BACKWARD_V402_VER4_PT_NODE_ARRAY_READER_H
+#define LATINIME_BACKWARD_V402_VER4_PT_NODE_ARRAY_READER_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+} // namespace v402
+} // namespace backward
+class BufferWithExtendableBuffer;
+namespace backward {
+namespace v402 {
+
+// PtNodeArrayReader for the backward-compatible v402 dictionary format. Reads PtNode array
+// headers and forward links out of a BufferWithExtendableBuffer.
+class Ver4PtNodeArrayReader : public PtNodeArrayReader {
+ public:
+ // |buffer| is stored as a raw pointer. This class declares no destructor, so the caller keeps
+ // responsibility for the buffer.
+ Ver4PtNodeArrayReader(const BufferWithExtendableBuffer *const buffer) : mBuffer(buffer) {};
+
+ // Reads the PtNode array header at |ptNodeArrayPos|. On success stores the PtNode count in
+ // |*outPtNodeCount| and the position following the size field in |*outFirstPtNodePos| and
+ // returns true. Returns false for an out-of-range position or a negative count.
+ virtual bool readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos,
+ int *const outPtNodeCount, int *const outFirstPtNodePos) const;
+ // Reads the forward link at |forwordLinkPos|. On success stores the next PtNode array
+ // position, or NOT_A_DICT_POS when the link is not valid, in |*outNextPtNodeArrayPos| and
+ // returns true. Returns false for an out-of-range position.
+ virtual bool readForwardLinkAndReturnIfValid(const int forwordLinkPos,
+ int *const outNextPtNodeArrayPos) const;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(Ver4PtNodeArrayReader);
+
+ // Buffer all reads go through; never modified by this class.
+ const BufferWithExtendableBuffer *const mBuffer;
+};
+} // namespace v402
+} // namespace backward
+} // namespace latinime
+#endif /* LATINIME_BACKWARD_V402_VER4_PT_NODE_ARRAY_READER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
new file mode 100644
index 000000000..e4b5fa267
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
@@ -0,0 +1,203 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
+
+#include <climits>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h"
+#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
+#include "suggest/policyimpl/dictionary/utils/file_utils.h"
+#include "suggest/policyimpl/dictionary/utils/format_utils.h"
+#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+
+namespace latinime {
+
+// Creates a structure policy for a dictionary that already exists at |path|.
+//
+// A directory is opened through newPolicyForDirectoryDict(). Anything else is treated as a
+// single-file dictionary and opened through newPolicyForFileDict() using |bufOffset| and
+// |size|; single-file dictionaries cannot be updatable, so asking for that returns nullptr.
+/* static */ DictionaryStructureWithBufferPolicy::StructurePolicyPtr
+        DictionaryStructureWithBufferPolicyFactory::newPolicyForExistingDictFile(
+                const char *const path, const int bufOffset, const int size,
+                const bool isUpdatable) {
+    if (FileUtils::existsDir(path)) {
+        // Given path represents a directory.
+        return newPolicyForDirectoryDict(path, isUpdatable);
+    }
+    if (!isUpdatable) {
+        return newPolicyForFileDict(path, bufOffset, size);
+    }
+    AKLOGE("One file dictionaries don't support updating. path: %s", path);
+    ASSERT(false);
+    return nullptr;
+}
+
+// Creates a structure policy for a new, empty on-memory dictionary.
+//
+// VERSION_4 is served by the backward::v402 implementation; VERSION_4_ONLY_FOR_TESTING and
+// VERSION_4_DEV are served by the current version 4 implementation. Any other format version
+// is logged and nullptr is returned.
+/* static */ DictionaryStructureWithBufferPolicy::StructurePolicyPtr
+        DictionaryStructureWithBufferPolicyFactory::newPolicyForOnMemoryDict(
+                const int formatVersion, const std::vector<int> &locale,
+                const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap) {
+    const FormatUtils::FORMAT_VERSION dictFormatVersion =
+            FormatUtils::getFormatVersion(formatVersion);
+    if (dictFormatVersion == FormatUtils::VERSION_4) {
+        return newPolicyForOnMemoryV4Dict<backward::v402::Ver4DictConstants,
+                backward::v402::Ver4DictBuffers,
+                backward::v402::Ver4DictBuffers::Ver4DictBuffersPtr,
+                backward::v402::Ver4PatriciaTriePolicy>(
+                        dictFormatVersion, locale, attributeMap);
+    }
+    if (dictFormatVersion == FormatUtils::VERSION_4_ONLY_FOR_TESTING
+            || dictFormatVersion == FormatUtils::VERSION_4_DEV) {
+        return newPolicyForOnMemoryV4Dict<Ver4DictConstants, Ver4DictBuffers,
+                Ver4DictBuffers::Ver4DictBuffersPtr, Ver4PatriciaTriePolicy>(
+                        dictFormatVersion, locale, attributeMap);
+    }
+    AKLOGE("DICT: dictionary format %d is not supported for on memory dictionary",
+            formatVersion);
+    return nullptr;
+}
+
+// Builds an empty version 4 dictionary in memory and wraps it in a StructurePolicy.
+//
+// The buffers are created with DictConstants::MAX_DICT_EXTENDED_REGION_SIZE and an empty
+// dictionary is written into the trie buffer with root position 0. Returns nullptr if the
+// empty dictionary cannot be written.
+//
+// NOTE(review): the HeaderPolicy lives on this stack frame while the buffers outlive it; this
+// relies on createVer4DictBuffers() not retaining the pointer - confirm.
+template<class DictConstants, class DictBuffers, class DictBuffersPtr, class StructurePolicy>
+/* static */ DictionaryStructureWithBufferPolicy::StructurePolicyPtr
+        DictionaryStructureWithBufferPolicyFactory::newPolicyForOnMemoryV4Dict(
+                const FormatUtils::FORMAT_VERSION formatVersion,
+                const std::vector<int> &locale,
+                const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap) {
+    HeaderPolicy onMemoryHeaderPolicy(formatVersion, locale, attributeMap);
+    DictBuffersPtr emptyDictBuffers = DictBuffers::createVer4DictBuffers(
+            &onMemoryHeaderPolicy, DictConstants::MAX_DICT_EXTENDED_REGION_SIZE);
+    const bool isEmptyDictWritten = DynamicPtWritingUtils::writeEmptyDictionary(
+            emptyDictBuffers->getWritableTrieBuffer(), 0 /* rootPos */);
+    if (isEmptyDictWritten) {
+        return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(
+                new StructurePolicy(std::move(emptyDictBuffers)));
+    }
+    AKLOGE("Empty ver4 dictionary structure cannot be created on memory.");
+    return nullptr;
+}
+
+// Opens the dictionary stored in the directory |path|.
+//
+// The header file inside the directory is mapped and its format version detected. VERSION_4 is
+// opened with the backward::v402 implementation; VERSION_4_ONLY_FOR_TESTING and VERSION_4_DEV
+// with the current version 4 implementation. Returns nullptr if the header file cannot be
+// mapped, or (after logging and asserting) if the format is version 2 or unknown.
+/* static */ DictionaryStructureWithBufferPolicy::StructurePolicyPtr
+        DictionaryStructureWithBufferPolicyFactory::newPolicyForDirectoryDict(
+                const char *const path, const bool isUpdatable) {
+    const int headerFilePathBufSize = PATH_MAX + 1 /* terminator */;
+    char headerFilePath[headerFilePathBufSize];
+    getHeaderFilePathInDictDir(path, headerFilePathBufSize, headerFilePath);
+    // Allocated buffer in MmappedBuffer::openBuffer() will be freed in the destructor of
+    // MmappedBufferPtr if the instance has the responsibility.
+    MmappedBuffer::MmappedBufferPtr headerMmappedBuffer =
+            MmappedBuffer::openBuffer(headerFilePath, isUpdatable);
+    if (!headerMmappedBuffer) {
+        return nullptr;
+    }
+    const FormatUtils::FORMAT_VERSION detectedVersion = FormatUtils::detectFormatVersion(
+            headerMmappedBuffer->getBuffer(), headerMmappedBuffer->getBufferSize());
+    if (detectedVersion == FormatUtils::VERSION_4) {
+        return newPolicyForV4Dict<backward::v402::Ver4DictConstants,
+                backward::v402::Ver4DictBuffers,
+                backward::v402::Ver4DictBuffers::Ver4DictBuffersPtr,
+                backward::v402::Ver4PatriciaTriePolicy>(
+                        headerFilePath, detectedVersion, std::move(headerMmappedBuffer));
+    }
+    if (detectedVersion == FormatUtils::VERSION_4_ONLY_FOR_TESTING
+            || detectedVersion == FormatUtils::VERSION_4_DEV) {
+        return newPolicyForV4Dict<Ver4DictConstants, Ver4DictBuffers,
+                Ver4DictBuffers::Ver4DictBuffersPtr, Ver4PatriciaTriePolicy>(
+                        headerFilePath, detectedVersion, std::move(headerMmappedBuffer));
+    }
+    // Everything else cannot be opened as a directory dictionary.
+    if (detectedVersion == FormatUtils::VERSION_2) {
+        AKLOGE("Given path is a directory but the format is version 2. path: %s", path);
+    } else {
+        AKLOGE("DICT: dictionary format is unknown, bad magic number. path: %s", path);
+    }
+    ASSERT(false);
+    return nullptr;
+}
+
+// Opens the version 4 dictionary whose header file is |headerFilePath|.
+//
+// The dictionary path given to DictBuffers::openVer4DictBuffers() is |headerFilePath| with
+// DictConstants::HEADER_FILE_EXTENSION stripped by FileUtils::getFilePathWithoutSuffix().
+// |mmappedBuffer| is moved into the opened buffers.
+//
+// Returns nullptr (after logging and asserting) when the suffix cannot be stripped or when the
+// opened buffers are null or report themselves as invalid.
+template<class DictConstants, class DictBuffers, class DictBuffersPtr, class StructurePolicy>
+/* static */ DictionaryStructureWithBufferPolicy::StructurePolicyPtr
+        DictionaryStructureWithBufferPolicyFactory::newPolicyForV4Dict(
+                const char *const headerFilePath, const FormatUtils::FORMAT_VERSION formatVersion,
+                MmappedBuffer::MmappedBufferPtr &&mmappedBuffer) {
+    // Same buffer size as the header path (terminator included). A std::vector is used instead
+    // of a runtime-sized array: variable-length arrays are not standard C++ and would place an
+    // input-sized buffer on the stack. The vector is zero-initialized.
+    const int dictDirPathBufSize = strlen(headerFilePath) + 1 /* terminator */;
+    std::vector<char> dictPath(dictDirPathBufSize);
+    if (!FileUtils::getFilePathWithoutSuffix(headerFilePath,
+            DictConstants::HEADER_FILE_EXTENSION, dictDirPathBufSize, dictPath.data())) {
+        AKLOGE("Dictionary file name is not valid as a ver4 dictionary. header path: %s",
+                headerFilePath);
+        ASSERT(false);
+        return nullptr;
+    }
+    DictBuffersPtr dictBuffers = DictBuffers::openVer4DictBuffers(
+            dictPath.data(), std::move(mmappedBuffer), formatVersion);
+    if (!dictBuffers || !dictBuffers->isValid()) {
+        AKLOGE("DICT: The dictionary doesn't satisfy ver4 format requirements. path: %s",
+                dictPath.data());
+        ASSERT(false);
+        return nullptr;
+    }
+    return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(
+            new StructurePolicy(std::move(dictBuffers)));
+}
+
+// Opens the single-file dictionary stored in |path| at |bufOffset| with length |size|.
+//
+// The region is mapped read-only (isUpdatable = false). Only VERSION_2 can be opened this way;
+// it is wrapped in a PatriciaTriePolicy that takes over the mapped buffer. Returns nullptr if
+// the file cannot be mapped, or (after logging and asserting) if the format is any version 4
+// variant or unknown.
+/* static */ DictionaryStructureWithBufferPolicy::StructurePolicyPtr
+        DictionaryStructureWithBufferPolicyFactory::newPolicyForFileDict(
+                const char *const path, const int bufOffset, const int size) {
+    // Allocated buffer in MmappedBuffer::openBuffer() will be freed in the destructor of
+    // MmappedBufferPtr if the instance has the responsibility.
+    MmappedBuffer::MmappedBufferPtr dictMmappedBuffer(
+            MmappedBuffer::openBuffer(path, bufOffset, size, false /* isUpdatable */));
+    if (!dictMmappedBuffer) {
+        return nullptr;
+    }
+    const FormatUtils::FORMAT_VERSION detectedVersion = FormatUtils::detectFormatVersion(
+            dictMmappedBuffer->getBuffer(), dictMmappedBuffer->getBufferSize());
+    if (detectedVersion == FormatUtils::VERSION_2) {
+        return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(
+                new PatriciaTriePolicy(std::move(dictMmappedBuffer)));
+    }
+    const bool isVersion4Variant = detectedVersion == FormatUtils::VERSION_4_ONLY_FOR_TESTING
+            || detectedVersion == FormatUtils::VERSION_4
+            || detectedVersion == FormatUtils::VERSION_4_DEV;
+    if (isVersion4Variant) {
+        AKLOGE("Given path is a file but the format is version 4. path: %s", path);
+    } else {
+        AKLOGE("DICT: dictionary format is unknown, bad magic number. path: %s", path);
+    }
+    ASSERT(false);
+    return nullptr;
+}
+
+// Builds the header file path for the dictionary directory |dictDirPath|:
+//     <dictDirPath>/<basename of dictDirPath><Ver4DictConstants::HEADER_FILE_EXTENSION>
+// The result is written to |outHeaderFilePath|, using at most |outHeaderFileBufSize| bytes
+// including the terminator. If the full path does not fit, the output is truncated by
+// snprintf() and an error is logged; the function has no return value to report it otherwise.
+/* static */ void DictionaryStructureWithBufferPolicyFactory::getHeaderFilePathInDictDir(
+        const char *const dictDirPath, const int outHeaderFileBufSize,
+        char *const outHeaderFilePath) {
+    // The basename can never be longer than the path it comes from. A zero-initialized
+    // std::vector replaces the former runtime-sized array, which is not standard C++ and was
+    // left uninitialized.
+    const int dictNameBufSize = strlen(dictDirPath) + 1 /* terminator */;
+    std::vector<char> dictName(dictNameBufSize);
+    FileUtils::getBasename(dictDirPath, dictNameBufSize, dictName.data());
+    const int requiredLength = snprintf(outHeaderFilePath, outHeaderFileBufSize, "%s/%s%s",
+            dictDirPath, dictName.data(), Ver4DictConstants::HEADER_FILE_EXTENSION);
+    // snprintf() returns the length the full string needs; anything >= the buffer size means
+    // the path was cut short.
+    if (requiredLength < 0 || requiredLength >= outHeaderFileBufSize) {
+        AKLOGE("Header file path does not fit in the buffer. dict dir path: %s, buffer size: %d",
+                dictDirPath, outHeaderFileBufSize);
+    }
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h
new file mode 100644
index 000000000..768454d8d
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H
+#define LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H
+
+#include <vector>
+
+#include "defines.h"
+#include "suggest/core/policy/dictionary_header_structure_policy.h"
+#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+#include "suggest/policyimpl/dictionary/utils/format_utils.h"
+#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+
+namespace latinime {
+
+class DictionaryStructureWithBufferPolicyFactory {
+ public:
+ static DictionaryStructureWithBufferPolicy::StructurePolicyPtr
+ newPolicyForExistingDictFile(const char *const path, const int bufOffset,
+ const int size, const bool isUpdatable);
+
+ static DictionaryStructureWithBufferPolicy::StructurePolicyPtr
+ newPolicyForOnMemoryDict(const int formatVersion, const std::vector<int> &locale,
+ const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryStructureWithBufferPolicyFactory);
+
+ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class StructurePolicy>
+ static DictionaryStructureWithBufferPolicy::StructurePolicyPtr
+ newPolicyForOnMemoryV4Dict(const FormatUtils::FORMAT_VERSION formatVersion,
+ const std::vector<int> &locale,
+ const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap);
+
+ static DictionaryStructureWithBufferPolicy::StructurePolicyPtr
+ newPolicyForDirectoryDict(const char *const path, const bool isUpdatable);
+
+ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class StructurePolicy>
+ static DictionaryStructureWithBufferPolicy::StructurePolicyPtr newPolicyForV4Dict(
+ const char *const headerFilePath, const FormatUtils::FORMAT_VERSION formatVersion,
+ MmappedBuffer::MmappedBufferPtr &&mmappedBuffer);
+
+ static DictionaryStructureWithBufferPolicy::StructurePolicyPtr
+ newPolicyForFileDict(const char *const path, const int bufOffset, const int size);
+
+ static void getHeaderFilePathInDictDir(const char *const dirPath,
+ const int outHeaderFileBufSize, char *const outHeaderFilePath);
+};
+} // namespace latinime
+#endif // LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.cpp
new file mode 100644
index 000000000..08b4e0b5e
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.cpp
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h"
+
+#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+const BigramListReadWriteUtils::BigramFlags BigramListReadWriteUtils::MASK_ATTRIBUTE_ADDRESS_TYPE =
+ 0x30;
+const BigramListReadWriteUtils::BigramFlags
+ BigramListReadWriteUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10;
+const BigramListReadWriteUtils::BigramFlags
+ BigramListReadWriteUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20;
+const BigramListReadWriteUtils::BigramFlags
+ BigramListReadWriteUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30;
+const BigramListReadWriteUtils::BigramFlags
+ BigramListReadWriteUtils::FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
+// Flag for presence of more attributes
+const BigramListReadWriteUtils::BigramFlags BigramListReadWriteUtils::FLAG_ATTRIBUTE_HAS_NEXT =
+ 0x80;
+// Mask for attribute probability, stored on 4 bits inside the flags byte.
+const BigramListReadWriteUtils::BigramFlags
+ BigramListReadWriteUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
+
+/* static */ void BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
+ const uint8_t *const bigramsBuf, BigramFlags *const outBigramFlags,
+ int *const outTargetPtNodePos, int *const bigramEntryPos) {
+ const BigramFlags bigramFlags = ByteArrayUtils::readUint8AndAdvancePosition(bigramsBuf,
+ bigramEntryPos);
+ if (outBigramFlags) {
+ *outBigramFlags = bigramFlags;
+ }
+ const int targetPos = getBigramAddressAndAdvancePosition(bigramsBuf, bigramFlags,
+ bigramEntryPos);
+ if (outTargetPtNodePos) {
+ *outTargetPtNodePos = targetPos;
+ }
+}
+
+/* static */ void BigramListReadWriteUtils::skipExistingBigrams(const uint8_t *const bigramsBuf,
+ int *const bigramListPos) {
+ BigramFlags flags;
+ do {
+ getBigramEntryPropertiesAndAdvancePosition(bigramsBuf, &flags, 0 /* outTargetPtNodePos */,
+ bigramListPos);
+ } while(hasNext(flags));
+}
+
+/* static */ int BigramListReadWriteUtils::getBigramAddressAndAdvancePosition(
+ const uint8_t *const bigramsBuf, const BigramFlags flags, int *const pos) {
+ int offset = 0;
+ const int origin = *pos;
+ switch (MASK_ATTRIBUTE_ADDRESS_TYPE & flags) {
+ case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
+ offset = ByteArrayUtils::readUint8AndAdvancePosition(bigramsBuf, pos);
+ break;
+ case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
+ offset = ByteArrayUtils::readUint16AndAdvancePosition(bigramsBuf, pos);
+ break;
+ case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
+ offset = ByteArrayUtils::readUint24AndAdvancePosition(bigramsBuf, pos);
+ break;
+ }
+ if (isOffsetNegative(flags)) {
+ return origin - offset;
+ } else {
+ return origin + offset;
+ }
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h
index eabe4e099..15f924a6a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h
@@ -17,8 +17,8 @@
#ifndef LATINIME_BIGRAM_LIST_READ_WRITE_UTILS_H
#define LATINIME_BIGRAM_LIST_READ_WRITE_UTILS_H
+#include <cstdint>
#include <cstdlib>
-#include <stdint.h>
#include "defines.h"
@@ -45,34 +45,6 @@ public:
// Bigrams reading methods
static void skipExistingBigrams(const uint8_t *const bigramsBuf, int *const bigramListPos);
- // Returns the size of the bigram position field that is stored in bigram flags.
- static AK_FORCE_INLINE int attributeAddressSize(const BigramFlags flags) {
- return (flags & MASK_ATTRIBUTE_ADDRESS_TYPE) >> ATTRIBUTE_ADDRESS_SHIFT;
- /* Note: this is a value-dependant optimization of what may probably be
- more readably written this way:
- switch (flags * BinaryFormat::MASK_ATTRIBUTE_ADDRESS_TYPE) {
- case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: return 1;
- case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: return 2;
- case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTE: return 3;
- default: return 0;
- }
- */
- }
-
- static bool setHasNextFlag(BufferWithExtendableBuffer *const buffer,
- const bool hasNext, const int entryPos);
-
- static AK_FORCE_INLINE BigramFlags setProbabilityInFlags(const BigramFlags flags,
- const int probability) {
- return (flags & (~MASK_ATTRIBUTE_PROBABILITY)) | (probability & MASK_ATTRIBUTE_PROBABILITY);
- }
-
- static bool createAndWriteBigramEntry(BufferWithExtendableBuffer *const buffer,
- const int targetPos, const int probability, const bool hasNext, int *const writingPos);
-
- static bool writeBigramEntry(BufferWithExtendableBuffer *const buffer, const BigramFlags flags,
- const int targetOffset, int *const writingPos);
-
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListReadWriteUtils);
@@ -83,11 +55,6 @@ private:
static const BigramFlags FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
static const BigramFlags FLAG_ATTRIBUTE_HAS_NEXT;
static const BigramFlags MASK_ATTRIBUTE_PROBABILITY;
- static const int ATTRIBUTE_ADDRESS_SHIFT;
-
- // Returns true if the bigram entry is valid and put entry flags into out*.
- static bool createAndGetBigramFlags(const int entryPos, const int targetPos,
- const int probability, const bool hasNext, BigramFlags *const outBigramFlags);
static AK_FORCE_INLINE bool isOffsetNegative(const BigramFlags flags) {
return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0;
@@ -95,8 +62,6 @@ private:
static int getBigramAddressAndAdvancePosition(const uint8_t *const bigramsBuf,
const BigramFlags flags, int *const pos);
-
- static int getBigramTargetOffset(const int targetPtNodePos, const int entryPos);
};
} // namespace latinime
#endif // LATINIME_BIGRAM_LIST_READ_WRITE_UTILS_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp
new file mode 100644
index 000000000..db1a802d0
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp
@@ -0,0 +1,144 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h"
+
+#include "suggest/core/policy/dictionary_header_structure_policy.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
+
+namespace latinime {
+
+bool DynamicPtGcEventListeners
+ ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
+ ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
+ // A PtNode is useless when it is not a terminal (or is a terminal that need not be kept after
+ // GC) and doesn't have any useful children.
+ bool isUselessPtNode = !ptNodeParams->isTerminal();
+ if (ptNodeParams->isTerminal() && !ptNodeParams->representsNonWordInfo()) {
+ bool needsToKeepPtNode = true;
+ if (!mPtNodeWriter->updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(
+ ptNodeParams, &needsToKeepPtNode)) {
+ AKLOGE("Cannot update PtNode probability or get needs to keep PtNode after GC.");
+ return false;
+ }
+ if (!needsToKeepPtNode) {
+ isUselessPtNode = true;
+ }
+ }
+ if (mChildrenValue > 0) {
+ isUselessPtNode = false;
+ } else if (ptNodeParams->isTerminal()) {
+ // Remove children as all children are useless.
+ if (!mPtNodeWriter->updateChildrenPosition(ptNodeParams,
+ NOT_A_DICT_POS /* newChildrenPosition */)) {
+ return false;
+ }
+ }
+ if (isUselessPtNode) {
+ // Current PtNode is no longer needed. Mark it as deleted.
+ if (!mPtNodeWriter->markPtNodeAsDeleted(ptNodeParams)) {
+ return false;
+ }
+ } else {
+ mValueStack.back() += 1;
+ if (ptNodeParams->isTerminal() && !ptNodeParams->representsNonWordInfo()) {
+ mValidUnigramCount += 1;
+ }
+ }
+ return true;
+}
+
+bool DynamicPtGcEventListeners::TraversePolicyToUpdateBigramProbability
+ ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
+ if (!ptNodeParams->isDeleted()) {
+ int bigramEntryCount = 0;
+ if (!mPtNodeWriter->updateAllBigramEntriesAndDeleteUselessEntries(ptNodeParams,
+ &bigramEntryCount)) {
+ return false;
+ }
+ mValidBigramEntryCount += bigramEntryCount;
+ }
+ return true;
+}
+
+// Writes dummy PtNode array size when the head of PtNode array is read.
+bool DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
+ ::onDescend(const int ptNodeArrayPos) {
+ mValidPtNodeCount = 0;
+ int writingPos = mBufferToWrite->getTailPosition();
+ mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.insert(
+ PtNodeWriter::PtNodeArrayPositionRelocationMap::value_type(ptNodeArrayPos, writingPos));
+ // Writes dummy PtNode array size because arrays can have a forward link or needless PtNodes.
+ // This field will be updated later in onReadingPtNodeArrayTail() with actual PtNode count.
+ mPtNodeArraySizeFieldPos = writingPos;
+ return DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition(
+ mBufferToWrite, 0 /* arraySize */, &writingPos);
+}
+
+// Write PtNode array terminal and actual PtNode array size.
+bool DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
+ ::onReadingPtNodeArrayTail() {
+ int writingPos = mBufferToWrite->getTailPosition();
+ // Write PtNode array terminal.
+ if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(
+ mBufferToWrite, NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
+ return false;
+ }
+ // Write actual PtNode array size.
+ if (!DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition(
+ mBufferToWrite, mValidPtNodeCount, &mPtNodeArraySizeFieldPos)) {
+ return false;
+ }
+ return true;
+}
+
+// Write valid PtNode to buffer and memorize mapping from the old position to the new position.
+bool DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
+ ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
+ if (ptNodeParams->isDeleted()) {
+ // Current PtNode is not written in new buffer because it has been deleted.
+ mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert(
+ PtNodeWriter::PtNodePositionRelocationMap::value_type(
+ ptNodeParams->getHeadPos(), NOT_A_DICT_POS));
+ return true;
+ }
+ int writingPos = mBufferToWrite->getTailPosition();
+ mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert(
+ PtNodeWriter::PtNodePositionRelocationMap::value_type(
+ ptNodeParams->getHeadPos(), writingPos));
+ mValidPtNodeCount++;
+ // Writes current PtNode.
+ return mPtNodeWriter->writePtNodeAndAdvancePosition(ptNodeParams, &writingPos);
+}
+
+bool DynamicPtGcEventListeners::TraversePolicyToUpdateAllPositionFields
+ ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
+ // Updates all position fields of this PtNode and accumulates the bigram and unigram counts.
+ int bigramCount = 0;
+ if (!mPtNodeWriter->updateAllPositionFields(ptNodeParams, mDictPositionRelocationMap,
+ &bigramCount)) {
+ return false;
+ }
+ mBigramCount += bigramCount;
+ if (ptNodeParams->isTerminal()) {
+ mUnigramCount++;
+ }
+ return true;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h
index 9755120b0..2aa402748 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h
@@ -14,37 +14,31 @@
* limitations under the License.
*/
-#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_GC_EVENT_LISTENERS_H
-#define LATINIME_DYNAMIC_PATRICIA_TRIE_GC_EVENT_LISTENERS_H
+#ifndef LATINIME_DYNAMIC_PT_GC_EVENT_LISTENERS_H
+#define LATINIME_DYNAMIC_PT_GC_EVENT_LISTENERS_H
#include <vector>
#include "defines.h"
-#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
-#include "utils/hash_map_compat.h"
namespace latinime {
-class DictionaryHeaderStructurePolicy;
+class PtNodeParams;
-class DynamicPatriciaTrieGcEventListeners {
+class DynamicPtGcEventListeners {
public:
// Updates all PtNodes that can be reached from the root. Checks if each PtNode is useless or
// not and marks useless PtNodes as deleted. Such deleted PtNodes will be discarded in the GC.
// TODO: Concatenate non-terminal PtNodes.
class TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
- : public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
+ : public DynamicPtReadingHelper::TraversingEventListener {
public:
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
- const DictionaryHeaderStructurePolicy *const headerPolicy,
- DynamicPatriciaTrieWritingHelper *const writingHelper,
- BufferWithExtendableBuffer *const buffer, const bool isDecayingDict)
- : mHeaderPolicy(headerPolicy), mWritingHelper(writingHelper), mBuffer(buffer),
- mIsDecayingDict(isDecayingDict), mValueStack(), mChildrenValue(0),
+ PtNodeWriter *const ptNodeWriter)
+ : mPtNodeWriter(ptNodeWriter), mValueStack(), mChildrenValue(0),
mValidUnigramCount(0) {}
~TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted() {};
@@ -66,8 +60,7 @@ class DynamicPatriciaTrieGcEventListeners {
bool onReadingPtNodeArrayTail() { return true; }
- bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
- const int *const nodeCodePoints);
+ bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
int getValidUnigramCount() const {
return mValidUnigramCount;
@@ -77,10 +70,7 @@ class DynamicPatriciaTrieGcEventListeners {
DISALLOW_IMPLICIT_CONSTRUCTORS(
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted);
- const DictionaryHeaderStructurePolicy *const mHeaderPolicy;
- DynamicPatriciaTrieWritingHelper *const mWritingHelper;
- BufferWithExtendableBuffer *const mBuffer;
- const bool mIsDecayingDict;
+ PtNodeWriter *const mPtNodeWriter;
std::vector<int> mValueStack;
int mChildrenValue;
int mValidUnigramCount;
@@ -89,11 +79,10 @@ class DynamicPatriciaTrieGcEventListeners {
// Updates all bigram entries that are held by valid PtNodes. This removes useless bigram
// entries.
class TraversePolicyToUpdateBigramProbability
- : public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
+ : public DynamicPtReadingHelper::TraversingEventListener {
public:
- TraversePolicyToUpdateBigramProbability(
- DynamicBigramListPolicy *const bigramPolicy)
- : mBigramPolicy(bigramPolicy), mValidBigramEntryCount(0) {}
+ TraversePolicyToUpdateBigramProbability(PtNodeWriter *const ptNodeWriter)
+ : mPtNodeWriter(ptNodeWriter), mValidBigramEntryCount(0) {}
bool onAscend() { return true; }
@@ -101,8 +90,7 @@ class DynamicPatriciaTrieGcEventListeners {
bool onReadingPtNodeArrayTail() { return true; }
- bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
- const int *const nodeCodePoints);
+ bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
int getValidBigramEntryCount() const {
return mValidBigramEntryCount;
@@ -111,19 +99,17 @@ class DynamicPatriciaTrieGcEventListeners {
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateBigramProbability);
- DynamicBigramListPolicy *const mBigramPolicy;
+ PtNodeWriter *const mPtNodeWriter;
int mValidBigramEntryCount;
};
class TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
- : public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
+ : public DynamicPtReadingHelper::TraversingEventListener {
public:
TraversePolicyToPlaceAndWriteValidPtNodesToBuffer(
- DynamicPatriciaTrieWritingHelper *const writingHelper,
- BufferWithExtendableBuffer *const bufferToWrite,
- DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
- dictPositionRelocationMap)
- : mWritingHelper(writingHelper), mBufferToWrite(bufferToWrite),
+ PtNodeWriter *const ptNodeWriter, BufferWithExtendableBuffer *const bufferToWrite,
+ PtNodeWriter::DictPositionRelocationMap *const dictPositionRelocationMap)
+ : mPtNodeWriter(ptNodeWriter), mBufferToWrite(bufferToWrite),
mDictPositionRelocationMap(dictPositionRelocationMap), mValidPtNodeCount(0),
mPtNodeArraySizeFieldPos(NOT_A_DICT_POS) {};
@@ -133,31 +119,24 @@ class DynamicPatriciaTrieGcEventListeners {
bool onReadingPtNodeArrayTail();
- bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
- const int *const nodeCodePoints);
+ bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToPlaceAndWriteValidPtNodesToBuffer);
- DynamicPatriciaTrieWritingHelper *const mWritingHelper;
+ PtNodeWriter *const mPtNodeWriter;
BufferWithExtendableBuffer *const mBufferToWrite;
- DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
- mDictPositionRelocationMap;
+ PtNodeWriter::DictPositionRelocationMap *const mDictPositionRelocationMap;
int mValidPtNodeCount;
int mPtNodeArraySizeFieldPos;
};
class TraversePolicyToUpdateAllPositionFields
- : public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
+ : public DynamicPtReadingHelper::TraversingEventListener {
public:
- TraversePolicyToUpdateAllPositionFields(
- DynamicPatriciaTrieWritingHelper *const writingHelper,
- DynamicBigramListPolicy *const bigramPolicy,
- BufferWithExtendableBuffer *const bufferToWrite,
- const DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
- dictPositionRelocationMap)
- : mWritingHelper(writingHelper), mBigramPolicy(bigramPolicy),
- mBufferToWrite(bufferToWrite),
+ TraversePolicyToUpdateAllPositionFields(PtNodeWriter *const ptNodeWriter,
+ const PtNodeWriter::DictPositionRelocationMap *const dictPositionRelocationMap)
+ : mPtNodeWriter(ptNodeWriter),
mDictPositionRelocationMap(dictPositionRelocationMap), mUnigramCount(0),
mBigramCount(0) {};
@@ -167,8 +146,7 @@ class DynamicPatriciaTrieGcEventListeners {
bool onReadingPtNodeArrayTail() { return true; }
- bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
- const int *const nodeCodePoints);
+ bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
int getUnigramCount() const {
return mUnigramCount;
@@ -181,17 +159,14 @@ class DynamicPatriciaTrieGcEventListeners {
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateAllPositionFields);
- DynamicPatriciaTrieWritingHelper *const mWritingHelper;
- DynamicBigramListPolicy *const mBigramPolicy;
- BufferWithExtendableBuffer *const mBufferToWrite;
- const DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
- mDictPositionRelocationMap;
+ PtNodeWriter *const mPtNodeWriter;
+ const PtNodeWriter::DictPositionRelocationMap *const mDictPositionRelocationMap;
int mUnigramCount;
int mBigramCount;
};
private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieGcEventListeners);
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPtGcEventListeners);
};
} // namespace latinime
-#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_GC_EVENT_LISTENERS_H */
+#endif /* LATINIME_DYNAMIC_PT_GC_EVENT_LISTENERS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp
new file mode 100644
index 000000000..086d98b4a
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp
@@ -0,0 +1,326 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
+
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h"
+#include "utils/char_utils.h"
+
+namespace latinime {
+
+// To avoid infinite loop caused by invalid or malicious forward links.
+const int DynamicPtReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
+const int DynamicPtReadingHelper::MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
+const size_t DynamicPtReadingHelper::MAX_READING_STATE_STACK_SIZE = MAX_WORD_LENGTH;
+
+bool DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions::onVisitingPtNode(
+ const PtNodeParams *const ptNodeParams) {
+ if (ptNodeParams->isTerminal() && !ptNodeParams->isDeleted()) {
+ mTerminalPositions->push_back(ptNodeParams->getHeadPos());
+ }
+ return true;
+}
+
+// Visits all PtNodes in post-order depth first manner.
+// For example, visits c -> b -> y -> x -> a for the following dictionary:
+// a _ b _ c
+// \ x _ y
+bool DynamicPtReadingHelper::traverseAllPtNodesInPostorderDepthFirstManner(
+ TraversingEventListener *const listener) {
+ bool alreadyVisitedChildren = false;
+ // Descend from the root to the root PtNode array.
+ if (!listener->onDescend(getPosOfLastPtNodeArrayHead())) {
+ return false;
+ }
+ while (!isEnd()) {
+ const PtNodeParams ptNodeParams(getPtNodeParams());
+ if (!ptNodeParams.isValid()) {
+ break;
+ }
+ if (!alreadyVisitedChildren) {
+ if (ptNodeParams.hasChildren()) {
+ // Move to the first child.
+ if (!listener->onDescend(ptNodeParams.getChildrenPos())) {
+ return false;
+ }
+ pushReadingStateToStack();
+ readChildNode(ptNodeParams);
+ } else {
+ alreadyVisitedChildren = true;
+ }
+ } else {
+ if (!listener->onVisitingPtNode(&ptNodeParams)) {
+ return false;
+ }
+ readNextSiblingNode(ptNodeParams);
+ if (isEnd()) {
+ // All PtNodes in current linked PtNode arrays have been visited.
+ // Return to the parent.
+ if (!listener->onReadingPtNodeArrayTail()) {
+ return false;
+ }
+ if (mReadingStateStack.size() <= 0) {
+ break;
+ }
+ if (!listener->onAscend()) {
+ return false;
+ }
+ popReadingStateFromStack();
+ alreadyVisitedChildren = true;
+ } else {
+ // Process sibling PtNode.
+ alreadyVisitedChildren = false;
+ }
+ }
+ }
+ // Ascend from the root PtNode array to the root.
+ if (!listener->onAscend()) {
+ return false;
+ }
+ return !isError();
+}
+
+// Visits all PtNodes in PtNode array level pre-order depth first manner, which is the same order
+// that PtNodes are written in the dictionary buffer.
+// For example, visits a -> b -> x -> c -> y for the following dictionary:
+// a _ b _ c
+// \ x _ y
+bool DynamicPtReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
+ TraversingEventListener *const listener) {
+ bool alreadyVisitedAllPtNodesInArray = false;
+ bool alreadyVisitedChildren = false;
+ // Descend from the root to the root PtNode array.
+ if (!listener->onDescend(getPosOfLastPtNodeArrayHead())) {
+ return false;
+ }
+ if (isEnd()) {
+ // Empty dictionary. Needs to notify the listener of the tail of empty PtNode array.
+ if (!listener->onReadingPtNodeArrayTail()) {
+ return false;
+ }
+ }
+ pushReadingStateToStack();
+ while (!isEnd()) {
+ const PtNodeParams ptNodeParams(getPtNodeParams());
+ if (!ptNodeParams.isValid()) {
+ break;
+ }
+ if (alreadyVisitedAllPtNodesInArray) {
+ if (alreadyVisitedChildren) {
+ // Move to next sibling PtNode's children.
+ readNextSiblingNode(ptNodeParams);
+ if (isEnd()) {
+ // Return to the parent PtNode.
+ if (!listener->onAscend()) {
+ return false;
+ }
+ if (mReadingStateStack.size() <= 0) {
+ break;
+ }
+ popReadingStateFromStack();
+ alreadyVisitedChildren = true;
+ alreadyVisitedAllPtNodesInArray = true;
+ } else {
+ alreadyVisitedChildren = false;
+ }
+ } else {
+ if (ptNodeParams.hasChildren()) {
+ // Move to the first child.
+ if (!listener->onDescend(ptNodeParams.getChildrenPos())) {
+ return false;
+ }
+ pushReadingStateToStack();
+ readChildNode(ptNodeParams);
+ // Push state to return to the head of the PtNode array.
+ pushReadingStateToStack();
+ alreadyVisitedAllPtNodesInArray = false;
+ alreadyVisitedChildren = false;
+ } else {
+ alreadyVisitedChildren = true;
+ }
+ }
+ } else {
+ if (!listener->onVisitingPtNode(&ptNodeParams)) {
+ return false;
+ }
+ readNextSiblingNode(ptNodeParams);
+ if (isEnd()) {
+ if (!listener->onReadingPtNodeArrayTail()) {
+ return false;
+ }
+ // Return to the head of current PtNode array.
+ popReadingStateFromStack();
+ alreadyVisitedAllPtNodesInArray = true;
+ }
+ }
+ }
+ popReadingStateFromStack();
+ // Ascend from the root PtNode array to the root.
+ if (!listener->onAscend()) {
+ return false;
+ }
+ return !isError();
+}
+
+int DynamicPtReadingHelper::getCodePointsAndProbabilityAndReturnCodePointCount(
+ const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) {
+ // This method traverses parent nodes from the terminal by following parent pointers; thus,
+ // node code points are stored in the buffer in the reverse order.
+ int reverseCodePoints[maxCodePointCount];
+ const PtNodeParams terminalPtNodeParams(getPtNodeParams());
+ // First, read the terminal node and get its probability.
+ if (!isValidTerminalNode(terminalPtNodeParams)) {
+ // Node at the ptNodePos is not a valid terminal node.
+ *outUnigramProbability = NOT_A_PROBABILITY;
+ return 0;
+ }
+ // Store terminal node probability.
+ *outUnigramProbability = terminalPtNodeParams.getProbability();
+ // Then, following parent node link to the dictionary root and fetch node code points.
+ int totalCodePointCount = 0;
+ while (!isEnd()) {
+ const PtNodeParams ptNodeParams(getPtNodeParams());
+ totalCodePointCount = getTotalCodePointCount(ptNodeParams);
+ if (!ptNodeParams.isValid() || totalCodePointCount > maxCodePointCount) {
+ // The ptNodePos is not a valid terminal node position in the dictionary.
+ *outUnigramProbability = NOT_A_PROBABILITY;
+ return 0;
+ }
+ // Store node code points to buffer in the reverse order.
+ fetchMergedNodeCodePointsInReverseOrder(ptNodeParams, getPrevTotalCodePointCount(),
+ reverseCodePoints);
+ // Follow parent node toward the root node.
+ readParentNode(ptNodeParams);
+ }
+ if (isError()) {
+ // The node position or the dictionary is invalid.
+ *outUnigramProbability = NOT_A_PROBABILITY;
+ return 0;
+ }
+ // Reverse the stored code points to output them.
+ for (int i = 0; i < totalCodePointCount; ++i) {
+ outCodePoints[i] = reverseCodePoints[totalCodePointCount - i - 1];
+ }
+ return totalCodePointCount;
+}
+
+ int DynamicPtReadingHelper::getTerminalPtNodePositionOfWord(const int *const inWord,
+         const int length, const bool forceLowerCaseSearch) {
+     // Returns the head position of the terminal PtNode for inWord, or NOT_A_DICT_POS if absent.
+     if (length <= 0) {
+         // No PtNode can match an empty word; this also avoids a zero or negative sized array.
+         return NOT_A_DICT_POS;
+     }
+     int searchCodePoints[length];
+     for (int i = 0; i < length; ++i) {
+         searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i];
+     }
+     while (!isEnd()) {
+         const PtNodeParams ptNodeParams(getPtNodeParams());
+         const int matchedCodePointCount = getPrevTotalCodePointCount();
+         const int totalCodePointCount = getTotalCodePointCount(ptNodeParams);
+         if (totalCodePointCount > length
+                 || !isMatchedCodePoint(ptNodeParams, 0 /* index */,
+                         searchCodePoints[matchedCodePointCount])) {
+             // Too many code points or a different first code point; try the next sibling node.
+             readNextSiblingNode(ptNodeParams);
+             continue;
+         }
+         // Check the remaining code points merged into this node.
+         const int nodeCodePointCount = ptNodeParams.getCodePointCount();
+         for (int j = 1; j < nodeCodePointCount; ++j) {
+             if (!isMatchedCodePoint(ptNodeParams, j, searchCodePoints[matchedCodePointCount + j])) {
+                 // A different code point is found. The given word is not in the dictionary.
+                 return NOT_A_DICT_POS;
+             }
+         }
+         if (length == totalCodePointCount) {
+             // All code points are matched; the word exists only if this node is a terminal.
+             return ptNodeParams.isTerminal() ? ptNodeParams.getHeadPos() : NOT_A_DICT_POS;
+         }
+         if (!ptNodeParams.hasChildren()) {
+             return NOT_A_DICT_POS;
+         }
+         // Advance to the children nodes.
+         readChildNode(ptNodeParams);
+     }
+     // Every candidate PtNode has been read without finding the word.
+     return NOT_A_DICT_POS;
+ }
+
+ // Reads the PtNode array at the current position and moves to its first PtNode; an empty array
+ // is skipped by following its forward link. Counts PtNodes and arrays to avoid an infinite loop.
+ void DynamicPtReadingHelper::nextPtNodeArray() {
+     int ptNodeCountInArray = 0;
+     int firstPtNodePos = NOT_A_DICT_POS;
+     if (!mPtNodeArrayReader->readPtNodeArrayInfoAndReturnIfValid(
+             mReadingState.mPos, &ptNodeCountInArray, &firstPtNodePos)) {
+         mIsError = true;
+         mReadingState.mPos = NOT_A_DICT_POS;
+         return;
+     }
+     mReadingState.mPosOfThisPtNodeArrayHead = mReadingState.mPos;
+     mReadingState.mRemainingPtNodeCountInThisArray = ptNodeCountInArray;
+     mReadingState.mPos = firstPtNodePos;
+     // Count up PtNodes and PtNode arrays in this chain so that cyclic links are detected.
+     mReadingState.mTotalPtNodeIndexInThisArrayChain +=
+             mReadingState.mRemainingPtNodeCountInThisArray;
+     mReadingState.mPtNodeArrayIndexInThisArrayChain++;
+     if (mReadingState.mRemainingPtNodeCountInThisArray < 0
+             || mReadingState.mTotalPtNodeIndexInThisArrayChain
+                     > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP
+             || mReadingState.mPtNodeArrayIndexInThisArrayChain
+                     > MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) {
+         // Invalid dictionary: negative array size or too many PtNodes / arrays in one chain.
+         AKLOGI("Invalid dictionary. nodeCount: %d, totalNodeCount: %d, MAX_CHILD_COUNT: %d, "
+                 "nodeArrayCount: %d, MAX_NODE_ARRAY_COUNT: %d",
+                 mReadingState.mRemainingPtNodeCountInThisArray,
+                 mReadingState.mTotalPtNodeIndexInThisArrayChain,
+                 MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP,
+                 mReadingState.mPtNodeArrayIndexInThisArrayChain,
+                 MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP);
+         ASSERT(false);
+         mIsError = true;
+         mReadingState.mPos = NOT_A_DICT_POS;
+         return;
+     }
+     if (mReadingState.mRemainingPtNodeCountInThisArray == 0) {
+         // Empty PtNode array. Try following the forward link to the next array in the chain.
+         followForwardLink();
+     }
+ }
+
+ // Reads the forward link at the current position and, if it exists, the PtNode array it targets.
+ void DynamicPtReadingHelper::followForwardLink() {
+     int nextPtNodeArrayPos = NOT_A_DICT_POS;
+     const bool isValidLink = mPtNodeArrayReader->readForwardLinkAndReturnIfValid(
+             mReadingState.mPos, &nextPtNodeArrayPos);
+     if (!isValidLink) {
+         // The forward link cannot be read; stop reading and flag the error.
+         mIsError = true;
+         mReadingState.mPos = NOT_A_DICT_POS;
+         return;
+     }
+     mReadingState.mPosOfLastForwardLinkField = mReadingState.mPos;
+     // NOT_A_DICT_POS here means that all PtNode arrays in the chain have been read.
+     mReadingState.mPos = nextPtNodeArrayPos;
+     if (nextPtNodeArrayPos != NOT_A_DICT_POS) {
+         // Continue with the PtNode array that the forward link points to.
+         nextPtNodeArray();
+     }
+ }
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h
new file mode 100644
index 000000000..2e05bf397
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h
@@ -0,0 +1,284 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DYNAMIC_PT_READING_HELPER_H
+#define LATINIME_DYNAMIC_PT_READING_HELPER_H
+
+#include <cstddef>
+#include <vector>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
+
+namespace latinime {
+
+class DictionaryBigramsStructurePolicy;
+class DictionaryShortcutsStructurePolicy;
+class PtNodeArrayReader;
+
+/*
+ * This class is used for traversing dynamic patricia trie. This class supports iterating nodes and
+ * dealing with additional buffer. This class counts nodes and node arrays to avoid infinite loop.
+ */
+class DynamicPtReadingHelper {
+ public:
+ class TraversingEventListener {
+ public:
+ virtual ~TraversingEventListener() {};
+
+ // Returns whether the event handling was succeeded or not.
+ virtual bool onAscend() = 0;
+
+ // Returns whether the event handling was succeeded or not.
+ virtual bool onDescend(const int ptNodeArrayPos) = 0;
+
+ // Returns whether the event handling was succeeded or not.
+ virtual bool onReadingPtNodeArrayTail() = 0;
+
+ // Returns whether the event handling was succeeded or not.
+ virtual bool onVisitingPtNode(const PtNodeParams *const node) = 0;
+
+ protected:
+ TraversingEventListener() {};
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(TraversingEventListener);
+ };
+
+ class TraversePolicyToGetAllTerminalPtNodePositions : public TraversingEventListener {
+ public:
+ TraversePolicyToGetAllTerminalPtNodePositions(std::vector<int> *const terminalPositions)
+ : mTerminalPositions(terminalPositions) {}
+ bool onAscend() { return true; }
+ bool onDescend(const int ptNodeArrayPos) { return true; }
+ bool onReadingPtNodeArrayTail() { return true; }
+ bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToGetAllTerminalPtNodePositions);
+
+ std::vector<int> *const mTerminalPositions;
+ };
+
+ DynamicPtReadingHelper(const PtNodeReader *const ptNodeReader,
+ const PtNodeArrayReader *const ptNodeArrayReader)
+ : mIsError(false), mReadingState(), mPtNodeReader(ptNodeReader),
+ mPtNodeArrayReader(ptNodeArrayReader), mReadingStateStack() {}
+
+ ~DynamicPtReadingHelper() {}
+
+ AK_FORCE_INLINE bool isError() const {
+ return mIsError;
+ }
+
+ AK_FORCE_INLINE bool isEnd() const {
+ return mReadingState.mPos == NOT_A_DICT_POS;
+ }
+
+ // Initialize reading state with the head position of a PtNode array.
+ AK_FORCE_INLINE void initWithPtNodeArrayPos(const int ptNodeArrayPos) {
+ if (ptNodeArrayPos == NOT_A_DICT_POS) {
+ mReadingState.mPos = NOT_A_DICT_POS;
+ } else {
+ mIsError = false;
+ mReadingState.mPos = ptNodeArrayPos;
+ mReadingState.mTotalCodePointCountSinceInitialization = 0;
+ mReadingState.mTotalPtNodeIndexInThisArrayChain = 0;
+ mReadingState.mPtNodeArrayIndexInThisArrayChain = 0;
+ mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
+ mReadingStateStack.clear();
+ nextPtNodeArray();
+ }
+ }
+
+ // Initialize reading state with the head position of a node.
+ AK_FORCE_INLINE void initWithPtNodePos(const int ptNodePos) {
+ if (ptNodePos == NOT_A_DICT_POS) {
+ mReadingState.mPos = NOT_A_DICT_POS;
+ } else {
+ mIsError = false;
+ mReadingState.mPos = ptNodePos;
+ mReadingState.mRemainingPtNodeCountInThisArray = 1;
+ mReadingState.mTotalCodePointCountSinceInitialization = 0;
+ mReadingState.mTotalPtNodeIndexInThisArrayChain = 1;
+ mReadingState.mPtNodeArrayIndexInThisArrayChain = 1;
+ mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
+ mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS;
+ mReadingStateStack.clear();
+ }
+ }
+
+ AK_FORCE_INLINE const PtNodeParams getPtNodeParams() const {
+ if (isEnd()) {
+ return PtNodeParams();
+ }
+ return mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(mReadingState.mPos);
+ }
+
+ AK_FORCE_INLINE bool isValidTerminalNode(const PtNodeParams &ptNodeParams) const {
+ return !isEnd() && !ptNodeParams.isDeleted() && ptNodeParams.isTerminal();
+ }
+
+ AK_FORCE_INLINE bool isMatchedCodePoint(const PtNodeParams &ptNodeParams, const int index,
+ const int codePoint) const {
+ return ptNodeParams.getCodePoints()[index] == codePoint;
+ }
+
+ // Return code point count exclude the last read node's code points.
+ AK_FORCE_INLINE int getPrevTotalCodePointCount() const {
+ return mReadingState.mTotalCodePointCountSinceInitialization;
+ }
+
+ // Return code point count include the last read node's code points.
+ AK_FORCE_INLINE int getTotalCodePointCount(const PtNodeParams &ptNodeParams) const {
+ return mReadingState.mTotalCodePointCountSinceInitialization
+ + ptNodeParams.getCodePointCount();
+ }
+
+ AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder(const PtNodeParams &ptNodeParams,
+ const int index, int *const outCodePoints) const {
+ const int nodeCodePointCount = ptNodeParams.getCodePointCount();
+ const int *const nodeCodePoints = ptNodeParams.getCodePoints();
+ for (int i = 0; i < nodeCodePointCount; ++i) {
+ outCodePoints[index + i] = nodeCodePoints[nodeCodePointCount - 1 - i];
+ }
+ }
+
+ AK_FORCE_INLINE void readNextSiblingNode(const PtNodeParams &ptNodeParams) {
+ mReadingState.mRemainingPtNodeCountInThisArray -= 1;
+ mReadingState.mPos = ptNodeParams.getSiblingNodePos();
+ if (mReadingState.mRemainingPtNodeCountInThisArray <= 0) {
+ // All nodes in the current node array have been read.
+ followForwardLink();
+ }
+ }
+
+ // Read the first child node of the current node.
+ AK_FORCE_INLINE void readChildNode(const PtNodeParams &ptNodeParams) {
+ if (ptNodeParams.hasChildren()) {
+ mReadingState.mTotalCodePointCountSinceInitialization +=
+ ptNodeParams.getCodePointCount();
+ mReadingState.mTotalPtNodeIndexInThisArrayChain = 0;
+ mReadingState.mPtNodeArrayIndexInThisArrayChain = 0;
+ mReadingState.mPos = ptNodeParams.getChildrenPos();
+ mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
+ // Read children node array.
+ nextPtNodeArray();
+ } else {
+ mReadingState.mPos = NOT_A_DICT_POS;
+ }
+ }
+
+ // Read the parent node of the current node.
+ AK_FORCE_INLINE void readParentNode(const PtNodeParams &ptNodeParams) {
+ if (ptNodeParams.getParentPos() != NOT_A_DICT_POS) {
+ mReadingState.mTotalCodePointCountSinceInitialization +=
+ ptNodeParams.getCodePointCount();
+ mReadingState.mTotalPtNodeIndexInThisArrayChain = 1;
+ mReadingState.mPtNodeArrayIndexInThisArrayChain = 1;
+ mReadingState.mRemainingPtNodeCountInThisArray = 1;
+ mReadingState.mPos = ptNodeParams.getParentPos();
+ mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
+ mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS;
+ } else {
+ mReadingState.mPos = NOT_A_DICT_POS;
+ }
+ }
+
+ AK_FORCE_INLINE int getPosOfLastForwardLinkField() const {
+ return mReadingState.mPosOfLastForwardLinkField;
+ }
+
+ AK_FORCE_INLINE int getPosOfLastPtNodeArrayHead() const {
+ return mReadingState.mPosOfThisPtNodeArrayHead;
+ }
+
+ bool traverseAllPtNodesInPostorderDepthFirstManner(TraversingEventListener *const listener);
+
+ bool traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
+ TraversingEventListener *const listener);
+
+ int getCodePointsAndProbabilityAndReturnCodePointCount(const int maxCodePointCount,
+ int *const outCodePoints, int *const outUnigramProbability);
+
+ int getTerminalPtNodePositionOfWord(const int *const inWord, const int length,
+ const bool forceLowerCaseSearch);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DynamicPtReadingHelper);
+
+ // This class encapsulates the reading state of a position in the dictionary. It points at a
+ // specific PtNode in the dictionary.
+ class PtNodeReadingState {
+ public:
+ // Note that copy constructor and assignment operator are used for this class to use
+ // std::vector.
+ PtNodeReadingState() : mPos(NOT_A_DICT_POS), mRemainingPtNodeCountInThisArray(0),
+ mTotalCodePointCountSinceInitialization(0), mTotalPtNodeIndexInThisArrayChain(0),
+ mPtNodeArrayIndexInThisArrayChain(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS),
+ mPosOfThisPtNodeArrayHead(NOT_A_DICT_POS) {}
+
+ int mPos;
+ // Remaining node count in the current array.
+ int mRemainingPtNodeCountInThisArray;
+ int mTotalCodePointCountSinceInitialization;
+ // Counter of PtNodes used to avoid infinite loops caused by broken or malicious links.
+ int mTotalPtNodeIndexInThisArrayChain;
+ // Counter of PtNode arrays used to avoid infinite loops caused by cyclic links of empty
+ // PtNode arrays.
+ int mPtNodeArrayIndexInThisArrayChain;
+ int mPosOfLastForwardLinkField;
+ int mPosOfThisPtNodeArrayHead;
+ };
+
+ static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP;
+ static const int MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP;
+ static const size_t MAX_READING_STATE_STACK_SIZE;
+
+ // TODO: Introduce error code to track what caused the error.
+ bool mIsError;
+ PtNodeReadingState mReadingState;
+ const PtNodeReader *const mPtNodeReader;
+ const PtNodeArrayReader *const mPtNodeArrayReader;
+ std::vector<PtNodeReadingState> mReadingStateStack;
+
+ void nextPtNodeArray();
+
+ void followForwardLink();
+
+ AK_FORCE_INLINE void pushReadingStateToStack() {
+ if (mReadingStateStack.size() > MAX_READING_STATE_STACK_SIZE) {
+ AKLOGI("Reading state stack overflow. Max size: %zd", MAX_READING_STATE_STACK_SIZE);
+ ASSERT(false);
+ mIsError = true;
+ mReadingState.mPos = NOT_A_DICT_POS;
+ } else {
+ mReadingStateStack.push_back(mReadingState);
+ }
+ }
+
+ AK_FORCE_INLINE void popReadingStateFromStack() {
+ if (mReadingStateStack.empty()) {
+ mReadingState.mPos = NOT_A_DICT_POS;
+ } else {
+ mReadingState = mReadingStateStack.back();
+ mReadingStateStack.pop_back();
+ }
+ }
+};
+} // namespace latinime
+#endif /* LATINIME_DYNAMIC_PT_READING_HELPER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.cpp
index d68446db6..3586b50ab 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.cpp
@@ -14,38 +14,38 @@
* limitations under the License.
*/
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
#include "defines.h"
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
namespace latinime {
-typedef DynamicPatriciaTrieReadingUtils DptReadingUtils;
-
-const DptReadingUtils::NodeFlags DptReadingUtils::MASK_MOVED = 0xC0;
-const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_NOT_MOVED = 0xC0;
-const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_MOVED = 0x40;
-const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_DELETED = 0x80;
+const DynamicPtReadingUtils::NodeFlags DynamicPtReadingUtils::MASK_MOVED = 0xC0;
+const DynamicPtReadingUtils::NodeFlags DynamicPtReadingUtils::FLAG_IS_NOT_MOVED = 0xC0;
+const DynamicPtReadingUtils::NodeFlags DynamicPtReadingUtils::FLAG_IS_MOVED = 0x40;
+const DynamicPtReadingUtils::NodeFlags DynamicPtReadingUtils::FLAG_IS_DELETED = 0x80;
+const DynamicPtReadingUtils::NodeFlags DynamicPtReadingUtils::FLAG_WILL_BECOME_NON_TERMINAL = 0x00;
// TODO: Make DICT_OFFSET_ZERO_OFFSET = 0.
// Currently, DICT_OFFSET_INVALID is 0 in Java side but offset can be 0 during GC. So, the maximum
// value of offsets, which is 0x7FFFFF is used to represent 0 offset.
-const int DptReadingUtils::DICT_OFFSET_INVALID = 0;
-const int DptReadingUtils::DICT_OFFSET_ZERO_OFFSET = 0x7FFFFF;
+const int DynamicPtReadingUtils::DICT_OFFSET_INVALID = 0;
+const int DynamicPtReadingUtils::DICT_OFFSET_ZERO_OFFSET = 0x7FFFFF;
-/* static */ int DptReadingUtils::getForwardLinkPosition(const uint8_t *const buffer,
+/* static */ int DynamicPtReadingUtils::getForwardLinkPosition(const uint8_t *const buffer,
const int pos) {
int linkAddressPos = pos;
return ByteArrayUtils::readSint24AndAdvancePosition(buffer, &linkAddressPos);
}
-/* static */ int DptReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(
+/* static */ int DynamicPtReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(
const uint8_t *const buffer, int *const pos) {
return ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos);
}
-/* static */ int DptReadingUtils::getParentPtNodePos(const int parentOffset, const int ptNodePos) {
+/* static */ int DynamicPtReadingUtils::getParentPtNodePos(const int parentOffset,
+ const int ptNodePos) {
if (parentOffset == DICT_OFFSET_INVALID) {
return NOT_A_DICT_POS;
} else if (parentOffset == DICT_OFFSET_ZERO_OFFSET) {
@@ -55,7 +55,7 @@ const int DptReadingUtils::DICT_OFFSET_ZERO_OFFSET = 0x7FFFFF;
}
}
-/* static */ int DptReadingUtils::readChildrenPositionAndAdvancePosition(
+/* static */ int DynamicPtReadingUtils::readChildrenPositionAndAdvancePosition(
const uint8_t *const buffer, int *const pos) {
const int base = *pos;
const int offset = ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h
index 67c3cc57e..b13a075d5 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h
@@ -14,16 +14,16 @@
* limitations under the License.
*/
-#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_READING_UTILS_H
-#define LATINIME_DYNAMIC_PATRICIA_TRIE_READING_UTILS_H
+#ifndef LATINIME_DYNAMIC_PT_READING_UTILS_H
+#define LATINIME_DYNAMIC_PT_READING_UTILS_H
-#include <stdint.h>
+#include <cstdint>
#include "defines.h"
namespace latinime {
-class DynamicPatriciaTrieReadingUtils {
+class DynamicPtReadingUtils {
public:
typedef uint8_t NodeFlags;
@@ -54,22 +54,30 @@ class DynamicPatriciaTrieReadingUtils {
return FLAG_IS_DELETED == (MASK_MOVED & flags);
}
+ static AK_FORCE_INLINE bool willBecomeNonTerminal(const NodeFlags flags) {
+     return (flags & MASK_MOVED) == FLAG_WILL_BECOME_NON_TERMINAL;
+ }
+
static AK_FORCE_INLINE NodeFlags updateAndGetFlags(const NodeFlags originalFlags,
- const bool isMoved, const bool isDeleted) {
+ const bool isMoved, const bool isDeleted, const bool willBecomeNonTerminal) {
NodeFlags flags = originalFlags;
+ flags = willBecomeNonTerminal ?
+ ((flags & (~MASK_MOVED)) | FLAG_WILL_BECOME_NON_TERMINAL) : flags;
flags = isMoved ? ((flags & (~MASK_MOVED)) | FLAG_IS_MOVED) : flags;
flags = isDeleted ? ((flags & (~MASK_MOVED)) | FLAG_IS_DELETED) : flags;
- flags = (!isMoved && !isDeleted) ? ((flags & (~MASK_MOVED)) | FLAG_IS_NOT_MOVED) : flags;
+ flags = (!isMoved && !isDeleted && !willBecomeNonTerminal) ?
+ ((flags & (~MASK_MOVED)) | FLAG_IS_NOT_MOVED) : flags;
return flags;
}
private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieReadingUtils);
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPtReadingUtils);
static const NodeFlags MASK_MOVED;
static const NodeFlags FLAG_IS_NOT_MOVED;
static const NodeFlags FLAG_IS_MOVED;
static const NodeFlags FLAG_IS_DELETED;
+ static const NodeFlags FLAG_WILL_BECOME_NON_TERMINAL;
};
} // namespace latinime
-#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_READING_UTILS_H */
+#endif /* LATINIME_DYNAMIC_PT_READING_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp
new file mode 100644
index 000000000..f31c914d2
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp
@@ -0,0 +1,291 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h"
+
+#include "suggest/core/dictionary/property/unigram_property.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+ const int DynamicPtUpdatingHelper::CHILDREN_POSITION_FIELD_SIZE = 3; // Presumably bytes; confirm.
+
+ bool DynamicPtUpdatingHelper::addUnigramWord(
+         DynamicPtReadingHelper *const readingHelper,
+         const int *const wordCodePoints, const int codePointCount,
+         const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram) {
+     // Head position of the PtNode whose children are being scanned; none at the first level.
+     int parentPos = NOT_A_DICT_POS;
+     while (!readingHelper->isEnd()) {
+         const PtNodeParams ptNodeParams(readingHelper->getPtNodeParams());
+         if (!ptNodeParams.isValid()) {
+             break;
+         }
+         const int prevCodePointCount = readingHelper->getPrevTotalCodePointCount();
+         const int *const unmatchedCodePoints = wordCodePoints + prevCodePointCount;
+         if (!readingHelper->isMatchedCodePoint(ptNodeParams, 0, unmatchedCodePoints[0])) {
+             // This PtNode starts with another code point, so move on to its next sibling.
+             readingHelper->readNextSiblingNode(ptNodeParams);
+             continue;
+         }
+         // The first code point matched; compare the rest of the merged code points.
+         const int nodeCodePointCount = ptNodeParams.getCodePointCount();
+         for (int j = 1; j < nodeCodePointCount; ++j) {
+             const bool isWordExhausted = prevCodePointCount + j >= codePointCount;
+             if (isWordExhausted || !readingHelper->isMatchedCodePoint(ptNodeParams, j,
+                     unmatchedCodePoints[j])) {
+                 // The word diverges from, or ends inside, this PtNode; split the PtNode at j.
+                 *outAddedNewUnigram = true;
+                 return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, unigramProperty,
+                         unmatchedCodePoints, codePointCount - prevCodePointCount);
+             }
+         }
+         // All code points of this PtNode are matched.
+         const int totalCodePointCount = readingHelper->getTotalCodePointCount(ptNodeParams);
+         if (codePointCount == totalCodePointCount) {
+             return setPtNodeProbability(&ptNodeParams, unigramProperty, outAddedNewUnigram);
+         }
+         if (!ptNodeParams.hasChildren()) {
+             *outAddedNewUnigram = true;
+             return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams, unigramProperty,
+                     wordCodePoints + totalCodePointCount, codePointCount - totalCodePointCount);
+         }
+         // Advance to the children nodes.
+         parentPos = ptNodeParams.getHeadPos();
+         readingHelper->readChildNode(ptNodeParams);
+     }
+     if (readingHelper->isError()) {
+         // The dictionary is invalid.
+         return false;
+     }
+     // Nothing matched; link a new PtNode array holding the remaining code points.
+     int pos = readingHelper->getPosOfLastForwardLinkField();
+     *outAddedNewUnigram = true;
+     const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount();
+     return createAndInsertNodeIntoPtNodeArray(parentPos, wordCodePoints + matchedCodePointCount,
+             codePointCount - matchedCodePointCount, unigramProperty, &pos);
+ }
+
+ bool DynamicPtUpdatingHelper::addBigramWords(const int word0Pos, const int word1Pos,
+         const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) {
+     // Fetch the PtNodes at both positions and let the writer add the word0 -> word1 bigram
+     // entry. outAddedNewBigram is passed through to the writer.
+     const PtNodeParams word0Params(mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(word0Pos));
+     const PtNodeParams word1Params(mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(word1Pos));
+     return mPtNodeWriter->addNewBigramEntry(&word0Params, &word1Params, bigramProperty,
+             outAddedNewBigram);
+ }
+
+ // Removes the bigram relation from the word at word0Pos to the word at word1Pos. Returns the
+ // result reported by the PtNode writer.
+ bool DynamicPtUpdatingHelper::removeBigramWords(const int word0Pos, const int word1Pos) {
+     const PtNodeParams word0Params(mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(word0Pos));
+     const PtNodeParams word1Params(mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(word1Pos));
+     // The writer takes the source (word0) first and the target (word1) second.
+     return mPtNodeWriter->removeBigramEntry(&word0Params, &word1Params);
+ }
+
+ bool DynamicPtUpdatingHelper::addShortcutTarget(const int wordPos,
+         const int *const targetCodePoints, const int targetCodePointCount,
+         const int shortcutProbability) {
+     const PtNodeParams wordParams(mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(wordPos));
+     return mPtNodeWriter->addShortcutTarget(&wordParams /* PtNode fetched from wordPos */,
+             targetCodePoints, targetCodePointCount, shortcutProbability);
+ }
+
+ bool DynamicPtUpdatingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos,
+         const int *const nodeCodePoints, const int nodeCodePointCount,
+         const UnigramProperty *const unigramProperty, int *const forwardLinkFieldPos) {
+     // The new PtNode array goes to the current buffer tail. Write that position into the
+     // forward link field first, then write the array itself.
+     const int newPtNodeArrayPos = mBuffer->getTailPosition();
+     const bool isLinkWritten = DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(
+             mBuffer, newPtNodeArrayPos, forwardLinkFieldPos);
+     return isLinkWritten && createNewPtNodeArrayWithAChildPtNode(parentPos, nodeCodePoints,
+             nodeCodePointCount, unigramProperty);
+ }
+
+ // Makes the given PtNode a terminal carrying unigramProperty. *outAddedNewUnigram tells whether
+ // a new word was added (true) or an existing word was overwritten (false).
+ bool DynamicPtUpdatingHelper::setPtNodeProbability(const PtNodeParams *const originalPtNodeParams,
+         const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram) {
+     if (originalPtNodeParams->isTerminal() && !originalPtNodeParams->isDeleted()) {
+         // The word already exists; only its unigram property has to be overwritten.
+         *outAddedNewUnigram = false;
+         return mPtNodeWriter->updatePtNodeUnigramProperty(originalPtNodeParams, unigramProperty);
+     }
+     // Otherwise, write a terminal version of the PtNode with the probability at the buffer
+     // tail.
+     *outAddedNewUnigram = true;
+     const int movedPos = mBuffer->getTailPosition();
+     int writingPos = movedPos;
+     const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(originalPtNodeParams,
+             unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
+             true /* isTerminal */, originalPtNodeParams->getParentPos(),
+             originalPtNodeParams->getCodePointCount(), originalPtNodeParams->getCodePoints(),
+             unigramProperty->getProbability()));
+     if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
+             unigramProperty, &writingPos)) {
+         return false;
+     }
+     // Mark the original PtNode as moved to the newly written one.
+     return mPtNodeWriter->markPtNodeAsMoved(originalPtNodeParams, movedPos, movedPos);
+ }
+
+ bool DynamicPtUpdatingHelper::createChildrenPtNodeArrayAndAChildPtNode(
+         const PtNodeParams *const parentPtNodeParams, const UnigramProperty *const unigramProperty,
+         const int *const codePoints, const int codePointCount) {
+     // The children array is written at the buffer tail; store that position in the parent first.
+     const int newPtNodeArrayPos = mBuffer->getTailPosition();
+     const bool isParentUpdated =
+             mPtNodeWriter->updateChildrenPosition(parentPtNodeParams, newPtNodeArrayPos);
+     return isParentUpdated && createNewPtNodeArrayWithAChildPtNode(
+             parentPtNodeParams->getHeadPos(), codePoints, codePointCount, unigramProperty);
+ }
+
+ bool DynamicPtUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
+         const int parentPtNodePos, const int *const nodeCodePoints, const int nodeCodePointCount,
+         const UnigramProperty *const unigramProperty) {
+     // Written at the buffer tail, in order: array size (1), the terminal PtNode, forward link.
+     int writingPos = mBuffer->getTailPosition();
+     if (!DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
+             1 /* arraySize */, &writingPos)) {
+         return false;
+     }
+     // The only PtNode in the array is a terminal built from the unigram property.
+     const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
+             unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(), true /* isTerminal */,
+             parentPtNodePos, nodeCodePointCount, nodeCodePoints,
+             unigramProperty->getProbability()));
+     if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
+             unigramProperty, &writingPos)) {
+         return false;
+     }
+     // The array has no following array yet, so NOT_A_DICT_POS is written as its forward link.
+     return DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
+             NOT_A_DICT_POS /* forwardLinkPos */, &writingPos);
+ }
+
+ // Returns whether the dictionary was updated successfully.
+bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
+ const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
+ const UnigramProperty *const unigramProperty, const int *const newNodeCodePoints,
+ const int newNodeCodePointCount) {
+ // When addsExtraChild is true, split the reallocating PtNode and add a new child.
+ // Reallocating PtNode: abcde, newNode: abcxy.
+ // abc (1st, not terminal) __ de (2nd)
+ // \_ xy (extra child, terminal)
+ // Otherwise, make the 1st part terminal and write the information in unigramProperty to it.
+ // Reallocating PtNode: abcde, newNode: abc.
+ // abc (1st, terminal) __ de (2nd)
+ const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount;
+ const int firstPartOfReallocatedPtNodePos = mBuffer->getTailPosition();
+ int writingPos = firstPartOfReallocatedPtNodePos;
+ // Write the 1st part of the reallocating node. Its children position will be updated later
+ // with the actual children position.
+ if (addsExtraChild) {
+ const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
+ false /* isNotAWord */, false /* isBlacklisted */, false /* isTerminal */,
+ reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount,
+ reallocatingPtNodeParams->getCodePoints(), NOT_A_PROBABILITY));
+ if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) {
+ return false;
+ }
+ } else {
+ const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
+ unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
+ true /* isTerminal */, reallocatingPtNodeParams->getParentPos(),
+ overlappingCodePointCount, reallocatingPtNodeParams->getCodePoints(),
+ unigramProperty->getProbability()));
+ if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
+ unigramProperty, &writingPos)) {
+ return false;
+ }
+ }
+ const int actualChildrenPos = writingPos;
+ // Create the new children PtNode array right after the 1st part.
+ const size_t newPtNodeCount = addsExtraChild ? 2 : 1;
+ if (!DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
+ newPtNodeCount, &writingPos)) {
+ return false;
+ }
+ // Write the 2nd part of the reallocating node as a child of the 1st part.
+ const int secondPartOfReallocatedPtNodePos = writingPos;
+ const PtNodeParams childPartPtNodeParams(getUpdatedPtNodeParams(reallocatingPtNodeParams,
+ reallocatingPtNodeParams->isNotAWord(), reallocatingPtNodeParams->isBlacklisted(),
+ reallocatingPtNodeParams->isTerminal(), firstPartOfReallocatedPtNodePos,
+ reallocatingPtNodeParams->getCodePointCount() - overlappingCodePointCount,
+ reallocatingPtNodeParams->getCodePoints() + overlappingCodePointCount,
+ reallocatingPtNodeParams->getProbability()));
+ if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&childPartPtNodeParams, &writingPos)) {
+ return false;
+ }
+ if (addsExtraChild) {
+ const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode(
+ unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
+ true /* isTerminal */, firstPartOfReallocatedPtNodePos,
+ newNodeCodePointCount - overlappingCodePointCount,
+ newNodeCodePoints + overlappingCodePointCount, unigramProperty->getProbability()));
+ if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&extraChildPtNodeParams,
+ unigramProperty, &writingPos)) {
+ return false;
+ }
+ }
+ if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
+ NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
+ return false;
+ }
+ // Mark the original reallocating PtNode as moved, passing the positions of both new parts.
+ if (!mPtNodeWriter->markPtNodeAsMoved(reallocatingPtNodeParams, firstPartOfReallocatedPtNodePos,
+ secondPartOfReallocatedPtNodePos)) {
+ return false;
+ }
+ // Reload the 1st part that has just been written to get its PtNode params.
+ const PtNodeParams ptNodeParams(
+ mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(firstPartOfReallocatedPtNodePos));
+ // Point the 1st part at the children PtNode array that was written right after it.
+ return mPtNodeWriter->updateChildrenPosition(&ptNodeParams, actualChildrenPos);
+}
+
+ const PtNodeParams DynamicPtUpdatingHelper::getUpdatedPtNodeParams(
+         const PtNodeParams *const originalPtNodeParams,
+         const bool isNotAWord, const bool isBlacklisted, const bool isTerminal, const int parentPos,
+         const int codePointCount, const int *const codePoints, const int probability) const {
+     const bool hasMultipleChars = codePointCount > 1;
+     const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
+             isBlacklisted, isNotAWord, isTerminal, false /* hasShortcutTargets */,
+             false /* hasBigrams */, hasMultipleChars, CHILDREN_POSITION_FIELD_SIZE);
+     return PtNodeParams(originalPtNodeParams, flags, parentPos, codePointCount, codePoints,
+             probability);
+ }
+
+ const PtNodeParams DynamicPtUpdatingHelper::getPtNodeParamsForNewPtNode(
+         const bool isNotAWord, const bool isBlacklisted, const bool isTerminal,
+         const int parentPos, const int codePointCount, const int *const codePoints,
+         const int probability) const {
+     const bool hasMultipleChars = codePointCount > 1;
+     const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
+             isBlacklisted, isNotAWord, isTerminal, false /* hasShortcutTargets */,
+             false /* hasBigrams */, hasMultipleChars, CHILDREN_POSITION_FIELD_SIZE);
+     return PtNodeParams(flags, parentPos, codePointCount, codePoints, probability);
+ }
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h
new file mode 100644
index 000000000..f10d15a9b
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DYNAMIC_PT_UPDATING_HELPER_H
+#define LATINIME_DYNAMIC_PT_UPDATING_HELPER_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+
+namespace latinime {
+
+class BigramProperty;
+class BufferWithExtendableBuffer;
+class DynamicPtReadingHelper;
+class PtNodeReader;
+class PtNodeWriter;
+class UnigramProperty;
+
+class DynamicPtUpdatingHelper {
+ public:
+ DynamicPtUpdatingHelper(BufferWithExtendableBuffer *const buffer,
+ const PtNodeReader *const ptNodeReader, PtNodeWriter *const ptNodeWriter)
+ : mBuffer(buffer), mPtNodeReader(ptNodeReader), mPtNodeWriter(ptNodeWriter) {}
+ // The constructor only stores the three pointers; the destructor body is empty.
+ ~DynamicPtUpdatingHelper() {}
+
+ // Adds a word to the dictionary. If the word already exists, its probability is updated.
+ bool addUnigramWord(DynamicPtReadingHelper *const readingHelper,
+ const int *const wordCodePoints, const int codePointCount,
+ const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram);
+
+ // Adds a bigram relation from word0Pos to word1Pos.
+ bool addBigramWords(const int word0Pos, const int word1Pos,
+ const BigramProperty *const bigramProperty, bool *const outAddedNewBigram);
+
+ // Removes the bigram relation from word0Pos to word1Pos.
+ bool removeBigramWords(const int word0Pos, const int word1Pos);
+
+ // Adds a shortcut target.
+ bool addShortcutTarget(const int wordPos, const int *const targetCodePoints,
+ const int targetCodePointCount, const int shortcutProbability);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPtUpdatingHelper);
+ // Passed to PatriciaTrieReadingUtils::createAndGetFlags() when node flags are built.
+ static const int CHILDREN_POSITION_FIELD_SIZE;
+ // Pointers supplied to the constructor; the empty destructor does not free them.
+ BufferWithExtendableBuffer *const mBuffer;
+ const PtNodeReader *const mPtNodeReader;
+ PtNodeWriter *const mPtNodeWriter;
+
+ bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
+ const int nodeCodePointCount, const UnigramProperty *const unigramProperty,
+ int *const forwardLinkFieldPos);
+
+ bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams,
+ const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram);
+
+ bool createChildrenPtNodeArrayAndAChildPtNode(const PtNodeParams *const parentPtNodeParams,
+ const UnigramProperty *const unigramProperty, const int *const codePoints,
+ const int codePointCount);
+
+ bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints,
+ const int nodeCodePointCount, const UnigramProperty *const unigramProperty);
+
+ bool reallocatePtNodeAndAddNewPtNodes(
+ const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
+ const UnigramProperty *const unigramProperty, const int *const newNodeCodePoints,
+ const int newNodeCodePointCount);
+ // Builds params from an existing PtNode's params combined with freshly created flags.
+ const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams,
+ const bool isNotAWord, const bool isBlacklisted, const bool isTerminal,
+ const int parentPos, const int codePointCount,
+ const int *const codePoints, const int probability) const;
+ // Builds params for a new PtNode; no existing PtNode params are used.
+ const PtNodeParams getPtNodeParamsForNewPtNode(const bool isNotAWord, const bool isBlacklisted,
+ const bool isTerminal, const int parentPos,
+ const int codePointCount, const int *const codePoints, const int probability) const;
+};
+} // namespace latinime
+#endif /* LATINIME_DYNAMIC_PT_UPDATING_HELPER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.cpp
index 30ff10cd6..664aeebbb 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.cpp
@@ -14,29 +14,28 @@
* limitations under the License.
*/
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
#include <cstddef>
+#include <cstdint>
#include <cstdlib>
-#include <stdint.h>
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
namespace latinime {
-const size_t DynamicPatriciaTrieWritingUtils::MAX_PTNODE_ARRAY_SIZE_TO_USE_SMALL_SIZE_FIELD = 0x7F;
-const size_t DynamicPatriciaTrieWritingUtils::MAX_PTNODE_ARRAY_SIZE = 0x7FFF;
-const int DynamicPatriciaTrieWritingUtils::SMALL_PTNODE_ARRAY_SIZE_FIELD_SIZE = 1;
-const int DynamicPatriciaTrieWritingUtils::LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE = 2;
-const int DynamicPatriciaTrieWritingUtils::LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE_FLAG = 0x8000;
-const int DynamicPatriciaTrieWritingUtils::DICT_OFFSET_FIELD_SIZE = 3;
-const int DynamicPatriciaTrieWritingUtils::MAX_DICT_OFFSET_VALUE = 0x7FFFFF;
-const int DynamicPatriciaTrieWritingUtils::MIN_DICT_OFFSET_VALUE = -0x7FFFFF;
-const int DynamicPatriciaTrieWritingUtils::DICT_OFFSET_NEGATIVE_FLAG = 0x800000;
-const int DynamicPatriciaTrieWritingUtils::PROBABILITY_FIELD_SIZE = 1;
-const int DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE = 1;
+const size_t DynamicPtWritingUtils::MAX_PTNODE_ARRAY_SIZE_TO_USE_SMALL_SIZE_FIELD = 0x7F;
+const size_t DynamicPtWritingUtils::MAX_PTNODE_ARRAY_SIZE = 0x7FFF;
+const int DynamicPtWritingUtils::SMALL_PTNODE_ARRAY_SIZE_FIELD_SIZE = 1;
+const int DynamicPtWritingUtils::LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE = 2;
+const int DynamicPtWritingUtils::LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE_FLAG = 0x8000;
+const int DynamicPtWritingUtils::DICT_OFFSET_FIELD_SIZE = 3;
+const int DynamicPtWritingUtils::MAX_DICT_OFFSET_VALUE = 0x7FFFFF;
+const int DynamicPtWritingUtils::MIN_DICT_OFFSET_VALUE = -0x7FFFFF;
+const int DynamicPtWritingUtils::DICT_OFFSET_NEGATIVE_FLAG = 0x800000;
+const int DynamicPtWritingUtils::NODE_FLAG_FIELD_SIZE = 1;
-/* static */ bool DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(
+/* static */ bool DynamicPtWritingUtils::writeEmptyDictionary(
BufferWithExtendableBuffer *const buffer, const int rootPos) {
int writingPos = rootPos;
if (!writePtNodeArraySizeAndAdvancePosition(buffer, 0 /* arraySize */, &writingPos)) {
@@ -46,13 +45,13 @@ const int DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE = 1;
&writingPos);
}
-/* static */ bool DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(
+/* static */ bool DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(
BufferWithExtendableBuffer *const buffer, const int forwardLinkPos,
int *const forwardLinkFieldPos) {
return writeDictOffset(buffer, forwardLinkPos, (*forwardLinkFieldPos), forwardLinkFieldPos);
}
-/* static */ bool DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(
+/* static */ bool DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition(
BufferWithExtendableBuffer *const buffer, const size_t arraySize,
int *const arraySizeFieldPos) {
// Currently, all array size field to be created has LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE to
@@ -74,20 +73,20 @@ const int DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE = 1;
}
}
-/* static */ bool DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(
+/* static */ bool DynamicPtWritingUtils::writeFlagsAndAdvancePosition(
BufferWithExtendableBuffer *const buffer,
- const DynamicPatriciaTrieReadingUtils::NodeFlags nodeFlags, int *const nodeFlagsFieldPos) {
+ const DynamicPtReadingUtils::NodeFlags nodeFlags, int *const nodeFlagsFieldPos) {
return buffer->writeUintAndAdvancePosition(nodeFlags, NODE_FLAG_FIELD_SIZE, nodeFlagsFieldPos);
}
// Note that parentOffset is offset from node's head position.
-/* static */ bool DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
+/* static */ bool DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(
BufferWithExtendableBuffer *const buffer, const int parentPos, const int basePos,
int *const parentPosFieldPos) {
return writeDictOffset(buffer, parentPos, basePos, parentPosFieldPos);
}
-/* static */ bool DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(
+/* static */ bool DynamicPtWritingUtils::writeCodePointsAndAdvancePosition(
BufferWithExtendableBuffer *const buffer, const int *const codePoints,
const int codePointCount, int *const codePointFieldPos) {
if (codePointCount <= 0) {
@@ -101,34 +100,20 @@ const int DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE = 1;
hasMultipleCodePoints, codePointFieldPos);
}
-/* static */ bool DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(
- BufferWithExtendableBuffer *const buffer, const int probability,
- int *const probabilityFieldPos) {
- if (probability < 0 || probability > MAX_PROBABILITY) {
- AKLOGI("probability cannot be written because the probability is invalid: %d",
- probability);
- ASSERT(false);
- return false;
- }
- return buffer->writeUintAndAdvancePosition(probability, PROBABILITY_FIELD_SIZE,
- probabilityFieldPos);
-}
-
-/* static */ bool DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
+/* static */ bool DynamicPtWritingUtils::writeChildrenPositionAndAdvancePosition(
BufferWithExtendableBuffer *const buffer, const int childrenPosition,
int *const childrenPositionFieldPos) {
return writeDictOffset(buffer, childrenPosition, (*childrenPositionFieldPos),
childrenPositionFieldPos);
}
-/* static */ bool DynamicPatriciaTrieWritingUtils::writeDictOffset(
- BufferWithExtendableBuffer *const buffer, const int targetPos, const int basePos,
- int *const offsetFieldPos) {
+/* static */ bool DynamicPtWritingUtils::writeDictOffset(BufferWithExtendableBuffer *const buffer,
+ const int targetPos, const int basePos, int *const offsetFieldPos) {
int offset = targetPos - basePos;
if (targetPos == NOT_A_DICT_POS) {
- offset = DynamicPatriciaTrieReadingUtils::DICT_OFFSET_INVALID;
+ offset = DynamicPtReadingUtils::DICT_OFFSET_INVALID;
} else if (offset == 0) {
- offset = DynamicPatriciaTrieReadingUtils::DICT_OFFSET_ZERO_OFFSET;
+ offset = DynamicPtReadingUtils::DICT_OFFSET_ZERO_OFFSET;
}
if (offset > MAX_DICT_OFFSET_VALUE || offset < MIN_DICT_OFFSET_VALUE) {
AKLOGI("offset cannot be written because the offset is too large or too small: %d",
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h
index af76bc6b5..362fbd1cc 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h
@@ -14,19 +14,19 @@
* limitations under the License.
*/
-#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_UTILS_H
-#define LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_UTILS_H
+#ifndef LATINIME_DYNAMIC_PT_WRITING_UTILS_H
+#define LATINIME_DYNAMIC_PT_WRITING_UTILS_H
#include <cstddef>
#include "defines.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
namespace latinime {
class BufferWithExtendableBuffer;
-class DynamicPatriciaTrieWritingUtils {
+class DynamicPtWritingUtils {
public:
static const int NODE_FLAG_FIELD_SIZE;
@@ -39,8 +39,15 @@ class DynamicPatriciaTrieWritingUtils {
static bool writePtNodeArraySizeAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
const size_t arraySize, int *const arraySizeFieldPos);
+    // Writes nodeFlags at nodeFlagsFieldPos. Unlike writeFlagsAndAdvancePosition(), the
+    // advanced position is not reported back to the caller.
+    static bool writeFlags(BufferWithExtendableBuffer *const buffer,
+            const DynamicPtReadingUtils::NodeFlags nodeFlags,
+            const int nodeFlagsFieldPos) {
+        // The advancing variant needs a mutable position, so hand it a local copy.
+        int fieldPos = nodeFlagsFieldPos;
+        const bool isWritten = writeFlagsAndAdvancePosition(buffer, nodeFlags, &fieldPos);
+        return isWritten;
+    }
+
static bool writeFlagsAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
- const DynamicPatriciaTrieReadingUtils::NodeFlags nodeFlags,
+ const DynamicPtReadingUtils::NodeFlags nodeFlags,
int *const nodeFlagsFieldPos);
static bool writeParentPosOffsetAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
@@ -49,14 +56,11 @@ class DynamicPatriciaTrieWritingUtils {
static bool writeCodePointsAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
const int *const codePoints, const int codePointCount, int *const codePointFieldPos);
- static bool writeProbabilityAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
- const int probability, int *const probabilityFieldPos);
-
static bool writeChildrenPositionAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
const int childrenPosition, int *const childrenPositionFieldPos);
private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingUtils);
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPtWritingUtils);
static const size_t MAX_PTNODE_ARRAY_SIZE_TO_USE_SMALL_SIZE_FIELD;
static const size_t MAX_PTNODE_ARRAY_SIZE;
@@ -67,10 +71,9 @@ class DynamicPatriciaTrieWritingUtils {
static const int MAX_DICT_OFFSET_VALUE;
static const int MIN_DICT_OFFSET_VALUE;
static const int DICT_OFFSET_NEGATIVE_FLAG;
- static const int PROBABILITY_FIELD_SIZE;
static bool writeDictOffset(BufferWithExtendableBuffer *const buffer, const int targetPos,
const int basePos, int *const offsetFieldPos);
};
} // namespace latinime
-#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_UTILS_H */
+#endif /* LATINIME_DYNAMIC_PT_WRITING_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.cpp
index 7df55815f..e64a13cc4 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.cpp
@@ -14,9 +14,11 @@
* limitations under the License.
*/
-#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
#include "defines.h"
+#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
+#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
namespace latinime {
@@ -130,4 +132,32 @@ const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_BLACKLISTED = 0x01;
return base + offset;
}
+// Reads the PtNode at ptNodePos in dictBuf and stores each field through the out parameters.
+// Fields that the flags report as absent are set to NOT_A_PROBABILITY / NOT_A_DICT_POS.
+// outSiblingPos receives the reading position reached after all fields have been consumed.
+/* static */ void PtReadingUtils::readPtNodeInfo(const uint8_t *const dictBuf, const int ptNodePos,
+        const DictionaryShortcutsStructurePolicy *const shortcutPolicy,
+        const DictionaryBigramsStructurePolicy *const bigramPolicy,
+        NodeFlags *const outFlags, int *const outCodePointCount, int *const outCodePoint,
+        int *const outProbability, int *const outChildrenPos, int *const outShortcutPos,
+        int *const outBigramPos, int *const outSiblingPos) {
+    int pos = ptNodePos;
+    const NodeFlags nodeFlags = getFlagsAndAdvancePosition(dictBuf, &pos);
+    *outFlags = nodeFlags;
+    *outCodePointCount = getCharsAndAdvancePosition(
+            dictBuf, nodeFlags, MAX_WORD_LENGTH, outCodePoint, &pos);
+    // The probability is read only when the flags mark the PtNode as terminal.
+    if (isTerminal(nodeFlags)) {
+        *outProbability = readProbabilityAndAdvancePosition(dictBuf, &pos);
+    } else {
+        *outProbability = NOT_A_PROBABILITY;
+    }
+    if (hasChildrenInFlags(nodeFlags)) {
+        *outChildrenPos = readChildrenPositionAndAdvancePosition(dictBuf, nodeFlags, &pos);
+    } else {
+        *outChildrenPos = NOT_A_DICT_POS;
+    }
+    // Each list position is the current reading position; the policy then skips the list.
+    if (hasShortcutTargets(nodeFlags)) {
+        *outShortcutPos = pos;
+        shortcutPolicy->skipAllShortcuts(&pos);
+    } else {
+        *outShortcutPos = NOT_A_DICT_POS;
+    }
+    if (hasBigrams(nodeFlags)) {
+        *outBigramPos = pos;
+        bigramPolicy->skipAllBigrams(&pos);
+    } else {
+        *outBigramPos = NOT_A_DICT_POS;
+    }
+    *outSiblingPos = pos;
+}
+
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h
index 8420ee95a..c3f09c3b1 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h
@@ -17,12 +17,15 @@
#ifndef LATINIME_PATRICIA_TRIE_READING_UTILS_H
#define LATINIME_PATRICIA_TRIE_READING_UTILS_H
-#include <stdint.h>
+#include <cstdint>
#include "defines.h"
namespace latinime {
+class DictionaryShortcutsStructurePolicy;
+class DictionaryBigramsStructurePolicy;
+
class PatriciaTrieReadingUtils {
public:
typedef uint8_t NodeFlags;
@@ -100,6 +103,13 @@ class PatriciaTrieReadingUtils {
return nodeFlags;
}
+ static void readPtNodeInfo(const uint8_t *const dictBuf, const int ptNodePos,
+ const DictionaryShortcutsStructurePolicy *const shortcutPolicy,
+ const DictionaryBigramsStructurePolicy *const bigramPolicy,
+ NodeFlags *const outFlags, int *const outCodePointCount, int *const outCodePoint,
+ int *const outProbability, int *const outChildrenPos, int *const outShortcutPos,
+ int *const outBigramPos, int *const outSiblingPos);
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTrieReadingUtils);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h
new file mode 100644
index 000000000..6078d8285
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PT_NODE_ARRAY_READER_H
+#define LATINIME_PT_NODE_ARRAY_READER_H
+
+#include "defines.h"
+
+namespace latinime {
+
+// Interface class used to read PtNode array information.
+class PtNodeArrayReader {
+ public:
+    virtual ~PtNodeArrayReader() {}
+
+    // Returns whether ptNodeArrayPos is a valid position. Results are reported through
+    // outPtNodeCount and outFirstPtNodePos.
+    virtual bool readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos,
+            int *const outPtNodeCount, int *const outFirstPtNodePos) const = 0;
+
+    // Returns whether forwardLinkPos is a valid position. NOT_A_DICT_POS is set to
+    // outNextPtNodeArrayPos when the next array doesn't exist.
+    virtual bool readForwardLinkAndReturnIfValid(const int forwardLinkPos,
+            int *const outNextPtNodeArrayPos) const = 0;
+
+ protected:
+    // Only subclasses may construct the interface.
+    PtNodeArrayReader() {}
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(PtNodeArrayReader);
+};
+} // namespace latinime
+#endif /* LATINIME_PT_NODE_ARRAY_READER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
new file mode 100644
index 000000000..b2e60a837
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
@@ -0,0 +1,255 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PT_NODE_PARAMS_H
+#define LATINIME_PT_NODE_PARAMS_H
+
+#include <cstring>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "utils/char_utils.h"
+
+namespace latinime {
+
+// This class has information of a PtNode. This class is immutable.
+// NOTE: every constructor copies codePointCount ints into mCodePoints[MAX_WORD_LENGTH] with
+// memcpy and performs no bounds check, so callers must keep the count within MAX_WORD_LENGTH.
+class PtNodeParams {
+ public:
+    // Invalid PtNode. It has zero code points, so isValid() returns false.
+    PtNodeParams() : mHeadPos(NOT_A_DICT_POS), mFlags(0), mHasMovedFlag(false),
+            mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mCodePoints(),
+            mTerminalIdFieldPos(NOT_A_DICT_POS), mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID),
+            mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY),
+            mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(NOT_A_DICT_POS),
+            mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS),
+            mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_DICT_POS) {}
+
+    // Copy constructor. Public so that instances can be returned by value.
+    PtNodeParams(const PtNodeParams& ptNodeParams)
+            : mHeadPos(ptNodeParams.mHeadPos), mFlags(ptNodeParams.mFlags),
+              mHasMovedFlag(ptNodeParams.mHasMovedFlag), mParentPos(ptNodeParams.mParentPos),
+              mCodePointCount(ptNodeParams.mCodePointCount), mCodePoints(),
+              mTerminalIdFieldPos(ptNodeParams.mTerminalIdFieldPos),
+              mTerminalId(ptNodeParams.mTerminalId),
+              mProbabilityFieldPos(ptNodeParams.mProbabilityFieldPos),
+              mProbability(ptNodeParams.mProbability),
+              mChildrenPosFieldPos(ptNodeParams.mChildrenPosFieldPos),
+              mChildrenPos(ptNodeParams.mChildrenPos),
+              mBigramLinkedNodePos(ptNodeParams.mBigramLinkedNodePos),
+              mShortcutPos(ptNodeParams.mShortcutPos), mBigramPos(ptNodeParams.mBigramPos),
+              mSiblingPos(ptNodeParams.mSiblingPos) {
+        memcpy(mCodePoints, ptNodeParams.getCodePoints(), sizeof(int) * mCodePointCount);
+    }
+
+    // PtNode read from version 2 dictionary. The moved flag is false, so isDeleted() and
+    // willBecomeNonTerminal() always return false for such a node.
+    PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags,
+            const int codePointCount, const int *const codePoints, const int probability,
+            const int childrenPos, const int shortcutPos, const int bigramPos,
+            const int siblingPos)
+            : mHeadPos(headPos), mFlags(flags), mHasMovedFlag(false), mParentPos(NOT_A_DICT_POS),
+              mCodePointCount(codePointCount), mCodePoints(), mTerminalIdFieldPos(NOT_A_DICT_POS),
+              mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID),
+              mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability),
+              mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(childrenPos),
+              mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(shortcutPos),
+              mBigramPos(bigramPos), mSiblingPos(siblingPos) {
+        memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
+    }
+
+    // PtNode with a terminal id. The terminal id is also stored as the shortcut position and
+    // as the bigram position.
+    PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags,
+            const int parentPos, const int codePointCount, const int *const codePoints,
+            const int terminalIdFieldPos, const int terminalId, const int probability,
+            const int childrenPosFieldPos, const int childrenPos, const int siblingPos)
+            : mHeadPos(headPos), mFlags(flags), mHasMovedFlag(true), mParentPos(parentPos),
+              mCodePointCount(codePointCount), mCodePoints(),
+              mTerminalIdFieldPos(terminalIdFieldPos), mTerminalId(terminalId),
+              mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability),
+              mChildrenPosFieldPos(childrenPosFieldPos), mChildrenPos(childrenPos),
+              mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(terminalId),
+              mBigramPos(terminalId), mSiblingPos(siblingPos) {
+        memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
+    }
+
+    // Construct new params by updating existing PtNode params. Flags, parent position, code
+    // points and probability come from the arguments; everything else is taken from
+    // ptNodeParams.
+    PtNodeParams(const PtNodeParams *const ptNodeParams,
+            const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos,
+            const int codePointCount, const int *const codePoints, const int probability)
+            : mHeadPos(ptNodeParams->getHeadPos()), mFlags(flags), mHasMovedFlag(true),
+              mParentPos(parentPos), mCodePointCount(codePointCount), mCodePoints(),
+              mTerminalIdFieldPos(ptNodeParams->getTerminalIdFieldPos()),
+              mTerminalId(ptNodeParams->getTerminalId()),
+              mProbabilityFieldPos(ptNodeParams->getProbabilityFieldPos()),
+              mProbability(probability),
+              mChildrenPosFieldPos(ptNodeParams->getChildrenPosFieldPos()),
+              mChildrenPos(ptNodeParams->getChildrenPos()),
+              mBigramLinkedNodePos(ptNodeParams->getBigramLinkedNodePos()),
+              mShortcutPos(ptNodeParams->getShortcutPos()),
+              mBigramPos(ptNodeParams->getBigramsPos()),
+              mSiblingPos(ptNodeParams->getSiblingNodePos()) {
+        memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
+    }
+
+    // Params built without an existing PtNode: every position other than the parent position
+    // is NOT_A_DICT_POS and the terminal id is NOT_A_TERMINAL_ID.
+    PtNodeParams(const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos,
+            const int codePointCount, const int *const codePoints, const int probability)
+            : mHeadPos(NOT_A_DICT_POS), mFlags(flags), mHasMovedFlag(true), mParentPos(parentPos),
+              mCodePointCount(codePointCount), mCodePoints(),
+              mTerminalIdFieldPos(NOT_A_DICT_POS),
+              mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID),
+              mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability),
+              mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(NOT_A_DICT_POS),
+              mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS),
+              mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_DICT_POS) {
+        memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
+    }
+
+    // A PtNode is considered valid when it has at least one code point.
+    AK_FORCE_INLINE bool isValid() const {
+        return mCodePointCount > 0;
+    }
+
+    // Head position of the PtNode
+    AK_FORCE_INLINE int getHeadPos() const {
+        return mHeadPos;
+    }
+
+    // Flags. The two dynamic flags below are only honored when mHasMovedFlag is set.
+    AK_FORCE_INLINE bool isDeleted() const {
+        return mHasMovedFlag && DynamicPtReadingUtils::isDeleted(mFlags);
+    }
+
+    AK_FORCE_INLINE bool willBecomeNonTerminal() const {
+        return mHasMovedFlag && DynamicPtReadingUtils::willBecomeNonTerminal(mFlags);
+    }
+
+    // Derived from the children position, not from the flags.
+    AK_FORCE_INLINE bool hasChildren() const {
+        return mChildrenPos != NOT_A_DICT_POS;
+    }
+
+    AK_FORCE_INLINE bool isTerminal() const {
+        return PatriciaTrieReadingUtils::isTerminal(mFlags);
+    }
+
+    AK_FORCE_INLINE bool isBlacklisted() const {
+        return PatriciaTrieReadingUtils::isBlacklisted(mFlags);
+    }
+
+    AK_FORCE_INLINE bool isNotAWord() const {
+        return PatriciaTrieReadingUtils::isNotAWord(mFlags);
+    }
+
+    AK_FORCE_INLINE bool hasBigrams() const {
+        return PatriciaTrieReadingUtils::hasBigrams(mFlags);
+    }
+
+    AK_FORCE_INLINE bool hasShortcutTargets() const {
+        return PatriciaTrieReadingUtils::hasShortcutTargets(mFlags);
+    }
+
+    // True for a not-a-word PtNode whose first code point is outside the Unicode space.
+    AK_FORCE_INLINE bool representsNonWordInfo() const {
+        return getCodePointCount() > 0 && !CharUtils::isInUnicodeSpace(getCodePoints()[0])
+                && isNotAWord();
+    }
+
+    // True for a not-a-word PtNode whose first code point is CODE_POINT_BEGINNING_OF_SENTENCE.
+    // Returns bool like the other predicates; the value is still usable wherever an int was.
+    AK_FORCE_INLINE bool representsBeginningOfSentence() const {
+        return getCodePointCount() > 0 && getCodePoints()[0] == CODE_POINT_BEGINNING_OF_SENTENCE
+                && isNotAWord();
+    }
+
+    // Parent node position
+    AK_FORCE_INLINE int getParentPos() const {
+        return mParentPos;
+    }
+
+    // Number of code points
+    AK_FORCE_INLINE uint8_t getCodePointCount() const {
+        return mCodePointCount;
+    }
+
+    // Pointer to the internal code point array; only the first getCodePointCount() entries
+    // were copied in by the constructor.
+    AK_FORCE_INLINE const int *getCodePoints() const {
+        return mCodePoints;
+    }
+
+    // Terminal id
+    AK_FORCE_INLINE int getTerminalIdFieldPos() const {
+        return mTerminalIdFieldPos;
+    }
+
+    AK_FORCE_INLINE int getTerminalId() const {
+        return mTerminalId;
+    }
+
+    // Probability
+    AK_FORCE_INLINE int getProbabilityFieldPos() const {
+        return mProbabilityFieldPos;
+    }
+
+    AK_FORCE_INLINE int getProbability() const {
+        return mProbability;
+    }
+
+    // Children PtNode array position
+    AK_FORCE_INLINE int getChildrenPosFieldPos() const {
+        return mChildrenPosFieldPos;
+    }
+
+    AK_FORCE_INLINE int getChildrenPos() const {
+        return mChildrenPos;
+    }
+
+    // Bigram linked node position.
+    AK_FORCE_INLINE int getBigramLinkedNodePos() const {
+        return mBigramLinkedNodePos;
+    }
+
+    // Shortcutlist position
+    AK_FORCE_INLINE int getShortcutPos() const {
+        return mShortcutPos;
+    }
+
+    // Bigrams position
+    AK_FORCE_INLINE int getBigramsPos() const {
+        return mBigramPos;
+    }
+
+    // Sibling node position
+    AK_FORCE_INLINE int getSiblingNodePos() const {
+        return mSiblingPos;
+    }
+
+ private:
+    // This class has a public copy constructor to be used as a return value.
+    DISALLOW_ASSIGNMENT_OPERATOR(PtNodeParams);
+
+    const int mHeadPos;
+    const PatriciaTrieReadingUtils::NodeFlags mFlags;
+    const bool mHasMovedFlag;
+    const int mParentPos;
+    const uint8_t mCodePointCount;
+    int mCodePoints[MAX_WORD_LENGTH];
+    const int mTerminalIdFieldPos;
+    const int mTerminalId;
+    const int mProbabilityFieldPos;
+    const int mProbability;
+    const int mChildrenPosFieldPos;
+    const int mChildrenPos;
+    const int mBigramLinkedNodePos;
+    const int mShortcutPos;
+    const int mBigramPos;
+    const int mSiblingPos;
+};
+} // namespace latinime
+#endif /* LATINIME_PT_NODE_PARAMS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h
new file mode 100644
index 000000000..31299a707
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PT_NODE_READER_H
+#define LATINIME_PT_NODE_READER_H
+
+#include "defines.h"
+
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+
+namespace latinime {
+
+// Interface class used to read PtNode information.
+class PtNodeReader {
+ public:
+    virtual ~PtNodeReader() {}
+
+    // Returns, by value, the params of the PtNode located at ptNodePos.
+    virtual const PtNodeParams fetchPtNodeParamsInBufferFromPtNodePos(
+            const int ptNodePos) const = 0;
+
+ protected:
+    // Only subclasses may construct the interface.
+    PtNodeReader() {}
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(PtNodeReader);
+};
+} // namespace latinime
+#endif /* LATINIME_PT_NODE_READER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h
new file mode 100644
index 000000000..a8029f73f
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PT_NODE_WRITER_H
+#define LATINIME_PT_NODE_WRITER_H
+
+#include <unordered_map>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+
+namespace latinime {
+
+class BigramProperty;
+class UnigramProperty;
+
+// Interface class used to write PtNode information. Every operation is pure virtual.
+class PtNodeWriter {
+ public:
+ typedef std::unordered_map<int, int> PtNodeArrayPositionRelocationMap;
+ typedef std::unordered_map<int, int> PtNodePositionRelocationMap;
+ struct DictPositionRelocationMap { // Bundles both relocation maps; copying is disabled.
+ public:
+ DictPositionRelocationMap()
+ : mPtNodeArrayPositionRelocationMap(), mPtNodePositionRelocationMap() {}
+
+ PtNodeArrayPositionRelocationMap mPtNodeArrayPositionRelocationMap;
+ PtNodePositionRelocationMap mPtNodePositionRelocationMap;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DictPositionRelocationMap);
+ };
+
+ virtual ~PtNodeWriter() {}
+ // NOTE(review): the bool results below presumably report success; confirm in implementations.
+ virtual bool markPtNodeAsDeleted(const PtNodeParams *const toBeUpdatedPtNodeParams) = 0;
+
+ virtual bool markPtNodeAsMoved(const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const int movedPos, const int bigramLinkedNodePos) = 0;
+
+ virtual bool markPtNodeAsWillBecomeNonTerminal(
+ const PtNodeParams *const toBeUpdatedPtNodeParams) = 0;
+
+ virtual bool updatePtNodeUnigramProperty(const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const UnigramProperty *const unigramProperty) = 0;
+
+ virtual bool updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(
+ const PtNodeParams *const toBeUpdatedPtNodeParams,
+ bool *const outNeedsToKeepPtNode) = 0;
+
+ virtual bool updateChildrenPosition(const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const int newChildrenPosition) = 0;
+
+ virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
+ int *const ptNodeWritingPos) = 0;
+
+ virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
+ const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos) = 0;
+
+ virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
+ const PtNodeParams *const targetPtNodeParam, const BigramProperty *const bigramProperty,
+ bool *const outAddedNewBigram) = 0;
+
+ virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,
+ const PtNodeParams *const targetPtNodeParam) = 0;
+
+ virtual bool updateAllBigramEntriesAndDeleteUselessEntries(
+ const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount) = 0;
+
+ virtual bool updateAllPositionFields(const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const DictPositionRelocationMap *const dictPositionRelocationMap,
+ int *const outBigramEntryCount) = 0;
+
+ virtual bool addShortcutTarget(const PtNodeParams *const ptNodeParams,
+ const int *const targetCodePoints, const int targetCodePointCount,
+ const int shortcutProbability) = 0;
+
+ protected:
+ PtNodeWriter() {} // Protected: only subclasses may construct this interface.
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(PtNodeWriter);
+};
+} // namespace latinime
+#endif /* LATINIME_PT_NODE_WRITER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.cpp
index 847dcdee5..91c76941c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
@@ -44,7 +44,7 @@ const int ShortcutListReadingUtils::WHITELIST_SHORTCUT_PROBABILITY = 15;
}
/* static */ int ShortcutListReadingUtils::readShortcutTarget(
- const uint8_t *const dictRoot, const int maxLength, int *const outWord, int *const pos) {
+ const uint8_t *const dictRoot, const int maxLength, int *const outWord, int *const pos) {
return ByteArrayUtils::readStringAndAdvancePosition(dictRoot, maxLength, outWord, pos);
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h
index a83ed5a50..d065bf7fd 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h
@@ -17,7 +17,7 @@
#ifndef LATINIME_SHORTCUT_LIST_READING_UTILS_H
#define LATINIME_SHORTCUT_LIST_READING_UTILS_H
-#include <stdint.h>
+#include <cstdint>
#include "defines.h"
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h
index 6ff95cac4..00bb502dc 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h
@@ -17,11 +17,11 @@
#ifndef LATINIME_BIGRAM_LIST_POLICY_H
#define LATINIME_BIGRAM_LIST_POLICY_H
-#include <stdint.h>
+#include <cstdint>
#include "defines.h"
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
-#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h"
namespace latinime {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
index 8a84bd261..002593c49 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
@@ -15,25 +15,29 @@
*/
-#include "suggest/policyimpl/dictionary/patricia_trie_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h"
#include "defines.h"
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_vector.h"
-#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
+#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
+#include "utils/char_utils.h"
namespace latinime {
-void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
+void PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const {
if (!dicNode->hasChildren()) {
return;
}
- int nextPos = dicNode->getChildrenPos();
+ int nextPos = dicNode->getChildrenPtNodeArrayPos();
if (nextPos < 0 || nextPos >= mDictBufferSize) {
AKLOGE("Children PtNode array position is invalid. pos: %d, dict size: %d",
nextPos, mDictBufferSize);
+ mIsCorrupted = true;
ASSERT(false);
return;
}
@@ -43,6 +47,7 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
if (nextPos < 0 || nextPos >= mDictBufferSize) {
AKLOGE("Child PtNode position is invalid. pos: %d, dict size: %d, childCount: %d / %d",
nextPos, mDictBufferSize, i, childCount);
+ mIsCorrupted = true;
ASSERT(false);
return;
}
@@ -52,14 +57,14 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
// This retrieves code points and the probability of the word by its terminal position.
// Due to the fact that words are ordered in the dictionary in a strict breadth-first order,
-// it is possible to check for this with advantageous complexity. For each node, we search
+// it is possible to check for this with advantageous complexity. For each PtNode array, we search
// for PtNodes with children and compare the children position with the position we look for.
// When we shoot the position we look for, it means the word we look for is in the children
// of the previous PtNode. The only tricky part is the fact that if we arrive at the end of a
// PtNode array with the last PtNode's children position still less than what we are searching for,
// we must descend the last PtNode's children (for example, if the word we are searching for starts
// with a z, it's the last PtNode of the root array, so all children addresses will be smaller
-// than the position we look for, and we have to descend the z node).
+// than the position we look for, and we have to descend the z PtNode).
/* Parameters :
* ptNodePos: the byte position of the terminal PtNode of the word we are searching for (this is
* what is stored as the "bigram position" in each bigram)
@@ -74,18 +79,33 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
int pos = getRootPosition();
int wordPos = 0;
// One iteration of the outer loop iterates through PtNode arrays. As stated above, we will
- // only traverse nodes that are actually a part of the terminal we are searching, so each time
- // we enter this loop we are one depth level further than last time.
- // The only reason we count nodes is because we want to reduce the probability of infinite
+ // only traverse PtNodes that are actually a part of the terminal we are searching, so each
+ // time we enter this loop we are one depth level further than last time.
+ // The only reason we count PtNodes is because we want to reduce the probability of infinite
// looping in case there is a bug. Since we know there is an upper bound to the depth we are
// supposed to traverse, it does not hurt to count iterations.
for (int loopCount = maxCodePointCount; loopCount > 0; --loopCount) {
int lastCandidatePtNodePos = 0;
// Let's loop through PtNodes in this PtNode array searching for either the terminal
// or one of its ascendants.
+ if (pos < 0 || pos >= mDictBufferSize) {
+ AKLOGE("PtNode array position is invalid. pos: %d, dict size: %d",
+ pos, mDictBufferSize);
+ mIsCorrupted = true;
+ ASSERT(false);
+ *outUnigramProbability = NOT_A_PROBABILITY;
+ return 0;
+ }
for (int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
mDictRoot, &pos); ptNodeCount > 0; --ptNodeCount) {
const int startPos = pos;
+ if (pos < 0 || pos >= mDictBufferSize) {
+ AKLOGE("PtNode position is invalid. pos: %d, dict size: %d", pos, mDictBufferSize);
+ mIsCorrupted = true;
+ ASSERT(false);
+ *outUnigramProbability = NOT_A_PROBABILITY;
+ return 0;
+ }
const PatriciaTrieReadingUtils::NodeFlags flags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
const int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
@@ -140,8 +160,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
found = true;
} else if (1 >= ptNodeCount) {
// However if we are on the LAST PtNode of this array, and we have NOT shot the
- // position we should descend THIS node. So we trick the lastCandidatePtNodePos
- // so that we will descend this PtNode, not the previous one.
+ // position we should descend THIS PtNode. So we trick the
+ // lastCandidatePtNodePos so that we will descend this PtNode, not the previous
+ // one.
lastCandidatePtNodePos = startPos;
found = true;
} else {
@@ -149,7 +170,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
found = false;
}
} else {
- // Even if we don't have children here, we could still be on the last PtNode of /
+ // Even if we don't have children here, we could still be on the last PtNode of
// this array. If this is the case, we should descend the last PtNode that had
// children, and their position is already in lastCandidatePtNodePos.
found = (1 >= ptNodeCount);
@@ -230,97 +251,27 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
return 0;
}
-// This function gets the position of the terminal node of the exact matching word in the
+// This function gets the position of the terminal PtNode of the exact matching word in the
// dictionary. If no match is found, it returns NOT_A_DICT_POS.
-int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
+int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const {
- int pos = getRootPosition();
- int wordPos = 0;
-
- while (true) {
- // If we already traversed the tree further than the word is long, there means
- // there was no match (or we would have found it).
- if (wordPos >= length) return NOT_A_DICT_POS;
- int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(mDictRoot,
- &pos);
- const int wChar = forceLowerCaseSearch
- ? CharUtils::toLowerCase(inWord[wordPos]) : inWord[wordPos];
- while (true) {
- // If there are no more PtNodes in this array, it means we could not
- // find a matching character for this depth, therefore there is no match.
- if (0 >= ptNodeCount) return NOT_A_DICT_POS;
- const int ptNodePos = pos;
- const PatriciaTrieReadingUtils::NodeFlags flags =
- PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
- int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot,
- &pos);
- if (character == wChar) {
- // This is the correct PtNode. Only one PtNode may start with the same char within
- // a PtNode array, so either we found our match in this array, or there is
- // no match and we can return NOT_A_DICT_POS. So we will check all the
- // characters in this PtNode indeed does match.
- if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
- character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot,
- &pos);
- while (NOT_A_CODE_POINT != character) {
- ++wordPos;
- // If we shoot the length of the word we search for, or if we find a single
- // character that does not match, as explained above, it means the word is
- // not in the dictionary (by virtue of this PtNode being the only one to
- // match the word on the first character, but not matching the whole word).
- if (wordPos >= length) return NOT_A_DICT_POS;
- if (inWord[wordPos] != character) return NOT_A_DICT_POS;
- character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
- mDictRoot, &pos);
- }
- }
- // If we come here we know that so far, we do match. Either we are on a terminal
- // and we match the length, in which case we found it, or we traverse children.
- // If we don't match the length AND don't have children, then a word in the
- // dictionary fully matches a prefix of the searched word but not the full word.
- ++wordPos;
- if (PatriciaTrieReadingUtils::isTerminal(flags)) {
- if (wordPos == length) {
- return ptNodePos;
- }
- PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
- }
- if (!PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
- return NOT_A_DICT_POS;
- }
- // We have children and we are still shorter than the word we are searching for, so
- // we need to traverse children. Put the pointer on the children position, and
- // break
- pos = PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot,
- flags, &pos);
- break;
- } else {
- // This PtNode does not match, so skip the remaining part and go to the next.
- if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
- PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH,
- &pos);
- }
- if (PatriciaTrieReadingUtils::isTerminal(flags)) {
- PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
- }
- if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
- PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot,
- flags, &pos);
- }
- if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
- mShortcutListPolicy.skipAllShortcuts(&pos);
- }
- if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
- mBigramListPolicy.skipAllBigrams(&pos);
- }
- }
- --ptNodeCount;
- }
+ // The trie walk is delegated to DynamicPtReadingHelper, starting at the root PtNode array.
+ DynamicPtReadingHelper readingHelper(&mPtNodeReader, &mPtNodeArrayReader);
+ readingHelper.initWithPtNodeArrayPos(getRootPosition());
+ const int ptNodePos =
+ readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
+ // A reading error flags the dictionary as corrupted; ptNodePos is still returned as computed.
+ if (readingHelper.isError()) {
+ mIsCorrupted = true;
+ AKLOGE("Dictionary reading error in getTerminalPtNodePositionOfWord().");
}
+ return ptNodePos;
}
int PatriciaTriePolicy::getProbability(const int unigramProbability,
const int bigramProbability) const {
+ // Due to space constraints, the probability for bigrams is approximate - the lower the unigram
+ // probability, the worse the precision. The theoretical maximum error in resulting probability
+ // is 8 - although in practice it's never bigger than 3 or 4 in very bad cases. This means
+ // that sometimes, we'll see some bigrams inverted here, but it can't get too bad.
if (unigramProbability == NOT_A_PROBABILITY) {
return NOT_A_PROBABILITY;
} else if (bigramProbability == NOT_A_PROBABILITY) {
@@ -335,99 +286,144 @@ int PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const
if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_PROBABILITY;
}
- int pos = ptNodePos;
- const PatriciaTrieReadingUtils::NodeFlags flags =
- PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
- if (!PatriciaTrieReadingUtils::isTerminal(flags)) {
- return NOT_A_PROBABILITY;
- }
- if (PatriciaTrieReadingUtils::isNotAWord(flags)
- || PatriciaTrieReadingUtils::isBlacklisted(flags)) {
+ const PtNodeParams ptNodeParams =
+ mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
+ if (ptNodeParams.isNotAWord() || ptNodeParams.isBlacklisted()) {
// If this is not a word, or if it's a blacklisted entry, it should behave as
// having no probability outside of the suggestion process (where it should be used
// for shortcuts).
return NOT_A_PROBABILITY;
}
- PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
- return getProbability(PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(
- mDictRoot, &pos), NOT_A_PROBABILITY);
+ return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
}
int PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS;
}
- int pos = ptNodePos;
- const PatriciaTrieReadingUtils::NodeFlags flags =
- PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
- if (!PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
- return NOT_A_DICT_POS;
- }
- PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
- if (PatriciaTrieReadingUtils::isTerminal(flags)) {
- PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
- }
- if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
- PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &pos);
- }
- return pos;
+ return mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos).getShortcutPos();
}
int PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS;
}
- int pos = ptNodePos;
- const PatriciaTrieReadingUtils::NodeFlags flags =
- PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
- if (!PatriciaTrieReadingUtils::hasBigrams(flags)) {
- return NOT_A_DICT_POS;
+ return mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos).getBigramsPos();
+}
+
+// Reads the PtNode at ptNodePos and, when it begins with a Unicode code point, appends it to
+// childDicNodes as a leaving child of dicNode. Returns the sibling position reported by
+// PatriciaTrieReadingUtils::readPtNodeInfo().
+int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode,
+ const int ptNodePos, DicNodeVector *childDicNodes) const {
+ PatriciaTrieReadingUtils::NodeFlags flags;
+ int mergedNodeCodePointCount = 0;
+ int mergedNodeCodePoints[MAX_WORD_LENGTH];
+ int probability = NOT_A_PROBABILITY;
+ int childrenPos = NOT_A_DICT_POS;
+ int shortcutPos = NOT_A_DICT_POS;
+ int bigramPos = NOT_A_DICT_POS;
+ int siblingPos = NOT_A_DICT_POS;
+ PatriciaTrieReadingUtils::readPtNodeInfo(mDictRoot, ptNodePos, getShortcutsStructurePolicy(),
+ getBigramsStructurePolicy(), &flags, &mergedNodeCodePointCount, mergedNodeCodePoints,
+ &probability, &childrenPos, &shortcutPos, &bigramPos, &siblingPos);
+ // Skip PtNodes not starting with a Unicode code point; they represent non-word information.
+ // The count check keeps an empty PtNode from reading the uninitialized mergedNodeCodePoints[0].
+ if (mergedNodeCodePointCount > 0 && CharUtils::isInUnicodeSpace(mergedNodeCodePoints[0])) {
+ childDicNodes->pushLeavingChild(dicNode, ptNodePos, childrenPos, probability,
+ PatriciaTrieReadingUtils::isTerminal(flags),
+ PatriciaTrieReadingUtils::hasChildrenInFlags(flags),
+ PatriciaTrieReadingUtils::isBlacklisted(flags)
+ || PatriciaTrieReadingUtils::isNotAWord(flags),
+ mergedNodeCodePointCount, mergedNodeCodePoints);
}
- PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
- if (PatriciaTrieReadingUtils::isTerminal(flags)) {
- PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
+ return siblingPos;
+}
+
+const WordProperty PatriciaTriePolicy::getWordProperty(const int *const codePoints,
+ const int codePointCount) const {
+ const int ptNodePos = getTerminalPtNodePositionOfWord(codePoints, codePointCount,
+ false /* forceLowerCaseSearch */);
+ if (ptNodePos == NOT_A_DICT_POS) {
+ AKLOGE("getWordProperty was called for invalid word.");
+ return WordProperty();
}
- if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
- PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &pos);
+ const PtNodeParams ptNodeParams =
+ mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
+ std::vector<int> codePointVector(ptNodeParams.getCodePoints(),
+ ptNodeParams.getCodePoints() + ptNodeParams.getCodePointCount());
+ // Fetch bigram information.
+ std::vector<BigramProperty> bigrams;
+ const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos);
+ int bigramWord1CodePoints[MAX_WORD_LENGTH];
+ BinaryDictionaryBigramsIterator bigramsIt(getBigramsStructurePolicy(), bigramListPos);
+ while (bigramsIt.hasNext()) {
+ // Fetch the next bigram information and forward the iterator.
+ bigramsIt.next();
+ // Skip the entry if the entry has been deleted. This never happens for ver2 dicts.
+ if (bigramsIt.getBigramPos() != NOT_A_DICT_POS) {
+ int word1Probability = NOT_A_PROBABILITY;
+ const int word1CodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
+ bigramsIt.getBigramPos(), MAX_WORD_LENGTH, bigramWord1CodePoints,
+ &word1Probability);
+ const std::vector<int> word1(bigramWord1CodePoints,
+ bigramWord1CodePoints + word1CodePointCount);
+ const int probability = getProbability(word1Probability, bigramsIt.getProbability());
+ bigrams.emplace_back(&word1, probability,
+ NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */);
+ }
}
- if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
- mShortcutListPolicy.skipAllShortcuts(&pos);;
+ // Fetch shortcut information.
+ std::vector<UnigramProperty::ShortcutProperty> shortcuts;
+ int shortcutPos = getShortcutPositionOfPtNode(ptNodePos);
+ if (shortcutPos != NOT_A_DICT_POS) {
+ int shortcutTargetCodePoints[MAX_WORD_LENGTH];
+ ShortcutListReadingUtils::getShortcutListSizeAndForwardPointer(mDictRoot, &shortcutPos);
+ bool hasNext = true;
+ while (hasNext) {
+ const ShortcutListReadingUtils::ShortcutFlags shortcutFlags =
+ ShortcutListReadingUtils::getFlagsAndForwardPointer(mDictRoot, &shortcutPos);
+ hasNext = ShortcutListReadingUtils::hasNext(shortcutFlags);
+ const int shortcutTargetLength = ShortcutListReadingUtils::readShortcutTarget(
+ mDictRoot, MAX_WORD_LENGTH, shortcutTargetCodePoints, &shortcutPos);
+ const std::vector<int> shortcutTarget(shortcutTargetCodePoints,
+ shortcutTargetCodePoints + shortcutTargetLength);
+ const int shortcutProbability =
+ ShortcutListReadingUtils::getProbabilityFromFlags(shortcutFlags);
+ shortcuts.emplace_back(&shortcutTarget, shortcutProbability);
+ }
}
- return pos;
+ const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
+ ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
+ NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
+ return WordProperty(&codePointVector, &unigramProperty, &bigrams);
}
-int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode,
- const int ptNodePos, DicNodeVector *childDicNodes) const {
- int pos = ptNodePos;
- const PatriciaTrieReadingUtils::NodeFlags flags =
- PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
- int mergedNodeCodePoints[MAX_WORD_LENGTH];
- const int mergedNodeCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
- mDictRoot, flags, MAX_WORD_LENGTH, mergedNodeCodePoints, &pos);
- const int probability = (PatriciaTrieReadingUtils::isTerminal(flags))?
- PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos)
- : NOT_A_PROBABILITY;
- const int childrenPos = PatriciaTrieReadingUtils::hasChildrenInFlags(flags) ?
- PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
- mDictRoot, flags, &pos) : NOT_A_DICT_POS;
- if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
- getShortcutsStructurePolicy()->skipAllShortcuts(&pos);
+int PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints,
+ int *const outCodePointCount) {
+ *outCodePointCount = 0;
+ if (token == 0) {
+ // Start iterating the dictionary.
+ mTerminalPtNodePositionsForIteratingWords.clear();
+ DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy(
+ &mTerminalPtNodePositionsForIteratingWords);
+ DynamicPtReadingHelper readingHelper(&mPtNodeReader, &mPtNodeArrayReader);
+ readingHelper.initWithPtNodeArrayPos(getRootPosition());
+ readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(&traversePolicy);
}
- if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
- getBigramsStructurePolicy()->skipAllBigrams(&pos);
+ const int terminalPtNodePositionsVectorSize =
+ static_cast<int>(mTerminalPtNodePositionsForIteratingWords.size());
+ if (token < 0 || token >= terminalPtNodePositionsVectorSize) {
+ AKLOGE("Given token %d is invalid.", token);
+ return 0;
}
- if (mergedNodeCodePointCount <= 0) {
- AKLOGE("Empty PtNode is not allowed. Code point count: %d", mergedNodeCodePointCount);
- ASSERT(false);
- return pos;
+ const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token];
+ int unigramProbability = NOT_A_PROBABILITY;
+ *outCodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(terminalPtNodePos,
+ MAX_WORD_LENGTH, outCodePoints, &unigramProbability);
+ const int nextToken = token + 1;
+ if (nextToken >= terminalPtNodePositionsVectorSize) {
+ // All words have been iterated.
+ mTerminalPtNodePositionsForIteratingWords.clear();
+ return 0;
}
- childDicNodes->pushLeavingChild(dicNode, ptNodePos, childrenPos, probability,
- PatriciaTrieReadingUtils::isTerminal(flags),
- PatriciaTrieReadingUtils::hasChildrenInFlags(flags),
- PatriciaTrieReadingUtils::isBlacklisted(flags) ||
- PatriciaTrieReadingUtils::isNotAWord(flags),
- mergedNodeCodePointCount, mergedNodeCodePoints);
- return pos;
+ return nextToken;
}
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
index 0f8662aea..ec8407408 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
@@ -17,13 +17,17 @@
#ifndef LATINIME_PATRICIA_TRIE_POLICY_H
#define LATINIME_PATRICIA_TRIE_POLICY_H
-#include <stdint.h>
+#include <cstdint>
+#include <vector>
#include "defines.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
-#include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
-#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v2/shortcut/shortcut_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h"
+#include "suggest/policyimpl/dictionary/utils/format_utils.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
namespace latinime {
@@ -33,28 +37,28 @@ class DicNodeVector;
class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public:
- PatriciaTriePolicy(const MmappedBuffer *const buffer)
- : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), buffer->getBufferSize()),
- mDictRoot(mBuffer->getBuffer() + mHeaderPolicy.getSize()),
- mDictBufferSize(mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
- mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {}
-
- ~PatriciaTriePolicy() {
- delete mBuffer;
- }
+ PatriciaTriePolicy(MmappedBuffer::MmappedBufferPtr mmappedBuffer)
+ : mMmappedBuffer(std::move(mmappedBuffer)),
+ mHeaderPolicy(mMmappedBuffer->getBuffer(), FormatUtils::VERSION_2),
+ mDictRoot(mMmappedBuffer->getBuffer() + mHeaderPolicy.getSize()),
+ mDictBufferSize(mMmappedBuffer->getBufferSize() - mHeaderPolicy.getSize()),
+ mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot),
+ mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy),
+ mPtNodeArrayReader(mDictRoot, mDictBufferSize),
+ mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {}
AK_FORCE_INLINE int getRootPosition() const {
return 0;
}
- void createAndGetAllChildNodes(const DicNode *const dicNode,
+ void createAndGetAllChildDicNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const;
int getCodePointsAndProbabilityAndReturnCodePointCount(
const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const;
- int getTerminalNodePositionOfWord(const int *const inWord,
+ int getTerminalPtNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const;
int getProbability(const int unigramProbability, const int bigramProbability) const;
@@ -77,34 +81,43 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
return &mShortcutListPolicy;
}
- bool addUnigramWord(const int *const word, const int length, const int probability) {
+ bool addUnigramEntry(const int *const word, const int length,
+ const UnigramProperty *const unigramProperty) {
+ // This method should not be called for non-updatable dictionary.
+ AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary.");
+ return false;
+ }
+
+ bool removeUnigramEntry(const int *const word, const int length) {
// This method should not be called for non-updatable dictionary.
- AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
+ AKLOGI("Warning: removeUnigramEntry() is called for non-updatable dictionary.");
return false;
}
- bool addBigramWords(const int *const word0, const int length0, const int *const word1,
- const int length1, const int probability) {
+ bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+ const BigramProperty *const bigramProperty) {
// This method should not be called for non-updatable dictionary.
- AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
+ AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
return false;
}
- bool removeBigramWords(const int *const word0, const int length0, const int *const word1,
- const int length1) {
+ bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word,
+ const int length) {
// This method should not be called for non-updatable dictionary.
- AKLOGI("Warning: removeBigramWords() is called for non-updatable dictionary.");
+ AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
return false;
}
- void flush(const char *const filePath) {
+ bool flush(const char *const filePath) {
// This method should not be called for non-updatable dictionary.
AKLOGI("Warning: flush() is called for non-updatable dictionary.");
+ return false;
}
- void flushWithGC(const char *const filePath) {
+ bool flushWithGC(const char *const filePath) {
// This method should not be called for non-updatable dictionary.
AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
+ return false;
}
bool needsToRunGC(const bool mindsBlockByGC) const {
@@ -113,7 +126,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
return false;
}
- void getProperty(const char *const query, char *const outResult,
+ void getProperty(const char *const query, const int queryLength, char *const outResult,
const int maxResultLength) {
// getProperty is not supported for this class.
if (maxResultLength > 0) {
@@ -121,15 +134,29 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
}
}
+ const WordProperty getWordProperty(const int *const codePoints,
+ const int codePointCount) const;
+
+ int getNextWordAndNextToken(const int token, int *const outCodePoints,
+ int *const outCodePointCount);
+
+ bool isCorrupted() const {
+ return mIsCorrupted;
+ }
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);
- const MmappedBuffer *const mBuffer;
+ const MmappedBuffer::MmappedBufferPtr mMmappedBuffer;
const HeaderPolicy mHeaderPolicy;
const uint8_t *const mDictRoot;
const int mDictBufferSize;
const BigramListPolicy mBigramListPolicy;
const ShortcutListPolicy mShortcutListPolicy;
+ const Ver2ParticiaTrieNodeReader mPtNodeReader;
+ const Ver2PtNodeArrayReader mPtNodeArrayReader;
+ std::vector<int> mTerminalPtNodePositionsForIteratingWords;
+ mutable bool mIsCorrupted;
int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos,
DicNodeVector *const childDicNodes) const;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/shortcut/shortcut_list_policy.h
index d73f73953..8e16ccc05 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/shortcut/shortcut_list_policy.h
@@ -17,11 +17,11 @@
#ifndef LATINIME_SHORTCUT_LIST_POLICY_H
#define LATINIME_SHORTCUT_LIST_POLICY_H
-#include <stdint.h>
+#include <cstdint>
#include "defines.h"
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
-#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h"
namespace latinime {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp
new file mode 100644
index 000000000..c1e938710
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h"
+
+#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
+
+namespace latinime {
+
+// Decodes the PtNode stored at ptNodePos in the dictionary buffer and returns its parameters.
+// A default-constructed PtNodeParams is returned (after logging and ASSERT) when ptNodePos lies
+// outside [0, mDictSize) or when the decoded node reports no code points.
+const PtNodeParams Ver2ParticiaTrieNodeReader::fetchPtNodeParamsInBufferFromPtNodePos(
+        const int ptNodePos) const {
+    const bool isPosInsideDict = (0 <= ptNodePos) && (ptNodePos < mDictSize);
+    if (!isPosInsideDict) {
+        // Reading invalid position because of bug or broken dictionary.
+        AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d",
+                ptNodePos, mDictSize);
+        ASSERT(false);
+        return PtNodeParams();
+    }
+    // Output slots filled in by readPtNodeInfo().
+    PatriciaTrieReadingUtils::NodeFlags nodeFlags;
+    int codePoints[MAX_WORD_LENGTH];
+    int codePointCount = 0;
+    int unigramProbability = NOT_A_PROBABILITY;
+    int childrenPosition = NOT_A_DICT_POS;
+    int shortcutListPosition = NOT_A_DICT_POS;
+    int bigramListPosition = NOT_A_DICT_POS;
+    int siblingPosition = NOT_A_DICT_POS;
+    PatriciaTrieReadingUtils::readPtNodeInfo(mDictBuffer, ptNodePos, mShortuctPolicy,
+            mBigramPolicy, &nodeFlags, &codePointCount, codePoints, &unigramProbability,
+            &childrenPosition, &shortcutListPosition, &bigramListPosition, &siblingPosition);
+    if (codePointCount > 0) {
+        return PtNodeParams(ptNodePos, nodeFlags, codePointCount, codePoints,
+                unigramProbability, childrenPosition, shortcutListPosition, bigramListPosition,
+                siblingPosition);
+    }
+    // A PtNode must carry at least one code point.
+    AKLOGE("Empty PtNode is not allowed. Code point count: %d", codePointCount);
+    ASSERT(false);
+    return PtNodeParams();
+}
+
+}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h
new file mode 100644
index 000000000..f0725b66d
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER2_PATRICIA_TRIE_NODE_READER_H
+#define LATINIME_VER2_PATRICIA_TRIE_NODE_READER_H
+
+#include <cstdint>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
+
+namespace latinime {
+
+class DictionaryBigramsStructurePolicy;
+class DictionaryShortcutsStructurePolicy;
+
+// PtNodeReader implementation that decodes PtNodes directly out of a dictionary byte buffer.
+// The reader only stores the pointers it is given; nothing in this class releases them.
+// NOTE(review): presumably the buffer and both policies must outlive the reader - confirm with
+// the owning policy class.
+class Ver2ParticiaTrieNodeReader : public PtNodeReader {
+ public:
+ // dictBuffer: start of the dictionary data; dictSize: exclusive upper bound for PtNode
+ // positions accepted by this reader. bigramPolicy and shortcutPolicy are kept for use while
+ // decoding a PtNode.
+ Ver2ParticiaTrieNodeReader(const uint8_t *const dictBuffer, const int dictSize,
+ const DictionaryBigramsStructurePolicy *const bigramPolicy,
+ const DictionaryShortcutsStructurePolicy *const shortcutPolicy)
+ : mDictBuffer(dictBuffer), mDictSize(dictSize), mBigramPolicy(bigramPolicy),
+ mShortuctPolicy(shortcutPolicy) {}
+
+ // Returns the parameters of the PtNode located at ptNodePos.
+ virtual const PtNodeParams fetchPtNodeParamsInBufferFromPtNodePos(const int ptNodePos) const;
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Ver2ParticiaTrieNodeReader);
+
+ const uint8_t *const mDictBuffer;
+ const int mDictSize;
+ const DictionaryBigramsStructurePolicy *const mBigramPolicy;
+ // Shortcut policy (the member name is misspelled; it is referenced under this name elsewhere).
+ const DictionaryShortcutsStructurePolicy *const mShortuctPolicy;
+};
+} // namespace latinime
+#endif /* LATINIME_VER2_PATRICIA_TRIE_NODE_READER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.cpp
new file mode 100644
index 000000000..b46617d96
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.cpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h"
+
+#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
+
+namespace latinime {
+
+// Reads the header of the PtNode array at ptNodeArrayPos. On success stores the number of
+// PtNodes in *outPtNodeCount and the position right after the header in *outFirstPtNodePos,
+// then returns true. Returns false, leaving both outputs untouched, when ptNodeArrayPos is
+// outside [0, mDictSize).
+bool Ver2PtNodeArrayReader::readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos,
+        int *const outPtNodeCount, int *const outFirstPtNodePos) const {
+    const bool isPosInsideDict = (0 <= ptNodeArrayPos) && (ptNodeArrayPos < mDictSize);
+    if (!isPosInsideDict) {
+        // Reading invalid position because of a bug or a broken dictionary.
+        AKLOGE("Reading PtNode array info from invalid dictionary position: %d, dict size: %d",
+                ptNodeArrayPos, mDictSize);
+        ASSERT(false);
+        return false;
+    }
+    // The size reader moves the cursor past the array header.
+    int cursor = ptNodeArrayPos;
+    *outPtNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
+            mDictBuffer, &cursor);
+    *outFirstPtNodePos = cursor;
+    return true;
+}
+
+// Validates forwordLinkPos against [0, mDictSize). For a valid position the output is always
+// NOT_A_DICT_POS and true is returned; for an invalid one the output is left untouched and
+// false is returned.
+bool Ver2PtNodeArrayReader::readForwardLinkAndReturnIfValid(const int forwordLinkPos,
+        int *const outNextPtNodeArrayPos) const {
+    if (0 <= forwordLinkPos && forwordLinkPos < mDictSize) {
+        // Ver2 dicts don't have forward links.
+        *outNextPtNodeArrayPos = NOT_A_DICT_POS;
+        return true;
+    }
+    // Reading invalid position because of bug or broken dictionary.
+    AKLOGE("Reading forward link from invalid dictionary position: %d, dict size: %d",
+            forwordLinkPos, mDictSize);
+    ASSERT(false);
+    return false;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h
new file mode 100644
index 000000000..548272148
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER2_PT_NODE_ARRAY_READER_H
+#define LATINIME_VER2_PT_NODE_ARRAY_READER_H
+
+#include <cstdint>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h"
+
+namespace latinime {
+
+// PtNodeArrayReader implementation that reads PtNode array headers out of a dictionary byte
+// buffer. Only the buffer pointer and its size are stored; nothing in this class releases the
+// buffer.
+class Ver2PtNodeArrayReader : public PtNodeArrayReader {
+ public:
+    // dictBuffer: start of the dictionary data; dictSize: exclusive upper bound for positions
+    // accepted by the read methods.
+    // (The stray ';' that followed the constructor body was an empty declaration and has been
+    // dropped; pedantic builds warn about it.)
+    Ver2PtNodeArrayReader(const uint8_t *const dictBuffer, const int dictSize)
+            : mDictBuffer(dictBuffer), mDictSize(dictSize) {}
+
+    // Outputs the PtNode count of the array at ptNodeArrayPos and the position of its first
+    // PtNode. Returns whether ptNodeArrayPos was a valid position.
+    virtual bool readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos,
+            int *const outPtNodeCount, int *const outFirstPtNodePos) const;
+    // Outputs the position of the next PtNode array linked from forwordLinkPos. Returns whether
+    // forwordLinkPos was a valid position.
+    virtual bool readForwardLinkAndReturnIfValid(const int forwordLinkPos,
+            int *const outNextPtNodeArrayPos) const;
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(Ver2PtNodeArrayReader);
+
+    const uint8_t *const mDictBuffer;
+    const int mDictSize;
+};
+} // namespace latinime
+#endif /* LATINIME_VER2_PT_NODE_ARRAY_READER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp
new file mode 100644
index 000000000..146cab6c2
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp
@@ -0,0 +1,282 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h"
+
+#include "suggest/core/dictionary/property/bigram_property.h"
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+
+namespace latinime {
+
+// Reads the bigram entry at *bigramEntryPos, advances *bigramEntryPos past it and fills in
+// whichever of the three outputs are non-null:
+//   outBigramPos   - PtNode position looked up from the entry's target terminal id.
+//   outProbability - probability decoded from historical info when the entry has it, otherwise
+//                    the probability stored in the entry.
+//   outHasNext     - whether the entry reports a following entry.
+void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
+        bool *const outHasNext, int *const bigramEntryPos) const {
+    const BigramEntry entry =
+            mBigramDictContent->getBigramEntryAndAdvancePosition(bigramEntryPos);
+    if (outBigramPos) {
+        // Lookup target PtNode position.
+        const int targetTerminalId = entry.getTargetTerminalId();
+        *outBigramPos =
+                mTerminalPositionLookupTable->getTerminalPtNodePosition(targetTerminalId);
+    }
+    if (outProbability) {
+        *outProbability = entry.hasHistoricalInfo()
+                ? ForgettingCurveUtils::decodeProbability(entry.getHistoricalInfo(),
+                        mHeaderPolicy)
+                : entry.getProbability();
+    }
+    if (outHasNext) {
+        *outHasNext = entry.hasNext();
+    }
+}
+
+// Adds a bigram from the word identified by terminalId to newTargetTerminalId, or updates the
+// entry if the list already has one for that target. The stored probability / historical info
+// is derived from bigramProperty via createUpdatedBigramEntryFrom().
+// Returns false as soon as creating a list or writing an entry, terminator or link fails.
+// outAddedNewEntry may be null. When non-null it is set to true if a list was created, an entry
+// was appended or an invalid entry was reused, and to false if a valid entry was overwritten.
+// Note that in the reuse case it is set before the final write, so it can be true even when
+// that write fails and false is returned.
+bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId,
+ const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) {
+ // 1. The word has no bigrams yet.
+ // 2. The word has bigrams, and there is the target in the list.
+ // 3. The word has bigrams, and there is an invalid entry that can be reclaimed.
+ // 4. The word has bigrams. We have to append new bigram entry to the list.
+ // 5. Same as 4, but the list is the last entry of the content file.
+ if (outAddedNewEntry) {
+ *outAddedNewEntry = false;
+ }
+ const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+ if (bigramListPos == NOT_A_DICT_POS) {
+ // Case 1. PtNode that doesn't have a bigram list.
+ // Create new bigram list.
+ if (!mBigramDictContent->createNewBigramList(terminalId)) {
+ return false;
+ }
+ const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
+ newTargetTerminalId);
+ const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(&newBigramEntry,
+ bigramProperty);
+ // Write an entry.
+ // The head position is read again because the list was just created.
+ int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+ if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(&bigramEntryToWrite,
+ &writingPos)) {
+ AKLOGE("Cannot write bigram entry. pos: %d.", writingPos);
+ return false;
+ }
+ // writingPos now points right after the entry; close the list there.
+ if (!mBigramDictContent->writeTerminator(writingPos)) {
+ AKLOGE("Cannot write bigram list terminator. pos: %d.", writingPos);
+ return false;
+ }
+ if (outAddedNewEntry) {
+ *outAddedNewEntry = true;
+ }
+ return true;
+ }
+
+ // Look for an entry with the same target or, failing that, an invalid entry to reuse.
+ // tailEntryPos receives the position of the entry that ends the list.
+ int tailEntryPos = NOT_A_DICT_POS;
+ const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos,
+ &tailEntryPos);
+ if (entryPosToUpdate == NOT_A_DICT_POS) {
+ // Case 4, 5. Add new entry to the bigram list.
+ const int contentTailPos = mBigramDictContent->getContentTailPos();
+ // If the tail entry is at the tail of content buffer, the new entry can be written without
+ // link (Case 5).
+ const bool canAppendEntry =
+ contentTailPos == tailEntryPos + mBigramDictContent->getBigramEntrySize();
+ // Case 5 overwrites the tail entry in place; Case 4 writes at the end of the content.
+ const int newEntryPos = canAppendEntry ? tailEntryPos : contentTailPos;
+ int writingPos = newEntryPos;
+ // Write new entry at the tail position of the bigram content.
+ const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
+ newTargetTerminalId);
+ const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
+ &newBigramEntry, bigramProperty);
+ if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(&bigramEntryToWrite,
+ &writingPos)) {
+ AKLOGE("Cannot write bigram entry. pos: %d.", writingPos);
+ return false;
+ }
+ if (!mBigramDictContent->writeTerminator(writingPos)) {
+ AKLOGE("Cannot write bigram list terminator. pos: %d.", writingPos);
+ return false;
+ }
+ if (!canAppendEntry) {
+ // Update link of the current tail entry.
+ // The link is written only after the new entry and its terminator are in place.
+ if (!mBigramDictContent->writeLink(newEntryPos, tailEntryPos)) {
+ AKLOGE("Cannot update bigram entry link. pos: %d, linked entry pos: %d.",
+ tailEntryPos, newEntryPos);
+ return false;
+ }
+ }
+ if (outAddedNewEntry) {
+ *outAddedNewEntry = true;
+ }
+ return true;
+ }
+
+ // Case 2. Overwrite the existing entry. Case 3. Reclaim and reuse the existing invalid entry.
+ const BigramEntry originalBigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate);
+ if (!originalBigramEntry.isValid()) {
+ // Case 3. Reuse the existing invalid entry. outAddedNewEntry is false when an existing
+ // entry is updated.
+ if (outAddedNewEntry) {
+ *outAddedNewEntry = true;
+ }
+ }
+ // Point the entry at the new target first, then apply the probability / historical info.
+ const BigramEntry updatedBigramEntry =
+ originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId);
+ const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
+ &updatedBigramEntry, bigramProperty);
+ return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate);
+}
+
+// Removes the bigram from the word identified by terminalId to targetTerminalId by overwriting
+// its entry with an invalidated copy. Returns false when the word has no bigram list, when no
+// entry for the target exists, or when the overwrite fails.
+bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) {
+    const int listHeadPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+    if (listHeadPos == NOT_A_DICT_POS) {
+        // The word has no bigram list at all.
+        return false;
+    }
+    const int foundEntryPos = getEntryPosToUpdate(targetTerminalId, listHeadPos,
+            nullptr /* outTailEntryPos */);
+    if (foundEntryPos == NOT_A_DICT_POS) {
+        // Neither a matching nor a reusable entry is in the list.
+        return false;
+    }
+    const BigramEntry foundEntry = mBigramDictContent->getBigramEntry(foundEntryPos);
+    if (foundEntry.getTargetTerminalId() != targetTerminalId) {
+        // The search only came back with a reusable entry, not with the requested target.
+        return false;
+    }
+    // Mark the entry as invalid and write it back over the original.
+    const BigramEntry invalidatedEntry = foundEntry.getInvalidatedEntry();
+    return mBigramDictContent->writeBigramEntry(&invalidatedEntry, foundEntryPos);
+}
+
+// Walks the bigram list of the word identified by terminalId and, for every valid entry:
+//   - invalidates it when its target terminal id no longer maps to a PtNode position;
+//   - when it carries historical info, refreshes that info and either writes it back (entry
+//     kept) or invalidates the entry if it no longer needs to be kept;
+//   - otherwise leaves it untouched (entry kept).
+// *outBigramCount is incremented once per kept entry; it must not be null.
+// Returns true when the word has no bigram list or the whole list was processed, and false as
+// soon as a write fails.
+bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,
+        int *const outBigramCount) {
+    const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+    if (bigramListPos == NOT_A_DICT_POS) {
+        // Bigram list doesn't exist.
+        return true;
+    }
+    bool hasNext = true;
+    int readingPos = bigramListPos;
+    while (hasNext) {
+        const BigramEntry bigramEntry =
+                mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
+        // Derive the entry position from the advanced cursor, exactly as getEntryPosToUpdate()
+        // does. Lists can contain links, so the position held before the read may be that of a
+        // link record rather than of the entry that was returned; writing there would clobber
+        // the link instead of updating the entry.
+        const int entryPos = readingPos - mBigramDictContent->getBigramEntrySize();
+        hasNext = bigramEntry.hasNext();
+        if (!bigramEntry.isValid()) {
+            continue;
+        }
+        const int targetPtNodePos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
+                bigramEntry.getTargetTerminalId());
+        if (targetPtNodePos == NOT_A_DICT_POS) {
+            // Invalidate bigram entry.
+            const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
+            if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
+                return false;
+            }
+        } else if (bigramEntry.hasHistoricalInfo()) {
+            const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
+                    bigramEntry.getHistoricalInfo(), mHeaderPolicy);
+            if (ForgettingCurveUtils::needsToKeep(&historicalInfo, mHeaderPolicy)) {
+                const BigramEntry updatedBigramEntry =
+                        bigramEntry.updateHistoricalInfoAndGetEntry(&historicalInfo);
+                if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
+                    return false;
+                }
+                *outBigramCount += 1;
+            } else {
+                // Remove entry.
+                const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
+                if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
+                    return false;
+                }
+            }
+        } else {
+            *outBigramCount += 1;
+        }
+    }
+    return true;
+}
+
+// Returns the number of valid entries in the bigram list of the word identified by terminalId,
+// or 0 when the word has no bigram list.
+int Ver4BigramListPolicy::getBigramEntryConut(const int terminalId) {
+    const int listHeadPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+    if (listHeadPos == NOT_A_DICT_POS) {
+        // No list, hence no entries.
+        return 0;
+    }
+    int validEntryCount = 0;
+    int cursor = listHeadPos;
+    bool moreEntriesFollow;
+    // At least one entry is always read from an existing list.
+    do {
+        const BigramEntry entry =
+                mBigramDictContent->getBigramEntryAndAdvancePosition(&cursor);
+        moreEntriesFollow = entry.hasNext();
+        if (entry.isValid()) {
+            ++validEntryCount;
+        }
+    } while (moreEntriesFollow);
+    return validEntryCount;
+}
+
+// Scans the bigram list starting at bigramListPos.
+// Returns the position of the first entry whose target is targetTerminalIdToFind. If the list
+// ends first, returns the position of the last invalid entry seen (NOT_A_DICT_POS if none).
+// outTailEntryPos may be null. When non-null it is reset to NOT_A_DICT_POS and receives the
+// position of the entry that ends the list only if the scan reaches it.
+int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
+        const int bigramListPos, int *const outTailEntryPos) const {
+    if (outTailEntryPos) {
+        *outTailEntryPos = NOT_A_DICT_POS;
+    }
+    int reusableEntryPos = NOT_A_DICT_POS;
+    for (int cursor = bigramListPos;;) {
+        const BigramEntry entry =
+                mBigramDictContent->getBigramEntryAndAdvancePosition(&cursor);
+        // The cursor now sits right behind the entry that was read.
+        const int currentEntryPos = cursor - mBigramDictContent->getBigramEntrySize();
+        if (!entry.hasNext()) {
+            // End of the list; its target is deliberately not compared.
+            if (outTailEntryPos) {
+                *outTailEntryPos = currentEntryPos;
+            }
+            return reusableEntryPos;
+        }
+        if (entry.getTargetTerminalId() == targetTerminalIdToFind) {
+            // Entry with same target is found.
+            return currentEntryPos;
+        }
+        if (!entry.isValid()) {
+            // Invalid entry that can be reused is found.
+            reusableEntryPos = currentEntryPos;
+        }
+    }
+}
+
+// Returns a copy of *originalBigramEntry updated from *bigramProperty. When the header reports
+// that words carry historical info, the entry's historical info is updated; otherwise only its
+// probability is replaced.
+const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
+        const BigramEntry *const originalBigramEntry,
+        const BigramProperty *const bigramProperty) const {
+    // TODO: Consolidate historical info and probability.
+    if (!mHeaderPolicy->hasHistoricalInfoOfWords()) {
+        return originalBigramEntry->updateProbabilityAndGetEntry(bigramProperty->getProbability());
+    }
+    const HistoricalInfo infoFromProperty(bigramProperty->getTimestamp(),
+            bigramProperty->getLevel(), bigramProperty->getCount());
+    const HistoricalInfo mergedInfo = ForgettingCurveUtils::createUpdatedHistoricalInfo(
+            originalBigramEntry->getHistoricalInfo(), bigramProperty->getProbability(),
+            &infoFromProperty, mHeaderPolicy);
+    return originalBigramEntry->updateHistoricalInfoAndGetEntry(&mergedInfo);
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h
new file mode 100644
index 000000000..55ba613a5
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_BIGRAM_LIST_POLICY_H
+#define LATINIME_VER4_BIGRAM_LIST_POLICY_H
+
+#include "defines.h"
+#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h"
+
+namespace latinime {
+
+class BigramDictContent;
+class BigramProperty;
+class HeaderPolicy;
+class TerminalPositionLookupTable;
+
+// DictionaryBigramsStructurePolicy implementation backed by a BigramDictContent. Besides
+// reading entries it can add, remove and refresh them.
+// The three pointers passed to the constructor are stored as given; nothing in this class
+// releases them.
+class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
+ public:
+ Ver4BigramListPolicy(BigramDictContent *const bigramDictContent,
+ const TerminalPositionLookupTable *const terminalPositionLookupTable,
+ const HeaderPolicy *const headerPolicy)
+ : mBigramDictContent(bigramDictContent),
+ mTerminalPositionLookupTable(terminalPositionLookupTable),
+ mHeaderPolicy(headerPolicy) {}
+
+ // Reads the entry at *bigramEntryPos and advances the position. Each of the first three
+ // output pointers may be null.
+ void getNextBigram(int *const outBigramPos, int *const outProbability,
+ bool *const outHasNext, int *const bigramEntryPos) const;
+
+ // Intentionally empty; pos is left unchanged.
+ void skipAllBigrams(int *const pos) const {
+ // Do nothing because we don't need to skip bigram lists in ver4 dictionaries.
+ }
+
+ // Adds or updates the bigram terminalId -> newTargetTerminalId. outAddedNewEntry may be null.
+ bool addNewEntry(const int terminalId, const int newTargetTerminalId,
+ const BigramProperty *const bigramProperty, bool *const outAddedNewEntry);
+
+ // Invalidates the bigram terminalId -> targetTerminalId. Returns false if it doesn't exist.
+ bool removeEntry(const int terminalId, const int targetTerminalId);
+
+ // Refreshes or invalidates the entries of terminalId's list and adds the number of kept
+ // entries to *outBigramCount.
+ bool updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,
+ int *const outBigramCount);
+
+ // Returns the number of valid entries in terminalId's bigram list.
+ int getBigramEntryConut(const int terminalId);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy);
+
+ // Finds the entry for targetTerminalIdToFind, or an invalid entry that can be reused.
+ int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos,
+ int *const outTailEntryPos) const;
+
+ // Builds the entry to store from an existing entry and the requested bigram property.
+ const BigramEntry createUpdatedBigramEntryFrom(const BigramEntry *const originalBigramEntry,
+ const BigramProperty *const bigramProperty) const;
+
+ // Non-const because entries are written through it.
+ BigramDictContent *const mBigramDictContent;
+ const TerminalPositionLookupTable *const mTerminalPositionLookupTable;
+ const HeaderPolicy *const mHeaderPolicy;
+};
+} // namespace latinime
+#endif /* LATINIME_VER4_BIGRAM_LIST_POLICY_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
new file mode 100644
index 000000000..d7e1952b5
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
@@ -0,0 +1,219 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
+
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+const int BigramDictContent::INVALID_LINKED_ENTRY_POS = Ver4DictConstants::NOT_A_TERMINAL_ID;  // Link target that marks a list terminator.
+
+// Decodes the bigram entry found at *bigramEntryPos, passing the position through to the
+// buffer's read helpers. A link entry redirects *bigramEntryPos to the linked position and the
+// entry there is returned instead; a link to INVALID_LINKED_ENTRY_POS is the list terminator and
+// yields an invalid entry whose hasNext() is false. An out-of-range position is logged and
+// yields the same invalid entry.
+const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
+        int *const bigramEntryPos) const {
+    const BufferWithExtendableBuffer *const buffer = getContentBuffer();
+    const int entryEndPos = (*bigramEntryPos) + getBigramEntrySize();
+    const bool isReadable = *bigramEntryPos >= 0 && entryEndPos <= buffer->getTailPosition();
+    if (!isReadable) {
+        AKLOGE("Invalid bigram entry position. bigramEntryPos: %d, bigramEntryTailPos: %d, "
+                "bufSize: %d", *bigramEntryPos, entryEndPos, buffer->getTailPosition());
+        ASSERT(false);
+        return BigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
+                Ver4DictConstants::NOT_A_TERMINAL_ID);
+    }
+    // Fields are read in this order: flags, then either (timestamp, level, count) or
+    // probability, then the target terminal id.
+    const int flags = buffer->readUintAndAdvancePosition(
+            Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos);
+    int timestamp = NOT_A_TIMESTAMP;
+    int level = 0;
+    int count = 0;
+    int probability = NOT_A_PROBABILITY;
+    if (!mHasHistoricalInfo) {
+        probability = buffer->readUintAndAdvancePosition(
+                Ver4DictConstants::PROBABILITY_SIZE, bigramEntryPos);
+    } else {
+        timestamp = buffer->readUintAndAdvancePosition(
+                Ver4DictConstants::TIME_STAMP_FIELD_SIZE, bigramEntryPos);
+        level = buffer->readUintAndAdvancePosition(
+                Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, bigramEntryPos);
+        count = buffer->readUintAndAdvancePosition(
+                Ver4DictConstants::WORD_COUNT_FIELD_SIZE, bigramEntryPos);
+    }
+    const int rawTargetTerminalId = buffer->readUintAndAdvancePosition(
+            Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos);
+    // The stored "invalid target" marker is mapped back to NOT_A_TERMINAL_ID.
+    int targetTerminalId = rawTargetTerminalId;
+    if (rawTargetTerminalId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) {
+        targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
+    }
+    if ((flags & Ver4DictConstants::BIGRAM_IS_LINK_MASK) != 0) {
+        // For a link entry the target field holds the position of the linked entry.
+        if (targetTerminalId != INVALID_LINKED_ENTRY_POS) {
+            *bigramEntryPos = targetTerminalId;
+            return getBigramEntryAndAdvancePosition(bigramEntryPos);
+        }
+        // Bigram list terminator is found.
+        return BigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
+                Ver4DictConstants::NOT_A_TERMINAL_ID);
+    }
+    // hasNext is always true for regular entries: reading continues until the terminator.
+    if (!mHasHistoricalInfo) {
+        return BigramEntry(true /* hasNext */, probability, targetTerminalId);
+    }
+    const HistoricalInfo historicalInfo(timestamp, level, count);
+    return BigramEntry(true /* hasNext */, probability, &historicalInfo, targetTerminalId);
+}
+
+// Writes bigramEntryToWrite as a regular (non-link) entry at *entryWritingPos by forwarding its
+// attributes to writeBigramEntryAttributesAndAdvancePosition().
+bool BigramDictContent::writeBigramEntryAndAdvancePosition(
+        const BigramEntry *const bigramEntryToWrite, int *const entryWritingPos) {
+    const HistoricalInfo *const historicalInfo = bigramEntryToWrite->getHistoricalInfo();
+    const int probability = bigramEntryToWrite->getProbability();
+    const int targetTerminalId = bigramEntryToWrite->getTargetTerminalId();
+    return writeBigramEntryAttributesAndAdvancePosition(false /* isLink */, probability,
+            targetTerminalId, historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
+            historicalInfo->getCount(), entryWritingPos);
+}
+
+// Writes one raw bigram entry at *entryWritingPos: the flags, then either the probability or
+// (timestamp, level, count) depending on mHasHistoricalInfo, then the target terminal id.
+// Returns false, after logging, as soon as one field cannot be written.
+bool BigramDictContent::writeBigramEntryAttributesAndAdvancePosition(
+        const bool isLink, const int probability, const int targetTerminalId,
+        const int timestamp, const int level, const int count, int *const entryWritingPos) {
+    BufferWithExtendableBuffer *const buffer = getWritableContentBuffer();
+    int bigramFlags = 0;
+    if (isLink) {
+        bigramFlags = Ver4DictConstants::BIGRAM_IS_LINK_MASK;
+    }
+    if (!buffer->writeUintAndAdvancePosition(bigramFlags,
+            Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) {
+        AKLOGE("Cannot write bigram flags. pos: %d, flags: %x", *entryWritingPos, bigramFlags);
+        return false;
+    }
+    if (!mHasHistoricalInfo) {
+        if (!buffer->writeUintAndAdvancePosition(probability,
+                Ver4DictConstants::PROBABILITY_SIZE, entryWritingPos)) {
+            AKLOGE("Cannot write bigram probability. pos: %d, probability: %d", *entryWritingPos,
+                    probability);
+            return false;
+        }
+    } else {
+        if (!buffer->writeUintAndAdvancePosition(timestamp,
+                Ver4DictConstants::TIME_STAMP_FIELD_SIZE, entryWritingPos)) {
+            AKLOGE("Cannot write bigram timestamps. pos: %d, timestamp: %d", *entryWritingPos,
+                    timestamp);
+            return false;
+        }
+        if (!buffer->writeUintAndAdvancePosition(level,
+                Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, entryWritingPos)) {
+            AKLOGE("Cannot write bigram level. pos: %d, level: %d", *entryWritingPos,
+                    level);
+            return false;
+        }
+        if (!buffer->writeUintAndAdvancePosition(count,
+                Ver4DictConstants::WORD_COUNT_FIELD_SIZE, entryWritingPos)) {
+            AKLOGE("Cannot write bigram count. pos: %d, count: %d", *entryWritingPos,
+                    count);
+            return false;
+        }
+    }
+    // NOT_A_TERMINAL_ID is stored using the dedicated "invalid target" marker value.
+    int encodedTargetTerminalId = targetTerminalId;
+    if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) {
+        encodedTargetTerminalId = Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID;
+    }
+    if (!buffer->writeUintAndAdvancePosition(encodedTargetTerminalId,
+            Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos)) {
+        AKLOGE("Cannot write bigram target terminal id. pos: %d, target terminal id: %d",
+                *entryWritingPos, targetTerminalId);
+        return false;
+    }
+    return true;
+}
+
+// Writes a link entry at writingPos; the entry's target field carries linkedEntryPos.
+bool BigramDictContent::writeLink(const int linkedEntryPos, const int writingPos) {
+    // The attribute writer takes the position by pointer, so give it a local copy.
+    int scratchPos = writingPos;
+    return writeBigramEntryAttributesAndAdvancePosition(true /* isLink */,
+            NOT_A_PROBABILITY /* probability */, linkedEntryPos /* targetTerminalId */,
+            NOT_A_TIMESTAMP, 0 /* level */, 0 /* count */, &scratchPos);
+}
+
+// Fills this content from originalBigramDictContent. For each pair in terminalIdMap, the bigram
+// list stored in the original under the pair's first id is copied (through runGCBigramList())
+// to the current tail of this content; if any entry survived, the list position is registered
+// in the lookup table under the pair's second id and the entry count is added to
+// *outBigramEntryCount. Returns false when copying or registering fails.
+bool BigramDictContent::runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+        const BigramDictContent *const originalBigramDictContent,
+        int *const outBigramEntryCount) {
+    typedef TerminalPositionLookupTable::TerminalIdMap::const_iterator TerminalIdMapIterator;
+    for (TerminalIdMapIterator it = terminalIdMap->begin(); it != terminalIdMap->end(); ++it) {
+        const int sourceListPos = originalBigramDictContent->getBigramListHeadPos(it->first);
+        if (sourceListPos == NOT_A_DICT_POS) {
+            // This terminal does not have a bigram list.
+            continue;
+        }
+        const int destListPos = getContentBuffer()->getTailPosition();
+        int copiedEntryCount = 0;
+        if (!runGCBigramList(sourceListPos, originalBigramDictContent, destListPos,
+                terminalIdMap, &copiedEntryCount)) {
+            AKLOGE("Cannot complete GC for the bigram list. original pos: %d, pos: %d",
+                    sourceListPos, destListPos);
+            return false;
+        }
+        // A list whose entries were all dropped is not registered at all.
+        if (copiedEntryCount != 0) {
+            *outBigramEntryCount += copiedEntryCount;
+            if (!getUpdatableAddressLookupTable()->set(it->second, destListPos)) {
+                AKLOGE("Cannot set bigram list position. terminal id: %d, pos: %d",
+                        it->second, destListPos);
+                return false;
+            }
+        }
+    }
+    return true;
+}
+
+// Copies the bigram list that starts at bigramListPos in sourceBigramDictContent to toPos in
+// this content. Invalid entries and entries whose target id is missing from terminalIdMap are
+// dropped; every kept entry has its target id replaced by the mapped id and increments
+// *outEntryCount. A terminator is written afterwards when *outEntryCount is positive.
+// Returns whether GC for the bigram list succeeded.
+bool BigramDictContent::runGCBigramList(const int bigramListPos,
+        const BigramDictContent *const sourceBigramDictContent, const int toPos,
+        const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+        int *const outEntryCount) {
+    int sourcePos = bigramListPos;
+    int destPos = toPos;
+    bool hasNext = true;
+    do {
+        const BigramEntry sourceEntry =
+                sourceBigramDictContent->getBigramEntryAndAdvancePosition(&sourcePos);
+        hasNext = sourceEntry.hasNext();
+        if (sourceEntry.isValid()) {
+            const TerminalPositionLookupTable::TerminalIdMap::const_iterator mapped =
+                    terminalIdMap->find(sourceEntry.getTargetTerminalId());
+            // An unmapped target means the target word has been removed; skip the entry.
+            if (mapped != terminalIdMap->end()) {
+                const BigramEntry remappedEntry =
+                        sourceEntry.updateTargetTerminalIdAndGetEntry(mapped->second);
+                if (!writeBigramEntryAndAdvancePosition(&remappedEntry, &destPos)) {
+                    AKLOGE("Cannot write bigram entry to run GC. pos: %d", destPos);
+                    return false;
+                }
+                *outEntryCount += 1;
+            }
+        }
+    } while (hasNext);
+    if (*outEntryCount > 0 && !writeTerminator(destPos)) {
+        AKLOGE("Cannot write terminator to run GC. pos: %d", destPos);
+        return false;
+    }
+    return true;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
new file mode 100644
index 000000000..033f18e9e
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_BIGRAM_DICT_CONTENT_H
+#define LATINIME_BIGRAM_DICT_CONTENT_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+
+namespace latinime {
+
+// Bigram list storage of a ver4 dictionary, built on SparseTableDictContent: a lookup table
+// keyed by terminal id gives the head position of each bigram list in the content buffer.
+class BigramDictContent : public SparseTableDictContent {
+ public:
+    // Content backed by the bigram files found under dictPath.
+    BigramDictContent(const char *const dictPath, const bool hasHistoricalInfo,
+            const bool isUpdatable)
+            : SparseTableDictContent(dictPath,
+                      Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION,
+                      Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION,
+                      Ver4DictConstants::BIGRAM_FILE_EXTENSION, isUpdatable,
+                      Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
+                      Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
+              mHasHistoricalInfo(hasHistoricalInfo) {}
+
+    // Content that is not constructed from a dictionary path.
+    BigramDictContent(const bool hasHistoricalInfo)
+            : SparseTableDictContent(Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
+                      Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
+              mHasHistoricalInfo(hasHistoricalInfo) {}
+
+    // Tail position of the content buffer, i.e. where the next appended entry would start.
+    int getContentTailPos() const {
+        return getContentBuffer()->getTailPosition();
+    }
+
+    // Reads the entry at bigramEntryPos without exposing the advanced position to the caller.
+    const BigramEntry getBigramEntry(const int bigramEntryPos) const {
+        int scratchPos = bigramEntryPos;
+        return getBigramEntryAndAdvancePosition(&scratchPos);
+    }
+
+    const BigramEntry getBigramEntryAndAdvancePosition(int *const bigramEntryPos) const;
+
+    // Returns head position of bigram list for a PtNode specified by terminalId, or
+    // NOT_A_DICT_POS when the lookup table has no record for it.
+    int getBigramListHeadPos(const int terminalId) const {
+        const SparseTable *const lookupTable = getAddressLookupTable();
+        return lookupTable->contains(terminalId) ? lookupTable->get(terminalId) : NOT_A_DICT_POS;
+    }
+
+    // Appends the entry at the current tail of the content buffer.
+    bool writeBigramEntryAtTail(const BigramEntry *const bigramEntryToWrite) {
+        int tailPos = getContentTailPos();
+        return writeBigramEntryAndAdvancePosition(bigramEntryToWrite, &tailPos);
+    }
+
+    // Writes the entry at entryWritingPos; the advanced position is discarded.
+    bool writeBigramEntry(const BigramEntry *const bigramEntryToWrite, const int entryWritingPos) {
+        int scratchPos = entryWritingPos;
+        return writeBigramEntryAndAdvancePosition(bigramEntryToWrite, &scratchPos);
+    }
+
+    bool writeBigramEntryAndAdvancePosition(const BigramEntry *const bigramEntryToWrite,
+            int *const entryWritingPos);
+
+    // A terminator is represented as a link to the invalid position.
+    bool writeTerminator(const int writingPos) {
+        return writeLink(INVALID_LINKED_ENTRY_POS, writingPos);
+    }
+
+    bool writeLink(const int linkedPos, const int writingPos);
+
+    // Registers the current tail of the content buffer as the list head for terminalId.
+    bool createNewBigramList(const int terminalId) {
+        return getUpdatableAddressLookupTable()->set(terminalId, getContentTailPos());
+    }
+
+    bool flushToFile(const char *const dictPath) const {
+        return flush(dictPath, Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION,
+                Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION,
+                Ver4DictConstants::BIGRAM_FILE_EXTENSION);
+    }
+
+    bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+            const BigramDictContent *const originalBigramDictContent,
+            int *const outBigramEntryCount);
+
+    // Size of one stored entry; it depends on whether historical info is stored.
+    int getBigramEntrySize() const {
+        const int sharedFieldsSize = Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE
+                + Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
+        if (!mHasHistoricalInfo) {
+            return sharedFieldsSize + Ver4DictConstants::PROBABILITY_SIZE;
+        }
+        return sharedFieldsSize
+                + Ver4DictConstants::TIME_STAMP_FIELD_SIZE
+                + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
+                + Ver4DictConstants::WORD_COUNT_FIELD_SIZE;
+    }
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(BigramDictContent);
+
+    static const int INVALID_LINKED_ENTRY_POS;
+
+    bool writeBigramEntryAttributesAndAdvancePosition(
+            const bool isLink, const int probability, const int targetTerminalId,
+            const int timestamp, const int level, const int count, int *const entryWritingPos);
+
+    bool runGCBigramList(const int bigramListPos,
+            const BigramDictContent *const sourceBigramDictContent, const int toPos,
+            const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+            int *const outEntryCount);
+
+    bool mHasHistoricalInfo;
+};
+} // namespace latinime
+#endif /* LATINIME_BIGRAM_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h
new file mode 100644
index 000000000..2b0cbd93b
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_BIGRAM_ENTRY_H
+#define LATINIME_BIGRAM_ENTRY_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/utils/historical_info.h"
+
+namespace latinime {
+
+// Immutable value describing one entry of a bigram list: a has-next flag, a probability,
+// historical information and the terminal id of the target word. The update*AndGetEntry()
+// helpers never modify the entry; they return a modified copy.
+class BigramEntry {
+ public:
+    // Copies every field, including the historical information. A copy that dropped it would
+    // silently produce an entry without historical info whenever the copy is not elided.
+    BigramEntry(const BigramEntry& bigramEntry)
+            : mHasNext(bigramEntry.mHasNext), mProbability(bigramEntry.mProbability),
+              mHistoricalInfo(bigramEntry.mHistoricalInfo),
+              mTargetTerminalId(bigramEntry.mTargetTerminalId) {}
+
+    // Entry without historical information.
+    BigramEntry(const bool hasNext, const int probability, const int targetTerminalId)
+            : mHasNext(hasNext), mProbability(probability), mHistoricalInfo(),
+              mTargetTerminalId(targetTerminalId) {}
+
+    // Entry with historical information. historicalInfo must not be null; it is copied.
+    BigramEntry(const bool hasNext, const int probability,
+            const HistoricalInfo *const historicalInfo, const int targetTerminalId)
+            : mHasNext(hasNext), mProbability(probability), mHistoricalInfo(*historicalInfo),
+              mTargetTerminalId(targetTerminalId) {}
+
+    // Returns a copy whose target is NOT_A_TERMINAL_ID, i.e. for which isValid() is false.
+    const BigramEntry getInvalidatedEntry() const {
+        return updateTargetTerminalIdAndGetEntry(Ver4DictConstants::NOT_A_TERMINAL_ID);
+    }
+
+    const BigramEntry updateHasNextAndGetEntry(const bool hasNext) const {
+        return BigramEntry(hasNext, mProbability, &mHistoricalInfo, mTargetTerminalId);
+    }
+
+    const BigramEntry updateTargetTerminalIdAndGetEntry(const int newTargetTerminalId) const {
+        return BigramEntry(mHasNext, mProbability, &mHistoricalInfo, newTargetTerminalId);
+    }
+
+    const BigramEntry updateProbabilityAndGetEntry(const int probability) const {
+        return BigramEntry(mHasNext, probability, &mHistoricalInfo, mTargetTerminalId);
+    }
+
+    const BigramEntry updateHistoricalInfoAndGetEntry(
+            const HistoricalInfo *const historicalInfo) const {
+        return BigramEntry(mHasNext, mProbability, historicalInfo, mTargetTerminalId);
+    }
+
+    // An entry is valid when it points at a real target terminal.
+    bool isValid() const {
+        return mTargetTerminalId != Ver4DictConstants::NOT_A_TERMINAL_ID;
+    }
+
+    bool hasNext() const {
+        return mHasNext;
+    }
+
+    int getProbability() const {
+        return mProbability;
+    }
+
+    // True when the stored historical information reports itself as valid.
+    bool hasHistoricalInfo() const {
+        return mHistoricalInfo.isValid();
+    }
+
+    // Never null; the pointer refers to a member of this entry.
+    const HistoricalInfo *getHistoricalInfo() const {
+        return &mHistoricalInfo;
+    }
+
+    int getTargetTerminalId() const {
+        return mTargetTerminalId;
+    }
+
+ private:
+    // Copy constructor is public to use this class as a type of return value.
+    DISALLOW_DEFAULT_CONSTRUCTOR(BigramEntry);
+    DISALLOW_ASSIGNMENT_OPERATOR(BigramEntry);
+
+    const bool mHasNext;
+    const int mProbability;
+    const HistoricalInfo mHistoricalInfo;
+    const int mTargetTerminalId;
+};
+} // namespace latinime
+#endif /* LATINIME_BIGRAM_ENTRY_H */
diff --git a/native/jni/src/suggest/core/dictionary/bloom_filter.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dict_content.h
index 4ae474e0c..0c2f47073 100644
--- a/native/jni/src/suggest/core/dictionary/bloom_filter.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dict_content.h
@@ -14,12 +14,23 @@
* limitations under the License.
*/
-#include "suggest/core/dictionary/bloom_filter.h"
+#ifndef LATINIME_DICT_CONTENT_H
+#define LATINIME_DICT_CONTENT_H
+
+#include "defines.h"
namespace latinime {
-// Must be smaller than BIGRAM_FILTER_BYTE_SIZE * 8, and preferably prime. 1021 is the largest
-// prime under 128 * 8.
-const int BloomFilter::BIGRAM_FILTER_MODULO = 1021;
+class DictContent {  // Abstract base for dictionary content classes; only declares isValid().
+ public:
+ virtual ~DictContent() {}  // Virtual so that deleting through a DictContent pointer is well-defined.
+ virtual bool isValid() const = 0;
+
+ protected:
+ DictContent() {}  // Protected: constructible from derived classes only.
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DictContent);
+};
} // namespace latinime
+#endif /* LATINIME_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp
new file mode 100644
index 000000000..3b7c70efd
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp
@@ -0,0 +1,160 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h"
+
+#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+// Reads the entry stored for terminalId. Ids outside [0, mSize) yield an entry with zero flags
+// and NOT_A_PROBABILITY instead of touching the buffer.
+const ProbabilityEntry ProbabilityDictContent::getProbabilityEntry(const int terminalId) const {
+    const bool isStoredTerminal = terminalId >= 0 && terminalId < mSize;
+    if (!isStoredTerminal) {
+        // This method can be called with invalid terminal id during GC.
+        return ProbabilityEntry(0 /* flags */, NOT_A_PROBABILITY);
+    }
+    const BufferWithExtendableBuffer *const contentBuffer = getBuffer();
+    int readingPos = getEntryPos(terminalId);
+    const int flags = contentBuffer->readUintAndAdvancePosition(
+            Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &readingPos);
+    const int probability = contentBuffer->readUintAndAdvancePosition(
+            Ver4DictConstants::PROBABILITY_SIZE, &readingPos);
+    if (!mHasHistoricalInfo) {
+        return ProbabilityEntry(flags, probability);
+    }
+    // Historical fields follow the probability: timestamp, level, count.
+    const int timestamp = contentBuffer->readUintAndAdvancePosition(
+            Ver4DictConstants::TIME_STAMP_FIELD_SIZE, &readingPos);
+    const int level = contentBuffer->readUintAndAdvancePosition(
+            Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, &readingPos);
+    const int count = contentBuffer->readUintAndAdvancePosition(
+            Ver4DictConstants::WORD_COUNT_FIELD_SIZE, &readingPos);
+    const HistoricalInfo historicalInfo(timestamp, level, count);
+    return ProbabilityEntry(flags, probability, &historicalInfo);
+}
+
+// Stores probabilityEntry for terminalId. When terminalId is not below mSize, dummy entries are
+// first written from the buffer tail up to and including the target slot, growing mSize by one
+// per dummy entry; the real entry then overwrites the target slot. Negative ids are rejected.
+bool ProbabilityDictContent::setProbabilityEntry(const int terminalId,
+        const ProbabilityEntry *const probabilityEntry) {
+    if (terminalId < 0) {
+        return false;
+    }
+    const int targetPos = getEntryPos(terminalId);
+    if (terminalId >= mSize) {
+        ProbabilityEntry paddingEntry;
+        for (int paddingPos = getBuffer()->getTailPosition(); paddingPos <= targetPos;
+                paddingPos += getEntrySize()) {
+            if (!writeEntry(&paddingEntry, paddingPos)) {
+                AKLOGE("Cannot write dummy entry. pos: %d, mSize: %d", paddingPos, mSize);
+                return false;
+            }
+            mSize++;
+        }
+    }
+    return writeEntry(probabilityEntry, targetPos);
+}
+
+// Flushes the content under dictPath. If the buffer extends beyond the first mSize entries,
+// only those entries are copied into a temporary content and that copy is flushed instead.
+bool ProbabilityDictContent::flushToFile(const char *const dictPath) const {
+    const bool hasDataBeyondEntries = getEntryPos(mSize) < getBuffer()->getTailPosition();
+    if (!hasDataBeyondEntries) {
+        return flush(dictPath, Ver4DictConstants::FREQ_FILE_EXTENSION);
+    }
+    ProbabilityDictContent trimmedContent(mHasHistoricalInfo);
+    for (int terminalId = 0; terminalId < mSize; ++terminalId) {
+        const ProbabilityEntry entry = getProbabilityEntry(terminalId);
+        if (!trimmedContent.setProbabilityEntry(terminalId, &entry)) {
+            AKLOGE("Cannot set probability entry in flushToFile. terminalId: %d", terminalId);
+            return false;
+        }
+    }
+    return trimmedContent.flush(dictPath, Ver4DictConstants::FREQ_FILE_EXTENSION);
+}
+
+// Rebuilds this content from originalProbabilityDictContent: for each pair in terminalIdMap the
+// entry read from the original under the pair's first id is stored here under the second id.
+// NOTE(review): mSize is incremented here once per pair, and setProbabilityEntry() also
+// increments it whenever it pads with dummy entries, so mSize may end up larger than the number
+// of stored entries -- confirm this is intended.
+bool ProbabilityDictContent::runGC(
+        const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+        const ProbabilityDictContent *const originalProbabilityDictContent) {
+    typedef TerminalPositionLookupTable::TerminalIdMap::const_iterator TerminalIdMapIterator;
+    mSize = 0;
+    for (TerminalIdMapIterator it = terminalIdMap->begin(); it != terminalIdMap->end(); ++it) {
+        const ProbabilityEntry originalEntry =
+                originalProbabilityDictContent->getProbabilityEntry(it->first);
+        const bool isStored = setProbabilityEntry(it->second, &originalEntry);
+        if (!isStored) {
+            AKLOGE("Cannot set probability entry in runGC. terminalId: %d", it->second);
+            return false;
+        }
+        mSize++;
+    }
+    return true;
+}
+
+// Size of one stored entry: flags and probability, plus timestamp, level and count when
+// historical info is stored.
+int ProbabilityDictContent::getEntrySize() const {
+    const int baseSize = Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE
+            + Ver4DictConstants::PROBABILITY_SIZE;
+    if (!mHasHistoricalInfo) {
+        return baseSize;
+    }
+    return baseSize
+            + Ver4DictConstants::TIME_STAMP_FIELD_SIZE
+            + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
+            + Ver4DictConstants::WORD_COUNT_FIELD_SIZE;
+}
+
+// Entries are fixed-size and indexed by terminal id, so the position is a plain multiple.
+int ProbabilityDictContent::getEntryPos(const int terminalId) const {
+    const int entrySize = getEntrySize();
+    return terminalId * entrySize;
+}
+
+// Writes probabilityEntry at entryPos: flags, probability and, when historical info is stored,
+// timestamp, level and count. Returns false, after logging, on the first failed write.
+bool ProbabilityDictContent::writeEntry(const ProbabilityEntry *const probabilityEntry,
+        const int entryPos) {
+    BufferWithExtendableBuffer *const buffer = getWritableBuffer();
+    int pos = entryPos;
+    if (!buffer->writeUintAndAdvancePosition(probabilityEntry->getFlags(),
+            Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &pos)) {
+        AKLOGE("Cannot write flags in probability dict content. pos: %d", pos);
+        return false;
+    }
+    if (!buffer->writeUintAndAdvancePosition(probabilityEntry->getProbability(),
+            Ver4DictConstants::PROBABILITY_SIZE, &pos)) {
+        AKLOGE("Cannot write probability in probability dict content. pos: %d", pos);
+        return false;
+    }
+    if (!mHasHistoricalInfo) {
+        // Nothing more to store for entries without historical info.
+        return true;
+    }
+    const HistoricalInfo *const info = probabilityEntry->getHistoricalInfo();
+    if (!buffer->writeUintAndAdvancePosition(info->getTimeStamp(),
+            Ver4DictConstants::TIME_STAMP_FIELD_SIZE, &pos)) {
+        AKLOGE("Cannot write timestamp in probability dict content. pos: %d", pos);
+        return false;
+    }
+    if (!buffer->writeUintAndAdvancePosition(info->getLevel(),
+            Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, &pos)) {
+        AKLOGE("Cannot write level in probability dict content. pos: %d", pos);
+        return false;
+    }
+    if (!buffer->writeUintAndAdvancePosition(info->getCount(),
+            Ver4DictConstants::WORD_COUNT_FIELD_SIZE, &pos)) {
+        AKLOGE("Cannot write count in probability dict content. pos: %d", pos);
+        return false;
+    }
+    return true;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h
new file mode 100644
index 000000000..b065bc954
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PROBABILITY_DICT_CONTENT_H
+#define LATINIME_PROBABILITY_DICT_CONTENT_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+class ProbabilityEntry;
+
+class ProbabilityDictContent : public SingleDictContent {  // Fixed-size probability entries indexed by terminal id.
+ public:
+ ProbabilityDictContent(const char *const dictPath, const bool hasHistoricalInfo,
+ const bool isUpdatable)
+ : SingleDictContent(dictPath, Ver4DictConstants::FREQ_FILE_EXTENSION, isUpdatable),
+ mHasHistoricalInfo(hasHistoricalInfo),
+ mSize(getBuffer()->getTailPosition() / getEntrySize()) {}  // getEntrySize() reads mHasHistoricalInfo, which is declared (so initialized) before mSize.
+
+ ProbabilityDictContent(const bool hasHistoricalInfo)
+ : mHasHistoricalInfo(hasHistoricalInfo), mSize(0) {}  // Starts with no entries.
+
+ const ProbabilityEntry getProbabilityEntry(const int terminalId) const;
+
+ bool setProbabilityEntry(const int terminalId, const ProbabilityEntry *const probabilityEntry);
+
+ bool flushToFile(const char *const dictPath) const;
+
+ bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+ const ProbabilityDictContent *const originalProbabilityDictContent);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ProbabilityDictContent);
+
+ int getEntrySize() const;
+
+ int getEntryPos(const int terminalId) const;
+
+ bool writeEntry(const ProbabilityEntry *const probabilityEntry, const int entryPos);
+
+ bool mHasHistoricalInfo;  // Selects the entry layout used by getEntrySize() and the read/write code.
+ int mSize;  // Terminal ids in [0, mSize) are treated as readable by getProbabilityEntry().
+};
+} // namespace latinime
+#endif /* LATINIME_PROBABILITY_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h
new file mode 100644
index 000000000..36ba82be1
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PROBABILITY_ENTRY_H
+#define LATINIME_PROBABILITY_ENTRY_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/utils/historical_info.h"
+
+namespace latinime {
+
+// Immutable value holding what is stored per terminal in the probability content: flags, a
+// probability and historical information. The createEntryWith*() helpers return modified copies.
+class ProbabilityEntry {
+ public:
+    // Dummy entry: zero flags, NOT_A_PROBABILITY and default-constructed historical info.
+    ProbabilityEntry()
+            : mFlags(0), mProbability(NOT_A_PROBABILITY), mHistoricalInfo() {}
+
+    // Entry without historical information.
+    ProbabilityEntry(const int flags, const int probability)
+            : mFlags(flags), mProbability(probability), mHistoricalInfo() {}
+
+    // Entry with historical information; *historicalInfo is copied into the entry.
+    ProbabilityEntry(const int flags, const int probability,
+            const HistoricalInfo *const historicalInfo)
+            : mFlags(flags), mProbability(probability), mHistoricalInfo(*historicalInfo) {}
+
+    // Field-by-field copy. Kept public so entries can be returned by value.
+    ProbabilityEntry(const ProbabilityEntry &probabilityEntry)
+            : mFlags(probabilityEntry.mFlags), mProbability(probabilityEntry.mProbability),
+              mHistoricalInfo(probabilityEntry.mHistoricalInfo) {}
+
+    // Same flags and historical info, different probability.
+    const ProbabilityEntry createEntryWithUpdatedProbability(const int probability) const {
+        return ProbabilityEntry(mFlags, probability, &mHistoricalInfo);
+    }
+
+    // Same flags and probability, different historical info.
+    const ProbabilityEntry createEntryWithUpdatedHistoricalInfo(
+            const HistoricalInfo *const historicalInfo) const {
+        return ProbabilityEntry(mFlags, mProbability, historicalInfo);
+    }
+
+    int getFlags() const {
+        return mFlags;
+    }
+
+    int getProbability() const {
+        return mProbability;
+    }
+
+    // True when the stored historical information reports itself as valid.
+    bool hasHistoricalInfo() const {
+        return mHistoricalInfo.isValid();
+    }
+
+    // Never null; points at a member of this entry.
+    const HistoricalInfo *getHistoricalInfo() const {
+        return &mHistoricalInfo;
+    }
+
+ private:
+    // Copy constructor is public to use this class as a type of return value.
+    DISALLOW_ASSIGNMENT_OPERATOR(ProbabilityEntry);
+
+    const int mFlags;
+    const int mProbability;
+    const HistoricalInfo mHistoricalInfo;
+};
+} // namespace latinime
+#endif /* LATINIME_PROBABILITY_ENTRY_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.cpp
new file mode 100644
index 000000000..64d7bc0a5
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.cpp
@@ -0,0 +1,188 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h"
+
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+// Reads the shortcut entry located at *shortcutEntryPos and advances *shortcutEntryPos past
+// the fields that were read. All out parameters may be null. The target code points are only
+// read (and the position only advanced past them) when both outCodePoint and
+// outCodePointCount are given.
+void ShortcutDictContent::getShortcutEntryAndAdvancePosition(const int maxCodePointCount,
+        int *const outCodePoint, int *const outCodePointCount, int *const outProbability,
+        bool *const outhasNext, int *const shortcutEntryPos) const {
+    const BufferWithExtendableBuffer *const contentBuffer = getContentBuffer();
+    const bool isPosInBuffer = *shortcutEntryPos >= 0
+            && *shortcutEntryPos < contentBuffer->getTailPosition();
+    if (!isPosInBuffer) {
+        AKLOGE("Invalid shortcut entry position. shortcutEntryPos: %d, bufSize: %d",
+                *shortcutEntryPos, contentBuffer->getTailPosition());
+        ASSERT(false);
+        // Report an empty entry without a successor; outProbability is left untouched.
+        if (outhasNext) {
+            *outhasNext = false;
+        }
+        if (outCodePointCount) {
+            *outCodePointCount = 0;
+        }
+        return;
+    }
+
+    // The flags field holds both the probability and the has-next bit.
+    const int flags = contentBuffer->readUintAndAdvancePosition(
+            Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos);
+    if (outProbability) {
+        *outProbability = flags & Ver4DictConstants::SHORTCUT_PROBABILITY_MASK;
+    }
+    if (outhasNext) {
+        *outhasNext = (flags & Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK) != 0;
+    }
+    const bool wantsCodePoints = outCodePoint && outCodePointCount;
+    if (wantsCodePoints) {
+        contentBuffer->readCodePointsAndAdvancePosition(
+                maxCodePointCount, outCodePoint, outCodePointCount, shortcutEntryPos);
+    }
+}
+
+// Returns the value stored in the address lookup table for terminalId, or NOT_A_DICT_POS
+// when the table does not contain that terminal id.
+int ShortcutDictContent::getShortcutListHeadPos(const int terminalId) const {
+    const SparseTable *const lookupTable = getAddressLookupTable();
+    if (lookupTable->contains(terminalId)) {
+        return lookupTable->get(terminalId);
+    }
+    return NOT_A_DICT_POS;
+}
+
+// Flushes this content via flush(), passing the shortcut lookup-table, content-table and
+// shortcut file extensions. Returns whatever flush() reports.
+bool ShortcutDictContent::flushToFile(const char *const dictPath) const {
+    const bool isFlushed = flush(dictPath,
+            Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION,
+            Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION,
+            Ver4DictConstants::SHORTCUT_FILE_EXTENSION);
+    return isFlushed;
+}
+
+// Rebuilds this content from originalShortcutDictContent. For every (old id, new id) pair in
+// terminalIdMap whose old id has a shortcut list in the original content, the list is copied
+// to the tail of this content buffer and registered under the new id. Returns false as soon
+// as a copy or a lookup table update fails.
+bool ShortcutDictContent::runGC(
+        const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+        const ShortcutDictContent *const originalShortcutDictContent) {
+    typedef TerminalPositionLookupTable::TerminalIdMap::const_iterator IdMapIterator;
+    const IdMapIterator mapEnd = terminalIdMap->end();
+    for (IdMapIterator entry = terminalIdMap->begin(); entry != mapEnd; ++entry) {
+        const int sourceListPos =
+                originalShortcutDictContent->getShortcutListHeadPos(entry->first);
+        if (sourceListPos != NOT_A_DICT_POS) {
+            // The copied list starts where this content buffer currently ends.
+            const int destListPos = getContentBuffer()->getTailPosition();
+            const bool isCopied = copyShortcutListFromDictContent(sourceListPos,
+                    originalShortcutDictContent, destListPos);
+            if (!isCopied) {
+                AKLOGE("Cannot copy shortcut list during GC. original pos: %d, pos: %d",
+                        sourceListPos, destListPos);
+                return false;
+            }
+            // Register the copied list under the new terminal id.
+            const bool isRegistered =
+                    getUpdatableAddressLookupTable()->set(entry->second, destListPos);
+            if (!isRegistered) {
+                AKLOGE("Cannot set shortcut list position. terminal id: %d, pos: %d",
+                        entry->second, destListPos);
+                return false;
+            }
+        }
+    }
+    return true;
+}
+
+// Makes the lookup table entry of terminalId point at the current tail of the content
+// buffer. Nothing is written to the content buffer here.
+bool ShortcutDictContent::createNewShortcutList(const int terminalId) {
+    const int newListHeadPos = getContentBuffer()->getTailPosition();
+    SparseTable *const lookupTable = getUpdatableAddressLookupTable();
+    return lookupTable->set(terminalId, newListHeadPos);
+}
+
+// Copies the shortcut list starting at shortcutListPos to toPos within this same content.
+bool ShortcutDictContent::copyShortcutList(const int shortcutListPos, const int toPos) {
+    const ShortcutDictContent *const sourceContent = this;
+    return copyShortcutListFromDictContent(shortcutListPos, sourceContent, toPos);
+}
+
+// Copies a whole shortcut list, entry by entry, from sourceShortcutDictContent (starting at
+// shortcutListPos) into this content (starting at toPos). Copying stops after the entry that
+// reports no successor. Returns false if an entry cannot be written.
+bool ShortcutDictContent::copyShortcutListFromDictContent(const int shortcutListPos,
+        const ShortcutDictContent *const sourceShortcutDictContent, const int toPos) {
+    int codePoints[MAX_WORD_LENGTH];
+    int sourcePos = shortcutListPos;
+    int destPos = toPos;
+    bool hasNext = true;
+    // At least one entry is always read because a list is never empty from this loop's view.
+    do {
+        int probability = 0;
+        int codePointCount = 0;
+        sourceShortcutDictContent->getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH,
+                codePoints, &codePointCount, &probability, &hasNext, &sourcePos);
+        const bool isWritten = writeShortcutEntryAndAdvancePosition(codePoints,
+                codePointCount, probability, hasNext, &destPos);
+        if (!isWritten) {
+            AKLOGE("Cannot write shortcut entry to copy. pos: %d", destPos);
+            return false;
+        }
+    } while (hasNext);
+    return true;
+}
+
+// Rewrites the flags field of the entry at shortcutEntryPos with the given probability while
+// keeping the entry's current has-next bit.
+bool ShortcutDictContent::setProbability(const int probability, const int shortcutEntryPos) {
+    BufferWithExtendableBuffer *const contentBuffer = getWritableContentBuffer();
+    const int currentFlags = contentBuffer->readUint(
+            Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos);
+    const bool hasNext = currentFlags & Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK;
+    const int updatedFlags = createAndGetShortcutFlags(probability, hasNext);
+    return contentBuffer->writeUint(updatedFlags,
+            Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos);
+}
+
+// Writes one shortcut entry (flags field followed by the terminated target code points) at
+// *shortcutEntryPos and advances *shortcutEntryPos past what was written. Returns false if
+// either write fails.
+bool ShortcutDictContent::writeShortcutEntryAndAdvancePosition(const int *const codePoint,
+        const int codePointCount, const int probability, const bool hasNext,
+        int *const shortcutEntryPos) {
+    BufferWithExtendableBuffer *const contentBuffer = getWritableContentBuffer();
+    const int flags = createAndGetShortcutFlags(probability, hasNext);
+    const bool areFlagsWritten = contentBuffer->writeUintAndAdvancePosition(flags,
+            Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos);
+    if (!areFlagsWritten) {
+        AKLOGE("Cannot write shortcut flags. flags; %x, pos: %d", flags, *shortcutEntryPos);
+        return false;
+    }
+    const bool areCodePointsWritten = contentBuffer->writeCodePointsAndAdvancePosition(
+            codePoint, codePointCount, true /* writesTerminator */, shortcutEntryPos);
+    if (areCodePointsWritten) {
+        return true;
+    }
+    AKLOGE("Cannot write shortcut target code points. pos: %d", *shortcutEntryPos);
+    return false;
+}
+
+// Walks the shortcut list starting at shortcutListPos and returns the position of the first
+// entry whose target has exactly codePointCount code points equal to targetCodePointsToFind.
+// Returns NOT_A_DICT_POS when the list ends without a match.
+int ShortcutDictContent::findShortcutEntryAndGetPos(const int shortcutListPos,
+        const int *const targetCodePointsToFind, const int codePointCount) const {
+    int entryCodePoints[MAX_WORD_LENGTH];
+    int readingPos = shortcutListPos;
+    bool hasNext = true;
+    do {
+        // Remember where this entry starts; readingPos moves on to the next entry below.
+        const int entryPos = readingPos;
+        int probability = 0;
+        int entryCodePointCount = 0;
+        getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH, entryCodePoints,
+                &entryCodePointCount, &probability, &hasNext, &readingPos);
+        if (entryCodePointCount == codePointCount) {
+            // Count the leading code points that agree; a full run means a match.
+            int matchedCount = 0;
+            while (matchedCount < codePointCount
+                    && targetCodePointsToFind[matchedCount] == entryCodePoints[matchedCount]) {
+                ++matchedCount;
+            }
+            if (matchedCount >= codePointCount) {
+                return entryPos;
+            }
+        }
+    } while (hasNext);
+    return NOT_A_DICT_POS;
+}
+
+// Builds the flags field of a shortcut entry: the probability limited to
+// SHORTCUT_PROBABILITY_MASK, plus SHORTCUT_HAS_NEXT_MASK when hasNext is set.
+int ShortcutDictContent::createAndGetShortcutFlags(const int probability,
+        const bool hasNext) const {
+    if (hasNext) {
+        return (probability & Ver4DictConstants::SHORTCUT_PROBABILITY_MASK)
+                | Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK;
+    }
+    return probability & Ver4DictConstants::SHORTCUT_PROBABILITY_MASK;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h
new file mode 100644
index 000000000..eaafc27bc
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SHORTCUT_DICT_CONTENT_H
+#define LATINIME_SHORTCUT_DICT_CONTENT_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+
+namespace latinime {
+
+// Shortcut lists of a ver4 dictionary, stored as a SparseTableDictContent: an address lookup
+// table maps a terminal id to the head position of that terminal's shortcut list inside the
+// content buffer.
+class ShortcutDictContent : public SparseTableDictContent {
+ public:
+    // Opens the shortcut lookup-table, content-table and shortcut files belonging to dictPath.
+    ShortcutDictContent(const char *const dictPath, const bool isUpdatable)
+            : SparseTableDictContent(dictPath,
+                      Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION,
+                      Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION,
+                      Ver4DictConstants::SHORTCUT_FILE_EXTENSION, isUpdatable,
+                      Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
+                      Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
+
+    // Creates a content that is not backed by any file.
+    ShortcutDictContent()
+            : SparseTableDictContent(Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
+                      Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
+
+    // Reads the entry at shortcutEntryPos without exposing the advanced position. Out
+    // parameters may be null. Declared const because it only reads, which makes it usable
+    // through a pointer to const ShortcutDictContent as well.
+    void getShortcutEntry(const int maxCodePointCount, int *const outCodePoint,
+            int *const outCodePointCount, int *const outProbability, bool *const outhasNext,
+            const int shortcutEntryPos) const {
+        int readingPos = shortcutEntryPos;
+        getShortcutEntryAndAdvancePosition(maxCodePointCount, outCodePoint,
+                outCodePointCount, outProbability, outhasNext, &readingPos);
+    }
+
+    // Reads the entry at *shortcutEntryPos and advances *shortcutEntryPos past what was read.
+    void getShortcutEntryAndAdvancePosition(const int maxCodePointCount,
+            int *const outCodePoint, int *const outCodePointCount, int *const outProbability,
+            bool *const outhasNext, int *const shortcutEntryPos) const;
+
+    // Returns head position of shortcut list for a PtNode specified by terminalId.
+    int getShortcutListHeadPos(const int terminalId) const;
+
+    bool flushToFile(const char *const dictPath) const;
+
+    // Copies the shortcut lists of the terminals listed in terminalIdMap out of
+    // originalShortcutDictContent into this content.
+    bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+            const ShortcutDictContent *const originalShortcutDictContent);
+
+    bool createNewShortcutList(const int terminalId);
+
+    bool copyShortcutList(const int shortcutListPos, const int toPos);
+
+    bool setProbability(const int probability, const int shortcutEntryPos);
+
+    // Writes one entry at shortcutEntryPos without exposing the advanced position.
+    bool writeShortcutEntry(const int *const codePoint, const int codePointCount,
+            const int probability, const bool hasNext, const int shortcutEntryPos) {
+        int writingPos = shortcutEntryPos;
+        return writeShortcutEntryAndAdvancePosition(codePoint, codePointCount, probability,
+                hasNext, &writingPos);
+    }
+
+    // Writes one entry at *shortcutEntryPos and advances *shortcutEntryPos past it.
+    bool writeShortcutEntryAndAdvancePosition(const int *const codePoint,
+            const int codePointCount, const int probability, const bool hasNext,
+            int *const shortcutEntryPos);
+
+    // Returns the position of the entry in the given list whose target matches, or
+    // NOT_A_DICT_POS.
+    int findShortcutEntryAndGetPos(const int shortcutListPos,
+            const int *const targetCodePointsToFind, const int codePointCount) const;
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(ShortcutDictContent);
+
+    bool copyShortcutListFromDictContent(const int shortcutListPos,
+            const ShortcutDictContent *const sourceShortcutDictContent, const int toPos);
+
+    int createAndGetShortcutFlags(const int probability, const bool hasNext) const;
+};
+} // namespace latinime
+#endif /* LATINIME_SHORTCUT_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h
new file mode 100644
index 000000000..215642234
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SINGLE_DICT_CONTENT_H
+#define LATINIME_SINGLE_DICT_CONTENT_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
+#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+
+namespace latinime {
+
+// Dictionary content that consists of a single buffer, optionally backed by a memory-mapped
+// file, wrapped in a BufferWithExtendableBuffer.
+class SingleDictContent : public DictContent {
+ public:
+ // Opens contentFileName for dictPath. When the buffer could not be opened, the expandable
+ // buffer is built over a null pointer with size 0 and isValid() reports false.
+ // NOTE: the initializer of mExpandableContentBuffer reads mMmappedBuffer, so mMmappedBuffer
+ // must stay declared before it.
+ SingleDictContent(const char *const dictPath, const char *const contentFileName,
+ const bool isUpdatable)
+ : mMmappedBuffer(MmappedBuffer::openBuffer(dictPath, contentFileName, isUpdatable)),
+ mExpandableContentBuffer(mMmappedBuffer ? mMmappedBuffer->getBuffer() : nullptr,
+ mMmappedBuffer ? mMmappedBuffer->getBufferSize() : 0,
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
+ mIsValid(mMmappedBuffer) {}
+
+ // Creates a content without a memory-mapped file; it is always reported as valid.
+ SingleDictContent()
+ : mMmappedBuffer(nullptr),
+ mExpandableContentBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE), mIsValid(true) {}
+
+ virtual ~SingleDictContent() {}
+
+ virtual bool isValid() const {
+ return mIsValid;
+ }
+
+ // Forwards to the content buffer's own size-limit check.
+ bool isNearSizeLimit() const {
+ return mExpandableContentBuffer.isNearSizeLimit();
+ }
+
+ protected:
+ // Mutable access to the content buffer for subclasses.
+ BufferWithExtendableBuffer *getWritableBuffer() {
+ return &mExpandableContentBuffer;
+ }
+
+ // Read-only access to the content buffer for subclasses.
+ const BufferWithExtendableBuffer *getBuffer() const {
+ return &mExpandableContentBuffer;
+ }
+
+ // Writes the content buffer out through DictFileWritingUtils using the given suffix.
+ bool flush(const char *const dictPath, const char *const contentFileNameSuffix) const {
+ return DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath,
+ contentFileNameSuffix, &mExpandableContentBuffer);
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(SingleDictContent);
+
+ // Declaration order matters: see the note on the file-backed constructor.
+ const MmappedBuffer::MmappedBufferPtr mMmappedBuffer;
+ BufferWithExtendableBuffer mExpandableContentBuffer;
+ const bool mIsValid;
+};
+} // namespace latinime
+#endif /* LATINIME_SINGLE_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.cpp
new file mode 100644
index 000000000..63c6ea3a4
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
+
+namespace latinime {
+
+// Flushes the lookup table buffer, the address table buffer and the content buffer, in that
+// order, each with its own file name suffix. Stops at the first failure and returns false;
+// returns true only when all three buffers were flushed.
+bool SparseTableDictContent::flush(const char *const dictPath,
+        const char *const lookupTableFileNameSuffix, const char *const addressTableFileNameSuffix,
+        const char *const contentFileNameSuffix) const {
+    const bool isLookupTableFlushed = DictFileWritingUtils::flushBufferToFileWithSuffix(
+            dictPath, lookupTableFileNameSuffix, &mExpandableLookupTableBuffer);
+    if (isLookupTableFlushed) {
+        const bool isAddressTableFlushed = DictFileWritingUtils::flushBufferToFileWithSuffix(
+                dictPath, addressTableFileNameSuffix, &mExpandableAddressTableBuffer);
+        if (isAddressTableFlushed) {
+            const bool isContentFlushed = DictFileWritingUtils::flushBufferToFileWithSuffix(
+                    dictPath, contentFileNameSuffix, &mExpandableContentBuffer);
+            if (isContentFlushed) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h
new file mode 100644
index 000000000..fb6c88eef
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SPARSE_TABLE_DICT_CONTENT_H
+#define LATINIME_SPARSE_TABLE_DICT_CONTENT_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
+#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/sparse_table.h"
+
+namespace latinime {
+
+// TODO: Support multiple contents.
+// Dictionary content made of three buffers: a lookup table and an address table, which
+// together back a SparseTable, plus a content buffer.
+class SparseTableDictContent : public DictContent {
+ public:
+ // Opens the three files belonging to dictPath. A buffer that could not be opened is
+ // replaced by a null pointer with size 0, and isValid() then reports false.
+ // NOTE: the expandable buffers are initialized from the mmapped buffers and
+ // mAddressLookupTable takes the addresses of two expandable buffers, so the members must
+ // stay declared in their current order.
+ AK_FORCE_INLINE SparseTableDictContent(const char *const dictPath,
+ const char *const lookupTableFileName, const char *const addressTableFileName,
+ const char *const contentFileName, const bool isUpdatable,
+ const int sparseTableBlockSize, const int sparseTableDataSize)
+ : mLookupTableBuffer(
+ MmappedBuffer::openBuffer(dictPath, lookupTableFileName, isUpdatable)),
+ mAddressTableBuffer(
+ MmappedBuffer::openBuffer(dictPath, addressTableFileName, isUpdatable)),
+ mContentBuffer(
+ MmappedBuffer::openBuffer(dictPath, contentFileName, isUpdatable)),
+ mExpandableLookupTableBuffer(
+ mLookupTableBuffer ? mLookupTableBuffer->getBuffer() : nullptr,
+ mLookupTableBuffer ? mLookupTableBuffer->getBufferSize() : 0,
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
+ mExpandableAddressTableBuffer(
+ mAddressTableBuffer ? mAddressTableBuffer->getBuffer() : nullptr,
+ mAddressTableBuffer ? mAddressTableBuffer->getBufferSize() : 0,
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
+ mExpandableContentBuffer(mContentBuffer ? mContentBuffer->getBuffer() : nullptr,
+ mContentBuffer ? mContentBuffer->getBufferSize() : 0,
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
+ mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer,
+ sparseTableBlockSize, sparseTableDataSize),
+ mIsValid(mLookupTableBuffer && mAddressTableBuffer && mContentBuffer) {}
+
+ // Creates a content without any memory-mapped file; it is always reported as valid.
+ SparseTableDictContent(const int sparseTableBlockSize, const int sparseTableDataSize)
+ : mLookupTableBuffer(), mAddressTableBuffer(), mContentBuffer(),
+ mExpandableLookupTableBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
+ mExpandableAddressTableBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
+ mExpandableContentBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
+ mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer,
+ sparseTableBlockSize, sparseTableDataSize), mIsValid(true) {}
+
+ virtual ~SparseTableDictContent() {}
+
+ virtual bool isValid() const {
+ return mIsValid;
+ }
+
+ // True as soon as any one of the three buffers reports being near its size limit.
+ bool isNearSizeLimit() const {
+ return mExpandableLookupTableBuffer.isNearSizeLimit()
+ || mExpandableAddressTableBuffer.isNearSizeLimit()
+ || mExpandableContentBuffer.isNearSizeLimit();
+ }
+
+ protected:
+ // Mutable access to the sparse address lookup table for subclasses.
+ SparseTable *getUpdatableAddressLookupTable() {
+ return &mAddressLookupTable;
+ }
+
+ // Read-only access to the sparse address lookup table for subclasses.
+ const SparseTable *getAddressLookupTable() const {
+ return &mAddressLookupTable;
+ }
+
+ // Mutable access to the content buffer for subclasses.
+ BufferWithExtendableBuffer *getWritableContentBuffer() {
+ return &mExpandableContentBuffer;
+ }
+
+ // Read-only access to the content buffer for subclasses.
+ const BufferWithExtendableBuffer *getContentBuffer() const {
+ return &mExpandableContentBuffer;
+ }
+
+ // Flushes the lookup table, address table and content buffers; defined in the .cpp file.
+ bool flush(const char *const dictDirPath, const char *const lookupTableFileName,
+ const char *const addressTableFileName, const char *const contentFileName) const;
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTableDictContent);
+
+ // Declaration order matters: see the note on the file-backed constructor.
+ const MmappedBuffer::MmappedBufferPtr mLookupTableBuffer;
+ const MmappedBuffer::MmappedBufferPtr mAddressTableBuffer;
+ const MmappedBuffer::MmappedBufferPtr mContentBuffer;
+ BufferWithExtendableBuffer mExpandableLookupTableBuffer;
+ BufferWithExtendableBuffer mExpandableAddressTableBuffer;
+ BufferWithExtendableBuffer mExpandableContentBuffer;
+ SparseTable mAddressLookupTable;
+ const bool mIsValid;
+};
+} // namespace latinime
+#endif /* LATINIME_SPARSE_TABLE_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.cpp
new file mode 100644
index 000000000..0b17a009d
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
+
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+// Returns the PtNode position stored for terminalId. Yields NOT_A_DICT_POS when terminalId is
+// outside [0, mSize) or when the stored value is NOT_A_TERMINAL_ADDRESS.
+int TerminalPositionLookupTable::getTerminalPtNodePosition(const int terminalId) const {
+    const bool isKnownTerminalId = terminalId >= 0 && terminalId < mSize;
+    if (!isKnownTerminalId) {
+        return NOT_A_DICT_POS;
+    }
+    const int storedPos = getBuffer()->readUint(
+            Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(terminalId));
+    if (storedPos == Ver4DictConstants::NOT_A_TERMINAL_ADDRESS) {
+        return NOT_A_DICT_POS;
+    }
+    return storedPos;
+}
+
+// Stores terminalPtNodePos for terminalId, growing the table when terminalId is beyond the
+// current size. NOT_A_DICT_POS is stored as NOT_A_TERMINAL_ADDRESS.
+// Returns false when terminalId is negative or when a buffer write fails.
+bool TerminalPositionLookupTable::setTerminalPtNodePosition(
+        const int terminalId, const int terminalPtNodePos) {
+    if (terminalId < 0) {
+        // This function reports success as a bool, so a rejected id must yield false.
+        // Returning the NOT_A_DICT_POS sentinel here would be converted to bool instead.
+        return false;
+    }
+    while (terminalId >= mSize) {
+        // Append an empty entry for every id up to and including terminalId.
+        if (!getWritableBuffer()->writeUint(Ver4DictConstants::NOT_A_TERMINAL_ADDRESS,
+                Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(mSize))) {
+            return false;
+        }
+        mSize++;
+    }
+    const int terminalPos = (terminalPtNodePos != NOT_A_DICT_POS) ?
+            terminalPtNodePos : Ver4DictConstants::NOT_A_TERMINAL_ADDRESS;
+    return getWritableBuffer()->writeUint(terminalPos,
+            Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(terminalId));
+}
+
+// Writes the table to the terminal address table file of dictPath. When the buffer holds more
+// bytes than the mSize entries in use, a fresh table containing only those entries is built
+// and written instead of this one.
+bool TerminalPositionLookupTable::flushToFile(const char *const dictPath) const {
+    const int usedSize = getEntryPos(mSize);
+    if (usedSize >= getBuffer()->getTailPosition()) {
+        // The whole buffer is in use, so it can be written as it is.
+        return flush(dictPath, Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
+    }
+    // Regenerate the table so that only the used entries end up in the file.
+    TerminalPositionLookupTable lookupTableToWrite;
+    for (int terminalId = 0; terminalId < mSize; ++terminalId) {
+        const int ptNodePos = getTerminalPtNodePosition(terminalId);
+        const bool isSet = lookupTableToWrite.setTerminalPtNodePosition(terminalId, ptNodePos);
+        if (!isSet) {
+            AKLOGE("Cannot set terminal position to lookupTableToWrite."
+                    " terminalId: %d, position: %d", terminalId, ptNodePos);
+            return false;
+        }
+    }
+    return lookupTableToWrite.flush(dictPath,
+            Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
+}
+
+// Compacts the table in place: entries holding NOT_A_TERMINAL_ADDRESS are dropped and the
+// remaining entries are renumbered consecutively from 0. Each surviving (old id, new id)
+// pair is inserted into terminalIdMap, and mSize becomes the number of surviving entries.
+// Returns false if an entry cannot be rewritten.
+bool TerminalPositionLookupTable::runGCTerminalIds(TerminalIdMap *const terminalIdMap) {
+    int nextNewTerminalId = 0;
+    for (int i = 0; i < mSize; ++i) {
+        const int terminalPos = getBuffer()->readUint(
+                Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(i));
+        if (terminalPos == Ver4DictConstants::NOT_A_TERMINAL_ADDRESS) {
+            // This entry is garbage; it does not get a new terminal id.
+            continue;
+        }
+        // Give a new terminal id to the entry. nextNewTerminalId never exceeds i, so the
+        // write cannot clobber an entry that has not been read yet.
+        if (!getWritableBuffer()->writeUint(terminalPos,
+                Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE,
+                getEntryPos(nextNewTerminalId))) {
+            return false;
+        }
+        // Memorize the mapping from the old terminal id to the new terminal id.
+        terminalIdMap->insert(TerminalIdMap::value_type(i, nextNewTerminalId));
+        nextNewTerminalId++;
+    }
+    mSize = nextNewTerminalId;
+    return true;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h
new file mode 100644
index 000000000..816059560
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H
+#define LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H
+
+#include <unordered_map>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+
+namespace latinime {
+
+// Table that maps a terminal id to the position of its PtNode. Entries are fixed-size
+// (TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE) and are indexed by terminal id.
+class TerminalPositionLookupTable : public SingleDictContent {
+ public:
+    // Maps a terminal id to another terminal id; filled in by runGCTerminalIds().
+    using TerminalIdMap = std::unordered_map<int, int>;
+
+    // Opens the terminal address table file of dictPath. The entry count is derived from the
+    // tail position of the opened buffer.
+    TerminalPositionLookupTable(const char *const dictPath, const bool isUpdatable)
+            : SingleDictContent(dictPath,
+                      Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION, isUpdatable),
+              mSize(getBuffer()->getTailPosition()
+                      / Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE) {}
+
+    // Creates an empty table that is not backed by a file.
+    TerminalPositionLookupTable() : mSize(0) {}
+
+    int getTerminalPtNodePosition(const int terminalId) const;
+
+    bool setTerminalPtNodePosition(const int terminalId, const int terminalPtNodePos);
+
+    // The next unused terminal id equals the current number of entries.
+    int getNextTerminalId() const { return mSize; }
+
+    bool flushToFile(const char *const dictPath) const;
+
+    bool runGCTerminalIds(TerminalIdMap *const terminalIdMap);
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(TerminalPositionLookupTable);
+
+    // Byte offset of the entry belonging to terminalId.
+    int getEntryPos(const int terminalId) const {
+        const int entryPos =
+                terminalId * Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE;
+        return entryPos;
+    }
+
+    // Number of entries currently in use.
+    int mSize;
+};
+} // namespace latinime
+#endif // LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h
new file mode 100644
index 000000000..790273541
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_SHORTCUT_LIST_POLICY_H
+#define LATINIME_VER4_SHORTCUT_LIST_POLICY_H
+
+#include "defines.h"
+#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
+
+namespace latinime {
+
+// Shortcut list policy for ver4 dictionaries. All reads and writes are delegated to a
+// ShortcutDictContent.
+class Ver4ShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
+ public:
+    // Only shortcutDictContent is retained; terminalPositionLookupTable is accepted but not
+    // used by this class.
+    Ver4ShortcutListPolicy(ShortcutDictContent *const shortcutDictContent,
+            const TerminalPositionLookupTable *const terminalPositionLookupTable)
+            : mShortcutDictContent(shortcutDictContent) {}
+
+    ~Ver4ShortcutListPolicy() {}
+
+    int getStartPos(const int pos) const {
+        // The first shortcut entry is located at the head position of the shortcut list.
+        return pos;
+    }
+
+    // Reads the shortcut at *pos, advances *pos, and derives the whitelist flag from the
+    // entry's probability. outIsWhitelist may be null.
+    void getNextShortcut(const int maxCodePointCount, int *const outCodePoint,
+            int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext,
+            int *const pos) const {
+        int probability = 0;
+        mShortcutDictContent->getShortcutEntryAndAdvancePosition(maxCodePointCount,
+                outCodePoint, outCodePointCount, &probability, outHasNext, pos);
+        if (outIsWhitelist) {
+            *outIsWhitelist = ShortcutListReadingUtils::isWhitelist(probability);
+        }
+    }
+
+    void skipAllShortcuts(int *const pos) const {
+        // Do nothing because we don't need to skip shortcut lists in ver4 dictionaries.
+    }
+
+    // Adds a shortcut target for terminalId, or overwrites the entry when the terminal
+    // already has a shortcut with the same target code points. Returns false on any failure.
+    bool addNewShortcut(const int terminalId, const int *const codePoints, const int codePointCount,
+            const int probability) {
+        const int shortcutListPos = mShortcutDictContent->getShortcutListHeadPos(terminalId);
+        if (shortcutListPos == NOT_A_DICT_POS) {
+            // The terminal has no list yet: create one holding just the new entry.
+            if (!mShortcutDictContent->createNewShortcutList(terminalId)) {
+                AKLOGE("Cannot create new shortcut list. terminal id: %d", terminalId);
+                return false;
+            }
+            const int writingPos = mShortcutDictContent->getShortcutListHeadPos(terminalId);
+            return mShortcutDictContent->writeShortcutEntry(codePoints, codePointCount, probability,
+                    false /* hasNext */, writingPos);
+        }
+        const int entryPos = mShortcutDictContent->findShortcutEntryAndGetPos(shortcutListPos,
+                codePoints, codePointCount);
+        if (entryPos == NOT_A_DICT_POS) {
+            // The target is not in the list: start a new list with the new entry at its
+            // head, then copy the existing list after it.
+            if (!mShortcutDictContent->createNewShortcutList(terminalId)) {
+                AKLOGE("Cannot create new shortcut list. terminal id: %d", terminalId);
+                return false;
+            }
+            int writingPos = mShortcutDictContent->getShortcutListHeadPos(terminalId);
+            if (!mShortcutDictContent->writeShortcutEntryAndAdvancePosition(codePoints,
+                    codePointCount, probability, true /* hasNext */, &writingPos)) {
+                AKLOGE("Cannot write shortcut entry. terminal id: %d, pos: %d", terminalId,
+                        writingPos);
+                return false;
+            }
+            return mShortcutDictContent->copyShortcutList(shortcutListPos, writingPos);
+        }
+        // Overwrite existing entry. Only its has-next flag is needed, so the other out
+        // parameters are passed as null pointers.
+        bool hasNext = false;
+        mShortcutDictContent->getShortcutEntry(MAX_WORD_LENGTH, nullptr /* outCodePoint */,
+                nullptr /* outCodePointCount */, nullptr /* probability */, &hasNext, entryPos);
+        if (!mShortcutDictContent->writeShortcutEntry(codePoints,
+                codePointCount, probability, hasNext, entryPos)) {
+            AKLOGE("Cannot overwrite shortcut entry. terminal id: %d, pos: %d", terminalId,
+                    entryPos);
+            return false;
+        }
+        return true;
+    }
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4ShortcutListPolicy);
+
+    ShortcutDictContent *const mShortcutDictContent;
+};
+} // namespace latinime
+#endif // LATINIME_VER4_SHORTCUT_LIST_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
new file mode 100644
index 000000000..5aa6b9a92
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
@@ -0,0 +1,145 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
+
+#include <cerrno>
+#include <cstring>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
+#include "suggest/policyimpl/dictionary/utils/file_utils.h"
+
+namespace latinime {
+
+// Factory for the buffers of an existing ver4 dictionary.
+// Takes ownership of headerBuffer; returns a null pointer (after asserting and logging) when
+// headerBuffer is null.
+/* static */ Ver4DictBuffers::Ver4DictBuffersPtr Ver4DictBuffers::openVer4DictBuffers(
+        const char *const dictPath, MmappedBuffer::MmappedBufferPtr headerBuffer,
+        const FormatUtils::FORMAT_VERSION formatVersion) {
+    if (headerBuffer) {
+        // TODO: take only dictDirPath, and open both header and trie files in the constructor
+        // below
+        // The flag has to be read before headerBuffer is moved into the constructor call.
+        const bool updatable = headerBuffer->isUpdatable();
+        Ver4DictBuffersPtr openedBuffers(new Ver4DictBuffers(dictPath, std::move(headerBuffer),
+                updatable, formatVersion));
+        return openedBuffers;
+    }
+    ASSERT(false);
+    AKLOGE("The header buffer must be valid to open ver4 dict buffers.");
+    return Ver4DictBuffersPtr(nullptr);
+}
+
+// Writes headerBuffer, the trie buffer and all dictionary contents to the dictionary directory
+// dictDirPath.
+//
+// The files are first written into a temporary directory (dictDirPath with
+// TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE appended). Only after every file has been written is the
+// existing dictDirPath removed and the temporary directory renamed to dictDirPath.
+//
+// Returns true on success. On failure an error is logged and false is returned; a temporary
+// directory left behind by a failed attempt is removed at the beginning of the next call.
+bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath,
+        const BufferWithExtendableBuffer *const headerBuffer) const {
+    // Create temporary directory.
+    const int tmpDirPathBufSize = FileUtils::getFilePathWithSuffixBufSize(dictDirPath,
+            DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE);
+    char tmpDirPath[tmpDirPathBufSize];
+    FileUtils::getFilePathWithSuffix(dictDirPath,
+            DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE, tmpDirPathBufSize,
+            tmpDirPath);
+    if (FileUtils::existsDir(tmpDirPath)) {
+        if (!FileUtils::removeDirAndFiles(tmpDirPath)) {
+            AKLOGE("Existing directory %s cannot be removed.", tmpDirPath);
+            ASSERT(false);
+            return false;
+        }
+    }
+    // NOTE(review): umask() changes the file mode creation mask of the whole process and the
+    // previous mask is not restored here; the files written below are created under this mask.
+    umask(S_IWGRP | S_IWOTH);
+    if (mkdir(tmpDirPath, S_IRWXU) == -1) {
+        AKLOGE("Cannot create directory: %s. errno: %d.", tmpDirPath, errno);
+        return false;
+    }
+    // Get dictionary base path: <tmpDirPath>/<basename of dictDirPath>. Each file below is
+    // passed this base path together with its own extension.
+    const int dictNameBufSize = strlen(dictDirPath) + 1 /* terminator */;
+    char dictName[dictNameBufSize];
+    FileUtils::getBasename(dictDirPath, dictNameBufSize, dictName);
+    const int dictPathBufSize = FileUtils::getFilePathBufSize(tmpDirPath, dictName);
+    char dictPath[dictPathBufSize];
+    FileUtils::getFilePath(tmpDirPath, dictName, dictPathBufSize, dictPath);
+
+    // Write header file.
+    if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath,
+            Ver4DictConstants::HEADER_FILE_EXTENSION, headerBuffer)) {
+        // Log the base path that was actually passed to the writer, not the directory.
+        AKLOGE("Dictionary header file %s%s cannot be written.", dictPath,
+                Ver4DictConstants::HEADER_FILE_EXTENSION);
+        return false;
+    }
+    // Write trie file.
+    if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath,
+            Ver4DictConstants::TRIE_FILE_EXTENSION, &mExpandableTrieBuffer)) {
+        AKLOGE("Dictionary trie file %s%s cannot be written.", dictPath,
+                Ver4DictConstants::TRIE_FILE_EXTENSION);
+        return false;
+    }
+    // Write dictionary contents.
+    if (!mTerminalPositionLookupTable.flushToFile(dictPath)) {
+        AKLOGE("Terminal position lookup table cannot be written. %s", tmpDirPath);
+        return false;
+    }
+    if (!mProbabilityDictContent.flushToFile(dictPath)) {
+        AKLOGE("Probability dict content cannot be written. %s", tmpDirPath);
+        return false;
+    }
+    if (!mBigramDictContent.flushToFile(dictPath)) {
+        AKLOGE("Bigram dict content cannot be written. %s", tmpDirPath);
+        return false;
+    }
+    if (!mShortcutDictContent.flushToFile(dictPath)) {
+        AKLOGE("Shortcut dict content cannot be written. %s", tmpDirPath);
+        return false;
+    }
+    // Remove existing dictionary.
+    // NOTE(review): if the rename below fails, the old dictionary has already been removed.
+    if (!FileUtils::removeDirAndFiles(dictDirPath)) {
+        AKLOGE("Existing directory %s cannot be removed.", dictDirPath);
+        ASSERT(false);
+        return false;
+    }
+    // Rename temporary directory.
+    if (rename(tmpDirPath, dictDirPath) != 0) {
+        AKLOGE("%s cannot be renamed to %s. errno: %d.", tmpDirPath, dictDirPath, errno);
+        ASSERT(false);
+        return false;
+    }
+    return true;
+}
+
+// Constructs the buffers of an existing dictionary: takes ownership of the already opened header
+// buffer, opens the trie buffer from dictPath, and constructs every dictionary content from
+// dictPath.
+Ver4DictBuffers::Ver4DictBuffers(const char *const dictPath,
+        MmappedBuffer::MmappedBufferPtr headerBuffer, const bool isUpdatable,
+        const FormatUtils::FORMAT_VERSION formatVersion)
+        : mHeaderBuffer(std::move(headerBuffer)),
+          mDictBuffer(MmappedBuffer::openBuffer(
+                  dictPath, Ver4DictConstants::TRIE_FILE_EXTENSION, isUpdatable)),
+          // NOTE(review): mHeaderBuffer is dereferenced without a null check here although the
+          // next initializer guards against null; openVer4DictBuffers() rejects a null header
+          // buffer before calling this constructor.
+          mHeaderPolicy(mHeaderBuffer->getBuffer(), formatVersion),
+          mExpandableHeaderBuffer(
+                  mHeaderBuffer ? mHeaderBuffer->getBuffer() : nullptr,
+                  mHeaderPolicy.getSize(),
+                  BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
+          // A trie buffer that failed to open yields a null, zero-sized original buffer.
+          mExpandableTrieBuffer(
+                  mDictBuffer ? mDictBuffer->getBuffer() : nullptr,
+                  mDictBuffer ? mDictBuffer->getBufferSize() : 0,
+                  BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
+          mTerminalPositionLookupTable(dictPath, isUpdatable),
+          mProbabilityDictContent(
+                  dictPath, mHeaderPolicy.hasHistoricalInfoOfWords(), isUpdatable),
+          mBigramDictContent(
+                  dictPath, mHeaderPolicy.hasHistoricalInfoOfWords(), isUpdatable),
+          mShortcutDictContent(dictPath, isUpdatable),
+          mIsUpdatable(isUpdatable) {
+}
+
+// Constructs buffers that are not backed by any mmapped file: both mmapped buffers are null,
+// the header policy is built from headerPolicy, and the result is always updatable.
+Ver4DictBuffers::Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize)
+        : mHeaderBuffer(nullptr),
+          mDictBuffer(nullptr),
+          mHeaderPolicy(headerPolicy),
+          mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
+          mExpandableTrieBuffer(maxTrieSize),
+          mTerminalPositionLookupTable(),
+          mProbabilityDictContent(headerPolicy->hasHistoricalInfoOfWords()),
+          mBigramDictContent(headerPolicy->hasHistoricalInfoOfWords()),
+          mShortcutDictContent(),
+          mIsUpdatable(true) {
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
new file mode 100644
index 000000000..df177c14a
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_DICT_BUFFER_H
+#define LATINIME_VER4_DICT_BUFFER_H
+
+#include <memory>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+
+namespace latinime {
+
+/*
+ * Owns every buffer that makes up a ver4 dictionary: the mmapped header and trie files, their
+ * extendable counterparts, and the terminal position lookup table, probability, bigram and
+ * shortcut contents.
+ *
+ * Instances are created only through openVer4DictBuffers() (existing dictionary files) or
+ * createVer4DictBuffers() (new dictionary that is not backed by files).
+ */
+class Ver4DictBuffers {
+ public:
+    typedef std::unique_ptr<Ver4DictBuffers> Ver4DictBuffersPtr;
+
+    // Opens the buffers of an existing dictionary. Takes ownership of headerBuffer and returns
+    // a null pointer when headerBuffer is null.
+    static Ver4DictBuffersPtr openVer4DictBuffers(const char *const dictDirPath,
+            MmappedBuffer::MmappedBufferPtr headerBuffer,
+            const FormatUtils::FORMAT_VERSION formatVersion);
+
+    // Creates buffers for a new dictionary whose trie can grow up to maxTrieSize.
+    static AK_FORCE_INLINE Ver4DictBuffersPtr createVer4DictBuffers(
+            const HeaderPolicy *const headerPolicy, const int maxTrieSize) {
+        return Ver4DictBuffersPtr(new Ver4DictBuffers(headerPolicy, maxTrieSize));
+    }
+
+    // True when both mmapped buffers are present and the header policy and every content report
+    // themselves valid. Because both mmapped buffers are required, this can only be true for
+    // buffers opened from files.
+    AK_FORCE_INLINE bool isValid() const {
+        return mHeaderBuffer && mDictBuffer && mHeaderPolicy.isValid()
+                && mProbabilityDictContent.isValid() && mTerminalPositionLookupTable.isValid()
+                && mBigramDictContent.isValid() && mShortcutDictContent.isValid();
+    }
+
+    // True when the trie buffer or any of the contents is near its size limit.
+    AK_FORCE_INLINE bool isNearSizeLimit() const {
+        return mExpandableTrieBuffer.isNearSizeLimit()
+                || mTerminalPositionLookupTable.isNearSizeLimit()
+                || mProbabilityDictContent.isNearSizeLimit()
+                || mBigramDictContent.isNearSizeLimit()
+                || mShortcutDictContent.isNearSizeLimit();
+    }
+
+    AK_FORCE_INLINE const HeaderPolicy *getHeaderPolicy() const {
+        return &mHeaderPolicy;
+    }
+
+    AK_FORCE_INLINE BufferWithExtendableBuffer *getWritableHeaderBuffer() {
+        return &mExpandableHeaderBuffer;
+    }
+
+    AK_FORCE_INLINE BufferWithExtendableBuffer *getWritableTrieBuffer() {
+        return &mExpandableTrieBuffer;
+    }
+
+    AK_FORCE_INLINE const BufferWithExtendableBuffer *getTrieBuffer() const {
+        return &mExpandableTrieBuffer;
+    }
+
+    AK_FORCE_INLINE TerminalPositionLookupTable *getMutableTerminalPositionLookupTable() {
+        return &mTerminalPositionLookupTable;
+    }
+
+    AK_FORCE_INLINE const TerminalPositionLookupTable *getTerminalPositionLookupTable() const {
+        return &mTerminalPositionLookupTable;
+    }
+
+    AK_FORCE_INLINE ProbabilityDictContent *getMutableProbabilityDictContent() {
+        return &mProbabilityDictContent;
+    }
+
+    AK_FORCE_INLINE const ProbabilityDictContent *getProbabilityDictContent() const {
+        return &mProbabilityDictContent;
+    }
+
+    AK_FORCE_INLINE BigramDictContent *getMutableBigramDictContent() {
+        return &mBigramDictContent;
+    }
+
+    AK_FORCE_INLINE const BigramDictContent *getBigramDictContent() const {
+        return &mBigramDictContent;
+    }
+
+    AK_FORCE_INLINE ShortcutDictContent *getMutableShortcutDictContent() {
+        return &mShortcutDictContent;
+    }
+
+    AK_FORCE_INLINE const ShortcutDictContent *getShortcutDictContent() const {
+        return &mShortcutDictContent;
+    }
+
+    AK_FORCE_INLINE bool isUpdatable() const {
+        return mIsUpdatable;
+    }
+
+    // Writes this dictionary, including its own header buffer, to dictDirPath.
+    bool flush(const char *const dictDirPath) const {
+        return flushHeaderAndDictBuffers(dictDirPath, &mExpandableHeaderBuffer);
+    }
+
+    // Same as flush() but writes the given headerBuffer instead of the one owned by this object.
+    bool flushHeaderAndDictBuffers(const char *const dictDirPath,
+            const BufferWithExtendableBuffer *const headerBuffer) const;
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(Ver4DictBuffers);
+
+    // headerBuffer is taken by value and moved into mHeaderBuffer, so it must not be const.
+    Ver4DictBuffers(const char *const dictDirPath,
+            MmappedBuffer::MmappedBufferPtr headerBuffer, const bool isUpdatable,
+            const FormatUtils::FORMAT_VERSION formatVersion);
+
+    Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize);
+
+    // Members are initialized in this order; mHeaderPolicy reads mHeaderBuffer and the
+    // extendable buffers read the mmapped buffers and mHeaderPolicy, so keep the order.
+    const MmappedBuffer::MmappedBufferPtr mHeaderBuffer;
+    const MmappedBuffer::MmappedBufferPtr mDictBuffer;
+    const HeaderPolicy mHeaderPolicy;
+    BufferWithExtendableBuffer mExpandableHeaderBuffer;
+    BufferWithExtendableBuffer mExpandableTrieBuffer;
+    TerminalPositionLookupTable mTerminalPositionLookupTable;
+    ProbabilityDictContent mProbabilityDictContent;
+    BigramDictContent mBigramDictContent;
+    ShortcutDictContent mShortcutDictContent;
+    // Boolean flag: initialized from a bool and returned by isUpdatable() as bool.
+    const bool mIsUpdatable;
+};
+} // namespace latinime
+#endif /* LATINIME_VER4_DICT_BUFFER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
new file mode 100644
index 000000000..345cabbf9
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+
+namespace latinime {
+
+// File extensions of the files that make up a ver4 dictionary.
+// These values MUST match the definitions in FormatSpec.java.
+const char *const Ver4DictConstants::TRIE_FILE_EXTENSION = ".trie";
+const char *const Ver4DictConstants::HEADER_FILE_EXTENSION = ".header";
+const char *const Ver4DictConstants::FREQ_FILE_EXTENSION = ".freq";
+// tat = Terminal Address Table
+const char *const Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION = ".tat";
+const char *const Ver4DictConstants::BIGRAM_FILE_EXTENSION = ".bigram_freq";
+const char *const Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION = ".bigram_lookup";
+const char *const Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION = ".bigram_index_freq";
+const char *const Ver4DictConstants::SHORTCUT_FILE_EXTENSION = ".shortcut_shortcut";
+const char *const Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION = ".shortcut_lookup";
+const char *const Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION =
+ ".shortcut_index_shortcut";
+
+// Version 4 dictionary size is implicitly limited to 8MB due to 3-byte offsets.
+const int Ver4DictConstants::MAX_DICTIONARY_SIZE = 8 * 1024 * 1024;
+// The extended region is the region of the dict file that has not been GCed yet plus the
+// additional buffer. Its size is limited to 1MB to prevent inefficient traversal.
+const int Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE = 1 * 1024 * 1024;
+
+const int Ver4DictConstants::NOT_A_TERMINAL_ID = -1;
+const int Ver4DictConstants::PROBABILITY_SIZE = 1; // presumably in bytes, like the sizes below
+const int Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE = 1;
+const int Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3;
+const int Ver4DictConstants::NOT_A_TERMINAL_ADDRESS = 0;
+const int Ver4DictConstants::TERMINAL_ID_FIELD_SIZE = 4;
+const int Ver4DictConstants::TIME_STAMP_FIELD_SIZE = 4;
+const int Ver4DictConstants::WORD_LEVEL_FIELD_SIZE = 1;
+const int Ver4DictConstants::WORD_COUNT_FIELD_SIZE = 1;
+
+const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 16;
+const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE = 4;
+const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 64;
+const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4;
+
+const int Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE = 3; // in bytes (see below)
+// The maximum unsigned value that fits in BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE bytes is used to
+// represent an invalid terminal ID in bigram lists.
+const int Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID =
+ (1 << (BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE * 8)) - 1; // 0xFFFFFF for a 3-byte field
+const int Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE = 1;
+const int Ver4DictConstants::BIGRAM_PROBABILITY_MASK = 0x0F; // low 4 bits
+const int Ver4DictConstants::BIGRAM_IS_LINK_MASK = 0x80; // highest bit of a byte
+const int Ver4DictConstants::BIGRAM_LARGE_PROBABILITY_FIELD_SIZE = 1;
+
+const int Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE = 1;
+const int Ver4DictConstants::SHORTCUT_PROBABILITY_MASK = 0x0F; // low 4 bits
+const int Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK = 0x80; // highest bit of a byte
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
new file mode 100644
index 000000000..b4effca9c
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_DICT_CONSTANTS_H
+#define LATINIME_VER4_DICT_CONSTANTS_H
+
+#include "defines.h"
+
+namespace latinime {
+
+// Constants describing the ver4 dictionary format: file extensions, size limits, field sizes
+// and flag masks. The values are defined in ver4_dict_constants.cpp.
+// TODO: Create PtConstants under the pt_common and move some constant values there.
+// Note that there are corresponding definitions in FormatSpec.java.
+class Ver4DictConstants {
+ public:
+ static const char *const TRIE_FILE_EXTENSION; // file extensions of the dictionary files
+ static const char *const HEADER_FILE_EXTENSION;
+ static const char *const FREQ_FILE_EXTENSION;
+ static const char *const TERMINAL_ADDRESS_TABLE_FILE_EXTENSION;
+ static const char *const BIGRAM_FILE_EXTENSION;
+ static const char *const BIGRAM_LOOKUP_TABLE_FILE_EXTENSION;
+ static const char *const BIGRAM_CONTENT_TABLE_FILE_EXTENSION;
+ static const char *const SHORTCUT_FILE_EXTENSION;
+ static const char *const SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION;
+ static const char *const SHORTCUT_CONTENT_TABLE_FILE_EXTENSION;
+
+ static const int MAX_DICTIONARY_SIZE; // upper limit of the dictionary size
+ static const int MAX_DICT_EXTENDED_REGION_SIZE; // upper limit of the extended region size
+
+ static const int NOT_A_TERMINAL_ID; // sentinel used where there is no terminal ID
+ static const int PROBABILITY_SIZE;
+ static const int FLAGS_IN_PROBABILITY_FILE_SIZE;
+ static const int TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE;
+ static const int NOT_A_TERMINAL_ADDRESS;
+ static const int TERMINAL_ID_FIELD_SIZE;
+ static const int TIME_STAMP_FIELD_SIZE;
+ static const int WORD_LEVEL_FIELD_SIZE;
+ static const int WORD_COUNT_FIELD_SIZE;
+
+ static const int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE;
+ static const int BIGRAM_ADDRESS_TABLE_DATA_SIZE;
+ static const int SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE;
+ static const int SHORTCUT_ADDRESS_TABLE_DATA_SIZE;
+
+ static const int BIGRAM_FLAGS_FIELD_SIZE;
+ static const int BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
+ static const int INVALID_BIGRAM_TARGET_TERMINAL_ID; // marks an invalid target in bigram lists
+ static const int BIGRAM_IS_LINK_MASK;
+ static const int BIGRAM_PROBABILITY_MASK;
+ // Used when bigram list has time stamp.
+ static const int BIGRAM_LARGE_PROBABILITY_FIELD_SIZE;
+
+ static const int SHORTCUT_FLAGS_FIELD_SIZE;
+ static const int SHORTCUT_PROBABILITY_MASK;
+ static const int SHORTCUT_HAS_NEXT_MASK;
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictConstants); // holder of constants only
+};
+} // namespace latinime
+#endif /* LATINIME_VER4_DICT_CONSTANTS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
new file mode 100644
index 000000000..0a435e91c
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
+
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+
+namespace latinime {
+
+// Reads the PtNode at ptNodePos and returns its parameters. When the node is flagged as moved,
+// the node at its destination is read instead.
+// siblingNodePos is the sibling position to report; when it is NOT_A_DICT_POS, the position
+// just after the PtNode read here is used. Returns a default-constructed PtNodeParams when
+// ptNodePos lies outside of the buffer.
+const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode(
+        const int ptNodePos, const int siblingNodePos) const {
+    const bool isPosInBuffer = ptNodePos >= 0 && ptNodePos < mBuffer->getTailPosition();
+    if (!isPosInBuffer) {
+        // Reading invalid position because of bug or broken dictionary.
+        AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d",
+                ptNodePos, mBuffer->getTailPosition());
+        ASSERT(false);
+        return PtNodeParams();
+    }
+    const bool inAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodePos);
+    const uint8_t *const nodeBuf = mBuffer->getBuffer(inAdditionalBuffer);
+    // Difference between a dictionary position and the matching index in nodeBuf. It is zero
+    // unless the node lives in the additional buffer.
+    const int posOffset = inAdditionalBuffer ? mBuffer->getOriginalBufferSize() : 0;
+    int readingPos = ptNodePos - posOffset;
+
+    const PatriciaTrieReadingUtils::NodeFlags flags =
+            PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(nodeBuf, &readingPos);
+    const int parentPosOffset =
+            DynamicPtReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(
+                    nodeBuf, &readingPos);
+    const int parentPos = DynamicPtReadingUtils::getParentPtNodePos(parentPosOffset, ptNodePos);
+    int codePoints[MAX_WORD_LENGTH];
+    const int codePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
+            nodeBuf, flags, MAX_WORD_LENGTH, codePoints, &readingPos);
+
+    // Terminal attributes keep these defaults when the node is not a terminal.
+    int terminalIdFieldPos = NOT_A_DICT_POS;
+    int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
+    int probability = NOT_A_PROBABILITY;
+    if (PatriciaTrieReadingUtils::isTerminal(flags)) {
+        terminalIdFieldPos = readingPos + posOffset;
+        terminalId = Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(
+                nodeBuf, &readingPos);
+        const ProbabilityEntry entry = mProbabilityDictContent->getProbabilityEntry(terminalId);
+        if (!entry.hasHistoricalInfo()) {
+            probability = entry.getProbability();
+        } else {
+            probability = ForgettingCurveUtils::decodeProbability(
+                    entry.getHistoricalInfo(), mHeaderPolicy);
+        }
+    }
+
+    const int childrenPosFieldPos = readingPos + posOffset;
+    int childrenPos = DynamicPtReadingUtils::readChildrenPositionAndAdvancePosition(
+            nodeBuf, &readingPos);
+    if (childrenPos != NOT_A_DICT_POS) {
+        childrenPos += posOffset;
+    }
+
+    // Sibling position is the tail position of original PtNode.
+    const int nodeTailPos = readingPos + posOffset;
+    const int resolvedSiblingPos =
+            (siblingNodePos == NOT_A_DICT_POS) ? nodeTailPos : siblingNodePos;
+    if (!DynamicPtReadingUtils::isMoved(flags)) {
+        return PtNodeParams(ptNodePos, flags, parentPos, codePointCount, codePoints,
+                terminalIdFieldPos, terminalId, probability, childrenPosFieldPos, childrenPos,
+                resolvedSiblingPos);
+    }
+    // Read destination node if the read node is a moved node.
+    // The destination position is stored at the same place as the parent position.
+    return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(parentPos, resolvedSiblingPos);
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h
new file mode 100644
index 000000000..22ed4a6c0
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_PATRICIA_TRIE_NODE_READER_H
+#define LATINIME_VER4_PATRICIA_TRIE_NODE_READER_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
+
+namespace latinime {
+
+class BufferWithExtendableBuffer;
+class HeaderPolicy;
+class ProbabilityDictContent;
+
+/*
+ * This class is used for helping to read nodes of ver4 patricia trie. This class handles moved
+ * nodes and reads node attributes, including the probability, which is read from the
+ * probability dict content.
+ *
+ * The pointers passed to the constructor are stored as-is; nothing in this class deletes them.
+ */
+class Ver4PatriciaTrieNodeReader : public PtNodeReader {
+ public:
+ Ver4PatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer,
+ const ProbabilityDictContent *const probabilityDictContent,
+ const HeaderPolicy *const headerPolicy)
+ : mBuffer(buffer), mProbabilityDictContent(probabilityDictContent),
+ mHeaderPolicy(headerPolicy) {}
+
+ ~Ver4PatriciaTrieNodeReader() {}
+
+ // Reads the PtNode at ptNodePos without a preset sibling position.
+ virtual const PtNodeParams fetchPtNodeParamsInBufferFromPtNodePos(const int ptNodePos) const {
+ return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(ptNodePos,
+ NOT_A_DICT_POS /* siblingNodePos */);
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeReader);
+
+ const BufferWithExtendableBuffer *const mBuffer; // buffer the PtNodes are read from
+ const ProbabilityDictContent *const mProbabilityDictContent; // looked up by terminal ID
+ const HeaderPolicy *const mHeaderPolicy; // passed on when decoding historical info
+
+ // Reads the PtNode at ptNodePos; defined in ver4_patricia_trie_node_reader.cpp.
+ const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
+ const int siblingNodePos) const;
+};
+} // namespace latinime
+#endif /* LATINIME_VER4_PATRICIA_TRIE_NODE_READER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
new file mode 100644
index 000000000..3d8da9173
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
@@ -0,0 +1,390 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
+
+#include "suggest/core/dictionary/property/unigram_property.h"
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
+#include "suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+
+namespace latinime {
+
+// Children position field size handed to PatriciaTrieReadingUtils::createAndGetFlags() when
+// updatePtNodeFlags() builds PtNode flags. Presumably measured in bytes - TODO confirm.
+const int Ver4PatriciaTrieNodeWriter::CHILDREN_POSITION_FIELD_SIZE = 3;
+
+// Rewrites the flags of the given PtNode so that it is marked as deleted. When the PtNode is a
+// terminal, its entry in the terminal position lookup table is reset to NOT_A_DICT_POS as well.
+// Returns false when the flags or the lookup table entry cannot be written.
+bool Ver4PatriciaTrieNodeWriter::markPtNodeAsDeleted(
+        const PtNodeParams *const toBeUpdatedPtNodeParams) {
+    const int headPos = toBeUpdatedPtNodeParams->getHeadPos();
+    const bool inAdditionalBuffer = mTrieBuffer->isInAdditionalBuffer(headPos);
+    const uint8_t *const buffer = mTrieBuffer->getBuffer(inAdditionalBuffer);
+    // Convert the head position into an offset within the buffer that was just selected.
+    int readingPos = inAdditionalBuffer
+            ? headPos - mTrieBuffer->getOriginalBufferSize() : headPos;
+    // Derive the new flags from the flags currently stored in the buffer.
+    const PatriciaTrieReadingUtils::NodeFlags currentFlags =
+            PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(buffer, &readingPos);
+    const PatriciaTrieReadingUtils::NodeFlags deletedFlags =
+            DynamicPtReadingUtils::updateAndGetFlags(currentFlags, false /* isMoved */,
+                    true /* isDeleted */, false /* willBecomeNonTerminal */);
+    // The flags are written back at the head position of the PtNode.
+    int writingPos = headPos;
+    if (!DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, deletedFlags,
+            &writingPos)) {
+        return false;
+    }
+    if (!toBeUpdatedPtNodeParams->isTerminal()) {
+        // Non-terminal PtNodes have nothing to remove from the lookup table.
+        return true;
+    }
+    // The PtNode is a terminal. Delete entry from the terminal position lookup table.
+    return mBuffers->getMutableTerminalPositionLookupTable()->setTerminalPtNodePosition(
+            toBeUpdatedPtNodeParams->getTerminalId(), NOT_A_DICT_POS /* ptNodePos */);
+}
+
+// Marks the given PtNode as moved and records the position it was moved to.
+//
+// toBeUpdatedPtNodeParams: params of the PtNode that is being moved.
+// movedPos: destination position; it is stored in the parent offset field of the old PtNode.
+// bigramLinkedNodePos: position written as the new parent of every child of the PtNode.
+//
+// Returns false when any write fails or when a child PtNode cannot be read.
+bool Ver4PatriciaTrieNodeWriter::markPtNodeAsMoved(
+        const PtNodeParams *const toBeUpdatedPtNodeParams,
+        const int movedPos, const int bigramLinkedNodePos) {
+    int pos = toBeUpdatedPtNodeParams->getHeadPos();
+    const bool usesAdditionalBuffer = mTrieBuffer->isInAdditionalBuffer(pos);
+    const uint8_t *const dictBuf = mTrieBuffer->getBuffer(usesAdditionalBuffer);
+    if (usesAdditionalBuffer) {
+        // Convert the position into an offset within the additional buffer.
+        pos -= mTrieBuffer->getOriginalBufferSize();
+    }
+    // Read original flags
+    const PatriciaTrieReadingUtils::NodeFlags originalFlags =
+            PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
+    const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
+            DynamicPtReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */,
+                    false /* isDeleted */, false /* willBecomeNonTerminal */);
+    int writingPos = toBeUpdatedPtNodeParams->getHeadPos();
+    // Update flags.
+    if (!DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, updatedFlags,
+            &writingPos)) {
+        return false;
+    }
+    // Update moved position, which is stored in the parent offset field.
+    if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(
+            mTrieBuffer, movedPos, toBeUpdatedPtNodeParams->getHeadPos(), &writingPos)) {
+        return false;
+    }
+    if (toBeUpdatedPtNodeParams->hasChildren()) {
+        // Update children's parent position.
+        mReadingHelper.initWithPtNodeArrayPos(toBeUpdatedPtNodeParams->getChildrenPos());
+        while (!mReadingHelper.isEnd()) {
+            const PtNodeParams childPtNodeParams(mReadingHelper.getPtNodeParams());
+            if (!childPtNodeParams.isValid()) {
+                // Never derive a writing position from a PtNode that could not be read; give up
+                // updating the dictionary instead.
+                AKLOGE("Cannot read child PtNode to update its parent position.");
+                return false;
+            }
+            int parentOffsetFieldPos = childPtNodeParams.getHeadPos()
+                    + DynamicPtWritingUtils::NODE_FLAG_FIELD_SIZE;
+            if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(
+                    mTrieBuffer, bigramLinkedNodePos, childPtNodeParams.getHeadPos(),
+                    &parentOffsetFieldPos)) {
+                // Parent offset cannot be written because of a bug or a broken dictionary; thus,
+                // we give up to update dictionary.
+                return false;
+            }
+            mReadingHelper.readNextSiblingNode(childPtNodeParams);
+        }
+    }
+    return true;
+}
+
+// Marks the given PtNode as "will become non-terminal": its entry in the terminal position
+// lookup table is reset to NOT_A_DICT_POS and the flags stored at its head position are
+// rewritten. Returns false when either update fails.
+bool Ver4PatriciaTrieNodeWriter::markPtNodeAsWillBecomeNonTerminal(
+        const PtNodeParams *const toBeUpdatedPtNodeParams) {
+    const int headPos = toBeUpdatedPtNodeParams->getHeadPos();
+    const bool inAdditionalBuffer = mTrieBuffer->isInAdditionalBuffer(headPos);
+    const uint8_t *const buffer = mTrieBuffer->getBuffer(inAdditionalBuffer);
+    // Convert the head position into an offset within the buffer that was just selected.
+    int readingPos = inAdditionalBuffer
+            ? headPos - mTrieBuffer->getOriginalBufferSize() : headPos;
+    // Compute the new flags from the flags currently stored in the buffer.
+    const PatriciaTrieReadingUtils::NodeFlags currentFlags =
+            PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(buffer, &readingPos);
+    const PatriciaTrieReadingUtils::NodeFlags nonTerminalFlags =
+            DynamicPtReadingUtils::updateAndGetFlags(currentFlags, false /* isMoved */,
+                    false /* isDeleted */, true /* willBecomeNonTerminal */);
+    // The lookup table is updated before the flags are written.
+    const bool lookupTableUpdated =
+            mBuffers->getMutableTerminalPositionLookupTable()->setTerminalPtNodePosition(
+                    toBeUpdatedPtNodeParams->getTerminalId(), NOT_A_DICT_POS /* ptNodePos */);
+    if (!lookupTableUpdated) {
+        AKLOGE("Cannot update terminal position lookup table. terminal id: %d",
+                toBeUpdatedPtNodeParams->getTerminalId());
+        return false;
+    }
+    // Update flags.
+    int writingPos = headPos;
+    return DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, nonTerminalFlags,
+            &writingPos);
+}
+
+// Replaces the probability entry of a terminal PtNode with one derived from the given unigram
+// property. Returns false for non-terminal PtNodes or when the entry cannot be stored.
+bool Ver4PatriciaTrieNodeWriter::updatePtNodeUnigramProperty(
+        const PtNodeParams *const toBeUpdatedPtNodeParams,
+        const UnigramProperty *const unigramProperty) {
+    // Update probability and historical information.
+    // TODO: Update other information in the unigram property.
+    if (toBeUpdatedPtNodeParams->isTerminal()) {
+        const int terminalId = toBeUpdatedPtNodeParams->getTerminalId();
+        const ProbabilityEntry currentEntry =
+                mBuffers->getProbabilityDictContent()->getProbabilityEntry(terminalId);
+        const ProbabilityEntry updatedEntry =
+                createUpdatedEntryFrom(&currentEntry, unigramProperty);
+        return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
+                terminalId, &updatedEntry);
+    }
+    return false;
+}
+
+// Refreshes the historical info stored for a terminal PtNode and reports whether the PtNode has
+// to be kept after GC.
+//
+// toBeUpdatedPtNodeParams: params of the terminal PtNode to update.
+// outNeedsToKeepPtNode: set to true when the entry has no historical info, otherwise to the
+//         result of ForgettingCurveUtils::needsToKeep(). Only written when this method returns
+//         true.
+//
+// Returns false for non-terminal PtNodes and when a write fails.
+bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(
+        const PtNodeParams *const toBeUpdatedPtNodeParams, bool *const outNeedsToKeepPtNode) {
+    if (!toBeUpdatedPtNodeParams->isTerminal()) {
+        AKLOGE("updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC is called for "
+                "non-terminal PtNode.");
+        return false;
+    }
+    const ProbabilityEntry originalProbabilityEntry =
+            mBuffers->getProbabilityDictContent()->getProbabilityEntry(
+                    toBeUpdatedPtNodeParams->getTerminalId());
+    if (originalProbabilityEntry.hasHistoricalInfo()) {
+        const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
+                originalProbabilityEntry.getHistoricalInfo(), mHeaderPolicy);
+        const ProbabilityEntry probabilityEntry =
+                originalProbabilityEntry.createEntryWithUpdatedHistoricalInfo(&historicalInfo);
+        if (!mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
+                toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry)) {
+            AKLOGE("Cannot write updated probability entry. terminalId: %d",
+                    toBeUpdatedPtNodeParams->getTerminalId());
+            return false;
+        }
+        const bool isValid = ForgettingCurveUtils::needsToKeep(&historicalInfo, mHeaderPolicy);
+        if (!isValid) {
+            // The entry does not need to be kept, so flag the PtNode accordingly.
+            if (!markPtNodeAsWillBecomeNonTerminal(toBeUpdatedPtNodeParams)) {
+                AKLOGE("Cannot mark PtNode as willBecomeNonTerminal.");
+                return false;
+            }
+        }
+        *outNeedsToKeepPtNode = isValid;
+    } else {
+        // No need to update probability.
+        *outNeedsToKeepPtNode = true;
+    }
+    return true;
+}
+
+// Overwrites the children position field of the given PtNode with newChildrenPosition.
+// Returns the result of the underlying write.
+bool Ver4PatriciaTrieNodeWriter::updateChildrenPosition(
+        const PtNodeParams *const toBeUpdatedPtNodeParams, const int newChildrenPosition) {
+    // The writing utility advances the position it is given, so work on a local copy.
+    int fieldPos = toBeUpdatedPtNodeParams->getChildrenPosFieldPos();
+    const bool written = DynamicPtWritingUtils::writeChildrenPositionAndAdvancePosition(
+            mTrieBuffer, newChildrenPosition, &fieldPos);
+    return written;
+}
+
+// Overwrites the terminal id field of the given PtNode with newTerminalId.
+// Returns the result of the underlying write.
+bool Ver4PatriciaTrieNodeWriter::updateTerminalId(const PtNodeParams *const toBeUpdatedPtNodeParams,
+        const int newTerminalId) {
+    const int terminalIdFieldPos = toBeUpdatedPtNodeParams->getTerminalIdFieldPos();
+    return mTrieBuffer->writeUint(newTerminalId, Ver4DictConstants::TERMINAL_ID_FIELD_SIZE,
+            terminalIdFieldPos);
+}
+
+// Writes the given PtNode at *ptNodeWritingPos and advances the position past it. The terminal
+// id chosen for the PtNode is not reported back to the caller.
+// Returns false when any part of the PtNode cannot be written.
+bool Ver4PatriciaTrieNodeWriter::writePtNodeAndAdvancePosition(
+        const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) {
+    return writePtNodeAndGetTerminalIdAndAdvancePosition(ptNodeParams,
+            nullptr /* outTerminalId */, ptNodeWritingPos);
+}
+
+
+// Writes a new terminal PtNode at *ptNodeWritingPos and then stores a probability entry built
+// from the unigram property under the terminal id used for that PtNode.
+// Returns false when the PtNode or the probability entry cannot be written.
+bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
+        const PtNodeParams *const ptNodeParams, const UnigramProperty *const unigramProperty,
+        int *const ptNodeWritingPos) {
+    int writtenTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
+    const bool ptNodeWritten = writePtNodeAndGetTerminalIdAndAdvancePosition(ptNodeParams,
+            &writtenTerminalId, ptNodeWritingPos);
+    if (!ptNodeWritten) {
+        return false;
+    }
+    // Write probability. The entry is derived from a default-constructed one.
+    ProbabilityEntry emptyEntry;
+    const ProbabilityEntry entryToStore = createUpdatedEntryFrom(&emptyEntry, unigramProperty);
+    return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(writtenTerminalId,
+            &entryToStore);
+}
+
+// Asks the bigram policy to add an entry from the source PtNode's terminal id to the target
+// PtNode's terminal id. outAddedNewBigram is forwarded to the policy untouched.
+// Returns false (after logging) when the policy rejects the entry.
+bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry(
+        const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam,
+        const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) {
+    const bool added = mBigramPolicy->addNewEntry(sourcePtNodeParams->getTerminalId(),
+            targetPtNodeParam->getTerminalId(), bigramProperty, outAddedNewBigram);
+    if (added) {
+        return true;
+    }
+    AKLOGE("Cannot add new bigram entry. terminalId: %d, targetTerminalId: %d",
+            sourcePtNodeParams->getTerminalId(), targetPtNodeParam->getTerminalId());
+    return false;
+}
+
+// Asks the bigram policy to remove the entry that links the source PtNode's terminal id to the
+// target PtNode's terminal id. Returns whatever the policy reports.
+bool Ver4PatriciaTrieNodeWriter::removeBigramEntry(
+        const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam) {
+    const int sourceTerminalId = sourcePtNodeParams->getTerminalId();
+    const int targetTerminalId = targetPtNodeParam->getTerminalId();
+    return mBigramPolicy->removeEntry(sourceTerminalId, targetTerminalId);
+}
+
+// Forwards to the bigram policy method of the same name using the terminal id of the source
+// PtNode. outBigramEntryCount is passed through to the policy.
+bool Ver4PatriciaTrieNodeWriter::updateAllBigramEntriesAndDeleteUselessEntries(
+        const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount) {
+    const int sourceTerminalId = sourcePtNodeParams->getTerminalId();
+    return mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(sourceTerminalId,
+            outBigramEntryCount);
+}
+
+// Rewrites the parent offset and children position of the given PtNode, translating each
+// position through the relocation maps when a mapping exists. When outBigramEntryCount is
+// non-null, it receives the bigram entry count of the PtNode's terminal id.
+// Returns false when either position field cannot be written.
+bool Ver4PatriciaTrieNodeWriter::updateAllPositionFields(
+        const PtNodeParams *const toBeUpdatedPtNodeParams,
+        const DictPositionRelocationMap *const dictPositionRelocationMap,
+        int *const outBigramEntryCount) {
+    const int headPos = toBeUpdatedPtNodeParams->getHeadPos();
+
+    // Resolve the parent position; positions without a mapping are kept as they are.
+    int newParentPos = toBeUpdatedPtNodeParams->getParentPos();
+    if (newParentPos != NOT_A_DICT_POS) {
+        const PtNodeWriter::PtNodePositionRelocationMap &ptNodeMap =
+                dictPositionRelocationMap->mPtNodePositionRelocationMap;
+        const PtNodeWriter::PtNodePositionRelocationMap::const_iterator relocated =
+                ptNodeMap.find(newParentPos);
+        if (relocated != ptNodeMap.end()) {
+            newParentPos = relocated->second;
+        }
+    }
+    // Write updated parent offset. The field follows the node flags.
+    int parentOffsetFieldPos = headPos + DynamicPtWritingUtils::NODE_FLAG_FIELD_SIZE;
+    if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(mTrieBuffer,
+            newParentPos, headPos, &parentOffsetFieldPos)) {
+        return false;
+    }
+
+    // Resolve the children position in the same way and store it.
+    int newChildrenPos = toBeUpdatedPtNodeParams->getChildrenPos();
+    if (newChildrenPos != NOT_A_DICT_POS) {
+        const PtNodeWriter::PtNodeArrayPositionRelocationMap &ptNodeArrayMap =
+                dictPositionRelocationMap->mPtNodeArrayPositionRelocationMap;
+        const PtNodeWriter::PtNodeArrayPositionRelocationMap::const_iterator relocated =
+                ptNodeArrayMap.find(newChildrenPos);
+        if (relocated != ptNodeArrayMap.end()) {
+            newChildrenPos = relocated->second;
+        }
+    }
+    if (!updateChildrenPosition(toBeUpdatedPtNodeParams, newChildrenPos)) {
+        return false;
+    }
+
+    // Counts bigram entries.
+    if (outBigramEntryCount) {
+        *outBigramEntryCount = mBigramPolicy->getBigramEntryConut(
+                toBeUpdatedPtNodeParams->getTerminalId());
+    }
+    return true;
+}
+
+// Asks the shortcut policy to add a shortcut target to the terminal of the given PtNode.
+//
+// ptNodeParams: params of the PtNode that owns the shortcut.
+// targetCodePoints / targetCodePointCount: code points of the shortcut target.
+// shortcutProbability: probability forwarded to the policy for the new shortcut.
+//
+// Returns false (after logging) when the policy cannot add the shortcut.
+bool Ver4PatriciaTrieNodeWriter::addShortcutTarget(const PtNodeParams *const ptNodeParams,
+        const int *const targetCodePoints, const int targetCodePointCount,
+        const int shortcutProbability) {
+    if (!mShortcutPolicy->addNewShortcut(ptNodeParams->getTerminalId(),
+            targetCodePoints, targetCodePointCount, shortcutProbability)) {
+        AKLOGE("Cannot add new shortcut entry. terminalId: %d", ptNodeParams->getTerminalId());
+        return false;
+    }
+    return true;
+}
+
+// Writes a whole PtNode (flags, parent offset, code points, optional terminal id and children
+// position) at *ptNodeWritingPos and advances the position past it.
+//
+// ptNodeParams: contents of the PtNode to write.
+// outTerminalId: may be null. When non-null and the written PtNode is a terminal, receives the
+//         terminal id that was written; it is left untouched otherwise.
+// ptNodeWritingPos: in/out writing position.
+//
+// Returns false as soon as any write fails.
+bool Ver4PatriciaTrieNodeWriter::writePtNodeAndGetTerminalIdAndAdvancePosition(
+        const PtNodeParams *const ptNodeParams, int *const outTerminalId,
+        int *const ptNodeWritingPos) {
+    const int nodePos = *ptNodeWritingPos;
+    // Write dummy flags. The Node flags are updated with appropriate flags at the last step of the
+    // PtNode writing.
+    if (!DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer,
+            0 /* nodeFlags */, ptNodeWritingPos)) {
+        return false;
+    }
+    // Calculate a parent offset and write the offset.
+    if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(mTrieBuffer,
+            ptNodeParams->getParentPos(), nodePos, ptNodeWritingPos)) {
+        return false;
+    }
+    // Write code points
+    if (!DynamicPtWritingUtils::writeCodePointsAndAdvancePosition(mTrieBuffer,
+            ptNodeParams->getCodePoints(), ptNodeParams->getCodePointCount(), ptNodeWritingPos)) {
+        return false;
+    }
+    // Decide which terminal id, if any, the written PtNode carries. A PtNode flagged as
+    // willBecomeNonTerminal is written without one.
+    int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
+    if (!ptNodeParams->willBecomeNonTerminal()) {
+        if (ptNodeParams->getTerminalId() != Ver4DictConstants::NOT_A_TERMINAL_ID) {
+            // Keep the terminal id the params already have.
+            terminalId = ptNodeParams->getTerminalId();
+        } else if (ptNodeParams->isTerminal()) {
+            // Write terminal information using a new terminal id.
+            // Get a new unused terminal id.
+            terminalId = mBuffers->getTerminalPositionLookupTable()->getNextTerminalId();
+        }
+    }
+    const bool isTerminal = terminalId != Ver4DictConstants::NOT_A_TERMINAL_ID;
+    if (isTerminal) {
+        // Update the lookup table.
+        if (!mBuffers->getMutableTerminalPositionLookupTable()->setTerminalPtNodePosition(
+                terminalId, nodePos)) {
+            return false;
+        }
+        // Write terminal Id.
+        if (!mTrieBuffer->writeUintAndAdvancePosition(terminalId,
+                Ver4DictConstants::TERMINAL_ID_FIELD_SIZE, ptNodeWritingPos)) {
+            return false;
+        }
+        if (outTerminalId) {
+            *outTerminalId = terminalId;
+        }
+    }
+    // Write children position
+    if (!DynamicPtWritingUtils::writeChildrenPositionAndAdvancePosition(mTrieBuffer,
+            ptNodeParams->getChildrenPos(), ptNodeWritingPos)) {
+        return false;
+    }
+    // Replace the dummy flags written at the beginning with the real ones.
+    return updatePtNodeFlags(nodePos, ptNodeParams->isBlacklisted(), ptNodeParams->isNotAWord(),
+            isTerminal, ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
+}
+
+// Builds the probability entry that results from applying unigramProperty to
+// originalProbabilityEntry. When the header policy says words carry historical info, the
+// historical info is updated; otherwise only the probability is replaced.
+const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom(
+        const ProbabilityEntry *const originalProbabilityEntry,
+        const UnigramProperty *const unigramProperty) const {
+    // TODO: Consolidate historical info and probability.
+    if (!mHeaderPolicy->hasHistoricalInfoOfWords()) {
+        return originalProbabilityEntry->createEntryWithUpdatedProbability(
+                unigramProperty->getProbability());
+    }
+    // Historical info carried by the unigram property itself.
+    const HistoricalInfo infoFromProperty(unigramProperty->getTimestamp(),
+            unigramProperty->getLevel(), unigramProperty->getCount());
+    const HistoricalInfo mergedInfo = ForgettingCurveUtils::createUpdatedHistoricalInfo(
+            originalProbabilityEntry->getHistoricalInfo(), unigramProperty->getProbability(),
+            &infoFromProperty, mHeaderPolicy);
+    return originalProbabilityEntry->createEntryWithUpdatedHistoricalInfo(&mergedInfo);
+}
+
+// Builds PtNode flags from the given attributes and writes them at ptNodePos. The flags are
+// always created with hasShortcutTargets and hasBigrams set to false.
+// Returns false (after logging) when the flags cannot be written.
+bool Ver4PatriciaTrieNodeWriter::updatePtNodeFlags(const int ptNodePos,
+        const bool isBlacklisted, const bool isNotAWord, const bool isTerminal,
+        const bool hasMultipleChars) {
+    // Create node flags and write them.
+    const PatriciaTrieReadingUtils::NodeFlags flagsToWrite =
+            PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord, isTerminal,
+                    false /* hasShortcutTargets */, false /* hasBigrams */, hasMultipleChars,
+                    CHILDREN_POSITION_FIELD_SIZE);
+    const bool written = DynamicPtWritingUtils::writeFlags(mTrieBuffer, flagsToWrite, ptNodePos);
+    if (!written) {
+        AKLOGE("Cannot write PtNode flags. flags: %x, pos: %d", flagsToWrite, ptNodePos);
+    }
+    return written;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
new file mode 100644
index 000000000..162dc9b1d
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_PATRICIA_TRIE_NODE_WRITER_H
+#define LATINIME_VER4_PATRICIA_TRIE_NODE_WRITER_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
+
+namespace latinime {
+
+class BufferWithExtendableBuffer;
+class HeaderPolicy;
+class Ver4BigramListPolicy;
+class Ver4DictBuffers;
+class Ver4PatriciaTrieNodeReader;
+class Ver4PtNodeArrayReader;
+class Ver4ShortcutListPolicy;
+
+/*
+ * This class is used for helping to write nodes of ver4 patricia trie.
+ */
+class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
+ public:
+ // Keeps the given pointers as members and builds the internal reading helper from
+ // ptNodeReader and ptNodeArrayReader. Nothing is copied or dereferenced here.
+ Ver4PatriciaTrieNodeWriter(BufferWithExtendableBuffer *const trieBuffer,
+ Ver4DictBuffers *const buffers, const HeaderPolicy *const headerPolicy,
+ const PtNodeReader *const ptNodeReader,
+ const PtNodeArrayReader *const ptNodeArrayReader,
+ Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy)
+ : mTrieBuffer(trieBuffer), mBuffers(buffers), mHeaderPolicy(headerPolicy),
+ mReadingHelper(ptNodeReader, ptNodeArrayReader), mBigramPolicy(bigramPolicy),
+ mShortcutPolicy(shortcutPolicy) {}
+
+ virtual ~Ver4PatriciaTrieNodeWriter() {}
+
+ // Rewrites the PtNode's flags as deleted; for terminals, also resets the lookup table entry.
+ virtual bool markPtNodeAsDeleted(const PtNodeParams *const toBeUpdatedPtNodeParams);
+
+ // Rewrites the PtNode's flags as moved, stores movedPos in its parent offset field and
+ // re-parents its children to bigramLinkedNodePos.
+ virtual bool markPtNodeAsMoved(const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const int movedPos, const int bigramLinkedNodePos);
+
+ // Resets the PtNode's lookup table entry and rewrites its flags as willBecomeNonTerminal.
+ virtual bool markPtNodeAsWillBecomeNonTerminal(
+ const PtNodeParams *const toBeUpdatedPtNodeParams);
+
+ // Stores a probability entry updated from unigramProperty. Fails for non-terminal PtNodes.
+ virtual bool updatePtNodeUnigramProperty(const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const UnigramProperty *const unigramProperty);
+
+ // Refreshes historical info of a terminal PtNode and reports via outNeedsToKeepPtNode whether
+ // the PtNode has to be kept.
+ virtual bool updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(
+ const PtNodeParams *const toBeUpdatedPtNodeParams, bool *const outNeedsToKeepPtNode);
+
+ // Overwrites the children position field of the PtNode.
+ virtual bool updateChildrenPosition(const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const int newChildrenPosition);
+
+ // Overwrites the terminal id field of the PtNode. Not declared virtual here.
+ bool updateTerminalId(const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const int newTerminalId);
+
+ // Writes a PtNode at *ptNodeWritingPos and advances the position.
+ virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
+ int *const ptNodeWritingPos);
+
+ // Writes a PtNode and then a probability entry built from unigramProperty.
+ virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
+ const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos);
+
+ // Adds a bigram entry between the terminal ids of the two PtNodes via the bigram policy.
+ virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
+ const PtNodeParams *const targetPtNodeParam, const BigramProperty *const bigramProperty,
+ bool *const outAddedNewBigram);
+
+ // Removes the bigram entry between the terminal ids of the two PtNodes via the bigram policy.
+ virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,
+ const PtNodeParams *const targetPtNodeParam);
+
+ // Forwards to the bigram policy method of the same name for the source PtNode's terminal id.
+ virtual bool updateAllBigramEntriesAndDeleteUselessEntries(
+ const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount);
+
+ // Rewrites parent offset and children position using the relocation maps and, when
+ // outBigramEntryCount is non-null, reports the bigram entry count.
+ virtual bool updateAllPositionFields(const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const DictPositionRelocationMap *const dictPositionRelocationMap,
+ int *const outBigramEntryCount);
+
+ // Adds a shortcut target to the PtNode's terminal via the shortcut policy.
+ virtual bool addShortcutTarget(const PtNodeParams *const ptNodeParams,
+ const int *const targetCodePoints, const int targetCodePointCount,
+ const int shortcutProbability);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeWriter);
+
+ // Shared implementation of the PtNode writing methods. outTerminalId may be null.
+ bool writePtNodeAndGetTerminalIdAndAdvancePosition(
+ const PtNodeParams *const ptNodeParams, int *const outTerminalId,
+ int *const ptNodeWritingPos);
+
+ // Create updated probability entry using given unigram property. In addition to the
+ // probability, this method updates historical information if needed.
+ // TODO: Update flags belonging to the unigram property.
+ const ProbabilityEntry createUpdatedEntryFrom(
+ const ProbabilityEntry *const originalProbabilityEntry,
+ const UnigramProperty *const unigramProperty) const;
+
+ // Builds PtNode flags from the given attributes and writes them at ptNodePos.
+ bool updatePtNodeFlags(const int ptNodePos, const bool isBlacklisted, const bool isNotAWord,
+ const bool isTerminal, const bool hasMultipleChars);
+
+ // Children position field size used when PtNode flags are created.
+ static const int CHILDREN_POSITION_FIELD_SIZE;
+
+ // Pointers handed to the constructor; this class never deletes them.
+ BufferWithExtendableBuffer *const mTrieBuffer;
+ Ver4DictBuffers *const mBuffers;
+ const HeaderPolicy *const mHeaderPolicy;
+ // Helper used to iterate over child PtNodes in markPtNodeAsMoved().
+ DynamicPtReadingHelper mReadingHelper;
+ Ver4BigramListPolicy *const mBigramPolicy;
+ Ver4ShortcutListPolicy *const mShortcutPolicy;
+};
+} // namespace latinime
+#endif /* LATINIME_VER4_PATRICIA_TRIE_NODE_WRITER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
new file mode 100644
index 000000000..46107d92a
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -0,0 +1,522 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h"
+
+#include <vector>
+
+#include "suggest/core/dicnode/dic_node.h"
+#include "suggest/core/dicnode/dic_node_vector.h"
+#include "suggest/core/dictionary/property/bigram_property.h"
+#include "suggest/core/dictionary/property/unigram_property.h"
+#include "suggest/core/dictionary/property/word_property.h"
+#include "suggest/core/session/prev_words_info.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
+
+namespace latinime {
+
+// Note that there are corresponding definitions on the Java side in BinaryDictionaryTests and
+// BinaryDictionaryDecayingTests.
+const char *const Ver4PatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
+const char *const Ver4PatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
+const char *const Ver4PatriciaTriePolicy::MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT";
+const char *const Ver4PatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT";
+// Dynamic updates are refused once the dictionary buffer's tail position reaches
+// MAX_DICTIONARY_SIZE minus this margin (see addUnigramEntry()).
+const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024;
+const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
+ Ver4DictConstants::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
+
+// Pushes one child DicNode into childDicNodes for every child PtNode of dicNode that does not
+// represent non-word information. Iteration stops at the first invalid PtNode. A reading error
+// reported by the helper marks the dictionary as corrupted.
+void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
+        DicNodeVector *const childDicNodes) const {
+    if (!dicNode->hasChildren()) {
+        return;
+    }
+    DynamicPtReadingHelper helper(&mNodeReader, &mPtNodeArrayReader);
+    helper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos());
+    while (!helper.isEnd()) {
+        const PtNodeParams childParams = helper.getPtNodeParams();
+        if (!childParams.isValid()) {
+            break;
+        }
+        const bool isLiveTerminal = childParams.isTerminal() && !childParams.isDeleted();
+        // A DecayingDict may have a terminal PtNode that has a terminal DicNode whose
+        // probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a
+        // valid terminal DicNode.
+        const bool isTerminal = (isLiveTerminal && mHeaderPolicy->isDecayingDict())
+                ? (childParams.getProbability() != NOT_A_PROBABILITY)
+                : isLiveTerminal;
+        // Advance to the sibling before deciding whether this PtNode is pushed.
+        helper.readNextSiblingNode(childParams);
+        if (!childParams.representsNonWordInfo()) {
+            const bool isBlacklistedOrNotAWord =
+                    childParams.isBlacklisted() || childParams.isNotAWord();
+            childDicNodes->pushLeavingChild(dicNode, childParams.getHeadPos(),
+                    childParams.getChildrenPos(), childParams.getProbability(), isTerminal,
+                    childParams.hasChildren(), isBlacklistedOrNotAWord,
+                    childParams.getCodePointCount(), childParams.getCodePoints());
+        }
+        // PtNodes that represent non-word information are skipped.
+    }
+    if (helper.isError()) {
+        mIsCorrupted = true;
+        AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes().");
+    }
+}
+
+// Delegates to a reading helper positioned at ptNodePos to fill outCodePoints and
+// outUnigramProbability, and returns the code point count the helper reports. A reading error
+// reported by the helper marks the dictionary as corrupted.
+int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
+        const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
+        int *const outUnigramProbability) const {
+    DynamicPtReadingHelper helper(&mNodeReader, &mPtNodeArrayReader);
+    helper.initWithPtNodePos(ptNodePos);
+    const int readCodePointCount = helper.getCodePointsAndProbabilityAndReturnCodePointCount(
+            maxCodePointCount, outCodePoints, outUnigramProbability);
+    const bool hasReadingError = helper.isError();
+    if (hasReadingError) {
+        mIsCorrupted = true;
+        AKLOGE("Dictionary reading error in getCodePointsAndProbabilityAndReturnCodePointCount().");
+    }
+    return readCodePointCount;
+}
+
+// Looks up the terminal PtNode of the given word, starting from the root PtNode array.
+//
+// inWord / length: code points of the word to look up.
+// forceLowerCaseSearch: forwarded to the reading helper.
+//
+// Returns the position reported by the reading helper. A reading error reported by the helper
+// marks the dictionary as corrupted.
+int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
+        const int length, const bool forceLowerCaseSearch) const {
+    DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
+    readingHelper.initWithPtNodeArrayPos(getRootPosition());
+    const int ptNodePos =
+            readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
+    if (readingHelper.isError()) {
+        mIsCorrupted = true;
+        AKLOGE("Dictionary reading error in getTerminalPtNodePositionOfWord().");
+    }
+    return ptNodePos;
+}
+
+// Combines a unigram and a bigram probability into one probability. For decaying dictionaries
+// the combination is delegated to ForgettingCurveUtils. Otherwise the bigram probability is
+// used when present, the backed-off unigram probability when only the unigram is present, and
+// NOT_A_PROBABILITY when the unigram probability is missing.
+int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability,
+        const int bigramProbability) const {
+    if (mHeaderPolicy->isDecayingDict()) {
+        // Both probabilities are encoded. Decode them and get probability.
+        return ForgettingCurveUtils::getProbability(unigramProbability, bigramProbability);
+    }
+    if (unigramProbability == NOT_A_PROBABILITY) {
+        return NOT_A_PROBABILITY;
+    }
+    if (bigramProbability != NOT_A_PROBABILITY) {
+        return bigramProbability;
+    }
+    return ProbabilityUtils::backoff(unigramProbability);
+}
+
+// Returns the unigram probability of the PtNode at ptNodePos, or NOT_A_PROBABILITY when the
+// position is NOT_A_DICT_POS or the PtNode is deleted, blacklisted or not a word.
+int Ver4PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const {
+    if (ptNodePos == NOT_A_DICT_POS) {
+        return NOT_A_PROBABILITY;
+    }
+    const PtNodeParams nodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
+    const bool hasNoUsableProbability = nodeParams.isDeleted() || nodeParams.isBlacklisted()
+            || nodeParams.isNotAWord();
+    return hasNoUsableProbability
+            ? NOT_A_PROBABILITY
+            : getProbability(nodeParams.getProbability(), NOT_A_PROBABILITY);
+}
+
+// Returns the head position of the shortcut list of the PtNode at ptNodePos, or NOT_A_DICT_POS
+// when the position is NOT_A_DICT_POS or the PtNode is deleted.
+int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
+    if (ptNodePos != NOT_A_DICT_POS) {
+        const PtNodeParams nodeParams(
+                mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
+        if (!nodeParams.isDeleted()) {
+            // The shortcut list is looked up by the terminal id of the PtNode.
+            return mBuffers->getShortcutDictContent()->getShortcutListHeadPos(
+                    nodeParams.getTerminalId());
+        }
+    }
+    return NOT_A_DICT_POS;
+}
+
+// Returns the head position of the bigram list of the PtNode at ptNodePos, or NOT_A_DICT_POS
+// when the position is NOT_A_DICT_POS or the PtNode is deleted.
+int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
+    if (ptNodePos != NOT_A_DICT_POS) {
+        const PtNodeParams nodeParams(
+                mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
+        if (!nodeParams.isDeleted()) {
+            // The bigram list is looked up by the terminal id of the PtNode.
+            return mBuffers->getBigramDictContent()->getBigramListHeadPos(
+                    nodeParams.getTerminalId());
+        }
+    }
+    return NOT_A_DICT_POS;
+}
+
+// Adds a word, or updates an existing one, together with the shortcut targets carried by the
+// unigram property.
+//
+// word / length: code points of the word.
+// unigramProperty: probability, historical info, shortcuts and the beginning-of-sentence flag.
+//
+// Returns false when the dictionary is not updatable or is too large, when the word or one of
+// its shortcut targets is longer than MAX_WORD_LENGTH, when there is nothing to add, or when
+// the updating helper fails.
+bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int length,
+        const UnigramProperty *const unigramProperty) {
+    if (!mBuffers->isUpdatable()) {
+        AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary.");
+        return false;
+    }
+    if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
+        AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
+                mDictBuffer->getTailPosition());
+        return false;
+    }
+    if (length > MAX_WORD_LENGTH) {
+        AKLOGE("The word is too long to insert to the dictionary, length: %d", length);
+        return false;
+    }
+    for (const auto &shortcut : unigramProperty->getShortcuts()) {
+        if (shortcut.getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
+            // size() yields a size_t, so it must be printed with %zu rather than %d.
+            AKLOGE("One of shortcut targets is too long to insert to the dictionary, length: %zu",
+                    shortcut.getTargetCodePoints()->size());
+            return false;
+        }
+    }
+    DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
+    readingHelper.initWithPtNodeArrayPos(getRootPosition());
+    bool addedNewUnigram = false;
+    // Work on a local copy because a beginning-of-sentence marker may be attached to it.
+    int codePointsToAdd[MAX_WORD_LENGTH];
+    int codePointCountToAdd = length;
+    memmove(codePointsToAdd, word, sizeof(int) * length);
+    if (unigramProperty->representsBeginningOfSentence()) {
+        codePointCountToAdd = CharUtils::attachBeginningOfSentenceMarker(codePointsToAdd,
+                codePointCountToAdd, MAX_WORD_LENGTH);
+    }
+    if (codePointCountToAdd <= 0) {
+        return false;
+    }
+    if (!mUpdatingHelper.addUnigramWord(&readingHelper, codePointsToAdd, codePointCountToAdd,
+            unigramProperty, &addedNewUnigram)) {
+        return false;
+    }
+    // Beginning-of-sentence entries are not counted as unigrams.
+    if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) {
+        mUnigramCount++;
+    }
+    if (unigramProperty->getShortcuts().size() > 0) {
+        // Add shortcut target.
+        const int wordPos = getTerminalPtNodePositionOfWord(word, length,
+                false /* forceLowerCaseSearch */);
+        if (wordPos == NOT_A_DICT_POS) {
+            AKLOGE("Cannot find terminal PtNode position to add shortcut target.");
+            return false;
+        }
+        for (const auto &shortcut : unigramProperty->getShortcuts()) {
+            if (!mUpdatingHelper.addShortcutTarget(wordPos,
+                    shortcut.getTargetCodePoints()->data(),
+                    shortcut.getTargetCodePoints()->size(), shortcut.getProbability())) {
+                AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %zu, "
+                        "probability: %d", wordPos, shortcut.getTargetCodePoints()->size(),
+                        shortcut.getProbability());
+                return false;
+            }
+        }
+    }
+    return true;
+}
+
+// Removes the unigram |word| by marking its terminal PtNode as deleted.
+// Returns false when the dictionary is not updatable, when no terminal PtNode is found for the
+// word, or when the PtNode cannot be marked as deleted. mUnigramCount is decremented only when
+// the removed PtNode does not represent non-word info.
+bool Ver4PatriciaTriePolicy::removeUnigramEntry(const int *const word, const int length) {
+    if (!mBuffers->isUpdatable()) {
+        AKLOGI("Warning: removeUnigramEntry() is called for non-updatable dictionary.");
+        return false;
+    }
+    const int terminalPos = getTerminalPtNodePositionOfWord(word, length,
+            false /* forceLowerCaseSearch */);
+    if (NOT_A_DICT_POS == terminalPos) {
+        // No terminal PtNode exists for the word; there is nothing to remove.
+        return false;
+    }
+    // Snapshot the PtNode parameters before the node is marked as deleted; the snapshot is also
+    // used below to decide whether the unigram count has to change.
+    const PtNodeParams targetParams =
+            mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(terminalPos);
+    const bool markedAsDeleted = mNodeWriter.markPtNodeAsDeleted(&targetParams);
+    if (!markedAsDeleted) {
+        AKLOGE("Cannot remove unigram. ptNodePos: %d", terminalPos);
+        return false;
+    }
+    const bool countsAsUnigram = !targetParams.representsNonWordInfo();
+    if (countsAsUnigram) {
+        --mUnigramCount;
+    }
+    return true;
+}
+
+// Adds a bigram from the first previous word of |prevWordsInfo| to the target word of
+// |bigramProperty|. Only the first previous word is used; longer n-grams are not supported yet.
+// When the previous word is a beginning-of-sentence marker that has no terminal PtNode yet, a
+// unigram entry for the marker is added first.
+// Returns true when mUpdatingHelper.addBigramWords() succeeds, false on any validation or lookup
+// failure. mBigramCount is incremented only when a new bigram entry was created.
+bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+        const BigramProperty *const bigramProperty) {
+    if (!mBuffers->isUpdatable()) {
+        AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
+        return false;
+    }
+    if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
+        AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
+                mDictBuffer->getTailPosition());
+        return false;
+    }
+    if (!prevWordsInfo->isValid()) {
+        AKLOGE("prev words info is not valid for adding n-gram entry to the dictionary.");
+        return false;
+    }
+    if (bigramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
+        // size() yields an unsigned size type; cast it so the argument matches the "%d"
+        // conversion instead of passing a mismatched type through the varargs.
+        AKLOGE("The word is too long to insert the ngram to the dictionary. "
+                "length: %d", static_cast<int>(bigramProperty->getTargetCodePoints()->size()));
+        return false;
+    }
+    int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+    prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
+            false /* tryLowerCaseSearch */);
+    // TODO: Support N-gram.
+    if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
+        if (!prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)) {
+            // An ordinary previous word that is not in the dictionary cannot own a bigram.
+            return false;
+        }
+        const std::vector<UnigramProperty::ShortcutProperty> shortcuts;
+        const UnigramProperty beginningOfSentenceUnigramProperty(
+                true /* representsBeginningOfSentence */, true /* isNotAWord */,
+                false /* isBlacklisted */, MAX_PROBABILITY /* probability */,
+                NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
+        if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
+                prevWordsInfo->getNthPrevWordCodePointCount(1 /* n */),
+                &beginningOfSentenceUnigramProperty)) {
+            AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
+            return false;
+        }
+        // Refresh Terminal PtNode positions.
+        prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
+                false /* tryLowerCaseSearch */);
+        if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
+            // Never hand an invalid position to the updating helper, even if the entry that was
+            // just added cannot be looked up.
+            AKLOGE("Cannot find the beginning-of-sentence PtNode after adding it.");
+            return false;
+        }
+    }
+    const int word1Pos = getTerminalPtNodePositionOfWord(
+            bigramProperty->getTargetCodePoints()->data(),
+            bigramProperty->getTargetCodePoints()->size(), false /* forceLowerCaseSearch */);
+    if (word1Pos == NOT_A_DICT_POS) {
+        return false;
+    }
+    bool addedNewBigram = false;
+    if (!mUpdatingHelper.addBigramWords(prevWordsPtNodePos[0], word1Pos, bigramProperty,
+            &addedNewBigram)) {
+        return false;
+    }
+    if (addedNewBigram) {
+        mBigramCount++;
+    }
+    return true;
+}
+
+// Removes the bigram from the first previous word of |prevWordsInfo| to |word|.
+// Only the first previous word is used; longer n-grams are not supported yet.
+// Returns false when the dictionary is not updatable or too large, when |prevWordsInfo| is
+// invalid, when |length| exceeds MAX_WORD_LENGTH, when either word has no terminal PtNode, or
+// when mUpdatingHelper.removeBigramWords() fails. mBigramCount is decremented on success.
+bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+        const int *const word, const int length) {
+    if (!mBuffers->isUpdatable()) {
+        AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
+        return false;
+    }
+    if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
+        AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
+                mDictBuffer->getTailPosition());
+        return false;
+    }
+    if (!prevWordsInfo->isValid()) {
+        AKLOGE("prev words info is not valid for removing n-gram entry form the dictionary.");
+        return false;
+    }
+    if (length > MAX_WORD_LENGTH) {
+        AKLOGE("word is too long to remove n-gram entry form the dictionary. length: %d", length);
+        // Reject the request like the other validation failures instead of falling through and
+        // looking up a word that is longer than the dictionary supports.
+        return false;
+    }
+    int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+    prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
+            false /* tryLowerCaseSearch */);
+    // TODO: Support N-gram.
+    if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
+        return false;
+    }
+    const int wordPos = getTerminalPtNodePositionOfWord(word, length,
+            false /* forceLowerCaseSearch */);
+    if (wordPos == NOT_A_DICT_POS) {
+        return false;
+    }
+    if (!mUpdatingHelper.removeBigramWords(prevWordsPtNodePos[0], wordPos)) {
+        return false;
+    }
+    mBigramCount--;
+    return true;
+}
+
+// Writes the dictionary to |filePath| together with the current unigram and bigram counts.
+// Returns false for a non-updatable dictionary. When the write itself fails the policy is
+// flagged as corrupted and false is returned.
+bool Ver4PatriciaTriePolicy::flush(const char *const filePath) {
+    if (!mBuffers->isUpdatable()) {
+        AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath);
+        return false;
+    }
+    const bool written = mWritingHelper.writeToDictFile(filePath, mUnigramCount, mBigramCount);
+    if (written) {
+        return true;
+    }
+    AKLOGE("Cannot flush the dictionary to file.");
+    mIsCorrupted = true;
+    return false;
+}
+
+// Writes the dictionary to |filePath| through the GC-enabled path of the writing helper,
+// starting from the root PtNode array.
+// Returns false for a non-updatable dictionary. When the write itself fails the policy is
+// flagged as corrupted and false is returned.
+bool Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {
+    if (!mBuffers->isUpdatable()) {
+        AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
+        return false;
+    }
+    const bool written = mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath);
+    if (written) {
+        return true;
+    }
+    AKLOGE("Cannot flush the dictionary to file with GC.");
+    mIsCorrupted = true;
+    return false;
+}
+
+// Tells whether a GC pass should be run on this dictionary.
+// Always false for a non-updatable dictionary. Otherwise true when any of the size-based
+// conditions below holds; for a decaying dictionary the final answer is delegated to
+// ForgettingCurveUtils::needsToDecay().
+bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
+    if (!mBuffers->isUpdatable()) {
+        AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
+        return false;
+    }
+    // Additional buffer size is near the limit.
+    if (mBuffers->isNearSizeLimit()) {
+        return true;
+    }
+    // Total extended region size of the trie exceeds the limit.
+    if (mHeaderPolicy->getExtendedRegionSize() + mDictBuffer->getUsedAdditionalBufferSize()
+            > Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE) {
+        return true;
+    }
+    // Needs to reduce dictionary size.
+    const bool refusesDynamicOperations =
+            mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
+    if (refusesDynamicOperations && mDictBuffer->getUsedAdditionalBufferSize() > 0) {
+        return true;
+    }
+    if (!mHeaderPolicy->isDecayingDict()) {
+        return false;
+    }
+    return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mUnigramCount, mBigramCount,
+            mHeaderPolicy);
+}
+
+// Writes the decimal value of the property named by |query| into |outResult| using snprintf()
+// bounded by |maxResultLength|.
+// Recognized queries are UNIGRAM_COUNT_QUERY, BIGRAM_COUNT_QUERY, MAX_UNIGRAM_COUNT_QUERY and
+// MAX_BIGRAM_COUNT_QUERY. For any other query |outResult| is left untouched.
+void Ver4PatriciaTriePolicy::getProperty(const char *const query, const int queryLength,
+        char *const outResult, const int maxResultLength) {
+    // One extra character is compared so that the terminator takes part in the match.
+    const int lengthWithTerminator = queryLength + 1 /* terminator */;
+    if (0 == strncmp(query, UNIGRAM_COUNT_QUERY, lengthWithTerminator)) {
+        snprintf(outResult, maxResultLength, "%d", mUnigramCount);
+        return;
+    }
+    if (0 == strncmp(query, BIGRAM_COUNT_QUERY, lengthWithTerminator)) {
+        snprintf(outResult, maxResultLength, "%d", mBigramCount);
+        return;
+    }
+    if (0 == strncmp(query, MAX_UNIGRAM_COUNT_QUERY, lengthWithTerminator)) {
+        // Decaying dictionaries report the forgetting-curve hard limit, others the size cap.
+        const int unigramCountLimit = mHeaderPolicy->isDecayingDict() ?
+                ForgettingCurveUtils::getUnigramCountHardLimit(
+                        mHeaderPolicy->getMaxUnigramCount()) :
+                static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE);
+        snprintf(outResult, maxResultLength, "%d", unigramCountLimit);
+        return;
+    }
+    if (0 == strncmp(query, MAX_BIGRAM_COUNT_QUERY, lengthWithTerminator)) {
+        const int bigramCountLimit = mHeaderPolicy->isDecayingDict() ?
+                ForgettingCurveUtils::getBigramCountHardLimit(
+                        mHeaderPolicy->getMaxBigramCount()) :
+                static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE);
+        snprintf(outResult, maxResultLength, "%d", bigramCountLimit);
+        return;
+    }
+}
+
+// Builds a WordProperty describing the word given by |codePoints| / |codePointCount|: its code
+// points, its unigram attributes, every bigram entry whose target still has a terminal PtNode,
+// and every shortcut target.
+// Returns a default-constructed WordProperty (and logs an error) when the word has no terminal
+// PtNode in this dictionary.
+const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const codePoints,
+        const int codePointCount) const {
+    const int ptNodePos = getTerminalPtNodePositionOfWord(codePoints, codePointCount,
+            false /* forceLowerCaseSearch */);
+    if (ptNodePos == NOT_A_DICT_POS) {
+        AKLOGE("getWordProperty is called for invalid word.");
+        return WordProperty();
+    }
+    const PtNodeParams ptNodeParams = mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
+    std::vector<int> codePointVector(ptNodeParams.getCodePoints(),
+            ptNodeParams.getCodePoints() + ptNodeParams.getCodePointCount());
+    const ProbabilityEntry probabilityEntry =
+            mBuffers->getProbabilityDictContent()->getProbabilityEntry(
+                    ptNodeParams.getTerminalId());
+    const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo();
+    // Fetch bigram information.
+    std::vector<BigramProperty> bigrams;
+    const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos);
+    if (bigramListPos != NOT_A_DICT_POS) {
+        int bigramWord1CodePoints[MAX_WORD_LENGTH];
+        const BigramDictContent *const bigramDictContent = mBuffers->getBigramDictContent();
+        const TerminalPositionLookupTable *const terminalPositionLookupTable =
+                mBuffers->getTerminalPositionLookupTable();
+        bool hasNext = true;
+        int readingPos = bigramListPos;
+        while (hasNext) {
+            const BigramEntry bigramEntry =
+                    bigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
+            hasNext = bigramEntry.hasNext();
+            const int word1TerminalId = bigramEntry.getTargetTerminalId();
+            const int word1TerminalPtNodePos =
+                    terminalPositionLookupTable->getTerminalPtNodePosition(word1TerminalId);
+            if (word1TerminalPtNodePos == NOT_A_DICT_POS) {
+                // The bigram target has no terminal PtNode; skip this entry.
+                continue;
+            }
+            // Word (unigram) probability
+            int word1Probability = NOT_A_PROBABILITY;
+            // Distinct names are used here so that neither the |codePointCount| parameter nor
+            // the unigram's |historicalInfo| above is shadowed inside the loop.
+            const int word1CodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
+                    word1TerminalPtNodePos, MAX_WORD_LENGTH, bigramWord1CodePoints,
+                    &word1Probability);
+            const std::vector<int> word1(bigramWord1CodePoints,
+                    bigramWord1CodePoints + word1CodePointCount);
+            const HistoricalInfo *const bigramHistoricalInfo = bigramEntry.getHistoricalInfo();
+            const int probability = bigramEntry.hasHistoricalInfo() ?
+                    ForgettingCurveUtils::decodeProbability(
+                            bigramHistoricalInfo, mHeaderPolicy) :
+                    bigramEntry.getProbability();
+            bigrams.emplace_back(&word1, probability,
+                    bigramHistoricalInfo->getTimeStamp(), bigramHistoricalInfo->getLevel(),
+                    bigramHistoricalInfo->getCount());
+        }
+    }
+    // Fetch shortcut information.
+    std::vector<UnigramProperty::ShortcutProperty> shortcuts;
+    int shortcutPos = getShortcutPositionOfPtNode(ptNodePos);
+    if (shortcutPos != NOT_A_DICT_POS) {
+        int shortcutTarget[MAX_WORD_LENGTH];
+        const ShortcutDictContent *const shortcutDictContent =
+                mBuffers->getShortcutDictContent();
+        bool hasNext = true;
+        while (hasNext) {
+            int shortcutTargetLength = 0;
+            int shortcutProbability = NOT_A_PROBABILITY;
+            shortcutDictContent->getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH, shortcutTarget,
+                    &shortcutTargetLength, &shortcutProbability, &hasNext, &shortcutPos);
+            const std::vector<int> target(shortcutTarget, shortcutTarget + shortcutTargetLength);
+            shortcuts.emplace_back(&target, shortcutProbability);
+        }
+    }
+    const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
+            ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
+            historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
+            historicalInfo->getCount(), &shortcuts);
+    return WordProperty(&codePointVector, &unigramProperty, &bigrams);
+}
+
+// Iterates over the words of the dictionary, one word per call.
+// A |token| of 0 starts a new iteration by collecting all terminal PtNode positions. The word
+// at index |token| is written to |outCodePoints| and its length to |outCodePointCount|.
+// Returns the token to pass to the next call, or 0 when the last word has just been returned
+// or when |token| is out of range (in which case |outCodePointCount| is set to 0).
+int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints,
+        int *const outCodePointCount) {
+    *outCodePointCount = 0;
+    const bool startsNewIteration = (0 == token);
+    if (startsNewIteration) {
+        // Rebuild the cached list of terminal PtNode positions from the root.
+        mTerminalPtNodePositionsForIteratingWords.clear();
+        DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions positionCollector(
+                &mTerminalPtNodePositionsForIteratingWords);
+        DynamicPtReadingHelper trieReader(&mNodeReader, &mPtNodeArrayReader);
+        trieReader.initWithPtNodeArrayPos(getRootPosition());
+        trieReader.traverseAllPtNodesInPostorderDepthFirstManner(&positionCollector);
+    }
+    const int wordCount = static_cast<int>(mTerminalPtNodePositionsForIteratingWords.size());
+    const bool tokenIsInRange = (0 <= token) && (token < wordCount);
+    if (!tokenIsInRange) {
+        AKLOGE("Given token %d is invalid.", token);
+        return 0;
+    }
+    const int currentTerminalPos = mTerminalPtNodePositionsForIteratingWords[token];
+    // The probability is a required out-parameter of the helper but is not used here.
+    int ignoredProbability = NOT_A_PROBABILITY;
+    *outCodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
+            currentTerminalPos, MAX_WORD_LENGTH, outCodePoints, &ignoredProbability);
+    const int followingToken = token + 1;
+    if (followingToken < wordCount) {
+        return followingToken;
+    }
+    // All words have been iterated.
+    mTerminalPtNodePositionsForIteratingWords.clear();
+    return 0;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
new file mode 100644
index 000000000..5d66a2cce
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_PATRICIA_TRIE_POLICY_H
+#define LATINIME_VER4_PATRICIA_TRIE_POLICY_H
+
+#include <vector>
+
+#include "defines.h"
+#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h"
+#include "suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+class DicNode;
+class DicNodeVector;
+
+// DictionaryStructureWithBufferPolicy implementation backed by a Ver4DictBuffers instance.
+// It bundles the node/array readers, the node writer and the bigram/shortcut list policies that
+// operate on those buffers, and tracks the current unigram and bigram counts.
+class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
+ public:
+ // Takes ownership of |buffers| (moved into mBuffers) and wires every helper to it.
+ // The initializer list reads mBuffers, mHeaderPolicy and mDictBuffer and takes the addresses
+ // of previously initialized members, so it relies on the member declaration order at the
+ // bottom of this class; do not reorder those members.
+ Ver4PatriciaTriePolicy(Ver4DictBuffers::Ver4DictBuffersPtr buffers)
+ : mBuffers(std::move(buffers)), mHeaderPolicy(mBuffers->getHeaderPolicy()),
+ mDictBuffer(mBuffers->getWritableTrieBuffer()),
+ mBigramPolicy(mBuffers->getMutableBigramDictContent(),
+ mBuffers->getTerminalPositionLookupTable(), mHeaderPolicy),
+ mShortcutPolicy(mBuffers->getMutableShortcutDictContent(),
+ mBuffers->getTerminalPositionLookupTable()),
+ mNodeReader(mDictBuffer, mBuffers->getProbabilityDictContent(), mHeaderPolicy),
+ mPtNodeArrayReader(mDictBuffer),
+ mNodeWriter(mDictBuffer, mBuffers.get(), mHeaderPolicy, &mNodeReader,
+ &mPtNodeArrayReader, &mBigramPolicy, &mShortcutPolicy),
+ mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter),
+ mWritingHelper(mBuffers.get()),
+ mUnigramCount(mHeaderPolicy->getUnigramCount()),
+ mBigramCount(mHeaderPolicy->getBigramCount()),
+ mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {};
+
+ // Position of the root PtNode array; always 0 for this format.
+ AK_FORCE_INLINE int getRootPosition() const {
+ return 0;
+ }
+
+ void createAndGetAllChildDicNodes(const DicNode *const dicNode,
+ DicNodeVector *const childDicNodes) const;
+
+ // Callers in the implementation use the return value as the number of code points written
+ // to |outCodePoints|.
+ int getCodePointsAndProbabilityAndReturnCodePointCount(
+ const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints,
+ int *const outUnigramProbability) const;
+
+ // Callers in the implementation compare the result against NOT_A_DICT_POS to detect a word
+ // that has no terminal PtNode.
+ int getTerminalPtNodePositionOfWord(const int *const inWord,
+ const int length, const bool forceLowerCaseSearch) const;
+
+ int getProbability(const int unigramProbability, const int bigramProbability) const;
+
+ int getUnigramProbabilityOfPtNode(const int ptNodePos) const;
+
+ int getShortcutPositionOfPtNode(const int ptNodePos) const;
+
+ int getBigramsPositionOfPtNode(const int ptNodePos) const;
+
+ const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
+ return mHeaderPolicy;
+ }
+
+ const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const {
+ return &mBigramPolicy;
+ }
+
+ const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
+ return &mShortcutPolicy;
+ }
+
+ // Dynamic update operations. Each returns false (after logging) when the dictionary is not
+ // updatable.
+ bool addUnigramEntry(const int *const word, const int length,
+ const UnigramProperty *const unigramProperty);
+
+ bool removeUnigramEntry(const int *const word, const int length);
+
+ // Only the first previous word of |prevWordsInfo| is used by the current implementation.
+ bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
+ const BigramProperty *const bigramProperty);
+
+ bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word1,
+ const int length1);
+
+ // Writes the dictionary to |filePath|. On a write failure the policy is flagged as corrupted
+ // (see isCorrupted()) and false is returned.
+ bool flush(const char *const filePath);
+
+ // Same as flush() but goes through the GC-enabled writing path.
+ bool flushWithGC(const char *const filePath);
+
+ bool needsToRunGC(const bool mindsBlockByGC) const;
+
+ // Writes the decimal value of the queried property into |outResult|; unknown queries leave
+ // |outResult| untouched.
+ void getProperty(const char *const query, const int queryLength, char *const outResult,
+ const int maxResultLength);
+
+ const WordProperty getWordProperty(const int *const codePoints,
+ const int codePointCount) const;
+
+ // Word iteration: pass 0 as |token| to start, then pass the returned token back in. A return
+ // value of 0 means that the iteration has finished or that |token| was invalid.
+ int getNextWordAndNextToken(const int token, int *const outCodePoints,
+ int *const outCodePointCount);
+
+ bool isCorrupted() const {
+ return mIsCorrupted;
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy);
+
+ // Query names accepted by getProperty().
+ static const char *const UNIGRAM_COUNT_QUERY;
+ static const char *const BIGRAM_COUNT_QUERY;
+ static const char *const MAX_UNIGRAM_COUNT_QUERY;
+ static const char *const MAX_BIGRAM_COUNT_QUERY;
+ // When the dictionary size is near the maximum size, we have to refuse dynamic operations to
+ // prevent the dictionary from overflowing.
+ static const int MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
+ static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
+
+ // NOTE: the declaration order below is the initialization order used by the constructor.
+ // Owns the dictionary buffers; the pointers and helpers below refer into it.
+ const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers;
+ const HeaderPolicy *const mHeaderPolicy;
+ // Writable trie buffer obtained from mBuffers.
+ BufferWithExtendableBuffer *const mDictBuffer;
+ Ver4BigramListPolicy mBigramPolicy;
+ Ver4ShortcutListPolicy mShortcutPolicy;
+ Ver4PatriciaTrieNodeReader mNodeReader;
+ Ver4PtNodeArrayReader mPtNodeArrayReader;
+ Ver4PatriciaTrieNodeWriter mNodeWriter;
+ DynamicPtUpdatingHelper mUpdatingHelper;
+ Ver4PatriciaTrieWritingHelper mWritingHelper;
+ // Counts initialized from the header and kept up to date by the add/remove operations.
+ int mUnigramCount;
+ int mBigramCount;
+ // Terminal PtNode positions cached between getNextWordAndNextToken() calls.
+ std::vector<int> mTerminalPtNodePositionsForIteratingWords;
+ // Set to true when flush() or flushWithGC() fails to write the dictionary.
+ // NOTE(review): declared mutable, presumably so that const methods can flag corruption as
+ // well - confirm against the rest of the implementation.
+ mutable bool mIsCorrupted;
+};
+} // namespace latinime
+#endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp
new file mode 100644
index 000000000..254022db4
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h"
+
+#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
+
+namespace latinime {
+
+// Reads a terminal ID from |buffer| at |*pos| via ByteArrayUtils::readUint32AndAdvancePosition(),
+// which also advances |*pos|, and returns the value as an int.
+// The parameter qualifiers match the declaration in the header (|pos| itself is const; the
+// position it points to is updated).
+/* static */ int Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(
+        const uint8_t *const buffer, int *const pos) {
+    return ByteArrayUtils::readUint32AndAdvancePosition(buffer, pos);
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h
new file mode 100644
index 000000000..466ff55d5
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_PATRICIA_TRIE_READING_UTILS_H
+#define LATINIME_VER4_PATRICIA_TRIE_READING_UTILS_H
+
+#include <cstdint>
+
+#include "defines.h"
+
+namespace latinime {
+
+class BufferWithExtendableBuffer;
+
+// Static-only helper for reading fields that are specific to version 4 patricia trie nodes.
+class Ver4PatriciaTrieReadingUtils {
+ public:
+    // Returns the terminal ID stored in |buffer| at |*pos| and advances |*pos| past it.
+    static int getTerminalIdAndAdvancePosition(const uint8_t *const buffer, int *const pos);
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTrieReadingUtils);
+};
+} // namespace latinime
+#endif /* LATINIME_VER4_PATRICIA_TRIE_READING_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
new file mode 100644
index 000000000..40fdfa068
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
@@ -0,0 +1,294 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h"
+
+#include <cstring>
+#include <queue>
+
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/file_utils.h"
+#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+
+namespace latinime {
+
+// Writes the dictionary held by mBuffers to |dictDirPath| without running GC.
+// A fresh header carrying |unigramCount|, |bigramCount| and the current extended region size
+// (header value plus the used additional trie buffer size) is serialized first; the last
+// decayed time is not updated. Returns false when the header cannot be written, otherwise the
+// result of flushing the header and dictionary buffers.
+bool Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const dictDirPath,
+        const int unigramCount, const int bigramCount) const {
+    const HeaderPolicy *const header = mBuffers->getHeaderPolicy();
+    BufferWithExtendableBuffer serializedHeader(
+            BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
+    const int extendedRegionSize = header->getExtendedRegionSize()
+            + mBuffers->getTrieBuffer()->getUsedAdditionalBufferSize();
+    const bool headerWritten = header->fillInAndWriteHeaderToBuffer(
+            false /* updatesLastDecayedTime */, unigramCount, bigramCount, extendedRegionSize,
+            &serializedHeader);
+    if (headerWritten) {
+        return mBuffers->flushHeaderAndDictBuffers(dictDirPath, &serializedHeader);
+    }
+    AKLOGE("Cannot write header structure to buffer. "
+            "updatesLastDecayedTime: %d, unigramCount: %d, bigramCount: %d, "
+            "extendedRegionSize: %d", false, unigramCount, bigramCount,
+            extendedRegionSize);
+    return false;
+}
+
+// Runs GC into a newly created set of dictionary buffers and writes those buffers to
+// |dictDirPath|. The header is rewritten with the counts reported by runGC(), an extended
+// region size of 0 and an updated last decayed time.
+// Returns false when GC or header serialization fails, otherwise the result of flushing the
+// header and the new dictionary buffers.
+bool Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos,
+        const char *const dictDirPath) {
+    const HeaderPolicy *const header = mBuffers->getHeaderPolicy();
+    // Destination buffers that receive the garbage-collected dictionary.
+    Ver4DictBuffers::Ver4DictBuffersPtr gcedBuffers(
+            Ver4DictBuffers::createVer4DictBuffers(header,
+                    Ver4DictConstants::MAX_DICTIONARY_SIZE));
+    int unigramCountAfterGC = 0;
+    int bigramCountAfterGC = 0;
+    const bool gcSucceeded = runGC(rootPtNodeArrayPos, header, gcedBuffers.get(),
+            &unigramCountAfterGC, &bigramCountAfterGC);
+    if (!gcSucceeded) {
+        return false;
+    }
+    BufferWithExtendableBuffer serializedHeader(
+            BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
+    const bool headerWritten = header->fillInAndWriteHeaderToBuffer(
+            true /* updatesLastDecayedTime */, unigramCountAfterGC, bigramCountAfterGC,
+            0 /* extendedRegionSize */, &serializedHeader);
+    if (!headerWritten) {
+        return false;
+    }
+    return gcedBuffers->flushHeaderAndDictBuffers(dictDirPath, &serializedHeader);
+}
+
+// Garbage-collects the dictionary held by mBuffers into |buffersToWrite|.
+// The work is done in ordered phases, each of which aborts with false on failure:
+//   1. update unigram probabilities in mBuffers and mark useless PtNodes as deleted, then
+//      truncate unigrams when a decaying dictionary exceeds its maximum unigram count;
+//   2. update bigram probabilities, then truncate bigrams under the same kind of condition;
+//   3. write the valid PtNodes to |buffersToWrite| while recording a position relocation map;
+//   4. GC the terminal ID table and the probability, bigram and shortcut contents;
+//   5. fix up position fields, PtNode flags and terminal IDs in the new buffers.
+// On success |outUnigramCount| and |outBigramCount| receive the counts of the GCed dictionary.
+bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
+ const HeaderPolicy *const headerPolicy, Ver4DictBuffers *const buffersToWrite,
+ int *const outUnigramCount, int *const outBigramCount) {
+ // Readers, list policies and a writer that operate on the current (pre-GC) buffers.
+ Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(),
+ mBuffers->getProbabilityDictContent(), headerPolicy);
+ Ver4PtNodeArrayReader ptNodeArrayReader(mBuffers->getTrieBuffer());
+ Ver4BigramListPolicy bigramPolicy(mBuffers->getMutableBigramDictContent(),
+ mBuffers->getTerminalPositionLookupTable(), headerPolicy);
+ Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getMutableShortcutDictContent(),
+ mBuffers->getTerminalPositionLookupTable());
+ Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(),
+ mBuffers, headerPolicy, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy,
+ &shortcutPolicy);
+
+ // Phase 1: unigram probabilities and deletion marks (postorder traversal from the root).
+ DynamicPtReadingHelper readingHelper(&ptNodeReader, &ptNodeArrayReader);
+ readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+ DynamicPtGcEventListeners
+ ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
+ traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
+ &ptNodeWriter);
+ if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
+ &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
+ return false;
+ }
+ // This count is only used to decide whether unigrams must be truncated; the count reported
+ // to the caller is taken at the end of this method.
+ const int unigramCount = traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
+ .getValidUnigramCount();
+ const int maxUnigramCount = headerPolicy->getMaxUnigramCount();
+ if (headerPolicy->isDecayingDict() && unigramCount > maxUnigramCount) {
+ if (!truncateUnigrams(&ptNodeReader, &ptNodeWriter, maxUnigramCount)) {
+ AKLOGE("Cannot remove unigrams. current: %d, max: %d", unigramCount,
+ maxUnigramCount);
+ return false;
+ }
+ }
+
+ // Phase 2: bigram probabilities (second postorder traversal from the root).
+ readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+ DynamicPtGcEventListeners::TraversePolicyToUpdateBigramProbability
+ traversePolicyToUpdateBigramProbability(&ptNodeWriter);
+ if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
+ &traversePolicyToUpdateBigramProbability)) {
+ return false;
+ }
+ // As above, this count only drives the truncation decision; |outBigramCount| is filled in by
+ // the bigram content GC below.
+ const int bigramCount = traversePolicyToUpdateBigramProbability.getValidBigramEntryCount();
+ const int maxBigramCount = headerPolicy->getMaxBigramCount();
+ if (headerPolicy->isDecayingDict() && bigramCount > maxBigramCount) {
+ if (!truncateBigrams(maxBigramCount)) {
+ AKLOGE("Cannot remove bigrams. current: %d, max: %d", bigramCount, maxBigramCount);
+ return false;
+ }
+ }
+
+ // Phase 3: copy the valid PtNodes into the new buffers.
+ // Mapping from positions in mBuffers to positions in buffersToWrite.
+ PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap;
+ readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+ // This writer targets buffersToWrite but is still given the readers and list policies of the
+ // old buffers, which are the source of the traversal.
+ Ver4PatriciaTrieNodeWriter ptNodeWriterForNewBuffers(buffersToWrite->getWritableTrieBuffer(),
+ buffersToWrite, headerPolicy, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy,
+ &shortcutPolicy);
+ DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
+ traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&ptNodeWriterForNewBuffers,
+ buffersToWrite->getWritableTrieBuffer(), &dictPositionRelocationMap);
+ if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
+ &traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) {
+ return false;
+ }
+
+ // Create policy instances for the GCed dictionary.
+ Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(),
+ buffersToWrite->getProbabilityDictContent(), headerPolicy);
+ Ver4PtNodeArrayReader newPtNodeArrayreader(buffersToWrite->getTrieBuffer());
+ Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getMutableBigramDictContent(),
+ buffersToWrite->getTerminalPositionLookupTable(), headerPolicy);
+ Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getMutableShortcutDictContent(),
+ buffersToWrite->getTerminalPositionLookupTable());
+ Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(),
+ buffersToWrite, headerPolicy, &newPtNodeReader, &newPtNodeArrayreader, &newBigramPolicy,
+ &newShortcutPolicy);
+ // Phase 4: the terminal ID map produced here is consumed by every content GC below and by
+ // the final terminal ID fix-up.
+ // Re-assign terminal IDs for valid terminal PtNodes.
+ TerminalPositionLookupTable::TerminalIdMap terminalIdMap;
+ if(!buffersToWrite->getMutableTerminalPositionLookupTable()->runGCTerminalIds(
+ &terminalIdMap)) {
+ return false;
+ }
+ // Run GC for probability dict content.
+ if (!buffersToWrite->getMutableProbabilityDictContent()->runGC(&terminalIdMap,
+ mBuffers->getProbabilityDictContent())) {
+ return false;
+ }
+ // Run GC for bigram dict content.
+ if(!buffersToWrite->getMutableBigramDictContent()->runGC(&terminalIdMap,
+ mBuffers->getBigramDictContent(), outBigramCount)) {
+ return false;
+ }
+ // Run GC for shortcut dict content.
+ if(!buffersToWrite->getMutableShortcutDictContent()->runGC(&terminalIdMap,
+ mBuffers->getShortcutDictContent())) {
+ return false;
+ }
+ // Phase 5: traverse the new dictionary to rewrite position fields using the relocation map,
+ // then PtNode flags and terminal IDs using the terminal ID map.
+ DynamicPtReadingHelper newDictReadingHelper(&newPtNodeReader, &newPtNodeArrayreader);
+ newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+ DynamicPtGcEventListeners::TraversePolicyToUpdateAllPositionFields
+ traversePolicyToUpdateAllPositionFields(&newPtNodeWriter, &dictPositionRelocationMap);
+ if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
+ &traversePolicyToUpdateAllPositionFields)) {
+ return false;
+ }
+ newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+ TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds
+ traversePolicyToUpdateAllPtNodeFlagsAndTerminalIds(&newPtNodeWriter, &terminalIdMap);
+ if (!newDictReadingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
+ &traversePolicyToUpdateAllPtNodeFlagsAndTerminalIds)) {
+ return false;
+ }
+ // The unigram count reported to the caller is the one observed in the GCed dictionary.
+ *outUnigramCount = traversePolicyToUpdateAllPositionFields.getUnigramCount();
+ return true;
+}
+
+// Reduces the number of unigrams towards |maxUnigramCount|.
+// Every terminal that still has a PtNode position is pushed into a priority queue ordered by
+// DictProbabilityComparator; entries are then popped from the top until the queue holds at most
+// |maxUnigramCount| entries, and each popped PtNode that does not represent non-word info is
+// marked as "will become non-terminal".
+// Returns false as soon as a PtNode cannot be marked, true otherwise.
+bool Ver4PatriciaTrieWritingHelper::truncateUnigrams(
+        const Ver4PatriciaTrieNodeReader *const ptNodeReader,
+        Ver4PatriciaTrieNodeWriter *const ptNodeWriter, const int maxUnigramCount) {
+    const TerminalPositionLookupTable *const lookupTable =
+            mBuffers->getTerminalPositionLookupTable();
+    const int terminalIdLimit = lookupTable->getNextTerminalId();
+    std::priority_queue<DictProbability, std::vector<DictProbability>, DictProbabilityComparator>
+            candidates;
+    for (int terminalId = 0; terminalId < terminalIdLimit; ++terminalId) {
+        const int terminalPtNodePos = lookupTable->getTerminalPtNodePosition(terminalId);
+        if (terminalPtNodePos != NOT_A_DICT_POS) {
+            const ProbabilityEntry entry =
+                    mBuffers->getProbabilityDictContent()->getProbabilityEntry(terminalId);
+            // Entries with historical info carry an encoded probability that must be decoded.
+            int effectiveProbability;
+            if (entry.hasHistoricalInfo()) {
+                effectiveProbability = ForgettingCurveUtils::decodeProbability(
+                        entry.getHistoricalInfo(), mBuffers->getHeaderPolicy());
+            } else {
+                effectiveProbability = entry.getProbability();
+            }
+            candidates.push(DictProbability(terminalPtNodePos, effectiveProbability,
+                    entry.getHistoricalInfo()->getTimeStamp()));
+        }
+    }
+
+    // Delete unigrams.
+    while (static_cast<int>(candidates.size()) > maxUnigramCount) {
+        const int victimPos = candidates.top().getDictPos();
+        candidates.pop();
+        const PtNodeParams victimParams =
+                ptNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(victimPos);
+        // PtNodes that represent non-word info are dropped from the queue but kept as they are.
+        if (!victimParams.representsNonWordInfo()) {
+            if (!ptNodeWriter->markPtNodeAsWillBecomeNonTerminal(&victimParams)) {
+                AKLOGE("Cannot mark PtNode as willBecomeNonterminal. PtNode pos: %d", victimPos);
+                return false;
+            }
+        }
+    }
+    return true;
+}
+
+// Limits the number of bigrams to maxBigramCount.
+//
+// Walks the bigram list of every terminal id, pushes each valid bigram entry to a priority queue
+// ordered by DictProbabilityComparator, then overwrites the entries at the top of the queue with
+// their invalidated form until the queue holds at most maxBigramCount entries. Returns false
+// when an invalidated entry cannot be written.
+bool Ver4PatriciaTrieWritingHelper::truncateBigrams(const int maxBigramCount) {
+    typedef std::priority_queue<DictProbability, std::vector<DictProbability>,
+            DictProbabilityComparator> BigramQueue;
+    const TerminalPositionLookupTable *const lookupTable =
+            mBuffers->getTerminalPositionLookupTable();
+    const int terminalIdLimit = lookupTable->getNextTerminalId();
+    BigramQueue bigramQueue;
+    BigramDictContent *const bigramContent = mBuffers->getMutableBigramDictContent();
+    for (int terminalId = 0; terminalId < terminalIdLimit; ++terminalId) {
+        int readingPos = bigramContent->getBigramListHeadPos(terminalId);
+        if (readingPos == NOT_A_DICT_POS) {
+            // This terminal has no bigram list.
+            continue;
+        }
+        bool hasNext;
+        do {
+            // Remember where this entry starts; reading it advances readingPos.
+            const int entryPos = readingPos;
+            const BigramEntry entry =
+                    bigramContent->getBigramEntryAndAdvancePosition(&readingPos);
+            hasNext = entry.hasNext();
+            if (entry.isValid()) {
+                int probability;
+                if (entry.hasHistoricalInfo()) {
+                    probability = ForgettingCurveUtils::decodeProbability(
+                            entry.getHistoricalInfo(), mBuffers->getHeaderPolicy());
+                } else {
+                    probability = entry.getProbability();
+                }
+                bigramQueue.push(DictProbability(entryPos, probability,
+                        entry.getHistoricalInfo()->getTimeStamp()));
+            }
+        } while (hasNext);
+    }
+
+    // Invalidate entries from the top of the queue until the limit is satisfied. The entry is
+    // popped only after it has been rewritten successfully.
+    for (; static_cast<int>(bigramQueue.size()) > maxBigramCount; bigramQueue.pop()) {
+        const int victimPos = bigramQueue.top().getDictPos();
+        const BigramEntry victimEntry = bigramContent->getBigramEntry(victimPos);
+        const BigramEntry invalidatedEntry = victimEntry.getInvalidatedEntry();
+        if (!bigramContent->writeBigramEntry(&invalidatedEntry, victimPos)) {
+            AKLOGE("Cannot write bigram entry to remove. pos: %d", victimPos);
+            return false;
+        }
+    }
+    return true;
+}
+
+// Called for each visited PtNode. Non-terminal PtNodes are accepted without any change. For a
+// terminal PtNode, its terminal id is looked up in mTerminalIdMap and rewritten to the mapped
+// value through mPtNodeWriter. Returns false when the terminal id is missing from the map or
+// when the id cannot be updated.
+bool Ver4PatriciaTrieWritingHelper::TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds
+        ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
+    if (!ptNodeParams->isTerminal()) {
+        return true;
+    }
+    TerminalPositionLookupTable::TerminalIdMap::const_iterator it =
+            mTerminalIdMap->find(ptNodeParams->getTerminalId());
+    if (it == mTerminalIdMap->end()) {
+        // size() is unsigned, so it is printed with %zu rather than the signed %zd.
+        AKLOGE("terminal Id %d is not in the terminal position map. map size: %zu",
+                ptNodeParams->getTerminalId(), mTerminalIdMap->size());
+        return false;
+    }
+    if (!mPtNodeWriter->updateTerminalId(ptNodeParams, it->second)) {
+        AKLOGE("Cannot update terminal id. %d -> %d", it->first, it->second);
+        return false;
+    }
+    return true;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h
new file mode 100644
index 000000000..bb464ad28
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_PATRICIA_TRIE_WRITING_HELPER_H
+#define LATINIME_VER4_PATRICIA_TRIE_WRITING_HELPER_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
+
+namespace latinime {
+
+class HeaderPolicy;
+class Ver4DictBuffers;
+class Ver4PatriciaTrieNodeReader;
+class Ver4PatriciaTrieNodeWriter;
+
+// Declares writeToDictFile() and writeToDictFileWithGC() for a set of Ver4DictBuffers, together
+// with the private GC / truncation helpers and the small value types they use.
+class Ver4PatriciaTrieWritingHelper {
+ public:
+    Ver4PatriciaTrieWritingHelper(Ver4DictBuffers *const buffers)
+            : mBuffers(buffers) {}
+
+    bool writeToDictFile(const char *const dictDirPath, const int unigramCount,
+            const int bigramCount) const;
+
+    // This method cannot be const because the original dictionary buffer will be updated to detect
+    // useless PtNodes during GC.
+    bool writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const dictDirPath);
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTrieWritingHelper);
+
+    // Traversing event listener that rewrites the terminal id of each visited terminal PtNode
+    // according to a terminal id map. All events other than onVisitingPtNode() are accepted
+    // without doing anything.
+    class TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds
+            : public DynamicPtReadingHelper::TraversingEventListener {
+     public:
+        TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds(
+                Ver4PatriciaTrieNodeWriter *const ptNodeWriter,
+                const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap)
+                : mPtNodeWriter(ptNodeWriter), mTerminalIdMap(terminalIdMap) {}
+
+        bool onAscend() { return true; }
+
+        bool onDescend(const int ptNodeArrayPos) { return true; }
+
+        bool onReadingPtNodeArrayTail() { return true; }
+
+        bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
+
+     private:
+        DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds);
+
+        Ver4PatriciaTrieNodeWriter *const mPtNodeWriter;
+        const TerminalPositionLookupTable::TerminalIdMap *const mTerminalIdMap;
+    };
+
+    // For truncateUnigrams() and truncateBigrams().
+    // Plain value holding a dictionary position with the probability and timestamp used to
+    // rank it.
+    class DictProbability {
+     public:
+        DictProbability(const int dictPos, const int probability, const int timestamp)
+                : mDictPos(dictPos), mProbability(probability), mTimestamp(timestamp) {}
+
+        int getDictPos() const {
+            return mDictPos;
+        }
+
+        int getProbability() const {
+            return mProbability;
+        }
+
+        int getTimestamp() const {
+            return mTimestamp;
+        }
+
+     private:
+        DISALLOW_DEFAULT_CONSTRUCTOR(DictProbability);
+
+        int mDictPos;
+        int mProbability;
+        int mTimestamp;
+    };
+
+    // For truncateUnigrams() and truncateBigrams().
+    // Strict weak ordering for std::priority_queue. With this comparator, top() is the entry
+    // with the lowest probability; ties are resolved in favor of the larger timestamp and then
+    // the smaller dictionary position.
+    class DictProbabilityComparator {
+     public:
+        // Const because comparing does not modify the comparator, which lets it be invoked
+        // through const objects and references as well.
+        bool operator()(const DictProbability &left, const DictProbability &right) const {
+            if (left.getProbability() != right.getProbability()) {
+                return left.getProbability() > right.getProbability();
+            }
+            if (left.getTimestamp() != right.getTimestamp()) {
+                return left.getTimestamp() < right.getTimestamp();
+            }
+            return left.getDictPos() > right.getDictPos();
+        }
+
+     private:
+        DISALLOW_ASSIGNMENT_OPERATOR(DictProbabilityComparator);
+    };
+
+    bool runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy,
+            Ver4DictBuffers *const buffersToWrite, int *const outUnigramCount,
+            int *const outBigramCount);
+
+    bool truncateUnigrams(const Ver4PatriciaTrieNodeReader *const ptNodeReader,
+            Ver4PatriciaTrieNodeWriter *const ptNodeWriter, const int maxUnigramCount);
+
+    bool truncateBigrams(const int maxBigramCount);
+
+    Ver4DictBuffers *const mBuffers;
+};
+} // namespace latinime
+
+#endif /* LATINIME_VER4_PATRICIA_TRIE_WRITING_HELPER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.cpp
new file mode 100644
index 000000000..b014c523d
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.cpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h"
+
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+// Reads the header of the PtNode array located at ptNodeArrayPos.
+//
+// On success, stores the number of PtNodes in the array to outPtNodeCount and the position just
+// after the count field to outFirstPtNodePos, then returns true. Returns false without touching
+// the outputs when ptNodeArrayPos is outside of the buffer or the decoded count is negative.
+bool Ver4PtNodeArrayReader::readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos,
+        int *const outPtNodeCount, int *const outFirstPtNodePos) const {
+    const bool isPosInBuffer =
+            ptNodeArrayPos >= 0 && ptNodeArrayPos < mBuffer->getTailPosition();
+    if (!isPosInBuffer) {
+        // Reading invalid position because of a bug or a broken dictionary.
+        AKLOGE("Reading PtNode array info from invalid dictionary position: %d, dict size: %d",
+                ptNodeArrayPos, mBuffer->getTailPosition());
+        ASSERT(false);
+        return false;
+    }
+    const bool inAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodeArrayPos);
+    const uint8_t *const rawBuffer = mBuffer->getBuffer(inAdditionalBuffer);
+    // Positions inside the additional buffer are shifted by the original buffer size, so the
+    // shift is removed before reading and restored afterwards.
+    const int baseOffset = inAdditionalBuffer ? mBuffer->getOriginalBufferSize() : 0;
+    int posInRawBuffer = ptNodeArrayPos - baseOffset;
+    const int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
+            rawBuffer, &posInRawBuffer);
+    if (ptNodeCount < 0) {
+        AKLOGE("Invalid PtNode count in an array: %d.", ptNodeCount);
+        return false;
+    }
+    *outPtNodeCount = ptNodeCount;
+    *outFirstPtNodePos = posInRawBuffer + baseOffset;
+    return true;
+}
+
+// Reads the forward link stored at forwordLinkPos.
+//
+// Returns false when forwordLinkPos is outside of the buffer. Otherwise returns true and stores
+// to outNextPtNodeArrayPos either forwordLinkPos plus the decoded offset, or NOT_A_DICT_POS when
+// the decoded offset is not a valid forward link position.
+bool Ver4PtNodeArrayReader::readForwardLinkAndReturnIfValid(const int forwordLinkPos,
+        int *const outNextPtNodeArrayPos) const {
+    const bool isPosInBuffer =
+            forwordLinkPos >= 0 && forwordLinkPos < mBuffer->getTailPosition();
+    if (!isPosInBuffer) {
+        // Reading invalid position because of a bug or a broken dictionary.
+        AKLOGE("Reading forward link from invalid dictionary position: %d, dict size: %d",
+                forwordLinkPos, mBuffer->getTailPosition());
+        ASSERT(false);
+        return false;
+    }
+    const bool inAdditionalBuffer = mBuffer->isInAdditionalBuffer(forwordLinkPos);
+    const uint8_t *const rawBuffer = mBuffer->getBuffer(inAdditionalBuffer);
+    // Positions inside the additional buffer are shifted by the original buffer size.
+    const int posInRawBuffer = inAdditionalBuffer ?
+            forwordLinkPos - mBuffer->getOriginalBufferSize() : forwordLinkPos;
+    const int linkOffset =
+            DynamicPtReadingUtils::getForwardLinkPosition(rawBuffer, posInRawBuffer);
+    // The stored value is relative to the position of the link itself.
+    *outNextPtNodeArrayPos = DynamicPtReadingUtils::isValidForwardLinkPosition(linkOffset) ?
+            forwordLinkPos + linkOffset : NOT_A_DICT_POS;
+    return true;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h
new file mode 100644
index 000000000..d81808efc
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_PT_NODE_ARRAY_READER_H
+#define LATINIME_VER4_PT_NODE_ARRAY_READER_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h"
+
+namespace latinime {
+
+class BufferWithExtendableBuffer;
+
+// PtNodeArrayReader implementation that reads PtNode array info and forward links from a
+// BufferWithExtendableBuffer. The reader only keeps a pointer to the buffer it is given.
+class Ver4PtNodeArrayReader : public PtNodeArrayReader {
+ public:
+    Ver4PtNodeArrayReader(const BufferWithExtendableBuffer *const buffer) : mBuffer(buffer) {}
+
+    // Reads the PtNode count and the first PtNode position of the array at ptNodeArrayPos.
+    // Returns whether the read succeeded.
+    virtual bool readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos,
+            int *const outPtNodeCount, int *const outFirstPtNodePos) const;
+    // Reads the forward link at forwordLinkPos and outputs the next PtNode array position.
+    // Returns whether the read succeeded.
+    virtual bool readForwardLinkAndReturnIfValid(const int forwordLinkPos,
+            int *const outNextPtNodeArrayPos) const;
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(Ver4PtNodeArrayReader);
+
+    const BufferWithExtendableBuffer *const mBuffer;
+};
+} // namespace latinime
+#endif /* LATINIME_VER4_PT_NODE_ARRAY_READER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
index f692882f2..259dae4c6 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
@@ -18,11 +18,42 @@
namespace latinime {
-const size_t BufferWithExtendableBuffer::MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024;
+const size_t BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024;
const int BufferWithExtendableBuffer::NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE = 90;
// TODO: Needs to allocate larger memory corresponding to the current vector size.
const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 128 * 1024;
+// Reads a size-byte unsigned integer stored at pos. pos may point into either the original
+// buffer or the additional buffer; positions in the additional buffer are translated by
+// subtracting the original buffer size before reading.
+uint32_t BufferWithExtendableBuffer::readUint(const int size, const int pos) const {
+    const bool inAdditionalBuffer = isInAdditionalBuffer(pos);
+    const uint8_t *const rawBuffer = getBuffer(inAdditionalBuffer);
+    if (inAdditionalBuffer) {
+        return ByteArrayUtils::readUint(rawBuffer, size, pos - mOriginalBufferSize);
+    }
+    return ByteArrayUtils::readUint(rawBuffer, size, pos);
+}
+
+// Reads a size-byte unsigned integer stored at *pos and advances *pos by size.
+uint32_t BufferWithExtendableBuffer::readUintAndAdvancePosition(const int size,
+        int *const pos) const {
+    // Keep the value unsigned; storing it in an int would round-trip values above INT_MAX
+    // through a signed type before returning them as uint32_t.
+    const uint32_t value = readUint(size, *pos);
+    *pos += size;
+    return value;
+}
+
+// Reads a string of at most maxCodePointCount code points starting at *pos into outCodePoints,
+// stores the value returned by the reader to outCodePointCount and advances *pos past the data
+// that was read. *pos may point into either the original or the additional buffer.
+void BufferWithExtendableBuffer::readCodePointsAndAdvancePosition(const int maxCodePointCount,
+        int *const outCodePoints, int *outCodePointCount, int *const pos) const {
+    const bool inAdditionalBuffer = isInAdditionalBuffer(*pos);
+    // Positions in the additional buffer are shifted by the original buffer size; remove the
+    // shift while reading from the raw buffer and restore it afterwards.
+    const int baseOffset = inAdditionalBuffer ? mOriginalBufferSize : 0;
+    *pos -= baseOffset;
+    *outCodePointCount = ByteArrayUtils::readStringAndAdvancePosition(
+            getBuffer(inAdditionalBuffer), maxCodePointCount, outCodePoints, pos);
+    *pos += baseOffset;
+}
+
+// Writes data as a size-byte unsigned integer at pos. Delegates to
+// writeUintAndAdvancePosition() with a scratch copy of pos, so the advanced position is
+// discarded.
+bool BufferWithExtendableBuffer::writeUint(const uint32_t data, const int size, const int pos) {
+    int scratchPos = pos;
+    const bool written = writeUintAndAdvancePosition(data, size, &scratchPos);
+    return written;
+}
+
bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data, const int size,
int *const pos) {
if (!(size >= 1 && size <= 4)) {
@@ -46,7 +77,7 @@ bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data
}
bool BufferWithExtendableBuffer::writeCodePointsAndAdvancePosition(const int *const codePoints,
- const int codePointCount, const bool writesTerminator ,int *const pos) {
+ const int codePointCount, const bool writesTerminator, int *const pos) {
const size_t size = ByteArrayUtils::calculateRequiredByteCountToStoreCodePoints(
codePoints, codePointCount, writesTerminator);
if (!checkAndPrepareWriting(*pos, size)) {
@@ -100,4 +131,21 @@ bool BufferWithExtendableBuffer::checkAndPrepareWriting(const int pos, const int
return true;
}
+// Copies everything from position 0 up to the tail position of sourceBuffer into this buffer at
+// the same positions. Data is transferred in chunks of at most sizeof(uint32_t) bytes. Returns
+// false as soon as a chunk cannot be written.
+bool BufferWithExtendableBuffer::copy(const BufferWithExtendableBuffer *const sourceBuffer) {
+    const int maxChunkSize = sizeof(uint32_t);
+    int pos = 0;
+    int remaining = sourceBuffer->getTailPosition();
+    while (remaining > 0) {
+        // The last chunk may be shorter than maxChunkSize.
+        const int chunkSize = (remaining < maxChunkSize) ? remaining : maxChunkSize;
+        const uint32_t chunk = sourceBuffer->readUint(chunkSize, pos);
+        if (!writeUint(chunk, chunkSize, pos)) {
+            return false;
+        }
+        pos += chunkSize;
+        remaining -= chunkSize;
+    }
+    return true;
+}
+
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
index 9dc34823c..a2e88a46c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
@@ -18,7 +18,7 @@
#define LATINIME_BUFFER_WITH_EXTENDABLE_BUFFER_H
#include <cstddef>
-#include <stdint.h>
+#include <cstdint>
#include <vector>
#include "defines.h"
@@ -32,10 +32,18 @@ namespace latinime {
// raw pointer but provides several methods that handle boundary checking for writing data.
class BufferWithExtendableBuffer {
public:
+ static const size_t DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE;
+
BufferWithExtendableBuffer(uint8_t *const originalBuffer, const int originalBufferSize,
- const int maxAdditionalBufferSize = MAX_ADDITIONAL_BUFFER_SIZE)
+ const int maxAdditionalBufferSize)
: mOriginalBuffer(originalBuffer), mOriginalBufferSize(originalBufferSize),
- mAdditionalBuffer(EXTEND_ADDITIONAL_BUFFER_SIZE_STEP), mUsedAdditionalBufferSize(0),
+ mAdditionalBuffer(0), mUsedAdditionalBufferSize(0),
+ mMaxAdditionalBufferSize(maxAdditionalBufferSize) {}
+
+ // Without original buffer.
+ BufferWithExtendableBuffer(const int maxAdditionalBufferSize)
+ : mOriginalBuffer(0), mOriginalBufferSize(0),
+ mAdditionalBuffer(0), mUsedAdditionalBufferSize(0),
mMaxAdditionalBufferSize(maxAdditionalBufferSize) {}
AK_FORCE_INLINE int getTailPosition() const {
@@ -63,6 +71,13 @@ class BufferWithExtendableBuffer {
}
}
+ uint32_t readUint(const int size, const int pos) const;
+
+ uint32_t readUintAndAdvancePosition(const int size, int *const pos) const;
+
+ void readCodePointsAndAdvancePosition(const int maxCodePointCount,
+ int *const outCodePoints, int *outCodePointCount, int *const pos) const;
+
AK_FORCE_INLINE int getOriginalBufferSize() const {
return mOriginalBufferSize;
}
@@ -78,15 +93,18 @@ class BufferWithExtendableBuffer {
* Writing is allowed for original buffer, already written region of additional buffer and the
* tail of additional buffer.
*/
+ bool writeUint(const uint32_t data, const int size, const int pos);
+
bool writeUintAndAdvancePosition(const uint32_t data, const int size, int *const pos);
bool writeCodePointsAndAdvancePosition(const int *const codePoints, const int codePointCount,
const bool writesTerminator, int *const pos);
+ bool copy(const BufferWithExtendableBuffer *const sourceBuffer);
+
private:
DISALLOW_COPY_AND_ASSIGN(BufferWithExtendableBuffer);
- static const size_t MAX_ADDITIONAL_BUFFER_SIZE;
static const int NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE;
static const size_t EXTEND_ADDITIONAL_BUFFER_SIZE_STEP;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h
index 0c1576818..c0a9fcb1d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h
@@ -17,7 +17,7 @@
#ifndef LATINIME_BYTE_ARRAY_UTILS_H
#define LATINIME_BYTE_ARRAY_UTILS_H
-#include <stdint.h>
+#include <cstdint>
#include "defines.h"
@@ -114,6 +114,24 @@ class ByteArrayUtils {
return buffer[(*pos)++];
}
+    // Reads a size-byte unsigned integer located at pos in buffer by dispatching to the
+    // fixed-width reader for that size. Returns 0 when size is not 1, 2, 3 or 4.
+    static AK_FORCE_INLINE int readUint(const uint8_t *const buffer,
+            const int size, const int pos) {
+        // size must be in 1 to 4.
+        ASSERT(size >= 1 && size <= 4);
+        if (size == 1) {
+            return ByteArrayUtils::readUint8(buffer, pos);
+        }
+        if (size == 2) {
+            return ByteArrayUtils::readUint16(buffer, pos);
+        }
+        if (size == 3) {
+            return ByteArrayUtils::readUint24(buffer, pos);
+        }
+        if (size == 4) {
+            return ByteArrayUtils::readUint32(buffer, pos);
+        }
+        return 0;
+    }
+
/**
* Code Point Reading
*
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
index 994826fa8..1d202c3dd 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
@@ -17,73 +17,96 @@
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
#include <cstdio>
-#include <cstring>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
#include "suggest/policyimpl/dictionary/header/header_policy.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/file_utils.h"
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
+#include "utils/time_keeper.h"
namespace latinime {
const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = ".tmp";
/* static */ bool DictFileWritingUtils::createEmptyDictFile(const char *const filePath,
- const int dictVersion, const HeaderReadWriteUtils::AttributeMap *const attributeMap) {
- switch (dictVersion) {
- case 3:
- return createEmptyV3DictFile(filePath, attributeMap);
+ const int dictVersion, const std::vector<int> localeAsCodePointVector,
+ const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap) {
+ TimeKeeper::setCurrentTime();
+ const FormatUtils::FORMAT_VERSION formatVersion = FormatUtils::getFormatVersion(dictVersion);
+ switch (formatVersion) {
+ case FormatUtils::VERSION_4:
+ return createEmptyV4DictFile<backward::v402::Ver4DictConstants,
+ backward::v402::Ver4DictBuffers,
+ backward::v402::Ver4DictBuffers::Ver4DictBuffersPtr>(
+ filePath, localeAsCodePointVector, attributeMap, formatVersion);
+ case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
+ case FormatUtils::VERSION_4_DEV:
+ return createEmptyV4DictFile<Ver4DictConstants, Ver4DictBuffers,
+ Ver4DictBuffers::Ver4DictBuffersPtr>(
+ filePath, localeAsCodePointVector, attributeMap, formatVersion);
default:
- // Only version 3 dictionary is supported for now.
+ AKLOGE("Cannot create dictionary %s because format version %d is not supported.",
+ filePath, dictVersion);
return false;
}
}
-/* static */ bool DictFileWritingUtils::createEmptyV3DictFile(const char *const filePath,
- const HeaderReadWriteUtils::AttributeMap *const attributeMap) {
- BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
- HeaderPolicy headerPolicy(FormatUtils::VERSION_3, attributeMap);
- headerPolicy.writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */,
- true /* updatesLastDecayedTime */, 0 /* unigramCount */, 0 /* bigramCount */,
- 0 /* extendedRegionSize */);
- BufferWithExtendableBuffer bodyBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
- if (!DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(&bodyBuffer, 0 /* rootPos */)) {
+template<class DictConstants, class DictBuffers, class DictBuffersPtr>
+/* static */ bool DictFileWritingUtils::createEmptyV4DictFile(const char *const dirPath,
+ const std::vector<int> localeAsCodePointVector,
+ const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap,
+ const FormatUtils::FORMAT_VERSION formatVersion) {
+ HeaderPolicy headerPolicy(formatVersion, localeAsCodePointVector, attributeMap);
+ DictBuffersPtr dictBuffers = DictBuffers::createVer4DictBuffers(&headerPolicy,
+ DictConstants::MAX_DICT_EXTENDED_REGION_SIZE);
+ headerPolicy.fillInAndWriteHeaderToBuffer(true /* updatesLastDecayedTime */,
+ 0 /* unigramCount */, 0 /* bigramCount */,
+ 0 /* extendedRegionSize */, dictBuffers->getWritableHeaderBuffer());
+ if (!DynamicPtWritingUtils::writeEmptyDictionary(
+ dictBuffers->getWritableTrieBuffer(), 0 /* rootPos */)) {
+ AKLOGE("Empty ver4 dictionary structure cannot be created on memory.");
return false;
}
- return flushAllHeaderAndBodyToFile(filePath, &headerBuffer, &bodyBuffer);
+ return dictBuffers->flush(dirPath);
}
-/* static */ bool DictFileWritingUtils::flushAllHeaderAndBodyToFile(const char *const filePath,
- BufferWithExtendableBuffer *const dictHeader, BufferWithExtendableBuffer *const dictBody) {
- const int tmpFileNameBufSize = strlen(filePath)
- + strlen(TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE) + 1 /* terminator */;
- // Name of a temporary file used for writing that is a connected string of original name and
- // TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE.
- char tmpFileName[tmpFileNameBufSize];
- snprintf(tmpFileName, tmpFileNameBufSize, "%s%s", filePath,
- TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE);
- FILE *const file = fopen(tmpFileName, "wb");
- if (!file) {
- AKLOGE("Dictionary file %s cannnot be opened.", tmpFileName);
+// Builds a file path from basePath and suffix with FileUtils and flushes buffer to that file.
+// Returns the result of flushBufferToFile().
+/* static */ bool DictFileWritingUtils::flushBufferToFileWithSuffix(const char *const basePath,
+        const char *const suffix, const BufferWithExtendableBuffer *const buffer) {
+    // The path buffer is sized by FileUtils for this basePath / suffix pair.
+    const int pathBufSize = FileUtils::getFilePathWithSuffixBufSize(basePath, suffix);
+    char pathBuf[pathBufSize];
+    FileUtils::getFilePathWithSuffix(basePath, suffix, pathBufSize, pathBuf);
+    const bool flushed = flushBufferToFile(pathBuf, buffer);
+    return flushed;
+}
+
+// Creates the file at filePath and writes the content of buffer to it. Returns false when the
+// file cannot be created, when a stream cannot be attached to it, or when writing fails; in
+// the last case the partially written file is removed. The stream and the descriptor are
+// released on every path after they have been acquired.
+/* static */ bool DictFileWritingUtils::flushBufferToFile(const char *const filePath,
+ const BufferWithExtendableBuffer *const buffer) {
+ // O_CREAT | O_EXCL makes open() fail when filePath already exists.
+ const int fd = open(filePath, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
+ if (fd == -1) {
+ AKLOGE("File %s cannot be opened. errno: %d", filePath, errno);
ASSERT(false);
return false;
}
- // Write the dictionary header.
- if (!writeBufferToFile(file, dictHeader)) {
- remove(tmpFileName);
- AKLOGE("Dictionary header cannnot be written. size: %d", dictHeader->getTailPosition());
+ FILE *const file = fdopen(fd, "wb");
+ if (!file) {
+ AKLOGE("fdopen failed for the file %s. errno: %d", filePath, errno);
+ // No stream owns the descriptor on this path, so close it here (after errno was logged).
+ close(fd);
ASSERT(false);
return false;
}
- // Write the dictionary body.
- if (!writeBufferToFile(file, dictBody)) {
- remove(tmpFileName);
- AKLOGE("Dictionary body cannnot be written. size: %d", dictBody->getTailPosition());
+ if (!writeBufferToFile(file, buffer)) {
+ // Close the stream, which also closes fd, before removing the partially written file.
+ fclose(file);
+ remove(filePath);
+ AKLOGE("Buffer cannot be written to the file %s. size: %d", filePath,
+ buffer->getTailPosition());
ASSERT(false);
return false;
}
fclose(file);
- rename(tmpFileName, filePath);
return true;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h
index bd4ac66fd..0dd1256ee 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h
@@ -21,6 +21,7 @@
#include "defines.h"
#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h"
+#include "suggest/policyimpl/dictionary/utils/format_utils.h"
namespace latinime {
@@ -28,20 +29,31 @@ class BufferWithExtendableBuffer;
class DictFileWritingUtils {
public:
+ static const char *const TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE;
+
static bool createEmptyDictFile(const char *const filePath, const int dictVersion,
- const HeaderReadWriteUtils::AttributeMap *const attributeMap);
+ const std::vector<int> localeAsCodePointVector,
+ const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap);
- static bool flushAllHeaderAndBodyToFile(const char *const filePath,
- BufferWithExtendableBuffer *const dictHeader,
- BufferWithExtendableBuffer *const dictBody);
+ static bool flushBufferToFileWithSuffix(const char *const basePath, const char *const suffix,
+ const BufferWithExtendableBuffer *const buffer);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DictFileWritingUtils);
- static const char *const TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE;
+ static bool createEmptyV401DictFile(const char *const filePath,
+ const std::vector<int> localeAsCodePointVector,
+ const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap,
+ const FormatUtils::FORMAT_VERSION formatVersion);
+
+ template<class DictConstants, class DictBuffers, class DictBuffersPtr>
+ static bool createEmptyV4DictFile(const char *const filePath,
+ const std::vector<int> localeAsCodePointVector,
+ const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap,
+ const FormatUtils::FORMAT_VERSION formatVersion);
- static bool createEmptyV3DictFile(const char *const filePath,
- const HeaderReadWriteUtils::AttributeMap *const attributeMap);
+ static bool flushBufferToFile(const char *const filePath,
+ const BufferWithExtendableBuffer *const buffer);
static bool writeBufferToFile(FILE *const file,
const BufferWithExtendableBuffer *const buffer);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.cpp
new file mode 100644
index 000000000..fb80f38c5
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.cpp
@@ -0,0 +1,171 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/utils/file_utils.h"
+
+#include <cstdio>
+#include <cstring>
+#include <dirent.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+namespace latinime {
+
+// Returns the size in bytes of the file at filePath, or -1 if it cannot be opened or inspected.
+/* static */ int FileUtils::getFileSize(const char *const filePath) {
+    const int fileDescriptor = open(filePath, O_RDONLY);
+    if (fileDescriptor == -1) {
+        return -1;
+    }
+    struct stat fileStatus;
+    // Query the size through the descriptor, then release it on both the success and error path.
+    const bool hasStatus = (fstat(fileDescriptor, &fileStatus) == 0);
+    close(fileDescriptor);
+    return hasStatus ? static_cast<int>(fileStatus.st_size) : -1;
+}
+
+// Returns whether dirPath can be opened as a directory.
+/* static */ bool FileUtils::existsDir(const char *const dirPath) {
+    DIR *const openedDir = opendir(dirPath);
+    const bool exists = (openedDir != NULL);
+    if (exists) {
+        // The handle was only needed for the probe.
+        closedir(openedDir);
+    }
+    return exists;
+}
+
+// Removes a directory and all files in the directory, using the default number of tries.
+/* static */ bool FileUtils::removeDirAndFiles(const char *const dirPath) {
+    const int maxTries = 5;
+    return removeDirAndFiles(dirPath, maxTries);
+}
+
+// Removes the non-directory entries of dirPath and then the directory itself. Subdirectories
+// are not descended into. When removing the directory fails, the whole procedure is retried
+// up to maxTries more times.
+// Returns false when a file or, after all retries, the directory itself could not be removed.
+// Returns true otherwise, including when dirPath cannot be opened at all.
+/* static */ bool FileUtils::removeDirAndFiles(const char *const dirPath, const int maxTries) {
+    DIR *const dir = opendir(dirPath);
+    if (dir == NULL) {
+        // NOTE(review): an unopenable directory is logged but reported as success; presumably
+        // "nothing to remove" is intended -- confirm against callers.
+        AKLOGE("Cannot open dir %s.", dirPath);
+        return true;
+    }
+    struct dirent *dirent;
+    while ((dirent = readdir(dir)) != NULL) {
+        if (dirent->d_type == DT_DIR) {
+            continue;
+        }
+        if (strcmp(dirent->d_name, ".") == 0 || strcmp(dirent->d_name, "..") == 0) {
+            continue;
+        }
+        const int filePathBufSize = getFilePathBufSize(dirPath, dirent->d_name);
+        char filePath[filePathBufSize];
+        getFilePath(dirPath, dirent->d_name, filePathBufSize, filePath);
+        if (remove(filePath) != 0) {
+            AKLOGE("Cannot remove file %s.", filePath);
+            closedir(dir);
+            return false;
+        }
+    }
+    closedir(dir);
+    if (remove(dirPath) != 0) {
+        if (maxTries <= 0) {
+            AKLOGE("Cannot remove directory %s.", dirPath);
+            return false;
+        }
+        // On NFS, deleting files sometimes creates new files. I'm not sure what the
+        // correct way of dealing with this is, but for the time being, this seems to work.
+        // The outcome of the retry is returned so that a final failure reaches the caller
+        // instead of being reported as success.
+        return removeDirAndFiles(dirPath, maxTries - 1);
+    }
+    return true;
+}
+
+// Returns the buffer size needed to hold filePath followed by suffix, including the terminator.
+/* static */ int FileUtils::getFilePathWithSuffixBufSize(const char *const filePath,
+        const char *const suffix) {
+    const size_t terminatorSize = 1;
+    const size_t requiredSize = strlen(filePath) + strlen(suffix) + terminatorSize;
+    return static_cast<int>(requiredSize);
+}
+
+// Writes filePath immediately followed by suffix to outFilePath. At most filePathBufSize bytes,
+// terminator included, are written.
+/* static */ void FileUtils::getFilePathWithSuffix(const char *const filePath,
+        const char *const suffix, const int filePathBufSize, char *const outFilePath) {
+    const size_t writableSize = static_cast<size_t>(filePathBufSize);
+    snprintf(outFilePath, writableSize, "%s%s", filePath, suffix);
+}
+
+// Returns the buffer size needed to hold "dirPath/fileName", including the terminator.
+/* static */ int FileUtils::getFilePathBufSize(const char *const dirPath,
+        const char *const fileName) {
+    const size_t separatorSize = 1;  // '/'
+    const size_t terminatorSize = 1;
+    const size_t requiredSize = strlen(dirPath) + separatorSize + strlen(fileName)
+            + terminatorSize;
+    return static_cast<int>(requiredSize);
+}
+
+// Writes dirPath and fileName joined by a '/' to outFilePath. At most filePathBufSize bytes,
+// terminator included, are written.
+/* static */ void FileUtils::getFilePath(const char *const dirPath, const char *const fileName,
+        const int filePathBufSize, char *const outFilePath) {
+    const size_t writableSize = static_cast<size_t>(filePathBufSize);
+    snprintf(outFilePath, writableSize, "%s/%s", dirPath, fileName);
+}
+
+// Copies filePath with the trailing suffix stripped into outDirPath.
+// Returns false, without writing to outDirPath, when filePath is not longer than suffix, when
+// the stripped path plus terminator does not fit in outDirPathBufSize, or when filePath does
+// not end with suffix. The checks run in that order.
+/* static */ bool FileUtils::getFilePathWithoutSuffix(const char *const filePath,
+        const char *const suffix, const int outDirPathBufSize, char *const outDirPath) {
+    const int pathLen = strlen(filePath);
+    const int suffixLen = strlen(suffix);
+    if (pathLen <= suffixLen) {
+        AKLOGE("File path length (%s:%d) is shorter that suffix length (%s:%d).",
+                filePath, pathLen, suffix, suffixLen);
+        return false;
+    }
+    const int strippedLen = pathLen - suffixLen;
+    if (strippedLen >= outDirPathBufSize) {
+        AKLOGE("outDirPathBufSize is too small. filePath: %s, suffix: %s, outDirPathBufSize: %d",
+                filePath, suffix, outDirPathBufSize);
+        return false;
+    }
+    const char *const pathTail = filePath + strippedLen;
+    if (strncmp(pathTail, suffix, suffixLen) != 0) {
+        AKLOGE("File Path %s does not have %s as a suffix", filePath, suffix);
+        return false;
+    }
+    // The size limit cuts the copy right before the suffix.
+    const int copySize = strippedLen + 1 /* terminator */;
+    snprintf(outDirPath, copySize, "%s", filePath);
+    return true;
+}
+
+// Writes the directory part of filePath (everything before its last '/') to outDirPath.
+// outDirPath is set to an empty string when filePath contains no '/' or when the directory
+// part plus terminator does not fit in outDirPathBufSize, so that callers never read a buffer
+// this function did not write.
+/* static */ void FileUtils::getDirPath(const char *const filePath, const int outDirPathBufSize,
+        char *const outDirPath) {
+    if (outDirPathBufSize > 0) {
+        // There is no return value to signal failure, so start from a valid empty C string.
+        outDirPath[0] = '\0';
+    }
+    const char *const lastSeparator = strrchr(filePath, '/');
+    if (lastSeparator == NULL) {
+        return;
+    }
+    const int dirPathLength = static_cast<int>(lastSeparator - filePath);
+    if (dirPathLength >= outDirPathBufSize) {
+        AKLOGE("outDirPathBufSize is too small. filePath: %s, outDirPathBufSize: %d",
+                filePath, outDirPathBufSize);
+        ASSERT(false);
+        return;
+    }
+    // The size limit cuts the copy right before the last separator.
+    snprintf(outDirPath, dirPathLength + 1 /* terminator */, "%s", filePath);
+}
+
+// Writes the final path component of filePath, as computed by basename(), to outName.
+// outName is not written when the component plus terminator does not fit in outNameBufSize.
+/* static */ void FileUtils::getBasename(const char *const filePath,
+        const int outNameBufSize, char *const outName) {
+    // basename() is handed a private copy of the path; POSIX allows it to modify its argument.
+    const int pathCopyBufSize = strlen(filePath) + 1 /* terminator */;
+    char pathCopy[pathCopyBufSize];
+    snprintf(pathCopy, pathCopyBufSize, "%s", filePath);
+    const char *const lastComponent = basename(pathCopy);
+    const int componentLength = strlen(lastComponent);
+    if (componentLength < outNameBufSize) {
+        snprintf(outName, componentLength + 1 /* terminator */, "%s", lastComponent);
+        return;
+    }
+    AKLOGE("outNameBufSize is too small. filePath: %s, outNameBufSize: %d",
+            filePath, outNameBufSize);
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h
new file mode 100644
index 000000000..4f1b93a6a
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_FILE_UTILS_H
+#define LATINIME_FILE_UTILS_H
+
+#include "defines.h"
+
+namespace latinime {
+
+// Collection of static helpers for querying the file system and for composing and
+// decomposing file paths held in caller-provided character buffers.
+class FileUtils {
+ public:
+ // Returns -1 on error.
+ static int getFileSize(const char *const filePath);
+
+ // Returns whether dirPath can be opened as a directory.
+ static bool existsDir(const char *const dirPath);
+
+ // Remove a directory and all files in the directory.
+ static bool removeDirAndFiles(const char *const dirPath);
+
+ // Returns the buffer size, terminator included, needed by getFilePathWithSuffix().
+ static int getFilePathWithSuffixBufSize(const char *const filePath, const char *const suffix);
+
+ // Writes filePath followed by suffix to outFilePath, limited to filePathBufSize bytes.
+ static void getFilePathWithSuffix(const char *const filePath, const char *const suffix,
+ const int filePathBufSize, char *const outFilePath);
+
+ // Returns the buffer size, terminator included, needed by getFilePath().
+ static int getFilePathBufSize(const char *const dirPath, const char *const fileName);
+
+ // Writes "dirPath/fileName" to outFilePath, limited to filePathBufSize bytes.
+ static void getFilePath(const char *const dirPath, const char *const fileName,
+ const int filePathBufSize, char *const outFilePath);
+
+ // Writes filePath with the trailing suffix removed to outDirPath.
+ // Returns whether the filePath has the suffix and the result fits in the buffer.
+ static bool getFilePathWithoutSuffix(const char *const filePath, const char *const suffix,
+ const int dirPathBufSize, char *const outDirPath);
+
+ // Writes the part of filePath that precedes its last '/' to outDirPath.
+ static void getDirPath(const char *const filePath, const int dirPathBufSize,
+ char *const outDirPath);
+
+ // Writes the basename() of filePath to outName when it fits in outNameBufSize.
+ static void getBasename(const char *const filePath, const int outNameBufSize,
+ char *const outName);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(FileUtils);
+
+ // Implementation of the public overload; maxTries bounds the retries of the final removal.
+ static bool removeDirAndFiles(const char *const dirPath, const int maxTries);
+};
+} // namespace latinime
+#endif /* LATINIME_FILE_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
index 1632fd072..fed0ae77e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
@@ -14,141 +14,225 @@
* limitations under the License.
*/
+#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+
+#include <algorithm>
#include <cmath>
-#include <ctime>
#include <stdlib.h>
-#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
-
-#include "suggest/core/policy/dictionary_header_structure_policy.h"
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
+#include "utils/time_keeper.h"
namespace latinime {
-const int ForgettingCurveUtils::MAX_UNIGRAM_COUNT = 12000;
-const int ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC = 10000;
-const int ForgettingCurveUtils::MAX_BIGRAM_COUNT = 12000;
-const int ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC = 10000;
-
-const int ForgettingCurveUtils::MAX_COMPUTED_PROBABILITY = 127;
-const int ForgettingCurveUtils::MAX_ENCODED_PROBABILITY = 15;
-const int ForgettingCurveUtils::MIN_VALID_ENCODED_PROBABILITY = 3;
-const int ForgettingCurveUtils::ENCODED_PROBABILITY_STEP = 1;
-// Currently, we try to decay each uni/bigram once every 2 hours. Accordingly, the expected
-// duration of the decay is approximately 66hours.
-const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f;
+const int ForgettingCurveUtils::MULTIPLIER_TWO_IN_PROBABILITY_SCALE = 8;
const int ForgettingCurveUtils::DECAY_INTERVAL_SECONDS = 2 * 60 * 60;
-const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityTable;
-ForgettingCurveUtils::TimeKeeper ForgettingCurveUtils::sTimeKeeper;
+const int ForgettingCurveUtils::MAX_LEVEL = 3;
+const int ForgettingCurveUtils::MIN_VISIBLE_LEVEL = 1;
+const int ForgettingCurveUtils::MAX_ELAPSED_TIME_STEP_COUNT = 15;
+const int ForgettingCurveUtils::DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD = 14;
-void ForgettingCurveUtils::TimeKeeper::setCurrentTime() {
- mCurrentTime = time(0);
-}
+const float ForgettingCurveUtils::UNIGRAM_COUNT_HARD_LIMIT_WEIGHT = 1.2;
+const float ForgettingCurveUtils::BIGRAM_COUNT_HARD_LIMIT_WEIGHT = 1.2;
-/* static */ int ForgettingCurveUtils::getProbability(const int encodedUnigramProbability,
- const int encodedBigramProbability) {
- if (encodedUnigramProbability == NOT_A_PROBABILITY) {
- return NOT_A_PROBABILITY;
- } else if (encodedBigramProbability == NOT_A_PROBABILITY) {
- return backoff(decodeProbability(encodedUnigramProbability));
+const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityTable;
+
+// TODO: Revise the logic to decide the initial probability depending on the given probability.
+/* static */ const HistoricalInfo ForgettingCurveUtils::createUpdatedHistoricalInfo(
+ const HistoricalInfo *const originalHistoricalInfo, const int newProbability,
+ const HistoricalInfo *const newHistoricalInfo, const HeaderPolicy *const headerPolicy) {
+ const int timestamp = newHistoricalInfo->getTimeStamp();
+ if (newProbability != NOT_A_PROBABILITY && originalHistoricalInfo->getLevel() == 0) {
+ // Add entry as a valid word.
+ const int level = clampToVisibleEntryLevelRange(newHistoricalInfo->getLevel());
+ const int count = clampToValidCountRange(newHistoricalInfo->getCount(), headerPolicy);
+ return HistoricalInfo(timestamp, level, count);
+ } else if (!originalHistoricalInfo->isValid()
+ || originalHistoricalInfo->getLevel() < newHistoricalInfo->getLevel()
+ || (originalHistoricalInfo->getLevel() == newHistoricalInfo->getLevel()
+ && originalHistoricalInfo->getCount() < newHistoricalInfo->getCount())) {
+ // Initial information.
+ const int level = clampToValidLevelRange(newHistoricalInfo->getLevel());
+ const int count = clampToValidCountRange(newHistoricalInfo->getCount(), headerPolicy);
+ return HistoricalInfo(timestamp, level, count);
} else {
- const int unigramProbability = decodeProbability(encodedUnigramProbability);
- const int bigramProbability = decodeProbability(encodedBigramProbability);
- return min(max(unigramProbability, bigramProbability), MAX_COMPUTED_PROBABILITY);
+ const int updatedCount = originalHistoricalInfo->getCount() + 1;
+ if (updatedCount >= headerPolicy->getForgettingCurveOccurrencesToLevelUp()) {
+ // The count exceeds the max value the level can be incremented.
+ if (originalHistoricalInfo->getLevel() >= MAX_LEVEL) {
+ // The level is already max.
+ return HistoricalInfo(timestamp,
+ originalHistoricalInfo->getLevel(), originalHistoricalInfo->getCount());
+ } else {
+ // Level up.
+ return HistoricalInfo(timestamp,
+ originalHistoricalInfo->getLevel() + 1, 0 /* count */);
+ }
+ } else {
+ return HistoricalInfo(timestamp, originalHistoricalInfo->getLevel(), updatedCount);
+ }
}
}
-// Caveat: Unlike getProbability(), this method doesn't assume special bigram probability encoding
-// (i.e. unigram probability + bigram probability delta).
-/* static */ int ForgettingCurveUtils::getUpdatedEncodedProbability(
- const int originalEncodedProbability, const int newProbability) {
- if (originalEncodedProbability == NOT_A_PROBABILITY) {
- // The bigram relation is not in this dictionary.
- if (newProbability == NOT_A_PROBABILITY) {
- // The bigram target is not in other dictionaries.
- return 0;
- } else {
- return MIN_VALID_ENCODED_PROBABILITY;
- }
+// Looks up the probability for historicalInfo in the table selected by headerPolicy. The
+// lookup uses the entry's level and the number of time steps elapsed since its timestamp,
+// both clamped to their valid ranges first.
+/* static */ int ForgettingCurveUtils::decodeProbability(
+        const HistoricalInfo *const historicalInfo, const HeaderPolicy *const headerPolicy) {
+    const int durationToLevelDown = headerPolicy->getForgettingCurveDurationToLevelDown();
+    const int elapsedTimeStepCount =
+            getElapsedTimeStepCount(historicalInfo->getTimeStamp(), durationToLevelDown);
+    const int tableId = headerPolicy->getForgettingCurveProbabilityValuesTableId();
+    const int level = clampToValidLevelRange(historicalInfo->getLevel());
+    const int timeStepCount = clampToValidTimeStepCountRange(elapsedTimeStepCount);
+    return sProbabilityTable.getProbability(tableId, level, timeStepCount);
+}
+
+/* static */ int ForgettingCurveUtils::getProbability(const int unigramProbability,
+ const int bigramProbability) {
+ if (unigramProbability == NOT_A_PROBABILITY) {
+ return NOT_A_PROBABILITY;
+ } else if (bigramProbability == NOT_A_PROBABILITY) {
+ return std::min(backoff(unigramProbability), MAX_PROBABILITY);
} else {
- if (newProbability != NOT_A_PROBABILITY
- && originalEncodedProbability < MIN_VALID_ENCODED_PROBABILITY) {
- return MIN_VALID_ENCODED_PROBABILITY;
- }
- return min(originalEncodedProbability + ENCODED_PROBABILITY_STEP, MAX_ENCODED_PROBABILITY);
+ // TODO: Investigate better way to handle bigram probability.
+ return std::min(std::max(unigramProbability,
+ bigramProbability + MULTIPLIER_TWO_IN_PROBABILITY_SCALE), MAX_PROBABILITY);
}
}
-/* static */ int ForgettingCurveUtils::isValidEncodedProbability(const int encodedProbability) {
- return encodedProbability >= MIN_VALID_ENCODED_PROBABILITY;
+/* static */ bool ForgettingCurveUtils::needsToKeep(const HistoricalInfo *const historicalInfo,
+ const HeaderPolicy *const headerPolicy) {
+ return historicalInfo->getLevel() > 0
+ || getElapsedTimeStepCount(historicalInfo->getTimeStamp(),
+ headerPolicy->getForgettingCurveDurationToLevelDown())
+ < DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD;
}
-/* static */ int ForgettingCurveUtils::getEncodedProbabilityToSave(const int encodedProbability,
- const DictionaryHeaderStructurePolicy *const headerPolicy) {
- const int elapsedTime = sTimeKeeper.peekCurrentTime() - headerPolicy->getLastDecayedTime();
- const int decayIterationCount = max(elapsedTime / DECAY_INTERVAL_SECONDS, 1);
- int currentEncodedProbability = max(min(encodedProbability, MAX_ENCODED_PROBABILITY), 0);
- // TODO: Implement the decay in more proper way.
- for (int i = 0; i < decayIterationCount; ++i) {
- const float currentRate = static_cast<float>(currentEncodedProbability)
- / static_cast<float>(MAX_ENCODED_PROBABILITY);
- const float thresholdToDecay = (1.0f - MIN_PROBABILITY_TO_DECAY) * currentRate;
- const float randValue = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
- if (thresholdToDecay < randValue) {
- currentEncodedProbability = max(currentEncodedProbability - ENCODED_PROBABILITY_STEP,
- 0);
- }
+/* static */ const HistoricalInfo ForgettingCurveUtils::createHistoricalInfoToSave(
+ const HistoricalInfo *const originalHistoricalInfo,
+ const HeaderPolicy *const headerPolicy) {
+ if (originalHistoricalInfo->getTimeStamp() == NOT_A_TIMESTAMP) {
+ return HistoricalInfo();
+ }
+ const int durationToLevelDownInSeconds = headerPolicy->getForgettingCurveDurationToLevelDown();
+ const int elapsedTimeStep = getElapsedTimeStepCount(
+ originalHistoricalInfo->getTimeStamp(), durationToLevelDownInSeconds);
+ if (elapsedTimeStep <= MAX_ELAPSED_TIME_STEP_COUNT) {
+ // No need to update historical info.
+ return *originalHistoricalInfo;
}
- return currentEncodedProbability;
+ // Level down.
+ const int maxLevelDownAmonut = elapsedTimeStep / (MAX_ELAPSED_TIME_STEP_COUNT + 1);
+ const int levelDownAmount = (maxLevelDownAmonut >= originalHistoricalInfo->getLevel()) ?
+ originalHistoricalInfo->getLevel() : maxLevelDownAmonut;
+ const int adjustedTimestampInSeconds = originalHistoricalInfo->getTimeStamp() +
+ levelDownAmount * durationToLevelDownInSeconds;
+ return HistoricalInfo(adjustedTimestampInSeconds,
+ originalHistoricalInfo->getLevel() - levelDownAmount, 0 /* count */);
}
/* static */ bool ForgettingCurveUtils::needsToDecay(const bool mindsBlockByDecay,
- const int unigramCount, const int bigramCount,
- const DictionaryHeaderStructurePolicy *const headerPolicy) {
- if (unigramCount >= ForgettingCurveUtils::MAX_UNIGRAM_COUNT) {
+ const int unigramCount, const int bigramCount, const HeaderPolicy *const headerPolicy) {
+ if (unigramCount >= getUnigramCountHardLimit(headerPolicy->getMaxUnigramCount())) {
// Unigram count exceeds the limit.
return true;
- } else if (bigramCount >= ForgettingCurveUtils::MAX_BIGRAM_COUNT) {
+ } else if (bigramCount >= getBigramCountHardLimit(headerPolicy->getMaxBigramCount())) {
// Bigram count exceeds the limit.
return true;
}
if (mindsBlockByDecay) {
return false;
}
- if (headerPolicy->getLastDecayedTime() + DECAY_INTERVAL_SECONDS < time(0)) {
+ if (headerPolicy->getLastDecayedTime() + DECAY_INTERVAL_SECONDS
+ < TimeKeeper::peekCurrentTime()) {
// Time to decay.
return true;
}
return false;
}
-/* static */ int ForgettingCurveUtils::decodeProbability(const int encodedProbability) {
- if (encodedProbability < MIN_VALID_ENCODED_PROBABILITY) {
- return NOT_A_PROBABILITY;
- } else {
- return min(sProbabilityTable.getProbability(encodedProbability), MAX_ENCODED_PROBABILITY);
- }
-}
-
// See comments in ProbabilityUtils::backoff().
/* static */ int ForgettingCurveUtils::backoff(const int unigramProbability) {
- if (unigramProbability == NOT_A_PROBABILITY) {
- return NOT_A_PROBABILITY;
- } else {
- return max(unigramProbability - 8, 0);
+ // See TODO comments in ForgettingCurveUtils::getProbability().
+ return unigramProbability;
+}
+
+// Returns how many time steps lie between timestamp and the current time. One time step is
+// durationToLevelDownInSeconds split into (MAX_ELAPSED_TIME_STEP_COUNT + 1) equal parts.
+/* static */ int ForgettingCurveUtils::getElapsedTimeStepCount(const int timestamp,
+        const int durationToLevelDownInSeconds) {
+    const int elapsedTimeInSeconds = TimeKeeper::peekCurrentTime() - timestamp;
+    // A duration smaller than the number of steps truncates to a zero-length step, which would
+    // make the division below undefined. A step is therefore never shorter than one second.
+    const int timeStepDurationInSeconds = std::max(
+            durationToLevelDownInSeconds / (MAX_ELAPSED_TIME_STEP_COUNT + 1), 1);
+    return elapsedTimeInSeconds / timeStepDurationInSeconds;
+}
+
+// Clamps level to the range [MIN_VISIBLE_LEVEL, MAX_LEVEL].
+/* static */ int ForgettingCurveUtils::clampToVisibleEntryLevelRange(const int level) {
+    const int lowerBoundedLevel = (level < MIN_VISIBLE_LEVEL) ? MIN_VISIBLE_LEVEL : level;
+    return (lowerBoundedLevel > MAX_LEVEL) ? MAX_LEVEL : lowerBoundedLevel;
+}
+
+// Clamps count to the range [0, occurrences-to-level-up - 1], the upper bound being read
+// from headerPolicy.
+/* static */ int ForgettingCurveUtils::clampToValidCountRange(const int count,
+        const HeaderPolicy *const headerPolicy) {
+    const int maxCount = headerPolicy->getForgettingCurveOccurrencesToLevelUp() - 1;
+    const int nonNegativeCount = (count < 0) ? 0 : count;
+    return (nonNegativeCount > maxCount) ? maxCount : nonNegativeCount;
+}
+
+// Clamps level to the range [0, MAX_LEVEL].
+/* static */ int ForgettingCurveUtils::clampToValidLevelRange(const int level) {
+    const int nonNegativeLevel = (level < 0) ? 0 : level;
+    return (nonNegativeLevel > MAX_LEVEL) ? MAX_LEVEL : nonNegativeLevel;
+}
+
+// Clamps timeStepCount to the range [0, MAX_ELAPSED_TIME_STEP_COUNT].
+/* static */ int ForgettingCurveUtils::clampToValidTimeStepCountRange(const int timeStepCount) {
+    const int nonNegativeCount = (timeStepCount < 0) ? 0 : timeStepCount;
+    return (nonNegativeCount > MAX_ELAPSED_TIME_STEP_COUNT)
+            ? MAX_ELAPSED_TIME_STEP_COUNT : nonNegativeCount;
+}
+
+const int ForgettingCurveUtils::ProbabilityTable::PROBABILITY_TABLE_COUNT = 4;
+const int ForgettingCurveUtils::ProbabilityTable::WEAK_PROBABILITY_TABLE_ID = 0;
+const int ForgettingCurveUtils::ProbabilityTable::MODEST_PROBABILITY_TABLE_ID = 1;
+const int ForgettingCurveUtils::ProbabilityTable::STRONG_PROBABILITY_TABLE_ID = 2;
+const int ForgettingCurveUtils::ProbabilityTable::AGGRESSIVE_PROBABILITY_TABLE_ID = 3;
+const int ForgettingCurveUtils::ProbabilityTable::WEAK_MAX_PROBABILITY = 127;
+const int ForgettingCurveUtils::ProbabilityTable::MODEST_BASE_PROBABILITY = 32;
+const int ForgettingCurveUtils::ProbabilityTable::STRONG_BASE_PROBABILITY = 35;
+const int ForgettingCurveUtils::ProbabilityTable::AGGRESSIVE_BASE_PROBABILITY = 40;
+
+
+ForgettingCurveUtils::ProbabilityTable::ProbabilityTable() : mTables() {
+ mTables.resize(PROBABILITY_TABLE_COUNT);
+ for (int tableId = 0; tableId < PROBABILITY_TABLE_COUNT; ++tableId) {
+ mTables[tableId].resize(MAX_LEVEL + 1);
+ for (int level = 0; level <= MAX_LEVEL; ++level) {
+ mTables[tableId][level].resize(MAX_ELAPSED_TIME_STEP_COUNT + 1);
+ const float initialProbability = getBaseProbabilityForLevel(tableId, level);
+ const float endProbability = getBaseProbabilityForLevel(tableId, level - 1);
+ for (int timeStepCount = 0; timeStepCount <= MAX_ELAPSED_TIME_STEP_COUNT;
+ ++timeStepCount) {
+ if (level == 0) {
+ mTables[tableId][level][timeStepCount] = NOT_A_PROBABILITY;
+ continue;
+ }
+ const float probability = initialProbability
+ * powf(initialProbability / endProbability,
+ -1.0f * static_cast<float>(timeStepCount)
+ / static_cast<float>(MAX_ELAPSED_TIME_STEP_COUNT + 1));
+ mTables[tableId][level][timeStepCount] =
+ std::min(std::max(static_cast<int>(probability), 1), MAX_PROBABILITY);
+ }
+ }
}
}
-ForgettingCurveUtils::ProbabilityTable::ProbabilityTable() : mTable() {
- // Table entry is as follows:
- // 1, 1, 1, 2, 3, 5, 6, 9, 13, 18, 25, 34, 48, 66, 91, 127.
- // Note that first MIN_VALID_ENCODED_PROBABILITY values are not used.
- mTable.resize(MAX_ENCODED_PROBABILITY + 1);
- for (int i = 0; i <= MAX_ENCODED_PROBABILITY; ++i) {
- const int probability = static_cast<int>(powf(static_cast<float>(MAX_COMPUTED_PROBABILITY),
- static_cast<float>(i) / static_cast<float>(MAX_ENCODED_PROBABILITY)));
- mTable[i] = min(MAX_COMPUTED_PROBABILITY, max(0, probability));
+/* static */ int ForgettingCurveUtils::ProbabilityTable::getBaseProbabilityForLevel(
+ const int tableId, const int level) {
+ if (tableId == WEAK_PROBABILITY_TABLE_ID) {
+ // Max probability is 127.
+ return static_cast<float>(WEAK_MAX_PROBABILITY / (1 << (MAX_LEVEL - level)));
+ } else if (tableId == MODEST_PROBABILITY_TABLE_ID) {
+ // Max probability is 128.
+ return static_cast<float>(MODEST_BASE_PROBABILITY * (level + 1));
+ } else if (tableId == STRONG_PROBABILITY_TABLE_ID) {
+ // Max probability is 140.
+ return static_cast<float>(STRONG_BASE_PROBABILITY * (level + 1));
+ } else if (tableId == AGGRESSIVE_PROBABILITY_TABLE_ID) {
+ // Max probability is 160.
+ return static_cast<float>(AGGRESSIVE_BASE_PROBABILITY * (level + 1));
+ } else {
+ return NOT_A_PROBABILITY;
}
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
index 2ad423874..3ff80aeec 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
@@ -20,48 +20,43 @@
#include <vector>
#include "defines.h"
+#include "suggest/policyimpl/dictionary/utils/historical_info.h"
namespace latinime {
-class DictionaryHeaderStructurePolicy;
+class HeaderPolicy;
-// TODO: Check the elapsed time and decrease the probability depending on the time. Time field is
-// required to introduced to each terminal PtNode and bigram entry.
-// TODO: Quit using bigram probability to indicate the delta.
class ForgettingCurveUtils {
public:
- class TimeKeeper {
- public:
- TimeKeeper() : mCurrentTime(0) {}
- void setCurrentTime();
- int peekCurrentTime() const { return mCurrentTime; };
-
- private:
- DISALLOW_COPY_AND_ASSIGN(TimeKeeper);
+ static const HistoricalInfo createUpdatedHistoricalInfo(
+ const HistoricalInfo *const originalHistoricalInfo, const int newProbability,
+ const HistoricalInfo *const newHistoricalInfo, const HeaderPolicy *const headerPolicy);
- int mCurrentTime;
- };
+ static const HistoricalInfo createHistoricalInfoToSave(
+ const HistoricalInfo *const originalHistoricalInfo,
+ const HeaderPolicy *const headerPolicy);
- static const int MAX_UNIGRAM_COUNT;
- static const int MAX_UNIGRAM_COUNT_AFTER_GC;
- static const int MAX_BIGRAM_COUNT;
- static const int MAX_BIGRAM_COUNT_AFTER_GC;
-
- static TimeKeeper sTimeKeeper;
+ static int decodeProbability(const HistoricalInfo *const historicalInfo,
+ const HeaderPolicy *const headerPolicy);
static int getProbability(const int encodedUnigramProbability,
const int encodedBigramProbability);
- static int getUpdatedEncodedProbability(const int originalEncodedProbability,
- const int newProbability);
+ static bool needsToKeep(const HistoricalInfo *const historicalInfo,
+ const HeaderPolicy *const headerPolicy);
- static int isValidEncodedProbability(const int encodedProbability);
+ static bool needsToDecay(const bool mindsBlockByDecay, const int unigramCount,
+ const int bigramCount, const HeaderPolicy *const headerPolicy);
- static int getEncodedProbabilityToSave(const int encodedProbability,
- const DictionaryHeaderStructurePolicy *const headerPolicy);
+ AK_FORCE_INLINE static int getUnigramCountHardLimit(const int maxUnigramCount) {
+ return static_cast<int>(static_cast<float>(maxUnigramCount)
+ * UNIGRAM_COUNT_HARD_LIMIT_WEIGHT);
+ }
- static bool needsToDecay(const bool mindsBlockByDecay, const int unigramCount,
- const int bigramCount, const DictionaryHeaderStructurePolicy *const headerPolicy);
+ AK_FORCE_INLINE static int getBigramCountHardLimit(const int maxBigramCount) {
+ return static_cast<int>(static_cast<float>(maxBigramCount)
+ * BIGRAM_COUNT_HARD_LIMIT_WEIGHT);
+ }
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(ForgettingCurveUtils);
@@ -70,31 +65,49 @@ class ForgettingCurveUtils {
public:
ProbabilityTable();
- int getProbability(const int encodedProbability) const {
- if (encodedProbability < 0 || encodedProbability > static_cast<int>(mTable.size())) {
- return NOT_A_PROBABILITY;
- }
- return mTable[encodedProbability];
+ int getProbability(const int tableId, const int level,
+ const int elapsedTimeStepCount) const {
+ return mTables[tableId][level][elapsedTimeStepCount];
}
private:
DISALLOW_COPY_AND_ASSIGN(ProbabilityTable);
- std::vector<int> mTable;
+ static const int PROBABILITY_TABLE_COUNT;
+ static const int WEAK_PROBABILITY_TABLE_ID;
+ static const int MODEST_PROBABILITY_TABLE_ID;
+ static const int STRONG_PROBABILITY_TABLE_ID;
+ static const int AGGRESSIVE_PROBABILITY_TABLE_ID;
+
+ static const int WEAK_MAX_PROBABILITY;
+ static const int MODEST_BASE_PROBABILITY;
+ static const int STRONG_BASE_PROBABILITY;
+ static const int AGGRESSIVE_BASE_PROBABILITY;
+
+ std::vector<std::vector<std::vector<int> > > mTables;
+
+ static int getBaseProbabilityForLevel(const int tableId, const int level);
};
- static const int MAX_COMPUTED_PROBABILITY;
- static const int MAX_ENCODED_PROBABILITY;
- static const int MIN_VALID_ENCODED_PROBABILITY;
- static const int ENCODED_PROBABILITY_STEP;
- static const float MIN_PROBABILITY_TO_DECAY;
+ static const int MULTIPLIER_TWO_IN_PROBABILITY_SCALE;
static const int DECAY_INTERVAL_SECONDS;
- static const ProbabilityTable sProbabilityTable;
+ static const int MAX_LEVEL;
+ static const int MIN_VISIBLE_LEVEL;
+ static const int MAX_ELAPSED_TIME_STEP_COUNT;
+ static const int DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD;
+
+ static const float UNIGRAM_COUNT_HARD_LIMIT_WEIGHT;
+ static const float BIGRAM_COUNT_HARD_LIMIT_WEIGHT;
- static int decodeProbability(const int encodedProbability);
+ static const ProbabilityTable sProbabilityTable;
static int backoff(const int unigramProbability);
+ static int getElapsedTimeStepCount(const int timestamp, const int durationToLevelDown);
+ static int clampToVisibleEntryLevelRange(const int level);
+ static int clampToValidLevelRange(const int level);
+ static int clampToValidCountRange(const int count, const HeaderPolicy *const headerPolicy);
+ static int clampToValidTimeStepCountRange(const int timeStepCount);
};
} // namespace latinime
#endif /* LATINIME_FORGETTING_CURVE_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
index 1d77d5c27..1916ea560 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
@@ -25,6 +25,20 @@ const uint32_t FormatUtils::MAGIC_NUMBER = 0x9BC13AFE;
// Magic number (4 bytes), version (2 bytes), flags (2 bytes), header size (4 bytes) = 12
const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 12;
+// Maps a raw format version number to the FORMAT_VERSION enumerator with the same value, or
+// to UNKNOWN_VERSION when the number matches none of the known versions.
+/* static */ FormatUtils::FORMAT_VERSION FormatUtils::getFormatVersion(const int formatVersion) {
+    if (formatVersion == VERSION_2) {
+        return VERSION_2;
+    }
+    if (formatVersion == VERSION_4_ONLY_FOR_TESTING) {
+        return VERSION_4_ONLY_FOR_TESTING;
+    }
+    if (formatVersion == VERSION_4) {
+        return VERSION_4;
+    }
+    if (formatVersion == VERSION_4_DEV) {
+        return VERSION_4_DEV;
+    }
+    return UNKNOWN_VERSION;
+}
/* static */ FormatUtils::FORMAT_VERSION FormatUtils::detectFormatVersion(
const uint8_t *const dict, const int dictSize) {
// The magic number is stored big-endian.
@@ -36,18 +50,15 @@ const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 12;
const uint32_t magicNumber = ByteArrayUtils::readUint32(dict, 0);
switch (magicNumber) {
case MAGIC_NUMBER:
- // Version 2 header is as follows:
+ // The layout of the header is as follows:
// Magic number (4 bytes) 0x9B 0xC1 0x3A 0xFE
// Dictionary format version number (2 bytes)
// Options (2 bytes)
// Header size (4 bytes) : integer, big endian
- if (ByteArrayUtils::readUint16(dict, 4) == 2) {
- return VERSION_2;
- } else if (ByteArrayUtils::readUint16(dict, 4) == 3) {
- return VERSION_3;
- } else {
- return UNKNOWN_VERSION;
- }
+ // Conceptually this converts the hardcoded value of the bytes in the file into
+ // the symbolic value we use in the code. But we want the constants to be the
+ // same so we use them for both here.
+ return getFormatVersion(ByteArrayUtils::readUint16(dict, 4));
default:
return UNKNOWN_VERSION;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
index 79ed0de29..55ad5799f 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
@@ -17,7 +17,7 @@
#ifndef LATINIME_FORMAT_UTILS_H
#define LATINIME_FORMAT_UTILS_H
-#include <stdint.h>
+#include <cstdint>
#include "defines.h"
@@ -29,15 +29,19 @@ namespace latinime {
class FormatUtils {
public:
enum FORMAT_VERSION {
- VERSION_2,
- VERSION_3,
- UNKNOWN_VERSION
+ // These MUST have the same values as the relevant constants in FormatSpec.java.
+ VERSION_2 = 2,
+ VERSION_4_ONLY_FOR_TESTING = 399,
+ VERSION_4 = 402,
+ VERSION_4_DEV = 403,
+ UNKNOWN_VERSION = -1
};
// 32 bit magic number is stored at the beginning of the dictionary header to reject
// unsupported or obsolete dictionary formats.
static const uint32_t MAGIC_NUMBER;
+ static FORMAT_VERSION getFormatVersion(const int formatVersion);
static FORMAT_VERSION detectFormatVersion(const uint8_t *const dict, const int dictSize);
private:
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/historical_info.h b/native/jni/src/suggest/policyimpl/dictionary/utils/historical_info.h
new file mode 100644
index 000000000..428ca8626
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/historical_info.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_HISTORICAL_INFO_H
+#define LATINIME_HISTORICAL_INFO_H
+
+#include "defines.h"
+
+namespace latinime {
+
+// Immutable bundle of a timestamp, a level and a count. An instance whose timestamp is
+// NOT_A_TIMESTAMP is considered invalid.
+class HistoricalInfo {
+ public:
+    // Creates the invalid instance: NOT_A_TIMESTAMP with a level and a count of 0.
+    HistoricalInfo() : HistoricalInfo(NOT_A_TIMESTAMP, 0 /* level */, 0 /* count */) {}
+
+    HistoricalInfo(const int timestamp, const int level, const int count)
+            : mTimestamp(timestamp), mLevel(level), mCount(count) {}
+
+    // True unless the stored timestamp is NOT_A_TIMESTAMP.
+    bool isValid() const {
+        const bool hasNoTimestamp = (mTimestamp == NOT_A_TIMESTAMP);
+        return !hasNoTimestamp;
+    }
+
+    int getTimeStamp() const { return mTimestamp; }
+
+    int getLevel() const { return mLevel; }
+
+    int getCount() const { return mCount; }
+
+ private:
+    // Only assignment is disallowed. The implicitly generated copy constructor is left
+    // available so that instances can be returned by value.
+    DISALLOW_ASSIGNMENT_OPERATOR(HistoricalInfo);
+
+    const int mTimestamp;
+    const int mLevel;
+    const int mCount;
+};
+} // namespace latinime
+#endif /* LATINIME_HISTORICAL_INFO_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.cpp
new file mode 100644
index 000000000..4a126ff85
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.cpp
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+
+#include <cerrno>
+#include <climits>
+#include <cstdio>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "suggest/policyimpl/dictionary/utils/file_utils.h"
+
+namespace latinime {
+
+// Maps bufferSize bytes of the file at path, starting at bufferOffset, into memory.
+// mmap() needs a page-aligned file offset, so the mapping starts at the page boundary at or
+// below bufferOffset and the buffer pointer handed to the MmappedBuffer is advanced by the
+// remainder. The mapping is MAP_PRIVATE and is writable only when isUpdatable is true.
+// Returns nullptr (after logging) when the range is invalid, the page size is unavailable,
+// or the file cannot be opened or mapped. On success the descriptor stays open and is passed
+// to the MmappedBuffer together with the mapping.
+/* static */ MmappedBuffer::MmappedBufferPtr MmappedBuffer::openBuffer(
+        const char *const path, const int bufferOffset, const int bufferSize,
+        const bool isUpdatable) {
+    // All argument checks happen before open() so that no early return can leak a descriptor.
+    if (bufferOffset < 0 || bufferSize < 0) {
+        AKLOGE("DICT: Invalid mmap range. offset=%d size=%d", bufferOffset, bufferSize);
+        return nullptr;
+    }
+    // sysconf() returns long and reports failure as -1.
+    const long pagesize = sysconf(_SC_PAGESIZE);
+    if (pagesize <= 0) {
+        AKLOGE("DICT: Can't get the page size. errno=%d", errno);
+        return nullptr;
+    }
+    // The remainder is at most bufferOffset, so it always fits in an int.
+    const int offset = static_cast<int>(bufferOffset % pagesize);
+    if (bufferSize > INT_MAX - offset) {
+        // bufferSize + offset below would overflow.
+        AKLOGE("DICT: mmap range is too large. offset=%d size=%d", bufferOffset, bufferSize);
+        return nullptr;
+    }
+    const int alignedOffset = bufferOffset - offset;
+    const int alignedSize = bufferSize + offset;
+    const int mmapFd = open(path, O_RDONLY);
+    if (mmapFd < 0) {
+        AKLOGE("DICT: Can't open the source. path=%s errno=%d", path, errno);
+        return nullptr;
+    }
+    const int protMode = isUpdatable ? PROT_READ | PROT_WRITE : PROT_READ;
+    void *const mmappedBuffer = mmap(nullptr, alignedSize, protMode, MAP_PRIVATE, mmapFd,
+            alignedOffset);
+    if (mmappedBuffer == MAP_FAILED) {
+        AKLOGE("DICT: Can't mmap dictionary. errno=%d", errno);
+        close(mmapFd);
+        return nullptr;
+    }
+    // Without MAP_FIXED a successful mmap() never places the mapping at address 0, so the
+    // adjusted pointer cannot be null and needs no separate check.
+    uint8_t *const buffer = static_cast<uint8_t *>(mmappedBuffer) + offset;
+    return MmappedBufferPtr(new MmappedBuffer(buffer, bufferSize, mmappedBuffer, alignedSize,
+            mmapFd, isUpdatable));
+}
+
+// Maps the whole file at path. A size of -1 reported by FileUtils::getFileSize() yields
+// nullptr. A size of 0 yields an MmappedBuffer constructed from isUpdatable alone, without
+// calling the mapping overload.
+/* static */ MmappedBuffer::MmappedBufferPtr MmappedBuffer::openBuffer(
+        const char *const path, const bool isUpdatable) {
+    const int sizeOfFile = FileUtils::getFileSize(path);
+    if (sizeOfFile == -1) {
+        return nullptr;
+    }
+    if (sizeOfFile == 0) {
+        return MmappedBufferPtr(new MmappedBuffer(isUpdatable));
+    }
+    return openBuffer(path, 0 /* bufferOffset */, sizeOfFile, isUpdatable);
+}
+
+// Concatenates dirPath and fileName (no separator is inserted) and maps the whole resulting
+// file. Returns nullptr when the joined path cannot be formatted or does not fit in
+// PATH_MAX characters.
+/* static */ MmappedBuffer::MmappedBufferPtr MmappedBuffer::openBuffer(
+        const char *const dirPath, const char *const fileName, const bool isUpdatable) {
+    const int filePathBufferSize = PATH_MAX + 1 /* terminator */;
+    char filePath[filePathBufferSize];
+    const int filePathLength = snprintf(filePath, filePathBufferSize, "%s%s", dirPath,
+            fileName);
+    // snprintf() returns a negative value on an output error and the untruncated length
+    // otherwise, so both a failed and a truncated path have to be rejected.
+    if (filePathLength < 0 || filePathLength >= filePathBufferSize) {
+        return nullptr;
+    }
+    return openBuffer(filePath, isUpdatable);
+}
+
+// Unmaps the region and then closes the file descriptor. Instances whose aligned size is 0
+// release nothing. Failures are logged and otherwise ignored.
+MmappedBuffer::~MmappedBuffer() {
+    if (mAlignedSize != 0) {
+        const int munmapResult = munmap(mMmappedBuffer, mAlignedSize);
+        if (munmapResult != 0) {
+            AKLOGE("DICT: Failure in munmap. ret=%d errno=%d", munmapResult, errno);
+        }
+        const int closeResult = close(mMmapFd);
+        if (closeResult != 0) {
+            AKLOGE("DICT: Failure in close. ret=%d errno=%d", closeResult, errno);
+        }
+    }
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h
index 6b69116eb..8460087ab 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h
@@ -17,11 +17,8 @@
#ifndef LATINIME_MMAPPED_BUFFER_H
#define LATINIME_MMAPPED_BUFFER_H
-#include <cerrno>
-#include <fcntl.h>
-#include <stdint.h>
-#include <sys/mman.h>
-#include <unistd.h>
+#include <cstdint>
+#include <memory>
#include "defines.h"
@@ -29,46 +26,18 @@ namespace latinime {
class MmappedBuffer {
public:
- static MmappedBuffer* openBuffer(const char *const path, const int bufferOffset,
- const int bufferSize, const bool isUpdatable) {
- const int openMode = isUpdatable ? O_RDWR : O_RDONLY;
- const int mmapFd = open(path, openMode);
- if (mmapFd < 0) {
- AKLOGE("DICT: Can't open the source. path=%s errno=%d", path, errno);
- return 0;
- }
- const int pagesize = getpagesize();
- const int offset = bufferOffset % pagesize;
- int alignedOffset = bufferOffset - offset;
- int alignedSize = bufferSize + offset;
- const int protMode = isUpdatable ? PROT_READ | PROT_WRITE : PROT_READ;
- void *const mmappedBuffer = mmap(0, alignedSize, protMode, MAP_PRIVATE, mmapFd,
- alignedOffset);
- if (mmappedBuffer == MAP_FAILED) {
- AKLOGE("DICT: Can't mmap dictionary. errno=%d", errno);
- close(mmapFd);
- return 0;
- }
- uint8_t *const buffer = static_cast<uint8_t *>(mmappedBuffer) + offset;
- if (!buffer) {
- AKLOGE("DICT: buffer is null");
- close(mmapFd);
- return 0;
- }
- return new MmappedBuffer(buffer, bufferSize, mmappedBuffer, alignedSize, mmapFd,
- isUpdatable);
- }
+ typedef std::unique_ptr<const MmappedBuffer> MmappedBufferPtr;
- ~MmappedBuffer() {
- int ret = munmap(mMmappedBuffer, mAlignedSize);
- if (ret != 0) {
- AKLOGE("DICT: Failure in munmap. ret=%d errno=%d", ret, errno);
- }
- ret = close(mMmapFd);
- if (ret != 0) {
- AKLOGE("DICT: Failure in close. ret=%d errno=%d", ret, errno);
- }
- }
+ static MmappedBufferPtr openBuffer(const char *const path,
+ const int bufferOffset, const int bufferSize, const bool isUpdatable);
+
+ // Mmap entire file.
+ static MmappedBufferPtr openBuffer(const char *const path, const bool isUpdatable);
+
+ static MmappedBufferPtr openBuffer(const char *const dirPath, const char *const fileName,
+ const bool isUpdatable);
+
+ ~MmappedBuffer();
AK_FORCE_INLINE uint8_t *getBuffer() const {
return mBuffer;
@@ -89,6 +58,11 @@ class MmappedBuffer {
: mBuffer(buffer), mBufferSize(bufferSize), mMmappedBuffer(mmappedBuffer),
mAlignedSize(alignedSize), mMmapFd(mmapFd), mIsUpdatable(isUpdatable) {}
+ // Empty file. We have to handle an empty file as a valid part of a dictionary.
+ AK_FORCE_INLINE MmappedBuffer(const bool isUpdatable)
+ : mBuffer(nullptr), mBufferSize(0), mMmappedBuffer(nullptr), mAlignedSize(0),
+ mMmapFd(0), mIsUpdatable(isUpdatable) {}
+
DISALLOW_IMPLICIT_CONSTRUCTORS(MmappedBuffer);
uint8_t *const mBuffer;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/probability_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/probability_utils.h
index 21fe355b8..3b339e61a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/probability_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/probability_utils.h
@@ -17,12 +17,11 @@
#ifndef LATINIME_PROBABILITY_UTILS_H
#define LATINIME_PROBABILITY_UTILS_H
-#include <stdint.h>
-
#include "defines.h"
namespace latinime {
+// TODO: Quit using bigram probability to indicate the delta.
class ProbabilityUtils {
public:
static AK_FORCE_INLINE int backoff(const int unigramProbability) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp
new file mode 100644
index 000000000..d336306b9
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/utils/sparse_table.h"
+
+namespace latinime {
+
+const int SparseTable::NOT_EXIST = -1; // Filler written to index and content entries that hold no value yet.
+const int SparseTable::INDEX_SIZE = 4; // Size passed to every index table read/write and the stride between index entries (presumably bytes - confirm).
+
+// Reports whether id currently maps to a content block: id must be non-negative, its index
+// table entry must lie before the index table's tail, and that entry must not be NOT_EXIST.
+bool SparseTable::contains(const int id) const {
+    const int indexEntryPos = getPosInIndexTable(id);
+    if (id < 0) {
+        return false;
+    }
+    if (indexEntryPos >= mIndexTableBuffer->getTailPosition()) {
+        return false;
+    }
+    const int blockIndex = mIndexTableBuffer->readUint(INDEX_SIZE, indexEntryPos);
+    return !(blockIndex == NOT_EXIST);
+}
+
+// Looks up the value stored for id. Returns NOT_A_DICT_POS when the computed content
+// position is negative or not before the content table's tail (this case is logged), or
+// when the stored entry equals NOT_EXIST.
+// NOTE(review): the NOT_EXIST comparison can only match if readUint() yields -1 for an
+// all-ones entry - confirm this holds for every mDataSize in use.
+uint32_t SparseTable::get(const int id) const {
+    const int blockIndex = mIndexTableBuffer->readUint(INDEX_SIZE, getPosInIndexTable(id));
+    const int valuePos = getPosInContentTable(id, blockIndex);
+    const bool isReadablePos =
+            valuePos >= 0 && valuePos < mContentTableBuffer->getTailPosition();
+    if (!isReadablePos) {
+        AKLOGE("contentTableReadingPos(%d) is invalid. id: %d, index: %d",
+                valuePos, id, blockIndex);
+        return NOT_A_DICT_POS;
+    }
+    const int storedValue = mContentTableBuffer->readUint(mDataSize, valuePos);
+    if (storedValue == NOT_EXIST) {
+        return NOT_A_DICT_POS;
+    }
+    return storedValue;
+}
+
+// Stores value for id, extending the index table and appending a new content block when the
+// id's block does not exist yet. Returns false (after logging) when id is negative or when
+// a buffer write fails.
+bool SparseTable::set(const int id, const uint32_t value) {
+    // contains() reports every negative id as absent, while the truncating division and
+    // modulo used to compute positions would map a small negative id onto index entry 0 and
+    // onto a slot of the preceding content block. Refuse it instead of overwriting entries
+    // that belong to other ids.
+    if (id < 0) {
+        AKLOGE("cannot set value for negative id. id: %d", id);
+        return false;
+    }
+    const int posInIndexTable = getPosInIndexTable(id);
+    // Extends the index table if needed.
+    int tailPos = mIndexTableBuffer->getTailPosition();
+    while (tailPos <= posInIndexTable) {
+        if (!mIndexTableBuffer->writeUintAndAdvancePosition(NOT_EXIST, INDEX_SIZE, &tailPos)) {
+            AKLOGE("cannot extend index table. tailPos: %d to: %d", tailPos, posInIndexTable);
+            return false;
+        }
+    }
+    if (contains(id)) {
+        // The entry is already in the content table.
+        const int index = mIndexTableBuffer->readUint(INDEX_SIZE, posInIndexTable);
+        if (!mContentTableBuffer->writeUint(value, mDataSize, getPosInContentTable(id, index))) {
+            AKLOGE("cannot update value %d. pos: %d, tailPos: %d, mDataSize: %d", value,
+                    getPosInContentTable(id, index), mContentTableBuffer->getTailPosition(),
+                    mDataSize);
+            return false;
+        }
+        return true;
+    }
+    // The entry is not in the content table.
+    // Create a new entry in the content table; the new block starts at the current tail.
+    const int index = getIndexFromContentTablePos(mContentTableBuffer->getTailPosition());
+    if (!mIndexTableBuffer->writeUint(index, INDEX_SIZE, posInIndexTable)) {
+        AKLOGE("cannot write index %d. pos %d", index, posInIndexTable);
+        return false;
+    }
+    // Write a new block, filled with NOT_EXIST, that will contain the entry to be set.
+    int writingPos = getPosInContentTable(0 /* id */, index);
+    for (int i = 0; i < mBlockSize; ++i) {
+        if (!mContentTableBuffer->writeUintAndAdvancePosition(NOT_EXIST, mDataSize,
+                &writingPos)) {
+            AKLOGE("cannot write content table to extend. writingPos: %d, tailPos: %d, "
+                    "mDataSize: %d", writingPos, mContentTableBuffer->getTailPosition(), mDataSize);
+            return false;
+        }
+    }
+    return mContentTableBuffer->writeUint(value, mDataSize, getPosInContentTable(id, index));
+}
+
+// Converts a content table position into a block index by dividing first by the data size
+// and then by the block size.
+int SparseTable::getIndexFromContentTablePos(const int contentTablePos) const {
+    const int entryOrdinal = contentTablePos / mDataSize;
+    return entryOrdinal / mBlockSize;
+}
+
+// Position of the index table entry for id: one INDEX_SIZE-wide entry per mBlockSize ids.
+int SparseTable::getPosInIndexTable(const int id) const {
+    const int blockOrdinal = id / mBlockSize;
+    return blockOrdinal * INDEX_SIZE;
+}
+
+// Position in the content table of id's slot inside the block numbered index.
+int SparseTable::getPosInContentTable(const int id, const int index) const {
+    const int firstEntryOfBlock = index * mBlockSize;
+    const int entryWithinBlock = id % mBlockSize;
+    return (firstEntryOfBlock + entryWithinBlock) * mDataSize;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h
new file mode 100644
index 000000000..fca8120f1
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SPARSE_TABLE_H
+#define LATINIME_SPARSE_TABLE_H
+
+#include <cstdint>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+// Note that there is a corresponding implementation in SparseTable.java.
+// TODO: Support multiple content buffers.
+class SparseTable { // Maps integer ids to uint32_t values through an index table buffer and a content table buffer.
+ public:
+ SparseTable(BufferWithExtendableBuffer *const indexTableBuffer,
+ BufferWithExtendableBuffer *const contentTableBuffer, const int blockSize,
+ const int dataSize)
+ : mIndexTableBuffer(indexTableBuffer), mContentTableBuffer(contentTableBuffer),
+ mBlockSize(blockSize), mDataSize(dataSize) {} // Only stores the pointers and sizes; nothing is read or written here.
+
+ bool contains(const int id) const; // True when id is non-negative and its index entry exists and is not NOT_EXIST.
+
+ uint32_t get(const int id) const; // Value stored for id, or NOT_A_DICT_POS when the position is invalid or the entry is NOT_EXIST.
+
+ bool set(const int id, const uint32_t value); // Stores value for id, growing the tables as needed; false when a write fails.
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTable);
+
+ int getIndexFromContentTablePos(const int contentTablePos) const; // contentTablePos / mDataSize / mBlockSize.
+
+ int getPosInIndexTable(const int id) const; // (id / mBlockSize) * INDEX_SIZE.
+
+ int getPosInContentTable(const int id, const int index) const; // (index * mBlockSize + id % mBlockSize) * mDataSize.
+
+ static const int NOT_EXIST; // Marker for entries that hold no value.
+ static const int INDEX_SIZE; // Size of one index table entry as passed to the buffer read/write calls.
+
+ BufferWithExtendableBuffer *const mIndexTableBuffer; // Holds one block index per group of mBlockSize ids.
+ BufferWithExtendableBuffer *const mContentTableBuffer; // Holds the values, mBlockSize entries per block.
+ const int mBlockSize; // Number of consecutive ids that share one index table entry.
+ const int mDataSize; // Size passed to the content buffer for each stored value.
+};
+} // namespace latinime
+#endif /* LATINIME_SPARSE_TABLE_H */
diff --git a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp
index 104eb2a7a..3fc566e7a 100644
--- a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp
+++ b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp
@@ -22,6 +22,12 @@ const float ScoringParams::MAX_SPATIAL_DISTANCE = 1.0f;
const int ScoringParams::THRESHOLD_NEXT_WORD_PROBABILITY = 40;
const int ScoringParams::THRESHOLD_NEXT_WORD_PROBABILITY_FOR_CAPPED = 120;
const float ScoringParams::AUTOCORRECT_OUTPUT_THRESHOLD = 1.0f;
+
+const float ScoringParams::EXACT_MATCH_PROMOTION = 1.1f;
+const float ScoringParams::CASE_ERROR_PENALTY_FOR_EXACT_MATCH = 0.01f;
+const float ScoringParams::ACCENT_ERROR_PENALTY_FOR_EXACT_MATCH = 0.02f;
+const float ScoringParams::DIGRAPH_PENALTY_FOR_EXACT_MATCH = 0.03f;
+
// TODO: Unlimit max cache dic node size
const int ScoringParams::MAX_CACHE_DIC_NODE_SIZE = 170;
const int ScoringParams::MAX_CACHE_DIC_NODE_SIZE_FOR_SINGLE_POINT = 310;
@@ -31,8 +37,9 @@ const float ScoringParams::DISTANCE_WEIGHT_LENGTH = 0.1524f;
const float ScoringParams::PROXIMITY_COST = 0.0694f;
const float ScoringParams::FIRST_CHAR_PROXIMITY_COST = 0.072f;
const float ScoringParams::FIRST_PROXIMITY_COST = 0.07788f;
-const float ScoringParams::OMISSION_COST = 0.4676f;
-const float ScoringParams::OMISSION_COST_SAME_CHAR = 0.399f;
+const float ScoringParams::INTENTIONAL_OMISSION_COST = 0.1f;
+const float ScoringParams::OMISSION_COST = 0.467f;
+const float ScoringParams::OMISSION_COST_SAME_CHAR = 0.345f;
const float ScoringParams::OMISSION_COST_FIRST_CHAR = 0.5256f;
const float ScoringParams::INSERTION_COST = 0.7248f;
const float ScoringParams::TERMINAL_INSERTION_COST = 0.8128f;
@@ -40,18 +47,18 @@ const float ScoringParams::INSERTION_COST_SAME_CHAR = 0.5508f;
const float ScoringParams::INSERTION_COST_PROXIMITY_CHAR = 0.674f;
const float ScoringParams::INSERTION_COST_FIRST_CHAR = 0.639f;
const float ScoringParams::TRANSPOSITION_COST = 0.5608f;
-const float ScoringParams::SPACE_SUBSTITUTION_COST = 0.339f;
+const float ScoringParams::SPACE_SUBSTITUTION_COST = 0.334f;
const float ScoringParams::ADDITIONAL_PROXIMITY_COST = 0.4576f;
const float ScoringParams::SUBSTITUTION_COST = 0.3806f;
-const float ScoringParams::COST_NEW_WORD = 0.0312f;
+const float ScoringParams::COST_NEW_WORD = 0.0314f;
const float ScoringParams::COST_SECOND_OR_LATER_WORD_FIRST_CHAR_UPPERCASE = 0.3224f;
const float ScoringParams::DISTANCE_WEIGHT_LANGUAGE = 1.1214f;
-const float ScoringParams::COST_FIRST_LOOKAHEAD = 0.4836f;
-const float ScoringParams::COST_LOOKAHEAD = 0.00624f;
-const float ScoringParams::HAS_PROXIMITY_TERMINAL_COST = 0.06836f;
+const float ScoringParams::COST_FIRST_COMPLETION = 0.4836f;
+const float ScoringParams::COST_COMPLETION = 0.00624f;
+const float ScoringParams::HAS_PROXIMITY_TERMINAL_COST = 0.0683f;
const float ScoringParams::HAS_EDIT_CORRECTION_TERMINAL_COST = 0.0362f;
const float ScoringParams::HAS_MULTI_WORD_TERMINAL_COST = 0.4182f;
const float ScoringParams::TYPING_BASE_OUTPUT_SCORE = 1.0f;
const float ScoringParams::TYPING_MAX_OUTPUT_SCORE_PER_INPUT = 0.1f;
-const float ScoringParams::NORMALIZED_SPATIAL_DISTANCE_THRESHOLD_FOR_EDIT = 0.045f;
+const float ScoringParams::NORMALIZED_SPATIAL_DISTANCE_THRESHOLD_FOR_EDIT = 0.095f;
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/typing/scoring_params.h b/native/jni/src/suggest/policyimpl/typing/scoring_params.h
index 7d4b5c3c7..b12de6d87 100644
--- a/native/jni/src/suggest/policyimpl/typing/scoring_params.h
+++ b/native/jni/src/suggest/policyimpl/typing/scoring_params.h
@@ -32,6 +32,11 @@ class ScoringParams {
static const int MAX_CACHE_DIC_NODE_SIZE_FOR_SINGLE_POINT;
static const int THRESHOLD_SHORT_WORD_LENGTH;
+ static const float EXACT_MATCH_PROMOTION;
+ static const float CASE_ERROR_PENALTY_FOR_EXACT_MATCH;
+ static const float ACCENT_ERROR_PENALTY_FOR_EXACT_MATCH;
+ static const float DIGRAPH_PENALTY_FOR_EXACT_MATCH;
+
// Numerically optimized parameters (currently for tap typing only).
// TODO: add ability to modify these constants programmatically.
// TODO: explore optimization of gesture parameters.
@@ -39,6 +44,7 @@ class ScoringParams {
static const float PROXIMITY_COST;
static const float FIRST_CHAR_PROXIMITY_COST;
static const float FIRST_PROXIMITY_COST;
+ static const float INTENTIONAL_OMISSION_COST;
static const float OMISSION_COST;
static const float OMISSION_COST_SAME_CHAR;
static const float OMISSION_COST_FIRST_CHAR;
@@ -54,8 +60,8 @@ class ScoringParams {
static const float COST_NEW_WORD;
static const float COST_SECOND_OR_LATER_WORD_FIRST_CHAR_UPPERCASE;
static const float DISTANCE_WEIGHT_LANGUAGE;
- static const float COST_FIRST_LOOKAHEAD;
- static const float COST_LOOKAHEAD;
+ static const float COST_FIRST_COMPLETION;
+ static const float COST_COMPLETION;
static const float HAS_PROXIMITY_TERMINAL_COST;
static const float HAS_EDIT_CORRECTION_TERMINAL_COST;
static const float HAS_MULTI_WORD_TERMINAL_COST;
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_scoring.h b/native/jni/src/suggest/policyimpl/typing/typing_scoring.h
index 56ffcc93e..66ea62406 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_scoring.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_scoring.h
@@ -18,7 +18,9 @@
#define LATINIME_TYPING_SCORING_H
#include "defines.h"
+#include "suggest/core/dictionary/error_type_utils.h"
#include "suggest/core/policy/scoring.h"
+#include "suggest/core/session/dic_traverse_session.h"
#include "suggest/policyimpl/typing/scoring_params.h"
namespace latinime {
@@ -30,40 +32,40 @@ class TypingScoring : public Scoring {
public:
static const TypingScoring *getInstance() { return &sInstance; }
- AK_FORCE_INLINE bool getMostProbableString(
- const DicTraverseSession *const traverseSession, const int terminalSize,
- const float languageWeight, int *const outputCodePoints, int *const type,
- int *const freq) const {
- return false;
- }
-
- AK_FORCE_INLINE void safetyNetForMostProbableString(const int terminalSize,
- const int maxScore, int *const outputCodePoints, int *const frequencies) const {
- }
-
- AK_FORCE_INLINE void searchWordWithDoubleLetter(DicNode *terminals,
- const int terminalSize, int *doubleLetterTerminalIndex,
- DoubleLetterLevel *doubleLetterLevel) const {
- }
+ AK_FORCE_INLINE void getMostProbableString(const DicTraverseSession *const traverseSession,
+ const float languageWeight, SuggestionResults *const outSuggestionResults) const {}
AK_FORCE_INLINE float getAdjustedLanguageWeight(DicTraverseSession *const traverseSession,
- DicNode *const terminals, const int size) const {
+ DicNode *const terminals, const int size) const {
return 1.0f;
}
- AK_FORCE_INLINE int calculateFinalScore(const float compoundDistance,
- const int inputSize, const bool forceCommit) const {
+ AK_FORCE_INLINE int calculateFinalScore(const float compoundDistance, const int inputSize,
+ const ErrorTypeUtils::ErrorType containedErrorTypes, const bool forceCommit,
+ const bool boostExactMatches) const {
const float maxDistance = ScoringParams::DISTANCE_WEIGHT_LANGUAGE
+ static_cast<float>(inputSize) * ScoringParams::TYPING_MAX_OUTPUT_SCORE_PER_INPUT;
- const float score = ScoringParams::TYPING_BASE_OUTPUT_SCORE
- - compoundDistance / maxDistance
- + (forceCommit ? ScoringParams::AUTOCORRECT_OUTPUT_THRESHOLD : 0.0f);
+ float score = ScoringParams::TYPING_BASE_OUTPUT_SCORE - compoundDistance / maxDistance;
+ if (forceCommit) {
+ score += ScoringParams::AUTOCORRECT_OUTPUT_THRESHOLD;
+ }
+ if (boostExactMatches && ErrorTypeUtils::isExactMatch(containedErrorTypes)) {
+ score += ScoringParams::EXACT_MATCH_PROMOTION;
+ if ((ErrorTypeUtils::MATCH_WITH_CASE_ERROR & containedErrorTypes) != 0) {
+ score -= ScoringParams::CASE_ERROR_PENALTY_FOR_EXACT_MATCH;
+ }
+ if ((ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR & containedErrorTypes) != 0) {
+ score -= ScoringParams::ACCENT_ERROR_PENALTY_FOR_EXACT_MATCH;
+ }
+ if ((ErrorTypeUtils::MATCH_WITH_DIGRAPH & containedErrorTypes) != 0) {
+ score -= ScoringParams::DIGRAPH_PENALTY_FOR_EXACT_MATCH;
+ }
+ }
return static_cast<int>(score * SUGGEST_INTERFACE_OUTPUT_SCALE);
}
- AK_FORCE_INLINE float getDoubleLetterDemotionDistanceCost(const int terminalIndex,
- const int doubleLetterTerminalIndex,
- const DoubleLetterLevel doubleLetterLevel) const {
+ AK_FORCE_INLINE float getDoubleLetterDemotionDistanceCost(
+ const DicNode *const terminalDicNode) const {
return 0.0f;
}
@@ -71,6 +73,16 @@ class TypingScoring : public Scoring {
return false;
}
+ AK_FORCE_INLINE bool autoCorrectsToMultiWordSuggestionIfTop() const {
+ return true;
+ }
+
+ AK_FORCE_INLINE bool sameAsTyped(const DicTraverseSession *const traverseSession,
+ const DicNode *const dicNode) const {
+ return traverseSession->getProximityInfoState(0)->sameAsTyped(
+ dicNode->getOutputWordBuf(), dicNode->getNodeCodePointCount());
+ }
+
private:
DISALLOW_COPY_AND_ASSIGN(TypingScoring);
static const TypingScoring sInstance;
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h
index 007c19e0a..cb3dfac70 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h
@@ -17,7 +17,7 @@
#ifndef LATINIME_TYPING_TRAVERSAL_H
#define LATINIME_TYPING_TRAVERSAL_H
-#include <stdint.h>
+#include <cstdint>
#include "defines.h"
#include "suggest/core/dicnode/dic_node.h"
@@ -81,7 +81,7 @@ class TypingTraversal : public Traversal {
return false;
}
const int point0Index = dicNode->getInputIndex(0);
- return dicNode->isTerminalWordNode()
+ return dicNode->isTerminalDicNode()
&& traverseSession->getProximityInfoState(0)->
hasSpaceProximity(point0Index);
}
@@ -96,7 +96,7 @@ class TypingTraversal : public Traversal {
if (dicNode->isCompletion(inputSize)) {
return false;
}
- if (!dicNode->isTerminalWordNode()) {
+ if (!dicNode->isTerminalDicNode()) {
return false;
}
const int16_t pointIndex = dicNode->getInputIndex(0);
@@ -137,25 +137,19 @@ class TypingTraversal : public Traversal {
return ScoringParams::MAX_SPATIAL_DISTANCE;
}
- AK_FORCE_INLINE bool autoCorrectsToMultiWordSuggestionIfTop() const {
- return true;
- }
-
AK_FORCE_INLINE int getDefaultExpandDicNodeSize() const {
return DicNodeVector::DEFAULT_NODES_SIZE_FOR_OPTIMIZATION;
}
- AK_FORCE_INLINE bool sameAsTyped(
- const DicTraverseSession *const traverseSession, const DicNode *const dicNode) const {
- return traverseSession->getProximityInfoState(0)->sameAsTyped(
- dicNode->getOutputWordBuf(), dicNode->getNodeCodePointCount());
- }
-
AK_FORCE_INLINE int getMaxCacheSize(const int inputSize) const {
return (inputSize <= 1) ? ScoringParams::MAX_CACHE_DIC_NODE_SIZE_FOR_SINGLE_POINT
: ScoringParams::MAX_CACHE_DIC_NODE_SIZE;
}
+ AK_FORCE_INLINE int getTerminalCacheSize() const {
+ return MAX_RESULTS;
+ }
+
AK_FORCE_INLINE bool isPossibleOmissionChildNode(
const DicTraverseSession *const traverseSession, const DicNode *const parentDicNode,
const DicNode *const dicNode) const {
@@ -172,9 +166,8 @@ class TypingTraversal : public Traversal {
if (probability < ScoringParams::THRESHOLD_NEXT_WORD_PROBABILITY) {
return false;
}
- const int c = dicNode->getOutputWordBuf()[0];
const bool shortCappedWord = dicNode->getNodeCodePointCount()
- < ScoringParams::THRESHOLD_SHORT_WORD_LENGTH && CharUtils::isAsciiUpper(c);
+ < ScoringParams::THRESHOLD_SHORT_WORD_LENGTH && dicNode->isFirstCharUppercase();
return !shortCappedWord
|| probability >= ScoringParams::THRESHOLD_NEXT_WORD_PROBABILITY_FOR_CAPPED;
}
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp b/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp
index 5b6b5e874..54f65c786 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp
+++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp
@@ -23,39 +23,64 @@ namespace latinime {
const TypingWeighting TypingWeighting::sInstance;
-ErrorType TypingWeighting::getErrorType(const CorrectionType correctionType,
+ErrorTypeUtils::ErrorType TypingWeighting::getErrorType(const CorrectionType correctionType,
const DicTraverseSession *const traverseSession, const DicNode *const parentDicNode,
const DicNode *const dicNode) const {
switch (correctionType) {
case CT_MATCH:
if (isProximityDicNode(traverseSession, dicNode)) {
- return ET_PROXIMITY_CORRECTION;
+ return ErrorTypeUtils::PROXIMITY_CORRECTION;
+ } else if (dicNode->isInDigraph()) {
+ return ErrorTypeUtils::MATCH_WITH_DIGRAPH;
} else {
- return ET_NOT_AN_ERROR;
+ // Compare the node code point with original primary code point on the keyboard.
+ const ProximityInfoState *const pInfoState =
+ traverseSession->getProximityInfoState(0);
+ const int primaryOriginalCodePoint = pInfoState->getPrimaryOriginalCodePointAt(
+ dicNode->getInputIndex(0));
+ const int nodeCodePoint = dicNode->getNodeCodePoint();
+ if (primaryOriginalCodePoint == nodeCodePoint) {
+ // Node code point is same as original code point on the keyboard.
+ return ErrorTypeUtils::NOT_AN_ERROR;
+ } else if (CharUtils::toLowerCase(primaryOriginalCodePoint) ==
+ CharUtils::toLowerCase(nodeCodePoint)) {
+ // Only cases of the code points are different.
+ return ErrorTypeUtils::MATCH_WITH_CASE_ERROR;
+ } else if (CharUtils::toBaseCodePoint(primaryOriginalCodePoint) ==
+ CharUtils::toBaseCodePoint(nodeCodePoint)) {
+ // Node code point is a variant of original code point.
+ return ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR;
+ } else {
+ // Node code point is a variant of original code point and the cases are also
+ // different.
+ return ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR
+ | ErrorTypeUtils::MATCH_WITH_CASE_ERROR;
+ }
}
+ break;
case CT_ADDITIONAL_PROXIMITY:
- return ET_PROXIMITY_CORRECTION;
+ return ErrorTypeUtils::PROXIMITY_CORRECTION;
case CT_OMISSION:
if (parentDicNode->canBeIntentionalOmission()) {
- return ET_INTENTIONAL_OMISSION;
+ return ErrorTypeUtils::INTENTIONAL_OMISSION;
} else {
- return ET_EDIT_CORRECTION;
+ return ErrorTypeUtils::EDIT_CORRECTION;
}
break;
case CT_SUBSTITUTION:
case CT_INSERTION:
case CT_TERMINAL_INSERTION:
case CT_TRANSPOSITION:
- return ET_EDIT_CORRECTION;
+ return ErrorTypeUtils::EDIT_CORRECTION;
case CT_NEW_WORD_SPACE_OMISSION:
case CT_NEW_WORD_SPACE_SUBSTITUTION:
- return ET_NEW_WORD;
+ return ErrorTypeUtils::NEW_WORD;
case CT_TERMINAL:
- return ET_NOT_AN_ERROR;
+ return ErrorTypeUtils::NOT_AN_ERROR;
case CT_COMPLETION:
- return ET_COMPLETION;
+ return ErrorTypeUtils::COMPLETION;
default:
- return ET_NOT_AN_ERROR;
+ return ErrorTypeUtils::NOT_AN_ERROR;
}
}
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
index 9f0a331e3..84077174d 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
@@ -19,6 +19,7 @@
#include "defines.h"
#include "suggest/core/dicnode/dic_node_utils.h"
+#include "suggest/core/dictionary/error_type_utils.h"
#include "suggest/core/layout/touch_position_correction_utils.h"
#include "suggest/core/policy/weighting.h"
#include "suggest/core/session/dic_traverse_session.h"
@@ -53,12 +54,15 @@ class TypingWeighting : public Weighting {
float getOmissionCost(const DicNode *const parentDicNode, const DicNode *const dicNode) const {
const bool isZeroCostOmission = parentDicNode->isZeroCostOmission();
+ const bool isIntentionalOmission = parentDicNode->canBeIntentionalOmission();
const bool sameCodePoint = dicNode->isSameNodeCodePoint(parentDicNode);
// If the traversal omitted the first letter then the dicNode should now be on the second.
const bool isFirstLetterOmission = dicNode->getNodeCodePointCount() == 2;
float cost = 0.0f;
if (isZeroCostOmission) {
cost = 0.0f;
+ } else if (isIntentionalOmission) {
+ cost = ScoringParams::INTENTIONAL_OMISSION_COST;
} else if (isFirstLetterOmission) {
cost = ScoringParams::OMISSION_COST_FIRST_CHAR;
} else {
@@ -71,8 +75,6 @@ class TypingWeighting : public Weighting {
float getMatchedCost(const DicTraverseSession *const traverseSession,
const DicNode *const dicNode, DicNode_InputStateG *inputStateG) const {
const int pointIndex = dicNode->getInputIndex(0);
- // Note: min() required since length can be MAX_POINT_TO_KEY_LENGTH for characters not on
- // the keyboard (like accented letters)
const float normalizedSquaredLength = traverseSession->getProximityInfoState(0)
->getPointToKeyLength(pointIndex,
CharUtils::toBaseLowerCase(dicNode->getNodeCodePoint()));
@@ -167,8 +169,8 @@ class TypingWeighting : public Weighting {
const bool firstCompletion = dicNode->getInputIndex(0)
== traverseSession->getInputSize();
// TODO: Change the cost for the first completion for the gesture?
- const float cost = firstCompletion ? ScoringParams::COST_FIRST_LOOKAHEAD
- : ScoringParams::COST_LOOKAHEAD;
+ const float cost = firstCompletion ? ScoringParams::COST_FIRST_COMPLETION
+ : ScoringParams::COST_COMPLETION;
return cost;
}
@@ -204,7 +206,7 @@ class TypingWeighting : public Weighting {
return cost * traverseSession->getMultiWordCostMultiplier();
}
- ErrorType getErrorType(const CorrectionType correctionType,
+ ErrorTypeUtils::ErrorType getErrorType(const CorrectionType correctionType,
const DicTraverseSession *const traverseSession,
const DicNode *const parentDicNode, const DicNode *const dicNode) const;
diff --git a/native/jni/src/suggest/policyimpl/utils/edit_distance.h b/native/jni/src/suggest/policyimpl/utils/edit_distance.h
index 0871c37ce..4cfd0b3f3 100644
--- a/native/jni/src/suggest/policyimpl/utils/edit_distance.h
+++ b/native/jni/src/suggest/policyimpl/utils/edit_distance.h
@@ -17,6 +17,8 @@
#ifndef LATINIME_EDIT_DISTANCE_H
#define LATINIME_EDIT_DISTANCE_H
+#include <algorithm>
+
#include "defines.h"
#include "suggest/policyimpl/utils/edit_distance_policy.h"
@@ -38,13 +40,13 @@ class EditDistance {
for (int i = 0; i < beforeLength; ++i) {
for (int j = 0; j < afterLength; ++j) {
- dp[(afterLength + 1) * (i + 1) + (j + 1)] = min(
+ dp[(afterLength + 1) * (i + 1) + (j + 1)] = std::min(
dp[(afterLength + 1) * i + (j + 1)] + policy->getInsertionCost(i, j),
- min(dp[(afterLength + 1) * (i + 1) + j] + policy->getDeletionCost(i, j),
- dp[(afterLength + 1) * i + j]
- + policy->getSubstitutionCost(i, j)));
+ std::min(
+ dp[(afterLength + 1) * (i + 1) + j] + policy->getDeletionCost(i, j),
+ dp[(afterLength + 1) * i + j] + policy->getSubstitutionCost(i, j)));
if (policy->allowTransposition(i, j)) {
- dp[(afterLength + 1) * (i + 1) + (j + 1)] = min(
+ dp[(afterLength + 1) * (i + 1) + (j + 1)] = std::min(
dp[(afterLength + 1) * (i + 1) + (j + 1)],
dp[(afterLength + 1) * (i - 1) + (j - 1)]
+ policy->getTranspositionCost(i, j));
diff --git a/native/jni/src/utils/autocorrection_threshold_utils.cpp b/native/jni/src/utils/autocorrection_threshold_utils.cpp
index 1f8ee0814..349786a27 100644
--- a/native/jni/src/utils/autocorrection_threshold_utils.cpp
+++ b/native/jni/src/utils/autocorrection_threshold_utils.cpp
@@ -16,6 +16,7 @@
#include "utils/autocorrection_threshold_utils.h"
+#include <algorithm>
#include <cmath>
#include "defines.h"
@@ -99,7 +100,7 @@ const int AutocorrectionThresholdUtils::FULL_WORD_MULTIPLIER = 2;
const float maxScore = score >= S_INT_MAX ? static_cast<float>(S_INT_MAX)
: static_cast<float>(MAX_INITIAL_SCORE)
* powf(static_cast<float>(TYPED_LETTER_MULTIPLIER),
- static_cast<float>(min(beforeLength, afterLength - spaceCount)))
+ static_cast<float>(std::min(beforeLength, afterLength - spaceCount)))
* static_cast<float>(FULL_WORD_MULTIPLIER);
return (static_cast<float>(score) / maxScore) * weight;
diff --git a/native/jni/src/utils/char_utils.cpp b/native/jni/src/utils/char_utils.cpp
index 0e7039610..b17e0847d 100644
--- a/native/jni/src/utils/char_utils.cpp
+++ b/native/jni/src/utils/char_utils.cpp
@@ -22,6 +22,9 @@
namespace latinime {
+const int CharUtils::MIN_UNICODE_CODE_POINT = 0; // U+0000, lowest Unicode code point.
+const int CharUtils::MAX_UNICODE_CODE_POINT = 0x10FFFF; // U+10FFFF, highest Unicode code point.
+
struct LatinCapitalSmallPair {
unsigned short capital;
unsigned short small;
@@ -1118,7 +1121,8 @@ static int compare_pair_capital(const void *a, const void *b) {
/* U+0118 */ 0x0045, 0x0065, 0x0045, 0x0065, 0x0047, 0x0067, 0x0047, 0x0067,
/* U+0120 */ 0x0047, 0x0067, 0x0047, 0x0067, 0x0048, 0x0068, 0x0126, 0x0127,
/* U+0128 */ 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069,
- /* U+0130 */ 0x0049, 0x0131, 0x0049, 0x0069, 0x004A, 0x006A, 0x004B, 0x006B,
+ // U+0131: Manually changed from 0131 to 0049
+ /* U+0130 */ 0x0049, 0x0049, 0x0049, 0x0069, 0x004A, 0x006A, 0x004B, 0x006B,
/* U+0138 */ 0x0138, 0x004C, 0x006C, 0x004C, 0x006C, 0x004C, 0x006C, 0x004C,
/* U+0140 */ 0x006C, 0x004C, 0x006C, 0x004E, 0x006E, 0x004E, 0x006E, 0x004E,
// U+0141: Manually changed from 0141 to 004C
@@ -1273,4 +1277,6 @@ static int compare_pair_capital(const void *a, const void *b) {
/* U+04F0 */ 0x0423, 0x0443, 0x0423, 0x0443, 0x0427, 0x0447, 0x04F6, 0x04F7,
/* U+04F8 */ 0x042B, 0x044B, 0x04FA, 0x04FB, 0x04FC, 0x04FD, 0x04FE, 0x04FF,
};
+// Despite its name this vector is not empty: it holds exactly one element, the '\0' code point.
+/* static */ const std::vector<int> CharUtils::EMPTY_STRING(1 /* size */, '\0' /* value */);
} // namespace latinime
diff --git a/native/jni/src/utils/char_utils.h b/native/jni/src/utils/char_utils.h
index 41663c81a..63786502b 100644
--- a/native/jni/src/utils/char_utils.h
+++ b/native/jni/src/utils/char_utils.h
@@ -18,6 +18,8 @@
#define LATINIME_CHAR_UTILS_H
#include <cctype>
+#include <cstring>
+#include <vector>
#include "defines.h"
@@ -85,11 +87,36 @@ class CharUtils {
return spaceCount;
}
+ static AK_FORCE_INLINE int isInUnicodeSpace(const int codePoint) { // Inclusive range test.
+ return codePoint >= MIN_UNICODE_CODE_POINT && codePoint <= MAX_UNICODE_CODE_POINT;
+ }
+
static unsigned short latin_tolower(const unsigned short c);
+ static const std::vector<int> EMPTY_STRING;
+
+ // Prepends CODE_POINT_BEGINNING_OF_SENTENCE to codePoints in place and returns the updated
+ // code point count. Returns 0, leaving codePoints untouched, when codePointCount >= maxCodePoint.
+ static AK_FORCE_INLINE int attachBeginningOfSentenceMarker(int *const codePoints,
+ const int codePointCount, const int maxCodePoint) {
+ if (codePointCount > 0 && codePoints[0] == CODE_POINT_BEGINNING_OF_SENTENCE) {
+ // Marker has already been attached; the count is returned unchanged.
+ return codePointCount;
+ }
+ if (codePointCount >= maxCodePoint) {
+ // No room to insert the marker (maxCodePoint is presumably the buffer capacity -- verify).
+ return 0;
+ }
+ memmove(codePoints + 1, codePoints, sizeof(int) * codePointCount); // Shift right by one slot.
+ codePoints[0] = CODE_POINT_BEGINNING_OF_SENTENCE;
+ return codePointCount + 1;
+ }
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(CharUtils);
+ static const int MIN_UNICODE_CODE_POINT;
+ static const int MAX_UNICODE_CODE_POINT;
+
/**
* Table mapping most combined Latin, Greek, and Cyrillic characters
* to their base characters. If c is in range, BASE_CHARS[c] == c
diff --git a/native/jni/src/utils/hash_map_compat.h b/native/jni/src/utils/hash_map_compat.h
deleted file mode 100644
index a1e982bc4..000000000
--- a/native/jni/src/utils/hash_map_compat.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_HASH_MAP_COMPAT_H
-#define LATINIME_HASH_MAP_COMPAT_H
-
-// TODO: Use std::unordered_map that has been standardized in C++11
-
-#ifdef __APPLE__
-#include <ext/hash_map>
-#else // __APPLE__
-#include <hash_map>
-#endif // __APPLE__
-
-#ifdef __SGI_STL_PORT
-#define hash_map_compat stlport::hash_map
-#else // __SGI_STL_PORT
-#define hash_map_compat __gnu_cxx::hash_map
-#endif // __SGI_STL_PORT
-
-#endif // LATINIME_HASH_MAP_COMPAT_H
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_release_listener.h b/native/jni/src/utils/jni_data_utils.cpp
index 2ca4f21bd..5555293d5 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_release_listener.h
+++ b/native/jni/src/utils/jni_data_utils.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012 The Android Open Source Project
+ * Copyright (C) 2014 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,22 +14,11 @@
* limitations under the License.
*/
-#ifndef LATINIME_DIC_NODE_RELEASE_LISTENER_H
-#define LATINIME_DIC_NODE_RELEASE_LISTENER_H
-
-#include "defines.h"
+#include "utils/jni_data_utils.h"
namespace latinime {
-class DicNode;
+const int JniDataUtils::CODE_POINT_REPLACEMENT_CHARACTER = 0xFFFD; // U+FFFD REPLACEMENT CHARACTER
+const int JniDataUtils::CODE_POINT_NULL = 0; // U+0000 NULL
-class DicNodeReleaseListener {
- public:
- DicNodeReleaseListener() {}
- virtual ~DicNodeReleaseListener() {}
- virtual void onReleased(DicNode *dicNode) = 0;
- private:
- DISALLOW_COPY_AND_ASSIGN(DicNodeReleaseListener);
-};
} // namespace latinime
-#endif // LATINIME_DIC_NODE_RELEASE_LISTENER_H
diff --git a/native/jni/src/utils/jni_data_utils.h b/native/jni/src/utils/jni_data_utils.h
new file mode 100644
index 000000000..cb82d3c3b
--- /dev/null
+++ b/native/jni/src/utils/jni_data_utils.h
@@ -0,0 +1,179 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_JNI_DATA_UTILS_H
+#define LATINIME_JNI_DATA_UTILS_H
+
+#include <algorithm>
+#include <vector>
+
+#include "defines.h"
+#include "jni.h"
+#include "suggest/core/session/prev_words_info.h"
+#include "suggest/core/policy/dictionary_header_structure_policy.h"
+#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h"
+#include "utils/char_utils.h"
+
+namespace latinime {
+
+// Static helpers that move data between JNI arrays/strings and native buffers.
+class JniDataUtils {
+ public:
+    // Copies the whole Java int[] into outVector, resizing it to the array length.
+    // A null array clears outVector.
+    static void jintarrayToVector(JNIEnv *env, jintArray array, std::vector<int> *const outVector) {
+        if (!array) {
+            outVector->clear();
+            return;
+        }
+        const jsize arrayLength = env->GetArrayLength(array);
+        outVector->resize(arrayLength);
+        env->GetIntArrayRegion(array, 0 /* start */, arrayLength, outVector->data());
+    }
+
+    // Builds an AttributeMap from two parallel Java String[] arrays: the i-th key maps to the
+    // i-th value, and a key that appears again overwrites the earlier entry.
+    static DictionaryHeaderStructurePolicy::AttributeMap constructAttributeMap(JNIEnv *env,
+            jobjectArray attributeKeyStringArray, jobjectArray attributeValueStringArray) {
+        DictionaryHeaderStructurePolicy::AttributeMap attributeMap;
+        const int keyCount = env->GetArrayLength(attributeKeyStringArray);
+        for (int i = 0; i < keyCount; i++) {
+            jstring keyString = static_cast<jstring>(
+                    env->GetObjectArrayElement(attributeKeyStringArray, i));
+            DictionaryHeaderStructurePolicy::AttributeMap::key_type key;
+            HeaderReadWriteUtils::insertCharactersIntoVector(
+                    readUtf8Chars(env, keyString).data(), &key);
+            // Local references obtained in a loop are released eagerly so that a long
+            // attribute list cannot exhaust the JNI local reference table.
+            env->DeleteLocalRef(keyString);
+
+            jstring valueString = static_cast<jstring>(
+                    env->GetObjectArrayElement(attributeValueStringArray, i));
+            DictionaryHeaderStructurePolicy::AttributeMap::mapped_type value;
+            HeaderReadWriteUtils::insertCharactersIntoVector(
+                    readUtf8Chars(env, valueString).data(), &value);
+            env->DeleteLocalRef(valueString);
+            attributeMap[key] = value;
+        }
+        return attributeMap;
+    }
+
+    // Writes up to min(maxLength, codePointCount) code points into the Java int[] beginning at
+    // index start. Beginning-of-Sentence markers are dropped; other values outside the Unicode
+    // range and control codes 0x01-0x1F are replaced with CODE_POINT_REPLACEMENT_CHARACTER.
+    // When needsNullTermination is set and fewer than maxLength code points were written, a
+    // CODE_POINT_NULL is stored right after the last one.
+    static void outputCodePoints(JNIEnv *env, jintArray intArrayToOutputCodePoints, const int start,
+            const int maxLength, const int *const codePoints, const int codePointCount,
+            const bool needsNullTermination) {
+        // Clamped at zero so that a negative length is never used as a buffer size.
+        const int codePointBufSize = std::max(0, std::min(maxLength, codePointCount));
+        std::vector<int> outputBuffer;
+        outputBuffer.reserve(codePointBufSize);
+        for (int i = 0; i < codePointBufSize; ++i) {
+            const int codePoint = codePoints[i];
+            int codePointToOutput = codePoint;
+            if (!CharUtils::isInUnicodeSpace(codePoint)) {
+                if (codePoint == CODE_POINT_BEGINNING_OF_SENTENCE) {
+                    // Just skip Beginning-of-Sentence marker.
+                    continue;
+                }
+                codePointToOutput = CODE_POINT_REPLACEMENT_CHARACTER;
+            } else if (codePoint >= 0x01 && codePoint <= 0x1F) {
+                // Control code.
+                codePointToOutput = CODE_POINT_REPLACEMENT_CHARACTER;
+            }
+            outputBuffer.push_back(codePointToOutput);
+        }
+        const int outputCodePointCount = static_cast<int>(outputBuffer.size());
+        env->SetIntArrayRegion(intArrayToOutputCodePoints, start, outputCodePointCount,
+                outputBuffer.data());
+        if (needsNullTermination && outputCodePointCount < maxLength) {
+            env->SetIntArrayRegion(intArrayToOutputCodePoints, start + outputCodePointCount,
+                    1 /* len */, &CODE_POINT_NULL);
+        }
+    }
+
+    // Collects up to MAX_PREV_WORD_COUNT_FOR_N_GRAM previous words into a PrevWordsInfo. A slot
+    // keeps a zero code point count when the Java array has no entry for it, the entry is null,
+    // or the word is longer than MAX_WORD_LENGTH.
+    static PrevWordsInfo constructPrevWordsInfo(JNIEnv *env, jobjectArray prevWordCodePointArrays,
+            jbooleanArray isBeginningOfSentenceArray) {
+        int prevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH];
+        int prevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+        bool isBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+        const jsize prevWordsCount = env->GetArrayLength(prevWordCodePointArrays);
+        for (size_t i = 0; i < NELEMS(prevWordCodePoints); ++i) {
+            prevWordCodePointCount[i] = 0;
+            isBeginningOfSentence[i] = false;
+            if (prevWordsCount <= static_cast<int>(i)) {
+                continue;
+            }
+            jintArray prevWord = static_cast<jintArray>(
+                    env->GetObjectArrayElement(prevWordCodePointArrays, i));
+            if (!prevWord) {
+                continue;
+            }
+            const jsize prevWordLength = env->GetArrayLength(prevWord);
+            if (prevWordLength <= MAX_WORD_LENGTH) {
+                env->GetIntArrayRegion(prevWord, 0, prevWordLength, prevWordCodePoints[i]);
+                prevWordCodePointCount[i] = prevWordLength;
+                jboolean isBeginningOfSentenceBoolean = JNI_FALSE;
+                env->GetBooleanArrayRegion(isBeginningOfSentenceArray, i, 1 /* len */,
+                        &isBeginningOfSentenceBoolean);
+                isBeginningOfSentence[i] = isBeginningOfSentenceBoolean == JNI_TRUE;
+            }
+            // Over-long words are left as empty slots; either way the reference is released.
+            env->DeleteLocalRef(prevWord);
+        }
+        return PrevWordsInfo(prevWordCodePoints, prevWordCodePointCount, isBeginningOfSentence,
+                MAX_PREV_WORD_COUNT_FOR_N_GRAM);
+    }
+
+    // Stores a single jboolean at array[index].
+    static void putBooleanToArray(JNIEnv *env, jbooleanArray array, const int index,
+            const jboolean value) {
+        env->SetBooleanArrayRegion(array, index, 1 /* len */, &value);
+    }
+
+    // Stores a single int at array[index].
+    static void putIntToArray(JNIEnv *env, jintArray array, const int index, const int value) {
+        env->SetIntArrayRegion(array, index, 1 /* len */, &value);
+    }
+
+    // Stores a single float at array[index].
+    static void putFloatToArray(JNIEnv *env, jfloatArray array, const int index,
+            const float value) {
+        env->SetFloatArrayRegion(array, index, 1 /* len */, &value);
+    }
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(JniDataUtils);
+
+    // Returns the modified-UTF-8 bytes of string followed by a terminating '\0'.
+    static std::vector<char> readUtf8Chars(JNIEnv *env, jstring string) {
+        const jsize utf8Length = env->GetStringUTFLength(string);
+        // Pre-filled with '\0', so the byte after the copied region is the terminator.
+        std::vector<char> chars(utf8Length + 1, '\0');
+        env->GetStringUTFRegion(string, 0, env->GetStringLength(string), chars.data());
+        return chars;
+    }
+
+    static const int CODE_POINT_REPLACEMENT_CHARACTER;
+    static const int CODE_POINT_NULL;
+};
+} // namespace latinime
+#endif // LATINIME_JNI_DATA_UTILS_H
diff --git a/native/jni/src/utils/time_keeper.cpp b/native/jni/src/utils/time_keeper.cpp
new file mode 100644
index 000000000..026284060
--- /dev/null
+++ b/native/jni/src/utils/time_keeper.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/time_keeper.h"
+
+#include <ctime>
+
+namespace latinime {
+
+int TimeKeeper::sCurrentTime; // No initializer: static storage, so it starts at 0.
+bool TimeKeeper::sSetForTesting; // No initializer: static storage, so it starts as false.
+
+/* static */ void TimeKeeper::setCurrentTime() {
+ if (!sSetForTesting) { // A time forced by test mode must not be overwritten.
+ sCurrentTime = time(0); // Calendar time from <ctime>, narrowed from time_t to int.
+ }
+}
+
+/* static */ void TimeKeeper::startTestModeWithForceCurrentTime(const int currentTime) {
+ sCurrentTime = currentTime; // Pin the cached time to the caller-supplied value.
+ sSetForTesting = true; // From now on setCurrentTime() leaves sCurrentTime alone.
+}
+
+/* static */ void TimeKeeper::stopTestMode() {
+ sSetForTesting = false; // sCurrentTime keeps its value until setCurrentTime() runs again.
+}
+
+} // namespace latinime
diff --git a/native/jni/src/utils/time_keeper.h b/native/jni/src/utils/time_keeper.h
new file mode 100644
index 000000000..d066757e4
--- /dev/null
+++ b/native/jni/src/utils/time_keeper.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_TIME_KEEPER_H
+#define LATINIME_TIME_KEEPER_H
+
+#include "defines.h"
+
+namespace latinime {
+
+// Holds a cached "current time" in static members; test mode lets callers pin that value.
+class TimeKeeper {
+ public:
+    // Refreshes the cached time from the system clock, unless test mode is active.
+    static void setCurrentTime();
+
+    // Sets the cached time to currentTime and enters test mode.
+    static void startTestModeWithForceCurrentTime(const int currentTime);
+
+    // Leaves test mode; the cached time itself is not modified by this call.
+    static void stopTestMode();
+
+    // Returns the cached time without refreshing it.
+    static int peekCurrentTime() { return sCurrentTime; }
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(TimeKeeper);
+
+    static int sCurrentTime;
+    static bool sSetForTesting;
+};
+} // namespace latinime
+#endif /* LATINIME_TIME_KEEPER_H */