From a424ff06ec367d1be4cc05a72b7384d9f9834787 Mon Sep 17 00:00:00 2001 From: Jean Chalard Date: Wed, 31 Oct 2012 17:34:47 +0900 Subject: Switch the AOSP word lists to the combined format. This will help with managing the word lists. Bug: 7388859 Change-Id: I89f049569b177d3027fe56d6c67eaca27d44dc7d --- dictionaries/cs_wordlist.combined.gz | Bin 0 -> 945721 bytes dictionaries/cs_wordlist.xml.gz | Bin 934317 -> 0 bytes dictionaries/da_wordlist.combined.gz | Bin 0 -> 1016252 bytes dictionaries/da_wordlist.xml.gz | Bin 1028556 -> 0 bytes dictionaries/de_wordlist.combined.gz | Bin 0 -> 1594831 bytes dictionaries/de_wordlist.xml.gz | Bin 1264849 -> 0 bytes dictionaries/el_wordlist.combined.gz | Bin 0 -> 1132398 bytes dictionaries/el_wordlist.xml.gz | Bin 1111767 -> 0 bytes dictionaries/en_GB_wordlist.combined.gz | Bin 0 -> 859526 bytes dictionaries/en_US_wordlist.combined.gz | Bin 0 -> 876850 bytes dictionaries/en_gb_wordlist.xml.gz | Bin 873666 -> 0 bytes dictionaries/en_us_wordlist.xml.gz | Bin 891237 -> 0 bytes dictionaries/en_whitelist.xml.gz | Bin 816 -> 0 bytes dictionaries/en_wordlist.combined.gz | Bin 0 -> 901400 bytes dictionaries/en_wordlist.xml.gz | Bin 916081 -> 0 bytes dictionaries/es_wordlist.combined.gz | Bin 0 -> 948224 bytes dictionaries/es_wordlist.xml.gz | Bin 938245 -> 0 bytes dictionaries/fi_wordlist.combined.gz | Bin 0 -> 1267592 bytes dictionaries/fi_wordlist.xml.gz | Bin 1247226 -> 0 bytes dictionaries/fr_wordlist.combined.gz | Bin 0 -> 1106063 bytes dictionaries/fr_wordlist.xml.gz | Bin 1118441 -> 0 bytes dictionaries/hr_wordlist.combined.gz | Bin 0 -> 1010674 bytes dictionaries/hr_wordlist.xml.gz | Bin 996565 -> 0 bytes dictionaries/it_wordlist.combined.gz | Bin 0 -> 931870 bytes dictionaries/it_wordlist.xml.gz | Bin 922548 -> 0 bytes dictionaries/lt_wordlist.combined.gz | Bin 0 -> 977866 bytes dictionaries/lt_wordlist.xml.gz | Bin 963678 -> 0 bytes dictionaries/lv_wordlist.combined.gz | Bin 0 -> 963904 bytes dictionaries/lv_wordlist.xml.gz | Bin 947300 -> 0 bytes dictionaries/nb_wordlist.combined.gz | Bin 0 -> 964442 bytes dictionaries/nb_wordlist.xml.gz | Bin 944838 -> 0 bytes dictionaries/nl_wordlist.combined.gz | Bin 0 -> 1050110 bytes dictionaries/nl_wordlist.xml.gz | Bin 1031994 -> 0 bytes dictionaries/pl_wordlist.combined.gz | Bin 0 -> 1086804 bytes dictionaries/pl_wordlist.xml.gz | Bin 1073754 -> 0 bytes dictionaries/pt_BR_wordlist.combined.gz | Bin 0 -> 876891 bytes dictionaries/pt_PT_wordlist.combined.gz | Bin 0 -> 1102007 bytes dictionaries/pt_br_wordlist.xml.gz | Bin 868558 -> 0 bytes dictionaries/pt_pt_wordlist.xml.gz | Bin 1092967 -> 0 bytes dictionaries/ru_wordlist.combined.gz | Bin 0 -> 1394258 bytes dictionaries/ru_wordlist.xml.gz | Bin 1367381 -> 0 bytes dictionaries/sample.combined | 38 ++++++++++++++++++++++++++++++++ dictionaries/sample.xml | 17 -------------- dictionaries/sl_wordlist.combined.gz | Bin 0 -> 313077 bytes dictionaries/sl_wordlist.xml.gz | Bin 310873 -> 0 bytes dictionaries/sr_wordlist.combined.gz | Bin 0 -> 1049710 bytes dictionaries/sr_wordlist.xml.gz | Bin 1027560 -> 0 bytes dictionaries/sv_wordlist.combined.gz | Bin 0 -> 1137493 bytes dictionaries/sv_wordlist.xml.gz | Bin 1115582 -> 0 bytes dictionaries/tr_wordlist.combined.gz | Bin 0 -> 924020 bytes dictionaries/tr_wordlist.xml.gz | Bin 909030 -> 0 bytes 51 files changed, 38 insertions(+), 17 deletions(-) create mode 100644 dictionaries/cs_wordlist.combined.gz delete mode 100644 dictionaries/cs_wordlist.xml.gz create mode 100644 dictionaries/da_wordlist.combined.gz delete mode 100644 dictionaries/da_wordlist.xml.gz create mode 100644 dictionaries/de_wordlist.combined.gz delete mode 100644 dictionaries/de_wordlist.xml.gz create mode 100644 dictionaries/el_wordlist.combined.gz delete mode 100644 dictionaries/el_wordlist.xml.gz create mode 100644 dictionaries/en_GB_wordlist.combined.gz create mode 100644 dictionaries/en_US_wordlist.combined.gz delete mode 100644 dictionaries/en_gb_wordlist.xml.gz delete mode 100644 dictionaries/en_us_wordlist.xml.gz delete mode 100644 dictionaries/en_whitelist.xml.gz create mode 100644 dictionaries/en_wordlist.combined.gz delete mode 100644 dictionaries/en_wordlist.xml.gz create mode 100644 dictionaries/es_wordlist.combined.gz delete mode 100644 dictionaries/es_wordlist.xml.gz create mode 100644 dictionaries/fi_wordlist.combined.gz delete mode 100644 dictionaries/fi_wordlist.xml.gz create mode 100644 dictionaries/fr_wordlist.combined.gz delete mode 100644 dictionaries/fr_wordlist.xml.gz create mode 100644 dictionaries/hr_wordlist.combined.gz delete mode 100644 dictionaries/hr_wordlist.xml.gz create mode 100644 dictionaries/it_wordlist.combined.gz delete mode 100644 dictionaries/it_wordlist.xml.gz create mode 100644 dictionaries/lt_wordlist.combined.gz delete mode 100644 dictionaries/lt_wordlist.xml.gz create mode 100644 dictionaries/lv_wordlist.combined.gz delete mode 100644 dictionaries/lv_wordlist.xml.gz create mode 100644 dictionaries/nb_wordlist.combined.gz delete mode 100644 dictionaries/nb_wordlist.xml.gz create mode 100644 dictionaries/nl_wordlist.combined.gz delete mode 100644 dictionaries/nl_wordlist.xml.gz create mode 100644 dictionaries/pl_wordlist.combined.gz delete mode 100644 dictionaries/pl_wordlist.xml.gz create mode 100644 dictionaries/pt_BR_wordlist.combined.gz create mode 100644 dictionaries/pt_PT_wordlist.combined.gz delete mode 100644 dictionaries/pt_br_wordlist.xml.gz delete mode 100644 dictionaries/pt_pt_wordlist.xml.gz create mode 100644 dictionaries/ru_wordlist.combined.gz delete mode 100644 dictionaries/ru_wordlist.xml.gz create mode 100644 dictionaries/sample.combined delete mode 100644 dictionaries/sample.xml create mode 100644 dictionaries/sl_wordlist.combined.gz delete mode 100644 dictionaries/sl_wordlist.xml.gz create mode 100644 dictionaries/sr_wordlist.combined.gz delete mode 100644 dictionaries/sr_wordlist.xml.gz create mode 100644 dictionaries/sv_wordlist.combined.gz delete mode 100644 dictionaries/sv_wordlist.xml.gz create mode 100644 dictionaries/tr_wordlist.combined.gz delete mode 100644 dictionaries/tr_wordlist.xml.gz diff --git a/dictionaries/cs_wordlist.combined.gz b/dictionaries/cs_wordlist.combined.gz new file mode 100644 index 000000000..8cbf2e961 Binary files /dev/null and b/dictionaries/cs_wordlist.combined.gz differ diff --git a/dictionaries/cs_wordlist.xml.gz b/dictionaries/cs_wordlist.xml.gz deleted file mode 100644 index f99148b07..000000000 Binary files a/dictionaries/cs_wordlist.xml.gz and /dev/null differ diff --git a/dictionaries/da_wordlist.combined.gz b/dictionaries/da_wordlist.combined.gz new file mode 100644 index 000000000..1cccb8632 Binary files /dev/null and b/dictionaries/da_wordlist.combined.gz differ diff --git a/dictionaries/da_wordlist.xml.gz b/dictionaries/da_wordlist.xml.gz deleted file mode 100644 index a3d4318e2..000000000 Binary files a/dictionaries/da_wordlist.xml.gz and /dev/null differ diff --git a/dictionaries/de_wordlist.combined.gz b/dictionaries/de_wordlist.combined.gz new file mode 100644 index 000000000..5db1aa4f3 Binary files /dev/null and b/dictionaries/de_wordlist.combined.gz differ diff --git a/dictionaries/de_wordlist.xml.gz b/dictionaries/de_wordlist.xml.gz deleted file mode 100644 index a4267b35a..000000000 Binary files a/dictionaries/de_wordlist.xml.gz and /dev/null differ diff --git a/dictionaries/el_wordlist.combined.gz b/dictionaries/el_wordlist.combined.gz new file mode 100644 index 000000000..b61da8918 Binary files /dev/null and b/dictionaries/el_wordlist.combined.gz differ diff --git a/dictionaries/el_wordlist.xml.gz b/dictionaries/el_wordlist.xml.gz deleted file mode 100644 index af1d71d47..000000000 Binary files a/dictionaries/el_wordlist.xml.gz and /dev/null differ diff --git a/dictionaries/en_GB_wordlist.combined.gz b/dictionaries/en_GB_wordlist.combined.gz new file mode 100644 index 000000000..b5909c2da Binary files /dev/null and b/dictionaries/en_GB_wordlist.combined.gz differ diff --git a/dictionaries/en_US_wordlist.combined.gz b/dictionaries/en_US_wordlist.combined.gz new file mode 100644 index 000000000..03ea2b787 Binary files /dev/null and b/dictionaries/en_US_wordlist.combined.gz differ diff --git a/dictionaries/en_gb_wordlist.xml.gz b/dictionaries/en_gb_wordlist.xml.gz deleted file mode 100644 index 274424cf2..000000000 Binary files a/dictionaries/en_gb_wordlist.xml.gz and /dev/null differ diff --git a/dictionaries/en_us_wordlist.xml.gz b/dictionaries/en_us_wordlist.xml.gz deleted file mode 100644 index b5054ef3c..000000000 Binary files a/dictionaries/en_us_wordlist.xml.gz and /dev/null differ diff --git a/dictionaries/en_whitelist.xml.gz b/dictionaries/en_whitelist.xml.gz deleted file mode 100644 index cf70a1a84..000000000 Binary files a/dictionaries/en_whitelist.xml.gz and /dev/null differ diff --git a/dictionaries/en_wordlist.combined.gz b/dictionaries/en_wordlist.combined.gz new file mode 100644 index 000000000..62c454049 Binary files /dev/null and b/dictionaries/en_wordlist.combined.gz differ diff --git a/dictionaries/en_wordlist.xml.gz b/dictionaries/en_wordlist.xml.gz deleted file mode 100644 index 6e57f4212..000000000 Binary files a/dictionaries/en_wordlist.xml.gz and /dev/null differ diff --git a/dictionaries/es_wordlist.combined.gz b/dictionaries/es_wordlist.combined.gz new file mode 100644 index 000000000..b0a137c4a Binary files /dev/null and b/dictionaries/es_wordlist.combined.gz differ diff --git a/dictionaries/es_wordlist.xml.gz b/dictionaries/es_wordlist.xml.gz deleted file mode 100644 index aaf0aa348..000000000 Binary files a/dictionaries/es_wordlist.xml.gz and /dev/null differ diff --git a/dictionaries/fi_wordlist.combined.gz b/dictionaries/fi_wordlist.combined.gz new file mode 100644 index 000000000..32dc126c0 Binary files /dev/null and b/dictionaries/fi_wordlist.combined.gz differ diff --git a/dictionaries/fi_wordlist.xml.gz b/dictionaries/fi_wordlist.xml.gz deleted file mode 100644 index b03565c4f..000000000 Binary files a/dictionaries/fi_wordlist.xml.gz and /dev/null differ diff --git a/dictionaries/fr_wordlist.combined.gz b/dictionaries/fr_wordlist.combined.gz new file mode 100644 index 000000000..95a87e630 Binary files /dev/null and b/dictionaries/fr_wordlist.combined.gz differ diff --git a/dictionaries/fr_wordlist.xml.gz b/dictionaries/fr_wordlist.xml.gz deleted file mode 100644 index 3134a040a..000000000 Binary files a/dictionaries/fr_wordlist.xml.gz and /dev/null differ diff --git a/dictionaries/hr_wordlist.combined.gz b/dictionaries/hr_wordlist.combined.gz new file mode 100644 index 000000000..573c3e8ac Binary files /dev/null and b/dictionaries/hr_wordlist.combined.gz differ diff --git a/dictionaries/hr_wordlist.xml.gz b/dictionaries/hr_wordlist.xml.gz deleted file mode 100644 index 13998d9d5..000000000 Binary files a/dictionaries/hr_wordlist.xml.gz and /dev/null differ diff --git a/dictionaries/it_wordlist.combined.gz b/dictionaries/it_wordlist.combined.gz new file mode 100644 index 000000000..d143bc425 Binary files /dev/null and b/dictionaries/it_wordlist.combined.gz differ diff --git a/dictionaries/it_wordlist.xml.gz b/dictionaries/it_wordlist.xml.gz deleted file mode 100644 index a75553d45..000000000 Binary files a/dictionaries/it_wordlist.xml.gz and /dev/null differ diff --git a/dictionaries/lt_wordlist.combined.gz b/dictionaries/lt_wordlist.combined.gz new file mode 100644 index 000000000..03cfa8426 Binary files /dev/null and b/dictionaries/lt_wordlist.combined.gz differ diff --git a/dictionaries/lt_wordlist.xml.gz b/dictionaries/lt_wordlist.xml.gz deleted file mode 100644 index 8f00f6393..000000000 Binary files a/dictionaries/lt_wordlist.xml.gz and /dev/null differ diff --git a/dictionaries/lv_wordlist.combined.gz b/dictionaries/lv_wordlist.combined.gz new file mode 100644 index 000000000..6b2ee77d6 Binary files /dev/null and b/dictionaries/lv_wordlist.combined.gz differ diff --git a/dictionaries/lv_wordlist.xml.gz b/dictionaries/lv_wordlist.xml.gz deleted file mode 100644 index 453ebb518..000000000 Binary files a/dictionaries/lv_wordlist.xml.gz and /dev/null differ diff --git a/dictionaries/nb_wordlist.combined.gz b/dictionaries/nb_wordlist.combined.gz new file mode 100644 index 000000000..0644fc93a Binary files /dev/null and b/dictionaries/nb_wordlist.combined.gz differ diff --git a/dictionaries/nb_wordlist.xml.gz b/dictionaries/nb_wordlist.xml.gz deleted file mode 100644 index 91813b66c..000000000 Binary files a/dictionaries/nb_wordlist.xml.gz and /dev/null differ diff --git a/dictionaries/nl_wordlist.combined.gz b/dictionaries/nl_wordlist.combined.gz new file mode 100644 index 000000000..748c5ed77 Binary files /dev/null and b/dictionaries/nl_wordlist.combined.gz differ diff --git a/dictionaries/nl_wordlist.xml.gz b/dictionaries/nl_wordlist.xml.gz deleted file mode 100644 index 72ebc6136..000000000 Binary files a/dictionaries/nl_wordlist.xml.gz and /dev/null differ diff --git a/dictionaries/pl_wordlist.combined.gz b/dictionaries/pl_wordlist.combined.gz new file mode 100644 index 000000000..638c8eef5 Binary files /dev/null and b/dictionaries/pl_wordlist.combined.gz differ diff --git a/dictionaries/pl_wordlist.xml.gz b/dictionaries/pl_wordlist.xml.gz deleted file mode 100644 index 5909a5f7f..000000000 Binary files a/dictionaries/pl_wordlist.xml.gz and /dev/null differ diff --git a/dictionaries/pt_BR_wordlist.combined.gz b/dictionaries/pt_BR_wordlist.combined.gz new file mode 100644 index 000000000..6f7952004 Binary files /dev/null and b/dictionaries/pt_BR_wordlist.combined.gz differ diff --git a/dictionaries/pt_PT_wordlist.combined.gz b/dictionaries/pt_PT_wordlist.combined.gz new file mode 100644 index 000000000..66ed025fb Binary files /dev/null and b/dictionaries/pt_PT_wordlist.combined.gz differ diff --git a/dictionaries/pt_br_wordlist.xml.gz b/dictionaries/pt_br_wordlist.xml.gz deleted file mode 100644 index 53d4e4298..000000000 Binary files a/dictionaries/pt_br_wordlist.xml.gz and /dev/null differ diff --git a/dictionaries/pt_pt_wordlist.xml.gz b/dictionaries/pt_pt_wordlist.xml.gz deleted file mode 100644 index e3e8c3ac3..000000000 Binary files a/dictionaries/pt_pt_wordlist.xml.gz and /dev/null differ diff --git a/dictionaries/ru_wordlist.combined.gz b/dictionaries/ru_wordlist.combined.gz new file mode 100644 index 000000000..62ae12e90 Binary files /dev/null and b/dictionaries/ru_wordlist.combined.gz differ diff --git a/dictionaries/ru_wordlist.xml.gz b/dictionaries/ru_wordlist.xml.gz deleted file mode 100644 index 877f0608e..000000000 Binary files a/dictionaries/ru_wordlist.xml.gz and /dev/null differ diff --git a/dictionaries/sample.combined b/dictionaries/sample.combined new file mode 100644 index 000000000..4fa595e1e --- /dev/null +++ b/dictionaries/sample.combined @@ -0,0 +1,38 @@ +# This is a sample wordlist that can be converted to a binary dictionary +# for use by the Latin IME. +# The file is essentially a CSV file, with indent level denoting nesting. +# +# The file starts with a single CSV line with the header attributes. Whatever +# the content, these are included as is in the binary file. The first attribute +# of the file should be `dictionary'. Usual fields are `locale', `description', +# `date', `version', `options'. +# +# Each word has a `word' entry and at least a `f' argument denoting its +# probability, as an integer between 0 and 255 on a logarithmic scale, with +# 255 meaning 1 and each decrement in 1 dividing probability by 1.15. +# As a special case, a weight of 0 is taken to mean profanity - words that +# should not be considered a typo, but that should never be suggested +# explicitly. An entry may be made not a word by adding a `not_a_word' +# field with a value of `true'. The main reason for putting such entries +# into the dictionary is to add shortcut targets and maybe a whitelist +# replacement. +# +# Each word may or may not have any number of shortcut target lines +# starting with a `shortcut' entry and having at least a `f' frequency +# value between 0 and 14, or the special value `whitelist' which becomes +# 15, which is then taken to be the whitelist target of this word. +# +# Each word may also have any number of bigram lines starting with a +# `bigram' entry containing the following word whose frequency should +# override the unigram frequency when following the word this bigram is +# for. +# +dictionary=main:en,locale=en,description=Sample wordlist,date=1351495318,version=1 + word=sample,f=200 + bigram=wordlist,f=243 + word=wordlist,f=180 + word=shortcut,f=176 + shortcut=target,f=10 + word=witelisted,f=10,not_a_word=true + shortcut=whitelisted,f=whitelist + word=profanity,f=0 diff --git a/dictionaries/sample.xml b/dictionaries/sample.xml deleted file mode 100644 index ad98f2b6f..000000000 --- a/dictionaries/sample.xml +++ /dev/null @@ -1,17 +0,0 @@ - - - this - is - sample - wordlist - diff --git a/dictionaries/sl_wordlist.combined.gz b/dictionaries/sl_wordlist.combined.gz new file mode 100644 index 000000000..845b55a8b Binary files /dev/null and b/dictionaries/sl_wordlist.combined.gz differ diff --git a/dictionaries/sl_wordlist.xml.gz b/dictionaries/sl_wordlist.xml.gz deleted file mode 100644 index 3927b698e..000000000 Binary files a/dictionaries/sl_wordlist.xml.gz and /dev/null differ diff --git a/dictionaries/sr_wordlist.combined.gz b/dictionaries/sr_wordlist.combined.gz new file mode 100644 index 000000000..c15bc48fd Binary files /dev/null and b/dictionaries/sr_wordlist.combined.gz differ diff --git a/dictionaries/sr_wordlist.xml.gz b/dictionaries/sr_wordlist.xml.gz deleted file mode 100644 index c2eea681f..000000000 Binary files a/dictionaries/sr_wordlist.xml.gz and /dev/null differ diff --git a/dictionaries/sv_wordlist.combined.gz b/dictionaries/sv_wordlist.combined.gz new file mode 100644 index 000000000..ec399fbf5 Binary files /dev/null and b/dictionaries/sv_wordlist.combined.gz differ diff --git a/dictionaries/sv_wordlist.xml.gz b/dictionaries/sv_wordlist.xml.gz deleted file mode 100644 index caa63c435..000000000 Binary files a/dictionaries/sv_wordlist.xml.gz and /dev/null differ diff --git a/dictionaries/tr_wordlist.combined.gz b/dictionaries/tr_wordlist.combined.gz new file mode 100644 index 000000000..3e6ca3263 Binary files /dev/null and b/dictionaries/tr_wordlist.combined.gz differ diff --git a/dictionaries/tr_wordlist.xml.gz b/dictionaries/tr_wordlist.xml.gz deleted file mode 100644 index 35999208e..000000000 Binary files a/dictionaries/tr_wordlist.xml.gz and /dev/null differ -- cgit v1.2.3-83-g751a