aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--dictionaries/de_wordlist.combined.gzbin1292666 -> 1293426 bytes
-rw-r--r--dictionaries/en_GB_wordlist.combined.gzbin860708 -> 865437 bytes
-rw-r--r--dictionaries/en_US_wordlist.combined.gzbin877710 -> 883015 bytes
-rw-r--r--dictionaries/en_wordlist.combined.gzbin909096 -> 914050 bytes
-rw-r--r--dictionaries/es_wordlist.combined.gzbin1164983 -> 1165885 bytes
-rw-r--r--dictionaries/fr_wordlist.combined.gzbin1106371 -> 1108423 bytes
-rw-r--r--dictionaries/it_wordlist.combined.gzbin933697 -> 935024 bytes
-rw-r--r--dictionaries/nl_wordlist.combined.gzbin1053242 -> 1054497 bytes
-rw-r--r--dictionaries/pl_wordlist.combined.gzbin1089296 -> 1090690 bytes
-rw-r--r--dictionaries/pt_BR_wordlist.combined.gzbin878505 -> 879877 bytes
-rw-r--r--dictionaries/pt_PT_wordlist.combined.gzbin1105959 -> 1106730 bytes
-rw-r--r--dictionaries/ru_wordlist.combined.gzbin1394944 -> 1397626 bytes
-rw-r--r--dictionaries/sv_wordlist.combined.gzbin1139281 -> 1140879 bytes
-rw-r--r--dictionaries/tr_wordlist.combined.gzbin925948 -> 926338 bytes
-rw-r--r--java/res/raw/main_de.dictbin1606096 -> 1606096 bytes
-rw-r--r--java/res/raw/main_en.dictbin1070485 -> 1070485 bytes
-rw-r--r--java/res/raw/main_es.dictbin1377071 -> 1377071 bytes
-rw-r--r--java/res/raw/main_fr.dictbin1328940 -> 1328933 bytes
-rw-r--r--java/res/raw/main_it.dictbin1143338 -> 1143338 bytes
-rw-r--r--java/res/raw/main_pt_br.dictbin1092054 -> 1092054 bytes
-rw-r--r--java/res/raw/main_ru.dictbin1292026 -> 1292026 bytes
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/FormatSpec.java7
-rw-r--r--java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java13
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h6
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp3
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp6
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h11
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp12
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp6
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h4
-rw-r--r--tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java28
-rw-r--r--tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java16
-rw-r--r--tests/src/com/android/inputmethod/latin/makedict/BinaryDictUtils.java15
-rw-r--r--tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java3
-rw-r--r--tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java3
-rw-r--r--tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java51
-rw-r--r--tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java2
38 files changed, 108 insertions, 82 deletions
diff --git a/dictionaries/de_wordlist.combined.gz b/dictionaries/de_wordlist.combined.gz
index 803211c01..92c95540c 100644
--- a/dictionaries/de_wordlist.combined.gz
+++ b/dictionaries/de_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/en_GB_wordlist.combined.gz b/dictionaries/en_GB_wordlist.combined.gz
index 1fa9b85ea..217660fc4 100644
--- a/dictionaries/en_GB_wordlist.combined.gz
+++ b/dictionaries/en_GB_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/en_US_wordlist.combined.gz b/dictionaries/en_US_wordlist.combined.gz
index 2e039ff05..8aed9c5e0 100644
--- a/dictionaries/en_US_wordlist.combined.gz
+++ b/dictionaries/en_US_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/en_wordlist.combined.gz b/dictionaries/en_wordlist.combined.gz
index e845346d6..7fe6618cf 100644
--- a/dictionaries/en_wordlist.combined.gz
+++ b/dictionaries/en_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/es_wordlist.combined.gz b/dictionaries/es_wordlist.combined.gz
index 3391e64b4..71e7309fc 100644
--- a/dictionaries/es_wordlist.combined.gz
+++ b/dictionaries/es_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/fr_wordlist.combined.gz b/dictionaries/fr_wordlist.combined.gz
index 1b9fd73f9..afe44a6d9 100644
--- a/dictionaries/fr_wordlist.combined.gz
+++ b/dictionaries/fr_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/it_wordlist.combined.gz b/dictionaries/it_wordlist.combined.gz
index 5a5cbdc7a..ed58a12c5 100644
--- a/dictionaries/it_wordlist.combined.gz
+++ b/dictionaries/it_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/nl_wordlist.combined.gz b/dictionaries/nl_wordlist.combined.gz
index 37ba8ab42..19c3a7ea8 100644
--- a/dictionaries/nl_wordlist.combined.gz
+++ b/dictionaries/nl_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/pl_wordlist.combined.gz b/dictionaries/pl_wordlist.combined.gz
index ba71a5581..2b84eecfd 100644
--- a/dictionaries/pl_wordlist.combined.gz
+++ b/dictionaries/pl_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/pt_BR_wordlist.combined.gz b/dictionaries/pt_BR_wordlist.combined.gz
index 02df1c1ee..7aac61e50 100644
--- a/dictionaries/pt_BR_wordlist.combined.gz
+++ b/dictionaries/pt_BR_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/pt_PT_wordlist.combined.gz b/dictionaries/pt_PT_wordlist.combined.gz
index bcd50ab03..5bf9a60e8 100644
--- a/dictionaries/pt_PT_wordlist.combined.gz
+++ b/dictionaries/pt_PT_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/ru_wordlist.combined.gz b/dictionaries/ru_wordlist.combined.gz
index 401ad08b0..5e9266221 100644
--- a/dictionaries/ru_wordlist.combined.gz
+++ b/dictionaries/ru_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/sv_wordlist.combined.gz b/dictionaries/sv_wordlist.combined.gz
index b6ebab320..db44ae4c4 100644
--- a/dictionaries/sv_wordlist.combined.gz
+++ b/dictionaries/sv_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/tr_wordlist.combined.gz b/dictionaries/tr_wordlist.combined.gz
index 306cea184..d3c8825b9 100644
--- a/dictionaries/tr_wordlist.combined.gz
+++ b/dictionaries/tr_wordlist.combined.gz
Binary files differ
diff --git a/java/res/raw/main_de.dict b/java/res/raw/main_de.dict
index 45b288375..c3c2cbe46 100644
--- a/java/res/raw/main_de.dict
+++ b/java/res/raw/main_de.dict
Binary files differ
diff --git a/java/res/raw/main_en.dict b/java/res/raw/main_en.dict
index 5bbb85761..b9e5bc77b 100644
--- a/java/res/raw/main_en.dict
+++ b/java/res/raw/main_en.dict
Binary files differ
diff --git a/java/res/raw/main_es.dict b/java/res/raw/main_es.dict
index fae131850..076d5aa8f 100644
--- a/java/res/raw/main_es.dict
+++ b/java/res/raw/main_es.dict
Binary files differ
diff --git a/java/res/raw/main_fr.dict b/java/res/raw/main_fr.dict
index 19532d9bf..0e8686092 100644
--- a/java/res/raw/main_fr.dict
+++ b/java/res/raw/main_fr.dict
Binary files differ
diff --git a/java/res/raw/main_it.dict b/java/res/raw/main_it.dict
index ff11b9798..609ef13b7 100644
--- a/java/res/raw/main_it.dict
+++ b/java/res/raw/main_it.dict
Binary files differ
diff --git a/java/res/raw/main_pt_br.dict b/java/res/raw/main_pt_br.dict
index 9fa50442a..c33865187 100644
--- a/java/res/raw/main_pt_br.dict
+++ b/java/res/raw/main_pt_br.dict
Binary files differ
diff --git a/java/res/raw/main_ru.dict b/java/res/raw/main_ru.dict
index 76b5f805a..d0af70730 100644
--- a/java/res/raw/main_ru.dict
+++ b/java/res/raw/main_ru.dict
Binary files differ
diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
index 78d79ae50..5925bdc4e 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
@@ -171,14 +171,17 @@ public final class FormatSpec {
// ExpandableDictionary.matchesExpectedBinaryDictFormatVersionForThisType().
public static final int VERSION2 = 2;
public static final int VERSION201 = 201;
+ public static final int VERSION202 = 202;
public static final int MINIMUM_SUPPORTED_VERSION_OF_CODE_POINT_TABLE = VERSION201;
// Dictionary version used for testing.
public static final int VERSION4_ONLY_FOR_TESTING = 399;
public static final int VERSION401 = 401;
public static final int VERSION4 = 402;
public static final int VERSION4_DEV = 403;
- static final int MINIMUM_SUPPORTED_VERSION = VERSION2;
- static final int MAXIMUM_SUPPORTED_VERSION = VERSION4_DEV;
+ static final int MINIMUM_SUPPORTED_STATIC_VERSION = VERSION202;
+ static final int MAXIMUM_SUPPORTED_STATIC_VERSION = VERSION202;
+ static final int MINIMUM_SUPPORTED_DYNAMIC_VERSION = VERSION4;
+ static final int MAXIMUM_SUPPORTED_DYNAMIC_VERSION = VERSION4_DEV;
// TODO: Make this value adaptative to content data, store it in the header, and
// use it in the reading code.
diff --git a/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java b/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java
index 4e0f5f583..8699f2ce7 100644
--- a/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java
+++ b/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java
@@ -36,7 +36,8 @@ public class CombinedFormatUtils {
public static final String WORD_TAG = "word";
public static final String BEGINNING_OF_SENTENCE_TAG = "beginning_of_sentence";
public static final String NOT_A_WORD_TAG = "not_a_word";
- public static final String BLACKLISTED_TAG = "blacklisted";
+ public static final String POSSIBLY_OFFENSIVE_TAG = "possibly_offensive";
+ public static final String TRUE_VALUE = "true";
public static String formatAttributeMap(final HashMap<String, String> attributeMap) {
final StringBuilder builder = new StringBuilder();
@@ -61,13 +62,13 @@ public class CombinedFormatUtils {
builder.append(",");
builder.append(formatProbabilityInfo(wordProperty.mProbabilityInfo));
if (wordProperty.mIsBeginningOfSentence) {
- builder.append("," + BEGINNING_OF_SENTENCE_TAG + "=true");
+ builder.append("," + BEGINNING_OF_SENTENCE_TAG + "=" + TRUE_VALUE);
}
if (wordProperty.mIsNotAWord) {
- builder.append("," + NOT_A_WORD_TAG + "=true");
+ builder.append("," + NOT_A_WORD_TAG + "=" + TRUE_VALUE);
}
if (wordProperty.mIsPossiblyOffensive) {
- builder.append("," + BLACKLISTED_TAG + "=true");
+ builder.append("," + POSSIBLY_OFFENSIVE_TAG + "=" + TRUE_VALUE);
}
builder.append("\n");
if (wordProperty.mHasShortcuts) {
@@ -111,4 +112,8 @@ public class CombinedFormatUtils {
}
return builder.toString();
}
+
+ public static boolean isLiteralTrue(final String value) {
+ return TRUE_VALUE.equalsIgnoreCase(value);
+ }
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
index 44c2f443f..abc7f9906 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
@@ -134,9 +134,11 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
// same so we use them for both here.
switch (mDictFormatVersion) {
case FormatUtils::VERSION_2:
- return FormatUtils::VERSION_2;
case FormatUtils::VERSION_201:
- return FormatUtils::VERSION_201;
+ AKLOGE("Dictionary versions 2 and 201 are incompatible with this version");
+ return FormatUtils::UNKNOWN_VERSION;
+ case FormatUtils::VERSION_202:
+ return FormatUtils::VERSION_202;
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
return FormatUtils::VERSION_4_ONLY_FOR_TESTING;
case FormatUtils::VERSION_4:
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
index 41a8b13b8..d69a53fce 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
@@ -111,7 +111,8 @@ typedef DictionaryHeaderStructurePolicy::AttributeMap AttributeMap;
switch (version) {
case FormatUtils::VERSION_2:
case FormatUtils::VERSION_201:
- // Version 2 or 201 dictionary writing is not supported.
+ case FormatUtils::VERSION_202:
+ // None of the static dictionaries (v2x) support writing
return false;
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
case FormatUtils::VERSION_4:
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
index 08e39ce43..9455222dd 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
@@ -140,7 +140,7 @@ const WordAttributes Ver4PatriciaTriePolicy::getWordAttributesInContext(
const WordAttributes Ver4PatriciaTriePolicy::getWordAttributes(const int probability,
const PtNodeParams &ptNodeParams) const {
- return WordAttributes(probability, ptNodeParams.isBlacklisted(), ptNodeParams.isNotAWord(),
+ return WordAttributes(probability, false /* isBlacklisted */, ptNodeParams.isNotAWord(),
ptNodeParams.getProbability() == 0);
}
@@ -164,7 +164,7 @@ int Ver4PatriciaTriePolicy::getProbabilityOfWord(const WordIdArrayView prevWordI
}
const int ptNodePos = getTerminalPtNodePosFromWordId(wordId);
const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
- if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) {
+ if (ptNodeParams.isDeleted() || ptNodeParams.isNotAWord()) {
return NOT_A_PROBABILITY;
}
if (prevWordIds.empty()) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
index 372c9e36f..a19a384f4 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
@@ -115,7 +115,8 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str
switch (formatVersion) {
case FormatUtils::VERSION_2:
case FormatUtils::VERSION_201:
- AKLOGE("Given path is a directory but the format is version 2 or 201. path: %s", path);
+ case FormatUtils::VERSION_202:
+ AKLOGE("Given path is a directory but the format is version 2xx. path: %s", path);
break;
case FormatUtils::VERSION_4: {
return newPolicyForV4Dict<backward::v402::Ver4DictConstants,
@@ -177,6 +178,9 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str
switch (FormatUtils::detectFormatVersion(mmappedBuffer->getReadOnlyByteArrayView())) {
case FormatUtils::VERSION_2:
case FormatUtils::VERSION_201:
+ AKLOGE("Dictionary versions 2 and 201 are incompatible with this version");
+ break;
+ case FormatUtils::VERSION_202:
return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(
new PatriciaTriePolicy(std::move(mmappedBuffer)));
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
index 585e87a24..e52706e07 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
@@ -144,17 +144,6 @@ class PtNodeParams {
return PatriciaTrieReadingUtils::isTerminal(mFlags);
}
- AK_FORCE_INLINE bool isBlacklisted() const {
- // Note: this method will be removed in the next change.
- // It is used in getProbabilityOfWord and getWordAttributes for both v402 and v403.
- // * getProbabilityOfWord will be changed to no longer return NOT_A_PROBABILITY
- // when isBlacklisted (i.e. to only check if isNotAWord or isDeleted)
- // * getWordAttributes will be changed to always return blacklisted=false and
- // isPossiblyOffensive according to the function below (instead of the current
- // behaviour of checking if the probability is zero)
- return PatriciaTrieReadingUtils::isPossiblyOffensive(mFlags);
- }
-
AK_FORCE_INLINE bool isPossiblyOffensive() const {
return PatriciaTrieReadingUtils::isPossiblyOffensive(mFlags);
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
index 66fd18a52..59873612a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
@@ -14,7 +14,6 @@
* limitations under the License.
*/
-
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h"
#include "defines.h"
@@ -317,8 +316,8 @@ const WordAttributes PatriciaTriePolicy::getWordAttributesInContext(
const WordAttributes PatriciaTriePolicy::getWordAttributes(const int probability,
const PtNodeParams &ptNodeParams) const {
- return WordAttributes(probability, ptNodeParams.isBlacklisted(), ptNodeParams.isNotAWord(),
- ptNodeParams.getProbability() == 0);
+ return WordAttributes(probability, false /* isBlacklisted */, ptNodeParams.isNotAWord(),
+ ptNodeParams.isPossiblyOffensive());
}
int PatriciaTriePolicy::getProbability(const int unigramProbability,
@@ -345,10 +344,9 @@ int PatriciaTriePolicy::getProbabilityOfWord(const WordIdArrayView prevWordIds,
const int ptNodePos = getTerminalPtNodePosFromWordId(wordId);
const PtNodeParams ptNodeParams =
mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
- if (ptNodeParams.isNotAWord() || ptNodeParams.isBlacklisted()) {
- // If this is not a word, or if it's a blacklisted entry, it should behave as
- // having no probability outside of the suggestion process (where it should be used
- // for shortcuts).
+ if (ptNodeParams.isNotAWord()) {
+ // If this is not a word, it should behave as having no probability outside of the
+ // suggestion process (where it should be used for shortcuts).
return NOT_A_PROBABILITY;
}
if (!prevWordIds.empty()) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
index 0cffe569d..8b47147e1 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
@@ -28,9 +28,11 @@ const size_t FormatUtils::DICTIONARY_MINIMUM_SIZE = 12;
/* static */ FormatUtils::FORMAT_VERSION FormatUtils::getFormatVersion(const int formatVersion) {
switch (formatVersion) {
case VERSION_2:
- return VERSION_2;
case VERSION_201:
- return VERSION_201;
+ AKLOGE("Dictionary versions 2 and 201 are incompatible with this version");
+ return UNKNOWN_VERSION;
+ case VERSION_202:
+ return VERSION_202;
case VERSION_4_ONLY_FOR_TESTING:
return VERSION_4_ONLY_FOR_TESTING;
case VERSION_4:
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
index 96310086b..05bd7eb8a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
@@ -31,8 +31,12 @@ class FormatUtils {
public:
enum FORMAT_VERSION {
// These MUST have the same values as the relevant constants in FormatSpec.java.
+ // TODO: Remove VERSION_2 and VERSION_201 when we:
+ // * Confirm that old versions of LatinIME download old-format dictionaries
+ // * No longer need the corresponding constants on the Java side for dicttool
VERSION_2 = 2,
VERSION_201 = 201,
+ VERSION_202 = 202,
VERSION_4_ONLY_FOR_TESTING = 399,
VERSION_4 = 402,
VERSION_4_DEV = 403,
diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java
index a35fa13ce..d239f8dac 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java
@@ -314,14 +314,14 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
final String dictVersion = Long.toString(System.currentTimeMillis());
final String codePointTableAttribute = DictionaryHeader.CODE_POINT_TABLE_KEY;
final File file = BinaryDictUtils.getDictFile(dictName, dictVersion,
- BinaryDictUtils.VERSION201_OPTIONS, getContext().getCacheDir());
+ BinaryDictUtils.STATIC_OPTIONS, getContext().getCacheDir());
// Write a test dictionary
final DictEncoder dictEncoder = new Ver2DictEncoder(file,
Ver2DictEncoder.CODE_POINT_TABLE_ON);
final FormatSpec.FormatOptions formatOptions =
new FormatSpec.FormatOptions(
- FormatSpec.MINIMUM_SUPPORTED_VERSION_OF_CODE_POINT_TABLE);
+ FormatSpec.MINIMUM_SUPPORTED_STATIC_VERSION);
final FusionDictionary sourcedict = new FusionDictionary(new PtNodeArray(),
BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
addUnigrams(words.size(), sourcedict, words, null /* shortcutMap */);
@@ -359,11 +359,11 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
final List<String> results = new ArrayList<>();
runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
- BinaryDictUtils.VERSION2_OPTIONS);
+ BinaryDictUtils.STATIC_OPTIONS);
runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
- BinaryDictUtils.VERSION4_OPTIONS_WITHOUT_TIMESTAMP);
+ BinaryDictUtils.DYNAMIC_OPTIONS_WITHOUT_TIMESTAMP);
runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
- BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP);
+ BinaryDictUtils.DYNAMIC_OPTIONS_WITH_TIMESTAMP);
for (final String result : results) {
Log.d(TAG, result);
}
@@ -373,11 +373,11 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
final List<String> results = new ArrayList<>();
runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
- BinaryDictUtils.VERSION2_OPTIONS);
+ BinaryDictUtils.STATIC_OPTIONS);
runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
- BinaryDictUtils.VERSION4_OPTIONS_WITHOUT_TIMESTAMP);
+ BinaryDictUtils.DYNAMIC_OPTIONS_WITHOUT_TIMESTAMP);
runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
- BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP);
+ BinaryDictUtils.DYNAMIC_OPTIONS_WITH_TIMESTAMP);
for (final String result : results) {
Log.d(TAG, result);
@@ -501,7 +501,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
final ArrayList<String> results = new ArrayList<>();
runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
- BinaryDictUtils.VERSION2_OPTIONS);
+ BinaryDictUtils.STATIC_OPTIONS);
for (final String result : results) {
Log.d(TAG, result);
@@ -512,7 +512,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
final ArrayList<String> results = new ArrayList<>();
runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
- BinaryDictUtils.VERSION2_OPTIONS);
+ BinaryDictUtils.STATIC_OPTIONS);
for (final String result : results) {
Log.d(TAG, result);
@@ -623,9 +623,9 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
final ArrayList<String> results = new ArrayList<>();
runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_ARRAY,
- BinaryDictUtils.VERSION2_OPTIONS);
+ BinaryDictUtils.STATIC_OPTIONS);
runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_BUFFER,
- BinaryDictUtils.VERSION2_OPTIONS);
+ BinaryDictUtils.STATIC_OPTIONS);
for (final String result : results) {
Log.d(TAG, result);
@@ -633,7 +633,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
}
public void testVer2DictGetWordProperty() {
- final FormatOptions formatOptions = BinaryDictUtils.VERSION2_OPTIONS;
+ final FormatOptions formatOptions = BinaryDictUtils.STATIC_OPTIONS;
final ArrayList<String> words = sWords;
final HashMap<String, List<String>> shortcuts = sShortcuts;
final String dictName = "testGetWordProperty";
@@ -669,7 +669,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
}
public void testVer2DictIteration() {
- final FormatOptions formatOptions = BinaryDictUtils.VERSION2_OPTIONS;
+ final FormatOptions formatOptions = BinaryDictUtils.STATIC_OPTIONS;
final ArrayList<String> words = sWords;
final HashMap<String, List<String>> shortcuts = sShortcuts;
final SparseArray<List<Integer>> bigrams = sEmptyBigrams;
diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java
index 60e38250f..ce905c499 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java
@@ -819,12 +819,18 @@ public class BinaryDictEncoderUtils {
final ArrayList<Entry<Integer, Integer>> codePointOccurrenceArray)
throws IOException, UnsupportedFormatException {
final int version = formatOptions.mVersion;
- if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION
- || version > FormatSpec.MAXIMUM_SUPPORTED_VERSION) {
+ if ((version >= FormatSpec.MINIMUM_SUPPORTED_STATIC_VERSION &&
+ version <= FormatSpec.MAXIMUM_SUPPORTED_STATIC_VERSION) || (
+ version >= FormatSpec.MINIMUM_SUPPORTED_DYNAMIC_VERSION &&
+ version <= FormatSpec.MAXIMUM_SUPPORTED_DYNAMIC_VERSION)) {
+ // Dictionary is valid
+ } else {
throw new UnsupportedFormatException("Requested file format version " + version
- + ", but this implementation only supports versions "
- + FormatSpec.MINIMUM_SUPPORTED_VERSION + " through "
- + FormatSpec.MAXIMUM_SUPPORTED_VERSION);
+ + ", but this implementation only supports static versions "
+ + FormatSpec.MINIMUM_SUPPORTED_STATIC_VERSION + " through "
+ + FormatSpec.MAXIMUM_SUPPORTED_STATIC_VERSION + " and dynamic versions "
+ + FormatSpec.MINIMUM_SUPPORTED_DYNAMIC_VERSION + " through "
+ + FormatSpec.MAXIMUM_SUPPORTED_DYNAMIC_VERSION);
}
ByteArrayOutputStream headerBuffer = new ByteArrayOutputStream(256);
diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictUtils.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictUtils.java
index 8eabf749d..9c1e4cf84 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictUtils.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictUtils.java
@@ -28,13 +28,11 @@ public class BinaryDictUtils {
public static final String TEST_DICT_FILE_EXTENSION = ".testDict";
- public static final FormatSpec.FormatOptions VERSION2_OPTIONS =
- new FormatSpec.FormatOptions(FormatSpec.VERSION2);
- public static final FormatSpec.FormatOptions VERSION201_OPTIONS =
- new FormatSpec.FormatOptions(FormatSpec.VERSION201);
- public static final FormatSpec.FormatOptions VERSION4_OPTIONS_WITHOUT_TIMESTAMP =
+ public static final FormatSpec.FormatOptions STATIC_OPTIONS =
+ new FormatSpec.FormatOptions(FormatSpec.VERSION202);
+ public static final FormatSpec.FormatOptions DYNAMIC_OPTIONS_WITHOUT_TIMESTAMP =
new FormatSpec.FormatOptions(FormatSpec.VERSION4, false /* hasTimestamp */);
- public static final FormatSpec.FormatOptions VERSION4_OPTIONS_WITH_TIMESTAMP =
+ public static final FormatSpec.FormatOptions DYNAMIC_OPTIONS_WITH_TIMESTAMP =
new FormatSpec.FormatOptions(FormatSpec.VERSION4, true /* hasTimestamp */);
public static DictionaryOptions makeDictionaryOptions(final String id, final String version,
@@ -55,7 +53,8 @@ public class BinaryDictUtils {
public static File getDictFile(final String name, final String version,
final FormatOptions formatOptions, final File directory) {
if (formatOptions.mVersion == FormatSpec.VERSION2
- || formatOptions.mVersion == FormatSpec.VERSION201) {
+ || formatOptions.mVersion == FormatSpec.VERSION201
+ || formatOptions.mVersion == FormatSpec.VERSION202) {
return new File(directory, name + "." + version + TEST_DICT_FILE_EXTENSION);
} else if (formatOptions.mVersion == FormatSpec.VERSION4) {
return new File(directory, name + "." + version);
@@ -71,7 +70,7 @@ public class BinaryDictUtils {
file.mkdir();
}
return new Ver4DictEncoder(file);
- } else if (formatOptions.mVersion == FormatSpec.VERSION2) {
+ } else if (formatOptions.mVersion == FormatSpec.VERSION202) {
return new Ver2DictEncoder(file, Ver2DictEncoder.CODE_POINT_TABLE_OFF);
} else {
throw new RuntimeException("The format option has a wrong version : "
diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java
index 457e7af8e..5c261a94d 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java
@@ -178,7 +178,8 @@ public class Ver2DictDecoder extends AbstractDictDecoder {
throw new IOException("Cannot read the dictionary header.");
}
if (header.mFormatOptions.mVersion != FormatSpec.VERSION2 &&
- header.mFormatOptions.mVersion != FormatSpec.VERSION201) {
+ header.mFormatOptions.mVersion != FormatSpec.VERSION201 &&
+ header.mFormatOptions.mVersion != FormatSpec.VERSION202) {
throw new UnsupportedFormatException("File header has a wrong version : "
+ header.mFormatOptions.mVersion);
}
diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java
index 2c2152be7..b52b8c485 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java
@@ -124,7 +124,8 @@ public class Ver2DictEncoder implements DictEncoder {
@Override
public void writeDictionary(final FusionDictionary dict, final FormatOptions formatOptions)
throws IOException, UnsupportedFormatException {
- if (formatOptions.mVersion > FormatSpec.VERSION201) {
+ // We no longer support anything but the latest version of v2.
+ if (formatOptions.mVersion != FormatSpec.VERSION202) {
throw new UnsupportedFormatException(
"The given format options has wrong version number : "
+ formatOptions.mVersion);
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java
index 48d2e5922..955c5728c 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java
@@ -98,6 +98,7 @@ public class CombinedInputOutput {
String word = null;
ProbabilityInfo probabilityInfo = new ProbabilityInfo(0);
boolean isNotAWord = false;
+ boolean isPossiblyOffensive = false;
ArrayList<WeightedString> bigrams = new ArrayList<>();
ArrayList<WeightedString> shortcuts = new ArrayList<>();
while (null != (line = reader.readLine())) {
@@ -106,7 +107,7 @@ public class CombinedInputOutput {
if (args[0].matches(CombinedFormatUtils.WORD_TAG + "=.*")) {
if (null != word) {
dict.add(word, probabilityInfo, shortcuts.isEmpty() ? null : shortcuts,
- isNotAWord, false /* isPossiblyOffensive */);
+ isNotAWord, isPossiblyOffensive);
for (WeightedString s : bigrams) {
dict.setBigram(word, s.mWord, s.mProbabilityInfo);
}
@@ -114,27 +115,37 @@ public class CombinedInputOutput {
if (!shortcuts.isEmpty()) shortcuts = new ArrayList<>();
if (!bigrams.isEmpty()) bigrams = new ArrayList<>();
isNotAWord = false;
+ isPossiblyOffensive = false;
for (String param : args) {
final String params[] = param.split("=", 2);
if (2 != params.length) throw new RuntimeException("Wrong format : " + line);
- if (CombinedFormatUtils.WORD_TAG.equals(params[0])) {
- word = params[1];
- } else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) {
- probabilityInfo = new ProbabilityInfo(Integer.parseInt(params[1]),
- probabilityInfo.mTimestamp, probabilityInfo.mLevel,
- probabilityInfo.mCount);
- } else if (CombinedFormatUtils.HISTORICAL_INFO_TAG.equals(params[0])) {
- final String[] historicalInfoParams =
- params[1].split(CombinedFormatUtils.HISTORICAL_INFO_SEPARATOR);
- if (historicalInfoParams.length != HISTORICAL_INFO_ELEMENT_COUNT) {
- throw new RuntimeException("Wrong format (historical info) : " + line);
- }
- probabilityInfo = new ProbabilityInfo(probabilityInfo.mProbability,
- Integer.parseInt(historicalInfoParams[0]),
- Integer.parseInt(historicalInfoParams[1]),
- Integer.parseInt(historicalInfoParams[2]));
- } else if (CombinedFormatUtils.NOT_A_WORD_TAG.equals(params[0])) {
- isNotAWord = "true".equals(params[1]);
+ switch (params[0]) {
+ case CombinedFormatUtils.WORD_TAG:
+ word = params[1];
+ break;
+ case CombinedFormatUtils.PROBABILITY_TAG:
+ probabilityInfo = new ProbabilityInfo(Integer.parseInt(params[1]),
+ probabilityInfo.mTimestamp, probabilityInfo.mLevel,
+ probabilityInfo.mCount);
+ break;
+ case CombinedFormatUtils.HISTORICAL_INFO_TAG:
+ final String[] historicalInfoParams = params[1].split(
+ CombinedFormatUtils.HISTORICAL_INFO_SEPARATOR);
+ if (historicalInfoParams.length != HISTORICAL_INFO_ELEMENT_COUNT) {
+ throw new RuntimeException("Wrong format (historical info) : "
+ + line);
+ }
+ probabilityInfo = new ProbabilityInfo(probabilityInfo.mProbability,
+ Integer.parseInt(historicalInfoParams[0]),
+ Integer.parseInt(historicalInfoParams[1]),
+ Integer.parseInt(historicalInfoParams[2]));
+ break;
+ case CombinedFormatUtils.NOT_A_WORD_TAG:
+ isNotAWord = CombinedFormatUtils.isLiteralTrue(params[1]);
+ break;
+ case CombinedFormatUtils.POSSIBLY_OFFENSIVE_TAG:
+ isPossiblyOffensive = CombinedFormatUtils.isLiteralTrue(params[1]);
+ break;
}
}
} else if (args[0].matches(CombinedFormatUtils.SHORTCUT_TAG + "=.*")) {
@@ -190,7 +201,7 @@ public class CombinedInputOutput {
}
if (null != word) {
dict.add(word, probabilityInfo, shortcuts.isEmpty() ? null : shortcuts, isNotAWord,
- false /* isPossiblyOffensive */);
+ isPossiblyOffensive);
for (WeightedString s : bigrams) {
dict.setBigram(word, s.mWord, s.mProbabilityInfo);
}
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java
index 8f9e4a3a6..6187853c8 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java
@@ -120,7 +120,7 @@ public class DictionaryMaker {
String inputCombined = null;
String outputBinary = null;
String outputCombined = null;
- int outputBinaryFormatVersion = FormatSpec.VERSION201; // the default version is 201.
+ int outputBinaryFormatVersion = FormatSpec.VERSION202; // the default version is 202.
// Don't use code point table by default.
int codePointTableMode = Ver2DictEncoder.CODE_POINT_TABLE_OFF;