diff options
38 files changed, 108 insertions, 82 deletions
diff --git a/dictionaries/de_wordlist.combined.gz b/dictionaries/de_wordlist.combined.gz Binary files differ index 803211c01..92c95540c 100644 --- a/dictionaries/de_wordlist.combined.gz +++ b/dictionaries/de_wordlist.combined.gz diff --git a/dictionaries/en_GB_wordlist.combined.gz b/dictionaries/en_GB_wordlist.combined.gz Binary files differ index 1fa9b85ea..217660fc4 100644 --- a/dictionaries/en_GB_wordlist.combined.gz +++ b/dictionaries/en_GB_wordlist.combined.gz diff --git a/dictionaries/en_US_wordlist.combined.gz b/dictionaries/en_US_wordlist.combined.gz Binary files differ index 2e039ff05..8aed9c5e0 100644 --- a/dictionaries/en_US_wordlist.combined.gz +++ b/dictionaries/en_US_wordlist.combined.gz diff --git a/dictionaries/en_wordlist.combined.gz b/dictionaries/en_wordlist.combined.gz Binary files differ index e845346d6..7fe6618cf 100644 --- a/dictionaries/en_wordlist.combined.gz +++ b/dictionaries/en_wordlist.combined.gz diff --git a/dictionaries/es_wordlist.combined.gz b/dictionaries/es_wordlist.combined.gz Binary files differ index 3391e64b4..71e7309fc 100644 --- a/dictionaries/es_wordlist.combined.gz +++ b/dictionaries/es_wordlist.combined.gz diff --git a/dictionaries/fr_wordlist.combined.gz b/dictionaries/fr_wordlist.combined.gz Binary files differ index 1b9fd73f9..afe44a6d9 100644 --- a/dictionaries/fr_wordlist.combined.gz +++ b/dictionaries/fr_wordlist.combined.gz diff --git a/dictionaries/it_wordlist.combined.gz b/dictionaries/it_wordlist.combined.gz Binary files differ index 5a5cbdc7a..ed58a12c5 100644 --- a/dictionaries/it_wordlist.combined.gz +++ b/dictionaries/it_wordlist.combined.gz diff --git a/dictionaries/nl_wordlist.combined.gz b/dictionaries/nl_wordlist.combined.gz Binary files differ index 37ba8ab42..19c3a7ea8 100644 --- a/dictionaries/nl_wordlist.combined.gz +++ b/dictionaries/nl_wordlist.combined.gz diff --git a/dictionaries/pl_wordlist.combined.gz b/dictionaries/pl_wordlist.combined.gz Binary files differ index ba71a5581..2b84eecfd 100644 --- 
a/dictionaries/pl_wordlist.combined.gz +++ b/dictionaries/pl_wordlist.combined.gz diff --git a/dictionaries/pt_BR_wordlist.combined.gz b/dictionaries/pt_BR_wordlist.combined.gz Binary files differ index 02df1c1ee..7aac61e50 100644 --- a/dictionaries/pt_BR_wordlist.combined.gz +++ b/dictionaries/pt_BR_wordlist.combined.gz diff --git a/dictionaries/pt_PT_wordlist.combined.gz b/dictionaries/pt_PT_wordlist.combined.gz Binary files differ index bcd50ab03..5bf9a60e8 100644 --- a/dictionaries/pt_PT_wordlist.combined.gz +++ b/dictionaries/pt_PT_wordlist.combined.gz diff --git a/dictionaries/ru_wordlist.combined.gz b/dictionaries/ru_wordlist.combined.gz Binary files differ index 401ad08b0..5e9266221 100644 --- a/dictionaries/ru_wordlist.combined.gz +++ b/dictionaries/ru_wordlist.combined.gz diff --git a/dictionaries/sv_wordlist.combined.gz b/dictionaries/sv_wordlist.combined.gz Binary files differ index b6ebab320..db44ae4c4 100644 --- a/dictionaries/sv_wordlist.combined.gz +++ b/dictionaries/sv_wordlist.combined.gz diff --git a/dictionaries/tr_wordlist.combined.gz b/dictionaries/tr_wordlist.combined.gz Binary files differ index 306cea184..d3c8825b9 100644 --- a/dictionaries/tr_wordlist.combined.gz +++ b/dictionaries/tr_wordlist.combined.gz diff --git a/java/res/raw/main_de.dict b/java/res/raw/main_de.dict Binary files differ index 45b288375..c3c2cbe46 100644 --- a/java/res/raw/main_de.dict +++ b/java/res/raw/main_de.dict diff --git a/java/res/raw/main_en.dict b/java/res/raw/main_en.dict Binary files differ index 5bbb85761..b9e5bc77b 100644 --- a/java/res/raw/main_en.dict +++ b/java/res/raw/main_en.dict diff --git a/java/res/raw/main_es.dict b/java/res/raw/main_es.dict Binary files differ index fae131850..076d5aa8f 100644 --- a/java/res/raw/main_es.dict +++ b/java/res/raw/main_es.dict diff --git a/java/res/raw/main_fr.dict b/java/res/raw/main_fr.dict Binary files differ index 19532d9bf..0e8686092 100644 --- a/java/res/raw/main_fr.dict +++ b/java/res/raw/main_fr.dict diff --git 
a/java/res/raw/main_it.dict b/java/res/raw/main_it.dict Binary files differ index ff11b9798..609ef13b7 100644 --- a/java/res/raw/main_it.dict +++ b/java/res/raw/main_it.dict diff --git a/java/res/raw/main_pt_br.dict b/java/res/raw/main_pt_br.dict Binary files differ index 9fa50442a..c33865187 100644 --- a/java/res/raw/main_pt_br.dict +++ b/java/res/raw/main_pt_br.dict diff --git a/java/res/raw/main_ru.dict b/java/res/raw/main_ru.dict Binary files differ index 76b5f805a..d0af70730 100644 --- a/java/res/raw/main_ru.dict +++ b/java/res/raw/main_ru.dict diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java index 78d79ae50..5925bdc4e 100644 --- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java +++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java @@ -171,14 +171,17 @@ public final class FormatSpec { // ExpandableDictionary.matchesExpectedBinaryDictFormatVersionForThisType(). public static final int VERSION2 = 2; public static final int VERSION201 = 201; + public static final int VERSION202 = 202; public static final int MINIMUM_SUPPORTED_VERSION_OF_CODE_POINT_TABLE = VERSION201; // Dictionary version used for testing. public static final int VERSION4_ONLY_FOR_TESTING = 399; public static final int VERSION401 = 401; public static final int VERSION4 = 402; public static final int VERSION4_DEV = 403; - static final int MINIMUM_SUPPORTED_VERSION = VERSION2; - static final int MAXIMUM_SUPPORTED_VERSION = VERSION4_DEV; + static final int MINIMUM_SUPPORTED_STATIC_VERSION = VERSION202; + static final int MAXIMUM_SUPPORTED_STATIC_VERSION = VERSION202; + static final int MINIMUM_SUPPORTED_DYNAMIC_VERSION = VERSION4; + static final int MAXIMUM_SUPPORTED_DYNAMIC_VERSION = VERSION4_DEV; // TODO: Make this value adaptative to content data, store it in the header, and // use it in the reading code. 
diff --git a/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java b/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java index 4e0f5f583..8699f2ce7 100644 --- a/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java +++ b/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java @@ -36,7 +36,8 @@ public class CombinedFormatUtils { public static final String WORD_TAG = "word"; public static final String BEGINNING_OF_SENTENCE_TAG = "beginning_of_sentence"; public static final String NOT_A_WORD_TAG = "not_a_word"; - public static final String BLACKLISTED_TAG = "blacklisted"; + public static final String POSSIBLY_OFFENSIVE_TAG = "possibly_offensive"; + public static final String TRUE_VALUE = "true"; public static String formatAttributeMap(final HashMap<String, String> attributeMap) { final StringBuilder builder = new StringBuilder(); @@ -61,13 +62,13 @@ public class CombinedFormatUtils { builder.append(","); builder.append(formatProbabilityInfo(wordProperty.mProbabilityInfo)); if (wordProperty.mIsBeginningOfSentence) { - builder.append("," + BEGINNING_OF_SENTENCE_TAG + "=true"); + builder.append("," + BEGINNING_OF_SENTENCE_TAG + "=" + TRUE_VALUE); } if (wordProperty.mIsNotAWord) { - builder.append("," + NOT_A_WORD_TAG + "=true"); + builder.append("," + NOT_A_WORD_TAG + "=" + TRUE_VALUE); } if (wordProperty.mIsPossiblyOffensive) { - builder.append("," + BLACKLISTED_TAG + "=true"); + builder.append("," + POSSIBLY_OFFENSIVE_TAG + "=" + TRUE_VALUE); } builder.append("\n"); if (wordProperty.mHasShortcuts) { @@ -111,4 +112,8 @@ public class CombinedFormatUtils { } return builder.toString(); } + + public static boolean isLiteralTrue(final String value) { + return TRUE_VALUE.equalsIgnoreCase(value); + } } diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h index 44c2f443f..abc7f9906 100644 --- 
a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h @@ -134,9 +134,11 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { // same so we use them for both here. switch (mDictFormatVersion) { case FormatUtils::VERSION_2: - return FormatUtils::VERSION_2; case FormatUtils::VERSION_201: - return FormatUtils::VERSION_201; + AKLOGE("Dictionary versions 2 and 201 are incompatible with this version"); + return FormatUtils::UNKNOWN_VERSION; + case FormatUtils::VERSION_202: + return FormatUtils::VERSION_202; case FormatUtils::VERSION_4_ONLY_FOR_TESTING: return FormatUtils::VERSION_4_ONLY_FOR_TESTING; case FormatUtils::VERSION_4: diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp index 41a8b13b8..d69a53fce 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp @@ -111,7 +111,8 @@ typedef DictionaryHeaderStructurePolicy::AttributeMap AttributeMap; switch (version) { case FormatUtils::VERSION_2: case FormatUtils::VERSION_201: - // Version 2 or 201 dictionary writing is not supported. 
+ case FormatUtils::VERSION_202: + // None of the static dictionaries (v2x) support writing return false; case FormatUtils::VERSION_4_ONLY_FOR_TESTING: case FormatUtils::VERSION_4: diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp index 08e39ce43..9455222dd 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp @@ -140,7 +140,7 @@ const WordAttributes Ver4PatriciaTriePolicy::getWordAttributesInContext( const WordAttributes Ver4PatriciaTriePolicy::getWordAttributes(const int probability, const PtNodeParams &ptNodeParams) const { - return WordAttributes(probability, ptNodeParams.isBlacklisted(), ptNodeParams.isNotAWord(), + return WordAttributes(probability, false /* isBlacklisted */, ptNodeParams.isNotAWord(), ptNodeParams.getProbability() == 0); } @@ -164,7 +164,7 @@ int Ver4PatriciaTriePolicy::getProbabilityOfWord(const WordIdArrayView prevWordI } const int ptNodePos = getTerminalPtNodePosFromWordId(wordId); const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos)); - if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) { + if (ptNodeParams.isDeleted() || ptNodeParams.isNotAWord()) { return NOT_A_PROBABILITY; } if (prevWordIds.empty()) { diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp index 372c9e36f..a19a384f4 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp +++ 
b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp @@ -115,7 +115,8 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str switch (formatVersion) { case FormatUtils::VERSION_2: case FormatUtils::VERSION_201: - AKLOGE("Given path is a directory but the format is version 2 or 201. path: %s", path); + case FormatUtils::VERSION_202: + AKLOGE("Given path is a directory but the format is version 2xx. path: %s", path); break; case FormatUtils::VERSION_4: { return newPolicyForV4Dict<backward::v402::Ver4DictConstants, @@ -177,6 +178,9 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str switch (FormatUtils::detectFormatVersion(mmappedBuffer->getReadOnlyByteArrayView())) { case FormatUtils::VERSION_2: case FormatUtils::VERSION_201: + AKLOGE("Dictionary versions 2 and 201 are incompatible with this version"); + break; + case FormatUtils::VERSION_202: return DictionaryStructureWithBufferPolicy::StructurePolicyPtr( new PatriciaTriePolicy(std::move(mmappedBuffer))); case FormatUtils::VERSION_4_ONLY_FOR_TESTING: diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h index 585e87a24..e52706e07 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h @@ -144,17 +144,6 @@ class PtNodeParams { return PatriciaTrieReadingUtils::isTerminal(mFlags); } - AK_FORCE_INLINE bool isBlacklisted() const { - // Note: this method will be removed in the next change. - // It is used in getProbabilityOfWord and getWordAttributes for both v402 and v403. - // * getProbabilityOfWord will be changed to no longer return NOT_A_PROBABILITY - // when isBlacklisted (i.e. 
to only check if isNotAWord or isDeleted) - // * getWordAttributes will be changed to always return blacklisted=false and - // isPossiblyOffensive according to the function below (instead of the current - // behaviour of checking if the probability is zero) - return PatriciaTrieReadingUtils::isPossiblyOffensive(mFlags); - } - AK_FORCE_INLINE bool isPossiblyOffensive() const { return PatriciaTrieReadingUtils::isPossiblyOffensive(mFlags); } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp index 66fd18a52..59873612a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp @@ -14,7 +14,6 @@ * limitations under the License. */ - #include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h" #include "defines.h" @@ -317,8 +316,8 @@ const WordAttributes PatriciaTriePolicy::getWordAttributesInContext( const WordAttributes PatriciaTriePolicy::getWordAttributes(const int probability, const PtNodeParams &ptNodeParams) const { - return WordAttributes(probability, ptNodeParams.isBlacklisted(), ptNodeParams.isNotAWord(), - ptNodeParams.getProbability() == 0); + return WordAttributes(probability, false /* isBlacklisted */, ptNodeParams.isNotAWord(), + ptNodeParams.isPossiblyOffensive()); } int PatriciaTriePolicy::getProbability(const int unigramProbability, @@ -345,10 +344,9 @@ int PatriciaTriePolicy::getProbabilityOfWord(const WordIdArrayView prevWordIds, const int ptNodePos = getTerminalPtNodePosFromWordId(wordId); const PtNodeParams ptNodeParams = mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos); - if (ptNodeParams.isNotAWord() || ptNodeParams.isBlacklisted()) { - // If this is not a word, or if it's a blacklisted entry, it should behave as - // having no probability outside of the suggestion 
process (where it should be used - // for shortcuts). + if (ptNodeParams.isNotAWord()) { + // If this is not a word, it should behave as having no probability outside of the + // suggestion process (where it should be used for shortcuts). return NOT_A_PROBABILITY; } if (!prevWordIds.empty()) { diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp index 0cffe569d..8b47147e1 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp @@ -28,9 +28,11 @@ const size_t FormatUtils::DICTIONARY_MINIMUM_SIZE = 12; /* static */ FormatUtils::FORMAT_VERSION FormatUtils::getFormatVersion(const int formatVersion) { switch (formatVersion) { case VERSION_2: - return VERSION_2; case VERSION_201: - return VERSION_201; + AKLOGE("Dictionary versions 2 and 201 are incompatible with this version"); + return UNKNOWN_VERSION; + case VERSION_202: + return VERSION_202; case VERSION_4_ONLY_FOR_TESTING: return VERSION_4_ONLY_FOR_TESTING; case VERSION_4: diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h index 96310086b..05bd7eb8a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h @@ -31,8 +31,12 @@ class FormatUtils { public: enum FORMAT_VERSION { // These MUST have the same values as the relevant constants in FormatSpec.java. 
+ // TODO: Remove VERSION_2 and VERSION_201 when we: + // * Confirm that old versions of LatinIME download old-format dictionaries + // * We no longer need the corresponding constants on the Java side for dicttool VERSION_2 = 2, VERSION_201 = 201, + VERSION_202 = 202, VERSION_4_ONLY_FOR_TESTING = 399, VERSION_4 = 402, VERSION_4_DEV = 403, diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java index a35fa13ce..d239f8dac 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java @@ -314,14 +314,14 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { final String dictVersion = Long.toString(System.currentTimeMillis()); final String codePointTableAttribute = DictionaryHeader.CODE_POINT_TABLE_KEY; final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, - BinaryDictUtils.VERSION201_OPTIONS, getContext().getCacheDir()); + BinaryDictUtils.STATIC_OPTIONS, getContext().getCacheDir()); // Write a test dictionary final DictEncoder dictEncoder = new Ver2DictEncoder(file, Ver2DictEncoder.CODE_POINT_TABLE_ON); final FormatSpec.FormatOptions formatOptions = new FormatSpec.FormatOptions( - FormatSpec.MINIMUM_SUPPORTED_VERSION_OF_CODE_POINT_TABLE); + FormatSpec.MINIMUM_SUPPORTED_STATIC_VERSION); final FusionDictionary sourcedict = new FusionDictionary(new PtNodeArray(), BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions)); addUnigrams(words.size(), sourcedict, words, null /* shortcutMap */); @@ -359,11 +359,11 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { final List<String> results = new ArrayList<>(); runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER, - BinaryDictUtils.VERSION2_OPTIONS); + BinaryDictUtils.STATIC_OPTIONS); 
runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER, - BinaryDictUtils.VERSION4_OPTIONS_WITHOUT_TIMESTAMP); + BinaryDictUtils.DYNAMIC_OPTIONS_WITHOUT_TIMESTAMP); runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER, - BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP); + BinaryDictUtils.DYNAMIC_OPTIONS_WITH_TIMESTAMP); for (final String result : results) { Log.d(TAG, result); } @@ -373,11 +373,11 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { final List<String> results = new ArrayList<>(); runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY, - BinaryDictUtils.VERSION2_OPTIONS); + BinaryDictUtils.STATIC_OPTIONS); runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY, - BinaryDictUtils.VERSION4_OPTIONS_WITHOUT_TIMESTAMP); + BinaryDictUtils.DYNAMIC_OPTIONS_WITHOUT_TIMESTAMP); runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY, - BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP); + BinaryDictUtils.DYNAMIC_OPTIONS_WITH_TIMESTAMP); for (final String result : results) { Log.d(TAG, result); @@ -501,7 +501,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { final ArrayList<String> results = new ArrayList<>(); runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_BUFFER, - BinaryDictUtils.VERSION2_OPTIONS); + BinaryDictUtils.STATIC_OPTIONS); for (final String result : results) { Log.d(TAG, result); @@ -512,7 +512,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { final ArrayList<String> results = new ArrayList<>(); runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_ARRAY, - BinaryDictUtils.VERSION2_OPTIONS); + BinaryDictUtils.STATIC_OPTIONS); for (final String result : results) { Log.d(TAG, result); @@ -623,9 +623,9 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { final ArrayList<String> results = new ArrayList<>(); runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_ARRAY, - BinaryDictUtils.VERSION2_OPTIONS); + 
BinaryDictUtils.STATIC_OPTIONS); runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_BUFFER, - BinaryDictUtils.VERSION2_OPTIONS); + BinaryDictUtils.STATIC_OPTIONS); for (final String result : results) { Log.d(TAG, result); @@ -633,7 +633,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { } public void testVer2DictGetWordProperty() { - final FormatOptions formatOptions = BinaryDictUtils.VERSION2_OPTIONS; + final FormatOptions formatOptions = BinaryDictUtils.STATIC_OPTIONS; final ArrayList<String> words = sWords; final HashMap<String, List<String>> shortcuts = sShortcuts; final String dictName = "testGetWordProperty"; @@ -669,7 +669,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { } public void testVer2DictIteration() { - final FormatOptions formatOptions = BinaryDictUtils.VERSION2_OPTIONS; + final FormatOptions formatOptions = BinaryDictUtils.STATIC_OPTIONS; final ArrayList<String> words = sWords; final HashMap<String, List<String>> shortcuts = sShortcuts; final SparseArray<List<Integer>> bigrams = sEmptyBigrams; diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java index 60e38250f..ce905c499 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java @@ -819,12 +819,18 @@ public class BinaryDictEncoderUtils { final ArrayList<Entry<Integer, Integer>> codePointOccurrenceArray) throws IOException, UnsupportedFormatException { final int version = formatOptions.mVersion; - if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION - || version > FormatSpec.MAXIMUM_SUPPORTED_VERSION) { + if ((version >= FormatSpec.MINIMUM_SUPPORTED_STATIC_VERSION && + version <= FormatSpec.MAXIMUM_SUPPORTED_STATIC_VERSION) || ( + version >= FormatSpec.MINIMUM_SUPPORTED_DYNAMIC_VERSION && + version <= 
FormatSpec.MAXIMUM_SUPPORTED_DYNAMIC_VERSION)) { + // Dictionary is valid + } else { throw new UnsupportedFormatException("Requested file format version " + version - + ", but this implementation only supports versions " - + FormatSpec.MINIMUM_SUPPORTED_VERSION + " through " - + FormatSpec.MAXIMUM_SUPPORTED_VERSION); + + ", but this implementation only supports static versions " + + FormatSpec.MINIMUM_SUPPORTED_STATIC_VERSION + " through " + + FormatSpec.MAXIMUM_SUPPORTED_STATIC_VERSION + " and dynamic versions " + + FormatSpec.MINIMUM_SUPPORTED_DYNAMIC_VERSION + " through " + + FormatSpec.MAXIMUM_SUPPORTED_DYNAMIC_VERSION); } ByteArrayOutputStream headerBuffer = new ByteArrayOutputStream(256); diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictUtils.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictUtils.java index 8eabf749d..9c1e4cf84 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictUtils.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictUtils.java @@ -28,13 +28,11 @@ public class BinaryDictUtils { public static final String TEST_DICT_FILE_EXTENSION = ".testDict"; - public static final FormatSpec.FormatOptions VERSION2_OPTIONS = - new FormatSpec.FormatOptions(FormatSpec.VERSION2); - public static final FormatSpec.FormatOptions VERSION201_OPTIONS = - new FormatSpec.FormatOptions(FormatSpec.VERSION201); - public static final FormatSpec.FormatOptions VERSION4_OPTIONS_WITHOUT_TIMESTAMP = + public static final FormatSpec.FormatOptions STATIC_OPTIONS = + new FormatSpec.FormatOptions(FormatSpec.VERSION202); + public static final FormatSpec.FormatOptions DYNAMIC_OPTIONS_WITHOUT_TIMESTAMP = new FormatSpec.FormatOptions(FormatSpec.VERSION4, false /* hasTimestamp */); - public static final FormatSpec.FormatOptions VERSION4_OPTIONS_WITH_TIMESTAMP = + public static final FormatSpec.FormatOptions DYNAMIC_OPTIONS_WITH_TIMESTAMP = new FormatSpec.FormatOptions(FormatSpec.VERSION4, true /* hasTimestamp 
*/); public static DictionaryOptions makeDictionaryOptions(final String id, final String version, @@ -55,7 +53,8 @@ public class BinaryDictUtils { public static File getDictFile(final String name, final String version, final FormatOptions formatOptions, final File directory) { if (formatOptions.mVersion == FormatSpec.VERSION2 - || formatOptions.mVersion == FormatSpec.VERSION201) { + || formatOptions.mVersion == FormatSpec.VERSION201 + || formatOptions.mVersion == FormatSpec.VERSION202) { return new File(directory, name + "." + version + TEST_DICT_FILE_EXTENSION); } else if (formatOptions.mVersion == FormatSpec.VERSION4) { return new File(directory, name + "." + version); @@ -71,7 +70,7 @@ public class BinaryDictUtils { file.mkdir(); } return new Ver4DictEncoder(file); - } else if (formatOptions.mVersion == FormatSpec.VERSION2) { + } else if (formatOptions.mVersion == FormatSpec.VERSION202) { return new Ver2DictEncoder(file, Ver2DictEncoder.CODE_POINT_TABLE_OFF); } else { throw new RuntimeException("The format option has a wrong version : " diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java index 457e7af8e..5c261a94d 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java +++ b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java @@ -178,7 +178,8 @@ public class Ver2DictDecoder extends AbstractDictDecoder { throw new IOException("Cannot read the dictionary header."); } if (header.mFormatOptions.mVersion != FormatSpec.VERSION2 && - header.mFormatOptions.mVersion != FormatSpec.VERSION201) { + header.mFormatOptions.mVersion != FormatSpec.VERSION201 && + header.mFormatOptions.mVersion != FormatSpec.VERSION202) { throw new UnsupportedFormatException("File header has a wrong version : " + header.mFormatOptions.mVersion); } diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java 
b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java index 2c2152be7..b52b8c485 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java +++ b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java @@ -124,7 +124,8 @@ public class Ver2DictEncoder implements DictEncoder { @Override public void writeDictionary(final FusionDictionary dict, final FormatOptions formatOptions) throws IOException, UnsupportedFormatException { - if (formatOptions.mVersion > FormatSpec.VERSION201) { + // We no longer support anything but the latest version of v2. + if (formatOptions.mVersion != FormatSpec.VERSION202) { throw new UnsupportedFormatException( "The given format options has wrong version number : " + formatOptions.mVersion); diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java index 48d2e5922..955c5728c 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java @@ -98,6 +98,7 @@ public class CombinedInputOutput { String word = null; ProbabilityInfo probabilityInfo = new ProbabilityInfo(0); boolean isNotAWord = false; + boolean isPossiblyOffensive = false; ArrayList<WeightedString> bigrams = new ArrayList<>(); ArrayList<WeightedString> shortcuts = new ArrayList<>(); while (null != (line = reader.readLine())) { @@ -106,7 +107,7 @@ public class CombinedInputOutput { if (args[0].matches(CombinedFormatUtils.WORD_TAG + "=.*")) { if (null != word) { dict.add(word, probabilityInfo, shortcuts.isEmpty() ? 
null : shortcuts, - isNotAWord, false /* isPossiblyOffensive */); + isNotAWord, isPossiblyOffensive); for (WeightedString s : bigrams) { dict.setBigram(word, s.mWord, s.mProbabilityInfo); } @@ -114,27 +115,37 @@ public class CombinedInputOutput { if (!shortcuts.isEmpty()) shortcuts = new ArrayList<>(); if (!bigrams.isEmpty()) bigrams = new ArrayList<>(); isNotAWord = false; + isPossiblyOffensive = false; for (String param : args) { final String params[] = param.split("=", 2); if (2 != params.length) throw new RuntimeException("Wrong format : " + line); - if (CombinedFormatUtils.WORD_TAG.equals(params[0])) { - word = params[1]; - } else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) { - probabilityInfo = new ProbabilityInfo(Integer.parseInt(params[1]), - probabilityInfo.mTimestamp, probabilityInfo.mLevel, - probabilityInfo.mCount); - } else if (CombinedFormatUtils.HISTORICAL_INFO_TAG.equals(params[0])) { - final String[] historicalInfoParams = - params[1].split(CombinedFormatUtils.HISTORICAL_INFO_SEPARATOR); - if (historicalInfoParams.length != HISTORICAL_INFO_ELEMENT_COUNT) { - throw new RuntimeException("Wrong format (historical info) : " + line); - } - probabilityInfo = new ProbabilityInfo(probabilityInfo.mProbability, - Integer.parseInt(historicalInfoParams[0]), - Integer.parseInt(historicalInfoParams[1]), - Integer.parseInt(historicalInfoParams[2])); - } else if (CombinedFormatUtils.NOT_A_WORD_TAG.equals(params[0])) { - isNotAWord = "true".equals(params[1]); + switch (params[0]) { + case CombinedFormatUtils.WORD_TAG: + word = params[1]; + break; + case CombinedFormatUtils.PROBABILITY_TAG: + probabilityInfo = new ProbabilityInfo(Integer.parseInt(params[1]), + probabilityInfo.mTimestamp, probabilityInfo.mLevel, + probabilityInfo.mCount); + break; + case CombinedFormatUtils.HISTORICAL_INFO_TAG: + final String[] historicalInfoParams = params[1].split( + CombinedFormatUtils.HISTORICAL_INFO_SEPARATOR); + if (historicalInfoParams.length != 
HISTORICAL_INFO_ELEMENT_COUNT) { + throw new RuntimeException("Wrong format (historical info) : " + + line); + } + probabilityInfo = new ProbabilityInfo(probabilityInfo.mProbability, + Integer.parseInt(historicalInfoParams[0]), + Integer.parseInt(historicalInfoParams[1]), + Integer.parseInt(historicalInfoParams[2])); + break; + case CombinedFormatUtils.NOT_A_WORD_TAG: + isNotAWord = CombinedFormatUtils.isLiteralTrue(params[1]); + break; + case CombinedFormatUtils.POSSIBLY_OFFENSIVE_TAG: + isPossiblyOffensive = CombinedFormatUtils.isLiteralTrue(params[1]); + break; } } } else if (args[0].matches(CombinedFormatUtils.SHORTCUT_TAG + "=.*")) { @@ -190,7 +201,7 @@ public class CombinedInputOutput { } if (null != word) { dict.add(word, probabilityInfo, shortcuts.isEmpty() ? null : shortcuts, isNotAWord, - false /* isPossiblyOffensive */); + isPossiblyOffensive); for (WeightedString s : bigrams) { dict.setBigram(word, s.mWord, s.mProbabilityInfo); } diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java index 8f9e4a3a6..6187853c8 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java @@ -120,7 +120,7 @@ public class DictionaryMaker { String inputCombined = null; String outputBinary = null; String outputCombined = null; - int outputBinaryFormatVersion = FormatSpec.VERSION201; // the default version is 201. + int outputBinaryFormatVersion = FormatSpec.VERSION202; // the default version is 202. // Don't use code point table by default. int codePointTableMode = Ver2DictEncoder.CODE_POINT_TABLE_OFF; |