aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp43
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h8
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp28
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h49
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h3
5 files changed, 73 insertions, 58 deletions
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp
index f48386bba..5d14a0554 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp
@@ -31,9 +31,9 @@ const int BinaryDictionaryFormatUtils::DICTIONARY_MINIMUM_SIZE = 4;
// The versions of Latin IME that only handle format version 1 only test for the magic
// number, so we had to change it so that version 2 files would be rejected by older
// implementations. On this occasion, we made the magic number 32 bits long.
-const uint32_t BinaryDictionaryFormatUtils::FORMAT_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE;
+const uint32_t BinaryDictionaryFormatUtils::HEADER_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE;
// Magic number (4 bytes), version (2 bytes), options (2 bytes), header size (4 bytes) = 12
-const int BinaryDictionaryFormatUtils::FORMAT_VERSION_2_MINIMUM_SIZE = 12;
+const int BinaryDictionaryFormatUtils::HEADER_VERSION_2_MINIMUM_SIZE = 12;
/* static */ BinaryDictionaryFormatUtils::FORMAT_VERSION
BinaryDictionaryFormatUtils::detectFormatVersion(const uint8_t *const dict,
@@ -46,25 +46,28 @@ const int BinaryDictionaryFormatUtils::FORMAT_VERSION_2_MINIMUM_SIZE = 12;
}
const uint32_t magicNumber = ByteArrayUtils::readUint32(dict, 0);
switch (magicNumber) {
- case FORMAT_VERSION_2_MAGIC_NUMBER:
- // Version 2 dictionaries are at least 12 bytes long.
- // If this dictionary has the version 2 magic number but is less than 12 bytes long,
- // then it's an unknown format and we need to avoid confidently reading the next bytes.
- if (dictSize < FORMAT_VERSION_2_MINIMUM_SIZE) {
+ case HEADER_VERSION_2_MAGIC_NUMBER:
+ // Version 2 headers are at least 12 bytes long.
+ // If this header has the version 2 magic number but is less than 12 bytes long,
+ // then it's an unknown format and we need to avoid confidently reading the next bytes.
+ if (dictSize < HEADER_VERSION_2_MINIMUM_SIZE) {
+ return UNKNOWN_VERSION;
+ }
+ // Version 2 header is as follows:
+ // Magic number (4 bytes) 0x9B 0xC1 0x3A 0xFE
+ // Version number (2 bytes)
+ // Options (2 bytes)
+ // Header size (4 bytes) : integer, big endian
+ if (ByteArrayUtils::readUint16(dict, 4) == 2) {
+ return VERSION_2;
+ } else if (ByteArrayUtils::readUint16(dict, 4) == 3) {
+ // TODO: Support version 3 dictionaries.
+ return UNKNOWN_VERSION;
+ } else {
+ return UNKNOWN_VERSION;
+ }
+ default:
return UNKNOWN_VERSION;
- }
- // Format 2 header is as follows:
- // Magic number (4 bytes) 0x9B 0xC1 0x3A 0xFE
- // Version number (2 bytes) 0x00 0x02
- // Options (2 bytes)
- // Header size (4 bytes) : integer, big endian
- if (ByteArrayUtils::readUint16(dict, 4) == 2) {
- return VERSION_2;
- } else {
- return UNKNOWN_VERSION;
- }
- default:
- return UNKNOWN_VERSION;
}
}
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h
index 80067b255..830684c70 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h
@@ -33,9 +33,9 @@ namespace latinime {
*/
class BinaryDictionaryFormatUtils {
public:
- // TODO: Support version 3 format.
enum FORMAT_VERSION {
- VERSION_2 = 1,
+ VERSION_2,
+ VERSION_3,
UNKNOWN_VERSION
};
@@ -45,8 +45,8 @@ class BinaryDictionaryFormatUtils {
DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryFormatUtils);
static const int DICTIONARY_MINIMUM_SIZE;
- static const uint32_t FORMAT_VERSION_2_MAGIC_NUMBER;
- static const int FORMAT_VERSION_2_MINIMUM_SIZE;
+ static const uint32_t HEADER_VERSION_2_MAGIC_NUMBER;
+ static const int HEADER_VERSION_2_MINIMUM_SIZE;
};
} // namespace latinime
#endif /* LATINIME_BINARY_DICTIONARY_FORMAT_UTILS_H */
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp
index c4c4bedde..a57b0f859 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp
@@ -26,10 +26,10 @@ namespace latinime {
const int BinaryDictionaryHeaderReadingUtils::MAX_OPTION_KEY_LENGTH = 256;
-const int BinaryDictionaryHeaderReadingUtils::VERSION_2_MAGIC_NUMBER_SIZE = 4;
-const int BinaryDictionaryHeaderReadingUtils::VERSION_2_DICTIONARY_VERSION_SIZE = 2;
-const int BinaryDictionaryHeaderReadingUtils::VERSION_2_DICTIONARY_FLAG_SIZE = 2;
-const int BinaryDictionaryHeaderReadingUtils::VERSION_2_DICTIONARY_HEADER_SIZE_SIZE = 4;
+const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_MAGIC_NUMBER_SIZE = 4;
+const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_DICTIONARY_VERSION_SIZE = 2;
+const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_FLAG_SIZE = 2;
+const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_SIZE_FIELD_SIZE = 4;
const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
BinaryDictionaryHeaderReadingUtils::NO_FLAGS = 0;
@@ -45,13 +45,13 @@ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
/* static */ int BinaryDictionaryHeaderReadingUtils::getHeaderSize(
const BinaryDictionaryInfo *const binaryDictionaryInfo) {
- switch (binaryDictionaryInfo->getFormat()) {
- case BinaryDictionaryFormatUtils::VERSION_2:
+ switch (getHeaderVersion(binaryDictionaryInfo->getFormat())) {
+ case HEADER_VERSION_2:
// See the format of the header in the comment in
// BinaryDictionaryFormatUtils::detectFormatVersion()
return ByteArrayUtils::readUint32(binaryDictionaryInfo->getDictBuf(),
- VERSION_2_MAGIC_NUMBER_SIZE + VERSION_2_DICTIONARY_VERSION_SIZE
- + VERSION_2_DICTIONARY_FLAG_SIZE);
+ VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE
+ + VERSION_2_HEADER_FLAG_SIZE);
default:
return S_INT_MAX;
}
@@ -60,10 +60,10 @@ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
/* static */ BinaryDictionaryHeaderReadingUtils::DictionaryFlags
BinaryDictionaryHeaderReadingUtils::getFlags(
const BinaryDictionaryInfo *const binaryDictionaryInfo) {
- switch (binaryDictionaryInfo->getFormat()) {
- case BinaryDictionaryFormatUtils::VERSION_2:
+ switch (getHeaderVersion(binaryDictionaryInfo->getFormat())) {
+ case HEADER_VERSION_2:
return ByteArrayUtils::readUint16(binaryDictionaryInfo->getDictBuf(),
- VERSION_2_MAGIC_NUMBER_SIZE + VERSION_2_DICTIONARY_VERSION_SIZE);
+ VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE);
default:
return NO_FLAGS;
}
@@ -73,11 +73,15 @@ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
/* static */ bool BinaryDictionaryHeaderReadingUtils::readHeaderValue(
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const char *const key, int *outValue, const int outValueSize) {
- if (outValueSize <= 0 || !hasHeaderAttributes(binaryDictionaryInfo->getFormat())) {
+ if (outValueSize <= 0) {
return false;
}
const int headerSize = getHeaderSize(binaryDictionaryInfo);
int pos = getHeaderOptionsPosition(binaryDictionaryInfo->getFormat());
+ if (pos == NOT_A_DICT_POS) {
+ // The header doesn't have header options.
+ return false;
+ }
while (pos < headerSize) {
if(ByteArrayUtils::compareStringInBufferWithCharArray(
binaryDictionaryInfo->getDictBuf(), key, headerSize - pos, &pos) == 0) {
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h
index 94b9e124d..61748227e 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h
@@ -48,27 +48,15 @@ class BinaryDictionaryHeaderReadingUtils {
return (flags & FRENCH_LIGATURE_PROCESSING_FLAG) != 0;
}
- static AK_FORCE_INLINE bool hasHeaderAttributes(
- const BinaryDictionaryFormatUtils::FORMAT_VERSION format) {
- // Only format 2 and above have header attributes as {key,value} string pairs.
- switch (format) {
- case BinaryDictionaryFormatUtils::VERSION_2:
- return true;
- break;
- default:
- return false;
- }
- }
-
static AK_FORCE_INLINE int getHeaderOptionsPosition(
- const BinaryDictionaryFormatUtils::FORMAT_VERSION format) {
- switch (format) {
- case BinaryDictionaryFormatUtils::VERSION_2:
- return VERSION_2_MAGIC_NUMBER_SIZE + VERSION_2_DICTIONARY_VERSION_SIZE
- + VERSION_2_DICTIONARY_FLAG_SIZE + VERSION_2_DICTIONARY_HEADER_SIZE_SIZE;
+ const BinaryDictionaryFormatUtils::FORMAT_VERSION dictionaryFormat) {
+ switch (getHeaderVersion(dictionaryFormat)) {
+ case HEADER_VERSION_2:
+ return VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE
+ + VERSION_2_HEADER_FLAG_SIZE + VERSION_2_HEADER_SIZE_FIELD_SIZE;
break;
default:
- return 0;
+ return NOT_A_DICT_POS;
}
}
@@ -82,10 +70,15 @@ class BinaryDictionaryHeaderReadingUtils {
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryHeaderReadingUtils);
- static const int VERSION_2_MAGIC_NUMBER_SIZE;
- static const int VERSION_2_DICTIONARY_VERSION_SIZE;
- static const int VERSION_2_DICTIONARY_FLAG_SIZE;
- static const int VERSION_2_DICTIONARY_HEADER_SIZE_SIZE;
+ enum HEADER_VERSION {
+ HEADER_VERSION_2,
+ UNKNOWN_HEADER_VERSION
+ };
+
+ static const int VERSION_2_HEADER_MAGIC_NUMBER_SIZE;
+ static const int VERSION_2_HEADER_DICTIONARY_VERSION_SIZE;
+ static const int VERSION_2_HEADER_FLAG_SIZE;
+ static const int VERSION_2_HEADER_SIZE_FIELD_SIZE;
static const DictionaryFlags NO_FLAGS;
// Flags for special processing
@@ -95,6 +88,18 @@ class BinaryDictionaryHeaderReadingUtils {
static const DictionaryFlags SUPPORTS_DYNAMIC_UPDATE_FLAG;
static const DictionaryFlags FRENCH_LIGATURE_PROCESSING_FLAG;
static const DictionaryFlags CONTAINS_BIGRAMS_FLAG;
+
+ static HEADER_VERSION getHeaderVersion(
+ const BinaryDictionaryFormatUtils::FORMAT_VERSION formatVersion) {
+ switch(formatVersion) {
+ case BinaryDictionaryFormatUtils::VERSION_2:
+ // Fall through
+ case BinaryDictionaryFormatUtils::VERSION_3:
+ return HEADER_VERSION_2;
+ default:
+ return UNKNOWN_HEADER_VERSION;
+ }
+ }
};
}
#endif /* LATINIME_DICTIONARY_HEADER_READING_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h
index c0e24fa4e..70dad67e8 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h
@@ -32,6 +32,9 @@ class DictionaryStructurePolicyFactory {
switch (dictionaryFormat) {
case BinaryDictionaryFormatUtils::VERSION_2:
return PatriciaTriePolicy::getInstance();
+ case BinaryDictionaryFormatUtils::VERSION_3:
+ // TODO: support version 3 dictionaries.
+ return 0;
default:
ASSERT(false);
return 0;