diff options
Diffstat (limited to 'native/src/binary_format.h')
-rw-r--r-- | native/src/binary_format.h | 22 |
1 files changed, 12 insertions, 10 deletions
diff --git a/native/src/binary_format.h b/native/src/binary_format.h index 6f65088db..1d74998f6 100644 --- a/native/src/binary_format.h +++ b/native/src/binary_format.h @@ -22,12 +22,12 @@ namespace latinime { class BinaryFormat { -private: + private: const static int32_t MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20; const static int32_t CHARACTER_ARRAY_TERMINATOR = 0x1F; const static int MULTIPLE_BYTE_CHARACTER_ADDITIONAL_SIZE = 2; -public: + public: const static int UNKNOWN_FORMAT = -1; const static int FORMAT_VERSION_1 = 1; const static uint16_t FORMAT_VERSION_1_MAGIC_NUMBER = 0x78B1; @@ -61,7 +61,9 @@ inline int BinaryFormat::detectFormat(const uint8_t* const dict) { } inline int BinaryFormat::getGroupCountAndForwardPointer(const uint8_t* const dict, int* pos) { - return dict[(*pos)++]; + const int msb = dict[(*pos)++]; + if (msb < 0x80) return msb; + return ((msb & 0x7F) << 8) | dict[(*pos)++]; } inline uint8_t BinaryFormat::getFlagsAndForwardPointer(const uint8_t* const dict, int* pos) { @@ -145,15 +147,15 @@ inline int BinaryFormat::skipFrequency(const uint8_t flags, const int pos) { inline int BinaryFormat::skipAllAttributes(const uint8_t* const dict, const uint8_t flags, const int pos) { - // This function skips all attributes. The format makes provision for future extension - // with other attributes (notably shortcuts) but for the time being, bigrams are the - // only attributes that may be found in a character group, so we only look at bigrams - // in this version. + // This function skips all attributes: shortcuts and bigrams. + int newPos = pos; + if (UnigramDictionary::FLAG_HAS_SHORTCUT_TARGETS & flags) { + newPos = skipAttributes(dict, newPos); + } if (UnigramDictionary::FLAG_HAS_BIGRAMS & flags) { - return skipAttributes(dict, pos); - } else { - return pos; + newPos = skipAttributes(dict, newPos); } + return newPos; } inline int BinaryFormat::skipChildrenPosAndAttributes(const uint8_t* const dict, |