aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/src/binary_format.h
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni/src/binary_format.h')
-rw-r--r--native/jni/src/binary_format.h61
1 files changed, 51 insertions, 10 deletions
diff --git a/native/jni/src/binary_format.h b/native/jni/src/binary_format.h
index ab033ad90..f59302460 100644
--- a/native/jni/src/binary_format.h
+++ b/native/jni/src/binary_format.h
@@ -40,16 +40,21 @@ class BinaryFormat {
// implementations. On this occasion, we made the magic number 32 bits long.
const static uint32_t FORMAT_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE;
+ const static int CHARACTER_ARRAY_TERMINATOR_SIZE = 1; // NOTE(review): presumably the width in bytes of a character-array terminator; no use is visible in this diff -- confirm at the call sites
+ const static int SHORTCUT_LIST_SIZE_SIZE = 2; // NOTE(review): presumably the width in bytes of the shortcut list's size field; shortcutByteSize() reads exactly two bytes -- confirm
+
static int detectFormat(const uint8_t* const dict);
static unsigned int getHeaderSize(const uint8_t* const dict);
+ static unsigned int getFlags(const uint8_t* const dict);
static int getGroupCountAndForwardPointer(const uint8_t* const dict, int* pos);
static uint8_t getFlagsAndForwardPointer(const uint8_t* const dict, int* pos);
static int32_t getCharCodeAndForwardPointer(const uint8_t* const dict, int* pos);
static int readFrequencyWithoutMovingPointer(const uint8_t* const dict, const int pos);
static int skipOtherCharacters(const uint8_t* const dict, const int pos);
- static int skipAttributes(const uint8_t* const dict, const int pos);
static int skipChildrenPosition(const uint8_t flags, const int pos);
static int skipFrequency(const uint8_t flags, const int pos);
+ static int skipShortcuts(const uint8_t* const dict, const uint8_t flags, const int pos);
+ static int skipBigrams(const uint8_t* const dict, const uint8_t flags, const int pos);
static int skipAllAttributes(const uint8_t* const dict, const uint8_t flags, const int pos);
static int skipChildrenPosAndAttributes(const uint8_t* const dict, const uint8_t flags,
const int pos);
@@ -61,6 +66,15 @@ class BinaryFormat {
const int length);
static int getWordAtAddress(const uint8_t* const root, const int address, const int maxDepth,
uint16_t* outWord);
+
+ // Flags for special processing
+ // These *must* match the flags in makedict (BinaryDictInputOutput#*_PROCESSING_FLAG) or
+ // something very bad (like, the apocalypse) will happen. Please update both at the same time.
+ enum { // NOTE(review): presumably bit masks to test against the value getFlags() returns -- confirm at the call sites
+ REQUIRES_GERMAN_UMLAUT_PROCESSING = 0x1,
+ REQUIRES_FRENCH_LIGATURES_PROCESSING = 0x4 // 0x2 is not assigned in this enum
+ };
+ const static unsigned int NO_FLAGS = 0; // getFlags() returns this when detectFormat() reports 1
};
inline int BinaryFormat::detectFormat(const uint8_t* const dict) {
@@ -77,7 +91,7 @@ inline int BinaryFormat::detectFormat(const uint8_t* const dict) {
// Format 2 header is as follows:
// Magic number (4 bytes) 0x9B 0xC1 0x3A 0xFE
// Version number (2 bytes) 0x00 0x02
- // Options (2 bytes) must be 0x00 0x00
+ // Options (2 bytes)
// Header size (4 bytes) : integer, big endian
return (dict[4] << 8) + dict[5];
default:
@@ -85,6 +99,15 @@ inline int BinaryFormat::detectFormat(const uint8_t* const dict) {
}
}
+inline unsigned int BinaryFormat::getFlags(const uint8_t* const dict) { // Returns the option flags for the dictionary image that starts at dict.
+ switch (detectFormat(dict)) {
+ case 1:
+ return NO_FLAGS; // detectFormat() reported 1: the constant is returned and no flag bytes are read
+ default: // taken for every other value detectFormat() returns, not only for 2
+ return (dict[6] << 8) + dict[7]; // bytes 6-7, high byte first; in the format 2 header layout these are the 2-byte Options field
+ }
+}
+
inline unsigned int BinaryFormat::getHeaderSize(const uint8_t* const dict) {
switch (detectFormat(dict)) {
case 1:
@@ -157,12 +180,12 @@ static inline int attributeAddressSize(const uint8_t flags) {
*/
}
-inline int BinaryFormat::skipAttributes(const uint8_t* const dict, const int pos) {
+static inline int skipExistingBigrams(const uint8_t* const dict, const int pos) {
int currentPos = pos;
- uint8_t flags = getFlagsAndForwardPointer(dict, &currentPos);
+ uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(dict, &currentPos);
while (flags & UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT) {
currentPos += attributeAddressSize(flags);
- flags = getFlagsAndForwardPointer(dict, &currentPos);
+ flags = BinaryFormat::getFlagsAndForwardPointer(dict, &currentPos);
}
currentPos += attributeAddressSize(flags);
return currentPos;
@@ -174,6 +197,10 @@ static inline int childrenAddressSize(const uint8_t flags) {
/* See the note in attributeAddressSize. The same applies here */
}
+static inline int shortcutByteSize(const uint8_t* const dict, const int pos) { // Combines the two bytes at dict[pos] and dict[pos + 1] into one integer.
+ return ((int)(dict[pos] << 8)) + (dict[pos + 1]); // dict[pos] is the high byte, dict[pos + 1] the low byte; pos is not range-checked here
+} // NOTE(review): skipShortcuts() adds this value straight to pos, so the stored size presumably counts these two bytes as well -- confirm against the dictionary writer
+
inline int BinaryFormat::skipChildrenPosition(const uint8_t flags, const int pos) { // Returns pos moved forward by childrenAddressSize(flags).
return pos + childrenAddressSize(flags); // the offset depends only on flags; no dictionary bytes are read here
}
@@ -182,16 +209,30 @@ inline int BinaryFormat::skipFrequency(const uint8_t flags, const int pos) {
return UnigramDictionary::FLAG_IS_TERMINAL & flags ? pos + 1 : pos;
}
-inline int BinaryFormat::skipAllAttributes(const uint8_t* const dict, const uint8_t flags,
+inline int BinaryFormat::skipShortcuts(const uint8_t* const dict, const uint8_t flags, // Returns pos advanced by shortcutByteSize(dict, pos) when flags has FLAG_HAS_SHORTCUT_TARGETS set, otherwise pos.
const int pos) {
- // This function skips all attributes: shortcuts and bigrams.
- int newPos = pos;
if (UnigramDictionary::FLAG_HAS_SHORTCUT_TARGETS & flags) {
- newPos = skipAttributes(dict, newPos);
+ return pos + shortcutByteSize(dict, pos); // jump by the size stored in the two bytes at pos; nothing extra is added for those two bytes
+ } else {
+ return pos; // flag bit clear: the position is returned unchanged
}
+}
+
+inline int BinaryFormat::skipBigrams(const uint8_t* const dict, const uint8_t flags, // Returns skipExistingBigrams(dict, pos) when flags has FLAG_HAS_BIGRAMS set, otherwise pos.
+ const int pos) {
if (UnigramDictionary::FLAG_HAS_BIGRAMS & flags) {
- newPos = skipAttributes(dict, newPos);
+ return skipExistingBigrams(dict, pos); // that helper steps over entries until it reads one without FLAG_ATTRIBUTE_HAS_NEXT
+ } else {
+ return pos; // flag bit clear: the position is returned unchanged
}
+}
+
+inline int BinaryFormat::skipAllAttributes(const uint8_t* const dict, const uint8_t flags,
+ const int pos) {
+ // This function skips all attributes: shortcuts and bigrams.
+ int newPos = pos;
+ newPos = skipShortcuts(dict, flags, newPos); // shortcuts go first; the position it returns is where skipBigrams() starts
+ newPos = skipBigrams(dict, flags, newPos); // each helper returns its input position unchanged when its flag bit is clear
return newPos;
}