1 files changed, 119 insertions, 81 deletions
diff --git a/native/jni/src/binary_format.h b/native/jni/src/binary_format.h
index 51bf8ebbc..d8f3e83dd 100644
--- a/native/jni/src/binary_format.h
+++ b/native/jni/src/binary_format.h
@@ -18,18 +18,49 @@
 #define LATINIME_BINARY_FORMAT_H
 
 #include <limits>
+#include <map>
 #include "bloom_filter.h"
-#include "unigram_dictionary.h"
+#include "char_utils.h"
 
 namespace latinime {
 
 class BinaryFormat {
- private:
-    const static int32_t MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20;
-    const static int32_t CHARACTER_ARRAY_TERMINATOR = 0x1F;
-    const static int MULTIPLE_BYTE_CHARACTER_ADDITIONAL_SIZE = 2;
-
  public:
+    // Mask and flags for children address type selection.
+    static const int MASK_GROUP_ADDRESS_TYPE = 0xC0;
+    static const int FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00;
+    static const int FLAG_GROUP_ADDRESS_TYPE_ONEBYTE = 0x40;
+    static const int FLAG_GROUP_ADDRESS_TYPE_TWOBYTES = 0x80;
+    static const int FLAG_GROUP_ADDRESS_TYPE_THREEBYTES = 0xC0;
+
+    // Flag for single/multiple char group
+    static const int FLAG_HAS_MULTIPLE_CHARS = 0x20;
+
+    // Flag for terminal groups
+    static const int FLAG_IS_TERMINAL = 0x10;
+
+    // Flag for shortcut targets presence
+    static const int FLAG_HAS_SHORTCUT_TARGETS = 0x08;
+    // Flag for bigram presence
+    static const int FLAG_HAS_BIGRAMS = 0x04;
+
+    // Attribute (bigram/shortcut) related flags:
+    // Flag for presence of more attributes
+    static const int FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
+    // Flag for sign of offset. If this flag is set, the offset value must be negated.
+    static const int FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
+
+    // Mask for attribute frequency, stored on 4 bits inside the flags byte.
+    static const int MASK_ATTRIBUTE_FREQUENCY = 0x0F;
+    // The numeric value of the shortcut frequency that means 'whitelist'.
+    static const int WHITELIST_SHORTCUT_FREQUENCY = 15;
+
+    // Mask and flags for attribute address type selection.
+    static const int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30;
+    static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10;
+    static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20;
+    static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30;
+
     const static int UNKNOWN_FORMAT = -1;
     // Originally, format version 1 had a 16-bit magic number, then the version number `01'
     // then options that must be 0. Hence the first 32-bits of the format are always as follow
@@ -44,29 +75,29 @@ class BinaryFormat {
     const static int CHARACTER_ARRAY_TERMINATOR_SIZE = 1;
     const static int SHORTCUT_LIST_SIZE_SIZE = 2;
 
-    static int detectFormat(const uint8_t* const dict);
-    static unsigned int getHeaderSize(const uint8_t* const dict);
-    static unsigned int getFlags(const uint8_t* const dict);
-    static int getGroupCountAndForwardPointer(const uint8_t* const dict, int* pos);
-    static uint8_t getFlagsAndForwardPointer(const uint8_t* const dict, int* pos);
-    static int32_t getCharCodeAndForwardPointer(const uint8_t* const dict, int* pos);
-    static int readFrequencyWithoutMovingPointer(const uint8_t* const dict, const int pos);
-    static int skipOtherCharacters(const uint8_t* const dict, const int pos);
+    static int detectFormat(const uint8_t *const dict);
+    static unsigned int getHeaderSize(const uint8_t *const dict);
+    static unsigned int getFlags(const uint8_t *const dict);
+    static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos);
+    static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos);
+    static int32_t getCharCodeAndForwardPointer(const uint8_t *const dict, int *pos);
+    static int readFrequencyWithoutMovingPointer(const uint8_t *const dict, const int pos);
+    static int skipOtherCharacters(const uint8_t *const dict, const int pos);
     static int skipChildrenPosition(const uint8_t flags, const int pos);
     static int skipFrequency(const uint8_t flags, const int pos);
-    static int skipShortcuts(const uint8_t* const dict, const uint8_t flags, const int pos);
-    static int skipBigrams(const uint8_t* const dict, const uint8_t flags, const int pos);
-    static int skipAllAttributes(const uint8_t* const dict, const uint8_t flags, const int pos);
-    static int skipChildrenPosAndAttributes(const uint8_t* const dict, const uint8_t flags,
+    static int skipShortcuts(const uint8_t *const dict, const uint8_t flags, const int pos);
+    static int skipBigrams(const uint8_t *const dict, const uint8_t flags, const int pos);
+    static int skipChildrenPosAndAttributes(const uint8_t *const dict, const uint8_t flags,
             const int pos);
-    static int readChildrenPosition(const uint8_t* const dict, const uint8_t flags, const int pos);
+    static int readChildrenPosition(const uint8_t *const dict, const uint8_t flags, const int pos);
     static bool hasChildrenInFlags(const uint8_t flags);
-    static int getAttributeAddressAndForwardPointer(const uint8_t* const dict, const uint8_t flags,
+    static int getAttributeAddressAndForwardPointer(const uint8_t *const dict, const uint8_t flags,
             int *pos);
-    static int getTerminalPosition(const uint8_t* const root, const int32_t* const inWord,
-            const int length);
-    static int getWordAtAddress(const uint8_t* const root, const int address, const int maxDepth,
-            uint16_t* outWord, int* outUnigramFrequency);
+    static int getAttributeFrequencyFromFlags(const int flags);
+    static int getTerminalPosition(const uint8_t *const root, const int32_t *const inWord,
+            const int length, const bool forceLowerCaseSearch);
+    static int getWordAtAddress(const uint8_t *const root, const int address, const int maxDepth,
+            uint16_t *outWord, int *outUnigramFrequency);
     static int computeFrequencyForBigram(const int unigramFreq, const int bigramFreq);
     static int getProbability(const int position, const std::map<int, int> *bigramMap,
             const uint8_t *bigramFilter, const int unigramFreq);
@@ -79,9 +110,16 @@ class BinaryFormat {
         REQUIRES_FRENCH_LIGATURES_PROCESSING = 0x4
     };
     const static unsigned int NO_FLAGS = 0;
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryFormat);
+    const static int32_t MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20;
+    const static int32_t CHARACTER_ARRAY_TERMINATOR = 0x1F;
+    const static int MULTIPLE_BYTE_CHARACTER_ADDITIONAL_SIZE = 2;
+    static int skipAllAttributes(const uint8_t *const dict, const uint8_t flags, const int pos);
 };
 
-inline int BinaryFormat::detectFormat(const uint8_t* const dict) {
+inline int BinaryFormat::detectFormat(const uint8_t *const dict) {
     // The magic number is stored big-endian.
     const uint32_t magicNumber = (dict[0] << 24) + (dict[1] << 16) + (dict[2] << 8) + dict[3];
     switch (magicNumber) {
@@ -103,7 +141,7 @@ inline int BinaryFormat::detectFormat(const uint8_t* const dict) {
     }
 }
 
-inline unsigned int BinaryFormat::getFlags(const uint8_t* const dict) {
+inline unsigned int BinaryFormat::getFlags(const uint8_t *const dict) {
     switch (detectFormat(dict)) {
     case 1:
         return NO_FLAGS;
@@ -112,7 +150,7 @@ inline unsigned int BinaryFormat::getFlags(const uint8_t* const dict) {
     }
 }
 
-inline unsigned int BinaryFormat::getHeaderSize(const uint8_t* const dict) {
+inline unsigned int BinaryFormat::getHeaderSize(const uint8_t *const dict) {
     switch (detectFormat(dict)) {
     case 1:
         return FORMAT_VERSION_1_HEADER_SIZE;
@@ -124,17 +162,17 @@ inline unsigned int BinaryFormat::getHeaderSize(const uint8_t* const dict) {
     }
 }
 
-inline int BinaryFormat::getGroupCountAndForwardPointer(const uint8_t* const dict, int* pos) {
+inline int BinaryFormat::getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos) {
     const int msb = dict[(*pos)++];
     if (msb < 0x80) return msb;
     return ((msb & 0x7F) << 8) | dict[(*pos)++];
 }
 
-inline uint8_t BinaryFormat::getFlagsAndForwardPointer(const uint8_t* const dict, int* pos) {
+inline uint8_t BinaryFormat::getFlagsAndForwardPointer(const uint8_t *const dict, int *pos) {
     return dict[(*pos)++];
 }
 
-inline int32_t BinaryFormat::getCharCodeAndForwardPointer(const uint8_t* const dict, int* pos) {
+inline int32_t BinaryFormat::getCharCodeAndForwardPointer(const uint8_t *const dict, int *pos) {
     const int origin = *pos;
     const int32_t character = dict[origin];
     if (character < MINIMAL_ONE_BYTE_CHARACTER_VALUE) {
@@ -153,12 +191,12 @@ inline int32_t BinaryFormat::getCharCodeAndForwardPointer(const uint8_t* const d
     }
 }
 
-inline int BinaryFormat::readFrequencyWithoutMovingPointer(const uint8_t* const dict,
+inline int BinaryFormat::readFrequencyWithoutMovingPointer(const uint8_t *const dict,
         const int pos) {
     return dict[pos];
 }
 
-inline int BinaryFormat::skipOtherCharacters(const uint8_t* const dict, const int pos) {
+inline int BinaryFormat::skipOtherCharacters(const uint8_t *const dict, const int pos) {
     int currentPos = pos;
     int32_t character = dict[currentPos++];
     while (CHARACTER_ARRAY_TERMINATOR != character) {
@@ -172,22 +210,22 @@ inline int BinaryFormat::skipOtherCharacters(const uint8_t* const dict, const in
 
 static inline int attributeAddressSize(const uint8_t flags) {
     static const int ATTRIBUTE_ADDRESS_SHIFT = 4;
-    return (flags & UnigramDictionary::MASK_ATTRIBUTE_ADDRESS_TYPE) >> ATTRIBUTE_ADDRESS_SHIFT;
+    return (flags & BinaryFormat::MASK_ATTRIBUTE_ADDRESS_TYPE) >> ATTRIBUTE_ADDRESS_SHIFT;
     /* Note: this is a value-dependant optimization of what may probably be
        more readably written this way:
-       switch (flags * UnigramDictionary::MASK_ATTRIBUTE_ADDRESS_TYPE) {
-       case UnigramDictionary::FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: return 1;
-       case UnigramDictionary::FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: return 2;
-       case UnigramDictionary::FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTE: return 3;
+       switch (flags & BinaryFormat::MASK_ATTRIBUTE_ADDRESS_TYPE) {
+       case BinaryFormat::FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: return 1;
+       case BinaryFormat::FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: return 2;
+       case BinaryFormat::FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES: return 3;
        default: return 0;
        }
     */
 }
 
-static inline int skipExistingBigrams(const uint8_t* const dict, const int pos) {
+static inline int skipExistingBigrams(const uint8_t *const dict, const int pos) {
     int currentPos = pos;
     uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(dict, &currentPos);
-    while (flags & UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT) {
+    while (flags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT) {
         currentPos += attributeAddressSize(flags);
         flags = BinaryFormat::getFlagsAndForwardPointer(dict, &currentPos);
     }
@@ -197,11 +235,11 @@ static inline int skipExistingBigrams(const uint8_t* const dict, const int pos)
 
 static inline int childrenAddressSize(const uint8_t flags) {
     static const int CHILDREN_ADDRESS_SHIFT = 6;
-    return (UnigramDictionary::MASK_GROUP_ADDRESS_TYPE & flags) >> CHILDREN_ADDRESS_SHIFT;
+    return (BinaryFormat::MASK_GROUP_ADDRESS_TYPE & flags) >> CHILDREN_ADDRESS_SHIFT;
     /* See the note in attributeAddressSize. The same applies here */
 }
 
-static inline int shortcutByteSize(const uint8_t* const dict, const int pos) {
+static inline int shortcutByteSize(const uint8_t *const dict, const int pos) {
     return ((int)(dict[pos] << 8)) + (dict[pos + 1]);
 }
 
@@ -210,28 +248,28 @@ inline int BinaryFormat::skipChildrenPosition(const uint8_t flags, const int pos
 }
 
 inline int BinaryFormat::skipFrequency(const uint8_t flags, const int pos) {
-    return UnigramDictionary::FLAG_IS_TERMINAL & flags ? pos + 1 : pos;
+    return FLAG_IS_TERMINAL & flags ? pos + 1 : pos;
 }
 
-inline int BinaryFormat::skipShortcuts(const uint8_t* const dict, const uint8_t flags,
+inline int BinaryFormat::skipShortcuts(const uint8_t *const dict, const uint8_t flags,
         const int pos) {
-    if (UnigramDictionary::FLAG_HAS_SHORTCUT_TARGETS & flags) {
+    if (FLAG_HAS_SHORTCUT_TARGETS & flags) {
         return pos + shortcutByteSize(dict, pos);
     } else {
         return pos;
     }
 }
 
-inline int BinaryFormat::skipBigrams(const uint8_t* const dict, const uint8_t flags,
+inline int BinaryFormat::skipBigrams(const uint8_t *const dict, const uint8_t flags,
         const int pos) {
-    if (UnigramDictionary::FLAG_HAS_BIGRAMS & flags) {
+    if (FLAG_HAS_BIGRAMS & flags) {
         return skipExistingBigrams(dict, pos);
     } else {
         return pos;
     }
 }
 
-inline int BinaryFormat::skipAllAttributes(const uint8_t* const dict, const uint8_t flags,
+inline int BinaryFormat::skipAllAttributes(const uint8_t *const dict, const uint8_t flags,
         const int pos) {
     // This function skips all attributes: shortcuts and bigrams.
     int newPos = pos;
@@ -240,7 +278,7 @@ inline int BinaryFormat::skipAllAttributes(const uint8_t* const dict, const uint
     return newPos;
 }
 
-inline int BinaryFormat::skipChildrenPosAndAttributes(const uint8_t* const dict,
+inline int BinaryFormat::skipChildrenPosAndAttributes(const uint8_t *const dict,
         const uint8_t flags, const int pos) {
     int currentPos = pos;
     currentPos = skipChildrenPosition(flags, currentPos);
@@ -248,18 +286,18 @@ inline int BinaryFormat::skipChildrenPosAndAttributes(const uint8_t* const dict,
     return currentPos;
 }
 
-inline int BinaryFormat::readChildrenPosition(const uint8_t* const dict, const uint8_t flags,
+inline int BinaryFormat::readChildrenPosition(const uint8_t *const dict, const uint8_t flags,
         const int pos) {
     int offset = 0;
-    switch (UnigramDictionary::MASK_GROUP_ADDRESS_TYPE & flags) {
-        case UnigramDictionary::FLAG_GROUP_ADDRESS_TYPE_ONEBYTE:
+    switch (MASK_GROUP_ADDRESS_TYPE & flags) {
+        case FLAG_GROUP_ADDRESS_TYPE_ONEBYTE:
             offset = dict[pos];
             break;
-        case UnigramDictionary::FLAG_GROUP_ADDRESS_TYPE_TWOBYTES:
+        case FLAG_GROUP_ADDRESS_TYPE_TWOBYTES:
             offset = dict[pos] << 8;
             offset += dict[pos + 1];
             break;
-        case UnigramDictionary::FLAG_GROUP_ADDRESS_TYPE_THREEBYTES:
+        case FLAG_GROUP_ADDRESS_TYPE_THREEBYTES:
             offset = dict[pos] << 16;
             offset += dict[pos + 1] << 8;
             offset += dict[pos + 2];
@@ -273,42 +311,45 @@ inline int BinaryFormat::readChildrenPosition(const uint8_t* const dict, const u
 }
 
 inline bool BinaryFormat::hasChildrenInFlags(const uint8_t flags) {
-    return (UnigramDictionary::FLAG_GROUP_ADDRESS_TYPE_NOADDRESS
-            != (UnigramDictionary::MASK_GROUP_ADDRESS_TYPE & flags));
+    return (FLAG_GROUP_ADDRESS_TYPE_NOADDRESS != (MASK_GROUP_ADDRESS_TYPE & flags));
 }
 
-inline int BinaryFormat::getAttributeAddressAndForwardPointer(const uint8_t* const dict,
+inline int BinaryFormat::getAttributeAddressAndForwardPointer(const uint8_t *const dict,
         const uint8_t flags, int *pos) {
     int offset = 0;
     const int origin = *pos;
-    switch (UnigramDictionary::MASK_ATTRIBUTE_ADDRESS_TYPE & flags) {
-        case UnigramDictionary::FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
+    switch (MASK_ATTRIBUTE_ADDRESS_TYPE & flags) {
+        case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
             offset = dict[origin];
             *pos = origin + 1;
             break;
-        case UnigramDictionary::FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
+        case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
             offset = dict[origin] << 8;
             offset += dict[origin + 1];
             *pos = origin + 2;
             break;
-        case UnigramDictionary::FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
+        case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
             offset = dict[origin] << 16;
             offset += dict[origin + 1] << 8;
             offset += dict[origin + 2];
             *pos = origin + 3;
             break;
     }
-    if (UnigramDictionary::FLAG_ATTRIBUTE_OFFSET_NEGATIVE & flags) {
+    if (FLAG_ATTRIBUTE_OFFSET_NEGATIVE & flags) {
         return origin - offset;
     } else {
         return origin + offset;
     }
 }
 
+inline int BinaryFormat::getAttributeFrequencyFromFlags(const int flags) {
+    return flags & MASK_ATTRIBUTE_FREQUENCY;
+}
+
 // This function gets the byte position of the last chargroup of the exact matching word in the
 // dictionary. If no match is found, it returns NOT_VALID_WORD.
-inline int BinaryFormat::getTerminalPosition(const uint8_t* const root,
-        const int32_t* const inWord, const int length) {
+inline int BinaryFormat::getTerminalPosition(const uint8_t *const root,
+        const int32_t *const inWord, const int length, const bool forceLowerCaseSearch) {
     int pos = 0;
     int wordPos = 0;
 
@@ -317,7 +358,7 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t* const root,
         // there was no match (or we would have found it).
         if (wordPos > length) return NOT_VALID_WORD;
         int charGroupCount = BinaryFormat::getGroupCountAndForwardPointer(root, &pos);
-        const int32_t wChar = inWord[wordPos];
+        const int32_t wChar = forceLowerCaseSearch ? toLowerCase(inWord[wordPos]) : inWord[wordPos];
         while (true) {
             // If there are no more character groups in this node, it means we could not
             // find a matching character for this depth, therefore there is no match.
@@ -330,7 +371,7 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t* const root,
                 // char within a node, so either we found our match in this node, or there is
                 // no match and we can return NOT_VALID_WORD. So we will check all the characters
                 // in this character group indeed does match.
-                if (UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS & flags) {
+                if (FLAG_HAS_MULTIPLE_CHARS & flags) {
                     character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
                     while (NOT_A_CHARACTER != character) {
                         ++wordPos;
@@ -348,14 +389,13 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t* const root,
                 // If we don't match the length AND don't have children, then a word in the
                 // dictionary fully matches a prefix of the searched word but not the full word.
                 ++wordPos;
-                if (UnigramDictionary::FLAG_IS_TERMINAL & flags) {
+                if (FLAG_IS_TERMINAL & flags) {
                     if (wordPos == length) {
                         return charGroupPos;
                     }
-                    pos = BinaryFormat::skipFrequency(UnigramDictionary::FLAG_IS_TERMINAL, pos);
+                    pos = BinaryFormat::skipFrequency(FLAG_IS_TERMINAL, pos);
                 }
-                if (UnigramDictionary::FLAG_GROUP_ADDRESS_TYPE_NOADDRESS
-                        == (UnigramDictionary::MASK_GROUP_ADDRESS_TYPE & flags)) {
+                if (FLAG_GROUP_ADDRESS_TYPE_NOADDRESS == (MASK_GROUP_ADDRESS_TYPE & flags)) {
                     return NOT_VALID_WORD;
                 }
                 // We have children and we are still shorter than the word we are searching for, so
@@ -365,7 +405,7 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t* const root,
                 break;
             } else {
                 // This chargroup does not match, so skip the remaining part and go to the next.
-                if (UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS & flags) {
+                if (FLAG_HAS_MULTIPLE_CHARS & flags) {
                     pos = BinaryFormat::skipOtherCharacters(root, pos);
                 }
                 pos = BinaryFormat::skipFrequency(flags, pos);
@@ -394,8 +434,8 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t* const root,
  * outUnigramFrequency: a pointer to an int to write the frequency into.
  * Return value : the length of the word, of 0 if the word was not found.
  */
-inline int BinaryFormat::getWordAtAddress(const uint8_t* const root, const int address,
-        const int maxDepth, uint16_t* outWord, int* outUnigramFrequency) {
+inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int address,
+        const int maxDepth, uint16_t *outWord, int *outUnigramFrequency) {
     int pos = 0;
     int wordPos = 0;
 
@@ -418,7 +458,7 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t* const root, const int a
                 // We found the address. Copy the rest of the word in the buffer and return
                 // the length.
                 outWord[wordPos] = character;
-                if (UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS & flags) {
+                if (FLAG_HAS_MULTIPLE_CHARS & flags) {
                     int32_t nextChar = getCharCodeAndForwardPointer(root, &pos);
                     // We count chars in order to avoid infinite loops if the file is broken or
                     // if there is some other bug
@@ -433,7 +473,7 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t* const root, const int a
             }
             // We need to skip past this char group, so skip any remaining chars after the
             // first and possibly the frequency.
-            if (UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS & flags) {
+            if (FLAG_HAS_MULTIPLE_CHARS & flags) {
                 pos = skipOtherCharacters(root, pos);
             }
             pos = skipFrequency(flags, pos);
@@ -441,8 +481,8 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t* const root, const int a
             // The fact that this group has children is very important. Since we already know
             // that this group does not match, if it has no children we know it is irrelevant
             // to what we are searching for.
-            const bool hasChildren = (UnigramDictionary::FLAG_GROUP_ADDRESS_TYPE_NOADDRESS !=
-                    (UnigramDictionary::MASK_GROUP_ADDRESS_TYPE & flags));
+            const bool hasChildren = (FLAG_GROUP_ADDRESS_TYPE_NOADDRESS !=
+                    (MASK_GROUP_ADDRESS_TYPE & flags));
             // We will write in `found' whether we have passed the children address we are
             // searching for. For example if we search for "beer", the children of b are less
             // than the address we are searching for and the children of c are greater. When we
@@ -482,7 +522,7 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t* const root, const int a
                             getCharCodeAndForwardPointer(root, &lastCandidateGroupPos);
                     // We copy all the characters in this group to the buffer
                     outWord[wordPos] = lastChar;
-                    if (UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS & lastFlags) {
+                    if (FLAG_HAS_MULTIPLE_CHARS & lastFlags) {
                         int32_t nextChar =
                                 getCharCodeAndForwardPointer(root, &lastCandidateGroupPos);
                         int charCount = maxDepth;
@@ -538,8 +578,8 @@ inline int BinaryFormat::computeFrequencyForBigram(const int unigramFreq, const
     // 0 for the bigram frequency represents the middle of the 16th step from the top,
     // while a value of 15 represents the middle of the top step.
     // See makedict.BinaryDictInputOutput for details.
-    const float stepSize = ((float)MAX_FREQ - unigramFreq) / (1.5f + MAX_BIGRAM_FREQ);
-    return (int)(unigramFreq + (bigramFreq + 1) * stepSize);
+    const float stepSize = (static_cast<float>(MAX_FREQ) - unigramFreq) / (1.5f + MAX_BIGRAM_FREQ);
+    return static_cast<int>(unigramFreq + (bigramFreq + 1) * stepSize);
 }
 
 // This returns a probability in log space.
@@ -555,7 +595,5 @@ inline int BinaryFormat::getProbability(const int position, const std::map<int,
         return backoff(unigramFreq);
     }
 }
-
 } // namespace latinime
-
 #endif // LATINIME_BINARY_FORMAT_H