diff options
Diffstat (limited to 'native/jni/src/utils/char_utils.h')
-rw-r--r-- | native/jni/src/utils/char_utils.h | 21 |
1 files changed, 21 insertions, 0 deletions
diff --git a/native/jni/src/utils/char_utils.h b/native/jni/src/utils/char_utils.h index 239419d5b..f28ed5682 100644 --- a/native/jni/src/utils/char_utils.h +++ b/native/jni/src/utils/char_utils.h @@ -18,6 +18,7 @@ #define LATINIME_CHAR_UTILS_H #include <cctype> +#include <cstring> #include <vector> #include "defines.h" @@ -86,12 +87,32 @@ class CharUtils { return spaceCount; } + static AK_FORCE_INLINE int isInUnicodeSpace(const int codePoint) { + return codePoint >= MIN_UNICODE_CODE_POINT && codePoint <= MAX_UNICODE_CODE_POINT; + } + static unsigned short latin_tolower(const unsigned short c); static const std::vector<int> EMPTY_STRING; + // Returns updated code point count. Returns 0 when the code points cannot be marked as a + // Beginning-of-Sentence. + static AK_FORCE_INLINE int attachBeginningOfSentenceMarker(int *const codePoints, + const int codePointCount, const int maxCodePoint) { + if (codePointCount >= maxCodePoint) { + // the code points cannot be marked as a Beginning-of-Sentence. + return 0; + } + memmove(codePoints + 1, codePoints, sizeof(int) * codePointCount); + codePoints[0] = CODE_POINT_BEGINNING_OF_SENTENCE; + return codePointCount + 1; + } + private: DISALLOW_IMPLICIT_CONSTRUCTORS(CharUtils); + static const int MIN_UNICODE_CODE_POINT; + static const int MAX_UNICODE_CODE_POINT; + /** * Table mapping most combined Latin, Greek, and Cyrillic characters * to their base characters. If c is in range, BASE_CHARS[c] == c |