about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- native/jni/src/char_utils.cpp | 2
-rw-r--r-- native/jni/src/char_utils.h | 4
2 files changed, 5 insertions, 1 deletions
diff --git a/native/jni/src/char_utils.cpp b/native/jni/src/char_utils.cpp
index 9d886da31..d0547a982 100644
--- a/native/jni/src/char_utils.cpp
+++ b/native/jni/src/char_utils.cpp
@@ -88,6 +88,7 @@ static const struct LatinCapitalSmallPair SORTED_CHAR_MAP[] = {
{ 0x00C5, 0x00E5 }, // LATIN CAPITAL LETTER A WITH RING ABOVE
{ 0x00C6, 0x00E6 }, // LATIN CAPITAL LETTER AE
{ 0x00D0, 0x00F0 }, // LATIN CAPITAL LETTER ETH
+ { 0x00D1, 0x00F1 }, // LATIN CAPITAL LETTER N WITH TILDE
{ 0x00D5, 0x00F5 }, // LATIN CAPITAL LETTER O WITH TILDE
{ 0x00D6, 0x00F6 }, // LATIN CAPITAL LETTER O WITH DIAERESIS
{ 0x00D8, 0x00F8 }, // LATIN CAPITAL LETTER O WITH STROKE
@@ -219,6 +220,7 @@ static const struct LatinCapitalSmallPair SORTED_CHAR_MAP[] = {
{ 0x0416, 0x0436 }, // CYRILLIC CAPITAL LETTER ZHE
{ 0x0417, 0x0437 }, // CYRILLIC CAPITAL LETTER ZE
{ 0x0418, 0x0438 }, // CYRILLIC CAPITAL LETTER I
+ { 0x0419, 0x0439 }, // CYRILLIC CAPITAL LETTER SHORT I
{ 0x041A, 0x043A }, // CYRILLIC CAPITAL LETTER KA
{ 0x041B, 0x043B }, // CYRILLIC CAPITAL LETTER EL
{ 0x041C, 0x043C }, // CYRILLIC CAPITAL LETTER EM
diff --git a/native/jni/src/char_utils.h b/native/jni/src/char_utils.h
index b17f262ec..20cf2e8b5 100644
--- a/native/jni/src/char_utils.h
+++ b/native/jni/src/char_utils.h
@@ -23,7 +23,9 @@
namespace latinime {
inline static bool isAsciiUpper(unsigned short c) {
- return isupper(static_cast<int>(c)) != 0;
+ // Note: isupper(...) reports false positives for some Cyrillic characters, causing them to
+ // be incorrectly lower-cased using toAsciiLower(...) rather than latin_tolower(...).
+ return (c >= 'A' && c <= 'Z');
}
inline static unsigned short toAsciiLower(unsigned short c) {