20 files changed, 157 insertions, 148 deletions
diff --git a/native/jni/src/bigram_dictionary.cpp b/native/jni/src/bigram_dictionary.cpp
index c592542bd..ebe27994f 100644
--- a/native/jni/src/bigram_dictionary.cpp
+++ b/native/jni/src/bigram_dictionary.cpp
@@ -20,10 +20,10 @@
 
 #include "bigram_dictionary.h"
 
-#include "char_utils.h"
 #include "defines.h"
 #include "suggest/core/dictionary/binary_format.h"
 #include "suggest/core/dictionary/bloom_filter.h"
+#include "suggest/core/dictionary/char_utils.h"
 #include "suggest/core/dictionary/dictionary.h"
 
 namespace latinime {
@@ -52,7 +52,7 @@ void BigramDictionary::addWordBigram(int *word, int length, int probability, int
     int insertAt = 0;
     while (insertAt < MAX_RESULTS) {
         if (probability > bigramProbability[insertAt] || (bigramProbability[insertAt] == probability
-                && length < getCodePointCount(MAX_WORD_LENGTH,
+                && length < CharUtils::getCodePointCount(MAX_WORD_LENGTH,
                         bigramCodePoints + insertAt * MAX_WORD_LENGTH))) {
             break;
         }
@@ -196,9 +196,9 @@ bool BigramDictionary::checkFirstCharacter(int *word, int *inputCodePoints) cons
     // what user typed.
 
     int maxAlt = MAX_ALTERNATIVES;
-    const int firstBaseLowerCodePoint = toBaseLowerCase(*word);
+    const int firstBaseLowerCodePoint = CharUtils::toBaseLowerCase(*word);
     while (maxAlt > 0) {
-        if (toBaseLowerCase(*inputCodePoints) == firstBaseLowerCodePoint) {
+        if (CharUtils::toBaseLowerCase(*inputCodePoints) == firstBaseLowerCodePoint) {
             return true;
         }
         inputCodePoints++;
diff --git a/native/jni/src/char_utils.h b/native/jni/src/char_utils.h
deleted file mode 100644
index b429f40b2..000000000
--- a/native/jni/src/char_utils.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Copyright (C) 2010 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_CHAR_UTILS_H
-#define LATINIME_CHAR_UTILS_H
-
-#include <cctype>
-
-#include "defines.h"
-
-namespace latinime {
-
-inline static bool isAsciiUpper(int c) {
-    // Note: isupper(...) reports false positives for some Cyrillic characters, causing them to
-    // be incorrectly lower-cased using toAsciiLower(...) rather than latin_tolower(...).
-    return (c >= 'A' && c <= 'Z');
-}
-
-inline static int toAsciiLower(int c) {
-    return c - 'A' + 'a';
-}
-
-inline static bool isAscii(int c) {
-    return isascii(c) != 0;
-}
-
-unsigned short latin_tolower(const unsigned short c);
-
-/**
- * Table mapping most combined Latin, Greek, and Cyrillic characters
- * to their base characters.  If c is in range, BASE_CHARS[c] == c
- * if c is not a combined character, or the base character if it
- * is combined.
- */
-static const int BASE_CHARS_SIZE = 0x0500;
-extern const unsigned short BASE_CHARS[BASE_CHARS_SIZE];
-
-inline static int toBaseCodePoint(int c) {
-    if (c < BASE_CHARS_SIZE) {
-        return static_cast<int>(BASE_CHARS[c]);
-    }
-    return c;
-}
-
-AK_FORCE_INLINE static int toLowerCase(const int c) {
-    if (isAsciiUpper(c)) {
-        return toAsciiLower(c);
-    }
-    if (isAscii(c)) {
-        return c;
-    }
-    return static_cast<int>(latin_tolower(static_cast<unsigned short>(c)));
-}
-
-AK_FORCE_INLINE static int toBaseLowerCase(const int c) {
-    return toLowerCase(toBaseCodePoint(c));
-}
-
-inline static bool isIntentionalOmissionCodePoint(const int codePoint) {
-    // TODO: Do not hardcode here
-    return codePoint == KEYCODE_SINGLE_QUOTE || codePoint == KEYCODE_HYPHEN_MINUS;
-}
-
-inline static int getCodePointCount(const int arraySize, const int *const codePoints) {
-    int size = 0;
-    for (; size < arraySize; ++size) {
-        if (codePoints[size] == '\0') {
-            break;
-        }
-    }
-    return size;
-}
-
-} // namespace latinime
-#endif // LATINIME_CHAR_UTILS_H
diff --git a/native/jni/src/correction.cpp b/native/jni/src/correction.cpp
index e2ad557c5..3dc2f3748 100644
--- a/native/jni/src/correction.cpp
+++ b/native/jni/src/correction.cpp
@@ -18,9 +18,9 @@
 
 #include <cmath>
 
-#include "char_utils.h"
 #include "correction.h"
 #include "defines.h"
+#include "suggest/core/dictionary/char_utils.h"
 #include "suggest/core/layout/proximity_info_state.h"
 #include "suggest/core/layout/touch_position_correction_utils.h"
 #include "suggest/policyimpl/utils/edit_distance.h"
@@ -528,7 +528,7 @@ inline static int getQuoteCount(const int *word, const int length) {
 }
 
 inline static bool isUpperCase(unsigned short c) {
-    return isAsciiUpper(toBaseCodePoint(c));
+    return CharUtils::isAsciiUpper(CharUtils::toBaseCodePoint(c));
 }
 
 //////////////////////
diff --git a/native/jni/src/correction.h b/native/jni/src/correction.h
index 75b49952c..3f60d48cf 100644
--- a/native/jni/src/correction.h
+++ b/native/jni/src/correction.h
@@ -21,6 +21,7 @@
 
 #include "correction_state.h"
 #include "defines.h"
+#include "suggest/core/dictionary/char_utils.h"
 #include "suggest/core/layout/proximity_info_state.h"
 
 namespace latinime {
@@ -342,13 +343,13 @@ AK_FORCE_INLINE static void calcEditDistanceOneStep(int *editDistanceTable, cons
     const int *const prevprev =
             outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputSize + 1) : 0;
     current[0] = outputLength;
-    const int co = toBaseLowerCase(output[outputLength - 1]);
-    const int prevCO = outputLength >= 2 ? toBaseLowerCase(output[outputLength - 2]) : 0;
+    const int co = CharUtils::toBaseLowerCase(output[outputLength - 1]);
+    const int prevCO = outputLength >= 2 ? CharUtils::toBaseLowerCase(output[outputLength - 2]) : 0;
     for (int i = 1; i <= inputSize; ++i) {
-        const int ci = toBaseLowerCase(input[i - 1]);
+        const int ci = CharUtils::toBaseLowerCase(input[i - 1]);
         const int cost = (ci == co) ? 0 : 1;
         current[i] = min(current[i - 1] + 1, min(prev[i] + 1, prev[i - 1] + cost));
-        if (i >= 2 && prevprev && ci == prevCO && co == toBaseLowerCase(input[i - 2])) {
+        if (i >= 2 && prevprev && ci == prevCO && co == CharUtils::toBaseLowerCase(input[i - 2])) {
             current[i] = min(current[i], prevprev[i - 2] + 1);
         }
     }
diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h
index 1510e3d5e..14bd2d57a 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node.h
@@ -17,12 +17,12 @@
 #ifndef LATINIME_DIC_NODE_H
 #define LATINIME_DIC_NODE_H
 
-#include "char_utils.h"
 #include "defines.h"
 #include "suggest/core/dicnode/dic_node_state.h"
 #include "suggest/core/dicnode/dic_node_profiler.h"
 #include "suggest/core/dicnode/dic_node_properties.h"
 #include "suggest/core/dicnode/dic_node_release_listener.h"
+#include "suggest/core/dictionary/char_utils.h"
 #include "suggest/core/dictionary/digraph_utils.h"
 
 #if DEBUG_DICT
@@ -221,7 +221,7 @@ class DicNode {
 
     bool isFirstCharUppercase() const {
         const int c = getOutputWordBuf()[0];
-        return isAsciiUpper(c);
+        return CharUtils::isAsciiUpper(c);
     }
 
     bool isFirstWord() const {
@@ -375,7 +375,7 @@ class DicNode {
     // Whether the current codepoint can be an intentional omission, in which case the traversal
     // algorithm will always check for a possible omission here.
     bool canBeIntentionalOmission() const {
-        return isIntentionalOmissionCodePoint(getNodeCodePoint());
+        return CharUtils::isIntentionalOmissionCodePoint(getNodeCodePoint());
     }
 
     // Whether the omission is so frequent that it should incur zero cost.
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
index 7f0d0ed0e..c754a5ec2 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
+++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
@@ -21,6 +21,7 @@
 #include "suggest/core/dicnode/dic_node_utils.h"
 #include "suggest/core/dicnode/dic_node_vector.h"
 #include "suggest/core/dictionary/binary_format.h"
+#include "suggest/core/dictionary/char_utils.h"
 #include "suggest/core/dictionary/multi_bigram_map.h"
 #include "suggest/core/layout/proximity_info.h"
 #include "suggest/core/layout/proximity_info_state.h"
@@ -62,9 +63,9 @@ namespace latinime {
         DicNodeVector *childDicNodes) {
     // Passing multiple chars node. No need to traverse child
     const int codePoint = dicNode->getNodeTypedCodePoint();
-    const int baseLowerCaseCodePoint = toBaseLowerCase(codePoint);
+    const int baseLowerCaseCodePoint = CharUtils::toBaseLowerCase(codePoint);
     const bool isMatch = isMatchedNodeCodePoint(pInfoState, pointIndex, exactOnly, codePoint);
-    if (isMatch || isIntentionalOmissionCodePoint(baseLowerCaseCodePoint)) {
+    if (isMatch || CharUtils::isIntentionalOmissionCodePoint(baseLowerCaseCodePoint)) {
         childDicNodes->pushPassingChild(dicNode);
     }
 }
@@ -125,13 +126,13 @@ namespace latinime {
         return false;
     }
     if (pInfo && (pInfo->getKeyIndexOf(nodeCodePoint) == NOT_AN_INDEX
-            || isIntentionalOmissionCodePoint(nodeCodePoint))) {
+            || CharUtils::isIntentionalOmissionCodePoint(nodeCodePoint))) {
         // If normalized nodeCodePoint is not on the keyboard or skippable, this child is never
         // filtered.
         return false;
     }
-    const int lowerCodePoint = toLowerCase(nodeCodePoint);
-    const int baseLowerCodePoint = toBaseCodePoint(lowerCodePoint);
+    const int lowerCodePoint = CharUtils::toLowerCase(nodeCodePoint);
+    const int baseLowerCodePoint = CharUtils::toBaseCodePoint(lowerCodePoint);
     // TODO: Avoid linear search
     for (int i = 0; i < filterSize; ++i) {
         // Checking if a normalized code point is in filter characters when pInfo is not
diff --git a/native/jni/src/suggest/core/dictionary/binary_format.h b/native/jni/src/suggest/core/dictionary/binary_format.h
index 65c2e9115..ef9fd3785 100644
--- a/native/jni/src/suggest/core/dictionary/binary_format.h
+++ b/native/jni/src/suggest/core/dictionary/binary_format.h
@@ -21,9 +21,9 @@
 #include <map>
 #include <stdint.h>
 
-#include "char_utils.h"
 #include "hash_map_compat.h"
 #include "suggest/core/dictionary/bloom_filter.h"
+#include "suggest/core/dictionary/char_utils.h"
 
 namespace latinime {
 
@@ -473,7 +473,8 @@ AK_FORCE_INLINE int BinaryFormat::getTerminalPosition(const uint8_t *const root,
         // there was no match (or we would have found it).
         if (wordPos >= length) return NOT_VALID_WORD;
         int charGroupCount = BinaryFormat::getGroupCountAndForwardPointer(root, &pos);
-        const int wChar = forceLowerCaseSearch ? toLowerCase(inWord[wordPos]) : inWord[wordPos];
+        const int wChar = forceLowerCaseSearch
+                ? CharUtils::toLowerCase(inWord[wordPos]) : inWord[wordPos];
         while (true) {
             // If there are no more character groups in this node, it means we could not
             // find a matching character for this depth, therefore there is no match.
diff --git a/native/jni/src/char_utils.cpp b/native/jni/src/suggest/core/dictionary/char_utils.cpp
index e219beb62..8d40e54c9 100644
--- a/native/jni/src/char_utils.cpp
+++ b/native/jni/src/suggest/core/dictionary/char_utils.cpp
@@ -14,9 +14,10 @@
  * limitations under the License.
  */
 
+#include "suggest/core/dictionary/char_utils.h"
+
 #include <cstdlib>
 
-#include "char_utils.h"
 #include "defines.h"
 
 namespace latinime {
@@ -36,8 +37,7 @@ struct LatinCapitalSmallPair {
  *    $ apt-get install libicu-dev
  *
  * 3. Build the following code
- *    (You need this file, char_utils.h, and defines.h)
- *    $ g++ -o char_utils -DUPDATING_CHAR_UTILS char_utils.cpp -licuuc
+ *    $ g++ -o char_utils -I../../.. -DUPDATING_CHAR_UTILS char_utils.cpp -licuuc
  */
 #ifdef UPDATING_CHAR_UTILS
 #include <stdio.h>
@@ -47,7 +47,7 @@ extern "C" int main() {
     for (unsigned short c = 0; c < 0xFFFF; c++) {
         if (c <= 0x7F) continue;
         const unsigned short icu4cLowerC = u_tolower(c);
-        const unsigned short myLowerC = latin_tolower(c);
+        const unsigned short myLowerC = CharUtils::latin_tolower(c);
         if (c != icu4cLowerC) {
 #ifdef CONFIRMING_CHAR_UTILS
             if (icu4cLowerC != myLowerC) {
@@ -70,7 +70,8 @@ extern "C" int main() {
  *
  * 5. Update the SORTED_CHAR_MAP[] array below with the output above.
  *    Then, rebuild with -DCONFIRMING_CHAR_UTILS and confirm the program exits successfully.
- *    $ g++ -o char_utils -DUPDATING_CHAR_UTILS -DCONFIRMING_CHAR_UTILS char_utils.cpp -licuuc
+ *    $ g++ -o char_utils -I../../.. -DUPDATING_CHAR_UTILS -DCONFIRMING_CHAR_UTILS char_utils.cpp \
+ *      -licuuc
  *    $ ./char_utils
  *    $
  */
@@ -1054,7 +1055,7 @@ static int compare_pair_capital(const void *a, const void *b) {
             - static_cast<int>((static_cast<const struct LatinCapitalSmallPair *>(b))->capital);
 }
 
-unsigned short latin_tolower(const unsigned short c) {
+/* static */ unsigned short CharUtils::latin_tolower(const unsigned short c) {
     struct LatinCapitalSmallPair *p =
             static_cast<struct LatinCapitalSmallPair *>(bsearch(&c, SORTED_CHAR_MAP,
                     NELEMS(SORTED_CHAR_MAP), sizeof(SORTED_CHAR_MAP[0]), compare_pair_capital));
@@ -1063,7 +1064,7 @@ unsigned short latin_tolower(const unsigned short c) {
 
 /*
  * Table mapping most combined Latin, Greek, and Cyrillic characters
- * to their base characters.  If c is in range, BASE_CHARS[c] == c
+ * to their base characters.  If c is in range, CharUtils::BASE_CHARS[c] == c
  * if c is not a combined character, or the base character if it
  * is combined.
  *
@@ -1074,7 +1075,7 @@ unsigned short latin_tolower(const unsigned short c) {
  *   for ($j = $i; $j < $i + 8; $j++) { \
  *   printf("0x%04X, ", $base[$j] ? $base[$j] : $j)}; print "\n"; }'
  */
-const unsigned short BASE_CHARS[BASE_CHARS_SIZE] = {
+/* static */ const unsigned short CharUtils::BASE_CHARS[CharUtils::BASE_CHARS_SIZE] = {
     /* U+0000 */ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
     /* U+0008 */ 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
     /* U+0010 */ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
diff --git a/native/jni/src/suggest/core/dictionary/char_utils.h b/native/jni/src/suggest/core/dictionary/char_utils.h
new file mode 100644
index 000000000..2e735a81c
--- /dev/null
+++ b/native/jni/src/suggest/core/dictionary/char_utils.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_CHAR_UTILS_H
+#define LATINIME_CHAR_UTILS_H
+
+#include <cctype>
+
+#include "defines.h"
+
+namespace latinime {
+
+/**
+ * Static helpers for classifying and normalizing code points: ASCII checks, lower-casing,
+ * mapping to base characters and counting. Every member is static.
+ */
+class CharUtils {
+ public:
+    // Returns true only for the ASCII capital letters 'A'..'Z'.
+    static AK_FORCE_INLINE bool isAsciiUpper(int c) {
+        // Note: isupper(...) reports false positives for some Cyrillic characters, causing them to
+        // be incorrectly lower-cased using toAsciiLower(...) rather than latin_tolower(...).
+        return (c >= 'A' && c <= 'Z');
+    }
+
+    // Converts an ASCII capital letter to lower case. No range check is done here, so the
+    // result is only meaningful when isAsciiUpper(c) is true.
+    static AK_FORCE_INLINE int toAsciiLower(int c) {
+        return c - 'A' + 'a';
+    }
+
+    // Returns true if c lies in the 7-bit ASCII range [0, 0x7F].
+    static AK_FORCE_INLINE bool isAscii(int c) {
+        // Plain range check: isascii() is a POSIX extension, not part of standard <cctype>.
+        return c >= 0 && c <= 0x7F;
+    }
+
+    // Lower-cases c. ASCII is handled inline; other code points are looked up with
+    // latin_tolower(). Values that do not fit in an unsigned short are returned unchanged.
+    static AK_FORCE_INLINE int toLowerCase(const int c) {
+        if (isAsciiUpper(c)) {
+            return toAsciiLower(c);
+        }
+        if (isAscii(c)) {
+            return c;
+        }
+        if (c < 0 || c > 0xFFFF) {
+            // latin_tolower() takes an unsigned short. Narrowing a value outside that range
+            // would look up an unrelated code point, so leave it as is.
+            return c;
+        }
+        return static_cast<int>(latin_tolower(static_cast<unsigned short>(c)));
+    }
+
+    // Maps c to its base character first, then lower-cases the result.
+    static AK_FORCE_INLINE int toBaseLowerCase(const int c) {
+        return toLowerCase(toBaseCodePoint(c));
+    }
+
+    // Returns true if codePoint is KEYCODE_SINGLE_QUOTE or KEYCODE_HYPHEN_MINUS.
+    static AK_FORCE_INLINE bool isIntentionalOmissionCodePoint(const int codePoint) {
+        // TODO: Do not hardcode here
+        return codePoint == KEYCODE_SINGLE_QUOTE || codePoint == KEYCODE_HYPHEN_MINUS;
+    }
+
+    // Returns the number of code points that precede the first 0 in codePoints, or arraySize
+    // if none of the first arraySize elements is 0.
+    static AK_FORCE_INLINE int getCodePointCount(const int arraySize, const int *const codePoints) {
+        int size = 0;
+        for (; size < arraySize; ++size) {
+            if (codePoints[size] == '\0') {
+                break;
+            }
+        }
+        return size;
+    }
+
+    // Maps c to its base character using BASE_CHARS. Code points outside the table, including
+    // negative values, are returned unchanged.
+    static AK_FORCE_INLINE int toBaseCodePoint(int c) {
+        // The lower bound matters: a negative c would otherwise read before the table.
+        if (c >= 0 && c < BASE_CHARS_SIZE) {
+            return static_cast<int>(BASE_CHARS[c]);
+        }
+        return c;
+    }
+
+    // Table-based lower-casing (binary search over SORTED_CHAR_MAP); defined in char_utils.cpp.
+    static unsigned short latin_tolower(const unsigned short c);
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(CharUtils);
+
+    /**
+     * Table mapping most combined Latin, Greek, and Cyrillic characters
+     * to their base characters.  If c is in range, BASE_CHARS[c] == c
+     * if c is not a combined character, or the base character if it
+     * is combined.
+     */
+    static const int BASE_CHARS_SIZE = 0x0500;
+    static const unsigned short BASE_CHARS[BASE_CHARS_SIZE];
+};
+} // namespace latinime
+#endif // LATINIME_CHAR_UTILS_H
diff --git a/native/jni/src/suggest/core/dictionary/digraph_utils.cpp b/native/jni/src/suggest/core/dictionary/digraph_utils.cpp
index 7a0f755e5..e8cdd5352 100644
--- a/native/jni/src/suggest/core/dictionary/digraph_utils.cpp
+++ b/native/jni/src/suggest/core/dictionary/digraph_utils.cpp
@@ -16,9 +16,9 @@
 
 #include "suggest/core/dictionary/digraph_utils.h"
 
-#include "char_utils.h"
 #include "defines.h"
 #include "suggest/core/dictionary/binary_format.h"
+#include "suggest/core/dictionary/char_utils.h"
 
 namespace latinime {
 
@@ -122,7 +122,7 @@ const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] =
 /* static */ const DigraphUtils::digraph_t *DigraphUtils::getDigraphForDigraphTypeAndCodePoint(
         const DigraphUtils::DigraphType digraphType, const int compositeGlyphCodePoint) {
     const DigraphUtils::digraph_t *digraphs = 0;
-    const int compositeGlyphLowerCodePoint = toLowerCase(compositeGlyphCodePoint);
+    const int compositeGlyphLowerCodePoint = CharUtils::toLowerCase(compositeGlyphCodePoint);
     const int digraphsSize =
             DigraphUtils::getAllDigraphsForDictionaryAndReturnSize(digraphType, &digraphs);
     for (int i = 0; i < digraphsSize; i++) {
diff --git a/native/jni/src/suggest/core/layout/proximity_info.cpp b/native/jni/src/suggest/core/layout/proximity_info.cpp
index 6dd88051c..0b5d71a43 100644
--- a/native/jni/src/suggest/core/layout/proximity_info.cpp
+++ b/native/jni/src/suggest/core/layout/proximity_info.cpp
@@ -21,9 +21,9 @@
 #include <cstring>
 #include <cmath>
 
-#include "char_utils.h"
 #include "defines.h"
 #include "jni.h"
+#include "suggest/core/dictionary/char_utils.h"
 #include "suggest/core/layout/additional_proximity_chars.h"
 #include "suggest/core/layout/geometry_utils.h"
 #include "suggest/core/layout/proximity_info_params.h"
@@ -165,7 +165,7 @@ void ProximityInfo::initializeG() {
     // TODO: Optimize
     for (int i = 0; i < KEY_COUNT; ++i) {
         const int code = mKeyCodePoints[i];
-        const int lowerCode = toLowerCase(code);
+        const int lowerCode = CharUtils::toLowerCase(code);
         mCenterXsG[i] = mKeyXCoordinates[i] + mKeyWidths[i] / 2;
         mCenterYsG[i] = mKeyYCoordinates[i] + mKeyHeights[i] / 2;
         mCodeToKeyMap[lowerCode] = i;
diff --git a/native/jni/src/suggest/core/layout/proximity_info_state.cpp b/native/jni/src/suggest/core/layout/proximity_info_state.cpp
index 2bd3ceb7e..412d5508b 100644
--- a/native/jni/src/suggest/core/layout/proximity_info_state.cpp
+++ b/native/jni/src/suggest/core/layout/proximity_info_state.cpp
@@ -23,6 +23,7 @@
 #include <vector>
 
 #include "defines.h"
+#include "suggest/core/dictionary/char_utils.h"
 #include "suggest/core/layout/geometry_utils.h"
 #include "suggest/core/layout/proximity_info.h"
 #include "suggest/core/layout/proximity_info_state_utils.h"
@@ -175,7 +176,7 @@ float ProximityInfoState::getPointToKeyLength(
         const int index = inputIndex * mProximityInfo->getKeyCount() + keyId;
         return min(mSampledNormalizedSquaredLengthCache[index], mMaxPointToKeyLength);
     }
-    if (isIntentionalOmissionCodePoint(codePoint)) {
+    if (CharUtils::isIntentionalOmissionCodePoint(codePoint)) {
         return 0.0f;
     }
     // If the char is not a key on the keyboard then return the max length.
@@ -203,7 +204,7 @@ ProximityType ProximityInfoState::getProximityType(const int index, const int co
         const bool checkProximityChars, int *proximityIndex) const {
     const int *currentCodePoints = getProximityCodePointsAt(index);
     const int firstCodePoint = currentCodePoints[0];
-    const int baseLowerC = toBaseLowerCase(codePoint);
+    const int baseLowerC = CharUtils::toBaseLowerCase(codePoint);
 
     // The first char in the array is what user typed. If it matches right away, that means the
     // user typed that same char for this pos.
@@ -215,7 +216,7 @@ ProximityType ProximityInfoState::getProximityType(const int index, const int co
 
     // If the non-accented, lowercased version of that first character matches c, then we have a
     // non-accented version of the accented character the user typed. Treat it as a close char.
-    if (toBaseLowerCase(firstCodePoint) == baseLowerC) {
+    if (CharUtils::toBaseLowerCase(firstCodePoint) == baseLowerC) {
         return PROXIMITY_CHAR;
     }
 
@@ -257,8 +258,8 @@ ProximityType ProximityInfoState::getProximityTypeG(const int index, const int c
     if (!isUsed()) {
         return UNRELATED_CHAR;
     }
-    const int lowerCodePoint = toLowerCase(codePoint);
-    const int baseLowerCodePoint = toBaseCodePoint(lowerCodePoint);
+    const int lowerCodePoint = CharUtils::toLowerCase(codePoint);
+    const int baseLowerCodePoint = CharUtils::toBaseCodePoint(lowerCodePoint);
     for (int i = 0; i < static_cast<int>(mSampledSearchKeyVectors[index].size()); ++i) {
         if (mSampledSearchKeyVectors[index][i] == lowerCodePoint
                 || mSampledSearchKeyVectors[index][i] == baseLowerCodePoint) {
diff --git a/native/jni/src/suggest/core/layout/proximity_info_state.h b/native/jni/src/suggest/core/layout/proximity_info_state.h
index fd09307fe..a971294e3 100644
--- a/native/jni/src/suggest/core/layout/proximity_info_state.h
+++ b/native/jni/src/suggest/core/layout/proximity_info_state.h
@@ -20,7 +20,6 @@
 #include <cstring> // for memset()
 #include <vector>
 
-#include "char_utils.h"
 #include "defines.h"
 #include "hash_map_compat.h"
 #include "suggest/core/layout/proximity_info_params.h"
diff --git a/native/jni/src/suggest/core/layout/proximity_info_utils.h b/native/jni/src/suggest/core/layout/proximity_info_utils.h
index c3a275b3c..3588f4df8 100644
--- a/native/jni/src/suggest/core/layout/proximity_info_utils.h
+++ b/native/jni/src/suggest/core/layout/proximity_info_utils.h
@@ -19,9 +19,9 @@
 
 #include <cmath>
 
-#include "char_utils.h"
 #include "defines.h"
 #include "hash_map_compat.h"
+#include "suggest/core/dictionary/char_utils.h"
 #include "suggest/core/layout/additional_proximity_chars.h"
 #include "suggest/core/layout/geometry_utils.h"
 
@@ -37,7 +37,7 @@ class ProximityInfoUtils {
         if (c == NOT_A_CODE_POINT) {
             return NOT_AN_INDEX;
         }
-        const int lowerCode = toLowerCase(c);
+        const int lowerCode = CharUtils::toLowerCase(c);
         hash_map_compat<int, int>::const_iterator mapPos = codeToKeyMap->find(lowerCode);
         if (mapPos != codeToKeyMap->end()) {
             return mapPos->second;
diff --git a/native/jni/src/suggest/core/policy/weighting.cpp b/native/jni/src/suggest/core/policy/weighting.cpp
index d01531f07..7ad568e25 100644
--- a/native/jni/src/suggest/core/policy/weighting.cpp
+++ b/native/jni/src/suggest/core/policy/weighting.cpp
@@ -16,7 +16,6 @@
 
 #include "suggest/core/policy/weighting.h"
 
-#include "char_utils.h"
 #include "defines.h"
 #include "suggest/core/dicnode/dic_node.h"
 #include "suggest/core/dicnode/dic_node_profiler.h"
diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp
index 94441877a..720222363 100644
--- a/native/jni/src/suggest/core/suggest.cpp
+++ b/native/jni/src/suggest/core/suggest.cpp
@@ -16,7 +16,6 @@
 
 #include "suggest/core/suggest.h"
 
-#include "char_utils.h"
 #include "suggest/core/dicnode/dic_node.h"
 #include "suggest/core/dicnode/dic_node_priority_queue.h"
 #include "suggest/core/dicnode/dic_node_vector.h"
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h
index b212fe101..e0664185c 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h
@@ -19,10 +19,10 @@
 
 #include <stdint.h>
 
-#include "char_utils.h"
 #include "defines.h"
 #include "suggest/core/dicnode/dic_node.h"
 #include "suggest/core/dicnode/dic_node_vector.h"
+#include "suggest/core/dictionary/char_utils.h"
 #include "suggest/core/layout/proximity_info_state.h"
 #include "suggest/core/policy/traversal.h"
 #include "suggest/core/session/dic_traverse_session.h"
@@ -64,9 +64,9 @@ class TypingTraversal : public Traversal {
         }
         const int point0Index = dicNode->getInputIndex(0);
         const int currentBaseLowerCodePoint =
-                toBaseLowerCase(childDicNode->getNodeCodePoint());
+                CharUtils::toBaseLowerCase(childDicNode->getNodeCodePoint());
         const int typedBaseLowerCodePoint =
-                toBaseLowerCase(traverseSession->getProximityInfoState(0)
+                CharUtils::toBaseLowerCase(traverseSession->getProximityInfoState(0)
                         ->getPrimaryCodePointAt(point0Index));
         return (currentBaseLowerCodePoint != typedBaseLowerCodePoint);
     }
@@ -172,7 +172,7 @@ class TypingTraversal : public Traversal {
         }
         const int c = dicNode->getOutputWordBuf()[0];
         const bool shortCappedWord = dicNode->getDepth()
-                < ScoringParams::THRESHOLD_SHORT_WORD_LENGTH && isAsciiUpper(c);
+                < ScoringParams::THRESHOLD_SHORT_WORD_LENGTH && CharUtils::isAsciiUpper(c);
         return !shortCappedWord
                 || probability >= ScoringParams::THRESHOLD_NEXT_WORD_PROBABILITY_FOR_CAPPED;
     }
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
index cb6abd574..9e0c0d2f7 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
@@ -19,6 +19,7 @@
 
 #include "defines.h"
 #include "suggest/core/dicnode/dic_node_utils.h"
+#include "suggest/core/dictionary/char_utils.h"
 #include "suggest/core/layout/touch_position_correction_utils.h"
 #include "suggest/core/policy/weighting.h"
 #include "suggest/core/session/dic_traverse_session.h"
@@ -98,9 +99,9 @@ class TypingWeighting : public Weighting {
     bool isProximityDicNode(const DicTraverseSession *const traverseSession,
             const DicNode *const dicNode) const {
         const int pointIndex = dicNode->getInputIndex(0);
-        const int primaryCodePoint = toBaseLowerCase(
+        const int primaryCodePoint = CharUtils::toBaseLowerCase(
                 traverseSession->getProximityInfoState(0)->getPrimaryCodePointAt(pointIndex));
-        const int dicNodeChar = toBaseLowerCase(dicNode->getNodeCodePoint());
+        const int dicNodeChar = CharUtils::toBaseLowerCase(dicNode->getNodeCodePoint());
         return primaryCodePoint != dicNodeChar;
     }
 
diff --git a/native/jni/src/suggest/policyimpl/utils/damerau_levenshtein_edit_distance_policy.h b/native/jni/src/suggest/policyimpl/utils/damerau_levenshtein_edit_distance_policy.h
index ec1457455..09f986adf 100644
--- a/native/jni/src/suggest/policyimpl/utils/damerau_levenshtein_edit_distance_policy.h
+++ b/native/jni/src/suggest/policyimpl/utils/damerau_levenshtein_edit_distance_policy.h
@@ -17,7 +17,7 @@
 #ifndef LATINIME_DAEMARU_LEVENSHTEIN_EDIT_DISTANCE_POLICY_H
 #define LATINIME_DAEMARU_LEVENSHTEIN_EDIT_DISTANCE_POLICY_H
 
-#include "char_utils.h"
+#include "suggest/core/dictionary/char_utils.h"
 #include "suggest/policyimpl/utils/edit_distance_policy.h"
 
 namespace latinime {
@@ -31,8 +31,8 @@ class DamerauLevenshteinEditDistancePolicy : public EditDistancePolicy {
     ~DamerauLevenshteinEditDistancePolicy() {}
 
     AK_FORCE_INLINE float getSubstitutionCost(const int index0, const int index1) const {
-        const int c0 = toBaseLowerCase(mString0[index0]);
-        const int c1 = toBaseLowerCase(mString1[index1]);
+        const int c0 = CharUtils::toBaseLowerCase(mString0[index0]);
+        const int c1 = CharUtils::toBaseLowerCase(mString1[index1]);
         return (c0 == c1) ? 0.0f : 1.0f;
     }
 
@@ -45,10 +45,10 @@ class DamerauLevenshteinEditDistancePolicy : public EditDistancePolicy {
     }
 
     AK_FORCE_INLINE bool allowTransposition(const int index0, const int index1) const {
-        const int c0 = toBaseLowerCase(mString0[index0]);
-        const int c1 = toBaseLowerCase(mString1[index1]);
-        if (index0 > 0 && index1 > 0 && c0 == toBaseLowerCase(mString1[index1 - 1])
-                && c1 == toBaseLowerCase(mString0[index0 - 1])) {
+        const int c0 = CharUtils::toBaseLowerCase(mString0[index0]);
+        const int c1 = CharUtils::toBaseLowerCase(mString1[index1]);
+        if (index0 > 0 && index1 > 0 && c0 == CharUtils::toBaseLowerCase(mString1[index1 - 1])
+                && c1 == CharUtils::toBaseLowerCase(mString0[index0 - 1])) {
             return true;
         }
         return false;
diff --git a/native/jni/src/unigram_dictionary.cpp b/native/jni/src/unigram_dictionary.cpp
index 1133256c4..66a8b8542 100644
--- a/native/jni/src/unigram_dictionary.cpp
+++ b/native/jni/src/unigram_dictionary.cpp
@@ -18,9 +18,9 @@
 
 #define LOG_TAG "LatinIME: unigram_dictionary.cpp"
 
-#include "char_utils.h"
 #include "defines.h"
 #include "suggest/core/dictionary/binary_format.h"
+#include "suggest/core/dictionary/char_utils.h"
 #include "suggest/core/dictionary/dictionary.h"
 #include "suggest/core/dictionary/digraph_utils.h"
 #include "suggest/core/dictionary/terminal_attributes.h"
@@ -696,8 +696,8 @@ static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
     const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
     int pos = startPos;
     int codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
-    int baseChar = toBaseLowerCase(codePoint);
-    const int wChar = toBaseLowerCase(inWord[startInputIndex]);
+    int baseChar = CharUtils::toBaseLowerCase(codePoint);
+    const int wChar = CharUtils::toBaseLowerCase(inWord[startInputIndex]);
 
     if (baseChar != wChar) {
         *outPos = hasMultipleChars ? BinaryFormat::skipOtherCharacters(root, pos) : pos;
@@ -709,8 +709,9 @@ static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
     if (hasMultipleChars) {
         codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
         while (NOT_A_CODE_POINT != codePoint) {
-            baseChar = toBaseLowerCase(codePoint);
-            if (inputIndex + 1 >= inputSize || toBaseLowerCase(inWord[++inputIndex]) != baseChar) {
+            baseChar = CharUtils::toBaseLowerCase(codePoint);
+            if (inputIndex + 1 >= inputSize
+                    || CharUtils::toBaseLowerCase(inWord[++inputIndex]) != baseChar) {
                 *outPos = BinaryFormat::skipOtherCharacters(root, pos);
                 *outInputIndex = startInputIndex;
                 return false;