8 files changed, 377 insertions, 38 deletions
diff --git a/native/jni/src/utils/byte_array_view.h b/native/jni/src/utils/byte_array_view.h
index 2c97c6d58..2b778af6f 100644
--- a/native/jni/src/utils/byte_array_view.h
+++ b/native/jni/src/utils/byte_array_view.h
@@ -42,6 +42,13 @@ class ReadOnlyByteArrayView {
         return mPtr;
     }
 
+    // Returns a view without the first n bytes, or an empty view when n >= the view size.
+    AK_FORCE_INLINE const ReadOnlyByteArrayView skip(const size_t n) const {
+        const bool hasRemainder = n < mSize;
+        return hasRemainder ? ReadOnlyByteArrayView(mPtr + n, mSize - n)
+                : ReadOnlyByteArrayView();
+    }
+
  private:
     DISALLOW_ASSIGNMENT_OPERATOR(ReadOnlyByteArrayView);
 
@@ -77,10 +84,12 @@ class ReadWriteByteArrayView {
     }
 
  private:
-    DISALLOW_ASSIGNMENT_OPERATOR(ReadWriteByteArrayView);
+    // The default copy constructor and assignment operator are kept so that this class can be
+    // stored in STL containers.
 
-    uint8_t *const mPtr;
-    const size_t mSize;
+    // These members must not be const, or the default assignment operator would be unavailable.
+    uint8_t *mPtr;
+    size_t mSize;
 };
 
 } // namespace latinime
diff --git a/native/jni/src/utils/char_utils.cpp b/native/jni/src/utils/char_utils.cpp
index b17e0847d..a43e6dd62 100644
--- a/native/jni/src/utils/char_utils.cpp
+++ b/native/jni/src/utils/char_utils.cpp
@@ -1057,11 +1057,11 @@ static int compare_pair_capital(const void *a, const void *b) {
             - static_cast<int>((static_cast<const struct LatinCapitalSmallPair *>(b))->capital);
 }
 
-/* static */ unsigned short CharUtils::latin_tolower(const unsigned short c) {
+/* static */ int CharUtils::latin_tolower(const int c) {
     struct LatinCapitalSmallPair *p =
             static_cast<struct LatinCapitalSmallPair *>(bsearch(&c, SORTED_CHAR_MAP,
                     NELEMS(SORTED_CHAR_MAP), sizeof(SORTED_CHAR_MAP[0]), compare_pair_capital));
-    return p ? p->small : c;
+    return p ? static_cast<int>(p->small) : c;
 }
 
 /*
@@ -1117,7 +1117,9 @@ static int compare_pair_capital(const void *a, const void *b) {
           // TODO: Check if it's really acceptable to consider ø a diacritical variant of o
     /* U+0100 */ 0x0041, 0x0061, 0x0041, 0x0061, 0x0041, 0x0061, 0x0043, 0x0063,
     /* U+0108 */ 0x0043, 0x0063, 0x0043, 0x0063, 0x0043, 0x0063, 0x0044, 0x0064,
-    /* U+0110 */ 0x0110, 0x0111, 0x0045, 0x0065, 0x0045, 0x0065, 0x0045, 0x0065,
+    /* U+0110 */ 0x0044, 0x0064, 0x0045, 0x0065, 0x0045, 0x0065, 0x0045, 0x0065,
+        // U+0110 (capital D with stroke): Manually changed from 0110 to 0044 ('D')
+        // U+0111 (small d with stroke): Manually changed from 0111 to 0064 ('d')
     /* U+0118 */ 0x0045, 0x0065, 0x0045, 0x0065, 0x0047, 0x0067, 0x0047, 0x0067,
     /* U+0120 */ 0x0047, 0x0067, 0x0047, 0x0067, 0x0048, 0x0068, 0x0126, 0x0127,
     /* U+0128 */ 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069,
@@ -1135,6 +1137,9 @@ static int compare_pair_capital(const void *a, const void *b) {
     /* U+0170 */ 0x0055, 0x0075, 0x0055, 0x0075, 0x0057, 0x0077, 0x0059, 0x0079,
     /* U+0178 */ 0x0059, 0x005A, 0x007A, 0x005A, 0x007A, 0x005A, 0x007A, 0x0073,
     /* U+0180 */ 0x0180, 0x0181, 0x0182, 0x0183, 0x0184, 0x0185, 0x0186, 0x0187,
+          // TODO: A lot of letters are their own base code points, but for
+          // some (e.g. U+0180) it doesn't seem right. Ideally each code point should
+          // be checked individually with all languages it's used in.
     /* U+0188 */ 0x0188, 0x0189, 0x018A, 0x018B, 0x018C, 0x018D, 0x018E, 0x018F,
     /* U+0190 */ 0x0190, 0x0191, 0x0192, 0x0193, 0x0194, 0x0195, 0x0196, 0x0197,
     /* U+0198 */ 0x0198, 0x0199, 0x019A, 0x019B, 0x019C, 0x019D, 0x019E, 0x019F,
diff --git a/native/jni/src/utils/char_utils.h b/native/jni/src/utils/char_utils.h
index 63786502b..7871c26ef 100644
--- a/native/jni/src/utils/char_utils.h
+++ b/native/jni/src/utils/char_utils.h
@@ -27,20 +27,14 @@ namespace latinime {
 
 class CharUtils {
  public:
+    static const std::vector<int> EMPTY_STRING;
+
     static AK_FORCE_INLINE bool isAsciiUpper(int c) {
         // Note: isupper(...) reports false positives for some Cyrillic characters, causing them to
         // be incorrectly lower-cased using toAsciiLower(...) rather than latin_tolower(...).
         return (c >= 'A' && c <= 'Z');
     }
 
-    static AK_FORCE_INLINE int toAsciiLower(int c) {
-        return c - 'A' + 'a';
-    }
-
-    static AK_FORCE_INLINE bool isAscii(int c) {
-        return isascii(c) != 0;
-    }
-
     static AK_FORCE_INLINE int toLowerCase(const int c) {
         if (isAsciiUpper(c)) {
             return toAsciiLower(c);
@@ -48,7 +42,7 @@ class CharUtils {
         if (isAscii(c)) {
             return c;
         }
-        return static_cast<int>(latin_tolower(static_cast<unsigned short>(c)));
+        return latin_tolower(c);
     }
 
     static AK_FORCE_INLINE int toBaseLowerCase(const int c) {
@@ -59,7 +53,6 @@ class CharUtils {
         // TODO: Do not hardcode here
         return codePoint == KEYCODE_SINGLE_QUOTE || codePoint == KEYCODE_HYPHEN_MINUS;
     }
-
     static AK_FORCE_INLINE int getCodePointCount(const int arraySize, const int *const codePoints) {
         int size = 0;
         for (; size < arraySize; ++size) {
@@ -91,9 +84,6 @@ class CharUtils {
         return codePoint >= MIN_UNICODE_CODE_POINT && codePoint <= MAX_UNICODE_CODE_POINT;
     }
 
-    static unsigned short latin_tolower(const unsigned short c);
-    static const std::vector<int> EMPTY_STRING;
-
     // Returns updated code point count. Returns 0 when the code points cannot be marked as a
     // Beginning-of-Sentence.
     static AK_FORCE_INLINE int attachBeginningOfSentenceMarker(int *const codePoints,
@@ -111,6 +101,17 @@ class CharUtils {
         return codePointCount + 1;
     }
 
+    // Drops a leading Beginning-of-Sentence marker, if any. Returns updated code point count.
+    static AK_FORCE_INLINE int removeBeginningOfSentenceMarker(int *const codePoints,
+            const int codePointCount) {
+        const bool hasMarker =
+                codePointCount > 0 && codePoints[0] == CODE_POINT_BEGINNING_OF_SENTENCE;
+        if (hasMarker) {
+            memmove(codePoints, codePoints + 1, sizeof(int) * (codePointCount - 1));
+        }
+        return hasMarker ? codePointCount - 1 : codePointCount;
+    }
+
  private:
     DISALLOW_IMPLICIT_CONSTRUCTORS(CharUtils);
 
@@ -125,6 +126,16 @@ class CharUtils {
      */
     static const int BASE_CHARS_SIZE = 0x0500;
     static const unsigned short BASE_CHARS[BASE_CHARS_SIZE];
+
+    static AK_FORCE_INLINE bool isAscii(int c) {
+        return isascii(c) != 0;  // isascii() is non-zero for values that fit in 7 bits.
+    }
+
+    static AK_FORCE_INLINE int toAsciiLower(int c) {
+        return c - 'A' + 'a';  // Shifts by the 'A'-to-'a' offset; no range check is done here.
+    }
+
+    static int latin_tolower(const int c);
 };
 } // namespace latinime
 #endif // LATINIME_CHAR_UTILS_H
diff --git a/native/jni/src/utils/int_array_view.h b/native/jni/src/utils/int_array_view.h
index c1ddc9812..e0f671056 100644
--- a/native/jni/src/utils/int_array_view.h
+++ b/native/jni/src/utils/int_array_view.h
@@ -17,8 +17,10 @@
 #ifndef LATINIME_INT_ARRAY_VIEW_H
 #define LATINIME_INT_ARRAY_VIEW_H
 
+#include <algorithm>
+#include <array>
 #include <cstdint>
-#include <cstdlib>
+#include <cstring>
 #include <vector>
 
 #include "defines.h"
@@ -56,14 +58,14 @@ class IntArrayView {
     explicit IntArrayView(const std::vector<int> &vector)
             : mPtr(vector.data()), mSize(vector.size()) {}
 
-    template <int N>
-    AK_FORCE_INLINE static IntArrayView fromFixedSizeArray(const int (&array)[N]) {
-        return IntArrayView(array, N);
+    template <size_t N>
+    AK_FORCE_INLINE static IntArrayView fromArray(const std::array<int, N> &array) {
+        return IntArrayView(array.data(), array.size());
     }
 
-    // Returns a view that points one int object. Does not take ownership of the given object.
-    AK_FORCE_INLINE static IntArrayView fromObject(const int *const object) {
-        return IntArrayView(object, 1);
+    // Returns a view that points one int object.
+    AK_FORCE_INLINE static IntArrayView singleElementView(const int *const ptr) {
+        return IntArrayView(ptr, 1);
     }
 
     AK_FORCE_INLINE int operator[](const size_t index) const {
@@ -91,6 +93,69 @@ class IntArrayView {
         return mPtr + mSize;
     }
 
+    AK_FORCE_INLINE bool contains(const int value) const {
+        return end() != std::find(begin(), end(), value);  // Linear scan over the view.
+    }
+
+    // Returns a view of the leading elements whose size is at most maxSize.
+    AK_FORCE_INLINE const IntArrayView limit(const size_t maxSize) const {
+        return IntArrayView(mPtr, maxSize < mSize ? maxSize : mSize);
+    }
+
+    // Returns a view without the first n elements. The result is an empty view when n is
+    // greater than or equal to the size of this view.
+    AK_FORCE_INLINE const IntArrayView skip(const size_t n) const {
+        const bool hasRemainder = n < mSize;
+        return hasRemainder ? IntArrayView(mPtr + n, mSize - n) : IntArrayView();
+    }
+
+    template <size_t N>
+    void copyToArray(std::array<int, N> *const buffer, const size_t offset) const {
+        ASSERT(mSize + offset <= N);  // The whole view has to fit into buffer behind offset.
+        memmove(buffer->data() + offset, mPtr, sizeof(int) * mSize);  // Copies every element.
+    }
+
+    // Returns the first element of the view.
+    // Returns defaultValue instead when the view is empty.
+    AK_FORCE_INLINE int firstOrDefault(const int defaultValue) const {
+        const bool hasElement = !empty();
+        return hasElement ? mPtr[0] : defaultValue;
+    }
+
+    // Returns the last element of the view.
+    // Returns defaultValue instead when the view is empty.
+    AK_FORCE_INLINE int lastOrDefault(const int defaultValue) const {
+        const bool hasElement = !empty();
+        return hasElement ? mPtr[mSize - 1] : defaultValue;
+    }
+
+    AK_FORCE_INLINE std::vector<int> toVector() const {
+        return std::vector<int>(begin(), end());  // Copies the viewed elements.
+    }
+
+    // Splits the view at each separator into at most limit sub-views; separators are dropped.
+    // Once limit - 1 cuts have been made, the whole remainder becomes the last sub-view.
+    // Returns an empty vector when limit <= 0.
+    std::vector<IntArrayView> split(const int separator, const int limit = S_INT_MAX) const {
+        std::vector<IntArrayView> pieces;
+        if (limit <= 0) {
+            return pieces;
+        }
+        // The final piece is always appended after the loop, hence only limit - 1 cuts.
+        const size_t maxCutCount = static_cast<size_t>(limit - 1);
+        size_t pieceStart = 0;
+        for (size_t i = 0; i < mSize && pieces.size() < maxCutCount; ++i) {
+            if (mPtr[i] != separator) {
+                continue;
+            }
+            // Elements in [pieceStart, i) form a piece; the separator at i is skipped.
+            pieces.emplace_back(mPtr + pieceStart, i - pieceStart);
+            pieceStart = i + 1;
+        }
+        pieces.emplace_back(mPtr + pieceStart, mSize - pieceStart);
+        return pieces;
+    }
+
  private:
     DISALLOW_ASSIGNMENT_OPERATOR(IntArrayView);
 
@@ -100,6 +165,9 @@ class IntArrayView {
 
 using WordIdArrayView = IntArrayView;
 using PtNodePosArrayView = IntArrayView;
+using CodePointArrayView = IntArrayView;
+template <size_t size>
+using WordIdArray = std::array<int, size>;
 
 } // namespace latinime
 #endif // LATINIME_MEMORY_VIEW_H
diff --git a/native/jni/src/utils/jni_data_utils.cpp b/native/jni/src/utils/jni_data_utils.cpp
index 5555293d5..41f0623d8 100644
--- a/native/jni/src/utils/jni_data_utils.cpp
+++ b/native/jni/src/utils/jni_data_utils.cpp
@@ -16,9 +16,100 @@
 
 #include "utils/jni_data_utils.h"
 
+#include "utils/int_array_view.h"
+
 namespace latinime {
 
 const int JniDataUtils::CODE_POINT_REPLACEMENT_CHARACTER = 0xFFFD;
 const int JniDataUtils::CODE_POINT_NULL = 0;
 
+// Copies wordProperty into the Java-side output arrays and ArrayLists passed by the caller.
+/* static */ void JniDataUtils::outputWordProperty(JNIEnv *const env,
+        const WordProperty &wordProperty, jintArray outCodePoints, jbooleanArray outFlags,
+        jintArray outProbabilityInfo, jobject outNgramPrevWordsArray,
+        jobject outNgramPrevWordIsBeginningOfSentenceArray, jobject outNgramTargets,
+        jobject outNgramProbabilities, jobject outShortcutTargets,
+        jobject outShortcutProbabilities) {
+    const CodePointArrayView codePoints = wordProperty.getCodePoints();
+    outputCodePoints(env, outCodePoints, 0 /* start */, MAX_WORD_LENGTH /* maxLength */,
+            codePoints.data(), codePoints.size(), false /* needsNullTermination */);
+    const UnigramProperty &unigramProperty = wordProperty.getUnigramProperty();
+    const std::vector<NgramProperty> &ngrams = wordProperty.getNgramProperties();
+    jboolean flags[] = {unigramProperty.isNotAWord(), unigramProperty.isPossiblyOffensive(),
+            !ngrams.empty(), unigramProperty.hasShortcuts(),
+            unigramProperty.representsBeginningOfSentence()};
+    env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags);
+    const HistoricalInfo &historicalInfo = unigramProperty.getHistoricalInfo();
+    int probabilityInfo[] = {unigramProperty.getProbability(), historicalInfo.getTimestamp(),
+            historicalInfo.getLevel(), historicalInfo.getCount()};
+    env->SetIntArrayRegion(outProbabilityInfo, 0 /* start */, NELEMS(probabilityInfo),
+            probabilityInfo);
+
+    jclass integerClass = env->FindClass("java/lang/Integer");
+    jmethodID intToIntegerConstructorId = env->GetMethodID(integerClass, "<init>", "(I)V");
+    jclass arrayListClass = env->FindClass("java/util/ArrayList");
+    jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z");
+
+    // Output ngrams. Each ngram appends one entry to every outNgram* list.
+    jclass intArrayClass = env->FindClass("[I");
+    for (const auto &ngramProperty : ngrams) {
+        const NgramContext *const ngramContext = ngramProperty.getNgramContext();
+        jobjectArray prevWordWordCodePointsArray = env->NewObjectArray(
+                ngramContext->getPrevWordCount(), intArrayClass, nullptr);
+        jbooleanArray prevWordIsBeginningOfSentenceArray =
+                env->NewBooleanArray(ngramContext->getPrevWordCount());
+        for (size_t i = 0; i < ngramContext->getPrevWordCount(); ++i) {
+            const CodePointArrayView codePoints = ngramContext->getNthPrevWordCodePoints(i + 1);
+            jintArray prevWordCodePoints = env->NewIntArray(codePoints.size());
+            outputCodePoints(env, prevWordCodePoints, 0 /* start */, codePoints.size(),
+                    codePoints.data(), codePoints.size(), false /* needsNullTermination */);
+            env->SetObjectArrayElement(prevWordWordCodePointsArray, i, prevWordCodePoints);
+            env->DeleteLocalRef(prevWordCodePoints);
+            putBooleanToArray(env, prevWordIsBeginningOfSentenceArray, i,
+                    ngramContext->isNthPrevWordBeginningOfSentence(i + 1));
+        }
+        env->CallBooleanMethod(outNgramPrevWordsArray, addMethodId, prevWordWordCodePointsArray);
+        env->CallBooleanMethod(outNgramPrevWordIsBeginningOfSentenceArray, addMethodId,
+                prevWordIsBeginningOfSentenceArray);
+        env->DeleteLocalRef(prevWordWordCodePointsArray);
+        env->DeleteLocalRef(prevWordIsBeginningOfSentenceArray);
+
+        const std::vector<int> *const targetWordCodePoints = ngramProperty.getTargetCodePoints();
+        jintArray targetWordCodePointArray = env->NewIntArray(targetWordCodePoints->size());
+        outputCodePoints(env, targetWordCodePointArray, 0 /* start */,
+                targetWordCodePoints->size(), targetWordCodePoints->data(),
+                targetWordCodePoints->size(), false /* needsNullTermination */);
+        env->CallBooleanMethod(outNgramTargets, addMethodId, targetWordCodePointArray);
+        env->DeleteLocalRef(targetWordCodePointArray);
+
+        const HistoricalInfo &ngramHistoricalInfo = ngramProperty.getHistoricalInfo();
+        int bigramProbabilityInfo[] = {ngramProperty.getProbability(),
+                ngramHistoricalInfo.getTimestamp(), ngramHistoricalInfo.getLevel(),
+                ngramHistoricalInfo.getCount()};
+        jintArray bigramProbabilityInfoArray = env->NewIntArray(NELEMS(bigramProbabilityInfo));
+        env->SetIntArrayRegion(bigramProbabilityInfoArray, 0 /* start */,
+                NELEMS(bigramProbabilityInfo), bigramProbabilityInfo);
+        env->CallBooleanMethod(outNgramProbabilities, addMethodId, bigramProbabilityInfoArray);
+        env->DeleteLocalRef(bigramProbabilityInfoArray);
+    }
+
+    // Output shortcuts: the target code points and the probability boxed as an Integer.
+    for (const auto &shortcut : unigramProperty.getShortcuts()) {
+        const std::vector<int> *const targetCodePoints = shortcut.getTargetCodePoints();
+        jintArray shortcutTargetCodePointArray = env->NewIntArray(targetCodePoints->size());
+        outputCodePoints(env, shortcutTargetCodePointArray, 0 /* start */,
+                targetCodePoints->size(), targetCodePoints->data(), targetCodePoints->size(),
+                false /* needsNullTermination */);
+        env->CallBooleanMethod(outShortcutTargets, addMethodId, shortcutTargetCodePointArray);
+        env->DeleteLocalRef(shortcutTargetCodePointArray);
+        jobject integerProbability = env->NewObject(integerClass, intToIntegerConstructorId,
+                shortcut.getProbability());
+        env->CallBooleanMethod(outShortcutProbabilities, addMethodId, integerProbability);
+        env->DeleteLocalRef(integerProbability);
+    }
+    env->DeleteLocalRef(integerClass);
+    env->DeleteLocalRef(arrayListClass);
+    env->DeleteLocalRef(intArrayClass);
+}
+
 } // namespace latinime
diff --git a/native/jni/src/utils/jni_data_utils.h b/native/jni/src/utils/jni_data_utils.h
index cb82d3c3b..8024e34c4 100644
--- a/native/jni/src/utils/jni_data_utils.h
+++ b/native/jni/src/utils/jni_data_utils.h
@@ -20,10 +20,11 @@
 #include <vector>
 
 #include "defines.h"
+#include "dictionary/header/header_read_write_utils.h"
+#include "dictionary/interface/dictionary_header_structure_policy.h"
+#include "dictionary/property/ngram_context.h"
+#include "dictionary/property/word_property.h"
 #include "jni.h"
-#include "suggest/core/session/prev_words_info.h"
-#include "suggest/core/policy/dictionary_header_structure_policy.h"
-#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h"
 #include "utils/char_utils.h"
 
 namespace latinime {
@@ -50,6 +51,7 @@ class JniDataUtils {
             const jsize keyUtf8Length = env->GetStringUTFLength(keyString);
             char keyChars[keyUtf8Length + 1];
             env->GetStringUTFRegion(keyString, 0, env->GetStringLength(keyString), keyChars);
+            env->DeleteLocalRef(keyString);
             keyChars[keyUtf8Length] = '\0';
             DictionaryHeaderStructurePolicy::AttributeMap::key_type key;
             HeaderReadWriteUtils::insertCharactersIntoVector(keyChars, &key);
@@ -59,6 +61,7 @@ class JniDataUtils {
             const jsize valueUtf8Length = env->GetStringUTFLength(valueString);
             char valueChars[valueUtf8Length + 1];
             env->GetStringUTFRegion(valueString, 0, env->GetStringLength(valueString), valueChars);
+            env->DeleteLocalRef(valueString);
             valueChars[valueUtf8Length] = '\0';
             DictionaryHeaderStructurePolicy::AttributeMap::mapped_type value;
             HeaderReadWriteUtils::insertCharactersIntoVector(valueChars, &value);
@@ -96,18 +99,14 @@ class JniDataUtils {
         }
     }
 
-    static PrevWordsInfo constructPrevWordsInfo(JNIEnv *env, jobjectArray prevWordCodePointArrays,
-            jbooleanArray isBeginningOfSentenceArray) {
+    static NgramContext constructNgramContext(JNIEnv *env, jobjectArray prevWordCodePointArrays,
+            jbooleanArray isBeginningOfSentenceArray, const size_t prevWordCount) {
         int prevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH];
         int prevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
         bool isBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
-        jsize prevWordsCount = env->GetArrayLength(prevWordCodePointArrays);
-        for (size_t i = 0; i < NELEMS(prevWordCodePoints); ++i) {
+        for (size_t i = 0; i < prevWordCount; ++i) {
             prevWordCodePointCount[i] = 0;
             isBeginningOfSentence[i] = false;
-            if (prevWordsCount <= static_cast<int>(i)) {
-                continue;
-            }
             jintArray prevWord = (jintArray)env->GetObjectArrayElement(prevWordCodePointArrays, i);
             if (!prevWord) {
                 continue;
@@ -117,14 +116,15 @@ class JniDataUtils {
                 continue;
             }
             env->GetIntArrayRegion(prevWord, 0, prevWordLength, prevWordCodePoints[i]);
+            env->DeleteLocalRef(prevWord);
             prevWordCodePointCount[i] = prevWordLength;
             jboolean isBeginningOfSentenceBoolean = JNI_FALSE;
             env->GetBooleanArrayRegion(isBeginningOfSentenceArray, i, 1 /* len */,
                     &isBeginningOfSentenceBoolean);
             isBeginningOfSentence[i] = isBeginningOfSentenceBoolean == JNI_TRUE;
         }
-        return PrevWordsInfo(prevWordCodePoints, prevWordCodePointCount, isBeginningOfSentence,
-                MAX_PREV_WORD_COUNT_FOR_N_GRAM);
+        return NgramContext(prevWordCodePoints, prevWordCodePointCount, isBeginningOfSentence,
+                prevWordCount);
     }
 
     static void putBooleanToArray(JNIEnv *env, jbooleanArray array, const int index,
@@ -141,6 +141,12 @@ class JniDataUtils {
         env->SetFloatArrayRegion(array, index, 1 /* len */, &value);
     }
 
+    static void outputWordProperty(JNIEnv *const env, const WordProperty &wordProperty,
+            jintArray outCodePoints, jbooleanArray outFlags, jintArray outProbabilityInfo,
+            jobject outNgramPrevWordsArray, jobject outNgramPrevWordIsBeginningOfSentenceArray,
+            jobject outNgramTargets, jobject outNgramProbabilities, jobject outShortcutTargets,
+            jobject outShortcutProbabilities);
+
  private:
     DISALLOW_IMPLICIT_CONSTRUCTORS(JniDataUtils);
 
diff --git a/native/jni/src/utils/ngram_utils.h b/native/jni/src/utils/ngram_utils.h
new file mode 100644
index 000000000..fa85ba35f
--- /dev/null
+++ b/native/jni/src/utils/ngram_utils.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_NGRAM_UTILS_H
+#define LATINIME_NGRAM_UTILS_H
+
+#include "defines.h"
+
+namespace latinime {
+
+enum class NgramType : int {
+    Unigram = 0,  // N-gram made of 1 word.
+    Bigram = 1,  // N-gram made of 2 words.
+    Trigram = 2,  // N-gram made of 3 words.
+    Quadgram = 3,  // N-gram made of 4 words.
+    NotANgramType = -1,  // Result of getNgramTypeFromWordCount() for unsupported word counts.
+};
+
+namespace AllNgramTypes {
+// Use anonymous namespace to avoid ODR (One Definition Rule) violation.
+namespace {
+// NOTE(review): NgramType::Quadgram is in neither list below; confirm this is intended.
+const NgramType ASCENDING[] = {  // From the shortest n-gram type to the longest.
+   NgramType::Unigram, NgramType::Bigram, NgramType::Trigram
+};
+
+const NgramType DESCENDING[] = {  // From the longest n-gram type to the shortest.
+   NgramType::Trigram, NgramType::Bigram, NgramType::Unigram
+};
+
+}  // namespace
+}  // namespace AllNgramTypes
+
+class NgramUtils final {
+ public:
+    // Maps the number of words in an n-gram to its NgramType. Word counts outside of
+    // [1, MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1] yield NgramType::NotANgramType.
+    static AK_FORCE_INLINE NgramType getNgramTypeFromWordCount(const int wordCount) {
+        const bool isSupported =
+                0 < wordCount && wordCount <= MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1;
+        // NgramType values are 0-origin, so an n-word n-gram has the value n - 1.
+        return isSupported ? static_cast<NgramType>(wordCount - 1) : NgramType::NotANgramType;
+    }
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(NgramUtils);
+
+};
+}
+#endif /* LATINIME_NGRAM_UTILS_H */
diff --git a/native/jni/src/utils/profiler.h b/native/jni/src/utils/profiler.h
new file mode 100644
index 000000000..5f107fed3
--- /dev/null
+++ b/native/jni/src/utils/profiler.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PROFILER_H
+#define LATINIME_PROFILER_H
+
+#ifdef FLAG_DO_PROFILE
+
+#include "defines.h"
+
+#include <ctime>
+#include <unordered_map>
+
+namespace latinime {
+
+class Profiler final {
+ public:
+    // Starts measuring the total time with the given clock.
+    Profiler(const clockid_t clockId) : mClockId(clockId), mStartTime(getTimeInMicroSec()) {}
+
+    // Logs the total time, then the share, the time and the call count of every timer.
+    ~Profiler() {
+        const int64_t elapsedMicroSec = getTimeInMicroSec() - mStartTime;
+        const float totalTime = static_cast<float>(elapsedMicroSec) / 1000.f;
+        AKLOGI("Total time is %6.3f ms.", totalTime);
+        for (const auto &entry : mTimes) {
+            AKLOGI("(%d): Used %4.2f%%, %8.4f ms. Called %d times.", entry.first,
+                    entry.second / totalTime * 100.0f, entry.second, mCounters[entry.first]);
+        }
+    }
+
+    // Remembers the current time as the start of the timer with the given id.
+    void startTimer(const int id) { mStartTimes[id] = getTimeInMicroSec(); }
+
+    // Adds the time since the last startTimer(id) to the timer and counts one more call.
+    void endTimer(const int id) {
+        const int64_t elapsedMicroSec = getTimeInMicroSec() - mStartTimes[id];
+        mTimes[id] += static_cast<float>(elapsedMicroSec) / 1000.0f;
+        ++mCounters[id];
+    }
+
+    operator bool() const { return false; }
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(Profiler);
+
+    const clockid_t mClockId;
+    int64_t mStartTime;
+    std::unordered_map<int, int64_t> mStartTimes;
+    std::unordered_map<int, float> mTimes;
+    std::unordered_map<int, int> mCounters;
+
+    int64_t getTimeInMicroSec() {
+        timespec ts;
+        clock_gettime(mClockId, &ts);
+        return static_cast<int64_t>(ts.tv_sec) * 1000000 + static_cast<int64_t>(ts.tv_nsec) / 1000;
+    }
+};
+} // namespace latinime
+
+#define PROF_INIT Profiler __LATINIME__PROFILER__(CLOCK_THREAD_CPUTIME_ID)
+#define PROF_TIMER_START(timer_id) __LATINIME__PROFILER__.startTimer(timer_id)
+#define PROF_TIMER_END(timer_id) __LATINIME__PROFILER__.endTimer(timer_id)
+
+#else // FLAG_DO_PROFILE
+
+#define PROF_INIT
+#define PROF_TIMER_START(timer_id)
+#define PROF_TIMER_END(timer_id)
+
+#endif // FLAG_DO_PROFILE
+
+#endif /* LATINIME_PROFILER_H */