aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/src/utils
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni/src/utils')
-rw-r--r--native/jni/src/utils/byte_array_view.h15
-rw-r--r--native/jni/src/utils/char_utils.cpp11
-rw-r--r--native/jni/src/utils/char_utils.h37
-rw-r--r--native/jni/src/utils/int_array_view.h82
-rw-r--r--native/jni/src/utils/jni_data_utils.cpp91
-rw-r--r--native/jni/src/utils/jni_data_utils.h30
-rw-r--r--native/jni/src/utils/ngram_utils.h63
-rw-r--r--native/jni/src/utils/profiler.h86
8 files changed, 377 insertions, 38 deletions
diff --git a/native/jni/src/utils/byte_array_view.h b/native/jni/src/utils/byte_array_view.h
index 2c97c6d58..2b778af6f 100644
--- a/native/jni/src/utils/byte_array_view.h
+++ b/native/jni/src/utils/byte_array_view.h
@@ -42,6 +42,13 @@ class ReadOnlyByteArrayView {
return mPtr;
}
+ AK_FORCE_INLINE const ReadOnlyByteArrayView skip(const size_t n) const {
+ if (mSize <= n) {
+ return ReadOnlyByteArrayView();
+ }
+ return ReadOnlyByteArrayView(mPtr + n, mSize - n);
+ }
+
private:
DISALLOW_ASSIGNMENT_OPERATOR(ReadOnlyByteArrayView);
@@ -77,10 +84,12 @@ class ReadWriteByteArrayView {
}
private:
- DISALLOW_ASSIGNMENT_OPERATOR(ReadWriteByteArrayView);
+ // Default copy constructor and assignment operator are used for using this class with STL
+ // containers.
- uint8_t *const mPtr;
- const size_t mSize;
+ // These members cannot be const to have the assignment operator.
+ uint8_t *mPtr;
+ size_t mSize;
};
} // namespace latinime
diff --git a/native/jni/src/utils/char_utils.cpp b/native/jni/src/utils/char_utils.cpp
index b17e0847d..a43e6dd62 100644
--- a/native/jni/src/utils/char_utils.cpp
+++ b/native/jni/src/utils/char_utils.cpp
@@ -1057,11 +1057,11 @@ static int compare_pair_capital(const void *a, const void *b) {
- static_cast<int>((static_cast<const struct LatinCapitalSmallPair *>(b))->capital);
}
-/* static */ unsigned short CharUtils::latin_tolower(const unsigned short c) {
+/* static */ int CharUtils::latin_tolower(const int c) {
struct LatinCapitalSmallPair *p =
static_cast<struct LatinCapitalSmallPair *>(bsearch(&c, SORTED_CHAR_MAP,
NELEMS(SORTED_CHAR_MAP), sizeof(SORTED_CHAR_MAP[0]), compare_pair_capital));
- return p ? p->small : c;
+ return p ? static_cast<int>(p->small) : c;
}
/*
@@ -1117,7 +1117,9 @@ static int compare_pair_capital(const void *a, const void *b) {
// TODO: Check if it's really acceptable to consider ΓΈ a diacritical variant of o
/* U+0100 */ 0x0041, 0x0061, 0x0041, 0x0061, 0x0041, 0x0061, 0x0043, 0x0063,
/* U+0108 */ 0x0043, 0x0063, 0x0043, 0x0063, 0x0043, 0x0063, 0x0044, 0x0064,
- /* U+0110 */ 0x0110, 0x0111, 0x0045, 0x0065, 0x0045, 0x0065, 0x0045, 0x0065,
+ /* U+0110 */ 0x0046, 0x0064, 0x0045, 0x0065, 0x0045, 0x0065, 0x0045, 0x0065,
+ // U+0110: Manually changed from 0110 to 0046
+ // U+0111: Manually changed from 0111 to 0064
/* U+0118 */ 0x0045, 0x0065, 0x0045, 0x0065, 0x0047, 0x0067, 0x0047, 0x0067,
/* U+0120 */ 0x0047, 0x0067, 0x0047, 0x0067, 0x0048, 0x0068, 0x0126, 0x0127,
/* U+0128 */ 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069,
@@ -1135,6 +1137,9 @@ static int compare_pair_capital(const void *a, const void *b) {
/* U+0170 */ 0x0055, 0x0075, 0x0055, 0x0075, 0x0057, 0x0077, 0x0059, 0x0079,
/* U+0178 */ 0x0059, 0x005A, 0x007A, 0x005A, 0x007A, 0x005A, 0x007A, 0x0073,
/* U+0180 */ 0x0180, 0x0181, 0x0182, 0x0183, 0x0184, 0x0185, 0x0186, 0x0187,
+ // TODO: A lot of letters are their own base code points, but for
+ // some (e.g. U+0180) it doesn't seem right. Ideally each code point should
+ // be checked individually with all languages it's used in.
/* U+0188 */ 0x0188, 0x0189, 0x018A, 0x018B, 0x018C, 0x018D, 0x018E, 0x018F,
/* U+0190 */ 0x0190, 0x0191, 0x0192, 0x0193, 0x0194, 0x0195, 0x0196, 0x0197,
/* U+0198 */ 0x0198, 0x0199, 0x019A, 0x019B, 0x019C, 0x019D, 0x019E, 0x019F,
diff --git a/native/jni/src/utils/char_utils.h b/native/jni/src/utils/char_utils.h
index 63786502b..7871c26ef 100644
--- a/native/jni/src/utils/char_utils.h
+++ b/native/jni/src/utils/char_utils.h
@@ -27,20 +27,14 @@ namespace latinime {
class CharUtils {
public:
+ static const std::vector<int> EMPTY_STRING;
+
static AK_FORCE_INLINE bool isAsciiUpper(int c) {
// Note: isupper(...) reports false positives for some Cyrillic characters, causing them to
// be incorrectly lower-cased using toAsciiLower(...) rather than latin_tolower(...).
return (c >= 'A' && c <= 'Z');
}
- static AK_FORCE_INLINE int toAsciiLower(int c) {
- return c - 'A' + 'a';
- }
-
- static AK_FORCE_INLINE bool isAscii(int c) {
- return isascii(c) != 0;
- }
-
static AK_FORCE_INLINE int toLowerCase(const int c) {
if (isAsciiUpper(c)) {
return toAsciiLower(c);
@@ -48,7 +42,7 @@ class CharUtils {
if (isAscii(c)) {
return c;
}
- return static_cast<int>(latin_tolower(static_cast<unsigned short>(c)));
+ return latin_tolower(c);
}
static AK_FORCE_INLINE int toBaseLowerCase(const int c) {
@@ -59,7 +53,6 @@ class CharUtils {
// TODO: Do not hardcode here
return codePoint == KEYCODE_SINGLE_QUOTE || codePoint == KEYCODE_HYPHEN_MINUS;
}
-
static AK_FORCE_INLINE int getCodePointCount(const int arraySize, const int *const codePoints) {
int size = 0;
for (; size < arraySize; ++size) {
@@ -91,9 +84,6 @@ class CharUtils {
return codePoint >= MIN_UNICODE_CODE_POINT && codePoint <= MAX_UNICODE_CODE_POINT;
}
- static unsigned short latin_tolower(const unsigned short c);
- static const std::vector<int> EMPTY_STRING;
-
// Returns updated code point count. Returns 0 when the code points cannot be marked as a
// Beginning-of-Sentence.
static AK_FORCE_INLINE int attachBeginningOfSentenceMarker(int *const codePoints,
@@ -111,6 +101,17 @@ class CharUtils {
return codePointCount + 1;
}
+ // Returns updated code point count.
+ static AK_FORCE_INLINE int removeBeginningOfSentenceMarker(int *const codePoints,
+ const int codePointCount) {
+ if (codePointCount <= 0 || codePoints[0] != CODE_POINT_BEGINNING_OF_SENTENCE) {
+ return codePointCount;
+ }
+ const int newCodePointCount = codePointCount - 1;
+ memmove(codePoints, codePoints + 1, sizeof(int) * newCodePointCount);
+ return newCodePointCount;
+ }
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(CharUtils);
@@ -125,6 +126,16 @@ class CharUtils {
*/
static const int BASE_CHARS_SIZE = 0x0500;
static const unsigned short BASE_CHARS[BASE_CHARS_SIZE];
+
+ static AK_FORCE_INLINE bool isAscii(int c) {
+ return isascii(c) != 0;
+ }
+
+ static AK_FORCE_INLINE int toAsciiLower(int c) {
+ return c - 'A' + 'a';
+ }
+
+ static int latin_tolower(const int c);
};
} // namespace latinime
#endif // LATINIME_CHAR_UTILS_H
diff --git a/native/jni/src/utils/int_array_view.h b/native/jni/src/utils/int_array_view.h
index c1ddc9812..e0f671056 100644
--- a/native/jni/src/utils/int_array_view.h
+++ b/native/jni/src/utils/int_array_view.h
@@ -17,8 +17,10 @@
#ifndef LATINIME_INT_ARRAY_VIEW_H
#define LATINIME_INT_ARRAY_VIEW_H
+#include <algorithm>
+#include <array>
#include <cstdint>
-#include <cstdlib>
+#include <cstring>
#include <vector>
#include "defines.h"
@@ -56,14 +58,14 @@ class IntArrayView {
explicit IntArrayView(const std::vector<int> &vector)
: mPtr(vector.data()), mSize(vector.size()) {}
- template <int N>
- AK_FORCE_INLINE static IntArrayView fromFixedSizeArray(const int (&array)[N]) {
- return IntArrayView(array, N);
+ template <size_t N>
+ AK_FORCE_INLINE static IntArrayView fromArray(const std::array<int, N> &array) {
+ return IntArrayView(array.data(), array.size());
}
- // Returns a view that points one int object. Does not take ownership of the given object.
- AK_FORCE_INLINE static IntArrayView fromObject(const int *const object) {
- return IntArrayView(object, 1);
+ // Returns a view that points one int object.
+ AK_FORCE_INLINE static IntArrayView singleElementView(const int *const ptr) {
+ return IntArrayView(ptr, 1);
}
AK_FORCE_INLINE int operator[](const size_t index) const {
@@ -91,6 +93,69 @@ class IntArrayView {
return mPtr + mSize;
}
+ AK_FORCE_INLINE bool contains(const int value) const {
+ return std::find(begin(), end(), value) != end();
+ }
+
+ // Returns the view whose size is smaller than or equal to the given count.
+ AK_FORCE_INLINE const IntArrayView limit(const size_t maxSize) const {
+ return IntArrayView(mPtr, std::min(maxSize, mSize));
+ }
+
+ AK_FORCE_INLINE const IntArrayView skip(const size_t n) const {
+ if (mSize <= n) {
+ return IntArrayView();
+ }
+ return IntArrayView(mPtr + n, mSize - n);
+ }
+
+ template <size_t N>
+ void copyToArray(std::array<int, N> *const buffer, const size_t offset) const {
+ ASSERT(mSize + offset <= N);
+ memmove(buffer->data() + offset, mPtr, sizeof(int) * mSize);
+ }
+
+ AK_FORCE_INLINE int firstOrDefault(const int defaultValue) const {
+ if (empty()) {
+ return defaultValue;
+ }
+ return mPtr[0];
+ }
+
+ AK_FORCE_INLINE int lastOrDefault(const int defaultValue) const {
+ if (empty()) {
+ return defaultValue;
+ }
+ return mPtr[mSize - 1];
+ }
+
+ AK_FORCE_INLINE std::vector<int> toVector() const {
+ return std::vector<int>(begin(), end());
+ }
+
+ std::vector<IntArrayView> split(const int separator, const int limit = S_INT_MAX) const {
+ if (limit <= 0) {
+ return std::vector<IntArrayView>();
+ }
+ std::vector<IntArrayView> result;
+ if (limit == 1) {
+ result.emplace_back(mPtr, mSize);
+ return result;
+ }
+ size_t startIndex = 0;
+ for (size_t i = 0; i < mSize; ++i) {
+ if (mPtr[i] == separator) {
+ result.emplace_back(mPtr + startIndex, i - startIndex);
+ startIndex = i + 1;
+ if (result.size() >= static_cast<size_t>(limit - 1)) {
+ break;
+ }
+ }
+ }
+ result.emplace_back(mPtr + startIndex, mSize - startIndex);
+ return result;
+ }
+
private:
DISALLOW_ASSIGNMENT_OPERATOR(IntArrayView);
@@ -100,6 +165,9 @@ class IntArrayView {
using WordIdArrayView = IntArrayView;
using PtNodePosArrayView = IntArrayView;
+using CodePointArrayView = IntArrayView;
+template <size_t size>
+using WordIdArray = std::array<int, size>;
} // namespace latinime
#endif // LATINIME_MEMORY_VIEW_H
diff --git a/native/jni/src/utils/jni_data_utils.cpp b/native/jni/src/utils/jni_data_utils.cpp
index 5555293d5..41f0623d8 100644
--- a/native/jni/src/utils/jni_data_utils.cpp
+++ b/native/jni/src/utils/jni_data_utils.cpp
@@ -16,9 +16,100 @@
#include "utils/jni_data_utils.h"
+#include "utils/int_array_view.h"
+
namespace latinime {
const int JniDataUtils::CODE_POINT_REPLACEMENT_CHARACTER = 0xFFFD;
const int JniDataUtils::CODE_POINT_NULL = 0;
+/* static */ void JniDataUtils::outputWordProperty(JNIEnv *const env,
+ const WordProperty &wordProperty, jintArray outCodePoints, jbooleanArray outFlags,
+ jintArray outProbabilityInfo, jobject outNgramPrevWordsArray,
+ jobject outNgramPrevWordIsBeginningOfSentenceArray, jobject outNgramTargets,
+ jobject outNgramProbabilities, jobject outShortcutTargets,
+ jobject outShortcutProbabilities) {
+ const CodePointArrayView codePoints = wordProperty.getCodePoints();
+ JniDataUtils::outputCodePoints(env, outCodePoints, 0 /* start */,
+ MAX_WORD_LENGTH /* maxLength */, codePoints.data(), codePoints.size(),
+ false /* needsNullTermination */);
+ const UnigramProperty &unigramProperty = wordProperty.getUnigramProperty();
+ const std::vector<NgramProperty> &ngrams = wordProperty.getNgramProperties();
+ jboolean flags[] = {unigramProperty.isNotAWord(), unigramProperty.isPossiblyOffensive(),
+ !ngrams.empty(), unigramProperty.hasShortcuts(),
+ unigramProperty.representsBeginningOfSentence()};
+ env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags);
+ const HistoricalInfo &historicalInfo = unigramProperty.getHistoricalInfo();
+ int probabilityInfo[] = {unigramProperty.getProbability(), historicalInfo.getTimestamp(),
+ historicalInfo.getLevel(), historicalInfo.getCount()};
+ env->SetIntArrayRegion(outProbabilityInfo, 0 /* start */, NELEMS(probabilityInfo),
+ probabilityInfo);
+
+ jclass integerClass = env->FindClass("java/lang/Integer");
+ jmethodID intToIntegerConstructorId = env->GetMethodID(integerClass, "<init>", "(I)V");
+ jclass arrayListClass = env->FindClass("java/util/ArrayList");
+ jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z");
+
+ // Output ngrams.
+ jclass intArrayClass = env->FindClass("[I");
+ for (const auto &ngramProperty : ngrams) {
+ const NgramContext *const ngramContext = ngramProperty.getNgramContext();
+ jobjectArray prevWordWordCodePointsArray = env->NewObjectArray(
+ ngramContext->getPrevWordCount(), intArrayClass, nullptr);
+ jbooleanArray prevWordIsBeginningOfSentenceArray =
+ env->NewBooleanArray(ngramContext->getPrevWordCount());
+ for (size_t i = 0; i < ngramContext->getPrevWordCount(); ++i) {
+ const CodePointArrayView codePoints = ngramContext->getNthPrevWordCodePoints(i + 1);
+ jintArray prevWordCodePoints = env->NewIntArray(codePoints.size());
+ JniDataUtils::outputCodePoints(env, prevWordCodePoints, 0 /* start */,
+ codePoints.size(), codePoints.data(), codePoints.size(),
+ false /* needsNullTermination */);
+ env->SetObjectArrayElement(prevWordWordCodePointsArray, i, prevWordCodePoints);
+ env->DeleteLocalRef(prevWordCodePoints);
+ JniDataUtils::putBooleanToArray(env, prevWordIsBeginningOfSentenceArray, i,
+ ngramContext->isNthPrevWordBeginningOfSentence(i + 1));
+ }
+ env->CallBooleanMethod(outNgramPrevWordsArray, addMethodId, prevWordWordCodePointsArray);
+ env->CallBooleanMethod(outNgramPrevWordIsBeginningOfSentenceArray, addMethodId,
+ prevWordIsBeginningOfSentenceArray);
+ env->DeleteLocalRef(prevWordWordCodePointsArray);
+ env->DeleteLocalRef(prevWordIsBeginningOfSentenceArray);
+
+ const std::vector<int> *const targetWordCodePoints = ngramProperty.getTargetCodePoints();
+ jintArray targetWordCodePointArray = env->NewIntArray(targetWordCodePoints->size());
+ JniDataUtils::outputCodePoints(env, targetWordCodePointArray, 0 /* start */,
+ targetWordCodePoints->size(), targetWordCodePoints->data(),
+ targetWordCodePoints->size(), false /* needsNullTermination */);
+ env->CallBooleanMethod(outNgramTargets, addMethodId, targetWordCodePointArray);
+ env->DeleteLocalRef(targetWordCodePointArray);
+
+ const HistoricalInfo &ngramHistoricalInfo = ngramProperty.getHistoricalInfo();
+ int bigramProbabilityInfo[] = {ngramProperty.getProbability(),
+ ngramHistoricalInfo.getTimestamp(), ngramHistoricalInfo.getLevel(),
+ ngramHistoricalInfo.getCount()};
+ jintArray bigramProbabilityInfoArray = env->NewIntArray(NELEMS(bigramProbabilityInfo));
+ env->SetIntArrayRegion(bigramProbabilityInfoArray, 0 /* start */,
+ NELEMS(bigramProbabilityInfo), bigramProbabilityInfo);
+ env->CallBooleanMethod(outNgramProbabilities, addMethodId, bigramProbabilityInfoArray);
+ env->DeleteLocalRef(bigramProbabilityInfoArray);
+ }
+
+ // Output shortcuts.
+ for (const auto &shortcut : unigramProperty.getShortcuts()) {
+ const std::vector<int> *const targetCodePoints = shortcut.getTargetCodePoints();
+ jintArray shortcutTargetCodePointArray = env->NewIntArray(targetCodePoints->size());
+ JniDataUtils::outputCodePoints(env, shortcutTargetCodePointArray, 0 /* start */,
+ targetCodePoints->size(), targetCodePoints->data(), targetCodePoints->size(),
+ false /* needsNullTermination */);
+ env->CallBooleanMethod(outShortcutTargets, addMethodId, shortcutTargetCodePointArray);
+ env->DeleteLocalRef(shortcutTargetCodePointArray);
+ jobject integerProbability = env->NewObject(integerClass, intToIntegerConstructorId,
+ shortcut.getProbability());
+ env->CallBooleanMethod(outShortcutProbabilities, addMethodId, integerProbability);
+ env->DeleteLocalRef(integerProbability);
+ }
+ env->DeleteLocalRef(integerClass);
+ env->DeleteLocalRef(arrayListClass);
+}
+
} // namespace latinime
diff --git a/native/jni/src/utils/jni_data_utils.h b/native/jni/src/utils/jni_data_utils.h
index cb82d3c3b..8024e34c4 100644
--- a/native/jni/src/utils/jni_data_utils.h
+++ b/native/jni/src/utils/jni_data_utils.h
@@ -20,10 +20,11 @@
#include <vector>
#include "defines.h"
+#include "dictionary/header/header_read_write_utils.h"
+#include "dictionary/interface/dictionary_header_structure_policy.h"
+#include "dictionary/property/ngram_context.h"
+#include "dictionary/property/word_property.h"
#include "jni.h"
-#include "suggest/core/session/prev_words_info.h"
-#include "suggest/core/policy/dictionary_header_structure_policy.h"
-#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h"
#include "utils/char_utils.h"
namespace latinime {
@@ -50,6 +51,7 @@ class JniDataUtils {
const jsize keyUtf8Length = env->GetStringUTFLength(keyString);
char keyChars[keyUtf8Length + 1];
env->GetStringUTFRegion(keyString, 0, env->GetStringLength(keyString), keyChars);
+ env->DeleteLocalRef(keyString);
keyChars[keyUtf8Length] = '\0';
DictionaryHeaderStructurePolicy::AttributeMap::key_type key;
HeaderReadWriteUtils::insertCharactersIntoVector(keyChars, &key);
@@ -59,6 +61,7 @@ class JniDataUtils {
const jsize valueUtf8Length = env->GetStringUTFLength(valueString);
char valueChars[valueUtf8Length + 1];
env->GetStringUTFRegion(valueString, 0, env->GetStringLength(valueString), valueChars);
+ env->DeleteLocalRef(valueString);
valueChars[valueUtf8Length] = '\0';
DictionaryHeaderStructurePolicy::AttributeMap::mapped_type value;
HeaderReadWriteUtils::insertCharactersIntoVector(valueChars, &value);
@@ -96,18 +99,14 @@ class JniDataUtils {
}
}
- static PrevWordsInfo constructPrevWordsInfo(JNIEnv *env, jobjectArray prevWordCodePointArrays,
- jbooleanArray isBeginningOfSentenceArray) {
+ static NgramContext constructNgramContext(JNIEnv *env, jobjectArray prevWordCodePointArrays,
+ jbooleanArray isBeginningOfSentenceArray, const size_t prevWordCount) {
int prevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH];
int prevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
bool isBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
- jsize prevWordsCount = env->GetArrayLength(prevWordCodePointArrays);
- for (size_t i = 0; i < NELEMS(prevWordCodePoints); ++i) {
+ for (size_t i = 0; i < prevWordCount; ++i) {
prevWordCodePointCount[i] = 0;
isBeginningOfSentence[i] = false;
- if (prevWordsCount <= static_cast<int>(i)) {
- continue;
- }
jintArray prevWord = (jintArray)env->GetObjectArrayElement(prevWordCodePointArrays, i);
if (!prevWord) {
continue;
@@ -117,14 +116,15 @@ class JniDataUtils {
continue;
}
env->GetIntArrayRegion(prevWord, 0, prevWordLength, prevWordCodePoints[i]);
+ env->DeleteLocalRef(prevWord);
prevWordCodePointCount[i] = prevWordLength;
jboolean isBeginningOfSentenceBoolean = JNI_FALSE;
env->GetBooleanArrayRegion(isBeginningOfSentenceArray, i, 1 /* len */,
&isBeginningOfSentenceBoolean);
isBeginningOfSentence[i] = isBeginningOfSentenceBoolean == JNI_TRUE;
}
- return PrevWordsInfo(prevWordCodePoints, prevWordCodePointCount, isBeginningOfSentence,
- MAX_PREV_WORD_COUNT_FOR_N_GRAM);
+ return NgramContext(prevWordCodePoints, prevWordCodePointCount, isBeginningOfSentence,
+ prevWordCount);
}
static void putBooleanToArray(JNIEnv *env, jbooleanArray array, const int index,
@@ -141,6 +141,12 @@ class JniDataUtils {
env->SetFloatArrayRegion(array, index, 1 /* len */, &value);
}
+ static void outputWordProperty(JNIEnv *const env, const WordProperty &wordProperty,
+ jintArray outCodePoints, jbooleanArray outFlags, jintArray outProbabilityInfo,
+ jobject outNgramPrevWordsArray, jobject outNgramPrevWordIsBeginningOfSentenceArray,
+ jobject outNgramTargets, jobject outNgramProbabilities, jobject outShortcutTargets,
+ jobject outShortcutProbabilities);
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(JniDataUtils);
diff --git a/native/jni/src/utils/ngram_utils.h b/native/jni/src/utils/ngram_utils.h
new file mode 100644
index 000000000..fa85ba35f
--- /dev/null
+++ b/native/jni/src/utils/ngram_utils.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_NGRAM_UTILS_H
+#define LATINIME_NGRAM_UTILS_H
+
+#include "defines.h"
+
+namespace latinime {
+
+enum class NgramType : int {
+ Unigram = 0,
+ Bigram = 1,
+ Trigram = 2,
+ Quadgram = 3,
+ NotANgramType = -1,
+};
+
+namespace AllNgramTypes {
+// Use anonymous namespace to avoid ODR (One Definition Rule) violation.
+namespace {
+
+const NgramType ASCENDING[] = {
+ NgramType::Unigram, NgramType::Bigram, NgramType::Trigram
+};
+
+const NgramType DESCENDING[] = {
+ NgramType::Trigram, NgramType::Bigram, NgramType::Unigram
+};
+
+} // namespace
+} // namespace AllNgramTypes
+
+class NgramUtils final {
+ public:
+ static AK_FORCE_INLINE NgramType getNgramTypeFromWordCount(const int wordCount) {
+ // Max supported ngram is (MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1)-gram.
+ if (wordCount <= 0 || wordCount > MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1) {
+ return NgramType::NotANgramType;
+ }
+ // Convert word count to 0-origin enum value.
+ return static_cast<NgramType>(wordCount - 1);
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(NgramUtils);
+
+};
+}
+#endif /* LATINIME_NGRAM_UTILS_H */
diff --git a/native/jni/src/utils/profiler.h b/native/jni/src/utils/profiler.h
new file mode 100644
index 000000000..5f107fed3
--- /dev/null
+++ b/native/jni/src/utils/profiler.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PROFILER_H
+#define LATINIME_PROFILER_H
+
+#ifdef FLAG_DO_PROFILE
+
+#include "defines.h"
+
+#include <ctime>
+#include <unordered_map>
+
+namespace latinime {
+
+class Profiler final {
+ public:
+ Profiler(const clockid_t clockId)
+ : mClockId(clockId), mStartTime(getTimeInMicroSec()), mStartTimes(), mTimes(),
+ mCounters() {}
+
+ ~Profiler() {
+ const float totalTime =
+ static_cast<float>(getTimeInMicroSec() - mStartTime) / 1000.f;
+ AKLOGI("Total time is %6.3f ms.", totalTime);
+ for (const auto &time : mTimes) {
+ AKLOGI("(%d): Used %4.2f%%, %8.4f ms. Called %d times.", time.first,
+ time.second / totalTime * 100.0f, time.second, mCounters[time.first]);
+ }
+ }
+
+ void startTimer(const int id) {
+ mStartTimes[id] = getTimeInMicroSec();
+ }
+
+ void endTimer(const int id) {
+ mTimes[id] += static_cast<float>(getTimeInMicroSec() - mStartTimes[id]) / 1000.0f;
+ mCounters[id]++;
+ }
+
+ operator bool() const { return false; }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Profiler);
+
+ const clockid_t mClockId;
+ int64_t mStartTime;
+ std::unordered_map<int, int64_t> mStartTimes;
+ std::unordered_map<int, float> mTimes;
+ std::unordered_map<int, int> mCounters;
+
+ int64_t getTimeInMicroSec() {
+ timespec time;
+ clock_gettime(mClockId, &time);
+ return static_cast<int64_t>(time.tv_sec) * 1000000
+ + static_cast<int64_t>(time.tv_nsec) / 1000;
+ }
+};
+} // namespace latinime
+
+#define PROF_INIT Profiler __LATINIME__PROFILER__(CLOCK_THREAD_CPUTIME_ID)
+#define PROF_TIMER_START(timer_id) __LATINIME__PROFILER__.startTimer(timer_id)
+#define PROF_TIMER_END(timer_id) __LATINIME__PROFILER__.endTimer(timer_id)
+
+#else // FLAG_DO_PROFILE
+
+#define PROF_INIT
+#define PROF_TIMER_START(timer_id)
+#define PROF_TIMER_END(timer_id)
+
+#endif // FLAG_DO_PROFILE
+
+#endif /* LATINIME_PROFILER_H */