diff options
Diffstat (limited to 'native/jni/src/utils')
-rw-r--r-- | native/jni/src/utils/byte_array_view.h | 15 | ||||
-rw-r--r-- | native/jni/src/utils/char_utils.cpp | 11 | ||||
-rw-r--r-- | native/jni/src/utils/char_utils.h | 37 | ||||
-rw-r--r-- | native/jni/src/utils/int_array_view.h | 82 | ||||
-rw-r--r-- | native/jni/src/utils/jni_data_utils.cpp | 91 | ||||
-rw-r--r-- | native/jni/src/utils/jni_data_utils.h | 30 | ||||
-rw-r--r-- | native/jni/src/utils/ngram_utils.h | 63 | ||||
-rw-r--r-- | native/jni/src/utils/profiler.h | 86 |
8 files changed, 377 insertions, 38 deletions
diff --git a/native/jni/src/utils/byte_array_view.h b/native/jni/src/utils/byte_array_view.h index 2c97c6d58..2b778af6f 100644 --- a/native/jni/src/utils/byte_array_view.h +++ b/native/jni/src/utils/byte_array_view.h @@ -42,6 +42,13 @@ class ReadOnlyByteArrayView { return mPtr; } + AK_FORCE_INLINE const ReadOnlyByteArrayView skip(const size_t n) const { + if (mSize <= n) { + return ReadOnlyByteArrayView(); + } + return ReadOnlyByteArrayView(mPtr + n, mSize - n); + } + private: DISALLOW_ASSIGNMENT_OPERATOR(ReadOnlyByteArrayView); @@ -77,10 +84,12 @@ class ReadWriteByteArrayView { } private: - DISALLOW_ASSIGNMENT_OPERATOR(ReadWriteByteArrayView); + // Default copy constructor and assignment operator are used for using this class with STL + // containers. - uint8_t *const mPtr; - const size_t mSize; + // These members cannot be const to have the assignment operator. + uint8_t *mPtr; + size_t mSize; }; } // namespace latinime diff --git a/native/jni/src/utils/char_utils.cpp b/native/jni/src/utils/char_utils.cpp index b17e0847d..a43e6dd62 100644 --- a/native/jni/src/utils/char_utils.cpp +++ b/native/jni/src/utils/char_utils.cpp @@ -1057,11 +1057,11 @@ static int compare_pair_capital(const void *a, const void *b) { - static_cast<int>((static_cast<const struct LatinCapitalSmallPair *>(b))->capital); } -/* static */ unsigned short CharUtils::latin_tolower(const unsigned short c) { +/* static */ int CharUtils::latin_tolower(const int c) { struct LatinCapitalSmallPair *p = static_cast<struct LatinCapitalSmallPair *>(bsearch(&c, SORTED_CHAR_MAP, NELEMS(SORTED_CHAR_MAP), sizeof(SORTED_CHAR_MAP[0]), compare_pair_capital)); - return p ? p->small : c; + return p ? static_cast<int>(p->small) : c; } /* @@ -1117,7 +1117,9 @@ static int compare_pair_capital(const void *a, const void *b) { // TODO: Check if it's really acceptable to consider ΓΈ a diacritical variant of o /* U+0100 */ 0x0041, 0x0061, 0x0041, 0x0061, 0x0041, 0x0061, 0x0043, 0x0063, /* U+0108 */ 0x0043, 0x0063, 0x0043, 0x0063, 0x0043, 0x0063, 0x0044, 0x0064, - /* U+0110 */ 0x0110, 0x0111, 0x0045, 0x0065, 0x0045, 0x0065, 0x0045, 0x0065, + /* U+0110 */ 0x0046, 0x0064, 0x0045, 0x0065, 0x0045, 0x0065, 0x0045, 0x0065, + // U+0110: Manually changed from 0110 to 0046 + // U+0111: Manually changed from 0111 to 0064 /* U+0118 */ 0x0045, 0x0065, 0x0045, 0x0065, 0x0047, 0x0067, 0x0047, 0x0067, /* U+0120 */ 0x0047, 0x0067, 0x0047, 0x0067, 0x0048, 0x0068, 0x0126, 0x0127, /* U+0128 */ 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, @@ -1135,6 +1137,9 @@ static int compare_pair_capital(const void *a, const void *b) { /* U+0170 */ 0x0055, 0x0075, 0x0055, 0x0075, 0x0057, 0x0077, 0x0059, 0x0079, /* U+0178 */ 0x0059, 0x005A, 0x007A, 0x005A, 0x007A, 0x005A, 0x007A, 0x0073, /* U+0180 */ 0x0180, 0x0181, 0x0182, 0x0183, 0x0184, 0x0185, 0x0186, 0x0187, + // TODO: A lot of letters are their own base code points, but for + // some (e.g. U+0180) it doesn't seem right. Ideally each code point should + // be checked individually with all languages it's used in. /* U+0188 */ 0x0188, 0x0189, 0x018A, 0x018B, 0x018C, 0x018D, 0x018E, 0x018F, /* U+0190 */ 0x0190, 0x0191, 0x0192, 0x0193, 0x0194, 0x0195, 0x0196, 0x0197, /* U+0198 */ 0x0198, 0x0199, 0x019A, 0x019B, 0x019C, 0x019D, 0x019E, 0x019F, diff --git a/native/jni/src/utils/char_utils.h b/native/jni/src/utils/char_utils.h index 63786502b..7871c26ef 100644 --- a/native/jni/src/utils/char_utils.h +++ b/native/jni/src/utils/char_utils.h @@ -27,20 +27,14 @@ namespace latinime { class CharUtils { public: + static const std::vector<int> EMPTY_STRING; + static AK_FORCE_INLINE bool isAsciiUpper(int c) { // Note: isupper(...) reports false positives for some Cyrillic characters, causing them to // be incorrectly lower-cased using toAsciiLower(...) rather than latin_tolower(...). return (c >= 'A' && c <= 'Z'); } - static AK_FORCE_INLINE int toAsciiLower(int c) { - return c - 'A' + 'a'; - } - - static AK_FORCE_INLINE bool isAscii(int c) { - return isascii(c) != 0; - } - static AK_FORCE_INLINE int toLowerCase(const int c) { if (isAsciiUpper(c)) { return toAsciiLower(c); @@ -48,7 +42,7 @@ class CharUtils { if (isAscii(c)) { return c; } - return static_cast<int>(latin_tolower(static_cast<unsigned short>(c))); + return latin_tolower(c); } static AK_FORCE_INLINE int toBaseLowerCase(const int c) { @@ -59,7 +53,6 @@ class CharUtils { // TODO: Do not hardcode here return codePoint == KEYCODE_SINGLE_QUOTE || codePoint == KEYCODE_HYPHEN_MINUS; } - static AK_FORCE_INLINE int getCodePointCount(const int arraySize, const int *const codePoints) { int size = 0; for (; size < arraySize; ++size) { @@ -91,9 +84,6 @@ class CharUtils { return codePoint >= MIN_UNICODE_CODE_POINT && codePoint <= MAX_UNICODE_CODE_POINT; } - static unsigned short latin_tolower(const unsigned short c); - static const std::vector<int> EMPTY_STRING; - // Returns updated code point count. Returns 0 when the code points cannot be marked as a // Beginning-of-Sentence. static AK_FORCE_INLINE int attachBeginningOfSentenceMarker(int *const codePoints, @@ -111,6 +101,17 @@ class CharUtils { return codePointCount + 1; } + // Returns updated code point count. + static AK_FORCE_INLINE int removeBeginningOfSentenceMarker(int *const codePoints, + const int codePointCount) { + if (codePointCount <= 0 || codePoints[0] != CODE_POINT_BEGINNING_OF_SENTENCE) { + return codePointCount; + } + const int newCodePointCount = codePointCount - 1; + memmove(codePoints, codePoints + 1, sizeof(int) * newCodePointCount); + return newCodePointCount; + } + private: DISALLOW_IMPLICIT_CONSTRUCTORS(CharUtils); @@ -125,6 +126,16 @@ class CharUtils { */ static const int BASE_CHARS_SIZE = 0x0500; static const unsigned short BASE_CHARS[BASE_CHARS_SIZE]; + + static AK_FORCE_INLINE bool isAscii(int c) { + return isascii(c) != 0; + } + + static AK_FORCE_INLINE int toAsciiLower(int c) { + return c - 'A' + 'a'; + } + + static int latin_tolower(const int c); }; } // namespace latinime #endif // LATINIME_CHAR_UTILS_H diff --git a/native/jni/src/utils/int_array_view.h b/native/jni/src/utils/int_array_view.h index c1ddc9812..e0f671056 100644 --- a/native/jni/src/utils/int_array_view.h +++ b/native/jni/src/utils/int_array_view.h @@ -17,8 +17,10 @@ #ifndef LATINIME_INT_ARRAY_VIEW_H #define LATINIME_INT_ARRAY_VIEW_H +#include <algorithm> +#include <array> #include <cstdint> -#include <cstdlib> +#include <cstring> #include <vector> #include "defines.h" @@ -56,14 +58,14 @@ class IntArrayView { explicit IntArrayView(const std::vector<int> &vector) : mPtr(vector.data()), mSize(vector.size()) {} - template <int N> - AK_FORCE_INLINE static IntArrayView fromFixedSizeArray(const int (&array)[N]) { - return IntArrayView(array, N); + template <size_t N> + AK_FORCE_INLINE static IntArrayView fromArray(const std::array<int, N> &array) { + return IntArrayView(array.data(), array.size()); } - // Returns a view that points one int object. Does not take ownership of the given object. - AK_FORCE_INLINE static IntArrayView fromObject(const int *const object) { - return IntArrayView(object, 1); + // Returns a view that points one int object. + AK_FORCE_INLINE static IntArrayView singleElementView(const int *const ptr) { + return IntArrayView(ptr, 1); } AK_FORCE_INLINE int operator[](const size_t index) const { @@ -91,6 +93,69 @@ class IntArrayView { return mPtr + mSize; } + AK_FORCE_INLINE bool contains(const int value) const { + return std::find(begin(), end(), value) != end(); + } + + // Returns the view whose size is smaller than or equal to the given count. + AK_FORCE_INLINE const IntArrayView limit(const size_t maxSize) const { + return IntArrayView(mPtr, std::min(maxSize, mSize)); + } + + AK_FORCE_INLINE const IntArrayView skip(const size_t n) const { + if (mSize <= n) { + return IntArrayView(); + } + return IntArrayView(mPtr + n, mSize - n); + } + + template <size_t N> + void copyToArray(std::array<int, N> *const buffer, const size_t offset) const { + ASSERT(mSize + offset <= N); + memmove(buffer->data() + offset, mPtr, sizeof(int) * mSize); + } + + AK_FORCE_INLINE int firstOrDefault(const int defaultValue) const { + if (empty()) { + return defaultValue; + } + return mPtr[0]; + } + + AK_FORCE_INLINE int lastOrDefault(const int defaultValue) const { + if (empty()) { + return defaultValue; + } + return mPtr[mSize - 1]; + } + + AK_FORCE_INLINE std::vector<int> toVector() const { + return std::vector<int>(begin(), end()); + } + + std::vector<IntArrayView> split(const int separator, const int limit = S_INT_MAX) const { + if (limit <= 0) { + return std::vector<IntArrayView>(); + } + std::vector<IntArrayView> result; + if (limit == 1) { + result.emplace_back(mPtr, mSize); + return result; + } + size_t startIndex = 0; + for (size_t i = 0; i < mSize; ++i) { + if (mPtr[i] == separator) { + result.emplace_back(mPtr + startIndex, i - startIndex); + startIndex = i + 1; + if (result.size() >= static_cast<size_t>(limit - 1)) { + break; + } + } + } + result.emplace_back(mPtr + startIndex, mSize - startIndex); + return result; + } + private: DISALLOW_ASSIGNMENT_OPERATOR(IntArrayView); @@ -100,6 +165,9 @@ class IntArrayView { using WordIdArrayView = IntArrayView; using PtNodePosArrayView = IntArrayView; +using CodePointArrayView = IntArrayView; +template <size_t size> +using WordIdArray = std::array<int, size>; } // namespace latinime #endif // LATINIME_MEMORY_VIEW_H diff --git a/native/jni/src/utils/jni_data_utils.cpp b/native/jni/src/utils/jni_data_utils.cpp index 5555293d5..41f0623d8 100644 --- a/native/jni/src/utils/jni_data_utils.cpp +++ b/native/jni/src/utils/jni_data_utils.cpp @@ -16,9 +16,100 @@ #include "utils/jni_data_utils.h" +#include "utils/int_array_view.h" + namespace latinime { const int JniDataUtils::CODE_POINT_REPLACEMENT_CHARACTER = 0xFFFD; const int JniDataUtils::CODE_POINT_NULL = 0; +/* static */ void JniDataUtils::outputWordProperty(JNIEnv *const env, + const WordProperty &wordProperty, jintArray outCodePoints, jbooleanArray outFlags, + jintArray outProbabilityInfo, jobject outNgramPrevWordsArray, + jobject outNgramPrevWordIsBeginningOfSentenceArray, jobject outNgramTargets, + jobject outNgramProbabilities, jobject outShortcutTargets, + jobject outShortcutProbabilities) { + const CodePointArrayView codePoints = wordProperty.getCodePoints(); + JniDataUtils::outputCodePoints(env, outCodePoints, 0 /* start */, + MAX_WORD_LENGTH /* maxLength */, codePoints.data(), codePoints.size(), + false /* needsNullTermination */); + const UnigramProperty &unigramProperty = wordProperty.getUnigramProperty(); + const std::vector<NgramProperty> &ngrams = wordProperty.getNgramProperties(); + jboolean flags[] = {unigramProperty.isNotAWord(), unigramProperty.isPossiblyOffensive(), + !ngrams.empty(), unigramProperty.hasShortcuts(), + unigramProperty.representsBeginningOfSentence()}; + env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags); + const HistoricalInfo &historicalInfo = unigramProperty.getHistoricalInfo(); + int probabilityInfo[] = {unigramProperty.getProbability(), historicalInfo.getTimestamp(), + historicalInfo.getLevel(), historicalInfo.getCount()}; + env->SetIntArrayRegion(outProbabilityInfo, 0 /* start */, NELEMS(probabilityInfo), + probabilityInfo); + + jclass integerClass = env->FindClass("java/lang/Integer"); + jmethodID intToIntegerConstructorId = env->GetMethodID(integerClass, "<init>", "(I)V"); + jclass arrayListClass = env->FindClass("java/util/ArrayList"); + jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z"); + + // Output ngrams. + jclass intArrayClass = env->FindClass("[I"); + for (const auto &ngramProperty : ngrams) { + const NgramContext *const ngramContext = ngramProperty.getNgramContext(); + jobjectArray prevWordWordCodePointsArray = env->NewObjectArray( + ngramContext->getPrevWordCount(), intArrayClass, nullptr); + jbooleanArray prevWordIsBeginningOfSentenceArray = + env->NewBooleanArray(ngramContext->getPrevWordCount()); + for (size_t i = 0; i < ngramContext->getPrevWordCount(); ++i) { + const CodePointArrayView codePoints = ngramContext->getNthPrevWordCodePoints(i + 1); + jintArray prevWordCodePoints = env->NewIntArray(codePoints.size()); + JniDataUtils::outputCodePoints(env, prevWordCodePoints, 0 /* start */, + codePoints.size(), codePoints.data(), codePoints.size(), + false /* needsNullTermination */); + env->SetObjectArrayElement(prevWordWordCodePointsArray, i, prevWordCodePoints); + env->DeleteLocalRef(prevWordCodePoints); + JniDataUtils::putBooleanToArray(env, prevWordIsBeginningOfSentenceArray, i, + ngramContext->isNthPrevWordBeginningOfSentence(i + 1)); + } + env->CallBooleanMethod(outNgramPrevWordsArray, addMethodId, prevWordWordCodePointsArray); + env->CallBooleanMethod(outNgramPrevWordIsBeginningOfSentenceArray, addMethodId, + prevWordIsBeginningOfSentenceArray); + env->DeleteLocalRef(prevWordWordCodePointsArray); + env->DeleteLocalRef(prevWordIsBeginningOfSentenceArray); + + const std::vector<int> *const targetWordCodePoints = ngramProperty.getTargetCodePoints(); + jintArray targetWordCodePointArray = env->NewIntArray(targetWordCodePoints->size()); + JniDataUtils::outputCodePoints(env, targetWordCodePointArray, 0 /* start */, + targetWordCodePoints->size(), targetWordCodePoints->data(), + targetWordCodePoints->size(), false /* needsNullTermination */); + env->CallBooleanMethod(outNgramTargets, addMethodId, targetWordCodePointArray); + env->DeleteLocalRef(targetWordCodePointArray); + + const HistoricalInfo &ngramHistoricalInfo = ngramProperty.getHistoricalInfo(); + int bigramProbabilityInfo[] = {ngramProperty.getProbability(), + ngramHistoricalInfo.getTimestamp(), ngramHistoricalInfo.getLevel(), + ngramHistoricalInfo.getCount()}; + jintArray bigramProbabilityInfoArray = env->NewIntArray(NELEMS(bigramProbabilityInfo)); + env->SetIntArrayRegion(bigramProbabilityInfoArray, 0 /* start */, + NELEMS(bigramProbabilityInfo), bigramProbabilityInfo); + env->CallBooleanMethod(outNgramProbabilities, addMethodId, bigramProbabilityInfoArray); + env->DeleteLocalRef(bigramProbabilityInfoArray); + } + + // Output shortcuts. + for (const auto &shortcut : unigramProperty.getShortcuts()) { + const std::vector<int> *const targetCodePoints = shortcut.getTargetCodePoints(); + jintArray shortcutTargetCodePointArray = env->NewIntArray(targetCodePoints->size()); + JniDataUtils::outputCodePoints(env, shortcutTargetCodePointArray, 0 /* start */, + targetCodePoints->size(), targetCodePoints->data(), targetCodePoints->size(), + false /* needsNullTermination */); + env->CallBooleanMethod(outShortcutTargets, addMethodId, shortcutTargetCodePointArray); + env->DeleteLocalRef(shortcutTargetCodePointArray); + jobject integerProbability = env->NewObject(integerClass, intToIntegerConstructorId, + shortcut.getProbability()); + env->CallBooleanMethod(outShortcutProbabilities, addMethodId, integerProbability); + env->DeleteLocalRef(integerProbability); + } + env->DeleteLocalRef(integerClass); + env->DeleteLocalRef(arrayListClass); +} + } // namespace latinime diff --git a/native/jni/src/utils/jni_data_utils.h b/native/jni/src/utils/jni_data_utils.h index cb82d3c3b..8024e34c4 100644 --- a/native/jni/src/utils/jni_data_utils.h +++ b/native/jni/src/utils/jni_data_utils.h @@ -20,10 +20,11 @@ #include <vector> #include "defines.h" +#include "dictionary/header/header_read_write_utils.h" +#include "dictionary/interface/dictionary_header_structure_policy.h" +#include "dictionary/property/ngram_context.h" +#include "dictionary/property/word_property.h" #include "jni.h" -#include "suggest/core/session/prev_words_info.h" -#include "suggest/core/policy/dictionary_header_structure_policy.h" -#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h" #include "utils/char_utils.h" namespace latinime { @@ -50,6 +51,7 @@ class JniDataUtils { const jsize keyUtf8Length = env->GetStringUTFLength(keyString); char keyChars[keyUtf8Length + 1]; env->GetStringUTFRegion(keyString, 0, env->GetStringLength(keyString), keyChars); + env->DeleteLocalRef(keyString); keyChars[keyUtf8Length] = '\0'; DictionaryHeaderStructurePolicy::AttributeMap::key_type key; HeaderReadWriteUtils::insertCharactersIntoVector(keyChars, &key); @@ -59,6 +61,7 @@ class JniDataUtils { const jsize valueUtf8Length = env->GetStringUTFLength(valueString); char valueChars[valueUtf8Length + 1]; env->GetStringUTFRegion(valueString, 0, env->GetStringLength(valueString), valueChars); + env->DeleteLocalRef(valueString); valueChars[valueUtf8Length] = '\0'; DictionaryHeaderStructurePolicy::AttributeMap::mapped_type value; HeaderReadWriteUtils::insertCharactersIntoVector(valueChars, &value); @@ -96,18 +99,14 @@ class JniDataUtils { } } - static PrevWordsInfo constructPrevWordsInfo(JNIEnv *env, jobjectArray prevWordCodePointArrays, - jbooleanArray isBeginningOfSentenceArray) { + static NgramContext constructNgramContext(JNIEnv *env, jobjectArray prevWordCodePointArrays, + jbooleanArray isBeginningOfSentenceArray, const size_t prevWordCount) { int prevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH]; int prevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; bool isBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; - jsize prevWordsCount = env->GetArrayLength(prevWordCodePointArrays); - for (size_t i = 0; i < NELEMS(prevWordCodePoints); ++i) { + for (size_t i = 0; i < prevWordCount; ++i) { prevWordCodePointCount[i] = 0; isBeginningOfSentence[i] = false; - if (prevWordsCount <= static_cast<int>(i)) { - continue; - } jintArray prevWord = (jintArray)env->GetObjectArrayElement(prevWordCodePointArrays, i); if (!prevWord) { continue; @@ -117,14 +116,15 @@ class JniDataUtils { continue; } env->GetIntArrayRegion(prevWord, 0, prevWordLength, prevWordCodePoints[i]); + env->DeleteLocalRef(prevWord); prevWordCodePointCount[i] = prevWordLength; jboolean isBeginningOfSentenceBoolean = JNI_FALSE; env->GetBooleanArrayRegion(isBeginningOfSentenceArray, i, 1 /* len */, &isBeginningOfSentenceBoolean); isBeginningOfSentence[i] = isBeginningOfSentenceBoolean == JNI_TRUE; } - return PrevWordsInfo(prevWordCodePoints, prevWordCodePointCount, isBeginningOfSentence, - MAX_PREV_WORD_COUNT_FOR_N_GRAM); + return NgramContext(prevWordCodePoints, prevWordCodePointCount, isBeginningOfSentence, + prevWordCount); } static void putBooleanToArray(JNIEnv *env, jbooleanArray array, const int index, @@ -141,6 +141,12 @@ class JniDataUtils { env->SetFloatArrayRegion(array, index, 1 /* len */, &value); } + static void outputWordProperty(JNIEnv *const env, const WordProperty &wordProperty, + jintArray outCodePoints, jbooleanArray outFlags, jintArray outProbabilityInfo, + jobject outNgramPrevWordsArray, jobject outNgramPrevWordIsBeginningOfSentenceArray, + jobject outNgramTargets, jobject outNgramProbabilities, jobject outShortcutTargets, + jobject outShortcutProbabilities); + private: DISALLOW_IMPLICIT_CONSTRUCTORS(JniDataUtils); diff --git a/native/jni/src/utils/ngram_utils.h b/native/jni/src/utils/ngram_utils.h new file mode 100644 index 000000000..fa85ba35f --- /dev/null +++ b/native/jni/src/utils/ngram_utils.h @@ -0,0 +1,63 @@ +/* + * Copyright (C) 2014, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_NGRAM_UTILS_H +#define LATINIME_NGRAM_UTILS_H + +#include "defines.h" + +namespace latinime { + +enum class NgramType : int { + Unigram = 0, + Bigram = 1, + Trigram = 2, + Quadgram = 3, + NotANgramType = -1, +}; + +namespace AllNgramTypes { +// Use anonymous namespace to avoid ODR (One Definition Rule) violation. +namespace { + +const NgramType ASCENDING[] = { + NgramType::Unigram, NgramType::Bigram, NgramType::Trigram +}; + +const NgramType DESCENDING[] = { + NgramType::Trigram, NgramType::Bigram, NgramType::Unigram +}; + +} // namespace +} // namespace AllNgramTypes + +class NgramUtils final { + public: + static AK_FORCE_INLINE NgramType getNgramTypeFromWordCount(const int wordCount) { + // Max supported ngram is (MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1)-gram. + if (wordCount <= 0 || wordCount > MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1) { + return NgramType::NotANgramType; + } + // Convert word count to 0-origin enum value. + return static_cast<NgramType>(wordCount - 1); + } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(NgramUtils); + +}; +} +#endif /* LATINIME_NGRAM_UTILS_H */ diff --git a/native/jni/src/utils/profiler.h b/native/jni/src/utils/profiler.h new file mode 100644 index 000000000..5f107fed3 --- /dev/null +++ b/native/jni/src/utils/profiler.h @@ -0,0 +1,86 @@ +/* + * Copyright (C) 2014, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_PROFILER_H +#define LATINIME_PROFILER_H + +#ifdef FLAG_DO_PROFILE + +#include "defines.h" + +#include <ctime> +#include <unordered_map> + +namespace latinime { + +class Profiler final { + public: + Profiler(const clockid_t clockId) + : mClockId(clockId), mStartTime(getTimeInMicroSec()), mStartTimes(), mTimes(), + mCounters() {} + + ~Profiler() { + const float totalTime = + static_cast<float>(getTimeInMicroSec() - mStartTime) / 1000.f; + AKLOGI("Total time is %6.3f ms.", totalTime); + for (const auto &time : mTimes) { + AKLOGI("(%d): Used %4.2f%%, %8.4f ms. Called %d times.", time.first, + time.second / totalTime * 100.0f, time.second, mCounters[time.first]); + } + } + + void startTimer(const int id) { + mStartTimes[id] = getTimeInMicroSec(); + } + + void endTimer(const int id) { + mTimes[id] += static_cast<float>(getTimeInMicroSec() - mStartTimes[id]) / 1000.0f; + mCounters[id]++; + } + + operator bool() const { return false; } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(Profiler); + + const clockid_t mClockId; + int64_t mStartTime; + std::unordered_map<int, int64_t> mStartTimes; + std::unordered_map<int, float> mTimes; + std::unordered_map<int, int> mCounters; + + int64_t getTimeInMicroSec() { + timespec time; + clock_gettime(mClockId, &time); + return static_cast<int64_t>(time.tv_sec) * 1000000 + + static_cast<int64_t>(time.tv_nsec) / 1000; + } +}; +} // namespace latinime + +#define PROF_INIT Profiler __LATINIME__PROFILER__(CLOCK_THREAD_CPUTIME_ID) +#define PROF_TIMER_START(timer_id) __LATINIME__PROFILER__.startTimer(timer_id) +#define PROF_TIMER_END(timer_id) __LATINIME__PROFILER__.endTimer(timer_id) + +#else // FLAG_DO_PROFILE + +#define PROF_INIT +#define PROF_TIMER_START(timer_id) +#define PROF_TIMER_END(timer_id) + +#endif // FLAG_DO_PROFILE + +#endif /* LATINIME_PROFILER_H */ |