diff options
Diffstat (limited to 'native/jni/src/utils')
-rw-r--r-- | native/jni/src/utils/autocorrection_threshold_utils.cpp | 3 | ||||
-rw-r--r-- | native/jni/src/utils/char_utils.cpp | 8 | ||||
-rw-r--r-- | native/jni/src/utils/char_utils.h | 27 | ||||
-rw-r--r-- | native/jni/src/utils/jni_data_utils.cpp (renamed from native/jni/src/utils/hash_map_compat.h) | 22 | ||||
-rw-r--r-- | native/jni/src/utils/jni_data_utils.h | 151 | ||||
-rw-r--r-- | native/jni/src/utils/time_keeper.cpp | 41 | ||||
-rw-r--r-- | native/jni/src/utils/time_keeper.h | 41 |
7 files changed, 275 insertions, 18 deletions
diff --git a/native/jni/src/utils/autocorrection_threshold_utils.cpp b/native/jni/src/utils/autocorrection_threshold_utils.cpp index 1f8ee0814..349786a27 100644 --- a/native/jni/src/utils/autocorrection_threshold_utils.cpp +++ b/native/jni/src/utils/autocorrection_threshold_utils.cpp @@ -16,6 +16,7 @@ #include "utils/autocorrection_threshold_utils.h" +#include <algorithm> #include <cmath> #include "defines.h" @@ -99,7 +100,7 @@ const int AutocorrectionThresholdUtils::FULL_WORD_MULTIPLIER = 2; const float maxScore = score >= S_INT_MAX ? static_cast<float>(S_INT_MAX) : static_cast<float>(MAX_INITIAL_SCORE) * powf(static_cast<float>(TYPED_LETTER_MULTIPLIER), - static_cast<float>(min(beforeLength, afterLength - spaceCount))) + static_cast<float>(std::min(beforeLength, afterLength - spaceCount))) * static_cast<float>(FULL_WORD_MULTIPLIER); return (static_cast<float>(score) / maxScore) * weight; diff --git a/native/jni/src/utils/char_utils.cpp b/native/jni/src/utils/char_utils.cpp index 0e7039610..b17e0847d 100644 --- a/native/jni/src/utils/char_utils.cpp +++ b/native/jni/src/utils/char_utils.cpp @@ -22,6 +22,9 @@ namespace latinime { +const int CharUtils::MIN_UNICODE_CODE_POINT = 0; +const int CharUtils::MAX_UNICODE_CODE_POINT = 0x10FFFF; + struct LatinCapitalSmallPair { unsigned short capital; unsigned short small; @@ -1118,7 +1121,8 @@ static int compare_pair_capital(const void *a, const void *b) { /* U+0118 */ 0x0045, 0x0065, 0x0045, 0x0065, 0x0047, 0x0067, 0x0047, 0x0067, /* U+0120 */ 0x0047, 0x0067, 0x0047, 0x0067, 0x0048, 0x0068, 0x0126, 0x0127, /* U+0128 */ 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, - /* U+0130 */ 0x0049, 0x0131, 0x0049, 0x0069, 0x004A, 0x006A, 0x004B, 0x006B, + // U+0131: Manually changed from 0131 to 0049 + /* U+0130 */ 0x0049, 0x0049, 0x0049, 0x0069, 0x004A, 0x006A, 0x004B, 0x006B, /* U+0138 */ 0x0138, 0x004C, 0x006C, 0x004C, 0x006C, 0x004C, 0x006C, 0x004C, /* U+0140 */ 0x006C, 0x004C, 0x006C, 0x004E, 0x006E, 0x004E, 0x006E, 0x004E, // U+0141: Manually changed from 0141 to 004C @@ -1273,4 +1277,6 @@ static int compare_pair_capital(const void *a, const void *b) { /* U+04F0 */ 0x0423, 0x0443, 0x0423, 0x0443, 0x0427, 0x0447, 0x04F6, 0x04F7, /* U+04F8 */ 0x042B, 0x044B, 0x04FA, 0x04FB, 0x04FC, 0x04FD, 0x04FE, 0x04FF, }; + +/* static */ const std::vector<int> CharUtils::EMPTY_STRING(1 /* size */, '\0' /* value */); } // namespace latinime diff --git a/native/jni/src/utils/char_utils.h b/native/jni/src/utils/char_utils.h index 41663c81a..63786502b 100644 --- a/native/jni/src/utils/char_utils.h +++ b/native/jni/src/utils/char_utils.h @@ -18,6 +18,8 @@ #define LATINIME_CHAR_UTILS_H #include <cctype> +#include <cstring> +#include <vector> #include "defines.h" @@ -85,11 +87,36 @@ class CharUtils { return spaceCount; } + static AK_FORCE_INLINE int isInUnicodeSpace(const int codePoint) { + return codePoint >= MIN_UNICODE_CODE_POINT && codePoint <= MAX_UNICODE_CODE_POINT; + } + static unsigned short latin_tolower(const unsigned short c); + static const std::vector<int> EMPTY_STRING; + + // Returns updated code point count. Returns 0 when the code points cannot be marked as a + // Beginning-of-Sentence. + static AK_FORCE_INLINE int attachBeginningOfSentenceMarker(int *const codePoints, + const int codePointCount, const int maxCodePoint) { + if (codePointCount > 0 && codePoints[0] == CODE_POINT_BEGINNING_OF_SENTENCE) { + // Marker has already been attached. + return codePointCount; + } + if (codePointCount >= maxCodePoint) { + // the code points cannot be marked as a Beginning-of-Sentence. + return 0; + } + memmove(codePoints + 1, codePoints, sizeof(int) * codePointCount); + codePoints[0] = CODE_POINT_BEGINNING_OF_SENTENCE; + return codePointCount + 1; + } private: DISALLOW_IMPLICIT_CONSTRUCTORS(CharUtils); + static const int MIN_UNICODE_CODE_POINT; + static const int MAX_UNICODE_CODE_POINT; + /** * Table mapping most combined Latin, Greek, and Cyrillic characters * to their base characters. If c is in range, BASE_CHARS[c] == c diff --git a/native/jni/src/utils/hash_map_compat.h b/native/jni/src/utils/jni_data_utils.cpp index a1e982bc4..5555293d5 100644 --- a/native/jni/src/utils/hash_map_compat.h +++ b/native/jni/src/utils/jni_data_utils.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012 The Android Open Source Project + * Copyright (C) 2014 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,21 +14,11 @@ * limitations under the License. */ -#ifndef LATINIME_HASH_MAP_COMPAT_H -#define LATINIME_HASH_MAP_COMPAT_H +#include "utils/jni_data_utils.h" -// TODO: Use std::unordered_map that has been standardized in C++11 +namespace latinime { -#ifdef __APPLE__ -#include <ext/hash_map> -#else // __APPLE__ -#include <hash_map> -#endif // __APPLE__ +const int JniDataUtils::CODE_POINT_REPLACEMENT_CHARACTER = 0xFFFD; +const int JniDataUtils::CODE_POINT_NULL = 0; -#ifdef __SGI_STL_PORT -#define hash_map_compat stlport::hash_map -#else // __SGI_STL_PORT -#define hash_map_compat __gnu_cxx::hash_map -#endif // __SGI_STL_PORT - -#endif // LATINIME_HASH_MAP_COMPAT_H +} // namespace latinime diff --git a/native/jni/src/utils/jni_data_utils.h b/native/jni/src/utils/jni_data_utils.h new file mode 100644 index 000000000..cb82d3c3b --- /dev/null +++ b/native/jni/src/utils/jni_data_utils.h @@ -0,0 +1,151 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_JNI_DATA_UTILS_H +#define LATINIME_JNI_DATA_UTILS_H + +#include <vector> + +#include "defines.h" +#include "jni.h" +#include "suggest/core/session/prev_words_info.h" +#include "suggest/core/policy/dictionary_header_structure_policy.h" +#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h" +#include "utils/char_utils.h" + +namespace latinime { + +class JniDataUtils { + public: + static void jintarrayToVector(JNIEnv *env, jintArray array, std::vector<int> *const outVector) { + if (!array) { + outVector->clear(); + return; + } + const jsize arrayLength = env->GetArrayLength(array); + outVector->resize(arrayLength); + env->GetIntArrayRegion(array, 0 /* start */, arrayLength, outVector->data()); + } + + static DictionaryHeaderStructurePolicy::AttributeMap constructAttributeMap(JNIEnv *env, + jobjectArray attributeKeyStringArray, jobjectArray attributeValueStringArray) { + DictionaryHeaderStructurePolicy::AttributeMap attributeMap; + const int keyCount = env->GetArrayLength(attributeKeyStringArray); + for (int i = 0; i < keyCount; i++) { + jstring keyString = static_cast<jstring>( + env->GetObjectArrayElement(attributeKeyStringArray, i)); + const jsize keyUtf8Length = env->GetStringUTFLength(keyString); + char keyChars[keyUtf8Length + 1]; + env->GetStringUTFRegion(keyString, 0, env->GetStringLength(keyString), keyChars); + keyChars[keyUtf8Length] = '\0'; + DictionaryHeaderStructurePolicy::AttributeMap::key_type key; + HeaderReadWriteUtils::insertCharactersIntoVector(keyChars, &key); + + jstring valueString = static_cast<jstring>( + env->GetObjectArrayElement(attributeValueStringArray, i)); + const jsize valueUtf8Length = env->GetStringUTFLength(valueString); + char valueChars[valueUtf8Length + 1]; + env->GetStringUTFRegion(valueString, 0, env->GetStringLength(valueString), valueChars); + valueChars[valueUtf8Length] = '\0'; + DictionaryHeaderStructurePolicy::AttributeMap::mapped_type value; + HeaderReadWriteUtils::insertCharactersIntoVector(valueChars, &value); + attributeMap[key] = value; + } + return attributeMap; + } + + static void outputCodePoints(JNIEnv *env, jintArray intArrayToOutputCodePoints, const int start, + const int maxLength, const int *const codePoints, const int codePointCount, + const bool needsNullTermination) { + const int codePointBufSize = std::min(maxLength, codePointCount); + int outputCodePonts[codePointBufSize]; + int outputCodePointCount = 0; + for (int i = 0; i < codePointBufSize; ++i) { + const int codePoint = codePoints[i]; + int codePointToOutput = codePoint; + if (!CharUtils::isInUnicodeSpace(codePoint)) { + if (codePoint == CODE_POINT_BEGINNING_OF_SENTENCE) { + // Just skip Beginning-of-Sentence marker. + continue; + } + codePointToOutput = CODE_POINT_REPLACEMENT_CHARACTER; + } else if (codePoint >= 0x01 && codePoint <= 0x1F) { + // Control code. + codePointToOutput = CODE_POINT_REPLACEMENT_CHARACTER; + } + outputCodePonts[outputCodePointCount++] = codePointToOutput; + } + env->SetIntArrayRegion(intArrayToOutputCodePoints, start, outputCodePointCount, + outputCodePonts); + if (needsNullTermination && outputCodePointCount < maxLength) { + env->SetIntArrayRegion(intArrayToOutputCodePoints, start + outputCodePointCount, + 1 /* len */, &CODE_POINT_NULL); + } + } + + static PrevWordsInfo constructPrevWordsInfo(JNIEnv *env, jobjectArray prevWordCodePointArrays, + jbooleanArray isBeginningOfSentenceArray) { + int prevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH]; + int prevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; + bool isBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; + jsize prevWordsCount = env->GetArrayLength(prevWordCodePointArrays); + for (size_t i = 0; i < NELEMS(prevWordCodePoints); ++i) { + prevWordCodePointCount[i] = 0; + isBeginningOfSentence[i] = false; + if (prevWordsCount <= static_cast<int>(i)) { + continue; + } + jintArray prevWord = (jintArray)env->GetObjectArrayElement(prevWordCodePointArrays, i); + if (!prevWord) { + continue; + } + jsize prevWordLength = env->GetArrayLength(prevWord); + if (prevWordLength > MAX_WORD_LENGTH) { + continue; + } + env->GetIntArrayRegion(prevWord, 0, prevWordLength, prevWordCodePoints[i]); + prevWordCodePointCount[i] = prevWordLength; + jboolean isBeginningOfSentenceBoolean = JNI_FALSE; + env->GetBooleanArrayRegion(isBeginningOfSentenceArray, i, 1 /* len */, + &isBeginningOfSentenceBoolean); + isBeginningOfSentence[i] = isBeginningOfSentenceBoolean == JNI_TRUE; + } + return PrevWordsInfo(prevWordCodePoints, prevWordCodePointCount, isBeginningOfSentence, + MAX_PREV_WORD_COUNT_FOR_N_GRAM); + } + + static void putBooleanToArray(JNIEnv *env, jbooleanArray array, const int index, + const jboolean value) { + env->SetBooleanArrayRegion(array, index, 1 /* len */, &value); + } + + static void putIntToArray(JNIEnv *env, jintArray array, const int index, const int value) { + env->SetIntArrayRegion(array, index, 1 /* len */, &value); + } + + static void putFloatToArray(JNIEnv *env, jfloatArray array, const int index, + const float value) { + env->SetFloatArrayRegion(array, index, 1 /* len */, &value); + } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(JniDataUtils); + + static const int CODE_POINT_REPLACEMENT_CHARACTER; + static const int CODE_POINT_NULL; +}; +} // namespace latinime +#endif // LATINIME_JNI_DATA_UTILS_H diff --git a/native/jni/src/utils/time_keeper.cpp b/native/jni/src/utils/time_keeper.cpp new file mode 100644 index 000000000..026284060 --- /dev/null +++ b/native/jni/src/utils/time_keeper.cpp @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "utils/time_keeper.h" + +#include <ctime> + +namespace latinime { + +int TimeKeeper::sCurrentTime; +bool TimeKeeper::sSetForTesting; + +/* static */ void TimeKeeper::setCurrentTime() { + if (!sSetForTesting) { + sCurrentTime = time(0); + } +} + +/* static */ void TimeKeeper::startTestModeWithForceCurrentTime(const int currentTime) { + sCurrentTime = currentTime; + sSetForTesting = true; +} + +/* static */ void TimeKeeper::stopTestMode() { + sSetForTesting = false; +} + +} // namespace latinime diff --git a/native/jni/src/utils/time_keeper.h b/native/jni/src/utils/time_keeper.h new file mode 100644 index 000000000..d066757e4 --- /dev/null +++ b/native/jni/src/utils/time_keeper.h @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_TIME_KEEPER_H +#define LATINIME_TIME_KEEPER_H + +#include "defines.h" + +namespace latinime { + +class TimeKeeper { + public: + static void setCurrentTime(); + + static void startTestModeWithForceCurrentTime(const int currentTime); + + static void stopTestMode(); + + static int peekCurrentTime() { return sCurrentTime; }; + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(TimeKeeper); + + static int sCurrentTime; + static bool sSetForTesting; +}; +} // namespace latinime +#endif /* LATINIME_TIME_KEEPER_H */ |