diff options
Diffstat (limited to 'native')
9 files changed, 191 insertions, 95 deletions
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index b856718c6..33b6a6f1b 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -176,26 +176,26 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, j } static jint latinime_BinaryDictionary_getProbability(JNIEnv *env, jclass clazz, jlong dict, - jintArray wordArray) { + jintArray word) { Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); if (!dictionary) return 0; - const jsize codePointLength = env->GetArrayLength(wordArray); - int codePoints[codePointLength]; - env->GetIntArrayRegion(wordArray, 0, codePointLength, codePoints); - return dictionary->getProbability(codePoints, codePointLength); + const jsize wordLength = env->GetArrayLength(word); + int codePoints[wordLength]; + env->GetIntArrayRegion(word, 0, wordLength, codePoints); + return dictionary->getProbability(codePoints, wordLength); } static jboolean latinime_BinaryDictionary_isValidBigram(JNIEnv *env, jclass clazz, jlong dict, - jintArray wordArray1, jintArray wordArray2) { + jintArray word0, jintArray word1) { Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); if (!dictionary) return JNI_FALSE; - const jsize codePointLength1 = env->GetArrayLength(wordArray1); - const jsize codePointLength2 = env->GetArrayLength(wordArray2); - int codePoints1[codePointLength1]; - int codePoints2[codePointLength2]; - env->GetIntArrayRegion(wordArray1, 0, codePointLength1, codePoints1); - env->GetIntArrayRegion(wordArray2, 0, codePointLength2, codePoints2); - return dictionary->isValidBigram(codePoints1, codePointLength1, codePoints2, codePointLength2); + const jsize word0Length = env->GetArrayLength(word0); + const jsize word1Length = env->GetArrayLength(word1); + int word0CodePoints[word0Length]; + int word1CodePoints[word1Length]; + 
env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints);
+    env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
+    return dictionary->isValidBigram(word0CodePoints, word0Length, word1CodePoints, word1Length);
 }
 
 static jfloat latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jclass clazz,
@@ -246,6 +246,58 @@ static void releaseDictBuf(const void *dictBuf, const size_t length, const int f
     }
 }
 
+// Forwards |word| (code points) and |probability| to Dictionary::addUnigramWord() on the
+// dictionary behind the native handle |dict|. Does nothing when |dict| is null.
+static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz, jlong dict,
+        jintArray word, jint probability) {
+    Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
+    if (!dictionary) {
+        return;
+    }
+    const jsize wordLength = env->GetArrayLength(word);
+    int codePoints[wordLength];
+    // Copy the Java array into the native buffer; otherwise the dictionary would be handed
+    // uninitialized stack memory.
+    env->GetIntArrayRegion(word, 0, wordLength, codePoints);
+    dictionary->addUnigramWord(codePoints, wordLength, probability);
+}
+
+// Forwards |word0|, |word1| (code points) and |probability| to Dictionary::addBigramWords() on
+// the dictionary behind the native handle |dict|. Does nothing when |dict| is null.
+static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, jlong dict,
+        jintArray word0, jintArray word1, jint probability) {
+    Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
+    if (!dictionary) {
+        return;
+    }
+    const jsize word0Length = env->GetArrayLength(word0);
+    int word0CodePoints[word0Length];
+    env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints);
+    const jsize word1Length = env->GetArrayLength(word1);
+    int word1CodePoints[word1Length];
+    env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
+    dictionary->addBigramWords(word0CodePoints, word0Length, word1CodePoints,
+            word1Length, probability);
+}
+
+// Forwards |word0| and |word1| (code points) to Dictionary::removeBigramWords() on the
+// dictionary behind the native handle |dict|. Does nothing when |dict| is null.
+static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass clazz, jlong dict,
+        jintArray word0, jintArray word1) {
+    Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
+    if (!dictionary) {
+        return;
+    }
+    const jsize word0Length = env->GetArrayLength(word0);
+    int word0CodePoints[word0Length];
+    env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints);
+    const jsize word1Length = env->GetArrayLength(word1);
+    int word1CodePoints[word1Length];
+    env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
+    dictionary->removeBigramWords(word0CodePoints, word0Length, word1CodePoints,
+            word1Length);
+}
+
 static const JNINativeMethod sMethods[] = {
     {
         const_cast<char *>("openNative"),
@@ -281,6 +333,21 @@ static const JNINativeMethod sMethods[] = {
         const_cast<char
*>("editDistanceNative"), const_cast<char *>("([I[I)I"), reinterpret_cast<void *>(latinime_BinaryDictionary_editDistance) + }, + { + const_cast<char *>("addUnigramWordNative"), + const_cast<char *>("(J[II)V"), + reinterpret_cast<void *>(latinime_BinaryDictionary_addUnigramWord) + }, + { + const_cast<char *>("addBigramWordsNative"), + const_cast<char *>("(J[I[II)V"), + reinterpret_cast<void *>(latinime_BinaryDictionary_addBigramWords) + }, + { + const_cast<char *>("removeBigramWordsNative"), + const_cast<char *>("(J[I[I)V"), + reinterpret_cast<void *>(latinime_BinaryDictionary_removeBigramWords) } }; diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp index 6e02100fc..242a9bdd6 100644 --- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp @@ -184,13 +184,13 @@ bool BigramDictionary::checkFirstCharacter(int *word, int *inputCodePoints) cons return false; } -bool BigramDictionary::isValidBigram(const int *word1, int length1, const int *word2, - int length2) const { - int pos = getBigramListPositionForWord(word1, length1, false /* forceLowerCaseSearch */); +bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *word1, + int length1) const { + int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */); // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams if (0 == pos) return false; int nextWordPos = BinaryFormat::getTerminalPosition(mBinaryDictionaryInfo->getDictRoot(), - word2, length2, false /* forceLowerCaseSearch */); + word1, length1, false /* forceLowerCaseSearch */); if (NOT_VALID_WORD == nextWordPos) return false; for (BinaryDictionaryBigramsIterator bigramsIt(mBinaryDictionaryInfo, pos); diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp 
b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp index 0a7509c8b..52b668936 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp @@ -33,6 +33,9 @@ const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_HAS_NEXT = 0x80; // Mask for attribute probability, stored on 4 bits inside the flags byte. const TaUtils::TerminalAttributeFlags TaUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F; const int TaUtils::ATTRIBUTE_ADDRESS_SHIFT = 4; +const int TaUtils::SHORTCUT_LIST_SIZE_FIELD_SIZE = 2; +// The numeric value of the shortcut probability that means 'whitelist'. +const int TaUtils::WHITELIST_SHORTCUT_PROBABILITY = 15; /* static */ int TaUtils::getBigramAddressAndForwardPointer( const BinaryDictionaryInfo *const binaryDictionaryInfo, const TerminalAttributeFlags flags, diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h index f38fd5aaa..15637d8a9 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h @@ -29,6 +29,7 @@ class BinaryDictionaryTerminalAttributesReadingUtils { public: typedef uint8_t TerminalAttributeFlags; typedef TerminalAttributeFlags BigramFlags; + typedef TerminalAttributeFlags ShortcutFlags; static AK_FORCE_INLINE TerminalAttributeFlags getFlagsAndForwardPointer( const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) { @@ -59,6 +60,34 @@ class BinaryDictionaryTerminalAttributesReadingUtils { const BinaryDictionaryInfo *const binaryDictionaryInfo, const BigramFlags flags, int *const pos); + // Shortcuts reading methods + // This method returns the 
size of the shortcut list region excluding the shortcut list size + // field at the beginning. + static AK_FORCE_INLINE int getShortcutListSizeAndForwardPointer( + const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) { + // readUint16andAdvancePosition() returns an offset *including* the uint16 field itself. + return ByteArrayUtils::readUint16andAdvancePosition( + binaryDictionaryInfo->getDictRoot(), pos) - SHORTCUT_LIST_SIZE_FIELD_SIZE; + } + + static AK_FORCE_INLINE void skipShortcuts( + const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) { + const int shortcutListSize = getShortcutListSizeAndForwardPointer( + binaryDictionaryInfo, pos); + *pos += shortcutListSize; + } + + static AK_FORCE_INLINE bool isWhitelist(const ShortcutFlags flags) { + return getProbabilityFromFlags(flags) == WHITELIST_SHORTCUT_PROBABILITY; + } + + static AK_FORCE_INLINE int readShortcutTarget( + const BinaryDictionaryInfo *const binaryDictionaryInfo, const int maxLength, + int *const outWord, int *const pos) { + return ByteArrayUtils::readStringAndAdvancePosition( + binaryDictionaryInfo->getDictRoot(), maxLength, outWord, pos); + } + private: DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryTerminalAttributesReadingUtils); @@ -70,6 +99,8 @@ class BinaryDictionaryTerminalAttributesReadingUtils { static const TerminalAttributeFlags FLAG_ATTRIBUTE_HAS_NEXT; static const TerminalAttributeFlags MASK_ATTRIBUTE_PROBABILITY; static const int ATTRIBUTE_ADDRESS_SHIFT; + static const int SHORTCUT_LIST_SIZE_FIELD_SIZE; + static const int WHITELIST_SHORTCUT_PROBABILITY; static AK_FORCE_INLINE bool isOffsetNegative(const TerminalAttributeFlags flags) { return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0; diff --git a/native/jni/src/suggest/core/dictionary/binary_format.h b/native/jni/src/suggest/core/dictionary/binary_format.h index df0ec480d..9557d8ce7 100644 --- a/native/jni/src/suggest/core/dictionary/binary_format.h +++ 
b/native/jni/src/suggest/core/dictionary/binary_format.h @@ -52,14 +52,10 @@ class BinaryFormat { // Mask for attribute probability, stored on 4 bits inside the flags byte. static const int MASK_ATTRIBUTE_PROBABILITY = 0x0F; - // The numeric value of the shortcut probability that means 'whitelist'. - static const int WHITELIST_SHORTCUT_PROBABILITY = 15; // Mask and flags for attribute address type selection. static const int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30; - static const int SHORTCUT_LIST_SIZE_SIZE = 2; - static bool hasBlacklistedOrNotAWordFlag(const int flags); static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos); static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos); @@ -73,9 +69,6 @@ class BinaryFormat { const int pos); static int readChildrenPosition(const uint8_t *const dict, const uint8_t flags, const int pos); static bool hasChildrenInFlags(const uint8_t flags); - static int getAttributeAddressAndForwardPointer(const uint8_t *const dict, const uint8_t flags, - int *pos); - static int getAttributeProbabilityFromFlags(const int flags); static int getTerminalPosition(const uint8_t *const root, const int *const inWord, const int length, const bool forceLowerCaseSearch); static int getWordAtAddress(const uint8_t *const root, const int address, const int maxDepth, @@ -260,38 +253,6 @@ inline bool BinaryFormat::hasChildrenInFlags(const uint8_t flags) { return (FLAG_GROUP_ADDRESS_TYPE_NOADDRESS != (MASK_GROUP_ADDRESS_TYPE & flags)); } -AK_FORCE_INLINE int BinaryFormat::getAttributeAddressAndForwardPointer(const uint8_t *const dict, - const uint8_t flags, int *pos) { - int offset = 0; - const int origin = *pos; - switch (MASK_ATTRIBUTE_ADDRESS_TYPE & flags) { - case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: - offset = dict[origin]; - *pos = origin + 1; - break; - case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: - offset = dict[origin] << 8; - offset += dict[origin + 1]; - *pos = origin + 2; - break; - case 
FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES: - offset = dict[origin] << 16; - offset += dict[origin + 1] << 8; - offset += dict[origin + 2]; - *pos = origin + 3; - break; - } - if (FLAG_ATTRIBUTE_OFFSET_NEGATIVE & flags) { - return origin - offset; - } else { - return origin + offset; - } -} - -inline int BinaryFormat::getAttributeProbabilityFromFlags(const int flags) { - return flags & MASK_ATTRIBUTE_PROBABILITY; -} - // This function gets the byte position of the last chargroup of the exact matching word in the // dictionary. If no match is found, it returns NOT_VALID_WORD. AK_FORCE_INLINE int BinaryFormat::getTerminalPosition(const uint8_t *const root, diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index 028b61506..51f23dc55 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -106,8 +106,37 @@ int Dictionary::getProbability(const int *word, int length) const { return unigramProbability; } -bool Dictionary::isValidBigram(const int *word1, int length1, const int *word2, int length2) const { - return mBigramDictionary->isValidBigram(word1, length1, word2, length2); +bool Dictionary::isValidBigram(const int *word0, int length0, const int *word1, int length1) const { + return mBigramDictionary->isValidBigram(word0, length0, word1, length1); +} + +void Dictionary::addUnigramWord(const int *const word, const int length, const int probability) { + if (!mBinaryDictionaryInfo.isDynamicallyUpdatable()) { + // This method should not be called for non-updatable dictionary. 
+ AKLOGI("Warning: Dictionary::addUnigramWord() is called for non-updatable dictionary."); + return; + } + // TODO: Support dynamic update +} + +void Dictionary::addBigramWords(const int *const word0, const int length0, const int *const word1, + const int length1, const int probability) { + if (!mBinaryDictionaryInfo.isDynamicallyUpdatable()) { + // This method should not be called for non-updatable dictionary. + AKLOGI("Warning: Dictionary::addBigramWords() is called for non-updatable dictionary."); + return; + } + // TODO: Support dynamic update +} + +void Dictionary::removeBigramWords(const int *const word0, const int length0, + const int *const word1, const int length1) { + if (!mBinaryDictionaryInfo.isDynamicallyUpdatable()) { + // This method should not be called for non-updatable dictionary. + AKLOGI("Warning: Dictionary::removeBigramWords() is called for non-updatable dictionary."); + return; + } + // TODO: Support dynamic update } } // namespace latinime diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h index afd081841..94579c200 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.h +++ b/native/jni/src/suggest/core/dictionary/dictionary.h @@ -64,10 +64,21 @@ class Dictionary { int *frequencies, int *outputTypes) const; int getProbability(const int *word, int length) const; - bool isValidBigram(const int *word1, int length1, const int *word2, int length2) const; + + bool isValidBigram(const int *word0, int length0, const int *word1, int length1) const; + + void addUnigramWord(const int *const word, const int length, const int probability); + + void addBigramWords(const int *const word0, const int length0, const int *const word1, + const int length1, const int probability); + + void removeBigramWords(const int *const word0, const int length0, const int *const word1, + const int length1); + const BinaryDictionaryInfo *getBinaryDictionaryInfo() const { return 
&mBinaryDictionaryInfo;
     }
+
     virtual ~Dictionary();
 
  private:
diff --git a/native/jni/src/suggest/core/dictionary/shortcut_utils.h b/native/jni/src/suggest/core/dictionary/shortcut_utils.h
index 601ac5f5a..3c2180937 100644
--- a/native/jni/src/suggest/core/dictionary/shortcut_utils.h
+++ b/native/jni/src/suggest/core/dictionary/shortcut_utils.h
@@ -29,15 +29,16 @@ class ShortcutUtils {
             int outputWordIndex, const int finalScore, int *const outputCodePoints,
             int *const frequencies, int *const outputTypes, const bool sameAsTyped) {
         TerminalAttributes::ShortcutIterator iterator = terminalAttributes->getShortcutIterator();
+        int shortcutTarget[MAX_WORD_LENGTH];
         while (iterator.hasNextShortcutTarget() && outputWordIndex < MAX_RESULTS) {
-            int shortcutTarget[MAX_WORD_LENGTH];
-            int shortcutProbability;
-            const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
-                    MAX_WORD_LENGTH, shortcutTarget, &shortcutProbability);
+            // Set by nextShortcutTarget(): whether the current target is a whitelist entry.
+            bool isWhitelist;
+            int shortcutTargetStringLength;
+            iterator.nextShortcutTarget(MAX_WORD_LENGTH, shortcutTarget,
+                    &shortcutTargetStringLength, &isWhitelist);
             int shortcutScore;
             int kind;
-            if (shortcutProbability == BinaryFormat::WHITELIST_SHORTCUT_PROBABILITY
-                    && sameAsTyped) {
+            if (isWhitelist && sameAsTyped) {
                 shortcutScore = S_INT_MAX;
                 kind = Dictionary::KIND_WHITELIST;
             } else {
diff --git a/native/jni/src/suggest/core/dictionary/terminal_attributes.h b/native/jni/src/suggest/core/dictionary/terminal_attributes.h
index bbd9af090..cec47081e 100644
--- a/native/jni/src/suggest/core/dictionary/terminal_attributes.h
+++ b/native/jni/src/suggest/core/dictionary/terminal_attributes.h
@@ -20,6 +20,7 @@
 #include <stdint.h>
 
 #include "suggest/core/dictionary/binary_dictionary_info.h"
+#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h"
 #include "suggest/core/dictionary/binary_format.h"
 
 namespace latinime {
@@ -33,60 +34,66 @@ class TerminalAttributes {
  public:
     class ShortcutIterator {
      public:
-
ShortcutIterator(const BinaryDictionaryInfo *const binaryDictionaryInfo, const int pos, - const uint8_t flags) - : mBinaryDicitionaryInfo(binaryDictionaryInfo), mPos(pos), - mHasNextShortcutTarget(0 != (flags & BinaryFormat::FLAG_HAS_SHORTCUT_TARGETS)) { - } + ShortcutIterator(const BinaryDictionaryInfo *const binaryDictionaryInfo, + const int shortcutPos, const bool hasShortcutList) + : mBinaryDictionaryInfo(binaryDictionaryInfo), mPos(shortcutPos), + mHasNextShortcutTarget(hasShortcutList) {} inline bool hasNextShortcutTarget() const { return mHasNextShortcutTarget; } - // Gets the shortcut target itself as an int string. For parameters and return value - // see BinaryFormat::getWordAtAddress. - inline int getNextShortcutTarget(const int maxDepth, int *outWord, int *outFreq) { - const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer( - mBinaryDicitionaryInfo->getDictRoot(), &mPos); - mHasNextShortcutTarget = 0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT); - unsigned int i; - for (i = 0; i < MAX_WORD_LENGTH; ++i) { - const int codePoint = BinaryFormat::getCodePointAndForwardPointer( - mBinaryDicitionaryInfo->getDictRoot(), &mPos); - if (NOT_A_CODE_POINT == codePoint) break; - outWord[i] = codePoint; + // Gets the shortcut target itself as an int string and put it to outTarget, put its length + // to outTargetLength, put whether it is whitelist to outIsWhitelist. 
+ AK_FORCE_INLINE void nextShortcutTarget( + const int maxDepth, int *const outTarget, int *const outTargetLength, + bool *const outIsWhitelist) { + const BinaryDictionaryTerminalAttributesReadingUtils::ShortcutFlags flags = + BinaryDictionaryTerminalAttributesReadingUtils::getFlagsAndForwardPointer( + mBinaryDictionaryInfo, &mPos); + mHasNextShortcutTarget = + BinaryDictionaryTerminalAttributesReadingUtils::hasNext(flags); + if (outIsWhitelist) { + *outIsWhitelist = + BinaryDictionaryTerminalAttributesReadingUtils::isWhitelist(flags); + } + if (outTargetLength) { + *outTargetLength = + BinaryDictionaryTerminalAttributesReadingUtils::readShortcutTarget( + mBinaryDictionaryInfo, maxDepth, outTarget, &mPos); } - *outFreq = BinaryFormat::getAttributeProbabilityFromFlags(shortcutFlags); - return i; } private: - const BinaryDictionaryInfo *const mBinaryDicitionaryInfo; + const BinaryDictionaryInfo *const mBinaryDictionaryInfo; int mPos; bool mHasNextShortcutTarget; }; - TerminalAttributes(const BinaryDictionaryInfo *const binaryDicitonaryInfo, - const uint8_t flags, const int pos) - : mBinaryDicitionaryInfo(binaryDicitonaryInfo), mFlags(flags), mStartPos(pos) { - } + TerminalAttributes(const BinaryDictionaryInfo *const binaryDictionaryInfo, + const uint8_t nodeFlags, const int shortcutPos) + : mBinaryDictionaryInfo(binaryDictionaryInfo), + mNodeFlags(nodeFlags), mShortcutListSizePos(shortcutPos) {} inline ShortcutIterator getShortcutIterator() const { // The size of the shortcuts is stored here so that the whole shortcut chunk can be // skipped quickly, so we ignore it. 
- return ShortcutIterator( - mBinaryDicitionaryInfo, mStartPos + BinaryFormat::SHORTCUT_LIST_SIZE_SIZE, mFlags); + int shortcutPos = mShortcutListSizePos; + BinaryDictionaryTerminalAttributesReadingUtils::getShortcutListSizeAndForwardPointer( + mBinaryDictionaryInfo, &shortcutPos); + const bool hasShortcutList = 0 != (mNodeFlags & BinaryFormat::FLAG_HAS_SHORTCUT_TARGETS); + return ShortcutIterator(mBinaryDictionaryInfo, shortcutPos, hasShortcutList); } bool isBlacklistedOrNotAWord() const { - return BinaryFormat::hasBlacklistedOrNotAWordFlag(mFlags); + return BinaryFormat::hasBlacklistedOrNotAWordFlag(mNodeFlags); } private: DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalAttributes); - const BinaryDictionaryInfo *const mBinaryDicitionaryInfo; - const uint8_t mFlags; - const int mStartPos; + const BinaryDictionaryInfo *const mBinaryDictionaryInfo; + const uint8_t mNodeFlags; + const int mShortcutListSizePos; }; } // namespace latinime #endif // LATINIME_TERMINAL_ATTRIBUTES_H |