diff options
Diffstat (limited to 'native')
9 files changed, 196 insertions, 32 deletions
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index 16a3fe825..c919ebd91 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -260,18 +260,39 @@ static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass c word1Length); } +// Method to iterate all words in the dictionary for makedict. +// If token is 0, this method newly starts iterating the dictionary. This method returns 0 when +// the dictionary does not have a next word. +static jint latinime_BinaryDictionary_getNextWord(JNIEnv *env, jclass clazz, + jlong dict, jint token, jintArray outCodePoints) { + Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); + if (!dictionary) return 0; + const jsize outCodePointsLength = env->GetArrayLength(outCodePoints); + if (outCodePointsLength != MAX_WORD_LENGTH) { + AKLOGE("Invalid outCodePointsLength: %d", outCodePointsLength); + ASSERT(false); + return 0; + } + int wordCodePoints[outCodePointsLength]; + memset(wordCodePoints, 0, sizeof(wordCodePoints)); + const int nextToken = dictionary->getNextWordAndNextToken(token, wordCodePoints); + env->SetIntArrayRegion(outCodePoints, 0, outCodePointsLength, wordCodePoints); + return nextToken; +} + static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz, jlong dict, jintArray word, jintArray outCodePoints, jbooleanArray outFlags, - jintArray outProbability, jintArray outHistoricalInfo, jobject outShortcutTargets, - jobject outShortcutProbabilities) { + jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilityInfo, + jobject outShortcutTargets, jobject outShortcutProbabilities) { Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); if (!dictionary) return; const jsize wordLength = env->GetArrayLength(word); int wordCodePoints[wordLength]; env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints); const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, wordLength); - wordProperty.outputProperties(env, outCodePoints, outFlags, outProbability, - outHistoricalInfo, outShortcutTargets, outShortcutProbabilities); + wordProperty.outputProperties(env, outCodePoints, outFlags, outProbabilityInfo, + outBigramTargets, outBigramProbabilityInfo, outShortcutTargets, + outShortcutProbabilities); } static jfloat latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jclass clazz, @@ -521,10 +542,16 @@ static const JNINativeMethod sMethods[] = { }, { const_cast<char *>("getWordPropertyNative"), - const_cast<char *>("(J[I[I[Z[I[ILjava/util/ArrayList;Ljava/util/ArrayList;)V"), + const_cast<char *>("(J[I[I[Z[ILjava/util/ArrayList;Ljava/util/ArrayList;" + "Ljava/util/ArrayList;Ljava/util/ArrayList;)V"), reinterpret_cast<void *>(latinime_BinaryDictionary_getWordProperty) }, { + const_cast<char *>("getNextWordNative"), + const_cast<char *>("(JI[I)I"), + reinterpret_cast<void *>(latinime_BinaryDictionary_getNextWord) + }, + { const_cast<char *>("calcNormalizedScoreNative"), const_cast<char *>("([I[II)F"), reinterpret_cast<void *>(latinime_BinaryDictionary_calcNormalizedScore) diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index 16b1a56b1..9b71eff7a 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -150,6 +150,12 @@ const WordProperty Dictionary::getWordProperty(const int *const codePoints, codePoints, codePointCount); } +int Dictionary::getNextWordAndNextToken(const int token, int *const outCodePoints) { + TimeKeeper::setCurrentTime(); + return mDictionaryStructureWithBufferPolicy.get()->getNextWordAndNextToken( + token, outCodePoints); +} + void Dictionary::logDictionaryInfo(JNIEnv *const env) const { int dictionaryIdCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE]; int versionStringCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE]; diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h index 4a468f3df..0a413cb52 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.h +++ b/native/jni/src/suggest/core/dictionary/dictionary.h @@ -96,6 +96,11 @@ class Dictionary { const WordProperty getWordProperty(const int *const codePoints, const int codePointCount); + // Method to iterate all words in the dictionary. + // The returned token has to be used to get the next word. If token is 0, this method newly + // starts iterating the dictionary. + int getNextWordAndNextToken(const int token, int *const outCodePoints); + const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const { return mDictionaryStructureWithBufferPolicy.get(); } diff --git a/native/jni/src/suggest/core/dictionary/word_property.cpp b/native/jni/src/suggest/core/dictionary/word_property.cpp index ed32bde8b..288e6b05e 100644 --- a/native/jni/src/suggest/core/dictionary/word_property.cpp +++ b/native/jni/src/suggest/core/dictionary/word_property.cpp @@ -19,29 +19,53 @@ namespace latinime { void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints, - jbooleanArray outFlags, jintArray outProbability, jintArray outHistoricalInfo, - jobject outShortcutTargets, jobject outShortcutProbabilities) const { + jbooleanArray outFlags, jintArray outProbabilityInfo, jobject outBigramTargets, + jobject outBigramProbabilities, jobject outShortcutTargets, + jobject outShortcutProbabilities) const { env->SetIntArrayRegion(outCodePoints, 0 /* start */, mCodePoints.size(), &mCodePoints[0]); jboolean flags[] = {mIsNotAWord, mIsBlacklisted, mHasBigrams, mHasShortcuts}; env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags); - env->SetIntArrayRegion(outProbability, 0 /* start */, 1 /* len */, &mProbability); - int historicalInfo[] = {mTimestamp, mLevel, mCount}; - env->SetIntArrayRegion(outHistoricalInfo, 0 /* start */, NELEMS(historicalInfo), - historicalInfo); + int probabilityInfo[] = {mProbability, mTimestamp, mLevel, mCount}; + env->SetIntArrayRegion(outProbabilityInfo, 0 /* start */, NELEMS(probabilityInfo), + probabilityInfo); jclass integerClass = env->FindClass("java/lang/Integer"); jmethodID intToIntegerConstructorId = env->GetMethodID(integerClass, "<init>", "(I)V"); jclass arrayListClass = env->FindClass("java/util/ArrayList"); jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z"); - const int shortcutTargetCount = mShortcutTargets.size(); + + // Output bigrams. + const int bigramCount = mBigrams.size(); + for (int i = 0; i < bigramCount; ++i) { + const BigramProperty *const bigramProperty = &mBigrams[i]; + const std::vector<int> *const word1CodePoints = bigramProperty->getTargetCodePoints(); + jintArray bigramWord1CodePointArray = env->NewIntArray(word1CodePoints->size()); + env->SetIntArrayRegion(bigramWord1CodePointArray, 0 /* start */, + word1CodePoints->size(), &word1CodePoints->at(0)); + env->CallVoidMethod(outBigramTargets, addMethodId, bigramWord1CodePointArray); + env->DeleteLocalRef(bigramWord1CodePointArray); + + int bigramProbabilityInfo[] = {bigramProperty->getProbability(), + bigramProperty->getTimestamp(), bigramProperty->getLevel(), + bigramProperty->getCount()}; + jintArray bigramProbabilityInfoArray = env->NewIntArray(NELEMS(bigramProbabilityInfo)); + env->SetIntArrayRegion(bigramProbabilityInfoArray, 0 /* start */, + NELEMS(bigramProbabilityInfo), bigramProbabilityInfo); + env->CallVoidMethod(outBigramProbabilities, addMethodId, bigramProbabilityInfoArray); + env->DeleteLocalRef(bigramProbabilityInfoArray); + } + + // Output shortcuts. + const int shortcutTargetCount = mShortcuts.size(); for (int i = 0; i < shortcutTargetCount; ++i) { - jintArray shortcutTargetCodePointArray = env->NewIntArray(mShortcutTargets[i].size()); + const std::vector<int> *const targetCodePoints = mShortcuts[i].getTargetCodePoints(); + jintArray shortcutTargetCodePointArray = env->NewIntArray(targetCodePoints->size()); env->SetIntArrayRegion(shortcutTargetCodePointArray, 0 /* start */, - mShortcutTargets[i].size(), &mShortcutTargets[i][0]); + targetCodePoints->size(), &targetCodePoints->at(0)); env->CallVoidMethod(outShortcutTargets, addMethodId, shortcutTargetCodePointArray); env->DeleteLocalRef(shortcutTargetCodePointArray); jobject integerProbability = env->NewObject(integerClass, intToIntegerConstructorId, - mShortcutProbabilities[i]); + mShortcuts[i].getProbability()); env->CallVoidMethod(outShortcutProbabilities, addMethodId, integerProbability); env->DeleteLocalRef(integerProbability); } diff --git a/native/jni/src/suggest/core/dictionary/word_property.h b/native/jni/src/suggest/core/dictionary/word_property.h index dcac8536a..40b1a91a4 100644 --- a/native/jni/src/suggest/core/dictionary/word_property.h +++ b/native/jni/src/suggest/core/dictionary/word_property.h @@ -28,27 +28,78 @@ namespace latinime { // This class is used for returning information belonging to a word to java side. class WordProperty { public: - // TODO: Add bigram information. + class BigramProperty { + public: + BigramProperty(const std::vector<int> *const targetCodePoints, + const int probability, const int timestamp, const int level, const int count) + : mTargetCodePoints(*targetCodePoints), mProbability(probability), + mTimestamp(timestamp), mLevel(level), mCount(count) {} + + const std::vector<int> *getTargetCodePoints() const { + return &mTargetCodePoints; + } + + int getProbability() const { + return mProbability; + } + + int getTimestamp() const { + return mTimestamp; + } + + int getLevel() const { + return mLevel; + } + + int getCount() const { + return mCount; + } + + private: + std::vector<int> mTargetCodePoints; + int mProbability; + int mTimestamp; + int mLevel; + int mCount; + }; + + class ShortcutProperty { + public: + ShortcutProperty(const std::vector<int> *const targetCodePoints, const int probability) + : mTargetCodePoints(*targetCodePoints), mProbability(probability) {} + + const std::vector<int> *getTargetCodePoints() const { + return &mTargetCodePoints; + } + + int getProbability() const { + return mProbability; + } + + private: + std::vector<int> mTargetCodePoints; + int mProbability; + }; + // Invalid word. WordProperty() : mCodePoints(), mIsNotAWord(false), mIsBlacklisted(false), mHasBigrams(false), mHasShortcuts(false), mProbability(NOT_A_PROBABILITY), - mTimestamp(0), mLevel(0), mCount(0), mShortcutTargets(), mShortcutProbabilities() {} + mTimestamp(0), mLevel(0), mCount(0), mBigrams(), mShortcuts() {} WordProperty(const std::vector<int> *const codePoints, const bool isNotAWord, const bool isBlacklisted, const bool hasBigrams, const bool hasShortcuts, const int probability, const int timestamp, - const int level, const int count, - const std::vector<std::vector<int> > *const shortcutTargets, - const std::vector<int> *const shortcutProbabilities) + const int level, const int count, const std::vector<BigramProperty> *const bigrams, + const std::vector<ShortcutProperty> *const shortcuts) : mCodePoints(*codePoints), mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mHasBigrams(hasBigrams), mHasShortcuts(hasShortcuts), mProbability(probability), - mTimestamp(timestamp), mLevel(level), mCount(count), - mShortcutTargets(*shortcutTargets), mShortcutProbabilities(*shortcutProbabilities) {} + mTimestamp(timestamp), mLevel(level), mCount(count), mBigrams(*bigrams), + mShortcuts(*shortcuts) {} void outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags, - jintArray outProbability, jintArray outHistoricalInfo, jobject outShortcutTargets, - jobject outShortcutProbabilities) const; + jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilities, + jobject outShortcutTargets, jobject outShortcutProbabilities) const; private: DISALLOW_ASSIGNMENT_OPERATOR(WordProperty); @@ -63,9 +114,8 @@ class WordProperty { int mTimestamp; int mLevel; int mCount; - // Shortcut - std::vector<std::vector<int> > mShortcutTargets; - std::vector<int> mShortcutProbabilities; + std::vector<BigramProperty> mBigrams; + std::vector<ShortcutProperty> mShortcuts; }; } // namespace latinime #endif // LATINIME_WORD_PROPERTY_H diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index b878984f1..784419586 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -95,6 +95,11 @@ class DictionaryStructureWithBufferPolicy { virtual const WordProperty getWordProperty(const int *const codePonts, const int codePointCount) const = 0; + // Method to iterate all words in the dictionary. + // The returned token has to be used to get the next word. If token is 0, this method newly + // starts iterating the dictionary. + virtual int getNextWordAndNextToken(const int token, int *const outCodePoints) = 0; + protected: DictionaryStructureWithBufferPolicy() {} diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h index 67d615e86..319c81569 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h @@ -129,6 +129,11 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { return WordProperty(); } + int getNextWordAndNextToken(const int token, int *const outCodePoints) { + // getNextWordAndNextToken is not supported. + return 0; + } + private: DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index 0b067e127..1c420e070 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -332,9 +332,44 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code mBuffers.get()->getProbabilityDictContent()->getProbabilityEntry( ptNodeParams.getTerminalId()); const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo(); + // Fetch bigram information. + std::vector<WordProperty::BigramProperty> bigrams; + const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos); + if (bigramListPos != NOT_A_DICT_POS) { + int bigramWord1CodePoints[MAX_WORD_LENGTH]; + const BigramDictContent *const bigramDictContent = mBuffers.get()->getBigramDictContent(); + const TerminalPositionLookupTable *const terminalPositionLookupTable = + mBuffers.get()->getTerminalPositionLookupTable(); + bool hasNext = true; + int readingPos = bigramListPos; + while (hasNext) { + const BigramEntry bigramEntry = + bigramDictContent->getBigramEntryAndAdvancePosition(&readingPos); + hasNext = bigramEntry.hasNext(); + const int word1TerminalId = bigramEntry.getTargetTerminalId(); + const int word1TerminalPtNodePos = + terminalPositionLookupTable->getTerminalPtNodePosition(word1TerminalId); + if (word1TerminalPtNodePos == NOT_A_DICT_POS) { + continue; + } + // Word (unigram) probability + int word1Probability = NOT_A_PROBABILITY; + const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount( + word1TerminalPtNodePos, MAX_WORD_LENGTH, bigramWord1CodePoints, + &word1Probability); + std::vector<int> word1(bigramWord1CodePoints, + bigramWord1CodePoints + codePointCount); + const HistoricalInfo *const historicalInfo = bigramEntry.getHistoricalInfo(); + const int probability = bigramEntry.hasHistoricalInfo() ? + ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo()) : + bigramEntry.getProbability(); + bigrams.push_back(WordProperty::BigramProperty(&word1, probability, + historicalInfo->getTimeStamp(), historicalInfo->getLevel(), + historicalInfo->getCount())); + } + } // Fetch shortcut information. - std::vector<std::vector<int> > shortcutTargets; - std::vector<int> shortcutProbabilities; + std::vector<WordProperty::ShortcutProperty> shortcuts; int shortcutPos = getShortcutPositionOfPtNode(ptNodePos); if (shortcutPos != NOT_A_DICT_POS) { int shortcutTarget[MAX_WORD_LENGTH]; @@ -347,15 +382,20 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code shortcutDictContent->getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH, shortcutTarget, &shortcutTargetLength, &shortcutProbability, &hasNext, &shortcutPos); std::vector<int> target(shortcutTarget, shortcutTarget + shortcutTargetLength); - shortcutTargets.push_back(target); - shortcutProbabilities.push_back(shortcutProbability); + shortcuts.push_back(WordProperty::ShortcutProperty(&target, shortcutProbability)); } } return WordProperty(&codePointVector, ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.hasBigrams(), ptNodeParams.hasShortcutTargets(), ptNodeParams.getProbability(), historicalInfo->getTimeStamp(), historicalInfo->getLevel(), - historicalInfo->getCount(), &shortcutTargets, &shortcutProbabilities); + historicalInfo->getCount(), &bigrams, &shortcuts); +} + +int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, + int *const outCodePoints) { + // TODO: Implement. + return 0; } } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index a43bd0eca..1bcd4ceea 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -109,6 +109,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { const WordProperty getWordProperty(const int *const codePoints, const int codePointCount) const; + int getNextWordAndNextToken(const int token, int *const outCodePoints); + private: DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy); |