diff options
Diffstat (limited to 'native/jni/src')
10 files changed, 120 insertions, 2 deletions
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index 16b1a56b1..9b71eff7a 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -150,6 +150,12 @@ const WordProperty Dictionary::getWordProperty(const int *const codePoints, codePoints, codePointCount); } +int Dictionary::getNextWordAndNextToken(const int token, int *const outCodePoints) { + TimeKeeper::setCurrentTime(); + return mDictionaryStructureWithBufferPolicy.get()->getNextWordAndNextToken( + token, outCodePoints); +} + void Dictionary::logDictionaryInfo(JNIEnv *const env) const { int dictionaryIdCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE]; int versionStringCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE]; diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h index 4a468f3df..0a413cb52 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.h +++ b/native/jni/src/suggest/core/dictionary/dictionary.h @@ -96,6 +96,11 @@ class Dictionary { const WordProperty getWordProperty(const int *const codePoints, const int codePointCount); + // Method to iterate all words in the dictionary. + // The returned token has to be used to get the next word. If token is 0, this method newly + // starts iterating the dictionary. + int getNextWordAndNextToken(const int token, int *const outCodePoints); + const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const { return mDictionaryStructureWithBufferPolicy.get(); } diff --git a/native/jni/src/suggest/core/dictionary/word_property.cpp b/native/jni/src/suggest/core/dictionary/word_property.cpp index d8c330bbd..288e6b05e 100644 --- a/native/jni/src/suggest/core/dictionary/word_property.cpp +++ b/native/jni/src/suggest/core/dictionary/word_property.cpp @@ -34,7 +34,27 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints, jclass arrayListClass = env->FindClass("java/util/ArrayList"); jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z"); - // TODO: Output bigrams. + // Output bigrams. + const int bigramCount = mBigrams.size(); + for (int i = 0; i < bigramCount; ++i) { + const BigramProperty *const bigramProperty = &mBigrams[i]; + const std::vector<int> *const word1CodePoints = bigramProperty->getTargetCodePoints(); + jintArray bigramWord1CodePointArray = env->NewIntArray(word1CodePoints->size()); + env->SetIntArrayRegion(bigramWord1CodePointArray, 0 /* start */, + word1CodePoints->size(), &word1CodePoints->at(0)); + env->CallVoidMethod(outBigramTargets, addMethodId, bigramWord1CodePointArray); + env->DeleteLocalRef(bigramWord1CodePointArray); + + int bigramProbabilityInfo[] = {bigramProperty->getProbability(), + bigramProperty->getTimestamp(), bigramProperty->getLevel(), + bigramProperty->getCount()}; + jintArray bigramProbabilityInfoArray = env->NewIntArray(NELEMS(bigramProbabilityInfo)); + env->SetIntArrayRegion(bigramProbabilityInfoArray, 0 /* start */, + NELEMS(bigramProbabilityInfo), bigramProbabilityInfo); + env->CallVoidMethod(outBigramProbabilities, addMethodId, bigramProbabilityInfoArray); + env->DeleteLocalRef(bigramProbabilityInfoArray); + } + // Output shortcuts. const int shortcutTargetCount = mShortcuts.size(); for (int i = 0; i < shortcutTargetCount; ++i) { diff --git a/native/jni/src/suggest/core/dictionary/word_property.h b/native/jni/src/suggest/core/dictionary/word_property.h index cc06b1baa..40b1a91a4 100644 --- a/native/jni/src/suggest/core/dictionary/word_property.h +++ b/native/jni/src/suggest/core/dictionary/word_property.h @@ -35,6 +35,26 @@ class WordProperty { : mTargetCodePoints(*targetCodePoints), mProbability(probability), mTimestamp(timestamp), mLevel(level), mCount(count) {} + const std::vector<int> *getTargetCodePoints() const { + return &mTargetCodePoints; + } + + int getProbability() const { + return mProbability; + } + + int getTimestamp() const { + return mTimestamp; + } + + int getLevel() const { + return mLevel; + } + + int getCount() const { + return mCount; + } + private: std::vector<int> mTargetCodePoints; int mProbability; diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index b878984f1..784419586 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -95,6 +95,11 @@ class DictionaryStructureWithBufferPolicy { virtual const WordProperty getWordProperty(const int *const codePonts, const int codePointCount) const = 0; + // Method to iterate all words in the dictionary. + // The returned token has to be used to get the next word. If token is 0, this method newly + // starts iterating the dictionary. + virtual int getNextWordAndNextToken(const int token, int *const outCodePoints) = 0; + protected: DictionaryStructureWithBufferPolicy() {} diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp index b918e0765..824d442e4 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp @@ -28,6 +28,14 @@ const int DynamicPtReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 10000 const int DynamicPtReadingHelper::MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000; const size_t DynamicPtReadingHelper::MAX_READING_STATE_STACK_SIZE = MAX_WORD_LENGTH; +bool DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions::onVisitingPtNode( + const PtNodeParams *const ptNodeParams) { + if (ptNodeParams->isTerminal() && !ptNodeParams->isDeleted()) { + mTerminalPositions->push_back(ptNodeParams->getHeadPos()); + } + return true; +} + // Visits all PtNodes in post-order depth first manner. // For example, visits c -> b -> y -> x -> a for the following dictionary: // a _ b _ c diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h index a69490943..bcc5c7857 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h @@ -59,6 +59,21 @@ class DynamicPtReadingHelper { DISALLOW_COPY_AND_ASSIGN(TraversingEventListener); }; + class TraversePolicyToGetAllTerminalPtNodePositions : public TraversingEventListener { + public: + TraversePolicyToGetAllTerminalPtNodePositions(std::vector<int> *const terminalPositions) + : mTerminalPositions(terminalPositions) {} + bool onAscend() { return true; } + bool onDescend(const int ptNodeArrayPos) { return true; } + bool onReadingPtNodeArrayTail() { return true; } + bool onVisitingPtNode(const PtNodeParams *const ptNodeParams); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToGetAllTerminalPtNodePositions); + + std::vector<int> *const mTerminalPositions; + }; + DynamicPtReadingHelper(const BufferWithExtendableBuffer *const buffer, const PtNodeReader *const ptNodeReader) : mIsError(false), mReadingState(), mBuffer(buffer), diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h index 67d615e86..319c81569 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h @@ -129,6 +129,11 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { return WordProperty(); } + int getNextWordAndNextToken(const int token, int *const outCodePoints) { + // getNextWordAndNextToken is not supported. + return 0; + } + private: DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index 5022bafcb..75d85988c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -392,4 +392,32 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code historicalInfo->getCount(), &bigrams, &shortcuts); } +int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints) { + if (token == 0) { + mTerminalPtNodePositionsForIteratingWords.clear(); + DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy( + &mTerminalPtNodePositionsForIteratingWords); + DynamicPtReadingHelper readingHelper(mDictBuffer, &mNodeReader); + readingHelper.initWithPtNodeArrayPos(getRootPosition()); + readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(&traversePolicy); + } + const int terminalPtNodePositionsVectorSize = + static_cast<int>(mTerminalPtNodePositionsForIteratingWords.size()); + if (token < 0 || token >= terminalPtNodePositionsVectorSize) { + AKLOGE("Given token %d is invalid.", token); + return 0; + } + const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token]; + int unigramProbability = NOT_A_PROBABILITY; + getCodePointsAndProbabilityAndReturnCodePointCount(terminalPtNodePos, MAX_WORD_LENGTH, + outCodePoints, &unigramProbability); + const int nextToken = token + 1; + if (nextToken >= terminalPtNodePositionsVectorSize) { + // All words have been iterated. + mTerminalPtNodePositionsForIteratingWords.clear(); + return 0; + } + return nextToken; +} + } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index a43bd0eca..9ba5be0c3 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -17,6 +17,8 @@ #ifndef LATINIME_VER4_PATRICIA_TRIE_POLICY_H #define LATINIME_VER4_PATRICIA_TRIE_POLICY_H +#include <vector> + #include "defines.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h" @@ -50,7 +52,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter), mWritingHelper(mBuffers.get()), mUnigramCount(mHeaderPolicy->getUnigramCount()), - mBigramCount(mHeaderPolicy->getBigramCount()) {}; + mBigramCount(mHeaderPolicy->getBigramCount()), + mTerminalPtNodePositionsForIteratingWords() {}; AK_FORCE_INLINE int getRootPosition() const { return 0; @@ -109,6 +112,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { const WordProperty getWordProperty(const int *const codePoints, const int codePointCount) const; + int getNextWordAndNextToken(const int token, int *const outCodePoints); + private: DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy); @@ -132,6 +137,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { Ver4PatriciaTrieWritingHelper mWritingHelper; int mUnigramCount; int mBigramCount; + std::vector<int> mTerminalPtNodePositionsForIteratingWords; }; } // namespace latinime #endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H |