aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/src
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni/src')
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.cpp6
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.h5
-rw-r--r--native/jni/src/suggest/core/dictionary/word_property.cpp22
-rw-r--r--native/jni/src/suggest/core/dictionary/word_property.h20
-rw-r--r--native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h5
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp8
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h15
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h5
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp28
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h8
10 files changed, 120 insertions, 2 deletions
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index 16b1a56b1..9b71eff7a 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -150,6 +150,12 @@ const WordProperty Dictionary::getWordProperty(const int *const codePoints,
codePoints, codePointCount);
}
+int Dictionary::getNextWordAndNextToken(const int token, int *const outCodePoints) {
+ TimeKeeper::setCurrentTime();
+ return mDictionaryStructureWithBufferPolicy.get()->getNextWordAndNextToken(
+ token, outCodePoints);
+}
+
void Dictionary::logDictionaryInfo(JNIEnv *const env) const {
int dictionaryIdCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
int versionStringCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h
index 4a468f3df..0a413cb52 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h
@@ -96,6 +96,11 @@ class Dictionary {
const WordProperty getWordProperty(const int *const codePoints, const int codePointCount);
+ // Method to iterate all words in the dictionary.
+ // The returned token has to be used to get the next word. If token is 0, this method newly
+ // starts iterating the dictionary.
+ int getNextWordAndNextToken(const int token, int *const outCodePoints);
+
const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const {
return mDictionaryStructureWithBufferPolicy.get();
}
diff --git a/native/jni/src/suggest/core/dictionary/word_property.cpp b/native/jni/src/suggest/core/dictionary/word_property.cpp
index d8c330bbd..288e6b05e 100644
--- a/native/jni/src/suggest/core/dictionary/word_property.cpp
+++ b/native/jni/src/suggest/core/dictionary/word_property.cpp
@@ -34,7 +34,27 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
jclass arrayListClass = env->FindClass("java/util/ArrayList");
jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z");
- // TODO: Output bigrams.
+ // Output bigrams.
+ const int bigramCount = mBigrams.size();
+ for (int i = 0; i < bigramCount; ++i) {
+ const BigramProperty *const bigramProperty = &mBigrams[i];
+ const std::vector<int> *const word1CodePoints = bigramProperty->getTargetCodePoints();
+ jintArray bigramWord1CodePointArray = env->NewIntArray(word1CodePoints->size());
+ env->SetIntArrayRegion(bigramWord1CodePointArray, 0 /* start */,
+ word1CodePoints->size(), &word1CodePoints->at(0));
+ env->CallVoidMethod(outBigramTargets, addMethodId, bigramWord1CodePointArray);
+ env->DeleteLocalRef(bigramWord1CodePointArray);
+
+ int bigramProbabilityInfo[] = {bigramProperty->getProbability(),
+ bigramProperty->getTimestamp(), bigramProperty->getLevel(),
+ bigramProperty->getCount()};
+ jintArray bigramProbabilityInfoArray = env->NewIntArray(NELEMS(bigramProbabilityInfo));
+ env->SetIntArrayRegion(bigramProbabilityInfoArray, 0 /* start */,
+ NELEMS(bigramProbabilityInfo), bigramProbabilityInfo);
+ env->CallVoidMethod(outBigramProbabilities, addMethodId, bigramProbabilityInfoArray);
+ env->DeleteLocalRef(bigramProbabilityInfoArray);
+ }
+
// Output shortcuts.
const int shortcutTargetCount = mShortcuts.size();
for (int i = 0; i < shortcutTargetCount; ++i) {
diff --git a/native/jni/src/suggest/core/dictionary/word_property.h b/native/jni/src/suggest/core/dictionary/word_property.h
index cc06b1baa..40b1a91a4 100644
--- a/native/jni/src/suggest/core/dictionary/word_property.h
+++ b/native/jni/src/suggest/core/dictionary/word_property.h
@@ -35,6 +35,26 @@ class WordProperty {
: mTargetCodePoints(*targetCodePoints), mProbability(probability),
mTimestamp(timestamp), mLevel(level), mCount(count) {}
+ const std::vector<int> *getTargetCodePoints() const {
+ return &mTargetCodePoints;
+ }
+
+ int getProbability() const {
+ return mProbability;
+ }
+
+ int getTimestamp() const {
+ return mTimestamp;
+ }
+
+ int getLevel() const {
+ return mLevel;
+ }
+
+ int getCount() const {
+ return mCount;
+ }
+
private:
std::vector<int> mTargetCodePoints;
int mProbability;
diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
index b878984f1..784419586 100644
--- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
@@ -95,6 +95,11 @@ class DictionaryStructureWithBufferPolicy {
virtual const WordProperty getWordProperty(const int *const codePonts,
const int codePointCount) const = 0;
+ // Method to iterate all words in the dictionary.
+ // The returned token has to be used to get the next word. If token is 0, this method newly
+ // starts iterating the dictionary.
+ virtual int getNextWordAndNextToken(const int token, int *const outCodePoints) = 0;
+
protected:
DictionaryStructureWithBufferPolicy() {}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp
index b918e0765..824d442e4 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp
@@ -28,6 +28,14 @@ const int DynamicPtReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 10000
const int DynamicPtReadingHelper::MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
const size_t DynamicPtReadingHelper::MAX_READING_STATE_STACK_SIZE = MAX_WORD_LENGTH;
+bool DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions::onVisitingPtNode(
+ const PtNodeParams *const ptNodeParams) {
+ if (ptNodeParams->isTerminal() && !ptNodeParams->isDeleted()) {
+ mTerminalPositions->push_back(ptNodeParams->getHeadPos());
+ }
+ return true;
+}
+
// Visits all PtNodes in post-order depth first manner.
// For example, visits c -> b -> y -> x -> a for the following dictionary:
// a _ b _ c
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h
index a69490943..bcc5c7857 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h
@@ -59,6 +59,21 @@ class DynamicPtReadingHelper {
DISALLOW_COPY_AND_ASSIGN(TraversingEventListener);
};
+ class TraversePolicyToGetAllTerminalPtNodePositions : public TraversingEventListener {
+ public:
+ TraversePolicyToGetAllTerminalPtNodePositions(std::vector<int> *const terminalPositions)
+ : mTerminalPositions(terminalPositions) {}
+ bool onAscend() { return true; }
+ bool onDescend(const int ptNodeArrayPos) { return true; }
+ bool onReadingPtNodeArrayTail() { return true; }
+ bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToGetAllTerminalPtNodePositions);
+
+ std::vector<int> *const mTerminalPositions;
+ };
+
DynamicPtReadingHelper(const BufferWithExtendableBuffer *const buffer,
const PtNodeReader *const ptNodeReader)
: mIsError(false), mReadingState(), mBuffer(buffer),
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
index 67d615e86..319c81569 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
@@ -129,6 +129,11 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
return WordProperty();
}
+ int getNextWordAndNextToken(const int token, int *const outCodePoints) {
+ // getNextWordAndNextToken is not supported.
+ return 0;
+ }
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index 5022bafcb..75d85988c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -392,4 +392,32 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
historicalInfo->getCount(), &bigrams, &shortcuts);
}
+int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints) {
+ if (token == 0) {
+ mTerminalPtNodePositionsForIteratingWords.clear();
+ DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy(
+ &mTerminalPtNodePositionsForIteratingWords);
+ DynamicPtReadingHelper readingHelper(mDictBuffer, &mNodeReader);
+ readingHelper.initWithPtNodeArrayPos(getRootPosition());
+ readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(&traversePolicy);
+ }
+ const int terminalPtNodePositionsVectorSize =
+ static_cast<int>(mTerminalPtNodePositionsForIteratingWords.size());
+ if (token < 0 || token >= terminalPtNodePositionsVectorSize) {
+ AKLOGE("Given token %d is invalid.", token);
+ return 0;
+ }
+ const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token];
+ int unigramProbability = NOT_A_PROBABILITY;
+ getCodePointsAndProbabilityAndReturnCodePointCount(terminalPtNodePos, MAX_WORD_LENGTH,
+ outCodePoints, &unigramProbability);
+ const int nextToken = token + 1;
+ if (nextToken >= terminalPtNodePositionsVectorSize) {
+ // All words have been iterated.
+ mTerminalPtNodePositionsForIteratingWords.clear();
+ return 0;
+ }
+ return nextToken;
+}
+
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
index a43bd0eca..9ba5be0c3 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
@@ -17,6 +17,8 @@
#ifndef LATINIME_VER4_PATRICIA_TRIE_POLICY_H
#define LATINIME_VER4_PATRICIA_TRIE_POLICY_H
+#include <vector>
+
#include "defines.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
@@ -50,7 +52,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter),
mWritingHelper(mBuffers.get()),
mUnigramCount(mHeaderPolicy->getUnigramCount()),
- mBigramCount(mHeaderPolicy->getBigramCount()) {};
+ mBigramCount(mHeaderPolicy->getBigramCount()),
+ mTerminalPtNodePositionsForIteratingWords() {};
AK_FORCE_INLINE int getRootPosition() const {
return 0;
@@ -109,6 +112,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
const WordProperty getWordProperty(const int *const codePoints,
const int codePointCount) const;
+ int getNextWordAndNextToken(const int token, int *const outCodePoints);
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy);
@@ -132,6 +137,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
Ver4PatriciaTrieWritingHelper mWritingHelper;
int mUnigramCount;
int mBigramCount;
+ std::vector<int> mTerminalPtNodePositionsForIteratingWords;
};
} // namespace latinime
#endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H