aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/src
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni/src')
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h34
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp29
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h10
-rw-r--r--native/jni/src/utils/char_utils.cpp3
4 files changed, 54 insertions, 22 deletions
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
index faaf44162..e4847fcf9 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
@@ -30,18 +30,19 @@ namespace latinime {
class PtNodeParams {
public:
// Invalid PtNode.
- PtNodeParams() : mHeadPos(NOT_A_DICT_POS), mFlags(0), mParentPos(NOT_A_DICT_POS),
- mCodePointCount(0), mCodePoints(), mTerminalIdFieldPos(NOT_A_DICT_POS),
- mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID), mProbabilityFieldPos(NOT_A_DICT_POS),
- mProbability(NOT_A_PROBABILITY), mChildrenPosFieldPos(NOT_A_DICT_POS),
- mChildrenPos(NOT_A_DICT_POS), mBigramLinkedNodePos(NOT_A_DICT_POS),
- mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS),
- mSiblingPos(NOT_A_DICT_POS) {}
+ PtNodeParams() : mHeadPos(NOT_A_DICT_POS), mFlags(0), mHasMovedFlag(false),
+ mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mCodePoints(),
+ mTerminalIdFieldPos(NOT_A_DICT_POS), mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID),
+ mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY),
+ mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(NOT_A_DICT_POS),
+ mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS),
+ mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_DICT_POS) {}
PtNodeParams(const PtNodeParams& ptNodeParams)
: mHeadPos(ptNodeParams.mHeadPos), mFlags(ptNodeParams.mFlags),
- mParentPos(ptNodeParams.mParentPos), mCodePointCount(ptNodeParams.mCodePointCount),
- mCodePoints(), mTerminalIdFieldPos(ptNodeParams.mTerminalIdFieldPos),
+ mHasMovedFlag(ptNodeParams.mHasMovedFlag), mParentPos(ptNodeParams.mParentPos),
+ mCodePointCount(ptNodeParams.mCodePointCount), mCodePoints(),
+ mTerminalIdFieldPos(ptNodeParams.mTerminalIdFieldPos),
mTerminalId(ptNodeParams.mTerminalId),
mProbabilityFieldPos(ptNodeParams.mProbabilityFieldPos),
mProbability(ptNodeParams.mProbability),
@@ -58,7 +59,7 @@ class PtNodeParams {
const int codePointCount, const int *const codePoints, const int probability,
const int childrenPos, const int shortcutPos, const int bigramPos,
const int siblingPos)
- : mHeadPos(headPos), mFlags(flags), mParentPos(NOT_A_DICT_POS),
+ : mHeadPos(headPos), mFlags(flags), mHasMovedFlag(false), mParentPos(NOT_A_DICT_POS),
mCodePointCount(codePointCount), mCodePoints(), mTerminalIdFieldPos(NOT_A_DICT_POS),
mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID),
mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability),
@@ -73,7 +74,7 @@ class PtNodeParams {
const int parentPos, const int codePointCount, const int *const codePoints,
const int terminalIdFieldPos, const int terminalId, const int probability,
const int childrenPosFieldPos, const int childrenPos, const int siblingPos)
- : mHeadPos(headPos), mFlags(flags), mParentPos(parentPos),
+ : mHeadPos(headPos), mFlags(flags), mHasMovedFlag(true), mParentPos(parentPos),
mCodePointCount(codePointCount), mCodePoints(),
mTerminalIdFieldPos(terminalIdFieldPos), mTerminalId(terminalId),
mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability),
@@ -87,8 +88,8 @@ class PtNodeParams {
PtNodeParams(const PtNodeParams *const ptNodeParams,
const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos,
const int codePointCount, const int *const codePoints, const int probability)
- : mHeadPos(ptNodeParams->getHeadPos()), mFlags(flags), mParentPos(parentPos),
- mCodePointCount(codePointCount), mCodePoints(),
+ : mHeadPos(ptNodeParams->getHeadPos()), mFlags(flags), mHasMovedFlag(true),
+ mParentPos(parentPos), mCodePointCount(codePointCount), mCodePoints(),
mTerminalIdFieldPos(ptNodeParams->getTerminalIdFieldPos()),
mTerminalId(ptNodeParams->getTerminalId()),
mProbabilityFieldPos(ptNodeParams->getProbabilityFieldPos()),
@@ -104,7 +105,7 @@ class PtNodeParams {
PtNodeParams(const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos,
const int codePointCount, const int *const codePoints, const int probability)
- : mHeadPos(NOT_A_DICT_POS), mFlags(flags), mParentPos(parentPos),
+ : mHeadPos(NOT_A_DICT_POS), mFlags(flags), mHasMovedFlag(true), mParentPos(parentPos),
mCodePointCount(codePointCount), mCodePoints(),
mTerminalIdFieldPos(NOT_A_DICT_POS),
mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID),
@@ -126,11 +127,11 @@ class PtNodeParams {
// Flags
AK_FORCE_INLINE bool isDeleted() const {
- return DynamicPtReadingUtils::isDeleted(mFlags);
+ return mHasMovedFlag && DynamicPtReadingUtils::isDeleted(mFlags);
}
AK_FORCE_INLINE bool willBecomeNonTerminal() const {
- return DynamicPtReadingUtils::willBecomeNonTerminal(mFlags);
+ return mHasMovedFlag && DynamicPtReadingUtils::willBecomeNonTerminal(mFlags);
}
AK_FORCE_INLINE bool hasChildren() const {
@@ -224,6 +225,7 @@ class PtNodeParams {
const int mHeadPos;
const PatriciaTrieReadingUtils::NodeFlags mFlags;
+ const bool mHasMovedFlag;
const int mParentPos;
const uint8_t mCodePointCount;
int mCodePoints[MAX_WORD_LENGTH];
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
index bf38dffa5..fa5993090 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
@@ -363,4 +363,33 @@ const WordProperty PatriciaTriePolicy::getWordProperty(const int *const codePoin
&bigrams, &shortcuts);
}
+int PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints) {
+ if (token == 0) {
+ // Start iterating the dictionary.
+ mTerminalPtNodePositionsForIteratingWords.clear();
+ DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy(
+ &mTerminalPtNodePositionsForIteratingWords);
+ DynamicPtReadingHelper readingHelper(&mPtNodeReader, &mPtNodeArrayReader);
+ readingHelper.initWithPtNodeArrayPos(getRootPosition());
+ readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(&traversePolicy);
+ }
+ const int terminalPtNodePositionsVectorSize =
+ static_cast<int>(mTerminalPtNodePositionsForIteratingWords.size());
+ if (token < 0 || token >= terminalPtNodePositionsVectorSize) {
+ AKLOGE("Given token %d is invalid.", token);
+ return 0;
+ }
+ const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token];
+ int unigramProbability = NOT_A_PROBABILITY;
+ getCodePointsAndProbabilityAndReturnCodePointCount(terminalPtNodePos, MAX_WORD_LENGTH,
+ outCodePoints, &unigramProbability);
+ const int nextToken = token + 1;
+ if (nextToken >= terminalPtNodePositionsVectorSize) {
+ // All words have been iterated.
+ mTerminalPtNodePositionsForIteratingWords.clear();
+ return 0;
+ }
+ return nextToken;
+}
+
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
index da4be87ce..8fbca2612 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
@@ -18,6 +18,7 @@
#define LATINIME_PATRICIA_TRIE_POLICY_H
#include <stdint.h>
+#include <vector>
#include "defines.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
@@ -44,7 +45,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
- mHeaderPolicy.getSize()),
mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot),
mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy),
- mPtNodeArrayReader(mDictRoot, mDictBufferSize) {}
+ mPtNodeArrayReader(mDictRoot, mDictBufferSize),
+ mTerminalPtNodePositionsForIteratingWords() {}
AK_FORCE_INLINE int getRootPosition() const {
return 0;
@@ -130,10 +132,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
const WordProperty getWordProperty(const int *const codePoints,
const int codePointCount) const;
- int getNextWordAndNextToken(const int token, int *const outCodePoints) {
- // getNextWordAndNextToken is not supported.
- return 0;
- }
+ int getNextWordAndNextToken(const int token, int *const outCodePoints);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);
@@ -146,6 +145,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
const ShortcutListPolicy mShortcutListPolicy;
const Ver2ParticiaTrieNodeReader mPtNodeReader;
const Ver2PtNodeArrayReader mPtNodeArrayReader;
+ std::vector<int> mTerminalPtNodePositionsForIteratingWords;
int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos,
DicNodeVector *const childDicNodes) const;
diff --git a/native/jni/src/utils/char_utils.cpp b/native/jni/src/utils/char_utils.cpp
index d41fc8924..adc474b4c 100644
--- a/native/jni/src/utils/char_utils.cpp
+++ b/native/jni/src/utils/char_utils.cpp
@@ -1118,7 +1118,8 @@ static int compare_pair_capital(const void *a, const void *b) {
/* U+0118 */ 0x0045, 0x0065, 0x0045, 0x0065, 0x0047, 0x0067, 0x0047, 0x0067,
/* U+0120 */ 0x0047, 0x0067, 0x0047, 0x0067, 0x0048, 0x0068, 0x0126, 0x0127,
/* U+0128 */ 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069,
- /* U+0130 */ 0x0049, 0x0131, 0x0049, 0x0069, 0x004A, 0x006A, 0x004B, 0x006B,
+ // U+0131: Manually changed from 0131 to 0049
+ /* U+0130 */ 0x0049, 0x0049, 0x0049, 0x0069, 0x004A, 0x006A, 0x004B, 0x006B,
/* U+0138 */ 0x0138, 0x004C, 0x006C, 0x004C, 0x006C, 0x004C, 0x006C, 0x004C,
/* U+0140 */ 0x006C, 0x004C, 0x006C, 0x004E, 0x006E, 0x004E, 0x006E, 0x004E,
// U+0141: Manually changed from 0141 to 004C