aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--native/jni/src/defines.h1
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node.h4
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_vector.h4
-rw-r--r--native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h16
-rw-r--r--native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h1
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp3
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h1
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h1
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp3
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h1
11 files changed, 23 insertions, 16 deletions
diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h
index 24d04e51f..57e18884d 100644
--- a/native/jni/src/defines.h
+++ b/native/jni/src/defines.h
@@ -299,6 +299,7 @@ static inline void prof_out(void) {
#define NOT_AN_INDEX (-1)
#define NOT_A_PROBABILITY (-1)
#define NOT_A_DICT_POS (S_INT_MIN)
+#define NOT_A_WORD_ID (S_INT_MIN)
#define NOT_A_TIMESTAMP (-1)
#define NOT_A_LANGUAGE_WEIGHT (-1.0f)
diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h
index d1b2c87be..214cdfca6 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node.h
@@ -136,7 +136,7 @@ class DicNode {
}
void initAsChild(const DicNode *const dicNode, const int ptNodePos,
- const int childrenPtNodeArrayPos, const int probability, const bool isTerminal,
+ const int childrenPtNodeArrayPos, const int probability, const int wordId,
const bool hasChildren, const bool isBlacklistedOrNotAWord,
const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1);
@@ -144,7 +144,7 @@ class DicNode {
const uint16_t newLeavingDepth = static_cast<uint16_t>(
dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount);
mDicNodeProperties.init(ptNodePos, childrenPtNodeArrayPos, mergedNodeCodePoints[0],
- probability, isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth,
+ probability, wordId, hasChildren, isBlacklistedOrNotAWord, newDepth,
newLeavingDepth, dicNode->mDicNodeProperties.getPrevWordsTerminalPtNodePos());
mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount,
mergedNodeCodePoints);
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_vector.h b/native/jni/src/suggest/core/dicnode/dic_node_vector.h
index 54cde1988..f01640a93 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_vector.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node_vector.h
@@ -59,13 +59,13 @@ class DicNodeVector {
}
void pushLeavingChild(const DicNode *const dicNode, const int ptNodePos,
- const int childrenPtNodeArrayPos, const int probability, const bool isTerminal,
+ const int childrenPtNodeArrayPos, const int probability, const int wordId,
const bool hasChildren, const bool isBlacklistedOrNotAWord,
const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
ASSERT(!mLock);
mDicNodes.emplace_back();
mDicNodes.back().initAsChild(dicNode, ptNodePos, childrenPtNodeArrayPos, probability,
- isTerminal, hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount,
+ wordId, hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount,
mergedNodeCodePoints);
}
diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h
index 8202176f7..fc242a92b 100644
--- a/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h
+++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h
@@ -31,20 +31,20 @@ class DicNodeProperties {
AK_FORCE_INLINE DicNodeProperties()
: mPtNodePos(NOT_A_DICT_POS), mChildrenPtNodeArrayPos(NOT_A_DICT_POS),
mProbability(NOT_A_PROBABILITY), mDicNodeCodePoint(NOT_A_CODE_POINT),
- mIsTerminal(false), mHasChildrenPtNodes(false),
+ mWordId(NOT_A_WORD_ID), mHasChildrenPtNodes(false),
mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0) {}
~DicNodeProperties() {}
// Should be called only once per DicNode is initialized.
void init(const int pos, const int childrenPos, const int nodeCodePoint, const int probability,
- const bool isTerminal, const bool hasChildren, const bool isBlacklistedOrNotAWord,
+ const int wordId, const bool hasChildren, const bool isBlacklistedOrNotAWord,
const uint16_t depth, const uint16_t leavingDepth, const int *const prevWordsNodePos) {
mPtNodePos = pos;
mChildrenPtNodeArrayPos = childrenPos;
mDicNodeCodePoint = nodeCodePoint;
mProbability = probability;
- mIsTerminal = isTerminal;
+ mWordId = wordId;
mHasChildrenPtNodes = hasChildren;
mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord;
mDepth = depth;
@@ -58,7 +58,7 @@ class DicNodeProperties {
mChildrenPtNodeArrayPos = rootPtNodeArrayPos;
mDicNodeCodePoint = NOT_A_CODE_POINT;
mProbability = NOT_A_PROBABILITY;
- mIsTerminal = false;
+ mWordId = NOT_A_WORD_ID;
mHasChildrenPtNodes = true;
mIsBlacklistedOrNotAWord = false;
mDepth = 0;
@@ -71,7 +71,7 @@ class DicNodeProperties {
mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos;
mDicNodeCodePoint = dicNodeProp->mDicNodeCodePoint;
mProbability = dicNodeProp->mProbability;
- mIsTerminal = dicNodeProp->mIsTerminal;
+ mWordId = dicNodeProp->mWordId;
mHasChildrenPtNodes = dicNodeProp->mHasChildrenPtNodes;
mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
mDepth = dicNodeProp->mDepth;
@@ -86,7 +86,7 @@ class DicNodeProperties {
mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos;
mDicNodeCodePoint = codePoint; // Overwrite the node char of a passing child
mProbability = dicNodeProp->mProbability;
- mIsTerminal = dicNodeProp->mIsTerminal;
+ mWordId = dicNodeProp->mWordId;
mHasChildrenPtNodes = dicNodeProp->mHasChildrenPtNodes;
mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child
@@ -121,7 +121,7 @@ class DicNodeProperties {
}
bool isTerminal() const {
- return mIsTerminal;
+ return mWordId != NOT_A_WORD_ID;
}
bool hasChildren() const {
@@ -144,7 +144,7 @@ class DicNodeProperties {
int mChildrenPtNodeArrayPos;
int mProbability;
int mDicNodeCodePoint;
- bool mIsTerminal;
+ int mWordId;
bool mHasChildrenPtNodes;
bool mIsBlacklistedOrNotAWord;
uint16_t mDepth;
diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
index e91f07682..5052f46cb 100644
--- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
@@ -36,6 +36,7 @@ class UnigramProperty;
* This class abstracts the structure of dictionaries.
* Implement this policy to support additional dictionaries.
*/
+// TODO: Use word id instead of terminal PtNode position.
class DictionaryStructureWithBufferPolicy {
public:
typedef std::unique_ptr<DictionaryStructureWithBufferPolicy> StructurePolicyPtr;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
index 9c6452e40..c86ae9305 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
@@ -76,8 +76,9 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d
// Skip PtNodes that represent non-word information.
continue;
}
+ const int wordId = isTerminal ? ptNodeParams.getHeadPos() : NOT_A_WORD_ID;
childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(),
- ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal,
+ ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), wordId,
ptNodeParams.hasChildren(),
ptNodeParams.isBlacklisted()
|| ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
index d77499636..fac3828c3 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
@@ -55,6 +55,7 @@ class DicNodeVector;
namespace backward {
namespace v402 {
+// Word id = Position of a PtNode that represents the word.
class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public:
Ver4PatriciaTriePolicy(Ver4DictBuffers::Ver4DictBuffersPtr buffers)
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
index ea32eb2a9..aca64b351 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
@@ -367,8 +367,8 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod
&probability, &childrenPos, &shortcutPos, &bigramPos, &siblingPos);
// Skip PtNodes don't start with Unicode code point because they represent non-word information.
if (CharUtils::isInUnicodeSpace(mergedNodeCodePoints[0])) {
- childDicNodes->pushLeavingChild(dicNode, ptNodePos, childrenPos, probability,
- PatriciaTrieReadingUtils::isTerminal(flags),
+ const int wordId = PatriciaTrieReadingUtils::isTerminal(flags) ? ptNodePos : NOT_A_WORD_ID;
+ childDicNodes->pushLeavingChild(dicNode, ptNodePos, childrenPos, probability, wordId,
PatriciaTrieReadingUtils::hasChildrenInFlags(flags),
PatriciaTrieReadingUtils::isBlacklisted(flags)
|| PatriciaTrieReadingUtils::isNotAWord(flags),
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
index 70351d147..4257b0bf6 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
@@ -36,6 +36,7 @@ namespace latinime {
class DicNode;
class DicNodeVector;
+// Word id = Position of a PtNode that represents the word.
class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public:
PatriciaTriePolicy(MmappedBuffer::MmappedBufferPtr mmappedBuffer)
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index b7411d6f9..ae3208cfe 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -66,8 +66,9 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d
// Skip PtNodes that represent non-word information.
continue;
}
+ const int wordId = isTerminal ? ptNodeParams.getTerminalId() : NOT_A_WORD_ID;
childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(),
- ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal,
+ ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), wordId,
ptNodeParams.hasChildren(),
ptNodeParams.isBlacklisted()
|| ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
index c545ac8ef..90e06c7f9 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
@@ -36,6 +36,7 @@ namespace latinime {
class DicNode;
class DicNodeVector;
+// Word id = Artificial id that is stored in the PtNode looked up by the word.
class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public:
Ver4PatriciaTriePolicy(Ver4DictBuffers::Ver4DictBuffersPtr buffers)