aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/src
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni/src')
-rw-r--r--native/jni/src/defines.h1
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node.h52
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_properties.h50
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_state.h9
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_state_output.h40
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_utils.cpp56
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_utils.h2
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_vector.h13
-rw-r--r--native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp11
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp13
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h4
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_header.cpp6
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_header.h9
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp46
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h12
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_info.h23
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.cpp21
-rw-r--r--native/jni/src/suggest/core/policy/dictionary_structure_policy.h72
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h47
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp70
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h58
21 files changed, 404 insertions, 211 deletions
diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h
index e349aedb1..cb6681456 100644
--- a/native/jni/src/defines.h
+++ b/native/jni/src/defines.h
@@ -270,6 +270,7 @@ static inline void prof_out(void) {
#define NOT_A_COORDINATE (-1)
#define NOT_AN_INDEX (-1)
#define NOT_A_PROBABILITY (-1)
+#define NOT_A_DICT_POS (S_INT_MIN)
#define KEYCODE_SPACE ' '
#define KEYCODE_SINGLE_QUOTE '\''
diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h
index c700b01ca..52db8e9c7 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node.h
@@ -97,7 +97,6 @@ class DicNode {
DicNode &operator=(const DicNode &dicNode);
virtual ~DicNode() {}
- // TODO: minimize arguments by looking binary_format
// Init for copy
void initByCopy(const DicNode *dicNode) {
mIsUsed = true;
@@ -107,14 +106,15 @@ class DicNode {
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
}
- // TODO: minimize arguments by looking binary_format
// Init for root with prevWordNodePos which is used for bigram
- void initAsRoot(const int pos, const int childrenPos, const int childrenCount,
- const int prevWordNodePos) {
+ void initAsRoot(const int rootGroupPos, const int prevWordNodePos) {
mIsUsed = true;
mIsCachedForNextSuggestion = false;
mDicNodeProperties.init(
- pos, 0, childrenPos, 0, 0, 0, childrenCount, 0, 0, false, false, true, 0, 0);
+ NOT_A_DICT_POS, 0 /* flags */, rootGroupPos, NOT_A_DICT_POS /* attributesPos */,
+ NOT_A_CODE_POINT /* nodeCodePoint */, NOT_A_PROBABILITY /* probability */,
+ false /* isTerminal */, true /* hasChildren */, 0 /* depth */,
+ 0 /* terminalDepth */);
mDicNodeState.init(prevWordNodePos);
PROF_NODE_RESET(mProfiler);
}
@@ -128,14 +128,15 @@ class DicNode {
PROF_NODE_COPY(&parentNode->mProfiler, mProfiler);
}
- // TODO: minimize arguments by looking binary_format
// Init for root with previous word
- void initAsRootWithPreviousWord(DicNode *dicNode, const int pos, const int childrenPos,
- const int childrenCount) {
+ void initAsRootWithPreviousWord(DicNode *dicNode, const int rootGroupPos) {
mIsUsed = true;
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
mDicNodeProperties.init(
- pos, 0, childrenPos, 0, 0, 0, childrenCount, 0, 0, false, false, true, 0, 0);
+ NOT_A_DICT_POS, 0 /* flags */, rootGroupPos, NOT_A_DICT_POS /* attributesPos */,
+ NOT_A_CODE_POINT /* nodeCodePoint */, NOT_A_PROBABILITY /* probability */,
+ false /* isTerminal */, true /* hasChildren */, 0 /* depth */,
+ 0 /* terminalDepth */);
// TODO: Move to dicNodeState?
mDicNodeState.mDicNodeStateOutput.init(); // reset for next word
mDicNodeState.mDicNodeStateInput.init(
@@ -157,19 +158,18 @@ class DicNode {
// TODO: minimize arguments by looking binary_format
void initAsChild(DicNode *dicNode, const int pos, const uint8_t flags, const int childrenPos,
- const int attributesPos, const int siblingPos, const int nodeCodePoint,
- const int childrenCount, const int probability, const int bigramProbability,
- const bool isTerminal, const bool hasMultipleChars, const bool hasChildren,
- const uint16_t additionalSubwordLength, const int *additionalSubword) {
+ const int attributesPos, const int probability, const bool isTerminal,
+ const bool hasChildren, const uint16_t mergedNodeCodePointCount,
+ const int *const mergedNodeCodePoints) {
mIsUsed = true;
uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1);
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
const uint16_t newLeavingDepth = static_cast<uint16_t>(
- dicNode->mDicNodeProperties.getLeavingDepth() + additionalSubwordLength);
- mDicNodeProperties.init(pos, flags, childrenPos, attributesPos, siblingPos, nodeCodePoint,
- childrenCount, probability, bigramProbability, isTerminal, hasMultipleChars,
- hasChildren, newDepth, newLeavingDepth);
- mDicNodeState.init(&dicNode->mDicNodeState, additionalSubwordLength, additionalSubword);
+ dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount);
+ mDicNodeProperties.init(pos, flags, childrenPos, attributesPos, mergedNodeCodePoints[0],
+ probability, isTerminal, hasChildren, newDepth, newLeavingDepth);
+ mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount,
+ mergedNodeCodePoints);
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
}
@@ -193,8 +193,8 @@ class DicNode {
}
bool isLeavingNode() const {
- ASSERT(getNodeCodePointCount() <= getLeavingDepth());
- return getNodeCodePointCount() == getLeavingDepth();
+ ASSERT(getNodeCodePointCount() <= mDicNodeProperties.getLeavingDepth());
+ return getNodeCodePointCount() == mDicNodeProperties.getLeavingDepth();
}
AK_FORCE_INLINE bool isFirstLetter() const {
@@ -256,12 +256,6 @@ class DicNode {
return mDicNodeProperties.getChildrenPos();
}
- // Used in DicNodeUtils
- int getChildrenCount() const {
- return mDicNodeProperties.getChildrenCount();
- }
-
- // Used in DicNodeUtils
int getProbability() const {
return mDicNodeProperties.getProbability();
}
@@ -280,10 +274,6 @@ class DicNode {
return !(currentDepth > 0 && (currentDepth != 1 || prevWordLen != 1));
}
- uint16_t getLeavingDepth() const {
- return mDicNodeProperties.getLeavingDepth();
- }
-
bool isTotalInputSizeExceedingLimit() const {
const int prevWordsLen = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength();
const int currentWordDepth = getNodeCodePointCount();
@@ -370,7 +360,7 @@ class DicNode {
}
AK_FORCE_INLINE const int *getOutputWordBuf() const {
- return mDicNodeState.mDicNodeStateOutput.mWordBuf;
+ return mDicNodeState.mDicNodeStateOutput.mCodePointsBuf;
}
int getPrevCodePointG(int pointerId) const {
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_properties.h b/native/jni/src/suggest/core/dicnode/dic_node_properties.h
index d2f87c10b..7e8aa4979 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_properties.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node_properties.h
@@ -27,37 +27,31 @@ namespace latinime {
/**
* Node for traversing the lexicon trie.
*/
+// TODO: Introduce a dictionary node class which has attribute members required to understand the
+// dictionary structure.
class DicNodeProperties {
public:
AK_FORCE_INLINE DicNodeProperties()
- : mPos(0), mFlags(0), mChildrenPos(0), mAttributesPos(0), mSiblingPos(0),
- mChildrenCount(0), mProbability(0), mBigramProbability(0), mNodeCodePoint(0),
- mDepth(0), mLeavingDepth(0), mIsTerminal(false), mHasMultipleChars(false),
- mHasChildren(false) {
- }
+ : mPos(0), mFlags(0), mChildrenPos(0), mAttributesPos(0), mProbability(0),
+ mNodeCodePoint(0), mDepth(0), mLeavingDepth(0), mIsTerminal(false),
+ mHasChildren(false) {}
virtual ~DicNodeProperties() {}
// Should be called only once per DicNode initialization.
void init(const int pos, const uint8_t flags, const int childrenPos, const int attributesPos,
- const int siblingPos, const int nodeCodePoint, const int childrenCount,
- const int probability, const int bigramProbability, const bool isTerminal,
- const bool hasMultipleChars, const bool hasChildren, const uint16_t depth,
- const uint16_t terminalDepth) {
+ const int nodeCodePoint, const int probability, const bool isTerminal,
+ const bool hasChildren, const uint16_t depth, const uint16_t leavingDepth) {
mPos = pos;
mFlags = flags;
mChildrenPos = childrenPos;
mAttributesPos = attributesPos;
- mSiblingPos = siblingPos;
mNodeCodePoint = nodeCodePoint;
- mChildrenCount = childrenCount;
mProbability = probability;
- mBigramProbability = bigramProbability;
mIsTerminal = isTerminal;
- mHasMultipleChars = hasMultipleChars;
mHasChildren = hasChildren;
mDepth = depth;
- mLeavingDepth = terminalDepth;
+ mLeavingDepth = leavingDepth;
}
// Init for copy
@@ -66,13 +60,9 @@ class DicNodeProperties {
mFlags = nodeProp->mFlags;
mChildrenPos = nodeProp->mChildrenPos;
mAttributesPos = nodeProp->mAttributesPos;
- mSiblingPos = nodeProp->mSiblingPos;
mNodeCodePoint = nodeProp->mNodeCodePoint;
- mChildrenCount = nodeProp->mChildrenCount;
mProbability = nodeProp->mProbability;
- mBigramProbability = nodeProp->mBigramProbability;
mIsTerminal = nodeProp->mIsTerminal;
- mHasMultipleChars = nodeProp->mHasMultipleChars;
mHasChildren = nodeProp->mHasChildren;
mDepth = nodeProp->mDepth;
mLeavingDepth = nodeProp->mLeavingDepth;
@@ -84,13 +74,9 @@ class DicNodeProperties {
mFlags = nodeProp->mFlags;
mChildrenPos = nodeProp->mChildrenPos;
mAttributesPos = nodeProp->mAttributesPos;
- mSiblingPos = nodeProp->mSiblingPos;
mNodeCodePoint = codePoint; // Overwrite the node char of a passing child
- mChildrenCount = nodeProp->mChildrenCount;
mProbability = nodeProp->mProbability;
- mBigramProbability = nodeProp->mBigramProbability;
mIsTerminal = nodeProp->mIsTerminal;
- mHasMultipleChars = nodeProp->mHasMultipleChars;
mHasChildren = nodeProp->mHasChildren;
mDepth = nodeProp->mDepth + 1; // Increment the depth of a passing child
mLeavingDepth = nodeProp->mLeavingDepth;
@@ -112,10 +98,6 @@ class DicNodeProperties {
return mAttributesPos;
}
- int getChildrenCount() const {
- return mChildrenCount;
- }
-
int getProbability() const {
return mProbability;
}
@@ -137,12 +119,8 @@ class DicNodeProperties {
return mIsTerminal;
}
- bool hasMultipleChars() const {
- return mHasMultipleChars;
- }
-
bool hasChildren() const {
- return mChildrenCount > 0 || mDepth != mLeavingDepth;
+ return mHasChildren || mDepth != mLeavingDepth;
}
bool hasBlacklistedOrNotAWordFlag() const {
@@ -153,25 +131,15 @@ class DicNodeProperties {
// Caution!!!
// Use a default copy constructor and an assign operator because shallow copies are ok
// for this class
-
- // Not used
- int getSiblingPos() const {
- return mSiblingPos;
- }
-
int mPos;
uint8_t mFlags;
int mChildrenPos;
int mAttributesPos;
- int mSiblingPos;
- int mChildrenCount;
int mProbability;
- int mBigramProbability; // not used for now
int mNodeCodePoint;
uint16_t mDepth;
uint16_t mLeavingDepth;
bool mIsTerminal;
- bool mHasMultipleChars;
bool mHasChildren;
};
} // namespace latinime
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_state.h b/native/jni/src/suggest/core/dicnode/dic_node_state.h
index d35e7d79f..b1b6266f2 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_state.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node_state.h
@@ -55,11 +55,12 @@ class DicNodeState {
mDicNodeStateScoring.init(&src->mDicNodeStateScoring);
}
- // Init by copy and adding subword
- void init(const DicNodeState *const src, const uint16_t additionalSubwordLength,
- const int *const additionalSubword) {
+ // Init by copy and adding merged node code points.
+ void init(const DicNodeState *const src, const uint16_t mergedNodeCodePointCount,
+ const int *const mergedNodeCodePoints) {
init(src);
- mDicNodeStateOutput.addSubword(additionalSubwordLength, additionalSubword);
+ mDicNodeStateOutput.addMergedNodeCodePoints(
+ mergedNodeCodePointCount, mergedNodeCodePoints);
}
private:
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_state_output.h b/native/jni/src/suggest/core/dicnode/dic_node_state_output.h
index 1d4f50a06..45c7f5cf9 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_state_output.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node_state_output.h
@@ -26,50 +26,52 @@ namespace latinime {
class DicNodeStateOutput {
public:
- DicNodeStateOutput() : mOutputtedLength(0) {
+ DicNodeStateOutput() : mOutputtedCodePointCount(0) {
init();
}
virtual ~DicNodeStateOutput() {}
void init() {
- mOutputtedLength = 0;
- mWordBuf[0] = 0;
+ mOutputtedCodePointCount = 0;
+ mCodePointsBuf[0] = 0;
}
void init(const DicNodeStateOutput *const stateOutput) {
- memcpy(mWordBuf, stateOutput->mWordBuf,
- stateOutput->mOutputtedLength * sizeof(mWordBuf[0]));
- mOutputtedLength = stateOutput->mOutputtedLength;
- if (mOutputtedLength < MAX_WORD_LENGTH) {
- mWordBuf[mOutputtedLength] = 0;
+ memcpy(mCodePointsBuf, stateOutput->mCodePointsBuf,
+ stateOutput->mOutputtedCodePointCount * sizeof(mCodePointsBuf[0]));
+ mOutputtedCodePointCount = stateOutput->mOutputtedCodePointCount;
+ if (mOutputtedCodePointCount < MAX_WORD_LENGTH) {
+ mCodePointsBuf[mOutputtedCodePointCount] = 0;
}
}
- void addSubword(const uint16_t additionalSubwordLength, const int *const additionalSubword) {
- if (additionalSubword) {
- memcpy(&mWordBuf[mOutputtedLength], additionalSubword,
- additionalSubwordLength * sizeof(mWordBuf[0]));
- mOutputtedLength = static_cast<uint16_t>(mOutputtedLength + additionalSubwordLength);
- if (mOutputtedLength < MAX_WORD_LENGTH) {
- mWordBuf[mOutputtedLength] = 0;
+ void addMergedNodeCodePoints(const uint16_t mergedNodeCodePointCount,
+ const int *const mergedNodeCodePoints) {
+ if (mergedNodeCodePoints) {
+ memcpy(&mCodePointsBuf[mOutputtedCodePointCount], mergedNodeCodePoints,
+ mergedNodeCodePointCount * sizeof(mCodePointsBuf[0]));
+ mOutputtedCodePointCount = static_cast<uint16_t>(
+ mOutputtedCodePointCount + mergedNodeCodePointCount);
+ if (mOutputtedCodePointCount < MAX_WORD_LENGTH) {
+ mCodePointsBuf[mOutputtedCodePointCount] = 0;
}
}
}
// TODO: Remove
- int getCodePointAt(const int id) const {
- return mWordBuf[id];
+ int getCodePointAt(const int index) const {
+ return mCodePointsBuf[index];
}
// TODO: Move to private
- int mWordBuf[MAX_WORD_LENGTH];
+ int mCodePointsBuf[MAX_WORD_LENGTH];
private:
// Caution!!!
// Use a default copy constructor and an assign operator because shallow copies are ok
// for this class
- uint16_t mOutputtedLength;
+ uint16_t mOutputtedCodePointCount;
};
} // namespace latinime
#endif // LATINIME_DIC_NODE_STATE_OUTPUT_H
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
index f0f26c72b..9bf7eceb5 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
+++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
@@ -26,6 +26,7 @@
#include "suggest/core/dictionary/probability_utils.h"
#include "suggest/core/layout/proximity_info.h"
#include "suggest/core/layout/proximity_info_state.h"
+#include "suggest/core/policy/dictionary_structure_policy.h"
#include "utils/char_utils.h"
namespace latinime {
@@ -36,23 +37,15 @@ namespace latinime {
/* static */ void DicNodeUtils::initAsRoot(const BinaryDictionaryInfo *const binaryDictionaryInfo,
const int prevWordNodePos, DicNode *const newRootNode) {
- int curPos = binaryDictionaryInfo->getRootPosition();
- const int pos = curPos;
- const int childrenCount = BinaryFormat::getGroupCountAndForwardPointer(
- binaryDictionaryInfo->getDictRoot(), &curPos);
- const int childrenPos = curPos;
- newRootNode->initAsRoot(pos, childrenPos, childrenCount, prevWordNodePos);
+ newRootNode->initAsRoot(binaryDictionaryInfo->getStructurePolicy()->getRootPosition(),
+ prevWordNodePos);
}
/*static */ void DicNodeUtils::initAsRootWithPreviousWord(
const BinaryDictionaryInfo *const binaryDictionaryInfo,
DicNode *const prevWordLastNode, DicNode *const newRootNode) {
- int curPos = binaryDictionaryInfo->getRootPosition();
- const int pos = curPos;
- const int childrenCount = BinaryFormat::getGroupCountAndForwardPointer(
- binaryDictionaryInfo->getDictRoot(), &curPos);
- const int childrenPos = curPos;
- newRootNode->initAsRootWithPreviousWord(prevWordLastNode, pos, childrenPos, childrenCount);
+ newRootNode->initAsRootWithPreviousWord(
+ prevWordLastNode, binaryDictionaryInfo->getStructurePolicy()->getRootPosition());
}
/* static */ void DicNodeUtils::initByCopy(DicNode *srcNode, DicNode *destNode) {
@@ -76,7 +69,7 @@ namespace latinime {
}
/* static */ int DicNodeUtils::createAndGetLeavingChildNode(DicNode *dicNode, int pos,
- const BinaryDictionaryInfo *const binaryDictionaryInfo, const int terminalDepth,
+ const BinaryDictionaryInfo *const binaryDictionaryInfo,
const ProximityInfoState *pInfoState, const int pointIndex, const bool exactOnly,
const std::vector<int> *const codePointsFilter, const ProximityInfo *const pInfo,
DicNodeVector *childDicNodes) {
@@ -86,15 +79,15 @@ namespace latinime {
const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
const bool isTerminal = (0 != (BinaryFormat::FLAG_IS_TERMINAL & flags));
const bool hasChildren = BinaryFormat::hasChildrenInFlags(flags);
+ const bool hasShortcuts = (0 != (BinaryFormat::FLAG_HAS_SHORTCUT_TARGETS & flags));
int codePoint = BinaryFormat::getCodePointAndForwardPointer(
binaryDictionaryInfo->getDictRoot(), &pos);
ASSERT(NOT_A_CODE_POINT != codePoint);
- const int nodeCodePoint = codePoint;
// TODO: optimize this
- int additionalWordBuf[MAX_WORD_LENGTH];
- uint16_t additionalSubwordLength = 0;
- additionalWordBuf[additionalSubwordLength++] = codePoint;
+ int mergedNodeCodePoints[MAX_WORD_LENGTH];
+ uint16_t mergedNodeCodePointCount = 0;
+ mergedNodeCodePoints[mergedNodeCodePointCount++] = codePoint;
do {
const int nextCodePoint = hasMultipleChars
@@ -102,31 +95,29 @@ namespace latinime {
binaryDictionaryInfo->getDictRoot(), &pos) : NOT_A_CODE_POINT;
const bool isLastChar = (NOT_A_CODE_POINT == nextCodePoint);
if (!isLastChar) {
- additionalWordBuf[additionalSubwordLength++] = nextCodePoint;
+ mergedNodeCodePoints[mergedNodeCodePointCount++] = nextCodePoint;
}
codePoint = nextCodePoint;
} while (NOT_A_CODE_POINT != codePoint);
const int probability = isTerminal ? BinaryFormat::readProbabilityWithoutMovingPointer(
- binaryDictionaryInfo->getDictRoot(), pos) : -1;
+ binaryDictionaryInfo->getDictRoot(), pos) : NOT_A_PROBABILITY;
pos = BinaryFormat::skipProbability(flags, pos);
int childrenPos = hasChildren ? BinaryFormat::readChildrenPosition(
- binaryDictionaryInfo->getDictRoot(), flags, pos) : 0;
- const int attributesPos = BinaryFormat::skipChildrenPosition(flags, pos);
+ binaryDictionaryInfo->getDictRoot(), flags, pos) : NOT_A_DICT_POS;
+ const int attributesPos =
+ hasShortcuts ? BinaryFormat::skipChildrenPosition(flags, pos) : NOT_A_DICT_POS;
const int siblingPos = BinaryFormat::skipChildrenPosAndAttributes(
binaryDictionaryInfo->getDictRoot(), flags, pos);
- if (isDicNodeFilteredOut(nodeCodePoint, pInfo, codePointsFilter)) {
+ if (isDicNodeFilteredOut(mergedNodeCodePoints[0], pInfo, codePointsFilter)) {
return siblingPos;
}
- if (!isMatchedNodeCodePoint(pInfoState, pointIndex, exactOnly, nodeCodePoint)) {
+ if (!isMatchedNodeCodePoint(pInfoState, pointIndex, exactOnly, mergedNodeCodePoints[0])) {
return siblingPos;
}
- const int childrenCount = hasChildren ? BinaryFormat::getGroupCountAndForwardPointer(
- binaryDictionaryInfo->getDictRoot(), &childrenPos) : 0;
- childDicNodes->pushLeavingChild(dicNode, nextPos, flags, childrenPos, attributesPos, siblingPos,
- nodeCodePoint, childrenCount, probability, -1 /* bigramProbability */, isTerminal,
- hasMultipleChars, hasChildren, additionalSubwordLength, additionalWordBuf);
+ childDicNodes->pushLeavingChild(dicNode, nextPos, flags, childrenPos, attributesPos,
+ probability, isTerminal, hasChildren, mergedNodeCodePointCount, mergedNodeCodePoints);
return siblingPos;
}
@@ -163,13 +154,16 @@ namespace latinime {
const ProximityInfoState *pInfoState, const int pointIndex, const bool exactOnly,
const std::vector<int> *const codePointsFilter, const ProximityInfo *const pInfo,
DicNodeVector *childDicNodes) {
- const int terminalDepth = dicNode->getLeavingDepth();
- const int childCount = dicNode->getChildrenCount();
+ if (!dicNode->hasChildren()) {
+ return;
+ }
int nextPos = dicNode->getChildrenPos();
+ const int childCount = BinaryFormat::getGroupCountAndForwardPointer(
+ binaryDictionaryInfo->getDictRoot(), &nextPos);
for (int i = 0; i < childCount; i++) {
const int filterSize = codePointsFilter ? codePointsFilter->size() : 0;
nextPos = createAndGetLeavingChildNode(dicNode, nextPos, binaryDictionaryInfo,
- terminalDepth, pInfoState, pointIndex, exactOnly, codePointsFilter, pInfo,
+ pInfoState, pointIndex, exactOnly, codePointsFilter, pInfo,
childDicNodes);
if (!pInfo && filterSize > 0 && childDicNodes->exceeds(filterSize)) {
// All code points have been found.
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.h b/native/jni/src/suggest/core/dicnode/dic_node_utils.h
index e198d6181..d526975ce 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_utils.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.h
@@ -72,7 +72,7 @@ class DicNodeUtils {
const std::vector<int> *const codePointsFilter,
const ProximityInfo *const pInfo, DicNodeVector *childDicNodes);
static int createAndGetLeavingChildNode(DicNode *dicNode, int pos,
- const BinaryDictionaryInfo *const binaryDictionaryInfo, const int terminalDepth,
+ const BinaryDictionaryInfo *const binaryDictionaryInfo,
const ProximityInfoState *pInfoState, const int pointIndex,
const bool exactOnly, const std::vector<int> *const codePointsFilter,
const ProximityInfo *const pInfo, DicNodeVector *childDicNodes);
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_vector.h b/native/jni/src/suggest/core/dicnode/dic_node_vector.h
index e23c411f0..9641cc19c 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_vector.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node_vector.h
@@ -63,16 +63,13 @@ class DicNodeVector {
}
void pushLeavingChild(DicNode *dicNode, const int pos, const uint8_t flags,
- const int childrenPos, const int attributesPos, const int siblingPos,
- const int nodeCodePoint, const int childrenCount, const int probability,
- const int bigramProbability, const bool isTerminal, const bool hasMultipleChars,
- const bool hasChildren, const uint16_t additionalSubwordLength,
- const int *additionalSubword) {
+ const int childrenPos, const int attributesPos, const int probability,
+ const bool isTerminal, const bool hasChildren, const uint16_t mergedNodeCodePointCount,
+ const int *const mergedNodeCodePoints) {
ASSERT(!mLock);
mDicNodes.push_back(mEmptyNode);
- mDicNodes.back().initAsChild(dicNode, pos, flags, childrenPos, attributesPos, siblingPos,
- nodeCodePoint, childrenCount, probability, -1 /* bigramProbability */, isTerminal,
- hasMultipleChars, hasChildren, additionalSubwordLength, additionalSubword);
+ mDicNodes.back().initAsChild(dicNode, pos, flags, childrenPos, attributesPos, probability,
+ isTerminal, hasChildren, mergedNodeCodePointCount, mergedNodeCodePoints);
}
DicNode *operator[](const int id) {
diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
index 242a9bdd6..ff304d2b2 100644
--- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
@@ -150,11 +150,10 @@ int BigramDictionary::getPredictions(const int *prevWord, int prevWordLength, in
int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
const bool forceLowerCaseSearch) const {
if (0 >= prevWordLength) return 0;
- const uint8_t *const root = mBinaryDictionaryInfo->getDictRoot();
- int pos = BinaryFormat::getTerminalPosition(root, prevWord, prevWordLength,
- forceLowerCaseSearch);
-
+ int pos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
+ mBinaryDictionaryInfo, prevWord, prevWordLength, forceLowerCaseSearch);
if (NOT_VALID_WORD == pos) return 0;
+ const uint8_t *const root = mBinaryDictionaryInfo->getDictRoot();
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
if (0 == (flags & BinaryFormat::FLAG_HAS_BIGRAMS)) return 0;
if (0 == (flags & BinaryFormat::FLAG_HAS_MULTIPLE_CHARS)) {
@@ -189,8 +188,8 @@ bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *w
int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */);
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
if (0 == pos) return false;
- int nextWordPos = BinaryFormat::getTerminalPosition(mBinaryDictionaryInfo->getDictRoot(),
- word1, length1, false /* forceLowerCaseSearch */);
+ int nextWordPos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
+ mBinaryDictionaryInfo, word1, length1, false /* forceLowerCaseSearch */);
if (NOT_VALID_WORD == nextWordPos) return false;
for (BinaryDictionaryBigramsIterator bigramsIt(mBinaryDictionaryInfo, pos);
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp
index 737df63c7..bbb4ca3f0 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp
@@ -22,7 +22,7 @@ namespace latinime {
* Dictionary size
*/
// Any file smaller than this is not a dictionary.
-const int BinaryDictionaryFormat::DICTIONARY_MINIMUM_SIZE = 4;
+const int BinaryDictionaryFormatUtils::DICTIONARY_MINIMUM_SIZE = 4;
/**
* Format versions
@@ -30,17 +30,18 @@ const int BinaryDictionaryFormat::DICTIONARY_MINIMUM_SIZE = 4;
// Originally, format version 1 had a 16-bit magic number, then the version number `01'
// then options that must be 0. Hence the first 32 bits of the format are always as follows
// and it's okay to consider them a magic number as a whole.
-const uint32_t BinaryDictionaryFormat::FORMAT_VERSION_1_MAGIC_NUMBER = 0x78B10100;
+const uint32_t BinaryDictionaryFormatUtils::FORMAT_VERSION_1_MAGIC_NUMBER = 0x78B10100;
// The versions of Latin IME that only handle format version 1 only test for the magic
// number, so we had to change it so that version 2 files would be rejected by older
// implementations. On this occasion, we made the magic number 32 bits long.
-const uint32_t BinaryDictionaryFormat::FORMAT_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE;
+const uint32_t BinaryDictionaryFormatUtils::FORMAT_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE;
// Magic number (4 bytes), version (2 bytes), options (2 bytes), header size (4 bytes) = 12
-const int BinaryDictionaryFormat::FORMAT_VERSION_2_MINIMUM_SIZE = 12;
+const int BinaryDictionaryFormatUtils::FORMAT_VERSION_2_MINIMUM_SIZE = 12;
-/* static */ BinaryDictionaryFormat::FORMAT_VERSION BinaryDictionaryFormat::detectFormatVersion(
- const uint8_t *const dict, const int dictSize) {
+/* static */ BinaryDictionaryFormatUtils::FORMAT_VERSION
+ BinaryDictionaryFormatUtils::detectFormatVersion(const uint8_t *const dict,
+ const int dictSize) {
// The magic number is stored big-endian.
// If the dictionary is less than 4 bytes, we can't even read the magic number, so we don't
// understand this format.
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h
index c0fd56111..33618b9f0 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h
@@ -31,7 +31,7 @@ namespace latinime {
* reading methods and utility methods for various purposes.
* On the other hand, this file deals only with the dictionary format version.
*/
-class BinaryDictionaryFormat {
+class BinaryDictionaryFormatUtils {
public:
// TODO: Remove obsolete version logic
enum FORMAT_VERSION {
@@ -43,7 +43,7 @@ class BinaryDictionaryFormat {
static FORMAT_VERSION detectFormatVersion(const uint8_t *const dict, const int dictSize);
private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryFormat);
+ DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryFormatUtils);
static const int DICTIONARY_MINIMUM_SIZE;
static const uint32_t FORMAT_VERSION_1_MAGIC_NUMBER;
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_header.cpp
index 04bb81f71..91c643a5f 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header.cpp
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_header.cpp
@@ -29,12 +29,12 @@ const float BinaryDictionaryHeader::MULTI_WORD_COST_MULTIPLIER_SCALE = 100.0f;
BinaryDictionaryHeader::BinaryDictionaryHeader(
const BinaryDictionaryInfo *const binaryDictionaryInfo)
: mBinaryDictionaryInfo(binaryDictionaryInfo),
- mDictionaryFlags(BinaryDictionaryHeaderReader::getFlags(binaryDictionaryInfo)),
- mSize(BinaryDictionaryHeaderReader::getHeaderSize(binaryDictionaryInfo)),
+ mDictionaryFlags(BinaryDictionaryHeaderReadingUtils::getFlags(binaryDictionaryInfo)),
+ mSize(BinaryDictionaryHeaderReadingUtils::getHeaderSize(binaryDictionaryInfo)),
mMultiWordCostMultiplier(readMultiWordCostMultiplier()) {}
float BinaryDictionaryHeader::readMultiWordCostMultiplier() const {
- const int headerValue = BinaryDictionaryHeaderReader::readHeaderValueInt(
+ const int headerValue = BinaryDictionaryHeaderReadingUtils::readHeaderValueInt(
mBinaryDictionaryInfo, MULTIPLE_WORDS_DEMOTION_RATE_KEY);
if (headerValue == S_INT_MIN) {
// not found
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h
index 9db000362..6dba0b266 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h
@@ -37,15 +37,16 @@ class BinaryDictionaryHeader {
}
AK_FORCE_INLINE bool supportsDynamicUpdate() const {
- return BinaryDictionaryHeaderReader::supportsDynamicUpdate(mDictionaryFlags);
+ return BinaryDictionaryHeaderReadingUtils::supportsDynamicUpdate(mDictionaryFlags);
}
AK_FORCE_INLINE bool requiresGermanUmlautProcessing() const {
- return BinaryDictionaryHeaderReader::requiresGermanUmlautProcessing(mDictionaryFlags);
+ return BinaryDictionaryHeaderReadingUtils::requiresGermanUmlautProcessing(mDictionaryFlags);
}
AK_FORCE_INLINE bool requiresFrenchLigatureProcessing() const {
- return BinaryDictionaryHeaderReader::requiresFrenchLigatureProcessing(mDictionaryFlags);
+ return BinaryDictionaryHeaderReadingUtils::requiresFrenchLigatureProcessing(
+ mDictionaryFlags);
}
AK_FORCE_INLINE float getMultiWordCostMultiplier() const {
@@ -60,7 +61,7 @@ class BinaryDictionaryHeader {
static const float MULTI_WORD_COST_MULTIPLIER_SCALE;
const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
- const BinaryDictionaryHeaderReader::DictionaryFlags mDictionaryFlags;
+ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags mDictionaryFlags;
const int mSize;
const float mMultiWordCostMultiplier;
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp
index c09a78f03..2c9593144 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp
@@ -24,32 +24,33 @@
namespace latinime {
-const int BinaryDictionaryHeaderReader::MAX_OPTION_KEY_LENGTH = 256;
+const int BinaryDictionaryHeaderReadingUtils::MAX_OPTION_KEY_LENGTH = 256;
-const int BinaryDictionaryHeaderReader::FORMAT_VERSION_1_HEADER_SIZE = 5;
+const int BinaryDictionaryHeaderReadingUtils::FORMAT_VERSION_1_HEADER_SIZE = 5;
-const int BinaryDictionaryHeaderReader::VERSION_2_MAGIC_NUMBER_SIZE = 4;
-const int BinaryDictionaryHeaderReader::VERSION_2_DICTIONARY_VERSION_SIZE = 2;
-const int BinaryDictionaryHeaderReader::VERSION_2_DICTIONARY_FLAG_SIZE = 2;
-const int BinaryDictionaryHeaderReader::VERSION_2_DICTIONARY_HEADER_SIZE_SIZE = 4;
+const int BinaryDictionaryHeaderReadingUtils::VERSION_2_MAGIC_NUMBER_SIZE = 4;
+const int BinaryDictionaryHeaderReadingUtils::VERSION_2_DICTIONARY_VERSION_SIZE = 2;
+const int BinaryDictionaryHeaderReadingUtils::VERSION_2_DICTIONARY_FLAG_SIZE = 2;
+const int BinaryDictionaryHeaderReadingUtils::VERSION_2_DICTIONARY_HEADER_SIZE_SIZE = 4;
-const BinaryDictionaryHeaderReader::DictionaryFlags BinaryDictionaryHeaderReader::NO_FLAGS = 0;
+const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
+ BinaryDictionaryHeaderReadingUtils::NO_FLAGS = 0;
// Flags for special processing
// Those *must* match the flags in makedict (BinaryDictInputOutput#*_PROCESSING_FLAG) or
// something very bad (like, the apocalypse) will happen. Please update both at the same time.
-const BinaryDictionaryHeaderReader::DictionaryFlags
- BinaryDictionaryHeaderReader::GERMAN_UMLAUT_PROCESSING_FLAG = 0x1;
-const BinaryDictionaryHeaderReader::DictionaryFlags
- BinaryDictionaryHeaderReader::SUPPORTS_DYNAMIC_UPDATE_FLAG = 0x2;
-const BinaryDictionaryHeaderReader::DictionaryFlags
- BinaryDictionaryHeaderReader::FRENCH_LIGATURE_PROCESSING_FLAG = 0x4;
+const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
+ BinaryDictionaryHeaderReadingUtils::GERMAN_UMLAUT_PROCESSING_FLAG = 0x1;
+const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
+ BinaryDictionaryHeaderReadingUtils::SUPPORTS_DYNAMIC_UPDATE_FLAG = 0x2;
+const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
+ BinaryDictionaryHeaderReadingUtils::FRENCH_LIGATURE_PROCESSING_FLAG = 0x4;
-/* static */ int BinaryDictionaryHeaderReader::getHeaderSize(
+/* static */ int BinaryDictionaryHeaderReadingUtils::getHeaderSize(
const BinaryDictionaryInfo *const binaryDictionaryInfo) {
switch (binaryDictionaryInfo->getFormat()) {
- case BinaryDictionaryFormat::VERSION_1:
+ case BinaryDictionaryFormatUtils::VERSION_1:
return FORMAT_VERSION_1_HEADER_SIZE;
- case BinaryDictionaryFormat::VERSION_2:
+ case BinaryDictionaryFormatUtils::VERSION_2:
// See the format of the header in the comment in
// BinaryDictionaryFormatUtils::detectFormatVersion()
return ByteArrayUtils::readUint32(binaryDictionaryInfo->getDictBuf(),
@@ -60,12 +61,13 @@ const BinaryDictionaryHeaderReader::DictionaryFlags
}
}
-/* static */ BinaryDictionaryHeaderReader::DictionaryFlags BinaryDictionaryHeaderReader::getFlags(
- const BinaryDictionaryInfo *const binaryDictionaryInfo) {
+/* static */ BinaryDictionaryHeaderReadingUtils::DictionaryFlags
+ BinaryDictionaryHeaderReadingUtils::getFlags(
+ const BinaryDictionaryInfo *const binaryDictionaryInfo) {
switch (binaryDictionaryInfo->getFormat()) {
- case BinaryDictionaryFormat::VERSION_1:
+ case BinaryDictionaryFormatUtils::VERSION_1:
return NO_FLAGS;
- case BinaryDictionaryFormat::VERSION_2:
+ case BinaryDictionaryFormatUtils::VERSION_2:
return ByteArrayUtils::readUint16(binaryDictionaryInfo->getDictBuf(),
VERSION_2_MAGIC_NUMBER_SIZE + VERSION_2_DICTIONARY_VERSION_SIZE);
default:
@@ -74,7 +76,7 @@ const BinaryDictionaryHeaderReader::DictionaryFlags
}
// Returns if the key is found or not and reads the found value into outValue.
-/* static */ bool BinaryDictionaryHeaderReader::readHeaderValue(
+/* static */ bool BinaryDictionaryHeaderReadingUtils::readHeaderValue(
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const char *const key, int *outValue, const int outValueSize) {
if (outValueSize <= 0 || !hasHeaderAttributes(binaryDictionaryInfo->getFormat())) {
@@ -97,7 +99,7 @@ const BinaryDictionaryHeaderReader::DictionaryFlags
return false;
}
-/* static */ int BinaryDictionaryHeaderReader::readHeaderValueInt(
+/* static */ int BinaryDictionaryHeaderReadingUtils::readHeaderValueInt(
const BinaryDictionaryInfo *const binaryDictionaryInfo, const char *const key) {
const int bufferSize = LARGEST_INT_DIGIT_COUNT;
int intBuffer[bufferSize];
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h
index 6e9dca73c..49ed2b9cc 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h
@@ -26,7 +26,7 @@ namespace latinime {
class BinaryDictionaryInfo;
-class BinaryDictionaryHeaderReader {
+class BinaryDictionaryHeaderReadingUtils {
public:
typedef uint16_t DictionaryFlags;
@@ -49,10 +49,10 @@ class BinaryDictionaryHeaderReader {
}
static AK_FORCE_INLINE bool hasHeaderAttributes(
- const BinaryDictionaryFormat::FORMAT_VERSION format) {
+ const BinaryDictionaryFormatUtils::FORMAT_VERSION format) {
// Only format 2 and above have header attributes as {key,value} string pairs.
switch (format) {
- case BinaryDictionaryFormat::VERSION_2:
+ case BinaryDictionaryFormatUtils::VERSION_2:
return true;
break;
default:
@@ -61,9 +61,9 @@ class BinaryDictionaryHeaderReader {
}
static AK_FORCE_INLINE int getHeaderOptionsPosition(
- const BinaryDictionaryFormat::FORMAT_VERSION format) {
+ const BinaryDictionaryFormatUtils::FORMAT_VERSION format) {
switch (format) {
- case BinaryDictionaryFormat::VERSION_2:
+ case BinaryDictionaryFormatUtils::VERSION_2:
return VERSION_2_MAGIC_NUMBER_SIZE + VERSION_2_DICTIONARY_VERSION_SIZE
+ VERSION_2_DICTIONARY_FLAG_SIZE + VERSION_2_DICTIONARY_HEADER_SIZE_SIZE;
break;
@@ -80,7 +80,7 @@ class BinaryDictionaryHeaderReader {
const BinaryDictionaryInfo *const binaryDictionaryInfo, const char *const key);
private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryHeaderReader);
+ DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryHeaderReadingUtils);
static const int FORMAT_VERSION_1_HEADER_SIZE;
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h
index e0b583588..7cb31440a 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h
@@ -22,19 +22,21 @@
#include "defines.h"
#include "suggest/core/dictionary/binary_dictionary_format_utils.h"
#include "suggest/core/dictionary/binary_dictionary_header.h"
+#include "suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h"
namespace latinime {
-class BinaryDictionaryHeader;
-
class BinaryDictionaryInfo {
public:
BinaryDictionaryInfo(const uint8_t *const dictBuf, const int dictSize, const int mmapFd,
const int dictBufOffset, const bool isUpdatable)
: mDictBuf(dictBuf), mDictSize(dictSize), mMmapFd(mmapFd),
mDictBufOffset(dictBufOffset), mIsUpdatable(isUpdatable),
- mDictionaryFormat(BinaryDictionaryFormat::detectFormatVersion(mDictBuf, mDictSize)),
- mDictionaryHeader(this), mDictRoot(mDictBuf + mDictionaryHeader.getSize()) {}
+ mDictionaryFormat(BinaryDictionaryFormatUtils::detectFormatVersion(
+ mDictBuf, mDictSize)),
+ mDictionaryHeader(this), mDictRoot(mDictBuf + mDictionaryHeader.getSize()),
+ mStructurePolicy(DictionaryStructurePolicyFactory::getDictionaryStructurePolicy(
+ mDictionaryFormat)) {}
AK_FORCE_INLINE const uint8_t *getDictBuf() const {
return mDictBuf;
@@ -56,14 +58,10 @@ class BinaryDictionaryInfo {
return mDictRoot;
}
- AK_FORCE_INLINE BinaryDictionaryFormat::FORMAT_VERSION getFormat() const {
+ AK_FORCE_INLINE BinaryDictionaryFormatUtils::FORMAT_VERSION getFormat() const {
return mDictionaryFormat;
}
- AK_FORCE_INLINE int getRootPosition() const {
- return 0;
- }
-
AK_FORCE_INLINE const BinaryDictionaryHeader *getHeader() const {
return &mDictionaryHeader;
}
@@ -74,6 +72,10 @@ class BinaryDictionaryInfo {
return mIsUpdatable && isUpdatableDictionaryFormat;
}
+ AK_FORCE_INLINE const DictionaryStructurePolicy *getStructurePolicy() const {
+ return mStructurePolicy;
+ }
+
private:
DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryInfo);
@@ -82,9 +84,10 @@ class BinaryDictionaryInfo {
const int mMmapFd;
const int mDictBufOffset;
const bool mIsUpdatable;
- const BinaryDictionaryFormat::FORMAT_VERSION mDictionaryFormat;
+ const BinaryDictionaryFormatUtils::FORMAT_VERSION mDictionaryFormat;
const BinaryDictionaryHeader mDictionaryHeader;
const uint8_t *const mDictRoot;
+ const DictionaryStructurePolicy *const mStructurePolicy;
};
}
#endif /* LATINIME_BINARY_DICTIONARY_INFO_H */
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index 51f23dc55..675b54972 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -83,27 +83,14 @@ int Dictionary::getBigrams(const int *word, int length, int *inputCodePoints, in
}
int Dictionary::getProbability(const int *word, int length) const {
- const uint8_t *const root = mBinaryDictionaryInfo.getDictRoot();
- int pos = BinaryFormat::getTerminalPosition(root, word, length,
+ const DictionaryStructurePolicy *const structurePolicy =
+ mBinaryDictionaryInfo.getStructurePolicy();
+ int pos = structurePolicy->getTerminalNodePositionOfWord(&mBinaryDictionaryInfo, word, length,
false /* forceLowerCaseSearch */);
if (NOT_VALID_WORD == pos) {
return NOT_A_PROBABILITY;
}
- const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
- if (flags & (BinaryFormat::FLAG_IS_BLACKLISTED | BinaryFormat::FLAG_IS_NOT_A_WORD)) {
- // If this is not a word, or if it's a blacklisted entry, it should behave as
- // having no probability outside of the suggestion process (where it should be used
- // for shortcuts).
- return NOT_A_PROBABILITY;
- }
- const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
- if (hasMultipleChars) {
- pos = BinaryFormat::skipOtherCharacters(root, pos);
- } else {
- BinaryFormat::getCodePointAndForwardPointer(root, &pos);
- }
- const int unigramProbability = BinaryFormat::readProbabilityWithoutMovingPointer(root, pos);
- return unigramProbability;
+ return structurePolicy->getUnigramProbability(&mBinaryDictionaryInfo, pos);
}
bool Dictionary::isValidBigram(const int *word0, int length0, const int *word1, int length1) const {
diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_policy.h
new file mode 100644
index 000000000..ab42c13b4
--- /dev/null
+++ b/native/jni/src/suggest/core/policy/dictionary_structure_policy.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICTIONARY_STRUCTURE_POLICY_H
+#define LATINIME_DICTIONARY_STRUCTURE_POLICY_H
+
+#include "defines.h"
+
+namespace latinime {
+
+class BinaryDictionaryInfo;
+class DicNode;
+class DicNodeVector;
+
+/*
+ * This class abstracts the structure of dictionaries.
+ * Implement this policy to support additional dictionaries.
+ */
+class DictionaryStructurePolicy {
+ public:
+ // This provides a method for filtering new nodes.
+ class NodeFilter {
+ public:
+ virtual bool isFilteredOut(const int codePoint) const = 0;
+
+ protected:
+ NodeFilter() {}
+ virtual ~NodeFilter() {}
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(NodeFilter);
+ };
+
+ virtual int getRootPosition() const = 0;
+
+ virtual void createAndGetAllChildNodes(const DicNode *const dicNode,
+ const BinaryDictionaryInfo *const binaryDictionaryInfo,
+ const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const = 0;
+
+ virtual void getWordAtPosition(const BinaryDictionaryInfo *const binaryDictionaryInfo,
+ const int terminalNodePos, const int maxDepth, int *const outWord,
+ int *const outUnigramProbability) const = 0;
+
+ virtual int getTerminalNodePositionOfWord(
+ const BinaryDictionaryInfo *const binaryDictionaryInfo, const int *const inWord,
+ const int length, const bool forceLowerCaseSearch) const = 0;
+
+ virtual int getUnigramProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo,
+ const int nodePos) const = 0;
+
+ protected:
+ DictionaryStructurePolicy() {}
+ virtual ~DictionaryStructurePolicy() {}
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DictionaryStructurePolicy);
+};
+} // namespace latinime
+#endif /* LATINIME_DICTIONARY_STRUCTURE_POLICY_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h
new file mode 100644
index 000000000..5070651cb
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICTIONARY_STRUCTURE_POLICY_FACTORY_H
+#define LATINIME_DICTIONARY_STRUCTURE_POLICY_FACTORY_H
+
+#include "defines.h"
+#include "suggest/core/dictionary/binary_dictionary_format_utils.h"
+#include "suggest/policyimpl/dictionary/patricia_trie_policy.h"
+
+namespace latinime {
+
+class DictionaryStructurePolicy;
+
+class DictionaryStructurePolicyFactory {
+ public:
+ static const DictionaryStructurePolicy *getDictionaryStructurePolicy(
+ const BinaryDictionaryFormatUtils::FORMAT_VERSION dictionaryFormat) {
+ switch (dictionaryFormat) {
+ case BinaryDictionaryFormatUtils::VERSION_1:
+ // Fall through
+ case BinaryDictionaryFormatUtils::VERSION_2:
+ return PatriciaTriePolicy::getInstance();
+ default:
+ ASSERT(false);
+ return 0;
+ }
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryStructurePolicyFactory);
+};
+} // namespace latinime
+#endif // LATINIME_DICTIONARY_STRUCTURE_POLICY_FACTORY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp
new file mode 100644
index 000000000..c995af98a
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include "suggest/policyimpl/dictionary/patricia_trie_policy.h"
+
+#include "defines.h"
+#include "suggest/core/dicnode/dic_node.h"
+#include "suggest/core/dicnode/dic_node_vector.h"
+#include "suggest/core/dictionary/binary_dictionary_info.h"
+#include "suggest/core/dictionary/binary_format.h"
+
+namespace latinime {
+
+const PatriciaTriePolicy PatriciaTriePolicy::sInstance;
+
+void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
+ const BinaryDictionaryInfo *const binaryDictionaryInfo,
+ const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const {
+ // TODO: Move children creating methods from DicNodeUtils.
+}
+
+void PatriciaTriePolicy::getWordAtPosition(const BinaryDictionaryInfo *const binaryDictionaryInfo,
+ const int terminalNodePos, const int maxDepth, int *const outWord,
+ int *const outUnigramProbability) const {
+ BinaryFormat::getWordAtAddress(binaryDictionaryInfo->getDictRoot(), terminalNodePos,
+ maxDepth, outWord, outUnigramProbability);
+}
+
+int PatriciaTriePolicy::getTerminalNodePositionOfWord(
+ const BinaryDictionaryInfo *const binaryDictionaryInfo, const int *const inWord,
+ const int length, const bool forceLowerCaseSearch) const {
+ return BinaryFormat::getTerminalPosition(binaryDictionaryInfo->getDictRoot(), inWord,
+ length, forceLowerCaseSearch);
+}
+
+int PatriciaTriePolicy::getUnigramProbability(
+ const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos) const {
+ const uint8_t *const root = binaryDictionaryInfo->getDictRoot();
+ int pos = nodePos;
+ const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
+ if (flags & (BinaryFormat::FLAG_IS_BLACKLISTED | BinaryFormat::FLAG_IS_NOT_A_WORD)) {
+ // If this is not a word, or if it's a blacklisted entry, it should behave as
+ // having no probability outside of the suggestion process (where it should be used
+ // for shortcuts).
+ return NOT_A_PROBABILITY;
+ }
+ const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
+ if (hasMultipleChars) {
+ pos = BinaryFormat::skipOtherCharacters(root, pos);
+ } else {
+ BinaryFormat::getCodePointAndForwardPointer(root, &pos);
+ }
+ return BinaryFormat::readProbabilityWithoutMovingPointer(root, pos);
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
new file mode 100644
index 000000000..9b9338145
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PATRICIA_TRIE_POLICY_H
+#define LATINIME_PATRICIA_TRIE_POLICY_H
+
+#include "defines.h"
+#include "suggest/core/policy/dictionary_structure_policy.h"
+
+namespace latinime {
+
+class PatriciaTriePolicy : public DictionaryStructurePolicy {
+ public:
+ static AK_FORCE_INLINE const PatriciaTriePolicy *getInstance() {
+ return &sInstance;
+ }
+
+ AK_FORCE_INLINE int getRootPosition() const {
+ return 0;
+ }
+
+ void createAndGetAllChildNodes(const DicNode *const dicNode,
+ const BinaryDictionaryInfo *const binaryDictionaryInfo,
+ const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const;
+
+ void getWordAtPosition(const BinaryDictionaryInfo *const binaryDictionaryInfo,
+ const int terminalNodePos, const int maxDepth, int *const outWord,
+ int *const outUnigramProbability) const;
+
+ int getTerminalNodePositionOfWord(
+ const BinaryDictionaryInfo *const binaryDictionaryInfo, const int *const inWord,
+ const int length, const bool forceLowerCaseSearch) const;
+
+ int getUnigramProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo,
+ const int nodePos) const;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(PatriciaTriePolicy);
+ static const PatriciaTriePolicy sInstance;
+
+ PatriciaTriePolicy() {}
+ ~PatriciaTriePolicy() {}
+};
+} // namespace latinime
+#endif // LATINIME_PATRICIA_TRIE_POLICY_H