about summary refs log tree commit diff stats
path: root/native/jni/src
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni/src')
-rw-r--r-- native/jni/src/suggest/core/dicnode/dic_node.h 43
-rw-r--r-- native/jni/src/suggest/core/dictionary/byte_array_utils.h 11
-rw-r--r-- native/jni/src/suggest/policyimpl/typing/typing_traversal.h 4
-rw-r--r-- native/jni/src/suggest/policyimpl/typing/typing_weighting.h 6
4 files changed, 40 insertions, 24 deletions
diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h
index 25299948d..c700b01ca 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node.h
@@ -28,13 +28,13 @@
#if DEBUG_DICT
#define LOGI_SHOW_ADD_COST_PROP \
do { char charBuf[50]; \
- INTS_TO_CHARS(getOutputWordBuf(), getDepth(), charBuf); \
+ INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf); \
AKLOGI("%20s, \"%c\", size = %03d, total = %03d, index(0) = %02d, dist = %.4f, %s,,", \
__FUNCTION__, getNodeCodePoint(), inputSize, getTotalInputIndex(), \
getInputIndex(0), getNormalizedCompoundDistance(), charBuf); } while (0)
#define DUMP_WORD_AND_SCORE(header) \
do { char charBuf[50]; char prevWordCharBuf[50]; \
- INTS_TO_CHARS(getOutputWordBuf(), getDepth(), charBuf); \
+ INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf); \
INTS_TO_CHARS(mDicNodeState.mDicNodeStatePrevWord.mPrevWord, \
mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(), prevWordCharBuf); \
AKLOGI("#%8s, %5f, %5f, %5f, %5f, %s, %s, %d,,", header, \
@@ -51,6 +51,11 @@ namespace latinime {
// This struct is purely a bucket to return values. No instances of this struct should be kept.
struct DicNode_InputStateG {
+ DicNode_InputStateG()
+ : mNeedsToUpdateInputStateG(false), mPointerId(0), mInputIndex(0),
+ mPrevCodePoint(0), mTerminalDiffCost(0.0f), mRawLength(0.0f),
+ mDoubleLetterLevel(NOT_A_DOUBLE_LETTER) {}
+
bool mNeedsToUpdateInputStateG;
int mPointerId;
int16_t mInputIndex;
@@ -157,7 +162,7 @@ class DicNode {
const bool isTerminal, const bool hasMultipleChars, const bool hasChildren,
const uint16_t additionalSubwordLength, const int *additionalSubword) {
mIsUsed = true;
- uint16_t newDepth = static_cast<uint16_t>(dicNode->getDepth() + 1);
+ uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1);
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
const uint16_t newLeavingDepth = static_cast<uint16_t>(
dicNode->mDicNodeProperties.getLeavingDepth() + additionalSubwordLength);
@@ -180,7 +185,7 @@ class DicNode {
}
bool isRoot() const {
- return getDepth() == 0;
+ return getNodeCodePointCount() == 0; // A node is the root exactly when its code point count is zero.
}
bool hasChildren() const {
@@ -188,12 +193,12 @@ class DicNode {
}
bool isLeavingNode() const {
- ASSERT(getDepth() <= getLeavingDepth());
- return getDepth() == getLeavingDepth();
+ ASSERT(getNodeCodePointCount() <= getLeavingDepth()); // Invariant: the code point count must not exceed the leaving depth.
+ return getNodeCodePointCount() == getLeavingDepth(); // True once the code point count has reached the leaving depth.
}
AK_FORCE_INLINE bool isFirstLetter() const {
- return getDepth() == 1;
+ return getNodeCodePointCount() == 1; // True when the node's code point count is exactly 1.
}
bool isCached() const {
@@ -206,7 +211,7 @@ class DicNode {
// Used to expand the node in DicNodeUtils
int getNodeTypedCodePoint() const {
- return mDicNodeState.mDicNodeStateOutput.getCodePointAt(getDepth());
+ return mDicNodeState.mDicNodeStateOutput.getCodePointAt(getNodeCodePointCount()); // Looks up the output-state code point at the index equal to this node's code point count.
}
bool isImpossibleBigramWord() const {
@@ -215,7 +220,7 @@ class DicNode {
}
const int prevWordLen = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength()
- mDicNodeState.mDicNodeStatePrevWord.getPrevWordStart() - 1;
- const int currentWordLen = getDepth();
+ const int currentWordLen = getNodeCodePointCount();
return (prevWordLen == 1 && currentWordLen == 1);
}
@@ -263,13 +268,13 @@ class DicNode {
AK_FORCE_INLINE bool isTerminalWordNode() const {
const bool isTerminalNodes = mDicNodeProperties.isTerminal();
- const int currentNodeDepth = getDepth();
+ const int currentNodeDepth = getNodeCodePointCount(); // The node's depth is taken from its code point count.
const int terminalNodeDepth = mDicNodeProperties.getLeavingDepth();
return isTerminalNodes && currentNodeDepth > 0 && currentNodeDepth == terminalNodeDepth; // Requires the terminal flag, a depth above zero, and a depth equal to the leaving depth.
}
bool shouldBeFilterdBySafetyNetForBigram() const {
- const uint16_t currentDepth = getDepth();
+ const uint16_t currentDepth = getNodeCodePointCount();
const int prevWordLen = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength()
- mDicNodeState.mDicNodeStatePrevWord.getPrevWordStart() - 1;
return !(currentDepth > 0 && (currentDepth != 1 || prevWordLen != 1));
@@ -281,7 +286,7 @@ class DicNode {
bool isTotalInputSizeExceedingLimit() const {
const int prevWordsLen = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength();
- const int currentWordDepth = getDepth();
+ const int currentWordDepth = getNodeCodePointCount();
// TODO: 3 can be 2? Needs to be investigated.
// TODO: Have a const variable for 3 (or 2)
return prevWordsLen + currentWordDepth > MAX_WORD_LENGTH - 3;
@@ -316,7 +321,7 @@ class DicNode {
void outputResult(int *dest) const {
const uint16_t prevWordLength = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength();
- const uint16_t currentDepth = getDepth();
+ const uint16_t currentDepth = getNodeCodePointCount();
DicNodeUtils::appendTwoWords(mDicNodeState.mDicNodeStatePrevWord.mPrevWord,
prevWordLength, getOutputWordBuf(), currentDepth, dest);
DUMP_WORD_AND_SCORE("OUTPUT");
@@ -475,13 +480,13 @@ class DicNode {
return mDicNodeProperties.getAttributesPos();
}
- inline uint16_t getDepth() const {
+ inline uint16_t getNodeCodePointCount() const { // Accessor renamed from getDepth(); it still forwards to mDicNodeProperties.getDepth().
return mDicNodeProperties.getDepth();
}
- // "Length" includes spaces.
- inline uint16_t getTotalLength() const {
- return getDepth() + mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength();
+ // Returns this node's code point count plus the previous-word length (spaces are included).
+ inline uint16_t getTotalNodeCodePointCount() const {
+ return getNodeCodePointCount() + mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(); // The int sum is narrowed to the uint16_t return type.
}
AK_FORCE_INLINE void dump(const char *tag) const {
@@ -516,8 +521,8 @@ class DicNode {
} else if (diff < -MIN_DIFF) {
return false;
}
- const int depth = getDepth();
- const int depthDiff = right->getDepth() - depth;
+ const int depth = getNodeCodePointCount();
+ const int depthDiff = right->getNodeCodePointCount() - depth;
if (depthDiff != 0) {
return depthDiff > 0;
}
diff --git a/native/jni/src/suggest/core/dictionary/byte_array_utils.h b/native/jni/src/suggest/core/dictionary/byte_array_utils.h
index d3321f624..daa822ffa 100644
--- a/native/jni/src/suggest/core/dictionary/byte_array_utils.h
+++ b/native/jni/src/suggest/core/dictionary/byte_array_utils.h
@@ -57,6 +57,17 @@ class ByteArrayUtils {
return value;
}
+ static AK_FORCE_INLINE int readSint24andAdvancePosition( // Reads a signed 24-bit value starting at buffer[*pos]; the top bit of the first byte acts as the sign flag (appears to be sign-magnitude encoding).
+ const uint8_t *const buffer, int *const pos) { // pos is an in/out cursor into buffer; it is moved by this call or by the helpers it delegates to.
+ const uint8_t value = readUint8(buffer, *pos); // Peek at the first byte; *pos is passed by value, so the cursor does not move here.
+ if (value < 0x80) { // Top bit clear: treated as a non-negative value.
+ return readUint24andAdvancePosition(buffer, pos); // Delegate to the unsigned 24-bit reader, which receives the cursor and (per its name) advances it.
+ } else { // Top bit set: treated as a negative value.
+ (*pos)++; // Step past the first byte, which is already held in `value`.
+ return -(((value & 0x7F) << 16) ^ readUint16andAdvancePosition(buffer, pos)); // Magnitude = low 7 bits of the first byte shifted to bits 16-22, combined with the following 16-bit read, then negated. XOR matches OR here provided the 16-bit read stays within the low 16 bits — presumably it returns uint16_t; confirm.
+ }
+ }
+
static AK_FORCE_INLINE uint32_t readUint24andAdvancePosition(
const uint8_t *const buffer, int *const pos) {
const uint32_t value = readUint24(buffer, *pos);
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h
index e21b318e6..5ae396e64 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h
@@ -147,7 +147,7 @@ class TypingTraversal : public Traversal {
AK_FORCE_INLINE bool sameAsTyped(
const DicTraverseSession *const traverseSession, const DicNode *const dicNode) const {
return traverseSession->getProximityInfoState(0)->sameAsTyped(
- dicNode->getOutputWordBuf(), dicNode->getDepth());
+ dicNode->getOutputWordBuf(), dicNode->getNodeCodePointCount()); // Forwards the node's output word buffer and its code point count to the proximity info state at index 0.
}
AK_FORCE_INLINE int getMaxCacheSize() const {
@@ -171,7 +171,7 @@ class TypingTraversal : public Traversal {
return false;
}
const int c = dicNode->getOutputWordBuf()[0];
- const bool shortCappedWord = dicNode->getDepth()
+ const bool shortCappedWord = dicNode->getNodeCodePointCount()
< ScoringParams::THRESHOLD_SHORT_WORD_LENGTH && CharUtils::isAsciiUpper(c);
return !shortCappedWord
|| probability >= ScoringParams::THRESHOLD_NEXT_WORD_PROBABILITY_FOR_CAPPED;
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
index d7656128d..7ba4af5f9 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
@@ -55,7 +55,7 @@ class TypingWeighting : public Weighting {
const bool isZeroCostOmission = parentDicNode->isZeroCostOmission();
const bool sameCodePoint = dicNode->isSameNodeCodePoint(parentDicNode);
// If the traversal omitted the first letter then the dicNode should now be on the second.
- const bool isFirstLetterOmission = dicNode->getDepth() == 2;
+ const bool isFirstLetterOmission = dicNode->getNodeCodePointCount() == 2;
float cost = 0.0f;
if (isZeroCostOmission) {
cost = 0.0f;
@@ -83,7 +83,7 @@ class TypingWeighting : public Weighting {
const bool isProximity = isProximityDicNode(traverseSession, dicNode);
float cost = isProximity ? (isFirstChar ? ScoringParams::FIRST_PROXIMITY_COST
: ScoringParams::PROXIMITY_COST) : 0.0f;
- if (dicNode->getDepth() == 2) {
+ if (dicNode->getNodeCodePointCount() == 2) {
// At the second character of the current word, we check if the first char is uppercase
// and the word is a second or later word of a multiple word suggestion. We demote it
// if so.
@@ -131,7 +131,7 @@ class TypingWeighting : public Weighting {
const float dist = traverseSession->getProximityInfoState(0)->getPointToKeyLength(
parentPointIndex + 1, currentCodePoint);
const float weightedDistance = dist * ScoringParams::DISTANCE_WEIGHT_LENGTH;
- const bool singleChar = dicNode->getDepth() == 1;
+ const bool singleChar = dicNode->getNodeCodePointCount() == 1;
const float cost = (singleChar ? ScoringParams::INSERTION_COST_FIRST_CHAR : 0.0f)
+ (sameCodePoint ? ScoringParams::INSERTION_COST_SAME_CHAR
: ScoringParams::INSERTION_COST);