aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.cpp84
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp84
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h6
3 files changed, 93 insertions, 81 deletions
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.cpp
index 50882b3e9..205d181e7 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.cpp
@@ -77,95 +77,17 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *cons
int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const {
- // This method traverses parent nodes from the terminal by following parent pointers; thus,
- // node code points are stored in the buffer in the reverse order.
- int reverseCodePoints[maxCodePointCount];
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader);
- // First, read the terminal node and get its probability.
readingHelper.initWithPtNodePos(ptNodePos);
-
- const PtNodeParams terminalPtNodeParams(readingHelper.getPtNodeParams());
- if (!readingHelper.isValidTerminalNode(terminalPtNodeParams)) {
- // Node at the ptNodePos is not a valid terminal node.
- *outUnigramProbability = NOT_A_PROBABILITY;
- return 0;
- }
- // Store terminal node probability.
- *outUnigramProbability = terminalPtNodeParams.getProbability();
- // Then, following parent node link to the dictionary root and fetch node code points.
- int totalCodePointCount = 0;
- while (!readingHelper.isEnd()) {
- const PtNodeParams ptNodeParams(readingHelper.getPtNodeParams());
- totalCodePointCount = readingHelper.getTotalCodePointCount(ptNodeParams);
- if (!ptNodeParams.isValid() || totalCodePointCount > maxCodePointCount) {
- // The ptNodePos is not a valid terminal node position in the dictionary.
- *outUnigramProbability = NOT_A_PROBABILITY;
- return 0;
- }
- // Store node code points to buffer in the reverse order.
- readingHelper.fetchMergedNodeCodePointsInReverseOrder(ptNodeParams,
- readingHelper.getPrevTotalCodePointCount(), reverseCodePoints);
- // Follow parent node toward the root node.
- readingHelper.readParentNode(ptNodeParams);
- }
- if (readingHelper.isError()) {
- // The node position or the dictionary is invalid.
- *outUnigramProbability = NOT_A_PROBABILITY;
- return 0;
- }
- // Reverse the stored code points to output them.
- for (int i = 0; i < totalCodePointCount; ++i) {
- outCodePoints[i] = reverseCodePoints[totalCodePointCount - i - 1];
- }
- return totalCodePointCount;
+ return readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount(maxCodePointCount,
+ outCodePoints, outUnigramProbability);
}
int DynamicPatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const {
- int searchCodePoints[length];
- for (int i = 0; i < length; ++i) {
- searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i];
- }
-
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader);
readingHelper.initWithPtNodeArrayPos(getRootPosition());
- while (!readingHelper.isEnd()) {
- const PtNodeParams ptNodeParams(readingHelper.getPtNodeParams());
- if (!ptNodeParams.isValid()) {
- break;
- }
- const int matchedCodePointCount = readingHelper.getPrevTotalCodePointCount();
- if (readingHelper.getTotalCodePointCount(ptNodeParams) > length
- || !readingHelper.isMatchedCodePoint(ptNodeParams, 0 /* index */,
- searchCodePoints[matchedCodePointCount])) {
- // Current node has too many code points or its first code point is different from
- // target code point. Skip this node and read the next sibling node.
- readingHelper.readNextSiblingNode(ptNodeParams);
- continue;
- }
- // Check following merged node code points.
- const int nodeCodePointCount = ptNodeParams.getCodePointCount();
- for (int j = 1; j < nodeCodePointCount; ++j) {
- if (!readingHelper.isMatchedCodePoint(ptNodeParams,
- j, searchCodePoints[matchedCodePointCount + j])) {
- // Different code point is found. The given word is not included in the dictionary.
- return NOT_A_DICT_POS;
- }
- }
- // All characters are matched.
- if (length == readingHelper.getTotalCodePointCount(ptNodeParams)) {
- // Terminal position is found.
- return ptNodeParams.getHeadPos();
- }
- if (!ptNodeParams.hasChildren()) {
- return NOT_A_DICT_POS;
- }
- // Advance to the children nodes.
- readingHelper.readChildNode(ptNodeParams);
- }
- // If we already traversed the tree further than the word is long, there means
- // there was no match (or we would have found it).
- return NOT_A_DICT_POS;
+ return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
}
int DynamicPatriciaTriePolicy::getProbability(const int unigramProbability,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp
index 398ff21cf..c3fe03d37 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp
@@ -19,6 +19,7 @@
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
+#include "utils/char_utils.h"
namespace latinime {
@@ -168,6 +169,89 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreor
return !isError();
}
+int DynamicPatriciaTrieReadingHelper::getCodePointsAndProbabilityAndReturnCodePointCount(
+ const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) {
+ // This method traverses parent nodes from the terminal by following parent pointers; thus,
+ // node code points are stored in the buffer in the reverse order.
+ int reverseCodePoints[maxCodePointCount];
+ const PtNodeParams terminalPtNodeParams(getPtNodeParams());
+ // First, read the terminal node and get its probability.
+ if (!isValidTerminalNode(terminalPtNodeParams)) {
+ // Node at the ptNodePos is not a valid terminal node.
+ *outUnigramProbability = NOT_A_PROBABILITY;
+ return 0;
+ }
+ // Store terminal node probability.
+ *outUnigramProbability = terminalPtNodeParams.getProbability();
+ // Then, following parent node link to the dictionary root and fetch node code points.
+ int totalCodePointCount = 0;
+ while (!isEnd()) {
+ const PtNodeParams ptNodeParams(getPtNodeParams());
+ totalCodePointCount = getTotalCodePointCount(ptNodeParams);
+ if (!ptNodeParams.isValid() || totalCodePointCount > maxCodePointCount) {
+ // The ptNodePos is not a valid terminal node position in the dictionary.
+ *outUnigramProbability = NOT_A_PROBABILITY;
+ return 0;
+ }
+ // Store node code points to buffer in the reverse order.
+ fetchMergedNodeCodePointsInReverseOrder(ptNodeParams, getPrevTotalCodePointCount(),
+ reverseCodePoints);
+ // Follow parent node toward the root node.
+ readParentNode(ptNodeParams);
+ }
+ if (isError()) {
+ // The node position or the dictionary is invalid.
+ *outUnigramProbability = NOT_A_PROBABILITY;
+ return 0;
+ }
+ // Reverse the stored code points to output them.
+ for (int i = 0; i < totalCodePointCount; ++i) {
+ outCodePoints[i] = reverseCodePoints[totalCodePointCount - i - 1];
+ }
+ return totalCodePointCount;
+}
+
+int DynamicPatriciaTrieReadingHelper::getTerminalPtNodePositionOfWord(const int *const inWord,
+ const int length, const bool forceLowerCaseSearch) {
+ int searchCodePoints[length];
+ for (int i = 0; i < length; ++i) {
+ searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i];
+ }
+ while (!isEnd()) {
+ const PtNodeParams ptNodeParams(getPtNodeParams());
+ const int matchedCodePointCount = getPrevTotalCodePointCount();
+ if (getTotalCodePointCount(ptNodeParams) > length
+ || !isMatchedCodePoint(ptNodeParams, 0 /* index */,
+ searchCodePoints[matchedCodePointCount])) {
+ // Current node has too many code points or its first code point is different from
+ // target code point. Skip this node and read the next sibling node.
+ readNextSiblingNode(ptNodeParams);
+ continue;
+ }
+ // Check following merged node code points.
+ const int nodeCodePointCount = ptNodeParams.getCodePointCount();
+ for (int j = 1; j < nodeCodePointCount; ++j) {
+ if (!isMatchedCodePoint(ptNodeParams, j, searchCodePoints[matchedCodePointCount + j])) {
+ // Different code point is found. The given word is not included in the dictionary.
+ return NOT_A_DICT_POS;
+ }
+ }
+ // All characters are matched.
+ if (length == getTotalCodePointCount(ptNodeParams)) {
+ // Terminal position is found.
+ return ptNodeParams.getHeadPos();
+ }
+ if (!ptNodeParams.hasChildren()) {
+ return NOT_A_DICT_POS;
+ }
+ // Advance to the children nodes.
+ readChildNode(ptNodeParams);
+ }
+ // If we already traversed the tree further than the word is long, there means
+ // there was no match (or we would have found it).
+ return NOT_A_DICT_POS;
+}
+
// Read node array size and process empty node arrays. Nodes and arrays are counted up in this
// method to avoid an infinite loop.
void DynamicPatriciaTrieReadingHelper::nextPtNodeArray() {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h
index 1e9218e58..197027313 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h
@@ -198,6 +198,12 @@ class DynamicPatriciaTrieReadingHelper {
bool traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
TraversingEventListener *const listener);
+ int getCodePointsAndProbabilityAndReturnCodePointCount(const int maxCodePointCount,
+ int *const outCodePoints, int *const outUnigramProbability);
+
+ int getTerminalPtNodePositionOfWord(const int *const inWord, const int length,
+ const bool forceLowerCaseSearch);
+
private:
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieReadingHelper);