aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--native/src/defines.h2
-rw-r--r--native/src/unigram_dictionary.cpp67
-rw-r--r--native/src/unigram_dictionary.h14
3 files changed, 45 insertions, 38 deletions
diff --git a/native/src/defines.h b/native/src/defines.h
index a3edaab59..953905fb2 100644
--- a/native/src/defines.h
+++ b/native/src/defines.h
@@ -52,4 +52,6 @@
#define MAX_WORD_LENGTH_INTERNAL 64
+#define MAX_DEPTH_MULTIPLIER 3
+
#endif // LATINIME_DEFINES_H
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index b3479738e..8a9742bbf 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@@ -87,12 +87,11 @@ void UnigramDictionary::initSuggestions(int *codes, int codesSize, unsigned shor
int UnigramDictionary::getSuggestionCandidates(int inputLength, int skipPos,
int *nextLetters, int nextLettersSize) {
+ int initialPos = 0;
if (IS_LATEST_DICT_VERSION) {
- getWordsRec(DICTIONARY_HEADER_SIZE, 0, inputLength * 3, false, 1, 0, 0, skipPos,
- nextLetters, nextLettersSize);
- } else {
- getWordsRec(0, 0, inputLength * 3, false, 1, 0, 0, skipPos, nextLetters, nextLettersSize);
+ initialPos = DICTIONARY_HEADER_SIZE;
}
+ getWords(initialPos, inputLength, skipPos, nextLetters, nextLettersSize);
// Get the word count
int suggestedWordsCount = 0;
@@ -174,50 +173,49 @@ bool UnigramDictionary::sameAsTyped(unsigned short *word, int length) {
static const char QUOTE = '\'';
+void UnigramDictionary::getWords(const int initialPos, const int inputLength, const int skipPos,
+ int *nextLetters, const int nextLettersSize) {
+ int initialPosition = initialPos;
+ const int count = Dictionary::getCount(DICT, &initialPosition);
+ getWordsRec(count, initialPosition, 0, inputLength * MAX_DEPTH_MULTIPLIER,
+ mInputLength <= 0, 1, 0, 0, skipPos, nextLetters, nextLettersSize);
+}
+
// snr : frequency?
-void UnigramDictionary::getWordsRec(int pos, int depth, int maxDepth, bool traverseAllNodes,
- int snr, int inputIndex, int diffs, int skipPos, int *nextLetters, int nextLettersSize) {
- // Optimization: Prune out words that are too long compared to how much was typed.
- if (depth > maxDepth || diffs > mMaxEditDistance) {
- return;
- }
- // get the count of nodes and increment pos.
- int count = Dictionary::getCount(DICT, &pos);
- int *currentChars = NULL;
+void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, const int depth,
+ const int maxDepth, const bool traverseAllNodes, const int snr, const int inputIndex,
+ const int diffs, const int skipPos, int *nextLetters, const int nextLettersSize) {
+ int position = pos;
// If inputIndex is greater than mInputLength, that means there are no proximity chars.
- if (mInputLength <= inputIndex) {
- traverseAllNodes = true;
- } else {
- currentChars = mInputCodes + (inputIndex * MAX_ALTERNATIVES);
- }
-
- for (int i = 0; i < count; ++i) {
+ for (int i = 0; i < childrenCount; ++i) {
// -- at char
- const unsigned short c = Dictionary::getChar(DICT, &pos);
+ const unsigned short c = Dictionary::getChar(DICT, &position);
// -- at flag/add
const unsigned short lowerC = toLowerCase(c);
- const bool terminal = Dictionary::getTerminal(DICT, &pos);
- const int childrenAddress = Dictionary::getAddress(DICT, &pos);
+ const bool terminal = Dictionary::getTerminal(DICT, &position);
+ int childrenPosition = Dictionary::getAddress(DICT, &position);
int matchedProximityCharId = -1;
- const bool needsToTraverseNextNode = childrenAddress != 0;
+ const bool needsToTraverseNextNode = childrenPosition != 0;
// -- after address or flag
int freq = 1;
// If terminal, increment pos
- if (terminal) freq = Dictionary::getFreq(DICT, IS_LATEST_DICT_VERSION, &pos);
+ if (terminal) freq = Dictionary::getFreq(DICT, IS_LATEST_DICT_VERSION, &position);
// -- after add or freq
bool newTraverseAllNodes = traverseAllNodes;
int newSnr = snr;
int newDiffs = diffs;
int newInputIndex = inputIndex;
+ const int newDepth = depth + 1;
// If we are only doing traverseAllNodes, no need to look at the typed characters.
- if (traverseAllNodes || needsToSkipCurrentNode(c, currentChars[0], skipPos, depth)) {
+ if (traverseAllNodes || needsToSkipCurrentNode(c, inputIndex, skipPos, depth)) {
mWord[depth] = c;
if (traverseAllNodes && terminal) {
onTerminalWhenUserTypedLengthIsGreaterThanInputLength(mWord, mInputLength, depth,
snr, nextLetters, nextLettersSize, skipPos, freq);
}
} else {
+ int *currentChars = mInputCodes + (inputIndex * MAX_ALTERNATIVES);
matchedProximityCharId = getMatchedProximityId(currentChars, lowerC, c, skipPos);
if (matchedProximityCharId < 0) continue;
mWord[depth] = c;
@@ -236,8 +234,17 @@ void UnigramDictionary::getWordsRec(int pos, int depth, int maxDepth, bool trave
newDiffs += (matchedProximityCharId > 0);
++newInputIndex;
}
+ // Optimization: Prune out words that are too long compared to how much was typed.
+ if (newDepth > maxDepth || newDiffs > mMaxEditDistance) {
+ continue;
+ }
+ if (mInputLength <= newInputIndex) {
+ newTraverseAllNodes = true;
+ }
if (needsToTraverseNextNode) {
- getWordsRec(childrenAddress, depth + 1, maxDepth, newTraverseAllNodes,
+ // get the count of nodes and increment childAddress.
+ const int count = Dictionary::getCount(DICT, &childrenPosition);
+ getWordsRec(count, childrenPosition, newDepth, maxDepth, newTraverseAllNodes,
newSnr, newInputIndex, newDiffs, skipPos, nextLetters, nextLettersSize);
}
}
@@ -265,17 +272,17 @@ inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsSameAsInputLength(
}
inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
- const unsigned short userTypedChar, const int skipPos, const int depth) {
+ const int inputIndex, const int skipPos, const int depth) {
+ const unsigned short userTypedChar = (mInputCodes + (inputIndex * MAX_ALTERNATIVES))[0];
// Skip the ' or other letter and continue deeper
return (c == QUOTE && userTypedChar != QUOTE) || skipPos == depth;
}
inline int UnigramDictionary::getMatchedProximityId(const int *currentChars,
const unsigned short lowerC, const unsigned short c, const int skipPos) {
- bool matched = false;
int j = 0;
while (currentChars[j] > 0) {
- matched = (currentChars[j] == lowerC || currentChars[j] == c);
+ const bool matched = (currentChars[j] == lowerC || currentChars[j] == c);
// If skipPos is defined, not to search proximity collections.
// First char is what user typed.
if (matched) {
diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h
index 259276cea..733b80c79 100644
--- a/native/src/unigram_dictionary.h
+++ b/native/src/unigram_dictionary.h
@@ -38,24 +38,22 @@ private:
int getAddress(int *pos);
int getFreq(int *pos);
int wideStrLen(unsigned short *str);
-
bool sameAsTyped(unsigned short *word, int length);
bool addWord(unsigned short *word, int length, int frequency);
unsigned short toLowerCase(unsigned short c);
- void getWordsRec(int pos, int depth, int maxDepth, bool completion, int snr,
- int inputIndex, int diffs, int skipPos, int *nextLetters, int nextLettersSize);
+ void getWordsRec(const int childrenCount, const int pos, const int depth, const int maxDepth,
+ const bool traverseAllNodes, const int snr, const int inputIndex, const int diffs,
+ const int skipPos, int *nextLetters, const int nextLettersSize);
+ void getWords(const int initialPos, const int inputLength, const int skipPos, int *nextLetters,
+ const int nextLettersSize);
void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
-
void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word,
const int mInputLength, const int depth, const int snr, int *nextLetters,
const int nextLettersSize, const int skipPos, const int freq);
-
void onTerminalWhenUserTypedLengthIsSameAsInputLength(unsigned short *word, const int depth,
const int snr, const int skipPos, const int freq, const int addedWeight);
-
bool needsToSkipCurrentNode(const unsigned short c,
- const unsigned short userTypedChar, const int skipPos, const int depth);
-
+ const int inputIndex, const int skipPos, const int depth);
int getMatchedProximityId(const int *currentChars, const unsigned short lowerC,
const unsigned short c, const int skipPos);