about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- native/src/dictionary.h 19
-rw-r--r-- native/src/unigram_dictionary.cpp 145
-rw-r--r-- native/src/unigram_dictionary.h 9
3 files changed, 109 insertions, 64 deletions
diff --git a/native/src/dictionary.h b/native/src/dictionary.h
index 1cd517cfb..da876242d 100644
--- a/native/src/dictionary.h
+++ b/native/src/dictionary.h
@@ -54,6 +54,10 @@ public:
static int getAddress(const unsigned char *dict, int *pos);
static int getFreq(const unsigned char *dict, const bool isLatestDictVersion, int *pos);
static int wideStrLen(unsigned short *str);
+ // decodes the node at pos into the four out-params; returns next sibling's position
+ static int setDictionaryValues(const unsigned char *dict, const bool isLatestDictVersion,
+ const int pos, unsigned short *c, int *childrenPosition,
+ bool *terminal, int *freq);
private:
bool hasBigram();
@@ -127,5 +131,20 @@ inline int Dictionary::wideStrLen(unsigned short *str) {
return end - str;
}
+inline int Dictionary::setDictionaryValues(const unsigned char *dict,
+ const bool isLatestDictVersion, const int pos, unsigned short *c,int *childrenPosition,
+ bool *terminal, int *freq) { // Decodes the node at pos in dict into the four out-params.
+ int position = pos; // local cursor handed by address to each getter (presumably advanced by them)
+ // Cursor is at the node's character: read it into *c.
+ *c = Dictionary::getChar(dict, &position);
+ // Cursor is at the flag/address field: read the terminal flag, then the children address.
+ *terminal = Dictionary::getTerminal(dict, &position);
+ *childrenPosition = Dictionary::getAddress(dict, &position);
+ // Cursor is after the address or flag: freq is read only for terminal nodes, otherwise it is 1.
+ *freq = (*terminal) ? Dictionary::getFreq(dict, isLatestDictVersion, &position) : 1;
+ // The cursor left by the reads above is returned as the next sibling's position.
+ return position;
+}
+
}; // namespace latinime
#endif // LATINIME_DICTIONARY_H
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index 8a9742bbf..fa4e29632 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@@ -15,9 +15,9 @@
** limitations under the License.
*/
+#include <assert.h>
#include <stdio.h>
#include <fcntl.h>
-#include <sys/mman.h>
#include <string.h>
#define LOG_TAG "LatinIME: unigram_dictionary.cpp"
@@ -185,66 +185,24 @@ void UnigramDictionary::getWords(const int initialPos, const int inputLength, co
void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, const int depth,
const int maxDepth, const bool traverseAllNodes, const int snr, const int inputIndex,
const int diffs, const int skipPos, int *nextLetters, const int nextLettersSize) {
- int position = pos;
- // If inputIndex is greater than mInputLength, that means there are no proximity chars.
+ int siblingPos = pos;
for (int i = 0; i < childrenCount; ++i) {
- // -- at char
- const unsigned short c = Dictionary::getChar(DICT, &position);
- // -- at flag/add
- const unsigned short lowerC = toLowerCase(c);
- const bool terminal = Dictionary::getTerminal(DICT, &position);
- int childrenPosition = Dictionary::getAddress(DICT, &position);
- int matchedProximityCharId = -1;
- const bool needsToTraverseNextNode = childrenPosition != 0;
- // -- after address or flag
- int freq = 1;
- // If terminal, increment pos
- if (terminal) freq = Dictionary::getFreq(DICT, IS_LATEST_DICT_VERSION, &position);
- // -- after add or freq
- bool newTraverseAllNodes = traverseAllNodes;
- int newSnr = snr;
- int newDiffs = diffs;
- int newInputIndex = inputIndex;
- const int newDepth = depth + 1;
-
- // If we are only doing traverseAllNodes, no need to look at the typed characters.
- if (traverseAllNodes || needsToSkipCurrentNode(c, inputIndex, skipPos, depth)) {
- mWord[depth] = c;
- if (traverseAllNodes && terminal) {
- onTerminalWhenUserTypedLengthIsGreaterThanInputLength(mWord, mInputLength, depth,
- snr, nextLetters, nextLettersSize, skipPos, freq);
- }
- } else {
- int *currentChars = mInputCodes + (inputIndex * MAX_ALTERNATIVES);
- matchedProximityCharId = getMatchedProximityId(currentChars, lowerC, c, skipPos);
- if (matchedProximityCharId < 0) continue;
- mWord[depth] = c;
- // If inputIndex is greater than mInputLength, that means there is no
- // proximity chars. So, we don't need to check proximity.
- const int addedWeight = matchedProximityCharId == 0 ? TYPED_LETTER_MULTIPLIER : 1;
- const bool isSameAsUserTypedLength = mInputLength == inputIndex + 1;
- if (isSameAsUserTypedLength && terminal) {
- onTerminalWhenUserTypedLengthIsSameAsInputLength(mWord, depth, snr,
- skipPos, freq, addedWeight);
- }
- if (!needsToTraverseNextNode) continue;
- // Start traversing all nodes after the index exceeds the user typed length
- newTraverseAllNodes = isSameAsUserTypedLength;
- newSnr *= addedWeight;
- newDiffs += (matchedProximityCharId > 0);
- ++newInputIndex;
- }
- // Optimization: Prune out words that are too long compared to how much was typed.
- if (newDepth > maxDepth || newDiffs > mMaxEditDistance) {
- continue;
- }
- if (mInputLength <= newInputIndex) {
- newTraverseAllNodes = true;
- }
- if (needsToTraverseNextNode) {
- // get the count of nodes and increment childAddress.
- const int count = Dictionary::getCount(DICT, &childrenPosition);
- getWordsRec(count, childrenPosition, newDepth, maxDepth, newTraverseAllNodes,
+ int newCount;
+ int newChildPosition;
+ int newDepth;
+ bool newTraverseAllNodes;
+ int newSnr;
+ int newInputIndex;
+ int newDiffs;
+ int newSiblingPos;
+ const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, depth, maxDepth,
+ traverseAllNodes, snr, inputIndex, diffs, skipPos, nextLetters, nextLettersSize,
+ &newCount, &newChildPosition, &newDepth, &newTraverseAllNodes, &newSnr,
+ &newInputIndex, &newDiffs, &newSiblingPos);
+ siblingPos = newSiblingPos;
+
+ if (needsToTraverseChildrenNodes) {
+ getWordsRec(newCount, newChildPosition, newDepth, maxDepth, newTraverseAllNodes,
newSnr, newInputIndex, newDiffs, skipPos, nextLetters, nextLettersSize);
}
}
@@ -279,7 +237,8 @@ inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
}
inline int UnigramDictionary::getMatchedProximityId(const int *currentChars,
- const unsigned short lowerC, const unsigned short c, const int skipPos) {
+ const unsigned short c, const int skipPos) {
+ const unsigned short lowerC = toLowerCase(c);
int j = 0;
while (currentChars[j] > 0) {
const bool matched = (currentChars[j] == lowerC || currentChars[j] == c);
@@ -295,4 +254,68 @@ inline int UnigramDictionary::getMatchedProximityId(const int *currentChars,
return -1;
}
+inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth,
+ const int maxDepth, const bool traverseAllNodes, const int snr, const int inputIndex,
+ const int diffs, const int skipPos, int *nextLetters, const int nextLettersSize,
+ int *newCount, int *newChildPosition, int *newDepth, bool *newTraverseAllNodes,
+ int *newSnr, int*newInputIndex, int *newDiffs, int *nextSiblingPosition) { // Evaluates the node at pos.
+ unsigned short c; // this and the next three locals are filled by setDictionaryValues below
+ int childPosition;
+ bool terminal;
+ int freq;
+ *nextSiblingPosition = Dictionary::setDictionaryValues(DICT, IS_LATEST_DICT_VERSION, pos, &c,
+ &childPosition, &terminal, &freq);
+ // Returns true iff the caller should descend; only then are all new* outputs written.
+ const bool needsToTraverseChildrenNodes = childPosition != 0; // 0 is treated as "no children"
+ // First branch: the node is accepted without matching it against the typed input.
+ // If we are only doing traverseAllNodes, no need to look at the typed characters.
+ if (traverseAllNodes || needsToSkipCurrentNode(c, inputIndex, skipPos, depth)) {
+ mWord[depth] = c;
+ if (traverseAllNodes && terminal) {
+ onTerminalWhenUserTypedLengthIsGreaterThanInputLength(mWord, mInputLength, depth,
+ snr, nextLetters, nextLettersSize, skipPos, freq);
+ }
+ if (!needsToTraverseChildrenNodes) return false;
+ *newTraverseAllNodes = traverseAllNodes;
+ *newSnr = snr;
+ *newDiffs = diffs;
+ *newInputIndex = inputIndex; // the input index is not advanced in this branch
+ *newDepth = depth + 1;
+ } else {
+ int *currentChars = mInputCodes + (inputIndex * MAX_ALTERNATIVES); // slice for inputIndex
+ int matchedProximityCharId = getMatchedProximityId(currentChars, c, skipPos);
+ if (matchedProximityCharId < 0) return false; // negative id: c matched nothing, drop the node
+ mWord[depth] = c;
+ // If inputIndex is greater than mInputLength, that means there are no
+ // proximity chars. So, we don't need to check proximity.
+ const int addedWeight = matchedProximityCharId == 0 ? TYPED_LETTER_MULTIPLIER : 1;
+ const bool isSameAsUserTypedLength = mInputLength == inputIndex + 1;
+ if (isSameAsUserTypedLength && terminal) {
+ onTerminalWhenUserTypedLengthIsSameAsInputLength(mWord, depth, snr,
+ skipPos, freq, addedWeight);
+ }
+ if (!needsToTraverseChildrenNodes) return false;
+ // Start traversing all nodes after the index exceeds the user typed length.
+ *newTraverseAllNodes = isSameAsUserTypedLength;
+ *newSnr = snr * addedWeight;
+ *newDiffs = diffs + (matchedProximityCharId > 0); // the bool adds 1 when the matched id is > 0
+ *newInputIndex = inputIndex + 1;
+ *newDepth = depth + 1;
+ }
+ // Optimization: prune words that are too long for what was typed or exceed mMaxEditDistance.
+ if (*newDepth > maxDepth || *newDiffs > mMaxEditDistance) {
+ return false;
+ }
+
+ // Once *newInputIndex reaches mInputLength (no proximity chars are left), traverse all nodes.
+ if (mInputLength <= *newInputIndex) {
+ *newTraverseAllNodes = true;
+ }
+ // Get the count of child nodes; getCount takes &childPosition and increments the child address.
+ *newCount = Dictionary::getCount(DICT, &childPosition);
+ *newChildPosition = childPosition;
+ if (DEBUG_DICT) assert(needsToTraverseChildrenNodes); // both branches returned false otherwise
+ return needsToTraverseChildrenNodes;
+}
+
} // namespace latinime
diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h
index 733b80c79..c02d366de 100644
--- a/native/src/unigram_dictionary.h
+++ b/native/src/unigram_dictionary.h
@@ -54,9 +54,12 @@ private:
const int snr, const int skipPos, const int freq, const int addedWeight);
bool needsToSkipCurrentNode(const unsigned short c,
const int inputIndex, const int skipPos, const int depth);
- int getMatchedProximityId(const int *currentChars, const unsigned short lowerC,
- const unsigned short c, const int skipPos);
-
+ int getMatchedProximityId(const int *currentChars, const unsigned short c, const int skipPos);
+ bool processCurrentNode(const int pos, const int depth,
+ const int maxDepth, const bool traverseAllNodes, const int snr, const int inputIndex,
+ const int diffs, const int skipPos, int *nextLetters, const int nextLettersSize,
+ int *newCount, int *newChildPosition, int *newDepth, bool *newTraverseAllNodes,
+ int *newSnr, int*newInputIndex, int *newDiffs, int *nextSiblingPosition); // true: visit children
const unsigned char *DICT;
const int MAX_WORDS;
const int MAX_WORD_LENGTH;