about | summary | refs | log | tree | commit | diff | stats
path: root/native/src/unigram_dictionary.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'native/src/unigram_dictionary.cpp')
-rw-r--r-- native/src/unigram_dictionary.cpp 180
1 files changed, 71 insertions, 109 deletions
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index e3296f12a..290e9f997 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@@ -153,6 +153,13 @@ int UnigramDictionary::getSuggestions(const ProximityInfo *proximityInfo, const
if (DEBUG_DICT) {
LOGI("Returning %d words", suggestedWordsCount);
+ /// Print the returned words (debug only); word j starts at mOutputChars + j * MAX_WORD_LENGTH
+ for (int j = 0; j < suggestedWordsCount; ++j) {
+ short unsigned int* w = mOutputChars + j * MAX_WORD_LENGTH;
+ char s[MAX_WORD_LENGTH + 1] = {0}; // one extra zeroed slot: %s always finds a terminator
+ for (int i = 0; i < MAX_WORD_LENGTH; i++) s[i] = w[i]; // stay inside this word's slot
+ LOGI("%s %i", s, mFrequencies[j]);
+ }
LOGI("Next letters: ");
for (int k = 0; k < NEXT_LETTERS_SIZE; k++) {
if (mNextLettersFrequency[k] > 0) {
@@ -322,16 +329,6 @@ bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency)
return false;
}
-inline void UnigramDictionary::addWordAlternatesSpellings(const uint8_t* const root, int pos,
- int depth, int finalFreq) {
- // TODO: actually add alternates when the format supports it.
-}
-
-static inline bool hasAlternateSpellings(uint8_t flags) {
- // TODO: when the format supports it, return the actual value.
- return false;
-}
-
static inline unsigned short toBaseLowerCase(unsigned short c) {
if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {
c = BASE_CHARS[c];
@@ -372,7 +369,7 @@ void UnigramDictionary::getSuggestionCandidates(const int skipPos,
assert(missingPos < mInputLength);
}
int rootPosition = ROOT_POS;
- // Get the number of child of root, then increment the position
+ // Get the number of children of root, then increment the position
int childCount = Dictionary::getCount(DICT_ROOT, &rootPosition);
int depth = 0;
@@ -657,22 +654,19 @@ inline UnigramDictionary::ProximityType UnigramDictionary::getMatchedProximityId
}
inline void UnigramDictionary::onTerminal(unsigned short int* word, const int depth,
- const uint8_t* const root, const uint8_t flags, int pos,
+ const uint8_t* const root, const uint8_t flags, const int pos,
const int inputIndex, const int matchWeight, const int skipPos,
const int excessivePos, const int transposedPos, const int freq, const bool sameLength,
int* nextLetters, const int nextLettersSize) {
const bool isSameAsTyped = sameLength ? sameAsTyped(word, depth + 1) : false;
- const bool hasAlternates = hasAlternateSpellings(flags);
- if (isSameAsTyped && !hasAlternates) return;
+ if (isSameAsTyped) return;
if (depth >= MIN_SUGGEST_DEPTH) {
const int finalFreq = calculateFinalFreq(inputIndex, depth, matchWeight, skipPos,
excessivePos, transposedPos, freq, sameLength);
if (!isSameAsTyped)
addWord(word, depth + 1, finalFreq);
- if (hasAlternates)
- addWordAlternatesSpellings(DICT_ROOT, pos, flags, finalFreq);
}
if (sameLength && depth >= mInputLength && skipPos < 0) {
@@ -680,6 +674,47 @@ inline void UnigramDictionary::onTerminal(unsigned short int* word, const int de
}
}
+bool UnigramDictionary::getSplitTwoWordsSuggestion(const int inputLength,
+ const int firstWordStartPos, const int firstWordLength, const int secondWordStartPos,
+ const int secondWordLength, const bool isSpaceProximity) {
+ if (inputLength >= MAX_WORD_LENGTH) return false;
+ if (0 >= firstWordLength || 0 >= secondWordLength || firstWordStartPos >= secondWordStartPos
+ || firstWordStartPos < 0 || secondWordStartPos + secondWordLength > inputLength)
+ return false;
+ const int newWordLength = firstWordLength + secondWordLength + 1;
+ // Stack VLA holding "<first word> SPACE <second word>", hence the extra +1 slot above.
+ unsigned short word[newWordLength];
+ const int firstFreq = getMostFrequentWordLike(firstWordStartPos, firstWordLength, mWord);
+ if (DEBUG_DICT) {
+ LOGI("First freq: %d", firstFreq);
+ }
+ if (firstFreq <= 0) return false;
+ // Copy the first word out of mWord before the second lookup is handed mWord again.
+ for (int i = 0; i < firstWordLength; ++i) {
+ word[i] = mWord[i];
+ }
+ // NOTE(review): presumably each lookup leaves its best match in mWord - confirm.
+ const int secondFreq = getMostFrequentWordLike(secondWordStartPos, secondWordLength, mWord);
+ if (DEBUG_DICT) {
+ LOGI("Second freq: %d", secondFreq);
+ }
+ if (secondFreq <= 0) return false;
+ // Both halves matched (freq > 0): join them with SPACE, second word read from mWord[0..].
+ word[firstWordLength] = SPACE;
+ for (int i = (firstWordLength + 1); i < newWordLength; ++i) {
+ word[i] = mWord[i - firstWordLength - 1];
+ }
+ // Score the pair from both frequencies and submit it; addWord's result is not checked.
+ int pairFreq = calcFreqForSplitTwoWords(TYPED_LETTER_MULTIPLIER, firstWordLength,
+ secondWordLength, firstFreq, secondFreq, isSpaceProximity);
+ if (DEBUG_DICT) {
+ LOGI("Split two words: %d, %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength,
+ TYPED_LETTER_MULTIPLIER);
+ }
+ addWord(word, newWordLength, pairFreq);
+ return true;
+}
+
#ifndef NEW_DICTIONARY_FORMAT
// TODO: Don't forget to bring inline functions back to over where they are used.
@@ -725,8 +760,8 @@ void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, cons
}
}
-inline int UnigramDictionary::getBestWordFreq(const int startInputIndex, const int inputLength,
- unsigned short *word) {
+inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex,
+ const int inputLength, unsigned short *word) {
int pos = ROOT_POS;
int count = Dictionary::getCount(DICT_ROOT, &pos);
int maxFreq = 0;
@@ -860,52 +895,10 @@ int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offs
return NOT_VALID_WORD;
}
-
// The following functions will be modified.
-bool UnigramDictionary::getSplitTwoWordsSuggestion(const int inputLength,
- const int firstWordStartPos, const int firstWordLength, const int secondWordStartPos,
- const int secondWordLength, const bool isSpaceProximity) {
- if (inputLength >= MAX_WORD_LENGTH) return false;
- if (0 >= firstWordLength || 0 >= secondWordLength || firstWordStartPos >= secondWordStartPos
- || firstWordStartPos < 0 || secondWordStartPos + secondWordLength > inputLength)
- return false;
- const int newWordLength = firstWordLength + secondWordLength + 1;
- // Allocating variable length array on stack
- unsigned short word[newWordLength];
- const int firstFreq = getBestWordFreq(firstWordStartPos, firstWordLength, mWord);
- if (DEBUG_DICT) {
- LOGI("First freq: %d", firstFreq);
- }
- if (firstFreq <= 0) return false;
-
- for (int i = 0; i < firstWordLength; ++i) {
- word[i] = mWord[i];
- }
-
- const int secondFreq = getBestWordFreq(secondWordStartPos, secondWordLength, mWord);
- if (DEBUG_DICT) {
- LOGI("Second freq: %d", secondFreq);
- }
- if (secondFreq <= 0) return false;
-
- word[firstWordLength] = SPACE;
- for (int i = (firstWordLength + 1); i < newWordLength; ++i) {
- word[i] = mWord[i - firstWordLength - 1];
- }
-
- int pairFreq = calcFreqForSplitTwoWords(TYPED_LETTER_MULTIPLIER, firstWordLength,
- secondWordLength, firstFreq, secondFreq, isSpaceProximity);
- if (DEBUG_DICT) {
- LOGI("Split two words: %d, %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength,
- TYPED_LETTER_MULTIPLIER);
- }
- addWord(word, newWordLength, pairFreq);
- return true;
-}
-
-inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth,
- const int maxDepth, const bool traverseAllNodes, int matchWeight, int inputIndex,
- const int diffs, const int skipPos, const int excessivePos, const int transposedPos,
+inline bool UnigramDictionary::processCurrentNode(const int initialPos, const int initialDepth,
+ const int maxDepth, const bool initialTraverseAllNodes, int matchWeight, int inputIndex,
+ const int initialDiffs, const int skipPos, const int excessivePos, const int transposedPos,
int *nextLetters, const int nextLettersSize, int *newCount, int *newChildPosition,
bool *newTraverseAllNodes, int *newMatchRate, int *newInputIndex, int *newDiffs,
int *nextSiblingPosition, int *nextOutputIndex) {
@@ -922,6 +915,11 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
int freq;
bool isSameAsUserTypedLength = false;
+ const int pos = initialPos;
+ const int depth = initialDepth;
+ const bool traverseAllNodes = initialTraverseAllNodes; // bool, like the parameter it copies
+ const int diffs = initialDiffs;
+
const uint8_t flags = 0; // No flags for now
if (excessivePos == depth && inputIndex < mInputLength - 1) ++inputIndex;
@@ -993,53 +991,12 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
#else // NEW_DICTIONARY_FORMAT
-bool UnigramDictionary::getSplitTwoWordsSuggestion(const int inputLength,
- const int firstWordStartPos, const int firstWordLength, const int secondWordStartPos,
- const int secondWordLength, const bool isSpaceProximity) {
- if (inputLength >= MAX_WORD_LENGTH) return false;
- if (0 >= firstWordLength || 0 >= secondWordLength || firstWordStartPos >= secondWordStartPos
- || firstWordStartPos < 0 || secondWordStartPos + secondWordLength > inputLength)
- return false;
- const int newWordLength = firstWordLength + secondWordLength + 1;
- // Allocating variable length array on stack
- unsigned short word[newWordLength];
- const int firstFreq = getBestWordFreq(firstWordStartPos, firstWordLength, mWord);
- if (DEBUG_DICT) {
- LOGI("First freq: %d", firstFreq);
- }
- if (firstFreq <= 0) return false;
-
- for (int i = 0; i < firstWordLength; ++i) {
- word[i] = mWord[i];
- }
-
- const int secondFreq = getBestWordFreq(secondWordStartPos, secondWordLength, mWord);
- if (DEBUG_DICT) {
- LOGI("Second freq: %d", secondFreq);
- }
- if (secondFreq <= 0) return false;
-
- word[firstWordLength] = SPACE;
- for (int i = (firstWordLength + 1); i < newWordLength; ++i) {
- word[i] = mWord[i - firstWordLength - 1];
- }
-
- int pairFreq = calcFreqForSplitTwoWords(TYPED_LETTER_MULTIPLIER, firstWordLength,
- secondWordLength, firstFreq, secondFreq, isSpaceProximity);
- if (DEBUG_DICT) {
- LOGI("Split two words: %d, %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength,
- TYPED_LETTER_MULTIPLIER);
- }
- addWord(word, newWordLength, pairFreq);
- return true;
-}
-
-inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth,
- const int maxDepth, const bool traverseAllNodes, int matchWeight, int inputIndex,
- const int diffs, const int skipPos, const int excessivePos, const int transposedPos,
+inline bool UnigramDictionary::processCurrentNode(const int initialPos, const int initialDepth,
+ const int maxDepth, const bool initialTraverseAllNodes, int matchWeight, int inputIndex,
+ const int initialDiffs, const int skipPos, const int excessivePos, const int transposedPos,
int *nextLetters, const int nextLettersSize, int *newCount, int *newChildPosition,
bool *newTraverseAllNodes, int *newMatchRate, int *newInputIndex, int *newDiffs,
- int *nextSiblingPosition, int *nextOutputIndex) {
+ int *nextSiblingPosition, int *newOutputIndex) {
if (DEBUG_DICT) {
int inputCount = 0;
if (skipPos >= 0) ++inputCount;
@@ -1053,13 +1010,18 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
int freq;
bool isSameAsUserTypedLength = false;
+ int pos = initialPos;
+ int depth = initialDepth;
+ bool traverseAllNodes = initialTraverseAllNodes; // bool, like the parameter it copies
+ int diffs = initialDiffs;
+
const uint8_t flags = 0; // No flags for now
if (excessivePos == depth && inputIndex < mInputLength - 1) ++inputIndex;
*nextSiblingPosition = Dictionary::setDictionaryValues(DICT_ROOT, IS_LATEST_DICT_VERSION, pos,
&c, &childPosition, &terminal, &freq);
- *nextOutputIndex = depth + 1;
+ *newOutputIndex = depth + 1;
const bool needsToTraverseChildrenNodes = childPosition != 0;