aboutsummaryrefslogtreecommitdiffstats
path: root/native/src
diff options
context:
space:
mode:
Diffstat (limited to 'native/src')
-rw-r--r--native/src/correction.cpp36
-rw-r--r--native/src/correction.h3
-rw-r--r--native/src/defines.h31
-rw-r--r--native/src/unigram_dictionary.cpp70
-rw-r--r--native/src/unigram_dictionary.h9
-rw-r--r--native/src/words_priority_queue.h7
-rw-r--r--native/src/words_priority_queue_pool.h15
7 files changed, 103 insertions, 68 deletions
diff --git a/native/src/correction.cpp b/native/src/correction.cpp
index c40f94b3f..75831b69d 100644
--- a/native/src/correction.cpp
+++ b/native/src/correction.cpp
@@ -168,8 +168,8 @@ int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLen
const int outputIndex = mTerminalOutputIndex;
const int inputIndex = mTerminalInputIndex;
*wordLength = outputIndex + 1;
- if (mProximityInfo->sameAsTyped(mWord, outputIndex + 1) || outputIndex < MIN_SUGGEST_DEPTH) {
- return -1;
+ if (outputIndex < MIN_SUGGEST_DEPTH) {
+ return NOT_A_FREQUENCY;
}
*word = mWord;
@@ -215,20 +215,10 @@ int Correction::goDownTree(
}
// TODO: remove
-int Correction::getOutputIndex() {
- return mOutputIndex;
-}
-
-// TODO: remove
int Correction::getInputIndex() {
return mInputIndex;
}
-// TODO: remove
-bool Correction::needsToTraverseAllNodes() {
- return mNeedsToTraverseAllNodes;
-}
-
void Correction::incrementInputIndex() {
++mInputIndex;
}
@@ -278,13 +268,12 @@ void Correction::addCharToCurrentWord(const int32_t c) {
mWord, mOutputIndex + 1);
}
-// TODO: inline?
Correction::CorrectionType Correction::processSkipChar(
const int32_t c, const bool isTerminal, const bool inputIndexIncremented) {
addCharToCurrentWord(c);
- if (needsToTraverseAllNodes() && isTerminal) {
- mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0);
- mTerminalOutputIndex = mOutputIndex;
+ mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0);
+ mTerminalOutputIndex = mOutputIndex;
+ if (mNeedsToTraverseAllNodes && isTerminal) {
incrementOutputIndex();
return TRAVERSE_ALL_ON_TERMINAL;
} else {
@@ -293,6 +282,13 @@ Correction::CorrectionType Correction::processSkipChar(
}
}
+Correction::CorrectionType Correction::processUnrelatedCorrectionType() {
+ // Needs to set mTerminalInputIndex and mTerminalOutputIndex before returning any CorrectionType
+ mTerminalInputIndex = mInputIndex;
+ mTerminalOutputIndex = mOutputIndex;
+ return UNRELATED;
+}
+
inline bool isEquivalentChar(ProximityInfo::ProximityType type) {
return type == ProximityInfo::EQUIVALENT_CHAR;
}
@@ -301,7 +297,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
const int32_t c, const bool isTerminal) {
const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount);
if (correctionCount > mMaxErrors) {
- return UNRELATED;
+ return processUnrelatedCorrectionType();
}
// TODO: Change the limit if we'll allow two or more corrections
@@ -381,7 +377,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
AKLOGI("UNRELATED(0): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
mTransposedCount, mExcessiveCount, c);
}
- return UNRELATED;
+ return processUnrelatedCorrectionType();
}
}
@@ -484,7 +480,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
AKLOGI("UNRELATED(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
mTransposedCount, mExcessiveCount, c);
}
- return UNRELATED;
+ return processUnrelatedCorrectionType();
}
} else if (secondTransposing) {
// If inputIndex is greater than mInputLength, that means there is no
@@ -539,6 +535,8 @@ Correction::CorrectionType Correction::processCharAndCalcState(
}
return ON_TERMINAL;
} else {
+ mTerminalInputIndex = mInputIndex - 1;
+ mTerminalOutputIndex = mOutputIndex - 1;
return NOT_ON_TERMINAL;
}
}
diff --git a/native/src/correction.h b/native/src/correction.h
index 4012e7e82..a0fd55fd9 100644
--- a/native/src/correction.h
+++ b/native/src/correction.h
@@ -48,7 +48,6 @@ class Correction {
void checkState();
bool initProcessState(const int index);
- int getOutputIndex();
int getInputIndex();
virtual ~Correction();
@@ -115,11 +114,11 @@ class Correction {
private:
inline void incrementInputIndex();
inline void incrementOutputIndex();
- inline bool needsToTraverseAllNodes();
inline void startToTraverseAllNodes();
inline bool isQuote(const unsigned short c);
inline CorrectionType processSkipChar(
const int32_t c, const bool isTerminal, const bool inputIndexIncremented);
+ inline CorrectionType processUnrelatedCorrectionType();
inline void addCharToCurrentWord(const int32_t c);
const int TYPED_LETTER_MULTIPLIER;
diff --git a/native/src/defines.h b/native/src/defines.h
index 01ef65678..ce3f85acd 100644
--- a/native/src/defines.h
+++ b/native/src/defines.h
@@ -22,9 +22,23 @@
#include <cutils/log.h>
#define AKLOGE ALOGE
#define AKLOGI ALOGI
+
+#define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0)
+
+static char charBuf[50];
+
+static void dumpWord(const unsigned short* word, const int length) {
+ for (int i = 0; i < length; ++i) {
+ charBuf[i] = word[i];
+ }
+ charBuf[length] = 0;
+ AKLOGI("[ %s ]", charBuf);
+}
+
#else
#define AKLOGE(fmt, ...)
#define AKLOGI(fmt, ...)
+#define DUMP_WORD(word, length)
#endif
#ifdef FLAG_DO_PROFILE
@@ -106,18 +120,6 @@ static void prof_out(void) {
#define DEBUG_CORRECTION_FREQ true
#define DEBUG_WORDS_PRIORITY_QUEUE true
-#define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0)
-
-static char charBuf[50];
-
-static void dumpWord(const unsigned short* word, const int length) {
- for (int i = 0; i < length; ++i) {
- charBuf[i] = word[i];
- }
- charBuf[length] = 0;
- AKLOGI("[ %s ]", charBuf);
-}
-
#else // FLAG_DBG
#define DEBUG_DICT false
@@ -131,7 +133,6 @@ static void dumpWord(const unsigned short* word, const int length) {
#define DEBUG_CORRECTION_FREQ false
#define DEBUG_WORDS_PRIORITY_QUEUE false
-#define DUMP_WORD(word, length)
#endif // FLAG_DBG
@@ -171,6 +172,7 @@ static void dumpWord(const unsigned short* word, const int length) {
#define EQUIVALENT_CHAR_WITHOUT_DISTANCE_INFO -2
#define PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO -3
#define NOT_A_INDEX -1
+#define NOT_A_FREQUENCY -1
#define KEYCODE_SPACE ' '
@@ -207,7 +209,8 @@ static void dumpWord(const unsigned short* word, const int length) {
// Word limit for sub queues used in WordsPriorityQueuePool. Sub queues are temporary queues used
// for better performance.
-#define SUB_QUEUE_MAX_WORDS 5
+// Holds up to 1 candidate for each word
+#define SUB_QUEUE_MAX_WORDS 1
#define SUB_QUEUE_MAX_COUNT 10
#define MAX_DEPTH_MULTIPLIER 3
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index ca7f0be0c..69e3200fc 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@@ -186,7 +186,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
PROF_OPEN;
PROF_START(0);
- // Note: This line is intentionally left blank
+ queuePool->clearAll();
PROF_END(0);
PROF_START(1);
@@ -241,18 +241,17 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
}
}
PROF_END(6);
+ if (DEBUG_WORDS_PRIORITY_QUEUE) {
+ queuePool->dumpSubQueue1TopSuggestions();
+ }
}
void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xCoordinates,
- const int *yCoordinates, const int *codes, const int inputLength,
- WordsPriorityQueue *queue, Correction *correction) {
+ const int *yCoordinates, const int *codes, const int inputLength, Correction *correction) {
if (DEBUG_DICT) {
AKLOGI("initSuggest");
}
proximityInfo->setInputParams(codes, inputLength, xCoordinates, yCoordinates);
- if (queue) {
- queue->clear();
- }
const int maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
correction->initCorrection(proximityInfo, inputLength, maxDepth);
}
@@ -264,15 +263,13 @@ void UnigramDictionary::getOneWordSuggestions(ProximityInfo *proximityInfo,
const int *xcoordinates, const int *ycoordinates, const int *codes,
const bool useFullEditDistance, const int inputLength, Correction *correction,
WordsPriorityQueuePool *queuePool) {
- WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
- initSuggestions(
- proximityInfo, xcoordinates, ycoordinates, codes, inputLength, masterQueue, correction);
- getSuggestionCandidates(useFullEditDistance, inputLength, correction, masterQueue,
+ initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction);
+ getSuggestionCandidates(useFullEditDistance, inputLength, correction, queuePool,
true /* doAutoCompletion */, DEFAULT_MAX_ERRORS);
}
void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
- const int inputLength, Correction *correction, WordsPriorityQueue *queue,
+ const int inputLength, Correction *correction, WordsPriorityQueuePool *queuePool,
const bool doAutoCompletion, const int maxErrors) {
// TODO: Remove setCorrectionParams
correction->setCorrectionParams(0, 0, 0,
@@ -292,7 +289,7 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
int firstChildPos;
const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos,
- correction, &childCount, &firstChildPos, &siblingPos, queue);
+ correction, &childCount, &firstChildPos, &siblingPos, queuePool);
// Update next sibling pos
correction->setTreeSiblingPos(outputIndex, siblingPos);
@@ -327,14 +324,34 @@ void UnigramDictionary::getMistypedSpaceWords(ProximityInfo *proximityInfo, cons
inline void UnigramDictionary::onTerminal(const int freq,
const TerminalAttributes& terminalAttributes, Correction *correction,
- WordsPriorityQueue *queue) {
+ WordsPriorityQueuePool *queuePool, const bool addToMasterQueue) {
+ const int inputIndex = correction->getInputIndex();
+ const bool addToSubQueue = inputIndex < SUB_QUEUE_MAX_COUNT;
+ if (!addToMasterQueue && !addToSubQueue) {
+ return;
+ }
+ WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
+ WordsPriorityQueue *subQueue = queuePool->getSubQueue1(inputIndex);
int wordLength;
unsigned short* wordPointer;
const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength);
- if (finalFreq >= 0) {
+ if (finalFreq != NOT_A_FREQUENCY) {
if (!terminalAttributes.isShortcutOnly()) {
- addWord(wordPointer, wordLength, finalFreq, queue);
+ if (addToMasterQueue) {
+ addWord(wordPointer, wordLength, finalFreq, masterQueue);
+ }
+ // TODO: Check the validity of "inputIndex == wordLength"
+ //if (addToSubQueue && inputIndex == wordLength) {
+ if (addToSubQueue) {
+ addWord(wordPointer, wordLength, finalFreq, subQueue);
+ }
}
+ // Please note that the shortcut candidates will be added to the master queue only.
+ if (!addToMasterQueue) {
+ return;
+ }
+
+ // From here, below is the code to add shortcut candidates.
TerminalAttributes::ShortcutIterator iterator = terminalAttributes.getShortcutIterator();
while (iterator.hasNextShortcutTarget()) {
// TODO: addWord only supports weak ordering, meaning we have no means to control the
@@ -345,7 +362,7 @@ inline void UnigramDictionary::onTerminal(const int freq,
uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL];
const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
MAX_WORD_LENGTH_INTERNAL, shortcutTarget);
- addWord(shortcutTarget, shortcutTargetStringLength, finalFreq, queue);
+ addWord(shortcutTarget, shortcutTargetStringLength, finalFreq, masterQueue);
}
}
}
@@ -411,8 +428,7 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo
}
// TODO: Remove initSuggestions and correction->setCorrectionParams
- initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength,
- 0 /* do not clear queue */, correction);
+ initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction);
correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
-1 /* transposedPos */, spaceProximityPos, missingSpacePos,
@@ -584,7 +600,7 @@ int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offs
// given level, as output into newCount when traversing this level's parent.
inline bool UnigramDictionary::processCurrentNode(const int initialPos,
Correction *correction, int *newCount,
- int *newChildrenPosition, int *nextSiblingPosition, WordsPriorityQueue *queue) {
+ int *newChildrenPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool) {
if (DEBUG_DICT) {
correction->checkState();
}
@@ -659,15 +675,13 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
} while (NOT_A_CHARACTER != c);
if (isTerminalNode) {
- if (needsToInvokeOnTerminal) {
- // The frequency should be here, because we come here only if this is actually
- // a terminal node, and we are on its last char.
- const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos);
- const int childrenAddressPos = BinaryFormat::skipFrequency(flags, pos);
- const int attributesPos = BinaryFormat::skipChildrenPosition(flags, childrenAddressPos);
- TerminalAttributes terminalAttributes(DICT_ROOT, flags, attributesPos);
- onTerminal(freq, terminalAttributes, correction, queue);
- }
+ // The frequency should be here, because we come here only if this is actually
+ // a terminal node, and we are on its last char.
+ const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos);
+ const int childrenAddressPos = BinaryFormat::skipFrequency(flags, pos);
+ const int attributesPos = BinaryFormat::skipChildrenPosition(flags, childrenAddressPos);
+ TerminalAttributes terminalAttributes(DICT_ROOT, flags, attributesPos);
+ onTerminal(freq, terminalAttributes, correction, queuePool, needsToInvokeOnTerminal);
// If there are more chars in this node, then this virtual node has children.
// If we are on the last char, this virtual node has children if this node has.
diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h
index 23581425a..5e7a7580f 100644
--- a/native/src/unigram_dictionary.h
+++ b/native/src/unigram_dictionary.h
@@ -93,14 +93,13 @@ class UnigramDictionary {
const int codesRemain, const int currentDepth, int* codesDest, Correction *correction,
WordsPriorityQueuePool* queuePool);
void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
- const int *ycoordinates, const int *codes, const int codesSize,
- WordsPriorityQueue *queue, Correction *correction);
+ const int *ycoordinates, const int *codes, const int codesSize, Correction *correction);
void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const bool useFullEditDistance,
const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool);
void getSuggestionCandidates(
const bool useFullEditDistance, const int inputLength, Correction *correction,
- WordsPriorityQueue* queue, const bool doAutoCompletion, const int maxErrors);
+ WordsPriorityQueuePool* queuePool, const bool doAutoCompletion, const int maxErrors);
void getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
const int *xcoordinates, const int *ycoordinates, const int *codes,
const bool useFullEditDistance, const int inputLength, const int spaceProximityPos,
@@ -114,12 +113,12 @@ class UnigramDictionary {
const int inputLength, const int spaceProximityPos, Correction *correction,
WordsPriorityQueuePool* queuePool);
void onTerminal(const int freq, const TerminalAttributes& terminalAttributes,
- Correction *correction, WordsPriorityQueue *queue);
+ Correction *correction, WordsPriorityQueuePool *queuePool, const bool addToMasterQueue);
bool needsToSkipCurrentNode(const unsigned short c,
const int inputIndex, const int skipPos, const int depth);
// Process a node by considering proximity, missing and excessive character
bool processCurrentNode(const int initialPos, Correction *correction, int *newCount,
- int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueue *queue);
+ int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool);
int getMostFrequentWordLike(const int startInputIndex, const int inputLength,
ProximityInfo *proximityInfo, unsigned short *word);
int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length,
diff --git a/native/src/words_priority_queue.h b/native/src/words_priority_queue.h
index ce5d2ce51..54bf27a59 100644
--- a/native/src/words_priority_queue.h
+++ b/native/src/words_priority_queue.h
@@ -128,6 +128,13 @@ class WordsPriorityQueue {
}
}
+ void dumpTopWord() {
+ if (size() <= 0) {
+ return;
+ }
+ DUMP_WORD(mSuggestions.top()->mWord, mSuggestions.top()->mWordLength);
+ }
+
private:
struct wordComparator {
bool operator ()(SuggestedWord * left, SuggestedWord * right) {
diff --git a/native/src/words_priority_queue_pool.h b/native/src/words_priority_queue_pool.h
index bf9619e19..5fa254852 100644
--- a/native/src/words_priority_queue_pool.h
+++ b/native/src/words_priority_queue_pool.h
@@ -58,6 +58,21 @@ class WordsPriorityQueuePool {
return mSubQueues2[id];
}
+ inline void clearAll() {
+ mMasterQueue->clear();
+ for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
+ mSubQueues1[i]->clear();
+ mSubQueues2[i]->clear();
+ }
+ }
+
+ void dumpSubQueue1TopSuggestions() {
+ AKLOGI("DUMP SUBQUEUE1 TOP SUGGESTIONS");
+ for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
+ mSubQueues1[i]->dumpTopWord();
+ }
+ }
+
private:
WordsPriorityQueue* mMasterQueue;
WordsPriorityQueue* mSubQueues1[SUB_QUEUE_MAX_COUNT];