diff options
Diffstat (limited to '')
-rw-r--r-- | native/src/unigram_dictionary.h | 92 |
1 files changed, 54 insertions, 38 deletions
diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h index ef9709a89..b950971bb 100644 --- a/native/src/unigram_dictionary.h +++ b/native/src/unigram_dictionary.h @@ -22,17 +22,14 @@ #include "correction_state.h" #include "defines.h" #include "proximity_info.h" - -#ifndef NULL -#define NULL 0 -#endif +#include "words_priority_queue.h" +#include "words_priority_queue_pool.h" namespace latinime { +class TerminalAttributes; class UnigramDictionary { - -public: - + public: // Mask and flags for children address type selection. static const int MASK_GROUP_ADDRESS_TYPE = 0xC0; static const int FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00; @@ -46,8 +43,14 @@ public: // Flag for terminal groups static const int FLAG_IS_TERMINAL = 0x10; + // Flag for shortcut targets presence + static const int FLAG_HAS_SHORTCUT_TARGETS = 0x08; // Flag for bigram presence static const int FLAG_HAS_BIGRAMS = 0x04; + // Flag for shortcut-only words. Some words are shortcut-only, which means they match when + // the user types them but they don't pop in the suggestion strip, only the words they are + // shortcuts for do. + static const int FLAG_IS_SHORTCUT_ONLY = 0x02; // Attribute (bigram/shortcut) related flags: // Flag for presence of more attributes @@ -64,47 +67,66 @@ public: static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20; static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30; + // Error tolerances + static const int DEFAULT_MAX_ERRORS = 2; + static const int MAX_ERRORS_FOR_TWO_WORDS = 1; + UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultipler, int fullWordMultiplier, int maxWordLength, int maxWords, int maxProximityChars, const bool isLatestDictVersion); bool isValidWord(const uint16_t* const inWord, const int length) const; int getBigramPosition(int pos, unsigned short *word, int offset, int length) const; - int getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, + int getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueuePool *queuePool, + Correction *correction, const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, const int flags, unsigned short *outWords, int *frequencies); virtual ~UnigramDictionary(); -private: - + private: void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, - const int *ycoordinates, const int *codes, const int codesSize, - unsigned short *outWords, int *frequencies, const int flags); - bool isDigraph(const int* codes, const int i, const int codesSize) const; + const int *ycoordinates, const int *codes, const int inputLength, + const int flags, Correction *correction, WordsPriorityQueuePool *queuePool); + bool isDigraph(const int *codes, const int i, const int codesSize) const; void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo, const int *xcoordinates, const int* ycoordinates, const int *codesBuffer, - const int codesBufferSize, const int flags, const int* codesSrc, const int codesRemain, - const int currentDepth, int* codesDest, unsigned short* outWords, int* frequencies); + const int codesBufferSize, const int flags, const int* codesSrc, + const int codesRemain, const int currentDepth, int* codesDest, Correction *correction, + WordsPriorityQueuePool* queuePool); void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, - const int *ycoordinates, const int *codes, const int codesSize, - unsigned short *outWords, int *frequencies); - void getSuggestionCandidates(const bool useFullEditDistance); - bool addWord(unsigned short *word, int length, int frequency); - void getSplitTwoWordsSuggestion(const int inputLength, Correction *correction); - void getMissingSpaceWords(const int inputLength, const int missingSpacePos, - Correction *correction, const bool useFullEditDistance); - void getMistypedSpaceWords(const int inputLength, const int spaceProximityPos, - Correction *correction, const bool useFullEditDistance); - void onTerminal(const int freq, Correction *correction); + const int *ycoordinates, const int *codes, const int codesSize, Correction *correction); + void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, + const int *ycoordinates, const int *codes, const bool useFullEditDistance, + const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool); + void getSuggestionCandidates( + const bool useFullEditDistance, const int inputLength, Correction *correction, + WordsPriorityQueuePool* queuePool, const bool doAutoCompletion, const int maxErrors); + void getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo, + const int *xcoordinates, const int *ycoordinates, const int *codes, + const bool useFullEditDistance, const int inputLength, const int spaceProximityPos, + const int missingSpacePos, Correction *correction, WordsPriorityQueuePool* queuePool); + void getSplitTwoWordsSuggestionsOld(ProximityInfo *proximityInfo, + const int *xcoordinates, const int *ycoordinates, const int *codes, + const bool useFullEditDistance, const int inputLength, const int spaceProximityPos, + const int missingSpacePos, Correction *correction, WordsPriorityQueuePool* queuePool); + void getMissingSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates, + const int *ycoordinates, const int *codes, const bool useFullEditDistance, + const int inputLength, const int missingSpacePos, Correction *correction, + WordsPriorityQueuePool* queuePool); + void getMistypedSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates, + const int *ycoordinates, const int *codes, const bool useFullEditDistance, + const int inputLength, const int spaceProximityPos, Correction *correction, + WordsPriorityQueuePool* queuePool); + void onTerminal(const int freq, const TerminalAttributes& terminalAttributes, + Correction *correction, WordsPriorityQueuePool *queuePool, const bool addToMasterQueue); bool needsToSkipCurrentNode(const unsigned short c, const int inputIndex, const int skipPos, const int depth); // Process a node by considering proximity, missing and excessive character - bool processCurrentNode(const int initialPos, - Correction *correction, int *newCount, - int *newChildPosition, int *nextSiblingPosition); + bool processCurrentNode(const int initialPos, Correction *correction, int *newCount, + int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool); int getMostFrequentWordLike(const int startInputIndex, const int inputLength, - unsigned short *word); + ProximityInfo *proximityInfo, unsigned short *word); int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length, - short unsigned int* outWord); + short unsigned int *outWord); const uint8_t* const DICT_ROOT; const int MAX_WORD_LENGTH; @@ -127,14 +149,8 @@ private: }; static const struct digraph_t { int first; int second; } GERMAN_UMLAUT_DIGRAPHS[]; - int *mFrequencies; - unsigned short *mOutputChars; - ProximityInfo *mProximityInfo; - Correction *mCorrection; - int mInputLength; - // MAX_WORD_LENGTH_INTERNAL must be bigger than MAX_WORD_LENGTH - unsigned short mWord[MAX_WORD_LENGTH_INTERNAL]; - + // Still bundled members + unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];// TODO: remove int mStackChildCount[MAX_WORD_LENGTH_INTERNAL];// TODO: remove int mStackInputIndex[MAX_WORD_LENGTH_INTERNAL];// TODO: remove int mStackSiblingPos[MAX_WORD_LENGTH_INTERNAL];// TODO: remove |