diff options
Diffstat (limited to 'native/src')
-rw-r--r-- | native/src/bigram_dictionary.h | 4 | ||||
-rw-r--r-- | native/src/binary_format.h | 4 | ||||
-rw-r--r-- | native/src/correction.h | 8 | ||||
-rw-r--r-- | native/src/dictionary.h | 4 | ||||
-rw-r--r-- | native/src/proximity_info.h | 4 | ||||
-rw-r--r-- | native/src/terminal_attributes.h | 78 | ||||
-rw-r--r-- | native/src/unigram_dictionary.cpp | 26 | ||||
-rw-r--r-- | native/src/unigram_dictionary.h | 11 | ||||
-rw-r--r-- | native/src/words_priority_queue.h | 5 | ||||
-rw-r--r-- | native/src/words_priority_queue_pool.h | 5 |
10 files changed, 123 insertions, 26 deletions
diff --git a/native/src/bigram_dictionary.h b/native/src/bigram_dictionary.h index c07458a38..585a1866a 100644 --- a/native/src/bigram_dictionary.h +++ b/native/src/bigram_dictionary.h @@ -21,14 +21,14 @@ namespace latinime { class Dictionary; class BigramDictionary { -public: + public: BigramDictionary(const unsigned char *dict, int maxWordLength, int maxAlternatives, const bool isLatestDictVersion, const bool hasBigram, Dictionary *parentDictionary); int getBigrams(unsigned short *word, int length, int *codes, int codesSize, unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams, int maxAlternatives); ~BigramDictionary(); -private: + private: bool addWordBigram(unsigned short *word, int length, int frequency); int getBigramAddress(int *pos, bool advance); int getBigramFreq(int *pos); diff --git a/native/src/binary_format.h b/native/src/binary_format.h index cbaccb295..9944fa2bd 100644 --- a/native/src/binary_format.h +++ b/native/src/binary_format.h @@ -22,12 +22,12 @@ namespace latinime { class BinaryFormat { -private: + private: const static int32_t MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20; const static int32_t CHARACTER_ARRAY_TERMINATOR = 0x1F; const static int MULTIPLE_BYTE_CHARACTER_ADDITIONAL_SIZE = 2; -public: + public: const static int UNKNOWN_FORMAT = -1; const static int FORMAT_VERSION_1 = 1; const static uint16_t FORMAT_VERSION_1_MAGIC_NUMBER = 0x78B1; diff --git a/native/src/correction.h b/native/src/correction.h index e55be8dd6..9ba472955 100644 --- a/native/src/correction.h +++ b/native/src/correction.h @@ -27,8 +27,7 @@ namespace latinime { class ProximityInfo; class Correction { - -public: + public: typedef enum { TRAVERSE_ALL_ON_TERMINAL, TRAVERSE_ALL_NOT_ON_TERMINAL, @@ -95,7 +94,8 @@ public: inline int getTreeParentIndex(const int index) const { return mCorrectionStates[index].mParentIndex; } -private: + + private: inline void incrementInputIndex(); inline void incrementOutputIndex(); inline bool needsToTraverseAllNodes(); @@ -154,7 +154,7 @@ private: bool mSkipping; class RankingAlgorithm { - public: + public: static int calculateFinalFreq(const int inputIndex, const int depth, const int freq, int *editDistanceTable, const Correction* correction); static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq, diff --git a/native/src/dictionary.h b/native/src/dictionary.h index 52048ecca..79d377a4f 100644 --- a/native/src/dictionary.h +++ b/native/src/dictionary.h @@ -28,7 +28,7 @@ namespace latinime { class Dictionary { -public: + public: Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust, int typedLetterMultipler, int fullWordMultiplier, int maxWordLength, int maxWords, int maxAlternatives); @@ -67,7 +67,7 @@ public: const int pos, unsigned short *c, int *childrenPosition, bool *terminal, int *freq); -private: + private: bool hasBigram(); const unsigned char *mDict; diff --git a/native/src/proximity_info.h b/native/src/proximity_info.h index 832db1062..9ca5505a7 100644 --- a/native/src/proximity_info.h +++ b/native/src/proximity_info.h @@ -26,7 +26,7 @@ namespace latinime { class Correction; class ProximityInfo { -public: + public: static const int NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR_LOG_2 = 10; static const int NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR = 1 << NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR_LOG_2; @@ -68,7 +68,7 @@ public: return mTouchPositionCorrectionEnabled; } -private: + private: // The max number of the keys in one keyboard layout static const int MAX_KEY_COUNT_IN_A_KEYBOARD = 64; // The upper limit of the char code in mCodeToKeyIndex diff --git a/native/src/terminal_attributes.h b/native/src/terminal_attributes.h new file mode 100644 index 000000000..1f9815936 --- /dev/null +++ b/native/src/terminal_attributes.h @@ -0,0 +1,78 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_TERMINAL_ATTRIBUTES_H +#define LATINIME_TERMINAL_ATTRIBUTES_H + +#include "unigram_dictionary.h" + +namespace latinime { + +/** + * This class encapsulates information about a terminal that allows to + * retrieve local node attributes like the list of shortcuts without + * exposing the format structure to the client. + */ +class TerminalAttributes { + public: + class ShortcutIterator { + const uint8_t* const mDict; + bool mHasNextShortcutTarget; + int mPos; + + public: + ShortcutIterator(const uint8_t* dict, const int pos, const uint8_t flags) : mDict(dict), + mPos(pos) { + mHasNextShortcutTarget = (0 != (flags & UnigramDictionary::FLAG_HAS_SHORTCUT_TARGETS)); + } + + inline bool hasNextShortcutTarget() const { + return mHasNextShortcutTarget; + } + + // Gets the shortcut target itself as a uint16_t string. For parameters and return value + // see BinaryFormat::getWordAtAddress. + inline int getNextShortcutTarget(const int maxDepth, uint16_t* outWord) { + const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer(mDict, &mPos); + mHasNextShortcutTarget = + 0 != (shortcutFlags & UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT); + int shortcutAddress = + BinaryFormat::getAttributeAddressAndForwardPointer(mDict, shortcutFlags, &mPos); + return BinaryFormat::getWordAtAddress(mDict, shortcutAddress, maxDepth, outWord); + } + }; + + private: + const uint8_t* const mDict; + const uint8_t mFlags; + const int mStartPos; + + public: + TerminalAttributes(const uint8_t* const dict, const uint8_t flags, const int pos) : + mDict(dict), mFlags(flags), mStartPos(pos) { + } + + inline bool isShortcutOnly() const { + return 0 != (mFlags & UnigramDictionary::FLAG_IS_SHORTCUT_ONLY); + } + + inline ShortcutIterator getShortcutIterator() const { + return ShortcutIterator(mDict, mStartPos, mFlags); + } +}; +} // namespace latinime + +#endif // LATINIME_TERMINAL_ATTRIBUTES_H diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp index 7c3c35e40..e95e03ce5 100644 --- a/native/src/unigram_dictionary.cpp +++ b/native/src/unigram_dictionary.cpp @@ -25,6 +25,7 @@ #include "unigram_dictionary.h" #include "binary_format.h" +#include "terminal_attributes.h" namespace latinime { @@ -324,13 +325,28 @@ void UnigramDictionary::getMistypedSpaceWords(ProximityInfo *proximityInfo, cons correction, queuePool); } -inline void UnigramDictionary::onTerminal( - const int freq, Correction *correction, WordsPriorityQueue *queue) { +inline void UnigramDictionary::onTerminal(const int freq, + const TerminalAttributes& terminalAttributes, Correction *correction, + WordsPriorityQueue *queue) { int wordLength; unsigned short* wordPointer; const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength); if (finalFreq >= 0) { - addWord(wordPointer, wordLength, finalFreq, queue); + if (!terminalAttributes.isShortcutOnly()) { + addWord(wordPointer, wordLength, finalFreq, queue); + } + TerminalAttributes::ShortcutIterator iterator = terminalAttributes.getShortcutIterator(); + while (iterator.hasNextShortcutTarget()) { + // TODO: addWord only supports weak ordering, meaning we have no means to control the + // order of the shortcuts relative to one another or to the word. We need to either + // modulate the frequency of each shortcut according to its own shortcut frequency or + // to make the queue so that the insert order is protected inside the queue for words + // with the same score. + uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL]; + const int shortcutTargetStringLength = iterator.getNextShortcutTarget( + MAX_WORD_LENGTH_INTERNAL, shortcutTarget); + addWord(shortcutTarget, shortcutTargetStringLength, finalFreq, queue); + } } } @@ -646,7 +662,9 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, // The frequency should be here, because we come here only if this is actually // a terminal node, and we are on its last char. const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos); - onTerminal(freq, correction, queue); + TerminalAttributes terminalAttributes(DICT_ROOT, flags, + BinaryFormat::skipFrequency(flags, pos)); + onTerminal(freq, terminalAttributes, correction, queue); } // If there are more chars in this node, then this virtual node has children. diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h index afe92e5b9..23581425a 100644 --- a/native/src/unigram_dictionary.h +++ b/native/src/unigram_dictionary.h @@ -27,10 +27,9 @@ namespace latinime { +class TerminalAttributes; class UnigramDictionary { - -public: - + public: // Mask and flags for children address type selection. static const int MASK_GROUP_ADDRESS_TYPE = 0xC0; static const int FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00; @@ -83,8 +82,7 @@ public: unsigned short *outWords, int *frequencies); virtual ~UnigramDictionary(); -private: - + private: void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int inputLength, const int flags, Correction *correction, WordsPriorityQueuePool *queuePool); @@ -115,7 +113,8 @@ private: const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, const int spaceProximityPos, Correction *correction, WordsPriorityQueuePool* queuePool); - void onTerminal(const int freq, Correction *correction, WordsPriorityQueue *queue); + void onTerminal(const int freq, const TerminalAttributes& terminalAttributes, + Correction *correction, WordsPriorityQueue *queue); bool needsToSkipCurrentNode(const unsigned short c, const int inputIndex, const int skipPos, const int depth); // Process a node by considering proximity, missing and excessive character diff --git a/native/src/words_priority_queue.h b/native/src/words_priority_queue.h index 2d6270977..84f2523c2 100644 --- a/native/src/words_priority_queue.h +++ b/native/src/words_priority_queue.h @@ -24,7 +24,7 @@ namespace latinime { class WordsPriorityQueue { -public: + public: class SuggestedWord { public: int mScore; @@ -126,7 +126,8 @@ public: mSuggestions.pop(); } } -private: + + private: struct wordComparator { bool operator ()(SuggestedWord * left, SuggestedWord * right) { return left->mScore > right->mScore; diff --git a/native/src/words_priority_queue_pool.h b/native/src/words_priority_queue_pool.h index d964bfc3b..386297650 100644 --- a/native/src/words_priority_queue_pool.h +++ b/native/src/words_priority_queue_pool.h @@ -22,7 +22,7 @@ namespace latinime { class WordsPriorityQueuePool { -public: + public: WordsPriorityQueuePool(int mainQueueMaxWords, int subQueueMaxWords, int maxWordLength) { mMasterQueue = new WordsPriorityQueue(mainQueueMaxWords, maxWordLength); mSubQueue1 = new WordsPriorityQueue(subQueueMaxWords, maxWordLength); @@ -43,7 +43,8 @@ public: WordsPriorityQueue* getSubQueue2() { return mSubQueue2; } -private: + + private: WordsPriorityQueue *mMasterQueue; WordsPriorityQueue *mSubQueue1; WordsPriorityQueue *mSubQueue2; |