about summary refs log tree commit diff stats
path: root/native
diff options
context:
space:
mode:
Diffstat (limited to 'native')
-rw-r--r-- native/src/bigram_dictionary.h 4
-rw-r--r-- native/src/binary_format.h 4
-rw-r--r-- native/src/correction.h 8
-rw-r--r-- native/src/dictionary.h 4
-rw-r--r-- native/src/proximity_info.h 4
-rw-r--r-- native/src/terminal_attributes.h 78
-rw-r--r-- native/src/unigram_dictionary.cpp 26
-rw-r--r-- native/src/unigram_dictionary.h 11
-rw-r--r-- native/src/words_priority_queue.h 5
-rw-r--r-- native/src/words_priority_queue_pool.h 5
10 files changed, 123 insertions, 26 deletions
diff --git a/native/src/bigram_dictionary.h b/native/src/bigram_dictionary.h
index c07458a38..585a1866a 100644
--- a/native/src/bigram_dictionary.h
+++ b/native/src/bigram_dictionary.h
@@ -21,14 +21,14 @@ namespace latinime {
class Dictionary;
class BigramDictionary {
-public:
+ public:
BigramDictionary(const unsigned char *dict, int maxWordLength, int maxAlternatives,
const bool isLatestDictVersion, const bool hasBigram, Dictionary *parentDictionary);
int getBigrams(unsigned short *word, int length, int *codes, int codesSize,
unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams,
int maxAlternatives);
~BigramDictionary();
-private:
+ private:
bool addWordBigram(unsigned short *word, int length, int frequency);
int getBigramAddress(int *pos, bool advance);
int getBigramFreq(int *pos);
diff --git a/native/src/binary_format.h b/native/src/binary_format.h
index cbaccb295..9944fa2bd 100644
--- a/native/src/binary_format.h
+++ b/native/src/binary_format.h
@@ -22,12 +22,12 @@
namespace latinime {
class BinaryFormat {
-private:
+ private:
const static int32_t MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20;
const static int32_t CHARACTER_ARRAY_TERMINATOR = 0x1F;
const static int MULTIPLE_BYTE_CHARACTER_ADDITIONAL_SIZE = 2;
-public:
+ public:
const static int UNKNOWN_FORMAT = -1;
const static int FORMAT_VERSION_1 = 1;
const static uint16_t FORMAT_VERSION_1_MAGIC_NUMBER = 0x78B1;
diff --git a/native/src/correction.h b/native/src/correction.h
index e55be8dd6..9ba472955 100644
--- a/native/src/correction.h
+++ b/native/src/correction.h
@@ -27,8 +27,7 @@ namespace latinime {
class ProximityInfo;
class Correction {
-
-public:
+ public:
typedef enum {
TRAVERSE_ALL_ON_TERMINAL,
TRAVERSE_ALL_NOT_ON_TERMINAL,
@@ -95,7 +94,8 @@ public:
inline int getTreeParentIndex(const int index) const {
return mCorrectionStates[index].mParentIndex;
}
-private:
+
+ private:
inline void incrementInputIndex();
inline void incrementOutputIndex();
inline bool needsToTraverseAllNodes();
@@ -154,7 +154,7 @@ private:
bool mSkipping;
class RankingAlgorithm {
- public:
+ public:
static int calculateFinalFreq(const int inputIndex, const int depth,
const int freq, int *editDistanceTable, const Correction* correction);
static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
diff --git a/native/src/dictionary.h b/native/src/dictionary.h
index 52048ecca..79d377a4f 100644
--- a/native/src/dictionary.h
+++ b/native/src/dictionary.h
@@ -28,7 +28,7 @@
namespace latinime {
class Dictionary {
-public:
+ public:
Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust, int typedLetterMultipler,
int fullWordMultiplier, int maxWordLength, int maxWords, int maxAlternatives);
@@ -67,7 +67,7 @@ public:
const int pos, unsigned short *c, int *childrenPosition,
bool *terminal, int *freq);
-private:
+ private:
bool hasBigram();
const unsigned char *mDict;
diff --git a/native/src/proximity_info.h b/native/src/proximity_info.h
index 832db1062..9ca5505a7 100644
--- a/native/src/proximity_info.h
+++ b/native/src/proximity_info.h
@@ -26,7 +26,7 @@ namespace latinime {
class Correction;
class ProximityInfo {
-public:
+ public:
static const int NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR_LOG_2 = 10;
static const int NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR =
1 << NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR_LOG_2;
@@ -68,7 +68,7 @@ public:
return mTouchPositionCorrectionEnabled;
}
-private:
+ private:
// The max number of the keys in one keyboard layout
static const int MAX_KEY_COUNT_IN_A_KEYBOARD = 64;
// The upper limit of the char code in mCodeToKeyIndex
diff --git a/native/src/terminal_attributes.h b/native/src/terminal_attributes.h
new file mode 100644
index 000000000..1f9815936
--- /dev/null
+++ b/native/src/terminal_attributes.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_TERMINAL_ATTRIBUTES_H
+#define LATINIME_TERMINAL_ATTRIBUTES_H
+
+#include "unigram_dictionary.h"
+
+namespace latinime {
+
+/**
+ * This class encapsulates information about a terminal that allows to
+ * retrieve local node attributes like the list of shortcuts without
+ * exposing the format structure to the client.
+ *
+ * An instance stores three things: a pointer to the dictionary buffer, the
+ * terminal's flags byte and a start position. It keeps only the pointer and
+ * never copies the buffer. In this change, unigram_dictionary.cpp passes
+ * the value returned by BinaryFormat::skipFrequency() as the position.
+ *
+ * ShortcutIterator reads shortcut entries starting from that position.
+ * Callers are expected to test hasNextShortcutTarget() before each call to
+ * getNextShortcutTarget(), as UnigramDictionary::onTerminal() does.
+ *
+ * NOTE(review): this header uses BinaryFormat but only includes
+ * unigram_dictionary.h - presumably the includer must pull in
+ * binary_format.h first; confirm.
+ */
+class TerminalAttributes {
+ public:
+ class ShortcutIterator {
+ const uint8_t* const mDict;  // dictionary buffer; the pointer never changes
+ bool mHasNextShortcutTarget;  // true while another shortcut entry can be read
+ int mPos;  // read position in mDict, passed to BinaryFormat by address
+
+ public:
+ ShortcutIterator(const uint8_t* dict, const int pos, const uint8_t flags) : mDict(dict),
+ mPos(pos) {  // has a first target only if flags has FLAG_HAS_SHORTCUT_TARGETS
+ mHasNextShortcutTarget = (0 != (flags & UnigramDictionary::FLAG_HAS_SHORTCUT_TARGETS));
+ }
+
+ inline bool hasNextShortcutTarget() const {
+ return mHasNextShortcutTarget;
+ }
+
+ // Gets the shortcut target itself as a uint16_t string. For parameters and return value
+ // see BinaryFormat::getWordAtAddress.
+ inline int getNextShortcutTarget(const int maxDepth, uint16_t* outWord) {
+ const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer(mDict, &mPos);
+ mHasNextShortcutTarget =  // refreshed from the flags of the entry just read
+ 0 != (shortcutFlags & UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT);
+ int shortcutAddress =  // &mPos is passed again, presumably to step past the address
+ BinaryFormat::getAttributeAddressAndForwardPointer(mDict, shortcutFlags, &mPos);
+ return BinaryFormat::getWordAtAddress(mDict, shortcutAddress, maxDepth, outWord);
+ }
+ };
+
+ private:
+ const uint8_t* const mDict;  // dictionary buffer handed to the constructor
+ const uint8_t mFlags;  // flags byte of the terminal
+ const int mStartPos;  // position every new ShortcutIterator starts reading from
+
+ public:
+ TerminalAttributes(const uint8_t* const dict, const uint8_t flags, const int pos) :
+ mDict(dict), mFlags(flags), mStartPos(pos) {
+ }
+
+ inline bool isShortcutOnly() const {  // tests FLAG_IS_SHORTCUT_ONLY in mFlags
+ return 0 != (mFlags & UnigramDictionary::FLAG_IS_SHORTCUT_ONLY);
+ }
+
+ inline ShortcutIterator getShortcutIterator() const {  // fresh iterator at mStartPos
+ return ShortcutIterator(mDict, mStartPos, mFlags);
+ }
+};
+} // namespace latinime
+
+#endif // LATINIME_TERMINAL_ATTRIBUTES_H
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index 7c3c35e40..e95e03ce5 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@@ -25,6 +25,7 @@
#include "unigram_dictionary.h"
#include "binary_format.h"
+#include "terminal_attributes.h"
namespace latinime {
@@ -324,13 +325,28 @@ void UnigramDictionary::getMistypedSpaceWords(ProximityInfo *proximityInfo, cons
correction, queuePool);
}
-inline void UnigramDictionary::onTerminal(
- const int freq, Correction *correction, WordsPriorityQueue *queue) {
+inline void UnigramDictionary::onTerminal(const int freq,
+ const TerminalAttributes& terminalAttributes, Correction *correction,
+ WordsPriorityQueue *queue) {
int wordLength;
unsigned short* wordPointer;
const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength);
if (finalFreq >= 0) {  // a negative finalFreq queues nothing for this terminal
- addWord(wordPointer, wordLength, finalFreq, queue);
+ if (!terminalAttributes.isShortcutOnly()) {  // shortcut-only words are not queued themselves
+ addWord(wordPointer, wordLength, finalFreq, queue);
+ }
+ TerminalAttributes::ShortcutIterator iterator = terminalAttributes.getShortcutIterator();
+ while (iterator.hasNextShortcutTarget()) {  // queue each shortcut target of this terminal
+ // TODO: addWord only supports weak ordering, meaning we have no means to control the
+ // order of the shortcuts relative to one another or to the word. We need to either
+ // modulate the frequency of each shortcut according to its own shortcut frequency or
+ // to make the queue so that the insert order is protected inside the queue for words
+ // with the same score.
+ uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL];
+ const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
+ MAX_WORD_LENGTH_INTERNAL, shortcutTarget);  // return value is used as the length
+ addWord(shortcutTarget, shortcutTargetStringLength, finalFreq, queue);  // word's score
+ }
}
}
@@ -646,7 +662,9 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
// The frequency should be here, because we come here only if this is actually
// a terminal node, and we are on its last char.
const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos);
- onTerminal(freq, correction, queue);
+ TerminalAttributes terminalAttributes(DICT_ROOT, flags,
+ BinaryFormat::skipFrequency(flags, pos));
+ onTerminal(freq, terminalAttributes, correction, queue);
}
// If there are more chars in this node, then this virtual node has children.
diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h
index afe92e5b9..23581425a 100644
--- a/native/src/unigram_dictionary.h
+++ b/native/src/unigram_dictionary.h
@@ -27,10 +27,9 @@
namespace latinime {
+class TerminalAttributes;
class UnigramDictionary {
-
-public:
-
+ public:
// Mask and flags for children address type selection.
static const int MASK_GROUP_ADDRESS_TYPE = 0xC0;
static const int FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00;
@@ -83,8 +82,7 @@ public:
unsigned short *outWords, int *frequencies);
virtual ~UnigramDictionary();
-private:
-
+ private:
void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const int inputLength,
const int flags, Correction *correction, WordsPriorityQueuePool *queuePool);
@@ -115,7 +113,8 @@ private:
const int *ycoordinates, const int *codes, const bool useFullEditDistance,
const int inputLength, const int spaceProximityPos, Correction *correction,
WordsPriorityQueuePool* queuePool);
- void onTerminal(const int freq, Correction *correction, WordsPriorityQueue *queue);
+ void onTerminal(const int freq, const TerminalAttributes& terminalAttributes,
+ Correction *correction, WordsPriorityQueue *queue);
bool needsToSkipCurrentNode(const unsigned short c,
const int inputIndex, const int skipPos, const int depth);
// Process a node by considering proximity, missing and excessive character
diff --git a/native/src/words_priority_queue.h b/native/src/words_priority_queue.h
index 2d6270977..84f2523c2 100644
--- a/native/src/words_priority_queue.h
+++ b/native/src/words_priority_queue.h
@@ -24,7 +24,7 @@
namespace latinime {
class WordsPriorityQueue {
-public:
+ public:
class SuggestedWord {
public:
int mScore;
@@ -126,7 +126,8 @@ public:
mSuggestions.pop();
}
}
-private:
+
+ private:
struct wordComparator {
bool operator ()(SuggestedWord * left, SuggestedWord * right) {
return left->mScore > right->mScore;
diff --git a/native/src/words_priority_queue_pool.h b/native/src/words_priority_queue_pool.h
index d964bfc3b..386297650 100644
--- a/native/src/words_priority_queue_pool.h
+++ b/native/src/words_priority_queue_pool.h
@@ -22,7 +22,7 @@
namespace latinime {
class WordsPriorityQueuePool {
-public:
+ public:
WordsPriorityQueuePool(int mainQueueMaxWords, int subQueueMaxWords, int maxWordLength) {
mMasterQueue = new WordsPriorityQueue(mainQueueMaxWords, maxWordLength);
mSubQueue1 = new WordsPriorityQueue(subQueueMaxWords, maxWordLength);
@@ -43,7 +43,8 @@ public:
WordsPriorityQueue* getSubQueue2() {
return mSubQueue2;
}
-private:
+
+ private:
WordsPriorityQueue *mMasterQueue;
WordsPriorityQueue *mSubQueue1;
WordsPriorityQueue *mSubQueue2;