about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- dictionary/src/dictionary.cpp 19
-rw-r--r-- dictionary/src/dictionary.h 23
2 files changed, 29 insertions, 13 deletions
diff --git a/dictionary/src/dictionary.cpp b/dictionary/src/dictionary.cpp
index 6aecb6374..b37f4c926 100644
--- a/dictionary/src/dictionary.cpp
+++ b/dictionary/src/dictionary.cpp
@@ -85,10 +85,14 @@ int
Dictionary::getAddress(int *pos)
{
int address = 0;
- address += (mDict[*pos] & 0x7F) << 16;
- address += (mDict[*pos + 1] & 0xFF) << 8;
- address += (mDict[*pos + 2] & 0xFF);
- *pos += 3;
+ if ((mDict[*pos] & FLAG_ADDRESS_MASK) == 0) {
+ *pos += 1;
+ } else {
+ address += (mDict[*pos] & (ADDRESS_MASK >> 16)) << 16;
+ address += (mDict[*pos + 1] & 0xFF) << 8;
+ address += (mDict[*pos + 2] & 0xFF);
+ *pos += 3;
+ }
return address;
}
@@ -193,7 +197,8 @@ Dictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion, int s
unsigned short lowerC = toLowerCase(c, depth);
bool terminal = getTerminal(&pos);
int childrenAddress = getAddress(&pos);
- int freq = getFreq(&pos);
+ int freq = 1;
+ if (terminal) freq = getFreq(&pos);
// If we are only doing completions, no need to look at the typed characters.
if (completion) {
mWord[depth] = c;
@@ -266,7 +271,9 @@ Dictionary::isValidWordRec(int pos, unsigned short *word, int offset, int length
}
}
}
- getFreq(&pos);
+ if (terminal) {
+ getFreq(&pos);
+ }
// There could be two instances of each alphabet - upper and lower case. So continue
// looking ...
}
diff --git a/dictionary/src/dictionary.h b/dictionary/src/dictionary.h
index 8574e0736..b13e97795 100644
--- a/dictionary/src/dictionary.h
+++ b/dictionary/src/dictionary.h
@@ -19,35 +19,44 @@
namespace latinime {
+// 22-bit address = ~4MB dictionary size limit, which on average would be about 200k-300k words
+#define ADDRESS_MASK 0x3FFFFF
+
+// Set in a node's flags byte when a 22-bit address follows; if clear, no address is stored
+#define FLAG_ADDRESS_MASK 0x40
+// The bit that decides if this is a terminal node for a word. The node could still have children,
+// if the word has other endings.
+#define FLAG_TERMINAL_MASK 0x80
+
class Dictionary {
public:
Dictionary(void *dict, int typedLetterMultipler, int fullWordMultiplier);
- int getSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies,
+ int getSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies,
int maxWordLength, int maxWords, int maxAlternatives);
bool isValidWord(unsigned short *word, int length);
void setAsset(void *asset) { mAsset = asset; }
void *getAsset() { return mAsset; }
~Dictionary();
-
+
private:
int getAddress(int *pos);
- bool getTerminal(int *pos) { return (mDict[*pos] & 0x80) > 0; }
+ bool getTerminal(int *pos) { return (mDict[*pos] & FLAG_TERMINAL_MASK) > 0; }
int getFreq(int *pos) { return mDict[(*pos)++] & 0xFF; }
int getCount(int *pos) { return mDict[(*pos)++] & 0xFF; }
unsigned short getChar(int *pos);
int wideStrLen(unsigned short *str);
-
+
bool sameAsTyped(unsigned short *word, int length);
bool addWord(unsigned short *word, int length, int frequency);
unsigned short toLowerCase(unsigned short c, int depth);
- void getWordsRec(int pos, int depth, int maxDepth, bool completion, int frequency,
+ void getWordsRec(int pos, int depth, int maxDepth, bool completion, int frequency,
int inputIndex);
bool isValidWordRec(int pos, unsigned short *word, int offset, int length);
unsigned char *mDict;
void *mAsset;
-
+
int *mFrequencies;
int mMaxWords;
int mMaxWordLength;
@@ -57,7 +66,7 @@ private:
int mInputLength;
int mMaxAlternatives;
unsigned short mWord[128];
-
+
int mFullWordMultiplier;
int mTypedLetterMultiplier;
};