about summary refs log tree commit diff stats
path: root/native/src/dictionary.cpp
diff options
context:
space:
mode:
author satok <satok@google.com> 2010-12-01 23:18:39 -0800
committer Android (Google) Code Review <android-gerrit@google.com> 2010-12-01 23:18:39 -0800
commit 9fee221248cfdb61843a8db9ab91ea56498d58f7 (patch)
tree c60a2e781761611ae0647f1f6e89a3017030439d /native/src/dictionary.cpp
parent 9b89f6571b2fbd36be1e2492626c11d829c2ae3d (diff)
parent e808e436cbd6f1aeadb5d61f354d03c3c50872a7 (diff)
download latinime-9fee221248cfdb61843a8db9ab91ea56498d58f7.tar.gz
latinime-9fee221248cfdb61843a8db9ab91ea56498d58f7.tar.xz
latinime-9fee221248cfdb61843a8db9ab91ea56498d58f7.zip
Merge "Refactor: Move utility functions and no suggestion functions from unigram_dictionary.cpp to dictionary.cpp"
Diffstat (limited to 'native/src/dictionary.cpp')
-rw-r--r-- native/src/dictionary.cpp 58
1 files changed, 56 insertions, 2 deletions
diff --git a/native/src/dictionary.cpp b/native/src/dictionary.cpp
index a21b80a48..cf050fd30 100644
--- a/native/src/dictionary.cpp
+++ b/native/src/dictionary.cpp
@@ -17,15 +17,22 @@
#include <stdio.h>
+#define LOG_TAG "LatinIME: dictionary.cpp"
+
#include "dictionary.h"
namespace latinime {
Dictionary::Dictionary(void *dict, int typedLetterMultiplier, int fullWordMultiplier,
int maxWordLength, int maxWords, int maxAlternatives)
+ : DICT((unsigned char*) dict),
+ // Latest-version check: the first dictionary byte must be >= DICTIONARY_VERSION_MIN; otherwise the old dictionary is assumed
+ IS_LATEST_DICT_VERSION((((unsigned char*) dict)[0] & 0xFF) >= DICTIONARY_VERSION_MIN)
{
- mUnigramDictionary = new UnigramDictionary(dict, typedLetterMultiplier, fullWordMultiplier,
- maxWordLength, maxWords, maxAlternatives, this);
+ LOGI("IN NATIVE SUGGEST Version: %d \n", (DICT[0] & 0xFF)); // logs the first dictionary byte as the version
+ mUnigramDictionary = new UnigramDictionary(DICT, typedLetterMultiplier, fullWordMultiplier,
+ maxWordLength, maxWords, maxAlternatives, IS_LATEST_DICT_VERSION,
+ hasBigram(), this); // hasBigram() reads DICT, which the initializer list above has already set
mBigramDictionary = new BigramDictionary(dict, typedLetterMultiplier, fullWordMultiplier,
maxWordLength, maxWords, maxAlternatives, this);
}
@@ -35,4 +42,51 @@ Dictionary::~Dictionary()
delete mUnigramDictionary;
delete mBigramDictionary;
}
+
+bool Dictionary::hasBigram() { // true when the second dictionary byte (DICT[1]) equals 1
+ return ((DICT[1] & 0xFF) == 1);
+}
+
+// TODO: use uint16_t instead of unsigned short
+bool Dictionary::isValidWord(unsigned short *word, int length) // true unless isValidWordRec reports NOT_VALID_WORD
+{
+ if (IS_LATEST_DICT_VERSION) {
+ return (isValidWordRec(DICTIONARY_HEADER_SIZE, word, 0, length) != NOT_VALID_WORD); // latest format: search starts at DICTIONARY_HEADER_SIZE
+ } else {
+ return (isValidWordRec(0, word, 0, length) != NOT_VALID_WORD); // old format: search starts at position 0
+ }
+}
+
+int Dictionary::isValidWordRec(int pos, unsigned short *word, int offset, int length) {
+ // On a match, returns pos + 1 as it stands right after the matching entry's address was read (noted as the address of that word's bigram data)
+ // Returns NOT_VALID_WORD if not found (an earlier note gave this value as -99 - TODO confirm against the header)
+
+ int count = Dictionary::getCount(DICT, &pos);
+ unsigned short currentChar = (unsigned short) word[offset]; // NOTE(review): read without checking offset against length - confirm callers never pass length <= 0
+ for (int j = 0; j < count; j++) {
+ unsigned short c = Dictionary::getChar(DICT, &pos);
+ int terminal = Dictionary::getTerminal(DICT, &pos);
+ int childPos = Dictionary::getAddress(DICT, &pos);
+ if (c == currentChar) {
+ if (offset == length - 1) { // currently on the last character of the word
+ if (terminal) {
+ return (pos+1);
+ }
+ } else {
+ if (childPos != 0) {
+ int t = isValidWordRec(childPos, word, offset + 1, length);
+ if (t > 0) { // only a positive result is propagated; otherwise the remaining entries are still scanned
+ return t;
+ }
+ }
+ }
+ }
+ if (terminal) {
+ Dictionary::getFreq(DICT, IS_LATEST_DICT_VERSION, &pos); // result discarded; presumably called to advance pos past the frequency data - TODO confirm
+ }
+ // There could be two instances of each alphabet - upper and lower case. So continue
+ // looking ...
+ }
+ return NOT_VALID_WORD;
+}
} // namespace latinime