aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/src
diff options
context:
space:
mode:
authorSatoshi Kataoka <satok@google.com>2012-05-29 15:58:13 +0900
committerSatoshi Kataoka <satok@google.com>2012-05-29 17:21:59 +0900
commit2f854e170c9fde47cae804145f90d164cdb5ceb8 (patch)
treeb2229149083474bfadfb5d086e66055f9130a133 /native/jni/src
parentbd40b94f965b1602bea35fd92f2f05d524f9ab3b (diff)
downloadlatinime-2f854e170c9fde47cae804145f90d164cdb5ceb8.tar.gz
latinime-2f854e170c9fde47cae804145f90d164cdb5ceb8.tar.xz
latinime-2f854e170c9fde47cae804145f90d164cdb5ceb8.zip
Add a JNI to get the frequency
Bug: 4192129 Change-Id: I3f220f5a10114d4eb23956148076cf76220bda0f
Diffstat (limited to 'native/jni/src')
-rw-r--r--native/jni/src/dictionary.cpp4
-rw-r--r--native/jni/src/dictionary.h2
-rw-r--r--native/jni/src/unigram_dictionary.cpp17
-rw-r--r--native/jni/src/unigram_dictionary.h2
4 files changed, 19 insertions, 6 deletions
diff --git a/native/jni/src/dictionary.cpp b/native/jni/src/dictionary.cpp
index 65d0f73a3..1fb02478b 100644
--- a/native/jni/src/dictionary.cpp
+++ b/native/jni/src/dictionary.cpp
@@ -55,8 +55,8 @@ Dictionary::~Dictionary() {
delete mBigramDictionary;
}
-bool Dictionary::isValidWord(const int32_t *word, int length) {
- return mUnigramDictionary->isValidWord(word, length);
+int Dictionary::getFrequency(const int32_t *word, int length) {
+ return mUnigramDictionary->getFrequency(word, length);
}
bool Dictionary::isValidBigram(const int32_t *word1, int length1, const int32_t *word2,
diff --git a/native/jni/src/dictionary.h b/native/jni/src/dictionary.h
index 87891ee4d..9f2367904 100644
--- a/native/jni/src/dictionary.h
+++ b/native/jni/src/dictionary.h
@@ -52,7 +52,7 @@ class Dictionary {
maxWordLength, maxBigrams);
}
- bool isValidWord(const int32_t *word, int length);
+ int getFrequency(const int32_t *word, int length);
bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2);
void *getDict() { return (void *)mDict; }
int getDictSize() { return mDictSize; }
diff --git a/native/jni/src/unigram_dictionary.cpp b/native/jni/src/unigram_dictionary.cpp
index 828582848..efe9c4fe3 100644
--- a/native/jni/src/unigram_dictionary.cpp
+++ b/native/jni/src/unigram_dictionary.cpp
@@ -747,8 +747,21 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t * const inWor
return maxFreq;
}
-bool UnigramDictionary::isValidWord(const int32_t* const inWord, const int length) const {
- return NOT_VALID_WORD != BinaryFormat::getTerminalPosition(DICT_ROOT, inWord, length);
+int UnigramDictionary::getFrequency(const int32_t* const inWord, const int length) const {
+ const uint8_t* const root = DICT_ROOT;
+ int pos = BinaryFormat::getTerminalPosition(root, inWord, length);
+ if (NOT_VALID_WORD == pos) {
+ return NOT_A_PROBABILITY;
+ }
+ const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
+ const bool hasMultipleChars = (0 != (FLAG_HAS_MULTIPLE_CHARS & flags));
+ if (hasMultipleChars) {
+ pos = BinaryFormat::skipOtherCharacters(root, pos);
+ } else {
+ BinaryFormat::getCharCodeAndForwardPointer(DICT_ROOT, &pos);
+ }
+ const int unigramFreq = BinaryFormat::readFrequencyWithoutMovingPointer(root, pos);
+ return unigramFreq;
}
// TODO: remove this function.
diff --git a/native/jni/src/unigram_dictionary.h b/native/jni/src/unigram_dictionary.h
index b9233518f..b70894004 100644
--- a/native/jni/src/unigram_dictionary.h
+++ b/native/jni/src/unigram_dictionary.h
@@ -72,7 +72,7 @@ class UnigramDictionary {
UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultipler,
int fullWordMultiplier, int maxWordLength, int maxWords, const unsigned int flags);
- bool isValidWord(const int32_t* const inWord, const int length) const;
+ int getFrequency(const int32_t* const inWord, const int length) const;
int getBigramPosition(int pos, unsigned short *word, int offset, int length) const;
int getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueuePool *queuePool,
Correction *correction, const int *xcoordinates, const int *ycoordinates,