about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- native/jni/Android.mk | 1
-rw-r--r-- native/jni/src/suggest/core/dicnode/dic_node_utils.cpp | 3
-rw-r--r-- native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp | 45
-rw-r--r-- native/jni/src/suggest/core/dictionary/bigram_dictionary.h | 4
-rw-r--r-- native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h | 67
-rw-r--r-- native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.cpp | 68
-rw-r--r-- native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.h | 90
-rw-r--r-- native/jni/src/suggest/core/dictionary/binary_format.h | 60
-rw-r--r-- native/jni/src/suggest/core/dictionary/dictionary.cpp | 2
-rw-r--r-- native/jni/src/suggest/core/dictionary/multi_bigram_map.h | 60
10 files changed, 299 insertions, 101 deletions
diff --git a/native/jni/Android.mk b/native/jni/Android.mk
index 1cdfbe4d1..9db50473d 100644
--- a/native/jni/Android.mk
+++ b/native/jni/Android.mk
@@ -53,6 +53,7 @@ LATIN_IME_CORE_SRC_FILES := \
dic_nodes_cache.cpp) \
$(addprefix suggest/core/dictionary/, \
bigram_dictionary.cpp \
+ binary_dictionary_bigrams_reading_utils.cpp \
binary_dictionary_format_utils.cpp \
binary_dictionary_header.cpp \
binary_dictionary_header_reading_utils.cpp \
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
index 3deee1a42..f0f26c72b 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
+++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
@@ -233,8 +233,7 @@ namespace latinime {
return multiBigramMap->getBigramProbability(
binaryDictionaryInfo, prevWordPos, wordPos, unigramProbability);
}
- return BinaryFormat::getBigramProbability(
- binaryDictionaryInfo->getDictRoot(), prevWordPos, wordPos, unigramProbability);
+ return ProbabilityUtils::backoff(unigramProbability);
}
///////////////////////////////////////
diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
index 53e2df62d..6e02100fc 100644
--- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
@@ -21,6 +21,7 @@
#include "bigram_dictionary.h"
#include "defines.h"
+#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
#include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/core/dictionary/binary_format.h"
#include "suggest/core/dictionary/dictionary.h"
@@ -100,12 +101,11 @@ void BigramDictionary::addWordBigram(int *word, int length, int probability, int
* and the bigrams are used to boost unigram result scores, it makes little sense to
* reduce their scope to the ones that match the first letter.
*/
-int BigramDictionary::getBigrams(const int *prevWord, int prevWordLength, int *inputCodePoints,
+int BigramDictionary::getPredictions(const int *prevWord, int prevWordLength, int *inputCodePoints,
int inputSize, int *bigramCodePoints, int *bigramProbability, int *outputTypes) const {
// TODO: remove unused arguments, and refrain from storing stuff in members of this class
// TODO: have "in" arguments before "out" ones, and make out args explicit in the name
- const uint8_t *const root = mBinaryDictionaryInfo->getDictRoot();
int pos = getBigramListPositionForWord(prevWord, prevWordLength,
false /* forceLowerCaseSearch */);
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
@@ -116,21 +116,20 @@ int BigramDictionary::getBigrams(const int *prevWord, int prevWordLength, int *i
}
// If still no bigrams, we really don't have them!
if (0 == pos) return 0;
- uint8_t bigramFlags;
+
int bigramCount = 0;
- do {
- bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
- int bigramBuffer[MAX_WORD_LENGTH];
- int unigramProbability = 0;
- const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags,
- &pos);
- const int length = BinaryFormat::getWordAtAddress(root, bigramPos, MAX_WORD_LENGTH,
- bigramBuffer, &unigramProbability);
+ int unigramProbability = 0;
+ int bigramBuffer[MAX_WORD_LENGTH];
+ for (BinaryDictionaryBigramsIterator bigramsIt(mBinaryDictionaryInfo, pos);
+ bigramsIt.hasNext(); /* no-op */) {
+ bigramsIt.next();
+ const int length = BinaryFormat::getWordAtAddress(
+ mBinaryDictionaryInfo->getDictRoot(), bigramsIt.getBigramPos(),
+ MAX_WORD_LENGTH, bigramBuffer, &unigramProbability);
// inputSize == 0 means we are trying to find bigram predictions.
if (inputSize < 1 || checkFirstCharacter(bigramBuffer, inputCodePoints)) {
- const int bigramProbabilityTemp =
- BinaryFormat::MASK_ATTRIBUTE_PROBABILITY & bigramFlags;
+ const int bigramProbabilityTemp = bigramsIt.getProbability();
// Due to space constraints, the probability for bigrams is approximate - the lower the
// unigram probability, the worse the precision. The theoritical maximum error in
// resulting probability is 8 - although in the practice it's never bigger than 3 or 4
@@ -142,7 +141,7 @@ int BigramDictionary::getBigrams(const int *prevWord, int prevWordLength, int *i
outputTypes);
++bigramCount;
}
- } while (BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags);
+ }
return min(bigramCount, MAX_RESULTS);
}
@@ -187,22 +186,20 @@ bool BigramDictionary::checkFirstCharacter(int *word, int *inputCodePoints) cons
bool BigramDictionary::isValidBigram(const int *word1, int length1, const int *word2,
int length2) const {
- const uint8_t *const root = mBinaryDictionaryInfo->getDictRoot();
int pos = getBigramListPositionForWord(word1, length1, false /* forceLowerCaseSearch */);
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
if (0 == pos) return false;
- int nextWordPos = BinaryFormat::getTerminalPosition(root, word2, length2,
- false /* forceLowerCaseSearch */);
+ int nextWordPos = BinaryFormat::getTerminalPosition(mBinaryDictionaryInfo->getDictRoot(),
+ word2, length2, false /* forceLowerCaseSearch */);
if (NOT_VALID_WORD == nextWordPos) return false;
- uint8_t bigramFlags;
- do {
- bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
- const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags,
- &pos);
- if (bigramPos == nextWordPos) {
+
+ for (BinaryDictionaryBigramsIterator bigramsIt(mBinaryDictionaryInfo, pos);
+ bigramsIt.hasNext(); /* no-op */) {
+ bigramsIt.next();
+ if (bigramsIt.getBigramPos() == nextWordPos) {
return true;
}
- } while (BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags);
+ }
return false;
}
diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h b/native/jni/src/suggest/core/dictionary/bigram_dictionary.h
index 06d0e9da3..7706a2c22 100644
--- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.h
@@ -27,8 +27,8 @@ class BigramDictionary {
public:
BigramDictionary(const BinaryDictionaryInfo *const binaryDictionaryInfo);
- int getBigrams(const int *word, int length, int *inputCodePoints, int inputSize, int *outWords,
- int *frequencies, int *outputTypes) const;
+ int getPredictions(const int *word, int length, int *inputCodePoints, int inputSize,
+ int *outWords, int *frequencies, int *outputTypes) const;
bool isValidBigram(const int *word1, int length1, const int *word2, int length2) const;
~BigramDictionary();
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h
new file mode 100644
index 000000000..0856840b2
--- /dev/null
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_BINARY_DICTIONARY_BIGRAMS_ITERATOR_H
+#define LATINIME_BINARY_DICTIONARY_BIGRAMS_ITERATOR_H
+
+#include "defines.h"
+#include "suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.h"
+#include "suggest/core/dictionary/binary_dictionary_info.h"
+
+namespace latinime {
+
+class BinaryDictionaryBigramsIterator {  // Forward-only reader for a single bigram list.
+ public:
+    BinaryDictionaryBigramsIterator(
+            const BinaryDictionaryInfo *const binaryDictionaryInfo, const int pos)
+            : mBinaryDictionaryInfo(binaryDictionaryInfo), mPos(pos), mBigramFlags(0),
+              mBigramPos(0), mHasNext(true) {}  // hasNext() is true before the first next().
+
+    AK_FORCE_INLINE bool hasNext() const {  // False once next() has read the last entry.
+        return mHasNext;
+    }
+
+    AK_FORCE_INLINE void next() {  // Reads one entry: flags first, then the target address.
+        mBigramFlags = BinaryDictionaryBigramsReadingUtils::getFlagsAndForwardPointer(
+                mBinaryDictionaryInfo, &mPos);
+        mBigramPos = BinaryDictionaryBigramsReadingUtils::getBigramAddressAndForwardPointer(
+                mBinaryDictionaryInfo, mBigramFlags, &mPos);
+        mHasNext = BinaryDictionaryBigramsReadingUtils::hasNext(mBigramFlags);
+    }
+
+    AK_FORCE_INLINE int getProbability() const {  // Probability bits of the last entry read.
+        return BinaryDictionaryBigramsReadingUtils::getBigramProbability(mBigramFlags);
+    }
+
+    AK_FORCE_INLINE int getBigramPos() const {  // Target address of the last entry read.
+        return mBigramPos;
+    }
+
+    AK_FORCE_INLINE int getFlags() const {  // Raw flags of the last entry; 0 before next().
+        return mBigramFlags;
+    }
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryBigramsIterator);
+
+    const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
+    int mPos;  // Read cursor; advanced by every next() call.
+    BinaryDictionaryBigramsReadingUtils::BigramFlags mBigramFlags;
+    int mBigramPos;
+    bool mHasNext;
+};
+} // namespace latinime
+#endif // LATINIME_BINARY_DICTIONARY_BIGRAMS_ITERATOR_H
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.cpp
new file mode 100644
index 000000000..78a54b141
--- /dev/null
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.cpp
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.h"
+
+#include "suggest/core/dictionary/binary_dictionary_info.h"
+#include "suggest/core/dictionary/byte_array_utils.h"
+
+namespace latinime {
+
+const BinaryDictionaryBigramsReadingUtils::BigramFlags
+        BinaryDictionaryBigramsReadingUtils::MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30;  // bits 4-5
+const BinaryDictionaryBigramsReadingUtils::BigramFlags
+        BinaryDictionaryBigramsReadingUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10;
+const BinaryDictionaryBigramsReadingUtils::BigramFlags
+        BinaryDictionaryBigramsReadingUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20;
+const BinaryDictionaryBigramsReadingUtils::BigramFlags
+        BinaryDictionaryBigramsReadingUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30;
+const BinaryDictionaryBigramsReadingUtils::BigramFlags
+        BinaryDictionaryBigramsReadingUtils::FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
+// Flag for presence of more attributes: set when another entry follows the current one.
+const BinaryDictionaryBigramsReadingUtils::BigramFlags
+        BinaryDictionaryBigramsReadingUtils::FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
+// Mask for attribute probability, stored on the low 4 bits of the flags byte.
+const BinaryDictionaryBigramsReadingUtils::BigramFlags
+        BinaryDictionaryBigramsReadingUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
+const int BinaryDictionaryBigramsReadingUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;  // type -> size
+
+/* static */ int BinaryDictionaryBigramsReadingUtils::getBigramAddressAndForwardPointer(
+        const BinaryDictionaryInfo *const binaryDictionaryInfo, const BigramFlags flags,
+        int *const pos) {  // Returns the target address and moves *pos past the offset field.
+    int offset = 0;
+    const int origin = *pos;  // The offset is applied relative to where its field begins.
+    switch (MASK_ATTRIBUTE_ADDRESS_TYPE & flags) {  // No matching case: offset stays 0.
+        case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
+            offset = ByteArrayUtils::readUint8andAdvancePosition(
+                    binaryDictionaryInfo->getDictRoot(), pos);
+            break;
+        case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
+            offset = ByteArrayUtils::readUint16andAdvancePosition(
+                    binaryDictionaryInfo->getDictRoot(), pos);
+            break;
+        case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
+            offset = ByteArrayUtils::readUint24andAdvancePosition(
+                    binaryDictionaryInfo->getDictRoot(), pos);
+            break;
+    }
+    if (isOffsetNegative(flags)) {  // The direction is taken from the flags byte.
+        return origin - offset;
+    } else {
+        return origin + offset;
+    }
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.h
new file mode 100644
index 000000000..e71f2a17a
--- /dev/null
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_BINARY_DICTIONARY_BIGRAM_READING_UTILS_H
+#define LATINIME_BINARY_DICTIONARY_BIGRAM_READING_UTILS_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/core/dictionary/binary_dictionary_info.h"
+#include "suggest/core/dictionary/byte_array_utils.h"
+
+namespace latinime {
+
+class BinaryDictionaryBigramsReadingUtils {
+ public:
+    typedef uint8_t BigramFlags;  // Flags value read ahead of each entry's address bytes.
+
+    static AK_FORCE_INLINE void skipExistingBigrams(  // Moves *pos past a whole bigram list.
+            const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
+        BigramFlags flags = getFlagsAndForwardPointer(binaryDictionaryInfo, pos);
+        while (hasNext(flags)) {
+            *pos += attributeAddressSize(flags);  // Step over this entry's address bytes.
+            flags = getFlagsAndForwardPointer(binaryDictionaryInfo, pos);
+        }
+        *pos += attributeAddressSize(flags);  // Address bytes of the final entry.
+    }
+
+    static AK_FORCE_INLINE BigramFlags getFlagsAndForwardPointer(  // Flags found at *pos.
+            const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
+        return ByteArrayUtils::readUint8andAdvancePosition(
+                binaryDictionaryInfo->getDictRoot(), pos);
+    }
+
+    static AK_FORCE_INLINE int getBigramProbability(const BigramFlags flags) {
+        return flags & MASK_ATTRIBUTE_PROBABILITY;
+    }
+
+    static AK_FORCE_INLINE bool isOffsetNegative(const BigramFlags flags) {
+        return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0;
+    }
+
+    static AK_FORCE_INLINE bool hasNext(const BigramFlags flags) {
+        return (flags & FLAG_ATTRIBUTE_HAS_NEXT) != 0;
+    }
+
+    static int getBigramAddressAndForwardPointer(  // Declared here only; not defined inline.
+            const BinaryDictionaryInfo *const binaryDictionaryInfo,
+            const BigramFlags flags, int *const pos);
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryBigramsReadingUtils);
+
+    static const BigramFlags MASK_ATTRIBUTE_ADDRESS_TYPE;
+    static const BigramFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE;
+    static const BigramFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES;
+    static const BigramFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
+    static const BigramFlags FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
+    static const BigramFlags FLAG_ATTRIBUTE_HAS_NEXT;
+    static const BigramFlags MASK_ATTRIBUTE_PROBABILITY;
+    static const int ATTRIBUTE_ADDRESS_SHIFT;
+
+    static AK_FORCE_INLINE int attributeAddressSize(const BigramFlags flags) {
+        return (flags & MASK_ATTRIBUTE_ADDRESS_TYPE) >> ATTRIBUTE_ADDRESS_SHIFT;
+        /* Note: this is a value-dependent optimization of what may probably be
+           more readably written this way:
+           switch (flags & MASK_ATTRIBUTE_ADDRESS_TYPE) {
+           case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: return 1;
+           case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: return 2;
+           case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES: return 3;
+           default: return 0;
+           }
+        */
+    }
+};
+}
+#endif /* LATINIME_BINARY_DICTIONARY_BIGRAM_READING_UTILS_H */
diff --git a/native/jni/src/suggest/core/dictionary/binary_format.h b/native/jni/src/suggest/core/dictionary/binary_format.h
index 0a290d80a..df0ec480d 100644
--- a/native/jni/src/suggest/core/dictionary/binary_format.h
+++ b/native/jni/src/suggest/core/dictionary/binary_format.h
@@ -21,7 +21,6 @@
#include "suggest/core/dictionary/probability_utils.h"
#include "utils/char_utils.h"
-#include "utils/hash_map_compat.h"
namespace latinime {
@@ -81,16 +80,10 @@ class BinaryFormat {
const int length, const bool forceLowerCaseSearch);
static int getWordAtAddress(const uint8_t *const root, const int address, const int maxDepth,
int *outWord, int *outUnigramProbability);
- static int getBigramProbabilityFromHashMap(const int position,
- const hash_map_compat<int, int> *bigramMap, const int unigramProbability);
- static void fillBigramProbabilityToHashMap(const uint8_t *const root, int position,
- hash_map_compat<int, int> *bigramMap);
- static int getBigramProbability(const uint8_t *const root, int position,
- const int nextPosition, const int unigramProbability);
+ static int getBigramListPositionForWordPosition(const uint8_t *const root, int position);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryFormat);
- static int getBigramListPositionForWordPosition(const uint8_t *const root, int position);
static const int FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00;
static const int FLAG_GROUP_ADDRESS_TYPE_ONEBYTE = 0x40;
@@ -516,57 +509,6 @@ AK_FORCE_INLINE int BinaryFormat::getWordAtAddress(const uint8_t *const root, co
return 0;
}
-// This returns a probability in log space.
-inline int BinaryFormat::getBigramProbabilityFromHashMap(const int position,
- const hash_map_compat<int, int> *bigramMap, const int unigramProbability) {
- if (!bigramMap) {
- return ProbabilityUtils::backoff(unigramProbability);
- }
- const hash_map_compat<int, int>::const_iterator bigramProbabilityIt = bigramMap->find(position);
- if (bigramProbabilityIt != bigramMap->end()) {
- const int bigramProbability = bigramProbabilityIt->second;
- return ProbabilityUtils::computeProbabilityForBigram(unigramProbability, bigramProbability);
- }
- return ProbabilityUtils::backoff(unigramProbability);
-}
-
-AK_FORCE_INLINE void BinaryFormat::fillBigramProbabilityToHashMap(
- const uint8_t *const root, int position, hash_map_compat<int, int> *bigramMap) {
- position = getBigramListPositionForWordPosition(root, position);
- if (0 == position) return;
-
- uint8_t bigramFlags;
- do {
- bigramFlags = getFlagsAndForwardPointer(root, &position);
- const int probability = MASK_ATTRIBUTE_PROBABILITY & bigramFlags;
- const int bigramPos = getAttributeAddressAndForwardPointer(root, bigramFlags,
- &position);
- (*bigramMap)[bigramPos] = probability;
- } while (FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags);
-}
-
-AK_FORCE_INLINE int BinaryFormat::getBigramProbability(const uint8_t *const root, int position,
- const int nextPosition, const int unigramProbability) {
- position = getBigramListPositionForWordPosition(root, position);
- if (0 == position) {
- return ProbabilityUtils::backoff(unigramProbability);
- }
-
- uint8_t bigramFlags;
- do {
- bigramFlags = getFlagsAndForwardPointer(root, &position);
- const int bigramPos = getAttributeAddressAndForwardPointer(
- root, bigramFlags, &position);
- if (bigramPos == nextPosition) {
- const int bigramProbability = MASK_ATTRIBUTE_PROBABILITY & bigramFlags;
- return ProbabilityUtils::computeProbabilityForBigram(
- unigramProbability, bigramProbability);
- }
- } while (FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags);
- return ProbabilityUtils::backoff(unigramProbability);
-}
-
-// Returns a pointer to the start of the bigram list.
AK_FORCE_INLINE int BinaryFormat::getBigramListPositionForWordPosition(
const uint8_t *const root, int position) {
if (NOT_VALID_WORD == position) return 0;
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index 561e22d2d..27b052b7e 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -79,7 +79,7 @@ int Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession
int Dictionary::getBigrams(const int *word, int length, int *inputCodePoints, int inputSize,
int *outWords, int *frequencies, int *outputTypes) const {
if (length <= 0) return 0;
- return mBigramDictionary->getBigrams(word, length, inputCodePoints, inputSize, outWords,
+ return mBigramDictionary->getPredictions(word, length, inputCodePoints, inputSize, outWords,
frequencies, outputTypes);
}
diff --git a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h
index ba97e5842..b380e9727 100644
--- a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h
+++ b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h
@@ -18,6 +18,7 @@
#define LATINIME_MULTI_BIGRAM_MAP_H
#include "defines.h"
+#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
#include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/core/dictionary/binary_format.h"
#include "utils/hash_map_compat.h"
@@ -34,7 +35,7 @@ class MultiBigramMap {
// Look up the bigram probability for the given word pair from the cached bigram maps.
// Also caches the bigrams if there is space remaining and they have not been cached already.
- int getBigramProbability(const BinaryDictionaryInfo *const binaryDicitonaryInfo,
+ int getBigramProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo,
const int wordPosition, const int nextWordPosition, const int unigramProbability) {
hash_map_compat<int, BigramMap>::const_iterator mapPosition =
mBigramMaps.find(wordPosition);
@@ -42,11 +43,11 @@ class MultiBigramMap {
return mapPosition->second.getBigramProbability(nextWordPosition, unigramProbability);
}
if (mBigramMaps.size() < MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP) {
- addBigramsForWordPosition(binaryDicitonaryInfo, wordPosition);
+ addBigramsForWordPosition(binaryDictionaryInfo, wordPosition);
return mBigramMaps[wordPosition].getBigramProbability(
nextWordPosition, unigramProbability);
}
- return BinaryFormat::getBigramProbability(binaryDicitonaryInfo->getDictRoot(),
+ return readBigramProbabilityFromBinaryDictionary(binaryDictionaryInfo,
wordPosition, nextWordPosition, unigramProbability);
}
@@ -62,15 +63,29 @@ class MultiBigramMap {
BigramMap() : mBigramMap(DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP) {}
~BigramMap() {}
- void init(const BinaryDictionaryInfo *const binaryDicitonaryInfo, const int position) {
- BinaryFormat::fillBigramProbabilityToHashMap(
- binaryDicitonaryInfo->getDictRoot(), position, &mBigramMap);
+        void init(const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos) {
+            const int bigramsListPos = BinaryFormat::getBigramListPositionForWordPosition(
+                    binaryDictionaryInfo->getDictRoot(), nodePos);
+            if (0 == bigramsListPos) {  // 0 is handled as "no bigram list": nothing is added.
+                return;
+            }
+            for (BinaryDictionaryBigramsIterator bigramsIt(binaryDictionaryInfo, bigramsListPos);
+                    bigramsIt.hasNext(); /* no-op */) {
+                bigramsIt.next();  // Load the next entry before reading its fields.
+                mBigramMap[bigramsIt.getBigramPos()] = bigramsIt.getProbability();
+            }
+        }
- inline int getBigramProbability(const int nextWordPosition, const int unigramProbability)
- const {
- return BinaryFormat::getBigramProbabilityFromHashMap(
- nextWordPosition, &mBigramMap, unigramProbability);
+        AK_FORCE_INLINE int getBigramProbability(
+                const int nextWordPosition, const int unigramProbability) const {
+            const hash_map_compat<int, int>::const_iterator bigramProbabilityIt =
+                    mBigramMap.find(nextWordPosition);
+            if (bigramProbabilityIt != mBigramMap.end()) {  // A bigram is stored for this word.
+                const int bigramProbability = bigramProbabilityIt->second;
+                return ProbabilityUtils::computeProbabilityForBigram(
+                        unigramProbability, bigramProbability);
+            }
+            return ProbabilityUtils::backoff(unigramProbability);  // No stored bigram.
+        }
private:
@@ -78,9 +93,28 @@ class MultiBigramMap {
hash_map_compat<int, int> mBigramMap;
};
- void addBigramsForWordPosition(const BinaryDictionaryInfo *const binaryDicitonaryInfo,
- const int position) {
- mBigramMaps[position].init(binaryDicitonaryInfo, position);
+    AK_FORCE_INLINE void addBigramsForWordPosition(  // Fills the map kept for `position`.
+            const BinaryDictionaryInfo *const binaryDictionaryInfo, const int position) {
+        mBigramMaps[position].init(binaryDictionaryInfo, position);
+    }
+
+    AK_FORCE_INLINE int readBigramProbabilityFromBinaryDictionary(  // Bypasses mBigramMaps.
+            const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos,
+            const int nextWordPosition, const int unigramProbability) {
+        const int bigramsListPos = BinaryFormat::getBigramListPositionForWordPosition(
+                binaryDictionaryInfo->getDictRoot(), nodePos);
+        if (0 == bigramsListPos) {  // 0 is handled as "no bigram list" for nodePos.
+            return ProbabilityUtils::backoff(unigramProbability);
+        }
+        for (BinaryDictionaryBigramsIterator bigramsIt(binaryDictionaryInfo, bigramsListPos);
+                bigramsIt.hasNext(); /* no-op */) {
+            bigramsIt.next();  // Load the next entry before reading its fields.
+            if (bigramsIt.getBigramPos() == nextWordPosition) {  // Entry targets the next word.
+                return ProbabilityUtils::computeProbabilityForBigram(
+                        unigramProbability, bigramsIt.getProbability());
+            }
+        }
+        return ProbabilityUtils::backoff(unigramProbability);  // Not found in the list.
+    }
hash_map_compat<int, BigramMap> mBigramMaps;