aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp2
-rw-r--r--native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp58
-rw-r--r--native/jni/src/suggest/core/dictionary/bigram_dictionary.h7
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.cpp7
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.h4
5 files changed, 23 insertions, 55 deletions
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index 6e1b80ee0..8b46c2644 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -186,7 +186,7 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, j
scores, spaceIndices, outputTypes);
} else {
count = dictionary->getBigrams(prevWordCodePoints, prevWordCodePointsLength,
- inputCodePoints, inputSize, outputCodePoints, scores, outputTypes);
+ outputCodePoints, scores, outputTypes);
}
// Copy back the output values
diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
index 532c769c6..3751ae500 100644
--- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
@@ -87,21 +87,14 @@ void BigramDictionary::addWordBigram(int *word, int length, int probability, int
/* Parameters :
* prevWord: the word before, the one for which we need to look up bigrams.
* prevWordLength: its length.
- * inputCodePoints: what user typed, in the same format as for UnigramDictionary::getSuggestions.
- * inputSize: the size of the codes array.
- * bigramCodePoints: an array for output, at the same format as outwords for getSuggestions.
- * bigramProbability: an array to output frequencies.
+ * outBigramCodePoints: an array for output, at the same format as outwords for getSuggestions.
+ * outBigramProbability: an array to output frequencies.
* outputTypes: an array to output types.
* This method returns the number of bigrams this word has, for backward compatibility.
- * Note: this is not the number of bigrams output in the array, which is the number of
- * bigrams this word has WHOSE first letter also matches the letter the user typed.
- * TODO: this may not be a sensible thing to do. It makes sense when the bigrams are
- * used to match the first letter of the second word, but once the user has typed more
- * and the bigrams are used to boost unigram result scores, it makes little sense to
- * reduce their scope to the ones that match the first letter.
*/
-int BigramDictionary::getPredictions(const int *prevWord, int prevWordLength, int *inputCodePoints,
- int inputSize, int *bigramCodePoints, int *bigramProbability, int *outputTypes) const {
+int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLength,
+ int *const outBigramCodePoints, int *const outBigramProbability,
+ int *const outputTypes) const {
// TODO: remove unused arguments, and refrain from storing stuff in members of this class
// TODO: have "in" arguments before "out" ones, and make out args explicit in the name
@@ -126,21 +119,16 @@ int BigramDictionary::getPredictions(const int *prevWord, int prevWordLength, in
getCodePointsAndProbabilityAndReturnCodePointCount(
mBinaryDictionaryInfo, bigramsIt.getBigramPos(), MAX_WORD_LENGTH,
bigramBuffer, &unigramProbability);
-
- // inputSize == 0 means we are trying to find bigram predictions.
- if (inputSize < 1 || checkFirstCharacter(bigramBuffer, inputCodePoints)) {
- const int bigramProbabilityTemp = bigramsIt.getProbability();
- // Due to space constraints, the probability for bigrams is approximate - the lower the
- // unigram probability, the worse the precision. The theoritical maximum error in
- // resulting probability is 8 - although in the practice it's never bigger than 3 or 4
- // in very bad cases. This means that sometimes, we'll see some bigrams interverted
- // here, but it can't get too bad.
- const int probability = ProbabilityUtils::computeProbabilityForBigram(
- unigramProbability, bigramProbabilityTemp);
- addWordBigram(bigramBuffer, length, probability, bigramProbability, bigramCodePoints,
- outputTypes);
- ++bigramCount;
- }
+ // Due to space constraints, the probability for bigrams is approximate - the lower the
+ // unigram probability, the worse the precision. The theoretical maximum error in
+ // resulting probability is 8 - although in practice it's never bigger than 3 or 4
+ // in very bad cases. This means that sometimes, we'll see some bigrams swapped
+ // here, but it can't get too bad.
+ const int probability = ProbabilityUtils::computeProbabilityForBigram(
+ unigramProbability, bigramsIt.getProbability());
+ addWordBigram(bigramBuffer, length, probability, outBigramProbability, outBigramCodePoints,
+ outputTypes);
+ ++bigramCount;
}
return min(bigramCount, MAX_RESULTS);
}
@@ -157,22 +145,6 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in
mBinaryDictionaryInfo, pos);
}
-bool BigramDictionary::checkFirstCharacter(int *word, int *inputCodePoints) const {
- // Checks whether this word starts with same character or neighboring characters of
- // what user typed.
-
- int maxAlt = MAX_ALTERNATIVES;
- const int firstBaseLowerCodePoint = CharUtils::toBaseLowerCase(*word);
- while (maxAlt > 0) {
- if (CharUtils::toBaseLowerCase(*inputCodePoints) == firstBaseLowerCodePoint) {
- return true;
- }
- inputCodePoints++;
- maxAlt--;
- }
- return false;
-}
-
bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *word1,
int length1) const {
int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */);
diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h b/native/jni/src/suggest/core/dictionary/bigram_dictionary.h
index 7706a2c22..438c34cac 100644
--- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.h
@@ -27,8 +27,8 @@ class BigramDictionary {
public:
BigramDictionary(const BinaryDictionaryInfo *const binaryDictionaryInfo);
- int getPredictions(const int *word, int length, int *inputCodePoints, int inputSize,
- int *outWords, int *frequencies, int *outputTypes) const;
+ int getPredictions(const int *word, int length, int *outBigramCodePoints,
+ int *outBigramProbability, int *outputTypes) const;
bool isValidBigram(const int *word1, int length1, const int *word2, int length2) const;
~BigramDictionary();
@@ -37,13 +37,10 @@ class BigramDictionary {
void addWordBigram(int *word, int length, int probability, int *bigramProbability,
int *bigramCodePoints, int *outputTypes) const;
- bool checkFirstCharacter(int *word, int *inputCodePoints) const;
int getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
const bool forceLowerCaseSearch) const;
const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
- // TODO: Re-implement proximity correction for bigram correction
- static const int MAX_ALTERNATIVES = 1;
};
} // namespace latinime
#endif // LATINIME_BIGRAM_DICTIONARY_H
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index 4f5d29f6a..4a9e38fe8 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -76,11 +76,10 @@ int Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession
}
}
-int Dictionary::getBigrams(const int *word, int length, int *inputCodePoints, int inputSize,
- int *outWords, int *frequencies, int *outputTypes) const {
+int Dictionary::getBigrams(const int *word, int length, int *outWords, int *frequencies,
+ int *outputTypes) const {
if (length <= 0) return 0;
- return mBigramDictionary->getPredictions(word, length, inputCodePoints, inputSize, outWords,
- frequencies, outputTypes);
+ return mBigramDictionary->getPredictions(word, length, outWords, frequencies, outputTypes);
}
int Dictionary::getProbability(const int *word, int length) const {
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h
index 1bf24a85b..9f1e0729d 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h
@@ -62,8 +62,8 @@ class Dictionary {
const SuggestOptions *const suggestOptions, int *outWords, int *frequencies,
int *spaceIndices, int *outputTypes) const;
- int getBigrams(const int *word, int length, int *inputCodePoints, int inputSize, int *outWords,
- int *frequencies, int *outputTypes) const;
+ int getBigrams(const int *word, int length, int *outWords, int *frequencies,
+ int *outputTypes) const;
int getProbability(const int *word, int length) const;