aboutsummaryrefslogtreecommitdiffstats
path: root/native
diff options
context:
space:
mode:
Diffstat (limited to 'native')
-rw-r--r--native/jni/HostUnitTests.mk2
-rw-r--r--native/jni/NativeFileList.mk1
-rw-r--r--native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp105
-rw-r--r--native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp108
-rw-r--r--native/jni/src/suggest/core/dictionary/bigram_dictionary.h47
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.cpp44
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.h4
-rw-r--r--native/jni/src/suggest/core/session/prev_words_info.h47
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp4
-rw-r--r--native/jni/src/utils/jni_data_utils.h32
10 files changed, 162 insertions, 232 deletions
diff --git a/native/jni/HostUnitTests.mk b/native/jni/HostUnitTests.mk
index 572d36564..6967d9b87 100644
--- a/native/jni/HostUnitTests.mk
+++ b/native/jni/HostUnitTests.mk
@@ -44,7 +44,7 @@ LOCAL_C_INCLUDES += $(LOCAL_PATH)/$(LATIN_IME_SRC_DIR)
LOCAL_MODULE := liblatinime_host_unittests
LOCAL_MODULE_TAGS := tests
LOCAL_SRC_FILES := $(addprefix $(LATIN_IME_TEST_SRC_DIR)/, $(LATIN_IME_CORE_TEST_FILES))
-LOCAL_STATIC_LIBRARIES += liblatinime_host_static_for_unittests libgtest_host libgtest_main_host
+LOCAL_STATIC_LIBRARIES += liblatinime_host_static_for_unittests
include $(BUILD_HOST_NATIVE_TEST)
endif # Darwin - TODO: Remove this
diff --git a/native/jni/NativeFileList.mk b/native/jni/NativeFileList.mk
index 2dd75c4f5..fe2106140 100644
--- a/native/jni/NativeFileList.mk
+++ b/native/jni/NativeFileList.mk
@@ -26,7 +26,6 @@ LATIN_IME_CORE_SRC_FILES := \
dic_node_utils.cpp \
dic_nodes_cache.cpp) \
$(addprefix suggest/core/dictionary/, \
- bigram_dictionary.cpp \
dictionary.cpp \
dictionary_utils.cpp \
digraph_utils.cpp \
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index 2654a4a0a..22ad2d0ab 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -178,7 +178,7 @@ static void latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz,
jlong proximityInfo, jlong dicTraverseSession, jintArray xCoordinatesArray,
jintArray yCoordinatesArray, jintArray timesArray, jintArray pointerIdsArray,
jintArray inputCodePointsArray, jint inputSize, jintArray suggestOptions,
- jintArray prevWordCodePointsForBigrams, jboolean isBeginningOfSentence,
+ jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray,
jintArray outSuggestionCount, jintArray outCodePointsArray, jintArray outScoresArray,
jintArray outSpaceIndicesArray, jintArray outTypesArray,
jintArray outAutoCommitFirstWordConfidenceArray, jfloatArray inOutLanguageWeight) {
@@ -201,20 +201,11 @@ static void latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz,
int pointerIds[inputSize];
const jsize inputCodePointsLength = env->GetArrayLength(inputCodePointsArray);
int inputCodePoints[inputCodePointsLength];
- const jsize prevWordCodePointsLength =
- prevWordCodePointsForBigrams ? env->GetArrayLength(prevWordCodePointsForBigrams) : 0;
- int prevWordCodePointsInternal[prevWordCodePointsLength];
- int *prevWordCodePoints = nullptr;
env->GetIntArrayRegion(xCoordinatesArray, 0, inputSize, xCoordinates);
env->GetIntArrayRegion(yCoordinatesArray, 0, inputSize, yCoordinates);
env->GetIntArrayRegion(timesArray, 0, inputSize, times);
env->GetIntArrayRegion(pointerIdsArray, 0, inputSize, pointerIds);
env->GetIntArrayRegion(inputCodePointsArray, 0, inputCodePointsLength, inputCodePoints);
- if (prevWordCodePointsForBigrams) {
- env->GetIntArrayRegion(prevWordCodePointsForBigrams, 0, prevWordCodePointsLength,
- prevWordCodePointsInternal);
- prevWordCodePoints = prevWordCodePointsInternal;
- }
const jsize numberOfOptions = env->GetArrayLength(suggestOptions);
int options[numberOfOptions];
@@ -248,8 +239,8 @@ static void latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz,
float languageWeight;
env->GetFloatArrayRegion(inOutLanguageWeight, 0, 1 /* len */, &languageWeight);
SuggestionResults suggestionResults(MAX_RESULTS);
- const PrevWordsInfo prevWordsInfo(prevWordCodePoints, prevWordCodePointsLength,
- isBeginningOfSentence);
+ const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
+ prevWordCodePointArrays, isBeginningOfSentenceArray);
if (givenSuggestOptions.isGesture() || inputSize > 0) {
// TODO: Use SuggestionResults to return suggestions.
dictionary->getSuggestions(pInfo, traverseSession, xCoordinates, yCoordinates,
@@ -283,18 +274,17 @@ static jint latinime_BinaryDictionary_getMaxProbabilityOfExactMatches(
return dictionary->getMaxProbabilityOfExactMatches(codePoints, wordLength);
}
-static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass clazz,
- jlong dict, jintArray word0, jboolean isBeginningOfSentence, jintArray word1) {
+static jint latinime_BinaryDictionary_getNgramProbability(JNIEnv *env, jclass clazz,
+ jlong dict, jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray,
+ jintArray word) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return JNI_FALSE;
- const jsize word0Length = env->GetArrayLength(word0);
- const jsize word1Length = env->GetArrayLength(word1);
- int word0CodePoints[word0Length];
- int word1CodePoints[word1Length];
- env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints);
- env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
- const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, isBeginningOfSentence);
- return dictionary->getBigramProbability(&prevWordsInfo, word1CodePoints, word1Length);
+ const jsize wordLength = env->GetArrayLength(word);
+ int wordCodePoints[wordLength];
+ env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
+ const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
+ prevWordCodePointArrays, isBeginningOfSentenceArray);
+ return dictionary->getNgramProbability(&prevWordsInfo, wordCodePoints, wordLength);
}
// Method to iterate all words in the dictionary for makedict.
@@ -355,7 +345,7 @@ static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz,
outShortcutProbabilities);
}
-static bool latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz, jlong dict,
+static bool latinime_BinaryDictionary_addUnigramEntry(JNIEnv *env, jclass clazz, jlong dict,
jintArray word, jint probability, jintArray shortcutTarget, jint shortcutProbability,
jboolean isBeginningOfSentence, jboolean isNotAWord, jboolean isBlacklisted,
jint timestamp) {
@@ -378,7 +368,7 @@ static bool latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz,
return dictionary->addUnigramEntry(codePoints, codePointCount, &unigramProperty);
}
-static bool latinime_BinaryDictionary_removeUnigramWord(JNIEnv *env, jclass clazz, jlong dict,
+static bool latinime_BinaryDictionary_removeUnigramEntry(JNIEnv *env, jclass clazz, jlong dict,
jintArray word) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
@@ -390,42 +380,39 @@ static bool latinime_BinaryDictionary_removeUnigramWord(JNIEnv *env, jclass claz
return dictionary->removeUnigramEntry(codePoints, codePointCount);
}
-static bool latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, jlong dict,
- jintArray word0, jboolean isBeginningOfSentence, jintArray word1, jint probability,
- jint timestamp) {
+static bool latinime_BinaryDictionary_addNgramEntry(JNIEnv *env, jclass clazz, jlong dict,
+ jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray,
+ jintArray word, jint probability, jint timestamp) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return false;
}
- jsize word0Length = env->GetArrayLength(word0);
- int word0CodePoints[word0Length];
- env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints);
- jsize word1Length = env->GetArrayLength(word1);
- int word1CodePoints[word1Length];
- env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
+ const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
+ prevWordCodePointArrays, isBeginningOfSentenceArray);
+ jsize wordLength = env->GetArrayLength(word);
+ int wordCodePoints[wordLength];
+ env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
const std::vector<int> bigramTargetCodePoints(
- word1CodePoints, word1CodePoints + word1Length);
+ wordCodePoints, wordCodePoints + wordLength);
// Use 1 for count to indicate the bigram has inputted.
const BigramProperty bigramProperty(&bigramTargetCodePoints, probability,
timestamp, 0 /* level */, 1 /* count */);
- const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, isBeginningOfSentence);
return dictionary->addNgramEntry(&prevWordsInfo, &bigramProperty);
}
-static bool latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass clazz, jlong dict,
- jintArray word0, jboolean isBeginningOfSentence, jintArray word1) {
+static bool latinime_BinaryDictionary_removeNgramEntry(JNIEnv *env, jclass clazz, jlong dict,
+ jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray,
+ jintArray word) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return false;
}
- jsize word0Length = env->GetArrayLength(word0);
- int word0CodePoints[word0Length];
- env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints);
- jsize word1Length = env->GetArrayLength(word1);
- int word1CodePoints[word1Length];
- env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
- const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, isBeginningOfSentence);
- return dictionary->removeNgramEntry(&prevWordsInfo, word1CodePoints, word1Length);
+ const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
+ prevWordCodePointArrays, isBeginningOfSentenceArray);
+ jsize wordLength = env->GetArrayLength(word);
+ int wordCodePoints[wordLength];
+ env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
+ return dictionary->removeNgramEntry(&prevWordsInfo, wordCodePoints, wordLength);
}
// Returns how many language model params are processed.
@@ -672,7 +659,7 @@ static const JNINativeMethod sMethods[] = {
},
{
const_cast<char *>("getSuggestionsNative"),
- const_cast<char *>("(JJJ[I[I[I[I[II[I[IZ[I[I[I[I[I[I[F)V"),
+ const_cast<char *>("(JJJ[I[I[I[I[II[I[[I[Z[I[I[I[I[I[I[F)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getSuggestions)
},
{
@@ -686,9 +673,9 @@ static const JNINativeMethod sMethods[] = {
reinterpret_cast<void *>(latinime_BinaryDictionary_getMaxProbabilityOfExactMatches)
},
{
- const_cast<char *>("getBigramProbabilityNative"),
- const_cast<char *>("(J[IZ[I)I"),
- reinterpret_cast<void *>(latinime_BinaryDictionary_getBigramProbability)
+ const_cast<char *>("getNgramProbabilityNative"),
+ const_cast<char *>("(J[[I[Z[I)I"),
+ reinterpret_cast<void *>(latinime_BinaryDictionary_getNgramProbability)
},
{
const_cast<char *>("getWordPropertyNative"),
@@ -702,24 +689,24 @@ static const JNINativeMethod sMethods[] = {
reinterpret_cast<void *>(latinime_BinaryDictionary_getNextWord)
},
{
- const_cast<char *>("addUnigramWordNative"),
+ const_cast<char *>("addUnigramEntryNative"),
const_cast<char *>("(J[II[IIZZZI)Z"),
- reinterpret_cast<void *>(latinime_BinaryDictionary_addUnigramWord)
+ reinterpret_cast<void *>(latinime_BinaryDictionary_addUnigramEntry)
},
{
- const_cast<char *>("removeUnigramWordNative"),
+ const_cast<char *>("removeUnigramEntryNative"),
const_cast<char *>("(J[I)Z"),
- reinterpret_cast<void *>(latinime_BinaryDictionary_removeUnigramWord)
+ reinterpret_cast<void *>(latinime_BinaryDictionary_removeUnigramEntry)
},
{
- const_cast<char *>("addBigramWordsNative"),
- const_cast<char *>("(J[IZ[III)Z"),
- reinterpret_cast<void *>(latinime_BinaryDictionary_addBigramWords)
+ const_cast<char *>("addNgramEntryNative"),
+ const_cast<char *>("(J[[I[Z[III)Z"),
+ reinterpret_cast<void *>(latinime_BinaryDictionary_addNgramEntry)
},
{
- const_cast<char *>("removeBigramWordsNative"),
- const_cast<char *>("(J[IZ[I)Z"),
- reinterpret_cast<void *>(latinime_BinaryDictionary_removeBigramWords)
+ const_cast<char *>("removeNgramEntryNative"),
+ const_cast<char *>("(J[[I[Z[I)Z"),
+ reinterpret_cast<void *>(latinime_BinaryDictionary_removeNgramEntry)
},
{
const_cast<char *>("addMultipleDictionaryEntriesNative"),
diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
deleted file mode 100644
index 295e760d6..000000000
--- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (C) 2010, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#define LOG_TAG "LatinIME: bigram_dictionary.cpp"
-
-#include "bigram_dictionary.h"
-
-#include <algorithm>
-#include <cstring>
-
-#include "defines.h"
-#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
-#include "suggest/core/dictionary/dictionary.h"
-#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
-#include "suggest/core/result/suggestion_results.h"
-#include "suggest/core/session/prev_words_info.h"
-#include "utils/char_utils.h"
-
-namespace latinime {
-
-BigramDictionary::BigramDictionary(
- const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy)
- : mDictionaryStructurePolicy(dictionaryStructurePolicy) {
- if (DEBUG_DICT) {
- AKLOGI("BigramDictionary - constructor");
- }
-}
-
-BigramDictionary::~BigramDictionary() {
-}
-
-/* Parameters :
- * prevWordsInfo: Information of previous words to get the predictions.
- * outSuggestionResults: SuggestionResults to put the predictions.
- */
-void BigramDictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
- SuggestionResults *const outSuggestionResults) const {
- int unigramProbability = 0;
- int bigramCodePoints[MAX_WORD_LENGTH];
- BinaryDictionaryBigramsIterator bigramsIt =
- prevWordsInfo->getBigramsIteratorForPrediction(mDictionaryStructurePolicy);
- while (bigramsIt.hasNext()) {
- bigramsIt.next();
- if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) {
- continue;
- }
- const int codePointCount = mDictionaryStructurePolicy->
- getCodePointsAndProbabilityAndReturnCodePointCount(bigramsIt.getBigramPos(),
- MAX_WORD_LENGTH, bigramCodePoints, &unigramProbability);
- if (codePointCount <= 0) {
- continue;
- }
- // Due to space constraints, the probability for bigrams is approximate - the lower the
- // unigram probability, the worse the precision. The theoritical maximum error in
- // resulting probability is 8 - although in the practice it's never bigger than 3 or 4
- // in very bad cases. This means that sometimes, we'll see some bigrams interverted
- // here, but it can't get too bad.
- const int probability = mDictionaryStructurePolicy->getProbability(
- unigramProbability, bigramsIt.getProbability());
- outSuggestionResults->addPrediction(bigramCodePoints, codePointCount, probability);
- }
-}
-
-// Returns a pointer to the start of the bigram list.
-// If the word is not found or has no bigrams, this function returns NOT_A_DICT_POS.
-int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
- const bool forceLowerCaseSearch) const {
- if (0 >= prevWordLength) return NOT_A_DICT_POS;
- int pos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(prevWord, prevWordLength,
- forceLowerCaseSearch);
- if (NOT_A_DICT_POS == pos) return NOT_A_DICT_POS;
- return mDictionaryStructurePolicy->getBigramsPositionOfPtNode(pos);
-}
-
-int BigramDictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo,
- const int *word1, int length1) const {
- int nextWordPos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(word1, length1,
- false /* forceLowerCaseSearch */);
- if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY;
- BinaryDictionaryBigramsIterator bigramsIt =
- prevWordsInfo->getBigramsIteratorForPrediction(mDictionaryStructurePolicy);
- while (bigramsIt.hasNext()) {
- bigramsIt.next();
- if (bigramsIt.getBigramPos() == nextWordPos
- && bigramsIt.getProbability() != NOT_A_PROBABILITY) {
- return mDictionaryStructurePolicy->getProbability(
- mDictionaryStructurePolicy->getUnigramProbabilityOfPtNode(nextWordPos),
- bigramsIt.getProbability());
- }
- }
- return NOT_A_PROBABILITY;
-}
-
-// TODO: Move functions related to bigram to here
-} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h b/native/jni/src/suggest/core/dictionary/bigram_dictionary.h
deleted file mode 100644
index bd3aed1bd..000000000
--- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (C) 2010 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_BIGRAM_DICTIONARY_H
-#define LATINIME_BIGRAM_DICTIONARY_H
-
-#include "defines.h"
-
-namespace latinime {
-
-class DictionaryStructureWithBufferPolicy;
-class PrevWordsInfo;
-class SuggestionResults;
-
-class BigramDictionary {
- public:
- BigramDictionary(const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy);
-
- void getPredictions(const PrevWordsInfo *const prevWordsInfo,
- SuggestionResults *const outSuggestionResults) const;
- int getBigramProbability(const PrevWordsInfo *const prevWordsInfo,
- const int *word1, int length1) const;
- ~BigramDictionary();
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(BigramDictionary);
-
- int getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
- const bool forceLowerCaseSearch) const;
-
- const DictionaryStructureWithBufferPolicy *const mDictionaryStructurePolicy;
-};
-} // namespace latinime
-#endif // LATINIME_BIGRAM_DICTIONARY_H
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index 228260216..fb25f757c 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -23,6 +23,7 @@
#include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "suggest/core/result/suggestion_results.h"
#include "suggest/core/session/dic_traverse_session.h"
+#include "suggest/core/session/prev_words_info.h"
#include "suggest/core/suggest.h"
#include "suggest/core/suggest_options.h"
#include "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h"
@@ -37,7 +38,6 @@ const int Dictionary::HEADER_ATTRIBUTE_BUFFER_SIZE = 32;
Dictionary::Dictionary(JNIEnv *env, DictionaryStructureWithBufferPolicy::StructurePolicyPtr
dictionaryStructureWithBufferPolicy)
: mDictionaryStructureWithBufferPolicy(std::move(dictionaryStructureWithBufferPolicy)),
- mBigramDictionary(mDictionaryStructureWithBufferPolicy.get()),
mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())),
mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) {
logDictionaryInfo(env);
@@ -62,7 +62,29 @@ void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession
void Dictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
SuggestionResults *const outSuggestionResults) const {
TimeKeeper::setCurrentTime();
- mBigramDictionary.getPredictions(prevWordsInfo, outSuggestionResults);
+ int unigramProbability = 0;
+ int bigramCodePoints[MAX_WORD_LENGTH];
+ BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction(
+ mDictionaryStructureWithBufferPolicy.get());
+ while (bigramsIt.hasNext()) {
+ bigramsIt.next();
+ if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) {
+ continue;
+ }
+ if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)
+ && bigramsIt.getProbability() == NOT_A_PROBABILITY) {
+ continue;
+ }
+ const int codePointCount = mDictionaryStructureWithBufferPolicy->
+ getCodePointsAndProbabilityAndReturnCodePointCount(bigramsIt.getBigramPos(),
+ MAX_WORD_LENGTH, bigramCodePoints, &unigramProbability);
+ if (codePointCount <= 0) {
+ continue;
+ }
+ const int probability = mDictionaryStructureWithBufferPolicy->getProbability(
+ unigramProbability, bigramsIt.getProbability());
+ outSuggestionResults->addPrediction(bigramCodePoints, codePointCount, probability);
+ }
}
int Dictionary::getProbability(const int *word, int length) const {
@@ -81,10 +103,24 @@ int Dictionary::getMaxProbabilityOfExactMatches(const int *word, int length) con
mDictionaryStructureWithBufferPolicy.get(), word, length);
}
-int Dictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word,
+int Dictionary::getNgramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word,
int length) const {
TimeKeeper::setCurrentTime();
- return mBigramDictionary.getBigramProbability(prevWordsInfo, word, length);
+ int nextWordPos = mDictionaryStructureWithBufferPolicy->getTerminalPtNodePositionOfWord(word,
+ length, false /* forceLowerCaseSearch */);
+ if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY;
+ BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction(
+ mDictionaryStructureWithBufferPolicy.get());
+ while (bigramsIt.hasNext()) {
+ bigramsIt.next();
+ if (bigramsIt.getBigramPos() == nextWordPos
+ && bigramsIt.getProbability() != NOT_A_PROBABILITY) {
+ return mDictionaryStructureWithBufferPolicy->getProbability(
+ mDictionaryStructureWithBufferPolicy->getUnigramProbabilityOfPtNode(
+ nextWordPos), bigramsIt.getProbability());
+ }
+ }
+ return NOT_A_PROBABILITY;
}
bool Dictionary::addUnigramEntry(const int *const word, const int length,
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h
index 247ee2421..3b41088fe 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h
@@ -21,7 +21,6 @@
#include "defines.h"
#include "jni.h"
-#include "suggest/core/dictionary/bigram_dictionary.h"
#include "suggest/core/dictionary/property/word_property.h"
#include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
@@ -75,7 +74,7 @@ class Dictionary {
int getMaxProbabilityOfExactMatches(const int *word, int length) const;
- int getBigramProbability(const PrevWordsInfo *const prevWordsInfo,
+ int getNgramProbability(const PrevWordsInfo *const prevWordsInfo,
const int *word, int length) const;
bool addUnigramEntry(const int *const codePoints, const int codePointCount,
@@ -119,7 +118,6 @@ class Dictionary {
const DictionaryStructureWithBufferPolicy::StructurePolicyPtr
mDictionaryStructureWithBufferPolicy;
- const BigramDictionary mBigramDictionary;
const SuggestInterfacePtr mGestureSuggest;
const SuggestInterfacePtr mTypingSuggest;
diff --git a/native/jni/src/suggest/core/session/prev_words_info.h b/native/jni/src/suggest/core/session/prev_words_info.h
index 640f6a2fc..e350c6996 100644
--- a/native/jni/src/suggest/core/session/prev_words_info.h
+++ b/native/jni/src/suggest/core/session/prev_words_info.h
@@ -25,7 +25,6 @@
namespace latinime {
// TODO: Support n-gram.
-// This class does not take ownership of any code point buffers.
class PrevWordsInfo {
public:
// No prev word information.
@@ -33,21 +32,52 @@ class PrevWordsInfo {
clear();
}
+ PrevWordsInfo(PrevWordsInfo &&prevWordsInfo) { // Copies every slot; the source is only read.
+ for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) {
+ mPrevWordCodePointCount[i] = prevWordsInfo.mPrevWordCodePointCount[i];
+ memmove(mPrevWordCodePoints[i], prevWordsInfo.mPrevWordCodePoints[i], // Used code points only.
+ sizeof(mPrevWordCodePoints[i][0]) * mPrevWordCodePointCount[i]);
+ mIsBeginningOfSentence[i] = prevWordsInfo.mIsBeginningOfSentence[i];
+ }
+ }
+
+ // Construct from previous words. The code points are copied into this object's own buffers.
+ PrevWordsInfo(const int prevWordCodePoints[][MAX_WORD_LENGTH],
+ const int *const prevWordCodePointCount, const bool *const isBeginningOfSentence,
+ const size_t prevWordCount) {
+ clear(); // Every slot starts with count 0 and "not beginning of sentence".
+ for (size_t i = 0; i < std::min(NELEMS(mPrevWordCodePoints), prevWordCount); ++i) {
+ if (prevWordCodePointCount[i] < 0 || prevWordCodePointCount[i] > MAX_WORD_LENGTH) {
+ continue; // An out-of-range count leaves this slot cleared.
+ }
+ memmove(mPrevWordCodePoints[i], prevWordCodePoints[i],
+ sizeof(mPrevWordCodePoints[i][0]) * prevWordCodePointCount[i]);
+ mPrevWordCodePointCount[i] = prevWordCodePointCount[i];
+ mIsBeginningOfSentence[i] = isBeginningOfSentence[i];
+ }
+ }
+
+ // Construct from a previous word. The code points are copied into this object's own buffer.
+ // If prevWordCodePoints is null or prevWordCodePointCount is outside [0, MAX_WORD_LENGTH],
+ // the instance is left cleared (no previous word, not beginning of sentence).
PrevWordsInfo(const int *const prevWordCodePoints, const int prevWordCodePointCount,
const bool isBeginningOfSentence) {
clear();
- mPrevWordCodePoints[0] = prevWordCodePoints;
+ // A negative count would be converted to a huge size_t in the memmove() size below, so it
+ // is rejected here just like in the multi-word constructor.
+ if (!prevWordCodePoints || prevWordCodePointCount < 0
+ || prevWordCodePointCount > MAX_WORD_LENGTH) {
+ return;
+ }
+ memmove(mPrevWordCodePoints[0], prevWordCodePoints,
+ sizeof(mPrevWordCodePoints[0][0]) * prevWordCodePointCount);
mPrevWordCodePointCount[0] = prevWordCodePointCount;
mIsBeginningOfSentence[0] = isBeginningOfSentence;
}
bool isValid() const {
- for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) {
- if (mPrevWordCodePointCount[i] > MAX_WORD_LENGTH) {
- return false;
- }
+ if (mPrevWordCodePointCount[0] > 0) {
+ return true;
+ }
+ if (mIsBeginningOfSentence[0]) {
+ return true;
}
- return true;
+ return false;
}
void getPrevWordsTerminalPtNodePos(
@@ -168,13 +198,12 @@ class PrevWordsInfo {
void clear() {
for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) {
- mPrevWordCodePoints[i] = nullptr;
mPrevWordCodePointCount[i] = 0;
mIsBeginningOfSentence[i] = false;
}
}
- const int *mPrevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+ int mPrevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH];
int mPrevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
bool mIsBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
};
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
index 5c62b9caf..002593c49 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
@@ -268,6 +268,10 @@ int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
int PatriciaTriePolicy::getProbability(const int unigramProbability,
const int bigramProbability) const {
+ // Due to space constraints, the probability for bigrams is approximate - the lower the unigram
+ // probability, the worse the precision. The theoretical maximum error in resulting probability
+ // is 8 - although in practice it's never bigger than 3 or 4 in very bad cases. This means
+ // that sometimes, we'll see some bigrams inverted here, but it can't get too bad.
if (unigramProbability == NOT_A_PROBABILITY) {
return NOT_A_PROBABILITY;
} else if (bigramProbability == NOT_A_PROBABILITY) {
diff --git a/native/jni/src/utils/jni_data_utils.h b/native/jni/src/utils/jni_data_utils.h
index 3514aeeb0..cb82d3c3b 100644
--- a/native/jni/src/utils/jni_data_utils.h
+++ b/native/jni/src/utils/jni_data_utils.h
@@ -21,6 +21,7 @@
#include "defines.h"
#include "jni.h"
+#include "suggest/core/session/prev_words_info.h"
#include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h"
#include "utils/char_utils.h"
@@ -95,6 +96,37 @@ class JniDataUtils {
}
}
+ // Builds a PrevWordsInfo from the arrays passed in through JNI.
+ // prevWordCodePointArrays: array of int arrays; element i holds the code points of the i-th
+ // previous word. The array itself and any of its elements may be null.
+ // isBeginningOfSentenceArray: flags parallel to prevWordCodePointArrays. May be null or
+ // shorter than prevWordCodePointArrays; missing flags are treated as false.
+ // Words that are null, missing or longer than MAX_WORD_LENGTH are passed on as empty words
+ // whose beginning-of-sentence flag is false.
+ static PrevWordsInfo constructPrevWordsInfo(JNIEnv *env, jobjectArray prevWordCodePointArrays,
+ jbooleanArray isBeginningOfSentenceArray) {
+ int prevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH];
+ int prevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+ bool isBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+ // GetArrayLength() must not be given a null array, so a null array is treated as empty.
+ const jsize prevWordsCount =
+ prevWordCodePointArrays ? env->GetArrayLength(prevWordCodePointArrays) : 0;
+ const jsize isBeginningOfSentenceCount =
+ isBeginningOfSentenceArray ? env->GetArrayLength(isBeginningOfSentenceArray) : 0;
+ for (size_t i = 0; i < NELEMS(prevWordCodePoints); ++i) {
+ prevWordCodePointCount[i] = 0;
+ isBeginningOfSentence[i] = false;
+ const jsize index = static_cast<jsize>(i);
+ if (prevWordsCount <= index) {
+ continue;
+ }
+ jintArray prevWord = static_cast<jintArray>(
+ env->GetObjectArrayElement(prevWordCodePointArrays, index));
+ if (!prevWord) {
+ continue;
+ }
+ const jsize prevWordLength = env->GetArrayLength(prevWord);
+ const bool isWordTooLong = prevWordLength > MAX_WORD_LENGTH;
+ if (!isWordTooLong) {
+ env->GetIntArrayRegion(prevWord, 0, prevWordLength, prevWordCodePoints[i]);
+ }
+ // The element is a JNI local reference; release it once its contents have been copied.
+ env->DeleteLocalRef(prevWord);
+ if (isWordTooLong) {
+ continue;
+ }
+ prevWordCodePointCount[i] = prevWordLength;
+ // Reading past the end of the flag array would raise ArrayIndexOutOfBoundsException.
+ if (index < isBeginningOfSentenceCount) {
+ jboolean isBeginningOfSentenceBoolean = JNI_FALSE;
+ env->GetBooleanArrayRegion(isBeginningOfSentenceArray, index, 1 /* len */,
+ &isBeginningOfSentenceBoolean);
+ isBeginningOfSentence[i] = isBeginningOfSentenceBoolean == JNI_TRUE;
+ }
+ }
+ return PrevWordsInfo(prevWordCodePoints, prevWordCodePointCount, isBeginningOfSentence,
+ MAX_PREV_WORD_COUNT_FOR_N_GRAM);
+ }
+
static void putBooleanToArray(JNIEnv *env, jbooleanArray array, const int index,
const jboolean value) {
env->SetBooleanArrayRegion(array, index, 1 /* len */, &value);