diff options
Diffstat (limited to 'native')
10 files changed, 162 insertions, 232 deletions
diff --git a/native/jni/HostUnitTests.mk b/native/jni/HostUnitTests.mk index 572d36564..6967d9b87 100644 --- a/native/jni/HostUnitTests.mk +++ b/native/jni/HostUnitTests.mk @@ -44,7 +44,7 @@ LOCAL_C_INCLUDES += $(LOCAL_PATH)/$(LATIN_IME_SRC_DIR) LOCAL_MODULE := liblatinime_host_unittests LOCAL_MODULE_TAGS := tests LOCAL_SRC_FILES := $(addprefix $(LATIN_IME_TEST_SRC_DIR)/, $(LATIN_IME_CORE_TEST_FILES)) -LOCAL_STATIC_LIBRARIES += liblatinime_host_static_for_unittests libgtest_host libgtest_main_host +LOCAL_STATIC_LIBRARIES += liblatinime_host_static_for_unittests include $(BUILD_HOST_NATIVE_TEST) endif # Darwin - TODO: Remove this diff --git a/native/jni/NativeFileList.mk b/native/jni/NativeFileList.mk index 2dd75c4f5..fe2106140 100644 --- a/native/jni/NativeFileList.mk +++ b/native/jni/NativeFileList.mk @@ -26,7 +26,6 @@ LATIN_IME_CORE_SRC_FILES := \ dic_node_utils.cpp \ dic_nodes_cache.cpp) \ $(addprefix suggest/core/dictionary/, \ - bigram_dictionary.cpp \ dictionary.cpp \ dictionary_utils.cpp \ digraph_utils.cpp \ diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index 2654a4a0a..22ad2d0ab 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -178,7 +178,7 @@ static void latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, jlong proximityInfo, jlong dicTraverseSession, jintArray xCoordinatesArray, jintArray yCoordinatesArray, jintArray timesArray, jintArray pointerIdsArray, jintArray inputCodePointsArray, jint inputSize, jintArray suggestOptions, - jintArray prevWordCodePointsForBigrams, jboolean isBeginningOfSentence, + jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray, jintArray outSuggestionCount, jintArray outCodePointsArray, jintArray outScoresArray, jintArray outSpaceIndicesArray, jintArray outTypesArray, jintArray 
outAutoCommitFirstWordConfidenceArray, jfloatArray inOutLanguageWeight) { @@ -201,20 +201,11 @@ static void latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, int pointerIds[inputSize]; const jsize inputCodePointsLength = env->GetArrayLength(inputCodePointsArray); int inputCodePoints[inputCodePointsLength]; - const jsize prevWordCodePointsLength = - prevWordCodePointsForBigrams ? env->GetArrayLength(prevWordCodePointsForBigrams) : 0; - int prevWordCodePointsInternal[prevWordCodePointsLength]; - int *prevWordCodePoints = nullptr; env->GetIntArrayRegion(xCoordinatesArray, 0, inputSize, xCoordinates); env->GetIntArrayRegion(yCoordinatesArray, 0, inputSize, yCoordinates); env->GetIntArrayRegion(timesArray, 0, inputSize, times); env->GetIntArrayRegion(pointerIdsArray, 0, inputSize, pointerIds); env->GetIntArrayRegion(inputCodePointsArray, 0, inputCodePointsLength, inputCodePoints); - if (prevWordCodePointsForBigrams) { - env->GetIntArrayRegion(prevWordCodePointsForBigrams, 0, prevWordCodePointsLength, - prevWordCodePointsInternal); - prevWordCodePoints = prevWordCodePointsInternal; - } const jsize numberOfOptions = env->GetArrayLength(suggestOptions); int options[numberOfOptions]; @@ -248,8 +239,8 @@ static void latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, float languageWeight; env->GetFloatArrayRegion(inOutLanguageWeight, 0, 1 /* len */, &languageWeight); SuggestionResults suggestionResults(MAX_RESULTS); - const PrevWordsInfo prevWordsInfo(prevWordCodePoints, prevWordCodePointsLength, - isBeginningOfSentence); + const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env, + prevWordCodePointArrays, isBeginningOfSentenceArray); if (givenSuggestOptions.isGesture() || inputSize > 0) { // TODO: Use SuggestionResults to return suggestions. 
dictionary->getSuggestions(pInfo, traverseSession, xCoordinates, yCoordinates, @@ -283,18 +274,17 @@ static jint latinime_BinaryDictionary_getMaxProbabilityOfExactMatches( return dictionary->getMaxProbabilityOfExactMatches(codePoints, wordLength); } -static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass clazz, - jlong dict, jintArray word0, jboolean isBeginningOfSentence, jintArray word1) { +static jint latinime_BinaryDictionary_getNgramProbability(JNIEnv *env, jclass clazz, + jlong dict, jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray, + jintArray word) { Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); if (!dictionary) return JNI_FALSE; - const jsize word0Length = env->GetArrayLength(word0); - const jsize word1Length = env->GetArrayLength(word1); - int word0CodePoints[word0Length]; - int word1CodePoints[word1Length]; - env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints); - env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints); - const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, isBeginningOfSentence); - return dictionary->getBigramProbability(&prevWordsInfo, word1CodePoints, word1Length); + const jsize wordLength = env->GetArrayLength(word); + int wordCodePoints[wordLength]; + env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints); + const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env, + prevWordCodePointArrays, isBeginningOfSentenceArray); + return dictionary->getNgramProbability(&prevWordsInfo, wordCodePoints, wordLength); } // Method to iterate all words in the dictionary for makedict. 
@@ -355,7 +345,7 @@ static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz, outShortcutProbabilities); } -static bool latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz, jlong dict, +static bool latinime_BinaryDictionary_addUnigramEntry(JNIEnv *env, jclass clazz, jlong dict, jintArray word, jint probability, jintArray shortcutTarget, jint shortcutProbability, jboolean isBeginningOfSentence, jboolean isNotAWord, jboolean isBlacklisted, jint timestamp) { @@ -378,7 +368,7 @@ static bool latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz, return dictionary->addUnigramEntry(codePoints, codePointCount, &unigramProperty); } -static bool latinime_BinaryDictionary_removeUnigramWord(JNIEnv *env, jclass clazz, jlong dict, +static bool latinime_BinaryDictionary_removeUnigramEntry(JNIEnv *env, jclass clazz, jlong dict, jintArray word) { Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); if (!dictionary) { @@ -390,42 +380,39 @@ static bool latinime_BinaryDictionary_removeUnigramWord(JNIEnv *env, jclass claz return dictionary->removeUnigramEntry(codePoints, codePointCount); } -static bool latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, jlong dict, - jintArray word0, jboolean isBeginningOfSentence, jintArray word1, jint probability, - jint timestamp) { +static bool latinime_BinaryDictionary_addNgramEntry(JNIEnv *env, jclass clazz, jlong dict, + jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray, + jintArray word, jint probability, jint timestamp) { Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); if (!dictionary) { return false; } - jsize word0Length = env->GetArrayLength(word0); - int word0CodePoints[word0Length]; - env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints); - jsize word1Length = env->GetArrayLength(word1); - int word1CodePoints[word1Length]; - env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints); + const PrevWordsInfo 
prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env, + prevWordCodePointArrays, isBeginningOfSentenceArray); + jsize wordLength = env->GetArrayLength(word); + int wordCodePoints[wordLength]; + env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints); const std::vector<int> bigramTargetCodePoints( - word1CodePoints, word1CodePoints + word1Length); + wordCodePoints, wordCodePoints + wordLength); // Use 1 for count to indicate the bigram has inputted. const BigramProperty bigramProperty(&bigramTargetCodePoints, probability, timestamp, 0 /* level */, 1 /* count */); - const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, isBeginningOfSentence); return dictionary->addNgramEntry(&prevWordsInfo, &bigramProperty); } -static bool latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass clazz, jlong dict, - jintArray word0, jboolean isBeginningOfSentence, jintArray word1) { +static bool latinime_BinaryDictionary_removeNgramEntry(JNIEnv *env, jclass clazz, jlong dict, + jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray, + jintArray word) { Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); if (!dictionary) { return false; } - jsize word0Length = env->GetArrayLength(word0); - int word0CodePoints[word0Length]; - env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints); - jsize word1Length = env->GetArrayLength(word1); - int word1CodePoints[word1Length]; - env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints); - const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, isBeginningOfSentence); - return dictionary->removeNgramEntry(&prevWordsInfo, word1CodePoints, word1Length); + const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env, + prevWordCodePointArrays, isBeginningOfSentenceArray); + jsize wordLength = env->GetArrayLength(word); + int wordCodePoints[wordLength]; + env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints); + return 
dictionary->removeNgramEntry(&prevWordsInfo, wordCodePoints, wordLength); } // Returns how many language model params are processed. @@ -672,7 +659,7 @@ static const JNINativeMethod sMethods[] = { }, { const_cast<char *>("getSuggestionsNative"), - const_cast<char *>("(JJJ[I[I[I[I[II[I[IZ[I[I[I[I[I[I[F)V"), + const_cast<char *>("(JJJ[I[I[I[I[II[I[[I[Z[I[I[I[I[I[I[F)V"), reinterpret_cast<void *>(latinime_BinaryDictionary_getSuggestions) }, { @@ -686,9 +673,9 @@ static const JNINativeMethod sMethods[] = { reinterpret_cast<void *>(latinime_BinaryDictionary_getMaxProbabilityOfExactMatches) }, { - const_cast<char *>("getBigramProbabilityNative"), - const_cast<char *>("(J[IZ[I)I"), - reinterpret_cast<void *>(latinime_BinaryDictionary_getBigramProbability) + const_cast<char *>("getNgramProbabilityNative"), + const_cast<char *>("(J[[I[Z[I)I"), + reinterpret_cast<void *>(latinime_BinaryDictionary_getNgramProbability) }, { const_cast<char *>("getWordPropertyNative"), @@ -702,24 +689,24 @@ static const JNINativeMethod sMethods[] = { reinterpret_cast<void *>(latinime_BinaryDictionary_getNextWord) }, { - const_cast<char *>("addUnigramWordNative"), + const_cast<char *>("addUnigramEntryNative"), const_cast<char *>("(J[II[IIZZZI)Z"), - reinterpret_cast<void *>(latinime_BinaryDictionary_addUnigramWord) + reinterpret_cast<void *>(latinime_BinaryDictionary_addUnigramEntry) }, { - const_cast<char *>("removeUnigramWordNative"), + const_cast<char *>("removeUnigramEntryNative"), const_cast<char *>("(J[I)Z"), - reinterpret_cast<void *>(latinime_BinaryDictionary_removeUnigramWord) + reinterpret_cast<void *>(latinime_BinaryDictionary_removeUnigramEntry) }, { - const_cast<char *>("addBigramWordsNative"), - const_cast<char *>("(J[IZ[III)Z"), - reinterpret_cast<void *>(latinime_BinaryDictionary_addBigramWords) + const_cast<char *>("addNgramEntryNative"), + const_cast<char *>("(J[[I[Z[III)Z"), + reinterpret_cast<void *>(latinime_BinaryDictionary_addNgramEntry) }, { - const_cast<char 
*>("removeBigramWordsNative"), - const_cast<char *>("(J[IZ[I)Z"), - reinterpret_cast<void *>(latinime_BinaryDictionary_removeBigramWords) + const_cast<char *>("removeNgramEntryNative"), + const_cast<char *>("(J[[I[Z[I)Z"), + reinterpret_cast<void *>(latinime_BinaryDictionary_removeNgramEntry) }, { const_cast<char *>("addMultipleDictionaryEntriesNative"), diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp deleted file mode 100644 index 295e760d6..000000000 --- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (C) 2010, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#define LOG_TAG "LatinIME: bigram_dictionary.cpp" - -#include "bigram_dictionary.h" - -#include <algorithm> -#include <cstring> - -#include "defines.h" -#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h" -#include "suggest/core/dictionary/dictionary.h" -#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" -#include "suggest/core/result/suggestion_results.h" -#include "suggest/core/session/prev_words_info.h" -#include "utils/char_utils.h" - -namespace latinime { - -BigramDictionary::BigramDictionary( - const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy) - : mDictionaryStructurePolicy(dictionaryStructurePolicy) { - if (DEBUG_DICT) { - AKLOGI("BigramDictionary - constructor"); - } -} - -BigramDictionary::~BigramDictionary() { -} - -/* Parameters : - * prevWordsInfo: Information of previous words to get the predictions. - * outSuggestionResults: SuggestionResults to put the predictions. - */ -void BigramDictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo, - SuggestionResults *const outSuggestionResults) const { - int unigramProbability = 0; - int bigramCodePoints[MAX_WORD_LENGTH]; - BinaryDictionaryBigramsIterator bigramsIt = - prevWordsInfo->getBigramsIteratorForPrediction(mDictionaryStructurePolicy); - while (bigramsIt.hasNext()) { - bigramsIt.next(); - if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) { - continue; - } - const int codePointCount = mDictionaryStructurePolicy-> - getCodePointsAndProbabilityAndReturnCodePointCount(bigramsIt.getBigramPos(), - MAX_WORD_LENGTH, bigramCodePoints, &unigramProbability); - if (codePointCount <= 0) { - continue; - } - // Due to space constraints, the probability for bigrams is approximate - the lower the - // unigram probability, the worse the precision. The theoritical maximum error in - // resulting probability is 8 - although in the practice it's never bigger than 3 or 4 - // in very bad cases. 
This means that sometimes, we'll see some bigrams interverted - // here, but it can't get too bad. - const int probability = mDictionaryStructurePolicy->getProbability( - unigramProbability, bigramsIt.getProbability()); - outSuggestionResults->addPrediction(bigramCodePoints, codePointCount, probability); - } -} - -// Returns a pointer to the start of the bigram list. -// If the word is not found or has no bigrams, this function returns NOT_A_DICT_POS. -int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength, - const bool forceLowerCaseSearch) const { - if (0 >= prevWordLength) return NOT_A_DICT_POS; - int pos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(prevWord, prevWordLength, - forceLowerCaseSearch); - if (NOT_A_DICT_POS == pos) return NOT_A_DICT_POS; - return mDictionaryStructurePolicy->getBigramsPositionOfPtNode(pos); -} - -int BigramDictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo, - const int *word1, int length1) const { - int nextWordPos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(word1, length1, - false /* forceLowerCaseSearch */); - if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY; - BinaryDictionaryBigramsIterator bigramsIt = - prevWordsInfo->getBigramsIteratorForPrediction(mDictionaryStructurePolicy); - while (bigramsIt.hasNext()) { - bigramsIt.next(); - if (bigramsIt.getBigramPos() == nextWordPos - && bigramsIt.getProbability() != NOT_A_PROBABILITY) { - return mDictionaryStructurePolicy->getProbability( - mDictionaryStructurePolicy->getUnigramProbabilityOfPtNode(nextWordPos), - bigramsIt.getProbability()); - } - } - return NOT_A_PROBABILITY; -} - -// TODO: Move functions related to bigram to here -} // namespace latinime diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h b/native/jni/src/suggest/core/dictionary/bigram_dictionary.h deleted file mode 100644 index bd3aed1bd..000000000 --- 
a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (C) 2010 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LATINIME_BIGRAM_DICTIONARY_H -#define LATINIME_BIGRAM_DICTIONARY_H - -#include "defines.h" - -namespace latinime { - -class DictionaryStructureWithBufferPolicy; -class PrevWordsInfo; -class SuggestionResults; - -class BigramDictionary { - public: - BigramDictionary(const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy); - - void getPredictions(const PrevWordsInfo *const prevWordsInfo, - SuggestionResults *const outSuggestionResults) const; - int getBigramProbability(const PrevWordsInfo *const prevWordsInfo, - const int *word1, int length1) const; - ~BigramDictionary(); - - private: - DISALLOW_IMPLICIT_CONSTRUCTORS(BigramDictionary); - - int getBigramListPositionForWord(const int *prevWord, const int prevWordLength, - const bool forceLowerCaseSearch) const; - - const DictionaryStructureWithBufferPolicy *const mDictionaryStructurePolicy; -}; -} // namespace latinime -#endif // LATINIME_BIGRAM_DICTIONARY_H diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index 228260216..fb25f757c 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -23,6 +23,7 @@ #include 
"suggest/core/policy/dictionary_header_structure_policy.h" #include "suggest/core/result/suggestion_results.h" #include "suggest/core/session/dic_traverse_session.h" +#include "suggest/core/session/prev_words_info.h" #include "suggest/core/suggest.h" #include "suggest/core/suggest_options.h" #include "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h" @@ -37,7 +38,6 @@ const int Dictionary::HEADER_ATTRIBUTE_BUFFER_SIZE = 32; Dictionary::Dictionary(JNIEnv *env, DictionaryStructureWithBufferPolicy::StructurePolicyPtr dictionaryStructureWithBufferPolicy) : mDictionaryStructureWithBufferPolicy(std::move(dictionaryStructureWithBufferPolicy)), - mBigramDictionary(mDictionaryStructureWithBufferPolicy.get()), mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())), mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) { logDictionaryInfo(env); @@ -62,7 +62,29 @@ void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession void Dictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo, SuggestionResults *const outSuggestionResults) const { TimeKeeper::setCurrentTime(); - mBigramDictionary.getPredictions(prevWordsInfo, outSuggestionResults); + int unigramProbability = 0; + int bigramCodePoints[MAX_WORD_LENGTH]; + BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction( + mDictionaryStructureWithBufferPolicy.get()); + while (bigramsIt.hasNext()) { + bigramsIt.next(); + if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) { + continue; + } + if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */) + && bigramsIt.getProbability() == NOT_A_PROBABILITY) { + continue; + } + const int codePointCount = mDictionaryStructureWithBufferPolicy-> + getCodePointsAndProbabilityAndReturnCodePointCount(bigramsIt.getBigramPos(), + MAX_WORD_LENGTH, bigramCodePoints, &unigramProbability); + if (codePointCount <= 0) { + continue; + } + const int probability = 
mDictionaryStructureWithBufferPolicy->getProbability( + unigramProbability, bigramsIt.getProbability()); + outSuggestionResults->addPrediction(bigramCodePoints, codePointCount, probability); + } } int Dictionary::getProbability(const int *word, int length) const { @@ -81,10 +103,24 @@ int Dictionary::getMaxProbabilityOfExactMatches(const int *word, int length) con mDictionaryStructureWithBufferPolicy.get(), word, length); } -int Dictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word, +int Dictionary::getNgramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word, int length) const { TimeKeeper::setCurrentTime(); - return mBigramDictionary.getBigramProbability(prevWordsInfo, word, length); + int nextWordPos = mDictionaryStructureWithBufferPolicy->getTerminalPtNodePositionOfWord(word, + length, false /* forceLowerCaseSearch */); + if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY; + BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction( + mDictionaryStructureWithBufferPolicy.get()); + while (bigramsIt.hasNext()) { + bigramsIt.next(); + if (bigramsIt.getBigramPos() == nextWordPos + && bigramsIt.getProbability() != NOT_A_PROBABILITY) { + return mDictionaryStructureWithBufferPolicy->getProbability( + mDictionaryStructureWithBufferPolicy->getUnigramProbabilityOfPtNode( + nextWordPos), bigramsIt.getProbability()); + } + } + return NOT_A_PROBABILITY; } bool Dictionary::addUnigramEntry(const int *const word, const int length, diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h index 247ee2421..3b41088fe 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.h +++ b/native/jni/src/suggest/core/dictionary/dictionary.h @@ -21,7 +21,6 @@ #include "defines.h" #include "jni.h" -#include "suggest/core/dictionary/bigram_dictionary.h" #include "suggest/core/dictionary/property/word_property.h" #include 
"suggest/core/policy/dictionary_header_structure_policy.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" @@ -75,7 +74,7 @@ class Dictionary { int getMaxProbabilityOfExactMatches(const int *word, int length) const; - int getBigramProbability(const PrevWordsInfo *const prevWordsInfo, + int getNgramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word, int length) const; bool addUnigramEntry(const int *const codePoints, const int codePointCount, @@ -119,7 +118,6 @@ class Dictionary { const DictionaryStructureWithBufferPolicy::StructurePolicyPtr mDictionaryStructureWithBufferPolicy; - const BigramDictionary mBigramDictionary; const SuggestInterfacePtr mGestureSuggest; const SuggestInterfacePtr mTypingSuggest; diff --git a/native/jni/src/suggest/core/session/prev_words_info.h b/native/jni/src/suggest/core/session/prev_words_info.h index 640f6a2fc..e350c6996 100644 --- a/native/jni/src/suggest/core/session/prev_words_info.h +++ b/native/jni/src/suggest/core/session/prev_words_info.h @@ -25,7 +25,6 @@ namespace latinime { // TODO: Support n-gram. -// This class does not take ownership of any code point buffers. class PrevWordsInfo { public: // No prev word information. @@ -33,21 +32,52 @@ class PrevWordsInfo { clear(); } + PrevWordsInfo(PrevWordsInfo &&prevWordsInfo) { + for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) { + mPrevWordCodePointCount[i] = prevWordsInfo.mPrevWordCodePointCount[i]; + memmove(mPrevWordCodePoints[i], prevWordsInfo.mPrevWordCodePoints[i], + sizeof(mPrevWordCodePoints[i][0]) * mPrevWordCodePointCount[i]); + mIsBeginningOfSentence[i] = prevWordsInfo.mIsBeginningOfSentence[i]; + } + } + + // Construct from previous words. 
+ PrevWordsInfo(const int prevWordCodePoints[][MAX_WORD_LENGTH], + const int *const prevWordCodePointCount, const bool *const isBeginningOfSentence, + const size_t prevWordCount) { + clear(); + for (size_t i = 0; i < std::min(NELEMS(mPrevWordCodePoints), prevWordCount); ++i) { + if (prevWordCodePointCount[i] < 0 || prevWordCodePointCount[i] > MAX_WORD_LENGTH) { + continue; + } + memmove(mPrevWordCodePoints[i], prevWordCodePoints[i], + sizeof(mPrevWordCodePoints[i][0]) * prevWordCodePointCount[i]); + mPrevWordCodePointCount[i] = prevWordCodePointCount[i]; + mIsBeginningOfSentence[i] = isBeginningOfSentence[i]; + } + } + + // Construct from a previous word. PrevWordsInfo(const int *const prevWordCodePoints, const int prevWordCodePointCount, const bool isBeginningOfSentence) { clear(); - mPrevWordCodePoints[0] = prevWordCodePoints; + if (prevWordCodePointCount > MAX_WORD_LENGTH || !prevWordCodePoints) { + return; + } + memmove(mPrevWordCodePoints[0], prevWordCodePoints, + sizeof(mPrevWordCodePoints[0][0]) * prevWordCodePointCount); mPrevWordCodePointCount[0] = prevWordCodePointCount; mIsBeginningOfSentence[0] = isBeginningOfSentence; } bool isValid() const { - for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) { - if (mPrevWordCodePointCount[i] > MAX_WORD_LENGTH) { - return false; - } + if (mPrevWordCodePointCount[0] > 0) { + return true; + } + if (mIsBeginningOfSentence[0]) { + return true; } - return true; + return false; } void getPrevWordsTerminalPtNodePos( @@ -168,13 +198,12 @@ class PrevWordsInfo { void clear() { for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) { - mPrevWordCodePoints[i] = nullptr; mPrevWordCodePointCount[i] = 0; mIsBeginningOfSentence[i] = false; } } - const int *mPrevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; + int mPrevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH]; int mPrevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; bool mIsBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; }; diff --git 
a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp index 5c62b9caf..002593c49 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp @@ -268,6 +268,10 @@ int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord, int PatriciaTriePolicy::getProbability(const int unigramProbability, const int bigramProbability) const { + // Due to space constraints, the probability for bigrams is approximate - the lower the unigram + // probability, the worse the precision. The theoretical maximum error in resulting probability + // is 8 - although in practice it's never bigger than 3 or 4 in very bad cases. This means + // that sometimes, we'll see some bigrams inverted here, but it can't get too bad. if (unigramProbability == NOT_A_PROBABILITY) { return NOT_A_PROBABILITY; } else if (bigramProbability == NOT_A_PROBABILITY) { diff --git a/native/jni/src/utils/jni_data_utils.h b/native/jni/src/utils/jni_data_utils.h index 3514aeeb0..cb82d3c3b 100644 --- a/native/jni/src/utils/jni_data_utils.h +++ b/native/jni/src/utils/jni_data_utils.h @@ -21,6 +21,7 @@ #include "defines.h" #include "jni.h" +#include "suggest/core/session/prev_words_info.h" #include "suggest/core/policy/dictionary_header_structure_policy.h" #include "suggest/policyimpl/dictionary/header/header_read_write_utils.h" #include "utils/char_utils.h" @@ -95,6 +96,37 @@ class JniDataUtils { } } + static PrevWordsInfo constructPrevWordsInfo(JNIEnv *env, jobjectArray prevWordCodePointArrays, + jbooleanArray isBeginningOfSentenceArray) { + int prevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH]; + int prevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; + bool isBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; + jsize prevWordsCount = 
env->GetArrayLength(prevWordCodePointArrays); + for (size_t i = 0; i < NELEMS(prevWordCodePoints); ++i) { + prevWordCodePointCount[i] = 0; + isBeginningOfSentence[i] = false; + if (prevWordsCount <= static_cast<int>(i)) { + continue; + } + jintArray prevWord = (jintArray)env->GetObjectArrayElement(prevWordCodePointArrays, i); + if (!prevWord) { + continue; + } + jsize prevWordLength = env->GetArrayLength(prevWord); + if (prevWordLength > MAX_WORD_LENGTH) { + continue; + } + env->GetIntArrayRegion(prevWord, 0, prevWordLength, prevWordCodePoints[i]); + prevWordCodePointCount[i] = prevWordLength; + jboolean isBeginningOfSentenceBoolean = JNI_FALSE; + env->GetBooleanArrayRegion(isBeginningOfSentenceArray, i, 1 /* len */, + &isBeginningOfSentenceBoolean); + isBeginningOfSentence[i] = isBeginningOfSentenceBoolean == JNI_TRUE; + } + return PrevWordsInfo(prevWordCodePoints, prevWordCodePointCount, isBeginningOfSentence, + MAX_PREV_WORD_COUNT_FOR_N_GRAM); + } + static void putBooleanToArray(JNIEnv *env, jbooleanArray array, const int index, const jboolean value) { env->SetBooleanArrayRegion(array, index, 1 /* len */, &value); |