diff options
Diffstat (limited to 'native/jni/src')
8 files changed, 115 insertions, 37 deletions
diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h index a80c97530..24d04e51f 100644 --- a/native/jni/src/defines.h +++ b/native/jni/src/defines.h @@ -336,6 +336,9 @@ static inline void prof_out(void) { #define MAX_POINTER_COUNT 1 #define MAX_POINTER_COUNT_G 2 +// (MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1)-gram is supported. +#define MAX_PREV_WORD_COUNT_FOR_N_GRAM 1 + #define DISALLOW_DEFAULT_CONSTRUCTOR(TypeName) \ TypeName() = delete diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp index f793363a8..847fa1b02 100644 --- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp @@ -26,6 +26,7 @@ #include "suggest/core/dictionary/dictionary.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "suggest/core/result/suggestion_results.h" +#include "suggest/core/session/prev_words_info.h" #include "utils/char_utils.h" namespace latinime { @@ -42,19 +43,18 @@ BigramDictionary::~BigramDictionary() { } /* Parameters : - * prevWord: the word before, the one for which we need to look up bigrams. - * prevWordLength: its length. + * prevWordsInfo: Information of previous words to get the predictions. * outSuggestionResults: SuggestionResults to put the predictions. */ -void BigramDictionary::getPredictions(const int *prevWord, const int prevWordLength, +void BigramDictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo, SuggestionResults *const outSuggestionResults) const { - int pos = getBigramListPositionForWord(prevWord, prevWordLength, - false /* forceLowerCaseSearch */); + int pos = getBigramListPositionForWord(prevWordsInfo->getPrevWordCodePoints(), + prevWordsInfo->getPrevWordCodePointCount(), false /* forceLowerCaseSearch */); // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams if (NOT_A_DICT_POS == pos) { // If no bigrams for this exact word, search again in lower case. - pos = getBigramListPositionForWord(prevWord, prevWordLength, - true /* forceLowerCaseSearch */); + pos = getBigramListPositionForWord(prevWordsInfo->getPrevWordCodePoints(), + prevWordsInfo->getPrevWordCodePointCount(), true /* forceLowerCaseSearch */); } // If still no bigrams, we really don't have them! if (NOT_A_DICT_POS == pos) return; @@ -96,9 +96,10 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in return mDictionaryStructurePolicy->getBigramsPositionOfPtNode(pos); } -int BigramDictionary::getBigramProbability(const int *word0, int length0, const int *word1, - int length1) const { - int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */); +int BigramDictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo, + const int *word1, int length1) const { + int pos = getBigramListPositionForWord(prevWordsInfo->getPrevWordCodePoints(), + prevWordsInfo->getPrevWordCodePointCount(), false /* forceLowerCaseSearch */); // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams if (NOT_A_DICT_POS == pos) return NOT_A_PROBABILITY; int nextWordPos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(word1, length1, diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h b/native/jni/src/suggest/core/dictionary/bigram_dictionary.h index 12aaf20d3..bd3aed1bd 100644 --- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h +++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.h @@ -22,15 +22,17 @@ namespace latinime { class DictionaryStructureWithBufferPolicy; +class PrevWordsInfo; class SuggestionResults; class BigramDictionary { public: BigramDictionary(const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy); - void getPredictions(const int *word, int length, + void getPredictions(const PrevWordsInfo *const prevWordsInfo, SuggestionResults *const outSuggestionResults) const; - int getBigramProbability(const int *word1, int length1, const int *word2, int length2) const; + int getBigramProbability(const PrevWordsInfo *const prevWordsInfo, + const int *word1, int length1) const; ~BigramDictionary(); private: diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index aa52e63c0..c860d82af 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -44,11 +44,11 @@ Dictionary::Dictionary(JNIEnv *env, DictionaryStructureWithBufferPolicy::Structu void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession, int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints, - int inputSize, int *prevWordCodePoints, int prevWordLength, + int inputSize, const PrevWordsInfo *const prevWordsInfo, const SuggestOptions *const suggestOptions, const float languageWeight, SuggestionResults *const outSuggestionResults) const { TimeKeeper::setCurrentTime(); - traverseSession->init(this, prevWordCodePoints, prevWordLength, suggestOptions); + traverseSession->init(this, prevWordsInfo, suggestOptions); const auto &suggest = suggestOptions->isGesture() ? mGestureSuggest : mTypingSuggest; suggest->getSuggestions(proximityInfo, traverseSession, xcoordinates, ycoordinates, times, pointerIds, inputCodePoints, inputSize, @@ -58,11 +58,10 @@ void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession } } -void Dictionary::getPredictions(const int *word, int length, +void Dictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo, SuggestionResults *const outSuggestionResults) const { TimeKeeper::setCurrentTime(); - if (length <= 0) return; - mBigramDictionary.getPredictions(word, length, outSuggestionResults); + mBigramDictionary.getPredictions(prevWordsInfo, outSuggestionResults); } int Dictionary::getProbability(const int *word, int length) const { @@ -75,10 +74,10 @@ int Dictionary::getProbability(const int *word, int length) const { return getDictionaryStructurePolicy()->getUnigramProbabilityOfPtNode(pos); } -int Dictionary::getBigramProbability(const int *word0, int length0, const int *word1, +int Dictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word1, int length1) const { TimeKeeper::setCurrentTime(); - return mBigramDictionary.getBigramProbability(word0, length0, word1, length1); + return mBigramDictionary.getBigramProbability(prevWordsInfo, word1, length1); } void Dictionary::addUnigramWord(const int *const word, const int length, diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h index f0a7e5b6a..b63c61fbb 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.h +++ b/native/jni/src/suggest/core/dictionary/dictionary.h @@ -31,6 +31,7 @@ namespace latinime { class DictionaryStructureWithBufferPolicy; class DicTraverseSession; +class PrevWordsInfo; class ProximityInfo; class SuggestionResults; class SuggestOptions; @@ -62,16 +63,17 @@ class Dictionary { void getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession, int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints, - int inputSize, int *prevWordCodePoints, int prevWordLength, + int inputSize, const PrevWordsInfo *const prevWordsInfo, const SuggestOptions *const suggestOptions, const float languageWeight, SuggestionResults *const outSuggestionResults) const; - void getPredictions(const int *word, int length, + void getPredictions(const PrevWordsInfo *const prevWordsInfo, SuggestionResults *const outSuggestionResults) const; int getProbability(const int *word, int length) const; - int getBigramProbability(const int *word0, int length0, const int *word1, int length1) const; + int getBigramProbability(const PrevWordsInfo *const prevWordsInfo, + const int *word1, int length1) const; void addUnigramWord(const int *const codePoints, const int codePointCount, const UnigramProperty *const unigramProperty); diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.cpp b/native/jni/src/suggest/core/session/dic_traverse_session.cpp index 77b634e07..b9e9db719 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.cpp +++ b/native/jni/src/suggest/core/session/dic_traverse_session.cpp @@ -20,6 +20,7 @@ #include "suggest/core/dictionary/dictionary.h" #include "suggest/core/policy/dictionary_header_structure_policy.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" +#include "suggest/core/session/prev_words_info.h" namespace latinime { @@ -28,24 +29,26 @@ namespace latinime { const int DicTraverseSession::DICTIONARY_SIZE_THRESHOLD_TO_USE_LARGE_CACHE_FOR_SUGGESTION = 256 * 1024; -void DicTraverseSession::init(const Dictionary *const dictionary, const int *prevWord, - int prevWordLength, const SuggestOptions *const suggestOptions) { +void DicTraverseSession::init(const Dictionary *const dictionary, + const PrevWordsInfo *const prevWordsInfo, const SuggestOptions *const suggestOptions) { mDictionary = dictionary; mMultiWordCostMultiplier = getDictionaryStructurePolicy()->getHeaderStructurePolicy() ->getMultiWordCostMultiplier(); mSuggestOptions = suggestOptions; - if (!prevWord) { - mPrevWordPtNodePos = NOT_A_DICT_POS; + if (!prevWordsInfo->getPrevWordCodePoints()) { + mPrevWordsPtNodePos[0] = NOT_A_DICT_POS; return; } // TODO: merge following similar calls to getTerminalPosition into one case-insensitive call. - mPrevWordPtNodePos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord( - prevWord, prevWordLength, false /* forceLowerCaseSearch */); - if (mPrevWordPtNodePos == NOT_A_DICT_POS) { + mPrevWordsPtNodePos[0] = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord( + prevWordsInfo->getPrevWordCodePoints(), prevWordsInfo->getPrevWordCodePointCount(), + false /* forceLowerCaseSearch */); + if (mPrevWordsPtNodePos[0] == NOT_A_DICT_POS) { // Check bigrams for lower-cased previous word if original was not found. Useful for // auto-capitalized words like "The [current_word]". - mPrevWordPtNodePos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord( - prevWord, prevWordLength, true /* forceLowerCaseSearch */); + mPrevWordsPtNodePos[0] = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord( + prevWordsInfo->getPrevWordCodePoints(), prevWordsInfo->getPrevWordCodePointCount(), + true /* forceLowerCaseSearch */); } } diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.h b/native/jni/src/suggest/core/session/dic_traverse_session.h index 50bffebac..90aff06c3 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.h +++ b/native/jni/src/suggest/core/session/dic_traverse_session.h @@ -29,6 +29,7 @@ namespace latinime { class Dictionary; class DictionaryStructureWithBufferPolicy; +class PrevWordsInfo; class ProximityInfo; class SuggestOptions; @@ -49,18 +50,20 @@ class DicTraverseSession { } AK_FORCE_INLINE DicTraverseSession(JNIEnv *env, jstring localeStr, bool usesLargeCache) - : mPrevWordPtNodePos(NOT_A_DICT_POS), mProximityInfo(nullptr), - mDictionary(nullptr), mSuggestOptions(nullptr), mDicNodesCache(usesLargeCache), - mMultiBigramMap(), mInputSize(0), mMaxPointerCount(1), + : mProximityInfo(nullptr), mDictionary(nullptr), mSuggestOptions(nullptr), + mDicNodesCache(usesLargeCache), mMultiBigramMap(), mInputSize(0), mMaxPointerCount(1), mMultiWordCostMultiplier(1.0f) { // NOTE: mProximityInfoStates is an array of instances. // No need to initialize it explicitly here. + for (size_t i = 0; i < NELEMS(mPrevWordsPtNodePos); ++i) { + mPrevWordsPtNodePos[i] = NOT_A_DICT_POS; + } } // Non virtual inline destructor -- never inherit this class AK_FORCE_INLINE ~DicTraverseSession() {} - void init(const Dictionary *dictionary, const int *prevWord, int prevWordLength, + void init(const Dictionary *dictionary, const PrevWordsInfo *const prevWordsInfo, const SuggestOptions *const suggestOptions); // TODO: Remove and merge into init void setupForGetSuggestions(const ProximityInfo *pInfo, const int *inputCodePoints, @@ -76,7 +79,7 @@ class DicTraverseSession { //-------------------- const ProximityInfo *getProximityInfo() const { return mProximityInfo; } const SuggestOptions *getSuggestOptions() const { return mSuggestOptions; } - int getPrevWordPtNodePos() const { return mPrevWordPtNodePos; } + int getPrevWordPtNodePos() const { return mPrevWordsPtNodePos[0]; } DicNodesCache *getDicTraverseCache() { return &mDicNodesCache; } MultiBigramMap *getMultiBigramMap() { return &mMultiBigramMap; } const ProximityInfoState *getProximityInfoState(int id) const { @@ -163,7 +166,7 @@ class DicTraverseSession { const int *const inputYs, const int *const times, const int *const pointerIds, const int inputSize, const float maxSpatialDistance, const int maxPointerCount); - int mPrevWordPtNodePos; + int mPrevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; const ProximityInfo *mProximityInfo; const Dictionary *mDictionary; const SuggestOptions *mSuggestOptions; diff --git a/native/jni/src/suggest/core/session/prev_words_info.h b/native/jni/src/suggest/core/session/prev_words_info.h new file mode 100644 index 000000000..bc685945e --- /dev/null +++ b/native/jni/src/suggest/core/session/prev_words_info.h @@ -0,0 +1,65 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_PREV_WORDS_INFO_H +#define LATINIME_PREV_WORDS_INFO_H + +#include "defines.h" + +namespace latinime { + +// TODO: Support n-gram. +// TODO: Support beginning of sentence. +// This class does not take ownership of any code point buffers. +class PrevWordsInfo { + public: + // No prev word information. + PrevWordsInfo() { + clear(); + } + + PrevWordsInfo(const int *const prevWordCodePoints, const int prevWordCodePointCount, + const bool isBeginningOfSentence) { + clear(); + mPrevWordCodePoints[0] = prevWordCodePoints; + mPrevWordCodePointCount[0] = prevWordCodePointCount; + mIsBeginningOfSentence[0] = isBeginningOfSentence; + } + const int *getPrevWordCodePoints() const { + return mPrevWordCodePoints[0]; + } + + int getPrevWordCodePointCount() const { + return mPrevWordCodePointCount[0]; + } + + private: + DISALLOW_COPY_AND_ASSIGN(PrevWordsInfo); + + void clear() { + for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) { + mPrevWordCodePoints[i] = nullptr; + mPrevWordCodePointCount[i] = 0; + mIsBeginningOfSentence[i] = false; + } + } + + const int *mPrevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; + int mPrevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; + bool mIsBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; +}; +} // namespace latinime +#endif // LATINIME_PREV_WORDS_INFO_H |