diff options
Diffstat (limited to 'native/jni/src')
9 files changed, 72 insertions, 2 deletions
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index f88388c75..0bcde2294 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -99,6 +99,11 @@ bool Dictionary::addUnigramEntry(const int *const word, const int length, return mDictionaryStructureWithBufferPolicy->addUnigramEntry(word, length, unigramProperty); } +bool Dictionary::removeUnigramEntry(const int *const codePoints, const int codePointCount) { + TimeKeeper::setCurrentTime(); + return mDictionaryStructureWithBufferPolicy->removeUnigramEntry(codePoints, codePointCount); +} + bool Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo, const BigramProperty *const bigramProperty) { TimeKeeper::setCurrentTime(); diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h index 10010b21c..542ba7291 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.h +++ b/native/jni/src/suggest/core/dictionary/dictionary.h @@ -81,6 +81,8 @@ class Dictionary { bool addUnigramEntry(const int *const codePoints, const int codePointCount, const UnigramProperty *const unigramProperty); + bool removeUnigramEntry(const int *const codePoints, const int codePointCount); + bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, const BigramProperty *const bigramProperty); diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index cda89406c..e2771f97c 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -74,6 +74,9 @@ class DictionaryStructureWithBufferPolicy { const UnigramProperty *const unigramProperty) = 0; // Returns whether the update was success or not. + virtual bool removeUnigramEntry(const int *const word, const int length) = 0; + + // Returns whether the update was success or not. virtual bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, const BigramProperty *const bigramProperty) = 0; diff --git a/native/jni/src/suggest/core/session/prev_words_info.h b/native/jni/src/suggest/core/session/prev_words_info.h index 56c53c1c2..640f6a2fc 100644 --- a/native/jni/src/suggest/core/session/prev_words_info.h +++ b/native/jni/src/suggest/core/session/prev_words_info.h @@ -85,6 +85,14 @@ class PrevWordsInfo { return mPrevWordCodePointCount[n - 1]; } + // n is 1-indexed. + bool isNthPrevWordBeginningOfSentence(const int n) const { + if (n <= 0 || n > MAX_PREV_WORD_COUNT_FOR_N_GRAM) { + return false; + } + return mIsBeginningOfSentence[n - 1]; + } + private: DISALLOW_COPY_AND_ASSIGN(PrevWordsInfo); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp index e571d8986..805820b05 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp @@ -256,7 +256,24 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI false /* tryLowerCaseSearch */); // TODO: Support N-gram. if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) { - return false; + if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)) { + const std::vector<UnigramProperty::ShortcutProperty> shortcuts; + const UnigramProperty beginningOfSentenceUnigramProperty( + true /* representsBeginningOfSentence */, true /* isNotAWord */, + false /* isBlacklisted */, MAX_PROBABILITY /* probability */, + NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts); + if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */), + prevWordsInfo->getNthPrevWordCodePointCount(1 /* n */), + &beginningOfSentenceUnigramProperty)) { + AKLOGE("Cannot add unigram entry for the beginning-of-sentence."); + return false; + } + // Refresh Terminal PtNode positions. + prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos, + false /* tryLowerCaseSearch */); + } else { + return false; + } } const int word1Pos = getTerminalPtNodePositionOfWord( bigramProperty->getTargetCodePoints()->data(), diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h index e323652d4..2e948ac4a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h @@ -111,6 +111,11 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { bool addUnigramEntry(const int *const word, const int length, const UnigramProperty *const unigramProperty); + bool removeUnigramEntry(const int *const word, const int length) { + // Removing unigram entry is not supported. + return false; + } + bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, const BigramProperty *const bigramProperty); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h index 88bbfd966..dce94363a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h @@ -88,6 +88,12 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { return false; } + bool removeUnigramEntry(const int *const word, const int length) { + // This method should not be called for non-updatable dictionary. + AKLOGI("Warning: removeUnigramEntry() is called for non-updatable dictionary."); + return false; + } + bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, const BigramProperty *const bigramProperty) { // This method should not be called for non-updatable dictionary. diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index 09c7b7d85..aec3b8ea3 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -221,6 +221,11 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int le } } +bool Ver4PatriciaTriePolicy::removeUnigramEntry(const int *const word, const int length) { + // TODO: Implement. + return false; +} + bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo, const BigramProperty *const bigramProperty) { if (!mBuffers->isUpdatable()) { @@ -246,7 +251,24 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI false /* tryLowerCaseSearch */); // TODO: Support N-gram. if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) { - return false; + if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)) { + const std::vector<UnigramProperty::ShortcutProperty> shortcuts; + const UnigramProperty beginningOfSentenceUnigramProperty( + true /* representsBeginningOfSentence */, true /* isNotAWord */, + false /* isBlacklisted */, MAX_PROBABILITY /* probability */, + NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts); + if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */), + prevWordsInfo->getNthPrevWordCodePointCount(1 /* n */), + &beginningOfSentenceUnigramProperty)) { + AKLOGE("Cannot add unigram entry for the beginning-of-sentence."); + return false; + } + // Refresh Terminal PtNode positions. + prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos, + false /* tryLowerCaseSearch */); + } else { + return false; + } } const int word1Pos = getTerminalPtNodePositionOfWord( bigramProperty->getTargetCodePoints()->data(), diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index d198c97fd..0a20965f3 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -93,6 +93,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { bool addUnigramEntry(const int *const word, const int length, const UnigramProperty *const unigramProperty); + bool removeUnigramEntry(const int *const word, const int length); + bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, const BigramProperty *const bigramProperty); |