diff options
author | 2014-06-12 12:26:18 +0900 | |
---|---|---|
committer | 2014-06-12 12:26:18 +0900 | |
commit | 22931cd94155b5623b9fa52c0596a44aa89bf606 (patch) | |
tree | db5ddf2ca681e5f3ec5e077a8f3e197d1c39f506 /native/jni/src | |
parent | d979d416c102c45773f4684473953f6fed8f983f (diff) | |
download | latinime-22931cd94155b5623b9fa52c0596a44aa89bf606.tar.gz latinime-22931cd94155b5623b9fa52c0596a44aa89bf606.tar.xz latinime-22931cd94155b5623b9fa52c0596a44aa89bf606.zip |
Enable Beginning-of-Sentence prediction for contextual dict.
Bug: 14161647
Bug: 14119293
Change-Id: I0c00f13966db88e4de85e245e7bced43c9d474b2
Diffstat (limited to 'native/jni/src')
-rw-r--r-- | native/jni/src/suggest/core/session/prev_words_info.h | 8 | ||||
-rw-r--r-- | native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp | 19 |
2 files changed, 26 insertions, 1 deletions
```diff
diff --git a/native/jni/src/suggest/core/session/prev_words_info.h b/native/jni/src/suggest/core/session/prev_words_info.h
index 56c53c1c2..640f6a2fc 100644
--- a/native/jni/src/suggest/core/session/prev_words_info.h
+++ b/native/jni/src/suggest/core/session/prev_words_info.h
@@ -85,6 +85,14 @@ class PrevWordsInfo {
         return mPrevWordCodePointCount[n - 1];
     }
 
+    // n is 1-indexed.
+    bool isNthPrevWordBeginningOfSentence(const int n) const {
+        if (n <= 0 || n > MAX_PREV_WORD_COUNT_FOR_N_GRAM) {
+            return false;
+        }
+        return mIsBeginningOfSentence[n - 1];
+    }
+
 private:
     DISALLOW_COPY_AND_ASSIGN(PrevWordsInfo);
 
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index 09c7b7d85..1e10f24c5 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -246,7 +246,24 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
             false /* tryLowerCaseSearch */);
     // TODO: Support N-gram.
     if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
-        return false;
+        if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)) {
+            const std::vector<UnigramProperty::ShortcutProperty> shortcuts;
+            const UnigramProperty beginningOfSentenceUnigramProperty(
+                    true /* representsBeginningOfSentence */, true /* isNotAWord */,
+                    false /* isBlacklisted */, MAX_PROBABILITY /* probability */,
+                    NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
+            if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
+                    prevWordsInfo->getNthPrevWordCodePointCount(1 /* n */),
+                    &beginningOfSentenceUnigramProperty)) {
+                AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
+                return false;
+            }
+            // Refresh Terminal PtNode positions.
+            prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
+                    false /* tryLowerCaseSearch */);
+        } else {
+            return false;
+        }
     }
     const int word1Pos = getTerminalPtNodePositionOfWord(
             bigramProperty->getTargetCodePoints()->data(),
```