aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKeisuke Kuroyanagi <ksk@google.com>2014-10-21 10:28:25 +0000
committerAndroid (Google) Code Review <android-gerrit@google.com>2014-10-21 10:28:25 +0000
commitdfc82fa3662e3e7a157f20db974b3cef402a9dd0 (patch)
treec70e105c8a34b848e4c610ed6f4f0843862fb7c7
parent2105aa3ea64ddf9cb0428d2bc717ff6fce12733e (diff)
parent88bb28c132d87f15a52e9a0b8a45950f39eb19ad (diff)
downloadlatinime-dfc82fa3662e3e7a157f20db974b3cef402a9dd0.tar.gz
latinime-dfc82fa3662e3e7a157f20db974b3cef402a9dd0.tar.xz
latinime-dfc82fa3662e3e7a157f20db974b3cef402a9dd0.zip
Merge changes I210acb81,Ie9508788
* changes: Make NgramProperty have NgramContext. Create .cpp file for NgramContext.
-rw-r--r--native/jni/NativeFileList.mk3
-rw-r--r--native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp18
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.cpp5
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.h3
-rw-r--r--native/jni/src/suggest/core/dictionary/property/ngram_property.h14
-rw-r--r--native/jni/src/suggest/core/dictionary/property/word_property.h4
-rw-r--r--native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h5
-rw-r--r--native/jni/src/suggest/core/session/ngram_context.cpp123
-rw-r--r--native/jni/src/suggest/core/session/ngram_context.h121
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp10
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h3
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h3
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp21
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h3
15 files changed, 188 insertions, 150 deletions
diff --git a/native/jni/NativeFileList.mk b/native/jni/NativeFileList.mk
index b896f386f..7299ed3c0 100644
--- a/native/jni/NativeFileList.mk
+++ b/native/jni/NativeFileList.mk
@@ -40,6 +40,7 @@ LATIN_IME_CORE_SRC_FILES := \
proximity_info_state_utils.cpp) \
suggest/core/policy/weighting.cpp \
suggest/core/session/dic_traverse_session.cpp \
+ suggest/core/session/ngram_context.cpp \
$(addprefix suggest/core/result/, \
suggestion_results.cpp \
suggestions_output_utils.cpp) \
@@ -55,7 +56,7 @@ LATIN_IME_CORE_SRC_FILES := \
dynamic_pt_updating_helper.cpp \
dynamic_pt_writing_utils.cpp \
patricia_trie_reading_utils.cpp \
- shortcut/shortcut_list_reading_utils.cpp ) \
+ shortcut/shortcut_list_reading_utils.cpp) \
$(addprefix suggest/policyimpl/dictionary/structure/v2/, \
patricia_trie_policy.cpp \
ver2_patricia_trie_node_reader.cpp \
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index b01acead7..e72226cec 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -409,9 +409,10 @@ static bool latinime_BinaryDictionary_addNgramEntry(JNIEnv *env, jclass clazz, j
int wordCodePoints[wordLength];
env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
// Use 1 for count to indicate the ngram has inputted.
- const NgramProperty ngramProperty(CodePointArrayView(wordCodePoints, wordLength).toVector(),
+ const NgramProperty ngramProperty(ngramContext,
+ CodePointArrayView(wordCodePoints, wordLength).toVector(),
probability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */));
- return dictionary->addNgramEntry(&ngramContext, &ngramProperty);
+ return dictionary->addNgramEntry(&ngramProperty);
}
static bool latinime_BinaryDictionary_removeNgramEntry(JNIEnv *env, jclass clazz, jlong dict,
@@ -527,12 +528,12 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
if (word0) {
jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);
// Use 1 for count to indicate the bigram has inputted.
- const NgramProperty ngramProperty(
- CodePointArrayView(word1CodePoints, word1Length).toVector(),
- bigramProbability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */));
const NgramContext ngramContext(word0CodePoints, word0Length,
false /* isBeginningOfSentence */);
- dictionary->addNgramEntry(&ngramContext, &ngramProperty);
+ const NgramProperty ngramProperty(ngramContext,
+ CodePointArrayView(word1CodePoints, word1Length).toVector(),
+ bigramProbability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */));
+ dictionary->addNgramEntry(&ngramProperty);
}
if (dictionary->needsToRunGC(true /* mindsBlockByGC */)) {
return i + 1;
@@ -642,11 +643,8 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j
return false;
}
}
- const NgramContext ngramContext(wordCodePoints, wordCodePointCount,
- wordProperty.getUnigramProperty()->representsBeginningOfSentence());
for (const NgramProperty &ngramProperty : *wordProperty.getNgramProperties()) {
- if (!dictionaryStructureWithBufferPolicy->addNgramEntry(&ngramContext,
- &ngramProperty)) {
+ if (!dictionaryStructureWithBufferPolicy->addNgramEntry(&ngramProperty)) {
LogUtils::logToJava(env, "Cannot add ngram to the new dict.");
return false;
}
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index 697e99ffb..bfe17cc4c 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -140,10 +140,9 @@ bool Dictionary::removeUnigramEntry(const CodePointArrayView codePoints) {
return mDictionaryStructureWithBufferPolicy->removeUnigramEntry(codePoints);
}
-bool Dictionary::addNgramEntry(const NgramContext *const ngramContext,
- const NgramProperty *const ngramProperty) {
+bool Dictionary::addNgramEntry(const NgramProperty *const ngramProperty) {
TimeKeeper::setCurrentTime();
- return mDictionaryStructureWithBufferPolicy->addNgramEntry(ngramContext, ngramProperty);
+ return mDictionaryStructureWithBufferPolicy->addNgramEntry(ngramProperty);
}
bool Dictionary::removeNgramEntry(const NgramContext *const ngramContext,
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h
index 843aec473..a5e986d15 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h
@@ -85,8 +85,7 @@ class Dictionary {
bool removeUnigramEntry(const CodePointArrayView codePoints);
- bool addNgramEntry(const NgramContext *const ngramContext,
- const NgramProperty *const ngramProperty);
+ bool addNgramEntry(const NgramProperty *const ngramProperty);
bool removeNgramEntry(const NgramContext *const ngramContext,
const CodePointArrayView codePoints);
diff --git a/native/jni/src/suggest/core/dictionary/property/ngram_property.h b/native/jni/src/suggest/core/dictionary/property/ngram_property.h
index 8709799f9..e67b4da31 100644
--- a/native/jni/src/suggest/core/dictionary/property/ngram_property.h
+++ b/native/jni/src/suggest/core/dictionary/property/ngram_property.h
@@ -21,15 +21,20 @@
#include "defines.h"
#include "suggest/core/dictionary/property/historical_info.h"
+#include "suggest/core/session/ngram_context.h"
namespace latinime {
class NgramProperty {
public:
- NgramProperty(const std::vector<int> &&targetCodePoints, const int probability,
- const HistoricalInfo historicalInfo)
- : mTargetCodePoints(std::move(targetCodePoints)), mProbability(probability),
- mHistoricalInfo(historicalInfo) {}
+ NgramProperty(const NgramContext &ngramContext, const std::vector<int> &&targetCodePoints,
+ const int probability, const HistoricalInfo historicalInfo)
+ : mNgramContext(ngramContext), mTargetCodePoints(std::move(targetCodePoints)),
+ mProbability(probability), mHistoricalInfo(historicalInfo) {}
+
+ const NgramContext *getNgramContext() const {
+ return &mNgramContext;
+ }
const std::vector<int> *getTargetCodePoints() const {
return &mTargetCodePoints;
@@ -48,6 +53,7 @@ class NgramProperty {
DISALLOW_DEFAULT_CONSTRUCTOR(NgramProperty);
DISALLOW_ASSIGNMENT_OPERATOR(NgramProperty);
+ const NgramContext mNgramContext;
const std::vector<int> mTargetCodePoints;
const int mProbability;
const HistoricalInfo mHistoricalInfo;
diff --git a/native/jni/src/suggest/core/dictionary/property/word_property.h b/native/jni/src/suggest/core/dictionary/property/word_property.h
index 0c23e8225..01b8987b5 100644
--- a/native/jni/src/suggest/core/dictionary/property/word_property.h
+++ b/native/jni/src/suggest/core/dictionary/property/word_property.h
@@ -34,9 +34,9 @@ class WordProperty {
: mCodePoints(), mUnigramProperty(), mNgrams() {}
WordProperty(const std::vector<int> &&codePoints, const UnigramProperty *const unigramProperty,
- const std::vector<NgramProperty> *const bigrams)
+ const std::vector<NgramProperty> *const ngrams)
: mCodePoints(std::move(codePoints)), mUnigramProperty(*unigramProperty),
- mNgrams(*bigrams) {}
+ mNgrams(*ngrams) {}
void outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags,
jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilities,
diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
index ceda5c03f..33a0fbc19 100644
--- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
@@ -40,7 +40,6 @@ class UnigramProperty;
* This class abstracts the structure of dictionaries.
* Implement this policy to support additional dictionaries.
*/
-// TODO: Use word id instead of terminal PtNode position.
class DictionaryStructureWithBufferPolicy {
public:
typedef std::unique_ptr<DictionaryStructureWithBufferPolicy> StructurePolicyPtr;
@@ -81,8 +80,7 @@ class DictionaryStructureWithBufferPolicy {
virtual bool removeUnigramEntry(const CodePointArrayView wordCodePoints) = 0;
// Returns whether the update was success or not.
- virtual bool addNgramEntry(const NgramContext *const ngramContext,
- const NgramProperty *const ngramProperty) = 0;
+ virtual bool addNgramEntry(const NgramProperty *const ngramProperty) = 0;
// Returns whether the update was success or not.
virtual bool removeNgramEntry(const NgramContext *const ngramContext,
@@ -106,7 +104,6 @@ class DictionaryStructureWithBufferPolicy {
virtual void getProperty(const char *const query, const int queryLength, char *const outResult,
const int maxResultLength) = 0;
- // Used for testing.
virtual const WordProperty getWordProperty(const CodePointArrayView wordCodePoints) const = 0;
// Method to iterate all words in the dictionary.
diff --git a/native/jni/src/suggest/core/session/ngram_context.cpp b/native/jni/src/suggest/core/session/ngram_context.cpp
new file mode 100644
index 000000000..17ef9ae60
--- /dev/null
+++ b/native/jni/src/suggest/core/session/ngram_context.cpp
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/core/session/ngram_context.h"
+
+#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+#include "utils/char_utils.h"
+
+namespace latinime {
+
+NgramContext::NgramContext() : mPrevWordCount(0) {}
+
+NgramContext::NgramContext(const NgramContext &ngramContext)
+ : mPrevWordCount(ngramContext.mPrevWordCount) {
+ for (size_t i = 0; i < mPrevWordCount; ++i) {
+ mPrevWordCodePointCount[i] = ngramContext.mPrevWordCodePointCount[i];
+ memmove(mPrevWordCodePoints[i], ngramContext.mPrevWordCodePoints[i],
+ sizeof(mPrevWordCodePoints[i][0]) * mPrevWordCodePointCount[i]);
+ mIsBeginningOfSentence[i] = ngramContext.mIsBeginningOfSentence[i];
+ }
+}
+
+NgramContext::NgramContext(const int prevWordCodePoints[][MAX_WORD_LENGTH],
+ const int *const prevWordCodePointCount, const bool *const isBeginningOfSentence,
+ const size_t prevWordCount)
+ : mPrevWordCount(std::min(NELEMS(mPrevWordCodePoints), prevWordCount)) {
+ clear();
+ for (size_t i = 0; i < mPrevWordCount; ++i) {
+ if (prevWordCodePointCount[i] < 0 || prevWordCodePointCount[i] > MAX_WORD_LENGTH) {
+ continue;
+ }
+ memmove(mPrevWordCodePoints[i], prevWordCodePoints[i],
+ sizeof(mPrevWordCodePoints[i][0]) * prevWordCodePointCount[i]);
+ mPrevWordCodePointCount[i] = prevWordCodePointCount[i];
+ mIsBeginningOfSentence[i] = isBeginningOfSentence[i];
+ }
+}
+
+NgramContext::NgramContext(const int *const prevWordCodePoints, const int prevWordCodePointCount,
+ const bool isBeginningOfSentence) : mPrevWordCount(1) {
+ clear();
+ if (prevWordCodePointCount > MAX_WORD_LENGTH || !prevWordCodePoints) {
+ return;
+ }
+ memmove(mPrevWordCodePoints[0], prevWordCodePoints,
+ sizeof(mPrevWordCodePoints[0][0]) * prevWordCodePointCount);
+ mPrevWordCodePointCount[0] = prevWordCodePointCount;
+ mIsBeginningOfSentence[0] = isBeginningOfSentence;
+}
+
+bool NgramContext::isValid() const {
+ if (mPrevWordCodePointCount[0] > 0) {
+ return true;
+ }
+ if (mIsBeginningOfSentence[0]) {
+ return true;
+ }
+ return false;
+}
+
+const CodePointArrayView NgramContext::getNthPrevWordCodePoints(const size_t n) const {
+ if (n <= 0 || n > mPrevWordCount) {
+ return CodePointArrayView();
+ }
+ return CodePointArrayView(mPrevWordCodePoints[n - 1], mPrevWordCodePointCount[n - 1]);
+}
+
+bool NgramContext::isNthPrevWordBeginningOfSentence(const size_t n) const {
+ if (n <= 0 || n > mPrevWordCount) {
+ return false;
+ }
+ return mIsBeginningOfSentence[n - 1];
+}
+
+/* static */ int NgramContext::getWordId(
+ const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
+ const int *const wordCodePoints, const int wordCodePointCount,
+ const bool isBeginningOfSentence, const bool tryLowerCaseSearch) {
+ if (!dictStructurePolicy || !wordCodePoints || wordCodePointCount > MAX_WORD_LENGTH) {
+ return NOT_A_WORD_ID;
+ }
+ int codePoints[MAX_WORD_LENGTH];
+ int codePointCount = wordCodePointCount;
+ memmove(codePoints, wordCodePoints, sizeof(int) * codePointCount);
+ if (isBeginningOfSentence) {
+ codePointCount = CharUtils::attachBeginningOfSentenceMarker(codePoints, codePointCount,
+ MAX_WORD_LENGTH);
+ if (codePointCount <= 0) {
+ return NOT_A_WORD_ID;
+ }
+ }
+ const CodePointArrayView codePointArrayView(codePoints, codePointCount);
+ const int wordId = dictStructurePolicy->getWordId(codePointArrayView,
+ false /* forceLowerCaseSearch */);
+ if (wordId != NOT_A_WORD_ID || !tryLowerCaseSearch) {
+ // Return the id when when the word was found or doesn't try lower case search.
+ return wordId;
+ }
+ // Check bigrams for lower-cased previous word if original was not found. Useful for
+ // auto-capitalized words like "The [current_word]".
+ return dictStructurePolicy->getWordId(codePointArrayView, true /* forceLowerCaseSearch */);
+}
+
+void NgramContext::clear() {
+ for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) {
+ mPrevWordCodePointCount[i] = 0;
+ mIsBeginningOfSentence[i] = false;
+ }
+}
+} // namespace latinime
diff --git a/native/jni/src/suggest/core/session/ngram_context.h b/native/jni/src/suggest/core/session/ngram_context.h
index 64c71410f..9b36199c9 100644
--- a/native/jni/src/suggest/core/session/ngram_context.h
+++ b/native/jni/src/suggest/core/session/ngram_context.h
@@ -20,145 +20,54 @@
#include <array>
#include "defines.h"
-#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
-#include "utils/char_utils.h"
#include "utils/int_array_view.h"
namespace latinime {
-// Rename to NgramContext.
+class DictionaryStructureWithBufferPolicy;
+
class NgramContext {
public:
// No prev word information.
- NgramContext() : mPrevWordCount(0) {
- clear();
- }
-
- NgramContext(const NgramContext &ngramContext)
- : mPrevWordCount(ngramContext.mPrevWordCount) {
- for (size_t i = 0; i < mPrevWordCount; ++i) {
- mPrevWordCodePointCount[i] = ngramContext.mPrevWordCodePointCount[i];
- memmove(mPrevWordCodePoints[i], ngramContext.mPrevWordCodePoints[i],
- sizeof(mPrevWordCodePoints[i][0]) * mPrevWordCodePointCount[i]);
- mIsBeginningOfSentence[i] = ngramContext.mIsBeginningOfSentence[i];
- }
- }
-
+ NgramContext();
+ // Copy constructor to use this class with std::vector and use this class as a return value.
+ NgramContext(const NgramContext &ngramContext);
// Construct from previous words.
NgramContext(const int prevWordCodePoints[][MAX_WORD_LENGTH],
const int *const prevWordCodePointCount, const bool *const isBeginningOfSentence,
- const size_t prevWordCount)
- : mPrevWordCount(std::min(NELEMS(mPrevWordCodePoints), prevWordCount)) {
- clear();
- for (size_t i = 0; i < mPrevWordCount; ++i) {
- if (prevWordCodePointCount[i] < 0 || prevWordCodePointCount[i] > MAX_WORD_LENGTH) {
- continue;
- }
- memmove(mPrevWordCodePoints[i], prevWordCodePoints[i],
- sizeof(mPrevWordCodePoints[i][0]) * prevWordCodePointCount[i]);
- mPrevWordCodePointCount[i] = prevWordCodePointCount[i];
- mIsBeginningOfSentence[i] = isBeginningOfSentence[i];
- }
- }
-
+ const size_t prevWordCount);
// Construct from a previous word.
NgramContext(const int *const prevWordCodePoints, const int prevWordCodePointCount,
- const bool isBeginningOfSentence) : mPrevWordCount(1) {
- clear();
- if (prevWordCodePointCount > MAX_WORD_LENGTH || !prevWordCodePoints) {
- return;
- }
- memmove(mPrevWordCodePoints[0], prevWordCodePoints,
- sizeof(mPrevWordCodePoints[0][0]) * prevWordCodePointCount);
- mPrevWordCodePointCount[0] = prevWordCodePointCount;
- mIsBeginningOfSentence[0] = isBeginningOfSentence;
- }
+ const bool isBeginningOfSentence);
size_t getPrevWordCount() const {
return mPrevWordCount;
}
-
- // TODO: Remove.
- const NgramContext getTrimmedNgramContext(const size_t maxPrevWordCount) const {
- return NgramContext(mPrevWordCodePoints, mPrevWordCodePointCount, mIsBeginningOfSentence,
- std::min(mPrevWordCount, maxPrevWordCount));
- }
-
- bool isValid() const {
- if (mPrevWordCodePointCount[0] > 0) {
- return true;
- }
- if (mIsBeginningOfSentence[0]) {
- return true;
- }
- return false;
- }
+ bool isValid() const;
template<size_t N>
const WordIdArrayView getPrevWordIds(
const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
- std::array<int, N> *const prevWordIdBuffer, const bool tryLowerCaseSearch) const {
+ WordIdArray<N> *const prevWordIdBuffer, const bool tryLowerCaseSearch) const {
for (size_t i = 0; i < std::min(mPrevWordCount, N); ++i) {
- prevWordIdBuffer->at(i) = getWordId(dictStructurePolicy,
- mPrevWordCodePoints[i], mPrevWordCodePointCount[i],
- mIsBeginningOfSentence[i], tryLowerCaseSearch);
+ prevWordIdBuffer->at(i) = getWordId(dictStructurePolicy, mPrevWordCodePoints[i],
+ mPrevWordCodePointCount[i], mIsBeginningOfSentence[i], tryLowerCaseSearch);
}
return WordIdArrayView::fromArray(*prevWordIdBuffer).limit(mPrevWordCount);
}
// n is 1-indexed.
- const CodePointArrayView getNthPrevWordCodePoints(const size_t n) const {
- if (n <= 0 || n > mPrevWordCount) {
- return CodePointArrayView();
- }
- return CodePointArrayView(mPrevWordCodePoints[n - 1], mPrevWordCodePointCount[n - 1]);
- }
-
+ const CodePointArrayView getNthPrevWordCodePoints(const size_t n) const;
// n is 1-indexed.
- bool isNthPrevWordBeginningOfSentence(const size_t n) const {
- if (n <= 0 || n > mPrevWordCount) {
- return false;
- }
- return mIsBeginningOfSentence[n - 1];
- }
+ bool isNthPrevWordBeginningOfSentence(const size_t n) const;
private:
DISALLOW_ASSIGNMENT_OPERATOR(NgramContext);
static int getWordId(const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
const int *const wordCodePoints, const int wordCodePointCount,
- const bool isBeginningOfSentence, const bool tryLowerCaseSearch) {
- if (!dictStructurePolicy || !wordCodePoints || wordCodePointCount > MAX_WORD_LENGTH) {
- return NOT_A_WORD_ID;
- }
- int codePoints[MAX_WORD_LENGTH];
- int codePointCount = wordCodePointCount;
- memmove(codePoints, wordCodePoints, sizeof(int) * codePointCount);
- if (isBeginningOfSentence) {
- codePointCount = CharUtils::attachBeginningOfSentenceMarker(codePoints,
- codePointCount, MAX_WORD_LENGTH);
- if (codePointCount <= 0) {
- return NOT_A_WORD_ID;
- }
- }
- const CodePointArrayView codePointArrayView(codePoints, codePointCount);
- const int wordId = dictStructurePolicy->getWordId(
- codePointArrayView, false /* forceLowerCaseSearch */);
- if (wordId != NOT_A_WORD_ID || !tryLowerCaseSearch) {
- // Return the id when when the word was found or doesn't try lower case search.
- return wordId;
- }
- // Check bigrams for lower-cased previous word if original was not found. Useful for
- // auto-capitalized words like "The [current_word]".
- return dictStructurePolicy->getWordId(codePointArrayView, true /* forceLowerCaseSearch */);
- }
-
- void clear() {
- for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) {
- mPrevWordCodePointCount[i] = 0;
- mIsBeginningOfSentence[i] = false;
- }
- }
+ const bool isBeginningOfSentence, const bool tryLowerCaseSearch);
+ void clear();
const size_t mPrevWordCount;
int mPrevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH];
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
index 29f9ba37f..66cb0513b 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
@@ -344,8 +344,7 @@ bool Ver4PatriciaTriePolicy::removeUnigramEntry(const CodePointArrayView wordCod
return mNodeWriter.suppressUnigramEntry(&ptNodeParams);
}
-bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramContext *const ngramContext,
- const NgramProperty *const ngramProperty) {
+bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramProperty *const ngramProperty) {
if (!mBuffers->isUpdatable()) {
AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
return false;
@@ -355,6 +354,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramContext *const ngramContex
mDictBuffer->getTailPosition());
return false;
}
+ const NgramContext *const ngramContext = ngramProperty->getNgramContext();
if (!ngramContext->isValid()) {
AKLOGE("Ngram context is not valid for adding n-gram entry to the dictionary.");
return false;
@@ -463,9 +463,9 @@ bool Ver4PatriciaTriePolicy::updateEntriesForWordWithNgramContext(
}
const int probabilityForNgram = ngramContext->isNthPrevWordBeginningOfSentence(1 /* n */)
? NOT_A_PROBABILITY : probability;
- const NgramProperty ngramProperty(wordCodePoints.toVector(), probabilityForNgram,
+ const NgramProperty ngramProperty(*ngramContext, wordCodePoints.toVector(), probabilityForNgram,
historicalInfo);
- if (!addNgramEntry(ngramContext, &ngramProperty)) {
+ if (!addNgramEntry(&ngramProperty)) {
AKLOGE("Cannot update unigarm entry in updateEntriesForWordWithNgramContext().");
return false;
}
@@ -585,6 +585,8 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
bigramEntry.getHistoricalInfo(), mHeaderPolicy) :
bigramEntry.getProbability();
ngrams.emplace_back(
+ NgramContext(wordCodePoints.data(), wordCodePoints.size(),
+ ptNodeParams.representsBeginningOfSentence()),
CodePointArrayView(bigramWord1CodePoints, codePointCount).toVector(),
probability, *historicalInfo);
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
index 2cda0d3fa..0480876ed 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
@@ -113,8 +113,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
bool removeUnigramEntry(const CodePointArrayView wordCodePoints);
- bool addNgramEntry(const NgramContext *const ngramContext,
- const NgramProperty *const ngramProperty);
+ bool addNgramEntry(const NgramProperty *const ngramProperty);
bool removeNgramEntry(const NgramContext *const ngramContext,
const CodePointArrayView wordCodePoints);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
index ca44da9fb..66fd18a52 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
@@ -451,6 +451,8 @@ const WordProperty PatriciaTriePolicy::getWordProperty(
bigramWord1CodePoints, &word1Probability);
const int probability = getProbability(word1Probability, bigramsIt.getProbability());
ngrams.emplace_back(
+ NgramContext(wordCodePoints.data(), wordCodePoints.size(),
+ ptNodeParams.representsBeginningOfSentence()),
CodePointArrayView(bigramWord1CodePoints, word1CodePointCount).toVector(),
probability, HistoricalInfo());
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
index b17681388..8933962ab 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
@@ -93,8 +93,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
return false;
}
- bool addNgramEntry(const NgramContext *const ngramContext,
- const NgramProperty *const ngramProperty) {
+ bool addNgramEntry(const NgramProperty *const ngramProperty) {
// This method should not be called for non-updatable dictionary.
AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
return false;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index d90066cec..1669375e2 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -264,8 +264,7 @@ bool Ver4PatriciaTriePolicy::removeUnigramEntry(const CodePointArrayView wordCod
return true;
}
-bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramContext *const ngramContext,
- const NgramProperty *const ngramProperty) {
+bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramProperty *const ngramProperty) {
if (!mBuffers->isUpdatable()) {
AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
return false;
@@ -275,6 +274,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramContext *const ngramContex
mDictBuffer->getTailPosition());
return false;
}
+ const NgramContext *const ngramContext = ngramProperty->getNgramContext();
if (!ngramContext->isValid()) {
AKLOGE("Ngram context is not valid for adding n-gram entry to the dictionary.");
return false;
@@ -453,7 +453,8 @@ bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
// Needs to reduce dictionary size.
return true;
} else if (mHeaderPolicy->isDecayingDict()) {
- return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mEntryCounters.getEntryCounts(), mHeaderPolicy);
+ return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mEntryCounters.getEntryCounts(),
+ mHeaderPolicy);
}
return false;
}
@@ -503,12 +504,16 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
prevWordIds)) {
const int codePointCount = getCodePointsAndReturnCodePointCount(entry.getWordId(),
MAX_WORD_LENGTH, bigramWord1CodePoints);
- const ProbabilityEntry probabilityEntry = entry.getProbabilityEntry();
- const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo();
- const int probability = probabilityEntry.hasHistoricalInfo() ?
+ const ProbabilityEntry ngramProbabilityEntry = entry.getProbabilityEntry();
+ const HistoricalInfo *const historicalInfo = ngramProbabilityEntry.getHistoricalInfo();
+ const int probability = ngramProbabilityEntry.hasHistoricalInfo() ?
ForgettingCurveUtils::decodeProbability(historicalInfo, mHeaderPolicy) :
- probabilityEntry.getProbability();
- ngrams.emplace_back(CodePointArrayView(bigramWord1CodePoints, codePointCount).toVector(),
+ ngramProbabilityEntry.getProbability();
+ ngrams.emplace_back(
+ NgramContext(
+ wordCodePoints.data(), wordCodePoints.size(),
+ probabilityEntry.representsBeginningOfSentence()),
+ CodePointArrayView(bigramWord1CodePoints, codePointCount).toVector(),
probability, *historicalInfo);
}
// Fetch shortcut information.
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
index e3611cb32..13700b390 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
@@ -92,8 +92,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
bool removeUnigramEntry(const CodePointArrayView wordCodePoints);
- bool addNgramEntry(const NgramContext *const ngramContext,
- const NgramProperty *const ngramProperty);
+ bool addNgramEntry(const NgramProperty *const ngramProperty);
bool removeNgramEntry(const NgramContext *const ngramContext,
const CodePointArrayView wordCodePoints);