aboutsummaryrefslogtreecommitdiffstats
path: root/native
diff options
context:
space:
mode:
Diffstat (limited to 'native')
-rw-r--r--native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp75
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node.h17
-rw-r--r--native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h31
-rw-r--r--native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp12
-rw-r--r--native/jni/src/suggest/core/dictionary/bigram_dictionary.h2
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.cpp17
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.h8
-rw-r--r--native/jni/src/suggest/core/layout/proximity_info_state.h4
-rw-r--r--native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h6
-rw-r--r--native/jni/src/suggest/core/session/dic_traverse_session.h4
-rw-r--r--native/jni/src/suggest/core/suggest.cpp36
-rw-r--r--native/jni/src/suggest/core/suggest.h2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp5
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h5
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp8
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h3
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp27
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h6
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp72
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h6
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h16
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h4
22 files changed, 295 insertions, 71 deletions
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index 6a366121d..7f47493b2 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -46,8 +46,7 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring s
sourceDirChars[sourceDirUtf8Length] = '\0';
DictionaryStructureWithBufferPolicy *const dictionaryStructureWithBufferPolicy =
DictionaryStructureWithBufferPolicyFactory::newDictionaryStructureWithBufferPolicy(
- sourceDirChars, static_cast<int>(sourceDirUtf8Length),
- static_cast<int>(dictOffset), static_cast<int>(dictSize),
+ sourceDirChars, static_cast<int>(dictOffset), static_cast<int>(dictSize),
isUpdatable == JNI_TRUE);
if (!dictionaryStructureWithBufferPolicy) {
return 0;
@@ -59,6 +58,35 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring s
return reinterpret_cast<jlong>(dictionary);
}
+static void latinime_BinaryDictionary_flush(JNIEnv *env, jclass clazz, jlong dict,
+ jstring filePath) {
+ Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
+ if (!dictionary) return;
+ const jsize filePathUtf8Length = env->GetStringUTFLength(filePath);
+ char filePathChars[filePathUtf8Length + 1];
+ env->GetStringUTFRegion(filePath, 0, env->GetStringLength(filePath), filePathChars);
+ filePathChars[filePathUtf8Length] = '\0';
+ dictionary->flush(filePathChars);
+}
+
+static bool latinime_BinaryDictionary_needsToRunGC(JNIEnv *env, jclass clazz,
+ jlong dict) {
+ Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
+ if (!dictionary) return false;
+ return dictionary->needsToRunGC();
+}
+
+static void latinime_BinaryDictionary_flushWithGC(JNIEnv *env, jclass clazz, jlong dict,
+ jstring filePath) {
+ Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
+ if (!dictionary) return;
+ const jsize filePathUtf8Length = env->GetStringUTFLength(filePath);
+ char filePathChars[filePathUtf8Length + 1];
+ env->GetStringUTFRegion(filePath, 0, env->GetStringLength(filePath), filePathChars);
+ filePathChars[filePathUtf8Length] = '\0';
+ dictionary->flushWithGC(filePathChars);
+}
+
static void latinime_BinaryDictionary_close(JNIEnv *env, jclass clazz, jlong dict) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return;
@@ -160,8 +188,8 @@ static jint latinime_BinaryDictionary_getProbability(JNIEnv *env, jclass clazz,
return dictionary->getProbability(codePoints, wordLength);
}
-static jboolean latinime_BinaryDictionary_isValidBigram(JNIEnv *env, jclass clazz, jlong dict,
- jintArray word0, jintArray word1) {
+static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass clazz,
+ jlong dict, jintArray word0, jintArray word1) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return JNI_FALSE;
const jsize word0Length = env->GetArrayLength(word0);
@@ -170,7 +198,8 @@ static jboolean latinime_BinaryDictionary_isValidBigram(JNIEnv *env, jclass claz
int word1CodePoints[word1Length];
env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints);
env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
- return dictionary->isValidBigram(word0CodePoints, word0Length, word1CodePoints, word1Length);
+ return dictionary->getBigramProbability(word0CodePoints, word0Length, word1CodePoints,
+ word1Length);
}
static jfloat latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jclass clazz,
@@ -241,6 +270,16 @@ static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass claz
word1Length);
}
+static int latinime_BinaryDictionary_calculateProbabilityNative(JNIEnv *env, jclass clazz,
+ jlong dict, jint unigramProbability, jint bigramProbability) {
+ Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
+ if (!dictionary) {
+ return NOT_A_PROBABILITY;
+ }
+ return dictionary->getDictionaryStructurePolicy()->getProbability(unigramProbability,
+ bigramProbability);
+}
+
static const JNINativeMethod sMethods[] = {
{
const_cast<char *>("openNative"),
@@ -253,6 +292,21 @@ static const JNINativeMethod sMethods[] = {
reinterpret_cast<void *>(latinime_BinaryDictionary_close)
},
{
+ const_cast<char *>("flushNative"),
+ const_cast<char *>("(JLjava/lang/String;)V"),
+ reinterpret_cast<void *>(latinime_BinaryDictionary_flush)
+ },
+ {
+ const_cast<char *>("needsToRunGCNative"),
+ const_cast<char *>("(J)Z"),
+ reinterpret_cast<void *>(latinime_BinaryDictionary_needsToRunGC)
+ },
+ {
+ const_cast<char *>("flushWithGCNative"),
+ const_cast<char *>("(JLjava/lang/String;)V"),
+ reinterpret_cast<void *>(latinime_BinaryDictionary_flushWithGC)
+ },
+ {
const_cast<char *>("getSuggestionsNative"),
const_cast<char *>("(JJJ[I[I[I[I[III[I[I[I[I[I[I[I)I"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getSuggestions)
@@ -263,9 +317,9 @@ static const JNINativeMethod sMethods[] = {
reinterpret_cast<void *>(latinime_BinaryDictionary_getProbability)
},
{
- const_cast<char *>("isValidBigramNative"),
- const_cast<char *>("(J[I[I)Z"),
- reinterpret_cast<void *>(latinime_BinaryDictionary_isValidBigram)
+ const_cast<char *>("getBigramProbabilityNative"),
+ const_cast<char *>("(J[I[I)I"),
+ reinterpret_cast<void *>(latinime_BinaryDictionary_getBigramProbability)
},
{
const_cast<char *>("calcNormalizedScoreNative"),
@@ -291,6 +345,11 @@ static const JNINativeMethod sMethods[] = {
const_cast<char *>("removeBigramWordsNative"),
const_cast<char *>("(J[I[I)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_removeBigramWords)
+ },
+ {
+ const_cast<char *>("calculateProbabilityNative"),
+ const_cast<char *>("(JII)I"),
+ reinterpret_cast<void *>(latinime_BinaryDictionary_calculateProbabilityNative)
}
};
diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h
index 377015371..41ef9d2b2 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node.h
@@ -143,7 +143,7 @@ class DicNode {
dicNode->mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(),
dicNode->getOutputWordBuf(),
dicNode->mDicNodeProperties.getDepth(),
- dicNode->mDicNodeState.mDicNodeStatePrevWord.mPrevSpacePositions,
+ dicNode->mDicNodeState.mDicNodeStatePrevWord.getSecondWordFirstInputIndex(),
mDicNodeState.mDicNodeStateInput.getInputIndex(0) /* lastInputIndex */);
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
}
@@ -321,8 +321,13 @@ class DicNode {
DUMP_WORD_AND_SCORE("OUTPUT");
}
- void outputSpacePositionsResult(int *spaceIndices) const {
- mDicNodeState.mDicNodeStatePrevWord.outputSpacePositions(spaceIndices);
+ int getSecondWordFirstInputIndex(const ProximityInfoState *const pInfoState) const {
+ const int inputIndex = mDicNodeState.mDicNodeStatePrevWord.getSecondWordFirstInputIndex();
+ if (inputIndex == NOT_AN_INDEX) {
+ return NOT_AN_INDEX;
+ } else {
+ return pInfoState->getInputIndexOfSampledPoint(inputIndex);
+ }
}
bool hasMultipleWords() const {
@@ -573,7 +578,11 @@ class DicNode {
}
}
- AK_FORCE_INLINE void updateInputIndexG(DicNode_InputStateG *inputStateG) {
+ AK_FORCE_INLINE void updateInputIndexG(const DicNode_InputStateG *const inputStateG) {
+ if (mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() == 1 && isFirstLetter()) {
+ mDicNodeState.mDicNodeStatePrevWord.setSecondWordFirstInputIndex(
+ inputStateG->mInputIndex);
+ }
mDicNodeState.mDicNodeStateInput.updateInputIndexG(inputStateG->mPointerId,
inputStateG->mInputIndex, inputStateG->mPrevCodePoint,
inputStateG->mTerminalDiffCost, inputStateG->mRawLength);
diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h
index b7af97018..b8986203d 100644
--- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h
+++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h
@@ -22,6 +22,7 @@
#include "defines.h"
#include "suggest/core/dicnode/dic_node_utils.h"
+#include "suggest/core/layout/proximity_info_state.h"
namespace latinime {
@@ -29,9 +30,8 @@ class DicNodeStatePrevWord {
public:
AK_FORCE_INLINE DicNodeStatePrevWord()
: mPrevWordCount(0), mPrevWordLength(0), mPrevWordStart(0), mPrevWordProbability(0),
- mPrevWordNodePos(NOT_A_DICT_POS) {
+ mPrevWordNodePos(NOT_A_DICT_POS), mSecondWordFirstInputIndex(NOT_AN_INDEX) {
memset(mPrevWord, 0, sizeof(mPrevWord));
- memset(mPrevSpacePositions, 0, sizeof(mPrevSpacePositions));
}
virtual ~DicNodeStatePrevWord() {}
@@ -42,7 +42,7 @@ class DicNodeStatePrevWord {
mPrevWordStart = 0;
mPrevWordProbability = -1;
mPrevWordNodePos = NOT_A_DICT_POS;
- memset(mPrevSpacePositions, 0, sizeof(mPrevSpacePositions));
+ mSecondWordFirstInputIndex = NOT_AN_INDEX;
}
void init(const int prevWordNodePos) {
@@ -51,7 +51,7 @@ class DicNodeStatePrevWord {
mPrevWordStart = 0;
mPrevWordProbability = -1;
mPrevWordNodePos = prevWordNodePos;
- memset(mPrevSpacePositions, 0, sizeof(mPrevSpacePositions));
+ mSecondWordFirstInputIndex = NOT_AN_INDEX;
}
// Init by copy
@@ -61,14 +61,14 @@ class DicNodeStatePrevWord {
mPrevWordStart = prevWord->mPrevWordStart;
mPrevWordProbability = prevWord->mPrevWordProbability;
mPrevWordNodePos = prevWord->mPrevWordNodePos;
+ mSecondWordFirstInputIndex = prevWord->mSecondWordFirstInputIndex;
memcpy(mPrevWord, prevWord->mPrevWord, prevWord->mPrevWordLength * sizeof(mPrevWord[0]));
- memcpy(mPrevSpacePositions, prevWord->mPrevSpacePositions, sizeof(mPrevSpacePositions));
}
void init(const int16_t prevWordCount, const int16_t prevWordProbability,
const int prevWordNodePos, const int *const src0, const int16_t length0,
- const int *const src1, const int16_t length1, const int *const prevSpacePositions,
- const int lastInputIndex) {
+ const int *const src1, const int16_t length1,
+ const int prevWordSecondWordFirstInputIndex, const int lastInputIndex) {
mPrevWordCount = min(prevWordCount, static_cast<int16_t>(MAX_RESULTS));
mPrevWordProbability = prevWordProbability;
mPrevWordNodePos = prevWordNodePos;
@@ -80,8 +80,7 @@ class DicNodeStatePrevWord {
mPrevWord[twoWordsLen] = KEYCODE_SPACE;
mPrevWordStart = length0;
mPrevWordLength = static_cast<int16_t>(twoWordsLen + 1);
- memcpy(mPrevSpacePositions, prevSpacePositions, sizeof(mPrevSpacePositions));
- mPrevSpacePositions[mPrevWordCount - 1] = lastInputIndex;
+ mSecondWordFirstInputIndex = prevWordSecondWordFirstInputIndex;
}
void truncate(const int offset) {
@@ -96,11 +95,12 @@ class DicNodeStatePrevWord {
mPrevWordLength = newPrevWordLength;
}
- void outputSpacePositions(int *spaceIndices) const {
- // Convert uint16_t to int
- for (int i = 0; i < MAX_RESULTS; i++) {
- spaceIndices[i] = mPrevSpacePositions[i];
- }
+ void setSecondWordFirstInputIndex(const int inputIndex) {
+ mSecondWordFirstInputIndex = inputIndex;
+ }
+
+ int getSecondWordFirstInputIndex() const {
+ return mSecondWordFirstInputIndex;
}
// TODO: remove
@@ -138,8 +138,6 @@ class DicNodeStatePrevWord {
// TODO: Move to private
int mPrevWord[MAX_WORD_LENGTH];
- // TODO: Move to private
- int mPrevSpacePositions[MAX_RESULTS];
private:
// Caution!!!
@@ -150,6 +148,7 @@ class DicNodeStatePrevWord {
int16_t mPrevWordStart;
int16_t mPrevWordProbability;
int mPrevWordNodePos;
+ int mSecondWordFirstInputIndex;
};
} // namespace latinime
#endif // LATINIME_DIC_NODE_STATE_PREVWORD_H
diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
index 425b07624..5ba71c168 100644
--- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
@@ -150,24 +150,26 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in
return mDictionaryStructurePolicy->getBigramsPositionOfNode(pos);
}
-bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *word1,
+int BigramDictionary::getBigramProbability(const int *word0, int length0, const int *word1,
int length1) const {
int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */);
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
- if (NOT_A_DICT_POS == pos) return false;
+ if (NOT_A_DICT_POS == pos) return NOT_A_PROBABILITY;
int nextWordPos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(word1, length1,
false /* forceLowerCaseSearch */);
- if (NOT_A_DICT_POS == nextWordPos) return false;
+ if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY;
BinaryDictionaryBigramsIterator bigramsIt(
mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos);
while (bigramsIt.hasNext()) {
bigramsIt.next();
if (bigramsIt.getBigramPos() == nextWordPos) {
- return true;
+ return mDictionaryStructurePolicy->getProbability(
+ mDictionaryStructurePolicy->getUnigramProbabilityOfPtNode(nextWordPos),
+ bigramsIt.getProbability());
}
}
- return false;
+ return NOT_A_PROBABILITY;
}
// TODO: Move functions related to bigram to here
diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h b/native/jni/src/suggest/core/dictionary/bigram_dictionary.h
index 99b964c49..8af7ee75d 100644
--- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.h
@@ -29,7 +29,7 @@ class BigramDictionary {
int getPredictions(const int *word, int length, int *outBigramCodePoints,
int *outBigramProbability, int *outputTypes) const;
- bool isValidBigram(const int *word1, int length1, const int *word2, int length2) const;
+ int getBigramProbability(const int *word1, int length1, const int *word2, int length2) const;
~BigramDictionary();
private:
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index 29fe7ab94..ec1b63a12 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -93,8 +93,9 @@ int Dictionary::getProbability(const int *word, int length) const {
return getDictionaryStructurePolicy()->getUnigramProbabilityOfPtNode(pos);
}
-bool Dictionary::isValidBigram(const int *word0, int length0, const int *word1, int length1) const {
- return mBigramDictionary->isValidBigram(word0, length0, word1, length1);
+int Dictionary::getBigramProbability(const int *word0, int length0, const int *word1,
+ int length1) const {
+ return mBigramDictionary->getBigramProbability(word0, length0, word1, length1);
}
void Dictionary::addUnigramWord(const int *const word, const int length, const int probability) {
@@ -112,6 +113,18 @@ void Dictionary::removeBigramWords(const int *const word0, const int length0,
mDictionaryStructureWithBufferPolicy->removeBigramWords(word0, length0, word1, length1);
}
+void Dictionary::flush(const char *const filePath) {
+ mDictionaryStructureWithBufferPolicy->flush(filePath);
+}
+
+void Dictionary::flushWithGC(const char *const filePath) {
+ mDictionaryStructureWithBufferPolicy->flushWithGC(filePath);
+}
+
+bool Dictionary::needsToRunGC() {
+ return mDictionaryStructureWithBufferPolicy->needsToRunGC();
+}
+
void Dictionary::logDictionaryInfo(JNIEnv *const env) const {
const int BUFFER_SIZE = 16;
int dictionaryIdCodePointBuffer[BUFFER_SIZE];
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h
index 0afe5a54b..974447468 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h
@@ -67,7 +67,7 @@ class Dictionary {
int getProbability(const int *word, int length) const;
- bool isValidBigram(const int *word0, int length0, const int *word1, int length1) const;
+ int getBigramProbability(const int *word0, int length0, const int *word1, int length1) const;
void addUnigramWord(const int *const word, const int length, const int probability);
@@ -77,6 +77,12 @@ class Dictionary {
void removeBigramWords(const int *const word0, const int length0, const int *const word1,
const int length1);
+ void flush(const char *const filePath);
+
+ void flushWithGC(const char *const filePath);
+
+ bool needsToRunGC();
+
const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const {
return mDictionaryStructureWithBufferPolicy;
}
diff --git a/native/jni/src/suggest/core/layout/proximity_info_state.h b/native/jni/src/suggest/core/layout/proximity_info_state.h
index 01bf81864..c94060fa9 100644
--- a/native/jni/src/suggest/core/layout/proximity_info_state.h
+++ b/native/jni/src/suggest/core/layout/proximity_info_state.h
@@ -130,6 +130,10 @@ class ProximityInfoState {
return mSampledInputYs[index];
}
+ int getInputIndexOfSampledPoint(const int sampledIndex) const {
+ return mSampledInputIndice[sampledIndex];
+ }
+
bool hasSpaceProximity(const int index) const;
int getLengthCache(const int index) const {
diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
index c8cbbcfdf..24d1b8ba1 100644
--- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
@@ -74,6 +74,12 @@ class DictionaryStructureWithBufferPolicy {
virtual bool removeBigramWords(const int *const word0, const int length0,
const int *const word1, const int length1) = 0;
+ virtual void flush(const char *const filePath) = 0;
+
+ virtual void flushWithGC(const char *const filePath) = 0;
+
+ virtual bool needsToRunGC() const = 0;
+
protected:
DictionaryStructureWithBufferPolicy() {}
diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.h b/native/jni/src/suggest/core/session/dic_traverse_session.h
index e2ef5fc76..e0b1c67d9 100644
--- a/native/jni/src/suggest/core/session/dic_traverse_session.h
+++ b/native/jni/src/suggest/core/session/dic_traverse_session.h
@@ -113,7 +113,9 @@ class DicTraverseSession {
if (usedPointerCount != 1) {
return false;
}
- *pointerId = usedPointerId;
+ if (pointerId) {
+ *pointerId = usedPointerId;
+ }
return true;
}
diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp
index e788e914a..0c925be25 100644
--- a/native/jni/src/suggest/core/suggest.cpp
+++ b/native/jni/src/suggest/core/suggest.cpp
@@ -117,7 +117,7 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPo
* Outputs the final list of suggestions (i.e., terminal nodes).
*/
int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequencies,
- int *outputCodePoints, int *spaceIndices, int *outputTypes) const {
+ int *outputCodePoints, int *outputIndicesToPartialCommit, int *outputTypes) const {
#if DEBUG_EVALUATE_MOST_PROBABLE_STRING
const int terminalSize = 0;
#else
@@ -139,6 +139,7 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
SCORING->getMostProbableString(traverseSession, terminalSize, languageWeight,
&outputCodePoints[0], &outputTypes[0], &frequencies[0]);
if (hasMostProbableString) {
+ outputIndicesToPartialCommit[outputWordIndex] = NOT_AN_INDEX;
++outputWordIndex;
}
@@ -160,6 +161,9 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
|| (traverseSession->getInputSize()
>= MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT
&& terminals[0].hasMultipleWords())) : false;
+ // TODO: have partial commit work even with multiple pointers.
+ const bool outputSecondWordFirstLetterInputIndex =
+ traverseSession->isOnlyOnePointerUsed(0 /* pointerId */);
// Output suggestion results here
for (int terminalIndex = 0; terminalIndex < terminalSize && outputWordIndex < MAX_RESULTS;
++terminalIndex) {
@@ -194,18 +198,21 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
terminalDicNode->isExactMatch()
|| (forceCommitMultiWords && terminalDicNode->hasMultipleWords())
|| (isValidWord && SCORING->doesAutoCorrectValidWord()));
- maxScore = max(maxScore, finalScore);
-
- // TODO: Implement a smarter auto-commit method for handling multi-word suggestions.
- // Index for top typing suggestion should be 0.
- if (isValidWord && outputWordIndex == 0) {
- terminalDicNode->outputSpacePositionsResult(spaceIndices);
+ if (maxScore < finalScore && isValidWord) {
+ maxScore = finalScore;
}
// Don't output invalid words. However, we still need to submit their shortcuts if any.
if (isValidWord) {
outputTypes[outputWordIndex] = Dictionary::KIND_CORRECTION | outputTypeFlags;
frequencies[outputWordIndex] = finalScore;
+ if (outputSecondWordFirstLetterInputIndex) {
+ outputIndicesToPartialCommit[outputWordIndex] =
+ terminalDicNode->getSecondWordFirstInputIndex(
+ traverseSession->getProximityInfoState(0));
+ } else {
+ outputIndicesToPartialCommit[outputWordIndex] = NOT_AN_INDEX;
+ }
// Populate the outputChars array with the suggested word.
const int startIndex = outputWordIndex * MAX_WORD_LENGTH;
terminalDicNode->outputResult(&outputCodePoints[startIndex]);
@@ -220,8 +227,19 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
// Shortcut is not supported for multiple words suggestions.
// TODO: Check shortcuts during traversal for multiple words suggestions.
const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode);
- outputWordIndex = ShortcutUtils::outputShortcuts(&shortcutIt, outputWordIndex,
- finalScore, outputCodePoints, frequencies, outputTypes, sameAsTyped);
+ const int updatedOutputWordIndex = ShortcutUtils::outputShortcuts(&shortcutIt,
+ outputWordIndex, finalScore, outputCodePoints, frequencies, outputTypes,
+ sameAsTyped);
+ const int secondWordFirstInputIndex = terminalDicNode->getSecondWordFirstInputIndex(
+ traverseSession->getProximityInfoState(0));
+ for (int i = outputWordIndex; i < updatedOutputWordIndex; ++i) {
+ if (outputSecondWordFirstLetterInputIndex) {
+ outputIndicesToPartialCommit[i] = secondWordFirstInputIndex;
+ } else {
+ outputIndicesToPartialCommit[i] = NOT_AN_INDEX;
+ }
+ }
+ outputWordIndex = updatedOutputWordIndex;
}
DicNode::managedDelete(terminalDicNode);
}
diff --git a/native/jni/src/suggest/core/suggest.h b/native/jni/src/suggest/core/suggest.h
index 875cbe4e0..b24019632 100644
--- a/native/jni/src/suggest/core/suggest.h
+++ b/native/jni/src/suggest/core/suggest.h
@@ -55,7 +55,7 @@ class Suggest : public SuggestInterface {
void createNextWordDicNode(DicTraverseSession *traverseSession, DicNode *dicNode,
const bool spaceSubstitution) const;
int outputSuggestions(DicTraverseSession *traverseSession, int *frequencies,
- int *outputCodePoints, int *outputIndices, int *outputTypes) const;
+ int *outputCodePoints, int *outputIndicesToPartialCommit, int *outputTypes) const;
void initializeSearch(DicTraverseSession *traverseSession, int commitPoint) const;
void expandCurrentDicNodes(DicTraverseSession *traverseSession) const;
void processTerminalDicNode(DicTraverseSession *traverseSession, DicNode *dicNode) const;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
index b8a5f27e9..4c44d22fd 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
@@ -54,11 +54,13 @@ void DynamicBigramListPolicy::skipAllBigrams(int *const pos) const {
}
}
-bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPos) {
+bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPos,
+ int *outBigramsCount) {
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*fromPos);
if (usesAdditionalBuffer) {
*fromPos -= mBuffer->getOriginalBufferSize();
}
+ *outBigramsCount = 0;
BigramListReadWriteUtils::BigramFlags flags;
do {
// The buffer address can be changed after calling buffer writing methods.
@@ -91,6 +93,7 @@ bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPo
toPos)) {
return false;
}
+ (*outBigramsCount)++;
} while(BigramListReadWriteUtils::hasNext(flags));
if (usesAdditionalBuffer) {
*fromPos += mBuffer->getOriginalBufferSize();
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
index c45e26acc..dafb62d80 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
@@ -45,8 +45,9 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
void skipAllBigrams(int *const pos) const;
// Copy bigrams from the bigram list that starts at fromPos to toPos and advance these
- // positions after bigram lists. This method skips invalid bigram entries.
- bool copyAllBigrams(int *const fromPos, int *const toPos);
+ // positions after bigram lists. This method skips invalid bigram entries and write the valid
+ // bigram entry count to outBigramsCount.
+ bool copyAllBigrams(int *const fromPos, int *const toPos, int *outBigramsCount);
bool addNewBigramEntryToBigramList(const int bigramPos, const int probability, int *const pos);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp
index 434858d67..ff80dd2f6 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp
@@ -27,12 +27,12 @@
namespace latinime {
/* static */ DictionaryStructureWithBufferPolicy *DictionaryStructureWithBufferPolicyFactory
- ::newDictionaryStructureWithBufferPolicy(const char *const path, const int pathLength,
- const int bufOffset, const int size, const bool isUpdatable) {
+ ::newDictionaryStructureWithBufferPolicy(const char *const path, const int bufOffset,
+ const int size, const bool isUpdatable) {
// Allocated buffer in MmapedBuffer::openBuffer() will be freed in the destructor of
// impl classes of DictionaryStructureWithBufferPolicy.
- const MmappedBuffer *const mmapedBuffer = MmappedBuffer::openBuffer(path, pathLength, bufOffset,
- size, isUpdatable);
+ const MmappedBuffer *const mmapedBuffer = MmappedBuffer::openBuffer(path, bufOffset, size,
+ isUpdatable);
if (!mmapedBuffer) {
return 0;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h
index 1cb7a89c4..8cebc3b16 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h
@@ -27,8 +27,7 @@ namespace latinime {
class DictionaryStructureWithBufferPolicyFactory {
public:
static DictionaryStructureWithBufferPolicy *newDictionaryStructureWithBufferPolicy(
- const char *const path, const int pathLength, const int bufOffset, const int size,
- const bool isUpdatable);
+ const char *const path, const int bufOffset, const int size, const bool isUpdatable);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryStructureWithBufferPolicyFactory);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
index cccc09041..2198a13c9 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
@@ -243,4 +243,31 @@ bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const
return writingHelper.removeBigramWords(word0Pos, word1Pos);
}
+void DynamicPatriciaTriePolicy::flush(const char *const filePath) {
+ if (!mBuffer->isUpdatable()) {
+ AKLOGI("Warning: flush() is called for non-updatable dictionary.");
+ return;
+ }
+ DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
+ &mBigramListPolicy, &mShortcutListPolicy);
+ writingHelper.writeToDictFile(filePath, mBuffer->getBuffer(), mHeaderPolicy.getSize());
+}
+
+void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) {
+ if (!mBuffer->isUpdatable()) {
+ AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
+ return;
+ }
+ // TODO: Implement.
+}
+
+bool DynamicPatriciaTriePolicy::needsToRunGC() const {
+ if (!mBuffer->isUpdatable()) {
+ AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
+ return false;
+ }
+ // TODO: Implement.
+ return false;
+}
+
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
index 50c724012..2cbb0ff3b 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
@@ -85,6 +85,12 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
bool removeBigramWords(const int *const word0, const int length0, const int *const word1,
const int length1);
+ void flush(const char *const filePath);
+
+ void flushWithGC(const char *const filePath);
+
+ bool needsToRunGC() const;
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
index b80b9b33f..a67c0d94a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
@@ -16,6 +16,9 @@
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
+#include <cstdio>
+#include <cstring>
+
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
@@ -27,6 +30,8 @@
namespace latinime {
const int DynamicPatriciaTrieWritingHelper::CHILDREN_POSITION_FIELD_SIZE = 3;
+const char *const DynamicPatriciaTrieWritingHelper::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
+ ".tmp";
bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
DynamicPatriciaTrieReadingHelper *const readingHelper,
@@ -125,11 +130,45 @@ bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const
bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) {
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
nodeReader.fetchNodeInfoFromBuffer(word0Pos);
- if (nodeReader.isDeleted() || nodeReader.getBigramsPos() == NOT_A_DICT_POS) {
+ if (nodeReader.getBigramsPos() == NOT_A_DICT_POS) {
return false;
}
- // TODO: Implement.
- return false;
+ return mBigramPolicy->removeBigram(nodeReader.getBigramsPos(), word1Pos);
+}
+
+void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileName,
+ const uint8_t *const headerBuf, const int headerSize) {
+ const int tmpFileNameBufSize = strlen(fileName)
+ + strlen(TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE) + 1;
+ char tmpFileName[tmpFileNameBufSize];
+ snprintf(tmpFileName, tmpFileNameBufSize, "%s%s", fileName,
+ TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE);
+ FILE *const file = fopen(tmpFileName, "wb");
+ if (!file) {
+ return;
+ }
+ // Write header.
+ if (fwrite(headerBuf, headerSize, 1, file) < 1) {
+ fclose(file);
+ remove(tmpFileName);
+ return;
+ }
+ // Write data in original buffer.
+ if (fwrite(mBuffer->getBuffer(false /* usesAdditionalBuffer */),
+ mBuffer->getOriginalBufferSize(), 1, file) < 1) {
+ fclose(file);
+ remove(tmpFileName);
+ return;
+ }
+ // Write data in additional buffer.
+ if (fwrite(mBuffer->getBuffer(true /* usesAdditionalBuffer */),
+ mBuffer->getTailPosition() - mBuffer->getOriginalBufferSize(), 1, file) < 1) {
+ fclose(file);
+ remove(tmpFileName);
+ return;
+ }
+ fclose(file);
+ rename(tmpFileName, fileName);
}
bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
@@ -193,13 +232,9 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const boo
const int originalBigramListPos, const int originalShortcutListPos,
int *const writingPos) {
const int nodePos = *writingPos;
- // Create node flags and write them.
- const PatriciaTrieReadingUtils::NodeFlags nodeFlags =
- PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord,
- probability != NOT_A_PROBABILITY, originalShortcutListPos != NOT_A_DICT_POS,
- originalBigramListPos != NOT_A_DICT_POS, codePointCount > 1,
- 3 /* childrenPositionFieldSize */);
- if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, nodeFlags,
+ // Write dummy flags. The Node flags are updated with appropriate flags at the last step of the
+ // PtNode writing.
+ if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, 0 /* nodeFlags */,
writingPos)) {
return false;
}
@@ -212,7 +247,7 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const boo
// Write code points
if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mBuffer, codePoints,
codePointCount, writingPos)) {
- return false;;
+ return false;
}
// Write probability when the probability is a valid probability, which means this node is
// terminal.
@@ -235,12 +270,25 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const boo
}
}
// Copy bigram list when the originalBigramListPos is valid dictionary position.
+ int bigramCount = 0;
if (originalBigramListPos != NOT_A_DICT_POS) {
int fromPos = originalBigramListPos;
- if (!mBigramPolicy->copyAllBigrams(&fromPos, writingPos)) {
+ if (!mBigramPolicy->copyAllBigrams(&fromPos, writingPos, &bigramCount)) {
return false;
}
}
+ // Create node flags and write them.
+ PatriciaTrieReadingUtils::NodeFlags nodeFlags =
+ PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord,
+ probability != NOT_A_PROBABILITY /* isTerminal */,
+ originalShortcutListPos != NOT_A_DICT_POS /* hasShortcutTargets */,
+ bigramCount > 0 /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */,
+ CHILDREN_POSITION_FIELD_SIZE);
+ int flagsFieldPos = nodePos;
+ if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, nodeFlags,
+ &flagsFieldPos)) {
+ return false;
+ }
return true;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
index 20e35abcf..faf7a4e1b 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
@@ -17,6 +17,8 @@
#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H
#define LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H
+#include <stdint.h>
+
#include "defines.h"
namespace latinime {
@@ -46,10 +48,14 @@ class DynamicPatriciaTrieWritingHelper {
// Remove a bigram relation from word0Pos to word1Pos.
bool removeBigramWords(const int word0Pos, const int word1Pos);
+ void writeToDictFile(const char *const fileName, const uint8_t *const headerBuf,
+ const int headerSize);
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper);
static const int CHILDREN_POSITION_FIELD_SIZE;
+ static const char *const TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE;
BufferWithExtendableBuffer *const mBuffer;
DynamicBigramListPolicy *const mBigramPolicy;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
index 75d976205..cee3e4ab2 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
@@ -96,6 +96,22 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
return false;
}
+ void flush(const char *const filePath) {
+ // This method should not be called for non-updatable dictionary.
+ AKLOGI("Warning: flush() is called for non-updatable dictionary.");
+ }
+
+ void flushWithGC(const char *const filePath) {
+ // This method should not be called for non-updatable dictionary.
+ AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
+ }
+
+ bool needsToRunGC() const {
+ // This method should not be called for non-updatable dictionary.
+ AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
+ return false;
+ }
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h
index 6febd7832..6b69116eb 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h
@@ -29,8 +29,8 @@ namespace latinime {
class MmappedBuffer {
public:
- static MmappedBuffer* openBuffer(const char *const path, const int pathLength,
- const int bufferOffset, const int bufferSize, const bool isUpdatable) {
+ static MmappedBuffer* openBuffer(const char *const path, const int bufferOffset,
+ const int bufferSize, const bool isUpdatable) {
const int openMode = isUpdatable ? O_RDWR : O_RDONLY;
const int mmapFd = open(path, openMode);
if (mmapFd < 0) {