aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni')
-rw-r--r--native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp34
-rw-r--r--native/jni/com_android_inputmethod_latin_DicTraverseSession.cpp12
-rw-r--r--native/jni/src/defines.h3
-rw-r--r--native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp21
-rw-r--r--native/jni/src/suggest/core/dictionary/bigram_dictionary.h6
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.cpp14
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.h8
-rw-r--r--native/jni/src/suggest/core/session/dic_traverse_session.cpp21
-rw-r--r--native/jni/src/suggest/core/session/dic_traverse_session.h26
-rw-r--r--native/jni/src/suggest/core/session/prev_words_info.h65
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp1
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp117
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h16
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp6
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h3
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp22
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h5
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp6
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp5
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp19
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h6
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h1
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.cpp10
-rw-r--r--native/jni/src/suggest/policyimpl/typing/scoring_params.cpp1
-rw-r--r--native/jni/src/suggest/policyimpl/typing/scoring_params.h1
-rw-r--r--native/jni/src/suggest/policyimpl/typing/typing_weighting.h3
28 files changed, 285 insertions, 153 deletions
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index 18b78c4df..28aaf2d1a 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -28,6 +28,7 @@
#include "suggest/core/dictionary/property/unigram_property.h"
#include "suggest/core/dictionary/property/word_property.h"
#include "suggest/core/result/suggestion_results.h"
+#include "suggest/core/session/prev_words_info.h"
#include "suggest/core/suggest_options.h"
#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
#include "utils/char_utils.h"
@@ -190,7 +191,9 @@ static void latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz,
ProximityInfo *pInfo = reinterpret_cast<ProximityInfo *>(proximityInfo);
DicTraverseSession *traverseSession =
reinterpret_cast<DicTraverseSession *>(dicTraverseSession);
-
+ if (!traverseSession) {
+ return;
+ }
// Input values
int xCoordinates[inputSize];
int yCoordinates[inputSize];
@@ -245,15 +248,15 @@ static void latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz,
float languageWeight;
env->GetFloatArrayRegion(inOutLanguageWeight, 0, 1 /* len */, &languageWeight);
SuggestionResults suggestionResults(MAX_RESULTS);
+ const PrevWordsInfo prevWordsInfo(prevWordCodePoints, prevWordCodePointsLength,
+ false /* isStartOfSentence */);
if (givenSuggestOptions.isGesture() || inputSize > 0) {
// TODO: Use SuggestionResults to return suggestions.
dictionary->getSuggestions(pInfo, traverseSession, xCoordinates, yCoordinates,
- times, pointerIds, inputCodePoints, inputSize, prevWordCodePoints,
- prevWordCodePointsLength, &givenSuggestOptions, languageWeight,
- &suggestionResults);
+ times, pointerIds, inputCodePoints, inputSize, &prevWordsInfo,
+ &givenSuggestOptions, languageWeight, &suggestionResults);
} else {
- dictionary->getPredictions(prevWordCodePoints, prevWordCodePointsLength,
- &suggestionResults);
+ dictionary->getPredictions(&prevWordsInfo, &suggestionResults);
}
suggestionResults.outputSuggestions(env, outSuggestionCount, outCodePointsArray,
outScoresArray, outSpaceIndicesArray, outTypesArray,
@@ -280,8 +283,8 @@ static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass c
int word1CodePoints[word1Length];
env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints);
env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
- return dictionary->getBigramProbability(word0CodePoints, word0Length, word1CodePoints,
- word1Length);
+ const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, false /* isStartOfSentence */);
+ return dictionary->getBigramProbability(&prevWordsInfo, word1CodePoints, word1Length);
}
// Method to iterate all words in the dictionary for makedict.
@@ -467,16 +470,6 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
return languageModelParamCount;
}
-static int latinime_BinaryDictionary_calculateProbabilityNative(JNIEnv *env, jclass clazz,
- jlong dict, jint unigramProbability, jint bigramProbability) {
- Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
- if (!dictionary) {
- return NOT_A_PROBABILITY;
- }
- return dictionary->getDictionaryStructurePolicy()->getProbability(unigramProbability,
- bigramProbability);
-}
-
static jstring latinime_BinaryDictionary_getProperty(JNIEnv *env, jclass clazz, jlong dict,
jstring query) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
@@ -670,11 +663,6 @@ static const JNINativeMethod sMethods[] = {
reinterpret_cast<void *>(latinime_BinaryDictionary_addMultipleDictionaryEntries)
},
{
- const_cast<char *>("calculateProbabilityNative"),
- const_cast<char *>("(JII)I"),
- reinterpret_cast<void *>(latinime_BinaryDictionary_calculateProbabilityNative)
- },
- {
const_cast<char *>("getPropertyNative"),
const_cast<char *>("(JLjava/lang/String;)Ljava/lang/String;"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getProperty)
diff --git a/native/jni/com_android_inputmethod_latin_DicTraverseSession.cpp b/native/jni/com_android_inputmethod_latin_DicTraverseSession.cpp
index 386643332..766064153 100644
--- a/native/jni/com_android_inputmethod_latin_DicTraverseSession.cpp
+++ b/native/jni/com_android_inputmethod_latin_DicTraverseSession.cpp
@@ -22,6 +22,7 @@
#include "jni.h"
#include "jni_common.h"
#include "suggest/core/session/dic_traverse_session.h"
+#include "suggest/core/session/prev_words_info.h"
namespace latinime {
class Dictionary;
@@ -34,16 +35,19 @@ static jlong latinime_setDicTraverseSession(JNIEnv *env, jclass clazz, jstring l
static void latinime_initDicTraverseSession(JNIEnv *env, jclass clazz, jlong traverseSession,
jlong dictionary, jintArray previousWord, jint previousWordLength) {
DicTraverseSession *ts = reinterpret_cast<DicTraverseSession *>(traverseSession);
+ if (!ts) {
+ return;
+ }
Dictionary *dict = reinterpret_cast<Dictionary *>(dictionary);
if (!previousWord) {
- DicTraverseSession::initSessionInstance(
- ts, dict, 0 /* prevWord */, 0 /* prevWordLength*/, 0 /* suggestOptions */);
+ PrevWordsInfo prevWordsInfo;
+ ts->init(dict, &prevWordsInfo, 0 /* suggestOptions */);
return;
}
int prevWord[previousWordLength];
env->GetIntArrayRegion(previousWord, 0, previousWordLength, prevWord);
- DicTraverseSession::initSessionInstance(
- ts, dict, prevWord, previousWordLength, 0 /* suggestOptions */);
+ PrevWordsInfo prevWordsInfo(prevWord, previousWordLength, false /* isStartOfSentence */);
+ ts->init(dict, &prevWordsInfo, 0 /* suggestOptions */);
}
static void latinime_releaseDicTraverseSession(JNIEnv *env, jclass clazz, jlong traverseSession) {
diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h
index a80c97530..24d04e51f 100644
--- a/native/jni/src/defines.h
+++ b/native/jni/src/defines.h
@@ -336,6 +336,9 @@ static inline void prof_out(void) {
#define MAX_POINTER_COUNT 1
#define MAX_POINTER_COUNT_G 2
+// (MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1)-gram is supported.
+#define MAX_PREV_WORD_COUNT_FOR_N_GRAM 1
+
#define DISALLOW_DEFAULT_CONSTRUCTOR(TypeName) \
TypeName() = delete
diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
index f793363a8..847fa1b02 100644
--- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
@@ -26,6 +26,7 @@
#include "suggest/core/dictionary/dictionary.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "suggest/core/result/suggestion_results.h"
+#include "suggest/core/session/prev_words_info.h"
#include "utils/char_utils.h"
namespace latinime {
@@ -42,19 +43,18 @@ BigramDictionary::~BigramDictionary() {
}
/* Parameters :
- * prevWord: the word before, the one for which we need to look up bigrams.
- * prevWordLength: its length.
+ * prevWordsInfo: Information of previous words to get the predictions.
* outSuggestionResults: SuggestionResults to put the predictions.
*/
-void BigramDictionary::getPredictions(const int *prevWord, const int prevWordLength,
+void BigramDictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
SuggestionResults *const outSuggestionResults) const {
- int pos = getBigramListPositionForWord(prevWord, prevWordLength,
- false /* forceLowerCaseSearch */);
+ int pos = getBigramListPositionForWord(prevWordsInfo->getPrevWordCodePoints(),
+ prevWordsInfo->getPrevWordCodePointCount(), false /* forceLowerCaseSearch */);
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
if (NOT_A_DICT_POS == pos) {
// If no bigrams for this exact word, search again in lower case.
- pos = getBigramListPositionForWord(prevWord, prevWordLength,
- true /* forceLowerCaseSearch */);
+ pos = getBigramListPositionForWord(prevWordsInfo->getPrevWordCodePoints(),
+ prevWordsInfo->getPrevWordCodePointCount(), true /* forceLowerCaseSearch */);
}
// If still no bigrams, we really don't have them!
if (NOT_A_DICT_POS == pos) return;
@@ -96,9 +96,10 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in
return mDictionaryStructurePolicy->getBigramsPositionOfPtNode(pos);
}
-int BigramDictionary::getBigramProbability(const int *word0, int length0, const int *word1,
- int length1) const {
- int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */);
+int BigramDictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo,
+ const int *word1, int length1) const {
+ int pos = getBigramListPositionForWord(prevWordsInfo->getPrevWordCodePoints(),
+ prevWordsInfo->getPrevWordCodePointCount(), false /* forceLowerCaseSearch */);
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
if (NOT_A_DICT_POS == pos) return NOT_A_PROBABILITY;
int nextWordPos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(word1, length1,
diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h b/native/jni/src/suggest/core/dictionary/bigram_dictionary.h
index 12aaf20d3..bd3aed1bd 100644
--- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.h
@@ -22,15 +22,17 @@
namespace latinime {
class DictionaryStructureWithBufferPolicy;
+class PrevWordsInfo;
class SuggestionResults;
class BigramDictionary {
public:
BigramDictionary(const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy);
- void getPredictions(const int *word, int length,
+ void getPredictions(const PrevWordsInfo *const prevWordsInfo,
SuggestionResults *const outSuggestionResults) const;
- int getBigramProbability(const int *word1, int length1, const int *word2, int length2) const;
+ int getBigramProbability(const PrevWordsInfo *const prevWordsInfo,
+ const int *word1, int length1) const;
~BigramDictionary();
private:
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index fdc893653..c860d82af 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -44,12 +44,11 @@ Dictionary::Dictionary(JNIEnv *env, DictionaryStructureWithBufferPolicy::Structu
void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
- int inputSize, int *prevWordCodePoints, int prevWordLength,
+ int inputSize, const PrevWordsInfo *const prevWordsInfo,
const SuggestOptions *const suggestOptions, const float languageWeight,
SuggestionResults *const outSuggestionResults) const {
TimeKeeper::setCurrentTime();
- DicTraverseSession::initSessionInstance(
- traverseSession, this, prevWordCodePoints, prevWordLength, suggestOptions);
+ traverseSession->init(this, prevWordsInfo, suggestOptions);
const auto &suggest = suggestOptions->isGesture() ? mGestureSuggest : mTypingSuggest;
suggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
ycoordinates, times, pointerIds, inputCodePoints, inputSize,
@@ -59,11 +58,10 @@ void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession
}
}
-void Dictionary::getPredictions(const int *word, int length,
+void Dictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
SuggestionResults *const outSuggestionResults) const {
TimeKeeper::setCurrentTime();
- if (length <= 0) return;
- mBigramDictionary.getPredictions(word, length, outSuggestionResults);
+ mBigramDictionary.getPredictions(prevWordsInfo, outSuggestionResults);
}
int Dictionary::getProbability(const int *word, int length) const {
@@ -76,10 +74,10 @@ int Dictionary::getProbability(const int *word, int length) const {
return getDictionaryStructurePolicy()->getUnigramProbabilityOfPtNode(pos);
}
-int Dictionary::getBigramProbability(const int *word0, int length0, const int *word1,
+int Dictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word1,
int length1) const {
TimeKeeper::setCurrentTime();
- return mBigramDictionary.getBigramProbability(word0, length0, word1, length1);
+ return mBigramDictionary.getBigramProbability(prevWordsInfo, word1, length1);
}
void Dictionary::addUnigramWord(const int *const word, const int length,
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h
index f0a7e5b6a..b63c61fbb 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h
@@ -31,6 +31,7 @@ namespace latinime {
class DictionaryStructureWithBufferPolicy;
class DicTraverseSession;
+class PrevWordsInfo;
class ProximityInfo;
class SuggestionResults;
class SuggestOptions;
@@ -62,16 +63,17 @@ class Dictionary {
void getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
- int inputSize, int *prevWordCodePoints, int prevWordLength,
+ int inputSize, const PrevWordsInfo *const prevWordsInfo,
const SuggestOptions *const suggestOptions, const float languageWeight,
SuggestionResults *const outSuggestionResults) const;
- void getPredictions(const int *word, int length,
+ void getPredictions(const PrevWordsInfo *const prevWordsInfo,
SuggestionResults *const outSuggestionResults) const;
int getProbability(const int *word, int length) const;
- int getBigramProbability(const int *word0, int length0, const int *word1, int length1) const;
+ int getBigramProbability(const PrevWordsInfo *const prevWordsInfo,
+ const int *word1, int length1) const;
void addUnigramWord(const int *const codePoints, const int codePointCount,
const UnigramProperty *const unigramProperty);
diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.cpp b/native/jni/src/suggest/core/session/dic_traverse_session.cpp
index 77b634e07..b9e9db719 100644
--- a/native/jni/src/suggest/core/session/dic_traverse_session.cpp
+++ b/native/jni/src/suggest/core/session/dic_traverse_session.cpp
@@ -20,6 +20,7 @@
#include "suggest/core/dictionary/dictionary.h"
#include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+#include "suggest/core/session/prev_words_info.h"
namespace latinime {
@@ -28,24 +29,26 @@ namespace latinime {
const int DicTraverseSession::DICTIONARY_SIZE_THRESHOLD_TO_USE_LARGE_CACHE_FOR_SUGGESTION =
256 * 1024;
-void DicTraverseSession::init(const Dictionary *const dictionary, const int *prevWord,
- int prevWordLength, const SuggestOptions *const suggestOptions) {
+void DicTraverseSession::init(const Dictionary *const dictionary,
+ const PrevWordsInfo *const prevWordsInfo, const SuggestOptions *const suggestOptions) {
mDictionary = dictionary;
mMultiWordCostMultiplier = getDictionaryStructurePolicy()->getHeaderStructurePolicy()
->getMultiWordCostMultiplier();
mSuggestOptions = suggestOptions;
- if (!prevWord) {
- mPrevWordPtNodePos = NOT_A_DICT_POS;
+ if (!prevWordsInfo->getPrevWordCodePoints()) {
+ mPrevWordsPtNodePos[0] = NOT_A_DICT_POS;
return;
}
// TODO: merge following similar calls to getTerminalPosition into one case-insensitive call.
- mPrevWordPtNodePos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
- prevWord, prevWordLength, false /* forceLowerCaseSearch */);
- if (mPrevWordPtNodePos == NOT_A_DICT_POS) {
+ mPrevWordsPtNodePos[0] = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
+ prevWordsInfo->getPrevWordCodePoints(), prevWordsInfo->getPrevWordCodePointCount(),
+ false /* forceLowerCaseSearch */);
+ if (mPrevWordsPtNodePos[0] == NOT_A_DICT_POS) {
// Check bigrams for lower-cased previous word if original was not found. Useful for
// auto-capitalized words like "The [current_word]".
- mPrevWordPtNodePos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
- prevWord, prevWordLength, true /* forceLowerCaseSearch */);
+ mPrevWordsPtNodePos[0] = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
+ prevWordsInfo->getPrevWordCodePoints(), prevWordsInfo->getPrevWordCodePointCount(),
+ true /* forceLowerCaseSearch */);
}
}
diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.h b/native/jni/src/suggest/core/session/dic_traverse_session.h
index 843ca85a0..90aff06c3 100644
--- a/native/jni/src/suggest/core/session/dic_traverse_session.h
+++ b/native/jni/src/suggest/core/session/dic_traverse_session.h
@@ -29,6 +29,7 @@ namespace latinime {
class Dictionary;
class DictionaryStructureWithBufferPolicy;
+class PrevWordsInfo;
class ProximityInfo;
class SuggestOptions;
@@ -44,32 +45,25 @@ class DicTraverseSession {
dictSize >= DICTIONARY_SIZE_THRESHOLD_TO_USE_LARGE_CACHE_FOR_SUGGESTION);
}
- static AK_FORCE_INLINE void initSessionInstance(DicTraverseSession *traverseSession,
- const Dictionary *const dictionary, const int *prevWord, const int prevWordLength,
- const SuggestOptions *const suggestOptions) {
- if (traverseSession) {
- DicTraverseSession *tSession = static_cast<DicTraverseSession *>(traverseSession);
- tSession->init(dictionary, prevWord, prevWordLength, suggestOptions);
- }
- }
-
static AK_FORCE_INLINE void releaseSessionInstance(DicTraverseSession *traverseSession) {
delete traverseSession;
}
AK_FORCE_INLINE DicTraverseSession(JNIEnv *env, jstring localeStr, bool usesLargeCache)
- : mPrevWordPtNodePos(NOT_A_DICT_POS), mProximityInfo(nullptr),
- mDictionary(nullptr), mSuggestOptions(nullptr), mDicNodesCache(usesLargeCache),
- mMultiBigramMap(), mInputSize(0), mMaxPointerCount(1),
+ : mProximityInfo(nullptr), mDictionary(nullptr), mSuggestOptions(nullptr),
+ mDicNodesCache(usesLargeCache), mMultiBigramMap(), mInputSize(0), mMaxPointerCount(1),
mMultiWordCostMultiplier(1.0f) {
// NOTE: mProximityInfoStates is an array of instances.
// No need to initialize it explicitly here.
+ for (size_t i = 0; i < NELEMS(mPrevWordsPtNodePos); ++i) {
+ mPrevWordsPtNodePos[i] = NOT_A_DICT_POS;
+ }
}
// Non virtual inline destructor -- never inherit this class
AK_FORCE_INLINE ~DicTraverseSession() {}
- void init(const Dictionary *dictionary, const int *prevWord, int prevWordLength,
+ void init(const Dictionary *dictionary, const PrevWordsInfo *const prevWordsInfo,
const SuggestOptions *const suggestOptions);
// TODO: Remove and merge into init
void setupForGetSuggestions(const ProximityInfo *pInfo, const int *inputCodePoints,
@@ -85,9 +79,7 @@ class DicTraverseSession {
//--------------------
const ProximityInfo *getProximityInfo() const { return mProximityInfo; }
const SuggestOptions *getSuggestOptions() const { return mSuggestOptions; }
- int getPrevWordPtNodePos() const { return mPrevWordPtNodePos; }
- // TODO: REMOVE
- void setPrevWordPtNodePos(const int ptNodePos) { mPrevWordPtNodePos = ptNodePos; }
+ int getPrevWordPtNodePos() const { return mPrevWordsPtNodePos[0]; }
DicNodesCache *getDicTraverseCache() { return &mDicNodesCache; }
MultiBigramMap *getMultiBigramMap() { return &mMultiBigramMap; }
const ProximityInfoState *getProximityInfoState(int id) const {
@@ -174,7 +166,7 @@ class DicTraverseSession {
const int *const inputYs, const int *const times, const int *const pointerIds,
const int inputSize, const float maxSpatialDistance, const int maxPointerCount);
- int mPrevWordPtNodePos;
+ int mPrevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
const ProximityInfo *mProximityInfo;
const Dictionary *mDictionary;
const SuggestOptions *mSuggestOptions;
diff --git a/native/jni/src/suggest/core/session/prev_words_info.h b/native/jni/src/suggest/core/session/prev_words_info.h
new file mode 100644
index 000000000..bc685945e
--- /dev/null
+++ b/native/jni/src/suggest/core/session/prev_words_info.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PREV_WORDS_INFO_H
+#define LATINIME_PREV_WORDS_INFO_H
+
+#include "defines.h"
+
+namespace latinime {
+
+// TODO: Support n-gram.
+// TODO: Support beginning of sentence.
+// This class does not take ownership of any code point buffers.
+class PrevWordsInfo {
+ public:
+ // No prev word information.
+ PrevWordsInfo() {
+ clear();
+ }
+
+ PrevWordsInfo(const int *const prevWordCodePoints, const int prevWordCodePointCount,
+ const bool isBeginningOfSentence) {
+ clear();
+ mPrevWordCodePoints[0] = prevWordCodePoints;
+ mPrevWordCodePointCount[0] = prevWordCodePointCount;
+ mIsBeginningOfSentence[0] = isBeginningOfSentence;
+ }
+ const int *getPrevWordCodePoints() const {
+ return mPrevWordCodePoints[0];
+ }
+
+ int getPrevWordCodePointCount() const {
+ return mPrevWordCodePointCount[0];
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(PrevWordsInfo);
+
+ void clear() {
+ for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) {
+ mPrevWordCodePoints[i] = nullptr;
+ mPrevWordCodePointCount[i] = 0;
+ mIsBeginningOfSentence[i] = false;
+ }
+ }
+
+ const int *mPrevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+ int mPrevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+ bool mIsBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+};
+} // namespace latinime
+#endif // LATINIME_PREV_WORDS_INFO_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
index da24302c2..479d15164 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
@@ -143,6 +143,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
return FormatUtils::VERSION_4_ONLY_FOR_TESTING;
case FormatUtils::VERSION_4:
return FormatUtils::VERSION_4;
+ case FormatUtils::VERSION_4_DEV:
+ return FormatUtils::VERSION_4_DEV;
default:
return FormatUtils::UNKNOWN_VERSION;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
index 2a9028a9e..a8f8f284b 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
@@ -100,6 +100,7 @@ typedef DictionaryHeaderStructurePolicy::AttributeMap AttributeMap;
return false;
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
case FormatUtils::VERSION_4:
+ case FormatUtils::VERSION_4_DEV:
return buffer->writeUintAndAdvancePosition(version /* data */,
HEADER_DICTIONARY_VERSION_SIZE, writingPos);
default:
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
index c4d18608c..59f1f29e9 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
@@ -19,6 +19,9 @@
#include <climits>
#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
@@ -42,7 +45,7 @@ namespace latinime {
if (isUpdatable) {
AKLOGE("One file dictionaries don't support updating. path: %s", path);
ASSERT(false);
- return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(nullptr);
+ return nullptr;
}
return newPolicyForFileDict(path, bufOffset, size);
}
@@ -54,26 +57,43 @@ namespace latinime {
const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap) {
FormatUtils::FORMAT_VERSION dictFormatVersion = FormatUtils::getFormatVersion(formatVersion);
switch (dictFormatVersion) {
- case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
case FormatUtils::VERSION_4: {
- HeaderPolicy headerPolicy(dictFormatVersion, locale, attributeMap);
- Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers =
- Ver4DictBuffers::createVer4DictBuffers(&headerPolicy,
- Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE);
- if (!DynamicPtWritingUtils::writeEmptyDictionary(
- dictBuffers->getWritableTrieBuffer(), 0 /* rootPos */)) {
- AKLOGE("Empty ver4 dictionary structure cannot be created on memory.");
- return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(nullptr);
- }
- return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(
- new Ver4PatriciaTriePolicy(std::move(dictBuffers)));
+ return newPolicyForOnMemoryV4Dict<backward::v401::Ver4DictConstants,
+ backward::v401::Ver4DictBuffers,
+ backward::v401::Ver4DictBuffers::Ver4DictBuffersPtr,
+ backward::v401::Ver4PatriciaTriePolicy>(
+ dictFormatVersion, locale, attributeMap);
+ }
+ case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
+ case FormatUtils::VERSION_4_DEV: {
+ return newPolicyForOnMemoryV4Dict<Ver4DictConstants, Ver4DictBuffers,
+ Ver4DictBuffers::Ver4DictBuffersPtr, Ver4PatriciaTriePolicy>(
+ dictFormatVersion, locale, attributeMap);
}
default:
AKLOGE("DICT: dictionary format %d is not supported for on memory dictionary",
formatVersion);
break;
}
- return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(nullptr);
+ return nullptr;
+}
+
+template<class DictConstants, class DictBuffers, class DictBuffersPtr, class StructurePolicy>
+/* static */ DictionaryStructureWithBufferPolicy::StructurePolicyPtr
+ DictionaryStructureWithBufferPolicyFactory::newPolicyForOnMemoryV4Dict(
+ const FormatUtils::FORMAT_VERSION formatVersion,
+ const std::vector<int> &locale,
+ const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap) {
+ HeaderPolicy headerPolicy(formatVersion, locale, attributeMap);
+ DictBuffersPtr dictBuffers = DictBuffers::createVer4DictBuffers(&headerPolicy,
+ DictConstants::MAX_DICT_EXTENDED_REGION_SIZE);
+ if (!DynamicPtWritingUtils::writeEmptyDictionary(
+ dictBuffers->getWritableTrieBuffer(), 0 /* rootPos */)) {
+ AKLOGE("Empty ver4 dictionary structure cannot be created on memory.");
+ return nullptr;
+ }
+ return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(
+ new StructurePolicy(std::move(dictBuffers)));
}
/* static */ DictionaryStructureWithBufferPolicy::StructurePolicyPtr
@@ -84,10 +104,10 @@ namespace latinime {
getHeaderFilePathInDictDir(path, headerFilePathBufSize, headerFilePath);
// Allocated buffer in MmapedBuffer::openBuffer() will be freed in the destructor of
// MmappedBufferPtr if the instance has the responsibility.
- MmappedBuffer::MmappedBufferPtr mmappedBuffer(
- MmappedBuffer::openBuffer(headerFilePath, isUpdatable));
+ MmappedBuffer::MmappedBufferPtr mmappedBuffer =
+ MmappedBuffer::openBuffer(headerFilePath, isUpdatable);
if (!mmappedBuffer) {
- return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(nullptr);
+ return nullptr;
}
const FormatUtils::FORMAT_VERSION formatVersion = FormatUtils::detectFormatVersion(
mmappedBuffer->getBuffer(), mmappedBuffer->getBufferSize());
@@ -95,34 +115,50 @@ namespace latinime {
case FormatUtils::VERSION_2:
AKLOGE("Given path is a directory but the format is version 2. path: %s", path);
break;
- case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
case FormatUtils::VERSION_4: {
- const int dictDirPathBufSize = strlen(headerFilePath) + 1 /* terminator */;
- char dictPath[dictDirPathBufSize];
- if (!FileUtils::getFilePathWithoutSuffix(headerFilePath,
- Ver4DictConstants::HEADER_FILE_EXTENSION, dictDirPathBufSize, dictPath)) {
- AKLOGE("Dictionary file name is not valid as a ver4 dictionary. path: %s", path);
- ASSERT(false);
- return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(nullptr);
- }
- Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers(
- Ver4DictBuffers::openVer4DictBuffers(dictPath, std::move(mmappedBuffer),
- formatVersion));
- if (!dictBuffers || !dictBuffers->isValid()) {
- AKLOGE("DICT: The dictionary doesn't satisfy ver4 format requirements. path: %s",
- path);
- ASSERT(false);
- return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(nullptr);
- }
- return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(
- new Ver4PatriciaTriePolicy(std::move(dictBuffers)));
+ return newPolicyForV4Dict<backward::v401::Ver4DictConstants,
+ backward::v401::Ver4DictBuffers,
+ backward::v401::Ver4DictBuffers::Ver4DictBuffersPtr,
+ backward::v401::Ver4PatriciaTriePolicy>(
+ headerFilePath, formatVersion, std::move(mmappedBuffer));
+ }
+ case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
+ case FormatUtils::VERSION_4_DEV: {
+ return newPolicyForV4Dict<Ver4DictConstants, Ver4DictBuffers,
+ Ver4DictBuffers::Ver4DictBuffersPtr, Ver4PatriciaTriePolicy>(
+ headerFilePath, formatVersion, std::move(mmappedBuffer));
}
default:
AKLOGE("DICT: dictionary format is unknown, bad magic number. path: %s", path);
break;
}
ASSERT(false);
- return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(nullptr);
+ return nullptr;
+}
+
+// Builds a structure policy for a ver4-family dictionary from its header file.
+// The dictionary path passed to DictBuffers::openVer4DictBuffers() is derived from
+// headerFilePath by FileUtils::getFilePathWithoutSuffix() using
+// DictConstants::HEADER_FILE_EXTENSION; mmappedBuffer is forwarded to it with std::move.
+// Returns nullptr (after ASSERT(false)) when the path cannot be derived or when the opened
+// buffers are null or report !isValid().
+template<class DictConstants, class DictBuffers, class DictBuffersPtr, class StructurePolicy>
+/* static */ DictionaryStructureWithBufferPolicy::StructurePolicyPtr
+        DictionaryStructureWithBufferPolicyFactory::newPolicyForV4Dict(
+                const char *const headerFilePath, const FormatUtils::FORMAT_VERSION formatVersion,
+                MmappedBuffer::MmappedBufferPtr &&mmappedBuffer) {
+    const int dictDirPathBufSize = strlen(headerFilePath) + 1 /* terminator */;
+    char dictPath[dictDirPathBufSize];
+    if (!FileUtils::getFilePathWithoutSuffix(headerFilePath,
+            DictConstants::HEADER_FILE_EXTENSION, dictDirPathBufSize, dictPath)) {
+        // The caller's directory path is not a parameter here, so log the header file path.
+        AKLOGE("Dictionary file name is not valid as a ver4 dictionary. path: %s",
+                headerFilePath);
+        ASSERT(false);
+        return nullptr;
+    }
+    DictBuffersPtr dictBuffers =
+            DictBuffers::openVer4DictBuffers(dictPath, std::move(mmappedBuffer), formatVersion);
+    if (!dictBuffers || !dictBuffers->isValid()) {
+        // dictPath is the path that was actually handed to openVer4DictBuffers().
+        AKLOGE("DICT: The dictionary doesn't satisfy ver4 format requirements. path: %s",
+                dictPath);
+        ASSERT(false);
+        return nullptr;
+    }
+    return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(
+            new StructurePolicy(std::move(dictBuffers)));
}
/* static */ DictionaryStructureWithBufferPolicy::StructurePolicyPtr
@@ -133,7 +169,7 @@ namespace latinime {
MmappedBuffer::MmappedBufferPtr mmappedBuffer(
MmappedBuffer::openBuffer(path, bufOffset, size, false /* isUpdatable */));
if (!mmappedBuffer) {
- return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(nullptr);
+ return nullptr;
}
switch (FormatUtils::detectFormatVersion(mmappedBuffer->getBuffer(),
mmappedBuffer->getBufferSize())) {
@@ -142,6 +178,7 @@ namespace latinime {
new PatriciaTriePolicy(std::move(mmappedBuffer)));
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
case FormatUtils::VERSION_4:
+ case FormatUtils::VERSION_4_DEV:
AKLOGE("Given path is a file but the format is version 4. path: %s", path);
break;
default:
@@ -149,7 +186,7 @@ namespace latinime {
break;
}
ASSERT(false);
- return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(nullptr);
+ return nullptr;
}
/* static */ void DictionaryStructureWithBufferPolicyFactory::getHeaderFilePathInDictDir(
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h
index f71447e23..768454d8d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h
@@ -22,6 +22,8 @@
#include "defines.h"
#include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+#include "suggest/policyimpl/dictionary/utils/format_utils.h"
+#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
namespace latinime {
@@ -32,16 +34,26 @@ class DictionaryStructureWithBufferPolicyFactory {
const int size, const bool isUpdatable);
static DictionaryStructureWithBufferPolicy::StructurePolicyPtr
- newPolicyForOnMemoryDict(const int formatVersion,
- const std::vector<int> &locale,
+ newPolicyForOnMemoryDict(const int formatVersion, const std::vector<int> &locale,
const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryStructureWithBufferPolicyFactory);
+ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class StructurePolicy>
+ static DictionaryStructureWithBufferPolicy::StructurePolicyPtr
+ newPolicyForOnMemoryV4Dict(const FormatUtils::FORMAT_VERSION formatVersion,
+ const std::vector<int> &locale,
+ const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap);
+
static DictionaryStructureWithBufferPolicy::StructurePolicyPtr
newPolicyForDirectoryDict(const char *const path, const bool isUpdatable);
+ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class StructurePolicy>
+ static DictionaryStructureWithBufferPolicy::StructurePolicyPtr newPolicyForV4Dict(
+ const char *const headerFilePath, const FormatUtils::FORMAT_VERSION formatVersion,
+ MmappedBuffer::MmappedBufferPtr &&mmappedBuffer);
+
static DictionaryStructureWithBufferPolicy::StructurePolicyPtr
newPolicyForFileDict(const char *const path, const int bufOffset, const int size);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp
index 8f42df6d2..028e9ecbf 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp
@@ -29,10 +29,10 @@ bool DynamicPtGcEventListeners
// PtNode is useless when the PtNode is not a terminal and doesn't have any not useless
// children.
bool isUselessPtNode = !ptNodeParams->isTerminal();
- if (ptNodeParams->isTerminal()) {
+ if (ptNodeParams->isTerminal() && !ptNodeParams->representsNonWordInfo()) {
bool needsToKeepPtNode = true;
- if (!mPtNodeWriter->updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(ptNodeParams,
- &needsToKeepPtNode)) {
+ if (!mPtNodeWriter->updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(
+ ptNodeParams, &needsToKeepPtNode)) {
AKLOGE("Cannot update PtNode probability or get needs to keep PtNode after GC.");
return false;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
index bef401f87..5704c2e90 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
@@ -160,7 +160,8 @@ class PtNodeParams {
}
AK_FORCE_INLINE bool representsNonWordInfo() const {
- return getCodePointCount() > 0 && CharUtils::isInUnicodeSpace(getCodePoints()[0]);
+ return getCodePointCount() > 0 && CharUtils::isInUnicodeSpace(getCodePoints()[0])
+ && isNotAWord();
}
// Parent node position
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
index 56f19dbae..d53922763 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
@@ -38,8 +38,6 @@ const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
int level = 0;
int count = 0;
if (mHasHistoricalInfo) {
- probability = bigramListBuffer->readUintAndAdvancePosition(
- Ver4DictConstants::PROBABILITY_SIZE, bigramEntryPos);
timestamp = bigramListBuffer->readUintAndAdvancePosition(
Ver4DictConstants::TIME_STAMP_FIELD_SIZE, bigramEntryPos);
level = bigramListBuffer->readUintAndAdvancePosition(
@@ -47,7 +45,8 @@ const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
count = bigramListBuffer->readUintAndAdvancePosition(
Ver4DictConstants::WORD_COUNT_FIELD_SIZE, bigramEntryPos);
} else {
- probability = bigramFlags & Ver4DictConstants::BIGRAM_PROBABILITY_MASK;
+ probability = bigramListBuffer->readUintAndAdvancePosition(
+ Ver4DictConstants::PROBABILITY_SIZE, bigramEntryPos);
}
const int encodedTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition(
Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos);
@@ -65,21 +64,13 @@ const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
bool BigramDictContent::writeBigramEntryAndAdvancePosition(
const BigramEntry *const bigramEntryToWrite, int *const entryWritingPos) {
BufferWithExtendableBuffer *const bigramListBuffer = getWritableContentBuffer();
- const int bigramFlags = createAndGetBigramFlags(
- mHasHistoricalInfo ? 0 : bigramEntryToWrite->getProbability(),
- bigramEntryToWrite->hasNext());
+ const int bigramFlags = createAndGetBigramFlags(bigramEntryToWrite->hasNext());
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags,
Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) {
AKLOGE("Cannot write bigram flags. pos: %d, flags: %x", *entryWritingPos, bigramFlags);
return false;
}
if (mHasHistoricalInfo) {
- if (!bigramListBuffer->writeUintAndAdvancePosition(bigramEntryToWrite->getProbability(),
- Ver4DictConstants::PROBABILITY_SIZE, entryWritingPos)) {
- AKLOGE("Cannot write bigram probability. pos: %d, probability: %d", *entryWritingPos,
- bigramEntryToWrite->getProbability());
- return false;
- }
const HistoricalInfo *const historicalInfo = bigramEntryToWrite->getHistoricalInfo();
if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getTimeStamp(),
Ver4DictConstants::TIME_STAMP_FIELD_SIZE, entryWritingPos)) {
@@ -99,6 +90,13 @@ bool BigramDictContent::writeBigramEntryAndAdvancePosition(
historicalInfo->getCount());
return false;
}
+ } else {
+ if (!bigramListBuffer->writeUintAndAdvancePosition(bigramEntryToWrite->getProbability(),
+ Ver4DictConstants::PROBABILITY_SIZE, entryWritingPos)) {
+ AKLOGE("Cannot write bigram probability. pos: %d, probability: %d", *entryWritingPos,
+ bigramEntryToWrite->getProbability());
+ return false;
+ }
}
const int targetTerminalIdToWrite =
(bigramEntryToWrite->getTargetTerminalId() == Ver4DictConstants::NOT_A_TERMINAL_ID) ?
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
index 944e0f9e2..b8bdb63a8 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
@@ -95,9 +95,8 @@ class BigramDictContent : public SparseTableDictContent {
private:
DISALLOW_COPY_AND_ASSIGN(BigramDictContent);
- int createAndGetBigramFlags(const int probability, const bool hasNext) const {
- return (probability & Ver4DictConstants::BIGRAM_PROBABILITY_MASK)
- | (hasNext ? Ver4DictConstants::BIGRAM_HAS_NEXT_MASK : 0);
+ int createAndGetBigramFlags(const bool hasNext) const {
+ return hasNext ? Ver4DictConstants::BIGRAM_HAS_NEXT_MASK : 0;
}
bool runGCBigramList(const int bigramListPos,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index 8373dc549..7da9e3072 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -115,9 +115,7 @@ int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability,
} else if (bigramProbability == NOT_A_PROBABILITY) {
return ProbabilityUtils::backoff(unigramProbability);
} else {
- // bigramProbability is a bigram probability delta.
- return ProbabilityUtils::computeProbabilityForBigram(unigramProbability,
- bigramProbability);
+ return bigramProbability;
}
}
}
@@ -398,7 +396,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
const int probability = bigramEntry.hasHistoricalInfo() ?
ForgettingCurveUtils::decodeProbability(
bigramEntry.getHistoricalInfo(), mHeaderPolicy) :
- getProbability(word1Probability, bigramEntry.getProbability());
+ bigramEntry.getProbability();
bigrams.emplace_back(&word1, probability,
historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
historicalInfo->getCount());
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
index f31c50253..e868ddf6f 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
@@ -213,13 +213,16 @@ bool Ver4PatriciaTrieWritingHelper::truncateUnigrams(
// Delete unigrams.
while (static_cast<int>(priorityQueue.size()) > maxUnigramCount) {
const int ptNodePos = priorityQueue.top().getDictPos();
+ priorityQueue.pop();
const PtNodeParams ptNodeParams =
ptNodeReader->fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
+ if (ptNodeParams.representsNonWordInfo()) {
+ continue;
+ }
if (!ptNodeWriter->markPtNodeAsWillBecomeNonTerminal(&ptNodeParams)) {
AKLOGE("Cannot mark PtNode as willBecomeNonterminal. PtNode pos: %d", ptNodePos);
return false;
}
- priorityQueue.pop();
}
return true;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
index 80970c7f8..105363db5 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
@@ -23,6 +23,7 @@
#include <sys/types.h>
#include "suggest/policyimpl/dictionary/header/header_policy.h"
+#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
@@ -40,10 +41,16 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
TimeKeeper::setCurrentTime();
const FormatUtils::FORMAT_VERSION formatVersion = FormatUtils::getFormatVersion(dictVersion);
switch (formatVersion) {
- case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
case FormatUtils::VERSION_4:
- return createEmptyV4DictFile(filePath, localeAsCodePointVector, attributeMap,
- formatVersion);
+ return createEmptyV4DictFile<backward::v401::Ver4DictConstants,
+ backward::v401::Ver4DictBuffers,
+ backward::v401::Ver4DictBuffers::Ver4DictBuffersPtr>(
+ filePath, localeAsCodePointVector, attributeMap, formatVersion);
+ case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
+ case FormatUtils::VERSION_4_DEV:
+ return createEmptyV4DictFile<Ver4DictConstants, Ver4DictBuffers,
+ Ver4DictBuffers::Ver4DictBuffersPtr>(
+ filePath, localeAsCodePointVector, attributeMap, formatVersion);
default:
AKLOGE("Cannot create dictionary %s because format version %d is not supported.",
filePath, dictVersion);
@@ -51,14 +58,14 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
}
}
+template<class DictConstants, class DictBuffers, class DictBuffersPtr>
/* static */ bool DictFileWritingUtils::createEmptyV4DictFile(const char *const dirPath,
const std::vector<int> localeAsCodePointVector,
const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap,
const FormatUtils::FORMAT_VERSION formatVersion) {
HeaderPolicy headerPolicy(formatVersion, localeAsCodePointVector, attributeMap);
- Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers(
- Ver4DictBuffers::createVer4DictBuffers(&headerPolicy,
- Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE));
+ DictBuffersPtr dictBuffers = DictBuffers::createVer4DictBuffers(&headerPolicy,
+ DictConstants::MAX_DICT_EXTENDED_REGION_SIZE);
headerPolicy.fillInAndWriteHeaderToBuffer(true /* updatesLastDecayedTime */,
0 /* unigramCount */, 0 /* bigramCount */,
0 /* extendedRegionSize */, dictBuffers->getWritableHeaderBuffer());
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h
index a822989db..5df5856d2 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h
@@ -45,6 +45,12 @@ class DictFileWritingUtils {
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DictFileWritingUtils);
+ static bool createEmptyV401DictFile(const char *const filePath,
+ const std::vector<int> localeAsCodePointVector,
+ const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap,
+ const FormatUtils::FORMAT_VERSION formatVersion); // NOTE(review): no definition or caller is visible in the .cpp hunks of this change (they use the templated createEmptyV4DictFile) - confirm this declaration is still needed.
+
+ template<class DictConstants, class DictBuffers, class DictBuffersPtr>
static bool createEmptyV4DictFile(const char *const filePath,
const std::vector<int> localeAsCodePointVector,
const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
index a8518cdca..ba405b07e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
@@ -33,6 +33,8 @@ const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 12;
return VERSION_4_ONLY_FOR_TESTING;
case VERSION_4:
return VERSION_4;
+ case VERSION_4_DEV:
+ return VERSION_4_DEV;
default:
return UNKNOWN_VERSION;
}
@@ -62,6 +64,8 @@ const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 12;
return VERSION_4_ONLY_FOR_TESTING;
} else if (ByteArrayUtils::readUint16(dict, 4) == VERSION_4) {
return VERSION_4;
+ } else if (ByteArrayUtils::readUint16(dict, 4) == VERSION_4_DEV) {
+ return VERSION_4_DEV;
} else {
return UNKNOWN_VERSION;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
index 20dfb9d8c..c47f30ca4 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
@@ -33,6 +33,7 @@ class FormatUtils {
VERSION_2 = 2,
VERSION_4_ONLY_FOR_TESTING = 399,
VERSION_4 = 401,
+ VERSION_4_DEV = 402,
UNKNOWN_VERSION = -1
};
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.cpp
index d3e0c237f..4a126ff85 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.cpp
@@ -33,7 +33,7 @@ namespace latinime {
const int mmapFd = open(path, O_RDONLY);
if (mmapFd < 0) {
AKLOGE("DICT: Can't open the source. path=%s errno=%d", path, errno);
- return MmappedBufferPtr(nullptr);
+ return nullptr;
}
const int pagesize = sysconf(_SC_PAGESIZE);
const int offset = bufferOffset % pagesize;
@@ -45,13 +45,13 @@ namespace latinime {
if (mmappedBuffer == MAP_FAILED) {
AKLOGE("DICT: Can't mmap dictionary. errno=%d", errno);
close(mmapFd);
- return MmappedBufferPtr(nullptr);
+ return nullptr;
}
uint8_t *const buffer = static_cast<uint8_t *>(mmappedBuffer) + offset;
if (!buffer) {
AKLOGE("DICT: buffer is null");
close(mmapFd);
- return MmappedBufferPtr(nullptr);
+ return nullptr;
}
return MmappedBufferPtr(new MmappedBuffer(buffer, bufferSize, mmappedBuffer, alignedSize,
mmapFd, isUpdatable));
@@ -61,7 +61,7 @@ namespace latinime {
const char *const path, const bool isUpdatable) {
const int fileSize = FileUtils::getFileSize(path);
if (fileSize == -1) {
- return MmappedBufferPtr(nullptr);
+ return nullptr;
} else if (fileSize == 0) {
return MmappedBufferPtr(new MmappedBuffer(isUpdatable));
} else {
@@ -76,7 +76,7 @@ namespace latinime {
const int filePathLength = snprintf(filePath, filePathBufferSize, "%s%s", dirPath,
fileName);
if (filePathLength >= filePathBufferSize) {
- return MmappedBufferPtr(nullptr);
+ return nullptr;
}
return openBuffer(filePath, isUpdatable);
}
diff --git a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp
index fa9600c74..3fc566e7a 100644
--- a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp
+++ b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp
@@ -37,6 +37,7 @@ const float ScoringParams::DISTANCE_WEIGHT_LENGTH = 0.1524f;
const float ScoringParams::PROXIMITY_COST = 0.0694f;
const float ScoringParams::FIRST_CHAR_PROXIMITY_COST = 0.072f;
const float ScoringParams::FIRST_PROXIMITY_COST = 0.07788f;
+const float ScoringParams::INTENTIONAL_OMISSION_COST = 0.1f;
const float ScoringParams::OMISSION_COST = 0.467f;
const float ScoringParams::OMISSION_COST_SAME_CHAR = 0.345f;
const float ScoringParams::OMISSION_COST_FIRST_CHAR = 0.5256f;
diff --git a/native/jni/src/suggest/policyimpl/typing/scoring_params.h b/native/jni/src/suggest/policyimpl/typing/scoring_params.h
index b66962019..b12de6d87 100644
--- a/native/jni/src/suggest/policyimpl/typing/scoring_params.h
+++ b/native/jni/src/suggest/policyimpl/typing/scoring_params.h
@@ -44,6 +44,7 @@ class ScoringParams {
static const float PROXIMITY_COST;
static const float FIRST_CHAR_PROXIMITY_COST;
static const float FIRST_PROXIMITY_COST;
+ static const float INTENTIONAL_OMISSION_COST;
static const float OMISSION_COST;
static const float OMISSION_COST_SAME_CHAR;
static const float OMISSION_COST_FIRST_CHAR;
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
index 0ba439b47..84077174d 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
@@ -54,12 +54,15 @@ class TypingWeighting : public Weighting {
float getOmissionCost(const DicNode *const parentDicNode, const DicNode *const dicNode) const {
const bool isZeroCostOmission = parentDicNode->isZeroCostOmission();
+ const bool isIntentionalOmission = parentDicNode->canBeIntentionalOmission();
const bool sameCodePoint = dicNode->isSameNodeCodePoint(parentDicNode);
// If the traversal omitted the first letter then the dicNode should now be on the second.
const bool isFirstLetterOmission = dicNode->getNodeCodePointCount() == 2;
float cost = 0.0f;
if (isZeroCostOmission) {
cost = 0.0f;
+ } else if (isIntentionalOmission) {
+ cost = ScoringParams::INTENTIONAL_OMISSION_COST;
} else if (isFirstLetterOmission) {
cost = ScoringParams::OMISSION_COST_FIRST_CHAR;
} else {