about | summary | refs | log | tree | commit | diff | stats
path: root/native/jni/src
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni/src')
-rw-r--r-- native/jni/src/defines.h 4
-rw-r--r-- native/jni/src/suggest/core/dicnode/dic_node.h 5
-rw-r--r-- native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h 6
-rw-r--r-- native/jni/src/suggest/core/dictionary/dictionary.cpp 4
-rw-r--r-- native/jni/src/suggest/core/dictionary/dictionary.h 2
-rw-r--r-- native/jni/src/suggest/core/policy/scoring.h 8
-rw-r--r-- native/jni/src/suggest/core/result/suggestion_results.cpp 7
-rw-r--r-- native/jni/src/suggest/core/result/suggestion_results.h 12
-rw-r--r-- native/jni/src/suggest/core/result/suggestions_output_utils.cpp 33
-rw-r--r-- native/jni/src/suggest/core/result/suggestions_output_utils.h 4
-rw-r--r-- native/jni/src/suggest/core/suggest.cpp 4
-rw-r--r-- native/jni/src/suggest/core/suggest.h 3
-rw-r--r-- native/jni/src/suggest/core/suggest_interface.h 3
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h 19
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp 27
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h 5
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.cpp 7
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp 4
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.cpp 21
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h 15
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.cpp 4
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp 21
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h 5
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp 4
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h 7
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp 3
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp 4
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h 25
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp 2
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h 1
-rw-r--r-- native/jni/src/suggest/policyimpl/typing/typing_scoring.h 8
31 files changed, 179 insertions, 98 deletions
diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h
index 57e18884d..e55c9eb8a 100644
--- a/native/jni/src/defines.h
+++ b/native/jni/src/defines.h
@@ -301,7 +301,7 @@ static inline void prof_out(void) {
#define NOT_A_DICT_POS (S_INT_MIN)
#define NOT_A_WORD_ID (S_INT_MIN)
#define NOT_A_TIMESTAMP (-1)
-#define NOT_A_LANGUAGE_WEIGHT (-1.0f)
+#define NOT_A_WEIGHT_OF_LANG_MODEL_VS_SPATIAL_MODEL (-1.0f)
// A special value to mean the first word confidence makes no sense in this case,
// e.g. this is not a multi-word suggestion.
@@ -338,7 +338,7 @@ static inline void prof_out(void) {
#define MAX_POINTER_COUNT_G 2
// (MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1)-gram is supported.
-#define MAX_PREV_WORD_COUNT_FOR_N_GRAM 1
+#define MAX_PREV_WORD_COUNT_FOR_N_GRAM 2
#define DISALLOW_DEFAULT_CONSTRUCTOR(TypeName) \
TypeName() = delete
diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h
index ec61783cb..5214077dc 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node.h
@@ -295,8 +295,9 @@ class DicNode {
}
// Used to prune nodes
- float getCompoundDistance(const float languageWeight) const {
- return mDicNodeState.mDicNodeStateScoring.getCompoundDistance(languageWeight);
+ float getCompoundDistance(const float weightOfLangModelVsSpatialModel) const {
+ return mDicNodeState.mDicNodeStateScoring.getCompoundDistance(
+ weightOfLangModelVsSpatialModel);
}
AK_FORCE_INLINE const int *getOutputWordBuf() const {
diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h
index c19d48eb9..3a54c2599 100644
--- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h
+++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h
@@ -103,8 +103,10 @@ class DicNodeStateScoring {
return getCompoundDistance(1.0f);
}
- float getCompoundDistance(const float languageWeight) const {
- return mSpatialDistance + mLanguageDistance * languageWeight;
+ float getCompoundDistance(
+ const float weightOfLangModelVsSpatialModel) const {
+ return mSpatialDistance
+ + mLanguageDistance * weightOfLangModelVsSpatialModel;
}
float getNormalizedCompoundDistance() const {
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index f9f36ce44..e4084b0f5 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -47,14 +47,14 @@ Dictionary::Dictionary(JNIEnv *env, DictionaryStructureWithBufferPolicy::Structu
void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
int inputSize, const PrevWordsInfo *const prevWordsInfo,
- const SuggestOptions *const suggestOptions, const float languageWeight,
+ const SuggestOptions *const suggestOptions, const float weightOfLangModelVsSpatialModel,
SuggestionResults *const outSuggestionResults) const {
TimeKeeper::setCurrentTime();
traverseSession->init(this, prevWordsInfo, suggestOptions);
const auto &suggest = suggestOptions->isGesture() ? mGestureSuggest : mTypingSuggest;
suggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
ycoordinates, times, pointerIds, inputCodePoints, inputSize,
- languageWeight, outSuggestionResults);
+ weightOfLangModelVsSpatialModel, outSuggestionResults);
if (DEBUG_DICT) {
outSuggestionResults->dumpSuggestions();
}
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h
index f6482ab78..324e3504a 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h
@@ -66,7 +66,7 @@ class Dictionary {
void getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
int inputSize, const PrevWordsInfo *const prevWordsInfo,
- const SuggestOptions *const suggestOptions, const float languageWeight,
+ const SuggestOptions *const suggestOptions, const float weightOfLangModelVsSpatialModel,
SuggestionResults *const outSuggestionResults) const;
void getPredictions(const PrevWordsInfo *const prevWordsInfo,
diff --git a/native/jni/src/suggest/core/policy/scoring.h b/native/jni/src/suggest/core/policy/scoring.h
index 9e75cace4..ce3684a1c 100644
--- a/native/jni/src/suggest/core/policy/scoring.h
+++ b/native/jni/src/suggest/core/policy/scoring.h
@@ -32,9 +32,11 @@ class Scoring {
const ErrorTypeUtils::ErrorType containedErrorTypes, const bool forceCommit,
const bool boostExactMatches) const = 0;
virtual void getMostProbableString(const DicTraverseSession *const traverseSession,
- const float languageWeight, SuggestionResults *const outSuggestionResults) const = 0;
- virtual float getAdjustedLanguageWeight(DicTraverseSession *const traverseSession,
- DicNode *const terminals, const int size) const = 0;
+ const float weightOfLangModelVsSpatialModel,
+ SuggestionResults *const outSuggestionResults) const = 0;
+ virtual float getAdjustedWeightOfLangModelVsSpatialModel(
+ DicTraverseSession *const traverseSession, DicNode *const terminals,
+ const int size) const = 0;
virtual float getDoubleLetterDemotionDistanceCost(
const DicNode *const terminalDicNode) const = 0;
virtual bool autoCorrectsToMultiWordSuggestionIfTop() const = 0;
diff --git a/native/jni/src/suggest/core/result/suggestion_results.cpp b/native/jni/src/suggest/core/result/suggestion_results.cpp
index 4c10bd08a..3756d1092 100644
--- a/native/jni/src/suggest/core/result/suggestion_results.cpp
+++ b/native/jni/src/suggest/core/result/suggestion_results.cpp
@@ -23,7 +23,7 @@ namespace latinime {
void SuggestionResults::outputSuggestions(JNIEnv *env, jintArray outSuggestionCount,
jintArray outputCodePointsArray, jintArray outScoresArray, jintArray outSpaceIndicesArray,
jintArray outTypesArray, jintArray outAutoCommitFirstWordConfidenceArray,
- jfloatArray outLanguageWeight) {
+ jfloatArray outWeightOfLangModelVsSpatialModel) {
int outputIndex = 0;
while (!mSuggestedWords.empty()) {
const SuggestedWord &suggestedWord = mSuggestedWords.top();
@@ -44,7 +44,8 @@ void SuggestionResults::outputSuggestions(JNIEnv *env, jintArray outSuggestionCo
mSuggestedWords.pop();
}
JniDataUtils::putIntToArray(env, outSuggestionCount, 0 /* index */, outputIndex);
- JniDataUtils::putFloatToArray(env, outLanguageWeight, 0 /* index */, mLanguageWeight);
+ JniDataUtils::putFloatToArray(env, outWeightOfLangModelVsSpatialModel, 0 /* index */,
+ mWeightOfLangModelVsSpatialModel);
}
void SuggestionResults::addPrediction(const int *const codePoints, const int codePointCount,
@@ -89,7 +90,7 @@ void SuggestionResults::getSortedScores(int *const outScores) const {
}
void SuggestionResults::dumpSuggestions() const {
- AKLOGE("language weight: %f", mLanguageWeight);
+ AKLOGE("weight of language model vs spatial model: %f", mWeightOfLangModelVsSpatialModel);
std::vector<SuggestedWord> suggestedWords;
auto copyOfSuggestedWords = mSuggestedWords;
while (!copyOfSuggestedWords.empty()) {
diff --git a/native/jni/src/suggest/core/result/suggestion_results.h b/native/jni/src/suggest/core/result/suggestion_results.h
index 8e845e2d3..738c78a9f 100644
--- a/native/jni/src/suggest/core/result/suggestion_results.h
+++ b/native/jni/src/suggest/core/result/suggestion_results.h
@@ -29,13 +29,15 @@ namespace latinime {
class SuggestionResults {
public:
explicit SuggestionResults(const int maxSuggestionCount)
- : mMaxSuggestionCount(maxSuggestionCount), mLanguageWeight(NOT_A_LANGUAGE_WEIGHT),
+ : mMaxSuggestionCount(maxSuggestionCount),
+ mWeightOfLangModelVsSpatialModel(NOT_A_WEIGHT_OF_LANG_MODEL_VS_SPATIAL_MODEL),
mSuggestedWords() {}
// Returns suggestion count.
void outputSuggestions(JNIEnv *env, jintArray outSuggestionCount, jintArray outCodePointsArray,
jintArray outScoresArray, jintArray outSpaceIndicesArray, jintArray outTypesArray,
- jintArray outAutoCommitFirstWordConfidenceArray, jfloatArray outLanguageWeight);
+ jintArray outAutoCommitFirstWordConfidenceArray,
+ jfloatArray outWeightOfLangModelVsSpatialModel);
void addPrediction(const int *const codePoints, const int codePointCount, const int score);
void addSuggestion(const int *const codePoints, const int codePointCount,
const int score, const int type, const int indexToPartialCommit,
@@ -43,8 +45,8 @@ class SuggestionResults {
void getSortedScores(int *const outScores) const;
void dumpSuggestions() const;
- void setLanguageWeight(const float languageWeight) {
- mLanguageWeight = languageWeight;
+ void setWeightOfLangModelVsSpatialModel(const float weightOfLangModelVsSpatialModel) {
+ mWeightOfLangModelVsSpatialModel = weightOfLangModelVsSpatialModel;
}
int getSuggestionCount() const {
@@ -55,7 +57,7 @@ class SuggestionResults {
DISALLOW_IMPLICIT_CONSTRUCTORS(SuggestionResults);
const int mMaxSuggestionCount;
- float mLanguageWeight;
+ float mWeightOfLangModelVsSpatialModel;
std::priority_queue<
SuggestedWord, std::vector<SuggestedWord>, SuggestedWord::Comparator> mSuggestedWords;
};
diff --git a/native/jni/src/suggest/core/result/suggestions_output_utils.cpp b/native/jni/src/suggest/core/result/suggestions_output_utils.cpp
index 6e0193772..3283f6deb 100644
--- a/native/jni/src/suggest/core/result/suggestions_output_utils.cpp
+++ b/native/jni/src/suggest/core/result/suggestions_output_utils.cpp
@@ -34,7 +34,8 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
/* static */ void SuggestionsOutputUtils::outputSuggestions(
const Scoring *const scoringPolicy, DicTraverseSession *traverseSession,
- const float languageWeight, SuggestionResults *const outSuggestionResults) {
+ const float weightOfLangModelVsSpatialModel,
+ SuggestionResults *const outSuggestionResults) {
#if DEBUG_EVALUATE_MOST_PROBABLE_STRING
const int terminalSize = 0;
#else
@@ -44,12 +45,15 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
for (int index = terminalSize - 1; index >= 0; --index) {
traverseSession->getDicTraverseCache()->popTerminal(&terminals[index]);
}
- // Compute a language weight when an invalid language weight is passed.
- // NOT_A_LANGUAGE_WEIGHT (-1) is assumed as an invalid language weight.
- const float languageWeightToOutputSuggestions = (languageWeight < 0.0f) ?
- scoringPolicy->getAdjustedLanguageWeight(
- traverseSession, terminals.data(), terminalSize) : languageWeight;
- outSuggestionResults->setLanguageWeight(languageWeightToOutputSuggestions);
+ // Compute a weight of language model when an invalid weight is passed.
+ // NOT_A_WEIGHT_OF_LANG_MODEL_VS_SPATIAL_MODEL (-1) is taken as an invalid value.
+ const float weightOfLangModelVsSpatialModelToOutputSuggestions =
+ (weightOfLangModelVsSpatialModel < 0.0f)
+ ? scoringPolicy->getAdjustedWeightOfLangModelVsSpatialModel(traverseSession,
+ terminals.data(), terminalSize)
+ : weightOfLangModelVsSpatialModel;
+ outSuggestionResults->setWeightOfLangModelVsSpatialModel(
+ weightOfLangModelVsSpatialModelToOutputSuggestions);
// Force autocorrection for obvious long multi-word suggestions when the top suggestion is
// a long multiple words suggestion.
// TODO: Implement a smarter auto-commit method for handling multi-word suggestions.
@@ -65,16 +69,16 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
// Output suggestion results here
for (auto &terminalDicNode : terminals) {
outputSuggestionsOfDicNode(scoringPolicy, traverseSession, &terminalDicNode,
- languageWeightToOutputSuggestions, boostExactMatches, forceCommitMultiWords,
- outputSecondWordFirstLetterInputIndex, outSuggestionResults);
+ weightOfLangModelVsSpatialModelToOutputSuggestions, boostExactMatches,
+ forceCommitMultiWords, outputSecondWordFirstLetterInputIndex, outSuggestionResults);
}
- scoringPolicy->getMostProbableString(traverseSession, languageWeightToOutputSuggestions,
- outSuggestionResults);
+ scoringPolicy->getMostProbableString(traverseSession,
+ weightOfLangModelVsSpatialModelToOutputSuggestions, outSuggestionResults);
}
/* static */ void SuggestionsOutputUtils::outputSuggestionsOfDicNode(
const Scoring *const scoringPolicy, DicTraverseSession *traverseSession,
- const DicNode *const terminalDicNode, const float languageWeight,
+ const DicNode *const terminalDicNode, const float weightOfLangModelVsSpatialModel,
const bool boostExactMatches, const bool forceCommitMultiWords,
const bool outputSecondWordFirstLetterInputIndex,
SuggestionResults *const outSuggestionResults) {
@@ -83,8 +87,9 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
}
const float doubleLetterCost =
scoringPolicy->getDoubleLetterDemotionDistanceCost(terminalDicNode);
- const float compoundDistance = terminalDicNode->getCompoundDistance(languageWeight)
- + doubleLetterCost;
+ const float compoundDistance =
+ terminalDicNode->getCompoundDistance(weightOfLangModelVsSpatialModel)
+ + doubleLetterCost;
const WordAttributes wordAttributes = traverseSession->getDictionaryStructurePolicy()
->getWordAttributesInContext(terminalDicNode->getPrevWordIds(),
terminalDicNode->getWordId(), nullptr /* multiBigramMap */);
diff --git a/native/jni/src/suggest/core/result/suggestions_output_utils.h b/native/jni/src/suggest/core/result/suggestions_output_utils.h
index b099b4776..bf8497828 100644
--- a/native/jni/src/suggest/core/result/suggestions_output_utils.h
+++ b/native/jni/src/suggest/core/result/suggestions_output_utils.h
@@ -33,7 +33,7 @@ class SuggestionsOutputUtils {
* Outputs the final list of suggestions (i.e., terminal nodes).
*/
static void outputSuggestions(const Scoring *const scoringPolicy,
- DicTraverseSession *traverseSession, const float languageWeight,
+ DicTraverseSession *traverseSession, const float weightOfLangModelVsSpatialModel,
SuggestionResults *const outSuggestionResults);
private:
@@ -44,7 +44,7 @@ class SuggestionsOutputUtils {
static void outputSuggestionsOfDicNode(const Scoring *const scoringPolicy,
DicTraverseSession *traverseSession, const DicNode *const terminalDicNode,
- const float languageWeight, const bool boostExactMatches,
+ const float weightOfLangModelVsSpatialModel, const bool boostExactMatches,
const bool forceCommitMultiWords, const bool outputSecondWordFirstLetterInputIndex,
SuggestionResults *const outSuggestionResults);
static void outputShortcuts(BinaryDictionaryShortcutIterator *const shortcutIt,
diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp
index 947d41f4b..457414f2b 100644
--- a/native/jni/src/suggest/core/suggest.cpp
+++ b/native/jni/src/suggest/core/suggest.cpp
@@ -45,7 +45,7 @@ const int Suggest::MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE = 2;
*/
void Suggest::getSuggestions(ProximityInfo *pInfo, void *traverseSession,
int *inputXs, int *inputYs, int *times, int *pointerIds, int *inputCodePoints,
- int inputSize, const float languageWeight,
+ int inputSize, const float weightOfLangModelVsSpatialModel,
SuggestionResults *const outSuggestionResults) const {
PROF_OPEN;
PROF_START(0);
@@ -68,7 +68,7 @@ void Suggest::getSuggestions(ProximityInfo *pInfo, void *traverseSession,
PROF_END(1);
PROF_START(2);
SuggestionsOutputUtils::outputSuggestions(
- SCORING, tSession, languageWeight, outSuggestionResults);
+ SCORING, tSession, weightOfLangModelVsSpatialModel, outSuggestionResults);
PROF_END(2);
PROF_CLOSE;
}
diff --git a/native/jni/src/suggest/core/suggest.h b/native/jni/src/suggest/core/suggest.h
index 788e0314b..65d5918cf 100644
--- a/native/jni/src/suggest/core/suggest.h
+++ b/native/jni/src/suggest/core/suggest.h
@@ -49,7 +49,8 @@ class Suggest : public SuggestInterface {
AK_FORCE_INLINE virtual ~Suggest() {}
void getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs,
int *times, int *pointerIds, int *inputCodePoints, int inputSize,
- const float languageWeight, SuggestionResults *const outSuggestionResults) const;
+ const float weightOfLangModelVsSpatialModel,
+ SuggestionResults *const outSuggestionResults) const;
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Suggest);
diff --git a/native/jni/src/suggest/core/suggest_interface.h b/native/jni/src/suggest/core/suggest_interface.h
index a6e5aefae..a05aa9c80 100644
--- a/native/jni/src/suggest/core/suggest_interface.h
+++ b/native/jni/src/suggest/core/suggest_interface.h
@@ -28,7 +28,8 @@ class SuggestInterface {
public:
virtual void getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs,
int *inputYs, int *times, int *pointerIds, int *inputCodePoints, int inputSize,
- const float languageWeight, SuggestionResults *const suggestionResults) const = 0;
+ const float weightOfLangModelVsSpatialModel,
+ SuggestionResults *const suggestionResults) const = 0;
SuggestInterface() {}
virtual ~SuggestInterface() {}
private:
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
index 87cf0cd3b..daf40d4f9 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
@@ -65,7 +65,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
mMaxUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(
&mAttributeMap, MAX_UNIGRAM_COUNT_KEY, DEFAULT_MAX_UNIGRAM_COUNT)),
mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue(
- &mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)) {}
+ &mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)),
+ mCodePointTable(HeaderReadWriteUtils::readCodePointTable(&mAttributeMap)) {}
// Constructs header information using an attribute map.
HeaderPolicy(const FormatUtils::FORMAT_VERSION dictFormatVersion,
@@ -97,7 +98,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
mMaxUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(
&mAttributeMap, MAX_UNIGRAM_COUNT_KEY, DEFAULT_MAX_UNIGRAM_COUNT)),
mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue(
- &mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)) {}
+ &mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)),
+ mCodePointTable(HeaderReadWriteUtils::readCodePointTable(&mAttributeMap)) {}
// Copy header information
HeaderPolicy(const HeaderPolicy *const headerPolicy)
@@ -118,7 +120,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
mForgettingCurveDurationToLevelDown(
headerPolicy->mForgettingCurveDurationToLevelDown),
mMaxUnigramCount(headerPolicy->mMaxUnigramCount),
- mMaxBigramCount(headerPolicy->mMaxBigramCount) {}
+ mMaxBigramCount(headerPolicy->mMaxBigramCount),
+ mCodePointTable(headerPolicy->mCodePointTable) {}
// Temporary dummy header.
HeaderPolicy()
@@ -128,7 +131,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
mDate(0), mLastDecayedTime(0), mUnigramCount(0), mBigramCount(0),
mExtendedRegionSize(0), mHasHistoricalInfoOfWords(false),
mForgettingCurveOccurrencesToLevelUp(0), mForgettingCurveProbabilityValuesTableId(0),
- mForgettingCurveDurationToLevelDown(0), mMaxUnigramCount(0), mMaxBigramCount(0) {}
+ mForgettingCurveDurationToLevelDown(0), mMaxUnigramCount(0), mMaxBigramCount(0),
+ mCodePointTable(nullptr) {}
~HeaderPolicy() {}
@@ -139,6 +143,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
switch (mDictFormatVersion) {
case FormatUtils::VERSION_2:
return FormatUtils::VERSION_2;
+ case FormatUtils::VERSION_201:
+ return FormatUtils::VERSION_201;
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
return FormatUtils::VERSION_4_ONLY_FOR_TESTING;
case FormatUtils::VERSION_4:
@@ -250,6 +256,10 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
return mDictFormatVersion >= FormatUtils::VERSION_4;
}
+ const int *getCodePointTable() const {
+ return mCodePointTable;
+ }
+
private:
DISALLOW_COPY_AND_ASSIGN(HeaderPolicy);
@@ -295,6 +305,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
const int mForgettingCurveDurationToLevelDown;
const int mMaxUnigramCount;
const int mMaxBigramCount;
+ const int *const mCodePointTable;
const std::vector<int> readLocale() const;
float readMultipleWordCostMultiplier() const;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
index d2c3d2fe0..41a8b13b8 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
@@ -18,6 +18,7 @@
#include <cctype>
#include <cstdio>
+#include <memory>
#include <vector>
#include "defines.h"
@@ -34,12 +35,13 @@ namespace latinime {
const int HeaderReadWriteUtils::LARGEST_INT_DIGIT_COUNT = 11;
const int HeaderReadWriteUtils::MAX_ATTRIBUTE_KEY_LENGTH = 256;
-const int HeaderReadWriteUtils::MAX_ATTRIBUTE_VALUE_LENGTH = 256;
+const int HeaderReadWriteUtils::MAX_ATTRIBUTE_VALUE_LENGTH = 2048;
const int HeaderReadWriteUtils::HEADER_MAGIC_NUMBER_SIZE = 4;
const int HeaderReadWriteUtils::HEADER_DICTIONARY_VERSION_SIZE = 2;
const int HeaderReadWriteUtils::HEADER_FLAG_SIZE = 2;
const int HeaderReadWriteUtils::HEADER_SIZE_FIELD_SIZE = 4;
+const char *const HeaderReadWriteUtils::CODE_POINT_TABLE_KEY = "codePointTable";
const HeaderReadWriteUtils::DictionaryFlags HeaderReadWriteUtils::NO_FLAGS = 0;
@@ -73,20 +75,32 @@ typedef DictionaryHeaderStructurePolicy::AttributeMap AttributeMap;
return;
}
int keyBuffer[MAX_ATTRIBUTE_KEY_LENGTH];
- int valueBuffer[MAX_ATTRIBUTE_VALUE_LENGTH];
+ std::unique_ptr<int[]> valueBuffer(new int[MAX_ATTRIBUTE_VALUE_LENGTH]);
while (pos < headerSize) {
+ // The values in the header don't use the code point table for their encoding.
const int keyLength = ByteArrayUtils::readStringAndAdvancePosition(dictBuf,
- MAX_ATTRIBUTE_KEY_LENGTH, keyBuffer, &pos);
+ MAX_ATTRIBUTE_KEY_LENGTH, nullptr /* codePointTable */, keyBuffer, &pos);
std::vector<int> key;
key.insert(key.end(), keyBuffer, keyBuffer + keyLength);
const int valueLength = ByteArrayUtils::readStringAndAdvancePosition(dictBuf,
- MAX_ATTRIBUTE_VALUE_LENGTH, valueBuffer, &pos);
+ MAX_ATTRIBUTE_VALUE_LENGTH, nullptr /* codePointTable */, valueBuffer.get(), &pos);
std::vector<int> value;
- value.insert(value.end(), valueBuffer, valueBuffer + valueLength);
+ value.insert(value.end(), valueBuffer.get(), valueBuffer.get() + valueLength);
headerAttributes->insert(AttributeMap::value_type(key, value));
}
}
+/* static */ const int *HeaderReadWriteUtils::readCodePointTable(
+ AttributeMap *const headerAttributes) {
+ AttributeMap::key_type keyVector;
+ insertCharactersIntoVector(CODE_POINT_TABLE_KEY, &keyVector);
+ AttributeMap::const_iterator it = headerAttributes->find(keyVector);
+ if (it == headerAttributes->end()) {
+ return nullptr;
+ }
+ return it->second.data();
+}
+
/* static */ bool HeaderReadWriteUtils::writeDictionaryVersion(
BufferWithExtendableBuffer *const buffer, const FormatUtils::FORMAT_VERSION version,
int *const writingPos) {
@@ -96,7 +110,8 @@ typedef DictionaryHeaderStructurePolicy::AttributeMap AttributeMap;
}
switch (version) {
case FormatUtils::VERSION_2:
- // Version 2 dictionary writing is not supported.
+ case FormatUtils::VERSION_201:
+ // Version 2 or 201 dictionary writing is not supported.
return false;
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
case FormatUtils::VERSION_4:
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h
index 1ab2eec69..5dd91b26c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h
@@ -46,6 +46,9 @@ class HeaderReadWriteUtils {
static void fetchAllHeaderAttributes(const uint8_t *const dictBuf,
DictionaryHeaderStructurePolicy::AttributeMap *const headerAttributes);
+ static const int *readCodePointTable(
+ DictionaryHeaderStructurePolicy::AttributeMap *const headerAttributes);
+
static bool writeDictionaryVersion(BufferWithExtendableBuffer *const buffer,
const FormatUtils::FORMAT_VERSION version, int *const writingPos);
@@ -101,6 +104,8 @@ class HeaderReadWriteUtils {
static const int HEADER_FLAG_SIZE;
static const int HEADER_SIZE_FIELD_SIZE;
+ static const char *const CODE_POINT_TABLE_KEY;
+
// Value for the "flags" field. It's unused at the moment.
static const DictionaryFlags NO_FLAGS;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.cpp
index 82399f190..5c639b19c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.cpp
@@ -23,6 +23,7 @@
#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.h"
@@ -59,8 +60,8 @@ const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProce
const int parentPos =
DynamicPtReadingUtils::getParentPtNodePos(parentPosOffset, headPos);
int codePoints[MAX_WORD_LENGTH];
- const int codePonitCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
- dictBuf, flags, MAX_WORD_LENGTH, codePoints, &pos);
+ const int codePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
+ dictBuf, flags, MAX_WORD_LENGTH, mHeaderPolicy->getCodePointTable(), codePoints, &pos);
int terminalIdFieldPos = NOT_A_DICT_POS;
int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
int probability = NOT_A_PROBABILITY;
@@ -98,7 +99,7 @@ const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProce
// The destination position is stored at the same place as the parent position.
return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(parentPos, newSiblingNodePos);
} else {
- return PtNodeParams(headPos, flags, parentPos, codePonitCount, codePoints,
+ return PtNodeParams(headPos, flags, parentPos, codePointCount, codePoints,
terminalIdFieldPos, terminalId, probability, childrenPosFieldPos, childrenPos,
newSiblingNodePos);
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
index 9fa93efc9..372c9e36f 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
@@ -114,7 +114,8 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str
mmappedBuffer->getReadOnlyByteArrayView());
switch (formatVersion) {
case FormatUtils::VERSION_2:
- AKLOGE("Given path is a directory but the format is version 2. path: %s", path);
+ case FormatUtils::VERSION_201:
+ AKLOGE("Given path is a directory but the format is version 2 or 201. path: %s", path);
break;
case FormatUtils::VERSION_4: {
return newPolicyForV4Dict<backward::v402::Ver4DictConstants,
@@ -175,6 +176,7 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str
}
switch (FormatUtils::detectFormatVersion(mmappedBuffer->getReadOnlyByteArrayView())) {
case FormatUtils::VERSION_2:
+ case FormatUtils::VERSION_201:
return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(
new PatriciaTriePolicy(std::move(mmappedBuffer)));
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.cpp
index e64a13cc4..6a498b2f4 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.cpp
@@ -61,19 +61,20 @@ const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_BLACKLISTED = 0x01;
}
/* static */ int PtReadingUtils::getCodePointAndAdvancePosition(const uint8_t *const buffer,
- int *const pos) {
- return ByteArrayUtils::readCodePointAndAdvancePosition(buffer, pos);
+ const int *const codePointTable, int *const pos) {
+ return ByteArrayUtils::readCodePointAndAdvancePosition(buffer, codePointTable, pos);
}
// Returns the number of read characters.
/* static */ int PtReadingUtils::getCharsAndAdvancePosition(const uint8_t *const buffer,
- const NodeFlags flags, const int maxLength, int *const outBuffer, int *const pos) {
+ const NodeFlags flags, const int maxLength, const int *const codePointTable,
+ int *const outBuffer, int *const pos) {
int length = 0;
if (hasMultipleChars(flags)) {
- length = ByteArrayUtils::readStringAndAdvancePosition(buffer, maxLength, outBuffer,
- pos);
+ length = ByteArrayUtils::readStringAndAdvancePosition(buffer, maxLength, codePointTable,
+ outBuffer, pos);
} else {
- const int codePoint = getCodePointAndAdvancePosition(buffer, pos);
+ const int codePoint = getCodePointAndAdvancePosition(buffer, codePointTable, pos);
if (codePoint == NOT_A_CODE_POINT) {
// CAVEAT: codePoint == NOT_A_CODE_POINT means the code point is
// CHARACTER_ARRAY_TERMINATOR. The code point must not be CHARACTER_ARRAY_TERMINATOR
@@ -92,12 +93,12 @@ const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_BLACKLISTED = 0x01;
// Returns the number of skipped characters.
/* static */ int PtReadingUtils::skipCharacters(const uint8_t *const buffer, const NodeFlags flags,
- const int maxLength, int *const pos) {
+ const int maxLength, const int *const codePointTable, int *const pos) {
if (hasMultipleChars(flags)) {
return ByteArrayUtils::advancePositionToBehindString(buffer, maxLength, pos);
} else {
if (maxLength > 0) {
- getCodePointAndAdvancePosition(buffer, pos);
+ getCodePointAndAdvancePosition(buffer, codePointTable, pos);
return 1;
} else {
return 0;
@@ -134,7 +135,7 @@ const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_BLACKLISTED = 0x01;
/* static */ void PtReadingUtils::readPtNodeInfo(const uint8_t *const dictBuf, const int ptNodePos,
const DictionaryShortcutsStructurePolicy *const shortcutPolicy,
- const DictionaryBigramsStructurePolicy *const bigramPolicy,
+ const DictionaryBigramsStructurePolicy *const bigramPolicy, const int *const codePointTable,
NodeFlags *const outFlags, int *const outCodePointCount, int *const outCodePoint,
int *const outProbability, int *const outChildrenPos, int *const outShortcutPos,
int *const outBigramPos, int *const outSiblingPos) {
@@ -142,7 +143,7 @@ const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_BLACKLISTED = 0x01;
const NodeFlags flags = getFlagsAndAdvancePosition(dictBuf, &readingPos);
*outFlags = flags;
*outCodePointCount = getCharsAndAdvancePosition(
- dictBuf, flags, MAX_WORD_LENGTH, outCodePoint, &readingPos);
+ dictBuf, flags, MAX_WORD_LENGTH, codePointTable, outCodePoint, &readingPos);
*outProbability = isTerminal(flags) ?
readProbabilityAndAdvancePosition(dictBuf, &readingPos) : NOT_A_PROBABILITY;
*outChildrenPos = hasChildrenInFlags(flags) ?
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h
index c3f09c3b1..a69ec4435 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h
@@ -34,15 +34,17 @@ class PatriciaTrieReadingUtils {
static NodeFlags getFlagsAndAdvancePosition(const uint8_t *const buffer, int *const pos);
- static int getCodePointAndAdvancePosition(const uint8_t *const buffer, int *const pos);
+ static int getCodePointAndAdvancePosition(const uint8_t *const buffer,
+ const int *const codePointTable, int *const pos);
// Returns the number of read characters.
static int getCharsAndAdvancePosition(const uint8_t *const buffer, const NodeFlags flags,
- const int maxLength, int *const outBuffer, int *const pos);
+ const int maxLength, const int *const codePointTable, int *const outBuffer,
+ int *const pos);
// Returns the number of skipped characters.
static int skipCharacters(const uint8_t *const buffer, const NodeFlags flags,
- const int maxLength, int *const pos);
+ const int maxLength, const int *const codePointTable, int *const pos);
static int readProbabilityAndAdvancePosition(const uint8_t *const buffer, int *const pos);
@@ -106,9 +108,10 @@ class PatriciaTrieReadingUtils {
static void readPtNodeInfo(const uint8_t *const dictBuf, const int ptNodePos,
const DictionaryShortcutsStructurePolicy *const shortcutPolicy,
const DictionaryBigramsStructurePolicy *const bigramPolicy,
- NodeFlags *const outFlags, int *const outCodePointCount, int *const outCodePoint,
- int *const outProbability, int *const outChildrenPos, int *const outShortcutPos,
- int *const outBigramPos, int *const outSiblingPos);
+ const int *const codePointTable, NodeFlags *const outFlags,
+ int *const outCodePointCount, int *const outCodePoint, int *const outProbability,
+ int *const outChildrenPos, int *const outShortcutPos, int *const outBigramPos,
+ int *const outSiblingPos);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTrieReadingUtils);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.cpp
index 7cb7dff9a..40b872055 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.cpp
@@ -45,7 +45,9 @@ const int ShortcutListReadingUtils::WHITELIST_SHORTCUT_PROBABILITY = 15;
/* static */ int ShortcutListReadingUtils::readShortcutTarget(const ReadOnlyByteArrayView buffer,
const int maxLength, int *const outWord, int *const pos) {
- return ByteArrayUtils::readStringAndAdvancePosition(buffer.data(), maxLength, outWord, pos);
+ // TODO: Use codePointTable for shortcuts.
+ return ByteArrayUtils::readStringAndAdvancePosition(buffer.data(), maxLength,
+ nullptr /* codePointTable */, outWord, pos);
}
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
index 64b767dac..6e7dba9ff 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
@@ -81,6 +81,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
const int ptNodePos = getTerminalPtNodePosFromWordId(wordId);
int pos = getRootPosition();
int wordPos = 0;
+ const int *const codePointTable = mHeaderPolicy.getCodePointTable();
// One iteration of the outer loop iterates through PtNode arrays. As stated above, we will
// only traverse PtNodes that are actually a part of the terminal we are searching, so each
// time we enter this loop we are one depth level further than last time.
@@ -112,21 +113,21 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
const PatriciaTrieReadingUtils::NodeFlags flags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mBuffer.data(), &pos);
const int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
- mBuffer.data(), &pos);
+ mBuffer.data(), codePointTable, &pos);
if (ptNodePos == startPos) {
// We found the position. Copy the rest of the code points in the buffer and return
// the length.
outCodePoints[wordPos] = character;
if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
int nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
- mBuffer.data(), &pos);
+ mBuffer.data(), codePointTable, &pos);
// We count code points in order to avoid infinite loops if the file is broken
// or if there is some other bug
int charCount = maxCodePointCount;
while (NOT_A_CODE_POINT != nextChar && --charCount > 0) {
outCodePoints[++wordPos] = nextChar;
nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
- mBuffer.data(), &pos);
+ mBuffer.data(), codePointTable, &pos);
}
}
*outUnigramProbability =
@@ -138,7 +139,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
// first and possibly the probability.
if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
PatriciaTrieReadingUtils::skipCharacters(mBuffer.data(), flags, MAX_WORD_LENGTH,
- &pos);
+ codePointTable, &pos);
}
if (PatriciaTrieReadingUtils::isTerminal(flags)) {
PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mBuffer.data(), &pos);
@@ -189,17 +190,17 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(
mBuffer.data(), &lastCandidatePtNodePos);
const int lastChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
- mBuffer.data(), &lastCandidatePtNodePos);
+ mBuffer.data(), codePointTable, &lastCandidatePtNodePos);
// We copy all the characters in this PtNode to the buffer
outCodePoints[wordPos] = lastChar;
if (PatriciaTrieReadingUtils::hasMultipleChars(lastFlags)) {
int nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
- mBuffer.data(), &lastCandidatePtNodePos);
+ mBuffer.data(), codePointTable, &lastCandidatePtNodePos);
int charCount = maxCodePointCount;
while (-1 != nextChar && --charCount > 0) {
outCodePoints[++wordPos] = nextChar;
nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
- mBuffer.data(), &lastCandidatePtNodePos);
+ mBuffer.data(), codePointTable, &lastCandidatePtNodePos);
}
}
++wordPos;
@@ -404,9 +405,11 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod
int shortcutPos = NOT_A_DICT_POS;
int bigramPos = NOT_A_DICT_POS;
int siblingPos = NOT_A_DICT_POS;
+ const int *const codePointTable = mHeaderPolicy.getCodePointTable();
PatriciaTrieReadingUtils::readPtNodeInfo(mBuffer.data(), ptNodePos, &mShortcutListPolicy,
- &mBigramListPolicy, &flags, &mergedNodeCodePointCount, mergedNodeCodePoints,
- &probability, &childrenPos, &shortcutPos, &bigramPos, &siblingPos);
+ &mBigramListPolicy, codePointTable, &flags, &mergedNodeCodePointCount,
+ mergedNodeCodePoints, &probability, &childrenPos, &shortcutPos, &bigramPos,
+ &siblingPos);
// Skip PtNodes don't start with Unicode code point because they represent non-word information.
if (CharUtils::isInUnicodeSpace(mergedNodeCodePoints[0])) {
const int wordId = PatriciaTrieReadingUtils::isTerminal(flags) ? ptNodePos : NOT_A_WORD_ID;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
index 70e8d847e..3cdf6cd16 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
@@ -43,10 +43,11 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
PatriciaTriePolicy(MmappedBuffer::MmappedBufferPtr mmappedBuffer)
: mMmappedBuffer(std::move(mmappedBuffer)),
mHeaderPolicy(mMmappedBuffer->getReadOnlyByteArrayView().data(),
- FormatUtils::VERSION_2),
+ FormatUtils::detectFormatVersion(mMmappedBuffer->getReadOnlyByteArrayView())),
mBuffer(mMmappedBuffer->getReadOnlyByteArrayView().skip(mHeaderPolicy.getSize())),
mBigramListPolicy(mBuffer), mShortcutListPolicy(mBuffer),
- mPtNodeReader(mBuffer, &mBigramListPolicy, &mShortcutListPolicy),
+ mPtNodeReader(mBuffer, &mBigramListPolicy, &mShortcutListPolicy,
+ mHeaderPolicy.getCodePointTable()),
mPtNodeArrayReader(mBuffer), mTerminalPtNodePositionsForIteratingWords(),
mIsCorrupted(false) {}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp
index 74cdf7929..dc0ed96d0 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp
@@ -38,8 +38,8 @@ const PtNodeParams Ver2ParticiaTrieNodeReader::fetchPtNodeParamsInBufferFromPtNo
int bigramPos = NOT_A_DICT_POS;
int siblingPos = NOT_A_DICT_POS;
PatriciaTrieReadingUtils::readPtNodeInfo(mBuffer.data(), ptNodePos, mShortuctPolicy,
- mBigramPolicy, &flags, &mergedNodeCodePointCount, mergedNodeCodePoints, &probability,
- &childrenPos, &shortcutPos, &bigramPos, &siblingPos);
+ mBigramPolicy, mCodePointTable, &flags, &mergedNodeCodePointCount, mergedNodeCodePoints,
+ &probability, &childrenPos, &shortcutPos, &bigramPos, &siblingPos);
if (mergedNodeCodePointCount <= 0) {
AKLOGE("Empty PtNode is not allowed. Code point count: %d", mergedNodeCodePointCount);
ASSERT(false);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h
index 0f6769dc8..24ec5bcca 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h
@@ -33,8 +33,10 @@ class Ver2ParticiaTrieNodeReader : public PtNodeReader {
public:
Ver2ParticiaTrieNodeReader(const ReadOnlyByteArrayView buffer,
const DictionaryBigramsStructurePolicy *const bigramPolicy,
- const DictionaryShortcutsStructurePolicy *const shortcutPolicy)
- : mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortuctPolicy(shortcutPolicy) {}
+ const DictionaryShortcutsStructurePolicy *const shortcutPolicy,
+ const int *const codePointTable)
+ : mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortuctPolicy(shortcutPolicy),
+ mCodePointTable(codePointTable) {}
virtual const PtNodeParams fetchPtNodeParamsInBufferFromPtNodePos(const int ptNodePos) const;
@@ -44,6 +46,7 @@ class Ver2ParticiaTrieNodeReader : public PtNodeReader {
const ReadOnlyByteArrayView mBuffer;
const DictionaryBigramsStructurePolicy *const mBigramPolicy;
const DictionaryShortcutsStructurePolicy *const mShortuctPolicy;
+ const int *const mCodePointTable;
};
} // namespace latinime
#endif /* LATINIME_VER2_PATRICIA_TRIE_NODE_READER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
index 731092efd..d795239fc 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
@@ -16,6 +16,7 @@
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h"
@@ -51,7 +52,7 @@ const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProce
DynamicPtReadingUtils::getParentPtNodePos(parentPosOffset, headPos);
int codePoints[MAX_WORD_LENGTH];
const int codePonitCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
- dictBuf, flags, MAX_WORD_LENGTH, codePoints, &pos);
+ dictBuf, flags, MAX_WORD_LENGTH, mHeaderPolicy->getCodePointTable(), codePoints, &pos);
int terminalIdFieldPos = NOT_A_DICT_POS;
int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
int probability = NOT_A_PROBABILITY;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
index ecbe7922c..da2c30cd6 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
@@ -42,8 +42,10 @@ void BufferWithExtendableBuffer::readCodePointsAndAdvancePosition(const int maxC
if (readingPosIsInAdditionalBuffer) {
*pos -= mOriginalBuffer.size();
}
+ // The code point table is not used for the dynamic format, so no table is passed here.
*outCodePointCount = ByteArrayUtils::readStringAndAdvancePosition(
- getBuffer(readingPosIsInAdditionalBuffer), maxCodePointCount, outCodePoints, pos);
+ getBuffer(readingPosIsInAdditionalBuffer), maxCodePointCount,
+ nullptr /* codePointTable */, outCodePoints, pos);
if (readingPosIsInAdditionalBuffer) {
*pos += mOriginalBuffer.size();
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h
index 4b3c98988..abb979050 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h
@@ -147,11 +147,18 @@ class ByteArrayUtils {
*/
static AK_FORCE_INLINE int readCodePoint(const uint8_t *const buffer, const int pos) {
int p = pos;
- return readCodePointAndAdvancePosition(buffer, &p);
+ return readCodePointAndAdvancePosition(buffer, nullptr /* codePointTable */, &p);
}
static AK_FORCE_INLINE int readCodePointAndAdvancePosition(
- const uint8_t *const buffer, int *const pos) {
+ const uint8_t *const buffer, const int *const codePointTable, int *const pos) {
+ /*
+ * codePointTable maps 1 byte codes back to the original code points of the most frequent
+ * characters in this dictionary. It contains only the original code points of those
+ * characters. The byte values 0x20 - 0xFF are used as the 1 byte codes, and the original
+ * code point is restored by looking up codePointTable at the index (firstByte - 0x20).
+ * When codePointTable is null, a 1 byte character is returned as is.
+ */
const uint8_t firstByte = readUint8(buffer, *pos);
if (firstByte < MINIMUM_ONE_BYTE_CHARACTER_VALUE) {
if (firstByte == CHARACTER_ARRAY_TERMINATOR) {
@@ -162,6 +169,9 @@ class ByteArrayUtils {
}
} else {
*pos += 1;
+ if (codePointTable) {
+ return codePointTable[firstByte - MINIMUM_ONE_BYTE_CHARACTER_VALUE];
+ }
return firstByte;
}
}
@@ -173,12 +183,13 @@ class ByteArrayUtils {
*/
// Returns the length of the string.
static int readStringAndAdvancePosition(const uint8_t *const buffer,
- const int maxLength, int *const outBuffer, int *const pos) {
+ const int maxLength, const int *const codePointTable, int *const outBuffer,
+ int *const pos) {
int length = 0;
- int codePoint = readCodePointAndAdvancePosition(buffer, pos);
+ int codePoint = readCodePointAndAdvancePosition(buffer, codePointTable, pos);
while (NOT_A_CODE_POINT != codePoint && length < maxLength) {
outBuffer[length++] = codePoint;
- codePoint = readCodePointAndAdvancePosition(buffer, pos);
+ codePoint = readCodePointAndAdvancePosition(buffer, codePointTable, pos);
}
return length;
}
@@ -187,9 +198,9 @@ class ByteArrayUtils {
static int advancePositionToBehindString(
const uint8_t *const buffer, const int maxLength, int *const pos) {
int length = 0;
- int codePoint = readCodePointAndAdvancePosition(buffer, pos);
+ int codePoint = readCodePointAndAdvancePosition(buffer, nullptr /* codePointTable */, pos);
while (NOT_A_CODE_POINT != codePoint && length < maxLength) {
- codePoint = readCodePointAndAdvancePosition(buffer, pos);
+ codePoint = readCodePointAndAdvancePosition(buffer, nullptr /* codePointTable */, pos);
length++;
}
return length;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
index e6e7167c2..0cffe569d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
@@ -29,6 +29,8 @@ const size_t FormatUtils::DICTIONARY_MINIMUM_SIZE = 12;
switch (formatVersion) {
case VERSION_2:
return VERSION_2;
+ case VERSION_201:
+ return VERSION_201;
case VERSION_4_ONLY_FOR_TESTING:
return VERSION_4_ONLY_FOR_TESTING;
case VERSION_4:
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
index 51ad9877c..96310086b 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
@@ -32,6 +32,7 @@ class FormatUtils {
enum FORMAT_VERSION {
// These MUST have the same values as the relevant constants in FormatSpec.java.
VERSION_2 = 2,
+ VERSION_201 = 201,
VERSION_4_ONLY_FOR_TESTING = 399,
VERSION_4 = 402,
VERSION_4_DEV = 403,
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_scoring.h b/native/jni/src/suggest/policyimpl/typing/typing_scoring.h
index 52c4251f0..0240bcf54 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_scoring.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_scoring.h
@@ -33,10 +33,12 @@ class TypingScoring : public Scoring {
static const TypingScoring *getInstance() { return &sInstance; }
AK_FORCE_INLINE void getMostProbableString(const DicTraverseSession *const traverseSession,
- const float languageWeight, SuggestionResults *const outSuggestionResults) const {}
+ const float weightOfLangModelVsSpatialModel,
+ SuggestionResults *const outSuggestionResults) const {}
- AK_FORCE_INLINE float getAdjustedLanguageWeight(DicTraverseSession *const traverseSession,
- DicNode *const terminals, const int size) const {
+ AK_FORCE_INLINE float getAdjustedWeightOfLangModelVsSpatialModel(
+ DicTraverseSession *const traverseSession, DicNode *const terminals,
+ const int size) const {
return 1.0f;
}