diff options
Diffstat (limited to 'native/jni/src')
63 files changed, 669 insertions, 371 deletions
diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h index ef03d2b6d..92f39ea25 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node.h +++ b/native/jni/src/suggest/core/dicnode/dic_node.h @@ -125,7 +125,7 @@ class DicNode { PROF_NODE_COPY(&dicNode->mProfiler, mProfiler); } - void initAsPassingChild(DicNode *parentDicNode) { + void initAsPassingChild(const DicNode *parentDicNode) { mIsCachedForNextSuggestion = parentDicNode->mIsCachedForNextSuggestion; const int codePoint = parentDicNode->mDicNodeState.mDicNodeStateOutput.getCurrentWordCodePointAt( diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp index bf2a0000d..4445f4aaf 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp +++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp @@ -48,7 +48,7 @@ namespace latinime { /////////////////////////////////// // Traverse node expansion utils // /////////////////////////////////// -/* static */ void DicNodeUtils::getAllChildDicNodes(DicNode *dicNode, +/* static */ void DicNodeUtils::getAllChildDicNodes(const DicNode *dicNode, const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, DicNodeVector *const childDicNodes) { if (dicNode->isTotalInputSizeExceedingLimit()) { diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.h b/native/jni/src/suggest/core/dicnode/dic_node_utils.h index 0d60e5796..00e80c604 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_utils.h +++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.h @@ -35,7 +35,7 @@ class DicNodeUtils { const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode); static void initByCopy(const DicNode *const srcDicNode, DicNode *const destDicNode); - static void getAllChildDicNodes(DicNode *dicNode, + static void getAllChildDicNodes(const 
DicNode *dicNode, const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, DicNodeVector *childDicNodes); static float getBigramNodeImprobability( diff --git a/native/jni/src/suggest/core/dicnode/dic_node_vector.h b/native/jni/src/suggest/core/dicnode/dic_node_vector.h index cb28e57d8..54cde1988 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_vector.h +++ b/native/jni/src/suggest/core/dicnode/dic_node_vector.h @@ -52,7 +52,7 @@ class DicNodeVector { return static_cast<int>(mDicNodes.size()); } - void pushPassingChild(DicNode *dicNode) { + void pushPassingChild(const DicNode *dicNode) { ASSERT(!mLock); mDicNodes.emplace_back(); mDicNodes.back().initAsPassingChild(dicNode); diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index fe3167a61..f88388c75 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -19,6 +19,7 @@ #include "suggest/core/dictionary/dictionary.h" #include "defines.h" +#include "suggest/core/dictionary/dictionary_utils.h" #include "suggest/core/policy/dictionary_header_structure_policy.h" #include "suggest/core/result/suggestion_results.h" #include "suggest/core/session/dic_traverse_session.h" @@ -74,38 +75,50 @@ int Dictionary::getProbability(const int *word, int length) const { return getDictionaryStructurePolicy()->getUnigramProbabilityOfPtNode(pos); } +int Dictionary::getMaxProbabilityOfExactMatches(const int *word, int length) const { + TimeKeeper::setCurrentTime(); + return DictionaryUtils::getMaxProbabilityOfExactMatches( + mDictionaryStructureWithBufferPolicy.get(), word, length); +} + int Dictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word, int length) const { TimeKeeper::setCurrentTime(); return mBigramDictionary.getBigramProbability(prevWordsInfo, word, length); } -void Dictionary::addUnigramEntry(const int *const word, const 
int length, +bool Dictionary::addUnigramEntry(const int *const word, const int length, const UnigramProperty *const unigramProperty) { + if (unigramProperty->representsBeginningOfSentence() + && !mDictionaryStructureWithBufferPolicy->getHeaderStructurePolicy() + ->supportsBeginningOfSentence()) { + AKLOGE("The dictionary doesn't support Beginning-of-Sentence."); + return false; + } TimeKeeper::setCurrentTime(); - mDictionaryStructureWithBufferPolicy->addUnigramEntry(word, length, unigramProperty); + return mDictionaryStructureWithBufferPolicy->addUnigramEntry(word, length, unigramProperty); } -void Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo, +bool Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo, const BigramProperty *const bigramProperty) { TimeKeeper::setCurrentTime(); - mDictionaryStructureWithBufferPolicy->addNgramEntry(prevWordsInfo, bigramProperty); + return mDictionaryStructureWithBufferPolicy->addNgramEntry(prevWordsInfo, bigramProperty); } -void Dictionary::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, +bool Dictionary::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word, const int length) { TimeKeeper::setCurrentTime(); - mDictionaryStructureWithBufferPolicy->removeNgramEntry(prevWordsInfo, word, length); + return mDictionaryStructureWithBufferPolicy->removeNgramEntry(prevWordsInfo, word, length); } -void Dictionary::flush(const char *const filePath) { +bool Dictionary::flush(const char *const filePath) { TimeKeeper::setCurrentTime(); - mDictionaryStructureWithBufferPolicy->flush(filePath); + return mDictionaryStructureWithBufferPolicy->flush(filePath); } -void Dictionary::flushWithGC(const char *const filePath) { +bool Dictionary::flushWithGC(const char *const filePath) { TimeKeeper::setCurrentTime(); - mDictionaryStructureWithBufferPolicy->flushWithGC(filePath); + return mDictionaryStructureWithBufferPolicy->flushWithGC(filePath); } bool Dictionary::needsToRunGC(const 
bool mindsBlockByGC) { diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h index 817d9f7fc..10010b21c 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.h +++ b/native/jni/src/suggest/core/dictionary/dictionary.h @@ -57,6 +57,7 @@ class Dictionary { static const int KIND_MASK_FLAGS = 0xFFFFFF00; // Mask to get the flags static const int KIND_FLAG_POSSIBLY_OFFENSIVE = 0x80000000; static const int KIND_FLAG_EXACT_MATCH = 0x40000000; + static const int KIND_FLAG_EXACT_MATCH_WITH_INTENTIONAL_OMISSION = 0x20000000; Dictionary(JNIEnv *env, DictionaryStructureWithBufferPolicy::StructurePolicyPtr dictionaryStructureWithBufferPolicy); @@ -72,21 +73,23 @@ class Dictionary { int getProbability(const int *word, int length) const; + int getMaxProbabilityOfExactMatches(const int *word, int length) const; + int getBigramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word, int length) const; - void addUnigramEntry(const int *const codePoints, const int codePointCount, + bool addUnigramEntry(const int *const codePoints, const int codePointCount, const UnigramProperty *const unigramProperty); - void addNgramEntry(const PrevWordsInfo *const prevWordsInfo, + bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, const BigramProperty *const bigramProperty); - void removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word, + bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word, const int length); - void flush(const char *const filePath); + bool flush(const char *const filePath); - void flushWithGC(const char *const filePath); + bool flushWithGC(const char *const filePath); bool needsToRunGC(const bool mindsBlockByGC); diff --git a/native/jni/src/suggest/core/dictionary/dictionary_utils.cpp b/native/jni/src/suggest/core/dictionary/dictionary_utils.cpp new file mode 100644 index 000000000..b94966cbe --- /dev/null +++ 
b/native/jni/src/suggest/core/dictionary/dictionary_utils.cpp @@ -0,0 +1,96 @@ +/* + * Copyright (C) 2014, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/core/dictionary/dictionary_utils.h" + +#include "suggest/core/dicnode/dic_node.h" +#include "suggest/core/dicnode/dic_node_priority_queue.h" +#include "suggest/core/dicnode/dic_node_vector.h" +#include "suggest/core/dictionary/dictionary.h" +#include "suggest/core/dictionary/digraph_utils.h" +#include "suggest/core/session/prev_words_info.h" +#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" + +namespace latinime { + +/* static */ int DictionaryUtils::getMaxProbabilityOfExactMatches( + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, + const int *const codePoints, const int codePointCount) { + std::vector<DicNode> current; + std::vector<DicNode> next; + + // No prev words information. + PrevWordsInfo emptyPrevWordsInfo; + int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; + emptyPrevWordsInfo.getPrevWordsTerminalPtNodePos(dictionaryStructurePolicy, + prevWordsPtNodePos, false /* tryLowerCaseSearch */); + current.emplace_back(); + DicNodeUtils::initAsRoot(dictionaryStructurePolicy, prevWordsPtNodePos, &current.front()); + for (int i = 0; i < codePointCount; ++i) { + // The base-lower input is used to ignore case errors and accent errors.
+ const int codePoint = CharUtils::toBaseLowerCase(codePoints[i]); + for (const DicNode &dicNode : current) { + if (dicNode.isInDigraph() && dicNode.getNodeCodePoint() == codePoint) { + next.emplace_back(dicNode); + next.back().advanceDigraphIndex(); + continue; + } + processChildDicNodes(dictionaryStructurePolicy, codePoint, &dicNode, &next); + } + current.clear(); + current.swap(next); + } + + int maxProbability = NOT_A_PROBABILITY; + for (const DicNode &dicNode : current) { + if (!dicNode.isTerminalDicNode()) { + continue; + } + // dicNode can contain case errors, accent errors, intentional omissions or digraphs. + maxProbability = std::max(maxProbability, dicNode.getProbability()); + } + return maxProbability; +} + +/* static */ void DictionaryUtils::processChildDicNodes( + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, + const int inputCodePoint, const DicNode *const parentDicNode, + std::vector<DicNode> *const outDicNodes) { + DicNodeVector childDicNodes; + DicNodeUtils::getAllChildDicNodes(parentDicNode, dictionaryStructurePolicy, &childDicNodes); + for (int childIndex = 0; childIndex < childDicNodes.getSizeAndLock(); ++childIndex) { + DicNode *const childDicNode = childDicNodes[childIndex]; + const int codePoint = CharUtils::toBaseLowerCase(childDicNode->getNodeCodePoint()); + if (inputCodePoint == codePoint) { + outDicNodes->emplace_back(*childDicNode); + } + if (childDicNode->canBeIntentionalOmission()) { + processChildDicNodes(dictionaryStructurePolicy, inputCodePoint, childDicNode, + outDicNodes); + } + if (DigraphUtils::hasDigraphForCodePoint( + dictionaryStructurePolicy->getHeaderStructurePolicy(), + childDicNode->getNodeCodePoint())) { + childDicNode->advanceDigraphIndex(); + if (childDicNode->getNodeCodePoint() == codePoint) { + childDicNode->advanceDigraphIndex(); + outDicNodes->emplace_back(*childDicNode); + } + } + } +} + +} // namespace latinime diff --git a/native/jni/src/suggest/core/dictionary/dictionary_utils.h 
b/native/jni/src/suggest/core/dictionary/dictionary_utils.h new file mode 100644 index 000000000..358ebf674 --- /dev/null +++ b/native/jni/src/suggest/core/dictionary/dictionary_utils.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICTIONARY_UTILS_H +#define LATINIME_DICTIONARY_UTILS_H + +#include <vector> + +#include "defines.h" + +namespace latinime { + +class DictionaryStructureWithBufferPolicy; +class DicNode; + +class DictionaryUtils { + public: + static int getMaxProbabilityOfExactMatches( + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, + const int *const codePoints, const int codePointCount); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryUtils); + + static void processChildDicNodes( + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, + const int inputCodePoint, const DicNode *const parentDicNode, + std::vector<DicNode> *const outDicNodes); +}; +} // namespace latinime +#endif // LATINIME_DICTIONARY_UTILS_H diff --git a/native/jni/src/suggest/core/dictionary/error_type_utils.cpp b/native/jni/src/suggest/core/dictionary/error_type_utils.cpp index 0635fef7e..b6bf7a98c 100644 --- a/native/jni/src/suggest/core/dictionary/error_type_utils.cpp +++ b/native/jni/src/suggest/core/dictionary/error_type_utils.cpp @@ -31,4 +31,8 @@ const ErrorTypeUtils::ErrorType ErrorTypeUtils::NEW_WORD = 0x80; 
const ErrorTypeUtils::ErrorType ErrorTypeUtils::ERRORS_TREATED_AS_AN_EXACT_MATCH = NOT_AN_ERROR | MATCH_WITH_CASE_ERROR | MATCH_WITH_ACCENT_ERROR | MATCH_WITH_DIGRAPH; +const ErrorTypeUtils::ErrorType + ErrorTypeUtils::ERRORS_TREATED_AS_AN_EXACT_MATCH_WITH_INTENTIONAL_OMISSION = + ERRORS_TREATED_AS_AN_EXACT_MATCH | INTENTIONAL_OMISSION; + } // namespace latinime diff --git a/native/jni/src/suggest/core/dictionary/error_type_utils.h b/native/jni/src/suggest/core/dictionary/error_type_utils.h index 0e8e5b635..e3e76b238 100644 --- a/native/jni/src/suggest/core/dictionary/error_type_utils.h +++ b/native/jni/src/suggest/core/dictionary/error_type_utils.h @@ -51,6 +51,11 @@ class ErrorTypeUtils { return (containedErrorTypes & ~ERRORS_TREATED_AS_AN_EXACT_MATCH) == 0; } + static bool isExactMatchWithIntentionalOmission(const ErrorType containedErrorTypes) { + return (containedErrorTypes + & ~ERRORS_TREATED_AS_AN_EXACT_MATCH_WITH_INTENTIONAL_OMISSION) == 0; + } + static bool isEditCorrectionError(const ErrorType errorType) { return (errorType & EDIT_CORRECTION) != 0; } @@ -67,6 +72,7 @@ class ErrorTypeUtils { DISALLOW_IMPLICIT_CONSTRUCTORS(ErrorTypeUtils); static const ErrorType ERRORS_TREATED_AS_AN_EXACT_MATCH; + static const ErrorType ERRORS_TREATED_AS_AN_EXACT_MATCH_WITH_INTENTIONAL_OMISSION; }; } // namespace latinime #endif // LATINIME_ERROR_TYPE_UTILS_H diff --git a/native/jni/src/suggest/core/dictionary/property/unigram_property.h b/native/jni/src/suggest/core/dictionary/property/unigram_property.h index d2551057b..902eb000f 100644 --- a/native/jni/src/suggest/core/dictionary/property/unigram_property.h +++ b/native/jni/src/suggest/core/dictionary/property/unigram_property.h @@ -48,15 +48,21 @@ class UnigramProperty { }; UnigramProperty() - : mIsNotAWord(false), mIsBlacklisted(false), mProbability(NOT_A_PROBABILITY), - mTimestamp(NOT_A_TIMESTAMP), mLevel(0), mCount(0), mShortcuts() {} - - UnigramProperty(const bool isNotAWord, const bool isBlacklisted, const int 
probability, - const int timestamp, const int level, const int count, - const std::vector<ShortcutProperty> *const shortcuts) - : mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability), + : mRepresentsBeginningOfSentence(false), mIsNotAWord(false), mIsBlacklisted(false), + mProbability(NOT_A_PROBABILITY), mTimestamp(NOT_A_TIMESTAMP), mLevel(0), mCount(0), + mShortcuts() {} + + UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord, + const bool isBlacklisted, const int probability, const int timestamp, const int level, + const int count, const std::vector<ShortcutProperty> *const shortcuts) + : mRepresentsBeginningOfSentence(representsBeginningOfSentence), + mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability), mTimestamp(timestamp), mLevel(level), mCount(count), mShortcuts(*shortcuts) {} + bool representsBeginningOfSentence() const { + return mRepresentsBeginningOfSentence; + } + bool isNotAWord() const { return mIsNotAWord; } @@ -94,6 +100,7 @@ class UnigramProperty { DISALLOW_ASSIGNMENT_OPERATOR(UnigramProperty); // TODO: Make members const. 
+ bool mRepresentsBeginningOfSentence; bool mIsNotAWord; bool mIsBlacklisted; int mProbability; diff --git a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h index 845e629e6..a61227626 100644 --- a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h @@ -51,6 +51,8 @@ class DictionaryHeaderStructurePolicy { virtual const std::vector<int> *getLocale() const = 0; + virtual bool supportsBeginningOfSentence() const = 0; + protected: DictionaryHeaderStructurePolicy() {} diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index 3fd815f98..cda89406c 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -81,9 +81,11 @@ class DictionaryStructureWithBufferPolicy { virtual bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word, const int length) = 0; - virtual void flush(const char *const filePath) = 0; + // Returns whether the flush was success or not. + virtual bool flush(const char *const filePath) = 0; - virtual void flushWithGC(const char *const filePath) = 0; + // Returns whether the GC and flush were success or not. 
+ virtual bool flushWithGC(const char *const filePath) = 0; virtual bool needsToRunGC(const bool mindsBlockByGC) const = 0; diff --git a/native/jni/src/suggest/core/result/suggestions_output_utils.cpp b/native/jni/src/suggest/core/result/suggestions_output_utils.cpp index a307cb45d..23908255b 100644 --- a/native/jni/src/suggest/core/result/suggestions_output_utils.cpp +++ b/native/jni/src/suggest/core/result/suggestions_output_utils.cpp @@ -89,6 +89,9 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16; terminalDicNode->getProbability(), NOT_A_PROBABILITY) <= 0; const bool isExactMatch = ErrorTypeUtils::isExactMatch(terminalDicNode->getContainedErrorTypes()); + const bool isExactMatchWithIntentionalOmission = + ErrorTypeUtils::isExactMatchWithIntentionalOmission( + terminalDicNode->getContainedErrorTypes()); const bool isFirstCharUppercase = terminalDicNode->isFirstCharUppercase(); // Heuristic: We exclude probability=0 first-char-uppercase words from exact match. // (e.g. "AMD" and "and") @@ -96,7 +99,9 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16; && !(isPossiblyOffensiveWord && isFirstCharUppercase); const int outputTypeFlags = (isPossiblyOffensiveWord ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0) - | ((isSafeExactMatch && boostExactMatches) ? Dictionary::KIND_FLAG_EXACT_MATCH : 0); + | ((isSafeExactMatch && boostExactMatches) ? Dictionary::KIND_FLAG_EXACT_MATCH : 0) + | (isExactMatchWithIntentionalOmission ? + Dictionary::KIND_FLAG_EXACT_MATCH_WITH_INTENTIONAL_OMISSION : 0); // Entries that are blacklisted or do not represent a word should not be output. 
const bool isValidWord = !terminalDicNode->isBlacklistedOrNotAWord(); diff --git a/native/jni/src/suggest/core/session/prev_words_info.h b/native/jni/src/suggest/core/session/prev_words_info.h index e4de1f4cc..56c53c1c2 100644 --- a/native/jni/src/suggest/core/session/prev_words_info.h +++ b/native/jni/src/suggest/core/session/prev_words_info.h @@ -20,11 +20,11 @@ #include "defines.h" #include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" +#include "utils/char_utils.h" namespace latinime { // TODO: Support n-gram. -// TODO: Support beginning of sentence. // This class does not take ownership of any code point buffers. class PrevWordsInfo { public: @@ -52,8 +52,7 @@ class PrevWordsInfo { void getPrevWordsTerminalPtNodePos( const DictionaryStructureWithBufferPolicy *const dictStructurePolicy, - int *const outPrevWordsTerminalPtNodePos, - const bool tryLowerCaseSearch) const { + int *const outPrevWordsTerminalPtNodePos, const bool tryLowerCaseSearch) const { for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) { outPrevWordsTerminalPtNodePos[i] = getTerminalPtNodePosOfWord(dictStructurePolicy, mPrevWordCodePoints[i], mPrevWordCodePointCount[i], @@ -63,17 +62,11 @@ class PrevWordsInfo { BinaryDictionaryBigramsIterator getBigramsIteratorForPrediction( const DictionaryStructureWithBufferPolicy *const dictStructurePolicy) const { - int pos = getBigramListPositionForWord(dictStructurePolicy, mPrevWordCodePoints[0], - mPrevWordCodePointCount[0], false /* forceLowerCaseSearch */); - // getBigramListPositionForWord returns NOT_A_DICT_POS if this word isn't in the - // dictionary or has no bigrams - if (NOT_A_DICT_POS == pos) { - // If no bigrams for this exact word, search again in lower case. 
- pos = getBigramListPositionForWord(dictStructurePolicy, mPrevWordCodePoints[0], - mPrevWordCodePointCount[0], true /* forceLowerCaseSearch */); - } - return BinaryDictionaryBigramsIterator( - dictStructurePolicy->getBigramsStructurePolicy(), pos); + const int bigramListPos = getBigramListPositionForWordWithTryingLowerCaseSearch( + dictStructurePolicy, mPrevWordCodePoints[0], mPrevWordCodePointCount[0], + mIsBeginningOfSentence[0]); + return BinaryDictionaryBigramsIterator(dictStructurePolicy->getBigramsStructurePolicy(), + bigramListPos); } // n is 1-indexed. @@ -99,11 +92,21 @@ class PrevWordsInfo { const DictionaryStructureWithBufferPolicy *const dictStructurePolicy, const int *const wordCodePoints, const int wordCodePointCount, const bool isBeginningOfSentence, const bool tryLowerCaseSearch) { - if (!dictStructurePolicy || !wordCodePoints) { + if (!dictStructurePolicy || !wordCodePoints || wordCodePointCount > MAX_WORD_LENGTH) { return NOT_A_DICT_POS; } + int codePoints[MAX_WORD_LENGTH]; + int codePointCount = wordCodePointCount; + memmove(codePoints, wordCodePoints, sizeof(int) * codePointCount); + if (isBeginningOfSentence) { + codePointCount = CharUtils::attachBeginningOfSentenceMarker(codePoints, + codePointCount, MAX_WORD_LENGTH); + if (codePointCount <= 0) { + return NOT_A_DICT_POS; + } + } const int wordPtNodePos = dictStructurePolicy->getTerminalPtNodePositionOfWord( - wordCodePoints, wordCodePointCount, false /* forceLowerCaseSearch */); + codePoints, codePointCount, false /* forceLowerCaseSearch */); if (wordPtNodePos != NOT_A_DICT_POS || !tryLowerCaseSearch) { // Return the position when when the word was found or doesn't try lower case // search. @@ -112,7 +115,36 @@ class PrevWordsInfo { // Check bigrams for lower-cased previous word if original was not found. Useful for // auto-capitalized words like "The [current_word]". 
return dictStructurePolicy->getTerminalPtNodePositionOfWord( - wordCodePoints, wordCodePointCount, true /* forceLowerCaseSearch */); + codePoints, codePointCount, true /* forceLowerCaseSearch */); + } + + static int getBigramListPositionForWordWithTryingLowerCaseSearch( + const DictionaryStructureWithBufferPolicy *const dictStructurePolicy, + const int *const wordCodePoints, const int wordCodePointCount, + const bool isBeginningOfSentence) { + if (!dictStructurePolicy || !wordCodePoints || wordCodePointCount > MAX_WORD_LENGTH) { + return NOT_A_DICT_POS; + } + int codePoints[MAX_WORD_LENGTH]; + int codePointCount = wordCodePointCount; + memmove(codePoints, wordCodePoints, sizeof(int) * codePointCount); + if (isBeginningOfSentence) { + codePointCount = CharUtils::attachBeginningOfSentenceMarker(codePoints, + codePointCount, MAX_WORD_LENGTH); + if (codePointCount <= 0) { + return NOT_A_DICT_POS; + } + } + int pos = getBigramListPositionForWord(dictStructurePolicy, codePoints, + codePointCount, false /* forceLowerCaseSearch */); + // getBigramListPositionForWord returns NOT_A_DICT_POS if this word isn't in the + // dictionary or has no bigrams + if (NOT_A_DICT_POS == pos) { + // If no bigrams for this exact word, search again in lower case. 
+ pos = getBigramListPositionForWord(dictStructurePolicy, codePoints, + codePointCount, true /* forceLowerCaseSearch */); + } + return pos; } static int getBigramListPositionForWord( diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h index 479d15164..87cf0cd3b 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h @@ -246,6 +246,10 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { return &mLocale; } + bool supportsBeginningOfSentence() const { + return mDictFormatVersion >= FormatUtils::VERSION_4; + } + private: DISALLOW_COPY_AND_ASSIGN(HeaderPolicy); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/Readme.txt b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/Readme.txt index 9e29e836c..9e29e836c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/Readme.txt +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/Readme.txt diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.cpp index 7ad072f09..3e8e059f2 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/bigram/ver4_bigram_list_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.cpp @@ -22,19 +22,19 @@ * suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp */ -#include "suggest/policyimpl/dictionary/structure/backward/v401/bigram/ver4_bigram_list_policy.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h" #include "suggest/core/dictionary/property/bigram_property.h" 
#include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_dict_content.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" namespace latinime { namespace backward { -namespace v401 { +namespace v402 { void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext, int *const bigramEntryPos) const { @@ -285,6 +285,6 @@ bool Ver4BigramListPolicy::updateHasNextFlag(const bool hasNext, const int bigra return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, bigramEntryPos); } -} // namespace v401 +} // namespace v402 } // namespace backward } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h index adf687bac..61623468e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/bigram/ver4_bigram_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h @@ -22,28 +22,28 @@ * suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h */ -#ifndef LATINIME_BACKWARD_V401_VER4_BIGRAM_LIST_POLICY_H -#define 
LATINIME_BACKWARD_V401_VER4_BIGRAM_LIST_POLICY_H +#ifndef LATINIME_BACKWARD_V402_VER4_BIGRAM_LIST_POLICY_H +#define LATINIME_BACKWARD_V402_VER4_BIGRAM_LIST_POLICY_H #include "defines.h" #include "suggest/core/policy/dictionary_bigrams_structure_policy.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_entry.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_entry.h" namespace latinime { namespace backward { -namespace v401 { +namespace v402 { class BigramDictContent; -} // namespace v401 +} // namespace v402 } // namespace backward class BigramProperty; namespace backward { -namespace v401 { -} // namespace v401 +namespace v402 { +} // namespace v402 } // namespace backward class HeaderPolicy; namespace backward { -namespace v401 { +namespace v402 { class TerminalPositionLookupTable; class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy { @@ -87,7 +87,7 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy { const TerminalPositionLookupTable *const mTerminalPositionLookupTable; const HeaderPolicy *const mHeaderPolicy; }; -} // namespace v401 +} // namespace v402 } // namespace backward } // namespace latinime -#endif /* LATINIME_BACKWARD_V401_VER4_BIGRAM_LIST_POLICY_H */ +#endif /* LATINIME_BACKWARD_V402_VER4_BIGRAM_LIST_POLICY_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.cpp index 1e53ff94a..e2dd93c5e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_dict_content.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.cpp @@ -21,20 +21,22 @@ * suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp */ -#include 
"suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_dict_content.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" namespace latinime { namespace backward { -namespace v401 { +namespace v402 { const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition( int *const bigramEntryPos) const { const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer(); - if (*bigramEntryPos < 0 || *bigramEntryPos >= bigramListBuffer->getTailPosition()) { - AKLOGE("Invalid bigram entry position. bigramEntryPos: %d, bufSize: %d", - *bigramEntryPos, bigramListBuffer->getTailPosition()); + const int bigramEntryTailPos = (*bigramEntryPos) + getBigramEntrySize(); + if (*bigramEntryPos < 0 || bigramEntryTailPos > bigramListBuffer->getTailPosition()) { + AKLOGE("Invalid bigram entry position. bigramEntryPos: %d, bigramEntryTailPos: %d, " + "bufSize: %d", *bigramEntryPos, bigramEntryTailPos, + bigramListBuffer->getTailPosition()); ASSERT(false); return BigramEntry(false /* hasNext */, NOT_A_PROBABILITY, Ver4DictConstants::NOT_A_TERMINAL_ID); @@ -47,8 +49,6 @@ const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition( int level = 0; int count = 0; if (mHasHistoricalInfo) { - probability = bigramListBuffer->readUintAndAdvancePosition( - Ver4DictConstants::PROBABILITY_SIZE, bigramEntryPos); timestamp = bigramListBuffer->readUintAndAdvancePosition( Ver4DictConstants::TIME_STAMP_FIELD_SIZE, bigramEntryPos); level = bigramListBuffer->readUintAndAdvancePosition( @@ -56,7 +56,8 @@ const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition( count = bigramListBuffer->readUintAndAdvancePosition( Ver4DictConstants::WORD_COUNT_FIELD_SIZE, bigramEntryPos); } else { - probability = bigramFlags & Ver4DictConstants::BIGRAM_PROBABILITY_MASK; + probability = bigramListBuffer->readUintAndAdvancePosition( + 
Ver4DictConstants::PROBABILITY_SIZE, bigramEntryPos); } const int encodedTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition( Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos); @@ -74,21 +75,13 @@ const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition( bool BigramDictContent::writeBigramEntryAndAdvancePosition( const BigramEntry *const bigramEntryToWrite, int *const entryWritingPos) { BufferWithExtendableBuffer *const bigramListBuffer = getWritableContentBuffer(); - const int bigramFlags = createAndGetBigramFlags( - mHasHistoricalInfo ? 0 : bigramEntryToWrite->getProbability(), - bigramEntryToWrite->hasNext()); + const int bigramFlags = createAndGetBigramFlags(bigramEntryToWrite->hasNext()); if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags, Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) { AKLOGE("Cannot write bigram flags. pos: %d, flags: %x", *entryWritingPos, bigramFlags); return false; } if (mHasHistoricalInfo) { - if (!bigramListBuffer->writeUintAndAdvancePosition(bigramEntryToWrite->getProbability(), - Ver4DictConstants::PROBABILITY_SIZE, entryWritingPos)) { - AKLOGE("Cannot write bigram probability. pos: %d, probability: %d", *entryWritingPos, - bigramEntryToWrite->getProbability()); - return false; - } const HistoricalInfo *const historicalInfo = bigramEntryToWrite->getHistoricalInfo(); if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getTimeStamp(), Ver4DictConstants::TIME_STAMP_FIELD_SIZE, entryWritingPos)) { @@ -108,6 +101,13 @@ bool BigramDictContent::writeBigramEntryAndAdvancePosition( historicalInfo->getCount()); return false; } + } else { + if (!bigramListBuffer->writeUintAndAdvancePosition(bigramEntryToWrite->getProbability(), + Ver4DictConstants::PROBABILITY_SIZE, entryWritingPos)) { + AKLOGE("Cannot write bigram probability. 
pos: %d, probability: %d", *entryWritingPos, + bigramEntryToWrite->getProbability()); + return false; + } } const int targetTerminalIdToWrite = (bigramEntryToWrite->getTargetTerminalId() == Ver4DictConstants::NOT_A_TERMINAL_ID) ? @@ -219,6 +219,6 @@ bool BigramDictContent::runGCBigramList(const int bigramListPos, return true; } -} // namespace v401 +} // namespace v402 } // namespace backward } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.h index f9c474b4a..b554e5676 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.h @@ -21,18 +21,18 @@ * suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h */ -#ifndef LATINIME_BACKWARD_V401_BIGRAM_DICT_CONTENT_H -#define LATINIME_BACKWARD_V401_BIGRAM_DICT_CONTENT_H +#ifndef LATINIME_BACKWARD_V402_BIGRAM_DICT_CONTENT_H +#define LATINIME_BACKWARD_V402_BIGRAM_DICT_CONTENT_H #include "defines.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_entry.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/content/sparse_table_dict_content.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_entry.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h" namespace 
latinime { namespace backward { -namespace v401 { +namespace v402 { class BigramDictContent : public SparseTableDictContent { public: @@ -104,9 +104,22 @@ class BigramDictContent : public SparseTableDictContent { private: DISALLOW_COPY_AND_ASSIGN(BigramDictContent); - int createAndGetBigramFlags(const int probability, const bool hasNext) const { - return (probability & Ver4DictConstants::BIGRAM_PROBABILITY_MASK) - | (hasNext ? Ver4DictConstants::BIGRAM_HAS_NEXT_MASK : 0); + int createAndGetBigramFlags(const bool hasNext) const { + return hasNext ? Ver4DictConstants::BIGRAM_HAS_NEXT_MASK : 0; + } + + int getBigramEntrySize() const { + if (mHasHistoricalInfo) { + return Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE + + Ver4DictConstants::TIME_STAMP_FIELD_SIZE + + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE + + Ver4DictConstants::WORD_COUNT_FIELD_SIZE + + Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE; + } else { + return Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE + + Ver4DictConstants::PROBABILITY_SIZE + + Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE; + } } bool runGCBigramList(const int bigramListPos, @@ -116,7 +129,7 @@ class BigramDictContent : public SparseTableDictContent { bool mHasHistoricalInfo; }; -} // namespace v401 +} // namespace v402 } // namespace backward } // namespace latinime -#endif /* LATINIME_BACKWARD_V401_BIGRAM_DICT_CONTENT_H */ +#endif /* LATINIME_BACKWARD_V402_BIGRAM_DICT_CONTENT_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_entry.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_entry.h index 82c4b53a8..40968b4d8 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_entry.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_entry.h @@ -21,16 +21,16 @@ * suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h */ -#ifndef 
LATINIME_BACKWARD_V401_BIGRAM_ENTRY_H -#define LATINIME_BACKWARD_V401_BIGRAM_ENTRY_H +#ifndef LATINIME_BACKWARD_V402_BIGRAM_ENTRY_H +#define LATINIME_BACKWARD_V402_BIGRAM_ENTRY_H #include "defines.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h" #include "suggest/policyimpl/dictionary/utils/historical_info.h" namespace latinime { namespace backward { -namespace v401 { +namespace v402 { class BigramEntry { public: @@ -104,7 +104,7 @@ class BigramEntry { const HistoricalInfo mHistoricalInfo; const int mTargetTerminalId; }; -} // namespace v401 +} // namespace v402 } // namespace backward } // namespace latinime -#endif /* LATINIME_BACKWARD_V401_BIGRAM_ENTRY_H */ +#endif /* LATINIME_BACKWARD_V402_BIGRAM_ENTRY_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/dict_content.h index 39e29001c..0f2f25534 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/dict_content.h @@ -21,14 +21,14 @@ * suggest/policyimpl/dictionary/structure/v4/content/dict_content.h */ -#ifndef LATINIME_BACKWARD_V401_DICT_CONTENT_H -#define LATINIME_BACKWARD_V401_DICT_CONTENT_H +#ifndef LATINIME_BACKWARD_V402_DICT_CONTENT_H +#define LATINIME_BACKWARD_V402_DICT_CONTENT_H #include "defines.h" namespace latinime { namespace backward { -namespace v401 { +namespace v402 { class DictContent { public: @@ -41,7 +41,7 @@ class DictContent { private: DISALLOW_COPY_AND_ASSIGN(DictContent); }; -} // namespace v401 +} // namespace v402 } // namespace backward } // namespace latinime -#endif /* LATINIME_BACKWARD_V401_DICT_CONTENT_H */ +#endif /* LATINIME_BACKWARD_V402_DICT_CONTENT_H */ diff --git 
a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/probability_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.cpp index 337b97c05..c671647d4 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/probability_dict_content.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.cpp @@ -21,16 +21,16 @@ * suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp */ -#include "suggest/policyimpl/dictionary/structure/backward/v401/content/probability_dict_content.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/content/probability_entry.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" namespace latinime { namespace backward { -namespace v401 { +namespace v402 { const ProbabilityEntry ProbabilityDictContent::getProbabilityEntry(const int terminalId) const { if (terminalId < 0 || terminalId >= mSize) { @@ -166,6 +166,6 @@ bool ProbabilityDictContent::writeEntry(const ProbabilityEntry *const probabilit return true; } -} // namespace v401 +} // namespace v402 } // namespace backward } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/probability_dict_content.h 
b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.h index db3070994..3734797d4 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/probability_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.h @@ -21,18 +21,18 @@ * suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h */ -#ifndef LATINIME_BACKWARD_V401_PROBABILITY_DICT_CONTENT_H -#define LATINIME_BACKWARD_V401_PROBABILITY_DICT_CONTENT_H +#ifndef LATINIME_BACKWARD_V402_PROBABILITY_DICT_CONTENT_H +#define LATINIME_BACKWARD_V402_PROBABILITY_DICT_CONTENT_H #include "defines.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/content/single_dict_content.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/content/single_dict_content.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" namespace latinime { namespace backward { -namespace v401 { +namespace v402 { class ProbabilityEntry; @@ -68,7 +68,7 @@ class ProbabilityDictContent : public SingleDictContent { bool mHasHistoricalInfo; int mSize; }; -} // namespace v401 +} // namespace v402 } // namespace backward } // namespace latinime -#endif /* LATINIME_BACKWARD_V401_PROBABILITY_DICT_CONTENT_H */ +#endif /* LATINIME_BACKWARD_V402_PROBABILITY_DICT_CONTENT_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/probability_entry.h 
b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h index d341e7b07..8ccfa33dc 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/probability_entry.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h @@ -21,16 +21,16 @@ * suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h */ -#ifndef LATINIME_BACKWARD_V401_PROBABILITY_ENTRY_H -#define LATINIME_BACKWARD_V401_PROBABILITY_ENTRY_H +#ifndef LATINIME_BACKWARD_V402_PROBABILITY_ENTRY_H +#define LATINIME_BACKWARD_V402_PROBABILITY_ENTRY_H #include "defines.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h" #include "suggest/policyimpl/dictionary/utils/historical_info.h" namespace latinime { namespace backward { -namespace v401 { +namespace v402 { class ProbabilityEntry { public: @@ -84,7 +84,7 @@ class ProbabilityEntry { const int mProbability; const HistoricalInfo mHistoricalInfo; }; -} // namespace v401 +} // namespace v402 } // namespace backward } // namespace latinime -#endif /* LATINIME_BACKWARD_V401_PROBABILITY_ENTRY_H */ +#endif /* LATINIME_BACKWARD_V402_PROBABILITY_ENTRY_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/shortcut_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/shortcut_dict_content.cpp index 3214807ad..56bc8b98d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/shortcut_dict_content.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/shortcut_dict_content.cpp @@ -21,13 +21,13 @@ * suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.cpp */ -#include 
"suggest/policyimpl/dictionary/structure/backward/v401/content/shortcut_dict_content.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/content/shortcut_dict_content.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" namespace latinime { namespace backward { -namespace v401 { +namespace v402 { void ShortcutDictContent::getShortcutEntryAndAdvancePosition(const int maxCodePointCount, int *const outCodePoint, int *const outCodePointCount, int *const outProbability, @@ -194,6 +194,6 @@ int ShortcutDictContent::createAndGetShortcutFlags(const int probability, | (hasNext ? Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK : 0); } -} // namespace v401 +} // namespace v402 } // namespace backward } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/shortcut_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/shortcut_dict_content.h index 75fd4f3b2..179cec5bb 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/shortcut_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/shortcut_dict_content.h @@ -21,17 +21,17 @@ * suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h */ -#ifndef LATINIME_BACKWARD_V401_SHORTCUT_DICT_CONTENT_H -#define LATINIME_BACKWARD_V401_SHORTCUT_DICT_CONTENT_H +#ifndef LATINIME_BACKWARD_V402_SHORTCUT_DICT_CONTENT_H +#define LATINIME_BACKWARD_V402_SHORTCUT_DICT_CONTENT_H #include "defines.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/content/sparse_table_dict_content.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.h" +#include 
"suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h" namespace latinime { namespace backward { -namespace v401 { +namespace v402 { class ShortcutDictContent : public SparseTableDictContent { public: @@ -95,7 +95,7 @@ class ShortcutDictContent : public SparseTableDictContent { int createAndGetShortcutFlags(const int probability, const bool hasNext) const; }; -} // namespace v401 +} // namespace v402 } // namespace backward } // namespace latinime -#endif /* LATINIME_BACKWARD_V401_SHORTCUT_DICT_CONTENT_H */ +#endif /* LATINIME_BACKWARD_V402_SHORTCUT_DICT_CONTENT_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/single_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/single_dict_content.h index a519cd835..6433650b0 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/single_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/single_dict_content.h @@ -21,19 +21,19 @@ * suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h */ -#ifndef LATINIME_BACKWARD_V401_SINGLE_DICT_CONTENT_H -#define LATINIME_BACKWARD_V401_SINGLE_DICT_CONTENT_H +#ifndef LATINIME_BACKWARD_V402_SINGLE_DICT_CONTENT_H +#define LATINIME_BACKWARD_V402_SINGLE_DICT_CONTENT_H #include "defines.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/content/dict_content.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/content/dict_content.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include 
"suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" namespace latinime { namespace backward { -namespace v401 { +namespace v402 { class SingleDictContent : public DictContent { public: @@ -80,7 +80,7 @@ class SingleDictContent : public DictContent { BufferWithExtendableBuffer mExpandableContentBuffer; const bool mIsValid; }; -} // namespace v401 +} // namespace v402 } // namespace backward } // namespace latinime -#endif /* LATINIME_BACKWARD_V401_SINGLE_DICT_CONTENT_H */ +#endif /* LATINIME_BACKWARD_V402_SINGLE_DICT_CONTENT_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/sparse_table_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.cpp index 638132c3d..7c9b4967a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/sparse_table_dict_content.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.cpp @@ -21,11 +21,11 @@ * suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.cpp */ -#include "suggest/policyimpl/dictionary/structure/backward/v401/content/sparse_table_dict_content.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.h" namespace latinime { namespace backward { -namespace v401 { +namespace v402 { bool SparseTableDictContent::flush(const char *const dictPath, const char *const lookupTableFileNameSuffix, const char *const addressTableFileNameSuffix, @@ -45,6 +45,6 @@ bool SparseTableDictContent::flush(const char *const dictPath, return true; } -} // namespace v401 +} // namespace v402 } // namespace backward } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/sparse_table_dict_content.h 
b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.h index b95de2eda..c7233edd3 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/sparse_table_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.h @@ -21,12 +21,12 @@ * suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h */ -#ifndef LATINIME_BACKWARD_V401_SPARSE_TABLE_DICT_CONTENT_H -#define LATINIME_BACKWARD_V401_SPARSE_TABLE_DICT_CONTENT_H +#ifndef LATINIME_BACKWARD_V402_SPARSE_TABLE_DICT_CONTENT_H +#define LATINIME_BACKWARD_V402_SPARSE_TABLE_DICT_CONTENT_H #include "defines.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/content/dict_content.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/content/dict_content.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" @@ -34,7 +34,7 @@ namespace latinime { namespace backward { -namespace v401 { +namespace v402 { // TODO: Support multiple contents. 
class SparseTableDictContent : public DictContent { @@ -116,7 +116,7 @@ class SparseTableDictContent : public DictContent { SparseTable mAddressLookupTable; const bool mIsValid; }; -} // namespace v401 +} // namespace v402 } // namespace backward } // namespace latinime -#endif /* LATINIME_BACKWARD_V401_SPARSE_TABLE_DICT_CONTENT_H */ +#endif /* LATINIME_BACKWARD_V402_SPARSE_TABLE_DICT_CONTENT_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.cpp index ab8a3ae43..a9f841779 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.cpp @@ -21,14 +21,14 @@ * suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.cpp */ -#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" namespace latinime { namespace backward { -namespace v401 { +namespace v402 { int TerminalPositionLookupTable::getTerminalPtNodePosition(const int terminalId) const { if (terminalId < 0 || terminalId >= mSize) { @@ -106,6 +106,6 @@ bool TerminalPositionLookupTable::runGCTerminalIds(TerminalIdMap *const terminal return true; } -} // namespace v401 +} // namespace v402 } // namespace backward } // namespace latinime diff --git 
a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h index dbf0e6088..eadfe0faa 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h @@ -21,18 +21,18 @@ * suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h */ -#ifndef LATINIME_BACKWARD_V401_TERMINAL_POSITION_LOOKUP_TABLE_H -#define LATINIME_BACKWARD_V401_TERMINAL_POSITION_LOOKUP_TABLE_H +#ifndef LATINIME_BACKWARD_V402_TERMINAL_POSITION_LOOKUP_TABLE_H +#define LATINIME_BACKWARD_V402_TERMINAL_POSITION_LOOKUP_TABLE_H #include <unordered_map> #include "defines.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/content/single_dict_content.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/content/single_dict_content.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h" namespace latinime { namespace backward { -namespace v401 { +namespace v402 { class TerminalPositionLookupTable : public SingleDictContent { public: @@ -67,7 +67,7 @@ class TerminalPositionLookupTable : public SingleDictContent { int mSize; }; -} // namespace v401 +} // namespace v402 } // namespace backward } // namespace latinime -#endif // LATINIME_BACKWARD_V401_TERMINAL_POSITION_LOOKUP_TABLE_H +#endif // LATINIME_BACKWARD_V402_TERMINAL_POSITION_LOOKUP_TABLE_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/shortcut/ver4_shortcut_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/shortcut/ver4_shortcut_list_policy.h index 6a4e83c0d..941fda748 
100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/shortcut/ver4_shortcut_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/shortcut/ver4_shortcut_list_policy.h @@ -22,18 +22,18 @@ * suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h */ -#ifndef LATINIME_BACKWARD_V401_VER4_SHORTCUT_LIST_POLICY_H -#define LATINIME_BACKWARD_V401_VER4_SHORTCUT_LIST_POLICY_H +#ifndef LATINIME_BACKWARD_V402_VER4_SHORTCUT_LIST_POLICY_H +#define LATINIME_BACKWARD_V402_VER4_SHORTCUT_LIST_POLICY_H #include "defines.h" #include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" #include "suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/content/shortcut_dict_content.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/content/shortcut_dict_content.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h" namespace latinime { namespace backward { -namespace v401 { +namespace v402 { class Ver4ShortcutListPolicy : public DictionaryShortcutsStructurePolicy { public: @@ -112,7 +112,7 @@ class Ver4ShortcutListPolicy : public DictionaryShortcutsStructurePolicy { ShortcutDictContent *const mShortcutDictContent; }; -} // namespace v401 +} // namespace v402 } // namespace backward } // namespace latinime -#endif // LATINIME_BACKWARD_V401_VER4_SHORTCUT_LIST_POLICY_H +#endif // LATINIME_BACKWARD_V402_VER4_SHORTCUT_LIST_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.cpp index 55ead01a9..93f192976 100644 --- 
a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.cpp @@ -21,7 +21,7 @@ * suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp */ -#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.h" #include <cerrno> #include <cstring> @@ -33,7 +33,7 @@ namespace latinime { namespace backward { -namespace v401 { +namespace v402 { /* static */ Ver4DictBuffers::Ver4DictBuffersPtr Ver4DictBuffers::openVer4DictBuffers( const char *const dictPath, MmappedBuffer::MmappedBufferPtr headerBuffer, @@ -65,6 +65,7 @@ bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath, return false; } } + umask(S_IWGRP | S_IWOTH); if (mkdir(tmpDirPath, S_IRWXU) == -1) { AKLOGE("Cannot create directory: %s. errno: %d.", tmpDirPath, errno); return false; @@ -150,6 +151,6 @@ Ver4DictBuffers::Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const i mBigramDictContent(headerPolicy->hasHistoricalInfoOfWords()), mShortcutDictContent(), mIsUpdatable(true) {} -} // namespace v401 +} // namespace v402 } // namespace backward } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.h index 716ed931b..e775be52e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.h @@ -21,24 +21,24 @@ * suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h */ -#ifndef LATINIME_BACKWARD_V401_VER4_DICT_BUFFER_H -#define LATINIME_BACKWARD_V401_VER4_DICT_BUFFER_H +#ifndef LATINIME_BACKWARD_V402_VER4_DICT_BUFFER_H +#define 
LATINIME_BACKWARD_V402_VER4_DICT_BUFFER_H #include <memory> #include "defines.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_dict_content.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/content/probability_dict_content.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/content/shortcut_dict_content.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/content/shortcut_dict_content.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" namespace latinime { namespace backward { -namespace v401 { +namespace v402 { class Ver4DictBuffers { public: @@ -146,7 +146,7 @@ class Ver4DictBuffers { ShortcutDictContent mShortcutDictContent; const int mIsUpdatable; }; -} // namespace v401 +} // namespace v402 } // namespace backward } // namespace latinime -#endif /* LATINIME_BACKWARD_V401_VER4_DICT_BUFFER_H */ +#endif /* LATINIME_BACKWARD_V402_VER4_DICT_BUFFER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.cpp index 793b44ed4..81d85f495 100644 --- 
a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.cpp @@ -21,11 +21,11 @@ * suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp */ -#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h" namespace latinime { namespace backward { -namespace v401 { +namespace v402 { // These values MUST match the definitions in FormatSpec.java. const char *const Ver4DictConstants::TRIE_FILE_EXTENSION = ".trie"; @@ -76,6 +76,6 @@ const int Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE = 1; const int Ver4DictConstants::SHORTCUT_PROBABILITY_MASK = 0x0F; const int Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK = 0x80; -} // namespace v401 +} // namespace v402 } // namespace backward } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h index 17afeb156..88ebd6a75 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h @@ -21,14 +21,14 @@ * suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h */ -#ifndef LATINIME_BACKWARD_V401_VER4_DICT_CONSTANTS_H -#define LATINIME_BACKWARD_V401_VER4_DICT_CONSTANTS_H +#ifndef LATINIME_BACKWARD_V402_VER4_DICT_CONSTANTS_H +#define LATINIME_BACKWARD_V402_VER4_DICT_CONSTANTS_H #include "defines.h" namespace latinime { namespace backward { -namespace v401 { +namespace v402 { // TODO: Create PtConstants under the pt_common and move some constant values there. // Note that there are corresponding definitions in FormatSpec.java. 
@@ -78,7 +78,7 @@ class Ver4DictConstants { private: DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictConstants); }; -} // namespace v401 +} // namespace v402 } // namespace backward } // namespace latinime -#endif /* LATINIME_BACKWARD_V401_VER4_DICT_CONSTANTS_H */ +#endif /* LATINIME_BACKWARD_V402_VER4_DICT_CONSTANTS_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.cpp index 80b51b292..82399f190 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.cpp @@ -21,19 +21,19 @@ * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp */ -#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/content/probability_dict_content.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/content/probability_entry.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include 
"suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" namespace latinime { namespace backward { -namespace v401 { +namespace v402 { const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode( const int ptNodePos, const int siblingNodePos) const { @@ -104,6 +104,6 @@ const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProce } } -} // namespace v401 +} // namespace v402 } // namespace backward } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h index 0531b0a29..4032a67fa 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h @@ -21,8 +21,8 @@ * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h */ -#ifndef LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_NODE_READER_H -#define LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_NODE_READER_H +#ifndef LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_NODE_READER_H +#define LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_NODE_READER_H #include "defines.h" #include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" @@ -30,18 +30,18 @@ namespace latinime { namespace backward { -namespace v401 { +namespace v402 { -} // namespace v401 +} // namespace v402 } // namespace backward class BufferWithExtendableBuffer; namespace backward { -namespace v401 { -} // namespace v401 +namespace v402 { +} // namespace v402 } // namespace backward class HeaderPolicy; namespace backward { -namespace v401 { +namespace v402 { class ProbabilityDictContent; /* @@ -73,7 +73,7 @@ class Ver4PatriciaTrieNodeReader : public PtNodeReader { const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int 
ptNodePos, const int siblingNodePos) const; }; -} // namespace v401 +} // namespace v402 } // namespace backward } // namespace latinime -#endif /* LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_NODE_READER_H */ +#endif /* LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_NODE_READER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp index 8de6bacfc..4220a9561 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_writer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp @@ -21,24 +21,24 @@ * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp */ -#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_writer.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h" #include "suggest/core/dictionary/property/unigram_property.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h" #include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/bigram/ver4_bigram_list_policy.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/content/probability_entry.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/shortcut/ver4_shortcut_list_policy.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.h" +#include 
"suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/shortcut/ver4_shortcut_list_policy.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" namespace latinime { namespace backward { -namespace v401 { +namespace v402 { const int Ver4PatriciaTrieNodeWriter::CHILDREN_POSITION_FIELD_SIZE = 3; @@ -424,6 +424,6 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeFlags(const int ptNodePos, return true; } -} // namespace v401 +} // namespace v402 } // namespace backward } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h index 7f1851d63..08226ea26 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_writer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h @@ -21,29 +21,29 @@ * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h */ -#ifndef LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_NODE_WRITER_H -#define LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_NODE_WRITER_H +#ifndef LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_NODE_WRITER_H +#define LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_NODE_WRITER_H #include "defines.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" #include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" #include 
"suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/content/probability_entry.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h" namespace latinime { namespace backward { -namespace v401 { +namespace v402 { -} // namespace v401 +} // namespace v402 } // namespace backward class BufferWithExtendableBuffer; namespace backward { -namespace v401 { -} // namespace v401 +namespace v402 { +} // namespace v402 } // namespace backward class HeaderPolicy; namespace backward { -namespace v401 { +namespace v402 { class Ver4BigramListPolicy; class Ver4DictBuffers; class Ver4PatriciaTrieNodeReader; @@ -139,7 +139,7 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter { Ver4BigramListPolicy *const mBigramPolicy; Ver4ShortcutListPolicy *const mShortcutPolicy; }; -} // namespace v401 +} // namespace v402 } // namespace backward } // namespace latinime -#endif /* LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_NODE_WRITER_H */ +#endif /* LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_NODE_WRITER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp index 97e1120a3..e571d8986 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp @@ -22,7 +22,7 @@ * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp */ -#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h" #include <vector> @@ -33,13 +33,13 @@ #include "suggest/core/dictionary/property/word_property.h" #include 
"suggest/core/session/prev_words_info.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" #include "suggest/policyimpl/dictionary/utils/probability_utils.h" namespace latinime { namespace backward { -namespace v401 { +namespace v402 { // Note that there are corresponding definitions in Java side in BinaryDictionaryTests and // BinaryDictionaryDecayingTests. @@ -70,13 +70,17 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d // valid terminal DicNode. isTerminal = ptNodeParams.getProbability() != NOT_A_PROBABILITY; } + readingHelper.readNextSiblingNode(ptNodeParams); + if (ptNodeParams.representsNonWordInfo()) { + // Skip PtNodes that represent non-word information. + continue; + } childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(), ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal, ptNodeParams.hasChildren(), ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */, ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints()); - readingHelper.readNextSiblingNode(ptNodeParams); } if (readingHelper.isError()) { mIsCorrupted = true; @@ -122,9 +126,7 @@ int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability, } else if (bigramProbability == NOT_A_PROBABILITY) { return ProbabilityUtils::backoff(unigramProbability); } else { - // bigramProbability is a bigram probability delta. 
- return ProbabilityUtils::computeProbabilityForBigram(unigramProbability, - bigramProbability); + return bigramProbability; } } } @@ -189,9 +191,19 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int le DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); readingHelper.initWithPtNodeArrayPos(getRootPosition()); bool addedNewUnigram = false; - if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, + int codePointsToAdd[MAX_WORD_LENGTH]; + int codePointCountToAdd = length; + memmove(codePointsToAdd, word, sizeof(int) * length); + if (unigramProperty->representsBeginningOfSentence()) { + codePointCountToAdd = CharUtils::attachBeginningOfSentenceMarker(codePointsToAdd, + codePointCountToAdd, MAX_WORD_LENGTH); + } + if (codePointCountToAdd <= 0) { + return false; + } + if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointsToAdd, codePointCountToAdd, unigramProperty, &addedNewUnigram)) { - if (addedNewUnigram) { + if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) { mUnigramCount++; } if (unigramProperty->getShortcuts().size() > 0) { @@ -221,8 +233,6 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int le bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo, const BigramProperty *const bigramProperty) { - const int length0 = prevWordsInfo->getNthPrevWordCodePointCount(1); - const int *word0 = prevWordsInfo->getNthPrevWordCodePoints(1); if (!mBuffers->isUpdatable()) { AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary."); return false; @@ -232,15 +242,20 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI mDictBuffer->getTailPosition()); return false; } - if (length0 > MAX_WORD_LENGTH - || bigramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) { - AKLOGE("Either src word or target word is too long to insert the bigram to the dictionary. 
" - "length0: %d, length1: %d", length0, bigramProperty->getTargetCodePoints()->size()); + if (!prevWordsInfo->isValid()) { + AKLOGE("prev words info is not valid for adding n-gram entry to the dictionary."); return false; } - const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0, - false /* forceLowerCaseSearch */); - if (word0Pos == NOT_A_DICT_POS) { + if (bigramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) { + AKLOGE("The word is too long to insert the ngram to the dictionary. " + "length: %d", bigramProperty->getTargetCodePoints()->size()); + return false; + } + int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; + prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos, + false /* tryLowerCaseSearch */); + // TODO: Support N-gram. + if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) { return false; } const int word1Pos = getTerminalPtNodePositionOfWord( @@ -250,7 +265,8 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI return false; } bool addedNewBigram = false; - if (mUpdatingHelper.addBigramWords(word0Pos, word1Pos, bigramProperty, &addedNewBigram)) { + if (mUpdatingHelper.addBigramWords(prevWordsPtNodePos[0], word1Pos, bigramProperty, + &addedNewBigram)) { if (addedNewBigram) { mBigramCount++; } @@ -261,11 +277,9 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI } bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, - const int *const word1, const int length1) { - const int length0 = prevWordsInfo->getNthPrevWordCodePointCount(1); - const int *word0 = prevWordsInfo->getNthPrevWordCodePoints(1); + const int *const word, const int length) { if (!mBuffers->isUpdatable()) { - AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary."); + AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary."); return false; } if (mDictBuffer->getTailPosition() >= 
MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { @@ -273,22 +287,26 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor mDictBuffer->getTailPosition()); return false; } - if (length0 > MAX_WORD_LENGTH || length1 > MAX_WORD_LENGTH) { - AKLOGE("Either src word or target word is too long to remove the bigram to from the " - "dictionary. length0: %d, length1: %d", length0, length1); + if (!prevWordsInfo->isValid()) { + AKLOGE("prev words info is not valid for removing n-gram entry form the dictionary."); return false; } - const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0, - false /* forceLowerCaseSearch */); - if (word0Pos == NOT_A_DICT_POS) { + if (length > MAX_WORD_LENGTH) { + AKLOGE("word is too long to remove n-gram entry form the dictionary. length: %d", length); + } + int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; + prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos, + false /* tryLowerCaseSerch */); + // TODO: Support N-gram. + if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) { return false; } - const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1, + const int wordPos = getTerminalPtNodePositionOfWord(word, length, false /* forceLowerCaseSearch */); - if (word1Pos == NOT_A_DICT_POS) { + if (wordPos == NOT_A_DICT_POS) { return false; } - if (mUpdatingHelper.removeBigramWords(word0Pos, word1Pos)) { + if (mUpdatingHelper.removeBigramWords(prevWordsPtNodePos[0], wordPos)) { mBigramCount--; return true; } else { @@ -296,26 +314,30 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor } } -void Ver4PatriciaTriePolicy::flush(const char *const filePath) { +bool Ver4PatriciaTriePolicy::flush(const char *const filePath) { if (!mBuffers->isUpdatable()) { AKLOGI("Warning: flush() is called for non-updatable dictionary. 
filePath: %s", filePath); - return; + return false; } if (!mWritingHelper.writeToDictFile(filePath, mUnigramCount, mBigramCount)) { AKLOGE("Cannot flush the dictionary to file."); mIsCorrupted = true; + return false; } + return true; } -void Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) { +bool Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) { if (!mBuffers->isUpdatable()) { AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary."); - return; + return false; } if (!mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath)) { AKLOGE("Cannot flush the dictionary to file with GC."); mIsCorrupted = true; + return false; } + return true; } bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const { @@ -409,7 +431,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code const int probability = bigramEntry.hasHistoricalInfo() ? ForgettingCurveUtils::decodeProbability( bigramEntry.getHistoricalInfo(), mHeaderPolicy) : - getProbability(word1Probability, bigramEntry.getProbability()); + bigramEntry.getProbability(); bigrams.emplace_back(&word1, probability, historicalInfo->getTimeStamp(), historicalInfo->getLevel(), historicalInfo->getCount()); @@ -432,8 +454,8 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code shortcuts.emplace_back(&target, shortcutProbability); } } - const UnigramProperty unigramProperty(ptNodeParams.isNotAWord(), - ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(), + const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(), + ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(), historicalInfo->getTimeStamp(), historicalInfo->getLevel(), historicalInfo->getCount(), &shortcuts); return WordProperty(&codePointVector, &unigramProperty, &bigrams); @@ -475,6 +497,6 @@ int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const return 
nextToken; } -} // namespace v401 +} // namespace v402 } // namespace backward } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h index 95813881d..e323652d4 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h @@ -22,8 +22,8 @@ * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h */ -#ifndef LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_POLICY_H -#define LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_POLICY_H +#ifndef LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_POLICY_H +#define LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_POLICY_H #include <vector> @@ -31,29 +31,29 @@ #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/bigram/ver4_bigram_list_policy.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/shortcut/ver4_shortcut_list_policy.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_writer.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_writing_helper.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_pt_node_array_reader.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h" +#include 
"suggest/policyimpl/dictionary/structure/backward/v402/shortcut/ver4_shortcut_list_policy.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" namespace latinime { namespace backward { -namespace v401 { +namespace v402 { -} // namespace v401 +} // namespace v402 } // namespace backward class DicNode; namespace backward { -namespace v401 { -} // namespace v401 +namespace v402 { +} // namespace v402 } // namespace backward class DicNodeVector; namespace backward { -namespace v401 { +namespace v402 { class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { public: @@ -114,12 +114,12 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, const BigramProperty *const bigramProperty); - bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word, - const int length); + bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word1, + const int length1); - void flush(const char *const filePath); + bool flush(const char *const filePath); - void flushWithGC(const char *const filePath); + bool flushWithGC(const char *const filePath); bool needsToRunGC(const bool mindsBlockByGC) const; @@ -162,7 +162,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { std::vector<int> mTerminalPtNodePositionsForIteratingWords; mutable bool mIsCorrupted; }; -} // namespace v401 +} // namespace v402 } // namespace 
backward } // namespace latinime -#endif // LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_POLICY_H +#endif // LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_reading_utils.cpp index 6cc36fbef..80d531198 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_reading_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_reading_utils.cpp @@ -21,19 +21,19 @@ * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp */ -#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" namespace latinime { namespace backward { -namespace v401 { +namespace v402 { /* static */ int Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition( const uint8_t *const buffer, int *pos) { return ByteArrayUtils::readUint32AndAdvancePosition(buffer, pos); } -} // namespace v401 +} // namespace v402 } // namespace backward } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_reading_utils.h index 7417c261e..3579c26d6 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_reading_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_reading_utils.h @@ -21,8 +21,8 @@ * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h */ -#ifndef 
LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_READING_UTILS_H -#define LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_READING_UTILS_H +#ifndef LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_READING_UTILS_H +#define LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_READING_UTILS_H #include <cstdint> @@ -30,13 +30,13 @@ namespace latinime { namespace backward { -namespace v401 { +namespace v402 { -} // namespace v401 +} // namespace v402 } // namespace backward class BufferWithExtendableBuffer; namespace backward { -namespace v401 { +namespace v402 { class Ver4PatriciaTrieReadingUtils { public: @@ -46,7 +46,7 @@ class Ver4PatriciaTrieReadingUtils { private: DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTrieReadingUtils); }; -} // namespace v401 +} // namespace v402 } // namespace backward } // namespace latinime -#endif /* LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_READING_UTILS_H */ +#endif /* LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_READING_UTILS_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.cpp index 10f27beb7..99eed0f67 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.cpp @@ -21,26 +21,26 @@ * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp */ -#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_writing_helper.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.h" #include <cstring> #include <queue> #include "suggest/policyimpl/dictionary/header/header_policy.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/bigram/ver4_bigram_list_policy.h" -#include 
"suggest/policyimpl/dictionary/structure/backward/v401/shortcut/ver4_shortcut_list_policy.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_writer.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_pt_node_array_reader.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/shortcut/ver4_shortcut_list_policy.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/file_utils.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" namespace latinime { namespace backward { -namespace v401 { +namespace v402 { bool Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const dictDirPath, const int unigramCount, const int bigramCount) const { @@ -222,13 +222,16 @@ bool Ver4PatriciaTrieWritingHelper::truncateUnigrams( // Delete unigrams. 
while (static_cast<int>(priorityQueue.size()) > maxUnigramCount) { const int ptNodePos = priorityQueue.top().getDictPos(); + priorityQueue.pop(); const PtNodeParams ptNodeParams = ptNodeReader->fetchNodeInfoInBufferFromPtNodePos(ptNodePos); + if (ptNodeParams.representsNonWordInfo()) { + continue; + } if (!ptNodeWriter->markPtNodeAsWillBecomeNonTerminal(&ptNodeParams)) { AKLOGE("Cannot mark PtNode as willBecomeNonterminal. PtNode pos: %d", ptNodePos); return false; } - priorityQueue.pop(); } return true; } @@ -296,6 +299,6 @@ bool Ver4PatriciaTrieWritingHelper::TraversePolicyToUpdateAllPtNodeFlagsAndTermi return mPtNodeWriter->updatePtNodeHasBigramsAndShortcutTargetsFlags(ptNodeParams); } -} // namespace v401 +} // namespace v402 } // namespace backward } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.h index be44aaa33..9034ee656 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_writing_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.h @@ -21,22 +21,22 @@ * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h */ -#ifndef LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_WRITING_HELPER_H -#define LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_WRITING_HELPER_H +#ifndef LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_WRITING_HELPER_H +#define LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_WRITING_HELPER_H #include "defines.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h" namespace latinime { 
namespace backward { -namespace v401 { +namespace v402 { -} // namespace v401 +} // namespace v402 } // namespace backward class HeaderPolicy; namespace backward { -namespace v401 { +namespace v402 { class Ver4DictBuffers; class Ver4PatriciaTrieNodeReader; class Ver4PatriciaTrieNodeWriter; @@ -133,8 +133,8 @@ class Ver4PatriciaTrieWritingHelper { Ver4DictBuffers *const mBuffers; }; -} // namespace v401 +} // namespace v402 } // namespace backward } // namespace latinime -#endif /* LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_WRITING_HELPER_H */ +#endif /* LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_WRITING_HELPER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_pt_node_array_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.cpp index 33e4e55e2..537a6d420 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_pt_node_array_reader.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.cpp @@ -21,7 +21,7 @@ * suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.cpp */ -#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_pt_node_array_reader.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h" @@ -29,7 +29,7 @@ namespace latinime { namespace backward { -namespace v401 { +namespace v402 { bool Ver4PtNodeArrayReader::readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos, int *const outPtNodeCount, int *const outFirstPtNodePos) const { @@ -85,6 +85,6 @@ bool Ver4PtNodeArrayReader::readForwardLinkAndReturnIfValid(const int forwordLin return true; } -} // namespace v401 +} // namespace v402 } // namespace backward } // namespace latinime 
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_pt_node_array_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.h index 3a7eefa44..4f8056801 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_pt_node_array_reader.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.h @@ -21,21 +21,21 @@ * suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h */ -#ifndef LATINIME_BACKWARD_V401_VER4_PT_NODE_ARRAY_READER_H -#define LATINIME_BACKWARD_V401_VER4_PT_NODE_ARRAY_READER_H +#ifndef LATINIME_BACKWARD_V402_VER4_PT_NODE_ARRAY_READER_H +#define LATINIME_BACKWARD_V402_VER4_PT_NODE_ARRAY_READER_H #include "defines.h" #include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h" namespace latinime { namespace backward { -namespace v401 { +namespace v402 { -} // namespace v401 +} // namespace v402 } // namespace backward class BufferWithExtendableBuffer; namespace backward { -namespace v401 { +namespace v402 { class Ver4PtNodeArrayReader : public PtNodeArrayReader { public: @@ -51,7 +51,7 @@ class Ver4PtNodeArrayReader : public PtNodeArrayReader { const BufferWithExtendableBuffer *const mBuffer; }; -} // namespace v401 +} // namespace v402 } // namespace backward } // namespace latinime -#endif /* LATINIME_BACKWARD_V401_VER4_PT_NODE_ARRAY_READER_H */ +#endif /* LATINIME_BACKWARD_V402_VER4_PT_NODE_ARRAY_READER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp index f93d2894c..e4b5fa267 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp +++ 
b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp @@ -19,9 +19,9 @@ #include <climits> #include "defines.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h" #include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h" @@ -58,10 +58,10 @@ namespace latinime { FormatUtils::FORMAT_VERSION dictFormatVersion = FormatUtils::getFormatVersion(formatVersion); switch (dictFormatVersion) { case FormatUtils::VERSION_4: { - return newPolicyForOnMemoryV4Dict<backward::v401::Ver4DictConstants, - backward::v401::Ver4DictBuffers, - backward::v401::Ver4DictBuffers::Ver4DictBuffersPtr, - backward::v401::Ver4PatriciaTriePolicy>( + return newPolicyForOnMemoryV4Dict<backward::v402::Ver4DictConstants, + backward::v402::Ver4DictBuffers, + backward::v402::Ver4DictBuffers::Ver4DictBuffersPtr, + backward::v402::Ver4PatriciaTriePolicy>( dictFormatVersion, locale, attributeMap); } case FormatUtils::VERSION_4_ONLY_FOR_TESTING: @@ -116,10 +116,10 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str AKLOGE("Given path is a directory but the format is version 2. 
path: %s", path); break; case FormatUtils::VERSION_4: { - return newPolicyForV4Dict<backward::v401::Ver4DictConstants, - backward::v401::Ver4DictBuffers, - backward::v401::Ver4DictBuffers::Ver4DictBuffersPtr, - backward::v401::Ver4PatriciaTriePolicy>( + return newPolicyForV4Dict<backward::v402::Ver4DictConstants, + backward::v402::Ver4DictBuffers, + backward::v402::Ver4DictBuffers::Ver4DictBuffersPtr, + backward::v402::Ver4PatriciaTriePolicy>( headerFilePath, formatVersion, std::move(mmappedBuffer)); } case FormatUtils::VERSION_4_ONLY_FOR_TESTING: diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp index 028e9ecbf..1f00fc6ab 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp @@ -56,7 +56,7 @@ bool DynamicPtGcEventListeners } } else { mValueStack.back() += 1; - if (ptNodeParams->isTerminal()) { + if (ptNodeParams->isTerminal() && !ptNodeParams->representsNonWordInfo()) { mValidUnigramCount += 1; } } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h index 5704c2e90..b2e60a837 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h @@ -160,7 +160,12 @@ class PtNodeParams { } AK_FORCE_INLINE bool representsNonWordInfo() const { - return getCodePointCount() > 0 && CharUtils::isInUnicodeSpace(getCodePoints()[0]) + return getCodePointCount() > 0 && !CharUtils::isInUnicodeSpace(getCodePoints()[0]) + && isNotAWord(); + } + + AK_FORCE_INLINE int representsBeginningOfSentence() const { + return 
getCodePointCount() > 0 && getCodePoints()[0] == CODE_POINT_BEGINNING_OF_SENTENCE && isNotAWord(); } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp index 30dcfba37..a6a470c4e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp @@ -383,8 +383,8 @@ const WordProperty PatriciaTriePolicy::getWordProperty(const int *const codePoin shortcuts.emplace_back(&shortcutTarget, shortcutProbability); } } - const UnigramProperty unigramProperty(ptNodeParams.isNotAWord(), - ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(), + const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(), + ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(), NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts); return WordProperty(&codePointVector, &unigramProperty, &bigrams); } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h index 6240d46aa..88bbfd966 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h @@ -102,14 +102,16 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { return false; } - void flush(const char *const filePath) { + bool flush(const char *const filePath) { // This method should not be called for non-updatable dictionary. 
AKLOGI("Warning: flush() is called for non-updatable dictionary."); + return false; } - void flushWithGC(const char *const filePath) { + bool flushWithGC(const char *const filePath) { // This method should not be called for non-updatable dictionary. AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary."); + return false; } bool needsToRunGC(const bool mindsBlockByGC) const { diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp index d53922763..e1ceaee49 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp @@ -23,9 +23,11 @@ namespace latinime { const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition( int *const bigramEntryPos) const { const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer(); - if (*bigramEntryPos < 0 || *bigramEntryPos >= bigramListBuffer->getTailPosition()) { - AKLOGE("Invalid bigram entry position. bigramEntryPos: %d, bufSize: %d", - *bigramEntryPos, bigramListBuffer->getTailPosition()); + const int bigramEntryTailPos = (*bigramEntryPos) + getBigramEntrySize(); + if (*bigramEntryPos < 0 || bigramEntryTailPos > bigramListBuffer->getTailPosition()) { + AKLOGE("Invalid bigram entry position. 
bigramEntryPos: %d, bigramEntryTailPos: %d, " + "bufSize: %d", *bigramEntryPos, bigramEntryTailPos, + bigramListBuffer->getTailPosition()); ASSERT(false); return BigramEntry(false /* hasNext */, NOT_A_PROBABILITY, Ver4DictConstants::NOT_A_TERMINAL_ID); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h index b8bdb63a8..52447a336 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h @@ -99,6 +99,20 @@ class BigramDictContent : public SparseTableDictContent { return hasNext ? Ver4DictConstants::BIGRAM_HAS_NEXT_MASK : 0; } + int getBigramEntrySize() const { + if (mHasHistoricalInfo) { + return Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE + + Ver4DictConstants::TIME_STAMP_FIELD_SIZE + + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE + + Ver4DictConstants::WORD_COUNT_FIELD_SIZE + + Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE; + } else { + return Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE + + Ver4DictConstants::PROBABILITY_SIZE + + Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE; + } + } + bool runGCBigramList(const int bigramListPos, const BigramDictContent *const sourceBigramDictContent, const int toPos, const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index 439e90e44..09c7b7d85 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -61,7 +61,7 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d 
isTerminal = ptNodeParams.getProbability() != NOT_A_PROBABILITY; } readingHelper.readNextSiblingNode(ptNodeParams); - if (!ptNodeParams.representsNonWordInfo()) { + if (ptNodeParams.representsNonWordInfo()) { // Skip PtNodes that represent non-word information. continue; } @@ -181,9 +181,19 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int le DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); readingHelper.initWithPtNodeArrayPos(getRootPosition()); bool addedNewUnigram = false; - if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, + int codePointsToAdd[MAX_WORD_LENGTH]; + int codePointCountToAdd = length; + memmove(codePointsToAdd, word, sizeof(int) * length); + if (unigramProperty->representsBeginningOfSentence()) { + codePointCountToAdd = CharUtils::attachBeginningOfSentenceMarker(codePointsToAdd, + codePointCountToAdd, MAX_WORD_LENGTH); + } + if (codePointCountToAdd <= 0) { + return false; + } + if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointsToAdd, codePointCountToAdd, unigramProperty, &addedNewUnigram)) { - if (addedNewUnigram) { + if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) { mUnigramCount++; } if (unigramProperty->getShortcuts().size() > 0) { @@ -294,26 +304,30 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor } } -void Ver4PatriciaTriePolicy::flush(const char *const filePath) { +bool Ver4PatriciaTriePolicy::flush(const char *const filePath) { if (!mBuffers->isUpdatable()) { AKLOGI("Warning: flush() is called for non-updatable dictionary. 
filePath: %s", filePath); - return; + return false; } if (!mWritingHelper.writeToDictFile(filePath, mUnigramCount, mBigramCount)) { AKLOGE("Cannot flush the dictionary to file."); mIsCorrupted = true; + return false; } + return true; } -void Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) { +bool Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) { if (!mBuffers->isUpdatable()) { AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary."); - return; + return false; } if (!mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath)) { AKLOGE("Cannot flush the dictionary to file with GC."); mIsCorrupted = true; + return false; } + return true; } bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const { @@ -430,8 +444,8 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code shortcuts.emplace_back(&target, shortcutProbability); } } - const UnigramProperty unigramProperty(ptNodeParams.isNotAWord(), - ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(), + const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(), + ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(), historicalInfo->getTimeStamp(), historicalInfo->getLevel(), historicalInfo->getCount(), &shortcuts); return WordProperty(&codePointVector, &unigramProperty, &bigrams); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index 008f2e423..d198c97fd 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -99,9 +99,9 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word1, 
const int length1); - void flush(const char *const filePath); + bool flush(const char *const filePath); - void flushWithGC(const char *const filePath); + bool flushWithGC(const char *const filePath); bool needsToRunGC(const bool mindsBlockByGC) const; diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp index 105363db5..4da339b0a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp @@ -23,7 +23,7 @@ #include <sys/types.h> #include "suggest/policyimpl/dictionary/header/header_policy.h" -#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.h" +#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" @@ -42,9 +42,9 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = const FormatUtils::FORMAT_VERSION formatVersion = FormatUtils::getFormatVersion(dictVersion); switch (formatVersion) { case FormatUtils::VERSION_4: - return createEmptyV4DictFile<backward::v401::Ver4DictConstants, - backward::v401::Ver4DictBuffers, - backward::v401::Ver4DictBuffers::Ver4DictBuffersPtr>( + return createEmptyV4DictFile<backward::v402::Ver4DictConstants, + backward::v402::Ver4DictBuffers, + backward::v402::Ver4DictBuffers::Ver4DictBuffersPtr>( filePath, localeAsCodePointVector, attributeMap, formatVersion); case FormatUtils::VERSION_4_ONLY_FOR_TESTING: case FormatUtils::VERSION_4_DEV: diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp 
b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp index ba405b07e..1916ea560 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp @@ -50,7 +50,7 @@ const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 12; const uint32_t magicNumber = ByteArrayUtils::readUint32(dict, 0); switch (magicNumber) { case MAGIC_NUMBER: - // Version 2 header is as follows: + // The layout of the header is as follows: // Magic number (4 bytes) 0x9B 0xC1 0x3A 0xFE // Dictionary format version number (2 bytes) // Options (2 bytes) @@ -58,17 +58,7 @@ const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 12; // Conceptually this converts the hardcoded value of the bytes in the file into // the symbolic value we use in the code. But we want the constants to be the // same so we use them for both here. - if (ByteArrayUtils::readUint16(dict, 4) == VERSION_2) { - return VERSION_2; - } else if (ByteArrayUtils::readUint16(dict, 4) == VERSION_4_ONLY_FOR_TESTING) { - return VERSION_4_ONLY_FOR_TESTING; - } else if (ByteArrayUtils::readUint16(dict, 4) == VERSION_4) { - return VERSION_4; - } else if (ByteArrayUtils::readUint16(dict, 4) == VERSION_4_DEV) { - return VERSION_4_DEV; - } else { - return UNKNOWN_VERSION; - } + return getFormatVersion(ByteArrayUtils::readUint16(dict, 4)); default: return UNKNOWN_VERSION; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h index c47f30ca4..55ad5799f 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h @@ -32,8 +32,8 @@ class FormatUtils { // These MUST have the same values as the relevant constants in FormatSpec.java. 
VERSION_2 = 2, VERSION_4_ONLY_FOR_TESTING = 399, - VERSION_4 = 401, - VERSION_4_DEV = 402, + VERSION_4 = 402, + VERSION_4_DEV = 403, UNKNOWN_VERSION = -1 }; diff --git a/native/jni/src/utils/char_utils.h b/native/jni/src/utils/char_utils.h index 634c45b04..f28ed5682 100644 --- a/native/jni/src/utils/char_utils.h +++ b/native/jni/src/utils/char_utils.h @@ -18,6 +18,7 @@ #define LATINIME_CHAR_UTILS_H #include <cctype> +#include <cstring> #include <vector> #include "defines.h" @@ -93,6 +94,19 @@ class CharUtils { static unsigned short latin_tolower(const unsigned short c); static const std::vector<int> EMPTY_STRING; + // Returns updated code point count. Returns 0 when the code points cannot be marked as a + // Beginning-of-Sentence. + static AK_FORCE_INLINE int attachBeginningOfSentenceMarker(int *const codePoints, + const int codePointCount, const int maxCodePoint) { + if (codePointCount >= maxCodePoint) { + // the code points cannot be marked as a Beginning-of-Sentence. + return 0; + } + memmove(codePoints + 1, codePoints, sizeof(int) * codePointCount); + codePoints[0] = CODE_POINT_BEGINNING_OF_SENTENCE; + return codePointCount + 1; + } + private: DISALLOW_IMPLICIT_CONSTRUCTORS(CharUtils); |