diff options
Diffstat (limited to 'native/jni/src')
24 files changed, 282 insertions, 243 deletions
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp index 4ff4bc2e4..150eb6762 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp +++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp @@ -21,7 +21,6 @@ #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_proximity_filter.h" #include "suggest/core/dicnode/dic_node_vector.h" -#include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/multi_bigram_map.h" #include "suggest/core/dictionary/probability_utils.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" @@ -33,17 +32,17 @@ namespace latinime { // Node initialization utils // /////////////////////////////// -/* static */ void DicNodeUtils::initAsRoot(const BinaryDictionaryInfo *const binaryDictionaryInfo, +/* static */ void DicNodeUtils::initAsRoot( + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, const int prevWordNodePos, DicNode *const newRootNode) { - newRootNode->initAsRoot(binaryDictionaryInfo->getStructurePolicy()->getRootPosition(), - prevWordNodePos); + newRootNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordNodePos); } /*static */ void DicNodeUtils::initAsRootWithPreviousWord( - const BinaryDictionaryInfo *const binaryDictionaryInfo, + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, DicNode *const prevWordLastNode, DicNode *const newRootNode) { newRootNode->initAsRootWithPreviousWord( - prevWordLastNode, binaryDictionaryInfo->getStructurePolicy()->getRootPosition()); + prevWordLastNode, dictionaryStructurePolicy->getRootPosition()); } /* static */ void DicNodeUtils::initByCopy(DicNode *srcNode, DicNode *destNode) { @@ -67,12 +66,13 @@ namespace latinime { } /* static */ void DicNodeUtils::getAllChildDicNodes(DicNode *dicNode, - const BinaryDictionaryInfo *const binaryDictionaryInfo, DicNodeVector *childDicNodes) { - getProximityChildDicNodes(dicNode, binaryDictionaryInfo, 0, 0, false, childDicNodes); + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, + DicNodeVector *childDicNodes) { + getProximityChildDicNodes(dicNode, dictionaryStructurePolicy, 0, 0, false, childDicNodes); } /* static */ void DicNodeUtils::getProximityChildDicNodes(DicNode *dicNode, - const BinaryDictionaryInfo *const binaryDictionaryInfo, + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, const ProximityInfoState *pInfoState, const int pointIndex, bool exactOnly, DicNodeVector *childDicNodes) { if (dicNode->isTotalInputSizeExceedingLimit()) { @@ -82,7 +82,7 @@ namespace latinime { if (!dicNode->isLeavingNode()) { DicNodeUtils::createAndGetPassingChildNode(dicNode, &childrenFilter, childDicNodes); } else { - binaryDictionaryInfo->getStructurePolicy()->createAndGetAllChildNodes(dicNode, + dictionaryStructurePolicy->createAndGetAllChildNodes(dicNode, &childrenFilter, childDicNodes); } } @@ -94,12 +94,13 @@ namespace latinime { * Computes the combined bigram / unigram cost for the given dicNode. */ /* static */ float DicNodeUtils::getBigramNodeImprobability( - const BinaryDictionaryInfo *const binaryDictionaryInfo, + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, const DicNode *const node, MultiBigramMap *multiBigramMap) { if (node->hasMultipleWords() && !node->isValidMultipleWordSuggestion()) { return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); } - const int probability = getBigramNodeProbability(binaryDictionaryInfo, node, multiBigramMap); + const int probability = getBigramNodeProbability(dictionaryStructurePolicy, node, + multiBigramMap); // TODO: This equation to calculate the improbability looks unreasonable. Investigate this. const float cost = static_cast<float>(MAX_PROBABILITY - probability) / static_cast<float>(MAX_PROBABILITY); @@ -107,7 +108,7 @@ namespace latinime { } /* static */ int DicNodeUtils::getBigramNodeProbability( - const BinaryDictionaryInfo *const binaryDictionaryInfo, + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, const DicNode *const node, MultiBigramMap *multiBigramMap) { const int unigramProbability = node->getProbability(); const int wordPos = node->getPos(); @@ -118,8 +119,8 @@ namespace latinime { return ProbabilityUtils::backoff(unigramProbability); } if (multiBigramMap) { - return multiBigramMap->getBigramProbability(binaryDictionaryInfo->getStructurePolicy(), - prevWordPos, wordPos, unigramProbability); + return multiBigramMap->getBigramProbability(dictionaryStructurePolicy, prevWordPos, + wordPos, unigramProbability); } return ProbabilityUtils::backoff(unigramProbability); } diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.h b/native/jni/src/suggest/core/dicnode/dic_node_utils.h index 4f12b29f4..8dc984fe1 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_utils.h +++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.h @@ -23,10 +23,10 @@ namespace latinime { -class BinaryDictionaryInfo; class DicNode; class DicNodeProximityFilter; class DicNodeVector; +class DictionaryStructureWithBufferPolicy; class ProximityInfoState; class MultiBigramMap; @@ -34,18 +34,22 @@ class DicNodeUtils { public: static int appendTwoWords(const int *src0, const int16_t length0, const int *src1, const int16_t length1, int *dest); - static void initAsRoot(const BinaryDictionaryInfo *const binaryDictionaryInfo, + static void initAsRoot( + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, const int prevWordNodePos, DicNode *newRootNode); - static void initAsRootWithPreviousWord(const BinaryDictionaryInfo *const binaryDictionaryInfo, + static void initAsRootWithPreviousWord( + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, DicNode *prevWordLastNode, DicNode *newRootNode); static void initByCopy(DicNode *srcNode, DicNode *destNode); static void getAllChildDicNodes(DicNode *dicNode, - const BinaryDictionaryInfo *const binaryDictionaryInfo, DicNodeVector *childDicNodes); - static float getBigramNodeImprobability(const BinaryDictionaryInfo *const binaryDictionaryInfo, + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, + DicNodeVector *childDicNodes); + static float getBigramNodeImprobability( + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, const DicNode *const node, MultiBigramMap *const multiBigramMap); // TODO: Move to private static void getProximityChildDicNodes(DicNode *dicNode, - const BinaryDictionaryInfo *const binaryDictionaryInfo, + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, const ProximityInfoState *pInfoState, const int pointIndex, bool exactOnly, DicNodeVector *childDicNodes); @@ -54,7 +58,8 @@ class DicNodeUtils { // Max number of bigrams to look up static const int MAX_BIGRAMS_CONSIDERED_PER_CONTEXT = 500; - static int getBigramNodeProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo, + static int getBigramNodeProbability( + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, const DicNode *const node, MultiBigramMap *multiBigramMap); static void createAndGetPassingChildNode(DicNode *dicNode, const DicNodeProximityFilter *const childrenFilter, DicNodeVector *childDicNodes); diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp index fdaa562e5..ebe76467a 100644 --- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp @@ -22,15 +22,16 @@ #include "defines.h" #include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h" -#include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/dictionary.h" #include "suggest/core/dictionary/probability_utils.h" +#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "utils/char_utils.h" namespace latinime { -BigramDictionary::BigramDictionary(const BinaryDictionaryInfo *const binaryDictionaryInfo) - : mBinaryDictionaryInfo(binaryDictionaryInfo) { +BigramDictionary::BigramDictionary( + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy) + : mDictionaryStructurePolicy(dictionaryStructurePolicy) { if (DEBUG_DICT) { AKLOGI("BigramDictionary - constructor"); } @@ -112,12 +113,11 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng int bigramCount = 0; int unigramProbability = 0; int bigramBuffer[MAX_WORD_LENGTH]; - const DictionaryStructureWithBufferPolicy *const structurePolicy = - mBinaryDictionaryInfo->getStructurePolicy(); - BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), pos); + BinaryDictionaryBigramsIterator bigramsIt( + mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos); while (bigramsIt.hasNext()) { bigramsIt.next(); - const int length = structurePolicy-> + const int length = mDictionaryStructurePolicy-> getCodePointsAndProbabilityAndReturnCodePointCount(bigramsIt.getBigramPos(), MAX_WORD_LENGTH, bigramBuffer, &unigramProbability); // Due to space constraints, the probability for bigrams is approximate - the lower the @@ -139,12 +139,10 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength, const bool forceLowerCaseSearch) const { if (0 >= prevWordLength) return NOT_A_DICT_POS; - const DictionaryStructureWithBufferPolicy *const structurePolicy = - mBinaryDictionaryInfo->getStructurePolicy(); - int pos = structurePolicy->getTerminalNodePositionOfWord(prevWord, prevWordLength, + int pos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(prevWord, prevWordLength, forceLowerCaseSearch); if (NOT_A_VALID_WORD_POS == pos) return NOT_A_DICT_POS; - return structurePolicy->getBigramsPositionOfNode(pos); + return mDictionaryStructurePolicy->getBigramsPositionOfNode(pos); } bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *word1, @@ -152,13 +150,12 @@ bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *w int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */); // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams if (NOT_A_DICT_POS == pos) return false; - const DictionaryStructureWithBufferPolicy *const structurePolicy = - mBinaryDictionaryInfo->getStructurePolicy(); - int nextWordPos = structurePolicy->getTerminalNodePositionOfWord(word1, length1, + int nextWordPos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(word1, length1, false /* forceLowerCaseSearch */); if (NOT_A_VALID_WORD_POS == nextWordPos) return false; - BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), pos); + BinaryDictionaryBigramsIterator bigramsIt( + mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos); while (bigramsIt.hasNext()) { bigramsIt.next(); if (bigramsIt.getBigramPos() == nextWordPos) { diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h b/native/jni/src/suggest/core/dictionary/bigram_dictionary.h index 438c34cac..99b964c49 100644 --- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h +++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.h @@ -21,11 +21,11 @@ namespace latinime { -class BinaryDictionaryInfo; +class DictionaryStructureWithBufferPolicy; class BigramDictionary { public: - BigramDictionary(const BinaryDictionaryInfo *const binaryDictionaryInfo); + BigramDictionary(const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy); int getPredictions(const int *word, int length, int *outBigramCodePoints, int *outBigramProbability, int *outputTypes) const; @@ -40,7 +40,7 @@ class BigramDictionary { int getBigramListPositionForWord(const int *prevWord, const int prevWordLength, const bool forceLowerCaseSearch) const; - const BinaryDictionaryInfo *const mBinaryDictionaryInfo; + const DictionaryStructureWithBufferPolicy *const mDictionaryStructurePolicy; }; } // namespace latinime #endif // LATINIME_BIGRAM_DICTIONARY_H diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h index 818b2af56..e50baae0b 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h @@ -20,32 +20,17 @@ #include <stdint.h> #include "defines.h" -#include "jni.h" -#include "suggest/core/dictionary/binary_dictionary_format_utils.h" -#include "suggest/core/dictionary/binary_dictionary_header.h" -#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h" -#include "utils/log_utils.h" namespace latinime { class BinaryDictionaryInfo { public: - AK_FORCE_INLINE BinaryDictionaryInfo(JNIEnv *env, const uint8_t *const dictBuf, + AK_FORCE_INLINE BinaryDictionaryInfo(const uint8_t *const dictBuf, const int dictSize, const int mmapFd, const int dictBufOffset, const bool isUpdatable) : mDictBuf(dictBuf), mDictSize(dictSize), mMmapFd(mmapFd), - mDictBufOffset(dictBufOffset), mIsUpdatable(isUpdatable), - mDictionaryFormat(BinaryDictionaryFormatUtils::detectFormatVersion( - mDictBuf, mDictSize)), - mDictionaryHeader(dictBuf), mDictRoot(mDictBuf + mDictionaryHeader.getSize()), - // TODO: Remove. - mStructurePolicy(DictionaryStructureWithBufferPolicyFactory - ::newDictionaryStructurePolicy(this)) { - logDictionaryInfo(env); - } + mDictBufOffset(dictBufOffset), mIsUpdatable(isUpdatable) {} - ~BinaryDictionaryInfo() { - delete mStructurePolicy; - } + ~BinaryDictionaryInfo() {} AK_FORCE_INLINE const uint8_t *getDictBuf() const { return mDictBuf; @@ -63,30 +48,12 @@ class BinaryDictionaryInfo { return mDictBufOffset; } - AK_FORCE_INLINE const uint8_t *getDictRoot() const { - return mDictRoot; - } - - AK_FORCE_INLINE BinaryDictionaryFormatUtils::FORMAT_VERSION getFormat() const { - return mDictionaryFormat; - } - - // TODO: Move to DictionaryStructurePolicy. - AK_FORCE_INLINE const BinaryDictionaryHeader *getHeader() const { - return &mDictionaryHeader; - } - AK_FORCE_INLINE bool isDynamicallyUpdatable() const { // TODO: Support dynamic dictionary formats. const bool isUpdatableDictionaryFormat = false; return mIsUpdatable && isUpdatableDictionaryFormat; } - // TODO: remove - AK_FORCE_INLINE const DictionaryStructureWithBufferPolicy *getStructurePolicy() const { - return mStructurePolicy; - } - private: DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryInfo); @@ -95,40 +62,6 @@ class BinaryDictionaryInfo { const int mMmapFd; const int mDictBufOffset; const bool mIsUpdatable; - const BinaryDictionaryFormatUtils::FORMAT_VERSION mDictionaryFormat; - // TODO: Move BinaryDictionaryHeader to policyimpl and introduce dedicated API to the - // DictionaryStructurePolicy. - const BinaryDictionaryHeader mDictionaryHeader; - const uint8_t *const mDictRoot; - // TODO: remove - const DictionaryStructureWithBufferPolicy *const mStructurePolicy; - - AK_FORCE_INLINE void logDictionaryInfo(JNIEnv *const env) const { - const int BUFFER_SIZE = 16; - int dictionaryIdCodePointBuffer[BUFFER_SIZE]; - int versionStringCodePointBuffer[BUFFER_SIZE]; - int dateStringCodePointBuffer[BUFFER_SIZE]; - mDictionaryHeader.readHeaderValueOrQuestionMark("dictionary", - dictionaryIdCodePointBuffer, BUFFER_SIZE); - mDictionaryHeader.readHeaderValueOrQuestionMark("version", - versionStringCodePointBuffer, BUFFER_SIZE); - mDictionaryHeader.readHeaderValueOrQuestionMark("date", - dateStringCodePointBuffer, BUFFER_SIZE); - - char dictionaryIdCharBuffer[BUFFER_SIZE]; - char versionStringCharBuffer[BUFFER_SIZE]; - char dateStringCharBuffer[BUFFER_SIZE]; - intArrayToCharArray(dictionaryIdCodePointBuffer, BUFFER_SIZE, - dictionaryIdCharBuffer, BUFFER_SIZE); - intArrayToCharArray(versionStringCodePointBuffer, BUFFER_SIZE, - versionStringCharBuffer, BUFFER_SIZE); - intArrayToCharArray(dateStringCodePointBuffer, BUFFER_SIZE, - dateStringCharBuffer, BUFFER_SIZE); - - LogUtils::logToJava(env, - "Dictionary info: dictionary = %s ; version = %s ; date = %s ; filesize = %i", - dictionaryIdCharBuffer, versionStringCharBuffer, dateStringCharBuffer, mDictSize); - } }; } #endif /* LATINIME_BINARY_DICTIONARY_INFO_H */ diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index 891b80331..af00e9927 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -18,33 +18,39 @@ #include "suggest/core/dictionary/dictionary.h" -#include <map> // TODO: remove #include <stdint.h> #include "defines.h" -#include "jni.h" #include "suggest/core/dictionary/bigram_dictionary.h" +#include "suggest/core/policy/dictionary_header_structure_policy.h" #include "suggest/core/session/dic_traverse_session.h" #include "suggest/core/suggest.h" #include "suggest/core/suggest_options.h" +#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h" #include "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h" #include "suggest/policyimpl/typing/typing_suggest_policy_factory.h" +#include "utils/log_utils.h" namespace latinime { Dictionary::Dictionary(JNIEnv *env, void *dict, int dictSize, int mmapFd, int dictBufOffset, bool isUpdatable) - : mBinaryDictionaryInfo(env, static_cast<const uint8_t *>(dict), dictSize, mmapFd, + : mBinaryDictionaryInfo(static_cast<const uint8_t *>(dict), dictSize, mmapFd, dictBufOffset, isUpdatable), - mBigramDictionary(new BigramDictionary(&mBinaryDictionaryInfo)), + mDictionaryStructureWithBufferPolicy(DictionaryStructureWithBufferPolicyFactory + ::newDictionaryStructureWithBufferPolicy( + static_cast<const uint8_t *>(dict), dictSize)), + mBigramDictionary(new BigramDictionary(mDictionaryStructureWithBufferPolicy)), mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())), mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) { + logDictionaryInfo(env); } Dictionary::~Dictionary() { delete mBigramDictionary; delete mGestureSuggest; delete mTypingSuggest; + delete mDictionaryStructureWithBufferPolicy; } int Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession, @@ -83,14 +89,12 @@ int Dictionary::getBigrams(const int *word, int length, int *outWords, int *freq } int Dictionary::getProbability(const int *word, int length) const { - const DictionaryStructureWithBufferPolicy *const structurePolicy = - mBinaryDictionaryInfo.getStructurePolicy(); - int pos = structurePolicy->getTerminalNodePositionOfWord(word, length, + int pos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(word, length, false /* forceLowerCaseSearch */); if (NOT_A_VALID_WORD_POS == pos) { return NOT_A_PROBABILITY; } - return structurePolicy->getUnigramProbability(pos); + return getDictionaryStructurePolicy()->getUnigramProbability(pos); } bool Dictionary::isValidBigram(const int *word0, int length0, const int *word1, int length1) const { @@ -126,4 +130,33 @@ void Dictionary::removeBigramWords(const int *const word0, const int length0, // TODO: Support dynamic update } +void Dictionary::logDictionaryInfo(JNIEnv *const env) const { + const int BUFFER_SIZE = 16; + int dictionaryIdCodePointBuffer[BUFFER_SIZE]; + int versionStringCodePointBuffer[BUFFER_SIZE]; + int dateStringCodePointBuffer[BUFFER_SIZE]; + const DictionaryHeaderStructurePolicy *const headerPolicy = + getDictionaryStructurePolicy()->getHeaderStructurePolicy(); + headerPolicy->readHeaderValueOrQuestionMark("dictionary", dictionaryIdCodePointBuffer, + BUFFER_SIZE); + headerPolicy->readHeaderValueOrQuestionMark("version", versionStringCodePointBuffer, + BUFFER_SIZE); + headerPolicy->readHeaderValueOrQuestionMark("date", dateStringCodePointBuffer, BUFFER_SIZE); + + char dictionaryIdCharBuffer[BUFFER_SIZE]; + char versionStringCharBuffer[BUFFER_SIZE]; + char dateStringCharBuffer[BUFFER_SIZE]; + intArrayToCharArray(dictionaryIdCodePointBuffer, BUFFER_SIZE, + dictionaryIdCharBuffer, BUFFER_SIZE); + intArrayToCharArray(versionStringCodePointBuffer, BUFFER_SIZE, + versionStringCharBuffer, BUFFER_SIZE); + intArrayToCharArray(dateStringCodePointBuffer, BUFFER_SIZE, + dateStringCharBuffer, BUFFER_SIZE); + + LogUtils::logToJava(env, + "Dictionary info: dictionary = %s ; version = %s ; date = %s ; filesize = %i", + dictionaryIdCharBuffer, versionStringCharBuffer, dateStringCharBuffer, + mBinaryDictionaryInfo.getDictSize()); +} + } // namespace latinime diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h index 9f1e0729d..17ce47974 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.h +++ b/native/jni/src/suggest/core/dictionary/dictionary.h @@ -26,6 +26,7 @@ namespace latinime { class BigramDictionary; +class DictionaryStructureWithBufferPolicy; class DicTraverseSession; class ProximityInfo; class SuggestInterface; @@ -77,19 +78,27 @@ class Dictionary { void removeBigramWords(const int *const word0, const int length0, const int *const word1, const int length1); + // TODO: Remove. const BinaryDictionaryInfo *getBinaryDictionaryInfo() const { return &mBinaryDictionaryInfo; } + const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const { + return mDictionaryStructureWithBufferPolicy; + } + virtual ~Dictionary(); private: DISALLOW_IMPLICIT_CONSTRUCTORS(Dictionary); const BinaryDictionaryInfo mBinaryDictionaryInfo; + DictionaryStructureWithBufferPolicy *const mDictionaryStructureWithBufferPolicy; const BigramDictionary *mBigramDictionary; SuggestInterface *mGestureSuggest; SuggestInterface *mTypingSuggest; + + void logDictionaryInfo(JNIEnv *const env) const; }; } // namespace latinime #endif // LATINIME_DICTIONARY_H diff --git a/native/jni/src/suggest/core/dictionary/digraph_utils.cpp b/native/jni/src/suggest/core/dictionary/digraph_utils.cpp index af378b1b7..3271c1bfb 100644 --- a/native/jni/src/suggest/core/dictionary/digraph_utils.cpp +++ b/native/jni/src/suggest/core/dictionary/digraph_utils.cpp @@ -19,7 +19,7 @@ #include <cstdlib> #include "defines.h" -#include "suggest/core/dictionary/binary_dictionary_header.h" +#include "suggest/core/policy/dictionary_header_structure_policy.h" #include "utils/char_utils.h" namespace latinime { @@ -35,8 +35,9 @@ const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] = { DIGRAPH_TYPE_GERMAN_UMLAUT, DIGRAPH_TYPE_FRENCH_LIGATURES }; /* static */ bool DigraphUtils::hasDigraphForCodePoint( - const BinaryDictionaryHeader *const header, const int compositeGlyphCodePoint) { - const DigraphUtils::DigraphType digraphType = getDigraphTypeForDictionary(header); + const DictionaryHeaderStructurePolicy *const headerPolicy, + const int compositeGlyphCodePoint) { + const DigraphUtils::DigraphType digraphType = getDigraphTypeForDictionary(headerPolicy); if (DigraphUtils::getDigraphForDigraphTypeAndCodePoint(digraphType, compositeGlyphCodePoint)) { return true; } @@ -45,11 +46,11 @@ const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] = // Returns the digraph type associated with the given dictionary. /* static */ DigraphUtils::DigraphType DigraphUtils::getDigraphTypeForDictionary( - const BinaryDictionaryHeader *const header) { - if (header->requiresGermanUmlautProcessing()) { + const DictionaryHeaderStructurePolicy *const headerPolicy) { + if (headerPolicy->requiresGermanUmlautProcessing()) { return DIGRAPH_TYPE_GERMAN_UMLAUT; } - if (header->requiresFrenchLigatureProcessing()) { + if (headerPolicy->requiresFrenchLigatureProcessing()) { return DIGRAPH_TYPE_FRENCH_LIGATURES; } return DIGRAPH_TYPE_NONE; diff --git a/native/jni/src/suggest/core/dictionary/digraph_utils.h b/native/jni/src/suggest/core/dictionary/digraph_utils.h index 9d74fe3a6..6ae16e390 100644 --- a/native/jni/src/suggest/core/dictionary/digraph_utils.h +++ b/native/jni/src/suggest/core/dictionary/digraph_utils.h @@ -21,7 +21,7 @@ namespace latinime { -class BinaryDictionaryHeader; +class DictionaryHeaderStructurePolicy; class DigraphUtils { public: @@ -39,14 +39,15 @@ class DigraphUtils { typedef struct { int first; int second; int compositeGlyph; } digraph_t; - static bool hasDigraphForCodePoint( - const BinaryDictionaryHeader *const header, const int compositeGlyphCodePoint); + static bool hasDigraphForCodePoint(const DictionaryHeaderStructurePolicy *const headerPolicy, + const int compositeGlyphCodePoint); static int getDigraphCodePointForIndex(const int compositeGlyphCodePoint, const DigraphCodePointIndex digraphCodePointIndex); private: DISALLOW_IMPLICIT_CONSTRUCTORS(DigraphUtils); - static DigraphType getDigraphTypeForDictionary(const BinaryDictionaryHeader *const header); + static DigraphType getDigraphTypeForDictionary( + const DictionaryHeaderStructurePolicy *const headerPolicy); static int getAllDigraphsForDigraphTypeAndReturnSize( const DigraphType digraphType, const digraph_t **const digraphs); static const digraph_t *getDigraphForCodePoint(const int compositeGlyphCodePoint); diff --git a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h new file mode 100644 index 000000000..a6829b476 --- /dev/null +++ b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICTIONARY_HEADER_STRUCTURE_POLICY_H +#define LATINIME_DICTIONARY_HEADER_STRUCTURE_POLICY_H + +#include "defines.h" + +namespace latinime { + +/* + * This class abstracts structure of dictionaries. + * Implement this policy to support additional dictionaries. + */ +class DictionaryHeaderStructurePolicy { + public: + virtual ~DictionaryHeaderStructurePolicy() {} + + virtual bool supportsDynamicUpdate() const = 0; + + virtual bool requiresGermanUmlautProcessing() const = 0; + + virtual bool requiresFrenchLigatureProcessing() const = 0; + + virtual float getMultiWordCostMultiplier() const = 0; + + virtual void readHeaderValueOrQuestionMark(const char *const key, int *outValue, + int outValueSize) const = 0; + + protected: + DictionaryHeaderStructurePolicy() {} + + private: + DISALLOW_COPY_AND_ASSIGN(DictionaryHeaderStructurePolicy); +}; +} // namespace latinime +#endif /* LATINIME_DICTIONARY_HEADER_STRUCTURE_POLICY_H */ diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index 6c97067cf..1b34f03f0 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -24,6 +24,7 @@ namespace latinime { class DicNode; class DicNodeVector; class DictionaryBigramsStructurePolicy; +class DictionaryHeaderStructurePolicy; class DictionaryShortcutsStructurePolicy; /* @@ -65,6 +66,8 @@ class DictionaryStructureWithBufferPolicy { virtual int getBigramsPositionOfNode(const int nodePos) const = 0; + virtual const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const = 0; + virtual const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const = 0; virtual const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const = 0; diff --git a/native/jni/src/suggest/core/policy/weighting.cpp b/native/jni/src/suggest/core/policy/weighting.cpp index 58729229f..f9b777df2 100644 --- a/native/jni/src/suggest/core/policy/weighting.cpp +++ b/native/jni/src/suggest/core/policy/weighting.cpp @@ -148,7 +148,7 @@ static inline void profile(const CorrectionType correctionType, DicNode *const n case CT_TERMINAL: { const float languageImprobability = DicNodeUtils::getBigramNodeImprobability( - traverseSession->getBinaryDictionaryInfo(), dicNode, multiBigramMap); + traverseSession->getDictionaryStructurePolicy(), dicNode, multiBigramMap); return weighting->getTerminalLanguageCost(traverseSession, dicNode, languageImprobability); } case CT_TERMINAL_INSERTION: diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.cpp b/native/jni/src/suggest/core/session/dic_traverse_session.cpp index 11a147bda..0ca583f90 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.cpp +++ b/native/jni/src/suggest/core/session/dic_traverse_session.cpp @@ -17,31 +17,29 @@ #include "suggest/core/session/dic_traverse_session.h" #include "defines.h" -#include "jni.h" -#include "suggest/core/dictionary/binary_dictionary_header.h" -#include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/dictionary.h" +#include "suggest/core/policy/dictionary_header_structure_policy.h" +#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" namespace latinime { void DicTraverseSession::init(const Dictionary *const dictionary, const int *prevWord, int prevWordLength, const SuggestOptions *const suggestOptions) { mDictionary = dictionary; - const BinaryDictionaryInfo *const binaryDictionaryInfo = - mDictionary->getBinaryDictionaryInfo(); - mMultiWordCostMultiplier = binaryDictionaryInfo->getHeader()->getMultiWordCostMultiplier(); + mMultiWordCostMultiplier = getDictionaryStructurePolicy()->getHeaderStructurePolicy() + ->getMultiWordCostMultiplier(); mSuggestOptions = suggestOptions; if (!prevWord) { mPrevWordPos = NOT_A_VALID_WORD_POS; return; } // TODO: merge following similar calls to getTerminalPosition into one case-insensitive call. - mPrevWordPos = binaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord( + mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord( prevWord, prevWordLength, false /* forceLowerCaseSearch */); if (mPrevWordPos == NOT_A_VALID_WORD_POS) { // Check bigrams for lower-cased previous word if original was not found. Useful for // auto-capitalized words like "The [current_word]". - mPrevWordPos = binaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord( + mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord( prevWord, prevWordLength, true /* forceLowerCaseSearch */); } } @@ -56,8 +54,9 @@ void DicTraverseSession::setupForGetSuggestions(const ProximityInfo *pInfo, maxSpatialDistance, maxPointerCount); } -const BinaryDictionaryInfo *DicTraverseSession::getBinaryDictionaryInfo() const { - return mDictionary->getBinaryDictionaryInfo(); +const DictionaryStructureWithBufferPolicy *DicTraverseSession::getDictionaryStructurePolicy() + const { + return mDictionary->getDictionaryStructurePolicy(); } void DicTraverseSession::resetCache(const int nextActiveCacheSize, const int maxWords) { diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.h b/native/jni/src/suggest/core/session/dic_traverse_session.h index 5c4cef02d..23de5cc65 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.h +++ b/native/jni/src/suggest/core/session/dic_traverse_session.h @@ -28,8 +28,8 @@ namespace latinime { -class BinaryDictionaryInfo; class Dictionary; +class DictionaryStructureWithBufferPolicy; class ProximityInfo; class SuggestOptions; @@ -75,8 +75,7 @@ class DicTraverseSession { const int maxPointerCount); void resetCache(const int nextActiveCacheSize, const int maxWords); - // TODO: Use DictionaryStructurePolicy instead of BinaryDictionaryInfo. - const BinaryDictionaryInfo *getBinaryDictionaryInfo() const; + const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const; //-------------------- // getters and setters diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp index bc1f25d3e..3b77227a0 100644 --- a/native/jni/src/suggest/core/suggest.cpp +++ b/native/jni/src/suggest/core/suggest.cpp @@ -19,13 +19,12 @@ #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_priority_queue.h" #include "suggest/core/dicnode/dic_node_vector.h" -// TODO: Use DictionaryStructurePolicy instead of BinaryDictionaryInfo. -#include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/binary_dictionary_shortcut_iterator.h" #include "suggest/core/dictionary/dictionary.h" #include "suggest/core/dictionary/digraph_utils.h" #include "suggest/core/dictionary/shortcut_utils.h" #include "suggest/core/layout/proximity_info.h" +#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "suggest/core/policy/scoring.h" #include "suggest/core/policy/traversal.h" #include "suggest/core/policy/weighting.h" @@ -108,7 +107,7 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPo MAX_RESULTS); // Create a new dic node here DicNode rootNode; - DicNodeUtils::initAsRoot(traverseSession->getBinaryDictionaryInfo(), + DicNodeUtils::initAsRoot(traverseSession->getDictionaryStructurePolicy(), traverseSession->getPrevWordPos(), &rootNode); traverseSession->getDicTraverseCache()->copyPushActive(&rootNode); } @@ -212,11 +211,10 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen } if (!terminalDicNode->hasMultipleWords()) { - const DictionaryStructureWithBufferPolicy *const structurePolicy = - traverseSession->getBinaryDictionaryInfo()->getStructurePolicy(); BinaryDictionaryShortcutIterator shortcutIt( - structurePolicy->getShortcutsStructurePolicy(), - structurePolicy->getShortcutPositionOfNode(terminalDicNode->getPos())); + traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(), + traverseSession->getDictionaryStructurePolicy() + ->getShortcutPositionOfNode(terminalDicNode->getPos())); // Shortcut is not supported for multiple words suggestions. // TODO: Check shortcuts during traversal for multiple words suggestions. const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode); @@ -299,7 +297,7 @@ void Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const { } DicNodeUtils::getAllChildDicNodes( - &dicNode, traverseSession->getBinaryDictionaryInfo(), &childDicNodes); + &dicNode, traverseSession->getDictionaryStructurePolicy(), &childDicNodes); const int childDicNodesSize = childDicNodes.getSizeAndLock(); for (int i = 0; i < childDicNodesSize; ++i) { @@ -310,7 +308,8 @@ void Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const { continue; } if (DigraphUtils::hasDigraphForCodePoint( - traverseSession->getBinaryDictionaryInfo()->getHeader(), + traverseSession->getDictionaryStructurePolicy() + ->getHeaderStructurePolicy(), childDicNode->getNodeCodePoint())) { correctionDicNode.initByCopy(childDicNode); correctionDicNode.advanceDigraphIndex(); @@ -448,7 +447,7 @@ void Suggest::processDicNodeAsOmission( DicTraverseSession *traverseSession, DicNode *dicNode) const { DicNodeVector childDicNodes; DicNodeUtils::getAllChildDicNodes( - dicNode, traverseSession->getBinaryDictionaryInfo(), &childDicNodes); + dicNode, traverseSession->getDictionaryStructurePolicy(), &childDicNodes); const int size = childDicNodes.getSizeAndLock(); for (int i = 0; i < size; i++) { @@ -473,7 +472,8 @@ void Suggest::processDicNodeAsInsertion(DicTraverseSession *traverseSession, DicNode *dicNode) const { const int16_t pointIndex = dicNode->getInputIndex(0); DicNodeVector childDicNodes; - DicNodeUtils::getProximityChildDicNodes(dicNode, traverseSession->getBinaryDictionaryInfo(), + DicNodeUtils::getProximityChildDicNodes(dicNode, + traverseSession->getDictionaryStructurePolicy(), traverseSession->getProximityInfoState(0), pointIndex + 1, true, &childDicNodes); const int size = childDicNodes.getSizeAndLock(); for (int i = 0; i < size; i++) { @@ -491,14 +491,15 @@ void Suggest::processDicNodeAsTransposition(DicTraverseSession *traverseSession, DicNode *dicNode) const { const int16_t pointIndex = dicNode->getInputIndex(0); DicNodeVector childDicNodes1; - DicNodeUtils::getProximityChildDicNodes(dicNode, traverseSession->getBinaryDictionaryInfo(), + DicNodeUtils::getProximityChildDicNodes(dicNode, + traverseSession->getDictionaryStructurePolicy(), traverseSession->getProximityInfoState(0), pointIndex + 1, false, &childDicNodes1); const int childSize1 = childDicNodes1.getSizeAndLock(); for (int i = 0; i < childSize1; i++) { if (childDicNodes1[i]->hasChildren()) { DicNodeVector childDicNodes2; DicNodeUtils::getProximityChildDicNodes( - childDicNodes1[i], traverseSession->getBinaryDictionaryInfo(), + childDicNodes1[i], traverseSession->getDictionaryStructurePolicy(), traverseSession->getProximityInfoState(0), pointIndex, false, &childDicNodes2); const int childSize2 = childDicNodes2.getSizeAndLock(); for (int j = 0; j < childSize2; j++) { @@ -539,7 +540,7 @@ void Suggest::createNextWordDicNode(DicTraverseSession *traverseSession, DicNode // Create a non-cached node here. DicNode newDicNode; DicNodeUtils::initAsRootWithPreviousWord( - traverseSession->getBinaryDictionaryInfo(), dicNode, &newDicNode); + traverseSession->getDictionaryStructurePolicy(), dicNode, &newDicNode); const CorrectionType correctionType = spaceSubstitution ? CT_NEW_WORD_SPACE_SUBSTITUTION : CT_NEW_WORD_SPACE_OMITTION; Weighting::addCostAndForwardInputIndex(WEIGHTING, correctionType, traverseSession, dicNode, diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp index 324992a48..3054e4ea6 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp @@ -16,21 +16,23 @@ #include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h" +#include <stdint.h> + #include "defines.h" -#include "suggest/core/dictionary/binary_dictionary_info.h" +#include "suggest/core/dictionary/binary_dictionary_format_utils.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h" #include "suggest/policyimpl/dictionary/patricia_trie_policy.h" namespace latinime { /* static */ DictionaryStructureWithBufferPolicy *DictionaryStructureWithBufferPolicyFactory - ::newDictionaryStructurePolicy( - const BinaryDictionaryInfo *const binaryDictionaryInfo) { - switch (binaryDictionaryInfo->getFormat()) { + ::newDictionaryStructureWithBufferPolicy(const uint8_t *const dictBuf, + const int dictSize) { + switch (BinaryDictionaryFormatUtils::detectFormatVersion(dictBuf, dictSize)) { case BinaryDictionaryFormatUtils::VERSION_2: - return new PatriciaTriePolicy(binaryDictionaryInfo->getDictRoot()); + return new PatriciaTriePolicy(dictBuf); case BinaryDictionaryFormatUtils::VERSION_3: - return new DynamicPatriciaTriePolicy(binaryDictionaryInfo->getDictRoot()); + return new DynamicPatriciaTriePolicy(dictBuf); default: ASSERT(false); return 0; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h index 95f82aabe..53eb8f927 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h @@ -17,18 +17,17 @@ #ifndef LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H #define LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H -#include "defines.h" +#include <stdint.h> +#include "defines.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" namespace latinime { -class BinaryDictionaryInfo; - class DictionaryStructureWithBufferPolicyFactory { public: - static DictionaryStructureWithBufferPolicy *newDictionaryStructurePolicy( - const BinaryDictionaryInfo *const binaryDictionaryInfo); + static DictionaryStructureWithBufferPolicy *newDictionaryStructureWithBufferPolicy( + const uint8_t *const dictBuf, const int dictSize); private: DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryStructureWithBufferPolicyFactory); diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h index caca36977..56475b137 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h @@ -22,6 +22,7 @@ #include "defines.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h" +#include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h" namespace latinime { @@ -31,8 +32,9 @@ class DicNodeVector; class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { public: - DynamicPatriciaTriePolicy(const uint8_t *const dictRoot) - : mDictRoot(dictRoot), mBigramListPolicy(dictRoot), mShortcutListPolicy(dictRoot) {} + DynamicPatriciaTriePolicy(const uint8_t *const dictBuf) + : mHeaderPolicy(dictBuf), mDictRoot(dictBuf + mHeaderPolicy.getSize()), + mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {} ~DynamicPatriciaTriePolicy() {} @@ -56,6 +58,10 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int getBigramsPositionOfNode(const int nodePos) const; + const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const { + return &mHeaderPolicy; + } + const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const { return &mBigramListPolicy; } @@ -68,6 +74,7 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy); static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP; + const HeaderPolicy mHeaderPolicy; // TODO: Consolidate mDictRoot. const uint8_t *const mDictRoot; const BigramListPolicy mBigramListPolicy; diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp index 16f534c21..eb828b58c 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp @@ -14,25 +14,17 @@ * limitations under the License. */ -#include "suggest/core/dictionary/binary_dictionary_header.h" - -#include "defines.h" +#include "suggest/policyimpl/dictionary/header/header_policy.h" namespace latinime { -const char *const BinaryDictionaryHeader::MULTIPLE_WORDS_DEMOTION_RATE_KEY = +const char *const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY = "MULTIPLE_WORDS_DEMOTION_RATE"; -const float BinaryDictionaryHeader::DEFAULT_MULTI_WORD_COST_MULTIPLIER = 1.0f; -const float BinaryDictionaryHeader::MULTI_WORD_COST_MULTIPLIER_SCALE = 100.0f; - -BinaryDictionaryHeader::BinaryDictionaryHeader(const uint8_t *const dictBuf) - : mDictBuf(dictBuf), - mDictionaryFlags(BinaryDictionaryHeaderReadingUtils::getFlags(mDictBuf)), - mSize(BinaryDictionaryHeaderReadingUtils::getHeaderSize(mDictBuf)), - mMultiWordCostMultiplier(readMultiWordCostMultiplier()) {} +const float HeaderPolicy::DEFAULT_MULTI_WORD_COST_MULTIPLIER = 1.0f; +const float HeaderPolicy::MULTI_WORD_COST_MULTIPLIER_SCALE = 100.0f; -float BinaryDictionaryHeader::readMultiWordCostMultiplier() const { - const int headerValue = BinaryDictionaryHeaderReadingUtils::readHeaderValueInt( +float HeaderPolicy::readMultiWordCostMultiplier() const { + const int headerValue = HeaderReadingUtils::readHeaderValueInt( mDictBuf, MULTIPLE_WORDS_DEMOTION_RATE_KEY); if (headerValue == S_INT_MIN) { // not found diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h index 4d9295229..e3e6fc077 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h @@ -14,41 +14,40 @@ * limitations under the License. */ -#ifndef LATINIME_BINARY_DICTIONARY_HEADER_H -#define LATINIME_BINARY_DICTIONARY_HEADER_H +#ifndef LATINIME_HEADER_POLICY_H +#define LATINIME_HEADER_POLICY_H #include <stdint.h> #include "defines.h" -#include "suggest/core/dictionary/binary_dictionary_header_reading_utils.h" +#include "suggest/core/policy/dictionary_header_structure_policy.h" +#include "suggest/policyimpl/dictionary/header/header_reading_utils.h" namespace latinime { -class BinaryDictionaryInfo; - -/** - * This class abstracts dictionary header structures and provide interface to access dictionary - * header information. - */ -// TODO:: Move header classes to policyimpl. -class BinaryDictionaryHeader { +class HeaderPolicy : public DictionaryHeaderStructurePolicy { public: - explicit BinaryDictionaryHeader(const uint8_t *const dictBuf); + explicit HeaderPolicy(const uint8_t *const dictBuf) + : mDictBuf(dictBuf), mDictionaryFlags(HeaderReadingUtils::getFlags(dictBuf)), + mSize(HeaderReadingUtils::getHeaderSize(dictBuf)), + mMultiWordCostMultiplier(readMultiWordCostMultiplier()) {} + + ~HeaderPolicy() {} AK_FORCE_INLINE int getSize() const { return mSize; } AK_FORCE_INLINE bool supportsDynamicUpdate() const { - return BinaryDictionaryHeaderReadingUtils::supportsDynamicUpdate(mDictionaryFlags); + return HeaderReadingUtils::supportsDynamicUpdate(mDictionaryFlags); } AK_FORCE_INLINE bool requiresGermanUmlautProcessing() const { - return BinaryDictionaryHeaderReadingUtils::requiresGermanUmlautProcessing(mDictionaryFlags); + return HeaderReadingUtils::requiresGermanUmlautProcessing(mDictionaryFlags); } AK_FORCE_INLINE bool requiresFrenchLigatureProcessing() const { - return BinaryDictionaryHeaderReadingUtils::requiresFrenchLigatureProcessing( + return HeaderReadingUtils::requiresFrenchLigatureProcessing( mDictionaryFlags); } @@ -63,7 +62,7 @@ class BinaryDictionaryHeader { outValue[0] = '\0'; return; } - if (!BinaryDictionaryHeaderReadingUtils::readHeaderValue(mDictBuf, + if (!HeaderReadingUtils::readHeaderValue(mDictBuf, key, outValue, outValueSize)) { outValue[0] = '?'; outValue[1] = '\0'; @@ -71,18 +70,19 @@ class BinaryDictionaryHeader { } private: - DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryHeader); + DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderPolicy); static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY; static const float DEFAULT_MULTI_WORD_COST_MULTIPLIER; static const float MULTI_WORD_COST_MULTIPLIER_SCALE; const uint8_t *const mDictBuf; - const BinaryDictionaryHeaderReadingUtils::DictionaryFlags mDictionaryFlags; + const HeaderReadingUtils::DictionaryFlags mDictionaryFlags; const int mSize; const float mMultiWordCostMultiplier; float readMultiWordCostMultiplier() const; }; + } // namespace latinime -#endif // LATINIME_BINARY_DICTIONARY_HEADER_H +#endif /* LATINIME_HEADER_POLICY_H */ diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.cpp index bcf0e612c..70f45dfa8 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "suggest/core/dictionary/binary_dictionary_header_reading_utils.h" +#include "suggest/policyimpl/dictionary/header/header_reading_utils.h" #include <cctype> #include <cstdlib> @@ -24,40 +24,40 @@ namespace latinime { -const int BinaryDictionaryHeaderReadingUtils::MAX_OPTION_KEY_LENGTH = 256; -const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_MAGIC_NUMBER_SIZE = 4; -const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_DICTIONARY_VERSION_SIZE = 2; -const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_FLAG_SIZE = 2; -const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_SIZE_FIELD_SIZE = 4; +const int HeaderReadingUtils::MAX_OPTION_KEY_LENGTH = 256; -const BinaryDictionaryHeaderReadingUtils::DictionaryFlags - BinaryDictionaryHeaderReadingUtils::NO_FLAGS = 0; +const int HeaderReadingUtils::HEADER_MAGIC_NUMBER_SIZE = 4; +const int HeaderReadingUtils::HEADER_DICTIONARY_VERSION_SIZE = 2; +const int HeaderReadingUtils::HEADER_FLAG_SIZE = 2; +const int HeaderReadingUtils::HEADER_SIZE_FIELD_SIZE = 4; + +const HeaderReadingUtils::DictionaryFlags + HeaderReadingUtils::NO_FLAGS = 0; // Flags for special processing // Those *must* match the flags in makedict (BinaryDictInputOutput#*_PROCESSING_FLAG) or // something very bad (like, the apocalypse) will happen. Please update both at the same time. -const BinaryDictionaryHeaderReadingUtils::DictionaryFlags - BinaryDictionaryHeaderReadingUtils::GERMAN_UMLAUT_PROCESSING_FLAG = 0x1; -const BinaryDictionaryHeaderReadingUtils::DictionaryFlags - BinaryDictionaryHeaderReadingUtils::SUPPORTS_DYNAMIC_UPDATE_FLAG = 0x2; -const BinaryDictionaryHeaderReadingUtils::DictionaryFlags - BinaryDictionaryHeaderReadingUtils::FRENCH_LIGATURE_PROCESSING_FLAG = 0x4; +const HeaderReadingUtils::DictionaryFlags + HeaderReadingUtils::GERMAN_UMLAUT_PROCESSING_FLAG = 0x1; +const HeaderReadingUtils::DictionaryFlags + HeaderReadingUtils::SUPPORTS_DYNAMIC_UPDATE_FLAG = 0x2; +const HeaderReadingUtils::DictionaryFlags + HeaderReadingUtils::FRENCH_LIGATURE_PROCESSING_FLAG = 0x4; -/* static */ int BinaryDictionaryHeaderReadingUtils::getHeaderSize(const uint8_t *const dictBuf) { +/* static */ int HeaderReadingUtils::getHeaderSize(const uint8_t *const dictBuf) { // See the format of the header in the comment in // BinaryDictionaryFormatUtils::detectFormatVersion() - return ByteArrayUtils::readUint32(dictBuf, - VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE - + VERSION_2_HEADER_FLAG_SIZE); + return ByteArrayUtils::readUint32(dictBuf, HEADER_MAGIC_NUMBER_SIZE + + HEADER_DICTIONARY_VERSION_SIZE + HEADER_FLAG_SIZE); } -/* static */ BinaryDictionaryHeaderReadingUtils::DictionaryFlags - BinaryDictionaryHeaderReadingUtils::getFlags(const uint8_t *const dictBuf) { - return ByteArrayUtils::readUint16(dictBuf, VERSION_2_HEADER_MAGIC_NUMBER_SIZE - + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE); +/* static */ HeaderReadingUtils::DictionaryFlags + HeaderReadingUtils::getFlags(const uint8_t *const dictBuf) { + return ByteArrayUtils::readUint16(dictBuf, + HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE); } // Returns if the key is found or not and reads the found value into outValue. -/* static */ bool BinaryDictionaryHeaderReadingUtils::readHeaderValue(const uint8_t *const dictBuf, +/* static */ bool HeaderReadingUtils::readHeaderValue(const uint8_t *const dictBuf, const char *const key, int *outValue, const int outValueSize) { if (outValueSize <= 0) { return false; @@ -72,8 +72,8 @@ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags if(ByteArrayUtils::compareStringInBufferWithCharArray( dictBuf, key, headerSize - pos, &pos) == 0) { // The key was found. - const int length = ByteArrayUtils::readStringAndAdvancePosition( - dictBuf, outValueSize, outValue, &pos); + const int length = ByteArrayUtils::readStringAndAdvancePosition(dictBuf, outValueSize, + outValue, &pos); // Add a 0 terminator to the string. outValue[length < outValueSize ? length : outValueSize - 1] = '\0'; return true; @@ -84,7 +84,7 @@ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags return false; } -/* static */ int BinaryDictionaryHeaderReadingUtils::readHeaderValueInt( +/* static */ int HeaderReadingUtils::readHeaderValueInt( const uint8_t *const dictBuf, const char *const key) { const int bufferSize = LARGEST_INT_DIGIT_COUNT; int intBuffer[bufferSize]; diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.h index deae9be27..c94919640 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef LATINIME_DICTIONARY_HEADER_READING_UTILS_H -#define LATINIME_DICTIONARY_HEADER_READING_UTILS_H +#ifndef LATINIME_HEADER_READING_UTILS_H +#define LATINIME_HEADER_READING_UTILS_H #include <stdint.h> @@ -23,8 +23,7 @@ namespace latinime { -// TODO:: Move header classes to policyimpl. -class BinaryDictionaryHeaderReadingUtils { +class HeaderReadingUtils { public: typedef uint16_t DictionaryFlags; @@ -47,8 +46,8 @@ class BinaryDictionaryHeaderReadingUtils { } static AK_FORCE_INLINE int getHeaderOptionsPosition() { - return VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE - + VERSION_2_HEADER_FLAG_SIZE + VERSION_2_HEADER_SIZE_FIELD_SIZE; + return HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE + HEADER_FLAG_SIZE + + HEADER_SIZE_FIELD_SIZE; } static bool readHeaderValue(const uint8_t *const dictBuf, @@ -57,12 +56,12 @@ class BinaryDictionaryHeaderReadingUtils { static int readHeaderValueInt(const uint8_t *const dictBuf, const char *const key); private: - DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryHeaderReadingUtils); + DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderReadingUtils); - static const int VERSION_2_HEADER_MAGIC_NUMBER_SIZE; - static const int VERSION_2_HEADER_DICTIONARY_VERSION_SIZE; - static const int VERSION_2_HEADER_FLAG_SIZE; - static const int VERSION_2_HEADER_SIZE_FIELD_SIZE; + static const int HEADER_MAGIC_NUMBER_SIZE; + static const int HEADER_DICTIONARY_VERSION_SIZE; + static const int HEADER_FLAG_SIZE; + static const int HEADER_SIZE_FIELD_SIZE; static const DictionaryFlags NO_FLAGS; // Flags for special processing @@ -74,4 +73,4 @@ class BinaryDictionaryHeaderReadingUtils { static const DictionaryFlags CONTAINS_BIGRAMS_FLAG; }; } -#endif /* LATINIME_DICTIONARY_HEADER_READING_UTILS_H */ +#endif /* LATINIME_HEADER_READING_UTILS_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h index 51b5b9af9..bebe1bfff 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h @@ -22,6 +22,7 @@ #include "defines.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h" +#include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h" namespace latinime { @@ -31,8 +32,9 @@ class DicNodeVector; class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { public: - PatriciaTriePolicy(const uint8_t *const dictRoot) - : mDictRoot(dictRoot), mBigramListPolicy(dictRoot), mShortcutListPolicy(dictRoot) {} + PatriciaTriePolicy(const uint8_t *const dictBuf) + : mHeaderPolicy(dictBuf), mDictRoot(dictBuf + mHeaderPolicy.getSize()), + mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {} ~PatriciaTriePolicy() {} @@ -56,6 +58,10 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int getBigramsPositionOfNode(const int nodePos) const; + const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const { + return &mHeaderPolicy; + } + const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const { return &mBigramListPolicy; } @@ -67,6 +73,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { private: DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy); + const HeaderPolicy mHeaderPolicy; const uint8_t *const mDictRoot; const BigramListPolicy mBigramListPolicy; const ShortcutListPolicy mShortcutListPolicy; diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h index 7cddb0882..b6aa85896 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h +++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h @@ -155,7 +155,8 @@ class TypingWeighting : public Weighting { float getNewWordBigramLanguageCost(const DicTraverseSession *const traverseSession, const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) const { - return DicNodeUtils::getBigramNodeImprobability(traverseSession->getBinaryDictionaryInfo(), + return DicNodeUtils::getBigramNodeImprobability( + traverseSession->getDictionaryStructurePolicy(), dicNode, multiBigramMap) * ScoringParams::DISTANCE_WEIGHT_LANGUAGE; } |