diff options
Diffstat (limited to 'native/jni/src')
50 files changed, 828 insertions, 785 deletions
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp index 4ff4bc2e4..150eb6762 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp +++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp @@ -21,7 +21,6 @@ #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_proximity_filter.h" #include "suggest/core/dicnode/dic_node_vector.h" -#include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/multi_bigram_map.h" #include "suggest/core/dictionary/probability_utils.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" @@ -33,17 +32,17 @@ namespace latinime { // Node initialization utils // /////////////////////////////// -/* static */ void DicNodeUtils::initAsRoot(const BinaryDictionaryInfo *const binaryDictionaryInfo, +/* static */ void DicNodeUtils::initAsRoot( + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, const int prevWordNodePos, DicNode *const newRootNode) { - newRootNode->initAsRoot(binaryDictionaryInfo->getStructurePolicy()->getRootPosition(), - prevWordNodePos); + newRootNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordNodePos); } /*static */ void DicNodeUtils::initAsRootWithPreviousWord( - const BinaryDictionaryInfo *const binaryDictionaryInfo, + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, DicNode *const prevWordLastNode, DicNode *const newRootNode) { newRootNode->initAsRootWithPreviousWord( - prevWordLastNode, binaryDictionaryInfo->getStructurePolicy()->getRootPosition()); + prevWordLastNode, dictionaryStructurePolicy->getRootPosition()); } /* static */ void DicNodeUtils::initByCopy(DicNode *srcNode, DicNode *destNode) { @@ -67,12 +66,13 @@ namespace latinime { } /* static */ void DicNodeUtils::getAllChildDicNodes(DicNode *dicNode, - const BinaryDictionaryInfo *const binaryDictionaryInfo, DicNodeVector *childDicNodes) { - getProximityChildDicNodes(dicNode, binaryDictionaryInfo, 0, 0, false, childDicNodes); + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, + DicNodeVector *childDicNodes) { + getProximityChildDicNodes(dicNode, dictionaryStructurePolicy, 0, 0, false, childDicNodes); } /* static */ void DicNodeUtils::getProximityChildDicNodes(DicNode *dicNode, - const BinaryDictionaryInfo *const binaryDictionaryInfo, + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, const ProximityInfoState *pInfoState, const int pointIndex, bool exactOnly, DicNodeVector *childDicNodes) { if (dicNode->isTotalInputSizeExceedingLimit()) { @@ -82,7 +82,7 @@ namespace latinime { if (!dicNode->isLeavingNode()) { DicNodeUtils::createAndGetPassingChildNode(dicNode, &childrenFilter, childDicNodes); } else { - binaryDictionaryInfo->getStructurePolicy()->createAndGetAllChildNodes(dicNode, + dictionaryStructurePolicy->createAndGetAllChildNodes(dicNode, &childrenFilter, childDicNodes); } } @@ -94,12 +94,13 @@ namespace latinime { * Computes the combined bigram / unigram cost for the given dicNode. */ /* static */ float DicNodeUtils::getBigramNodeImprobability( - const BinaryDictionaryInfo *const binaryDictionaryInfo, + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, const DicNode *const node, MultiBigramMap *multiBigramMap) { if (node->hasMultipleWords() && !node->isValidMultipleWordSuggestion()) { return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); } - const int probability = getBigramNodeProbability(binaryDictionaryInfo, node, multiBigramMap); + const int probability = getBigramNodeProbability(dictionaryStructurePolicy, node, + multiBigramMap); // TODO: This equation to calculate the improbability looks unreasonable. Investigate this. const float cost = static_cast<float>(MAX_PROBABILITY - probability) / static_cast<float>(MAX_PROBABILITY); @@ -107,7 +108,7 @@ namespace latinime { } /* static */ int DicNodeUtils::getBigramNodeProbability( - const BinaryDictionaryInfo *const binaryDictionaryInfo, + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, const DicNode *const node, MultiBigramMap *multiBigramMap) { const int unigramProbability = node->getProbability(); const int wordPos = node->getPos(); @@ -118,8 +119,8 @@ namespace latinime { return ProbabilityUtils::backoff(unigramProbability); } if (multiBigramMap) { - return multiBigramMap->getBigramProbability(binaryDictionaryInfo->getStructurePolicy(), - prevWordPos, wordPos, unigramProbability); + return multiBigramMap->getBigramProbability(dictionaryStructurePolicy, prevWordPos, + wordPos, unigramProbability); } return ProbabilityUtils::backoff(unigramProbability); } diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.h b/native/jni/src/suggest/core/dicnode/dic_node_utils.h index 4f12b29f4..8dc984fe1 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_utils.h +++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.h @@ -23,10 +23,10 @@ namespace latinime { -class BinaryDictionaryInfo; class DicNode; class DicNodeProximityFilter; class DicNodeVector; +class DictionaryStructureWithBufferPolicy; class ProximityInfoState; class MultiBigramMap; @@ -34,18 +34,22 @@ class DicNodeUtils { public: static int appendTwoWords(const int *src0, const int16_t length0, const int *src1, const int16_t length1, int *dest); - static void initAsRoot(const BinaryDictionaryInfo *const binaryDictionaryInfo, + static void initAsRoot( + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, const int prevWordNodePos, DicNode *newRootNode); - static void initAsRootWithPreviousWord(const BinaryDictionaryInfo *const binaryDictionaryInfo, + static void initAsRootWithPreviousWord( + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, DicNode *prevWordLastNode, DicNode *newRootNode); static void initByCopy(DicNode *srcNode, DicNode *destNode); static void getAllChildDicNodes(DicNode *dicNode, - const BinaryDictionaryInfo *const binaryDictionaryInfo, DicNodeVector *childDicNodes); - static float getBigramNodeImprobability(const BinaryDictionaryInfo *const binaryDictionaryInfo, + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, + DicNodeVector *childDicNodes); + static float getBigramNodeImprobability( + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, const DicNode *const node, MultiBigramMap *const multiBigramMap); // TODO: Move to private static void getProximityChildDicNodes(DicNode *dicNode, - const BinaryDictionaryInfo *const binaryDictionaryInfo, + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, const ProximityInfoState *pInfoState, const int pointIndex, bool exactOnly, DicNodeVector *childDicNodes); @@ -54,7 +58,8 @@ class DicNodeUtils { // Max number of bigrams to look up static const int MAX_BIGRAMS_CONSIDERED_PER_CONTEXT = 500; - static int getBigramNodeProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo, + static int getBigramNodeProbability( + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, const DicNode *const node, MultiBigramMap *multiBigramMap); static void createAndGetPassingChildNode(DicNode *dicNode, const DicNodeProximityFilter *const childrenFilter, DicNodeVector *childDicNodes); diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp index fdaa562e5..ebe76467a 100644 --- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp @@ -22,15 +22,16 @@ #include "defines.h" #include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h" -#include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/dictionary.h" #include "suggest/core/dictionary/probability_utils.h" +#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "utils/char_utils.h" namespace latinime { -BigramDictionary::BigramDictionary(const BinaryDictionaryInfo *const binaryDictionaryInfo) - : mBinaryDictionaryInfo(binaryDictionaryInfo) { +BigramDictionary::BigramDictionary( + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy) + : mDictionaryStructurePolicy(dictionaryStructurePolicy) { if (DEBUG_DICT) { AKLOGI("BigramDictionary - constructor"); } @@ -112,12 +113,11 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng int bigramCount = 0; int unigramProbability = 0; int bigramBuffer[MAX_WORD_LENGTH]; - const DictionaryStructureWithBufferPolicy *const structurePolicy = - mBinaryDictionaryInfo->getStructurePolicy(); - BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), pos); + BinaryDictionaryBigramsIterator bigramsIt( + mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos); while (bigramsIt.hasNext()) { bigramsIt.next(); - const int length = structurePolicy-> + const int length = mDictionaryStructurePolicy-> getCodePointsAndProbabilityAndReturnCodePointCount(bigramsIt.getBigramPos(), MAX_WORD_LENGTH, bigramBuffer, &unigramProbability); // Due to space constraints, the probability for bigrams is approximate - the lower the @@ -139,12 +139,10 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength, const bool forceLowerCaseSearch) const { if (0 >= prevWordLength) return NOT_A_DICT_POS; - const DictionaryStructureWithBufferPolicy *const structurePolicy = - mBinaryDictionaryInfo->getStructurePolicy(); - int pos = structurePolicy->getTerminalNodePositionOfWord(prevWord, prevWordLength, + int pos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(prevWord, prevWordLength, forceLowerCaseSearch); if (NOT_A_VALID_WORD_POS == pos) return NOT_A_DICT_POS; - return structurePolicy->getBigramsPositionOfNode(pos); + return mDictionaryStructurePolicy->getBigramsPositionOfNode(pos); } bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *word1, @@ -152,13 +150,12 @@ bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *w int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */); // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams if (NOT_A_DICT_POS == pos) return false; - const DictionaryStructureWithBufferPolicy *const structurePolicy = - mBinaryDictionaryInfo->getStructurePolicy(); - int nextWordPos = structurePolicy->getTerminalNodePositionOfWord(word1, length1, + int nextWordPos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(word1, length1, false /* forceLowerCaseSearch */); if (NOT_A_VALID_WORD_POS == nextWordPos) return false; - BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), pos); + BinaryDictionaryBigramsIterator bigramsIt( + mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos); while (bigramsIt.hasNext()) { bigramsIt.next(); if (bigramsIt.getBigramPos() == nextWordPos) { diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h b/native/jni/src/suggest/core/dictionary/bigram_dictionary.h index 438c34cac..99b964c49 100644 --- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h +++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.h @@ -21,11 +21,11 @@ namespace latinime { -class BinaryDictionaryInfo; +class DictionaryStructureWithBufferPolicy; class BigramDictionary { public: - BigramDictionary(const BinaryDictionaryInfo *const binaryDictionaryInfo); + BigramDictionary(const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy); int getPredictions(const int *word, int length, int *outBigramCodePoints, int *outBigramProbability, int *outputTypes) const; @@ -40,7 +40,7 @@ class BigramDictionary { int getBigramListPositionForWord(const int *prevWord, const int prevWordLength, const bool forceLowerCaseSearch) const; - const BinaryDictionaryInfo *const mBinaryDictionaryInfo; + const DictionaryStructureWithBufferPolicy *const mDictionaryStructurePolicy; }; } // namespace latinime #endif // LATINIME_BIGRAM_DICTIONARY_H diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_header.cpp deleted file mode 100644 index 91c643a5f..000000000 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header.cpp +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "suggest/core/dictionary/binary_dictionary_header.h" - -#include "defines.h" -#include "suggest/core/dictionary/binary_dictionary_info.h" - -namespace latinime { - -const char *const BinaryDictionaryHeader::MULTIPLE_WORDS_DEMOTION_RATE_KEY = - "MULTIPLE_WORDS_DEMOTION_RATE"; -const float BinaryDictionaryHeader::DEFAULT_MULTI_WORD_COST_MULTIPLIER = 1.0f; -const float BinaryDictionaryHeader::MULTI_WORD_COST_MULTIPLIER_SCALE = 100.0f; - -BinaryDictionaryHeader::BinaryDictionaryHeader( - const BinaryDictionaryInfo *const binaryDictionaryInfo) - : mBinaryDictionaryInfo(binaryDictionaryInfo), - mDictionaryFlags(BinaryDictionaryHeaderReadingUtils::getFlags(binaryDictionaryInfo)), - mSize(BinaryDictionaryHeaderReadingUtils::getHeaderSize(binaryDictionaryInfo)), - mMultiWordCostMultiplier(readMultiWordCostMultiplier()) {} - -float BinaryDictionaryHeader::readMultiWordCostMultiplier() const { - const int headerValue = BinaryDictionaryHeaderReadingUtils::readHeaderValueInt( - mBinaryDictionaryInfo, MULTIPLE_WORDS_DEMOTION_RATE_KEY); - if (headerValue == S_INT_MIN) { - // not found - return DEFAULT_MULTI_WORD_COST_MULTIPLIER; - } - if (headerValue <= 0) { - return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); - } - return MULTI_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(headerValue); -} - -} // namespace latinime diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp deleted file mode 100644 index a57b0f859..000000000 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "suggest/core/dictionary/binary_dictionary_header_reading_utils.h" - -#include <cctype> -#include <cstdlib> - -#include "defines.h" -#include "suggest/core/dictionary/binary_dictionary_info.h" - -namespace latinime { - -const int BinaryDictionaryHeaderReadingUtils::MAX_OPTION_KEY_LENGTH = 256; - -const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_MAGIC_NUMBER_SIZE = 4; -const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_DICTIONARY_VERSION_SIZE = 2; -const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_FLAG_SIZE = 2; -const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_SIZE_FIELD_SIZE = 4; - -const BinaryDictionaryHeaderReadingUtils::DictionaryFlags - BinaryDictionaryHeaderReadingUtils::NO_FLAGS = 0; -// Flags for special processing -// Those *must* match the flags in makedict (BinaryDictInputOutput#*_PROCESSING_FLAG) or -// something very bad (like, the apocalypse) will happen. Please update both at the same time. -const BinaryDictionaryHeaderReadingUtils::DictionaryFlags - BinaryDictionaryHeaderReadingUtils::GERMAN_UMLAUT_PROCESSING_FLAG = 0x1; -const BinaryDictionaryHeaderReadingUtils::DictionaryFlags - BinaryDictionaryHeaderReadingUtils::SUPPORTS_DYNAMIC_UPDATE_FLAG = 0x2; -const BinaryDictionaryHeaderReadingUtils::DictionaryFlags - BinaryDictionaryHeaderReadingUtils::FRENCH_LIGATURE_PROCESSING_FLAG = 0x4; - -/* static */ int BinaryDictionaryHeaderReadingUtils::getHeaderSize( - const BinaryDictionaryInfo *const binaryDictionaryInfo) { - switch (getHeaderVersion(binaryDictionaryInfo->getFormat())) { - case HEADER_VERSION_2: - // See the format of the header in the comment in - // BinaryDictionaryFormatUtils::detectFormatVersion() - return ByteArrayUtils::readUint32(binaryDictionaryInfo->getDictBuf(), - VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE - + VERSION_2_HEADER_FLAG_SIZE); - default: - return S_INT_MAX; - } -} - -/* static */ BinaryDictionaryHeaderReadingUtils::DictionaryFlags - BinaryDictionaryHeaderReadingUtils::getFlags( - const BinaryDictionaryInfo *const binaryDictionaryInfo) { - switch (getHeaderVersion(binaryDictionaryInfo->getFormat())) { - case HEADER_VERSION_2: - return ByteArrayUtils::readUint16(binaryDictionaryInfo->getDictBuf(), - VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE); - default: - return NO_FLAGS; - } -} - -// Returns if the key is found or not and reads the found value into outValue. -/* static */ bool BinaryDictionaryHeaderReadingUtils::readHeaderValue( - const BinaryDictionaryInfo *const binaryDictionaryInfo, - const char *const key, int *outValue, const int outValueSize) { - if (outValueSize <= 0) { - return false; - } - const int headerSize = getHeaderSize(binaryDictionaryInfo); - int pos = getHeaderOptionsPosition(binaryDictionaryInfo->getFormat()); - if (pos == NOT_A_DICT_POS) { - // The header doesn't have header options. - return false; - } - while (pos < headerSize) { - if(ByteArrayUtils::compareStringInBufferWithCharArray( - binaryDictionaryInfo->getDictBuf(), key, headerSize - pos, &pos) == 0) { - // The key was found. - const int length = ByteArrayUtils::readStringAndAdvancePosition( - binaryDictionaryInfo->getDictBuf(), outValueSize, outValue, &pos); - // Add a 0 terminator to the string. - outValue[length < outValueSize ? length : outValueSize - 1] = '\0'; - return true; - } - ByteArrayUtils::advancePositionToBehindString( - binaryDictionaryInfo->getDictBuf(), headerSize - pos, &pos); - } - // The key was not found. - return false; -} - -/* static */ int BinaryDictionaryHeaderReadingUtils::readHeaderValueInt( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const char *const key) { - const int bufferSize = LARGEST_INT_DIGIT_COUNT; - int intBuffer[bufferSize]; - char charBuffer[bufferSize]; - if (!readHeaderValue(binaryDictionaryInfo, key, intBuffer, bufferSize)) { - return S_INT_MIN; - } - for (int i = 0; i < bufferSize; ++i) { - charBuffer[i] = intBuffer[i]; - if (charBuffer[i] == '0') { - break; - } - if (!isdigit(charBuffer[i])) { - // If not a number, return S_INT_MIN - return S_INT_MIN; - } - } - return atoi(charBuffer); -} - -} // namespace latinime diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h deleted file mode 100644 index 61748227e..000000000 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LATINIME_DICTIONARY_HEADER_READING_UTILS_H -#define LATINIME_DICTIONARY_HEADER_READING_UTILS_H - -#include <stdint.h> - -#include "defines.h" -#include "suggest/core/dictionary/binary_dictionary_format_utils.h" - -namespace latinime { - -class BinaryDictionaryInfo; - -class BinaryDictionaryHeaderReadingUtils { - public: - typedef uint16_t DictionaryFlags; - - static const int MAX_OPTION_KEY_LENGTH; - - static int getHeaderSize(const BinaryDictionaryInfo *const binaryDictionaryInfo); - - static DictionaryFlags getFlags(const BinaryDictionaryInfo *const binaryDictionaryInfo); - - static AK_FORCE_INLINE bool supportsDynamicUpdate(const DictionaryFlags flags) { - return (flags & SUPPORTS_DYNAMIC_UPDATE_FLAG) != 0; - } - - static AK_FORCE_INLINE bool requiresGermanUmlautProcessing(const DictionaryFlags flags) { - return (flags & GERMAN_UMLAUT_PROCESSING_FLAG) != 0; - } - - static AK_FORCE_INLINE bool requiresFrenchLigatureProcessing(const DictionaryFlags flags) { - return (flags & FRENCH_LIGATURE_PROCESSING_FLAG) != 0; - } - - static AK_FORCE_INLINE int getHeaderOptionsPosition( - const BinaryDictionaryFormatUtils::FORMAT_VERSION dictionaryFormat) { - switch (getHeaderVersion(dictionaryFormat)) { - case HEADER_VERSION_2: - return VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE - + VERSION_2_HEADER_FLAG_SIZE + VERSION_2_HEADER_SIZE_FIELD_SIZE; - break; - default: - return NOT_A_DICT_POS; - } - } - - static bool readHeaderValue( - const BinaryDictionaryInfo *const binaryDictionaryInfo, - const char *const key, int *outValue, const int outValueSize); - - static int readHeaderValueInt( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const char *const key); - - private: - DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryHeaderReadingUtils); - - enum HEADER_VERSION { - HEADER_VERSION_2, - UNKNOWN_HEADER_VERSION - }; - - static const int VERSION_2_HEADER_MAGIC_NUMBER_SIZE; - static const int VERSION_2_HEADER_DICTIONARY_VERSION_SIZE; - static const int VERSION_2_HEADER_FLAG_SIZE; - static const int VERSION_2_HEADER_SIZE_FIELD_SIZE; - - static const DictionaryFlags NO_FLAGS; - // Flags for special processing - // Those *must* match the flags in makedict (FormatSpec#*_PROCESSING_FLAGS) or - // something very bad (like, the apocalypse) will happen. Please update both at the same time. - static const DictionaryFlags GERMAN_UMLAUT_PROCESSING_FLAG; - static const DictionaryFlags SUPPORTS_DYNAMIC_UPDATE_FLAG; - static const DictionaryFlags FRENCH_LIGATURE_PROCESSING_FLAG; - static const DictionaryFlags CONTAINS_BIGRAMS_FLAG; - - static HEADER_VERSION getHeaderVersion( - const BinaryDictionaryFormatUtils::FORMAT_VERSION formatVersion) { - switch(formatVersion) { - case BinaryDictionaryFormatUtils::VERSION_2: - // Fall through - case BinaryDictionaryFormatUtils::VERSION_3: - return HEADER_VERSION_2; - default: - return UNKNOWN_HEADER_VERSION; - } - } -}; -} -#endif /* LATINIME_DICTIONARY_HEADER_READING_UTILS_H */ diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h index c694c6a3a..e50baae0b 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h @@ -20,32 +20,17 @@ #include <stdint.h> #include "defines.h" -#include "jni.h" -#include "suggest/core/dictionary/binary_dictionary_format_utils.h" -#include "suggest/core/dictionary/binary_dictionary_header.h" -#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h" -#include "utils/log_utils.h" namespace latinime { class BinaryDictionaryInfo { public: - AK_FORCE_INLINE BinaryDictionaryInfo(JNIEnv *env, const uint8_t *const dictBuf, + AK_FORCE_INLINE BinaryDictionaryInfo(const uint8_t *const dictBuf, const int dictSize, const int mmapFd, const int dictBufOffset, const bool isUpdatable) : mDictBuf(dictBuf), mDictSize(dictSize), mMmapFd(mmapFd), - mDictBufOffset(dictBufOffset), mIsUpdatable(isUpdatable), - mDictionaryFormat(BinaryDictionaryFormatUtils::detectFormatVersion( - mDictBuf, mDictSize)), - mDictionaryHeader(this), mDictRoot(mDictBuf + mDictionaryHeader.getSize()), - // TODO: Remove. - mStructurePolicy(DictionaryStructureWithBufferPolicyFactory - ::newDictionaryStructurePolicy(this)) { - logDictionaryInfo(env); - } + mDictBufOffset(dictBufOffset), mIsUpdatable(isUpdatable) {} - ~BinaryDictionaryInfo() { - delete mStructurePolicy; - } + ~BinaryDictionaryInfo() {} AK_FORCE_INLINE const uint8_t *getDictBuf() const { return mDictBuf; @@ -63,30 +48,12 @@ class BinaryDictionaryInfo { return mDictBufOffset; } - AK_FORCE_INLINE const uint8_t *getDictRoot() const { - return mDictRoot; - } - - AK_FORCE_INLINE BinaryDictionaryFormatUtils::FORMAT_VERSION getFormat() const { - return mDictionaryFormat; - } - - // TODO: Move to DictionaryStructurePolicy. - AK_FORCE_INLINE const BinaryDictionaryHeader *getHeader() const { - return &mDictionaryHeader; - } - AK_FORCE_INLINE bool isDynamicallyUpdatable() const { // TODO: Support dynamic dictionary formats. const bool isUpdatableDictionaryFormat = false; return mIsUpdatable && isUpdatableDictionaryFormat; } - // TODO: remove - AK_FORCE_INLINE const DictionaryStructureWithBufferPolicy *getStructurePolicy() const { - return mStructurePolicy; - } - private: DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryInfo); @@ -95,40 +62,6 @@ class BinaryDictionaryInfo { const int mMmapFd; const int mDictBufOffset; const bool mIsUpdatable; - const BinaryDictionaryFormatUtils::FORMAT_VERSION mDictionaryFormat; - // TODO: Move BinaryDictionaryHeader to policyimpl and introduce dedicated API to the - // DictionaryStructurePolicy. - const BinaryDictionaryHeader mDictionaryHeader; - const uint8_t *const mDictRoot; - // TODO: remove - const DictionaryStructureWithBufferPolicy *const mStructurePolicy; - - AK_FORCE_INLINE void logDictionaryInfo(JNIEnv *const env) const { - const int BUFFER_SIZE = 16; - int dictionaryIdCodePointBuffer[BUFFER_SIZE]; - int versionStringCodePointBuffer[BUFFER_SIZE]; - int dateStringCodePointBuffer[BUFFER_SIZE]; - mDictionaryHeader.readHeaderValueOrQuestionMark("dictionary", - dictionaryIdCodePointBuffer, BUFFER_SIZE); - mDictionaryHeader.readHeaderValueOrQuestionMark("version", - versionStringCodePointBuffer, BUFFER_SIZE); - mDictionaryHeader.readHeaderValueOrQuestionMark("date", - dateStringCodePointBuffer, BUFFER_SIZE); - - char dictionaryIdCharBuffer[BUFFER_SIZE]; - char versionStringCharBuffer[BUFFER_SIZE]; - char dateStringCharBuffer[BUFFER_SIZE]; - intArrayToCharArray(dictionaryIdCodePointBuffer, BUFFER_SIZE, - dictionaryIdCharBuffer, BUFFER_SIZE); - intArrayToCharArray(versionStringCodePointBuffer, BUFFER_SIZE, - versionStringCharBuffer, BUFFER_SIZE); - intArrayToCharArray(dateStringCodePointBuffer, BUFFER_SIZE, - dateStringCharBuffer, BUFFER_SIZE); - - LogUtils::logToJava(env, - "Dictionary info: dictionary = %s ; version = %s ; date = %s ; filesize = %i", - dictionaryIdCharBuffer, versionStringCharBuffer, dateStringCharBuffer, mDictSize); - } }; } #endif /* LATINIME_BINARY_DICTIONARY_INFO_H */ diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_shortcut_iterator.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_shortcut_iterator.h new file mode 100644 index 000000000..558e0a5c3 --- /dev/null +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_shortcut_iterator.h @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_BINARY_DICTIONARY_SHORTCUT_ITERATOR_H +#define LATINIME_BINARY_DICTIONARY_SHORTCUT_ITERATOR_H + +#include "defines.h" +#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" + +namespace latinime { + +class BinaryDictionaryShortcutIterator { + public: + BinaryDictionaryShortcutIterator( + const DictionaryShortcutsStructurePolicy *const shortcutStructurePolicy, + const int shortcutPos) + : mShortcutStructurePolicy(shortcutStructurePolicy), + mPos(shortcutStructurePolicy->getStartPos(shortcutPos)), + mHasNextShortcutTarget(shortcutPos != NOT_A_DICT_POS) {} + + AK_FORCE_INLINE bool hasNextShortcutTarget() const { + return mHasNextShortcutTarget; + } + + // Gets the shortcut target itself as an int string and put it to outTarget, put its length + // to outTargetLength, put whether it is whitelist to outIsWhitelist. + AK_FORCE_INLINE void nextShortcutTarget( + const int maxDepth, int *const outTarget, int *const outTargetLength, + bool *const outIsWhitelist) { + mShortcutStructurePolicy->getNextShortcut(maxDepth, outTarget, outTargetLength, + outIsWhitelist, &mHasNextShortcutTarget, &mPos); + } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryShortcutIterator); + + const DictionaryShortcutsStructurePolicy *const mShortcutStructurePolicy; + int mPos; + bool mHasNextShortcutTarget; +}; +} // namespace latinime +#endif // LATINIME_BINARY_DICTIONARY_SHORTCUT_ITERATOR_H diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp deleted file mode 100644 index cb73a577e..000000000 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h" - -#include "suggest/core/dictionary/byte_array_utils.h" - -namespace latinime { - -typedef BinaryDictionaryTerminalAttributesReadingUtils TaUtils; - -const TaUtils::TerminalAttributeFlags TaUtils::MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30; -const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10; -const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20; -const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30; -const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40; -// Flag for presence of more attributes -const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_HAS_NEXT = 0x80; -// Mask for attribute probability, stored on 4 bits inside the flags byte. -const TaUtils::TerminalAttributeFlags TaUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F; -const int TaUtils::ATTRIBUTE_ADDRESS_SHIFT = 4; -const int TaUtils::SHORTCUT_LIST_SIZE_FIELD_SIZE = 2; -// The numeric value of the shortcut probability that means 'whitelist'. -const int TaUtils::WHITELIST_SHORTCUT_PROBABILITY = 15; - -} // namespace latinime diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h deleted file mode 100644 index c28f43ff7..000000000 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LATINIME_BINARY_DICTIONARY_TERMINAL_ATTRIBUTES_READING_UTILS_H -#define LATINIME_BINARY_DICTIONARY_TERMINAL_ATTRIBUTES_READING_UTILS_H - -#include <stdint.h> - -#include "defines.h" -#include "suggest/core/dictionary/binary_dictionary_info.h" -#include "suggest/core/dictionary/byte_array_utils.h" - -namespace latinime { - -class BinaryDictionaryTerminalAttributesReadingUtils { - public: - typedef uint8_t TerminalAttributeFlags; - typedef TerminalAttributeFlags ShortcutFlags; - - static AK_FORCE_INLINE TerminalAttributeFlags getFlagsAndForwardPointer( - const uint8_t *const dictRoot, int *const pos) { - return ByteArrayUtils::readUint8AndAdvancePosition(dictRoot, pos); - } - - static AK_FORCE_INLINE int getProbabilityFromFlags(const TerminalAttributeFlags flags) { - return flags & MASK_ATTRIBUTE_PROBABILITY; - } - - static AK_FORCE_INLINE bool hasNext(const TerminalAttributeFlags flags) { - return (flags & FLAG_ATTRIBUTE_HAS_NEXT) != 0; - } - - // Shortcuts reading methods - // This method returns the size of the shortcut list region excluding the shortcut list size - // field at the beginning. - static AK_FORCE_INLINE int getShortcutListSizeAndForwardPointer( - const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) { - // readUint16andAdvancePosition() returns an offset *including* the uint16 field itself. - return ByteArrayUtils::readUint16AndAdvancePosition( - binaryDictionaryInfo->getDictRoot(), pos) - SHORTCUT_LIST_SIZE_FIELD_SIZE; - } - - static AK_FORCE_INLINE void skipShortcuts( - const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) { - const int shortcutListSize = getShortcutListSizeAndForwardPointer( - binaryDictionaryInfo, pos); - *pos += shortcutListSize; - } - - static AK_FORCE_INLINE bool isWhitelist(const ShortcutFlags flags) { - return getProbabilityFromFlags(flags) == WHITELIST_SHORTCUT_PROBABILITY; - } - - static AK_FORCE_INLINE int readShortcutTarget( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const int maxLength, - int *const outWord, int *const pos) { - return ByteArrayUtils::readStringAndAdvancePosition( - binaryDictionaryInfo->getDictRoot(), maxLength, outWord, pos); - } - - private: - DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryTerminalAttributesReadingUtils); - - static const TerminalAttributeFlags MASK_ATTRIBUTE_ADDRESS_TYPE; - static const TerminalAttributeFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE; - static const TerminalAttributeFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES; - static const TerminalAttributeFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES; - static const TerminalAttributeFlags FLAG_ATTRIBUTE_OFFSET_NEGATIVE; - static const TerminalAttributeFlags FLAG_ATTRIBUTE_HAS_NEXT; - static const TerminalAttributeFlags MASK_ATTRIBUTE_PROBABILITY; - static const int ATTRIBUTE_ADDRESS_SHIFT; - static const int SHORTCUT_LIST_SIZE_FIELD_SIZE; - static const int WHITELIST_SHORTCUT_PROBABILITY; - - static AK_FORCE_INLINE bool isOffsetNegative(const TerminalAttributeFlags flags) { - return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0; - } - - static AK_FORCE_INLINE int attributeAddressSize(const TerminalAttributeFlags flags) { - return (flags & MASK_ATTRIBUTE_ADDRESS_TYPE) >> ATTRIBUTE_ADDRESS_SHIFT; - /* Note: this is a value-dependant optimization of what may probably be - more readably written this way: - switch (flags * BinaryFormat::MASK_ATTRIBUTE_ADDRESS_TYPE) { - case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: return 1; - case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: return 2; - case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTE: return 3; - default: return 0; - } - */ - } -}; -} -#endif /* LATINIME_BINARY_DICTIONARY_TERMINAL_ATTRIBUTES_READING_UTILS_H */ diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index 891b80331..af00e9927 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -18,33 +18,39 @@ #include "suggest/core/dictionary/dictionary.h" -#include <map> // TODO: remove #include <stdint.h> #include "defines.h" -#include "jni.h" #include "suggest/core/dictionary/bigram_dictionary.h" +#include "suggest/core/policy/dictionary_header_structure_policy.h" #include "suggest/core/session/dic_traverse_session.h" #include "suggest/core/suggest.h" #include "suggest/core/suggest_options.h" +#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h" #include "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h" #include "suggest/policyimpl/typing/typing_suggest_policy_factory.h" +#include "utils/log_utils.h" namespace latinime { Dictionary::Dictionary(JNIEnv *env, void *dict, int dictSize, int mmapFd, int dictBufOffset, bool isUpdatable) - : mBinaryDictionaryInfo(env, static_cast<const uint8_t *>(dict), dictSize, mmapFd, + : mBinaryDictionaryInfo(static_cast<const uint8_t *>(dict), dictSize, mmapFd, dictBufOffset, isUpdatable), - mBigramDictionary(new BigramDictionary(&mBinaryDictionaryInfo)), + mDictionaryStructureWithBufferPolicy(DictionaryStructureWithBufferPolicyFactory + ::newDictionaryStructureWithBufferPolicy( + static_cast<const uint8_t *>(dict), dictSize)), + mBigramDictionary(new BigramDictionary(mDictionaryStructureWithBufferPolicy)), mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())), mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) { + logDictionaryInfo(env); } Dictionary::~Dictionary() { delete mBigramDictionary; delete mGestureSuggest; delete mTypingSuggest; + delete mDictionaryStructureWithBufferPolicy; } int Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession, @@ -83,14 +89,12 @@ int Dictionary::getBigrams(const int *word, int length, int *outWords, int *freq } int Dictionary::getProbability(const int *word, int length) const { - const DictionaryStructureWithBufferPolicy *const structurePolicy = - mBinaryDictionaryInfo.getStructurePolicy(); - int pos = structurePolicy->getTerminalNodePositionOfWord(word, length, + int pos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(word, length, false /* forceLowerCaseSearch */); if (NOT_A_VALID_WORD_POS == pos) { return NOT_A_PROBABILITY; } - return structurePolicy->getUnigramProbability(pos); + return getDictionaryStructurePolicy()->getUnigramProbability(pos); } bool Dictionary::isValidBigram(const int *word0, int length0, const int *word1, int length1) const { @@ -126,4 +130,33 @@ void Dictionary::removeBigramWords(const int *const word0, const int length0, // TODO: Support dynamic update } +void Dictionary::logDictionaryInfo(JNIEnv *const env) const { + const int BUFFER_SIZE = 16; + int dictionaryIdCodePointBuffer[BUFFER_SIZE]; + int versionStringCodePointBuffer[BUFFER_SIZE]; + int dateStringCodePointBuffer[BUFFER_SIZE]; + const DictionaryHeaderStructurePolicy *const headerPolicy = + getDictionaryStructurePolicy()->getHeaderStructurePolicy(); + headerPolicy->readHeaderValueOrQuestionMark("dictionary", dictionaryIdCodePointBuffer, + BUFFER_SIZE); + headerPolicy->readHeaderValueOrQuestionMark("version", versionStringCodePointBuffer, + BUFFER_SIZE); + headerPolicy->readHeaderValueOrQuestionMark("date", dateStringCodePointBuffer, BUFFER_SIZE); + + char dictionaryIdCharBuffer[BUFFER_SIZE]; + char versionStringCharBuffer[BUFFER_SIZE]; + char dateStringCharBuffer[BUFFER_SIZE]; + intArrayToCharArray(dictionaryIdCodePointBuffer, BUFFER_SIZE, + dictionaryIdCharBuffer, BUFFER_SIZE); + intArrayToCharArray(versionStringCodePointBuffer, BUFFER_SIZE, + versionStringCharBuffer, BUFFER_SIZE); + intArrayToCharArray(dateStringCodePointBuffer, BUFFER_SIZE, + dateStringCharBuffer, BUFFER_SIZE); + + LogUtils::logToJava(env, + "Dictionary info: dictionary = %s ; version = %s ; date = %s ; filesize = %i", + dictionaryIdCharBuffer, versionStringCharBuffer, dateStringCharBuffer, + mBinaryDictionaryInfo.getDictSize()); +} + } // namespace latinime diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h index 9f1e0729d..17ce47974 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.h +++ b/native/jni/src/suggest/core/dictionary/dictionary.h @@ -26,6 +26,7 @@ namespace latinime { class BigramDictionary; +class DictionaryStructureWithBufferPolicy; class DicTraverseSession; class ProximityInfo; class SuggestInterface; @@ -77,19 +78,27 @@ class Dictionary { void removeBigramWords(const int *const word0, const int length0, const int *const word1, const int length1); + // TODO: Remove. const BinaryDictionaryInfo *getBinaryDictionaryInfo() const { return &mBinaryDictionaryInfo; } + const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const { + return mDictionaryStructureWithBufferPolicy; + } + virtual ~Dictionary(); private: DISALLOW_IMPLICIT_CONSTRUCTORS(Dictionary); const BinaryDictionaryInfo mBinaryDictionaryInfo; + DictionaryStructureWithBufferPolicy *const mDictionaryStructureWithBufferPolicy; const BigramDictionary *mBigramDictionary; SuggestInterface *mGestureSuggest; SuggestInterface *mTypingSuggest; + + void logDictionaryInfo(JNIEnv *const env) const; }; } // namespace latinime #endif // LATINIME_DICTIONARY_H diff --git a/native/jni/src/suggest/core/dictionary/digraph_utils.cpp b/native/jni/src/suggest/core/dictionary/digraph_utils.cpp index af378b1b7..3271c1bfb 100644 --- a/native/jni/src/suggest/core/dictionary/digraph_utils.cpp +++ b/native/jni/src/suggest/core/dictionary/digraph_utils.cpp @@ -19,7 +19,7 @@ #include <cstdlib> #include "defines.h" -#include "suggest/core/dictionary/binary_dictionary_header.h" +#include "suggest/core/policy/dictionary_header_structure_policy.h" #include "utils/char_utils.h" namespace latinime { @@ -35,8 +35,9 @@ const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] = { DIGRAPH_TYPE_GERMAN_UMLAUT, DIGRAPH_TYPE_FRENCH_LIGATURES }; /* static */ bool DigraphUtils::hasDigraphForCodePoint( - const BinaryDictionaryHeader *const header, const int compositeGlyphCodePoint) { - const DigraphUtils::DigraphType digraphType = getDigraphTypeForDictionary(header); + const DictionaryHeaderStructurePolicy *const headerPolicy, + const int compositeGlyphCodePoint) { + const DigraphUtils::DigraphType digraphType = getDigraphTypeForDictionary(headerPolicy); if (DigraphUtils::getDigraphForDigraphTypeAndCodePoint(digraphType, compositeGlyphCodePoint)) { return true; } @@ -45,11 +46,11 @@ const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] = // Returns the digraph type associated with the given dictionary. /* static */ DigraphUtils::DigraphType DigraphUtils::getDigraphTypeForDictionary( - const BinaryDictionaryHeader *const header) { - if (header->requiresGermanUmlautProcessing()) { + const DictionaryHeaderStructurePolicy *const headerPolicy) { + if (headerPolicy->requiresGermanUmlautProcessing()) { return DIGRAPH_TYPE_GERMAN_UMLAUT; } - if (header->requiresFrenchLigatureProcessing()) { + if (headerPolicy->requiresFrenchLigatureProcessing()) { return DIGRAPH_TYPE_FRENCH_LIGATURES; } return DIGRAPH_TYPE_NONE; diff --git a/native/jni/src/suggest/core/dictionary/digraph_utils.h b/native/jni/src/suggest/core/dictionary/digraph_utils.h index 9d74fe3a6..6ae16e390 100644 --- a/native/jni/src/suggest/core/dictionary/digraph_utils.h +++ b/native/jni/src/suggest/core/dictionary/digraph_utils.h @@ -21,7 +21,7 @@ namespace latinime { -class BinaryDictionaryHeader; +class DictionaryHeaderStructurePolicy; class DigraphUtils { public: @@ -39,14 +39,15 @@ class DigraphUtils { typedef struct { int first; int second; int compositeGlyph; } digraph_t; - static bool hasDigraphForCodePoint( - const BinaryDictionaryHeader *const header, const int compositeGlyphCodePoint); + static bool hasDigraphForCodePoint(const DictionaryHeaderStructurePolicy *const headerPolicy, + const int compositeGlyphCodePoint); static int getDigraphCodePointForIndex(const int compositeGlyphCodePoint, const DigraphCodePointIndex digraphCodePointIndex); private: DISALLOW_IMPLICIT_CONSTRUCTORS(DigraphUtils); - static DigraphType getDigraphTypeForDictionary(const BinaryDictionaryHeader *const header); + static DigraphType getDigraphTypeForDictionary( + const DictionaryHeaderStructurePolicy *const headerPolicy); static int getAllDigraphsForDigraphTypeAndReturnSize( const DigraphType digraphType, const digraph_t **const digraphs); static const digraph_t *getDigraphForCodePoint(const int compositeGlyphCodePoint); diff --git a/native/jni/src/suggest/core/dictionary/shortcut_utils.h b/native/jni/src/suggest/core/dictionary/shortcut_utils.h index 3c2180937..461d7b454 100644 --- a/native/jni/src/suggest/core/dictionary/shortcut_utils.h +++ b/native/jni/src/suggest/core/dictionary/shortcut_utils.h @@ -19,21 +19,20 @@ #include "defines.h" #include "suggest/core/dicnode/dic_node_utils.h" -#include "suggest/core/dictionary/terminal_attributes.h" +#include "suggest/core/dictionary/binary_dictionary_shortcut_iterator.h" namespace latinime { class ShortcutUtils { public: - static int outputShortcuts(const TerminalAttributes *const terminalAttributes, + static int outputShortcuts(BinaryDictionaryShortcutIterator *const shortcutIt, int outputWordIndex, const int finalScore, int *const outputCodePoints, int *const frequencies, int *const outputTypes, const bool sameAsTyped) { - TerminalAttributes::ShortcutIterator iterator = terminalAttributes->getShortcutIterator(); int shortcutTarget[MAX_WORD_LENGTH]; - while (iterator.hasNextShortcutTarget() && outputWordIndex < MAX_RESULTS) { + while (shortcutIt->hasNextShortcutTarget() && outputWordIndex < MAX_RESULTS) { bool isWhilelist; int shortcutTargetStringLength; - iterator.nextShortcutTarget(MAX_WORD_LENGTH, shortcutTarget, + shortcutIt->nextShortcutTarget(MAX_WORD_LENGTH, shortcutTarget, &shortcutTargetStringLength, &isWhilelist); int shortcutScore; int kind; diff --git a/native/jni/src/suggest/core/dictionary/terminal_attributes.h b/native/jni/src/suggest/core/dictionary/terminal_attributes.h deleted file mode 100644 index c40a3bb59..000000000 --- a/native/jni/src/suggest/core/dictionary/terminal_attributes.h +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (C) 2012 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LATINIME_TERMINAL_ATTRIBUTES_H -#define LATINIME_TERMINAL_ATTRIBUTES_H - -#include <stdint.h> - -#include "suggest/core/dictionary/binary_dictionary_info.h" -#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h" - -namespace latinime { - -/** - * This class encapsulates information about a terminal that allows to - * retrieve local node attributes like the list of shortcuts without - * exposing the format structure to the client. - */ -class TerminalAttributes { - public: - class ShortcutIterator { - public: - ShortcutIterator(const BinaryDictionaryInfo *const binaryDictionaryInfo, - const int shortcutPos, const bool hasShortcutList) - : mBinaryDictionaryInfo(binaryDictionaryInfo), mPos(shortcutPos), - mHasNextShortcutTarget(hasShortcutList) {} - - inline bool hasNextShortcutTarget() const { - return mHasNextShortcutTarget; - } - - // Gets the shortcut target itself as an int string and put it to outTarget, put its length - // to outTargetLength, put whether it is whitelist to outIsWhitelist. - AK_FORCE_INLINE void nextShortcutTarget( - const int maxDepth, int *const outTarget, int *const outTargetLength, - bool *const outIsWhitelist) { - const BinaryDictionaryTerminalAttributesReadingUtils::ShortcutFlags flags = - BinaryDictionaryTerminalAttributesReadingUtils::getFlagsAndForwardPointer( - mBinaryDictionaryInfo->getDictRoot(), &mPos); - mHasNextShortcutTarget = - BinaryDictionaryTerminalAttributesReadingUtils::hasNext(flags); - if (outIsWhitelist) { - *outIsWhitelist = - BinaryDictionaryTerminalAttributesReadingUtils::isWhitelist(flags); - } - if (outTargetLength) { - *outTargetLength = - BinaryDictionaryTerminalAttributesReadingUtils::readShortcutTarget( - mBinaryDictionaryInfo, maxDepth, outTarget, &mPos); - } - } - - private: - const BinaryDictionaryInfo *const mBinaryDictionaryInfo; - int mPos; - bool mHasNextShortcutTarget; - }; - - TerminalAttributes(const BinaryDictionaryInfo *const binaryDictionaryInfo, - const int shortcutPos) - : mBinaryDictionaryInfo(binaryDictionaryInfo), mShortcutListSizePos(shortcutPos) {} - - inline ShortcutIterator getShortcutIterator() const { - int shortcutPos = mShortcutListSizePos; - const bool hasShortcutList = shortcutPos != NOT_A_DICT_POS; - if (hasShortcutList) { - BinaryDictionaryTerminalAttributesReadingUtils::getShortcutListSizeAndForwardPointer( - mBinaryDictionaryInfo, &shortcutPos); - } - // shortcutPos is never used if hasShortcutList is false. - return ShortcutIterator(mBinaryDictionaryInfo, shortcutPos, hasShortcutList); - } - - private: - DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalAttributes); - const BinaryDictionaryInfo *const mBinaryDictionaryInfo; - const int mShortcutListSizePos; -}; -} // namespace latinime -#endif // LATINIME_TERMINAL_ATTRIBUTES_H diff --git a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h new file mode 100644 index 000000000..a6829b476 --- /dev/null +++ b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICTIONARY_HEADER_STRUCTURE_POLICY_H +#define LATINIME_DICTIONARY_HEADER_STRUCTURE_POLICY_H + +#include "defines.h" + +namespace latinime { + +/* + * This class abstracts structure of dictionaries. + * Implement this policy to support additional dictionaries. + */ +class DictionaryHeaderStructurePolicy { + public: + virtual ~DictionaryHeaderStructurePolicy() {} + + virtual bool supportsDynamicUpdate() const = 0; + + virtual bool requiresGermanUmlautProcessing() const = 0; + + virtual bool requiresFrenchLigatureProcessing() const = 0; + + virtual float getMultiWordCostMultiplier() const = 0; + + virtual void readHeaderValueOrQuestionMark(const char *const key, int *outValue, + int outValueSize) const = 0; + + protected: + DictionaryHeaderStructurePolicy() {} + + private: + DISALLOW_COPY_AND_ASSIGN(DictionaryHeaderStructurePolicy); +}; +} // namespace latinime +#endif /* LATINIME_DICTIONARY_HEADER_STRUCTURE_POLICY_H */ diff --git a/native/jni/src/suggest/core/policy/dictionary_shortcuts_structure_policy.h b/native/jni/src/suggest/core/policy/dictionary_shortcuts_structure_policy.h new file mode 100644 index 000000000..40b6c2de1 --- /dev/null +++ b/native/jni/src/suggest/core/policy/dictionary_shortcuts_structure_policy.h @@ -0,0 +1,46 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICTIONARY_SHORTCUTS_STRUCTURE_POLICY_H +#define LATINIME_DICTIONARY_SHORTCUTS_STRUCTURE_POLICY_H + +#include "defines.h" + +namespace latinime { + +/* + * This class abstracts structure of shortcuts. + */ +class DictionaryShortcutsStructurePolicy { + public: + virtual ~DictionaryShortcutsStructurePolicy() {} + + virtual int getStartPos(const int pos) const = 0; + + virtual void getNextShortcut(const int maxCodePointCount, int *const outCodePoint, + int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext, + int *const pos) const = 0; + + virtual void skipAllShortcuts(int *const pos) const = 0; + + protected: + DictionaryShortcutsStructurePolicy() {} + + private: + DISALLOW_COPY_AND_ASSIGN(DictionaryShortcutsStructurePolicy); +}; +} // namespace latinime +#endif /* LATINIME_DICTIONARY_SHORTCUTS_STRUCTURE_POLICY_H */ diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index d83d1e390..1b34f03f0 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -24,6 +24,8 @@ namespace latinime { class DicNode; class DicNodeVector; class DictionaryBigramsStructurePolicy; +class DictionaryHeaderStructurePolicy; +class DictionaryShortcutsStructurePolicy; /* * This class abstracts structure of dictionaries. @@ -64,8 +66,12 @@ class DictionaryStructureWithBufferPolicy { virtual int getBigramsPositionOfNode(const int nodePos) const = 0; + virtual const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const = 0; + virtual const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const = 0; + virtual const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const = 0; + protected: DictionaryStructureWithBufferPolicy() {} diff --git a/native/jni/src/suggest/core/policy/weighting.cpp b/native/jni/src/suggest/core/policy/weighting.cpp index 58729229f..f9b777df2 100644 --- a/native/jni/src/suggest/core/policy/weighting.cpp +++ b/native/jni/src/suggest/core/policy/weighting.cpp @@ -148,7 +148,7 @@ static inline void profile(const CorrectionType correctionType, DicNode *const n case CT_TERMINAL: { const float languageImprobability = DicNodeUtils::getBigramNodeImprobability( - traverseSession->getBinaryDictionaryInfo(), dicNode, multiBigramMap); + traverseSession->getDictionaryStructurePolicy(), dicNode, multiBigramMap); return weighting->getTerminalLanguageCost(traverseSession, dicNode, languageImprobability); } case CT_TERMINAL_INSERTION: diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.cpp b/native/jni/src/suggest/core/session/dic_traverse_session.cpp index 11a147bda..0ca583f90 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.cpp +++ b/native/jni/src/suggest/core/session/dic_traverse_session.cpp @@ -17,31 +17,29 @@ #include "suggest/core/session/dic_traverse_session.h" #include "defines.h" -#include "jni.h" -#include "suggest/core/dictionary/binary_dictionary_header.h" -#include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/dictionary.h" +#include "suggest/core/policy/dictionary_header_structure_policy.h" +#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" namespace latinime { void DicTraverseSession::init(const Dictionary *const dictionary, const int *prevWord, int prevWordLength, const SuggestOptions *const suggestOptions) { mDictionary = dictionary; - const BinaryDictionaryInfo *const binaryDictionaryInfo = - mDictionary->getBinaryDictionaryInfo(); - mMultiWordCostMultiplier = binaryDictionaryInfo->getHeader()->getMultiWordCostMultiplier(); + mMultiWordCostMultiplier = getDictionaryStructurePolicy()->getHeaderStructurePolicy() + ->getMultiWordCostMultiplier(); mSuggestOptions = suggestOptions; if (!prevWord) { mPrevWordPos = NOT_A_VALID_WORD_POS; return; } // TODO: merge following similar calls to getTerminalPosition into one case-insensitive call. - mPrevWordPos = binaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord( + mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord( prevWord, prevWordLength, false /* forceLowerCaseSearch */); if (mPrevWordPos == NOT_A_VALID_WORD_POS) { // Check bigrams for lower-cased previous word if original was not found. Useful for // auto-capitalized words like "The [current_word]". - mPrevWordPos = binaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord( + mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord( prevWord, prevWordLength, true /* forceLowerCaseSearch */); } } @@ -56,8 +54,9 @@ void DicTraverseSession::setupForGetSuggestions(const ProximityInfo *pInfo, maxSpatialDistance, maxPointerCount); } -const BinaryDictionaryInfo *DicTraverseSession::getBinaryDictionaryInfo() const { - return mDictionary->getBinaryDictionaryInfo(); +const DictionaryStructureWithBufferPolicy *DicTraverseSession::getDictionaryStructurePolicy() + const { + return mDictionary->getDictionaryStructurePolicy(); } void DicTraverseSession::resetCache(const int nextActiveCacheSize, const int maxWords) { diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.h b/native/jni/src/suggest/core/session/dic_traverse_session.h index 5c4cef02d..23de5cc65 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.h +++ b/native/jni/src/suggest/core/session/dic_traverse_session.h @@ -28,8 +28,8 @@ namespace latinime { -class BinaryDictionaryInfo; class Dictionary; +class DictionaryStructureWithBufferPolicy; class ProximityInfo; class SuggestOptions; @@ -75,8 +75,7 @@ class DicTraverseSession { const int maxPointerCount); void resetCache(const int nextActiveCacheSize, const int maxWords); - // TODO: Use DictionaryStructurePolicy instead of BinaryDictionaryInfo. - const BinaryDictionaryInfo *getBinaryDictionaryInfo() const; + const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const; //-------------------- // getters and setters diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp index f28efd526..3b77227a0 100644 --- a/native/jni/src/suggest/core/suggest.cpp +++ b/native/jni/src/suggest/core/suggest.cpp @@ -19,12 +19,12 @@ #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_priority_queue.h" #include "suggest/core/dicnode/dic_node_vector.h" -#include "suggest/core/dictionary/binary_dictionary_info.h" +#include "suggest/core/dictionary/binary_dictionary_shortcut_iterator.h" #include "suggest/core/dictionary/dictionary.h" #include "suggest/core/dictionary/digraph_utils.h" #include "suggest/core/dictionary/shortcut_utils.h" -#include "suggest/core/dictionary/terminal_attributes.h" #include "suggest/core/layout/proximity_info.h" +#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "suggest/core/policy/scoring.h" #include "suggest/core/policy/traversal.h" #include "suggest/core/policy/weighting.h" @@ -107,7 +107,7 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPo MAX_RESULTS); // Create a new dic node here DicNode rootNode; - DicNodeUtils::initAsRoot(traverseSession->getBinaryDictionaryInfo(), + DicNodeUtils::initAsRoot(traverseSession->getDictionaryStructurePolicy(), traverseSession->getPrevWordPos(), &rootNode); traverseSession->getDicTraverseCache()->copyPushActive(&rootNode); } @@ -211,15 +211,14 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen } if (!terminalDicNode->hasMultipleWords()) { - const BinaryDictionaryInfo *const binaryDictionaryInfo = - traverseSession->getBinaryDictionaryInfo(); - const TerminalAttributes terminalAttributes(traverseSession->getBinaryDictionaryInfo(), - binaryDictionaryInfo->getStructurePolicy()->getShortcutPositionOfNode( - terminalDicNode->getPos())); + BinaryDictionaryShortcutIterator shortcutIt( + traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(), + traverseSession->getDictionaryStructurePolicy() + ->getShortcutPositionOfNode(terminalDicNode->getPos())); // Shortcut is not supported for multiple words suggestions. // TODO: Check shortcuts during traversal for multiple words suggestions. const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode); - outputWordIndex = ShortcutUtils::outputShortcuts(&terminalAttributes, outputWordIndex, + outputWordIndex = ShortcutUtils::outputShortcuts(&shortcutIt, outputWordIndex, finalScore, outputCodePoints, frequencies, outputTypes, sameAsTyped); } DicNode::managedDelete(terminalDicNode); @@ -298,7 +297,7 @@ void Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const { } DicNodeUtils::getAllChildDicNodes( - &dicNode, traverseSession->getBinaryDictionaryInfo(), &childDicNodes); + &dicNode, traverseSession->getDictionaryStructurePolicy(), &childDicNodes); const int childDicNodesSize = childDicNodes.getSizeAndLock(); for (int i = 0; i < childDicNodesSize; ++i) { @@ -309,7 +308,8 @@ void Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const { continue; } if (DigraphUtils::hasDigraphForCodePoint( - traverseSession->getBinaryDictionaryInfo()->getHeader(), + traverseSession->getDictionaryStructurePolicy() + ->getHeaderStructurePolicy(), childDicNode->getNodeCodePoint())) { correctionDicNode.initByCopy(childDicNode); correctionDicNode.advanceDigraphIndex(); @@ -447,7 +447,7 @@ void Suggest::processDicNodeAsOmission( DicTraverseSession *traverseSession, DicNode *dicNode) const { DicNodeVector childDicNodes; DicNodeUtils::getAllChildDicNodes( - dicNode, traverseSession->getBinaryDictionaryInfo(), &childDicNodes); + dicNode, traverseSession->getDictionaryStructurePolicy(), &childDicNodes); const int size = childDicNodes.getSizeAndLock(); for (int i = 0; i < size; i++) { @@ -472,7 +472,8 @@ void Suggest::processDicNodeAsInsertion(DicTraverseSession *traverseSession, DicNode *dicNode) const { const int16_t pointIndex = dicNode->getInputIndex(0); DicNodeVector childDicNodes; - DicNodeUtils::getProximityChildDicNodes(dicNode, traverseSession->getBinaryDictionaryInfo(), + DicNodeUtils::getProximityChildDicNodes(dicNode, + traverseSession->getDictionaryStructurePolicy(), traverseSession->getProximityInfoState(0), pointIndex + 1, true, &childDicNodes); const int size = childDicNodes.getSizeAndLock(); for (int i = 0; i < size; i++) { @@ -490,14 +491,15 @@ void Suggest::processDicNodeAsTransposition(DicTraverseSession *traverseSession, DicNode *dicNode) const { const int16_t pointIndex = dicNode->getInputIndex(0); DicNodeVector childDicNodes1; - DicNodeUtils::getProximityChildDicNodes(dicNode, traverseSession->getBinaryDictionaryInfo(), + DicNodeUtils::getProximityChildDicNodes(dicNode, + traverseSession->getDictionaryStructurePolicy(), traverseSession->getProximityInfoState(0), pointIndex + 1, false, &childDicNodes1); const int childSize1 = childDicNodes1.getSizeAndLock(); for (int i = 0; i < childSize1; i++) { if (childDicNodes1[i]->hasChildren()) { DicNodeVector childDicNodes2; DicNodeUtils::getProximityChildDicNodes( - childDicNodes1[i], traverseSession->getBinaryDictionaryInfo(), + childDicNodes1[i], traverseSession->getDictionaryStructurePolicy(), traverseSession->getProximityInfoState(0), pointIndex, false, &childDicNodes2); const int childSize2 = childDicNodes2.getSizeAndLock(); for (int j = 0; j < childSize2; j++) { @@ -538,7 +540,7 @@ void Suggest::createNextWordDicNode(DicTraverseSession *traverseSession, DicNode // Create a non-cached node here. DicNode newDicNode; DicNodeUtils::initAsRootWithPreviousWord( - traverseSession->getBinaryDictionaryInfo(), dicNode, &newDicNode); + traverseSession->getDictionaryStructurePolicy(), dicNode, &newDicNode); const CorrectionType correctionType = spaceSubstitution ? CT_NEW_WORD_SPACE_SUBSTITUTION : CT_NEW_WORD_SPACE_OMITTION; Weighting::addCostAndForwardInputIndex(WEIGHTING, correctionType, traverseSession, dicNode, diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_reading_utils.cpp index 6f4fcbfc2..6da0e8b85 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_reading_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_reading_utils.cpp @@ -16,7 +16,7 @@ #include "suggest/policyimpl/dictionary/bigram/bigram_list_reading_utils.h" -#include "suggest/core/dictionary/byte_array_utils.h" +#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" namespace latinime { diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_reading_utils.h index 6b2bfe8c9..d0c584bd5 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_reading_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_reading_utils.h @@ -20,7 +20,7 @@ #include <stdint.h> #include "defines.h" -#include "suggest/core/dictionary/byte_array_utils.h" +#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" namespace latinime { diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp index f2c586245..34f092f49 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp @@ -16,23 +16,23 @@ #include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h" +#include <stdint.h> + #include "defines.h" -#include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h" #include "suggest/policyimpl/dictionary/patricia_trie_policy.h" +#include "suggest/policyimpl/dictionary/utils/format_utils.h" namespace latinime { /* static */ DictionaryStructureWithBufferPolicy *DictionaryStructureWithBufferPolicyFactory - ::newDictionaryStructurePolicy( - const BinaryDictionaryInfo *const binaryDictionaryInfo) { - switch (binaryDictionaryInfo->getFormat()) { - case BinaryDictionaryFormatUtils::VERSION_2: - return new PatriciaTriePolicy(binaryDictionaryInfo->getDictRoot(), - binaryDictionaryInfo); - case BinaryDictionaryFormatUtils::VERSION_3: - return new DynamicPatriciaTriePolicy(binaryDictionaryInfo->getDictRoot(), - binaryDictionaryInfo); + ::newDictionaryStructureWithBufferPolicy(const uint8_t *const dictBuf, + const int dictSize) { + switch (FormatUtils::detectFormatVersion(dictBuf, dictSize)) { + case FormatUtils::VERSION_2: + return new PatriciaTriePolicy(dictBuf); + case FormatUtils::VERSION_3: + return new DynamicPatriciaTriePolicy(dictBuf); default: ASSERT(false); return 0; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h index 95f82aabe..53eb8f927 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h @@ -17,18 +17,17 @@ #ifndef LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H #define LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H -#include "defines.h" +#include <stdint.h> +#include "defines.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" namespace latinime { -class BinaryDictionaryInfo; - class DictionaryStructureWithBufferPolicyFactory { public: - static DictionaryStructureWithBufferPolicy *newDictionaryStructurePolicy( - const BinaryDictionaryInfo *const binaryDictionaryInfo); + static DictionaryStructureWithBufferPolicy *newDictionaryStructureWithBufferPolicy( + const uint8_t *const dictBuf, const int dictSize); private: DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryStructureWithBufferPolicyFactory); diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp index a6214d3e4..77a85c86d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp @@ -16,42 +16,40 @@ #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" -#include "suggest/core/dictionary/binary_dictionary_info.h" -#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h" #include "suggest/core/policy/dictionary_bigrams_structure_policy.h" +#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" namespace latinime { void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(const int nodePos, const int maxCodePointCount, int *const outCodePoints) { - const uint8_t *const dictRoot = mBinaryDictionaryInfo->getDictRoot(); int pos = nodePos; - mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos); + mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); const int parentPos = - DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(dictRoot, &pos); + DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(mDictRoot, &pos); mParentPos = (parentPos != 0) ? mNodePos + parentPos : NOT_A_DICT_POS; if (outCodePoints != 0) { mCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition( - dictRoot, mFlags, maxCodePointCount, outCodePoints, &pos); + mDictRoot, mFlags, maxCodePointCount, outCodePoints, &pos); } else { mCodePointCount = PatriciaTrieReadingUtils::skipCharacters( - dictRoot, mFlags, MAX_WORD_LENGTH, &pos); + mDictRoot, mFlags, MAX_WORD_LENGTH, &pos); } if (isTerminal()) { - mProbability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos); + mProbability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos); } else { mProbability = NOT_A_PROBABILITY; } if (hasChildren()) { mChildrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( - dictRoot, mFlags, &pos); + mDictRoot, mFlags, &pos); } else { mChildrenPos = NOT_A_DICT_POS; } if (PatriciaTrieReadingUtils::hasShortcutTargets(mFlags)) { mShortcutPos = pos; - BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(mBinaryDictionaryInfo, &pos); + mShortcutsPolicy->skipAllShortcuts(&pos); } else { mShortcutPos = NOT_A_DICT_POS; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h index 4f22db738..e990809e8 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h @@ -25,8 +25,8 @@ namespace latinime { -class BinaryDictionaryInfo; class DictionaryBigramsStructurePolicy; +class DictionaryShortcutsStructurePolicy; /* * This class is used for helping to read nodes of dynamic patricia trie. This class handles moved @@ -34,13 +34,14 @@ class DictionaryBigramsStructurePolicy; */ class DynamicPatriciaTrieNodeReader { public: - DynamicPatriciaTrieNodeReader(const BinaryDictionaryInfo *const binaryDictionaryInfo, - const DictionaryBigramsStructurePolicy *const bigramsPolicy) - : mBinaryDictionaryInfo(binaryDictionaryInfo), mBigramsPolicy(bigramsPolicy), - mNodePos(NOT_A_VALID_WORD_POS), mFlags(0), mParentPos(NOT_A_DICT_POS), - mCodePointCount(0), mProbability(NOT_A_PROBABILITY), mChildrenPos(NOT_A_DICT_POS), - mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS), - mSiblingPos(NOT_A_VALID_WORD_POS) {} + DynamicPatriciaTrieNodeReader(const uint8_t *const dictRoot, + const DictionaryBigramsStructurePolicy *const bigramsPolicy, + const DictionaryShortcutsStructurePolicy *const shortcutsPolicy) + : mDictRoot(dictRoot), mBigramsPolicy(bigramsPolicy), + mShortcutsPolicy(shortcutsPolicy), mNodePos(NOT_A_VALID_WORD_POS), mFlags(0), + mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbability(NOT_A_PROBABILITY), + mChildrenPos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS), + mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_VALID_WORD_POS) {} ~DynamicPatriciaTrieNodeReader() {} @@ -120,8 +121,10 @@ class DynamicPatriciaTrieNodeReader { private: DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieNodeReader); - const BinaryDictionaryInfo *const mBinaryDictionaryInfo; + // TODO: Consolidate mDictRoot. + const uint8_t *const mDictRoot; const DictionaryBigramsStructurePolicy *const mBigramsPolicy; + const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy; int mNodePos; DynamicPatriciaTrieReadingUtils::NodeFlags mFlags; int mParentPos; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp index 9b384157e..7d3b2e28e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp @@ -19,7 +19,6 @@ #include "defines.h" #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_vector.h" -#include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" @@ -34,7 +33,8 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const d if (!dicNode->hasChildren()) { return; } - DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo, getBigramsStructurePolicy()); + DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), + getShortcutsStructurePolicy()); int mergedNodeCodePoints[MAX_WORD_LENGTH]; int nextPos = dicNode->getChildrenPos(); int totalChildCount = 0; @@ -79,7 +79,8 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun int mergedNodeCodePoints[maxCodePointCount]; int codePointCount = 0; - DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo, getBigramsStructurePolicy()); + DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), + getShortcutsStructurePolicy()); // First, read terminal node and get its probability. nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(nodePos, maxCodePointCount, mergedNodeCodePoints); @@ -123,7 +124,8 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in int mergedNodeCodePoints[MAX_WORD_LENGTH]; int currentLength = 0; int pos = getRootPosition(); - DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo, getBigramsStructurePolicy()); + DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), + getShortcutsStructurePolicy()); while (currentLength <= length) { // When foundMatchedNode becomes true, currentLength is increased at least once. bool foundMatchedNode = false; @@ -194,7 +196,8 @@ int DynamicPatriciaTriePolicy::getUnigramProbability(const int nodePos) const { if (nodePos == NOT_A_VALID_WORD_POS) { return NOT_A_PROBABILITY; } - DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo, getBigramsStructurePolicy()); + DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), + getShortcutsStructurePolicy()); nodeReader.fetchNodeInfoFromBuffer(nodePos); if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) { return NOT_A_PROBABILITY; @@ -206,7 +209,8 @@ int DynamicPatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) cons if (nodePos == NOT_A_VALID_WORD_POS) { return NOT_A_DICT_POS; } - DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo, getBigramsStructurePolicy()); + DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), + getShortcutsStructurePolicy()); nodeReader.fetchNodeInfoFromBuffer(nodePos); if (nodeReader.isDeleted()) { return NOT_A_DICT_POS; @@ -218,7 +222,8 @@ int DynamicPatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const if (nodePos == NOT_A_VALID_WORD_POS) { return NOT_A_DICT_POS; } - DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo, getBigramsStructurePolicy()); + DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), + getShortcutsStructurePolicy()); nodeReader.fetchNodeInfoFromBuffer(nodePos); if (nodeReader.isDeleted()) { return NOT_A_DICT_POS; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h index 1e94140c4..56475b137 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h @@ -22,19 +22,19 @@ #include "defines.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h" +#include "suggest/policyimpl/dictionary/header/header_policy.h" +#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h" namespace latinime { -class BinaryDictionaryInfo; class DicNode; class DicNodeVector; class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { public: - DynamicPatriciaTriePolicy(const uint8_t *const dictRoot, - const BinaryDictionaryInfo *const binaryDictionaryInfo) - : mDictRoot(dictRoot), mBinaryDictionaryInfo(binaryDictionaryInfo), - mBigramListPolicy(dictRoot) {} + DynamicPatriciaTriePolicy(const uint8_t *const dictBuf) + : mHeaderPolicy(dictBuf), mDictRoot(dictBuf + mHeaderPolicy.getSize()), + mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {} ~DynamicPatriciaTriePolicy() {} @@ -58,18 +58,27 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int getBigramsPositionOfNode(const int nodePos) const; + const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const { + return &mHeaderPolicy; + } + const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const { return &mBigramListPolicy; } + const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const { + return &mShortcutListPolicy; + } + private: DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy); static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP; + const HeaderPolicy mHeaderPolicy; + // TODO: Consolidate mDictRoot. const uint8_t *const mDictRoot; - // TODO: remove - const BinaryDictionaryInfo *const mBinaryDictionaryInfo; const BigramListPolicy mBigramListPolicy; + const ShortcutListPolicy mShortcutListPolicy; }; } // namespace latinime #endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp index 0de6341b0..1ef3b65c3 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp @@ -17,7 +17,7 @@ #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" #include "defines.h" -#include "suggest/core/dictionary/byte_array_utils.h" +#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" namespace latinime { diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h index 5398d7e37..a6cb46d39 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h @@ -20,7 +20,7 @@ #include <stdint.h> #include "defines.h" -#include "suggest/core/dictionary/byte_array_utils.h" +#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" namespace latinime { diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp new file mode 100644 index 000000000..eb828b58c --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/header/header_policy.h" + +namespace latinime { + +const char *const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY = + "MULTIPLE_WORDS_DEMOTION_RATE"; +const float HeaderPolicy::DEFAULT_MULTI_WORD_COST_MULTIPLIER = 1.0f; +const float HeaderPolicy::MULTI_WORD_COST_MULTIPLIER_SCALE = 100.0f; + +float HeaderPolicy::readMultiWordCostMultiplier() const { + const int headerValue = HeaderReadingUtils::readHeaderValueInt( + mDictBuf, MULTIPLE_WORDS_DEMOTION_RATE_KEY); + if (headerValue == S_INT_MIN) { + // not found + return DEFAULT_MULTI_WORD_COST_MULTIPLIER; + } + if (headerValue <= 0) { + return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); + } + return MULTI_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(headerValue); +} + +} // namespace latinime diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h index 240512bce..e3e6fc077 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h @@ -14,38 +14,40 @@ * limitations under the License. */ -#ifndef LATINIME_BINARY_DICTIONARY_HEADER_H -#define LATINIME_BINARY_DICTIONARY_HEADER_H +#ifndef LATINIME_HEADER_POLICY_H +#define LATINIME_HEADER_POLICY_H + +#include <stdint.h> #include "defines.h" -#include "suggest/core/dictionary/binary_dictionary_header_reading_utils.h" +#include "suggest/core/policy/dictionary_header_structure_policy.h" +#include "suggest/policyimpl/dictionary/header/header_reading_utils.h" namespace latinime { -class BinaryDictionaryInfo; - -/** - * This class abstracts dictionary header structures and provide interface to access dictionary - * header information. - */ -class BinaryDictionaryHeader { +class HeaderPolicy : public DictionaryHeaderStructurePolicy { public: - explicit BinaryDictionaryHeader(const BinaryDictionaryInfo *const binaryDictionaryInfo); + explicit HeaderPolicy(const uint8_t *const dictBuf) + : mDictBuf(dictBuf), mDictionaryFlags(HeaderReadingUtils::getFlags(dictBuf)), + mSize(HeaderReadingUtils::getHeaderSize(dictBuf)), + mMultiWordCostMultiplier(readMultiWordCostMultiplier()) {} + + ~HeaderPolicy() {} AK_FORCE_INLINE int getSize() const { return mSize; } AK_FORCE_INLINE bool supportsDynamicUpdate() const { - return BinaryDictionaryHeaderReadingUtils::supportsDynamicUpdate(mDictionaryFlags); + return HeaderReadingUtils::supportsDynamicUpdate(mDictionaryFlags); } AK_FORCE_INLINE bool requiresGermanUmlautProcessing() const { - return BinaryDictionaryHeaderReadingUtils::requiresGermanUmlautProcessing(mDictionaryFlags); + return HeaderReadingUtils::requiresGermanUmlautProcessing(mDictionaryFlags); } AK_FORCE_INLINE bool requiresFrenchLigatureProcessing() const { - return BinaryDictionaryHeaderReadingUtils::requiresFrenchLigatureProcessing( + return HeaderReadingUtils::requiresFrenchLigatureProcessing( mDictionaryFlags); } @@ -60,7 +62,7 @@ class BinaryDictionaryHeader { outValue[0] = '\0'; return; } - if (!BinaryDictionaryHeaderReadingUtils::readHeaderValue(mBinaryDictionaryInfo, + if (!HeaderReadingUtils::readHeaderValue(mDictBuf, key, outValue, outValueSize)) { outValue[0] = '?'; outValue[1] = '\0'; @@ -68,18 +70,19 @@ class BinaryDictionaryHeader { } private: - DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryHeader); + DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderPolicy); static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY; static const float DEFAULT_MULTI_WORD_COST_MULTIPLIER; static const float MULTI_WORD_COST_MULTIPLIER_SCALE; - const BinaryDictionaryInfo *const mBinaryDictionaryInfo; - const BinaryDictionaryHeaderReadingUtils::DictionaryFlags mDictionaryFlags; + const uint8_t *const mDictBuf; + const HeaderReadingUtils::DictionaryFlags mDictionaryFlags; const int mSize; const float mMultiWordCostMultiplier; float readMultiWordCostMultiplier() const; }; + } // namespace latinime -#endif // LATINIME_BINARY_DICTIONARY_HEADER_H +#endif /* LATINIME_HEADER_POLICY_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.cpp new file mode 100644 index 000000000..23b88ecdf --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.cpp @@ -0,0 +1,108 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/header/header_reading_utils.h" + +#include <cctype> +#include <cstdlib> + +#include "defines.h" +#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" + +namespace latinime { + +const int HeaderReadingUtils::MAX_OPTION_KEY_LENGTH = 256; + +const int HeaderReadingUtils::HEADER_MAGIC_NUMBER_SIZE = 4; +const int HeaderReadingUtils::HEADER_DICTIONARY_VERSION_SIZE = 2; +const int HeaderReadingUtils::HEADER_FLAG_SIZE = 2; +const int HeaderReadingUtils::HEADER_SIZE_FIELD_SIZE = 4; + +const HeaderReadingUtils::DictionaryFlags + HeaderReadingUtils::NO_FLAGS = 0; +// Flags for special processing +// Those *must* match the flags in makedict (BinaryDictInputOutput#*_PROCESSING_FLAG) or +// something very bad (like, the apocalypse) will happen. Please update both at the same time. +const HeaderReadingUtils::DictionaryFlags + HeaderReadingUtils::GERMAN_UMLAUT_PROCESSING_FLAG = 0x1; +const HeaderReadingUtils::DictionaryFlags + HeaderReadingUtils::SUPPORTS_DYNAMIC_UPDATE_FLAG = 0x2; +const HeaderReadingUtils::DictionaryFlags + HeaderReadingUtils::FRENCH_LIGATURE_PROCESSING_FLAG = 0x4; + +/* static */ int HeaderReadingUtils::getHeaderSize(const uint8_t *const dictBuf) { + // See the format of the header in the comment in + // BinaryDictionaryFormatUtils::detectFormatVersion() + return ByteArrayUtils::readUint32(dictBuf, HEADER_MAGIC_NUMBER_SIZE + + HEADER_DICTIONARY_VERSION_SIZE + HEADER_FLAG_SIZE); +} + +/* static */ HeaderReadingUtils::DictionaryFlags + HeaderReadingUtils::getFlags(const uint8_t *const dictBuf) { + return ByteArrayUtils::readUint16(dictBuf, + HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE); +} + +// Returns if the key is found or not and reads the found value into outValue. +/* static */ bool HeaderReadingUtils::readHeaderValue(const uint8_t *const dictBuf, + const char *const key, int *outValue, const int outValueSize) { + if (outValueSize <= 0) { + return false; + } + const int headerSize = getHeaderSize(dictBuf); + int pos = getHeaderOptionsPosition(); + if (pos == NOT_A_DICT_POS) { + // The header doesn't have header options. + return false; + } + while (pos < headerSize) { + if(ByteArrayUtils::compareStringInBufferWithCharArray( + dictBuf, key, headerSize - pos, &pos) == 0) { + // The key was found. + const int length = ByteArrayUtils::readStringAndAdvancePosition(dictBuf, outValueSize, + outValue, &pos); + // Add a 0 terminator to the string. + outValue[length < outValueSize ? length : outValueSize - 1] = '\0'; + return true; + } + ByteArrayUtils::advancePositionToBehindString(dictBuf, headerSize - pos, &pos); + } + // The key was not found. + return false; +} + +/* static */ int HeaderReadingUtils::readHeaderValueInt( + const uint8_t *const dictBuf, const char *const key) { + const int bufferSize = LARGEST_INT_DIGIT_COUNT; + int intBuffer[bufferSize]; + char charBuffer[bufferSize]; + if (!readHeaderValue(dictBuf, key, intBuffer, bufferSize)) { + return S_INT_MIN; + } + for (int i = 0; i < bufferSize; ++i) { + charBuffer[i] = intBuffer[i]; + if (charBuffer[i] == '0') { + break; + } + if (!isdigit(charBuffer[i])) { + // If not a number, return S_INT_MIN + return S_INT_MIN; + } + } + return atoi(charBuffer); +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.h new file mode 100644 index 000000000..c94919640 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.h @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_HEADER_READING_UTILS_H +#define LATINIME_HEADER_READING_UTILS_H + +#include <stdint.h> + +#include "defines.h" + +namespace latinime { + +class HeaderReadingUtils { + public: + typedef uint16_t DictionaryFlags; + + static const int MAX_OPTION_KEY_LENGTH; + + static int getHeaderSize(const uint8_t *const dictBuf); + + static DictionaryFlags getFlags(const uint8_t *const dictBuf); + + static AK_FORCE_INLINE bool supportsDynamicUpdate(const DictionaryFlags flags) { + return (flags & SUPPORTS_DYNAMIC_UPDATE_FLAG) != 0; + } + + static AK_FORCE_INLINE bool requiresGermanUmlautProcessing(const DictionaryFlags flags) { + return (flags & GERMAN_UMLAUT_PROCESSING_FLAG) != 0; + } + + static AK_FORCE_INLINE bool requiresFrenchLigatureProcessing(const DictionaryFlags flags) { + return (flags & FRENCH_LIGATURE_PROCESSING_FLAG) != 0; + } + + static AK_FORCE_INLINE int getHeaderOptionsPosition() { + return HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE + HEADER_FLAG_SIZE + + HEADER_SIZE_FIELD_SIZE; + } + + static bool readHeaderValue(const uint8_t *const dictBuf, + const char *const key, int *outValue, const int outValueSize); + + static int readHeaderValueInt(const uint8_t *const dictBuf, const char *const key); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderReadingUtils); + + static const int HEADER_MAGIC_NUMBER_SIZE; + static const int HEADER_DICTIONARY_VERSION_SIZE; + static const int HEADER_FLAG_SIZE; + static const int HEADER_SIZE_FIELD_SIZE; + + static const DictionaryFlags NO_FLAGS; + // Flags for special processing + // Those *must* match the flags in makedict (FormatSpec#*_PROCESSING_FLAGS) or + // something very bad (like, the apocalypse) will happen. Please update both at the same time. + static const DictionaryFlags GERMAN_UMLAUT_PROCESSING_FLAG; + static const DictionaryFlags SUPPORTS_DYNAMIC_UPDATE_FLAG; + static const DictionaryFlags FRENCH_LIGATURE_PROCESSING_FLAG; + static const DictionaryFlags CONTAINS_BIGRAMS_FLAG; +}; +} +#endif /* LATINIME_HEADER_READING_UTILS_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp index 7001509ab..8ce2b3ea0 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp @@ -20,8 +20,6 @@ #include "defines.h" #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_vector.h" -#include "suggest/core/dictionary/binary_dictionary_info.h" -#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h" #include "suggest/policyimpl/dictionary/binary_format.h" #include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" @@ -112,7 +110,7 @@ int PatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const { PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &pos); } if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) { - BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(mBinaryDictionaryInfo, &pos); + mShortcutListPolicy.skipAllShortcuts(&pos);; } return pos; } @@ -133,7 +131,7 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( mDictRoot, flags, &pos) : NOT_A_DICT_POS; if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) { - BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(mBinaryDictionaryInfo, &pos); + getShortcutsStructurePolicy()->skipAllShortcuts(&pos); } if (PatriciaTrieReadingUtils::hasBigrams(flags)) { getBigramsStructurePolicy()->skipAllBigrams(&pos); diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h index 2f81aedb8..bebe1bfff 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h @@ -22,19 +22,19 @@ #include "defines.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h" +#include "suggest/policyimpl/dictionary/header/header_policy.h" +#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h" namespace latinime { -class BinaryDictionaryInfo; class DicNode; class DicNodeVector; class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { public: - PatriciaTriePolicy(const uint8_t *const dictRoot, - const BinaryDictionaryInfo *const binaryDictionaryInfo) - : mDictRoot(dictRoot), mBinaryDictionaryInfo(binaryDictionaryInfo), - mBigramListPolicy(dictRoot) {} + PatriciaTriePolicy(const uint8_t *const dictBuf) + : mHeaderPolicy(dictBuf), mDictRoot(dictBuf + mHeaderPolicy.getSize()), + mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {} ~PatriciaTriePolicy() {} @@ -58,17 +58,25 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int getBigramsPositionOfNode(const int nodePos) const; + const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const { + return &mHeaderPolicy; + } + const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const { return &mBigramListPolicy; } + const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const { + return &mShortcutListPolicy; + } + private: DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy); + const HeaderPolicy mHeaderPolicy; const uint8_t *const mDictRoot; - // TODO: remove - const BinaryDictionaryInfo *const mBinaryDictionaryInfo; const BigramListPolicy mBigramListPolicy; + const ShortcutListPolicy mShortcutListPolicy; int createAndGetLeavingChildNode(const DicNode *const dicNode, const int nodePos, const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const; diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp index 89e981df8..003b9435b 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp @@ -17,7 +17,7 @@ #include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" #include "defines.h" -#include "suggest/core/dictionary/byte_array_utils.h" +#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" namespace latinime { diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h index 002c3f19b..9f2fc2059 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h @@ -20,7 +20,7 @@ #include <stdint.h> #include "defines.h" -#include "suggest/core/dictionary/byte_array_utils.h" +#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" namespace latinime { diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h new file mode 100644 index 000000000..d73f73953 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h @@ -0,0 +1,73 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_SHORTCUT_LIST_POLICY_H +#define LATINIME_SHORTCUT_LIST_POLICY_H + +#include <stdint.h> + +#include "defines.h" +#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" +#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h" + +namespace latinime { + +class ShortcutListPolicy : public DictionaryShortcutsStructurePolicy { + public: + explicit ShortcutListPolicy(const uint8_t *const shortcutBuf) + : mShortcutsBuf(shortcutBuf) {} + + ~ShortcutListPolicy() {} + + int getStartPos(const int pos) const { + if (pos == NOT_A_DICT_POS) { + return NOT_A_DICT_POS; + } + int listPos = pos; + ShortcutListReadingUtils::getShortcutListSizeAndForwardPointer(mShortcutsBuf, &listPos); + return listPos; + } + + void getNextShortcut(const int maxCodePointCount, int *const outCodePoint, + int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext, + int *const pos) const { + const ShortcutListReadingUtils::ShortcutFlags flags = + ShortcutListReadingUtils::getFlagsAndForwardPointer(mShortcutsBuf, pos); + if (outHasNext) { + *outHasNext = ShortcutListReadingUtils::hasNext(flags); + } + if (outIsWhitelist) { + *outIsWhitelist = ShortcutListReadingUtils::isWhitelist(flags); + } + if (outCodePoint) { + *outCodePointCount = ShortcutListReadingUtils::readShortcutTarget( + mShortcutsBuf, maxCodePointCount, outCodePoint, pos); + } + } + + void skipAllShortcuts(int *const pos) const { + const int shortcutListSize = ShortcutListReadingUtils + ::getShortcutListSizeAndForwardPointer(mShortcutsBuf, pos); + *pos += shortcutListSize; + } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(ShortcutListPolicy); + + const uint8_t *const mShortcutsBuf; +}; +} // namespace latinime +#endif // LATINIME_SHORTCUT_LIST_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp new file mode 100644 index 000000000..e70bb5071 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h" + +namespace latinime { + +// Flag for presence of more attributes +const ShortcutListReadingUtils::ShortcutFlags + ShortcutListReadingUtils::FLAG_ATTRIBUTE_HAS_NEXT = 0x80; +// Mask for attribute probability, stored on 4 bits inside the flags byte. +const ShortcutListReadingUtils::ShortcutFlags + ShortcutListReadingUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F; +const int ShortcutListReadingUtils::SHORTCUT_LIST_SIZE_FIELD_SIZE = 2; +// The numeric value of the shortcut probability that means 'whitelist'. +const int ShortcutListReadingUtils::WHITELIST_SHORTCUT_PROBABILITY = 15; + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h new file mode 100644 index 000000000..b5bb964d5 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h @@ -0,0 +1,77 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_SHORTCUT_LIST_READING_UTILS_H +#define LATINIME_SHORTCUT_LIST_READING_UTILS_H + +#include <stdint.h> + +#include "defines.h" +#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" + +namespace latinime { + +class ShortcutListReadingUtils { + public: + typedef uint8_t ShortcutFlags; + + static AK_FORCE_INLINE ShortcutFlags getFlagsAndForwardPointer( + const uint8_t *const dictRoot, int *const pos) { + return ByteArrayUtils::readUint8AndAdvancePosition(dictRoot, pos); + } + + static AK_FORCE_INLINE int getProbabilityFromFlags(const ShortcutFlags flags) { + return flags & MASK_ATTRIBUTE_PROBABILITY; + } + + static AK_FORCE_INLINE bool hasNext(const ShortcutFlags flags) { + return (flags & FLAG_ATTRIBUTE_HAS_NEXT) != 0; + } + + // This method returns the size of the shortcut list region excluding the shortcut list size + // field at the beginning. + static AK_FORCE_INLINE int getShortcutListSizeAndForwardPointer( + const uint8_t *const dictRoot, int *const pos) { + // readUint16andAdvancePosition() returns an offset *including* the uint16 field itself. + return ByteArrayUtils::readUint16AndAdvancePosition(dictRoot, pos) + - SHORTCUT_LIST_SIZE_FIELD_SIZE; + } + + static AK_FORCE_INLINE void skipShortcuts(const uint8_t *const dictRoot, int *const pos) { + const int shortcutListSize = getShortcutListSizeAndForwardPointer(dictRoot, pos); + *pos += shortcutListSize; + } + + static AK_FORCE_INLINE bool isWhitelist(const ShortcutFlags flags) { + return getProbabilityFromFlags(flags) == WHITELIST_SHORTCUT_PROBABILITY; + } + + static AK_FORCE_INLINE int readShortcutTarget( + const uint8_t *const dictRoot, const int maxLength, int *const outWord, + int *const pos) { + return ByteArrayUtils::readStringAndAdvancePosition(dictRoot, maxLength, outWord, pos); + } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(ShortcutListReadingUtils); + + static const ShortcutFlags FLAG_ATTRIBUTE_HAS_NEXT; + static const ShortcutFlags MASK_ATTRIBUTE_PROBABILITY; + static const int SHORTCUT_LIST_SIZE_FIELD_SIZE; + static const int WHITELIST_SHORTCUT_PROBABILITY; +}; +} // namespace latinime +#endif // LATINIME_SHORTCUT_LIST_READING_UTILS_H diff --git a/native/jni/src/suggest/core/dictionary/byte_array_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.cpp index 68b1d5d15..a84cfb9d5 100644 --- a/native/jni/src/suggest/core/dictionary/byte_array_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "suggest/core/dictionary/byte_array_utils.h" +#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" namespace latinime { diff --git a/native/jni/src/suggest/core/dictionary/byte_array_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h index 75ccfc766..75ccfc766 100644 --- a/native/jni/src/suggest/core/dictionary/byte_array_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp index 0e8d72f2e..3796c7b7b 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp @@ -14,7 +14,9 @@ * limitations under the License. */ -#include "suggest/core/dictionary/binary_dictionary_format_utils.h" +#include "suggest/policyimpl/dictionary/utils/format_utils.h" + +#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" namespace latinime { @@ -22,22 +24,19 @@ namespace latinime { * Dictionary size */ // Any file smaller than this is not a dictionary. -const int BinaryDictionaryFormatUtils::DICTIONARY_MINIMUM_SIZE = 4; +const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 4; /** * Format versions */ - -// The versions of Latin IME that only handle format version 1 only test for the magic -// number, so we had to change it so that version 2 files would be rejected by older -// implementations. On this occasion, we made the magic number 32 bits long. -const uint32_t BinaryDictionaryFormatUtils::HEADER_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE; +// 32 bit magic number is stored at the beginning of the dictionary header to reject unsupported +// or obsolete dictionary formats. +const uint32_t FormatUtils::HEADER_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE; // Magic number (4 bytes), version (2 bytes), options (2 bytes), header size (4 bytes) = 12 -const int BinaryDictionaryFormatUtils::HEADER_VERSION_2_MINIMUM_SIZE = 12; +const int FormatUtils::HEADER_VERSION_2_MINIMUM_SIZE = 12; -/* static */ BinaryDictionaryFormatUtils::FORMAT_VERSION - BinaryDictionaryFormatUtils::detectFormatVersion(const uint8_t *const dict, - const int dictSize) { +/* static */ FormatUtils::FORMAT_VERSION FormatUtils::detectFormatVersion( + const uint8_t *const dict, const int dictSize) { // The magic number is stored big-endian. // If the dictionary is less than 4 bytes, we can't even read the magic number, so we don't // understand this format. diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h index 830684c70..f84321577 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h @@ -14,24 +14,19 @@ * limitations under the License. */ -#ifndef LATINIME_BINARY_DICTIONARY_FORMAT_UTILS_H -#define LATINIME_BINARY_DICTIONARY_FORMAT_UTILS_H +#ifndef LATINIME_FORMAT_UTILS_H +#define LATINIME_FORMAT_UTILS_H #include <stdint.h> #include "defines.h" -#include "suggest/core/dictionary/byte_array_utils.h" namespace latinime { /** * Methods to handle binary dictionary format version. - * - * Currently, we have a file with a similar name, binary_format.h. binary_format.h contains binary - * reading methods and utility methods for various purposes. - * On the other hand, this file deals with only about dictionary format version. */ -class BinaryDictionaryFormatUtils { +class FormatUtils { public: enum FORMAT_VERSION { VERSION_2, @@ -42,11 +37,11 @@ class BinaryDictionaryFormatUtils { static FORMAT_VERSION detectFormatVersion(const uint8_t *const dict, const int dictSize); private: - DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryFormatUtils); + DISALLOW_IMPLICIT_CONSTRUCTORS(FormatUtils); static const int DICTIONARY_MINIMUM_SIZE; static const uint32_t HEADER_VERSION_2_MAGIC_NUMBER; static const int HEADER_VERSION_2_MINIMUM_SIZE; }; } // namespace latinime -#endif /* LATINIME_BINARY_DICTIONARY_FORMAT_UTILS_H */ +#endif /* LATINIME_FORMAT_UTILS_H */ diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h index 7cddb0882..b6aa85896 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h +++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h @@ -155,7 +155,8 @@ class TypingWeighting : public Weighting { float getNewWordBigramLanguageCost(const DicTraverseSession *const traverseSession, const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) const { - return DicNodeUtils::getBigramNodeImprobability(traverseSession->getBinaryDictionaryInfo(), + return DicNodeUtils::getBigramNodeImprobability( + traverseSession->getDictionaryStructurePolicy(), dicNode, multiBigramMap) * ScoringParams::DISTANCE_WEIGHT_LANGUAGE; } |