diff options
Diffstat (limited to 'native/jni/src')
38 files changed, 825 insertions, 592 deletions
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_proximity_filter.h b/native/jni/src/suggest/core/dicnode/dic_node_proximity_filter.h index 1a39f2ef3..c7ab571de 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_proximity_filter.h +++ b/native/jni/src/suggest/core/dicnode/dic_node_proximity_filter.h @@ -20,11 +20,11 @@ #include "defines.h" #include "suggest/core/layout/proximity_info_state.h" #include "suggest/core/layout/proximity_info_utils.h" -#include "suggest/core/policy/dictionary_structure_policy.h" +#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" namespace latinime { -class DicNodeProximityFilter : public DictionaryStructurePolicy::NodeFilter { +class DicNodeProximityFilter : public DictionaryStructureWithBufferPolicy::NodeFilter { public: DicNodeProximityFilter(const ProximityInfoState *const pInfoState, const int pointIndex, const bool exactOnly) diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp index 6b4ef2fea..4ff4bc2e4 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp +++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp @@ -24,7 +24,7 @@ #include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/multi_bigram_map.h" #include "suggest/core/dictionary/probability_utils.h" -#include "suggest/core/policy/dictionary_structure_policy.h" +#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "utils/char_utils.h" namespace latinime { @@ -83,7 +83,7 @@ namespace latinime { DicNodeUtils::createAndGetPassingChildNode(dicNode, &childrenFilter, childDicNodes); } else { binaryDictionaryInfo->getStructurePolicy()->createAndGetAllChildNodes(dicNode, - binaryDictionaryInfo, &childrenFilter, childDicNodes); + &childrenFilter, childDicNodes); } } @@ -118,8 +118,8 @@ namespace latinime { return ProbabilityUtils::backoff(unigramProbability); } if (multiBigramMap) { - return multiBigramMap->getBigramProbability( - binaryDictionaryInfo, prevWordPos, wordPos, unigramProbability); + return multiBigramMap->getBigramProbability(binaryDictionaryInfo->getStructurePolicy(), + prevWordPos, wordPos, unigramProbability); } return ProbabilityUtils::backoff(unigramProbability); } diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp index 3751ae500..fdaa562e5 100644 --- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp @@ -112,13 +112,14 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng int bigramCount = 0; int unigramProbability = 0; int bigramBuffer[MAX_WORD_LENGTH]; - BinaryDictionaryBigramsIterator bigramsIt(mBinaryDictionaryInfo, pos); + const DictionaryStructureWithBufferPolicy *const structurePolicy = + mBinaryDictionaryInfo->getStructurePolicy(); + BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), pos); while (bigramsIt.hasNext()) { bigramsIt.next(); - const int length = mBinaryDictionaryInfo->getStructurePolicy()-> - getCodePointsAndProbabilityAndReturnCodePointCount( - mBinaryDictionaryInfo, bigramsIt.getBigramPos(), MAX_WORD_LENGTH, - bigramBuffer, &unigramProbability); + const int length = structurePolicy-> + getCodePointsAndProbabilityAndReturnCodePointCount(bigramsIt.getBigramPos(), + MAX_WORD_LENGTH, bigramBuffer, &unigramProbability); // Due to space constraints, the probability for bigrams is approximate - the lower the // unigram probability, the worse the precision. The theoritical maximum error in // resulting probability is 8 - although in the practice it's never bigger than 3 or 4 @@ -138,11 +139,12 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength, const bool forceLowerCaseSearch) const { if (0 >= prevWordLength) return NOT_A_DICT_POS; - int pos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord( - mBinaryDictionaryInfo, prevWord, prevWordLength, forceLowerCaseSearch); + const DictionaryStructureWithBufferPolicy *const structurePolicy = + mBinaryDictionaryInfo->getStructurePolicy(); + int pos = structurePolicy->getTerminalNodePositionOfWord(prevWord, prevWordLength, + forceLowerCaseSearch); if (NOT_A_VALID_WORD_POS == pos) return NOT_A_DICT_POS; - return mBinaryDictionaryInfo->getStructurePolicy()->getBigramsPositionOfNode( - mBinaryDictionaryInfo, pos); + return structurePolicy->getBigramsPositionOfNode(pos); } bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *word1, @@ -150,11 +152,13 @@ bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *w int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */); // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams if (NOT_A_DICT_POS == pos) return false; - int nextWordPos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord( - mBinaryDictionaryInfo, word1, length1, false /* forceLowerCaseSearch */); + const DictionaryStructureWithBufferPolicy *const structurePolicy = + mBinaryDictionaryInfo->getStructurePolicy(); + int nextWordPos = structurePolicy->getTerminalNodePositionOfWord(word1, length1, + false /* forceLowerCaseSearch */); if (NOT_A_VALID_WORD_POS == nextWordPos) return false; - BinaryDictionaryBigramsIterator bigramsIt(mBinaryDictionaryInfo, pos); + BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), pos); while (bigramsIt.hasNext()) { bigramsIt.next(); if (bigramsIt.getBigramPos() == nextWordPos) { diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h index 8cbb12998..d16ac47fe 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h @@ -18,51 +18,41 @@ #define LATINIME_BINARY_DICTIONARY_BIGRAMS_ITERATOR_H #include "defines.h" -#include "suggest/core/dictionary/binary_dictionary_info.h" -#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h" +#include "suggest/core/policy/dictionary_bigrams_structure_policy.h" namespace latinime { class BinaryDictionaryBigramsIterator { public: BinaryDictionaryBigramsIterator( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const int pos) - : mBinaryDictionaryInfo(binaryDictionaryInfo), mPos(pos), mBigramFlags(0), - mBigramPos(NOT_A_DICT_POS), mHasNext(pos != NOT_A_DICT_POS) {} + const DictionaryBigramsStructurePolicy *const bigramsStructurePolicy, const int pos) + : mBigramsStructurePolicy(bigramsStructurePolicy), mPos(pos), + mBigramPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY), + mHasNext(pos != NOT_A_DICT_POS) {} AK_FORCE_INLINE bool hasNext() const { return mHasNext; } AK_FORCE_INLINE void next() { - mBigramFlags = BinaryDictionaryTerminalAttributesReadingUtils::getFlagsAndForwardPointer( - mBinaryDictionaryInfo, &mPos); - mBigramPos = - BinaryDictionaryTerminalAttributesReadingUtils::getBigramAddressAndForwardPointer( - mBinaryDictionaryInfo, mBigramFlags, &mPos); - mHasNext = BinaryDictionaryTerminalAttributesReadingUtils::hasNext(mBigramFlags); + mBigramsStructurePolicy->getNextBigram(&mBigramPos, &mProbability, &mHasNext, &mPos); } AK_FORCE_INLINE int getProbability() const { - return BinaryDictionaryTerminalAttributesReadingUtils::getProbabilityFromFlags( - mBigramFlags); + return mProbability; } AK_FORCE_INLINE int getBigramPos() const { return mBigramPos; } - AK_FORCE_INLINE int getFlags() const { - return mBigramFlags; - } - private: DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryBigramsIterator); - const BinaryDictionaryInfo *const mBinaryDictionaryInfo; + const DictionaryBigramsStructurePolicy *const mBigramsStructurePolicy; int mPos; - BinaryDictionaryTerminalAttributesReadingUtils::BigramFlags mBigramFlags; int mBigramPos; + int mProbability; bool mHasNext; }; } // namespace latinime diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp index 0e8d72f2e..15f2aa630 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp @@ -16,6 +16,8 @@ #include "suggest/core/dictionary/binary_dictionary_format_utils.h" +#include "suggest/core/dictionary/byte_array_utils.h" + namespace latinime { /** @@ -27,7 +29,6 @@ const int BinaryDictionaryFormatUtils::DICTIONARY_MINIMUM_SIZE = 4; /** * Format versions */ - // The versions of Latin IME that only handle format version 1 only test for the magic // number, so we had to change it so that version 2 files would be rejected by older // implementations. On this occasion, we made the magic number 32 bits long. diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h index 830684c70..62c7376e1 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h @@ -20,7 +20,6 @@ #include <stdint.h> #include "defines.h" -#include "suggest/core/dictionary/byte_array_utils.h" namespace latinime { diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_header.cpp index 91c643a5f..16f534c21 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header.cpp +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_header.cpp @@ -17,7 +17,6 @@ #include "suggest/core/dictionary/binary_dictionary_header.h" #include "defines.h" -#include "suggest/core/dictionary/binary_dictionary_info.h" namespace latinime { @@ -26,16 +25,15 @@ const char *const BinaryDictionaryHeader::MULTIPLE_WORDS_DEMOTION_RATE_KEY = const float BinaryDictionaryHeader::DEFAULT_MULTI_WORD_COST_MULTIPLIER = 1.0f; const float BinaryDictionaryHeader::MULTI_WORD_COST_MULTIPLIER_SCALE = 100.0f; -BinaryDictionaryHeader::BinaryDictionaryHeader( - const BinaryDictionaryInfo *const binaryDictionaryInfo) - : mBinaryDictionaryInfo(binaryDictionaryInfo), - mDictionaryFlags(BinaryDictionaryHeaderReadingUtils::getFlags(binaryDictionaryInfo)), - mSize(BinaryDictionaryHeaderReadingUtils::getHeaderSize(binaryDictionaryInfo)), +BinaryDictionaryHeader::BinaryDictionaryHeader(const uint8_t *const dictBuf) + : mDictBuf(dictBuf), + mDictionaryFlags(BinaryDictionaryHeaderReadingUtils::getFlags(mDictBuf)), + mSize(BinaryDictionaryHeaderReadingUtils::getHeaderSize(mDictBuf)), mMultiWordCostMultiplier(readMultiWordCostMultiplier()) {} float BinaryDictionaryHeader::readMultiWordCostMultiplier() const { const int headerValue = BinaryDictionaryHeaderReadingUtils::readHeaderValueInt( - mBinaryDictionaryInfo, MULTIPLE_WORDS_DEMOTION_RATE_KEY); + mDictBuf, MULTIPLE_WORDS_DEMOTION_RATE_KEY); if (headerValue == S_INT_MIN) { // not found return DEFAULT_MULTI_WORD_COST_MULTIPLIER; diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h index 240512bce..4d9295229 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h @@ -17,6 +17,8 @@ #ifndef LATINIME_BINARY_DICTIONARY_HEADER_H #define LATINIME_BINARY_DICTIONARY_HEADER_H +#include <stdint.h> + #include "defines.h" #include "suggest/core/dictionary/binary_dictionary_header_reading_utils.h" @@ -28,9 +30,10 @@ class BinaryDictionaryInfo; * This class abstracts dictionary header structures and provide interface to access dictionary * header information. */ +// TODO:: Move header classes to policyimpl. class BinaryDictionaryHeader { public: - explicit BinaryDictionaryHeader(const BinaryDictionaryInfo *const binaryDictionaryInfo); + explicit BinaryDictionaryHeader(const uint8_t *const dictBuf); AK_FORCE_INLINE int getSize() const { return mSize; @@ -60,7 +63,7 @@ class BinaryDictionaryHeader { outValue[0] = '\0'; return; } - if (!BinaryDictionaryHeaderReadingUtils::readHeaderValue(mBinaryDictionaryInfo, + if (!BinaryDictionaryHeaderReadingUtils::readHeaderValue(mDictBuf, key, outValue, outValueSize)) { outValue[0] = '?'; outValue[1] = '\0'; @@ -74,7 +77,7 @@ class BinaryDictionaryHeader { static const float DEFAULT_MULTI_WORD_COST_MULTIPLIER; static const float MULTI_WORD_COST_MULTIPLIER_SCALE; - const BinaryDictionaryInfo *const mBinaryDictionaryInfo; + const uint8_t *const mDictBuf; const BinaryDictionaryHeaderReadingUtils::DictionaryFlags mDictionaryFlags; const int mSize; const float mMultiWordCostMultiplier; diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp index a57b0f859..bcf0e612c 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp @@ -20,12 +20,11 @@ #include <cstdlib> #include "defines.h" -#include "suggest/core/dictionary/binary_dictionary_info.h" +#include "suggest/core/dictionary/byte_array_utils.h" namespace latinime { const int BinaryDictionaryHeaderReadingUtils::MAX_OPTION_KEY_LENGTH = 256; - const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_MAGIC_NUMBER_SIZE = 4; const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_DICTIONARY_VERSION_SIZE = 2; const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_FLAG_SIZE = 2; @@ -43,68 +42,54 @@ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags const BinaryDictionaryHeaderReadingUtils::DictionaryFlags BinaryDictionaryHeaderReadingUtils::FRENCH_LIGATURE_PROCESSING_FLAG = 0x4; -/* static */ int BinaryDictionaryHeaderReadingUtils::getHeaderSize( - const BinaryDictionaryInfo *const binaryDictionaryInfo) { - switch (getHeaderVersion(binaryDictionaryInfo->getFormat())) { - case HEADER_VERSION_2: - // See the format of the header in the comment in - // BinaryDictionaryFormatUtils::detectFormatVersion() - return ByteArrayUtils::readUint32(binaryDictionaryInfo->getDictBuf(), - VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE - + VERSION_2_HEADER_FLAG_SIZE); - default: - return S_INT_MAX; - } +/* static */ int BinaryDictionaryHeaderReadingUtils::getHeaderSize(const uint8_t *const dictBuf) { + // See the format of the header in the comment in + // BinaryDictionaryFormatUtils::detectFormatVersion() + return ByteArrayUtils::readUint32(dictBuf, + VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE + + VERSION_2_HEADER_FLAG_SIZE); } /* static */ BinaryDictionaryHeaderReadingUtils::DictionaryFlags - BinaryDictionaryHeaderReadingUtils::getFlags( - const BinaryDictionaryInfo *const binaryDictionaryInfo) { - switch (getHeaderVersion(binaryDictionaryInfo->getFormat())) { - case HEADER_VERSION_2: - return ByteArrayUtils::readUint16(binaryDictionaryInfo->getDictBuf(), - VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE); - default: - return NO_FLAGS; - } + BinaryDictionaryHeaderReadingUtils::getFlags(const uint8_t *const dictBuf) { + return ByteArrayUtils::readUint16(dictBuf, VERSION_2_HEADER_MAGIC_NUMBER_SIZE + + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE); } // Returns if the key is found or not and reads the found value into outValue. -/* static */ bool BinaryDictionaryHeaderReadingUtils::readHeaderValue( - const BinaryDictionaryInfo *const binaryDictionaryInfo, +/* static */ bool BinaryDictionaryHeaderReadingUtils::readHeaderValue(const uint8_t *const dictBuf, const char *const key, int *outValue, const int outValueSize) { if (outValueSize <= 0) { return false; } - const int headerSize = getHeaderSize(binaryDictionaryInfo); - int pos = getHeaderOptionsPosition(binaryDictionaryInfo->getFormat()); + const int headerSize = getHeaderSize(dictBuf); + int pos = getHeaderOptionsPosition(); if (pos == NOT_A_DICT_POS) { // The header doesn't have header options. return false; } while (pos < headerSize) { if(ByteArrayUtils::compareStringInBufferWithCharArray( - binaryDictionaryInfo->getDictBuf(), key, headerSize - pos, &pos) == 0) { + dictBuf, key, headerSize - pos, &pos) == 0) { // The key was found. const int length = ByteArrayUtils::readStringAndAdvancePosition( - binaryDictionaryInfo->getDictBuf(), outValueSize, outValue, &pos); + dictBuf, outValueSize, outValue, &pos); // Add a 0 terminator to the string. outValue[length < outValueSize ? length : outValueSize - 1] = '\0'; return true; } - ByteArrayUtils::advancePositionToBehindString( - binaryDictionaryInfo->getDictBuf(), headerSize - pos, &pos); + ByteArrayUtils::advancePositionToBehindString(dictBuf, headerSize - pos, &pos); } // The key was not found. return false; } /* static */ int BinaryDictionaryHeaderReadingUtils::readHeaderValueInt( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const char *const key) { + const uint8_t *const dictBuf, const char *const key) { const int bufferSize = LARGEST_INT_DIGIT_COUNT; int intBuffer[bufferSize]; char charBuffer[bufferSize]; - if (!readHeaderValue(binaryDictionaryInfo, key, intBuffer, bufferSize)) { + if (!readHeaderValue(dictBuf, key, intBuffer, bufferSize)) { return S_INT_MIN; } for (int i = 0; i < bufferSize; ++i) { diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h index 61748227e..deae9be27 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h @@ -20,21 +20,19 @@ #include <stdint.h> #include "defines.h" -#include "suggest/core/dictionary/binary_dictionary_format_utils.h" namespace latinime { -class BinaryDictionaryInfo; - +// TODO:: Move header classes to policyimpl. class BinaryDictionaryHeaderReadingUtils { public: typedef uint16_t DictionaryFlags; static const int MAX_OPTION_KEY_LENGTH; - static int getHeaderSize(const BinaryDictionaryInfo *const binaryDictionaryInfo); + static int getHeaderSize(const uint8_t *const dictBuf); - static DictionaryFlags getFlags(const BinaryDictionaryInfo *const binaryDictionaryInfo); + static DictionaryFlags getFlags(const uint8_t *const dictBuf); static AK_FORCE_INLINE bool supportsDynamicUpdate(const DictionaryFlags flags) { return (flags & SUPPORTS_DYNAMIC_UPDATE_FLAG) != 0; @@ -48,33 +46,19 @@ class BinaryDictionaryHeaderReadingUtils { return (flags & FRENCH_LIGATURE_PROCESSING_FLAG) != 0; } - static AK_FORCE_INLINE int getHeaderOptionsPosition( - const BinaryDictionaryFormatUtils::FORMAT_VERSION dictionaryFormat) { - switch (getHeaderVersion(dictionaryFormat)) { - case HEADER_VERSION_2: - return VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE - + VERSION_2_HEADER_FLAG_SIZE + VERSION_2_HEADER_SIZE_FIELD_SIZE; - break; - default: - return NOT_A_DICT_POS; - } + static AK_FORCE_INLINE int getHeaderOptionsPosition() { + return VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE + + VERSION_2_HEADER_FLAG_SIZE + VERSION_2_HEADER_SIZE_FIELD_SIZE; } - static bool readHeaderValue( - const BinaryDictionaryInfo *const binaryDictionaryInfo, + static bool readHeaderValue(const uint8_t *const dictBuf, const char *const key, int *outValue, const int outValueSize); - static int readHeaderValueInt( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const char *const key); + static int readHeaderValueInt(const uint8_t *const dictBuf, const char *const key); private: DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryHeaderReadingUtils); - enum HEADER_VERSION { - HEADER_VERSION_2, - UNKNOWN_HEADER_VERSION - }; - static const int VERSION_2_HEADER_MAGIC_NUMBER_SIZE; static const int VERSION_2_HEADER_DICTIONARY_VERSION_SIZE; static const int VERSION_2_HEADER_FLAG_SIZE; @@ -88,18 +72,6 @@ class BinaryDictionaryHeaderReadingUtils { static const DictionaryFlags SUPPORTS_DYNAMIC_UPDATE_FLAG; static const DictionaryFlags FRENCH_LIGATURE_PROCESSING_FLAG; static const DictionaryFlags CONTAINS_BIGRAMS_FLAG; - - static HEADER_VERSION getHeaderVersion( - const BinaryDictionaryFormatUtils::FORMAT_VERSION formatVersion) { - switch(formatVersion) { - case BinaryDictionaryFormatUtils::VERSION_2: - // Fall through - case BinaryDictionaryFormatUtils::VERSION_3: - return HEADER_VERSION_2; - default: - return UNKNOWN_HEADER_VERSION; - } - } }; } #endif /* LATINIME_DICTIONARY_HEADER_READING_UTILS_H */ diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h index cbea18f90..818b2af56 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h @@ -23,7 +23,7 @@ #include "jni.h" #include "suggest/core/dictionary/binary_dictionary_format_utils.h" #include "suggest/core/dictionary/binary_dictionary_header.h" -#include "suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h" +#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h" #include "utils/log_utils.h" namespace latinime { @@ -36,12 +36,17 @@ class BinaryDictionaryInfo { mDictBufOffset(dictBufOffset), mIsUpdatable(isUpdatable), mDictionaryFormat(BinaryDictionaryFormatUtils::detectFormatVersion( mDictBuf, mDictSize)), - mDictionaryHeader(this), mDictRoot(mDictBuf + mDictionaryHeader.getSize()), - mStructurePolicy(DictionaryStructurePolicyFactory::getDictionaryStructurePolicy( - mDictionaryFormat)) { + mDictionaryHeader(dictBuf), mDictRoot(mDictBuf + mDictionaryHeader.getSize()), + // TODO: Remove. + mStructurePolicy(DictionaryStructureWithBufferPolicyFactory + ::newDictionaryStructurePolicy(this)) { logDictionaryInfo(env); } + ~BinaryDictionaryInfo() { + delete mStructurePolicy; + } + AK_FORCE_INLINE const uint8_t *getDictBuf() const { return mDictBuf; } @@ -66,6 +71,7 @@ class BinaryDictionaryInfo { return mDictionaryFormat; } + // TODO: Move to DictionaryStructurePolicy. AK_FORCE_INLINE const BinaryDictionaryHeader *getHeader() const { return &mDictionaryHeader; } @@ -76,7 +82,8 @@ class BinaryDictionaryInfo { return mIsUpdatable && isUpdatableDictionaryFormat; } - AK_FORCE_INLINE const DictionaryStructurePolicy *getStructurePolicy() const { + // TODO: remove + AK_FORCE_INLINE const DictionaryStructureWithBufferPolicy *getStructurePolicy() const { return mStructurePolicy; } @@ -89,9 +96,12 @@ class BinaryDictionaryInfo { const int mDictBufOffset; const bool mIsUpdatable; const BinaryDictionaryFormatUtils::FORMAT_VERSION mDictionaryFormat; + // TODO: Move BinaryDictionaryHeader to policyimpl and introduce dedicated API to the + // DictionaryStructurePolicy. const BinaryDictionaryHeader mDictionaryHeader; const uint8_t *const mDictRoot; - const DictionaryStructurePolicy *const mStructurePolicy; + // TODO: remove + const DictionaryStructureWithBufferPolicy *const mStructurePolicy; AK_FORCE_INLINE void logDictionaryInfo(JNIEnv *const env) const { const int BUFFER_SIZE = 16; diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_shortcut_iterator.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_shortcut_iterator.h new file mode 100644 index 000000000..558e0a5c3 --- /dev/null +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_shortcut_iterator.h @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_BINARY_DICTIONARY_SHORTCUT_ITERATOR_H +#define LATINIME_BINARY_DICTIONARY_SHORTCUT_ITERATOR_H + +#include "defines.h" +#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" + +namespace latinime { + +class BinaryDictionaryShortcutIterator { + public: + BinaryDictionaryShortcutIterator( + const DictionaryShortcutsStructurePolicy *const shortcutStructurePolicy, + const int shortcutPos) + : mShortcutStructurePolicy(shortcutStructurePolicy), + mPos(shortcutStructurePolicy->getStartPos(shortcutPos)), + mHasNextShortcutTarget(shortcutPos != NOT_A_DICT_POS) {} + + AK_FORCE_INLINE bool hasNextShortcutTarget() const { + return mHasNextShortcutTarget; + } + + // Gets the shortcut target itself as an int string and put it to outTarget, put its length + // to outTargetLength, put whether it is whitelist to outIsWhitelist. + AK_FORCE_INLINE void nextShortcutTarget( + const int maxDepth, int *const outTarget, int *const outTargetLength, + bool *const outIsWhitelist) { + mShortcutStructurePolicy->getNextShortcut(maxDepth, outTarget, outTargetLength, + outIsWhitelist, &mHasNextShortcutTarget, &mPos); + } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryShortcutIterator); + + const DictionaryShortcutsStructurePolicy *const mShortcutStructurePolicy; + int mPos; + bool mHasNextShortcutTarget; +}; +} // namespace latinime +#endif // LATINIME_BINARY_DICTIONARY_SHORTCUT_ITERATOR_H diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h deleted file mode 100644 index 375fc7dff..000000000 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LATINIME_BINARY_DICTIONARY_TERMINAL_ATTRIBUTES_READING_UTILS_H -#define LATINIME_BINARY_DICTIONARY_TERMINAL_ATTRIBUTES_READING_UTILS_H - -#include <stdint.h> - -#include "defines.h" -#include "suggest/core/dictionary/binary_dictionary_info.h" -#include "suggest/core/dictionary/byte_array_utils.h" - -namespace latinime { - -class BinaryDictionaryTerminalAttributesReadingUtils { - public: - typedef uint8_t TerminalAttributeFlags; - typedef TerminalAttributeFlags BigramFlags; - typedef TerminalAttributeFlags ShortcutFlags; - - static AK_FORCE_INLINE TerminalAttributeFlags getFlagsAndForwardPointer( - const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) { - return ByteArrayUtils::readUint8AndAdvancePosition( - binaryDictionaryInfo->getDictRoot(), pos); - } - - static AK_FORCE_INLINE int getProbabilityFromFlags(const TerminalAttributeFlags flags) { - return flags & MASK_ATTRIBUTE_PROBABILITY; - } - - static AK_FORCE_INLINE bool hasNext(const TerminalAttributeFlags flags) { - return (flags & FLAG_ATTRIBUTE_HAS_NEXT) != 0; - } - - // Bigrams reading methods - static AK_FORCE_INLINE void skipExistingBigrams( - const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) { - BigramFlags flags = getFlagsAndForwardPointer(binaryDictionaryInfo, pos); - while (hasNext(flags)) { - *pos += attributeAddressSize(flags); - flags = getFlagsAndForwardPointer(binaryDictionaryInfo, pos); - } - *pos += attributeAddressSize(flags); - } - - static int getBigramAddressAndForwardPointer( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const BigramFlags flags, - int *const pos); - - // Shortcuts reading methods - // This method returns the size of the shortcut list region excluding the shortcut list size - // field at the beginning. - static AK_FORCE_INLINE int getShortcutListSizeAndForwardPointer( - const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) { - // readUint16andAdvancePosition() returns an offset *including* the uint16 field itself. - return ByteArrayUtils::readUint16AndAdvancePosition( - binaryDictionaryInfo->getDictRoot(), pos) - SHORTCUT_LIST_SIZE_FIELD_SIZE; - } - - static AK_FORCE_INLINE void skipShortcuts( - const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) { - const int shortcutListSize = getShortcutListSizeAndForwardPointer( - binaryDictionaryInfo, pos); - *pos += shortcutListSize; - } - - static AK_FORCE_INLINE bool isWhitelist(const ShortcutFlags flags) { - return getProbabilityFromFlags(flags) == WHITELIST_SHORTCUT_PROBABILITY; - } - - static AK_FORCE_INLINE int readShortcutTarget( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const int maxLength, - int *const outWord, int *const pos) { - return ByteArrayUtils::readStringAndAdvancePosition( - binaryDictionaryInfo->getDictRoot(), maxLength, outWord, pos); - } - - private: - DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryTerminalAttributesReadingUtils); - - static const TerminalAttributeFlags MASK_ATTRIBUTE_ADDRESS_TYPE; - static const TerminalAttributeFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE; - static const TerminalAttributeFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES; - static const TerminalAttributeFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES; - static const TerminalAttributeFlags FLAG_ATTRIBUTE_OFFSET_NEGATIVE; - static const TerminalAttributeFlags FLAG_ATTRIBUTE_HAS_NEXT; - static const TerminalAttributeFlags MASK_ATTRIBUTE_PROBABILITY; - static const int ATTRIBUTE_ADDRESS_SHIFT; - static const int SHORTCUT_LIST_SIZE_FIELD_SIZE; - static const int WHITELIST_SHORTCUT_PROBABILITY; - - static AK_FORCE_INLINE bool isOffsetNegative(const TerminalAttributeFlags flags) { - return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0; - } - - static AK_FORCE_INLINE int attributeAddressSize(const TerminalAttributeFlags flags) { - return (flags & MASK_ATTRIBUTE_ADDRESS_TYPE) >> ATTRIBUTE_ADDRESS_SHIFT; - /* Note: this is a value-dependant optimization of what may probably be - more readably written this way: - switch (flags * BinaryFormat::MASK_ATTRIBUTE_ADDRESS_TYPE) { - case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: return 1; - case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: return 2; - case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTE: return 3; - default: return 0; - } - */ - } -}; -} -#endif /* LATINIME_BINARY_DICTIONARY_TERMINAL_ATTRIBUTES_READING_UTILS_H */ diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index 4a9e38fe8..891b80331 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -83,14 +83,14 @@ int Dictionary::getBigrams(const int *word, int length, int *outWords, int *freq } int Dictionary::getProbability(const int *word, int length) const { - const DictionaryStructurePolicy *const structurePolicy = + const DictionaryStructureWithBufferPolicy *const structurePolicy = mBinaryDictionaryInfo.getStructurePolicy(); - int pos = structurePolicy->getTerminalNodePositionOfWord(&mBinaryDictionaryInfo, word, length, + int pos = structurePolicy->getTerminalNodePositionOfWord(word, length, false /* forceLowerCaseSearch */); if (NOT_A_VALID_WORD_POS == pos) { return NOT_A_PROBABILITY; } - return structurePolicy->getUnigramProbability(&mBinaryDictionaryInfo, pos); + return structurePolicy->getUnigramProbability(pos); } bool Dictionary::isValidBigram(const int *word0, int length0, const int *word1, int length1) const { diff --git a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h index d5eafe1bf..97d4cd161 100644 --- a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h +++ b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h @@ -21,9 +21,9 @@ #include "defines.h" #include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h" -#include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/bloom_filter.h" #include "suggest/core/dictionary/probability_utils.h" +#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "utils/hash_map_compat.h" namespace latinime { @@ -38,7 +38,7 @@ class MultiBigramMap { // Look up the bigram probability for the given word pair from the cached bigram maps. // Also caches the bigrams if there is space remaining and they have not been cached already. - int getBigramProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo, + int getBigramProbability(const DictionaryStructureWithBufferPolicy *const structurePolicy, const int wordPosition, const int nextWordPosition, const int unigramProbability) { hash_map_compat<int, BigramMap>::const_iterator mapPosition = mBigramMaps.find(wordPosition); @@ -46,12 +46,12 @@ class MultiBigramMap { return mapPosition->second.getBigramProbability(nextWordPosition, unigramProbability); } if (mBigramMaps.size() < MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP) { - addBigramsForWordPosition(binaryDictionaryInfo, wordPosition); + addBigramsForWordPosition(structurePolicy, wordPosition); return mBigramMaps[wordPosition].getBigramProbability( nextWordPosition, unigramProbability); } - return readBigramProbabilityFromBinaryDictionary(binaryDictionaryInfo, - wordPosition, nextWordPosition, unigramProbability); + return readBigramProbabilityFromBinaryDictionary(structurePolicy, wordPosition, + nextWordPosition, unigramProbability); } void clear() { @@ -66,10 +66,11 @@ class MultiBigramMap { BigramMap() : mBigramMap(DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP), mBloomFilter() {} ~BigramMap() {} - void init(const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos) { - const int bigramsListPos = binaryDictionaryInfo->getStructurePolicy()-> - getBigramsPositionOfNode(binaryDictionaryInfo, nodePos); - BinaryDictionaryBigramsIterator bigramsIt(binaryDictionaryInfo, bigramsListPos); + void init(const DictionaryStructureWithBufferPolicy *const structurePolicy, + const int nodePos) { + const int bigramsListPos = structurePolicy->getBigramsPositionOfNode(nodePos); + BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), + bigramsListPos); while (bigramsIt.hasNext()) { bigramsIt.next(); mBigramMap[bigramsIt.getBigramPos()] = bigramsIt.getProbability(); @@ -100,16 +101,16 @@ class MultiBigramMap { }; AK_FORCE_INLINE void addBigramsForWordPosition( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const int position) { - mBigramMaps[position].init(binaryDictionaryInfo, position); + const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position) { + mBigramMaps[position].init(structurePolicy, position); } AK_FORCE_INLINE int readBigramProbabilityFromBinaryDictionary( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos, + const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos, const int nextWordPosition, const int unigramProbability) { - const int bigramsListPos = binaryDictionaryInfo->getStructurePolicy()-> - getBigramsPositionOfNode(binaryDictionaryInfo, nodePos); - BinaryDictionaryBigramsIterator bigramsIt(binaryDictionaryInfo, bigramsListPos); + const int bigramsListPos = structurePolicy->getBigramsPositionOfNode(nodePos); + BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), + bigramsListPos); while (bigramsIt.hasNext()) { bigramsIt.next(); if (bigramsIt.getBigramPos() == nextWordPosition) { diff --git a/native/jni/src/suggest/core/dictionary/shortcut_utils.h b/native/jni/src/suggest/core/dictionary/shortcut_utils.h index 3c2180937..461d7b454 100644 --- a/native/jni/src/suggest/core/dictionary/shortcut_utils.h +++ b/native/jni/src/suggest/core/dictionary/shortcut_utils.h @@ -19,21 +19,20 @@ #include "defines.h" #include "suggest/core/dicnode/dic_node_utils.h" -#include "suggest/core/dictionary/terminal_attributes.h" +#include "suggest/core/dictionary/binary_dictionary_shortcut_iterator.h" namespace latinime { class ShortcutUtils { public: - static int outputShortcuts(const TerminalAttributes *const terminalAttributes, + static int outputShortcuts(BinaryDictionaryShortcutIterator *const shortcutIt, int outputWordIndex, const int finalScore, int *const outputCodePoints, int *const frequencies, int *const outputTypes, const bool sameAsTyped) { - TerminalAttributes::ShortcutIterator iterator = terminalAttributes->getShortcutIterator(); int shortcutTarget[MAX_WORD_LENGTH]; - while (iterator.hasNextShortcutTarget() && outputWordIndex < MAX_RESULTS) { + while (shortcutIt->hasNextShortcutTarget() && outputWordIndex < MAX_RESULTS) { bool isWhilelist; int shortcutTargetStringLength; - iterator.nextShortcutTarget(MAX_WORD_LENGTH, shortcutTarget, + shortcutIt->nextShortcutTarget(MAX_WORD_LENGTH, shortcutTarget, &shortcutTargetStringLength, &isWhilelist); int shortcutScore; int kind; diff --git a/native/jni/src/suggest/core/dictionary/terminal_attributes.h b/native/jni/src/suggest/core/dictionary/terminal_attributes.h deleted file mode 100644 index 0da6504eb..000000000 --- a/native/jni/src/suggest/core/dictionary/terminal_attributes.h +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (C) 2012 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LATINIME_TERMINAL_ATTRIBUTES_H -#define LATINIME_TERMINAL_ATTRIBUTES_H - -#include <stdint.h> - -#include "suggest/core/dictionary/binary_dictionary_info.h" -#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h" - -namespace latinime { - -/** - * This class encapsulates information about a terminal that allows to - * retrieve local node attributes like the list of shortcuts without - * exposing the format structure to the client. - */ -class TerminalAttributes { - public: - class ShortcutIterator { - public: - ShortcutIterator(const BinaryDictionaryInfo *const binaryDictionaryInfo, - const int shortcutPos, const bool hasShortcutList) - : mBinaryDictionaryInfo(binaryDictionaryInfo), mPos(shortcutPos), - mHasNextShortcutTarget(hasShortcutList) {} - - inline bool hasNextShortcutTarget() const { - return mHasNextShortcutTarget; - } - - // Gets the shortcut target itself as an int string and put it to outTarget, put its length - // to outTargetLength, put whether it is whitelist to outIsWhitelist. - AK_FORCE_INLINE void nextShortcutTarget( - const int maxDepth, int *const outTarget, int *const outTargetLength, - bool *const outIsWhitelist) { - const BinaryDictionaryTerminalAttributesReadingUtils::ShortcutFlags flags = - BinaryDictionaryTerminalAttributesReadingUtils::getFlagsAndForwardPointer( - mBinaryDictionaryInfo, &mPos); - mHasNextShortcutTarget = - BinaryDictionaryTerminalAttributesReadingUtils::hasNext(flags); - if (outIsWhitelist) { - *outIsWhitelist = - BinaryDictionaryTerminalAttributesReadingUtils::isWhitelist(flags); - } - if (outTargetLength) { - *outTargetLength = - BinaryDictionaryTerminalAttributesReadingUtils::readShortcutTarget( - mBinaryDictionaryInfo, maxDepth, outTarget, &mPos); - } - } - - private: - const BinaryDictionaryInfo *const mBinaryDictionaryInfo; - int mPos; - bool mHasNextShortcutTarget; - }; - - TerminalAttributes(const BinaryDictionaryInfo *const binaryDictionaryInfo, - const int shortcutPos) - : mBinaryDictionaryInfo(binaryDictionaryInfo), mShortcutListSizePos(shortcutPos) {} - - inline ShortcutIterator getShortcutIterator() const { - int shortcutPos = mShortcutListSizePos; - const bool hasShortcutList = shortcutPos != NOT_A_DICT_POS; - if (hasShortcutList) { - BinaryDictionaryTerminalAttributesReadingUtils::getShortcutListSizeAndForwardPointer( - mBinaryDictionaryInfo, &shortcutPos); - } - // shortcutPos is never used if hasShortcutList is false. - return ShortcutIterator(mBinaryDictionaryInfo, shortcutPos, hasShortcutList); - } - - private: - DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalAttributes); - const BinaryDictionaryInfo *const mBinaryDictionaryInfo; - const int mShortcutListSizePos; -}; -} // namespace latinime -#endif // LATINIME_TERMINAL_ATTRIBUTES_H diff --git a/native/jni/src/suggest/core/policy/dictionary_bigrams_structure_policy.h b/native/jni/src/suggest/core/policy/dictionary_bigrams_structure_policy.h new file mode 100644 index 000000000..661ef1b1a --- /dev/null +++ b/native/jni/src/suggest/core/policy/dictionary_bigrams_structure_policy.h @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICTIONARY_BIGRAMS_STRUCTURE_POLICY_H +#define LATINIME_DICTIONARY_BIGRAMS_STRUCTURE_POLICY_H + +#include "defines.h" + +namespace latinime { + +/* + * This class abstracts structure of bigrams. + */ +class DictionaryBigramsStructurePolicy { + public: + virtual ~DictionaryBigramsStructurePolicy() {} + + virtual void getNextBigram(int *const outBigramPos, int *const outProbability, + bool *const outHasNext, int *const pos) const = 0; + virtual void skipAllBigrams(int *const pos) const = 0; + + protected: + DictionaryBigramsStructurePolicy() {} + + private: + DISALLOW_COPY_AND_ASSIGN(DictionaryBigramsStructurePolicy); +}; +} // namespace latinime +#endif /* LATINIME_DICTIONARY_BIGRAMS_STRUCTURE_POLICY_H */ diff --git a/native/jni/src/suggest/core/policy/dictionary_shortcuts_structure_policy.h b/native/jni/src/suggest/core/policy/dictionary_shortcuts_structure_policy.h new file mode 100644 index 000000000..40b6c2de1 --- /dev/null +++ b/native/jni/src/suggest/core/policy/dictionary_shortcuts_structure_policy.h @@ -0,0 +1,46 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICTIONARY_SHORTCUTS_STRUCTURE_POLICY_H +#define LATINIME_DICTIONARY_SHORTCUTS_STRUCTURE_POLICY_H + +#include "defines.h" + +namespace latinime { + +/* + * This class abstracts structure of shortcuts. + */ +class DictionaryShortcutsStructurePolicy { + public: + virtual ~DictionaryShortcutsStructurePolicy() {} + + virtual int getStartPos(const int pos) const = 0; + + virtual void getNextShortcut(const int maxCodePointCount, int *const outCodePoint, + int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext, + int *const pos) const = 0; + + virtual void skipAllShortcuts(int *const pos) const = 0; + + protected: + DictionaryShortcutsStructurePolicy() {} + + private: + DISALLOW_COPY_AND_ASSIGN(DictionaryShortcutsStructurePolicy); +}; +} // namespace latinime +#endif /* LATINIME_DICTIONARY_SHORTCUTS_STRUCTURE_POLICY_H */ diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index cc14c982c..6c97067cf 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -21,15 +21,16 @@ namespace latinime { -class BinaryDictionaryInfo; class DicNode; class DicNodeVector; +class DictionaryBigramsStructurePolicy; +class DictionaryShortcutsStructurePolicy; /* * This class abstracts structure of dictionaries. * Implement this policy to support additional dictionaries. */ -class DictionaryStructurePolicy { +class DictionaryStructureWithBufferPolicy { public: // This provides a filtering method for filtering new node. class NodeFilter { @@ -44,36 +45,35 @@ class DictionaryStructurePolicy { DISALLOW_COPY_AND_ASSIGN(NodeFilter); }; + virtual ~DictionaryStructureWithBufferPolicy() {} + virtual int getRootPosition() const = 0; virtual void createAndGetAllChildNodes(const DicNode *const dicNode, - const BinaryDictionaryInfo *const binaryDictionaryInfo, const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const = 0; virtual int getCodePointsAndProbabilityAndReturnCodePointCount( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) const = 0; - virtual int getTerminalNodePositionOfWord( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const int *const inWord, + virtual int getTerminalNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const = 0; - virtual int getUnigramProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo, - const int nodePos) const = 0; + virtual int getUnigramProbability(const int nodePos) const = 0; + + virtual int getShortcutPositionOfNode(const int nodePos) const = 0; + + virtual int getBigramsPositionOfNode(const int nodePos) const = 0; - virtual int getShortcutPositionOfNode(const BinaryDictionaryInfo *const binaryDictionaryInfo, - const int nodePos) const = 0; + virtual const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const = 0; - virtual int getBigramsPositionOfNode(const BinaryDictionaryInfo *const binaryDictionaryInfo, - const int nodePos) const = 0; + virtual const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const = 0; protected: - DictionaryStructurePolicy() {} - virtual ~DictionaryStructurePolicy() {} + DictionaryStructureWithBufferPolicy() {} private: - DISALLOW_COPY_AND_ASSIGN(DictionaryStructurePolicy); + DISALLOW_COPY_AND_ASSIGN(DictionaryStructureWithBufferPolicy); }; } // namespace latinime #endif /* LATINIME_DICTIONARY_STRUCTURE_POLICY_H */ diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.cpp b/native/jni/src/suggest/core/session/dic_traverse_session.cpp index 7651b19a0..11a147bda 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.cpp +++ b/native/jni/src/suggest/core/session/dic_traverse_session.cpp @@ -37,12 +37,12 @@ void DicTraverseSession::init(const Dictionary *const dictionary, const int *pre } // TODO: merge following similar calls to getTerminalPosition into one case-insensitive call. mPrevWordPos = binaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord( - binaryDictionaryInfo, prevWord, prevWordLength, false /* forceLowerCaseSearch */); + prevWord, prevWordLength, false /* forceLowerCaseSearch */); if (mPrevWordPos == NOT_A_VALID_WORD_POS) { // Check bigrams for lower-cased previous word if original was not found. Useful for // auto-capitalized words like "The [current_word]". mPrevWordPos = binaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord( - binaryDictionaryInfo, prevWord, prevWordLength, true /* forceLowerCaseSearch */); + prevWord, prevWordLength, true /* forceLowerCaseSearch */); } } diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.h b/native/jni/src/suggest/core/session/dic_traverse_session.h index de57e041a..5c4cef02d 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.h +++ b/native/jni/src/suggest/core/session/dic_traverse_session.h @@ -75,7 +75,7 @@ class DicTraverseSession { const int maxPointerCount); void resetCache(const int nextActiveCacheSize, const int maxWords); - // TODO: Remove + // TODO: Use DictionaryStructurePolicy instead of BinaryDictionaryInfo. const BinaryDictionaryInfo *getBinaryDictionaryInfo() const; //-------------------- diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp index 9376d7b93..bc1f25d3e 100644 --- a/native/jni/src/suggest/core/suggest.cpp +++ b/native/jni/src/suggest/core/suggest.cpp @@ -19,11 +19,12 @@ #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_priority_queue.h" #include "suggest/core/dicnode/dic_node_vector.h" +// TODO: Use DictionaryStructurePolicy instead of BinaryDictionaryInfo. #include "suggest/core/dictionary/binary_dictionary_info.h" +#include "suggest/core/dictionary/binary_dictionary_shortcut_iterator.h" #include "suggest/core/dictionary/dictionary.h" #include "suggest/core/dictionary/digraph_utils.h" #include "suggest/core/dictionary/shortcut_utils.h" -#include "suggest/core/dictionary/terminal_attributes.h" #include "suggest/core/layout/proximity_info.h" #include "suggest/core/policy/scoring.h" #include "suggest/core/policy/traversal.h" @@ -211,15 +212,15 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen } if (!terminalDicNode->hasMultipleWords()) { - const BinaryDictionaryInfo *const binaryDictionaryInfo = - traverseSession->getBinaryDictionaryInfo(); - const TerminalAttributes terminalAttributes(traverseSession->getBinaryDictionaryInfo(), - binaryDictionaryInfo->getStructurePolicy()->getShortcutPositionOfNode( - binaryDictionaryInfo, terminalDicNode->getPos())); + const DictionaryStructureWithBufferPolicy *const structurePolicy = + traverseSession->getBinaryDictionaryInfo()->getStructurePolicy(); + BinaryDictionaryShortcutIterator shortcutIt( + structurePolicy->getShortcutsStructurePolicy(), + structurePolicy->getShortcutPositionOfNode(terminalDicNode->getPos())); // Shortcut is not supported for multiple words suggestions. // TODO: Check shortcuts during traversal for multiple words suggestions. const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode); - outputWordIndex = ShortcutUtils::outputShortcuts(&terminalAttributes, outputWordIndex, + outputWordIndex = ShortcutUtils::outputShortcuts(&shortcutIt, outputWordIndex, finalScore, outputCodePoints, frequencies, outputTypes, sameAsTyped); } DicNode::managedDelete(terminalDicNode); diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_policy.h new file mode 100644 index 000000000..beb9bee27 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_policy.h @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_BIGRAM_LIST_POLICY_H +#define LATINIME_BIGRAM_LIST_POLICY_H + +#include <stdint.h> + +#include "defines.h" +#include "suggest/core/policy/dictionary_bigrams_structure_policy.h" +#include "suggest/policyimpl/dictionary/bigram/bigram_list_reading_utils.h" + +namespace latinime { + +class BigramListPolicy : public DictionaryBigramsStructurePolicy { + public: + explicit BigramListPolicy(const uint8_t *const bigramsBuf) : mBigramsBuf(bigramsBuf) {} + + ~BigramListPolicy() {} + + void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext, + int *const pos) const { + const BigramListReadingUtils::BigramFlags flags = + BigramListReadingUtils::getFlagsAndForwardPointer(mBigramsBuf, pos); + *outBigramPos = BigramListReadingUtils::getBigramAddressAndForwardPointer( + mBigramsBuf, flags, pos); + *outProbability = BigramListReadingUtils::getProbabilityFromFlags(flags); + *outHasNext = BigramListReadingUtils::hasNext(flags); + } + + void skipAllBigrams(int *const pos) const { + BigramListReadingUtils::skipExistingBigrams(mBigramsBuf, pos); + } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListPolicy); + + const uint8_t *const mBigramsBuf; +}; +} // namespace latinime +#endif // LATINIME_BIGRAM_LIST_POLICY_H diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_reading_utils.cpp index 20b77b3b2..6f4fcbfc2 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_reading_utils.cpp @@ -14,46 +14,42 @@ * limitations under the License. */ -#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h" +#include "suggest/policyimpl/dictionary/bigram/bigram_list_reading_utils.h" -#include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/byte_array_utils.h" namespace latinime { -typedef BinaryDictionaryTerminalAttributesReadingUtils TaUtils; - -const TaUtils::TerminalAttributeFlags TaUtils::MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30; -const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10; -const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20; -const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30; -const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40; +const BigramListReadingUtils::BigramFlags BigramListReadingUtils::MASK_ATTRIBUTE_ADDRESS_TYPE = + 0x30; +const BigramListReadingUtils::BigramFlags + BigramListReadingUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10; +const BigramListReadingUtils::BigramFlags + BigramListReadingUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20; +const BigramListReadingUtils::BigramFlags + BigramListReadingUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30; +const BigramListReadingUtils::BigramFlags + BigramListReadingUtils::FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40; // Flag for presence of more attributes -const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_HAS_NEXT = 0x80; +const BigramListReadingUtils::BigramFlags BigramListReadingUtils::FLAG_ATTRIBUTE_HAS_NEXT = 0x80; // Mask for attribute probability, stored on 4 bits inside the flags byte. -const TaUtils::TerminalAttributeFlags TaUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F; -const int TaUtils::ATTRIBUTE_ADDRESS_SHIFT = 4; -const int TaUtils::SHORTCUT_LIST_SIZE_FIELD_SIZE = 2; -// The numeric value of the shortcut probability that means 'whitelist'. -const int TaUtils::WHITELIST_SHORTCUT_PROBABILITY = 15; +const BigramListReadingUtils::BigramFlags + BigramListReadingUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F; +const int BigramListReadingUtils::ATTRIBUTE_ADDRESS_SHIFT = 4; -/* static */ int TaUtils::getBigramAddressAndForwardPointer( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const TerminalAttributeFlags flags, - int *const pos) { +/* static */ int BigramListReadingUtils::getBigramAddressAndForwardPointer( + const uint8_t *const bigramsBuf, const BigramFlags flags, int *const pos) { int offset = 0; const int origin = *pos; switch (MASK_ATTRIBUTE_ADDRESS_TYPE & flags) { case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: - offset = ByteArrayUtils::readUint8AndAdvancePosition( - binaryDictionaryInfo->getDictRoot(), pos); + offset = ByteArrayUtils::readUint8AndAdvancePosition(bigramsBuf, pos); break; case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: - offset = ByteArrayUtils::readUint16AndAdvancePosition( - binaryDictionaryInfo->getDictRoot(), pos); + offset = ByteArrayUtils::readUint16AndAdvancePosition(bigramsBuf, pos); break; case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES: - offset = ByteArrayUtils::readUint24AndAdvancePosition( - binaryDictionaryInfo->getDictRoot(), pos); + offset = ByteArrayUtils::readUint24AndAdvancePosition(bigramsBuf, pos); break; } if (isOffsetNegative(flags)) { diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_reading_utils.h new file mode 100644 index 000000000..6b2bfe8c9 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_reading_utils.h @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_BIGRAM_LIST_READING_UTILS_H +#define LATINIME_BIGRAM_LIST_READING_UTILS_H + +#include <stdint.h> + +#include "defines.h" +#include "suggest/core/dictionary/byte_array_utils.h" + +namespace latinime { + +class BigramListReadingUtils { +public: + typedef uint8_t BigramFlags; + + static AK_FORCE_INLINE BigramFlags getFlagsAndForwardPointer( + const uint8_t *const bigramsBuf, int *const pos) { + return ByteArrayUtils::readUint8AndAdvancePosition(bigramsBuf, pos); + } + + static AK_FORCE_INLINE int getProbabilityFromFlags(const BigramFlags flags) { + return flags & MASK_ATTRIBUTE_PROBABILITY; + } + + static AK_FORCE_INLINE bool hasNext(const BigramFlags flags) { + return (flags & FLAG_ATTRIBUTE_HAS_NEXT) != 0; + } + + // Bigrams reading methods + static AK_FORCE_INLINE void skipExistingBigrams(const uint8_t *const bigramsBuf, + int *const pos) { + BigramFlags flags = getFlagsAndForwardPointer(bigramsBuf, pos); + while (hasNext(flags)) { + *pos += attributeAddressSize(flags); + flags = getFlagsAndForwardPointer(bigramsBuf, pos); + } + *pos += attributeAddressSize(flags); + } + + static int getBigramAddressAndForwardPointer(const uint8_t *const bigramsBuf, + const BigramFlags flags, int *const pos); + +private: + DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListReadingUtils); + + static const BigramFlags MASK_ATTRIBUTE_ADDRESS_TYPE; + static const BigramFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE; + static const BigramFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES; + static const BigramFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES; + static const BigramFlags FLAG_ATTRIBUTE_OFFSET_NEGATIVE; + static const BigramFlags FLAG_ATTRIBUTE_HAS_NEXT; + static const BigramFlags MASK_ATTRIBUTE_PROBABILITY; + static const int ATTRIBUTE_ADDRESS_SHIFT; + + static AK_FORCE_INLINE bool isOffsetNegative(const BigramFlags flags) { + return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0; + } + + static AK_FORCE_INLINE int attributeAddressSize(const BigramFlags flags) { + return (flags & MASK_ATTRIBUTE_ADDRESS_TYPE) >> ATTRIBUTE_ADDRESS_SHIFT; + /* Note: this is a value-dependant optimization of what may probably be + more readably written this way: + switch (flags * BinaryFormat::MASK_ATTRIBUTE_ADDRESS_TYPE) { + case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: return 1; + case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: return 2; + case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTE: return 3; + default: return 0; + } + */ + } +}; +} // namespace latinime +#endif // LATINIME_BIGRAM_LIST_READING_UTILS_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h deleted file mode 100644 index c0df89f49..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LATINIME_DICTIONARY_STRUCTURE_POLICY_FACTORY_H -#define LATINIME_DICTIONARY_STRUCTURE_POLICY_FACTORY_H - -#include "defines.h" -#include "suggest/core/dictionary/binary_dictionary_format_utils.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h" -#include "suggest/policyimpl/dictionary/patricia_trie_policy.h" - -namespace latinime { - -class DictionaryStructurePolicy; - -class DictionaryStructurePolicyFactory { - public: - static const DictionaryStructurePolicy *getDictionaryStructurePolicy( - const BinaryDictionaryFormatUtils::FORMAT_VERSION dictionaryFormat) { - switch (dictionaryFormat) { - case BinaryDictionaryFormatUtils::VERSION_2: - return PatriciaTriePolicy::getInstance(); - case BinaryDictionaryFormatUtils::VERSION_3: - return DynamicPatriciaTriePolicy::getInstance(); - default: - ASSERT(false); - return 0; - } - } - - private: - DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryStructurePolicyFactory); -}; -} // namespace latinime -#endif // LATINIME_DICTIONARY_STRUCTURE_POLICY_FACTORY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp new file mode 100644 index 000000000..324992a48 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h" + +#include "defines.h" +#include "suggest/core/dictionary/binary_dictionary_info.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h" +#include "suggest/policyimpl/dictionary/patricia_trie_policy.h" + +namespace latinime { + +/* static */ DictionaryStructureWithBufferPolicy *DictionaryStructureWithBufferPolicyFactory + ::newDictionaryStructurePolicy( + const BinaryDictionaryInfo *const binaryDictionaryInfo) { + switch (binaryDictionaryInfo->getFormat()) { + case BinaryDictionaryFormatUtils::VERSION_2: + return new PatriciaTriePolicy(binaryDictionaryInfo->getDictRoot()); + case BinaryDictionaryFormatUtils::VERSION_3: + return new DynamicPatriciaTriePolicy(binaryDictionaryInfo->getDictRoot()); + default: + ASSERT(false); + return 0; + } +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h new file mode 100644 index 000000000..95f82aabe --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H +#define LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H + +#include "defines.h" + +#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" + +namespace latinime { + +class BinaryDictionaryInfo; + +class DictionaryStructureWithBufferPolicyFactory { + public: + static DictionaryStructureWithBufferPolicy *newDictionaryStructurePolicy( + const BinaryDictionaryInfo *const binaryDictionaryInfo); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryStructureWithBufferPolicyFactory); +}; +} // namespace latinime +#endif // LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp index 7ac635a00..77a85c86d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp @@ -16,48 +16,46 @@ #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" -#include "suggest/core/dictionary/binary_dictionary_info.h" -#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h" +#include "suggest/core/policy/dictionary_bigrams_structure_policy.h" +#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" namespace latinime { void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(const int nodePos, const int maxCodePointCount, int *const outCodePoints) { - const uint8_t *const dictRoot = mBinaryDictionaryInfo->getDictRoot(); int pos = nodePos; - mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos); + mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); const int parentPos = - DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(dictRoot, &pos); + DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(mDictRoot, &pos); mParentPos = (parentPos != 0) ? mNodePos + parentPos : NOT_A_DICT_POS; if (outCodePoints != 0) { mCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition( - dictRoot, mFlags, maxCodePointCount, outCodePoints, &pos); + mDictRoot, mFlags, maxCodePointCount, outCodePoints, &pos); } else { mCodePointCount = PatriciaTrieReadingUtils::skipCharacters( - dictRoot, mFlags, MAX_WORD_LENGTH, &pos); + mDictRoot, mFlags, MAX_WORD_LENGTH, &pos); } if (isTerminal()) { - mProbability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos); + mProbability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos); } else { mProbability = NOT_A_PROBABILITY; } if (hasChildren()) { mChildrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( - dictRoot, mFlags, &pos); + mDictRoot, mFlags, &pos); } else { mChildrenPos = NOT_A_DICT_POS; } if (PatriciaTrieReadingUtils::hasShortcutTargets(mFlags)) { mShortcutPos = pos; - BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(mBinaryDictionaryInfo, &pos); + mShortcutsPolicy->skipAllShortcuts(&pos); } else { mShortcutPos = NOT_A_DICT_POS; } if (PatriciaTrieReadingUtils::hasBigrams(mFlags)) { mBigramPos = pos; - BinaryDictionaryTerminalAttributesReadingUtils::skipExistingBigrams( - mBinaryDictionaryInfo, &pos); + mBigramsPolicy->skipAllBigrams(&pos); } else { mBigramPos = NOT_A_DICT_POS; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h index 71558edaa..e990809e8 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h @@ -17,13 +17,16 @@ #ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H #define LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H +#include <stdint.h> + #include "defines.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" namespace latinime { -class BinaryDictionaryInfo; +class DictionaryBigramsStructurePolicy; +class DictionaryShortcutsStructurePolicy; /* * This class is used for helping to read nodes of dynamic patricia trie. This class handles moved @@ -31,12 +34,14 @@ class BinaryDictionaryInfo; */ class DynamicPatriciaTrieNodeReader { public: - explicit DynamicPatriciaTrieNodeReader(const BinaryDictionaryInfo *const binaryDictionaryInfo) - : mBinaryDictionaryInfo(binaryDictionaryInfo), mNodePos(NOT_A_VALID_WORD_POS), - mFlags(0), mParentPos(NOT_A_DICT_POS), mCodePointCount(0), - mProbability(NOT_A_PROBABILITY), mChildrenPos(NOT_A_DICT_POS), - mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS), - mSiblingPos(NOT_A_VALID_WORD_POS) {} + DynamicPatriciaTrieNodeReader(const uint8_t *const dictRoot, + const DictionaryBigramsStructurePolicy *const bigramsPolicy, + const DictionaryShortcutsStructurePolicy *const shortcutsPolicy) + : mDictRoot(dictRoot), mBigramsPolicy(bigramsPolicy), + mShortcutsPolicy(shortcutsPolicy), mNodePos(NOT_A_VALID_WORD_POS), mFlags(0), + mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbability(NOT_A_PROBABILITY), + mChildrenPos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS), + mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_VALID_WORD_POS) {} ~DynamicPatriciaTrieNodeReader() {} @@ -116,7 +121,10 @@ class DynamicPatriciaTrieNodeReader { private: DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieNodeReader); - const BinaryDictionaryInfo *const mBinaryDictionaryInfo; + // TODO: Consolidate mDictRoot. + const uint8_t *const mDictRoot; + const DictionaryBigramsStructurePolicy *const mBigramsPolicy; + const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy; int mNodePos; DynamicPatriciaTrieReadingUtils::NodeFlags mFlags; int mParentPos; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp index 3df505688..7d3b2e28e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp @@ -19,30 +19,28 @@ #include "defines.h" #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_vector.h" -#include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" namespace latinime { -const DynamicPatriciaTriePolicy DynamicPatriciaTriePolicy::sInstance; // To avoid infinite loop caused by invalid or malicious forward links. const int DynamicPatriciaTriePolicy::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000; void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, - const BinaryDictionaryInfo *const binaryDictionaryInfo, const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const { if (!dicNode->hasChildren()) { return; } - DynamicPatriciaTrieNodeReader nodeReader(binaryDictionaryInfo); + DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), + getShortcutsStructurePolicy()); int mergedNodeCodePoints[MAX_WORD_LENGTH]; int nextPos = dicNode->getChildrenPos(); int totalChildCount = 0; do { const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition( - binaryDictionaryInfo->getDictRoot(), &nextPos); + mDictRoot, &nextPos); totalChildCount += childCount; if (childCount <= 0 || totalChildCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP) { // Invalid dictionary. @@ -64,13 +62,11 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const d } nextPos = nodeReader.getSiblingNodePos(); } - nextPos = DynamicPatriciaTrieReadingUtils::getForwardLinkPosition( - binaryDictionaryInfo->getDictRoot(), nextPos); + nextPos = DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(mDictRoot, nextPos); } while (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(nextPos)); } int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) const { if (nodePos == NOT_A_VALID_WORD_POS) { @@ -83,7 +79,8 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun int mergedNodeCodePoints[maxCodePointCount]; int codePointCount = 0; - DynamicPatriciaTrieNodeReader nodeReader(binaryDictionaryInfo); + DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), + getShortcutsStructurePolicy()); // First, read terminal node and get its probability. nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(nodePos, maxCodePointCount, mergedNodeCodePoints); @@ -118,8 +115,7 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun return codePointCount; } -int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const int *const inWord, +int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const { int searchCodePoints[length]; for (int i = 0; i < length; ++i) { @@ -128,14 +124,15 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord( int mergedNodeCodePoints[MAX_WORD_LENGTH]; int currentLength = 0; int pos = getRootPosition(); - DynamicPatriciaTrieNodeReader nodeReader(binaryDictionaryInfo); + DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), + getShortcutsStructurePolicy()); while (currentLength <= length) { // When foundMatchedNode becomes true, currentLength is increased at least once. bool foundMatchedNode = false; int totalChildCount = 0; do { const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition( - binaryDictionaryInfo->getDictRoot(), &pos); + mDictRoot, &pos); totalChildCount += childCount; if (childCount <= 0 || totalChildCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP) { // Invalid dictionary. @@ -183,7 +180,7 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord( // If the matched node is not found in the current node group, try to follow the // forward link. pos = DynamicPatriciaTrieReadingUtils::getForwardLinkPosition( - binaryDictionaryInfo->getDictRoot(), pos); + mDictRoot, pos); } while (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(pos)); if (!foundMatchedNode) { // Matched node is not found. @@ -195,12 +192,12 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord( return NOT_A_VALID_WORD_POS; } -int DynamicPatriciaTriePolicy::getUnigramProbability( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos) const { +int DynamicPatriciaTriePolicy::getUnigramProbability(const int nodePos) const { if (nodePos == NOT_A_VALID_WORD_POS) { return NOT_A_PROBABILITY; } - DynamicPatriciaTrieNodeReader nodeReader(binaryDictionaryInfo); + DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), + getShortcutsStructurePolicy()); nodeReader.fetchNodeInfoFromBuffer(nodePos); if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) { return NOT_A_PROBABILITY; @@ -208,13 +205,12 @@ int DynamicPatriciaTriePolicy::getUnigramProbability( return nodeReader.getProbability(); } -int DynamicPatriciaTriePolicy::getShortcutPositionOfNode( - const BinaryDictionaryInfo *const binaryDictionaryInfo, - const int nodePos) const { +int DynamicPatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) const { if (nodePos == NOT_A_VALID_WORD_POS) { return NOT_A_DICT_POS; } - DynamicPatriciaTrieNodeReader nodeReader(binaryDictionaryInfo); + DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), + getShortcutsStructurePolicy()); nodeReader.fetchNodeInfoFromBuffer(nodePos); if (nodeReader.isDeleted()) { return NOT_A_DICT_POS; @@ -222,13 +218,12 @@ int DynamicPatriciaTriePolicy::getShortcutPositionOfNode( return nodeReader.getShortcutPos(); } -int DynamicPatriciaTriePolicy::getBigramsPositionOfNode( - const BinaryDictionaryInfo *const binaryDictionaryInfo, - const int nodePos) const { +int DynamicPatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const { if (nodePos == NOT_A_VALID_WORD_POS) { return NOT_A_DICT_POS; } - DynamicPatriciaTrieNodeReader nodeReader(binaryDictionaryInfo); + DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), + getShortcutsStructurePolicy()); nodeReader.fetchNodeInfoFromBuffer(nodePos); if (nodeReader.isDeleted()) { return NOT_A_DICT_POS; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h index 6a7977138..caca36977 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h @@ -17,54 +17,61 @@ #ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H #define LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H +#include <stdint.h> + #include "defines.h" -#include "suggest/core/policy/dictionary_structure_policy.h" +#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" +#include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h" +#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h" namespace latinime { -class BinaryDictionaryInfo; class DicNode; class DicNodeVector; -class DynamicPatriciaTriePolicy : public DictionaryStructurePolicy { +class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { public: - static AK_FORCE_INLINE const DynamicPatriciaTriePolicy *getInstance() { - return &sInstance; - } + DynamicPatriciaTriePolicy(const uint8_t *const dictRoot) + : mDictRoot(dictRoot), mBigramListPolicy(dictRoot), mShortcutListPolicy(dictRoot) {} + + ~DynamicPatriciaTriePolicy() {} AK_FORCE_INLINE int getRootPosition() const { return 0; } void createAndGetAllChildNodes(const DicNode *const dicNode, - const BinaryDictionaryInfo *const binaryDictionaryInfo, const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const; int getCodePointsAndProbabilityAndReturnCodePointCount( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) const; - int getTerminalNodePositionOfWord( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const int *const inWord, + int getTerminalNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const; - int getUnigramProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo, - const int nodePos) const; + int getUnigramProbability(const int nodePos) const; - int getShortcutPositionOfNode(const BinaryDictionaryInfo *const binaryDictionaryInfo, - const int nodePos) const; + int getShortcutPositionOfNode(const int nodePos) const; - int getBigramsPositionOfNode(const BinaryDictionaryInfo *const binaryDictionaryInfo, - const int nodePos) const; + int getBigramsPositionOfNode(const int nodePos) const; + + const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const { + return &mBigramListPolicy; + } + + const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const { + return &mShortcutListPolicy; + } private: - DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTriePolicy); - static const DynamicPatriciaTriePolicy sInstance; + DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy); static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP; - DynamicPatriciaTriePolicy() {} - ~DynamicPatriciaTriePolicy() {} + // TODO: Consolidate mDictRoot. + const uint8_t *const mDictRoot; + const BigramListPolicy mBigramListPolicy; + const ShortcutListPolicy mShortcutListPolicy; }; } // namespace latinime #endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp index 097f7c86a..8ce2b3ea0 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp @@ -20,55 +20,44 @@ #include "defines.h" #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_vector.h" -#include "suggest/core/dictionary/binary_dictionary_info.h" -#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h" #include "suggest/policyimpl/dictionary/binary_format.h" #include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" namespace latinime { -const PatriciaTriePolicy PatriciaTriePolicy::sInstance; - void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, - const BinaryDictionaryInfo *const binaryDictionaryInfo, const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const { if (!dicNode->hasChildren()) { return; } int nextPos = dicNode->getChildrenPos(); const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition( - binaryDictionaryInfo->getDictRoot(), &nextPos); + mDictRoot, &nextPos); for (int i = 0; i < childCount; i++) { - nextPos = createAndGetLeavingChildNode(dicNode, nextPos, binaryDictionaryInfo, - nodeFilter, childDicNodes); + nextPos = createAndGetLeavingChildNode(dicNode, nextPos, nodeFilter, childDicNodes); } } int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) const { - return BinaryFormat::getCodePointsAndProbabilityAndReturnCodePointCount( - binaryDictionaryInfo->getDictRoot(), nodePos, + return BinaryFormat::getCodePointsAndProbabilityAndReturnCodePointCount(mDictRoot, nodePos, maxCodePointCount, outCodePoints, outUnigramProbability); } -int PatriciaTriePolicy::getTerminalNodePositionOfWord( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const int *const inWord, +int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const { - return BinaryFormat::getTerminalPosition(binaryDictionaryInfo->getDictRoot(), inWord, + return BinaryFormat::getTerminalPosition(mDictRoot, inWord, length, forceLowerCaseSearch); } -int PatriciaTriePolicy::getUnigramProbability( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos) const { +int PatriciaTriePolicy::getUnigramProbability(const int nodePos) const { if (nodePos == NOT_A_VALID_WORD_POS) { return NOT_A_PROBABILITY; } - const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot(); int pos = nodePos; const PatriciaTrieReadingUtils::NodeFlags flags = - PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos); + PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); if (!PatriciaTrieReadingUtils::isTerminal(flags)) { return NOT_A_PROBABILITY; } @@ -79,81 +68,73 @@ int PatriciaTriePolicy::getUnigramProbability( // for shortcuts). return NOT_A_PROBABILITY; } - PatriciaTrieReadingUtils::skipCharacters(dictRoot, flags, MAX_WORD_LENGTH, &pos); - return PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos); + PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos); + return PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos); } -int PatriciaTriePolicy::getShortcutPositionOfNode( - const BinaryDictionaryInfo *const binaryDictionaryInfo, - const int nodePos) const { +int PatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) const { if (nodePos == NOT_A_VALID_WORD_POS) { return NOT_A_DICT_POS; } - const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot(); int pos = nodePos; const PatriciaTrieReadingUtils::NodeFlags flags = - PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos); + PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); if (!PatriciaTrieReadingUtils::hasShortcutTargets(flags)) { return NOT_A_DICT_POS; } - PatriciaTrieReadingUtils::skipCharacters(dictRoot, flags, MAX_WORD_LENGTH, &pos); + PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos); if (PatriciaTrieReadingUtils::isTerminal(flags)) { - PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos); + PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos); } if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) { - PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(dictRoot, flags, &pos); + PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &pos); } return pos; } -int PatriciaTriePolicy::getBigramsPositionOfNode( - const BinaryDictionaryInfo *const binaryDictionaryInfo, - const int nodePos) const { +int PatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const { if (nodePos == NOT_A_VALID_WORD_POS) { return NOT_A_DICT_POS; } - const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot(); int pos = nodePos; const PatriciaTrieReadingUtils::NodeFlags flags = - PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos); + PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); if (!PatriciaTrieReadingUtils::hasBigrams(flags)) { return NOT_A_DICT_POS; } - PatriciaTrieReadingUtils::skipCharacters(dictRoot, flags, MAX_WORD_LENGTH, &pos); + PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos); if (PatriciaTrieReadingUtils::isTerminal(flags)) { - PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos); + PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos); } if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) { - PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(dictRoot, flags, &pos); + PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &pos); } if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) { - BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(binaryDictionaryInfo, &pos); + mShortcutListPolicy.skipAllShortcuts(&pos);; } return pos; } int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode, - const int nodePos, const BinaryDictionaryInfo *const binaryDictionaryInfo, - const NodeFilter *const childrenFilter, DicNodeVector *childDicNodes) const { - const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot(); + const int nodePos, const NodeFilter *const childrenFilter, + DicNodeVector *childDicNodes) const { int pos = nodePos; const PatriciaTrieReadingUtils::NodeFlags flags = - PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos); + PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); int mergedNodeCodePoints[MAX_WORD_LENGTH]; const int mergedNodeCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition( - dictRoot, flags, MAX_WORD_LENGTH, mergedNodeCodePoints, &pos); + mDictRoot, flags, MAX_WORD_LENGTH, mergedNodeCodePoints, &pos); const int probability = (PatriciaTrieReadingUtils::isTerminal(flags))? - PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos) + PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos) : NOT_A_PROBABILITY; const int childrenPos = PatriciaTrieReadingUtils::hasChildrenInFlags(flags) ? PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( - dictRoot, flags, &pos) : NOT_A_DICT_POS; + mDictRoot, flags, &pos) : NOT_A_DICT_POS; if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) { - BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(binaryDictionaryInfo, &pos); + getShortcutsStructurePolicy()->skipAllShortcuts(&pos); } if (PatriciaTrieReadingUtils::hasBigrams(flags)) { - BinaryDictionaryTerminalAttributesReadingUtils::skipExistingBigrams( - binaryDictionaryInfo, &pos); + getBigramsStructurePolicy()->skipAllBigrams(&pos); } if (!childrenFilter->isFilteredOut(mergedNodeCodePoints[0])) { childDicNodes->pushLeavingChild(dicNode, nodePos, childrenPos, probability, diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h index 71f256eee..51b5b9af9 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h @@ -17,52 +17,61 @@ #ifndef LATINIME_PATRICIA_TRIE_POLICY_H #define LATINIME_PATRICIA_TRIE_POLICY_H +#include <stdint.h> + #include "defines.h" -#include "suggest/core/policy/dictionary_structure_policy.h" +#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" +#include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h" +#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h" namespace latinime { -class PatriciaTriePolicy : public DictionaryStructurePolicy { +class DicNode; +class DicNodeVector; + +class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { public: - static AK_FORCE_INLINE const PatriciaTriePolicy *getInstance() { - return &sInstance; - } + PatriciaTriePolicy(const uint8_t *const dictRoot) + : mDictRoot(dictRoot), mBigramListPolicy(dictRoot), mShortcutListPolicy(dictRoot) {} + + ~PatriciaTriePolicy() {} AK_FORCE_INLINE int getRootPosition() const { return 0; } void createAndGetAllChildNodes(const DicNode *const dicNode, - const BinaryDictionaryInfo *const binaryDictionaryInfo, const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const; int getCodePointsAndProbabilityAndReturnCodePointCount( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) const; - int getTerminalNodePositionOfWord( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const int *const inWord, + int getTerminalNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const; - int getUnigramProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo, - const int nodePos) const; + int getUnigramProbability(const int nodePos) const; - int getShortcutPositionOfNode(const BinaryDictionaryInfo *const binaryDictionaryInfo, - const int nodePos) const; + int getShortcutPositionOfNode(const int nodePos) const; - int getBigramsPositionOfNode(const BinaryDictionaryInfo *const binaryDictionaryInfo, - const int nodePos) const; + int getBigramsPositionOfNode(const int nodePos) const; + + const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const { + return &mBigramListPolicy; + } + + const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const { + return &mShortcutListPolicy; + } private: - DISALLOW_COPY_AND_ASSIGN(PatriciaTriePolicy); - static const PatriciaTriePolicy sInstance; + DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy); - PatriciaTriePolicy() {} - ~PatriciaTriePolicy() {} + const uint8_t *const mDictRoot; + const BigramListPolicy mBigramListPolicy; + const ShortcutListPolicy mShortcutListPolicy; int createAndGetLeavingChildNode(const DicNode *const dicNode, const int nodePos, - const BinaryDictionaryInfo *const binaryDictionaryInfo, const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const; }; } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h new file mode 100644 index 000000000..3c7fab033 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_SHORTCUT_LIST_POLICY_H +#define LATINIME_SHORTCUT_LIST_POLICY_H + +#include <stdint.h> + +#include "defines.h" +#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" +#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h" + +namespace latinime { + +class ShortcutListPolicy : public DictionaryShortcutsStructurePolicy { + public: + explicit ShortcutListPolicy(const uint8_t *const shortcutBuf) + : mShortcutsBuf(shortcutBuf) {} + + ~ShortcutListPolicy() {} + + int getStartPos(const int pos) const { + int listPos = pos; + ShortcutListReadingUtils::getShortcutListSizeAndForwardPointer(mShortcutsBuf, &listPos); + return listPos; + } + + void getNextShortcut(const int maxCodePointCount, int *const outCodePoint, + int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext, + int *const pos) const { + const ShortcutListReadingUtils::ShortcutFlags flags = + ShortcutListReadingUtils::getFlagsAndForwardPointer(mShortcutsBuf, pos); + if (outHasNext) { + *outHasNext = ShortcutListReadingUtils::hasNext(flags); + } + if (outIsWhitelist) { + *outIsWhitelist = ShortcutListReadingUtils::isWhitelist(flags); + } + if (outCodePoint) { + *outCodePointCount = ShortcutListReadingUtils::readShortcutTarget( + mShortcutsBuf, maxCodePointCount, outCodePoint, pos); + } + } + + void skipAllShortcuts(int *const pos) const { + const int shortcutListSize = ShortcutListReadingUtils + ::getShortcutListSizeAndForwardPointer(mShortcutsBuf, pos); + *pos += shortcutListSize; + } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(ShortcutListPolicy); + + const uint8_t *const mShortcutsBuf; +}; +} // namespace latinime +#endif // LATINIME_SHORTCUT_LIST_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp new file mode 100644 index 000000000..e70bb5071 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h" + +namespace latinime { + +// Flag for presence of more attributes +const ShortcutListReadingUtils::ShortcutFlags + ShortcutListReadingUtils::FLAG_ATTRIBUTE_HAS_NEXT = 0x80; +// Mask for attribute probability, stored on 4 bits inside the flags byte. +const ShortcutListReadingUtils::ShortcutFlags + ShortcutListReadingUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F; +const int ShortcutListReadingUtils::SHORTCUT_LIST_SIZE_FIELD_SIZE = 2; +// The numeric value of the shortcut probability that means 'whitelist'. +const int ShortcutListReadingUtils::WHITELIST_SHORTCUT_PROBABILITY = 15; + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h new file mode 100644 index 000000000..e92fa5f9f --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h @@ -0,0 +1,77 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_SHORTCUT_LIST_READING_UTILS_H +#define LATINIME_SHORTCUT_LIST_READING_UTILS_H + +#include <stdint.h> + +#include "defines.h" +#include "suggest/core/dictionary/byte_array_utils.h" + +namespace latinime { + +class ShortcutListReadingUtils { + public: + typedef uint8_t ShortcutFlags; + + static AK_FORCE_INLINE ShortcutFlags getFlagsAndForwardPointer( + const uint8_t *const dictRoot, int *const pos) { + return ByteArrayUtils::readUint8AndAdvancePosition(dictRoot, pos); + } + + static AK_FORCE_INLINE int getProbabilityFromFlags(const ShortcutFlags flags) { + return flags & MASK_ATTRIBUTE_PROBABILITY; + } + + static AK_FORCE_INLINE bool hasNext(const ShortcutFlags flags) { + return (flags & FLAG_ATTRIBUTE_HAS_NEXT) != 0; + } + + // This method returns the size of the shortcut list region excluding the shortcut list size + // field at the beginning. + static AK_FORCE_INLINE int getShortcutListSizeAndForwardPointer( + const uint8_t *const dictRoot, int *const pos) { + // readUint16andAdvancePosition() returns an offset *including* the uint16 field itself. + return ByteArrayUtils::readUint16AndAdvancePosition(dictRoot, pos) + - SHORTCUT_LIST_SIZE_FIELD_SIZE; + } + + static AK_FORCE_INLINE void skipShortcuts(const uint8_t *const dictRoot, int *const pos) { + const int shortcutListSize = getShortcutListSizeAndForwardPointer(dictRoot, pos); + *pos += shortcutListSize; + } + + static AK_FORCE_INLINE bool isWhitelist(const ShortcutFlags flags) { + return getProbabilityFromFlags(flags) == WHITELIST_SHORTCUT_PROBABILITY; + } + + static AK_FORCE_INLINE int readShortcutTarget( + const uint8_t *const dictRoot, const int maxLength, int *const outWord, + int *const pos) { + return ByteArrayUtils::readStringAndAdvancePosition(dictRoot, maxLength, outWord, pos); + } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(ShortcutListReadingUtils); + + static const ShortcutFlags FLAG_ATTRIBUTE_HAS_NEXT; + static const ShortcutFlags MASK_ATTRIBUTE_PROBABILITY; + static const int SHORTCUT_LIST_SIZE_FIELD_SIZE; + static const int WHITELIST_SHORTCUT_PROBABILITY; +}; +} // namespace latinime +#endif // LATINIME_SHORTCUT_LIST_READING_UTILS_H |