diff options
author | 2013-11-05 16:00:26 +0900 | |
---|---|---|
committer | 2013-11-05 16:00:26 +0900 | |
commit | 9b3e59d6444d54c6835369e939794c1c1e49b00d (patch) | |
tree | 723db342e23ad095a958c27a9c19b17f63fc0099 /native/jni/src | |
parent | 3bd64441a6e6adae613fc57c6a545cd3cf48d043 (diff) | |
download | latinime-9b3e59d6444d54c6835369e939794c1c1e49b00d.tar.gz latinime-9b3e59d6444d54c6835369e939794c1c1e49b00d.tar.xz latinime-9b3e59d6444d54c6835369e939794c1c1e49b00d.zip |
Implement ver4 bigram reading method.
Bug: 11073222
Change-Id: I7b3408938f304da361201892e0a1342fdf92e62e
Diffstat (limited to 'native/jni/src')
10 files changed, 173 insertions, 16 deletions
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h new file mode 100644 index 000000000..875a0ff9b --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h @@ -0,0 +1,65 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_VER4_BIGRAM_LIST_POLICY_H +#define LATINIME_VER4_BIGRAM_LIST_POLICY_H + +#include "defines.h" +#include "suggest/core/policy/dictionary_bigrams_structure_policy.h" +#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h" +#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h" +#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" + +namespace latinime { + +class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy { + public: + Ver4BigramListPolicy(const BigramDictContent *const bigramDictContent, + const TerminalPositionLookupTable *const terminalPositionLookupTable) + : mBigramDictContent(bigramDictContent), + mTerminalPositionLookupTable(terminalPositionLookupTable) {} + + void getNextBigram(int *const outBigramPos, int *const outProbability, + bool *const outHasNext, int *const bigramEntryPos) const { + int bigramFlags = 0; + int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; + mBigramDictContent->getBigramEntryAndAdvancePosition(&bigramFlags, &targetTerminalId, + bigramEntryPos); + if (outProbability) { + *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags); + } + if (outHasNext) { + *outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags); + } + if (outBigramPos) { + // Lookup target PtNode position. + *outBigramPos = + mTerminalPositionLookupTable->getTerminalPtNodePosition(targetTerminalId); + } + } + + void skipAllBigrams(int *const pos) const { + // Do nothing because we don't need to skip bigram lists in ver4 dictionaries. + } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy); + + const BigramDictContent *const mBigramDictContent; + const TerminalPositionLookupTable *const mTerminalPositionLookupTable; +}; +} // namespace latinime +#endif /* LATINIME_VER4_BIGRAM_LIST_POLICY_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h new file mode 100644 index 000000000..634c1f08e --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_BIGRAM_DICT_CONTENT_H +#define LATINIME_BIGRAM_DICT_CONTENT_H + +#include "defines.h" +#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h" +#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" + +namespace latinime { + +class BigramDictContent : public SparseTableDictContent { + public: + BigramDictContent(const char *const dictDirPath, const bool isUpdatable) + : SparseTableDictContent(dictDirPath, + Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION, + Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION, + Ver4DictConstants::BIGRAM_FILE_EXTENSION, isUpdatable, + Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, + Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE) {} + + void getBigramEntryAndAdvancePosition(int *const outBigramFlags, + int *const outTargetTerminalId, int *const bigramEntryPos) const { + const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer(); + if (outBigramFlags) { + *outBigramFlags = bigramListBuffer->readUintAndAdvancePosition( + Ver4DictConstants::BIGRAM_FRAGS_FIELD_SIZE, bigramEntryPos); + } + if (outTargetTerminalId) { + *outTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition( + Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos); + } + } + + // Returns head position of bigram list for a PtNode specified by terminalId. + int getBigramListHeadPos(const int terminalId) const { + const SparseTable *const addressLookupTable = getAddressLookupTable(); + if (!addressLookupTable->contains(terminalId)) { + return NOT_A_DICT_POS; + } + return addressLookupTable->get(terminalId); + } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(BigramDictContent); +}; +} // namespace latinime +#endif /* LATINIME_BIGRAM_DICT_CONTENT_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h index 4836d8688..71868e9ca 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h @@ -58,6 +58,15 @@ class SparseTableDictContent : public DictContent { && mContentBuffer.get() != 0; } + protected: + const SparseTable *getAddressLookupTable() const { + return &mAddressLookupTable; + } + + const BufferWithExtendableBuffer *getContentBuffer() const { + return &mExpandableContentBuffer; + } + private: DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTableDictContent); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h index b12ab58a7..173d0da05 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h @@ -29,11 +29,14 @@ namespace latinime { class TerminalPositionLookupTable : public SingleDictContent { public: - TerminalPositionLookupTable(const char *const dictDirPath, const bool isUpdatable) + // TODO: Quit using headerRegionSize. + TerminalPositionLookupTable(const char *const dictDirPath, const bool isUpdatable, + const int headerRegionSize) : SingleDictContent(dictDirPath, Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION, isUpdatable), mSize(getBuffer()->getTailPosition() - / Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE) {} + / Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE), + mHeaderRegionSize(headerRegionSize) {} int getTerminalPtNodePosition(const int terminalId) const { if (terminalId < 0 || terminalId >= mSize) { @@ -41,13 +44,14 @@ class TerminalPositionLookupTable : public SingleDictContent { } const int readingPos = terminalId * Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE; return getBuffer()->readUint(Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, - readingPos); + readingPos) - mHeaderRegionSize; } private: DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalPositionLookupTable); const int mSize; + const int mHeaderRegionSize; }; } // namespace latinime #endif // LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h index 7c0c83c3e..4e10403f3 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h @@ -18,6 +18,8 @@ #define LATINIME_VER4_DICT_BUFFER_H #include "defines.h" +#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h" +#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h" #include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h" #include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h" #include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h" @@ -52,24 +54,29 @@ class Ver4DictBuffers { return mDictBuffer.get()->getBufferSize(); } + AK_FORCE_INLINE const TerminalPositionLookupTable *getTerminalPositionLookupTable() const { + return &mTerminalPositionLookupTable; + } + AK_FORCE_INLINE const ProbabilityDictContent *getProbabilityDictContent() const { return &mProbabilityDictContent; } + AK_FORCE_INLINE const BigramDictContent *getBigramDictContent() const { + return &mBigramDictContent; + } + private: DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictBuffers); AK_FORCE_INLINE Ver4DictBuffers(const char *const dictDirPath, const MmappedBuffer::MmappedBufferPtr &dictBuffer, const bool isUpdatable) : mDictBuffer(dictBuffer), - mTerminalPositionLookupTable(dictDirPath, isUpdatable), + // TODO: Quit using getHeaderSize. + mTerminalPositionLookupTable(dictDirPath, isUpdatable, + HeaderReadWriteUtils::getHeaderSize(mDictBuffer.get()->getBuffer())), mProbabilityDictContent(dictDirPath, isUpdatable), - mBigramDictContent(dictDirPath, - Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION, - Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION, - Ver4DictConstants::BIGRAM_FILE_EXTENSION, isUpdatable, - Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, - Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE), + mBigramDictContent(dictDirPath, isUpdatable), mShortcutDictContent(dictDirPath, Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION, Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION, @@ -80,7 +87,7 @@ class Ver4DictBuffers { const MmappedBuffer::MmappedBufferPtr mDictBuffer; TerminalPositionLookupTable mTerminalPositionLookupTable; ProbabilityDictContent mProbabilityDictContent; - SparseTableDictContent mBigramDictContent; + BigramDictContent mBigramDictContent; SparseTableDictContent mShortcutDictContent; }; } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp index 6b3a749b2..fb29c0c4a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp @@ -40,4 +40,7 @@ const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE = 4; const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 16; const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4; +const int Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE = 3; +const int Ver4DictConstants::BIGRAM_FRAGS_FIELD_SIZE = 1; + } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h index 3801f594d..a0bebb75f 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h @@ -43,6 +43,9 @@ class Ver4DictConstants { static const int BIGRAM_ADDRESS_TABLE_DATA_SIZE; static const int SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE; static const int SHORTCUT_ADDRESS_TABLE_DATA_SIZE; + + static const int BIGRAM_FRAGS_FIELD_SIZE; + static const int BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE; private: DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictConstants); }; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index 33f738413..43ad301db 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -119,7 +119,8 @@ int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) cons if (ptNodeParams.isDeleted()) { return NOT_A_DICT_POS; } - return ptNodeParams.getTerminalId(); + return mBuffers.get()->getBigramDictContent()->getBigramListHeadPos( + ptNodeParams.getTerminalId()); } bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index 2f577f741..d0be77d0b 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -19,6 +19,7 @@ #include "defines.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" +#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" @@ -38,6 +39,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { mDictBuffer(mBuffers.get()->getRawDictBuffer() + mHeaderPolicy.getSize(), mBuffers.get()->getRawDictBufferSize() - mHeaderPolicy.getSize(), BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), + mBigramPolicy(mBuffers.get()->getBigramDictContent(), + mBuffers.get()->getTerminalPositionLookupTable()), mNodeReader(&mDictBuffer, mBuffers.get()->getProbabilityDictContent()) {}; AK_FORCE_INLINE int getRootPosition() const { @@ -67,7 +70,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { } const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const { - return 0; + return &mBigramPolicy; } const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const { @@ -97,6 +100,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers; const HeaderPolicy mHeaderPolicy; BufferWithExtendableBuffer mDictBuffer; + const Ver4BigramListPolicy mBigramPolicy; Ver4PatriciaTrieNodeReader mNodeReader; }; } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp index 93ec70c1e..2678b8c7b 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp @@ -21,11 +21,10 @@ namespace latinime { const int SparseTable::NOT_EXIST = -1; bool SparseTable::contains(const int id) const { - if (id < 0 || mIndexTableBuffer->getTailPosition() <= id * mDataSize) { + const int readingPos = id / mBlockSize * mDataSize; + if (id < 0 || mIndexTableBuffer->getTailPosition() <= readingPos) { return false; } - const int indexTableIndex = id / mBlockSize; - const int readingPos = indexTableIndex * mDataSize; const int index = mIndexTableBuffer->readUint(mDataSize, readingPos); return index != NOT_EXIST; } |