1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
|
/*
* Copyright (C) 2014, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_TRIE_MAP_H
#define LATINIME_TRIE_MAP_H
#include <climits>
#include <cstdint>
#include <cstdio>
#include <vector>
#include "defines.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "utils/byte_array_view.h"
namespace latinime {
/**
* Trie map derived from Phil Bagwell's Hash Array Mapped Trie.
* key is int and value is uint64_t.
* This supports multiple level map. Terminal entries can have a bitmap for the next level map.
* This doesn't support root map resizing.
*/
class TrieMap {
public:
struct Result {
const uint64_t mValue;
const bool mIsValid;
const int mNextLevelBitmapEntryIndex;
Result(const uint64_t value, const bool isValid, const int nextLevelBitmapEntryIndex)
: mValue(value), mIsValid(isValid),
mNextLevelBitmapEntryIndex(nextLevelBitmapEntryIndex) {}
};
/**
* Struct to record iteration state in a table.
*/
struct TableIterationState {
int mTableSize;
int mTableIndex;
int mCurrentIndex;
TableIterationState(const int tableSize, const int tableIndex)
: mTableSize(tableSize), mTableIndex(tableIndex), mCurrentIndex(0) {}
};
class TrieMapRange;
class TrieMapIterator {
public:
class IterationResult {
public:
IterationResult(const TrieMap *const trieMap, const int key, const uint64_t value,
const int nextLeveBitmapEntryIndex)
: mTrieMap(trieMap), mKey(key), mValue(value),
mNextLevelBitmapEntryIndex(nextLeveBitmapEntryIndex) {}
const TrieMapRange getEntriesInNextLevel() const {
return TrieMapRange(mTrieMap, mNextLevelBitmapEntryIndex);
}
bool hasNextLevelMap() const {
return mNextLevelBitmapEntryIndex != INVALID_INDEX;
}
AK_FORCE_INLINE int key() const {
return mKey;
}
AK_FORCE_INLINE uint64_t value() const {
return mValue;
}
AK_FORCE_INLINE int getNextLevelBitmapEntryIndex() const {
return mNextLevelBitmapEntryIndex;
}
private:
const TrieMap *const mTrieMap;
const int mKey;
const uint64_t mValue;
const int mNextLevelBitmapEntryIndex;
};
TrieMapIterator(const TrieMap *const trieMap, const int bitmapEntryIndex)
: mTrieMap(trieMap), mStateStack(), mBaseBitmapEntryIndex(bitmapEntryIndex),
mKey(0), mValue(0), mIsValid(false), mNextLevelBitmapEntryIndex(INVALID_INDEX) {
if (!trieMap || mBaseBitmapEntryIndex == INVALID_INDEX) {
return;
}
const Entry bitmapEntry = mTrieMap->readEntry(mBaseBitmapEntryIndex);
mStateStack.emplace_back(
mTrieMap->popCount(bitmapEntry.getBitmap()), bitmapEntry.getTableIndex());
this->operator++();
}
const IterationResult operator*() const {
return IterationResult(mTrieMap, mKey, mValue, mNextLevelBitmapEntryIndex);
}
bool operator!=(const TrieMapIterator &other) const {
// Caveat: This works only for for loops.
return mIsValid || other.mIsValid;
}
const TrieMapIterator &operator++() {
const Result result = mTrieMap->iterateNext(&mStateStack, &mKey);
mValue = result.mValue;
mIsValid = result.mIsValid;
mNextLevelBitmapEntryIndex = result.mNextLevelBitmapEntryIndex;
return *this;
}
private:
DISALLOW_DEFAULT_CONSTRUCTOR(TrieMapIterator);
DISALLOW_ASSIGNMENT_OPERATOR(TrieMapIterator);
const TrieMap *const mTrieMap;
std::vector<TrieMap::TableIterationState> mStateStack;
const int mBaseBitmapEntryIndex;
int mKey;
uint64_t mValue;
bool mIsValid;
int mNextLevelBitmapEntryIndex;
};
/**
* Class to support iterating entries in TrieMap by range base for loops.
*/
class TrieMapRange {
public:
TrieMapRange(const TrieMap *const trieMap, const int bitmapEntryIndex)
: mTrieMap(trieMap), mBaseBitmapEntryIndex(bitmapEntryIndex) {};
TrieMapIterator begin() const {
return TrieMapIterator(mTrieMap, mBaseBitmapEntryIndex);
}
const TrieMapIterator end() const {
return TrieMapIterator(nullptr, INVALID_INDEX);
}
private:
DISALLOW_DEFAULT_CONSTRUCTOR(TrieMapRange);
DISALLOW_ASSIGNMENT_OPERATOR(TrieMapRange);
const TrieMap *const mTrieMap;
const int mBaseBitmapEntryIndex;
};
static const int INVALID_INDEX;
static const uint64_t MAX_VALUE;
TrieMap();
// Construct TrieMap using existing data in the memory region written by save().
TrieMap(const ReadWriteByteArrayView buffer);
void dump(const int from = 0, const int to = 0) const;
bool isNearSizeLimit() const {
return mBuffer.isNearSizeLimit();
}
int getRootBitmapEntryIndex() const {
return ROOT_BITMAP_ENTRY_INDEX;
}
// Returns bitmapEntryIndex. Create the next level map if it doesn't exist.
int getNextLevelBitmapEntryIndex(const int key) {
return getNextLevelBitmapEntryIndex(key, ROOT_BITMAP_ENTRY_INDEX);
}
int getNextLevelBitmapEntryIndex(const int key, const int bitmapEntryIndex);
const Result getRoot(const int key) const {
return get(key, ROOT_BITMAP_ENTRY_INDEX);
}
const Result get(const int key, const int bitmapEntryIndex) const;
bool putRoot(const int key, const uint64_t value) {
return put(key, value, ROOT_BITMAP_ENTRY_INDEX);
}
bool put(const int key, const uint64_t value, const int bitmapEntryIndex);
const TrieMapRange getEntriesInRootLevel() const {
return getEntriesInSpecifiedLevel(ROOT_BITMAP_ENTRY_INDEX);
}
const TrieMapRange getEntriesInSpecifiedLevel(const int bitmapEntryIndex) const {
return TrieMapRange(this, bitmapEntryIndex);
}
bool save(FILE *const file) const;
bool remove(const int key, const int bitmapEntryIndex);
private:
DISALLOW_COPY_AND_ASSIGN(TrieMap);
/**
* Struct represents an entry.
*
* Entry is one of these entry types. All entries are fixed size and have 2 fields FIELD_0 and
* FIELD_1.
* 1. bitmap entry. bitmap entry contains bitmap and the link to hash table.
* FIELD_0(bitmap) FIELD_1(LINK_TO_HASH_TABLE)
* 2. terminal entry. terminal entry contains hashed key and value or terminal link. terminal
* entry have terminal link when the value is not fit to FIELD_1 or there is a next level map
* for the key.
* FIELD_0(hashed key) (FIELD_1(VALUE_FLAG VALUE) | FIELD_1(TERMINAL_LINK_FLAG TERMINAL_LINK))
* 3. value entry. value entry represents a value. Upper order bytes are stored in FIELD_0 and
* lower order bytes are stored in FIELD_1.
* FIELD_0(value (upper order bytes)) FIELD_1(value (lower order bytes))
*/
struct Entry {
const uint32_t mData0;
const uint32_t mData1;
Entry(const uint32_t data0, const uint32_t data1) : mData0(data0), mData1(data1) {}
AK_FORCE_INLINE bool isBitmapEntry() const {
return (mData1 & VALUE_FLAG) == 0 && (mData1 & TERMINAL_LINK_FLAG) == 0;
}
AK_FORCE_INLINE bool hasTerminalLink() const {
return (mData1 & TERMINAL_LINK_FLAG) != 0;
}
// For terminal entry.
AK_FORCE_INLINE uint32_t getKey() const {
return mData0;
}
// For terminal entry.
AK_FORCE_INLINE uint32_t getValue() const {
return mData1 & VALUE_MASK;
}
// For terminal entry.
AK_FORCE_INLINE bool isValidTerminalEntry() const {
return hasTerminalLink() || ((mData1 & VALUE_MASK) != INVALID_VALUE_IN_KEY_VALUE_ENTRY);
}
// For terminal entry.
AK_FORCE_INLINE uint32_t getValueEntryIndex() const {
return mData1 & TERMINAL_LINK_MASK;
}
// For bitmap entry.
AK_FORCE_INLINE uint32_t getBitmap() const {
return mData0;
}
// For bitmap entry.
AK_FORCE_INLINE int getTableIndex() const {
return static_cast<int>(mData1);
}
// For value entry.
AK_FORCE_INLINE uint64_t getValueOfValueEntry() const {
return ((static_cast<uint64_t>(mData0) << (FIELD1_SIZE * CHAR_BIT)) ^ mData1);
}
};
BufferWithExtendableBuffer mBuffer;
static const int FIELD0_SIZE;
static const int FIELD1_SIZE;
static const int ENTRY_SIZE;
static const uint32_t VALUE_FLAG;
static const uint32_t VALUE_MASK;
static const uint32_t INVALID_VALUE_IN_KEY_VALUE_ENTRY;
static const uint32_t TERMINAL_LINK_FLAG;
static const uint32_t TERMINAL_LINK_MASK;
static const int NUM_OF_BITS_USED_FOR_ONE_LEVEL;
static const uint32_t LABEL_MASK;
static const int MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL;
static const int ROOT_BITMAP_ENTRY_INDEX;
static const int ROOT_BITMAP_ENTRY_POS;
static const Entry EMPTY_BITMAP_ENTRY;
static const int TERMINAL_LINKED_ENTRY_COUNT;
static const int MAX_BUFFER_SIZE;
uint32_t getBitShuffledKey(const uint32_t key) const;
bool writeValue(const uint64_t value, const int terminalEntryIndex);
bool updateValue(const Entry &terminalEntry, const uint64_t value,
const int terminalEntryIndex);
bool freeTable(const int tableIndex, const int entryCount);
int allocateTable(const int entryCount);
int getTerminalEntryIndex(const uint32_t key, const uint32_t hashedKey,
const Entry &bitmapEntry, const int level) const;
const Result getInternal(const uint32_t key, const uint32_t hashedKey,
const int bitmapEntryIndex, const int level) const;
bool putInternal(const uint32_t key, const uint64_t value, const uint32_t hashedKey,
const int bitmapEntryIndex, const Entry &bitmapEntry, const int level);
bool addNewEntryByResolvingConflict(const uint32_t key, const uint64_t value,
const uint32_t hashedKey, const Entry &conflictedEntry, const int conflictedEntryIndex,
const int level);
bool addNewEntryByExpandingTable(const uint32_t key, const uint64_t value,
const int tableIndex, const uint32_t bitmap, const int bitmapEntryIndex,
const int label);
const Result iterateNext(std::vector<TableIterationState> *const iterationState,
int *const outKey) const;
AK_FORCE_INLINE const Entry readEntry(const int entryIndex) const {
return Entry(readField0(entryIndex), readField1(entryIndex));
}
// Returns whether an entry for the index is existing by testing if the index-th bit in the
// bitmap is set or not.
AK_FORCE_INLINE bool exists(const uint32_t bitmap, const int index) const {
return (bitmap & (1 << index)) != 0;
}
// Set index-th bit in the bitmap.
AK_FORCE_INLINE uint32_t setExist(const uint32_t bitmap, const int index) const {
return bitmap | (1 << index);
}
// Count set bits before index in the bitmap.
AK_FORCE_INLINE int popCount(const uint32_t bitmap, const int index) const {
return popCount(bitmap & ((1 << index) - 1));
}
// Count set bits in the bitmap.
AK_FORCE_INLINE int popCount(const uint32_t bitmap) const {
return __builtin_popcount(bitmap);
// int v = bitmap - ((bitmap >> 1) & 0x55555555);
// v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
// return (((v + (v >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24;
}
AK_FORCE_INLINE int getLabel(const uint32_t hashedKey, const int level) const {
return (hashedKey >> (level * NUM_OF_BITS_USED_FOR_ONE_LEVEL)) & LABEL_MASK;
}
AK_FORCE_INLINE uint32_t readField0(const int entryIndex) const {
return mBuffer.readUint(FIELD0_SIZE, ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE);
}
AK_FORCE_INLINE uint32_t readField1(const int entryIndex) const {
return mBuffer.readUint(FIELD1_SIZE,
ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE + FIELD0_SIZE);
}
AK_FORCE_INLINE int readEmptyTableLink(const int entryCount) const {
return mBuffer.readUint(FIELD1_SIZE, (entryCount - 1) * FIELD1_SIZE);
}
AK_FORCE_INLINE bool writeEmptyTableLink(const int tableIndex, const int entryCount) {
return mBuffer.writeUint(tableIndex, FIELD1_SIZE, (entryCount - 1) * FIELD1_SIZE);
}
AK_FORCE_INLINE bool writeField0(const uint32_t data, const int entryIndex) {
return mBuffer.writeUint(data, FIELD0_SIZE,
ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE);
}
AK_FORCE_INLINE bool writeField1(const uint32_t data, const int entryIndex) {
return mBuffer.writeUint(data, FIELD1_SIZE,
ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE + FIELD0_SIZE);
}
AK_FORCE_INLINE bool writeEntry(const Entry &entry, const int entryIndex) {
return writeField0(entry.mData0, entryIndex) && writeField1(entry.mData1, entryIndex);
}
AK_FORCE_INLINE bool writeTerminalEntry(const uint32_t key, const uint64_t value,
const int entryIndex) {
return writeField0(key, entryIndex) && writeValue(value, entryIndex);
}
AK_FORCE_INLINE bool copyEntry(const int originalEntryIndex, const int newEntryIndex) {
return writeEntry(readEntry(originalEntryIndex), newEntryIndex);
}
AK_FORCE_INLINE int getTailEntryIndex() const {
return (mBuffer.getTailPosition() - ROOT_BITMAP_ENTRY_POS) / ENTRY_SIZE;
}
bool removeInner(const Entry &bitmapEntry);
};
} // namespace latinime
#endif /* LATINIME_TRIE_MAP_H */
|