rculfhash: add missing clear flag in gc
/*
 * rculfhash.c
 *
 * Userspace RCU library - Lock-Free Expandable RCU Hash Table
 *
 * Copyright 2010-2011 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#define _LGPL_SOURCE
#include <stdlib.h>
#include <errno.h>
#include <assert.h>
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <pthread.h>
#include <poll.h>	/* poll(), used to wait for in-flight resizes */

#include <urcu.h>
#include <urcu-call-rcu.h>
#include <urcu/arch.h>
#include <urcu/uatomic.h>
#include <urcu/jhash.h>
#include <urcu/compiler.h>
#include <urcu/rculfhash.h>

#define DEBUG		/* Test */

#ifdef DEBUG
#define dbg_printf(args...)	printf(args)
#else
#define dbg_printf(args...)
#endif

#define CHAIN_LEN_TARGET		1
#define CHAIN_LEN_RESIZE_THRESHOLD	2

#ifndef max
#define max(a, b)	((a) > (b) ? (a) : (b))
#endif

/*
 * The removed flag needs to be updated atomically with the pointer.
 * The dummy flag does not need to be updated atomically with the
 * pointer, but it is added as a pointer low bit flag to save space.
 */
#define REMOVED_FLAG	(1UL << 0)
#define DUMMY_FLAG	(1UL << 1)
#define FLAGS_MASK	((1UL << 2) - 1)

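/*
 * Illustration (not part of the algorithm itself): for a next pointer p,
 * flag_removed(p) sets bit 0, flag_dummy(p) sets bit 1, and clear_flag(p)
 * recovers the plain node address.  This is only valid because nodes are
 * allocated with at least 4-byte alignment, which keeps the two low-order
 * bits of every node pointer free for use as flags.
 */
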
struct rcu_table {
	unsigned long size;	/* always a power of 2 */
	unsigned long resize_target;	/* requested size of the next resize */
	int resize_initiated;	/* resize work already queued or running */
	struct rcu_head head;	/* deferred free of the old table after resize */
	struct rcu_ht_node *tbl[0];	/* bucket array of dummy node pointers */
};

struct rcu_ht {
	struct rcu_table *t;		/* shared */
	ht_hash_fct hash_fct;
	ht_compare_fct compare_fct;
	unsigned long hash_seed;
	pthread_mutex_t resize_mutex;	/* resize mutex: add/del mutex */
	unsigned int in_progress_resize;	/* count of queued resize works */
	void (*ht_call_rcu)(struct rcu_head *head,
		      void (*func)(struct rcu_head *head));
};

struct rcu_resize_work {
	struct rcu_head head;
	struct rcu_ht *ht;
};

/*
 * Algorithm to reverse bits in a word by lookup table, extended to
 * 64-bit words.
 * Source:
 * http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable
 * Originally from Public Domain.
 */

static const uint8_t BitReverseTable256[256] =
{
#define R2(n) (n), (n) + 2*64, (n) + 1*64, (n) + 3*64
#define R4(n) R2(n), R2((n) + 2*16), R2((n) + 1*16), R2((n) + 3*16)
#define R6(n) R4(n), R4((n) + 2*4 ), R4((n) + 1*4 ), R4((n) + 3*4 )
	R6(0), R6(2), R6(1), R6(3)
};
#undef R2
#undef R4
#undef R6

static
uint8_t bit_reverse_u8(uint8_t v)
{
	return BitReverseTable256[v];
}

static __attribute__((unused))
uint32_t bit_reverse_u32(uint32_t v)
{
	return ((uint32_t) bit_reverse_u8(v) << 24) |
		((uint32_t) bit_reverse_u8(v >> 8) << 16) |
		((uint32_t) bit_reverse_u8(v >> 16) << 8) |
		((uint32_t) bit_reverse_u8(v >> 24));
}

static __attribute__((unused))
uint64_t bit_reverse_u64(uint64_t v)
{
	return ((uint64_t) bit_reverse_u8(v) << 56) |
		((uint64_t) bit_reverse_u8(v >> 8) << 48) |
		((uint64_t) bit_reverse_u8(v >> 16) << 40) |
		((uint64_t) bit_reverse_u8(v >> 24) << 32) |
		((uint64_t) bit_reverse_u8(v >> 32) << 24) |
		((uint64_t) bit_reverse_u8(v >> 40) << 16) |
		((uint64_t) bit_reverse_u8(v >> 48) << 8) |
		((uint64_t) bit_reverse_u8(v >> 56));
}

static
unsigned long bit_reverse_ulong(unsigned long v)
{
#if (CAA_BITS_PER_LONG == 32)
	return bit_reverse_u32(v);
#else
	return bit_reverse_u64(v);
#endif
}

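/*
 * Note on ordering: nodes are kept sorted by the bit-reversed value of
 * their hash (see bit_reverse_ulong() above).  With this "split-ordered"
 * ordering, growing the table only requires adding new dummy (bucket)
 * nodes between existing ones; already-inserted nodes never move between
 * buckets.  This follows the split-ordered list idea of Shalev and Shavit.
 */
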
/*
 * Algorithm to find the log2 of a 32-bit unsigned integer.
 * source: http://graphics.stanford.edu/~seander/bithacks.html#IntegerLogLookup
 * Originally from Public Domain.
 */
static const char LogTable256[256] =
{
#define LT(n) n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n
	-1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
	LT(4), LT(5), LT(5), LT(6), LT(6), LT(6), LT(6),
	LT(7), LT(7), LT(7), LT(7), LT(7), LT(7), LT(7), LT(7)
};

uint32_t log2_u32(uint32_t v)
{
	uint32_t t, tt;

	if ((tt = (v >> 16)))
		return (t = (tt >> 8))
				? 24 + LogTable256[t]
				: 16 + LogTable256[tt];
	else
		return (t = (v >> 8))
				? 8 + LogTable256[t]
				: LogTable256[v];
}

static
void ht_resize_lazy(struct rcu_ht *ht, struct rcu_table *t, int growth);

/*
 * Called by the add path each time it walks one more element of a hash
 * chain.  Once the chain length reaches CHAIN_LEN_RESIZE_THRESHOLD, a
 * lazy resize is queued, with a growth order derived from the observed
 * chain length.
 */
static
void check_resize(struct rcu_ht *ht, struct rcu_table *t,
		  uint32_t chain_len)
{
	if (chain_len >= CHAIN_LEN_RESIZE_THRESHOLD)
		ht_resize_lazy(ht, t,
			log2_u32(chain_len - CHAIN_LEN_TARGET - 1));
}

static
struct rcu_ht_node *clear_flag(struct rcu_ht_node *node)
{
	return (struct rcu_ht_node *) (((unsigned long) node) & ~FLAGS_MASK);
}

static
int is_removed(struct rcu_ht_node *node)
{
	return ((unsigned long) node) & REMOVED_FLAG;
}

static
struct rcu_ht_node *flag_removed(struct rcu_ht_node *node)
{
	return (struct rcu_ht_node *) (((unsigned long) node) | REMOVED_FLAG);
}

static
int is_dummy(struct rcu_ht_node *node)
{
	return ((unsigned long) node) & DUMMY_FLAG;
}

static
struct rcu_ht_node *flag_dummy(struct rcu_ht_node *node)
{
	return (struct rcu_ht_node *) (((unsigned long) node) | DUMMY_FLAG);
}

/*
 * Atomically bring *ptr up to at least v.  Returns the resulting maximum
 * of the previous value and v.
 */
static
unsigned long _uatomic_max(unsigned long *ptr, unsigned long v)
{
	unsigned long old1, old2;

	old1 = uatomic_read(ptr);
	do {
		old2 = old1;
		if (old2 >= v)
			return old2;
	} while ((old1 = uatomic_cmpxchg(ptr, old2, v)) != old2);
	return v;
}

/*
 * Remove all logically deleted nodes from a bucket up to a certain node key.
 */
static
void _ht_gc_bucket(struct rcu_ht_node *dummy, struct rcu_ht_node *node)
{
	struct rcu_ht_node *iter_prev, *iter, *next, *new_next;

	for (;;) {
		iter_prev = dummy;
		/* We can always skip the dummy node initially */
		iter = rcu_dereference(iter_prev->p.next);
		assert(iter_prev->p.reverse_hash <= node->p.reverse_hash);
		for (;;) {
			if (unlikely(!clear_flag(iter)))
				return;
			if (clear_flag(iter)->p.reverse_hash > node->p.reverse_hash)
				return;
			next = rcu_dereference(clear_flag(iter)->p.next);
			if (is_removed(next))
				break;
			iter_prev = clear_flag(iter);
			iter = next;
		}
		assert(!is_removed(iter));
		if (is_dummy(iter))
			new_next = flag_dummy(clear_flag(next));
		else
			new_next = clear_flag(next);
		(void) uatomic_cmpxchg(&iter_prev->p.next, iter, new_next);
	}
}

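/*
 * Add node to the hash table.  When unique is non-zero and a node with an
 * equal key is already present, the existing node is returned instead and
 * the new node is not inserted.  When dummy is non-zero, the node is
 * inserted as a bucket dummy node.  The walk is lock-free: it retries on
 * concurrent updates and opportunistically unlinks logically removed
 * nodes it encounters, then garbage-collects the bucket before returning.
 */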
static
struct rcu_ht_node *_ht_add(struct rcu_ht *ht, struct rcu_table *t,
			    struct rcu_ht_node *node, int unique, int dummy)
{
	struct rcu_ht_node *iter_prev, *iter, *next, *new_node, *new_next,
			*dummy_node;
	unsigned long hash;

	if (!t->size) {
		assert(dummy);
		node->p.next = flag_dummy(NULL);
		return node;	/* Initial first add (head) */
	}
	hash = bit_reverse_ulong(node->p.reverse_hash);
	for (;;) {
		uint32_t chain_len = 0;

		/*
		 * iter_prev points to the non-removed node prior to the
		 * insert location.
		 */
		iter_prev = rcu_dereference(t->tbl[hash & (t->size - 1)]);
		/* We can always skip the dummy node initially */
		iter = rcu_dereference(iter_prev->p.next);
		assert(iter_prev->p.reverse_hash <= node->p.reverse_hash);
		for (;;) {
			if (unlikely(!clear_flag(iter)))
				goto insert;
			if (clear_flag(iter)->p.reverse_hash > node->p.reverse_hash)
				goto insert;
			next = rcu_dereference(clear_flag(iter)->p.next);
			if (is_removed(next))
				goto gc_node;
			if (unique
			    && !is_dummy(next)
			    && !ht->compare_fct(node->key, node->key_len,
						clear_flag(iter)->key,
						clear_flag(iter)->key_len))
				return clear_flag(iter);
			/* Only account for identical reverse hash once */
			if (iter_prev->p.reverse_hash != clear_flag(iter)->p.reverse_hash)
				check_resize(ht, t, ++chain_len);
			iter_prev = clear_flag(iter);
			iter = next;
		}
	insert:
		assert(node != clear_flag(iter));
		assert(!is_removed(iter_prev));
		assert(iter_prev != node);
		if (!dummy)
			node->p.next = clear_flag(iter);
		else
			node->p.next = flag_dummy(clear_flag(iter));
		if (is_dummy(iter))
			new_node = flag_dummy(node);
		else
			new_node = node;
		if (uatomic_cmpxchg(&iter_prev->p.next, iter,
				    new_node) != iter)
			continue;	/* retry */
		else
			goto gc_end;
	gc_node:
		assert(!is_removed(iter));
		if (is_dummy(iter))
			new_next = flag_dummy(clear_flag(next));
		else
			new_next = clear_flag(next);
		(void) uatomic_cmpxchg(&iter_prev->p.next, iter, new_next);
		/* retry */
	}
gc_end:
	/* Garbage collect logically removed nodes in the bucket */
	dummy_node = rcu_dereference(t->tbl[hash & (t->size - 1)]);
	_ht_gc_bucket(dummy_node, node);
	return node;
}

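/*
 * Removal is done in two steps: first the node is logically removed by
 * atomically setting the REMOVED flag on its next pointer, then
 * _ht_gc_bucket() physically unlinks it from the bucket.  Only the thread
 * that successfully set the flag reports success; concurrent removers of
 * an already-flagged node get -ENOENT.
 */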
static
int _ht_remove(struct rcu_ht *ht, struct rcu_table *t, struct rcu_ht_node *node)
{
	struct rcu_ht_node *dummy, *next, *old;
	int flagged = 0;
	unsigned long hash;

	/* logically delete the node */
	old = rcu_dereference(node->p.next);
	do {
		next = old;
		if (is_removed(next))
			goto end;
		assert(!is_dummy(next));
		old = uatomic_cmpxchg(&node->p.next, next,
				      flag_removed(next));
	} while (old != next);

	/* We performed the (logical) deletion. */
	flagged = 1;

	/*
	 * Ensure that the node is not visible to readers anymore: look up
	 * the node, and remove it (along with any other logically removed
	 * node) if found.
	 */
	hash = bit_reverse_ulong(node->p.reverse_hash);
	dummy = rcu_dereference(t->tbl[hash & (t->size - 1)]);
	_ht_gc_bucket(dummy, node);
end:
	/*
	 * Only the flagging action indicates that we (and no other thread)
	 * removed the node from the hash table.
	 */
	if (flagged) {
		assert(is_removed(rcu_dereference(node->p.next)));
		return 0;
	} else
		return -ENOENT;
}

static
void init_table(struct rcu_ht *ht, struct rcu_table *t,
		unsigned long first, unsigned long len)
{
	unsigned long i, end;

	end = first + len;
	for (i = first; i < end; i++) {
		/*
		 * Update table size each time we cross a power of two, so
		 * the dummy nodes added below hash into the part of the
		 * table that is already initialized.
		 */
		if (i != 0 && !(i & (i - 1)))
			t->size = i;
		t->tbl[i] = calloc(1, sizeof(struct _rcu_ht_node));
		t->tbl[i]->p.reverse_hash = bit_reverse_ulong(i);
		(void) _ht_add(ht, t, t->tbl[i], 0, 1);
	}
	t->resize_target = t->size = end;
	t->resize_initiated = 0;
}

struct rcu_ht *ht_new(ht_hash_fct hash_fct,
		      ht_compare_fct compare_fct,
		      unsigned long hash_seed,
		      unsigned long init_size,
		      void (*ht_call_rcu)(struct rcu_head *head,
				void (*func)(struct rcu_head *head)))
{
	struct rcu_ht *ht;

	ht = calloc(1, sizeof(struct rcu_ht));
	ht->hash_fct = hash_fct;
	ht->compare_fct = compare_fct;
	ht->hash_seed = hash_seed;
	ht->ht_call_rcu = ht_call_rcu;
	ht->in_progress_resize = 0;
	/* this mutex should not nest in read-side C.S. */
	pthread_mutex_init(&ht->resize_mutex, NULL);
	ht->t = calloc(1, sizeof(struct rcu_table)
		       + (max(init_size, 1) * sizeof(struct rcu_ht_node *)));
	ht->t->size = 0;
	pthread_mutex_lock(&ht->resize_mutex);
	init_table(ht, ht->t, 0, max(init_size, 1));
	pthread_mutex_unlock(&ht->resize_mutex);
	return ht;
}

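/*
 * Usage sketch (illustrative only; test_hash, test_compare, my_key and
 * my_key_len are hypothetical caller-provided callbacks and data matching
 * ht_hash_fct / ht_compare_fct).  The caller fills in node->key and
 * node->key_len before adding, and performs lookups under
 * rcu_read_lock():
 *
 *	struct rcu_ht *ht;
 *	struct rcu_ht_node *node;
 *
 *	ht = ht_new(test_hash, test_compare, 0x42UL, 8, call_rcu);
 *	node = malloc(sizeof(*node));
 *	node->key = my_key;
 *	node->key_len = my_key_len;
 *	ht_add(ht, node);
 *
 *	rcu_read_lock();
 *	node = ht_lookup(ht, my_key, my_key_len);
 *	rcu_read_unlock();
 */
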
/* Must be called within an RCU read-side critical section. */
struct rcu_ht_node *ht_lookup(struct rcu_ht *ht, void *key, size_t key_len)
{
	struct rcu_table *t;
	struct rcu_ht_node *node, *next;
	unsigned long hash, reverse_hash;

	hash = ht->hash_fct(key, key_len, ht->hash_seed);
	reverse_hash = bit_reverse_ulong(hash);

	t = rcu_dereference(ht->t);
	node = rcu_dereference(t->tbl[hash & (t->size - 1)]);
	for (;;) {
		if (unlikely(!node))
			break;
		if (unlikely(node->p.reverse_hash > reverse_hash)) {
			node = NULL;
			break;
		}
		next = rcu_dereference(node->p.next);
		if (likely(!is_removed(next))
		    && !is_dummy(next)
		    && likely(!ht->compare_fct(node->key, node->key_len, key, key_len))) {
			break;
		}
		node = clear_flag(next);
	}
	assert(!node || !is_dummy(rcu_dereference(node->p.next)));
	return node;
}

void ht_add(struct rcu_ht *ht, struct rcu_ht_node *node)
{
	struct rcu_table *t;
	unsigned long hash;

	hash = ht->hash_fct(node->key, node->key_len, ht->hash_seed);
	node->p.reverse_hash = bit_reverse_ulong((unsigned long) hash);

	t = rcu_dereference(ht->t);
	(void) _ht_add(ht, t, node, 0, 0);
}

struct rcu_ht_node *ht_add_unique(struct rcu_ht *ht, struct rcu_ht_node *node)
{
	struct rcu_table *t;
	unsigned long hash;

	hash = ht->hash_fct(node->key, node->key_len, ht->hash_seed);
	node->p.reverse_hash = bit_reverse_ulong((unsigned long) hash);

	t = rcu_dereference(ht->t);
	return _ht_add(ht, t, node, 1, 0);
}

int ht_remove(struct rcu_ht *ht, struct rcu_ht_node *node)
{
	struct rcu_table *t;

	t = rcu_dereference(ht->t);
	return _ht_remove(ht, t, node);
}

static
int ht_delete_dummy(struct rcu_ht *ht)
{
	struct rcu_table *t;
	struct rcu_ht_node *node;
	unsigned long i;

	t = ht->t;
	/* Check that the table is empty */
	node = t->tbl[0];
	do {
		node = clear_flag(node)->p.next;
		if (!is_dummy(node))
			return -EPERM;
		assert(!is_removed(node));
	} while (clear_flag(node));
	/* Internal sanity check: all nodes left should be dummy */
	for (i = 0; i < t->size; i++) {
		assert(is_dummy(t->tbl[i]->p.next));
		free(t->tbl[i]);
	}
	return 0;
}

/*
 * Should only be called when no more concurrent readers nor writers can
 * possibly access the table.
 */
int ht_destroy(struct rcu_ht *ht)
{
	int ret;

	/* Wait for in-flight resize operations to complete */
	while (uatomic_read(&ht->in_progress_resize))
		poll(NULL, 0, 100);	/* wait for 100ms */
	ret = ht_delete_dummy(ht);
	if (ret)
		return ret;
	free(ht->t);
	free(ht);
	return ret;
}

void ht_count_nodes(struct rcu_ht *ht,
		    unsigned long *count,
		    unsigned long *removed)
{
	struct rcu_table *t;
	struct rcu_ht_node *node, *next;

	*count = 0;
	*removed = 0;

	t = rcu_dereference(ht->t);
	/* Walk the whole table, counting live and logically removed nodes */
	node = rcu_dereference(t->tbl[0]);
	do {
		next = rcu_dereference(node->p.next);
		if (is_removed(next)) {
			assert(!is_dummy(next));
			(*removed)++;
		} else if (!is_dummy(next))
			(*count)++;
		node = clear_flag(next);
	} while (node);
}

static
void ht_free_table_cb(struct rcu_head *head)
{
	struct rcu_table *t =
		caa_container_of(head, struct rcu_table, head);
	free(t);
}

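/*
 * Resize scheme: a larger table is allocated, the existing bucket (dummy
 * node) pointers are copied into it, new dummy nodes are created for the
 * added buckets, and the new table is published with rcu_assign_pointer().
 * The old table is freed through call_rcu() once no reader can still hold
 * a reference to it.
 */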
/* called with resize mutex held */
static
void _do_ht_resize(struct rcu_ht *ht)
{
	unsigned long new_size, old_size;
	struct rcu_table *new_t, *old_t;

	old_t = ht->t;
	old_size = old_t->size;

	new_size = CMM_LOAD_SHARED(old_t->resize_target);
	dbg_printf("rculfhash: resize from %lu to %lu buckets\n",
		   old_size, new_size);
	if (old_size == new_size)
		return;
	new_t = malloc(sizeof(struct rcu_table)
		       + (new_size * sizeof(struct rcu_ht_node *)));
	assert(new_size > old_size);
	memcpy(&new_t->tbl, &old_t->tbl,
	       old_size * sizeof(struct rcu_ht_node *));
	init_table(ht, new_t, old_size, new_size - old_size);
	/* Changing table and size atomically wrt lookups */
	rcu_assign_pointer(ht->t, new_t);
	ht->ht_call_rcu(&old_t->head, ht_free_table_cb);
}

static
unsigned long resize_target_update(struct rcu_table *t,
				   int growth_order)
{
	return _uatomic_max(&t->resize_target,
			    t->size << growth_order);
}

void ht_resize(struct rcu_ht *ht, int growth)
{
	struct rcu_table *t = rcu_dereference(ht->t);
	unsigned long target_size;

	target_size = resize_target_update(t, growth);
	if (t->size < target_size) {
		CMM_STORE_SHARED(t->resize_initiated, 1);
		pthread_mutex_lock(&ht->resize_mutex);
		_do_ht_resize(ht);
		pthread_mutex_unlock(&ht->resize_mutex);
	}
}

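/*
 * do_resize_cb runs from the call_rcu worker thread.  The
 * in_progress_resize counter is incremented (with a memory barrier)
 * before the work is queued in ht_resize_lazy() and decremented here once
 * the resize is done, so that ht_destroy() can wait for all queued
 * resizes to drain.
 */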
static
void do_resize_cb(struct rcu_head *head)
{
	struct rcu_resize_work *work =
		caa_container_of(head, struct rcu_resize_work, head);
	struct rcu_ht *ht = work->ht;

	pthread_mutex_lock(&ht->resize_mutex);
	_do_ht_resize(ht);
	pthread_mutex_unlock(&ht->resize_mutex);
	free(work);
	cmm_smp_mb();	/* finish resize before decrement */
	uatomic_dec(&ht->in_progress_resize);
}

static
void ht_resize_lazy(struct rcu_ht *ht, struct rcu_table *t, int growth)
{
	struct rcu_resize_work *work;
	unsigned long target_size;

	target_size = resize_target_update(t, growth);
	if (!CMM_LOAD_SHARED(t->resize_initiated) && t->size < target_size) {
		uatomic_inc(&ht->in_progress_resize);
		cmm_smp_mb();	/* increment resize count before calling it */
		work = malloc(sizeof(*work));
		work->ht = ht;
		ht->ht_call_rcu(&work->head, do_resize_cb);
		CMM_STORE_SHARED(t->resize_initiated, 1);
	}
}