Version 0.8.11
[userspace-rcu.git] / urcu-bp.c
1 /*
2 * urcu-bp.c
3 *
4 * Userspace RCU library, "bulletproof" version.
5 *
6 * Copyright (c) 2009 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
7 * Copyright (c) 2009 Paul E. McKenney, IBM Corporation.
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 *
23 * IBM's contributions to this file may be relicensed under LGPLv2 or later.
24 */
25
26 #define _GNU_SOURCE
27 #define _LGPL_SOURCE
28 #include <stdio.h>
29 #include <pthread.h>
30 #include <signal.h>
31 #include <assert.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <errno.h>
35 #include <poll.h>
36 #include <unistd.h>
37 #include <sys/mman.h>
38
39 #include "urcu/wfcqueue.h"
40 #include "urcu/map/urcu-bp.h"
41 #include "urcu/static/urcu-bp.h"
42 #include "urcu-pointer.h"
43 #include "urcu/tls-compat.h"
44
45 #include "urcu-die.h"
46
47 /* Do not #define _LGPL_SOURCE to ensure we can emit the wrapper symbols */
48 #undef _LGPL_SOURCE
49 #include "urcu-bp.h"
50 #define _LGPL_SOURCE
51
52 #ifndef MAP_ANONYMOUS
53 #define MAP_ANONYMOUS MAP_ANON
54 #endif
55
#ifdef __linux__
/*
 * Linux build: hand the request straight to mremap(2). Arguments and
 * return value are passed through unchanged.
 */
static void *mremap_wrapper(void *old_address, size_t old_size,
		size_t new_size, int flags)
{
	return mremap(old_address, old_size, new_size, flags);
}
#else /* #ifdef __linux__ */

#define MREMAP_MAYMOVE	1
#define MREMAP_FIXED	2

/*
 * Non-Linux build: this stand-in never resizes anything and always
 * reports failure by returning MAP_FAILED. Callers must not pass
 * MREMAP_MAYMOVE (enforced by assertion). It is not a generic mremap
 * replacement.
 */
static void *mremap_wrapper(void *old_address, size_t old_size,
		size_t new_size, int flags)
{
	assert(!(flags & MREMAP_MAYMOVE));

	return MAP_FAILED;
}
#endif /* #else #ifdef __linux__ */
81
/* Delay, in milliseconds, passed to poll() when waiting for readers. */
#define RCU_SLEEP_DELAY_MS	10
/* Number of reader slots the very first arena chunk is sized for. */
#define INIT_NR_THREADS		8
/*
 * Size in bytes of the first arena chunk: one chunk header followed by
 * INIT_NR_THREADS reader slots. The expansion is fully parenthesized so
 * it behaves as a single operand wherever the macro is used (e.g. when
 * multiplied or subtracted).
 */
#define ARENA_INIT_ALLOC			\
	(sizeof(struct registry_chunk)		\
	+ INIT_NR_THREADS * sizeof(struct rcu_reader))

/*
 * Active attempts to check for reader Q.S. before calling sleep().
 */
#define RCU_QS_ACTIVE_ATTEMPTS	100
93
/*
 * Library initialization reference count. Incremented by rcu_bp_init()
 * and decremented by _rcu_bp_exit(), both with init_lock held.
 */
static int rcu_bp_refcount;

/* Library constructor and destructor, defined further down. */
static void __attribute__((constructor)) rcu_bp_init(void);
static void __attribute__((destructor)) _rcu_bp_exit(void);
101
/*
 * rcu_gp_lock serializes threads calling synchronize_rcu().
 */
static pthread_mutex_t rcu_gp_lock = PTHREAD_MUTEX_INITIALIZER;

/*
 * rcu_registry_lock serializes threads adding themselves to, or
 * removing themselves from, the registry, and excludes them from
 * synchronize_rcu() while it walks that registry. It is not held for
 * the whole duration of a grace period wait: it is dropped and taken
 * again between successive passes over the registry.
 * rcu_registry_lock may nest inside rcu_gp_lock.
 */
static pthread_mutex_t rcu_registry_lock = PTHREAD_MUTEX_INITIALIZER;

/* Held by rcu_bp_init() and _rcu_bp_exit() around their whole body. */
static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;
/* Set to 1 by rcu_bp_init() once urcu_bp_key has been created. */
static int initialized;

/* Thread-specific key holding each thread's registry slot pointer. */
static pthread_key_t urcu_bp_key;
122
123 #ifdef DEBUG_YIELD
124 unsigned int rcu_yield_active;
125 __DEFINE_URCU_TLS_GLOBAL(unsigned int, rcu_rand_yield);
126 #endif
127
128 struct rcu_gp rcu_gp = { .ctr = RCU_GP_COUNT };
129
130 /*
131 * Pointer to registry elements. Written to only by each individual reader. Read
132 * by both the reader and the writers.
133 */
134 __DEFINE_URCU_TLS_GLOBAL(struct rcu_reader *, rcu_reader);
135
136 static CDS_LIST_HEAD(registry);
137
138 struct registry_chunk {
139 size_t data_len; /* data length */
140 size_t used; /* amount of data used */
141 struct cds_list_head node; /* chunk_list node */
142 char data[];
143 };
144
145 struct registry_arena {
146 struct cds_list_head chunk_list;
147 };
148
149 static struct registry_arena registry_arena = {
150 .chunk_list = CDS_LIST_HEAD_INIT(registry_arena.chunk_list),
151 };
152
153 /* Saved fork signal mask, protected by rcu_gp_lock */
154 static sigset_t saved_fork_signal_mask;
155
/*
 * Acquire @mutex; any unexpected error code is handed to urcu_die().
 *
 * When DISTRUST_SIGNALS_EXTREME is defined, the mutex is acquired by
 * polling pthread_mutex_trylock() every 10 ms instead of blocking;
 * EBUSY and EINTR simply cause another attempt.
 */
static void mutex_lock(pthread_mutex_t *mutex)
{
	int err;

#ifdef DISTRUST_SIGNALS_EXTREME
	for (;;) {
		err = pthread_mutex_trylock(mutex);
		if (!err)
			break;
		if (err != EBUSY && err != EINTR)
			urcu_die(err);
		poll(NULL,0,10);
	}
#else /* #ifdef DISTRUST_SIGNALS_EXTREME */
	err = pthread_mutex_lock(mutex);
	if (err)
		urcu_die(err);
#endif /* #else #ifdef DISTRUST_SIGNALS_EXTREME */
}
172
/* Release @mutex; a non-zero error code is handed to urcu_die(). */
static void mutex_unlock(pthread_mutex_t *mutex)
{
	int err = pthread_mutex_unlock(mutex);

	if (err)
		urcu_die(err);
}
181
182 /*
183 * Always called with rcu_registry lock held. Releases this lock between
184 * iterations and grabs it again. Holds the lock when it returns.
185 */
186 static void wait_for_readers(struct cds_list_head *input_readers,
187 struct cds_list_head *cur_snap_readers,
188 struct cds_list_head *qsreaders)
189 {
190 unsigned int wait_loops = 0;
191 struct rcu_reader *index, *tmp;
192
193 /*
194 * Wait for each thread URCU_TLS(rcu_reader).ctr to either
195 * indicate quiescence (not nested), or observe the current
196 * rcu_gp.ctr value.
197 */
198 for (;;) {
199 if (wait_loops < RCU_QS_ACTIVE_ATTEMPTS)
200 wait_loops++;
201
202 cds_list_for_each_entry_safe(index, tmp, input_readers, node) {
203 switch (rcu_reader_state(&index->ctr)) {
204 case RCU_READER_ACTIVE_CURRENT:
205 if (cur_snap_readers) {
206 cds_list_move(&index->node,
207 cur_snap_readers);
208 break;
209 }
210 /* Fall-through */
211 case RCU_READER_INACTIVE:
212 cds_list_move(&index->node, qsreaders);
213 break;
214 case RCU_READER_ACTIVE_OLD:
215 /*
216 * Old snapshot. Leaving node in
217 * input_readers will make us busy-loop
218 * until the snapshot becomes current or
219 * the reader becomes inactive.
220 */
221 break;
222 }
223 }
224
225 if (cds_list_empty(input_readers)) {
226 break;
227 } else {
228 /* Temporarily unlock the registry lock. */
229 mutex_unlock(&rcu_registry_lock);
230 if (wait_loops >= RCU_QS_ACTIVE_ATTEMPTS)
231 (void) poll(NULL, 0, RCU_SLEEP_DELAY_MS);
232 else
233 caa_cpu_relax();
234 /* Re-lock the registry lock before the next loop. */
235 mutex_lock(&rcu_registry_lock);
236 }
237 }
238 }
239
240 void synchronize_rcu(void)
241 {
242 CDS_LIST_HEAD(cur_snap_readers);
243 CDS_LIST_HEAD(qsreaders);
244 sigset_t newmask, oldmask;
245 int ret;
246
247 ret = sigfillset(&newmask);
248 assert(!ret);
249 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
250 assert(!ret);
251
252 mutex_lock(&rcu_gp_lock);
253
254 mutex_lock(&rcu_registry_lock);
255
256 if (cds_list_empty(&registry))
257 goto out;
258
259 /* All threads should read qparity before accessing data structure
260 * where new ptr points to. */
261 /* Write new ptr before changing the qparity */
262 cmm_smp_mb();
263
264 /*
265 * Wait for readers to observe original parity or be quiescent.
266 * wait_for_readers() can release and grab again rcu_registry_lock
267 * interally.
268 */
269 wait_for_readers(&registry, &cur_snap_readers, &qsreaders);
270
271 /*
272 * Adding a cmm_smp_mb() which is _not_ formally required, but makes the
273 * model easier to understand. It does not have a big performance impact
274 * anyway, given this is the write-side.
275 */
276 cmm_smp_mb();
277
278 /* Switch parity: 0 -> 1, 1 -> 0 */
279 CMM_STORE_SHARED(rcu_gp.ctr, rcu_gp.ctr ^ RCU_GP_CTR_PHASE);
280
281 /*
282 * Must commit qparity update to memory before waiting for other parity
283 * quiescent state. Failure to do so could result in the writer waiting
284 * forever while new readers are always accessing data (no progress).
285 * Ensured by CMM_STORE_SHARED and CMM_LOAD_SHARED.
286 */
287
288 /*
289 * Adding a cmm_smp_mb() which is _not_ formally required, but makes the
290 * model easier to understand. It does not have a big performance impact
291 * anyway, given this is the write-side.
292 */
293 cmm_smp_mb();
294
295 /*
296 * Wait for readers to observe new parity or be quiescent.
297 * wait_for_readers() can release and grab again rcu_registry_lock
298 * interally.
299 */
300 wait_for_readers(&cur_snap_readers, NULL, &qsreaders);
301
302 /*
303 * Put quiescent reader list back into registry.
304 */
305 cds_list_splice(&qsreaders, &registry);
306
307 /*
308 * Finish waiting for reader threads before letting the old ptr being
309 * freed.
310 */
311 cmm_smp_mb();
312 out:
313 mutex_unlock(&rcu_registry_lock);
314 mutex_unlock(&rcu_gp_lock);
315 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
316 assert(!ret);
317 }
318
319 /*
320 * library wrappers to be used by non-LGPL compatible source code.
321 */
322
/* Exported wrapper symbol: forwards to _rcu_read_lock(). */
void rcu_read_lock(void)
{
	_rcu_read_lock();
}
327
/* Exported wrapper symbol: forwards to _rcu_read_unlock(). */
void rcu_read_unlock(void)
{
	_rcu_read_unlock();
}
332
/*
 * Exported wrapper symbol: returns whatever _rcu_read_ongoing()
 * returns.
 */
int rcu_read_ongoing(void)
{
	int ongoing = _rcu_read_ongoing();

	return ongoing;
}
337
338 /*
339 * Only grow for now. If empty, allocate a ARENA_INIT_ALLOC sized chunk.
340 * Else, try expanding the last chunk. If this fails, allocate a new
341 * chunk twice as big as the last chunk.
342 * Memory used by chunks _never_ moves. A chunk could theoretically be
343 * freed when all "used" slots are released, but we don't do it at this
344 * point.
345 */
346 static
347 void expand_arena(struct registry_arena *arena)
348 {
349 struct registry_chunk *new_chunk, *last_chunk;
350 size_t old_chunk_len, new_chunk_len;
351
352 /* No chunk. */
353 if (cds_list_empty(&arena->chunk_list)) {
354 assert(ARENA_INIT_ALLOC >=
355 sizeof(struct registry_chunk)
356 + sizeof(struct rcu_reader));
357 new_chunk_len = ARENA_INIT_ALLOC;
358 new_chunk = (struct registry_chunk *) mmap(NULL,
359 new_chunk_len,
360 PROT_READ | PROT_WRITE,
361 MAP_ANONYMOUS | MAP_PRIVATE,
362 -1, 0);
363 if (new_chunk == MAP_FAILED)
364 abort();
365 bzero(new_chunk, new_chunk_len);
366 new_chunk->data_len =
367 new_chunk_len - sizeof(struct registry_chunk);
368 cds_list_add_tail(&new_chunk->node, &arena->chunk_list);
369 return; /* We're done. */
370 }
371
372 /* Try expanding last chunk. */
373 last_chunk = cds_list_entry(arena->chunk_list.prev,
374 struct registry_chunk, node);
375 old_chunk_len =
376 last_chunk->data_len + sizeof(struct registry_chunk);
377 new_chunk_len = old_chunk_len << 1;
378
379 /* Don't allow memory mapping to move, just expand. */
380 new_chunk = mremap_wrapper(last_chunk, old_chunk_len,
381 new_chunk_len, 0);
382 if (new_chunk != MAP_FAILED) {
383 /* Should not have moved. */
384 assert(new_chunk == last_chunk);
385 bzero((char *) last_chunk + old_chunk_len,
386 new_chunk_len - old_chunk_len);
387 last_chunk->data_len =
388 new_chunk_len - sizeof(struct registry_chunk);
389 return; /* We're done. */
390 }
391
392 /* Remap did not succeed, we need to add a new chunk. */
393 new_chunk = (struct registry_chunk *) mmap(NULL,
394 new_chunk_len,
395 PROT_READ | PROT_WRITE,
396 MAP_ANONYMOUS | MAP_PRIVATE,
397 -1, 0);
398 if (new_chunk == MAP_FAILED)
399 abort();
400 bzero(new_chunk, new_chunk_len);
401 new_chunk->data_len =
402 new_chunk_len - sizeof(struct registry_chunk);
403 cds_list_add_tail(&new_chunk->node, &arena->chunk_list);
404 }
405
406 static
407 struct rcu_reader *arena_alloc(struct registry_arena *arena)
408 {
409 struct registry_chunk *chunk;
410 struct rcu_reader *rcu_reader_reg;
411 int expand_done = 0; /* Only allow to expand once per alloc */
412 size_t len = sizeof(struct rcu_reader);
413
414 retry:
415 cds_list_for_each_entry(chunk, &arena->chunk_list, node) {
416 if (chunk->data_len - chunk->used < len)
417 continue;
418 /* Find spot */
419 for (rcu_reader_reg = (struct rcu_reader *) &chunk->data[0];
420 rcu_reader_reg < (struct rcu_reader *) &chunk->data[chunk->data_len];
421 rcu_reader_reg++) {
422 if (!rcu_reader_reg->alloc) {
423 rcu_reader_reg->alloc = 1;
424 chunk->used += len;
425 return rcu_reader_reg;
426 }
427 }
428 }
429
430 if (!expand_done) {
431 expand_arena(arena);
432 expand_done = 1;
433 goto retry;
434 }
435
436 return NULL;
437 }
438
439 /* Called with signals off and mutex locked */
440 static
441 void add_thread(void)
442 {
443 struct rcu_reader *rcu_reader_reg;
444 int ret;
445
446 rcu_reader_reg = arena_alloc(&registry_arena);
447 if (!rcu_reader_reg)
448 abort();
449 ret = pthread_setspecific(urcu_bp_key, rcu_reader_reg);
450 if (ret)
451 abort();
452
453 /* Add to registry */
454 rcu_reader_reg->tid = pthread_self();
455 assert(rcu_reader_reg->ctr == 0);
456 cds_list_add(&rcu_reader_reg->node, &registry);
457 /*
458 * Reader threads are pointing to the reader registry. This is
459 * why its memory should never be relocated.
460 */
461 URCU_TLS(rcu_reader) = rcu_reader_reg;
462 }
463
464 /* Called with mutex locked */
465 static
466 void cleanup_thread(struct registry_chunk *chunk,
467 struct rcu_reader *rcu_reader_reg)
468 {
469 rcu_reader_reg->ctr = 0;
470 cds_list_del(&rcu_reader_reg->node);
471 rcu_reader_reg->tid = 0;
472 rcu_reader_reg->alloc = 0;
473 chunk->used -= sizeof(struct rcu_reader);
474 }
475
476 static
477 struct registry_chunk *find_chunk(struct rcu_reader *rcu_reader_reg)
478 {
479 struct registry_chunk *chunk;
480
481 cds_list_for_each_entry(chunk, &registry_arena.chunk_list, node) {
482 if (rcu_reader_reg < (struct rcu_reader *) &chunk->data[0])
483 continue;
484 if (rcu_reader_reg >= (struct rcu_reader *) &chunk->data[chunk->data_len])
485 continue;
486 return chunk;
487 }
488 return NULL;
489 }
490
491 /* Called with signals off and mutex locked */
492 static
493 void remove_thread(struct rcu_reader *rcu_reader_reg)
494 {
495 cleanup_thread(find_chunk(rcu_reader_reg), rcu_reader_reg);
496 URCU_TLS(rcu_reader) = NULL;
497 }
498
499 /* Disable signals, take mutex, add to registry */
500 void rcu_bp_register(void)
501 {
502 sigset_t newmask, oldmask;
503 int ret;
504
505 ret = sigfillset(&newmask);
506 if (ret)
507 abort();
508 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
509 if (ret)
510 abort();
511
512 /*
513 * Check if a signal concurrently registered our thread since
514 * the check in rcu_read_lock().
515 */
516 if (URCU_TLS(rcu_reader))
517 goto end;
518
519 /*
520 * Take care of early registration before urcu_bp constructor.
521 */
522 rcu_bp_init();
523
524 mutex_lock(&rcu_registry_lock);
525 add_thread();
526 mutex_unlock(&rcu_registry_lock);
527 end:
528 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
529 if (ret)
530 abort();
531 }
532
533 /* Disable signals, take mutex, remove from registry */
534 static
535 void rcu_bp_unregister(struct rcu_reader *rcu_reader_reg)
536 {
537 sigset_t newmask, oldmask;
538 int ret;
539
540 ret = sigfillset(&newmask);
541 if (ret)
542 abort();
543 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
544 if (ret)
545 abort();
546
547 mutex_lock(&rcu_registry_lock);
548 remove_thread(rcu_reader_reg);
549 mutex_unlock(&rcu_registry_lock);
550 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
551 if (ret)
552 abort();
553 _rcu_bp_exit();
554 }
555
/*
 * Destructor installed for urcu_bp_key by rcu_bp_init(). @rcu_key is
 * the value stored under that key, i.e. the thread's registry slot;
 * the slot is unregistered.
 */
static
void urcu_bp_thread_exit_notifier(void *rcu_key)
{
	struct rcu_reader *reader = rcu_key;

	rcu_bp_unregister(reader);
}
565
566 static
567 void rcu_bp_init(void)
568 {
569 mutex_lock(&init_lock);
570 if (!rcu_bp_refcount++) {
571 int ret;
572
573 ret = pthread_key_create(&urcu_bp_key,
574 urcu_bp_thread_exit_notifier);
575 if (ret)
576 abort();
577 initialized = 1;
578 }
579 mutex_unlock(&init_lock);
580 }
581
582 static
583 void _rcu_bp_exit(void)
584 {
585 mutex_lock(&init_lock);
586 if (!--rcu_bp_refcount) {
587 struct registry_chunk *chunk, *tmp;
588 int ret;
589
590 cds_list_for_each_entry_safe(chunk, tmp,
591 &registry_arena.chunk_list, node) {
592 munmap((void *) chunk, chunk->data_len
593 + sizeof(struct registry_chunk));
594 }
595 CDS_INIT_LIST_HEAD(&registry_arena.chunk_list);
596 ret = pthread_key_delete(urcu_bp_key);
597 if (ret)
598 abort();
599 }
600 mutex_unlock(&init_lock);
601 }
602
/*
 * Intentionally empty. Kept only for ABI compatibility within stable
 * versions: this symbol was never exposed through a header, but it
 * must stay in the .so until the soname is bumped.
 */
void rcu_bp_exit(void)
{
}
611
612 /*
613 * Holding the rcu_gp_lock and rcu_registry_lock across fork will make
614 * sure we fork() don't race with a concurrent thread executing with
615 * any of those locks held. This ensures that the registry and data
616 * protected by rcu_gp_lock are in a coherent state in the child.
617 */
618 void rcu_bp_before_fork(void)
619 {
620 sigset_t newmask, oldmask;
621 int ret;
622
623 ret = sigfillset(&newmask);
624 assert(!ret);
625 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
626 assert(!ret);
627 mutex_lock(&rcu_gp_lock);
628 mutex_lock(&rcu_registry_lock);
629 saved_fork_signal_mask = oldmask;
630 }
631
632 void rcu_bp_after_fork_parent(void)
633 {
634 sigset_t oldmask;
635 int ret;
636
637 oldmask = saved_fork_signal_mask;
638 mutex_unlock(&rcu_registry_lock);
639 mutex_unlock(&rcu_gp_lock);
640 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
641 assert(!ret);
642 }
643
644 /*
645 * Prune all entries from registry except our own thread. Fits the Linux
646 * fork behavior. Called with rcu_gp_lock and rcu_registry_lock held.
647 */
648 static
649 void urcu_bp_prune_registry(void)
650 {
651 struct registry_chunk *chunk;
652 struct rcu_reader *rcu_reader_reg;
653
654 cds_list_for_each_entry(chunk, &registry_arena.chunk_list, node) {
655 for (rcu_reader_reg = (struct rcu_reader *) &chunk->data[0];
656 rcu_reader_reg < (struct rcu_reader *) &chunk->data[chunk->data_len];
657 rcu_reader_reg++) {
658 if (!rcu_reader_reg->alloc)
659 continue;
660 if (rcu_reader_reg->tid == pthread_self())
661 continue;
662 cleanup_thread(chunk, rcu_reader_reg);
663 }
664 }
665 }
666
667 void rcu_bp_after_fork_child(void)
668 {
669 sigset_t oldmask;
670 int ret;
671
672 urcu_bp_prune_registry();
673 oldmask = saved_fork_signal_mask;
674 mutex_unlock(&rcu_registry_lock);
675 mutex_unlock(&rcu_gp_lock);
676 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
677 assert(!ret);
678 }
679
/* Exported symbol: returns the result of _rcu_dereference() on @p. */
void *rcu_dereference_sym_bp(void *p)
{
	void *loaded = _rcu_dereference(p);

	return loaded;
}
684
/*
 * Exported symbol: issue a write barrier (cmm_wmb()), then store @v
 * into *@p with uatomic_set(). Returns @v.
 */
void *rcu_set_pointer_sym_bp(void **p, void *v)
{
	cmm_wmb();
	uatomic_set(p, v);
	return v;
}
691
/*
 * Exported symbol: issue a write barrier (cmm_wmb()), then exchange
 * *@p with @v through uatomic_xchg() and return that call's result.
 */
void *rcu_xchg_pointer_sym_bp(void **p, void *v)
{
	void *prev;

	cmm_wmb();
	prev = uatomic_xchg(p, v);
	return prev;
}
697
/*
 * Exported symbol: issue a write barrier (cmm_wmb()), then run
 * uatomic_cmpxchg() on *@p with expected value @old and replacement
 * @_new, and return that call's result.
 */
void *rcu_cmpxchg_pointer_sym_bp(void **p, void *old, void *_new)
{
	void *prev;

	cmm_wmb();
	prev = uatomic_cmpxchg(p, old, _new);
	return prev;
}
703
704 DEFINE_RCU_FLAVOR(rcu_flavor);
705
706 #include "urcu-call-rcu-impl.h"
707 #include "urcu-defer-impl.h"
This page took 0.04236 seconds and 4 git commands to generate.