Fix: rcu_barrier(): uninitialized futex field
[userspace-rcu.git] / urcu-bp.c
... / ...
CommitLineData
1/*
2 * urcu-bp.c
3 *
4 * Userspace RCU library, "bulletproof" version.
5 *
6 * Copyright (c) 2009 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
7 * Copyright (c) 2009 Paul E. McKenney, IBM Corporation.
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 *
23 * IBM's contributions to this file may be relicensed under LGPLv2 or later.
24 */
25
26#define _GNU_SOURCE
27#define _LGPL_SOURCE
28#include <stdio.h>
29#include <pthread.h>
30#include <signal.h>
31#include <assert.h>
32#include <stdlib.h>
33#include <string.h>
34#include <errno.h>
35#include <poll.h>
36#include <unistd.h>
37#include <sys/mman.h>
38
39#include "urcu/wfcqueue.h"
40#include "urcu/map/urcu-bp.h"
41#include "urcu/static/urcu-bp.h"
42#include "urcu-pointer.h"
43#include "urcu/tls-compat.h"
44
45#include "urcu-die.h"
46
47/* Do not #define _LGPL_SOURCE to ensure we can emit the wrapper symbols */
48#undef _LGPL_SOURCE
49#include "urcu-bp.h"
50#define _LGPL_SOURCE
51
52#ifndef MAP_ANONYMOUS
53#define MAP_ANONYMOUS MAP_ANON
54#endif
55
56#ifdef __linux__
/*
 * Linux flavour of the remap helper: forwards all four arguments,
 * unchanged, to mremap() and hands its result straight back to the
 * caller.
 */
static void *mremap_wrapper(void *old_address, size_t old_size,
		size_t new_size, int flags)
{
	void *remapped = mremap(old_address, old_size, new_size, flags);

	return remapped;
}
63#else
64
/* Flag values supplied locally for the non-Linux build of this file. */
#define MREMAP_MAYMOVE	1
#define MREMAP_FIXED	2

/*
 * mremap stand-in for non-Linux systems.
 *
 * Nothing is ever remapped: the function always returns MAP_FAILED.
 * Callers must not pass MREMAP_MAYMOVE (checked with assert()).
 * This is deliberately not a generic mremap() replacement.
 */
static void *mremap_wrapper(void *old_address, size_t old_size,
		size_t new_size, int flags)
{
	/* No remapping is attempted, so the geometry arguments are unused. */
	(void) old_address;
	(void) old_size;
	(void) new_size;
	(void) flags;	/* Only read by the assert below. */

	assert((flags & MREMAP_MAYMOVE) == 0);

	return MAP_FAILED;
}
80#endif
81
/* Sleep delay in ms, used by wait_for_readers() once active polling ends. */
#define RCU_SLEEP_DELAY_MS	10

/* Number of struct rcu_reader slots provisioned in the first arena chunk. */
#define INIT_NR_THREADS		8

/*
 * Size in bytes of the first arena chunk: one chunk header followed by
 * INIT_NR_THREADS reader slots. The expansion is fully parenthesized so
 * the macro can be used safely inside larger expressions.
 */
#define ARENA_INIT_ALLOC			\
	(sizeof(struct registry_chunk)		\
	+ INIT_NR_THREADS * sizeof(struct rcu_reader))

/*
 * Active attempts to check for reader Q.S. before calling sleep().
 */
#define RCU_QS_ACTIVE_ATTEMPTS	100
93
94static
95int rcu_bp_refcount;
96
97static
98void __attribute__((constructor)) rcu_bp_init(void);
99static
100void __attribute__((destructor)) rcu_bp_exit(void);
101
102static pthread_mutex_t rcu_gp_lock = PTHREAD_MUTEX_INITIALIZER;
103
104static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;
105static int initialized;
106
107static pthread_key_t urcu_bp_key;
108
109#ifdef DEBUG_YIELD
110unsigned int rcu_yield_active;
111DEFINE_URCU_TLS(unsigned int, rcu_rand_yield);
112#endif
113
114struct rcu_gp rcu_gp = { .ctr = RCU_GP_COUNT };
115
116/*
117 * Pointer to registry elements. Written to only by each individual reader. Read
118 * by both the reader and the writers.
119 */
120DEFINE_URCU_TLS(struct rcu_reader *, rcu_reader);
121
122static CDS_LIST_HEAD(registry);
123
124struct registry_chunk {
125 size_t data_len; /* data length */
126 size_t used; /* amount of data used */
127 struct cds_list_head node; /* chunk_list node */
128 char data[];
129};
130
131struct registry_arena {
132 struct cds_list_head chunk_list;
133};
134
135static struct registry_arena registry_arena = {
136 .chunk_list = CDS_LIST_HEAD_INIT(registry_arena.chunk_list),
137};
138
139/* Saved fork signal mask, protected by rcu_gp_lock */
140static sigset_t saved_fork_signal_mask;
141
/*
 * Acquire @mutex, dying through urcu_die() on any unexpected error.
 *
 * The default build simply blocks in pthread_mutex_lock(). When
 * DISTRUST_SIGNALS_EXTREME is defined, the lock is instead polled with
 * pthread_mutex_trylock(), sleeping 10 ms between attempts for as long
 * as the result is EBUSY or EINTR.
 */
static void mutex_lock(pthread_mutex_t *mutex)
{
	int err;

#ifdef DISTRUST_SIGNALS_EXTREME
	for (;;) {
		err = pthread_mutex_trylock(mutex);
		if (err == 0)
			break;
		if (err != EBUSY && err != EINTR)
			urcu_die(err);
		poll(NULL,0,10);
	}
#else /* #ifdef DISTRUST_SIGNALS_EXTREME */
	err = pthread_mutex_lock(mutex);
	if (err != 0)
		urcu_die(err);
#endif /* #else #ifdef DISTRUST_SIGNALS_EXTREME */
}
158
/* Release @mutex; any pthread_mutex_unlock() error goes to urcu_die(). */
static void mutex_unlock(pthread_mutex_t *mutex)
{
	int err = pthread_mutex_unlock(mutex);

	if (err != 0)
		urcu_die(err);
}
167
168static void wait_for_readers(struct cds_list_head *input_readers,
169 struct cds_list_head *cur_snap_readers,
170 struct cds_list_head *qsreaders)
171{
172 unsigned int wait_loops = 0;
173 struct rcu_reader *index, *tmp;
174
175 /*
176 * Wait for each thread URCU_TLS(rcu_reader).ctr to either
177 * indicate quiescence (not nested), or observe the current
178 * rcu_gp.ctr value.
179 */
180 for (;;) {
181 if (wait_loops < RCU_QS_ACTIVE_ATTEMPTS)
182 wait_loops++;
183
184 cds_list_for_each_entry_safe(index, tmp, input_readers, node) {
185 switch (rcu_reader_state(&index->ctr)) {
186 case RCU_READER_ACTIVE_CURRENT:
187 if (cur_snap_readers) {
188 cds_list_move(&index->node,
189 cur_snap_readers);
190 break;
191 }
192 /* Fall-through */
193 case RCU_READER_INACTIVE:
194 cds_list_move(&index->node, qsreaders);
195 break;
196 case RCU_READER_ACTIVE_OLD:
197 /*
198 * Old snapshot. Leaving node in
199 * input_readers will make us busy-loop
200 * until the snapshot becomes current or
201 * the reader becomes inactive.
202 */
203 break;
204 }
205 }
206
207 if (cds_list_empty(input_readers)) {
208 break;
209 } else {
210 if (wait_loops >= RCU_QS_ACTIVE_ATTEMPTS)
211 (void) poll(NULL, 0, RCU_SLEEP_DELAY_MS);
212 else
213 caa_cpu_relax();
214 }
215 }
216}
217
218void synchronize_rcu(void)
219{
220 CDS_LIST_HEAD(cur_snap_readers);
221 CDS_LIST_HEAD(qsreaders);
222 sigset_t newmask, oldmask;
223 int ret;
224
225 ret = sigfillset(&newmask);
226 assert(!ret);
227 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
228 assert(!ret);
229
230 mutex_lock(&rcu_gp_lock);
231
232 if (cds_list_empty(&registry))
233 goto out;
234
235 /* All threads should read qparity before accessing data structure
236 * where new ptr points to. */
237 /* Write new ptr before changing the qparity */
238 cmm_smp_mb();
239
240 /*
241 * Wait for readers to observe original parity or be quiescent.
242 */
243 wait_for_readers(&registry, &cur_snap_readers, &qsreaders);
244
245 /*
246 * Adding a cmm_smp_mb() which is _not_ formally required, but makes the
247 * model easier to understand. It does not have a big performance impact
248 * anyway, given this is the write-side.
249 */
250 cmm_smp_mb();
251
252 /* Switch parity: 0 -> 1, 1 -> 0 */
253 CMM_STORE_SHARED(rcu_gp.ctr, rcu_gp.ctr ^ RCU_GP_CTR_PHASE);
254
255 /*
256 * Must commit qparity update to memory before waiting for other parity
257 * quiescent state. Failure to do so could result in the writer waiting
258 * forever while new readers are always accessing data (no progress).
259 * Ensured by CMM_STORE_SHARED and CMM_LOAD_SHARED.
260 */
261
262 /*
263 * Adding a cmm_smp_mb() which is _not_ formally required, but makes the
264 * model easier to understand. It does not have a big performance impact
265 * anyway, given this is the write-side.
266 */
267 cmm_smp_mb();
268
269 /*
270 * Wait for readers to observe new parity or be quiescent.
271 */
272 wait_for_readers(&cur_snap_readers, NULL, &qsreaders);
273
274 /*
275 * Put quiescent reader list back into registry.
276 */
277 cds_list_splice(&qsreaders, &registry);
278
279 /*
280 * Finish waiting for reader threads before letting the old ptr being
281 * freed.
282 */
283 cmm_smp_mb();
284out:
285 mutex_unlock(&rcu_gp_lock);
286 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
287 assert(!ret);
288}
289
/*
 * Library wrappers to be used by non-LGPL compatible source code.
 * Each one is an out-of-line symbol that forwards to the underscore
 * prefixed implementation of the same name.
 */

/* Out-of-line entry point for _rcu_read_lock(). */
void rcu_read_lock(void)
{
	_rcu_read_lock();
}

/* Out-of-line entry point for _rcu_read_unlock(). */
void rcu_read_unlock(void)
{
	_rcu_read_unlock();
}

/* Out-of-line entry point for _rcu_read_ongoing(); returns its result. */
int rcu_read_ongoing(void)
{
	int ongoing = _rcu_read_ongoing();

	return ongoing;
}
308
309/*
310 * Only grow for now. If empty, allocate a ARENA_INIT_ALLOC sized chunk.
311 * Else, try expanding the last chunk. If this fails, allocate a new
312 * chunk twice as big as the last chunk.
313 * Memory used by chunks _never_ moves. A chunk could theoretically be
314 * freed when all "used" slots are released, but we don't do it at this
315 * point.
316 */
317static
318void expand_arena(struct registry_arena *arena)
319{
320 struct registry_chunk *new_chunk, *last_chunk;
321 size_t old_chunk_len, new_chunk_len;
322
323 /* No chunk. */
324 if (cds_list_empty(&arena->chunk_list)) {
325 assert(ARENA_INIT_ALLOC >=
326 sizeof(struct registry_chunk)
327 + sizeof(struct rcu_reader));
328 new_chunk_len = ARENA_INIT_ALLOC;
329 new_chunk = mmap(NULL, new_chunk_len,
330 PROT_READ | PROT_WRITE,
331 MAP_ANONYMOUS | MAP_PRIVATE,
332 -1, 0);
333 if (new_chunk == MAP_FAILED)
334 abort();
335 bzero(new_chunk, new_chunk_len);
336 new_chunk->data_len =
337 new_chunk_len - sizeof(struct registry_chunk);
338 cds_list_add_tail(&new_chunk->node, &arena->chunk_list);
339 return; /* We're done. */
340 }
341
342 /* Try expanding last chunk. */
343 last_chunk = cds_list_entry(arena->chunk_list.prev,
344 struct registry_chunk, node);
345 old_chunk_len =
346 last_chunk->data_len + sizeof(struct registry_chunk);
347 new_chunk_len = old_chunk_len << 1;
348
349 /* Don't allow memory mapping to move, just expand. */
350 new_chunk = mremap_wrapper(last_chunk, old_chunk_len,
351 new_chunk_len, 0);
352 if (new_chunk != MAP_FAILED) {
353 /* Should not have moved. */
354 assert(new_chunk == last_chunk);
355 bzero((char *) last_chunk + old_chunk_len,
356 new_chunk_len - old_chunk_len);
357 last_chunk->data_len =
358 new_chunk_len - sizeof(struct registry_chunk);
359 return; /* We're done. */
360 }
361
362 /* Remap did not succeed, we need to add a new chunk. */
363 new_chunk = mmap(NULL, new_chunk_len,
364 PROT_READ | PROT_WRITE,
365 MAP_ANONYMOUS | MAP_PRIVATE,
366 -1, 0);
367 if (new_chunk == MAP_FAILED)
368 abort();
369 bzero(new_chunk, new_chunk_len);
370 new_chunk->data_len =
371 new_chunk_len - sizeof(struct registry_chunk);
372 cds_list_add_tail(&new_chunk->node, &arena->chunk_list);
373}
374
375static
376struct rcu_reader *arena_alloc(struct registry_arena *arena)
377{
378 struct registry_chunk *chunk;
379 struct rcu_reader *rcu_reader_reg;
380 int expand_done = 0; /* Only allow to expand once per alloc */
381 size_t len = sizeof(struct rcu_reader);
382
383retry:
384 cds_list_for_each_entry(chunk, &arena->chunk_list, node) {
385 if (chunk->data_len - chunk->used < len)
386 continue;
387 /* Find spot */
388 for (rcu_reader_reg = (struct rcu_reader *) &chunk->data[0];
389 rcu_reader_reg < (struct rcu_reader *) &chunk->data[chunk->data_len];
390 rcu_reader_reg++) {
391 if (!rcu_reader_reg->alloc) {
392 rcu_reader_reg->alloc = 1;
393 chunk->used += len;
394 return rcu_reader_reg;
395 }
396 }
397 }
398
399 if (!expand_done) {
400 expand_arena(arena);
401 expand_done = 1;
402 goto retry;
403 }
404
405 return NULL;
406}
407
408/* Called with signals off and mutex locked */
409static
410void add_thread(void)
411{
412 struct rcu_reader *rcu_reader_reg;
413 int ret;
414
415 rcu_reader_reg = arena_alloc(&registry_arena);
416 if (!rcu_reader_reg)
417 abort();
418 ret = pthread_setspecific(urcu_bp_key, rcu_reader_reg);
419 if (ret)
420 abort();
421
422 /* Add to registry */
423 rcu_reader_reg->tid = pthread_self();
424 assert(rcu_reader_reg->ctr == 0);
425 cds_list_add(&rcu_reader_reg->node, &registry);
426 /*
427 * Reader threads are pointing to the reader registry. This is
428 * why its memory should never be relocated.
429 */
430 URCU_TLS(rcu_reader) = rcu_reader_reg;
431}
432
433/* Called with mutex locked */
434static
435void cleanup_thread(struct registry_chunk *chunk,
436 struct rcu_reader *rcu_reader_reg)
437{
438 rcu_reader_reg->ctr = 0;
439 cds_list_del(&rcu_reader_reg->node);
440 rcu_reader_reg->tid = 0;
441 rcu_reader_reg->alloc = 0;
442 chunk->used -= sizeof(struct rcu_reader);
443}
444
445static
446struct registry_chunk *find_chunk(struct rcu_reader *rcu_reader_reg)
447{
448 struct registry_chunk *chunk;
449
450 cds_list_for_each_entry(chunk, &registry_arena.chunk_list, node) {
451 if (rcu_reader_reg < (struct rcu_reader *) &chunk->data[0])
452 continue;
453 if (rcu_reader_reg >= (struct rcu_reader *) &chunk->data[chunk->data_len])
454 continue;
455 return chunk;
456 }
457 return NULL;
458}
459
460/* Called with signals off and mutex locked */
461static
462void remove_thread(struct rcu_reader *rcu_reader_reg)
463{
464 cleanup_thread(find_chunk(rcu_reader_reg), rcu_reader_reg);
465 URCU_TLS(rcu_reader) = NULL;
466}
467
468/* Disable signals, take mutex, add to registry */
469void rcu_bp_register(void)
470{
471 sigset_t newmask, oldmask;
472 int ret;
473
474 ret = sigfillset(&newmask);
475 if (ret)
476 abort();
477 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
478 if (ret)
479 abort();
480
481 /*
482 * Check if a signal concurrently registered our thread since
483 * the check in rcu_read_lock().
484 */
485 if (URCU_TLS(rcu_reader))
486 goto end;
487
488 /*
489 * Take care of early registration before urcu_bp constructor.
490 */
491 rcu_bp_init();
492
493 mutex_lock(&rcu_gp_lock);
494 add_thread();
495 mutex_unlock(&rcu_gp_lock);
496end:
497 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
498 if (ret)
499 abort();
500}
501
502/* Disable signals, take mutex, remove from registry */
503static
504void rcu_bp_unregister(struct rcu_reader *rcu_reader_reg)
505{
506 sigset_t newmask, oldmask;
507 int ret;
508
509 ret = sigfillset(&newmask);
510 if (ret)
511 abort();
512 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
513 if (ret)
514 abort();
515
516 mutex_lock(&rcu_gp_lock);
517 remove_thread(rcu_reader_reg);
518 mutex_unlock(&rcu_gp_lock);
519 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
520 if (ret)
521 abort();
522 rcu_bp_exit();
523}
524
/*
 * Destructor installed for urcu_bp_key by rcu_bp_init(): removes the
 * exiting thread from the registry. @rcu_key is the key's value, i.e.
 * the struct rcu_reader stored by add_thread().
 */
static
void urcu_bp_thread_exit_notifier(void *rcu_key)
{
	struct rcu_reader *reader = rcu_key;

	rcu_bp_unregister(reader);
}
534
535static
536void rcu_bp_init(void)
537{
538 mutex_lock(&init_lock);
539 if (!rcu_bp_refcount++) {
540 int ret;
541
542 ret = pthread_key_create(&urcu_bp_key,
543 urcu_bp_thread_exit_notifier);
544 if (ret)
545 abort();
546 initialized = 1;
547 }
548 mutex_unlock(&init_lock);
549}
550
551static
552void rcu_bp_exit(void)
553{
554 mutex_lock(&init_lock);
555 if (!--rcu_bp_refcount) {
556 struct registry_chunk *chunk, *tmp;
557 int ret;
558
559 cds_list_for_each_entry_safe(chunk, tmp,
560 &registry_arena.chunk_list, node) {
561 munmap(chunk, chunk->data_len
562 + sizeof(struct registry_chunk));
563 }
564 ret = pthread_key_delete(urcu_bp_key);
565 if (ret)
566 abort();
567 }
568 mutex_unlock(&init_lock);
569}
570
571/*
572 * Holding the rcu_gp_lock across fork will make sure we fork() don't race with
573 * a concurrent thread executing with this same lock held. This ensures that the
574 * registry is in a coherent state in the child.
575 */
576void rcu_bp_before_fork(void)
577{
578 sigset_t newmask, oldmask;
579 int ret;
580
581 ret = sigfillset(&newmask);
582 assert(!ret);
583 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
584 assert(!ret);
585 mutex_lock(&rcu_gp_lock);
586 saved_fork_signal_mask = oldmask;
587}
588
589void rcu_bp_after_fork_parent(void)
590{
591 sigset_t oldmask;
592 int ret;
593
594 oldmask = saved_fork_signal_mask;
595 mutex_unlock(&rcu_gp_lock);
596 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
597 assert(!ret);
598}
599
600/*
601 * Prune all entries from registry except our own thread. Fits the Linux
602 * fork behavior. Called with rcu_gp_lock held.
603 */
604static
605void urcu_bp_prune_registry(void)
606{
607 struct registry_chunk *chunk;
608 struct rcu_reader *rcu_reader_reg;
609
610 cds_list_for_each_entry(chunk, &registry_arena.chunk_list, node) {
611 for (rcu_reader_reg = (struct rcu_reader *) &chunk->data[0];
612 rcu_reader_reg < (struct rcu_reader *) &chunk->data[chunk->data_len];
613 rcu_reader_reg++) {
614 if (!rcu_reader_reg->alloc)
615 continue;
616 if (rcu_reader_reg->tid == pthread_self())
617 continue;
618 cleanup_thread(chunk, rcu_reader_reg);
619 }
620 }
621}
622
623void rcu_bp_after_fork_child(void)
624{
625 sigset_t oldmask;
626 int ret;
627
628 urcu_bp_prune_registry();
629 oldmask = saved_fork_signal_mask;
630 mutex_unlock(&rcu_gp_lock);
631 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
632 assert(!ret);
633}
634
/* Out-of-line symbol returning the result of _rcu_dereference(@p). */
void *rcu_dereference_sym_bp(void *p)
{
	return _rcu_dereference(p);
}

/*
 * Issue a write barrier, then atomically store @v into *@p.
 * Returns @v.
 */
void *rcu_set_pointer_sym_bp(void **p, void *v)
{
	cmm_wmb();
	uatomic_set(p, v);

	return v;
}

/*
 * Issue a write barrier, then atomically exchange *@p with @v.
 * Returns the result of uatomic_xchg().
 */
void *rcu_xchg_pointer_sym_bp(void **p, void *v)
{
	void *prev;

	cmm_wmb();
	prev = uatomic_xchg(p, v);

	return prev;
}

/*
 * Issue a write barrier, then atomically compare-and-exchange *@p from
 * @old to @_new. Returns the result of uatomic_cmpxchg().
 */
void *rcu_cmpxchg_pointer_sym_bp(void **p, void *old, void *_new)
{
	void *prev;

	cmm_wmb();
	prev = uatomic_cmpxchg(p, old, _new);

	return prev;
}
658
659DEFINE_RCU_FLAVOR(rcu_flavor);
660
661#include "urcu-call-rcu-impl.h"
662#include "urcu-defer-impl.h"
This page took 0.025093 seconds and 4 git commands to generate.