From 121a5d44c8cc7197116df73854cb94c6cfbad0b0 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Sun, 10 May 2009 20:54:43 -0400 Subject: [PATCH] LGPLv2.1 relicensing Except the content of arch_x86.h and arch_ppc.h, everything that was not LGPL-compatible has been either rewritten or was simply a trivial one-liner. The only content which could still be non-LGPL licensable in this commit is : arch_ppc.h: atomic_inc() __xchg_u32() get_cycles (maybe ?) arch_x86.h: __xchg() Signed-off-by: Mathieu Desnoyers --- Makefile | 24 +++- arch_ppc.h | 29 +++++ arch_x86.h | 29 +++++ rcutorture.h | 8 +- test_rwlock_timing.c | 1 + test_urcu.c | 11 +- test_urcu_timing.c | 8 +- urcu.c | 58 +++++++-- urcu.h | 297 +++++-------------------------------------- urcutorture.c | 1 + 10 files changed, 179 insertions(+), 287 deletions(-) diff --git a/Makefile b/Makefile index 386856f..5842b26 100644 --- a/Makefile +++ b/Makefile @@ -1,15 +1,19 @@ -CFLAGS=-Wall -O2 -g +CFLAGS=-Wall -O2 -g -I. LDFLAGS=-lpthread #debug #CFLAGS=-Wall -g #CFLAGS+=-DDEBUG_FULL_MB +#Changing the signal number used by the library. SIGUSR1 by default. +#CFLAGS+=-DSIGURCU=SIGUSR2 + SRC_DEP=`echo $^ | sed 's/[^ ]*.h//g'` -all: test_urcu test_urcu_timing test_rwlock_timing test_urcu_yield urcu-asm.S \ - urcu-asm.o urcutorture urcutorture-yield +all: test_urcu test_urcu_dynamic_link test_urcu_timing \ + test_rwlock_timing test_urcu_yield urcu-asm.S \ + urcu-asm.o urcutorture urcutorture-yield liburcu.so pthreads-x86: clean cp api_x86.h api.h @@ -22,6 +26,9 @@ pthreads-ppc: clean test_urcu: urcu.o test_urcu.c urcu.h $(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP) +test_urcu_dynamic_link: urcu.o test_urcu.c urcu.h + $(CC) ${CFLAGS} -DDYNAMIC_LINK_TEST $(LDFLAGS) -o $@ $(SRC_DEP) + test_urcu_yield: urcu-yield.o test_urcu.c urcu.h $(CC) -DDEBUG_YIELD ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP) @@ -32,7 +39,10 @@ test_rwlock_timing: urcu.o test_rwlock_timing.c urcu.h $(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP) urcu.o: urcu.c urcu.h - $(CC) ${CFLAGS} $(LDFLAGS) -c -o $@ $(SRC_DEP) + $(CC) -fPIC ${CFLAGS} $(LDFLAGS) -c -o $@ $(SRC_DEP) + +liburcu.so: urcu.o + $(CC) -fPIC -shared -o $@ $< urcu-yield.o: urcu.c urcu.h $(CC) -DDEBUG_YIELD ${CFLAGS} $(LDFLAGS) -c -o $@ $(SRC_DEP) @@ -49,7 +59,11 @@ urcutorture: urcutorture.c urcu.o urcu.h rcutorture.h urcutorture-yield: urcutorture.c urcu-yield.o urcu.h rcutorture.h $(CC) -DDEBUG_YIELD ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP) -.PHONY: clean +.PHONY: clean install + +install: liburcu.so + cp -f liburcu.so /usr/lib/ + cp -f arch.h compiler.h urcu.h urcu-static.h /usr/include/ clean: rm -f *.o test_urcu test_urcu_timing test_rwlock_timing urcu-asm.S \ diff --git a/arch_ppc.h b/arch_ppc.h index 6dc5f3e..b43d08b 100644 --- a/arch_ppc.h +++ b/arch_ppc.h @@ -1,3 +1,6 @@ +#ifndef _ARCH_PPC_H +#define _ARCH_PPC_H + /* * arch_x86.h: Definitions for the x86 architecture, derived from Linux. * @@ -18,6 +21,8 @@ * Copyright (c) 2009 Paul E. McKenney, IBM Corporation. */ +#include + #define CONFIG_HAVE_FENCE 1 #define CONFIG_HAVE_MEM_COHERENCY @@ -40,6 +45,28 @@ #define rmc() barrier() #define wmc() barrier() +/* Assume SMP machine, given we don't have this information */ +#define CONFIG_SMP 1 + +#ifdef CONFIG_SMP +#define smp_mb() mb() +#define smp_rmb() rmb() +#define smp_wmb() wmb() +#define smp_mc() mc() +#define smp_rmc() rmc() +#define smp_wmc() wmc() +#else +#define smp_mb() barrier() +#define smp_rmb() barrier() +#define smp_wmb() barrier() +#define smp_mc() barrier() +#define smp_rmc() barrier() +#define smp_wmc() barrier() +#endif + +/* Nop everywhere except on alpha. */ +#define smp_read_barrier_depends() + static inline void cpu_relax(void) { barrier(); @@ -150,3 +177,5 @@ static inline cycles_t get_cycles (void) return (((long long)h) << 32) + l; } } + +#endif /* _ARCH_PPC_H */ diff --git a/arch_x86.h b/arch_x86.h index 99ccd29..e924913 100644 --- a/arch_x86.h +++ b/arch_x86.h @@ -1,3 +1,6 @@ +#ifndef _ARCH_X86_H +#define _ARCH_X86_H + /* * arch_x86.h: Definitions for the x86 architecture, derived from Linux. * @@ -18,6 +21,8 @@ * Copyright (c) 2009 Paul E. McKenney, IBM Corporation. */ +#include + /* Assume P4 or newer */ #define CONFIG_HAVE_FENCE 1 #define CONFIG_HAVE_MEM_COHERENCY @@ -51,6 +56,28 @@ #define rmc() barrier() #define wmc() barrier() +/* Assume SMP machine, given we don't have this information */ +#define CONFIG_SMP 1 + +#ifdef CONFIG_SMP +#define smp_mb() mb() +#define smp_rmb() rmb() +#define smp_wmb() wmb() +#define smp_mc() mc() +#define smp_rmc() rmc() +#define smp_wmc() wmc() +#else +#define smp_mb() barrier() +#define smp_rmb() barrier() +#define smp_wmb() barrier() +#define smp_mc() barrier() +#define smp_rmc() barrier() +#define smp_wmc() barrier() +#endif + +/* Nop everywhere except on alpha. */ +#define smp_read_barrier_depends() + /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ static inline void rep_nop(void) { @@ -135,3 +162,5 @@ static inline cycles_t get_cycles (void) rdtscll(ret); return ret; } + +#endif /* _ARCH_X86_H */ diff --git a/rcutorture.h b/rcutorture.h index 8681ef7..e123559 100644 --- a/rcutorture.h +++ b/rcutorture.h @@ -114,7 +114,7 @@ void *rcu_read_perf_test(void *arg) int me = (long)arg; long long n_reads_local = 0; - urcu_register_thread(); + rcu_register_thread(); run_on(me); atomic_inc(&nthreadsrunning); while (goflag == GOFLAG_INIT) @@ -132,7 +132,7 @@ void *rcu_read_perf_test(void *arg) } __get_thread_var(n_reads_pt) += n_reads_local; put_thread_offline(); - urcu_unregister_thread(); + rcu_unregister_thread(); return (NULL); } @@ -258,7 +258,7 @@ void *rcu_read_stress_test(void *arg) struct rcu_stress *p; int pc; - urcu_register_thread(); + rcu_register_thread(); while (goflag == GOFLAG_INIT) poll(NULL, 0, 1); mark_rcu_quiescent_state(); @@ -285,7 +285,7 @@ void *rcu_read_stress_test(void *arg) } } put_thread_offline(); - urcu_unregister_thread(); + rcu_unregister_thread(); return (NULL); } diff --git a/test_rwlock_timing.c b/test_rwlock_timing.c index 9ea2494..2c3d894 100644 --- a/test_rwlock_timing.c +++ b/test_rwlock_timing.c @@ -19,6 +19,7 @@ #include #include #include +#include #if defined(_syscall0) _syscall0(pid_t, gettid) diff --git a/test_urcu.c b/test_urcu.c index 129b33a..a044d53 100644 --- a/test_urcu.c +++ b/test_urcu.c @@ -34,6 +34,11 @@ static inline pid_t gettid(void) } #endif +#ifndef DYNAMIC_LINK_TEST +#define _LGPL_SOURCE +#else +#define debug_yield_read() +#endif #include "urcu.h" struct test_array { @@ -146,7 +151,7 @@ void *thr_reader(void *_count) printf("thread_begin %s, thread id : %lx, tid %lu\n", "reader", pthread_self(), (unsigned long)gettid()); - urcu_register_thread(); + rcu_register_thread(); for (;;) { rcu_read_lock(); @@ -160,7 +165,7 @@ void *thr_reader(void *_count) break; } - urcu_unregister_thread(); + rcu_unregister_thread(); *count = nr_reads; printf("thread_end %s, thread id : %lx, tid %lu\n", @@ -184,7 +189,7 @@ void *thr_writer(void *_count) if (old) assert(old->a == 8); new->a = 8; - old = urcu_publish_content(&test_rcu_pointer, new); + old = rcu_publish_content(&test_rcu_pointer, new); rcu_copy_mutex_unlock(); /* can be done after unlock */ if (old) diff --git a/test_urcu_timing.c b/test_urcu_timing.c index ac23846..a11532a 100644 --- a/test_urcu_timing.c +++ b/test_urcu_timing.c @@ -18,6 +18,7 @@ #include #include #include +#include #if defined(_syscall0) _syscall0(pid_t, gettid) @@ -34,6 +35,7 @@ static inline pid_t gettid(void) } #endif +#define _LGPL_SOURCE #include "urcu.h" pthread_mutex_t rcu_copy_mutex = PTHREAD_MUTEX_INITIALIZER; @@ -86,7 +88,7 @@ void *thr_reader(void *arg) "reader", pthread_self(), (unsigned long)gettid()); sleep(2); - urcu_register_thread(); + rcu_register_thread(); time1 = get_cycles(); for (i = 0; i < OUTER_READ_LOOP; i++) { @@ -101,7 +103,7 @@ void *thr_reader(void *arg) } time2 = get_cycles(); - urcu_unregister_thread(); + rcu_unregister_thread(); reader_time[(unsigned long)arg] = time2 - time1; @@ -129,7 +131,7 @@ void *thr_writer(void *arg) assert(old->a == 8); } new->a = 8; - old = urcu_publish_content(&test_rcu_pointer, new); + old = rcu_publish_content(&test_rcu_pointer, new); rcu_copy_mutex_unlock(); /* can be done after unlock */ if (old) { diff --git a/urcu.c b/urcu.c index 337f764..7ab87c4 100644 --- a/urcu.c +++ b/urcu.c @@ -5,7 +5,7 @@ * * Copyright February 2009 - Mathieu Desnoyers * - * Distributed under GPLv2 + * Distributed under LGPLv2.1 * * IBM's contributions to this file may be relicensed under LGPLv2 or later. */ @@ -19,6 +19,8 @@ #include #include +#include "urcu-static.h" +/* Do not #define _LGPL_SOURCE to ensure we can emit the wrapper symbols */ #include "urcu.h" pthread_mutex_t urcu_mutex = PTHREAD_MUTEX_INITIALIZER; @@ -269,7 +271,47 @@ void synchronize_rcu(void) internal_urcu_unlock(); } -void urcu_add_reader(pthread_t id) +/* + * library wrappers to be used by non-LGPL compatible source code. + */ + +void rcu_read_lock(void) +{ + _rcu_read_lock(); +} + +void rcu_read_unlock(void) +{ + _rcu_read_unlock(); +} + +void *rcu_dereference(void *p) +{ + return _rcu_dereference(p); +} + +void *rcu_assign_pointer_sym(void **p, void *v) +{ + wmb(); + return STORE_SHARED(p, v); +} + +void *rcu_xchg_pointer_sym(void **p, void *v) +{ + wmb(); + return xchg(p, v); +} + +void *rcu_publish_content_sym(void **p, void *v) +{ + void *oldptr; + + oldptr = _rcu_xchg_pointer(p, v); + synchronize_rcu(); + return oldptr; +} + +static void rcu_add_reader(pthread_t id) { struct reader_registry *oldarray; @@ -299,7 +341,7 @@ void urcu_add_reader(pthread_t id) * Never shrink (implementation limitation). * This is O(nb threads). Eventually use a hash table. */ -void urcu_remove_reader(pthread_t id) +static void rcu_remove_reader(pthread_t id) { struct reader_registry *index; @@ -318,22 +360,22 @@ void urcu_remove_reader(pthread_t id) assert(0); } -void urcu_register_thread(void) +void rcu_register_thread(void) { internal_urcu_lock(); - urcu_add_reader(pthread_self()); + rcu_add_reader(pthread_self()); internal_urcu_unlock(); } -void urcu_unregister_thread(void) +void rcu_unregister_thread(void) { internal_urcu_lock(); - urcu_remove_reader(pthread_self()); + rcu_remove_reader(pthread_self()); internal_urcu_unlock(); } #ifndef DEBUG_FULL_MB -void sigurcu_handler(int signo, siginfo_t *siginfo, void *context) +static void sigurcu_handler(int signo, siginfo_t *siginfo, void *context) { /* * Executing this smp_mb() is the only purpose of this signal handler. diff --git a/urcu.h b/urcu.h index b43b280..0b0d232 100644 --- a/urcu.h +++ b/urcu.h @@ -8,13 +8,15 @@ * * Copyright February 2009 - Mathieu Desnoyers * - * Credits for Paul e. McKenney + * Credits for Paul E. McKenney * for inspiration coming from the Linux kernel RCU and rcu-preempt. * - * The barrier, mb, rmb, wmb, atomic_inc, smp_read_barrier_depends, ACCESS_ONCE - * and rcu_dereference primitives come from the Linux kernel. + * LGPL-compatible code should include this header with : * - * Distributed under GPLv2 + * #define _LGPL_SOURCE + * #include + * + * Distributed under LGPLv2.1 * * IBM's contributions to this file may be relicensed under LGPLv2 or later. */ @@ -22,288 +24,55 @@ #include #include -/* The "volatile" is due to gcc bugs */ -#define barrier() __asm__ __volatile__("": : :"memory") - -#define likely(x) __builtin_expect(!!(x), 1) -#define unlikely(x) __builtin_expect(!!(x), 0) - -/* Assume SMP machine, given we don't have this information */ -#define CONFIG_SMP 1 - - -#ifdef CONFIG_SMP -#define smp_mb() mb() -#define smp_rmb() rmb() -#define smp_wmb() wmb() -#define smp_mc() mc() -#define smp_rmc() rmc() -#define smp_wmc() wmc() -#else -#define smp_mb() barrier() -#define smp_rmb() barrier() -#define smp_wmb() barrier() -#define smp_mc() barrier() -#define smp_rmc() barrier() -#define smp_wmc() barrier() -#endif - -#include "arch.h" - -/* Nop everywhere except on alpha. */ -#define smp_read_barrier_depends() - -/* - * Prevent the compiler from merging or refetching accesses. The compiler - * is also forbidden from reordering successive instances of ACCESS_ONCE(), - * but only when the compiler is aware of some particular ordering. One way - * to make the compiler aware of ordering is to put the two invocations of - * ACCESS_ONCE() in different C statements. - * - * This macro does absolutely -nothing- to prevent the CPU from reordering, - * merging, or refetching absolutely anything at any time. Its main intended - * use is to mediate communication between process-level code and irq/NMI - * handlers, all running on the same CPU. - */ -#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) - -/* - * Identify a shared load. A smp_rmc() or smp_mc() should come before the load. - */ -#define _LOAD_SHARED(p) ACCESS_ONCE(p) - -/* - * Load a data from shared memory, doing a cache flush if required. - */ -#define LOAD_SHARED(p) \ - ({ \ - smp_rmc(); \ - _LOAD_SHARED(p); \ - }) - +#ifdef _LGPL_SOURCE -/* - * Identify a shared store. A smp_wmc() or smp_mc() should follow the store. - */ -#define _STORE_SHARED(x, v) \ - do { \ - ACCESS_ONCE(x) = (v); \ - } while (0) +#include /* - * Store v into x, where x is located in shared memory. Performs the required - * cache flush after writing. + * Mappings for static use of the userspace RCU library. + * Should only be used in LGPL-compatible code. */ -#define STORE_SHARED(x, v) \ - do { \ - _STORE_SHARED(x, v); \ - smp_wmc(); \ - } while (0) -/** - * rcu_dereference - fetch an RCU-protected pointer in an - * RCU read-side critical section. This pointer may later - * be safely dereferenced. - * - * Inserts memory barriers on architectures that require them - * (currently only the Alpha), and, more importantly, documents - * exactly which pointers are protected by RCU. - */ +#define rcu_dereference _rcu_dereference +#define rcu_read_lock _rcu_read_lock +#define rcu_read_unlock _rcu_read_unlock -#define rcu_dereference(p) ({ \ - typeof(p) _________p1 = LOAD_SHARED(p); \ - smp_read_barrier_depends(); \ - (_________p1); \ - }) +#define rcu_assign_pointer _rcu_assign_pointer +#define rcu_xchg_pointer _rcu_xchg_pointer +#define rcu_publish_content _rcu_publish_content -#define SIGURCU SIGUSR1 +#else /* !_LGPL_SOURCE */ /* - * If a reader is really non-cooperative and refuses to commit its - * urcu_active_readers count to memory (there is no barrier in the reader - * per-se), kick it after a few loops waiting for it. + * library wrappers to be used by non-LGPL compatible source code. */ -#define KICK_READER_LOOPS 10000 - -#ifdef DEBUG_YIELD -#include -#include -#include -#include -#define YIELD_READ (1 << 0) -#define YIELD_WRITE (1 << 1) +extern void rcu_read_lock(void); +extern void rcu_read_unlock(void); -/* Updates without DEBUG_FULL_MB are much slower. Account this in the delay */ -#ifdef DEBUG_FULL_MB -/* maximum sleep delay, in us */ -#define MAX_SLEEP 50 -#else -#define MAX_SLEEP 30000 -#endif +extern void *rcu_dereference(void *p); -extern unsigned int yield_active; -extern unsigned int __thread rand_yield; +extern void *rcu_assign_pointer_sym(void **p, void *v); -static inline void debug_yield_read(void) -{ - if (yield_active & YIELD_READ) - if (rand_r(&rand_yield) & 0x1) - usleep(rand_r(&rand_yield) % MAX_SLEEP); -} +#define rcu_assign_pointer(p, v) \ + rcu_assign_pointer_sym((void **)(p), (v)) -static inline void debug_yield_write(void) -{ - if (yield_active & YIELD_WRITE) - if (rand_r(&rand_yield) & 0x1) - usleep(rand_r(&rand_yield) % MAX_SLEEP); -} +extern void *rcu_xchg_pointer_sym(void **p, void *v); +#define rcu_xchg_pointer(p, v) \ + rcu_xchg_pointer_sym((void **)(p), (v)) -static inline void debug_yield_init(void) -{ - rand_yield = time(NULL) ^ pthread_self(); -} -#else -static inline void debug_yield_read(void) -{ -} +extern void *rcu_publish_content_sym(void **p, void *v); +#define rcu_publish_content(p, v) \ + rcu_publish_content_sym((void **)(p), (v)) -static inline void debug_yield_write(void) -{ -} - -static inline void debug_yield_init(void) -{ - -} -#endif - -#ifdef DEBUG_FULL_MB -static inline void reader_barrier() -{ - smp_mb(); -} -#else -static inline void reader_barrier() -{ - barrier(); -} -#endif - -/* - * The trick here is that RCU_GP_CTR_BIT must be a multiple of 8 so we can use a - * full 8-bits, 16-bits or 32-bits bitmask for the lower order bits. - */ -#define RCU_GP_COUNT (1UL << 0) -/* Use the amount of bits equal to half of the architecture long size */ -#define RCU_GP_CTR_BIT (1UL << (sizeof(long) << 2)) -#define RCU_GP_CTR_NEST_MASK (RCU_GP_CTR_BIT - 1) - -/* - * Global quiescent period counter with low-order bits unused. - * Using a int rather than a char to eliminate false register dependencies - * causing stalls on some architectures. - */ -extern long urcu_gp_ctr; - -extern long __thread urcu_active_readers; - -static inline int rcu_old_gp_ongoing(long *value) -{ - long v; - - if (value == NULL) - return 0; - /* - * Make sure both tests below are done on the same version of *value - * to insure consistency. - */ - v = LOAD_SHARED(*value); - return (v & RCU_GP_CTR_NEST_MASK) && - ((v ^ urcu_gp_ctr) & RCU_GP_CTR_BIT); -} - -static inline void rcu_read_lock(void) -{ - long tmp; - - tmp = urcu_active_readers; - /* urcu_gp_ctr = RCU_GP_COUNT | (~RCU_GP_CTR_BIT or RCU_GP_CTR_BIT) */ - /* - * The data dependency "read urcu_gp_ctr, write urcu_active_readers", - * serializes those two memory operations. The memory barrier in the - * signal handler ensures we receive the proper memory commit barriers - * required by _STORE_SHARED and _LOAD_SHARED whenever communication - * with the writer is needed. - */ - if (likely(!(tmp & RCU_GP_CTR_NEST_MASK))) - _STORE_SHARED(urcu_active_readers, _LOAD_SHARED(urcu_gp_ctr)); - else - _STORE_SHARED(urcu_active_readers, tmp + RCU_GP_COUNT); - /* - * Increment active readers count before accessing the pointer. - * See force_mb_all_threads(). - */ - reader_barrier(); -} - -static inline void rcu_read_unlock(void) -{ - reader_barrier(); - /* - * Finish using rcu before decrementing the pointer. - * See force_mb_all_threads(). - */ - _STORE_SHARED(urcu_active_readers, urcu_active_readers - RCU_GP_COUNT); -} - -/** - * rcu_assign_pointer - assign (publicize) a pointer to a newly - * initialized structure that will be dereferenced by RCU read-side - * critical sections. Returns the value assigned. - * - * Inserts memory barriers on architectures that require them - * (pretty much all of them other than x86), and also prevents - * the compiler from reordering the code that initializes the - * structure after the pointer assignment. More importantly, this - * call documents which pointers will be dereferenced by RCU read-side - * code. - */ - -#define rcu_assign_pointer(p, v) \ - ({ \ - if (!__builtin_constant_p(v) || \ - ((v) != NULL)) \ - wmb(); \ - STORE_SHARED(p, v); \ - }) - -#define rcu_xchg_pointer(p, v) \ - ({ \ - if (!__builtin_constant_p(v) || \ - ((v) != NULL)) \ - wmb(); \ - xchg(p, v); \ - }) +#endif /* !_LGPL_SOURCE */ extern void synchronize_rcu(void); -/* - * Exchanges the pointer and waits for quiescent state. - * The pointer returned can be freed. - */ -#define urcu_publish_content(p, v) \ - ({ \ - void *oldptr; \ - oldptr = rcu_xchg_pointer(p, v); \ - synchronize_rcu(); \ - oldptr; \ - }) - /* * Reader thread registration. */ -extern void urcu_register_thread(void); -extern void urcu_unregister_thread(void); +extern void rcu_register_thread(void); +extern void rcu_unregister_thread(void); #endif /* _URCU_H */ diff --git a/urcutorture.c b/urcutorture.c index 258413a..75256f9 100644 --- a/urcutorture.c +++ b/urcutorture.c @@ -4,5 +4,6 @@ #include #include #include "api.h" +#define _LGPL_SOURCE #include "urcu.h" #include "rcutorture.h" -- 2.34.1