X-Git-Url: https://git.lttng.org/?p=urcu.git;a=blobdiff_plain;f=include%2Furcu%2Fstatic%2Fpointer.h;fp=include%2Furcu%2Fstatic%2Fpointer.h;h=891daed2994ba8a58c0b7f1bdfe10d886d37f91c;hp=732f8c078d7dcf585bde531cdacd51154818a211;hb=380f4b19052944e5f9990ba02b6fc6117ce446b4;hpb=7afcc7685532e13f39fc13d505c96a3e5581d4d3

diff --git a/include/urcu/static/pointer.h b/include/urcu/static/pointer.h
index 732f8c0..891daed 100644
--- a/include/urcu/static/pointer.h
+++ b/include/urcu/static/pointer.h
@@ -49,14 +49,29 @@ extern "C" {
  * Inserts memory barriers on architectures that require them (currently only
  * Alpha) and documents which pointers are protected by RCU.
  *
- * The compiler memory barrier in CMM_LOAD_SHARED() ensures that value-speculative
- * optimizations (e.g. VSS: Value Speculation Scheduling) does not perform the
- * data read before the pointer read by speculating the value of the pointer.
- * Correct ordering is ensured because the pointer is read as a volatile access.
- * This acts as a global side-effect operation, which forbids reordering of
- * dependent memory operations. Note that such concern about dependency-breaking
- * optimizations will eventually be taken care of by the "memory_order_consume"
- * addition to forthcoming C++ standard.
+ * With C standards prior to C11/C++11, the compiler memory barrier in
+ * CMM_LOAD_SHARED() ensures that value-speculative optimizations (e.g.
+ * VSS: Value Speculation Scheduling) do not perform the data read
+ * before the pointer read by speculating the value of the pointer.
+ * Correct ordering is ensured because the pointer is read as a volatile
+ * access. This acts as a global side-effect operation, which forbids
+ * reordering of dependent memory operations.
+ *
+ * With C standards C11/C++11, concerns about dependency-breaking
+ * optimizations are taken care of by the "memory_order_consume" atomic
+ * load.
+ *
+ * By defining URCU_DEREFERENCE_USE_VOLATILE, the user requires use of
+ * volatile access to implement rcu_dereference rather than
+ * memory_order_consume load from the C11/C++11 standards.
+ *
+ * This may improve performance on weakly-ordered architectures where
+ * the compiler implements memory_order_consume as a
+ * memory_order_acquire, which is stricter than required by the
+ * standard.
+ *
+ * Note that using volatile accesses for rcu_dereference may cause
+ * LTO to generate incorrectly ordered code starting from C11/C++11.
  *
  * Should match rcu_assign_pointer() or rcu_xchg_pointer().
  *
@@ -64,10 +79,42 @@ extern "C" {
  * meets the 10-line criterion in LGPL, allowing this function to be
  * expanded directly in non-LGPL code.
  */
+
+/*
+ * __rcu_dereference() selects the load primitive used by _rcu_dereference():
+ *   - URCU_DEREFERENCE_USE_VOLATILE defined: CMM_LOAD_SHARED();
+ *   - C++11 and later: std::atomic load with std::memory_order_consume;
+ *   - C11 and later: atomic_load_explicit() with memory_order_consume;
+ *   - earlier C/C++ standards: CMM_LOAD_SHARED().
+ *
+ * NOTE(review): the C++11 branch converts the value of (p) to a temporary
+ * std::atomic and loads from that temporary, so (p) itself appears to be
+ * read with a plain access -- confirm this provides the intended ordering.
+ */
+#ifdef URCU_DEREFERENCE_USE_VOLATILE
+#  define __rcu_dereference(p) CMM_LOAD_SHARED(p)
+#else
+#  if defined (__cplusplus)
+#    if __cplusplus >= 201103L
+#      include <atomic>
+#      define __rcu_dereference(p) ((std::atomic<__typeof__(p)>)(p)).load(std::memory_order_consume)
+#    else
+#      define __rcu_dereference(p) CMM_LOAD_SHARED(p)
+#    endif
+#  else
+#    if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L)
+#      include <stdatomic.h>
+#      define __rcu_dereference(p) atomic_load_explicit(&(p), memory_order_consume)
+#    else
+#      define __rcu_dereference(p) CMM_LOAD_SHARED(p)
+#    endif
+#  endif
+#endif
+
 #define _rcu_dereference(p)				\
 	__extension__					\
 	({						\
-	__typeof__(p) _________p1 = CMM_LOAD_SHARED(p); \
+	__typeof__(p) _________p1 = __rcu_dereference(p); \
 	cmm_smp_read_barrier_depends();			\
 	(_________p1);					\
 	})