From 10544ee8af31afb239e3dfa71cb2fe09d3de3771 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Wed, 11 Nov 2020 17:28:06 -0500 Subject: [PATCH] Remove runtime dependency on liburcu shared objects Remove the runtime dependency on: - liburcu-bp.so - liburcu-cds.so - compat futex code. By integrating those into the lttng-ust project. For rculfhash, only the minimum pieces needed by lttng-ust are integrated (no auto-resize, no accounting). lttng-ust still requires liburcu at build time for header dependencies. Signed-off-by: Mathieu Desnoyers Change-Id: Idffb205b27b1bb0f972523c3ce3bdaf25bfe1710 --- configure.ac | 2 - include/Makefile.am | 6 +- include/lttng/tracepoint-rcu.h | 19 +- include/lttng/tracepoint.h | 36 +- include/lttng/urcu/pointer.h | 129 ++ include/lttng/urcu/static/pointer.h | 146 +++ include/lttng/urcu/static/urcu-ust.h | 211 ++++ include/lttng/urcu/urcu-ust.h | 105 ++ include/lttng/ust-events.h | 2 - include/lttng/ust-tracepoint-event.h | 8 +- liblttng-ust/Makefile.am | 16 +- liblttng-ust/compat_futex.c | 154 +++ liblttng-ust/futex.h | 189 +++ liblttng-ust/lttng-bytecode-interpreter.c | 4 +- liblttng-ust/lttng-bytecode-validator.c | 41 +- liblttng-ust/lttng-context.c | 11 +- liblttng-ust/lttng-events.c | 13 +- liblttng-ust/lttng-tracer-core.h | 4 + liblttng-ust/lttng-ust-abi.c | 3 +- liblttng-ust/lttng-ust-comm.c | 20 +- liblttng-ust/lttng-ust-urcu-pointer.c | 54 + liblttng-ust/lttng-ust-urcu.c | 733 ++++++++++++ liblttng-ust/rculfhash-internal.h | 179 +++ liblttng-ust/rculfhash-mm-chunk.c | 97 ++ liblttng-ust/rculfhash-mm-mmap.c | 214 ++++ liblttng-ust/rculfhash-mm-order.c | 90 ++ liblttng-ust/rculfhash.c | 1314 +++++++++++++++++++++ liblttng-ust/rculfhash.h | 467 ++++++++ liblttng-ust/tracepoint-internal.h | 6 +- liblttng-ust/tracepoint.c | 158 ++- libringbuffer/frontend_api.h | 1 - lttng-ust.pc.in | 1 - 32 files changed, 4326 insertions(+), 107 deletions(-) create mode 100644 include/lttng/urcu/pointer.h create mode 100644 include/lttng/urcu/static/pointer.h create mode 100644 include/lttng/urcu/static/urcu-ust.h create mode 100644 include/lttng/urcu/urcu-ust.h create mode 100644 liblttng-ust/compat_futex.c create mode 100644 liblttng-ust/futex.h create mode 100644 liblttng-ust/lttng-ust-urcu-pointer.c create mode 100644 liblttng-ust/lttng-ust-urcu.c create mode 100644 liblttng-ust/rculfhash-internal.h create mode 100644 liblttng-ust/rculfhash-mm-chunk.c create mode 100644 liblttng-ust/rculfhash-mm-mmap.c create mode 100644 liblttng-ust/rculfhash-mm-order.c create mode 100644 liblttng-ust/rculfhash.c create mode 100644 liblttng-ust/rculfhash.h diff --git a/configure.ac b/configure.ac index e5ac3200..f698e973 100644 --- a/configure.ac +++ b/configure.ac @@ -251,8 +251,6 @@ AM_CONDITIONAL([HAVE_DLINFO], [test "x${ac_cv_have_decl_RTLD_DI_LINKMAP}" = "xye # Require URCU >= 0.12 for DEFINE_URCU_TLS_INIT PKG_CHECK_MODULES([URCU], [liburcu >= 0.12]) -PKG_CHECK_MODULES([URCU_BP], [liburcu-bp >= 0.12]) -PKG_CHECK_MODULES([URCU_CDS], [liburcu-cds >= 0.12]) # numa.h integration AS_IF([test "x$NO_NUMA" = "x1"],[ diff --git a/include/Makefile.am b/include/Makefile.am index 23a165ae..40990ff6 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -27,7 +27,11 @@ nobase_include_HEADERS = \ lttng/ust-getcpu.h \ lttng/ust-elf.h \ lttng/counter-config.h \ - lttng/bitmap.h + lttng/bitmap.h \ + lttng/urcu/pointer.h \ + lttng/urcu/urcu-ust.h \ + lttng/urcu/static/pointer.h \ + lttng/urcu/static/urcu-ust.h # note: usterr-signal-safe.h, core.h and share.h need namespace cleanup. 
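For context, here is a minimal reader-side sketch of the embedded RCU flavor this patch introduces. The struct my_cfg type and active_cfg pointer are hypothetical, used only for illustration; the include paths match the headers installed by the include/Makefile.am hunk above.

#define _LGPL_SOURCE	/* inline the read-side fast path; omit it to use the function wrappers */
#include <lttng/urcu/urcu-ust.h>
#include <lttng/urcu/pointer.h>

struct my_cfg {
	int verbosity;
};

static struct my_cfg *active_cfg;	/* RCU-protected pointer (hypothetical) */

int read_verbosity(void)
{
	struct my_cfg *cfg;
	int v = 0;

	lttng_ust_urcu_read_lock();	/* registers the thread lazily on first use */
	cfg = lttng_ust_rcu_dereference(active_cfg);
	if (cfg)
		v = cfg->verbosity;
	lttng_ust_urcu_read_unlock();
	return v;
}

Without _LGPL_SOURCE, the same calls go through the exported function wrappers instead of the inlined fast path, which permits relinking against newer versions of the library.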
diff --git a/include/lttng/tracepoint-rcu.h b/include/lttng/tracepoint-rcu.h index 95d60493..3378d924 100644 --- a/include/lttng/tracepoint-rcu.h +++ b/include/lttng/tracepoint-rcu.h @@ -24,26 +24,27 @@ */ #include +#include #ifdef _LGPL_SOURCE -#include +#include -#define tp_rcu_read_lock_bp urcu_bp_read_lock -#define tp_rcu_read_unlock_bp urcu_bp_read_unlock -#define tp_rcu_dereference_bp rcu_dereference +#define tp_rcu_read_lock lttng_ust_urcu_read_lock +#define tp_rcu_read_unlock lttng_ust_urcu_read_unlock +#define tp_rcu_dereference lttng_ust_rcu_dereference #define TP_RCU_LINK_TEST() 1 #else /* _LGPL_SOURCE */ -#define tp_rcu_read_lock_bp tracepoint_dlopen_ptr->rcu_read_lock_sym_bp -#define tp_rcu_read_unlock_bp tracepoint_dlopen_ptr->rcu_read_unlock_sym_bp +#define tp_rcu_read_lock tracepoint_dlopen_ptr->rcu_read_lock_sym +#define tp_rcu_read_unlock tracepoint_dlopen_ptr->rcu_read_unlock_sym -#define tp_rcu_dereference_bp(p) \ +#define tp_rcu_dereference(p) \ URCU_FORCE_CAST(__typeof__(p), \ - tracepoint_dlopen_ptr->rcu_dereference_sym_bp(URCU_FORCE_CAST(void *, p))) + tracepoint_dlopen_ptr->rcu_dereference_sym(URCU_FORCE_CAST(void *, p))) -#define TP_RCU_LINK_TEST() (tracepoint_dlopen_ptr && tp_rcu_read_lock_bp) +#define TP_RCU_LINK_TEST() (tracepoint_dlopen_ptr && tp_rcu_read_lock) #endif /* _LGPL_SOURCE */ diff --git a/include/lttng/tracepoint.h b/include/lttng/tracepoint.h index 1cf02188..41ac5a28 100644 --- a/include/lttng/tracepoint.h +++ b/include/lttng/tracepoint.h @@ -180,12 +180,12 @@ void __tracepoint_cb_##_provider##___##_name(_TP_ARGS_PROTO(__VA_ARGS__)); \ static \ void __tracepoint_cb_##_provider##___##_name(_TP_ARGS_PROTO(__VA_ARGS__)) \ { \ - struct lttng_ust_tracepoint_probe *__tp_probe; \ + struct lttng_ust_tracepoint_probe *__tp_probe; \ \ if (caa_unlikely(!TP_RCU_LINK_TEST())) \ return; \ - tp_rcu_read_lock_bp(); \ - __tp_probe = tp_rcu_dereference_bp(__tracepoint_##_provider##___##_name.probes); \ + tp_rcu_read_lock(); \ + __tp_probe = tp_rcu_dereference(__tracepoint_##_provider##___##_name.probes); \ if (caa_unlikely(!__tp_probe)) \ goto end; \ do { \ @@ -196,7 +196,7 @@ void __tracepoint_cb_##_provider##___##_name(_TP_ARGS_PROTO(__VA_ARGS__)) \ (_TP_ARGS_DATA_VAR(__VA_ARGS__)); \ } while ((++__tp_probe)->func); \ end: \ - tp_rcu_read_unlock_bp(); \ + tp_rcu_read_unlock(); \ } \ static inline lttng_ust_notrace \ void __tracepoint_register_##_provider##___##_name(char *name, \ @@ -233,9 +233,9 @@ struct lttng_ust_tracepoint_dlopen { int (*tracepoint_register_lib)(struct lttng_ust_tracepoint * const *tracepoints_start, int tracepoints_count); int (*tracepoint_unregister_lib)(struct lttng_ust_tracepoint * const *tracepoints_start); - void (*rcu_read_lock_sym_bp)(void); - void (*rcu_read_unlock_sym_bp)(void); - void *(*rcu_dereference_sym_bp)(void *p); + void (*rcu_read_lock_sym)(void); + void (*rcu_read_unlock_sym)(void); + void *(*rcu_dereference_sym)(void *p); }; extern struct lttng_ust_tracepoint_dlopen tracepoint_dlopen; @@ -308,21 +308,21 @@ __tracepoint__init_urcu_sym(void) * Symbols below are needed by tracepoint call sites and probe * providers. 
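Roughly, the callback generated by the macro above behaves like the following for a hypothetical provider "myprov" and event "myevent" taking a single int argument. This is a hand-written sketch of the expansion, not verbatim preprocessor output.

#include <lttng/tracepoint.h>

/* Normally emitted by TRACEPOINT_EVENT(); declared here only for the sketch. */
extern struct lttng_ust_tracepoint __tracepoint_myprov___myevent;

static void __tracepoint_cb_myprov___myevent(int arg)
{
	struct lttng_ust_tracepoint_probe *__tp_probe;

	if (caa_unlikely(!TP_RCU_LINK_TEST()))
		return;		/* tracepoint RCU symbols not resolved yet */
	tp_rcu_read_lock();
	__tp_probe = tp_rcu_dereference(__tracepoint_myprov___myevent.probes);
	if (caa_unlikely(!__tp_probe))
		goto end;	/* no probe registered on this event */
	do {
		void (*fct)(void *, int) = (void (*)(void *, int)) __tp_probe->func;

		fct(__tp_probe->data, arg);	/* probe data is passed first */
	} while ((++__tp_probe)->func);		/* probe array ends with a NULL func */
end:
	tp_rcu_read_unlock();
}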
*/ - if (!tracepoint_dlopen_ptr->rcu_read_lock_sym_bp) - tracepoint_dlopen_ptr->rcu_read_lock_sym_bp = + if (!tracepoint_dlopen_ptr->rcu_read_lock_sym) + tracepoint_dlopen_ptr->rcu_read_lock_sym = URCU_FORCE_CAST(void (*)(void), dlsym(tracepoint_dlopen_ptr->liblttngust_handle, - "tp_rcu_read_lock_bp")); - if (!tracepoint_dlopen_ptr->rcu_read_unlock_sym_bp) - tracepoint_dlopen_ptr->rcu_read_unlock_sym_bp = + "tp_rcu_read_lock")); + if (!tracepoint_dlopen_ptr->rcu_read_unlock_sym) + tracepoint_dlopen_ptr->rcu_read_unlock_sym = URCU_FORCE_CAST(void (*)(void), dlsym(tracepoint_dlopen_ptr->liblttngust_handle, - "tp_rcu_read_unlock_bp")); - if (!tracepoint_dlopen_ptr->rcu_dereference_sym_bp) - tracepoint_dlopen_ptr->rcu_dereference_sym_bp = + "tp_rcu_read_unlock")); + if (!tracepoint_dlopen_ptr->rcu_dereference_sym) + tracepoint_dlopen_ptr->rcu_dereference_sym = URCU_FORCE_CAST(void *(*)(void *p), dlsym(tracepoint_dlopen_ptr->liblttngust_handle, - "tp_rcu_dereference_sym_bp")); + "tp_rcu_dereference_sym")); } #else static inline void lttng_ust_notrace @@ -479,11 +479,11 @@ __tracepoints__ptrs_init(void) tracepoint_dlopen_ptr->tracepoint_register_lib = URCU_FORCE_CAST(int (*)(struct lttng_ust_tracepoint * const *, int), dlsym(tracepoint_dlopen_ptr->liblttngust_handle, - "tracepoint_register_lib")); + "tracepoint_register_lib2")); tracepoint_dlopen_ptr->tracepoint_unregister_lib = URCU_FORCE_CAST(int (*)(struct lttng_ust_tracepoint * const *), dlsym(tracepoint_dlopen_ptr->liblttngust_handle, - "tracepoint_unregister_lib")); + "tracepoint_unregister_lib2")); tracepoint_destructors_syms_ptr->old_tracepoint_disable_destructors = URCU_FORCE_CAST(int *, dlsym(tracepoint_dlopen_ptr->liblttngust_handle, diff --git a/include/lttng/urcu/pointer.h b/include/lttng/urcu/pointer.h new file mode 100644 index 00000000..21bc2f4c --- /dev/null +++ b/include/lttng/urcu/pointer.h @@ -0,0 +1,129 @@ +#ifndef _LTTNG_UST_URCU_POINTER_H +#define _LTTNG_UST_URCU_POINTER_H + +/* + * lttng/urcu/pointer.h + * + * Userspace RCU header. Operations on pointers. + * + * Copyright (c) 2009 Mathieu Desnoyers + * Copyright (c) 2009 Paul E. McKenney, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * IBM's contributions to this file may be relicensed under LGPLv2 or later. + */ + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(_LGPL_SOURCE) || defined(LTTNG_UST_URCU_INLINE_SMALL_FUNCTIONS) + +#include + +/* + * lttng_ust_rcu_dereference(ptr) + * + * Fetch a RCU-protected pointer. Typically used to copy the variable ptr to a + * local variable. 
+ */ +#define lttng_ust_rcu_dereference _lttng_ust_rcu_dereference + +/* + * type *lttng_ust_rcu_cmpxchg_pointer(type **ptr, type *new, type *old) + * type *lttng_ust_rcu_xchg_pointer(type **ptr, type *new) + * void lttng_ust_rcu_set_pointer(type **ptr, type *new) + * + * RCU pointer updates. + * @ptr: address of the pointer to modify + * @new: new pointer value + * @old: old pointer value (expected) + * + * return: old pointer value + */ +#define lttng_ust_rcu_cmpxchg_pointer _lttng_ust_rcu_cmpxchg_pointer +#define lttng_ust_rcu_xchg_pointer _lttng_ust_rcu_xchg_pointer +#define lttng_ust_rcu_set_pointer _lttng_ust_rcu_set_pointer + +#else /* !(defined(_LGPL_SOURCE) || defined(LTTNG_UST_URCU_INLINE_SMALL_FUNCTIONS)) */ + +extern void *lttng_ust_rcu_dereference_sym(void *p); +#define lttng_ust_rcu_dereference(p) \ + __extension__ \ + ({ \ + __typeof__(p) _________p1 = URCU_FORCE_CAST(__typeof__(p), \ + lttng_ust_rcu_dereference_sym(URCU_FORCE_CAST(void *, p))); \ + (_________p1); \ + }) + +extern void *lttng_ust_rcu_cmpxchg_pointer_sym(void **p, void *old, void *_new); +#define lttng_ust_rcu_cmpxchg_pointer(p, old, _new) \ + __extension__ \ + ({ \ + __typeof__(*(p)) _________pold = (old); \ + __typeof__(*(p)) _________pnew = (_new); \ + __typeof__(*(p)) _________p1 = URCU_FORCE_CAST(__typeof__(*(p)), \ + lttng_ust_rcu_cmpxchg_pointer_sym(URCU_FORCE_CAST(void **, p), \ + _________pold, \ + _________pnew)); \ + (_________p1); \ + }) + +extern void *lttng_ust_rcu_xchg_pointer_sym(void **p, void *v); +#define lttng_ust_rcu_xchg_pointer(p, v) \ + __extension__ \ + ({ \ + __typeof__(*(p)) _________pv = (v); \ + __typeof__(*(p)) _________p1 = URCU_FORCE_CAST(__typeof__(*(p)), \ + lttng_ust_rcu_xchg_pointer_sym(URCU_FORCE_CAST(void **, p), \ + _________pv)); \ + (_________p1); \ + }) + +/* + * Note: lttng_ust_rcu_set_pointer_sym returns @v because we don't want to break + * the ABI. At the API level, lttng_ust_rcu_set_pointer() now returns void. Use of + * the return value is therefore deprecated, and will cause a build + * error. + */ +extern void *lttng_ust_rcu_set_pointer_sym(void **p, void *v); +#define lttng_ust_rcu_set_pointer(p, v) \ + do { \ + __typeof__(*(p)) _________pv = (v); \ + (void) lttng_ust_rcu_set_pointer_sym(URCU_FORCE_CAST(void **, p), \ + _________pv); \ + } while (0) + +#endif /* !(defined(_LGPL_SOURCE) || defined(LTTNG_UST_URCU_INLINE_SMALL_FUNCTIONS)) */ + +/* + * void lttng_ust_rcu_assign_pointer(type *ptr, type *new) + * + * Same as lttng_ust_rcu_set_pointer, but takes the pointer to assign to rather than its + * address as first parameter. Provided for compatibility with the Linux kernel + * RCU semantic. + */ +#define lttng_ust_rcu_assign_pointer(p, v) lttng_ust_rcu_set_pointer((&p), (v)) + +#ifdef __cplusplus +} +#endif + +#endif /* _LTTNG_UST_URCU_POINTER_H */ diff --git a/include/lttng/urcu/static/pointer.h b/include/lttng/urcu/static/pointer.h new file mode 100644 index 00000000..dcb85785 --- /dev/null +++ b/include/lttng/urcu/static/pointer.h @@ -0,0 +1,146 @@ +#ifndef _LTTNG_URCU_POINTER_STATIC_H +#define _LTTNG_URCU_POINTER_STATIC_H + +/* + * lttng/urcu/static/pointer.h + * + * Userspace RCU header. Operations on pointers. + * + * TO BE INCLUDED ONLY IN CODE THAT IS TO BE RECOMPILED ON EACH LIBURCU + * RELEASE. See urcu.h for linking dynamically with the userspace rcu library. + * + * Copyright (c) 2009 Mathieu Desnoyers + * Copyright (c) 2009 Paul E. McKenney, IBM Corporation. 
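A matching update-side sketch, reusing the hypothetical my_cfg/active_cfg names from the reader sketch earlier: publish a new object, then retire the old one after a grace period.

#include <stdlib.h>

#include <lttng/urcu/urcu-ust.h>
#include <lttng/urcu/pointer.h>

/* my_cfg / active_cfg as in the reader sketch above (hypothetical). */
void update_verbosity(int verbosity)
{
	struct my_cfg *new_cfg, *old_cfg;

	new_cfg = malloc(sizeof(*new_cfg));
	if (!new_cfg)
		return;
	new_cfg->verbosity = verbosity;
	/* Swap in the new object; uatomic_xchg orders its init before publication. */
	old_cfg = lttng_ust_rcu_xchg_pointer(&active_cfg, new_cfg);
	/* Wait for every pre-existing read-side critical section to complete. */
	lttng_ust_urcu_synchronize_rcu();
	free(old_cfg);
}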
+ * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * IBM's contributions to this file may be relicensed under LGPLv2 or later. + */ + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * _rcu_dereference - reads (copy) a RCU-protected pointer to a local variable + * into a RCU read-side critical section. The pointer can later be safely + * dereferenced within the critical section. + * + * This ensures that the pointer copy is invariant thorough the whole critical + * section. + * + * Inserts memory barriers on architectures that require them (currently only + * Alpha) and documents which pointers are protected by RCU. + * + * The compiler memory barrier in CMM_LOAD_SHARED() ensures that value-speculative + * optimizations (e.g. VSS: Value Speculation Scheduling) does not perform the + * data read before the pointer read by speculating the value of the pointer. + * Correct ordering is ensured because the pointer is read as a volatile access. + * This acts as a global side-effect operation, which forbids reordering of + * dependent memory operations. Note that such concern about dependency-breaking + * optimizations will eventually be taken care of by the "memory_order_consume" + * addition to forthcoming C++ standard. + * + * Should match rcu_assign_pointer() or rcu_xchg_pointer(). + * + * This macro is less than 10 lines long. The intent is that this macro + * meets the 10-line criterion in LGPL, allowing this function to be + * expanded directly in non-LGPL code. + */ +#define _lttng_ust_rcu_dereference(p) \ + __extension__ \ + ({ \ + __typeof__(p) _________p1 = CMM_LOAD_SHARED(p); \ + cmm_smp_read_barrier_depends(); \ + (_________p1); \ + }) + +/** + * _rcu_cmpxchg_pointer - same as rcu_assign_pointer, but tests if the pointer + * is as expected by "old". If succeeds, returns the previous pointer to the + * data structure, which can be safely freed after waiting for a quiescent state + * using synchronize_rcu(). If fails (unexpected value), returns old (which + * should not be freed !). + * + * uatomic_cmpxchg() acts as both release and acquire barriers. + * + * This macro is less than 10 lines long. The intent is that this macro + * meets the 10-line criterion in LGPL, allowing this function to be + * expanded directly in non-LGPL code. + */ +#define _lttng_ust_rcu_cmpxchg_pointer(p, old, _new) \ + __extension__ \ + ({ \ + __typeof__(*p) _________pold = (old); \ + __typeof__(*p) _________pnew = (_new); \ + uatomic_cmpxchg(p, _________pold, _________pnew); \ + }) + +/** + * _rcu_xchg_pointer - same as rcu_assign_pointer, but returns the previous + * pointer to the data structure, which can be safely freed after waiting for a + * quiescent state using synchronize_rcu(). + * + * uatomic_xchg() acts as both release and acquire barriers. 
+ * + * This macro is less than 10 lines long. The intent is that this macro + * meets the 10-line criterion in LGPL, allowing this function to be + * expanded directly in non-LGPL code. + */ +#define _lttng_ust_rcu_xchg_pointer(p, v) \ + __extension__ \ + ({ \ + __typeof__(*p) _________pv = (v); \ + uatomic_xchg(p, _________pv); \ + }) + + +#define _lttng_ust_rcu_set_pointer(p, v) \ + do { \ + __typeof__(*p) _________pv = (v); \ + if (!__builtin_constant_p(v) || \ + ((v) != NULL)) \ + cmm_wmb(); \ + uatomic_set(p, _________pv); \ + } while (0) + +/** + * _rcu_assign_pointer - assign (publicize) a pointer to a new data structure + * meant to be read by RCU read-side critical sections. Returns the assigned + * value. + * + * Documents which pointers will be dereferenced by RCU read-side critical + * sections and adds the required memory barriers on architectures requiring + * them. It also makes sure the compiler does not reorder code initializing the + * data structure before its publication. + * + * Should match rcu_dereference(). + * + * This macro is less than 10 lines long. The intent is that this macro + * meets the 10-line criterion in LGPL, allowing this function to be + * expanded directly in non-LGPL code. + */ +#define _lttng_ust_rcu_assign_pointer(p, v) _lttng_ust_rcu_set_pointer(&(p), v) + +#ifdef __cplusplus +} +#endif + +#endif /* _LTTNG_URCU_POINTER_STATIC_H */ diff --git a/include/lttng/urcu/static/urcu-ust.h b/include/lttng/urcu/static/urcu-ust.h new file mode 100644 index 00000000..cbe27c09 --- /dev/null +++ b/include/lttng/urcu/static/urcu-ust.h @@ -0,0 +1,211 @@ +#ifndef _LTTNG_UST_URCU_STATIC_H +#define _LTTNG_UST_URCU_STATIC_H + +/* + * urcu-ust-static.h + * + * Userspace RCU header. + * + * TO BE INCLUDED ONLY IN CODE THAT IS TO BE RECOMPILED ON EACH LIBURCU + * RELEASE. See urcu.h for linking dynamically with the userspace rcu library. + * + * Copyright (c) 2009 Mathieu Desnoyers + * Copyright (c) 2009 Paul E. McKenney, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * IBM's contributions to this file may be relicensed under LGPLv2 or later. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * This code section can only be included in LGPL 2.1 compatible source code. + * See below for the function call wrappers which can be used in code meant to + * be only linked with the Userspace RCU library. This comes with a small + * performance degradation on the read-side due to the added function calls. + * This is required to permit relinking with newer versions of the library. 
+ */ + +#ifdef __cplusplus +extern "C" { +#endif + +enum lttng_ust_urcu_state { + LTTNG_UST_URCU_READER_ACTIVE_CURRENT, + LTTNG_UST_URCU_READER_ACTIVE_OLD, + LTTNG_UST_URCU_READER_INACTIVE, +}; + +/* + * The trick here is that LTTNG_UST_URCU_GP_CTR_PHASE must be a multiple of 8 so we can use a + * full 8-bits, 16-bits or 32-bits bitmask for the lower order bits. + */ +#define LTTNG_UST_URCU_GP_COUNT (1UL << 0) +/* Use the amount of bits equal to half of the architecture long size */ +#define LTTNG_UST_URCU_GP_CTR_PHASE (1UL << (sizeof(long) << 2)) +#define LTTNG_UST_URCU_GP_CTR_NEST_MASK (LTTNG_UST_URCU_GP_CTR_PHASE - 1) + +/* + * Used internally by _lttng_ust_urcu_read_lock. + */ +extern void lttng_ust_urcu_register(void); + +struct lttng_ust_urcu_gp { + /* + * Global grace period counter. + * Contains the current LTTNG_UST_URCU_GP_CTR_PHASE. + * Also has a LTTNG_UST_URCU_GP_COUNT of 1, to accelerate the reader fast path. + * Written to only by writer with mutex taken. + * Read by both writer and readers. + */ + unsigned long ctr; +} __attribute__((aligned(CAA_CACHE_LINE_SIZE))); + +extern struct lttng_ust_urcu_gp lttng_ust_urcu_gp; + +struct lttng_ust_urcu_reader { + /* Data used by both reader and lttng_ust_urcu_synchronize_rcu() */ + unsigned long ctr; + /* Data used for registry */ + struct cds_list_head node __attribute__((aligned(CAA_CACHE_LINE_SIZE))); + pthread_t tid; + int alloc; /* registry entry allocated */ +}; + +/* + * Bulletproof version keeps a pointer to a registry not part of the TLS. + * Adds a pointer dereference on the read-side, but won't require to unregister + * the reader thread. + */ +extern DECLARE_URCU_TLS(struct lttng_ust_urcu_reader *, lttng_ust_urcu_reader); + +#ifdef CONFIG_RCU_FORCE_SYS_MEMBARRIER +#define lttng_ust_urcu_has_sys_membarrier 1 +#else +extern int lttng_ust_urcu_has_sys_membarrier; +#endif + +static inline void lttng_ust_urcu_smp_mb_slave(void) +{ + if (caa_likely(lttng_ust_urcu_has_sys_membarrier)) + cmm_barrier(); + else + cmm_smp_mb(); +} + +static inline enum lttng_ust_urcu_state lttng_ust_urcu_reader_state(unsigned long *ctr) +{ + unsigned long v; + + if (ctr == NULL) + return LTTNG_UST_URCU_READER_INACTIVE; + /* + * Make sure both tests below are done on the same version of *value + * to insure consistency. + */ + v = CMM_LOAD_SHARED(*ctr); + if (!(v & LTTNG_UST_URCU_GP_CTR_NEST_MASK)) + return LTTNG_UST_URCU_READER_INACTIVE; + if (!((v ^ lttng_ust_urcu_gp.ctr) & LTTNG_UST_URCU_GP_CTR_PHASE)) + return LTTNG_UST_URCU_READER_ACTIVE_CURRENT; + return LTTNG_UST_URCU_READER_ACTIVE_OLD; +} + +/* + * Helper for _lttng_ust_urcu_read_lock(). The format of lttng_ust_urcu_gp.ctr (as well as + * the per-thread rcu_reader.ctr) has the upper bits containing a count of + * _lttng_ust_urcu_read_lock() nesting, and a lower-order bit that contains either zero + * or LTTNG_UST_URCU_GP_CTR_PHASE. The smp_mb_slave() ensures that the accesses in + * _lttng_ust_urcu_read_lock() happen before the subsequent read-side critical section. + */ +static inline void _lttng_ust_urcu_read_lock_update(unsigned long tmp) +{ + if (caa_likely(!(tmp & LTTNG_UST_URCU_GP_CTR_NEST_MASK))) { + _CMM_STORE_SHARED(URCU_TLS(lttng_ust_urcu_reader)->ctr, _CMM_LOAD_SHARED(lttng_ust_urcu_gp.ctr)); + lttng_ust_urcu_smp_mb_slave(); + } else + _CMM_STORE_SHARED(URCU_TLS(lttng_ust_urcu_reader)->ctr, tmp + LTTNG_UST_URCU_GP_COUNT); +} + +/* + * Enter an RCU read-side critical section. 
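A quick numeric check of the counter layout defined above (a sketch; the assertions only restate the comments): the phase bit sits at half the width of a long, and everything below it counts read-side nesting.

#include <assert.h>

static void check_gp_ctr_layout(void)
{
	unsigned long phase = 1UL << (sizeof(long) << 2);	/* bit 32 on LP64, bit 16 on ILP32 */
	unsigned long nest_mask = phase - 1;			/* 0xffffffff on LP64, 0xffff on ILP32 */

	assert((phase & (phase - 1)) == 0);	/* a single phase bit... */
	assert((phase & nest_mask) == 0);	/* ...located above the nesting counter */
}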
+ * + * The first cmm_barrier() call ensures that the compiler does not reorder + * the body of _lttng_ust_urcu_read_lock() with a mutex. + * + * This function and its helper are both less than 10 lines long. The + * intent is that this function meets the 10-line criterion in LGPL, + * allowing this function to be invoked directly from non-LGPL code. + */ +static inline void _lttng_ust_urcu_read_lock(void) +{ + unsigned long tmp; + + if (caa_unlikely(!URCU_TLS(lttng_ust_urcu_reader))) + lttng_ust_urcu_register(); /* If not yet registered. */ + cmm_barrier(); /* Ensure the compiler does not reorder us with mutex */ + tmp = URCU_TLS(lttng_ust_urcu_reader)->ctr; + urcu_assert((tmp & LTTNG_UST_URCU_GP_CTR_NEST_MASK) != LTTNG_UST_URCU_GP_CTR_NEST_MASK); + _lttng_ust_urcu_read_lock_update(tmp); +} + +/* + * Exit an RCU read-side critical section. This function is less than + * 10 lines of code, and is intended to be usable by non-LGPL code, as + * called out in LGPL. + */ +static inline void _lttng_ust_urcu_read_unlock(void) +{ + unsigned long tmp; + + tmp = URCU_TLS(lttng_ust_urcu_reader)->ctr; + urcu_assert(tmp & LTTNG_UST_URCU_GP_CTR_NEST_MASK); + /* Finish using rcu before decrementing the pointer. */ + lttng_ust_urcu_smp_mb_slave(); + _CMM_STORE_SHARED(URCU_TLS(lttng_ust_urcu_reader)->ctr, tmp - LTTNG_UST_URCU_GP_COUNT); + cmm_barrier(); /* Ensure the compiler does not reorder us with mutex */ +} + +/* + * Returns whether within a RCU read-side critical section. + * + * This function is less than 10 lines long. The intent is that this + * function meets the 10-line criterion for LGPL, allowing this function + * to be invoked directly from non-LGPL code. + */ +static inline int _lttng_ust_urcu_read_ongoing(void) +{ + if (caa_unlikely(!URCU_TLS(lttng_ust_urcu_reader))) + lttng_ust_urcu_register(); /* If not yet registered. */ + return URCU_TLS(lttng_ust_urcu_reader)->ctr & LTTNG_UST_URCU_GP_CTR_NEST_MASK; +} + +#ifdef __cplusplus +} +#endif + +#endif /* _LTTNG_UST_URCU_STATIC_H */ diff --git a/include/lttng/urcu/urcu-ust.h b/include/lttng/urcu/urcu-ust.h new file mode 100644 index 00000000..421e0cca --- /dev/null +++ b/include/lttng/urcu/urcu-ust.h @@ -0,0 +1,105 @@ +#ifndef _LTTNG_UST_URCU_H +#define _LTTNG_UST_URCU_H + +/* + * urcu-ust.h + * + * Userspace RCU header for LTTng-UST. Derived from liburcu + * "bulletproof" flavor. + * + * Slower RCU read-side adapted for tracing library. Does not require thread + * registration nor unregistration. Also signal-safe. + * + * Copyright (c) 2009 Mathieu Desnoyers + * Copyright (c) 2009 Paul E. McKenney, IBM Corporation. + * + * LGPL-compatible code should include this header with : + * + * #define _LGPL_SOURCE + * #include + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * IBM's contributions to this file may be relicensed under LGPLv2 or later. 
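A short usage sketch of the read-side shown above: sections nest (tracked in the low-order bits of ctr), read_ongoing reports whether a section is open, and registration can be forced eagerly instead of happening on the first read lock. Names are illustrative.

#include <assert.h>
#include <lttng/urcu/urcu-ust.h>

static void *worker(void *arg)
{
	lttng_ust_urcu_register_thread();	/* optional: registration is otherwise lazy */

	lttng_ust_urcu_read_lock();
	assert(lttng_ust_urcu_read_ongoing());
	lttng_ust_urcu_read_lock();		/* read-side sections nest... */
	lttng_ust_urcu_read_unlock();		/* ...one unlock per lock */
	assert(lttng_ust_urcu_read_ongoing());	/* still inside the outer section */
	lttng_ust_urcu_read_unlock();
	return arg;
}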
+ */ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * See lttng/urcu/pointer.h and lttng/urcu/static/pointer.h for pointer + * publication headers. + */ +#include + +#ifdef _LGPL_SOURCE + +#include + +/* + * Mappings for static use of the userspace RCU library. + * Should only be used in LGPL-compatible code. + */ + +/* + * lttng_ust_urcu_read_lock() + * lttng_ust_urcu_read_unlock() + * + * Mark the beginning and end of a read-side critical section. + */ +#define lttng_ust_urcu_read_lock _lttng_ust_urcu_read_lock +#define lttng_ust_urcu_read_unlock _lttng_ust_urcu_read_unlock +#define lttng_ust_urcu_read_ongoing _lttng_ust_urcu_read_ongoing + +#else /* !_LGPL_SOURCE */ + +/* + * library wrappers to be used by non-LGPL compatible source code. + * See LGPL-only urcu/static/pointer.h for documentation. + */ + +extern void lttng_ust_urcu_read_lock(void); +extern void lttng_ust_urcu_read_unlock(void); +extern int lttng_ust_urcu_read_ongoing(void); + +#endif /* !_LGPL_SOURCE */ + +extern void lttng_ust_urcu_synchronize_rcu(void); + +/* + * lttng_ust_urcu_before_fork, lttng_ust_urcu_after_fork_parent and + * lttng_ust_urcu_after_fork_child should be called around fork() system + * calls when the child process is not expected to immediately perform + * an exec(). For pthread users, see pthread_atfork(3). + */ +extern void lttng_ust_urcu_before_fork(void); +extern void lttng_ust_urcu_after_fork_parent(void); +extern void lttng_ust_urcu_after_fork_child(void); + +/* + * In the UST version, thread registration is performed lazily, but it can be + * forced by issuing an explicit lttng_ust_urcu_register_thread(). + */ +extern void lttng_ust_urcu_register_thread(void); + +#ifdef __cplusplus +} +#endif + +#endif /* _LTTNG_UST_URCU_H */ diff --git a/include/lttng/ust-events.h b/include/lttng/ust-events.h index 19726f88..7e70ea40 100644 --- a/include/lttng/ust-events.h +++ b/include/lttng/ust-events.h @@ -781,8 +781,6 @@ struct lttng_counter *lttng_ust_counter_create( const char *counter_transport_name, size_t number_dimensions, const struct lttng_counter_dimension *dimensions); -void synchronize_trace(void); - int lttng_probe_register(struct lttng_probe_desc *desc); void lttng_probe_unregister(struct lttng_probe_desc *desc); void lttng_probe_provider_unregister_events(struct lttng_probe_desc *desc); diff --git a/include/lttng/ust-tracepoint-event.h b/include/lttng/ust-tracepoint-event.h index 0a397e50..11025204 100644 --- a/include/lttng/ust-tracepoint-event.h +++ b/include/lttng/ust-tracepoint-event.h @@ -36,9 +36,9 @@ #undef tp_list_for_each_entry_rcu #define tp_list_for_each_entry_rcu(pos, head, member) \ - for (pos = cds_list_entry(tp_rcu_dereference_bp((head)->next), __typeof__(*pos), member); \ + for (pos = cds_list_entry(tp_rcu_dereference((head)->next), __typeof__(*pos), member); \ &pos->member != (head); \ - pos = cds_list_entry(tp_rcu_dereference_bp(pos->member.next), __typeof__(*pos), member)) + pos = cds_list_entry(tp_rcu_dereference(pos->member.next), __typeof__(*pos), member)) /* * TRACEPOINT_EVENT_CLASS declares a class of tracepoints receiving the @@ -892,8 +892,8 @@ void __event_probe__##_provider##___##_name(_TP_ARGS_DATA_PROTO(_args)) \ __event_align = __event_get_align__##_provider##___##_name(_TP_ARGS_VAR(_args)); \ memset(&__lttng_ctx, 0, sizeof(__lttng_ctx)); \ __lttng_ctx.event = __event; \ - __lttng_ctx.chan_ctx = tp_rcu_dereference_bp(__chan->ctx); \ - __lttng_ctx.event_ctx = tp_rcu_dereference_bp(__event->ctx); \ + __lttng_ctx.chan_ctx = 
tp_rcu_dereference(__chan->ctx); \ + __lttng_ctx.event_ctx = tp_rcu_dereference(__event->ctx); \ lib_ring_buffer_ctx_init(&__ctx, __chan->chan, __event, __event_len, \ __event_align, -1, __chan->handle, &__lttng_ctx); \ __ctx.ip = _TP_IP_PARAM(TP_IP_PARAM); \ diff --git a/liblttng-ust/Makefile.am b/liblttng-ust/Makefile.am index fab09251..2b25cfc5 100644 --- a/liblttng-ust/Makefile.am +++ b/liblttng-ust/Makefile.am @@ -10,10 +10,11 @@ liblttng_ust_tracepoint_la_SOURCES = \ tracepoint-internal.h \ lttng-tracer-core.h \ jhash.h \ - error.h + error.h \ + lttng-ust-urcu.c \ + lttng-ust-urcu-pointer.c liblttng_ust_tracepoint_la_LIBADD = \ - $(URCU_BP_LIBS) \ $(top_builddir)/snprintf/libustsnprintf.la \ $(DL_LIBS) @@ -79,7 +80,15 @@ liblttng_ust_runtime_la_SOURCES = \ string-utils.h \ event-notifier-notification.c \ ns.h \ - creds.h + creds.h \ + rculfhash.c \ + rculfhash.h \ + rculfhash-internal.h \ + rculfhash-mm-chunk.c \ + rculfhash-mm-mmap.c \ + rculfhash-mm-order.c \ + compat_futex.c \ + futex.h if HAVE_PERF_EVENT liblttng_ust_runtime_la_SOURCES += \ @@ -116,7 +125,6 @@ liblttng_ust_support_la_LIBADD = \ liblttng_ust_la_LIBADD = \ -lrt \ - $(URCU_CDS_LIBS) \ $(top_builddir)/snprintf/libustsnprintf.la \ $(top_builddir)/liblttng-ust-comm/liblttng-ust-comm.la \ liblttng-ust-tracepoint.la \ diff --git a/liblttng-ust/compat_futex.c b/liblttng-ust/compat_futex.c new file mode 100644 index 00000000..9517eda2 --- /dev/null +++ b/liblttng-ust/compat_futex.c @@ -0,0 +1,154 @@ +/* + * compat_futex.c + * + * Userspace RCU library - sys_futex compatibility code + * + * Copyright (c) 2009 Mathieu Desnoyers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include "futex.h" + +/* + * Using attribute "weak" for __lttng_ust_compat_futex_lock and + * __lttng_ust_compat_futex_cond. Those are globally visible by the entire + * program, even though many shared objects may have their own version. + * The first version that gets loaded will be used by the entire program + * (executable and all shared objects). + */ + +__attribute__((weak)) +pthread_mutex_t __lttng_ust_compat_futex_lock = PTHREAD_MUTEX_INITIALIZER; +__attribute__((weak)) +pthread_cond_t __lttng_ust_compat_futex_cond = PTHREAD_COND_INITIALIZER; + +/* + * _NOT SIGNAL-SAFE_. pthread_cond is not signal-safe anyway. Though. + * For now, timeout, uaddr2 and val3 are unused. + * Waiter will relinquish the CPU until woken up. + */ + +int lttng_ust_compat_futex_noasync(int32_t *uaddr, int op, int32_t val, + const struct timespec *timeout, int32_t *uaddr2, int32_t val3) +{ + int ret = 0, lockret; + + /* + * Check if NULL. Don't let users expect that they are taken into + * account. 
+ */ + assert(!timeout); + assert(!uaddr2); + assert(!val3); + + /* + * memory barriers to serialize with the previous uaddr modification. + */ + cmm_smp_mb(); + + lockret = pthread_mutex_lock(&__lttng_ust_compat_futex_lock); + if (lockret) { + errno = lockret; + ret = -1; + goto end; + } + switch (op) { + case FUTEX_WAIT: + /* + * Wait until *uaddr is changed to something else than "val". + * Comparing *uaddr content against val figures out which + * thread has been awakened. + */ + while (CMM_LOAD_SHARED(*uaddr) == val) + pthread_cond_wait(&__lttng_ust_compat_futex_cond, + &__lttng_ust_compat_futex_lock); + break; + case FUTEX_WAKE: + /* + * Each wake is sending a broadcast, thus attempting wakeup of + * all awaiting threads, independently of their respective + * uaddr. + */ + pthread_cond_broadcast(&__lttng_ust_compat_futex_cond); + break; + default: + errno = EINVAL; + ret = -1; + } + lockret = pthread_mutex_unlock(&__lttng_ust_compat_futex_lock); + if (lockret) { + errno = lockret; + ret = -1; + } +end: + return ret; +} + +/* + * _ASYNC SIGNAL-SAFE_. + * For now, timeout, uaddr2 and val3 are unused. + * Waiter will busy-loop trying to read the condition. + * It is OK to use compat_futex_async() on a futex address on which + * futex() WAKE operations are also performed. + */ + +int lttng_ust_compat_futex_async(int32_t *uaddr, int op, int32_t val, + const struct timespec *timeout, int32_t *uaddr2, int32_t val3) +{ + int ret = 0; + + /* + * Check if NULL. Don't let users expect that they are taken into + * account. + */ + assert(!timeout); + assert(!uaddr2); + assert(!val3); + + /* + * Ensure previous memory operations on uaddr have completed. + */ + cmm_smp_mb(); + + switch (op) { + case FUTEX_WAIT: + while (CMM_LOAD_SHARED(*uaddr) == val) { + if (poll(NULL, 0, 10) < 0) { + ret = -1; + /* Keep poll errno. Caller handles EINTR. */ + goto end; + } + } + break; + case FUTEX_WAKE: + break; + default: + errno = EINVAL; + ret = -1; + } +end: + return ret; +} diff --git a/liblttng-ust/futex.h b/liblttng-ust/futex.h new file mode 100644 index 00000000..902ceecf --- /dev/null +++ b/liblttng-ust/futex.h @@ -0,0 +1,189 @@ +#ifndef _LTTNG_UST_FUTEX_H +#define _LTTNG_UST_FUTEX_H + +/* + * liblttng-ust/futex.h + * + * Userspace RCU - sys_futex/compat_futex header. + * + * Copyright 2011-2012 - Mathieu Desnoyers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define FUTEX_WAIT 0 +#define FUTEX_WAKE 1 + +/* + * sys_futex compatibility header. + * Use *only* *either of* futex_noasync OR futex_async on a given address. + * + * futex_noasync cannot be executed in signal handlers, but ensures that + * it will be put in a wait queue even in compatibility mode. + * + * futex_async is signal-handler safe for the wakeup. 
It uses polling + * on the wait-side in compatibility mode. + * + * BEWARE: sys_futex() FUTEX_WAIT may return early if interrupted + * (returns EINTR). + */ + +extern int lttng_ust_compat_futex_noasync(int32_t *uaddr, int op, int32_t val, + const struct timespec *timeout, int32_t *uaddr2, int32_t val3); +extern int lttng_ust_compat_futex_async(int32_t *uaddr, int op, int32_t val, + const struct timespec *timeout, int32_t *uaddr2, int32_t val3); + +#if (defined(__linux__) && defined(__NR_futex)) + +#include +#include +#include +#include + +static inline int lttng_ust_futex(int32_t *uaddr, int op, int32_t val, + const struct timespec *timeout, int32_t *uaddr2, int32_t val3) +{ + return syscall(__NR_futex, uaddr, op, val, timeout, + uaddr2, val3); +} + +static inline int lttng_ust_futex_noasync(int32_t *uaddr, int op, int32_t val, + const struct timespec *timeout, int32_t *uaddr2, int32_t val3) +{ + int ret; + + ret = lttng_ust_futex(uaddr, op, val, timeout, uaddr2, val3); + if (caa_unlikely(ret < 0 && errno == ENOSYS)) { + /* + * The fallback on ENOSYS is the async-safe version of + * the compat futex implementation, because the + * async-safe compat implementation allows being used + * concurrently with calls to futex(). Indeed, sys_futex + * FUTEX_WAIT, on some architectures (mips and parisc), + * within a given process, spuriously return ENOSYS due + * to signal restart bugs on some kernel versions. + */ + return lttng_ust_compat_futex_async(uaddr, op, val, timeout, + uaddr2, val3); + } + return ret; + +} + +static inline int lttng_ust_futex_async(int32_t *uaddr, int op, int32_t val, + const struct timespec *timeout, int32_t *uaddr2, int32_t val3) +{ + int ret; + + ret = lttng_ust_futex(uaddr, op, val, timeout, uaddr2, val3); + if (caa_unlikely(ret < 0 && errno == ENOSYS)) { + return lttng_ust_compat_futex_async(uaddr, op, val, timeout, + uaddr2, val3); + } + return ret; +} + +#elif defined(__FreeBSD__) + +#include +#include + +static inline int lttng_ust_futex_async(int32_t *uaddr, int op, int32_t val, + const struct timespec *timeout, int32_t *uaddr2, int32_t val3) +{ + int umtx_op; + void *umtx_uaddr = NULL, *umtx_uaddr2 = NULL; + struct _umtx_time umtx_timeout = { + ._flags = UMTX_ABSTIME, + ._clockid = CLOCK_MONOTONIC, + }; + + switch (op) { + case FUTEX_WAIT: + /* On FreeBSD, a "u_int" is a 32-bit integer. */ + umtx_op = UMTX_OP_WAIT_UINT; + if (timeout != NULL) { + umtx_timeout._timeout = *timeout; + umtx_uaddr = (void *) sizeof(umtx_timeout); + umtx_uaddr2 = (void *) &umtx_timeout; + } + break; + case FUTEX_WAKE: + umtx_op = UMTX_OP_WAKE; + break; + default: + errno = EINVAL; + return -1; + } + + return _umtx_op(uaddr, umtx_op, (uint32_t) val, umtx_uaddr, + umtx_uaddr2); +} + +static inline int lttng_ust_futex_noasync(int32_t *uaddr, int op, int32_t val, + const struct timespec *timeout, int32_t *uaddr2, int32_t val3) +{ + return futex_async(uaddr, op, val, timeout, uaddr2, val3); +} + +#elif defined(__CYGWIN__) + +/* + * The futex_noasync compat code uses a weak symbol to share state across + * different shared object which is not possible on Windows with the + * Portable Executable format. Use the async compat code for both cases. 
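For reference, a sketch of the intended wait/wake pairing on a 32-bit futex word, mirroring how wait_for_sessiond() uses the wait_shm_mmap word later in this patch. The state variable is illustrative, and futex.h is internal to liblttng-ust.

#include <errno.h>
#include <stdint.h>

#include <urcu/uatomic.h>

#include "futex.h"	/* liblttng-ust internal header added above */

static int32_t state;	/* 0 = not ready, 1 = ready (illustrative) */

static void waiter(void)
{
	/* Sleep (or poll, in compat mode) while the word is still 0. */
	while (lttng_ust_futex_async(&state, FUTEX_WAIT, 0, NULL, NULL, 0)) {
		if (errno == EWOULDBLOCK)
			break;		/* value changed before we went to sleep */
		if (errno == EINTR)
			continue;	/* interrupted by a signal: retry */
		break;			/* unexpected error */
	}
}

static void waker(void)
{
	uatomic_set(&state, 1);
	/* Wake one waiter; INT_MAX would wake them all. */
	(void) lttng_ust_futex_async(&state, FUTEX_WAKE, 1, NULL, NULL, 0);
}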
+ */ +static inline int lttng_ust_futex_noasync(int32_t *uaddr, int op, int32_t val, + const struct timespec *timeout, int32_t *uaddr2, int32_t val3) +{ + return lttng_ust_compat_futex_async(uaddr, op, val, timeout, uaddr2, val3); +} + +static inline int lttng_ust_futex_async(int32_t *uaddr, int op, int32_t val, + const struct timespec *timeout, int32_t *uaddr2, int32_t val3) +{ + return lttng_ust_compat_futex_async(uaddr, op, val, timeout, uaddr2, val3); +} + +#else + +static inline int lttng_ust_futex_noasync(int32_t *uaddr, int op, int32_t val, + const struct timespec *timeout, int32_t *uaddr2, int32_t val3) +{ + return lttng_ust_compat_futex_noasync(uaddr, op, val, timeout, uaddr2, val3); +} + +static inline int lttng_ust_futex_async(int32_t *uaddr, int op, int32_t val, + const struct timespec *timeout, int32_t *uaddr2, int32_t val3) +{ + return lttng_ust_compat_futex_async(uaddr, op, val, timeout, uaddr2, val3); +} + +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _LTTNG_UST_FUTEX_H */ diff --git a/liblttng-ust/lttng-bytecode-interpreter.c b/liblttng-ust/lttng-bytecode-interpreter.c index bba3cac8..637fdc1a 100644 --- a/liblttng-ust/lttng-bytecode-interpreter.c +++ b/liblttng-ust/lttng-bytecode-interpreter.c @@ -27,9 +27,9 @@ #define _LGPL_SOURCE #include #include -#include #include +#include #include #include @@ -771,7 +771,7 @@ uint64_t bytecode_interpret(void *interpreter_data, struct lttng_interpreter_output *output) { struct bytecode_runtime *bytecode = interpreter_data; - struct lttng_ctx *ctx = rcu_dereference(*bytecode->p.pctx); + struct lttng_ctx *ctx = lttng_ust_rcu_dereference(*bytecode->p.pctx); void *pc, *next_pc, *start_pc; int ret = -EINVAL; uint64_t retval = 0; diff --git a/liblttng-ust/lttng-bytecode-validator.c b/liblttng-ust/lttng-bytecode-validator.c index f60c9367..198d2fab 100644 --- a/liblttng-ust/lttng-bytecode-validator.c +++ b/liblttng-ust/lttng-bytecode-validator.c @@ -29,8 +29,7 @@ #include #include -#include -#include +#include "rculfhash.h" #include "lttng-bytecode.h" #include "lttng-hash-helper.h" @@ -49,7 +48,7 @@ /* merge point table node */ struct lfht_mp_node { - struct cds_lfht_node node; + struct lttng_ust_lfht_node node; /* Context at merge point */ struct vstack stack; @@ -60,7 +59,7 @@ static unsigned long lttng_hash_seed; static unsigned int lttng_hash_seed_ready; static -int lttng_hash_match(struct cds_lfht_node *node, const void *key) +int lttng_hash_match(struct lttng_ust_lfht_node *node, const void *key) { struct lfht_mp_node *mp_node = caa_container_of(node, struct lfht_mp_node, node); @@ -92,14 +91,14 @@ int merge_points_compare(const struct vstack *stacka, } static -int merge_point_add_check(struct cds_lfht *ht, unsigned long target_pc, +int merge_point_add_check(struct lttng_ust_lfht *ht, unsigned long target_pc, const struct vstack *stack) { struct lfht_mp_node *node; unsigned long hash = lttng_hash_mix((const char *) target_pc, sizeof(target_pc), lttng_hash_seed); - struct cds_lfht_node *ret; + struct lttng_ust_lfht_node *ret; dbg_printf("Bytecode: adding merge point at offset %lu, hash %lu\n", target_pc, hash); @@ -108,7 +107,7 @@ int merge_point_add_check(struct cds_lfht *ht, unsigned long target_pc, return -ENOMEM; node->target_pc = target_pc; memcpy(&node->stack, stack, sizeof(node->stack)); - ret = cds_lfht_add_unique(ht, hash, lttng_hash_match, + ret = lttng_ust_lfht_add_unique(ht, hash, lttng_hash_match, (const char *) target_pc, &node->node); if (ret != &node->node) { struct lfht_mp_node *ret_mp = @@ -547,16 +546,16 @@ 
int bytecode_validate_overflow(struct bytecode_runtime *bytecode, } static -unsigned long delete_all_nodes(struct cds_lfht *ht) +unsigned long delete_all_nodes(struct lttng_ust_lfht *ht) { - struct cds_lfht_iter iter; + struct lttng_ust_lfht_iter iter; struct lfht_mp_node *node; unsigned long nr_nodes = 0; - cds_lfht_for_each_entry(ht, &iter, node, node) { + lttng_ust_lfht_for_each_entry(ht, &iter, node, node) { int ret; - ret = cds_lfht_del(ht, cds_lfht_iter_get_node(&iter)); + ret = lttng_ust_lfht_del(ht, lttng_ust_lfht_iter_get_node(&iter)); assert(!ret); /* note: this hash table is never used concurrently */ free(node); @@ -1223,15 +1222,15 @@ end: */ static int validate_instruction_all_contexts(struct bytecode_runtime *bytecode, - struct cds_lfht *merge_points, + struct lttng_ust_lfht *merge_points, struct vstack *stack, char *start_pc, char *pc) { int ret; unsigned long target_pc = pc - start_pc; - struct cds_lfht_iter iter; - struct cds_lfht_node *node; + struct lttng_ust_lfht_iter iter; + struct lttng_ust_lfht_node *node; struct lfht_mp_node *mp_node; unsigned long hash; @@ -1243,9 +1242,9 @@ int validate_instruction_all_contexts(struct bytecode_runtime *bytecode, /* Validate merge points */ hash = lttng_hash_mix((const char *) target_pc, sizeof(target_pc), lttng_hash_seed); - cds_lfht_lookup(merge_points, hash, lttng_hash_match, + lttng_ust_lfht_lookup(merge_points, hash, lttng_hash_match, (const char *) target_pc, &iter); - node = cds_lfht_iter_get_node(&iter); + node = lttng_ust_lfht_iter_get_node(&iter); if (node) { mp_node = caa_container_of(node, struct lfht_mp_node, node); @@ -1259,7 +1258,7 @@ int validate_instruction_all_contexts(struct bytecode_runtime *bytecode, /* Once validated, we can remove the merge point */ dbg_printf("Bytecode: remove merge point at offset %lu\n", target_pc); - ret = cds_lfht_del(merge_points, node); + ret = lttng_ust_lfht_del(merge_points, node); assert(!ret); } return 0; @@ -1273,7 +1272,7 @@ int validate_instruction_all_contexts(struct bytecode_runtime *bytecode, */ static int exec_insn(struct bytecode_runtime *bytecode, - struct cds_lfht *merge_points, + struct lttng_ust_lfht *merge_points, struct vstack *stack, char **_next_pc, char *pc) @@ -1960,7 +1959,7 @@ end: */ int lttng_bytecode_validate(struct bytecode_runtime *bytecode) { - struct cds_lfht *merge_points; + struct lttng_ust_lfht *merge_points; char *pc, *next_pc, *start_pc; int ret = -EINVAL; struct vstack stack; @@ -1977,7 +1976,7 @@ int lttng_bytecode_validate(struct bytecode_runtime *bytecode) * holding RCU read-side lock and free nodes without using * call_rcu. 
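A minimal usage sketch of the embedded hash table, restricted to the calls visible in these hunks and assuming they keep the cds_lfht calling conventions they were renamed from. The node type, key and table sizes are illustrative.

#include <assert.h>
#include <stdlib.h>

#include <urcu/compiler.h>	/* caa_container_of */

#include "rculfhash.h"

struct pc_node {
	struct lttng_ust_lfht_node node;
	unsigned long target_pc;
};

static int pc_match(struct lttng_ust_lfht_node *node, const void *key)
{
	struct pc_node *n = caa_container_of(node, struct pc_node, node);

	return n->target_pc == (unsigned long) key;
}

static void merge_point_table_sketch(void)
{
	struct lttng_ust_lfht *ht;
	struct lttng_ust_lfht_iter iter;
	struct lttng_ust_lfht_node *found;
	struct pc_node *n;
	unsigned long key = 8, hash = 42;	/* a real caller hashes the key */

	ht = lttng_ust_lfht_new(64, 64, 64, 0, NULL);
	if (!ht)
		return;
	n = calloc(1, sizeof(*n));
	if (!n)
		goto out;
	n->target_pc = key;
	/* Insert, keyed by target_pc; an existing node is returned on duplicate. */
	found = lttng_ust_lfht_add_unique(ht, hash, pc_match,
			(const void *) key, &n->node);
	assert(found == &n->node);

	/* Look the node up again, then remove and free it. */
	lttng_ust_lfht_lookup(ht, hash, pc_match, (const void *) key, &iter);
	found = lttng_ust_lfht_iter_get_node(&iter);
	if (found && !lttng_ust_lfht_del(ht, found))
		free(caa_container_of(found, struct pc_node, node));
out:
	if (lttng_ust_lfht_destroy(ht))
		abort();
}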
*/ - merge_points = cds_lfht_new(DEFAULT_NR_MERGE_POINTS, + merge_points = lttng_ust_lfht_new(DEFAULT_NR_MERGE_POINTS, MIN_NR_BUCKETS, MAX_NR_BUCKETS, 0, NULL); if (!merge_points) { @@ -2017,7 +2016,7 @@ end: ret = -EINVAL; } } - if (cds_lfht_destroy(merge_points, NULL)) { + if (lttng_ust_lfht_destroy(merge_points)) { ERR("Error destroying hash table\n"); } return ret; diff --git a/liblttng-ust/lttng-context.c b/liblttng-ust/lttng-context.c index c457d3e1..fde3607d 100644 --- a/liblttng-ust/lttng-context.c +++ b/liblttng-ust/lttng-context.c @@ -24,12 +24,13 @@ #include #include #include -#include +#include #include #include #include #include #include +#include "tracepoint-internal.h" #include "context-internal.h" @@ -157,8 +158,8 @@ int lttng_context_add_rcu(struct lttng_ctx **ctx_p, } *nf = *f; lttng_context_update(new_ctx); - rcu_assign_pointer(*ctx_p, new_ctx); - synchronize_trace(); + lttng_ust_rcu_assign_pointer(*ctx_p, new_ctx); + lttng_ust_synchronize_trace(); if (old_ctx) { free(old_ctx->fields); free(old_ctx); @@ -391,8 +392,8 @@ int lttng_ust_context_set_provider_rcu(struct lttng_ctx **_ctx, new_fields[i].get_value = get_value; } new_ctx->fields = new_fields; - rcu_assign_pointer(*_ctx, new_ctx); - synchronize_trace(); + lttng_ust_rcu_assign_pointer(*_ctx, new_ctx); + lttng_ust_synchronize_trace(); free(ctx->fields); free(ctx); return 0; diff --git a/liblttng-ust/lttng-events.c b/liblttng-ust/lttng-events.c index b455c363..4fa675dd 100644 --- a/liblttng-ust/lttng-events.c +++ b/liblttng-ust/lttng-events.c @@ -34,9 +34,9 @@ #include #include #include +#include #include -#include #include #include #include @@ -143,11 +143,6 @@ int lttng_loglevel_match(int loglevel, } } -void synchronize_trace(void) -{ - synchronize_rcu(); -} - struct lttng_session *lttng_session_create(void) { struct lttng_session *session; @@ -350,7 +345,7 @@ void lttng_session_destroy(struct lttng_session *session) cds_list_for_each_entry(event, &session->events_head, node) { _lttng_event_unregister(event); } - synchronize_trace(); /* Wait for in-flight events to complete */ + lttng_ust_synchronize_trace(); /* Wait for in-flight events to complete */ __tracepoint_probe_prune_release_queue(); cds_list_for_each_entry_safe(event_enabler, event_tmpenabler, &session->enablers_head, node) @@ -383,7 +378,7 @@ void lttng_event_notifier_group_destroy( &event_notifier_group->event_notifiers_head, node) _lttng_event_notifier_unregister(notifier); - synchronize_trace(); + lttng_ust_synchronize_trace(); cds_list_for_each_entry_safe(notifier_enabler, tmpnotifier_enabler, &event_notifier_group->enablers_head, node) @@ -1219,7 +1214,7 @@ void lttng_probe_provider_unregister_events( _lttng_event_notifier_unregister); /* Wait for grace period. */ - synchronize_trace(); + lttng_ust_synchronize_trace(); /* Prune the unregistration queue. 
*/ __tracepoint_probe_prune_release_queue(); diff --git a/liblttng-ust/lttng-tracer-core.h b/liblttng-ust/lttng-tracer-core.h index bce6c239..76f62510 100644 --- a/liblttng-ust/lttng-tracer-core.h +++ b/liblttng-ust/lttng-tracer-core.h @@ -80,6 +80,10 @@ void lttng_ust_dummy_get_value(struct lttng_ctx_field *field, int lttng_context_is_app(const char *name); void lttng_ust_fixup_tls(void); +extern void (*lttng_ust_liburcu_bp_before_fork)(void); +extern void (*lttng_ust_liburcu_bp_after_fork_parent)(void); +extern void (*lttng_ust_liburcu_bp_after_fork_child)(void); + #ifdef LTTNG_UST_HAVE_PERF_EVENT void lttng_ust_fixup_perf_counter_tls(void); void lttng_perf_lock(void); diff --git a/liblttng-ust/lttng-ust-abi.c b/liblttng-ust/lttng-ust-abi.c index 0d2058a3..a024b616 100644 --- a/liblttng-ust/lttng-ust-abi.c +++ b/liblttng-ust/lttng-ust-abi.c @@ -57,6 +57,7 @@ #include "../libringbuffer/frontend_types.h" #include "../libringbuffer/shm.h" #include "../libcounter/counter.h" +#include "tracepoint-internal.h" #include "lttng-tracer.h" #include "string-utils.h" #include "ust-events-internal.h" @@ -438,7 +439,7 @@ long lttng_cmd(int objd, unsigned int cmd, unsigned long arg, case LTTNG_UST_TRACEPOINT_FIELD_LIST: return lttng_abi_tracepoint_field_list(owner); case LTTNG_UST_WAIT_QUIESCENT: - synchronize_trace(); + lttng_ust_synchronize_trace(); return 0; case LTTNG_UST_EVENT_NOTIFIER_GROUP_CREATE: return lttng_abi_event_notifier_send_fd(owner, diff --git a/liblttng-ust/lttng-ust-comm.c b/liblttng-ust/lttng-ust-comm.c index 4b13571d..bb22a8a9 100644 --- a/liblttng-ust/lttng-ust-comm.c +++ b/liblttng-ust/lttng-ust-comm.c @@ -39,8 +39,9 @@ #include #include #include -#include +#include "futex.h" #include +#include #include #include @@ -434,8 +435,7 @@ void lttng_fixup_ust_mutex_nest_tls(void) static void lttng_fixup_urcu_bp_tls(void) { - rcu_read_lock(); - rcu_read_unlock(); + (void) lttng_ust_urcu_read_ongoing(); } void lttng_ust_fixup_tls(void) @@ -1624,7 +1624,7 @@ void wait_for_sessiond(struct sock_info *sock_info) if (uatomic_read((int32_t *) sock_info->wait_shm_mmap)) goto end_wait; - while (futex_async((int32_t *) sock_info->wait_shm_mmap, + while (lttng_ust_futex_async((int32_t *) sock_info->wait_shm_mmap, FUTEX_WAIT, 0, NULL, NULL, 0)) { switch (errno) { case EWOULDBLOCK: @@ -2315,7 +2315,9 @@ void ust_before_fork(sigset_t *save_sigset) pthread_mutex_lock(&ust_fork_mutex); ust_lock_nocheck(); - urcu_bp_before_fork(); + lttng_ust_urcu_before_fork(); + if (lttng_ust_liburcu_bp_before_fork) + lttng_ust_liburcu_bp_before_fork(); lttng_ust_lock_fd_tracker(); lttng_perf_lock(); } @@ -2343,7 +2345,9 @@ void ust_after_fork_parent(sigset_t *restore_sigset) if (URCU_TLS(lttng_ust_nest_count)) return; DBG("process %d", getpid()); - urcu_bp_after_fork_parent(); + lttng_ust_urcu_after_fork_parent(); + if (lttng_ust_liburcu_bp_after_fork_parent) + lttng_ust_liburcu_bp_after_fork_parent(); /* Release mutexes and reenable signals */ ust_after_fork_common(restore_sigset); } @@ -2369,7 +2373,9 @@ void ust_after_fork_child(sigset_t *restore_sigset) ust_context_vgids_reset(); DBG("process %d", getpid()); /* Release urcu mutexes */ - urcu_bp_after_fork_child(); + lttng_ust_urcu_after_fork_child(); + if (lttng_ust_liburcu_bp_after_fork_child) + lttng_ust_liburcu_bp_after_fork_child(); lttng_ust_cleanup(0); /* Release mutexes and reenable signals */ ust_after_fork_common(restore_sigset); diff --git a/liblttng-ust/lttng-ust-urcu-pointer.c b/liblttng-ust/lttng-ust-urcu-pointer.c new file mode 100644 index 
00000000..94b6a960 --- /dev/null +++ b/liblttng-ust/lttng-ust-urcu-pointer.c @@ -0,0 +1,54 @@ +/* + * lttng-ust-urcu-pointer.c + * + * library wrappers to be used by non-LGPL compatible source code. + * + * Copyright (c) 2009 Mathieu Desnoyers + * Copyright (c) 2009 Paul E. McKenney, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * IBM's contributions to this file may be relicensed under LGPLv2 or later. + */ + +#include + +#include +/* Do not #define _LGPL_SOURCE to ensure we can emit the wrapper symbols */ +#include + +void *lttng_ust_rcu_dereference_sym(void *p) +{ + return _lttng_ust_rcu_dereference(p); +} + +void *lttng_ust_rcu_set_pointer_sym(void **p, void *v) +{ + cmm_wmb(); + uatomic_set(p, v); + return v; +} + +void *lttng_ust_rcu_xchg_pointer_sym(void **p, void *v) +{ + cmm_wmb(); + return uatomic_xchg(p, v); +} + +void *lttng_ust_rcu_cmpxchg_pointer_sym(void **p, void *old, void *_new) +{ + cmm_wmb(); + return uatomic_cmpxchg(p, old, _new); +} diff --git a/liblttng-ust/lttng-ust-urcu.c b/liblttng-ust/lttng-ust-urcu.c new file mode 100644 index 00000000..88300031 --- /dev/null +++ b/liblttng-ust/lttng-ust-urcu.c @@ -0,0 +1,733 @@ +/* + * lttng-ust-urcu.c + * + * Userspace RCU library for LTTng-UST, derived from liburcu "bulletproof" version. + * + * Copyright (c) 2009 Mathieu Desnoyers + * Copyright (c) 2009 Paul E. McKenney, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * IBM's contributions to this file may be relicensed under LGPLv2 or later. 
+ */ + +#define _LGPL_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +/* Do not #define _LGPL_SOURCE to ensure we can emit the wrapper symbols */ +#undef _LGPL_SOURCE +#include +#define _LGPL_SOURCE + +#ifndef MAP_ANONYMOUS +#define MAP_ANONYMOUS MAP_ANON +#endif + +#ifdef __linux__ +static +void *mremap_wrapper(void *old_address, size_t old_size, + size_t new_size, int flags) +{ + return mremap(old_address, old_size, new_size, flags); +} +#else + +#define MREMAP_MAYMOVE 1 +#define MREMAP_FIXED 2 + +/* + * mremap wrapper for non-Linux systems not allowing MAYMOVE. + * This is not generic. +*/ +static +void *mremap_wrapper(void *old_address, size_t old_size, + size_t new_size, int flags) +{ + assert(!(flags & MREMAP_MAYMOVE)); + + return MAP_FAILED; +} +#endif + +/* Sleep delay in ms */ +#define RCU_SLEEP_DELAY_MS 10 +#define INIT_NR_THREADS 8 +#define ARENA_INIT_ALLOC \ + sizeof(struct registry_chunk) \ + + INIT_NR_THREADS * sizeof(struct lttng_ust_urcu_reader) + +/* + * Active attempts to check for reader Q.S. before calling sleep(). + */ +#define RCU_QS_ACTIVE_ATTEMPTS 100 + +static +int lttng_ust_urcu_refcount; + +/* If the headers do not support membarrier system call, fall back smp_mb. */ +#ifdef __NR_membarrier +# define membarrier(...) syscall(__NR_membarrier, __VA_ARGS__) +#else +# define membarrier(...) -ENOSYS +#endif + +enum membarrier_cmd { + MEMBARRIER_CMD_QUERY = 0, + MEMBARRIER_CMD_SHARED = (1 << 0), + /* reserved for MEMBARRIER_CMD_SHARED_EXPEDITED (1 << 1) */ + /* reserved for MEMBARRIER_CMD_PRIVATE (1 << 2) */ + MEMBARRIER_CMD_PRIVATE_EXPEDITED = (1 << 3), + MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED = (1 << 4), +}; + +static +void __attribute__((constructor)) _lttng_ust_urcu_init(void); +static +void __attribute__((destructor)) lttng_ust_urcu_exit(void); + +#ifndef CONFIG_RCU_FORCE_SYS_MEMBARRIER +int lttng_ust_urcu_has_sys_membarrier; +#endif + +/* + * rcu_gp_lock ensures mutual exclusion between threads calling + * synchronize_rcu(). + */ +static pthread_mutex_t rcu_gp_lock = PTHREAD_MUTEX_INITIALIZER; +/* + * rcu_registry_lock ensures mutual exclusion between threads + * registering and unregistering themselves to/from the registry, and + * with threads reading that registry from synchronize_rcu(). However, + * this lock is not held all the way through the completion of awaiting + * for the grace period. It is sporadically released between iterations + * on the registry. + * rcu_registry_lock may nest inside rcu_gp_lock. + */ +static pthread_mutex_t rcu_registry_lock = PTHREAD_MUTEX_INITIALIZER; + +static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER; +static int initialized; + +static pthread_key_t lttng_ust_urcu_key; + +struct lttng_ust_urcu_gp lttng_ust_urcu_gp = { .ctr = LTTNG_UST_URCU_GP_COUNT }; + +/* + * Pointer to registry elements. Written to only by each individual reader. Read + * by both the reader and the writers. 
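The initialization of lttng_ust_urcu_has_sys_membarrier is outside the hunks shown here; the following is a plausible sketch only, assuming the same detection scheme as the liburcu-bp code this file is derived from (query the command mask, register private expedited membarrier, then enable the flag).

static void lttng_ust_urcu_sys_membarrier_init(void)
{
	int mask;

	mask = membarrier(MEMBARRIER_CMD_QUERY, 0);
	if (mask < 0)
		return;		/* no membarrier(): smp_mb_master() falls back to cmm_smp_mb() */
	if (!(mask & MEMBARRIER_CMD_PRIVATE_EXPEDITED))
		return;
	/* The private expedited command must be registered before first use. */
	if (membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0))
		abort();
#ifndef CONFIG_RCU_FORCE_SYS_MEMBARRIER
	lttng_ust_urcu_has_sys_membarrier = 1;
#endif
}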
+ */ +DEFINE_URCU_TLS(struct lttng_ust_urcu_reader *, lttng_ust_urcu_reader); + +static CDS_LIST_HEAD(registry); + +struct registry_chunk { + size_t data_len; /* data length */ + size_t used; /* amount of data used */ + struct cds_list_head node; /* chunk_list node */ + char data[]; +}; + +struct registry_arena { + struct cds_list_head chunk_list; +}; + +static struct registry_arena registry_arena = { + .chunk_list = CDS_LIST_HEAD_INIT(registry_arena.chunk_list), +}; + +/* Saved fork signal mask, protected by rcu_gp_lock */ +static sigset_t saved_fork_signal_mask; + +static void mutex_lock(pthread_mutex_t *mutex) +{ + int ret; + +#ifndef DISTRUST_SIGNALS_EXTREME + ret = pthread_mutex_lock(mutex); + if (ret) + abort(); +#else /* #ifndef DISTRUST_SIGNALS_EXTREME */ + while ((ret = pthread_mutex_trylock(mutex)) != 0) { + if (ret != EBUSY && ret != EINTR) + abort(); + poll(NULL,0,10); + } +#endif /* #else #ifndef DISTRUST_SIGNALS_EXTREME */ +} + +static void mutex_unlock(pthread_mutex_t *mutex) +{ + int ret; + + ret = pthread_mutex_unlock(mutex); + if (ret) + abort(); +} + +static void smp_mb_master(void) +{ + if (caa_likely(lttng_ust_urcu_has_sys_membarrier)) { + if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0)) + abort(); + } else { + cmm_smp_mb(); + } +} + +/* + * Always called with rcu_registry lock held. Releases this lock between + * iterations and grabs it again. Holds the lock when it returns. + */ +static void wait_for_readers(struct cds_list_head *input_readers, + struct cds_list_head *cur_snap_readers, + struct cds_list_head *qsreaders) +{ + unsigned int wait_loops = 0; + struct lttng_ust_urcu_reader *index, *tmp; + + /* + * Wait for each thread URCU_TLS(lttng_ust_urcu_reader).ctr to either + * indicate quiescence (not nested), or observe the current + * rcu_gp.ctr value. + */ + for (;;) { + if (wait_loops < RCU_QS_ACTIVE_ATTEMPTS) + wait_loops++; + + cds_list_for_each_entry_safe(index, tmp, input_readers, node) { + switch (lttng_ust_urcu_reader_state(&index->ctr)) { + case LTTNG_UST_URCU_READER_ACTIVE_CURRENT: + if (cur_snap_readers) { + cds_list_move(&index->node, + cur_snap_readers); + break; + } + /* Fall-through */ + case LTTNG_UST_URCU_READER_INACTIVE: + cds_list_move(&index->node, qsreaders); + break; + case LTTNG_UST_URCU_READER_ACTIVE_OLD: + /* + * Old snapshot. Leaving node in + * input_readers will make us busy-loop + * until the snapshot becomes current or + * the reader becomes inactive. + */ + break; + } + } + + if (cds_list_empty(input_readers)) { + break; + } else { + /* Temporarily unlock the registry lock. */ + mutex_unlock(&rcu_registry_lock); + if (wait_loops >= RCU_QS_ACTIVE_ATTEMPTS) + (void) poll(NULL, 0, RCU_SLEEP_DELAY_MS); + else + caa_cpu_relax(); + /* Re-lock the registry lock before the next loop. */ + mutex_lock(&rcu_registry_lock); + } + } +} + +void lttng_ust_urcu_synchronize_rcu(void) +{ + CDS_LIST_HEAD(cur_snap_readers); + CDS_LIST_HEAD(qsreaders); + sigset_t newmask, oldmask; + int ret; + + ret = sigfillset(&newmask); + assert(!ret); + ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask); + assert(!ret); + + mutex_lock(&rcu_gp_lock); + + mutex_lock(&rcu_registry_lock); + + if (cds_list_empty(®istry)) + goto out; + + /* All threads should read qparity before accessing data structure + * where new ptr points to. */ + /* Write new ptr before changing the qparity */ + smp_mb_master(); + + /* + * Wait for readers to observe original parity or be quiescent. + * wait_for_readers() can release and grab again rcu_registry_lock + * interally. 
+ */ + wait_for_readers(®istry, &cur_snap_readers, &qsreaders); + + /* + * Adding a cmm_smp_mb() which is _not_ formally required, but makes the + * model easier to understand. It does not have a big performance impact + * anyway, given this is the write-side. + */ + cmm_smp_mb(); + + /* Switch parity: 0 -> 1, 1 -> 0 */ + CMM_STORE_SHARED(lttng_ust_urcu_gp.ctr, lttng_ust_urcu_gp.ctr ^ LTTNG_UST_URCU_GP_CTR_PHASE); + + /* + * Must commit qparity update to memory before waiting for other parity + * quiescent state. Failure to do so could result in the writer waiting + * forever while new readers are always accessing data (no progress). + * Ensured by CMM_STORE_SHARED and CMM_LOAD_SHARED. + */ + + /* + * Adding a cmm_smp_mb() which is _not_ formally required, but makes the + * model easier to understand. It does not have a big performance impact + * anyway, given this is the write-side. + */ + cmm_smp_mb(); + + /* + * Wait for readers to observe new parity or be quiescent. + * wait_for_readers() can release and grab again rcu_registry_lock + * interally. + */ + wait_for_readers(&cur_snap_readers, NULL, &qsreaders); + + /* + * Put quiescent reader list back into registry. + */ + cds_list_splice(&qsreaders, ®istry); + + /* + * Finish waiting for reader threads before letting the old ptr being + * freed. + */ + smp_mb_master(); +out: + mutex_unlock(&rcu_registry_lock); + mutex_unlock(&rcu_gp_lock); + ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL); + assert(!ret); +} + +/* + * library wrappers to be used by non-LGPL compatible source code. + */ + +void lttng_ust_urcu_read_lock(void) +{ + _lttng_ust_urcu_read_lock(); +} + +void lttng_ust_urcu_read_unlock(void) +{ + _lttng_ust_urcu_read_unlock(); +} + +int lttng_ust_urcu_read_ongoing(void) +{ + return _lttng_ust_urcu_read_ongoing(); +} + +/* + * Only grow for now. If empty, allocate a ARENA_INIT_ALLOC sized chunk. + * Else, try expanding the last chunk. If this fails, allocate a new + * chunk twice as big as the last chunk. + * Memory used by chunks _never_ moves. A chunk could theoretically be + * freed when all "used" slots are released, but we don't do it at this + * point. + */ +static +void expand_arena(struct registry_arena *arena) +{ + struct registry_chunk *new_chunk, *last_chunk; + size_t old_chunk_len, new_chunk_len; + + /* No chunk. */ + if (cds_list_empty(&arena->chunk_list)) { + assert(ARENA_INIT_ALLOC >= + sizeof(struct registry_chunk) + + sizeof(struct lttng_ust_urcu_reader)); + new_chunk_len = ARENA_INIT_ALLOC; + new_chunk = (struct registry_chunk *) mmap(NULL, + new_chunk_len, + PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, + -1, 0); + if (new_chunk == MAP_FAILED) + abort(); + memset(new_chunk, 0, new_chunk_len); + new_chunk->data_len = + new_chunk_len - sizeof(struct registry_chunk); + cds_list_add_tail(&new_chunk->node, &arena->chunk_list); + return; /* We're done. */ + } + + /* Try expanding last chunk. */ + last_chunk = cds_list_entry(arena->chunk_list.prev, + struct registry_chunk, node); + old_chunk_len = + last_chunk->data_len + sizeof(struct registry_chunk); + new_chunk_len = old_chunk_len << 1; + + /* Don't allow memory mapping to move, just expand. */ + new_chunk = mremap_wrapper(last_chunk, old_chunk_len, + new_chunk_len, 0); + if (new_chunk != MAP_FAILED) { + /* Should not have moved. */ + assert(new_chunk == last_chunk); + memset((char *) last_chunk + old_chunk_len, 0, + new_chunk_len - old_chunk_len); + last_chunk->data_len = + new_chunk_len - sizeof(struct registry_chunk); + return; /* We're done. 
*/ + } + + /* Remap did not succeed, we need to add a new chunk. */ + new_chunk = (struct registry_chunk *) mmap(NULL, + new_chunk_len, + PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, + -1, 0); + if (new_chunk == MAP_FAILED) + abort(); + memset(new_chunk, 0, new_chunk_len); + new_chunk->data_len = + new_chunk_len - sizeof(struct registry_chunk); + cds_list_add_tail(&new_chunk->node, &arena->chunk_list); +} + +static +struct lttng_ust_urcu_reader *arena_alloc(struct registry_arena *arena) +{ + struct registry_chunk *chunk; + struct lttng_ust_urcu_reader *rcu_reader_reg; + int expand_done = 0; /* Only allow to expand once per alloc */ + size_t len = sizeof(struct lttng_ust_urcu_reader); + +retry: + cds_list_for_each_entry(chunk, &arena->chunk_list, node) { + if (chunk->data_len - chunk->used < len) + continue; + /* Find spot */ + for (rcu_reader_reg = (struct lttng_ust_urcu_reader *) &chunk->data[0]; + rcu_reader_reg < (struct lttng_ust_urcu_reader *) &chunk->data[chunk->data_len]; + rcu_reader_reg++) { + if (!rcu_reader_reg->alloc) { + rcu_reader_reg->alloc = 1; + chunk->used += len; + return rcu_reader_reg; + } + } + } + + if (!expand_done) { + expand_arena(arena); + expand_done = 1; + goto retry; + } + + return NULL; +} + +/* Called with signals off and mutex locked */ +static +void add_thread(void) +{ + struct lttng_ust_urcu_reader *rcu_reader_reg; + int ret; + + rcu_reader_reg = arena_alloc(®istry_arena); + if (!rcu_reader_reg) + abort(); + ret = pthread_setspecific(lttng_ust_urcu_key, rcu_reader_reg); + if (ret) + abort(); + + /* Add to registry */ + rcu_reader_reg->tid = pthread_self(); + assert(rcu_reader_reg->ctr == 0); + cds_list_add(&rcu_reader_reg->node, ®istry); + /* + * Reader threads are pointing to the reader registry. This is + * why its memory should never be relocated. + */ + URCU_TLS(lttng_ust_urcu_reader) = rcu_reader_reg; +} + +/* Called with mutex locked */ +static +void cleanup_thread(struct registry_chunk *chunk, + struct lttng_ust_urcu_reader *rcu_reader_reg) +{ + rcu_reader_reg->ctr = 0; + cds_list_del(&rcu_reader_reg->node); + rcu_reader_reg->tid = 0; + rcu_reader_reg->alloc = 0; + chunk->used -= sizeof(struct lttng_ust_urcu_reader); +} + +static +struct registry_chunk *find_chunk(struct lttng_ust_urcu_reader *rcu_reader_reg) +{ + struct registry_chunk *chunk; + + cds_list_for_each_entry(chunk, ®istry_arena.chunk_list, node) { + if (rcu_reader_reg < (struct lttng_ust_urcu_reader *) &chunk->data[0]) + continue; + if (rcu_reader_reg >= (struct lttng_ust_urcu_reader *) &chunk->data[chunk->data_len]) + continue; + return chunk; + } + return NULL; +} + +/* Called with signals off and mutex locked */ +static +void remove_thread(struct lttng_ust_urcu_reader *rcu_reader_reg) +{ + cleanup_thread(find_chunk(rcu_reader_reg), rcu_reader_reg); + URCU_TLS(lttng_ust_urcu_reader) = NULL; +} + +/* Disable signals, take mutex, add to registry */ +void lttng_ust_urcu_register(void) +{ + sigset_t newmask, oldmask; + int ret; + + ret = sigfillset(&newmask); + if (ret) + abort(); + ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask); + if (ret) + abort(); + + /* + * Check if a signal concurrently registered our thread since + * the check in rcu_read_lock(). + */ + if (URCU_TLS(lttng_ust_urcu_reader)) + goto end; + + /* + * Take care of early registration before lttng_ust_urcu constructor. 
+ */ + _lttng_ust_urcu_init(); + + mutex_lock(&rcu_registry_lock); + add_thread(); + mutex_unlock(&rcu_registry_lock); +end: + ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL); + if (ret) + abort(); +} + +void lttng_ust_urcu_register_thread(void) +{ + if (caa_unlikely(!URCU_TLS(lttng_ust_urcu_reader))) + lttng_ust_urcu_register(); /* If not yet registered. */ +} + +/* Disable signals, take mutex, remove from registry */ +static +void lttng_ust_urcu_unregister(struct lttng_ust_urcu_reader *rcu_reader_reg) +{ + sigset_t newmask, oldmask; + int ret; + + ret = sigfillset(&newmask); + if (ret) + abort(); + ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask); + if (ret) + abort(); + + mutex_lock(&rcu_registry_lock); + remove_thread(rcu_reader_reg); + mutex_unlock(&rcu_registry_lock); + ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL); + if (ret) + abort(); + lttng_ust_urcu_exit(); +} + +/* + * Remove thread from the registry when it exits, and flag it as + * destroyed so garbage collection can take care of it. + */ +static +void lttng_ust_urcu_thread_exit_notifier(void *rcu_key) +{ + lttng_ust_urcu_unregister(rcu_key); +} + +#ifdef CONFIG_RCU_FORCE_SYS_MEMBARRIER +static +void lttng_ust_urcu_sys_membarrier_status(bool available) +{ + if (!available) + abort(); +} +#else +static +void lttng_ust_urcu_sys_membarrier_status(bool available) +{ + if (!available) + return; + lttng_ust_urcu_has_sys_membarrier = 1; +} +#endif + +static +void lttng_ust_urcu_sys_membarrier_init(void) +{ + bool available = false; + int mask; + + mask = membarrier(MEMBARRIER_CMD_QUERY, 0); + if (mask >= 0) { + if (mask & MEMBARRIER_CMD_PRIVATE_EXPEDITED) { + if (membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0)) + abort(); + available = true; + } + } + lttng_ust_urcu_sys_membarrier_status(available); +} + +static +void _lttng_ust_urcu_init(void) +{ + mutex_lock(&init_lock); + if (!lttng_ust_urcu_refcount++) { + int ret; + + ret = pthread_key_create(<tng_ust_urcu_key, + lttng_ust_urcu_thread_exit_notifier); + if (ret) + abort(); + lttng_ust_urcu_sys_membarrier_init(); + initialized = 1; + } + mutex_unlock(&init_lock); +} + +static +void lttng_ust_urcu_exit(void) +{ + mutex_lock(&init_lock); + if (!--lttng_ust_urcu_refcount) { + struct registry_chunk *chunk, *tmp; + int ret; + + cds_list_for_each_entry_safe(chunk, tmp, + ®istry_arena.chunk_list, node) { + munmap((void *) chunk, chunk->data_len + + sizeof(struct registry_chunk)); + } + CDS_INIT_LIST_HEAD(®istry_arena.chunk_list); + ret = pthread_key_delete(lttng_ust_urcu_key); + if (ret) + abort(); + } + mutex_unlock(&init_lock); +} + +/* + * Holding the rcu_gp_lock and rcu_registry_lock across fork will make + * sure we fork() don't race with a concurrent thread executing with + * any of those locks held. This ensures that the registry and data + * protected by rcu_gp_lock are in a coherent state in the child. + */ +void lttng_ust_urcu_before_fork(void) +{ + sigset_t newmask, oldmask; + int ret; + + ret = sigfillset(&newmask); + assert(!ret); + ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask); + assert(!ret); + mutex_lock(&rcu_gp_lock); + mutex_lock(&rcu_registry_lock); + saved_fork_signal_mask = oldmask; +} + +void lttng_ust_urcu_after_fork_parent(void) +{ + sigset_t oldmask; + int ret; + + oldmask = saved_fork_signal_mask; + mutex_unlock(&rcu_registry_lock); + mutex_unlock(&rcu_gp_lock); + ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL); + assert(!ret); +} + +/* + * Prune all entries from registry except our own thread. Fits the Linux + * fork behavior. 
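The registration, read-side, grace-period and fork entry points defined in this file form the surface that replaces liburcu-bp. A usage sketch, assuming lttng/urcu/urcu-ust.h declares the functions used here; the settings struct and helper names are illustrative:

#include <pthread.h>
#include <stdlib.h>

#include <lttng/urcu/urcu-ust.h>	/* assumed to declare the lttng_ust_urcu_* functions */

/* Defined in lttng-ust-urcu-pointer.c above (normally obtained via lttng/urcu/pointer.h). */
void *lttng_ust_rcu_dereference_sym(void *p);
void *lttng_ust_rcu_xchg_pointer_sym(void **p, void *v);

struct settings {			/* illustrative payload, not part of the patch */
	int level;
};

static struct settings *current_settings;

static void *reader_thread(void *arg)
{
	int level = 0;
	struct settings *s;

	(void) arg;
	lttng_ust_urcu_register_thread();	/* no-op if already registered */
	lttng_ust_urcu_read_lock();
	s = lttng_ust_rcu_dereference_sym(current_settings);
	if (s)
		level = s->level;
	lttng_ust_urcu_read_unlock();
	return (void *) (long) level;
}

static void update_settings(int level)
{
	struct settings *new_s, *old_s;

	new_s = malloc(sizeof(*new_s));
	if (!new_s)
		abort();
	new_s->level = level;
	old_s = lttng_ust_rcu_xchg_pointer_sym((void **) &current_settings, new_s);
	lttng_ust_urcu_synchronize_rcu();	/* wait out all pre-existing readers */
	free(old_s);				/* safe: no reader can still see old_s */
}

/* A forking process would wire the fork hooks shown above, for example: */
static void wire_fork_hooks(void)
{
	(void) pthread_atfork(lttng_ust_urcu_before_fork,
			lttng_ust_urcu_after_fork_parent,
			lttng_ust_urcu_after_fork_child);
}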
Called with rcu_gp_lock and rcu_registry_lock held. + */ +static +void lttng_ust_urcu_prune_registry(void) +{ + struct registry_chunk *chunk; + struct lttng_ust_urcu_reader *rcu_reader_reg; + + cds_list_for_each_entry(chunk, ®istry_arena.chunk_list, node) { + for (rcu_reader_reg = (struct lttng_ust_urcu_reader *) &chunk->data[0]; + rcu_reader_reg < (struct lttng_ust_urcu_reader *) &chunk->data[chunk->data_len]; + rcu_reader_reg++) { + if (!rcu_reader_reg->alloc) + continue; + if (rcu_reader_reg->tid == pthread_self()) + continue; + cleanup_thread(chunk, rcu_reader_reg); + } + } +} + +void lttng_ust_urcu_after_fork_child(void) +{ + sigset_t oldmask; + int ret; + + lttng_ust_urcu_prune_registry(); + oldmask = saved_fork_signal_mask; + mutex_unlock(&rcu_registry_lock); + mutex_unlock(&rcu_gp_lock); + ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL); + assert(!ret); +} diff --git a/liblttng-ust/rculfhash-internal.h b/liblttng-ust/rculfhash-internal.h new file mode 100644 index 00000000..86eb501f --- /dev/null +++ b/liblttng-ust/rculfhash-internal.h @@ -0,0 +1,179 @@ +#ifndef _LTTNG_UST_RCULFHASH_INTERNAL_H +#define _LTTNG_UST_RCULFHASH_INTERNAL_H + +/* + * liblttng-ust/rculfhash-internal.h + * + * Internal header for Lock-Free RCU Hash Table + * + * Copyright 2011 - Mathieu Desnoyers + * Copyright 2011 - Lai Jiangshan + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "rculfhash.h" +#include +#include +#include + +#ifdef DEBUG +#define dbg_printf(fmt, args...) printf("[debug lttng-ust rculfhash] " fmt, ## args) +#else +#define dbg_printf(fmt, args...) \ +do { \ + /* do nothing but check printf format */ \ + if (0) \ + printf("[debug lttng-ust rculfhash] " fmt, ## args); \ +} while (0) +#endif + +#if (CAA_BITS_PER_LONG == 32) +#define MAX_TABLE_ORDER 32 +#else +#define MAX_TABLE_ORDER 64 +#endif + +#define MAX_CHUNK_TABLE (1UL << 10) + +#ifndef min +#define min(a, b) ((a) < (b) ? (a) : (b)) +#endif + +#ifndef max +#define max(a, b) ((a) > (b) ? (a) : (b)) +#endif + +/* + * lttng_ust_lfht: Top-level data structure representing a lock-free hash + * table. Defined in the implementation file to make it be an opaque + * cookie to users. + * + * The fields used in fast-paths are placed near the end of the + * structure, because we need to have a variable-sized union to contain + * the mm plugin fields, which are used in the fast path. 
+ */ +struct lttng_ust_lfht { + /* Initial configuration items */ + unsigned long max_nr_buckets; + const struct lttng_ust_lfht_mm_type *mm; /* memory management plugin */ + const struct rcu_flavor_struct *flavor; /* RCU flavor */ + + /* + * We need to put the work threads offline (QSBR) when taking this + * mutex, because we use synchronize_rcu within this mutex critical + * section, which waits on read-side critical sections, and could + * therefore cause grace-period deadlock if we hold off RCU G.P. + * completion. + */ + pthread_mutex_t resize_mutex; /* resize mutex: add/del mutex */ + unsigned int in_progress_destroy; + unsigned long resize_target; + int resize_initiated; + + /* + * Variables needed for add and remove fast-paths. + */ + int flags; + unsigned long min_alloc_buckets_order; + unsigned long min_nr_alloc_buckets; + + /* + * Variables needed for the lookup, add and remove fast-paths. + */ + unsigned long size; /* always a power of 2, shared (RCU) */ + /* + * bucket_at pointer is kept here to skip the extra level of + * dereference needed to get to "mm" (this is a fast-path). + */ + struct lttng_ust_lfht_node *(*bucket_at)(struct lttng_ust_lfht *ht, + unsigned long index); + /* + * Dynamic length "tbl_chunk" needs to be at the end of + * lttng_ust_lfht. + */ + union { + /* + * Contains the per order-index-level bucket node table. + * The size of each bucket node table is half the number + * of hashes contained in this order (except for order 0). + * The minimum allocation buckets size parameter allows + * combining the bucket node arrays of the lowermost + * levels to improve cache locality for small index orders. + */ + struct lttng_ust_lfht_node *tbl_order[MAX_TABLE_ORDER]; + + /* + * Contains the bucket node chunks. The size of each + * bucket node chunk is ->min_alloc_size (we avoid to + * allocate chunks with different size). Chunks improve + * cache locality for small index orders, and are more + * friendly with environments where allocation of large + * contiguous memory areas is challenging due to memory + * fragmentation concerns or inability to use virtual + * memory addressing. + */ + struct lttng_ust_lfht_node *tbl_chunk[0]; + + /* + * Memory mapping with room for all possible buckets. + * Their memory is allocated when needed. + */ + struct lttng_ust_lfht_node *tbl_mmap; + }; + /* + * End of variables needed for the lookup, add and remove + * fast-paths. 
+ */ +}; + +extern unsigned int lttng_ust_lfht_fls_ulong(unsigned long x); +extern int lttng_ust_lfht_get_count_order_ulong(unsigned long x); + +#ifdef POISON_FREE +#define poison_free(ptr) \ + do { \ + if (ptr) { \ + memset(ptr, 0x42, sizeof(*(ptr))); \ + free(ptr); \ + } \ + } while (0) +#else +#define poison_free(ptr) free(ptr) +#endif + +static inline +struct lttng_ust_lfht *__default_alloc_lttng_ust_lfht( + const struct lttng_ust_lfht_mm_type *mm, + unsigned long lttng_ust_lfht_size, + unsigned long min_nr_alloc_buckets, + unsigned long max_nr_buckets) +{ + struct lttng_ust_lfht *ht; + + ht = calloc(1, lttng_ust_lfht_size); + assert(ht); + + ht->mm = mm; + ht->bucket_at = mm->bucket_at; + ht->min_nr_alloc_buckets = min_nr_alloc_buckets; + ht->min_alloc_buckets_order = + lttng_ust_lfht_get_count_order_ulong(min_nr_alloc_buckets); + ht->max_nr_buckets = max_nr_buckets; + + return ht; +} + +#endif /* _LTTNG_UST_RCULFHASH_INTERNAL_H */ diff --git a/liblttng-ust/rculfhash-mm-chunk.c b/liblttng-ust/rculfhash-mm-chunk.c new file mode 100644 index 00000000..2eef6ebc --- /dev/null +++ b/liblttng-ust/rculfhash-mm-chunk.c @@ -0,0 +1,97 @@ +/* + * rculfhash-mm-chunk.c + * + * Chunk based memory management for Lock-Free RCU Hash Table + * + * Copyright 2011 - Lai Jiangshan + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include "rculfhash-internal.h" + +static +void lttng_ust_lfht_alloc_bucket_table(struct lttng_ust_lfht *ht, unsigned long order) +{ + if (order == 0) { + ht->tbl_chunk[0] = calloc(ht->min_nr_alloc_buckets, + sizeof(struct lttng_ust_lfht_node)); + assert(ht->tbl_chunk[0]); + } else if (order > ht->min_alloc_buckets_order) { + unsigned long i, len = 1UL << (order - 1 - ht->min_alloc_buckets_order); + + for (i = len; i < 2 * len; i++) { + ht->tbl_chunk[i] = calloc(ht->min_nr_alloc_buckets, + sizeof(struct lttng_ust_lfht_node)); + assert(ht->tbl_chunk[i]); + } + } + /* Nothing to do for 0 < order && order <= ht->min_alloc_buckets_order */ +} + +/* + * lttng_ust_lfht_free_bucket_table() should be called with decreasing order. + * When lttng_ust_lfht_free_bucket_table(0) is called, it means the whole + * lfht is destroyed. 
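For reference, the chunk backend resolves a bucket index into a (chunk, offset) pair with a shift and a mask (see bucket_at() just below). A worked example, assuming min_nr_alloc_buckets = 8, hence min_alloc_buckets_order = 3; the helper is illustrative only:

/* Illustration of the chunk backend's index arithmetic. */
static void chunk_index_example(void)
{
	unsigned long min_nr_alloc_buckets = 8;		/* assumed for the example */
	unsigned long min_alloc_buckets_order = 3;	/* log2(8) */
	unsigned long index = 21;
	unsigned long chunk, offset;

	chunk = index >> min_alloc_buckets_order;	/* 21 >> 3 = 2 */
	offset = index & (min_nr_alloc_buckets - 1);	/* 21 & 7  = 5 */
	/* Bucket 21 therefore lives at tbl_chunk[2][5]. */
	(void) chunk;
	(void) offset;
}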
+ */ +static +void lttng_ust_lfht_free_bucket_table(struct lttng_ust_lfht *ht, unsigned long order) +{ + if (order == 0) + poison_free(ht->tbl_chunk[0]); + else if (order > ht->min_alloc_buckets_order) { + unsigned long i, len = 1UL << (order - 1 - ht->min_alloc_buckets_order); + + for (i = len; i < 2 * len; i++) + poison_free(ht->tbl_chunk[i]); + } + /* Nothing to do for 0 < order && order <= ht->min_alloc_buckets_order */ +} + +static +struct lttng_ust_lfht_node *bucket_at(struct lttng_ust_lfht *ht, unsigned long index) +{ + unsigned long chunk, offset; + + chunk = index >> ht->min_alloc_buckets_order; + offset = index & (ht->min_nr_alloc_buckets - 1); + return &ht->tbl_chunk[chunk][offset]; +} + +static +struct lttng_ust_lfht *alloc_lttng_ust_lfht(unsigned long min_nr_alloc_buckets, + unsigned long max_nr_buckets) +{ + unsigned long nr_chunks, lttng_ust_lfht_size; + + min_nr_alloc_buckets = max(min_nr_alloc_buckets, + max_nr_buckets / MAX_CHUNK_TABLE); + nr_chunks = max_nr_buckets / min_nr_alloc_buckets; + lttng_ust_lfht_size = offsetof(struct lttng_ust_lfht, tbl_chunk) + + sizeof(struct lttng_ust_lfht_node *) * nr_chunks; + lttng_ust_lfht_size = max(lttng_ust_lfht_size, sizeof(struct lttng_ust_lfht)); + + return __default_alloc_lttng_ust_lfht( + <tng_ust_lfht_mm_chunk, lttng_ust_lfht_size, + min_nr_alloc_buckets, max_nr_buckets); +} + +const struct lttng_ust_lfht_mm_type lttng_ust_lfht_mm_chunk = { + .alloc_lttng_ust_lfht = alloc_lttng_ust_lfht, + .alloc_bucket_table = lttng_ust_lfht_alloc_bucket_table, + .free_bucket_table = lttng_ust_lfht_free_bucket_table, + .bucket_at = bucket_at, +}; diff --git a/liblttng-ust/rculfhash-mm-mmap.c b/liblttng-ust/rculfhash-mm-mmap.c new file mode 100644 index 00000000..5b02c406 --- /dev/null +++ b/liblttng-ust/rculfhash-mm-mmap.c @@ -0,0 +1,214 @@ +/* + * rculfhash-mm-mmap.c + * + * mmap/reservation based memory management for Lock-Free RCU Hash Table + * + * Copyright 2011 - Lai Jiangshan + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include +#include +#include +#include +#include "rculfhash-internal.h" + +#ifndef MAP_ANONYMOUS +#define MAP_ANONYMOUS MAP_ANON +#endif + +/* + * The allocation scheme used by the mmap based RCU hash table is to make a + * large unaccessible mapping to reserve memory without allocating it. + * Then smaller chunks are allocated by overlapping read/write mappings which + * do allocate memory. Deallocation is done by an overlapping unaccessible + * mapping. + * + * This scheme was tested on Linux, macOS and Solaris. However, on Cygwin the + * mmap wrapper is based on the Windows NtMapViewOfSection API which doesn't + * support overlapping mappings. 
+ * + * An alternative to the overlapping mappings is to use mprotect to change the + * protection on chunks of the large mapping, read/write to allocate and none + * to deallocate. This works perfecty on Cygwin and Solaris but on Linux a + * call to madvise is also required to deallocate and it just doesn't work on + * macOS. + * + * For this reason, we keep to original scheme on all platforms except Cygwin. + */ + + +/* Reserve inaccessible memory space without allocating it */ +static +void *memory_map(size_t length) +{ + void *ret; + + ret = mmap(NULL, length, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (ret == MAP_FAILED) { + perror("mmap"); + abort(); + } + return ret; +} + +static +void memory_unmap(void *ptr, size_t length) +{ + if (munmap(ptr, length)) { + perror("munmap"); + abort(); + } +} + +#ifdef __CYGWIN__ +/* Set protection to read/write to allocate a memory chunk */ +static +void memory_populate(void *ptr, size_t length) +{ + if (mprotect(ptr, length, PROT_READ | PROT_WRITE)) { + perror("mprotect"); + abort(); + } +} + +/* Set protection to none to deallocate a memory chunk */ +static +void memory_discard(void *ptr, size_t length) +{ + if (mprotect(ptr, length, PROT_NONE)) { + perror("mprotect"); + abort(); + } +} + +#else /* __CYGWIN__ */ + +static +void memory_populate(void *ptr, size_t length) +{ + if (mmap(ptr, length, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, + -1, 0) != ptr) { + perror("mmap"); + abort(); + } +} + +/* + * Discard garbage memory and avoid system save it when try to swap it out. + * Make it still reserved, inaccessible. + */ +static +void memory_discard(void *ptr, size_t length) +{ + if (mmap(ptr, length, PROT_NONE, + MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, + -1, 0) != ptr) { + perror("mmap"); + abort(); + } +} +#endif /* __CYGWIN__ */ + +static +void lttng_ust_lfht_alloc_bucket_table(struct lttng_ust_lfht *ht, unsigned long order) +{ + if (order == 0) { + if (ht->min_nr_alloc_buckets == ht->max_nr_buckets) { + /* small table */ + ht->tbl_mmap = calloc(ht->max_nr_buckets, + sizeof(*ht->tbl_mmap)); + assert(ht->tbl_mmap); + return; + } + /* large table */ + ht->tbl_mmap = memory_map(ht->max_nr_buckets + * sizeof(*ht->tbl_mmap)); + memory_populate(ht->tbl_mmap, + ht->min_nr_alloc_buckets * sizeof(*ht->tbl_mmap)); + } else if (order > ht->min_alloc_buckets_order) { + /* large table */ + unsigned long len = 1UL << (order - 1); + + assert(ht->min_nr_alloc_buckets < ht->max_nr_buckets); + memory_populate(ht->tbl_mmap + len, + len * sizeof(*ht->tbl_mmap)); + } + /* Nothing to do for 0 < order && order <= ht->min_alloc_buckets_order */ +} + +/* + * lttng_ust_lfht_free_bucket_table() should be called with decreasing order. + * When lttng_ust_lfht_free_bucket_table(0) is called, it means the whole + * lfht is destroyed. 
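The reserve/commit/discard pattern implemented by memory_map(), memory_populate() and memory_discard() above can be tried in isolation. A standalone sketch of the same idea (not lttng-ust code): reserve an address range with PROT_NONE, then commit only the first chunk with an overlapping read/write mapping:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>

#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif

int main(void)
{
	size_t reserve_len = 1UL << 20;	/* reserve 1 MiB of address space */
	size_t commit_len = 1UL << 12;	/* commit only the first 4 KiB */
	char *base;

	/* Reserve: no access rights, so no memory is committed yet. */
	base = mmap(NULL, reserve_len, PROT_NONE,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (base == MAP_FAILED) {
		perror("mmap");
		return EXIT_FAILURE;
	}
	/* Commit: overlapping read/write mapping at the same address. */
	if (mmap(base, commit_len, PROT_READ | PROT_WRITE,
			MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0) != base) {
		perror("mmap");
		return EXIT_FAILURE;
	}
	memset(base, 0, commit_len);	/* the committed chunk is now usable */
	if (munmap(base, reserve_len)) {
		perror("munmap");
		return EXIT_FAILURE;
	}
	return EXIT_SUCCESS;
}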
+ */ +static +void lttng_ust_lfht_free_bucket_table(struct lttng_ust_lfht *ht, unsigned long order) +{ + if (order == 0) { + if (ht->min_nr_alloc_buckets == ht->max_nr_buckets) { + /* small table */ + poison_free(ht->tbl_mmap); + return; + } + /* large table */ + memory_unmap(ht->tbl_mmap, + ht->max_nr_buckets * sizeof(*ht->tbl_mmap)); + } else if (order > ht->min_alloc_buckets_order) { + /* large table */ + unsigned long len = 1UL << (order - 1); + + assert(ht->min_nr_alloc_buckets < ht->max_nr_buckets); + memory_discard(ht->tbl_mmap + len, len * sizeof(*ht->tbl_mmap)); + } + /* Nothing to do for 0 < order && order <= ht->min_alloc_buckets_order */ +} + +static +struct lttng_ust_lfht_node *bucket_at(struct lttng_ust_lfht *ht, unsigned long index) +{ + return &ht->tbl_mmap[index]; +} + +static +struct lttng_ust_lfht *alloc_lttng_ust_lfht(unsigned long min_nr_alloc_buckets, + unsigned long max_nr_buckets) +{ + unsigned long page_bucket_size; + + page_bucket_size = getpagesize() / sizeof(struct lttng_ust_lfht_node); + if (max_nr_buckets <= page_bucket_size) { + /* small table */ + min_nr_alloc_buckets = max_nr_buckets; + } else { + /* large table */ + min_nr_alloc_buckets = max(min_nr_alloc_buckets, + page_bucket_size); + } + + return __default_alloc_lttng_ust_lfht( + <tng_ust_lfht_mm_mmap, sizeof(struct lttng_ust_lfht), + min_nr_alloc_buckets, max_nr_buckets); +} + +const struct lttng_ust_lfht_mm_type lttng_ust_lfht_mm_mmap = { + .alloc_lttng_ust_lfht = alloc_lttng_ust_lfht, + .alloc_bucket_table = lttng_ust_lfht_alloc_bucket_table, + .free_bucket_table = lttng_ust_lfht_free_bucket_table, + .bucket_at = bucket_at, +}; diff --git a/liblttng-ust/rculfhash-mm-order.c b/liblttng-ust/rculfhash-mm-order.c new file mode 100644 index 00000000..78f2c7ae --- /dev/null +++ b/liblttng-ust/rculfhash-mm-order.c @@ -0,0 +1,90 @@ +/* + * rculfhash-mm-order.c + * + * Order based memory management for Lock-Free RCU Hash Table + * + * Copyright 2011 - Mathieu Desnoyers + * Copyright 2011 - Lai Jiangshan + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include + +static +void lttng_ust_lfht_alloc_bucket_table(struct lttng_ust_lfht *ht, unsigned long order) +{ + if (order == 0) { + ht->tbl_order[0] = calloc(ht->min_nr_alloc_buckets, + sizeof(struct lttng_ust_lfht_node)); + assert(ht->tbl_order[0]); + } else if (order > ht->min_alloc_buckets_order) { + ht->tbl_order[order] = calloc(1UL << (order -1), + sizeof(struct lttng_ust_lfht_node)); + assert(ht->tbl_order[order]); + } + /* Nothing to do for 0 < order && order <= ht->min_alloc_buckets_order */ +} + +/* + * lttng_ust_lfht_free_bucket_table() should be called with decreasing order. + * When lttng_ust_lfht_free_bucket_table(0) is called, it means the whole + * lfht is destroyed. 
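To make the small/large split in the mmap backend's alloc_lttng_ust_lfht() concrete, assume 4 KiB pages and a 16-byte lttng_ust_lfht_node (one pointer plus one unsigned long on LP64; the node layout itself is in rculfhash.h, not shown in this hunk):

/*
 * Illustration only (values depend on the platform):
 *
 *   page_bucket_size = getpagesize() / sizeof(struct lttng_ust_lfht_node)
 *                    = 4096 / 16 = 256
 *
 *   max_nr_buckets <= 256 -> "small table": one calloc() of max_nr_buckets
 *                            buckets, no address-space reservation.
 *   max_nr_buckets  > 256 -> "large table": reserve max_nr_buckets buckets
 *                            with PROT_NONE, commit min_nr_alloc_buckets
 *                            (raised to at least 256) up front.
 */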
+ */ +static +void lttng_ust_lfht_free_bucket_table(struct lttng_ust_lfht *ht, unsigned long order) +{ + if (order == 0) + poison_free(ht->tbl_order[0]); + else if (order > ht->min_alloc_buckets_order) + poison_free(ht->tbl_order[order]); + /* Nothing to do for 0 < order && order <= ht->min_alloc_buckets_order */ +} + +static +struct lttng_ust_lfht_node *bucket_at(struct lttng_ust_lfht *ht, unsigned long index) +{ + unsigned long order; + + if (index < ht->min_nr_alloc_buckets) { + dbg_printf("bucket index %lu order 0 aridx 0\n", index); + return &ht->tbl_order[0][index]; + } + /* + * equivalent to lttng_ust_lfht_get_count_order_ulong(index + 1), but + * optimizes away the non-existing 0 special-case for + * lttng_ust_lfht_get_count_order_ulong. + */ + order = lttng_ust_lfht_fls_ulong(index); + dbg_printf("bucket index %lu order %lu aridx %lu\n", + index, order, index & ((1UL << (order - 1)) - 1)); + return &ht->tbl_order[order][index & ((1UL << (order - 1)) - 1)]; +} + +static +struct lttng_ust_lfht *alloc_lttng_ust_lfht(unsigned long min_nr_alloc_buckets, + unsigned long max_nr_buckets) +{ + return __default_alloc_lttng_ust_lfht( + <tng_ust_lfht_mm_order, sizeof(struct lttng_ust_lfht), + min_nr_alloc_buckets, max_nr_buckets); +} + +const struct lttng_ust_lfht_mm_type lttng_ust_lfht_mm_order = { + .alloc_lttng_ust_lfht = alloc_lttng_ust_lfht, + .alloc_bucket_table = lttng_ust_lfht_alloc_bucket_table, + .free_bucket_table = lttng_ust_lfht_free_bucket_table, + .bucket_at = bucket_at, +}; diff --git a/liblttng-ust/rculfhash.c b/liblttng-ust/rculfhash.c new file mode 100644 index 00000000..8aa8bc3e --- /dev/null +++ b/liblttng-ust/rculfhash.c @@ -0,0 +1,1314 @@ +/* + * rculfhash.c + * + * Userspace RCU library - Lock-Free Resizable RCU Hash Table + * + * Copyright 2010-2011 - Mathieu Desnoyers + * Copyright 2011 - Lai Jiangshan + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * Based on the following articles: + * - Ori Shalev and Nir Shavit. Split-ordered lists: Lock-free + * extensible hash tables. J. ACM 53, 3 (May 2006), 379-405. + * - Michael, M. M. High performance dynamic lock-free hash tables + * and list-based sets. In Proceedings of the fourteenth annual ACM + * symposium on Parallel algorithms and architectures, ACM Press, + * (2002), 73-82. + * + * Some specificities of this Lock-Free Resizable RCU Hash Table + * implementation: + * + * - RCU read-side critical section allows readers to perform hash + * table lookups, as well as traversals, and use the returned objects + * safely by allowing memory reclaim to take place only after a grace + * period. + * - Add and remove operations are lock-free, and do not need to + * allocate memory. 
They need to be executed within RCU read-side + * critical section to ensure the objects they read are valid and to + * deal with the cmpxchg ABA problem. + * - add and add_unique operations are supported. add_unique checks if + * the node key already exists in the hash table. It ensures not to + * populate a duplicate key if the node key already exists in the hash + * table. + * - The resize operation executes concurrently with + * add/add_unique/add_replace/remove/lookup/traversal. + * - Hash table nodes are contained within a split-ordered list. This + * list is ordered by incrementing reversed-bits-hash value. + * - An index of bucket nodes is kept. These bucket nodes are the hash + * table "buckets". These buckets are internal nodes that allow to + * perform a fast hash lookup, similarly to a skip list. These + * buckets are chained together in the split-ordered list, which + * allows recursive expansion by inserting new buckets between the + * existing buckets. The split-ordered list allows adding new buckets + * between existing buckets as the table needs to grow. + * - The resize operation for small tables only allows expanding the + * hash table. It is triggered automatically by detecting long chains + * in the add operation. + * - The resize operation for larger tables (and available through an + * API) allows both expanding and shrinking the hash table. + * - Split-counters are used to keep track of the number of + * nodes within the hash table for automatic resize triggering. + * - Resize operation initiated by long chain detection is executed by a + * worker thread, which keeps lock-freedom of add and remove. + * - Resize operations are protected by a mutex. + * - The removal operation is split in two parts: first, a "removed" + * flag is set in the next pointer within the node to remove. Then, + * a "garbage collection" is performed in the bucket containing the + * removed node (from the start of the bucket up to the removed node). + * All encountered nodes with "removed" flag set in their next + * pointers are removed from the linked-list. If the cmpxchg used for + * removal fails (due to concurrent garbage-collection or concurrent + * add), we retry from the beginning of the bucket. This ensures that + * the node with "removed" flag set is removed from the hash table + * (not visible to lookups anymore) before the RCU read-side critical + * section held across removal ends. Furthermore, this ensures that + * the node with "removed" flag set is removed from the linked-list + * before its memory is reclaimed. After setting the "removal" flag, + * only the thread which removal is the first to set the "removal + * owner" flag (with an xchg) into a node's next pointer is considered + * to have succeeded its removal (and thus owns the node to reclaim). + * Because we garbage-collect starting from an invariant node (the + * start-of-bucket bucket node) up to the "removed" node (or find a + * reverse-hash that is higher), we are sure that a successful + * traversal of the chain leads to a chain that is present in the + * linked-list (the start node is never removed) and that it does not + * contain the "removed" node anymore, even if concurrent delete/add + * operations are changing the structure of the list concurrently. + * - The add operations perform garbage collection of buckets if they + * encounter nodes with removed flag set in the bucket where they want + * to add their new node. 
This ensures lock-freedom of add operation by + * helping the remover unlink nodes from the list rather than to wait + * for it do to so. + * - There are three memory backends for the hash table buckets: the + * "order table", the "chunks", and the "mmap". + * - These bucket containers contain a compact version of the hash table + * nodes. + * - The RCU "order table": + * - has a first level table indexed by log2(hash index) which is + * copied and expanded by the resize operation. This order table + * allows finding the "bucket node" tables. + * - There is one bucket node table per hash index order. The size of + * each bucket node table is half the number of hashes contained in + * this order (except for order 0). + * - The RCU "chunks" is best suited for close interaction with a page + * allocator. It uses a linear array as index to "chunks" containing + * each the same number of buckets. + * - The RCU "mmap" memory backend uses a single memory map to hold + * all buckets. + * - synchronize_rcu is used to garbage-collect the old bucket node table. + * + * Ordering Guarantees: + * + * To discuss these guarantees, we first define "read" operation as any + * of the the basic lttng_ust_lfht_lookup, lttng_ust_lfht_next_duplicate, + * lttng_ust_lfht_first, lttng_ust_lfht_next operation, as well as + * lttng_ust_lfht_add_unique (failure). + * + * We define "read traversal" operation as any of the following + * group of operations + * - lttng_ust_lfht_lookup followed by iteration with lttng_ust_lfht_next_duplicate + * (and/or lttng_ust_lfht_next, although less common). + * - lttng_ust_lfht_add_unique (failure) followed by iteration with + * lttng_ust_lfht_next_duplicate (and/or lttng_ust_lfht_next, although less + * common). + * - lttng_ust_lfht_first followed iteration with lttng_ust_lfht_next (and/or + * lttng_ust_lfht_next_duplicate, although less common). + * + * We define "write" operations as any of lttng_ust_lfht_add, lttng_ust_lfht_replace, + * lttng_ust_lfht_add_unique (success), lttng_ust_lfht_add_replace, lttng_ust_lfht_del. + * + * When lttng_ust_lfht_add_unique succeeds (returns the node passed as + * parameter), it acts as a "write" operation. When lttng_ust_lfht_add_unique + * fails (returns a node different from the one passed as parameter), it + * acts as a "read" operation. A lttng_ust_lfht_add_unique failure is a + * lttng_ust_lfht_lookup "read" operation, therefore, any ordering guarantee + * referring to "lookup" imply any of "lookup" or lttng_ust_lfht_add_unique + * (failure). + * + * We define "prior" and "later" node as nodes observable by reads and + * read traversals respectively before and after a write or sequence of + * write operations. + * + * Hash-table operations are often cascaded, for example, the pointer + * returned by a lttng_ust_lfht_lookup() might be passed to a lttng_ust_lfht_next(), + * whose return value might in turn be passed to another hash-table + * operation. This entire cascaded series of operations must be enclosed + * by a pair of matching rcu_read_lock() and rcu_read_unlock() + * operations. + * + * The following ordering guarantees are offered by this hash table: + * + * A.1) "read" after "write": if there is ordering between a write and a + * later read, then the read is guaranteed to see the write or some + * later write. 
+ * A.2) "read traversal" after "write": given that there is dependency + * ordering between reads in a "read traversal", if there is + * ordering between a write and the first read of the traversal, + * then the "read traversal" is guaranteed to see the write or + * some later write. + * B.1) "write" after "read": if there is ordering between a read and a + * later write, then the read will never see the write. + * B.2) "write" after "read traversal": given that there is dependency + * ordering between reads in a "read traversal", if there is + * ordering between the last read of the traversal and a later + * write, then the "read traversal" will never see the write. + * C) "write" while "read traversal": if a write occurs during a "read + * traversal", the traversal may, or may not, see the write. + * D.1) "write" after "write": if there is ordering between a write and + * a later write, then the later write is guaranteed to see the + * effects of the first write. + * D.2) Concurrent "write" pairs: The system will assign an arbitrary + * order to any pair of concurrent conflicting writes. + * Non-conflicting writes (for example, to different keys) are + * unordered. + * E) If a grace period separates a "del" or "replace" operation + * and a subsequent operation, then that subsequent operation is + * guaranteed not to see the removed item. + * F) Uniqueness guarantee: given a hash table that does not contain + * duplicate items for a given key, there will only be one item in + * the hash table after an arbitrary sequence of add_unique and/or + * add_replace operations. Note, however, that a pair of + * concurrent read operations might well access two different items + * with that key. + * G.1) If a pair of lookups for a given key are ordered (e.g. by a + * memory barrier), then the second lookup will return the same + * node as the previous lookup, or some later node. + * G.2) A "read traversal" that starts after the end of a prior "read + * traversal" (ordered by memory barriers) is guaranteed to see the + * same nodes as the previous traversal, or some later nodes. + * G.3) Concurrent "read" pairs: concurrent reads are unordered. For + * example, if a pair of reads to the same key run concurrently + * with an insertion of that same key, the reads remain unordered + * regardless of their return values. In other words, you cannot + * rely on the values returned by the reads to deduce ordering. + * + * Progress guarantees: + * + * * Reads are wait-free. These operations always move forward in the + * hash table linked list, and this list has no loop. + * * Writes are lock-free. Any retry loop performed by a write operation + * is triggered by progress made within another update operation. + * + * Bucket node tables: + * + * hash table hash table the last all bucket node tables + * order size bucket node 0 1 2 3 4 5 6(index) + * table size + * 0 1 1 1 + * 1 2 1 1 1 + * 2 4 2 1 1 2 + * 3 8 4 1 1 2 4 + * 4 16 8 1 1 2 4 8 + * 5 32 16 1 1 2 4 8 16 + * 6 64 32 1 1 2 4 8 16 32 + * + * When growing/shrinking, we only focus on the last bucket node table + * which size is (!order ? 1 : (1 << (order -1))). + * + * Example for growing/shrinking: + * grow hash table from order 5 to 6: init the index=6 bucket node table + * shrink hash table from order 6 to 5: fini the index=6 bucket node table + * + * A bit of ascii art explanation: + * + * The order index is the off-by-one compared to the actual power of 2 + * because we use index 0 to deal with the 0 special-case. 
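The guarantees above presume that a cascaded sequence of read operations runs inside a single read-side critical section. A sketch of such a read traversal; lttng_ust_lfht_next_duplicate() matches the call used later in this file, while lttng_ust_lfht_lookup() and lttng_ust_lfht_iter_get_node() are assumed from rculfhash.h following the upstream rculfhash API:

/* Sketch only: lookup/iterator signatures assumed from rculfhash.h. */
static void count_matches(struct lttng_ust_lfht *ht, unsigned long hash,
		lttng_ust_lfht_match_fct match, const void *key,
		unsigned long *count)
{
	struct lttng_ust_lfht_iter iter;
	struct lttng_ust_lfht_node *node;

	*count = 0;
	lttng_ust_urcu_read_lock();	/* the whole cascade stays in one critical section */
	lttng_ust_lfht_lookup(ht, hash, match, key, &iter);
	while ((node = lttng_ust_lfht_iter_get_node(&iter)) != NULL) {
		(*count)++;
		lttng_ust_lfht_next_duplicate(ht, match, key, &iter);
	}
	lttng_ust_urcu_read_unlock();
}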
+ * + * This shows the nodes for a small table ordered by reversed bits: + * + * bits reverse + * 0 000 000 + * 4 100 001 + * 2 010 010 + * 6 110 011 + * 1 001 100 + * 5 101 101 + * 3 011 110 + * 7 111 111 + * + * This shows the nodes in order of non-reversed bits, linked by + * reversed-bit order. + * + * order bits reverse + * 0 0 000 000 + * 1 | 1 001 100 <- + * 2 | | 2 010 010 <- | + * | | | 3 011 110 | <- | + * 3 -> | | | 4 100 001 | | + * -> | | 5 101 101 | + * -> | 6 110 011 + * -> 7 111 111 + */ + +/* + * Note on port to lttng-ust: auto-resize and accounting features are + * removed. + */ + +#define _LGPL_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include "rculfhash.h" +#include "rculfhash-internal.h" +#include +#include +#include + +/* + * Split-counters lazily update the global counter each 1024 + * addition/removal. It automatically keeps track of resize required. + * We use the bucket length as indicator for need to expand for small + * tables and machines lacking per-cpu data support. + */ +#define COUNT_COMMIT_ORDER 10 + +/* + * Define the minimum table size. + */ +#define MIN_TABLE_ORDER 0 +#define MIN_TABLE_SIZE (1UL << MIN_TABLE_ORDER) + +/* + * Minimum number of bucket nodes to touch per thread to parallelize grow/shrink. + */ +#define MIN_PARTITION_PER_THREAD_ORDER 12 +#define MIN_PARTITION_PER_THREAD (1UL << MIN_PARTITION_PER_THREAD_ORDER) + +/* + * The removed flag needs to be updated atomically with the pointer. + * It indicates that no node must attach to the node scheduled for + * removal, and that node garbage collection must be performed. + * The bucket flag does not require to be updated atomically with the + * pointer, but it is added as a pointer low bit flag to save space. + * The "removal owner" flag is used to detect which of the "del" + * operation that has set the "removed flag" gets to return the removed + * node to its caller. Note that the replace operation does not need to + * iteract with the "removal owner" flag, because it validates that + * the "removed" flag is not set before performing its cmpxchg. + */ +#define REMOVED_FLAG (1UL << 0) +#define BUCKET_FLAG (1UL << 1) +#define REMOVAL_OWNER_FLAG (1UL << 2) +#define FLAGS_MASK ((1UL << 3) - 1) + +/* Value of the end pointer. Should not interact with flags. */ +#define END_VALUE NULL + +/* + * ht_items_count: Split-counters counting the number of node addition + * and removal in the table. Only used if the LTTNG_UST_LFHT_ACCOUNTING flag + * is set at hash table creation. + * + * These are free-running counters, never reset to zero. They count the + * number of add/remove, and trigger every (1 << COUNT_COMMIT_ORDER) + * operations to update the global counter. We choose a power-of-2 value + * for the trigger to deal with 32 or 64-bit overflow of the counter. + */ +struct ht_items_count { + unsigned long add, del; +} __attribute__((aligned(CAA_CACHE_LINE_SIZE))); + +#ifdef CONFIG_LTTNG_UST_LFHT_ITER_DEBUG + +static +void lttng_ust_lfht_iter_debug_set_ht(struct lttng_ust_lfht *ht, struct lttng_ust_lfht_iter *iter) +{ + iter->lfht = ht; +} + +#define lttng_ust_lfht_iter_debug_assert(...) assert(__VA_ARGS__) + +#else + +static +void lttng_ust_lfht_iter_debug_set_ht(struct lttng_ust_lfht *ht, struct lttng_ust_lfht_iter *iter) +{ +} + +#define lttng_ust_lfht_iter_debug_assert(...) + +#endif + +/* + * Algorithm to reverse bits in a word by lookup table, extended to + * 64-bit words. 
+ * Source: + * http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable + * Originally from Public Domain. + */ + +static const uint8_t BitReverseTable256[256] = +{ +#define R2(n) (n), (n) + 2*64, (n) + 1*64, (n) + 3*64 +#define R4(n) R2(n), R2((n) + 2*16), R2((n) + 1*16), R2((n) + 3*16) +#define R6(n) R4(n), R4((n) + 2*4 ), R4((n) + 1*4 ), R4((n) + 3*4 ) + R6(0), R6(2), R6(1), R6(3) +}; +#undef R2 +#undef R4 +#undef R6 + +static +uint8_t bit_reverse_u8(uint8_t v) +{ + return BitReverseTable256[v]; +} + +#if (CAA_BITS_PER_LONG == 32) +static +uint32_t bit_reverse_u32(uint32_t v) +{ + return ((uint32_t) bit_reverse_u8(v) << 24) | + ((uint32_t) bit_reverse_u8(v >> 8) << 16) | + ((uint32_t) bit_reverse_u8(v >> 16) << 8) | + ((uint32_t) bit_reverse_u8(v >> 24)); +} +#else +static +uint64_t bit_reverse_u64(uint64_t v) +{ + return ((uint64_t) bit_reverse_u8(v) << 56) | + ((uint64_t) bit_reverse_u8(v >> 8) << 48) | + ((uint64_t) bit_reverse_u8(v >> 16) << 40) | + ((uint64_t) bit_reverse_u8(v >> 24) << 32) | + ((uint64_t) bit_reverse_u8(v >> 32) << 24) | + ((uint64_t) bit_reverse_u8(v >> 40) << 16) | + ((uint64_t) bit_reverse_u8(v >> 48) << 8) | + ((uint64_t) bit_reverse_u8(v >> 56)); +} +#endif + +static +unsigned long bit_reverse_ulong(unsigned long v) +{ +#if (CAA_BITS_PER_LONG == 32) + return bit_reverse_u32(v); +#else + return bit_reverse_u64(v); +#endif +} + +/* + * fls: returns the position of the most significant bit. + * Returns 0 if no bit is set, else returns the position of the most + * significant bit (from 1 to 32 on 32-bit, from 1 to 64 on 64-bit). + */ +#if defined(__i386) || defined(__x86_64) +static inline +unsigned int fls_u32(uint32_t x) +{ + int r; + + __asm__ ("bsrl %1,%0\n\t" + "jnz 1f\n\t" + "movl $-1,%0\n\t" + "1:\n\t" + : "=r" (r) : "rm" (x)); + return r + 1; +} +#define HAS_FLS_U32 +#endif + +#if defined(__x86_64) +static inline +unsigned int fls_u64(uint64_t x) +{ + long r; + + __asm__ ("bsrq %1,%0\n\t" + "jnz 1f\n\t" + "movq $-1,%0\n\t" + "1:\n\t" + : "=r" (r) : "rm" (x)); + return r + 1; +} +#define HAS_FLS_U64 +#endif + +#ifndef HAS_FLS_U64 +static __attribute__((unused)) +unsigned int fls_u64(uint64_t x) +{ + unsigned int r = 64; + + if (!x) + return 0; + + if (!(x & 0xFFFFFFFF00000000ULL)) { + x <<= 32; + r -= 32; + } + if (!(x & 0xFFFF000000000000ULL)) { + x <<= 16; + r -= 16; + } + if (!(x & 0xFF00000000000000ULL)) { + x <<= 8; + r -= 8; + } + if (!(x & 0xF000000000000000ULL)) { + x <<= 4; + r -= 4; + } + if (!(x & 0xC000000000000000ULL)) { + x <<= 2; + r -= 2; + } + if (!(x & 0x8000000000000000ULL)) { + x <<= 1; + r -= 1; + } + return r; +} +#endif + +#ifndef HAS_FLS_U32 +static __attribute__((unused)) +unsigned int fls_u32(uint32_t x) +{ + unsigned int r = 32; + + if (!x) + return 0; + if (!(x & 0xFFFF0000U)) { + x <<= 16; + r -= 16; + } + if (!(x & 0xFF000000U)) { + x <<= 8; + r -= 8; + } + if (!(x & 0xF0000000U)) { + x <<= 4; + r -= 4; + } + if (!(x & 0xC0000000U)) { + x <<= 2; + r -= 2; + } + if (!(x & 0x80000000U)) { + x <<= 1; + r -= 1; + } + return r; +} +#endif + +unsigned int lttng_ust_lfht_fls_ulong(unsigned long x) +{ +#if (CAA_BITS_PER_LONG == 32) + return fls_u32(x); +#else + return fls_u64(x); +#endif +} + +/* + * Return the minimum order for which x <= (1UL << order). + * Return -1 if x is 0. + */ +int lttng_ust_lfht_get_count_order_u32(uint32_t x) +{ + if (!x) + return -1; + + return fls_u32(x - 1); +} + +/* + * Return the minimum order for which x <= (1UL << order). + * Return -1 if x is 0. 
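A few concrete values for the helpers above, useful when reading the order arithmetic elsewhere in this file (they follow directly from the code shown here):

#include <assert.h>

/* Worked examples for fls_u32() and lttng_ust_lfht_get_count_order_u32(). */
static void order_helpers_examples(void)
{
	assert(fls_u32(0) == 0);	/* no bit set */
	assert(fls_u32(1) == 1);
	assert(fls_u32(8) == 4);	/* 0b1000: most significant bit at position 4 */

	/* get_count_order_u32(x) == fls_u32(x - 1): smallest order with x <= 1 << order */
	assert(lttng_ust_lfht_get_count_order_u32(1) == 0);	/* 1 << 0 == 1 */
	assert(lttng_ust_lfht_get_count_order_u32(8) == 3);	/* 1 << 3 == 8 */
	assert(lttng_ust_lfht_get_count_order_u32(9) == 4);	/* 1 << 4 == 16 */
}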
+ */ +int lttng_ust_lfht_get_count_order_ulong(unsigned long x) +{ + if (!x) + return -1; + + return lttng_ust_lfht_fls_ulong(x - 1); +} + +static +struct lttng_ust_lfht_node *clear_flag(struct lttng_ust_lfht_node *node) +{ + return (struct lttng_ust_lfht_node *) (((unsigned long) node) & ~FLAGS_MASK); +} + +static +int is_removed(const struct lttng_ust_lfht_node *node) +{ + return ((unsigned long) node) & REMOVED_FLAG; +} + +static +int is_bucket(struct lttng_ust_lfht_node *node) +{ + return ((unsigned long) node) & BUCKET_FLAG; +} + +static +struct lttng_ust_lfht_node *flag_bucket(struct lttng_ust_lfht_node *node) +{ + return (struct lttng_ust_lfht_node *) (((unsigned long) node) | BUCKET_FLAG); +} + +static +int is_removal_owner(struct lttng_ust_lfht_node *node) +{ + return ((unsigned long) node) & REMOVAL_OWNER_FLAG; +} + +static +struct lttng_ust_lfht_node *flag_removal_owner(struct lttng_ust_lfht_node *node) +{ + return (struct lttng_ust_lfht_node *) (((unsigned long) node) | REMOVAL_OWNER_FLAG); +} + +static +struct lttng_ust_lfht_node *flag_removed_or_removal_owner(struct lttng_ust_lfht_node *node) +{ + return (struct lttng_ust_lfht_node *) (((unsigned long) node) | REMOVED_FLAG | REMOVAL_OWNER_FLAG); +} + +static +struct lttng_ust_lfht_node *get_end(void) +{ + return (struct lttng_ust_lfht_node *) END_VALUE; +} + +static +int is_end(struct lttng_ust_lfht_node *node) +{ + return clear_flag(node) == (struct lttng_ust_lfht_node *) END_VALUE; +} + +static +void lttng_ust_lfht_alloc_bucket_table(struct lttng_ust_lfht *ht, unsigned long order) +{ + return ht->mm->alloc_bucket_table(ht, order); +} + +/* + * lttng_ust_lfht_free_bucket_table() should be called with decreasing order. + * When lttng_ust_lfht_free_bucket_table(0) is called, it means the whole + * lfht is destroyed. + */ +static +void lttng_ust_lfht_free_bucket_table(struct lttng_ust_lfht *ht, unsigned long order) +{ + return ht->mm->free_bucket_table(ht, order); +} + +static inline +struct lttng_ust_lfht_node *bucket_at(struct lttng_ust_lfht *ht, unsigned long index) +{ + return ht->bucket_at(ht, index); +} + +static inline +struct lttng_ust_lfht_node *lookup_bucket(struct lttng_ust_lfht *ht, unsigned long size, + unsigned long hash) +{ + assert(size > 0); + return bucket_at(ht, hash & (size - 1)); +} + +/* + * Remove all logically deleted nodes from a bucket up to a certain node key. + */ +static +void _lttng_ust_lfht_gc_bucket(struct lttng_ust_lfht_node *bucket, struct lttng_ust_lfht_node *node) +{ + struct lttng_ust_lfht_node *iter_prev, *iter, *next, *new_next; + + assert(!is_bucket(bucket)); + assert(!is_removed(bucket)); + assert(!is_removal_owner(bucket)); + assert(!is_bucket(node)); + assert(!is_removed(node)); + assert(!is_removal_owner(node)); + for (;;) { + iter_prev = bucket; + /* We can always skip the bucket node initially */ + iter = lttng_ust_rcu_dereference(iter_prev->next); + assert(!is_removed(iter)); + assert(!is_removal_owner(iter)); + assert(iter_prev->reverse_hash <= node->reverse_hash); + /* + * We should never be called with bucket (start of chain) + * and logically removed node (end of path compression + * marker) being the actual same node. This would be a + * bug in the algorithm implementation. 
+ */ + assert(bucket != node); + for (;;) { + if (caa_unlikely(is_end(iter))) + return; + if (caa_likely(clear_flag(iter)->reverse_hash > node->reverse_hash)) + return; + next = lttng_ust_rcu_dereference(clear_flag(iter)->next); + if (caa_likely(is_removed(next))) + break; + iter_prev = clear_flag(iter); + iter = next; + } + assert(!is_removed(iter)); + assert(!is_removal_owner(iter)); + if (is_bucket(iter)) + new_next = flag_bucket(clear_flag(next)); + else + new_next = clear_flag(next); + (void) uatomic_cmpxchg(&iter_prev->next, iter, new_next); + } +} + +static +int _lttng_ust_lfht_replace(struct lttng_ust_lfht *ht, unsigned long size, + struct lttng_ust_lfht_node *old_node, + struct lttng_ust_lfht_node *old_next, + struct lttng_ust_lfht_node *new_node) +{ + struct lttng_ust_lfht_node *bucket, *ret_next; + + if (!old_node) /* Return -ENOENT if asked to replace NULL node */ + return -ENOENT; + + assert(!is_removed(old_node)); + assert(!is_removal_owner(old_node)); + assert(!is_bucket(old_node)); + assert(!is_removed(new_node)); + assert(!is_removal_owner(new_node)); + assert(!is_bucket(new_node)); + assert(new_node != old_node); + for (;;) { + /* Insert after node to be replaced */ + if (is_removed(old_next)) { + /* + * Too late, the old node has been removed under us + * between lookup and replace. Fail. + */ + return -ENOENT; + } + assert(old_next == clear_flag(old_next)); + assert(new_node != old_next); + /* + * REMOVAL_OWNER flag is _NEVER_ set before the REMOVED + * flag. It is either set atomically at the same time + * (replace) or after (del). + */ + assert(!is_removal_owner(old_next)); + new_node->next = old_next; + /* + * Here is the whole trick for lock-free replace: we add + * the replacement node _after_ the node we want to + * replace by atomically setting its next pointer at the + * same time we set its removal flag. Given that + * the lookups/get next use an iterator aware of the + * next pointer, they will either skip the old node due + * to the removal flag and see the new node, or use + * the old node, but will not see the new one. + * This is a replacement of a node with another node + * that has the same value: we are therefore not + * removing a value from the hash table. We set both the + * REMOVED and REMOVAL_OWNER flags atomically so we own + * the node after successful cmpxchg. + */ + ret_next = uatomic_cmpxchg(&old_node->next, + old_next, flag_removed_or_removal_owner(new_node)); + if (ret_next == old_next) + break; /* We performed the replacement. */ + old_next = ret_next; + } + + /* + * Ensure that the old node is not visible to readers anymore: + * lookup for the node, and remove it (along with any other + * logically removed node) if found. + */ + bucket = lookup_bucket(ht, size, bit_reverse_ulong(old_node->reverse_hash)); + _lttng_ust_lfht_gc_bucket(bucket, new_node); + + assert(is_removed(CMM_LOAD_SHARED(old_node->next))); + return 0; +} + +/* + * A non-NULL unique_ret pointer uses the "add unique" (or uniquify) add + * mode. A NULL unique_ret allows creation of duplicate keys. 
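The add path below, like the rest of the file, relies on low-bit pointer tagging: FLAGS_MASK occupies the three least significant bits of a node pointer, which presumes lttng_ust_lfht_node is at least 8-byte aligned (the alignment attribute lives in rculfhash.h, not in this hunk). A small round-trip sketch using the helpers defined above:

#include <assert.h>

/* Illustration of the pointer-tagging scheme; mirrors the flag helpers above. */
static void flag_roundtrip_example(struct lttng_ust_lfht_node *node)
{
	struct lttng_ust_lfht_node *tagged;

	/* Requires the low 3 bits of the pointer to be zero (8-byte alignment). */
	assert(!((unsigned long) node & FLAGS_MASK));

	tagged = flag_bucket(node);		/* set BUCKET_FLAG */
	assert(is_bucket(tagged));
	assert(!is_removed(tagged));
	assert(clear_flag(tagged) == node);	/* strip all flag bits */
}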
+ */ +static +void _lttng_ust_lfht_add(struct lttng_ust_lfht *ht, + unsigned long hash, + lttng_ust_lfht_match_fct match, + const void *key, + unsigned long size, + struct lttng_ust_lfht_node *node, + struct lttng_ust_lfht_iter *unique_ret, + int bucket_flag) +{ + struct lttng_ust_lfht_node *iter_prev, *iter, *next, *new_node, *new_next, + *return_node; + struct lttng_ust_lfht_node *bucket; + + assert(!is_bucket(node)); + assert(!is_removed(node)); + assert(!is_removal_owner(node)); + bucket = lookup_bucket(ht, size, hash); + for (;;) { + /* + * iter_prev points to the non-removed node prior to the + * insert location. + */ + iter_prev = bucket; + /* We can always skip the bucket node initially */ + iter = lttng_ust_rcu_dereference(iter_prev->next); + assert(iter_prev->reverse_hash <= node->reverse_hash); + for (;;) { + if (caa_unlikely(is_end(iter))) + goto insert; + if (caa_likely(clear_flag(iter)->reverse_hash > node->reverse_hash)) + goto insert; + + /* bucket node is the first node of the identical-hash-value chain */ + if (bucket_flag && clear_flag(iter)->reverse_hash == node->reverse_hash) + goto insert; + + next = lttng_ust_rcu_dereference(clear_flag(iter)->next); + if (caa_unlikely(is_removed(next))) + goto gc_node; + + /* uniquely add */ + if (unique_ret + && !is_bucket(next) + && clear_flag(iter)->reverse_hash == node->reverse_hash) { + struct lttng_ust_lfht_iter d_iter = { + .node = node, + .next = iter, +#ifdef CONFIG_LTTNG_UST_LFHT_ITER_DEBUG + .lfht = ht, +#endif + }; + + /* + * uniquely adding inserts the node as the first + * node of the identical-hash-value node chain. + * + * This semantic ensures no duplicated keys + * should ever be observable in the table + * (including traversing the table node by + * node by forward iterations) + */ + lttng_ust_lfht_next_duplicate(ht, match, key, &d_iter); + if (!d_iter.node) + goto insert; + + *unique_ret = d_iter; + return; + } + + iter_prev = clear_flag(iter); + iter = next; + } + + insert: + assert(node != clear_flag(iter)); + assert(!is_removed(iter_prev)); + assert(!is_removal_owner(iter_prev)); + assert(!is_removed(iter)); + assert(!is_removal_owner(iter)); + assert(iter_prev != node); + if (!bucket_flag) + node->next = clear_flag(iter); + else + node->next = flag_bucket(clear_flag(iter)); + if (is_bucket(iter)) + new_node = flag_bucket(node); + else + new_node = node; + if (uatomic_cmpxchg(&iter_prev->next, iter, + new_node) != iter) { + continue; /* retry */ + } else { + return_node = node; + goto end; + } + + gc_node: + assert(!is_removed(iter)); + assert(!is_removal_owner(iter)); + if (is_bucket(iter)) + new_next = flag_bucket(clear_flag(next)); + else + new_next = clear_flag(next); + (void) uatomic_cmpxchg(&iter_prev->next, iter, new_next); + /* retry */ + } +end: + if (unique_ret) { + unique_ret->node = return_node; + /* unique_ret->next left unset, never used. */ + } +} + +static +int _lttng_ust_lfht_del(struct lttng_ust_lfht *ht, unsigned long size, + struct lttng_ust_lfht_node *node) +{ + struct lttng_ust_lfht_node *bucket, *next; + + if (!node) /* Return -ENOENT if asked to delete NULL node */ + return -ENOENT; + + /* logically delete the node */ + assert(!is_bucket(node)); + assert(!is_removed(node)); + assert(!is_removal_owner(node)); + + /* + * We are first checking if the node had previously been + * logically removed (this check is not atomic with setting the + * logical removal flag). Return -ENOENT if the node had + * previously been removed. 
+ */ + next = CMM_LOAD_SHARED(node->next); /* next is not dereferenced */ + if (caa_unlikely(is_removed(next))) + return -ENOENT; + assert(!is_bucket(next)); + /* + * The del operation semantic guarantees a full memory barrier + * before the uatomic_or atomic commit of the deletion flag. + */ + cmm_smp_mb__before_uatomic_or(); + /* + * We set the REMOVED_FLAG unconditionally. Note that there may + * be more than one concurrent thread setting this flag. + * Knowing which wins the race will be known after the garbage + * collection phase, stay tuned! + */ + uatomic_or(&node->next, REMOVED_FLAG); + /* We performed the (logical) deletion. */ + + /* + * Ensure that the node is not visible to readers anymore: lookup for + * the node, and remove it (along with any other logically removed node) + * if found. + */ + bucket = lookup_bucket(ht, size, bit_reverse_ulong(node->reverse_hash)); + _lttng_ust_lfht_gc_bucket(bucket, node); + + assert(is_removed(CMM_LOAD_SHARED(node->next))); + /* + * Last phase: atomically exchange node->next with a version + * having "REMOVAL_OWNER_FLAG" set. If the returned node->next + * pointer did _not_ have "REMOVAL_OWNER_FLAG" set, we now own + * the node and win the removal race. + * It is interesting to note that all "add" paths are forbidden + * to change the next pointer starting from the point where the + * REMOVED_FLAG is set, so here using a read, followed by a + * xchg() suffice to guarantee that the xchg() will ever only + * set the "REMOVAL_OWNER_FLAG" (or change nothing if the flag + * was already set). + */ + if (!is_removal_owner(uatomic_xchg(&node->next, + flag_removal_owner(node->next)))) + return 0; + else + return -ENOENT; +} + +/* + * Never called with size < 1. + */ +static +void lttng_ust_lfht_create_bucket(struct lttng_ust_lfht *ht, unsigned long size) +{ + struct lttng_ust_lfht_node *prev, *node; + unsigned long order, len, i; + int bucket_order; + + lttng_ust_lfht_alloc_bucket_table(ht, 0); + + dbg_printf("create bucket: order 0 index 0 hash 0\n"); + node = bucket_at(ht, 0); + node->next = flag_bucket(get_end()); + node->reverse_hash = 0; + + bucket_order = lttng_ust_lfht_get_count_order_ulong(size); + assert(bucket_order >= 0); + + for (order = 1; order < (unsigned long) bucket_order + 1; order++) { + len = 1UL << (order - 1); + lttng_ust_lfht_alloc_bucket_table(ht, order); + + for (i = 0; i < len; i++) { + /* + * Now, we are trying to init the node with the + * hash=(len+i) (which is also a bucket with the + * index=(len+i)) and insert it into the hash table, + * so this node has to be inserted after the bucket + * with the index=(len+i)&(len-1)=i. And because there + * is no other non-bucket node nor bucket node with + * larger index/hash inserted, so the bucket node + * being inserted should be inserted directly linked + * after the bucket node with index=i. + */ + prev = bucket_at(ht, i); + node = bucket_at(ht, len + i); + + dbg_printf("create bucket: order %lu index %lu hash %lu\n", + order, len + i, len + i); + node->reverse_hash = bit_reverse_ulong(len + i); + + /* insert after prev */ + assert(is_bucket(prev->next)); + node->next = prev->next; + prev->next = flag_bucket(node); + } + } +} + +#if (CAA_BITS_PER_LONG > 32) +/* + * For 64-bit architectures, with max number of buckets small enough not to + * use the entire 64-bit memory mapping space (and allowing a fair number of + * hash table instances), use the mmap allocator, which is faster. Otherwise, + * fallback to the order allocator. 
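+ *
+ * For example (illustrative values only):
+ *
+ *   get_mm_type(1UL << 20) selects lttng_ust_lfht_mm_mmap (fits within 2^32),
+ *   get_mm_type(0) selects lttng_ust_lfht_mm_order (unbounded bucket count).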
+ */
+static
+const struct lttng_ust_lfht_mm_type *get_mm_type(unsigned long max_nr_buckets)
+{
+ if (max_nr_buckets && max_nr_buckets <= (1ULL << 32))
+ return &lttng_ust_lfht_mm_mmap;
+ else
+ return &lttng_ust_lfht_mm_order;
+}
+#else
+/*
+ * For 32-bit architectures, use the order allocator.
+ */
+static
+const struct lttng_ust_lfht_mm_type *get_mm_type(unsigned long max_nr_buckets)
+{
+ return &lttng_ust_lfht_mm_order;
+}
+#endif
+
+struct lttng_ust_lfht *lttng_ust_lfht_new(unsigned long init_size,
+ unsigned long min_nr_alloc_buckets,
+ unsigned long max_nr_buckets,
+ int flags,
+ const struct lttng_ust_lfht_mm_type *mm)
+{
+ struct lttng_ust_lfht *ht;
+ unsigned long order;
+
+ /* min_nr_alloc_buckets must be power of two */
+ if (!min_nr_alloc_buckets || (min_nr_alloc_buckets & (min_nr_alloc_buckets - 1)))
+ return NULL;
+
+ /* init_size must be power of two */
+ if (!init_size || (init_size & (init_size - 1)))
+ return NULL;
+
+ /*
+ * Memory management plugin default.
+ */
+ if (!mm)
+ mm = get_mm_type(max_nr_buckets);
+
+ /* max_nr_buckets == 0 for order based mm means infinite */
+ if (mm == &lttng_ust_lfht_mm_order && !max_nr_buckets)
+ max_nr_buckets = 1UL << (MAX_TABLE_ORDER - 1);
+
+ /* max_nr_buckets must be power of two */
+ if (!max_nr_buckets || (max_nr_buckets & (max_nr_buckets - 1)))
+ return NULL;
+
+ if (flags & LTTNG_UST_LFHT_AUTO_RESIZE)
+ return NULL;
+
+ min_nr_alloc_buckets = max(min_nr_alloc_buckets, MIN_TABLE_SIZE);
+ init_size = max(init_size, MIN_TABLE_SIZE);
+ max_nr_buckets = max(max_nr_buckets, min_nr_alloc_buckets);
+ init_size = min(init_size, max_nr_buckets);
+
+ ht = mm->alloc_lttng_ust_lfht(min_nr_alloc_buckets, max_nr_buckets);
+ assert(ht);
+ assert(ht->mm == mm);
+ assert(ht->bucket_at == mm->bucket_at);
+
+ ht->flags = flags;
+ /* this mutex should not nest in read-side C.S. 
*/ + pthread_mutex_init(&ht->resize_mutex, NULL); + order = lttng_ust_lfht_get_count_order_ulong(init_size); + ht->resize_target = 1UL << order; + lttng_ust_lfht_create_bucket(ht, 1UL << order); + ht->size = 1UL << order; + return ht; +} + +void lttng_ust_lfht_lookup(struct lttng_ust_lfht *ht, unsigned long hash, + lttng_ust_lfht_match_fct match, const void *key, + struct lttng_ust_lfht_iter *iter) +{ + struct lttng_ust_lfht_node *node, *next, *bucket; + unsigned long reverse_hash, size; + + lttng_ust_lfht_iter_debug_set_ht(ht, iter); + + reverse_hash = bit_reverse_ulong(hash); + + size = lttng_ust_rcu_dereference(ht->size); + bucket = lookup_bucket(ht, size, hash); + /* We can always skip the bucket node initially */ + node = lttng_ust_rcu_dereference(bucket->next); + node = clear_flag(node); + for (;;) { + if (caa_unlikely(is_end(node))) { + node = next = NULL; + break; + } + if (caa_unlikely(node->reverse_hash > reverse_hash)) { + node = next = NULL; + break; + } + next = lttng_ust_rcu_dereference(node->next); + assert(node == clear_flag(node)); + if (caa_likely(!is_removed(next)) + && !is_bucket(next) + && node->reverse_hash == reverse_hash + && caa_likely(match(node, key))) { + break; + } + node = clear_flag(next); + } + assert(!node || !is_bucket(CMM_LOAD_SHARED(node->next))); + iter->node = node; + iter->next = next; +} + +void lttng_ust_lfht_next_duplicate(struct lttng_ust_lfht *ht, lttng_ust_lfht_match_fct match, + const void *key, struct lttng_ust_lfht_iter *iter) +{ + struct lttng_ust_lfht_node *node, *next; + unsigned long reverse_hash; + + lttng_ust_lfht_iter_debug_assert(ht == iter->lfht); + node = iter->node; + reverse_hash = node->reverse_hash; + next = iter->next; + node = clear_flag(next); + + for (;;) { + if (caa_unlikely(is_end(node))) { + node = next = NULL; + break; + } + if (caa_unlikely(node->reverse_hash > reverse_hash)) { + node = next = NULL; + break; + } + next = lttng_ust_rcu_dereference(node->next); + if (caa_likely(!is_removed(next)) + && !is_bucket(next) + && caa_likely(match(node, key))) { + break; + } + node = clear_flag(next); + } + assert(!node || !is_bucket(CMM_LOAD_SHARED(node->next))); + iter->node = node; + iter->next = next; +} + +void lttng_ust_lfht_next(struct lttng_ust_lfht *ht, struct lttng_ust_lfht_iter *iter) +{ + struct lttng_ust_lfht_node *node, *next; + + lttng_ust_lfht_iter_debug_assert(ht == iter->lfht); + node = clear_flag(iter->next); + for (;;) { + if (caa_unlikely(is_end(node))) { + node = next = NULL; + break; + } + next = lttng_ust_rcu_dereference(node->next); + if (caa_likely(!is_removed(next)) + && !is_bucket(next)) { + break; + } + node = clear_flag(next); + } + assert(!node || !is_bucket(CMM_LOAD_SHARED(node->next))); + iter->node = node; + iter->next = next; +} + +void lttng_ust_lfht_first(struct lttng_ust_lfht *ht, struct lttng_ust_lfht_iter *iter) +{ + lttng_ust_lfht_iter_debug_set_ht(ht, iter); + /* + * Get next after first bucket node. The first bucket node is the + * first node of the linked list. 
+ */ + iter->next = bucket_at(ht, 0)->next; + lttng_ust_lfht_next(ht, iter); +} + +void lttng_ust_lfht_add(struct lttng_ust_lfht *ht, unsigned long hash, + struct lttng_ust_lfht_node *node) +{ + unsigned long size; + + node->reverse_hash = bit_reverse_ulong(hash); + size = lttng_ust_rcu_dereference(ht->size); + _lttng_ust_lfht_add(ht, hash, NULL, NULL, size, node, NULL, 0); +} + +struct lttng_ust_lfht_node *lttng_ust_lfht_add_unique(struct lttng_ust_lfht *ht, + unsigned long hash, + lttng_ust_lfht_match_fct match, + const void *key, + struct lttng_ust_lfht_node *node) +{ + unsigned long size; + struct lttng_ust_lfht_iter iter; + + node->reverse_hash = bit_reverse_ulong(hash); + size = lttng_ust_rcu_dereference(ht->size); + _lttng_ust_lfht_add(ht, hash, match, key, size, node, &iter, 0); + return iter.node; +} + +struct lttng_ust_lfht_node *lttng_ust_lfht_add_replace(struct lttng_ust_lfht *ht, + unsigned long hash, + lttng_ust_lfht_match_fct match, + const void *key, + struct lttng_ust_lfht_node *node) +{ + unsigned long size; + struct lttng_ust_lfht_iter iter; + + node->reverse_hash = bit_reverse_ulong(hash); + size = lttng_ust_rcu_dereference(ht->size); + for (;;) { + _lttng_ust_lfht_add(ht, hash, match, key, size, node, &iter, 0); + if (iter.node == node) { + return NULL; + } + + if (!_lttng_ust_lfht_replace(ht, size, iter.node, iter.next, node)) + return iter.node; + } +} + +int lttng_ust_lfht_replace(struct lttng_ust_lfht *ht, + struct lttng_ust_lfht_iter *old_iter, + unsigned long hash, + lttng_ust_lfht_match_fct match, + const void *key, + struct lttng_ust_lfht_node *new_node) +{ + unsigned long size; + + new_node->reverse_hash = bit_reverse_ulong(hash); + if (!old_iter->node) + return -ENOENT; + if (caa_unlikely(old_iter->node->reverse_hash != new_node->reverse_hash)) + return -EINVAL; + if (caa_unlikely(!match(old_iter->node, key))) + return -EINVAL; + size = lttng_ust_rcu_dereference(ht->size); + return _lttng_ust_lfht_replace(ht, size, old_iter->node, old_iter->next, + new_node); +} + +int lttng_ust_lfht_del(struct lttng_ust_lfht *ht, struct lttng_ust_lfht_node *node) +{ + unsigned long size; + + size = lttng_ust_rcu_dereference(ht->size); + return _lttng_ust_lfht_del(ht, size, node); +} + +int lttng_ust_lfht_is_node_deleted(const struct lttng_ust_lfht_node *node) +{ + return is_removed(CMM_LOAD_SHARED(node->next)); +} + +static +int lttng_ust_lfht_delete_bucket(struct lttng_ust_lfht *ht) +{ + struct lttng_ust_lfht_node *node; + unsigned long order, i, size; + + /* Check that the table is empty */ + node = bucket_at(ht, 0); + do { + node = clear_flag(node)->next; + if (!is_bucket(node)) + return -EPERM; + assert(!is_removed(node)); + assert(!is_removal_owner(node)); + } while (!is_end(node)); + /* + * size accessed without lttng_ust_rcu_dereference because hash table is + * being destroyed. + */ + size = ht->size; + /* Internal sanity check: all nodes left should be buckets */ + for (i = 0; i < size; i++) { + node = bucket_at(ht, i); + dbg_printf("delete bucket: index %lu expected hash %lu hash %lu\n", + i, i, bit_reverse_ulong(node->reverse_hash)); + assert(is_bucket(node->next)); + } + + for (order = lttng_ust_lfht_get_count_order_ulong(size); (long)order >= 0; order--) + lttng_ust_lfht_free_bucket_table(ht, order); + + return 0; +} + +/* + * Should only be called when no more concurrent readers nor writers can + * possibly access the table. 
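+ *
+ * Illustrative teardown sketch (ensuring quiescence is the caller's
+ * responsibility, e.g. by unpublishing the table and waiting for a
+ * grace period with lttng_ust_synchronize_trace()):
+ *
+ *   ret = lttng_ust_lfht_destroy(ht);   returns 0 on success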
+ */ +int lttng_ust_lfht_destroy(struct lttng_ust_lfht *ht) +{ + int ret; + + ret = lttng_ust_lfht_delete_bucket(ht); + if (ret) + return ret; + ret = pthread_mutex_destroy(&ht->resize_mutex); + if (ret) + ret = -EBUSY; + poison_free(ht); + return ret; +} diff --git a/liblttng-ust/rculfhash.h b/liblttng-ust/rculfhash.h new file mode 100644 index 00000000..58f5a5da --- /dev/null +++ b/liblttng-ust/rculfhash.h @@ -0,0 +1,467 @@ +#ifndef _LTTNG_UST_RCULFHASH_H +#define _LTTNG_UST_RCULFHASH_H + +/* + * urcu/rculfhash.h + * + * Userspace RCU library - Lock-Free RCU Hash Table + * + * Copyright 2011 - Mathieu Desnoyers + * Copyright 2011 - Lai Jiangshan + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +struct lttng_ust_lfht; + +/* + * lttng_ust_lfht_node: Contains the next pointers and reverse-hash + * value required for lookup and traversal of the hash table. + * + * struct lttng_ust_lfht_node should be aligned on 8-bytes boundaries because + * the three lower bits are used as flags. It is worth noting that the + * information contained within these three bits could be represented on + * two bits by re-using the same bit for REMOVAL_OWNER_FLAG and + * BUCKET_FLAG. This can be done if we ensure that no iterator nor + * updater check the BUCKET_FLAG after it detects that the REMOVED_FLAG + * is set. Given the minimum size of struct lttng_ust_lfht_node is 8 bytes on + * 32-bit architectures, we choose to go for simplicity and reserve + * three bits. + * + * struct lttng_ust_lfht_node can be embedded into a structure (as a field). + * caa_container_of() can be used to get the structure from the struct + * lttng_ust_lfht_node after a lookup. + * + * The structure which embeds it typically holds the key (or key-value + * pair) of the object. The caller code is responsible for calculation + * of the hash value for lttng_ust_lfht APIs. + */ +struct lttng_ust_lfht_node { + struct lttng_ust_lfht_node *next; /* ptr | REMOVAL_OWNER_FLAG | BUCKET_FLAG | REMOVED_FLAG */ + unsigned long reverse_hash; +} __attribute__((aligned(8))); + +/* lttng_ust_lfht_iter: Used to track state while traversing a hash chain. */ +struct lttng_ust_lfht_iter { + struct lttng_ust_lfht_node *node, *next; +}; + +static inline +struct lttng_ust_lfht_node *lttng_ust_lfht_iter_get_node(struct lttng_ust_lfht_iter *iter) +{ + return iter->node; +} + +struct rcu_flavor_struct; + +/* + * Caution ! + * Ensure reader and writer threads are registered as urcu readers. + */ + +typedef int (*lttng_ust_lfht_match_fct)(struct lttng_ust_lfht_node *node, const void *key); + +/* + * lttng_ust_lfht_node_init - initialize a hash table node + * @node: the node to initialize. 
+ * + * This function is kept to be eventually used for debugging purposes + * (detection of memory corruption). + */ +static inline +void lttng_ust_lfht_node_init(struct lttng_ust_lfht_node *node) +{ +} + +/* + * Hash table creation flags. + */ +enum { + LTTNG_UST_LFHT_AUTO_RESIZE = (1U << 0), + LTTNG_UST_LFHT_ACCOUNTING = (1U << 1), +}; + +struct lttng_ust_lfht_mm_type { + struct lttng_ust_lfht *(*alloc_lttng_ust_lfht)(unsigned long min_nr_alloc_buckets, + unsigned long max_nr_buckets); + void (*alloc_bucket_table)(struct lttng_ust_lfht *ht, unsigned long order); + void (*free_bucket_table)(struct lttng_ust_lfht *ht, unsigned long order); + struct lttng_ust_lfht_node *(*bucket_at)(struct lttng_ust_lfht *ht, + unsigned long index); +}; + +extern const struct lttng_ust_lfht_mm_type lttng_ust_lfht_mm_order; +extern const struct lttng_ust_lfht_mm_type lttng_ust_lfht_mm_chunk; +extern const struct lttng_ust_lfht_mm_type lttng_ust_lfht_mm_mmap; + +/* + * lttng_ust_lfht_new - allocate a hash table. + * @init_size: number of buckets to allocate initially. Must be power of two. + * @min_nr_alloc_buckets: the minimum number of allocated buckets. + * (must be power of two) + * @max_nr_buckets: the maximum number of hash table buckets allowed. + * (must be power of two, 0 is accepted, means + * "infinite") + * @flags: hash table creation flags (can be combined with bitwise or: '|'). + * 0: no flags. + * LTTNG_UST_LFHT_AUTO_RESIZE: automatically resize hash table. + * LTTNG_UST_LFHT_ACCOUNTING: count the number of node addition + * and removal in the table + * + * Return NULL on error. + * Note: the RCU flavor must be already included before the hash table header. + */ +extern +struct lttng_ust_lfht *lttng_ust_lfht_new(unsigned long init_size, + unsigned long min_nr_alloc_buckets, + unsigned long max_nr_buckets, + int flags, + const struct lttng_ust_lfht_mm_type *mm); + +/* + * lttng_ust_lfht_destroy - destroy a hash table. + * @ht: the hash table to destroy. + * + * Return 0 on success, negative error value on error. + + * Prior to liburcu 0.10: + * - Threads calling this API need to be registered RCU read-side + * threads. + * - lttng_ust_lfht_destroy should *not* be called from a RCU read-side + * critical section. It should *not* be called from a call_rcu thread + * context neither. + * + * Starting from liburcu 0.10, rculfhash implements its own worker + * thread to handle resize operations, which removes RCU requirements on + * lttng_ust_lfht_destroy. + */ +extern +int lttng_ust_lfht_destroy(struct lttng_ust_lfht *ht); + +/* + * lttng_ust_lfht_count_nodes - count the number of nodes in the hash table. + * @ht: the hash table. + * @split_count_before: sample the node count split-counter before traversal. + * @count: traverse the hash table, count the number of nodes observed. + * @split_count_after: sample the node count split-counter after traversal. + * + * Call with rcu_read_lock held. + * Threads calling this API need to be registered RCU read-side threads. + */ +extern +void lttng_ust_lfht_count_nodes(struct lttng_ust_lfht *ht, + long *split_count_before, + unsigned long *count, + long *split_count_after); + +/* + * lttng_ust_lfht_lookup - lookup a node by key. + * @ht: the hash table. + * @hash: the key hash. + * @match: the key match function. + * @key: the current node key. + * @iter: node, if found (output). *iter->node set to NULL if not found. + * + * Call with rcu_read_lock held. + * Threads calling this API need to be registered RCU read-side threads. 
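+ *
+ * Illustrative lookup sequence (a sketch; "struct mynode", its "node"
+ * member and the match callback are hypothetical caller-side code):
+ *
+ *   struct lttng_ust_lfht_iter iter;
+ *   struct lttng_ust_lfht_node *ht_node;
+ *   struct mynode *found = NULL;
+ *
+ *   lttng_ust_urcu_read_lock();
+ *   lttng_ust_lfht_lookup(ht, hash, match, key, &iter);
+ *   ht_node = lttng_ust_lfht_iter_get_node(&iter);
+ *   if (ht_node)
+ *     found = caa_container_of(ht_node, struct mynode, node);
+ *   lttng_ust_urcu_read_unlock();
+ *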
+ * This function acts as a rcu_dereference() to read the node pointer.
+ */
+extern
+void lttng_ust_lfht_lookup(struct lttng_ust_lfht *ht, unsigned long hash,
+ lttng_ust_lfht_match_fct match, const void *key,
+ struct lttng_ust_lfht_iter *iter);
+
+/*
+ * lttng_ust_lfht_next_duplicate - get the next item with same key, after iterator.
+ * @ht: the hash table.
+ * @match: the key match function.
+ * @key: the current node key.
+ * @iter: input: current iterator.
+ * output: node, if found. *iter->node set to NULL if not found.
+ *
+ * Uses an iterator initialized by a lookup or traversal. Important: the
+ * iterator _needs_ to be initialized before calling
+ * lttng_ust_lfht_next_duplicate.
+ * Sets *iter->node to the following node with same key.
+ * Sets *iter->node to NULL if no following node exists with same key.
+ * RCU read-side lock must be held across lttng_ust_lfht_lookup and
+ * lttng_ust_lfht_next calls, and also between lttng_ust_lfht_next calls using the
+ * node returned by a previous lttng_ust_lfht_next.
+ * Call with rcu_read_lock held.
+ * Threads calling this API need to be registered RCU read-side threads.
+ * This function acts as a rcu_dereference() to read the node pointer.
+ */
+extern
+void lttng_ust_lfht_next_duplicate(struct lttng_ust_lfht *ht,
+ lttng_ust_lfht_match_fct match, const void *key,
+ struct lttng_ust_lfht_iter *iter);
+
+/*
+ * lttng_ust_lfht_first - get the first node in the table.
+ * @ht: the hash table.
+ * @iter: First node, if exists (output). *iter->node set to NULL if not found.
+ *
+ * Output in "*iter". *iter->node set to NULL if table is empty.
+ * Call with rcu_read_lock held.
+ * Threads calling this API need to be registered RCU read-side threads.
+ * This function acts as a rcu_dereference() to read the node pointer.
+ */
+extern
+void lttng_ust_lfht_first(struct lttng_ust_lfht *ht, struct lttng_ust_lfht_iter *iter);
+
+/*
+ * lttng_ust_lfht_next - get the next node in the table.
+ * @ht: the hash table.
+ * @iter: input: current iterator.
+ * output: next node, if exists. *iter->node set to NULL if not found.
+ *
+ * Input/Output in "*iter". *iter->node set to NULL if *iter was
+ * pointing to the last table node.
+ * Call with rcu_read_lock held.
+ * Threads calling this API need to be registered RCU read-side threads.
+ * This function acts as a rcu_dereference() to read the node pointer.
+ */
+extern
+void lttng_ust_lfht_next(struct lttng_ust_lfht *ht, struct lttng_ust_lfht_iter *iter);
+
+/*
+ * lttng_ust_lfht_add - add a node to the hash table.
+ * @ht: the hash table.
+ * @hash: the key hash.
+ * @node: the node to add.
+ *
+ * This function supports adding redundant keys into the table.
+ * Call with rcu_read_lock held.
+ * Threads calling this API need to be registered RCU read-side threads.
+ * This function issues a full memory barrier before and after its
+ * atomic commit.
+ */
+extern
+void lttng_ust_lfht_add(struct lttng_ust_lfht *ht, unsigned long hash,
+ struct lttng_ust_lfht_node *node);
+
+/*
+ * lttng_ust_lfht_add_unique - add a node to hash table, if key is not present.
+ * @ht: the hash table.
+ * @hash: the node's hash.
+ * @match: the key match function.
+ * @key: the node's key.
+ * @node: the node to try adding.
+ *
+ * Return the node added upon success.
+ * Return the unique node already present upon failure. If
+ * lttng_ust_lfht_add_unique fails, the node passed as parameter should be
+ * freed by the caller.
In this case, the caller does NOT need to wait
+ * for a grace period before freeing or re-using the node.
+ * Call with rcu_read_lock held.
+ * Threads calling this API need to be registered RCU read-side threads.
+ *
+ * The semantic of this function is that if only this function is used
+ * to add keys into the table, no duplicated keys should ever be
+ * observable in the table. The same guarantee applies to combinations of
+ * add_unique and add_replace (see below).
+ *
+ * Upon success, this function issues a full memory barrier before and
+ * after its atomic commit. Upon failure, this function acts like a
+ * simple lookup operation: it acts as a rcu_dereference() to read the
+ * node pointer. The failure case does not guarantee any other memory
+ * barrier.
+ */
+extern
+struct lttng_ust_lfht_node *lttng_ust_lfht_add_unique(struct lttng_ust_lfht *ht,
+ unsigned long hash,
+ lttng_ust_lfht_match_fct match,
+ const void *key,
+ struct lttng_ust_lfht_node *node);
+
+/*
+ * lttng_ust_lfht_add_replace - replace or add a node within hash table.
+ * @ht: the hash table.
+ * @hash: the node's hash.
+ * @match: the key match function.
+ * @key: the node's key.
+ * @node: the node to add.
+ *
+ * Return the node replaced upon success. If no node matching the key
+ * was present, return NULL, which also means the operation succeeded.
+ * This replacement operation should never fail.
+ * Call with rcu_read_lock held.
+ * Threads calling this API need to be registered RCU read-side threads.
+ * After successful replacement, a grace period must be waited for before
+ * freeing or re-using the memory reserved for the returned node.
+ *
+ * The semantic of replacement vs lookups and traversals is the
+ * following: if lookups and traversals are performed between a key
+ * unique insertion and its removal, we guarantee that the lookups and
+ * traversals will always find exactly one instance of the key if it is
+ * replaced concurrently with the lookups.
+ *
+ * Providing this semantic allows us to ensure that replacement-only
+ * schemes will never generate duplicated keys. It also allows us to
+ * guarantee that a combination of add_replace and add_unique updates
+ * will never generate duplicated keys.
+ *
+ * This function issues a full memory barrier before and after its
+ * atomic commit.
+ */
+extern
+struct lttng_ust_lfht_node *lttng_ust_lfht_add_replace(struct lttng_ust_lfht *ht,
+ unsigned long hash,
+ lttng_ust_lfht_match_fct match,
+ const void *key,
+ struct lttng_ust_lfht_node *node);
+
+/*
+ * lttng_ust_lfht_replace - replace a node pointed to by iter within hash table.
+ * @ht: the hash table.
+ * @old_iter: the iterator position of the node to replace.
+ * @hash: the node's hash.
+ * @match: the key match function.
+ * @key: the node's key.
+ * @new_node: the new node to use as replacement.
+ *
+ * Return 0 if replacement is successful, negative value otherwise.
+ * Replacing a NULL old node or an already removed node will fail with
+ * -ENOENT.
+ * If the hash or value of the node to replace and the new node differ,
+ * this function returns -EINVAL without proceeding to the replacement.
+ * Old node can be looked up with lttng_ust_lfht_lookup and lttng_ust_lfht_next.
+ * RCU read-side lock must be held between lookup and replacement.
+ * Call with rcu_read_lock held.
+ * Threads calling this API need to be registered RCU read-side threads.
+ * After successful replacement, a grace period must be waited for before + * freeing or re-using the memory reserved for the old node (which can + * be accessed with lttng_ust_lfht_iter_get_node). + * + * The semantic of replacement vs lookups is the same as + * lttng_ust_lfht_add_replace(). + * + * Upon success, this function issues a full memory barrier before and + * after its atomic commit. Upon failure, this function does not issue + * any memory barrier. + */ +extern +int lttng_ust_lfht_replace(struct lttng_ust_lfht *ht, + struct lttng_ust_lfht_iter *old_iter, + unsigned long hash, + lttng_ust_lfht_match_fct match, + const void *key, + struct lttng_ust_lfht_node *new_node); + +/* + * lttng_ust_lfht_del - remove node pointed to by iterator from hash table. + * @ht: the hash table. + * @node: the node to delete. + * + * Return 0 if the node is successfully removed, negative value + * otherwise. + * Deleting a NULL node or an already removed node will fail with a + * negative value. + * Node can be looked up with lttng_ust_lfht_lookup and lttng_ust_lfht_next, + * followed by use of lttng_ust_lfht_iter_get_node. + * RCU read-side lock must be held between lookup and removal. + * Call with rcu_read_lock held. + * Threads calling this API need to be registered RCU read-side threads. + * After successful removal, a grace period must be waited for before + * freeing or re-using the memory reserved for old node (which can be + * accessed with lttng_ust_lfht_iter_get_node). + * Upon success, this function issues a full memory barrier before and + * after its atomic commit. Upon failure, this function does not issue + * any memory barrier. + */ +extern +int lttng_ust_lfht_del(struct lttng_ust_lfht *ht, struct lttng_ust_lfht_node *node); + +/* + * lttng_ust_lfht_is_node_deleted - query whether a node is removed from hash table. + * + * Return non-zero if the node is deleted from the hash table, 0 + * otherwise. + * Node can be looked up with lttng_ust_lfht_lookup and lttng_ust_lfht_next, + * followed by use of lttng_ust_lfht_iter_get_node. + * RCU read-side lock must be held between lookup and call to this + * function. + * Call with rcu_read_lock held. + * Threads calling this API need to be registered RCU read-side threads. + * This function does not issue any memory barrier. + */ +extern +int lttng_ust_lfht_is_node_deleted(const struct lttng_ust_lfht_node *node); + +/* + * lttng_ust_lfht_resize - Force a hash table resize + * @ht: the hash table. + * @new_size: update to this hash table size. + * + * Threads calling this API need to be registered RCU read-side threads. + * This function does not (necessarily) issue memory barriers. + * lttng_ust_lfht_resize should *not* be called from a RCU read-side critical + * section. + */ +extern +void lttng_ust_lfht_resize(struct lttng_ust_lfht *ht, unsigned long new_size); + +/* + * Note: it is safe to perform element removal (del), replacement, or + * any hash table update operation during any of the following hash + * table traversals. + * These functions act as rcu_dereference() to read the node pointers. 
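+ *
+ * Illustrative traversal sketch ("struct mynode", its "node" member and
+ * the use() handler are hypothetical caller-side code):
+ *
+ *   struct lttng_ust_lfht_iter iter;
+ *   struct mynode *pos;
+ *
+ *   lttng_ust_urcu_read_lock();
+ *   lttng_ust_lfht_for_each_entry(ht, &iter, pos, node)
+ *     use(pos);
+ *   lttng_ust_urcu_read_unlock();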
+ */ +#define lttng_ust_lfht_for_each(ht, iter, node) \ + for (lttng_ust_lfht_first(ht, iter), \ + node = lttng_ust_lfht_iter_get_node(iter); \ + node != NULL; \ + lttng_ust_lfht_next(ht, iter), \ + node = lttng_ust_lfht_iter_get_node(iter)) + +#define lttng_ust_lfht_for_each_duplicate(ht, hash, match, key, iter, node) \ + for (lttng_ust_lfht_lookup(ht, hash, match, key, iter), \ + node = lttng_ust_lfht_iter_get_node(iter); \ + node != NULL; \ + lttng_ust_lfht_next_duplicate(ht, match, key, iter), \ + node = lttng_ust_lfht_iter_get_node(iter)) + +#define lttng_ust_lfht_for_each_entry(ht, iter, pos, member) \ + for (lttng_ust_lfht_first(ht, iter), \ + pos = caa_container_of(lttng_ust_lfht_iter_get_node(iter), \ + __typeof__(*(pos)), member); \ + lttng_ust_lfht_iter_get_node(iter) != NULL; \ + lttng_ust_lfht_next(ht, iter), \ + pos = caa_container_of(lttng_ust_lfht_iter_get_node(iter), \ + __typeof__(*(pos)), member)) + +#define lttng_ust_lfht_for_each_entry_duplicate(ht, hash, match, key, \ + iter, pos, member) \ + for (lttng_ust_lfht_lookup(ht, hash, match, key, iter), \ + pos = caa_container_of(lttng_ust_lfht_iter_get_node(iter), \ + __typeof__(*(pos)), member); \ + lttng_ust_lfht_iter_get_node(iter) != NULL; \ + lttng_ust_lfht_next_duplicate(ht, match, key, iter), \ + pos = caa_container_of(lttng_ust_lfht_iter_get_node(iter), \ + __typeof__(*(pos)), member)) + +#ifdef __cplusplus +} +#endif + +#endif /* _LTTNG_UST_RCULFHASH_H */ diff --git a/liblttng-ust/tracepoint-internal.h b/liblttng-ust/tracepoint-internal.h index 1e6f92b6..964b1f0e 100644 --- a/liblttng-ust/tracepoint-internal.h +++ b/liblttng-ust/tracepoint-internal.h @@ -20,8 +20,8 @@ */ #include -#include #include +#include #define TRACE_DEFAULT TRACE_DEBUG_LINE @@ -44,13 +44,15 @@ extern int __tracepoint_probe_unregister_queue_release(const char *name, void (*func)(void), void *data); extern void __tracepoint_probe_prune_release_queue(void); +void lttng_ust_synchronize_trace(void); + /* * call after disconnection of last probe implemented within a * shared object before unmapping the library that contains the probe. */ static inline void tracepoint_synchronize_unregister(void) { - urcu_bp_synchronize_rcu(); + lttng_ust_synchronize_trace(); } extern void init_tracepoint(void); diff --git a/liblttng-ust/tracepoint.c b/liblttng-ust/tracepoint.c index b109fe30..cf7cf63c 100644 --- a/liblttng-ust/tracepoint.c +++ b/liblttng-ust/tracepoint.c @@ -26,7 +26,7 @@ #include #include -#include +#include #include #include #include @@ -151,6 +151,72 @@ struct callsite_entry { bool tp_entry_callsite_ref; /* Has a tp_entry took a ref on this callsite */ }; +static int tracepoint_v1_api_used; +static void (*lttng_ust_liburcu_bp_synchronize_rcu)(void); +static void (*lttng_ust_liburcu_bp_rcu_read_lock)(void); +static void (*lttng_ust_liburcu_bp_rcu_read_unlock)(void); +void (*lttng_ust_liburcu_bp_before_fork)(void); +void (*lttng_ust_liburcu_bp_after_fork_parent)(void); +void (*lttng_ust_liburcu_bp_after_fork_child)(void); + +static bool lttng_ust_tracepoint_v1_used(void) +{ + return uatomic_read(&tracepoint_v1_api_used); +} + +static void lttng_ust_tracepoint_set_v1_used(void) +{ + if (!lttng_ust_tracepoint_v1_used()) { + /* + * Perform dlsym here rather than lazily on first use to + * eliminate nesting of dynamic loader lock (used within + * dlsym) inside the ust lock. 
+ */ + if (!lttng_ust_liburcu_bp_synchronize_rcu) { + lttng_ust_liburcu_bp_synchronize_rcu = URCU_FORCE_CAST(void (*)(void), + dlsym(RTLD_DEFAULT, "synchronize_rcu_bp")); + if (!lttng_ust_liburcu_bp_synchronize_rcu) + abort(); + } + if (!lttng_ust_liburcu_bp_before_fork) { + lttng_ust_liburcu_bp_before_fork = URCU_FORCE_CAST(void (*)(void), + dlsym(RTLD_DEFAULT, "rcu_bp_before_fork")); + if (!lttng_ust_liburcu_bp_before_fork) + abort(); + } + if (!lttng_ust_liburcu_bp_after_fork_parent) { + lttng_ust_liburcu_bp_after_fork_parent = URCU_FORCE_CAST(void (*)(void), + dlsym(RTLD_DEFAULT, "rcu_bp_after_fork_parent")); + if (!lttng_ust_liburcu_bp_after_fork_parent) + abort(); + } + if (!lttng_ust_liburcu_bp_after_fork_child) { + lttng_ust_liburcu_bp_after_fork_child = URCU_FORCE_CAST(void (*)(void), + dlsym(RTLD_DEFAULT, "rcu_bp_after_fork_child")); + if (!lttng_ust_liburcu_bp_after_fork_child) + abort(); + } + if (!lttng_ust_liburcu_bp_rcu_read_lock) { + lttng_ust_liburcu_bp_rcu_read_lock = URCU_FORCE_CAST(void (*)(void), + dlsym(RTLD_DEFAULT, "rcu_read_lock_bp")); + if (!lttng_ust_liburcu_bp_rcu_read_lock) + abort(); + } + if (!lttng_ust_liburcu_bp_rcu_read_unlock) { + lttng_ust_liburcu_bp_rcu_read_unlock = URCU_FORCE_CAST(void (*)(void), + dlsym(RTLD_DEFAULT, "rcu_read_unlock_bp")); + if (!lttng_ust_liburcu_bp_rcu_read_unlock) + abort(); + } + + /* Fixup URCU bp TLS. */ + lttng_ust_liburcu_bp_rcu_read_lock(); + lttng_ust_liburcu_bp_rcu_read_unlock(); + + uatomic_set(&tracepoint_v1_api_used, 1); + } +} + /* coverity[+alloc] */ static void *allocate_probes(int count) { @@ -166,7 +232,7 @@ static void release_probes(void *old) if (old) { struct tp_probes *tp_probes = caa_container_of(old, struct tp_probes, probes[0]); - urcu_bp_synchronize_rcu(); + lttng_ust_synchronize_trace(); free(tp_probes); } } @@ -386,7 +452,7 @@ static void set_tracepoint(struct tracepoint_entry **entry, * include/linux/tracepoints.h. A matching cmm_smp_read_barrier_depends() * is used. */ - rcu_assign_pointer(elem->probes, (*entry)->probes); + lttng_ust_rcu_assign_pointer(elem->probes, (*entry)->probes); CMM_STORE_SHARED(elem->state, active); } @@ -399,7 +465,7 @@ static void set_tracepoint(struct tracepoint_entry **entry, static void disable_tracepoint(struct lttng_ust_tracepoint *elem) { CMM_STORE_SHARED(elem->state, 0); - rcu_assign_pointer(elem->probes, NULL); + lttng_ust_rcu_assign_pointer(elem->probes, NULL); } /* @@ -750,7 +816,7 @@ void __tracepoint_probe_prune_release_queue(void) release_queue_need_update = 0; /* Wait for grace period between all sync_callsites and free. */ - urcu_bp_synchronize_rcu(); + lttng_ust_synchronize_trace(); cds_list_for_each_entry_safe(pos, next, &release_probes, u.list) { cds_list_del(&pos->u.list); @@ -841,7 +907,7 @@ void tracepoint_probe_update_all(void) tracepoint_update_probes(); /* Wait for grace period between update_probes and free. */ - urcu_bp_synchronize_rcu(); + lttng_ust_synchronize_trace(); cds_list_for_each_entry_safe(pos, next, &release_probes, u.list) { cds_list_del(&pos->u.list); free(pos); @@ -868,8 +934,20 @@ static void new_tracepoints(struct lttng_ust_tracepoint * const *start, } } -int tracepoint_register_lib(struct lttng_ust_tracepoint * const *tracepoints_start, - int tracepoints_count) +/* + * tracepoint_{un,}register_lib is meant to be looked up by instrumented + * applications through dlsym(). If found, those can register their + * tracepoints, else those tracepoints will not be available for + * tracing. 
The number at the end of those symbols acts as a major + * version for tracepoints. + * + * Older instrumented applications should still work with newer + * liblttng-ust, but it is fine that instrumented applications compiled + * against recent liblttng-ust headers require a recent liblttng-ust + * runtime for those tracepoints to be taken into account. + */ +int tracepoint_register_lib2(struct lttng_ust_tracepoint * const *tracepoints_start, + int tracepoints_count) { struct tracepoint_lib *pl, *iter; @@ -917,7 +995,15 @@ lib_added: return 0; } -int tracepoint_unregister_lib(struct lttng_ust_tracepoint * const *tracepoints_start) +/* Exposed for backward compatibility with old instrumented applications. */ +int tracepoint_register_lib(struct lttng_ust_tracepoint * const *tracepoints_start, + int tracepoints_count) +{ + lttng_ust_tracepoint_set_v1_used(); + return tracepoint_register_lib2(tracepoints_start, tracepoints_count); +} + +int tracepoint_unregister_lib2(struct lttng_ust_tracepoint * const *tracepoints_start) { struct tracepoint_lib *lib; @@ -943,6 +1029,13 @@ int tracepoint_unregister_lib(struct lttng_ust_tracepoint * const *tracepoints_s return 0; } +/* Exposed for backward compatibility with old instrumented applications. */ +int tracepoint_unregister_lib(struct lttng_ust_tracepoint * const *tracepoints_start) +{ + lttng_ust_tracepoint_set_v1_used(); + return tracepoint_unregister_lib2(tracepoints_start); +} + /* * Report in debug message whether the compiler correctly supports weak * hidden symbols. This test checks that the address associated with two @@ -981,23 +1074,23 @@ void exit_tracepoint(void) /* * Create the wrapper symbols. */ -#undef tp_rcu_read_lock_bp -#undef tp_rcu_read_unlock_bp -#undef tp_rcu_dereference_bp +#undef tp_rcu_read_lock +#undef tp_rcu_read_unlock +#undef tp_rcu_dereference -void tp_rcu_read_lock_bp(void) +void tp_rcu_read_lock(void) { - urcu_bp_read_lock(); + lttng_ust_urcu_read_lock(); } -void tp_rcu_read_unlock_bp(void) +void tp_rcu_read_unlock(void) { - urcu_bp_read_unlock(); + lttng_ust_urcu_read_unlock(); } -void *tp_rcu_dereference_sym_bp(void *p) +void *tp_rcu_dereference_sym(void *p) { - return urcu_bp_dereference(p); + return lttng_ust_rcu_dereference(p); } /* @@ -1024,3 +1117,32 @@ int tp_get_destructors_state(void) { return uatomic_read(&tracepoint_destructors_state); } + +void lttng_ust_synchronize_trace(void) +{ + lttng_ust_urcu_synchronize_rcu(); + /* + * For legacy tracepoint instrumentation, also wait for urcu-bp + * grace period. + */ + if (lttng_ust_liburcu_bp_synchronize_rcu) + lttng_ust_liburcu_bp_synchronize_rcu(); +} + +/* + * Create the wrapper symbols for legacy v1 API. + */ +void tp_rcu_read_lock_bp(void) +{ + lttng_ust_urcu_read_lock(); +} + +void tp_rcu_read_unlock_bp(void) +{ + lttng_ust_urcu_read_unlock(); +} + +void *tp_rcu_dereference_sym_bp(void *p) +{ + return lttng_ust_rcu_dereference(p); +} diff --git a/libringbuffer/frontend_api.h b/libringbuffer/frontend_api.h index 8a320061..c112c846 100644 --- a/libringbuffer/frontend_api.h +++ b/libringbuffer/frontend_api.h @@ -32,7 +32,6 @@ #include -#include #include #include "frontend.h" diff --git a/lttng-ust.pc.in b/lttng-ust.pc.in index f9adde87..45d1a8d4 100644 --- a/lttng-ust.pc.in +++ b/lttng-ust.pc.in @@ -7,7 +7,6 @@ Name: LTTng Userspace Tracer Description: The LTTng Userspace Tracer (UST) is a library accompanied by a set of tools to trace userspace code. 
Version: @PACKAGE_VERSION@ Requires: -Requires.private: liburcu-bp Libs: -L${libdir} -llttng-ust -ldl Cflags: -I${includedir} -- 2.34.1