From 9ef70f8738b524b8ea4f266c526de9d5a4fdc29c Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Wed, 11 Apr 2012 21:07:13 -0400 Subject: [PATCH] Fix: work-around glibc __nptl_setxid vs clone hang Hash table resize threads, when they exit, end up setting a "locked" state within libc pthread, which deadlocks with seteuid/setegid called from the cloned process in runas.c when runas() is called exactly when a resize thread exits. Temporarily fix this issue by adding a mutex across this resize operation, which holds mutual exclusion with runas() usage. We should investigate whether we want to properly call exec() from the runas.c clone child before touching any non-async-signal-safe libc call. However, given that this change is more intrusive, let's first use this mutex-based work-around. Before this fix, running 1000 instances of "demo-trace 300" with sessiond running as root, and: lttng create lttng enable-event -u -a lttng start would sometimes lead to consumerd hang with the following clone child backtrace: setxid_mark_thread (cmdp=, t=0x7f52dd47c700) at allocatestack.c:995 995 allocatestack.c: No such file or directory. (gdb) bt full at allocatestack.c:995 ch = at allocatestack.c:1088 t = 0x80 signalled = result = runp = 0x7f52dd47c9c0 at ../sysdeps/unix/sysv/linux/setegid.c:44 __p = 0xfffffffffffffe00 __cmd = {syscall_no = 119, id = {-1, 1000, -1}, cntr = 0} result = data = 0x7f52e66e1930 writelen = writeleft = index = sendret = {i = 0, c = "\000\000\000"} ret = __func__ = "child_run_as" at ../sysdeps/unix/sysv/linux/x86_64/clone.S:112 No locals. No symbol table info available. 
Signed-off-by: Mathieu Desnoyers --- src/common/hashtable/rculfhash.c | 11 +++++++++++ src/common/runas.c | 7 ++++++- src/common/runas.h | 7 +++++++ 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/common/hashtable/rculfhash.c b/src/common/hashtable/rculfhash.c index 840de351b..776f9f391 100644 --- a/src/common/hashtable/rculfhash.c +++ b/src/common/hashtable/rculfhash.c @@ -170,6 +170,15 @@ #include "rculfhash-internal.h" #include "urcu-flavor.h" +/* + * We need to lock pthread exit, which deadlocks __nptl_setxid in the + * runas clone. + * This work-around will be allowed to be removed when runas.c gets + * changed to do an exec() before issuing seteuid/setegid. + * See http://sourceware.org/bugzilla/show_bug.cgi?id=10184 for details. + */ +pthread_mutex_t lttng_libc_state_lock = PTHREAD_MUTEX_INITIALIZER; + /* * Split-counters lazily update the global counter each 1024 * addition/removal. It automatically keeps track of resize required. @@ -1028,6 +1037,7 @@ void partition_resize_helper(struct cds_lfht *ht, unsigned long i, partition_len = len >> cds_lfht_get_count_order_ulong(nr_threads); work = calloc(nr_threads, sizeof(*work)); assert(work); + pthread_mutex_lock(&lttng_libc_state_lock); for (thread = 0; thread < nr_threads; thread++) { work[thread].ht = ht; work[thread].i = i; @@ -1042,6 +1052,7 @@ void partition_resize_helper(struct cds_lfht *ht, unsigned long i, ret = pthread_join(work[thread].thread_id, NULL); assert(!ret); } + pthread_mutex_unlock(&lttng_libc_state_lock); free(work); } diff --git a/src/common/runas.c b/src/common/runas.c index 7de566ddb..2c2015aa2 100644 --- a/src/common/runas.c +++ b/src/common/runas.c @@ -317,8 +317,13 @@ static int run_as(int (*cmd)(void *data), void *data, uid_t uid, gid_t gid) { if (!getenv("LTTNG_DEBUG_NOCLONE")) { + int ret; + DBG("Using run_as_clone"); - return run_as_clone(cmd, data, uid, gid); + pthread_mutex_lock(&lttng_libc_state_lock); + ret = run_as_clone(cmd, data, uid, gid); + 
pthread_mutex_unlock(&lttng_libc_state_lock); + return ret; } else { DBG("Using run_as_noclone"); return run_as_noclone(cmd, data, uid, gid); diff --git a/src/common/runas.h b/src/common/runas.h index 356bb2286..9840eb056 100644 --- a/src/common/runas.h +++ b/src/common/runas.h @@ -20,9 +20,16 @@ */ #include +#include <pthread.h> int run_as_mkdir_recursive(const char *path, mode_t mode, uid_t uid, gid_t gid); int run_as_mkdir(const char *path, mode_t mode, uid_t uid, gid_t gid); int run_as_open(const char *path, int flags, mode_t mode, uid_t uid, gid_t gid); +/* + * We need to lock pthread exit, which deadlocks __nptl_setxid in the + * clone. + */ +extern pthread_mutex_t lttng_libc_state_lock; + #endif /* _RUNAS_H */ -- 2.34.1