From: Mathieu Desnoyers Date: Fri, 14 Jun 2013 11:44:52 +0000 (-0400) Subject: Fix: hash table cleanup call_rcu deadlock X-Git-Tag: v2.2.0-rc3~28 X-Git-Url: http://git.lttng.org/?a=commitdiff_plain;h=0b2dc8df2a6d7b3341a72a04767dd6328907c97c;hp=4950b860f76b938833046793e7221c11e8e89682;p=lttng-tools.git Fix: hash table cleanup call_rcu deadlock Implement hash table cleanup thread so HT cleanup never deadlocks on resize in progress. Signed-off-by: Mathieu Desnoyers Signed-off-by: David Goulet --- diff --git a/include/lttng/lttng.h b/include/lttng/lttng.h index 08a3be467..bcccdb477 100644 --- a/include/lttng/lttng.h +++ b/include/lttng/lttng.h @@ -139,6 +139,7 @@ enum lttng_health_component { LTTNG_HEALTH_APP_REG, LTTNG_HEALTH_KERNEL, LTTNG_HEALTH_CONSUMER, + LTTNG_HEALTH_HT_CLEANUP, LTTNG_HEALTH_ALL, }; diff --git a/src/bin/lttng-sessiond/Makefile.am b/src/bin/lttng-sessiond/Makefile.am index 244bc70eb..ae8b6eb1e 100644 --- a/src/bin/lttng-sessiond/Makefile.am +++ b/src/bin/lttng-sessiond/Makefile.am @@ -25,7 +25,7 @@ lttng_sessiond_SOURCES = utils.c utils.h \ health.c health.h \ cmd.c cmd.h \ buffer-registry.c buffer-registry.h \ - testpoint.h + testpoint.h ht-cleanup.c if HAVE_LIBLTTNG_UST_CTL lttng_sessiond_SOURCES += trace-ust.c ust-registry.c ust-app.c \ diff --git a/src/bin/lttng-sessiond/buffer-registry.c b/src/bin/lttng-sessiond/buffer-registry.c index 6a35284fe..ad103d1de 100644 --- a/src/bin/lttng-sessiond/buffer-registry.c +++ b/src/bin/lttng-sessiond/buffer-registry.c @@ -25,6 +25,7 @@ #include "fd-limit.h" #include "ust-consumer.h" #include "ust-ctl.h" +#include "utils.h" /* * Set in main.c during initialization process of the daemon. This contains @@ -548,7 +549,7 @@ static void buffer_reg_session_destroy(struct buffer_reg_session *regp, } rcu_read_unlock(); - lttng_ht_destroy(regp->channels); + ht_cleanup_push(regp->channels); switch (domain) { case LTTNG_DOMAIN_UST: @@ -692,6 +693,6 @@ void buffer_reg_pid_destroy(struct buffer_reg_pid *regp) void buffer_reg_destroy_registries(void) { DBG3("Buffer registry destroy all registry"); - lttng_ht_destroy(buffer_registry_uid); - lttng_ht_destroy(buffer_registry_pid); + ht_cleanup_push(buffer_registry_uid); + ht_cleanup_push(buffer_registry_pid); } diff --git a/src/bin/lttng-sessiond/consumer.c b/src/bin/lttng-sessiond/consumer.c index 245fda562..2e2087885 100644 --- a/src/bin/lttng-sessiond/consumer.c +++ b/src/bin/lttng-sessiond/consumer.c @@ -32,6 +32,7 @@ #include "consumer.h" #include "health.h" #include "ust-app.h" +#include "utils.h" /* * Receive a reply command status message from the consumer. Consumer socket @@ -428,7 +429,7 @@ void consumer_destroy_output(struct consumer_output *obj) rcu_read_unlock(); /* Finally destroy HT */ - lttng_ht_destroy(obj->socks); + ht_cleanup_push(obj->socks); } free(obj); diff --git a/src/bin/lttng-sessiond/health.h b/src/bin/lttng-sessiond/health.h index 28d7dc649..0b5fb4644 100644 --- a/src/bin/lttng-sessiond/health.h +++ b/src/bin/lttng-sessiond/health.h @@ -44,6 +44,7 @@ enum health_type { HEALTH_TYPE_APP_REG = 2, HEALTH_TYPE_KERNEL = 3, HEALTH_TYPE_CONSUMER = 4, + HEALTH_TYPE_HT_CLEANUP = 5, HEALTH_NUM_TYPE, }; diff --git a/src/bin/lttng-sessiond/ht-cleanup.c b/src/bin/lttng-sessiond/ht-cleanup.c new file mode 100644 index 000000000..7a33840c9 --- /dev/null +++ b/src/bin/lttng-sessiond/ht-cleanup.c @@ -0,0 +1,139 @@ +/* + * Copyright (C) 2013 - Mathieu Desnoyers + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#define _GNU_SOURCE +#include + +#include +#include +#include + +#include "lttng-sessiond.h" +#include "health.h" + +void *thread_ht_cleanup(void *data) +{ + int ret, i, pollfd, err = -1; + uint32_t revents, nb_fd; + struct lttng_poll_event events; + + DBG("[ht-thread] startup."); + + rcu_register_thread(); + rcu_thread_online(); + + health_register(HEALTH_TYPE_HT_CLEANUP); + + health_code_update(); + + ret = sessiond_set_thread_pollset(&events, 2); + if (ret < 0) { + goto error_poll_create; + } + + /* Add pipe to the pollset. */ + ret = lttng_poll_add(&events, ht_cleanup_pipe[0], LPOLLIN | LPOLLERR); + if (ret < 0) { + goto error; + } + + health_code_update(); + + while (1) { + DBG3("[ht-thread] Polling on %d fds.", + LTTNG_POLL_GETNB(&events)); + + /* Inifinite blocking call, waiting for transmission */ +restart: + health_poll_entry(); + ret = lttng_poll_wait(&events, -1); + health_poll_exit(); + if (ret < 0) { + /* + * Restart interrupted system call. + */ + if (errno == EINTR) { + goto restart; + } + goto error; + } + + nb_fd = ret; + + for (i = 0; i < nb_fd; i++) { + struct lttng_ht *ht; + + health_code_update(); + + /* Fetch once the poll data */ + revents = LTTNG_POLL_GETEV(&events, i); + pollfd = LTTNG_POLL_GETFD(&events, i); + + /* Thread quit pipe has been closed. Killing thread. */ + ret = sessiond_check_thread_quit_pipe(pollfd, revents); + if (ret) { + err = 0; + goto exit; + } + assert(pollfd == ht_cleanup_pipe[0]); + + if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) { + ERR("ht cleanup pipe error"); + goto error; + } else if (!(revents & LPOLLIN)) { + /* No POLLIN and not a catched error, stop the thread. */ + ERR("ht cleanup failed. revent: %u", revents); + goto error; + } + + do { + /* Get socket from dispatch thread. */ + ret = read(ht_cleanup_pipe[0], &ht, sizeof(ht)); + } while (ret < 0 && errno == EINTR); + if (ret < 0 || ret < sizeof(ht)) { + PERROR("ht cleanup notify pipe"); + goto error; + } + health_code_update(); + /* + * The whole point of this thread is to call + * lttng_ht_destroy from a context that is NOT: + * 1) a read-side RCU lock, + * 2) a call_rcu thread. + */ + lttng_ht_destroy(ht); + + health_code_update(); + } + } + +exit: +error: + lttng_poll_clean(&events); +error_poll_create: + utils_close_pipe(ht_cleanup_pipe); + ht_cleanup_pipe[0] = ht_cleanup_pipe[1] = -1; + DBG("[ust-thread] cleanup complete."); + if (err) { + health_error(); + ERR("Health error occurred in %s", __func__); + } + health_unregister(); + rcu_thread_offline(); + rcu_unregister_thread(); + return NULL; +} diff --git a/src/bin/lttng-sessiond/lttng-sessiond.h b/src/bin/lttng-sessiond/lttng-sessiond.h index a0916c566..6090e086c 100644 --- a/src/bin/lttng-sessiond/lttng-sessiond.h +++ b/src/bin/lttng-sessiond/lttng-sessiond.h @@ -71,6 +71,15 @@ struct ust_cmd_queue { */ extern int apps_cmd_notify_pipe[2]; +/* + * Used to notify that a hash table needs to be destroyed by dedicated + * thread. Required by design because we don't want to move destroy + * paths outside of large RCU read-side lock paths, and destroy cannot + * be called by call_rcu thread, because it may hang (waiting for + * call_rcu completion). + */ +extern int ht_cleanup_pipe[2]; + /* * Populated when the daemon starts with the current page size of the system. */ @@ -79,4 +88,6 @@ extern long page_size; int sessiond_set_thread_pollset(struct lttng_poll_event *events, size_t size); int sessiond_check_thread_quit_pipe(int fd, uint32_t events); +void *thread_ht_cleanup(void *data); + #endif /* _LTT_SESSIOND_H */ diff --git a/src/bin/lttng-sessiond/main.c b/src/bin/lttng-sessiond/main.c index 4470afc5e..4a2bc4fdd 100644 --- a/src/bin/lttng-sessiond/main.c +++ b/src/bin/lttng-sessiond/main.c @@ -161,6 +161,7 @@ static pthread_t client_thread; static pthread_t kernel_thread; static pthread_t dispatch_thread; static pthread_t health_thread; +static pthread_t ht_cleanup_thread; /* * UST registration command queue. This queue is tied with a futex and uses a N @@ -3225,13 +3226,17 @@ restart: case LTTNG_HEALTH_CONSUMER: reply.ret_code = check_consumer_health(); break; + case LTTNG_HEALTH_HT_CLEANUP: + reply.ret_code = health_check_state(HEALTH_TYPE_HT_CLEANUP); + break; case LTTNG_HEALTH_ALL: reply.ret_code = health_check_state(HEALTH_TYPE_APP_MANAGE) && health_check_state(HEALTH_TYPE_APP_REG) && health_check_state(HEALTH_TYPE_CMD) && health_check_state(HEALTH_TYPE_KERNEL) && - check_consumer_health(); + check_consumer_health() && + health_check_state(HEALTH_TYPE_HT_CLEANUP); break; default: reply.ret_code = LTTNG_ERR_UND; @@ -4293,6 +4298,11 @@ int main(int argc, char **argv) } } + /* Setup the thread ht_cleanup communication pipe. */ + if (utils_create_pipe_cloexec(ht_cleanup_pipe) < 0) { + goto exit; + } + /* Setup the thread apps communication pipe. */ if ((ret = utils_create_pipe_cloexec(apps_cmd_pipe)) < 0) { goto exit; @@ -4331,6 +4341,14 @@ int main(int argc, char **argv) write_pidfile(); + /* Create thread to manage the client socket */ + ret = pthread_create(&ht_cleanup_thread, NULL, + thread_ht_cleanup, (void *) NULL); + if (ret != 0) { + PERROR("pthread_create ht_cleanup"); + goto exit_ht_cleanup; + } + /* Create thread to manage the client socket */ ret = pthread_create(&health_thread, NULL, thread_manage_health, (void *) NULL); @@ -4450,6 +4468,12 @@ exit_client: } exit_health: + ret = pthread_join(ht_cleanup_thread, &status); + if (ret != 0) { + PERROR("pthread_join ht cleanup thread"); + goto error; /* join error, exit without cleanup */ + } +exit_ht_cleanup: exit: /* * cleanup() is called when no other thread is running. diff --git a/src/bin/lttng-sessiond/trace-ust.c b/src/bin/lttng-sessiond/trace-ust.c index 8fb685c8d..51632bad4 100644 --- a/src/bin/lttng-sessiond/trace-ust.c +++ b/src/bin/lttng-sessiond/trace-ust.c @@ -26,6 +26,7 @@ #include "buffer-registry.h" #include "trace-ust.h" +#include "utils.h" /* * Match function for the events hash table lookup. @@ -228,7 +229,7 @@ struct ltt_ust_session *trace_ust_create_session(unsigned int session_id) return lus; error_consumer: - lttng_ht_destroy(lus->domain_global.channels); + ht_cleanup_push(lus->domain_global.channels); free(lus); error: return NULL; @@ -483,7 +484,7 @@ static void destroy_contexts(struct lttng_ht *ht) } rcu_read_unlock(); - lttng_ht_destroy(ht); + ht_cleanup_push(ht); } /* @@ -530,7 +531,7 @@ static void destroy_events(struct lttng_ht *events) } rcu_read_unlock(); - lttng_ht_destroy(events); + ht_cleanup_push(events); } /* @@ -604,7 +605,7 @@ static void destroy_channels(struct lttng_ht *channels) } rcu_read_unlock(); - lttng_ht_destroy(channels); + ht_cleanup_push(channels); } /* diff --git a/src/bin/lttng-sessiond/ust-app.c b/src/bin/lttng-sessiond/ust-app.c index ee3b3406d..37f644215 100644 --- a/src/bin/lttng-sessiond/ust-app.c +++ b/src/bin/lttng-sessiond/ust-app.c @@ -38,6 +38,7 @@ #include "ust-app.h" #include "ust-consumer.h" #include "ust-ctl.h" +#include "utils.h" /* Next available channel key. */ static unsigned long next_channel_key; @@ -307,9 +308,9 @@ void delete_ust_app_stream(int sock, struct ust_app_stream *stream) /* * We need to execute ht_destroy outside of RCU read-side critical - * section, so we postpone its execution using call_rcu. It is simpler - * than to change the semantic of the many callers of - * delete_ust_app_channel(). + * section and outside of call_rcu thread, so we postpone its execution + * using ht_cleanup_push. It is simpler than to change the semantic of + * the many callers of delete_ust_app_session(). */ static void delete_ust_app_channel_rcu(struct rcu_head *head) @@ -317,8 +318,8 @@ void delete_ust_app_channel_rcu(struct rcu_head *head) struct ust_app_channel *ua_chan = caa_container_of(head, struct ust_app_channel, rcu_head); - lttng_ht_destroy(ua_chan->ctx); - lttng_ht_destroy(ua_chan->events); + ht_cleanup_push(ua_chan->ctx); + ht_cleanup_push(ua_chan->events); free(ua_chan); } @@ -583,9 +584,9 @@ end: /* * We need to execute ht_destroy outside of RCU read-side critical - * section, so we postpone its execution using call_rcu. It is simpler - * than to change the semantic of the many callers of - * delete_ust_app_session(). + * section and outside of call_rcu thread, so we postpone its execution + * using ht_cleanup_push. It is simpler than to change the semantic of + * the many callers of delete_ust_app_session(). */ static void delete_ust_app_session_rcu(struct rcu_head *head) @@ -593,7 +594,7 @@ void delete_ust_app_session_rcu(struct rcu_head *head) struct ust_app_session *ua_sess = caa_container_of(head, struct ust_app_session, rcu_head); - lttng_ht_destroy(ua_sess->channels); + ht_cleanup_push(ua_sess->channels); free(ua_sess); } @@ -685,8 +686,8 @@ void delete_ust_app(struct ust_app *app) rcu_read_unlock(); } - lttng_ht_destroy(app->sessions); - lttng_ht_destroy(app->ust_objd); + ht_cleanup_push(app->sessions); + ht_cleanup_push(app->ust_objd); /* * Wait until we have deleted the application from the sock hash table @@ -3155,9 +3156,9 @@ void ust_app_clean_list(void) rcu_read_unlock(); /* Destroy is done only when the ht is empty */ - lttng_ht_destroy(ust_app_ht); - lttng_ht_destroy(ust_app_ht_by_sock); - lttng_ht_destroy(ust_app_ht_by_notify_sock); + ht_cleanup_push(ust_app_ht); + ht_cleanup_push(ust_app_ht_by_sock); + ht_cleanup_push(ust_app_ht_by_notify_sock); } /* diff --git a/src/bin/lttng-sessiond/ust-registry.c b/src/bin/lttng-sessiond/ust-registry.c index 399d48f68..6e9f0699b 100644 --- a/src/bin/lttng-sessiond/ust-registry.c +++ b/src/bin/lttng-sessiond/ust-registry.c @@ -23,6 +23,7 @@ #include #include "ust-registry.h" +#include "utils.h" /* * Hash table match function for event in the registry. @@ -310,9 +311,9 @@ void ust_registry_destroy_event(struct ust_registry_channel *chan, /* * We need to execute ht_destroy outside of RCU read-side critical - * section, so we postpone its execution using call_rcu. It is simpler - * than to change the semantic of the many callers of - * destroy_channel(). + * section and outside of call_rcu thread, so we postpone its execution + * using ht_cleanup_push. It is simpler than to change the semantic of + * the many callers of delete_ust_app_session(). */ static void destroy_channel_rcu(struct rcu_head *head) @@ -321,7 +322,7 @@ void destroy_channel_rcu(struct rcu_head *head) caa_container_of(head, struct ust_registry_channel, rcu_head); if (chan->ht) { - lttng_ht_destroy(chan->ht); + ht_cleanup_push(chan->ht); } free(chan); } @@ -552,7 +553,7 @@ void ust_registry_session_destroy(struct ust_registry_session *reg) rcu_read_unlock(); if (reg->channels) { - lttng_ht_destroy(reg->channels); + ht_cleanup_push(reg->channels); } free(reg->metadata); } diff --git a/src/bin/lttng-sessiond/utils.c b/src/bin/lttng-sessiond/utils.c index 5ea337450..358e407e9 100644 --- a/src/bin/lttng-sessiond/utils.c +++ b/src/bin/lttng-sessiond/utils.c @@ -23,6 +23,9 @@ #include #include "utils.h" +#include "lttng-sessiond.h" + +int ht_cleanup_pipe[2] = { -1, -1 }; /* * Write to writable pipe used to notify a thread. @@ -55,3 +58,27 @@ const char *get_home_dir(void) { return ((const char *) getenv("HOME")); } + +void ht_cleanup_push(struct lttng_ht *ht) +{ + int ret; + int fd = ht_cleanup_pipe[1]; + + if (fd < 0) + return; + do { + ret = write(fd, &ht, sizeof(ht)); + } while (ret < 0 && errno == EINTR); + if (ret < 0 || ret != sizeof(ht)) { + PERROR("write ht cleanup pipe %d", fd); + if (ret < 0) { + ret = -errno; + } + goto error; + } + + /* All good. Don't send back the write positive ret value. */ + ret = 0; +error: + assert(!ret); +} diff --git a/src/bin/lttng-sessiond/utils.h b/src/bin/lttng-sessiond/utils.h index d4bd8c286..7c06d5b03 100644 --- a/src/bin/lttng-sessiond/utils.h +++ b/src/bin/lttng-sessiond/utils.h @@ -18,7 +18,10 @@ #ifndef _LTT_UTILS_H #define _LTT_UTILS_H +struct lttng_ht; + const char *get_home_dir(void); int notify_thread_pipe(int wpipe); +void ht_cleanup_push(struct lttng_ht *ht); #endif /* _LTT_UTILS_H */ diff --git a/tests/unit/Makefile.am b/tests/unit/Makefile.am index e8a6429d8..247178177 100644 --- a/tests/unit/Makefile.am +++ b/tests/unit/Makefile.am @@ -28,6 +28,7 @@ test_uri_LDADD = $(LIBTAP) $(LIBCOMMON) $(LIBHASHTABLE) # Session unit test SESSIONS=$(top_srcdir)/src/bin/lttng-sessiond/session.o \ $(top_srcdir)/src/bin/lttng-sessiond/consumer.o \ + $(top_srcdir)/src/bin/lttng-sessiond/utils.o \ $(top_srcdir)/src/bin/lttng-sessiond/health.o \ $(top_srcdir)/src/common/uri.o \ $(top_srcdir)/src/common/utils.o \ @@ -42,6 +43,7 @@ test_session_LDADD += $(SESSIONS) if HAVE_LIBLTTNG_UST_CTL UST_DATA_TRACE=$(top_srcdir)/src/bin/lttng-sessiond/trace-ust.o \ $(top_srcdir)/src/bin/lttng-sessiond/consumer.o \ + $(top_srcdir)/src/bin/lttng-sessiond/utils.o \ $(top_srcdir)/src/bin/lttng-sessiond/buffer-registry.o \ $(top_srcdir)/src/bin/lttng-sessiond/ust-registry.o \ $(top_srcdir)/src/bin/lttng-sessiond/ust-metadata.o \ @@ -63,6 +65,7 @@ endif KERN_DATA_TRACE=$(top_srcdir)/src/bin/lttng-sessiond/trace-kernel.o \ $(top_srcdir)/src/bin/lttng-sessiond/consumer.o \ $(top_srcdir)/src/bin/lttng-sessiond/health.o \ + $(top_srcdir)/src/bin/lttng-sessiond/utils.o \ $(top_srcdir)/src/common/uri.o \ $(top_srcdir)/src/common/utils.o