Fix: sessiond: ust: deadlock with per-pid buffers
authorMathieu Desnoyers <mathieu.desnoyers@efficios.com>
Fri, 1 Nov 2019 20:23:05 +0000 (16:23 -0400)
committerJérémie Galarneau <jeremie.galarneau@efficios.com>
Wed, 6 Nov 2019 20:13:09 +0000 (15:13 -0500)
Do not hold the registry lock while communicating with the consumerd,
because doing so causes inter-process deadlocks between consumerd and
sessiond with the metadata request notification.

The deadlock involves both sessiond and consumerd:

* lttng-sessiond:

thread 11 - thread_application_management

close_metadata()
  pthread_mutex_lock(&registry->lock);
  consumer_close_metadata()
    pthread_mutex_lock(socket->lock);

thread 15 - thread_consumer_management

ust_consumer_metadata_request()
  pthread_mutex_lock(&ust_reg->lock);

thread 8 - thread_manage_clients

consumer_is_data_pending
  pthread_mutex_lock(socket->lock);
  consumer_socket_recv()

* lttng-consumerd:

thread 4 - consumer_timer_thread

sample_channel_positions()
  pthread_mutex_lock(&stream->lock);

thread 8 - consumer_thread_sessiond_poll
  consumer_data_pending
  pthread_mutex_lock(&consumer_data.lock);
  pthread_mutex_lock(&stream->lock);

thread 7 - consumer_thread_data_poll

lttng_consumer_read_subbuffer
  pthread_mutex_lock(&stream->chan->lock);
  pthread_mutex_lock(&stream->lock);
  do_sync_metadata
    pthread_mutex_lock(&metadata->lock);
    lttng_ustconsumer_sync_metadata
      pthread_mutex_unlock(&metadata_stream->lock);
      lttng_ustconsumer_request_metadata()
        pthread_mutex_lock(&ctx->metadata_socket_lock);
        lttcomm_recv_unix_sock()

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Jérémie Galarneau <jeremie.galarneau@efficios.com>
src/bin/lttng-sessiond/ust-app.c

index 1731c368328b31fcf8712a6e02309418dc686bda..856de63a3f24152d03944c003bfe28c890f4847a 100644 (file)
@@ -741,6 +741,10 @@ error:
  * nullified. The session lock MUST be held unless the application is
  * in the destroy path.
  *
  * nullified. The session lock MUST be held unless the application is
  * in the destroy path.
  *
+ * Do not hold the registry lock while communicating with the consumerd, because
+ * doing so causes inter-process deadlocks between consumerd and sessiond with
+ * the metadata request notification.
+ *
  * Return 0 on success else a negative value.
  */
 static int close_metadata(struct ust_registry_session *registry,
  * Return 0 on success else a negative value.
  */
 static int close_metadata(struct ust_registry_session *registry,
@@ -748,6 +752,8 @@ static int close_metadata(struct ust_registry_session *registry,
 {
        int ret;
        struct consumer_socket *socket;
 {
        int ret;
        struct consumer_socket *socket;
+       uint64_t metadata_key;
+       bool registry_was_already_closed;
 
        assert(registry);
        assert(consumer);
 
        assert(registry);
        assert(consumer);
@@ -755,8 +761,19 @@ static int close_metadata(struct ust_registry_session *registry,
        rcu_read_lock();
 
        pthread_mutex_lock(&registry->lock);
        rcu_read_lock();
 
        pthread_mutex_lock(&registry->lock);
+       metadata_key = registry->metadata_key;
+       registry_was_already_closed = registry->metadata_closed;
+       if (metadata_key != 0) {
+               /*
+                * Metadata closed. Even on error this means that the consumer
+                * is not responding or not found so either way a second close
+                * should NOT be emit for this registry.
+                */
+               registry->metadata_closed = 1;
+       }
+       pthread_mutex_unlock(&registry->lock);
 
 
-       if (!registry->metadata_key || registry->metadata_closed) {
+       if (metadata_key == 0 || registry_was_already_closed) {
                ret = 0;
                goto end;
        }
                ret = 0;
                goto end;
        }
@@ -766,23 +783,15 @@ static int close_metadata(struct ust_registry_session *registry,
                        consumer);
        if (!socket) {
                ret = -1;
                        consumer);
        if (!socket) {
                ret = -1;
-               goto error;
+               goto end;
        }
 
        }
 
-       ret = consumer_close_metadata(socket, registry->metadata_key);
+       ret = consumer_close_metadata(socket, metadata_key);
        if (ret < 0) {
        if (ret < 0) {
-               goto error;
+               goto end;
        }
 
        }
 
-error:
-       /*
-        * Metadata closed. Even on error this means that the consumer is not
-        * responding or not found so either way a second close should NOT be emit
-        * for this registry.
-        */
-       registry->metadata_closed = 1;
 end:
 end:
-       pthread_mutex_unlock(&registry->lock);
        rcu_read_unlock();
        return ret;
 }
        rcu_read_unlock();
        return ret;
 }
This page took 0.028472 seconds and 4 git commands to generate.