Fix: ust-app: per-PID app unregister vs tracing stop races
[lttng-tools.git] / src/bin/lttng-sessiond/ust-app.c
index 7e4bf94051028f7c361749f0a162a4baee8e53b4..dcb6e7c00ff5129cb9ec8930a46fac39d65bcc9b 100644
@@ -16,6 +16,7 @@
  */
 
 #define _GNU_SOURCE
+#define _LGPL_SOURCE
 #include <errno.h>
 #include <inttypes.h>
 #include <pthread.h>
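
A note on the one-line change above: liburcu ships two flavors of its read-side primitives, and defining _LGPL_SOURCE before including the urcu headers selects the faster inline implementations instead of calls into the shared library. A minimal illustrative sketch (not part of the patch):

    #define _LGPL_SOURCE            /* must precede the urcu headers */
    #include <urcu.h>

    static void read_side(void)
    {
            rcu_read_lock();        /* inlined, not a shared-library call */
            /* ... dereference RCU-protected pointers here ... */
            rcu_read_unlock();
    }

This matters for a hash-table-heavy daemon like the sessiond, where read-side sections sit on hot paths.
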
@@ -40,6 +41,9 @@
 #include "ust-ctl.h"
 #include "utils.h"
 
+static
+int ust_app_flush_app_session(struct ust_app *app, struct ust_app_session *ua_sess);
+
 /* Next available channel key. Access under next_channel_key_lock. */
 static uint64_t _next_channel_key;
 static pthread_mutex_t next_channel_key_lock = PTHREAD_MUTEX_INITIALIZER;
@@ -1471,27 +1475,6 @@ int create_ust_event(struct ust_app *app, struct ust_app_session *ua_sess,
                        }
                        goto error;
                }
-       } else {
-               ret = disable_ust_event(app, ua_sess, ua_event);
-               if (ret < 0) {
-                       /*
-                        * If we hit an EPERM, something is wrong with our disable call. If
-                        * we get an EEXIST, there is a problem on the tracer side since we
-                        * just created it.
-                        */
-                       switch (ret) {
-                       case -LTTNG_UST_ERR_PERM:
-                               /* Code flow problem */
-                               assert(0);
-                       case -LTTNG_UST_ERR_EXIST:
-                               /* It's OK for our use case. */
-                               ret = 0;
-                               break;
-                       default:
-                               break;
-                       }
-                       goto error;
-               }
        }
 
 error:
@@ -1752,7 +1735,6 @@ static int setup_buffer_reg_pid(struct ust_app_session *ua_sess,
                if (ret < 0) {
                        goto error;
                }
-               buffer_reg_pid_add(reg_pid);
        } else {
                goto end;
        }
@@ -1765,9 +1747,18 @@ static int setup_buffer_reg_pid(struct ust_app_session *ua_sess,
                        app->byte_order, app->version.major,
                        app->version.minor);
        if (ret < 0) {
+               /*
+                * reg_pid->registry->reg.ust is NULL upon error, so we need to
+                * destroy the buffer registry, because it is always expected
+                * that if the buffer registry can be found, its ust registry is
+                * non-NULL.
+                */
+               buffer_reg_pid_destroy(reg_pid);
                goto error;
        }
 
+       buffer_reg_pid_add(reg_pid);
+
        DBG3("UST app buffer registry per PID created successfully");
 
 end:
@@ -1808,7 +1799,6 @@ static int setup_buffer_reg_uid(struct ltt_ust_session *usess,
                if (ret < 0) {
                        goto error;
                }
-               buffer_reg_uid_add(reg_uid);
        } else {
                goto end;
        }
@@ -1821,13 +1811,21 @@ static int setup_buffer_reg_uid(struct ltt_ust_session *usess,
                        app->byte_order, app->version.major,
                        app->version.minor);
        if (ret < 0) {
+               /*
+                * reg_uid->registry->reg.ust is NULL upon error, so we need to
+                * destroy the buffer registry, because it is always expected
+                * that if the buffer registry can be found, its ust registry is
+                * non-NULL.
+                */
+               buffer_reg_uid_destroy(reg_uid, NULL);
                goto error;
        }
        /* Add node to teardown list of the session. */
        cds_list_add(&reg_uid->lnode, &usess->buffer_reg_uid_list);
 
-       DBG3("UST app buffer registry per UID created successfully");
+       buffer_reg_uid_add(reg_uid);
 
+       DBG3("UST app buffer registry per UID created successfully");
 end:
        if (regp) {
                *regp = reg_uid;
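
Both registry hunks (per-PID above, per-UID here) apply the same ordering fix: the registry used to be added to its lookup structure before its ust registry was initialized, so a concurrent reader could find a buffer registry whose reg.ust was still NULL. The corrected shape, condensed (the creation and init call names are assumed from the surrounding code; the error label is illustrative):

    reg = buffer_reg_uid_create(...);          /* 1. allocate                */
    ret = ust_registry_session_init(...);      /* 2. initialize              */
    if (ret < 0) {
            buffer_reg_uid_destroy(reg, NULL); /* never published: safe free */
            goto error;
    }
    buffer_reg_uid_add(reg);                   /* 3. publish last            */

Publishing only after full initialization is what lets lookups assume a non-NULL ust registry without further checks.
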
@@ -1880,6 +1878,7 @@ static int create_ust_app_session(struct ltt_ust_session *usess,
                /* Init local registry. */
                ret = setup_buffer_reg_pid(ua_sess, app, NULL);
                if (ret < 0) {
+                       delete_ust_app_session(-1, ua_sess, app);
                        goto error;
                }
                break;
@@ -1887,6 +1886,7 @@ static int create_ust_app_session(struct ltt_ust_session *usess,
                /* Look for a global registry. If none exists, create one. */
                ret = setup_buffer_reg_uid(usess, app, NULL);
                if (ret < 0) {
+                       delete_ust_app_session(-1, ua_sess, app);
                        goto error;
                }
                break;
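
These two hunks close a leak on the session-setup error path: if registry setup fails, the freshly created ua_sess is torn down immediately instead of lingering half-initialized. The call shape as added by the patch (the -1 socket argument presumably means there is no application socket to send release commands on; that reading is an assumption, not stated in the patch):

    ret = setup_buffer_reg_pid(ua_sess, app, NULL);
    if (ret < 0) {
            /* -1: no app socket available/needed here (assumed semantics) */
            delete_ust_app_session(-1, ua_sess, app);
            goto error;
    }
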
@@ -1989,6 +1989,7 @@ no_match:
 /*
  * Lookup for an ust app context from an lttng_ust_context.
  *
+ * Must be called while holding RCU read side lock.
  * Return an ust_app_ctx object or NULL on error.
  */
 static
@@ -3081,6 +3082,7 @@ void ust_app_unregister(int sock)
 {
        struct ust_app *lta;
        struct lttng_ht_node_ulong *node;
+       struct lttng_ht_iter ust_app_sock_iter;
        struct lttng_ht_iter iter;
        struct ust_app_session *ua_sess;
        int ret;
@@ -3088,39 +3090,19 @@ void ust_app_unregister(int sock)
        rcu_read_lock();
 
        /* Get the node reference for a call_rcu */
-       lttng_ht_lookup(ust_app_ht_by_sock, (void *)((unsigned long) sock), &iter);
-       node = lttng_ht_iter_get_node_ulong(&iter);
+       lttng_ht_lookup(ust_app_ht_by_sock, (void *)((unsigned long) sock), &ust_app_sock_iter);
+       node = lttng_ht_iter_get_node_ulong(&ust_app_sock_iter);
        assert(node);
 
        lta = caa_container_of(node, struct ust_app, sock_n);
        DBG("PID %d unregistering with sock %d", lta->pid, sock);
 
-       /* Remove application from PID hash table */
-       ret = lttng_ht_del(ust_app_ht_by_sock, &iter);
-       assert(!ret);
-
        /*
-        * Remove application from notify hash table. The thread handling the
-        * notify socket could have deleted the node so ignore on error because
-        * either way it's valid. The close of that socket is handled by the other
-        * thread.
-        */
-       iter.iter.node = &lta->notify_sock_n.node;
-       (void) lttng_ht_del(ust_app_ht_by_notify_sock, &iter);
-
-       /*
-        * Ignore return value since the node might have been removed before by an
-        * add replace during app registration because the PID can be reassigned by
-        * the OS.
+        * Perform "push metadata" and flush all application streams
+        * before removing the app from the hash tables, so that the
+        * data_pending check behaves correctly.
+        * Remove sessions so they are not visible during deletion.
         */
-       iter.iter.node = &lta->pid_n.node;
-       ret = lttng_ht_del(ust_app_ht, &iter);
-       if (ret) {
-               DBG3("Unregister app by PID %d failed. This can happen on pid reuse",
-                               lta->pid);
-       }
-
-       /* Remove sessions so they are not visible during deletion.*/
        cds_lfht_for_each_entry(lta->sessions->ht, &iter.iter, ua_sess,
                        node.node) {
                struct ust_registry_session *registry;
@@ -3131,6 +3113,8 @@ void ust_app_unregister(int sock)
                        continue;
                }
 
+               (void) ust_app_flush_app_session(lta, ua_sess);
+
                /*
                 * Add session to list for teardown. This is safe since at this point we
                 * are the only one using this list.
@@ -3165,11 +3149,36 @@ void ust_app_unregister(int sock)
                                (void) close_metadata(registry, ua_sess->consumer);
                        }
                }
-
                cds_list_add(&ua_sess->teardown_node, &lta->teardown_head);
+
                pthread_mutex_unlock(&ua_sess->lock);
        }
 
+       /* Remove application from socket hash table */
+       ret = lttng_ht_del(ust_app_ht_by_sock, &ust_app_sock_iter);
+       assert(!ret);
+
+       /*
+        * Remove application from the notify hash table. The thread handling
+        * the notify socket may already have deleted the node, so ignore any
+        * error: either way the node is gone. The close of that socket is
+        * handled by the other thread.
+        */
+       iter.iter.node = &lta->notify_sock_n.node;
+       (void) lttng_ht_del(ust_app_ht_by_notify_sock, &iter);
+
+       /*
+        * Ignore the return value since the node might already have been
+        * removed by an add-replace during app registration, as the OS can
+        * reassign PIDs.
+        */
+       iter.iter.node = &lta->pid_n.node;
+       ret = lttng_ht_del(ust_app_ht, &iter);
+       if (ret) {
+               DBG3("Unregister app by PID %d failed. This can happen on pid reuse",
+                               lta->pid);
+       }
+
        /* Free memory */
        call_rcu(&lta->pid_n.head, delete_ust_app_rcu);
 
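Taken together, the unregister hunks reorder teardown into three phases. A condensed sketch of the new control flow (not compilable as-is; the lookup helper name is shortened for illustration): the app must remain reachable through the hash tables while its buffers are flushed and its metadata pushed, so a concurrent data_pending check never observes an app that disappeared with unflushed data.

    rcu_read_lock();
    lta = lookup_app_by_sock(sock);                 /* app still published */

    /* Phase 1: flush streams, push metadata, queue sessions for teardown. */
    cds_lfht_for_each_entry(lta->sessions->ht, &iter.iter, ua_sess, node.node) {
            (void) ust_app_flush_app_session(lta, ua_sess);
            /* ... push metadata, then close_metadata() ... */
    }

    /* Phase 2: only now unpublish from all three hash tables. */
    lttng_ht_del(ust_app_ht_by_sock, &ust_app_sock_iter);
    (void) lttng_ht_del(ust_app_ht_by_notify_sock, &iter);
    (void) lttng_ht_del(ust_app_ht, &iter);

    /* Phase 3: deferred reclaim once all RCU readers are done. */
    call_rcu(&lta->pid_n.head, delete_ust_app_rcu);
    rcu_read_unlock();

This is also why the patch introduces a second iterator (ust_app_sock_iter): iter is now reused for the session loop and the notify/PID deletions, so the sock lookup result must be kept in its own iterator until phase 2.
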
@@ -3410,41 +3419,63 @@ void ust_app_clean_list(void)
 
        rcu_read_lock();
 
-       cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, pid_n.node) {
-               ret = lttng_ht_del(ust_app_ht, &iter);
-               assert(!ret);
-               call_rcu(&app->pid_n.head, delete_ust_app_rcu);
+       if (ust_app_ht) {
+               cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, pid_n.node) {
+                       ret = lttng_ht_del(ust_app_ht, &iter);
+                       assert(!ret);
+                       call_rcu(&app->pid_n.head, delete_ust_app_rcu);
+               }
        }
 
        /* Cleanup socket hash table */
-       cds_lfht_for_each_entry(ust_app_ht_by_sock->ht, &iter.iter, app,
-                       sock_n.node) {
-               ret = lttng_ht_del(ust_app_ht_by_sock, &iter);
-               assert(!ret);
+       if (ust_app_ht_by_sock) {
+               cds_lfht_for_each_entry(ust_app_ht_by_sock->ht, &iter.iter, app,
+                               sock_n.node) {
+                       ret = lttng_ht_del(ust_app_ht_by_sock, &iter);
+                       assert(!ret);
+               }
        }
 
        /* Cleanup notify socket hash table */
-       cds_lfht_for_each_entry(ust_app_ht_by_notify_sock->ht, &iter.iter, app,
-                       notify_sock_n.node) {
-               ret = lttng_ht_del(ust_app_ht_by_notify_sock, &iter);
-               assert(!ret);
+       if (ust_app_ht_by_notify_sock) {
+               cds_lfht_for_each_entry(ust_app_ht_by_notify_sock->ht, &iter.iter, app,
+                               notify_sock_n.node) {
+                       ret = lttng_ht_del(ust_app_ht_by_notify_sock, &iter);
+                       assert(!ret);
+               }
        }
        rcu_read_unlock();
 
        /* Destroy is done only when the ht is empty */
-       ht_cleanup_push(ust_app_ht);
-       ht_cleanup_push(ust_app_ht_by_sock);
-       ht_cleanup_push(ust_app_ht_by_notify_sock);
+       if (ust_app_ht) {
+               ht_cleanup_push(ust_app_ht);
+       }
+       if (ust_app_ht_by_sock) {
+               ht_cleanup_push(ust_app_ht_by_sock);
+       }
+       if (ust_app_ht_by_notify_sock) {
+               ht_cleanup_push(ust_app_ht_by_notify_sock);
+       }
 }
 
 /*
  * Init UST app hash table.
  */
-void ust_app_ht_alloc(void)
+int ust_app_ht_alloc(void)
 {
        ust_app_ht = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG);
+       if (!ust_app_ht) {
+               return -1;
+       }
        ust_app_ht_by_sock = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG);
+       if (!ust_app_ht_by_sock) {
+               return -1;
+       }
        ust_app_ht_by_notify_sock = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG);
+       if (!ust_app_ht_by_notify_sock) {
+               return -1;
+       }
+       return 0;
 }
 
 /*
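
ust_app_ht_alloc() changing from void to int pairs with the NULL checks added to ust_app_clean_list() above: if allocation fails partway through, the tables that were created still get cleaned up and the missing ones are skipped. A hypothetical call-site sketch (the actual caller lives in the sessiond startup path, outside this patch):

    if (ust_app_ht_alloc() < 0) {
            ERR("Failed to allocate UST app hash tables");
            goto exit;    /* shutdown path ends up in ust_app_clean_list() */
    }
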
@@ -4027,28 +4058,21 @@ error_rcu_unlock:
        return -1;
 }
 
-/*
- * Flush buffers for a specific UST session and app.
- */
 static
-int ust_app_flush_trace(struct ltt_ust_session *usess, struct ust_app *app)
+int ust_app_flush_app_session(struct ust_app *app,
+               struct ust_app_session *ua_sess)
 {
-       int ret = 0;
+       int ret, retval = 0;
        struct lttng_ht_iter iter;
-       struct ust_app_session *ua_sess;
        struct ust_app_channel *ua_chan;
+       struct consumer_socket *socket;
 
-       DBG("Flushing buffers for ust app pid %d", app->pid);
+       DBG("Flushing app session buffers for ust app pid %d", app->pid);
 
        rcu_read_lock();
 
        if (!app->compatible) {
-               goto end_no_session;
-       }
-
-       ua_sess = lookup_session_by_app(usess, app);
-       if (ua_sess == NULL) {
-               goto end_no_session;
+               goto end_not_compatible;
        }
 
        pthread_mutex_lock(&ua_sess->lock);
@@ -4056,25 +4080,16 @@ int ust_app_flush_trace(struct ltt_ust_session *usess, struct ust_app *app)
        health_code_update();
 
        /* Flushing buffers */
+       socket = consumer_find_socket_by_bitness(app->bits_per_long,
+                       ua_sess->consumer);
        cds_lfht_for_each_entry(ua_sess->channels->ht, &iter.iter, ua_chan,
                        node.node) {
                health_code_update();
                assert(ua_chan->is_sent);
-               ret = ustctl_sock_flush_buffer(app->sock, ua_chan->obj);
-               if (ret < 0) {
-                       if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-                               ERR("UST app PID %d channel %s flush failed with ret %d",
-                                               app->pid, ua_chan->name, ret);
-                       } else {
-                               DBG3("UST app failed to flush %s. Application is dead.",
-                                               ua_chan->name);
-                               /*
-                                * This is normal behavior, an application can die during the
-                                * creation process. Don't report an error so the execution can
-                                * continue normally.
-                                */
-                       }
-                       /* Continuing flushing all buffers */
+               ret = consumer_flush_channel(socket, ua_chan->key);
+               if (ret) {
+                       ERR("Error flushing consumer channel");
+                       retval = -1;
                        continue;
                }
        }
@@ -4082,10 +4097,36 @@ int ust_app_flush_trace(struct ltt_ust_session *usess, struct ust_app *app)
        health_code_update();
 
        pthread_mutex_unlock(&ua_sess->lock);
+end_not_compatible:
+       rcu_read_unlock();
+       health_code_update();
+       return retval;
+}
+
+/*
+ * Flush buffers for a specific UST session and app.
+ */
+static
+int ust_app_flush_session(struct ust_app *app, struct ltt_ust_session *usess)
+{
+       int ret;
+       struct ust_app_session *ua_sess;
+
+       DBG("Flushing session buffers for ust app pid %d", app->pid);
+
+       rcu_read_lock();
+
+       ua_sess = lookup_session_by_app(usess, app);
+       if (ua_sess == NULL) {
+               ret = -1;
+               goto end_no_session;
+       }
+       ret = ust_app_flush_app_session(app, ua_sess);
+
 end_no_session:
        rcu_read_unlock();
        health_code_update();
-       return 0;
+       return ret;
 }
 
 /*
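
The heart of the new ust_app_flush_app_session() is a change of flush mechanism: instead of asking the application to flush its own buffers over its command socket, the sessiond now asks the consumer daemon, which owns the ring buffers and outlives the application. The old path could do nothing but log "Application is dead" once the app had exited; the new path keeps working in exactly the unregister-during-stop scenario this patch targets. Side by side (error handling elided, names as in the hunk above):

    /* Before: command sent over the application's own socket. */
    ret = ustctl_sock_flush_buffer(app->sock, ua_chan->obj);

    /* After: command sent to the consumer that owns the buffers. */
    socket = consumer_find_socket_by_bitness(app->bits_per_long,
                    ua_sess->consumer);
    ret = consumer_flush_channel(socket, ua_chan->key);
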
@@ -4219,7 +4261,7 @@ int ust_app_stop_trace_all(struct ltt_ust_session *usess)
        }
        case LTTNG_BUFFER_PER_PID:
                cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, pid_n.node) {
-                       ret = ust_app_flush_trace(usess, app);
+                       ret = ust_app_flush_session(app, usess);
                        if (ret < 0) {
                                /* Continue to next apps even on error */
                                continue;
@@ -5171,10 +5213,12 @@ unsigned int ust_app_get_nb_stream(struct ltt_ust_session *usess)
                cds_list_for_each_entry(reg, &usess->buffer_reg_uid_list, lnode) {
                        struct buffer_reg_channel *reg_chan;
 
+                       rcu_read_lock();
                        cds_lfht_for_each_entry(reg->registry->channels->ht, &iter.iter,
                                        reg_chan, node.node) {
                                ret += reg_chan->stream_count;
                        }
+                       rcu_read_unlock();
                }
                break;
        }
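
The final hunk fixes a missing read-side critical section: urcu's lock-free hash table may only be traversed between rcu_read_lock() and rcu_read_unlock(). A standalone sketch of the rule, with illustrative types not taken from lttng-tools:

    #define _LGPL_SOURCE
    #include <urcu.h>
    #include <urcu/rculfhash.h>

    struct entry {
            struct cds_lfht_node node;      /* hash table linkage */
            unsigned int stream_count;
    };

    static unsigned int count_streams(struct cds_lfht *ht)
    {
            struct cds_lfht_iter iter;
            struct entry *e;
            unsigned int total = 0;

            rcu_read_lock();                /* required around any traversal */
            cds_lfht_for_each_entry(ht, &iter, e, node) {
                    total += e->stream_count;
            }
            rcu_read_unlock();
            return total;
    }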