Fix: per-uid flush and ust registry locking
[lttng-tools.git] / src / bin / lttng-sessiond / ust-app.c
index bc1d62dc7862c342bbc2cbd08d1d83cc88d59cbb..0c4045c3b26aebf4d675c4b8916b7c5fc84701ee 100644 (file)
@@ -41,6 +41,9 @@
 #include "ust-ctl.h"
 #include "utils.h"
 
+static
+int ust_app_flush_app_session(struct ust_app *app, struct ust_app_session *ua_sess);
+
 /* Next available channel key. Access under next_channel_key_lock. */
 static uint64_t _next_channel_key;
 static pthread_mutex_t next_channel_key_lock = PTHREAD_MUTEX_INITIALIZER;
@@ -439,21 +442,28 @@ ssize_t ust_app_push_metadata(struct ust_registry_session *registry,
        assert(registry);
        assert(socket);
 
+       pthread_mutex_lock(&registry->lock);
+
+       /*
+        * Means that no metadata was assigned to the session. This can happens if
+        * no start has been done previously.
+        */
+       if (!registry->metadata_key) {
+               pthread_mutex_unlock(&registry->lock);
+               return 0;
+       }
+
        /*
         * On a push metadata error either the consumer is dead or the metadata
         * channel has been destroyed because its endpoint might have died (e.g:
         * relayd). If so, the metadata closed flag is set to 1 so we deny pushing
         * metadata again which is not valid anymore on the consumer side.
-        *
-        * The ust app session mutex locked allows us to make this check without
-        * the registry lock.
         */
        if (registry->metadata_closed) {
+               pthread_mutex_unlock(&registry->lock);
                return -EPIPE;
        }
 
-       pthread_mutex_lock(&registry->lock);
-
        offset = registry->metadata_len_sent;
        len = registry->metadata_len - registry->metadata_len_sent;
        if (len == 0) {
@@ -511,6 +521,14 @@ push_data:
 
 end:
 error:
+       if (ret_val) {
+               /*
+                * On error, flag the registry that the metadata is closed. We were unable
+                * to push anything and this means that either the consumer is not
+                * responding or the metadata cache has been destroyed on the consumer.
+                */
+               registry->metadata_closed = 1;
+       }
        pthread_mutex_unlock(&registry->lock);
 error_push:
        free(metadata_str);
@@ -518,11 +536,12 @@ error_push:
 }
 
 /*
- * For a given application and session, push metadata to consumer. The session
- * lock MUST be acquired here before calling this.
+ * For a given application and session, push metadata to consumer.
  * Either sock or consumer is required : if sock is NULL, the default
  * socket to send the metadata is retrieved from consumer, if sock
  * is not NULL we use it to send the metadata.
+ * RCU read-side lock must be held while calling this function,
+ * therefore ensuring existance of registry.
  *
  * Return 0 on success else a negative error.
  */
@@ -536,23 +555,20 @@ static int push_metadata(struct ust_registry_session *registry,
        assert(registry);
        assert(consumer);
 
-       rcu_read_lock();
+       pthread_mutex_lock(&registry->lock);
 
-       /*
-        * Means that no metadata was assigned to the session. This can happens if
-        * no start has been done previously.
-        */
-       if (!registry->metadata_key) {
-               ret_val = 0;
-               goto end_rcu_unlock;
+       if (registry->metadata_closed) {
+               pthread_mutex_unlock(&registry->lock);
+               return -EPIPE;
        }
 
        /* Get consumer socket to use to push the metadata.*/
        socket = consumer_find_socket_by_bitness(registry->bits_per_long,
                        consumer);
+       pthread_mutex_unlock(&registry->lock);
        if (!socket) {
                ret_val = -1;
-               goto error_rcu_unlock;
+               goto error;
        }
 
        /*
@@ -570,21 +586,13 @@ static int push_metadata(struct ust_registry_session *registry,
        pthread_mutex_unlock(socket->lock);
        if (ret < 0) {
                ret_val = ret;
-               goto error_rcu_unlock;
+               goto error;
        }
 
-       rcu_read_unlock();
        return 0;
 
-error_rcu_unlock:
-       /*
-        * On error, flag the registry that the metadata is closed. We were unable
-        * to push anything and this means that either the consumer is not
-        * responding or the metadata cache has been destroyed on the consumer.
-        */
-       registry->metadata_closed = 1;
-end_rcu_unlock:
-       rcu_read_unlock();
+error:
+end:
        return ret_val;
 }
 
@@ -607,6 +615,8 @@ static int close_metadata(struct ust_registry_session *registry,
 
        rcu_read_lock();
 
+       pthread_mutex_lock(&registry->lock);
+
        if (!registry->metadata_key || registry->metadata_closed) {
                ret = 0;
                goto end;
@@ -633,6 +643,7 @@ error:
         */
        registry->metadata_closed = 1;
 end:
+       pthread_mutex_unlock(&registry->lock);
        rcu_read_unlock();
        return ret;
 }
@@ -671,7 +682,7 @@ void delete_ust_app_session(int sock, struct ust_app_session *ua_sess,
        pthread_mutex_lock(&ua_sess->lock);
 
        registry = get_session_registry(ua_sess);
-       if (registry && !registry->metadata_closed) {
+       if (registry) {
                /* Push metadata for application before freeing the application. */
                (void) push_metadata(registry, ua_sess->consumer);
 
@@ -681,8 +692,7 @@ void delete_ust_app_session(int sock, struct ust_app_session *ua_sess,
                 * previous push metadata could have flag the metadata registry to
                 * close so don't send a close command if closed.
                 */
-               if (ua_sess->buffer_type != LTTNG_BUFFER_PER_UID &&
-                               !registry->metadata_closed) {
+               if (ua_sess->buffer_type != LTTNG_BUFFER_PER_UID) {
                        /* And ask to close it for this session registry. */
                        (void) close_metadata(registry, ua_sess->consumer);
                }
@@ -2838,6 +2848,8 @@ static int create_ust_app_metadata(struct ust_app_session *ua_sess,
        registry = get_session_registry(ua_sess);
        assert(registry);
 
+       pthread_mutex_lock(&registry->lock);
+
        /* Metadata already exists for this registry or it was closed previously */
        if (registry->metadata_key || registry->metadata_closed) {
                ret = 0;
@@ -2910,6 +2922,7 @@ error_consumer:
        lttng_fd_put(LTTNG_FD_APPS, 1);
        delete_ust_app_channel(-1, metadata, app);
 error:
+       pthread_mutex_unlock(&registry->lock);
        return ret;
 }
 
@@ -3079,6 +3092,7 @@ void ust_app_unregister(int sock)
 {
        struct ust_app *lta;
        struct lttng_ht_node_ulong *node;
+       struct lttng_ht_iter ust_app_sock_iter;
        struct lttng_ht_iter iter;
        struct ust_app_session *ua_sess;
        int ret;
@@ -3086,39 +3100,19 @@ void ust_app_unregister(int sock)
        rcu_read_lock();
 
        /* Get the node reference for a call_rcu */
-       lttng_ht_lookup(ust_app_ht_by_sock, (void *)((unsigned long) sock), &iter);
-       node = lttng_ht_iter_get_node_ulong(&iter);
+       lttng_ht_lookup(ust_app_ht_by_sock, (void *)((unsigned long) sock), &ust_app_sock_iter);
+       node = lttng_ht_iter_get_node_ulong(&ust_app_sock_iter);
        assert(node);
 
        lta = caa_container_of(node, struct ust_app, sock_n);
        DBG("PID %d unregistering with sock %d", lta->pid, sock);
 
-       /* Remove application from PID hash table */
-       ret = lttng_ht_del(ust_app_ht_by_sock, &iter);
-       assert(!ret);
-
        /*
-        * Remove application from notify hash table. The thread handling the
-        * notify socket could have deleted the node so ignore on error because
-        * either way it's valid. The close of that socket is handled by the other
-        * thread.
+        * For per-PID buffers, perform "push metadata" and flush all
+        * application streams before removing app from hash tables,
+        * ensuring proper behavior of data_pending check.
+        * Remove sessions so they are not visible during deletion.
         */
-       iter.iter.node = &lta->notify_sock_n.node;
-       (void) lttng_ht_del(ust_app_ht_by_notify_sock, &iter);
-
-       /*
-        * Ignore return value since the node might have been removed before by an
-        * add replace during app registration because the PID can be reassigned by
-        * the OS.
-        */
-       iter.iter.node = &lta->pid_n.node;
-       ret = lttng_ht_del(ust_app_ht, &iter);
-       if (ret) {
-               DBG3("Unregister app by PID %d failed. This can happen on pid reuse",
-                               lta->pid);
-       }
-
-       /* Remove sessions so they are not visible during deletion.*/
        cds_lfht_for_each_entry(lta->sessions->ht, &iter.iter, ua_sess,
                        node.node) {
                struct ust_registry_session *registry;
@@ -3129,6 +3123,10 @@ void ust_app_unregister(int sock)
                        continue;
                }
 
+               if (ua_sess->buffer_type == LTTNG_BUFFER_PER_PID) {
+                       (void) ust_app_flush_app_session(lta, ua_sess);
+               }
+
                /*
                 * Add session to list for teardown. This is safe since at this point we
                 * are the only one using this list.
@@ -3147,7 +3145,7 @@ void ust_app_unregister(int sock)
                 * session so the delete session will NOT push/close a second time.
                 */
                registry = get_session_registry(ua_sess);
-               if (registry && !registry->metadata_closed) {
+               if (registry) {
                        /* Push metadata for application before freeing the application. */
                        (void) push_metadata(registry, ua_sess->consumer);
 
@@ -3157,17 +3155,41 @@ void ust_app_unregister(int sock)
                         * previous push metadata could have flag the metadata registry to
                         * close so don't send a close command if closed.
                         */
-                       if (ua_sess->buffer_type != LTTNG_BUFFER_PER_UID &&
-                                       !registry->metadata_closed) {
+                       if (ua_sess->buffer_type != LTTNG_BUFFER_PER_UID) {
                                /* And ask to close it for this session registry. */
                                (void) close_metadata(registry, ua_sess->consumer);
                        }
                }
-
                cds_list_add(&ua_sess->teardown_node, &lta->teardown_head);
+
                pthread_mutex_unlock(&ua_sess->lock);
        }
 
+       /* Remove application from PID hash table */
+       ret = lttng_ht_del(ust_app_ht_by_sock, &ust_app_sock_iter);
+       assert(!ret);
+
+       /*
+        * Remove application from notify hash table. The thread handling the
+        * notify socket could have deleted the node so ignore on error because
+        * either way it's valid. The close of that socket is handled by the other
+        * thread.
+        */
+       iter.iter.node = &lta->notify_sock_n.node;
+       (void) lttng_ht_del(ust_app_ht_by_notify_sock, &iter);
+
+       /*
+        * Ignore return value since the node might have been removed before by an
+        * add replace during app registration because the PID can be reassigned by
+        * the OS.
+        */
+       iter.iter.node = &lta->pid_n.node;
+       ret = lttng_ht_del(ust_app_ht, &iter);
+       if (ret) {
+               DBG3("Unregister app by PID %d failed. This can happen on pid reuse",
+                               lta->pid);
+       }
+
        /* Free memory */
        call_rcu(&lta->pid_n.head, delete_ust_app_rcu);
 
@@ -4028,10 +4050,8 @@ int ust_app_stop_trace(struct ltt_ust_session *usess, struct ust_app *app)
        registry = get_session_registry(ua_sess);
        assert(registry);
 
-       if (!registry->metadata_closed) {
-               /* Push metadata for application before freeing the application. */
-               (void) push_metadata(registry, ua_sess->consumer);
-       }
+       /* Push metadata for application before freeing the application. */
+       (void) push_metadata(registry, ua_sess->consumer);
 
 end_unlock:
        pthread_mutex_unlock(&ua_sess->lock);
@@ -4047,28 +4067,21 @@ error_rcu_unlock:
        return -1;
 }
 
-/*
- * Flush buffers for a specific UST session and app.
- */
 static
-int ust_app_flush_trace(struct ltt_ust_session *usess, struct ust_app *app)
+int ust_app_flush_app_session(struct ust_app *app,
+               struct ust_app_session *ua_sess)
 {
-       int ret = 0;
+       int ret, retval = 0;
        struct lttng_ht_iter iter;
-       struct ust_app_session *ua_sess;
        struct ust_app_channel *ua_chan;
+       struct consumer_socket *socket;
 
-       DBG("Flushing buffers for ust app pid %d", app->pid);
+       DBG("Flushing app session buffers for ust app pid %d", app->pid);
 
        rcu_read_lock();
 
        if (!app->compatible) {
-               goto end_no_session;
-       }
-
-       ua_sess = lookup_session_by_app(usess, app);
-       if (ua_sess == NULL) {
-               goto end_no_session;
+               goto end_not_compatible;
        }
 
        pthread_mutex_lock(&ua_sess->lock);
@@ -4076,36 +4089,116 @@ int ust_app_flush_trace(struct ltt_ust_session *usess, struct ust_app *app)
        health_code_update();
 
        /* Flushing buffers */
-       cds_lfht_for_each_entry(ua_sess->channels->ht, &iter.iter, ua_chan,
-                       node.node) {
-               health_code_update();
-               assert(ua_chan->is_sent);
-               ret = ustctl_sock_flush_buffer(app->sock, ua_chan->obj);
-               if (ret < 0) {
-                       if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
-                               ERR("UST app PID %d channel %s flush failed with ret %d",
-                                               app->pid, ua_chan->name, ret);
-                       } else {
-                               DBG3("UST app failed to flush %s. Application is dead.",
-                                               ua_chan->name);
-                               /*
-                                * This is normal behavior, an application can die during the
-                                * creation process. Don't report an error so the execution can
-                                * continue normally.
-                                */
+       socket = consumer_find_socket_by_bitness(app->bits_per_long,
+                       ua_sess->consumer);
+
+       /* Flush buffers and push metadata. */
+       switch (ua_sess->buffer_type) {
+       case LTTNG_BUFFER_PER_PID:
+               cds_lfht_for_each_entry(ua_sess->channels->ht, &iter.iter, ua_chan,
+                               node.node) {
+                       health_code_update();
+                       assert(ua_chan->is_sent);
+                       ret = consumer_flush_channel(socket, ua_chan->key);
+                       if (ret) {
+                               ERR("Error flushing consumer channel");
+                               retval = -1;
+                               continue;
                        }
-                       /* Continuing flushing all buffers */
-                       continue;
                }
+               break;
+       case LTTNG_BUFFER_PER_UID:
+       default:
+               assert(0);
+               break;
        }
 
        health_code_update();
 
        pthread_mutex_unlock(&ua_sess->lock);
+
+end_not_compatible:
+       rcu_read_unlock();
+       health_code_update();
+       return retval;
+}
+
+/*
+ * Flush buffers for all applications for a specific UST session.
+ * Called with UST session lock held.
+ */
+static
+int ust_app_flush_session(struct ltt_ust_session *usess)
+
+{
+       int ret;
+
+       DBG("Flushing session buffers for all ust apps");
+
+       rcu_read_lock();
+
+       /* Flush buffers and push metadata. */
+       switch (usess->buffer_type) {
+       case LTTNG_BUFFER_PER_UID:
+       {
+               struct buffer_reg_uid *reg;
+               struct lttng_ht_iter iter;
+
+               /* Flush all per UID buffers associated to that session. */
+               cds_list_for_each_entry(reg, &usess->buffer_reg_uid_list, lnode) {
+                       struct ust_registry_session *ust_session_reg;
+                       struct buffer_reg_channel *reg_chan;
+                       struct consumer_socket *socket;
+
+                       /* Get consumer socket to use to push the metadata.*/
+                       socket = consumer_find_socket_by_bitness(reg->bits_per_long,
+                                       usess->consumer);
+                       if (!socket) {
+                               /* Ignore request if no consumer is found for the session. */
+                               continue;
+                       }
+
+                       cds_lfht_for_each_entry(reg->registry->channels->ht, &iter.iter,
+                                       reg_chan, node.node) {
+                               /*
+                                * The following call will print error values so the return
+                                * code is of little importance because whatever happens, we
+                                * have to try them all.
+                                */
+                               (void) consumer_flush_channel(socket, reg_chan->consumer_key);
+                       }
+
+                       ust_session_reg = reg->registry->reg.ust;
+                       /* Push metadata. */
+                       (void) push_metadata(ust_session_reg, usess->consumer);
+               }
+               ret = 0;
+               break;
+       }
+       case LTTNG_BUFFER_PER_PID:
+       {
+               struct ust_app_session *ua_sess;
+               struct lttng_ht_iter iter;
+               struct ust_app *app;
+
+               cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, pid_n.node) {
+                       ua_sess = lookup_session_by_app(usess, app);
+                       if (ua_sess == NULL) {
+                               continue;
+                       }
+                       (void) ust_app_flush_app_session(app, ua_sess);
+               }
+               break;
+       }
+       default:
+               assert(0);
+               break;
+       }
+
 end_no_session:
        rcu_read_unlock();
        health_code_update();
-       return 0;
+       return ret;
 }
 
 /*
@@ -4179,6 +4272,7 @@ int ust_app_start_trace_all(struct ltt_ust_session *usess)
 
 /*
  * Start tracing for the UST session.
+ * Called with UST session lock held.
  */
 int ust_app_stop_trace_all(struct ltt_ust_session *usess)
 {
@@ -4198,58 +4292,7 @@ int ust_app_stop_trace_all(struct ltt_ust_session *usess)
                }
        }
 
-       /* Flush buffers and push metadata (for UID buffers). */
-       switch (usess->buffer_type) {
-       case LTTNG_BUFFER_PER_UID:
-       {
-               struct buffer_reg_uid *reg;
-
-               /* Flush all per UID buffers associated to that session. */
-               cds_list_for_each_entry(reg, &usess->buffer_reg_uid_list, lnode) {
-                       struct ust_registry_session *ust_session_reg;
-                       struct buffer_reg_channel *reg_chan;
-                       struct consumer_socket *socket;
-
-                       /* Get consumer socket to use to push the metadata.*/
-                       socket = consumer_find_socket_by_bitness(reg->bits_per_long,
-                                       usess->consumer);
-                       if (!socket) {
-                               /* Ignore request if no consumer is found for the session. */
-                               continue;
-                       }
-
-                       cds_lfht_for_each_entry(reg->registry->channels->ht, &iter.iter,
-                                       reg_chan, node.node) {
-                               /*
-                                * The following call will print error values so the return
-                                * code is of little importance because whatever happens, we
-                                * have to try them all.
-                                */
-                               (void) consumer_flush_channel(socket, reg_chan->consumer_key);
-                       }
-
-                       ust_session_reg = reg->registry->reg.ust;
-                       if (!ust_session_reg->metadata_closed) {
-                               /* Push metadata. */
-                               (void) push_metadata(ust_session_reg, usess->consumer);
-                       }
-               }
-
-               break;
-       }
-       case LTTNG_BUFFER_PER_PID:
-               cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, pid_n.node) {
-                       ret = ust_app_flush_trace(usess, app);
-                       if (ret < 0) {
-                               /* Continue to next apps even on error */
-                               continue;
-                       }
-               }
-               break;
-       default:
-               assert(0);
-               break;
-       }
+       (void) ust_app_flush_session(usess);
 
        rcu_read_unlock();
 
This page took 0.030286 seconds and 4 git commands to generate.