Fix: sessiond vs consumerd push/get metadata deadlock
[lttng-tools.git] / src / bin / lttng-sessiond / ust-app.c
index 0c4045c3b26aebf4d675c4b8916b7c5fc84701ee..fc4b7085ec5770e57550c169924e112bb794dd49 100644 (file)
@@ -426,46 +426,52 @@ void delete_ust_app_channel(int sock, struct ust_app_channel *ua_chan,
 /*
  * Push metadata to consumer socket.
  *
- * The socket lock MUST be acquired.
- * The ust app session lock MUST be acquired.
+ * RCU read-side lock must be held to guarantee existence of socket.
+ * Must be called with the ust app session lock held.
+ * Must be called with the registry lock held.
  *
  * On success, return the len of metadata pushed or else a negative value.
+ * Returning a -EPIPE return value means we could not send the metadata,
+ * but it can be caused by recoverable errors (e.g. the application has
+ * terminated concurrently).
  */
 ssize_t ust_app_push_metadata(struct ust_registry_session *registry,
                struct consumer_socket *socket, int send_zero_data)
 {
        int ret;
        char *metadata_str = NULL;
-       size_t len, offset;
+       size_t len, offset, new_metadata_len_sent;
        ssize_t ret_val;
+       uint64_t metadata_key;
 
        assert(registry);
        assert(socket);
 
-       pthread_mutex_lock(&registry->lock);
+       metadata_key = registry->metadata_key;
 
        /*
-        * Means that no metadata was assigned to the session. This can happens if
-        * no start has been done previously.
+        * Means that no metadata was assigned to the session. This can
+        * happen if no start has been done previously.
         */
-       if (!registry->metadata_key) {
-               pthread_mutex_unlock(&registry->lock);
+       if (!metadata_key) {
                return 0;
        }
 
        /*
-        * On a push metadata error either the consumer is dead or the metadata
-        * channel has been destroyed because its endpoint might have died (e.g:
-        * relayd). If so, the metadata closed flag is set to 1 so we deny pushing
-        * metadata again which is not valid anymore on the consumer side.
+        * On a push metadata error either the consumer is dead or the
+        * metadata channel has been destroyed because its endpoint
+        * might have died (e.g: relayd), or because the application has
+        * exited. If so, the metadata closed flag is set to 1 so we
+        * deny pushing metadata again which is not valid anymore on the
+        * consumer side.
         */
        if (registry->metadata_closed) {
-               pthread_mutex_unlock(&registry->lock);
                return -EPIPE;
        }
 
        offset = registry->metadata_len_sent;
        len = registry->metadata_len - registry->metadata_len_sent;
+       new_metadata_len_sent = registry->metadata_len;
        if (len == 0) {
                DBG3("No metadata to push for metadata key %" PRIu64,
                                registry->metadata_key);
@@ -484,38 +490,66 @@ ssize_t ust_app_push_metadata(struct ust_registry_session *registry,
                ret_val = -ENOMEM;
                goto error;
        }
-       /* Copy what we haven't send out. */
+       /* Copy what we haven't sent out. */
        memcpy(metadata_str, registry->metadata + offset, len);
-       registry->metadata_len_sent += len;
 
 push_data:
        pthread_mutex_unlock(&registry->lock);
-       ret = consumer_push_metadata(socket, registry->metadata_key,
+       /*
+        * We need to unlock the registry while we push metadata to
+        * break a circular dependency between the consumerd metadata
+        * lock and the sessiond registry lock. Indeed, pushing metadata
+        * to the consumerd awaits that it gets pushed all the way to
+        * relayd, but doing so requires grabbing the metadata lock. If
+        * a concurrent metadata request is being performed by
+        * consumerd, this can try to grab the registry lock on the
+        * sessiond while holding the metadata lock on the consumer
+        * daemon. Those push and pull schemes are performed on two
+        * different bidirectional communication sockets.
+        */
+       ret = consumer_push_metadata(socket, metadata_key,
                        metadata_str, len, offset);
+       pthread_mutex_lock(&registry->lock);
        if (ret < 0) {
                /*
-                * There is an acceptable race here between the registry metadata key
-                * assignment and the creation on the consumer. The session daemon can
-                * concurrently push metadata for this registry while being created on
-                * the consumer since the metadata key of the registry is assigned
-                * *before* it is setup to avoid the consumer to ask for metadata that
-                * could possibly be not found in the session daemon.
+                * There is an acceptable race here between the registry
+                * metadata key assignment and the creation on the
+                * consumer. The session daemon can concurrently push
+                * metadata for this registry while being created on the
+                * consumer since the metadata key of the registry is
+                * assigned *before* it is setup to avoid the consumer
+                * to ask for metadata that could possibly be not found
+                * in the session daemon.
                 *
-                * The metadata will get pushed either by the session being stopped or
-                * the consumer requesting metadata if that race is triggered.
+                * The metadata will get pushed either by the session
+                * being stopped or the consumer requesting metadata if
+                * that race is triggered.
                 */
                if (ret == -LTTCOMM_CONSUMERD_CHANNEL_FAIL) {
                        ret = 0;
+               } else {
+                       ERR("Error pushing metadata to consumer");
                }
-
-               /* Update back the actual metadata len sent since it failed here. */
-               pthread_mutex_lock(&registry->lock);
-               registry->metadata_len_sent -= len;
-               pthread_mutex_unlock(&registry->lock);
                ret_val = ret;
                goto error_push;
+       } else {
+               /*
+                * Metadata may have been concurrently pushed, since
+                * we're not holding the registry lock while pushing to
+                * consumer.  This is handled by the fact that we send
+                * the metadata content, size, and the offset at which
+                * that metadata belongs. This may arrive out of order
+                * on the consumer side, and the consumer is able to
+                * deal with overlapping fragments. The consumer
+                * supports overlapping fragments, which must be
+                * contiguous starting from offset 0. We keep the
+                * largest metadata_len_sent value of the concurrent
+                * send.
+                */
+               registry->metadata_len_sent =
+                       max_t(size_t, registry->metadata_len_sent,
+                               new_metadata_len_sent);
        }
-
        free(metadata_str);
        return len;
 
@@ -523,13 +557,14 @@ end:
 error:
        if (ret_val) {
                /*
-                * On error, flag the registry that the metadata is closed. We were unable
-                * to push anything and this means that either the consumer is not
-                * responding or the metadata cache has been destroyed on the consumer.
+                * On error, flag the registry that the metadata is
+                * closed. We were unable to push anything and this
+                * means that either the consumer is not responding or
+                * the metadata cache has been destroyed on the
+                * consumer.
                 */
                registry->metadata_closed = 1;
        }
-       pthread_mutex_unlock(&registry->lock);
 error_push:
        free(metadata_str);
        return ret_val;
@@ -541,9 +576,13 @@ error_push:
  * socket to send the metadata is retrieved from consumer, if sock
  * is not NULL we use it to send the metadata.
  * RCU read-side lock must be held while calling this function,
- * therefore ensuring existance of registry.
+ * therefore ensuring existence of registry. It also ensures existence
+ * of socket throughout this function.
  *
  * Return 0 on success else a negative error.
+ * Returning a -EPIPE return value means we could not send the metadata,
+ * but it can be caused by recoverable errors (e.g. the application has
+ * terminated concurrently).
  */
 static int push_metadata(struct ust_registry_session *registry,
                struct consumer_output *consumer)
@@ -556,50 +595,36 @@ static int push_metadata(struct ust_registry_session *registry,
        assert(consumer);
 
        pthread_mutex_lock(&registry->lock);
-
        if (registry->metadata_closed) {
-               pthread_mutex_unlock(&registry->lock);
-               return -EPIPE;
+               ret_val = -EPIPE;
+               goto error;
        }
 
        /* Get consumer socket to use to push the metadata.*/
        socket = consumer_find_socket_by_bitness(registry->bits_per_long,
                        consumer);
-       pthread_mutex_unlock(&registry->lock);
        if (!socket) {
                ret_val = -1;
                goto error;
        }
 
-       /*
-        * TODO: Currently, we hold the socket lock around sampling of the next
-        * metadata segment to ensure we send metadata over the consumer socket in
-        * the correct order. This makes the registry lock nest inside the socket
-        * lock.
-        *
-        * Please note that this is a temporary measure: we should move this lock
-        * back into ust_consumer_push_metadata() when the consumer gets the
-        * ability to reorder the metadata it receives.
-        */
-       pthread_mutex_lock(socket->lock);
        ret = ust_app_push_metadata(registry, socket, 0);
-       pthread_mutex_unlock(socket->lock);
        if (ret < 0) {
                ret_val = ret;
                goto error;
        }
-
+       pthread_mutex_unlock(&registry->lock);
        return 0;
 
 error:
-end:
+       pthread_mutex_unlock(&registry->lock);
        return ret_val;
 }
 
 /*
  * Send to the consumer a close metadata command for the given session. Once
  * done, the metadata channel is deleted and the session metadata pointer is
- * nullified. The session lock MUST be acquired here unless the application is
+ * nullified. The session lock MUST be held unless the application is
  * in the destroy path.
  *
  * Return 0 on success else a negative value.
@@ -681,6 +706,9 @@ void delete_ust_app_session(int sock, struct ust_app_session *ua_sess,
 
        pthread_mutex_lock(&ua_sess->lock);
 
+       assert(!ua_sess->deleted);
+       ua_sess->deleted = true;
+
        registry = get_session_registry(ua_sess);
        if (registry) {
                /* Push metadata for application before freeing the application. */
@@ -723,6 +751,8 @@ void delete_ust_app_session(int sock, struct ust_app_session *ua_sess,
        }
        pthread_mutex_unlock(&ua_sess->lock);
 
+       consumer_output_put(ua_sess->consumer);
+
        call_rcu(&ua_sess->rcu_head, delete_ust_app_session_rcu);
 }
 
@@ -984,15 +1014,15 @@ error:
  *
  * Return allocated filter or NULL on error.
  */
-static struct lttng_ust_filter_bytecode *alloc_copy_ust_app_filter(
-               struct lttng_ust_filter_bytecode *orig_f)
+static struct lttng_filter_bytecode *copy_filter_bytecode(
+               struct lttng_filter_bytecode *orig_f)
 {
-       struct lttng_ust_filter_bytecode *filter = NULL;
+       struct lttng_filter_bytecode *filter = NULL;
 
        /* Copy filter bytecode */
        filter = zmalloc(sizeof(*filter) + orig_f->len);
        if (!filter) {
-               PERROR("zmalloc alloc ust app filter");
+               PERROR("zmalloc alloc filter bytecode");
                goto error;
        }
 
@@ -1002,6 +1032,30 @@ error:
        return filter;
 }
 
+/*
+ * Create a liblttng-ust filter bytecode from given bytecode.
+ *
+ * Return allocated filter or NULL on error.
+ */
+static struct lttng_ust_filter_bytecode *create_ust_bytecode_from_bytecode(
+               struct lttng_filter_bytecode *orig_f)
+{
+       struct lttng_ust_filter_bytecode *filter = NULL;
+
+       /* Copy filter bytecode */
+       filter = zmalloc(sizeof(*filter) + orig_f->len);
+       if (!filter) {
+               PERROR("zmalloc alloc ust filter bytecode");
+               goto error;
+       }
+
+       assert(sizeof(struct lttng_filter_bytecode) ==
+                       sizeof(struct lttng_ust_filter_bytecode));
+       memcpy(filter, orig_f, sizeof(*filter) + orig_f->len);
+error:
+       return filter;
+}
+
 /*
  * Find an ust_app using the sock and return it. RCU read side lock must be
  * held before calling this helper function.
@@ -1054,7 +1108,7 @@ error:
  * Return an ust_app_event object or NULL on error.
  */
 static struct ust_app_event *find_ust_app_event(struct lttng_ht *ht,
-               char *name, struct lttng_ust_filter_bytecode *filter, int loglevel,
+               char *name, struct lttng_filter_bytecode *filter, int loglevel,
                const struct lttng_event_exclusion *exclusion)
 {
        struct lttng_ht_iter iter;
@@ -1070,7 +1124,7 @@ static struct ust_app_event *find_ust_app_event(struct lttng_ht *ht,
        key.filter = filter;
        key.loglevel = loglevel;
        /* lttng_event_exclusion and lttng_ust_event_exclusion structures are similar */
-       key.exclusion = (struct lttng_ust_event_exclusion *)exclusion;
+       key.exclusion = exclusion;
 
        /* Lookup using the event name as hash and a custom match fct. */
        cds_lfht_lookup(ht->ht, ht->hash_fct((void *) name, lttng_ht_seed),
@@ -1135,6 +1189,7 @@ int set_ust_event_filter(struct ust_app_event *ua_event,
                struct ust_app *app)
 {
        int ret;
+       struct lttng_ust_filter_bytecode *ust_bytecode = NULL;
 
        health_code_update();
 
@@ -1143,7 +1198,12 @@ int set_ust_event_filter(struct ust_app_event *ua_event,
                goto error;
        }
 
-       ret = ustctl_set_filter(app->sock, ua_event->filter,
+       ust_bytecode = create_ust_bytecode_from_bytecode(ua_event->filter);
+       if (!ust_bytecode) {
+               ret = -LTTNG_ERR_NOMEM;
+               goto error;
+       }
+       ret = ustctl_set_filter(app->sock, ust_bytecode,
                        ua_event->obj);
        if (ret < 0) {
                if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
@@ -1165,9 +1225,31 @@ int set_ust_event_filter(struct ust_app_event *ua_event,
 
 error:
        health_code_update();
+       free(ust_bytecode);
        return ret;
 }
 
+static
+struct lttng_ust_event_exclusion *create_ust_exclusion_from_exclusion(
+               struct lttng_event_exclusion *exclusion)
+{
+       struct lttng_ust_event_exclusion *ust_exclusion = NULL;
+       size_t exclusion_alloc_size = sizeof(struct lttng_ust_event_exclusion) +
+               LTTNG_UST_SYM_NAME_LEN * exclusion->count;
+
+       ust_exclusion = zmalloc(exclusion_alloc_size);
+       if (!ust_exclusion) {
+               PERROR("malloc");
+               goto end;
+       }
+
+       assert(sizeof(struct lttng_event_exclusion) ==
+                       sizeof(struct lttng_ust_event_exclusion));
+       memcpy(ust_exclusion, exclusion, exclusion_alloc_size);
+end:
+       return ust_exclusion;
+}
+
 /*
  * Set event exclusions on the tracer.
  */
@@ -1176,6 +1258,7 @@ int set_ust_event_exclusion(struct ust_app_event *ua_event,
                struct ust_app *app)
 {
        int ret;
+       struct lttng_ust_event_exclusion *ust_exclusion = NULL;
 
        health_code_update();
 
@@ -1184,8 +1267,13 @@ int set_ust_event_exclusion(struct ust_app_event *ua_event,
                goto error;
        }
 
-       ret = ustctl_set_exclusion(app->sock, ua_event->exclusion,
-                       ua_event->obj);
+       ust_exclusion = create_ust_exclusion_from_exclusion(
+                       ua_event->exclusion);
+       if (!ust_exclusion) {
+               ret = -LTTNG_ERR_NOMEM;
+               goto error;
+       }
+       ret = ustctl_set_exclusion(app->sock, ust_exclusion, ua_event->obj);
        if (ret < 0) {
                if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) {
                        ERR("UST app event %s exclusions failed for app (pid: %d) "
@@ -1206,6 +1294,7 @@ int set_ust_event_exclusion(struct ust_app_event *ua_event,
 
 error:
        health_code_update();
+       free(ust_exclusion);
        return ret;
 }
 
@@ -1377,7 +1466,10 @@ static int send_channel_pid_to_ust(struct ust_app *app,
 
        /* Send channel to the application. */
        ret = ust_consumer_send_channel_to_ust(app, ua_sess, ua_chan);
-       if (ret < 0) {
+       if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+               ret = -ENOTCONN;        /* Caused by app exiting. */
+               goto error;
+       } else if (ret < 0) {
                goto error;
        }
 
@@ -1386,7 +1478,10 @@ static int send_channel_pid_to_ust(struct ust_app *app,
        /* Send all streams to application. */
        cds_list_for_each_entry_safe(stream, stmp, &ua_chan->streams.head, list) {
                ret = ust_consumer_send_stream_to_ust(app, ua_chan, stream);
-               if (ret < 0) {
+               if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+                       ret = -ENOTCONN;        /* Caused by app exiting. */
+                       goto error;
+               } else if (ret < 0) {
                        goto error;
                }
                /* We don't need the stream anymore once sent to the tracer. */
@@ -1507,13 +1602,13 @@ static void shadow_copy_event(struct ust_app_event *ua_event,
 
        /* Copy filter bytecode */
        if (uevent->filter) {
-               ua_event->filter = alloc_copy_ust_app_filter(uevent->filter);
+               ua_event->filter = copy_filter_bytecode(uevent->filter);
                /* Filter might be NULL here in case of ENONEM. */
        }
 
        /* Copy exclusion data */
        if (uevent->exclusion) {
-               exclusion_alloc_size = sizeof(struct lttng_ust_event_exclusion) +
+               exclusion_alloc_size = sizeof(struct lttng_event_exclusion) +
                                LTTNG_UST_SYM_NAME_LEN * uevent->exclusion->count;
                ua_event->exclusion = zmalloc(exclusion_alloc_size);
                if (ua_event->exclusion == NULL) {
@@ -1604,6 +1699,7 @@ static void shadow_copy_session(struct ust_app_session *ua_sess,
        struct tm *timeinfo;
        char datetime[16];
        int ret;
+       char tmp_shm_path[PATH_MAX];
 
        /* Get date and time for unique app path */
        time(&rawtime);
@@ -1620,8 +1716,11 @@ static void shadow_copy_session(struct ust_app_session *ua_sess,
        ua_sess->egid = usess->gid;
        ua_sess->buffer_type = usess->buffer_type;
        ua_sess->bits_per_long = app->bits_per_long;
+
        /* There is only one consumer object per session possible. */
+       consumer_output_get(usess->consumer);
        ua_sess->consumer = usess->consumer;
+
        ua_sess->output_traces = usess->output_traces;
        ua_sess->live_timer_interval = usess->live_timer_interval;
        copy_channel_attr_to_ustctl(&ua_sess->metadata_attr,
@@ -1647,6 +1746,38 @@ static void shadow_copy_session(struct ust_app_session *ua_sess,
                goto error;
        }
 
+       strncpy(ua_sess->root_shm_path, usess->root_shm_path,
+               sizeof(ua_sess->root_shm_path));
+       ua_sess->root_shm_path[sizeof(ua_sess->root_shm_path) - 1] = '\0';
+       strncpy(ua_sess->shm_path, usess->shm_path,
+               sizeof(ua_sess->shm_path));
+       ua_sess->shm_path[sizeof(ua_sess->shm_path) - 1] = '\0';
+       if (ua_sess->shm_path[0]) {
+               switch (ua_sess->buffer_type) {
+               case LTTNG_BUFFER_PER_PID:
+                       ret = snprintf(tmp_shm_path, sizeof(tmp_shm_path),
+                                       DEFAULT_UST_TRACE_PID_PATH "/%s-%d-%s",
+                                       app->name, app->pid, datetime);
+                       break;
+               case LTTNG_BUFFER_PER_UID:
+                       ret = snprintf(tmp_shm_path, sizeof(tmp_shm_path),
+                                       DEFAULT_UST_TRACE_UID_PATH,
+                                       app->uid, app->bits_per_long);
+                       break;
+               default:
+                       assert(0);
+                       goto error;
+               }
+               if (ret < 0) {
+                       PERROR("sprintf UST shadow copy session");
+                       assert(0);
+                       goto error;
+               }
+               strncat(ua_sess->shm_path, tmp_shm_path,
+                       sizeof(ua_sess->shm_path) - strlen(ua_sess->shm_path) - 1);
+               ua_sess->shm_path[sizeof(ua_sess->shm_path) - 1] = '\0';
+       }
+
        /* Iterate over all channels in global domain. */
        cds_lfht_for_each_entry(usess->domain_global.channels->ht, &iter.iter,
                        uchan, node.node) {
@@ -1676,9 +1807,10 @@ static void shadow_copy_session(struct ust_app_session *ua_sess,
 
                lttng_ht_add_unique_str(ua_sess->channels, &ua_chan->node);
        }
+       return;
 
 error:
-       return;
+       consumer_output_put(ua_sess->consumer);
 }
 
 /*
@@ -1738,7 +1870,8 @@ static int setup_buffer_reg_pid(struct ust_app_session *ua_sess,
                 * This is the create channel path meaning that if there is NO
                 * registry available, we have to create one for this session.
                 */
-               ret = buffer_reg_pid_create(ua_sess->id, &reg_pid);
+               ret = buffer_reg_pid_create(ua_sess->id, &reg_pid,
+                       ua_sess->root_shm_path, ua_sess->shm_path);
                if (ret < 0) {
                        goto error;
                }
@@ -1752,7 +1885,9 @@ static int setup_buffer_reg_pid(struct ust_app_session *ua_sess,
                        app->uint16_t_alignment, app->uint32_t_alignment,
                        app->uint64_t_alignment, app->long_alignment,
                        app->byte_order, app->version.major,
-                       app->version.minor);
+                       app->version.minor, reg_pid->root_shm_path,
+                       reg_pid->shm_path,
+                       ua_sess->euid, ua_sess->egid);
        if (ret < 0) {
                /*
                 * reg_pid->registry->reg.ust is NULL upon error, so we need to
@@ -1785,6 +1920,7 @@ error:
  * Return 0 on success or else a negative value.
  */
 static int setup_buffer_reg_uid(struct ltt_ust_session *usess,
+               struct ust_app_session *ua_sess,
                struct ust_app *app, struct buffer_reg_uid **regp)
 {
        int ret = 0;
@@ -1802,7 +1938,8 @@ static int setup_buffer_reg_uid(struct ltt_ust_session *usess,
                 * registry available, we have to create one for this session.
                 */
                ret = buffer_reg_uid_create(usess->id, app->bits_per_long, app->uid,
-                               LTTNG_DOMAIN_UST, &reg_uid);
+                               LTTNG_DOMAIN_UST, &reg_uid,
+                               ua_sess->root_shm_path, ua_sess->shm_path);
                if (ret < 0) {
                        goto error;
                }
@@ -1816,7 +1953,8 @@ static int setup_buffer_reg_uid(struct ltt_ust_session *usess,
                        app->uint16_t_alignment, app->uint32_t_alignment,
                        app->uint64_t_alignment, app->long_alignment,
                        app->byte_order, app->version.major,
-                       app->version.minor);
+                       app->version.minor, reg_uid->root_shm_path,
+                       reg_uid->shm_path, usess->uid, usess->gid);
        if (ret < 0) {
                /*
                 * reg_uid->registry->reg.ust is NULL upon error, so we need to
@@ -1891,7 +2029,7 @@ static int create_ust_app_session(struct ltt_ust_session *usess,
                break;
        case LTTNG_BUFFER_PER_UID:
                /* Look for a global registry. If none exists, create one. */
-               ret = setup_buffer_reg_uid(usess, app, NULL);
+               ret = setup_buffer_reg_uid(usess, ua_sess, app, NULL);
                if (ret < 0) {
                        delete_ust_app_session(-1, ua_sess, app);
                        goto error;
@@ -2395,6 +2533,7 @@ static int create_buffer_reg_channel(struct buffer_reg_session *reg_sess,
        assert(reg_chan);
        reg_chan->consumer_key = ua_chan->key;
        reg_chan->subbuf_size = ua_chan->attr.subbuf_size;
+       reg_chan->num_subbuf = ua_chan->attr.num_subbuf;
 
        /* Create and add a channel registry to session. */
        ret = ust_registry_channel_add(reg_sess->reg.ust,
@@ -2478,7 +2617,10 @@ static int send_channel_uid_to_ust(struct buffer_reg_channel *reg_chan,
 
        /* Send channel to the application. */
        ret = ust_consumer_send_channel_to_ust(app, ua_sess, ua_chan);
-       if (ret < 0) {
+       if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+               ret = -ENOTCONN;        /* Caused by app exiting. */
+               goto error;
+       } else if (ret < 0) {
                goto error;
        }
 
@@ -2497,6 +2639,12 @@ static int send_channel_uid_to_ust(struct buffer_reg_channel *reg_chan,
                ret = ust_consumer_send_stream_to_ust(app, ua_chan, &stream);
                if (ret < 0) {
                        (void) release_ust_app_stream(-1, &stream);
+                       if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) {
+                               ret = -ENOTCONN; /* Caused by app exiting. */
+                               goto error_stream_unlock;
+                       } else if (ret < 0) {
+                               goto error_stream_unlock;
+                       }
                        goto error_stream_unlock;
                }
 
@@ -2590,10 +2738,9 @@ static int create_channel_per_uid(struct ust_app *app,
        /* Send buffers to the application. */
        ret = send_channel_uid_to_ust(reg_chan, app, ua_sess, ua_chan);
        if (ret < 0) {
-               /*
-                * Don't report error to the console, since it may be
-                * caused by application concurrently exiting.
-                */
+               if (ret != -ENOTCONN) {
+                       ERR("Error sending channel to application");
+               }
                goto error;
        }
 
@@ -2644,10 +2791,9 @@ static int create_channel_per_pid(struct ust_app *app,
 
        ret = send_channel_pid_to_ust(app, ua_sess, ua_chan);
        if (ret < 0) {
-               /*
-                * Don't report error to the console, since it may be
-                * caused by application concurrently exiting.
-                */
+               if (ret != -ENOTCONN) {
+                       ERR("Error sending channel to application");
+               }
                goto error;
        }
 
@@ -2661,7 +2807,8 @@ error:
  * need and send it to the application. This MUST be called with a RCU read
  * side lock acquired.
  *
- * Return 0 on success or else a negative value.
+ * Return 0 on success or else a negative value. Returns -ENOTCONN if
+ * the application exited concurrently.
  */
 static int do_create_channel(struct ust_app *app,
                struct ltt_ust_session *usess, struct ust_app_session *ua_sess,
@@ -2720,7 +2867,8 @@ error:
  *
  * Called with UST app session lock and RCU read-side lock held.
  *
- * Return 0 on success or else a negative value.
+ * Return 0 on success or else a negative value. Returns -ENOTCONN if
+ * the application exited concurrently.
  */
 static int create_ust_app_channel(struct ust_app_session *ua_sess,
                struct ltt_ust_channel *uchan, struct ust_app *app,
@@ -3133,6 +3281,11 @@ void ust_app_unregister(int sock)
                 */
                pthread_mutex_lock(&ua_sess->lock);
 
+               if (ua_sess->deleted) {
+                       pthread_mutex_unlock(&ua_sess->lock);
+                       continue;
+               }
+
                /*
                 * Normally, this is done in the delete session process which is
                 * executed in the call rcu below. However, upon registration we can't
@@ -3697,6 +3850,11 @@ int ust_app_create_channel_glb(struct ltt_ust_session *usess,
                         */
                        continue;
                }
+               if (!trace_ust_pid_tracker_lookup(usess, app->pid)) {
+                       /* Skip. */
+                       continue;
+               }
+
                /*
                 * Create session on the tracer side and add it to app session HT. Note
                 * that if session exist, it will simply return a pointer to the ust
@@ -3711,6 +3869,7 @@ int ust_app_create_channel_glb(struct ltt_ust_session *usess,
                                 * or a timeout on it. We can't inform the caller that for a
                                 * specific app, the session failed so lets continue here.
                                 */
+                               ret = 0;        /* Not an error. */
                                continue;
                        case -ENOMEM:
                        default:
@@ -3720,6 +3879,12 @@ int ust_app_create_channel_glb(struct ltt_ust_session *usess,
                assert(ua_sess);
 
                pthread_mutex_lock(&ua_sess->lock);
+
+               if (ua_sess->deleted) {
+                       pthread_mutex_unlock(&ua_sess->lock);
+                       continue;
+               }
+
                if (!strncmp(uchan->name, DEFAULT_METADATA_NAME,
                                        sizeof(uchan->name))) {
                        copy_channel_attr_to_ustctl(&ua_sess->metadata_attr, &uchan->attr);
@@ -3731,14 +3896,23 @@ int ust_app_create_channel_glb(struct ltt_ust_session *usess,
                }
                pthread_mutex_unlock(&ua_sess->lock);
                if (ret < 0) {
-                       if (ret == -ENOMEM) {
-                               /* No more memory is a fatal error. Stop right now. */
-                               goto error_rcu_unlock;
-                       }
                        /* Cleanup the created session if it's the case. */
                        if (created) {
                                destroy_app_session(app, ua_sess);
                        }
+                       switch (ret) {
+                       case -ENOTCONN:
+                               /*
+                                * The application's socket is not valid. Either a bad socket
+                                * or a timeout on it. We can't inform the caller that for a
+                                * specific app, the session failed so let's continue here.
+                                */
+                               ret = 0;        /* Not an error. */
+                               continue;
+                       case -ENOMEM:
+                       default:
+                               goto error_rcu_unlock;
+                       }
                }
        }
 
@@ -3789,11 +3963,23 @@ int ust_app_enable_event_glb(struct ltt_ust_session *usess,
 
                pthread_mutex_lock(&ua_sess->lock);
 
+               if (ua_sess->deleted) {
+                       pthread_mutex_unlock(&ua_sess->lock);
+                       continue;
+               }
+
                /* Lookup channel in the ust app session */
                lttng_ht_lookup(ua_sess->channels, (void *)uchan->name, &uiter);
                ua_chan_node = lttng_ht_iter_get_node_str(&uiter);
-               /* If the channel is not found, there is a code flow error */
-               assert(ua_chan_node);
+               /*
+                * It is possible that the channel cannot be found if
+                * the channel/event creation occurs concurrently with
+                * an application exit.
+                */
+               if (!ua_chan_node) {
+                       pthread_mutex_unlock(&ua_sess->lock);
+                       continue;
+               }
 
                ua_chan = caa_container_of(ua_chan_node, struct ust_app_channel, node);
 
@@ -3855,6 +4041,12 @@ int ust_app_create_event_glb(struct ltt_ust_session *usess,
                }
 
                pthread_mutex_lock(&ua_sess->lock);
+
+               if (ua_sess->deleted) {
+                       pthread_mutex_unlock(&ua_sess->lock);
+                       continue;
+               }
+
                /* Lookup channel in the ust app session */
                lttng_ht_lookup(ua_sess->channels, (void *)uchan->name, &uiter);
                ua_chan_node = lttng_ht_iter_get_node_str(&uiter);
@@ -3906,6 +4098,11 @@ int ust_app_start_trace(struct ltt_ust_session *usess, struct ust_app *app)
 
        pthread_mutex_lock(&ua_sess->lock);
 
+       if (ua_sess->deleted) {
+               pthread_mutex_unlock(&ua_sess->lock);
+               goto end;
+       }
+
        /* Upon restart, we skip the setup, already done */
        if (ua_sess->started) {
                goto skip_setup;
@@ -3917,7 +4114,7 @@ int ust_app_start_trace(struct ltt_ust_session *usess, struct ust_app *app)
                ret = run_as_mkdir_recursive(usess->consumer->dst.trace_path,
                                S_IRWXU | S_IRWXG, ua_sess->euid, ua_sess->egid);
                if (ret < 0) {
-                       if (ret != -EEXIST) {
+                       if (errno != EEXIST) {
                                ERR("Trace directory creation error");
                                goto error_unlock;
                        }
@@ -4006,6 +4203,11 @@ int ust_app_stop_trace(struct ltt_ust_session *usess, struct ust_app *app)
 
        pthread_mutex_lock(&ua_sess->lock);
 
+       if (ua_sess->deleted) {
+               pthread_mutex_unlock(&ua_sess->lock);
+               goto end_no_session;
+       }
+
        /*
         * If started = 0, it means that stop trace has been called for a session
         * that was never started. It's possible since we can have a fail start
@@ -4086,6 +4288,10 @@ int ust_app_flush_app_session(struct ust_app *app,
 
        pthread_mutex_lock(&ua_sess->lock);
 
+       if (ua_sess->deleted) {
+               goto end_deleted;
+       }
+
        health_code_update();
 
        /* Flushing buffers */
@@ -4115,6 +4321,7 @@ int ust_app_flush_app_session(struct ust_app *app,
 
        health_code_update();
 
+end_deleted:
        pthread_mutex_unlock(&ua_sess->lock);
 
 end_not_compatible:
@@ -4131,7 +4338,7 @@ static
 int ust_app_flush_session(struct ltt_ust_session *usess)
 
 {
-       int ret;
+       int ret = 0;
 
        DBG("Flushing session buffers for all ust apps");
 
@@ -4172,7 +4379,6 @@ int ust_app_flush_session(struct ltt_ust_session *usess)
                        /* Push metadata. */
                        (void) push_metadata(ust_session_reg, usess->consumer);
                }
-               ret = 0;
                break;
        }
        case LTTNG_BUFFER_PER_PID:
@@ -4191,11 +4397,11 @@ int ust_app_flush_session(struct ltt_ust_session *usess)
                break;
        }
        default:
+               ret = -1;
                assert(0);
                break;
        }
 
-end_no_session:
        rcu_read_unlock();
        health_code_update();
        return ret;
@@ -4325,50 +4531,35 @@ int ust_app_destroy_trace_all(struct ltt_ust_session *usess)
        return 0;
 }
 
-/*
- * Add channels/events from UST global domain to registered apps at sock.
- */
-void ust_app_global_update(struct ltt_ust_session *usess, int sock)
+static
+void ust_app_global_create(struct ltt_ust_session *usess, struct ust_app *app)
 {
        int ret = 0;
        struct lttng_ht_iter iter, uiter;
-       struct ust_app *app;
        struct ust_app_session *ua_sess = NULL;
        struct ust_app_channel *ua_chan;
        struct ust_app_event *ua_event;
        struct ust_app_ctx *ua_ctx;
+       int is_created = 0;
 
-       assert(usess);
-       assert(sock >= 0);
-
-       DBG2("UST app global update for app sock %d for session id %" PRIu64, sock,
-                       usess->id);
-
-       rcu_read_lock();
-
-       app = ust_app_find_by_sock(sock);
-       if (app == NULL) {
-               /*
-                * Application can be unregistered before so this is possible hence
-                * simply stopping the update.
-                */
-               DBG3("UST app update failed to find app sock %d", sock);
-               goto error;
-       }
-
-       if (!app->compatible) {
-               goto error;
-       }
-
-       ret = create_ust_app_session(usess, app, &ua_sess, NULL);
+       ret = create_ust_app_session(usess, app, &ua_sess, &is_created);
        if (ret < 0) {
                /* Tracer is probably gone or ENOMEM. */
                goto error;
        }
+       if (!is_created) {
+               /* App session already created. */
+               goto end;
+       }
        assert(ua_sess);
 
        pthread_mutex_lock(&ua_sess->lock);
 
+       if (ua_sess->deleted) {
+               pthread_mutex_unlock(&ua_sess->lock);
+               goto end;
+       }
+
        /*
         * We can iterate safely here over all UST app session since the create ust
         * app session above made a shadow copy of the UST global domain from the
@@ -4377,11 +4568,14 @@ void ust_app_global_update(struct ltt_ust_session *usess, int sock)
        cds_lfht_for_each_entry(ua_sess->channels->ht, &iter.iter, ua_chan,
                        node.node) {
                ret = do_create_channel(app, usess, ua_sess, ua_chan);
-               if (ret < 0) {
+               if (ret < 0 && ret != -ENOTCONN) {
                        /*
-                        * Stop everything. On error, the application failed, no more
-                        * file descriptor are available or ENOMEM so stopping here is
-                        * the only thing we can do for now.
+                        * Stop everything. On error, the application
+                        * failed, no more file descriptor are available
+                        * or ENOMEM so stopping here is the only thing
+                        * we can do for now. The only exception is
+                        * -ENOTCONN, which indicates that the application
+                        * has exited.
                         */
                        goto error_unlock;
                }
@@ -4418,9 +4612,8 @@ void ust_app_global_update(struct ltt_ust_session *usess, int sock)
 
                DBG2("UST trace started for app pid %d", app->pid);
        }
-
+end:
        /* Everything went well at this point. */
-       rcu_read_unlock();
        return;
 
 error_unlock:
@@ -4429,10 +4622,60 @@ error:
        if (ua_sess) {
                destroy_app_session(app, ua_sess);
        }
-       rcu_read_unlock();
        return;
 }
 
+static
+void ust_app_global_destroy(struct ltt_ust_session *usess, struct ust_app *app)
+{
+       struct ust_app_session *ua_sess;
+
+       ua_sess = lookup_session_by_app(usess, app);
+       if (ua_sess == NULL) {
+               return;
+       }
+       destroy_app_session(app, ua_sess);
+}
+
+/*
+ * Add channels/events from UST global domain to the given registered app.
+ *
+ * Called with session lock held.
+ * Called with RCU read-side lock held.
+ */
+void ust_app_global_update(struct ltt_ust_session *usess, struct ust_app *app)
+{
+       assert(usess);
+
+       DBG2("UST app global update for app sock %d for session id %" PRIu64,
+                       app->sock, usess->id);
+
+       if (!app->compatible) {
+               return;
+       }
+
+       if (trace_ust_pid_tracker_lookup(usess, app->pid)) {
+               ust_app_global_create(usess, app);
+       } else {
+               ust_app_global_destroy(usess, app);
+       }
+}
+
+/*
+ * Called with session lock held.
+ */
+void ust_app_global_update_all(struct ltt_ust_session *usess)
+{
+       struct lttng_ht_iter iter;
+       struct ust_app *app;
+
+       rcu_read_lock();
+       cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, pid_n.node) {
+               ust_app_global_update(usess, app);
+       }
+       rcu_read_unlock();
+}
+
 /*
  * Add context to a specific channel for global UST domain.
  */
@@ -4462,6 +4705,12 @@ int ust_app_add_ctx_channel_glb(struct ltt_ust_session *usess,
                }
 
                pthread_mutex_lock(&ua_sess->lock);
+
+               if (ua_sess->deleted) {
+                       pthread_mutex_unlock(&ua_sess->lock);
+                       continue;
+               }
+
                /* Lookup channel in the ust app session */
                lttng_ht_lookup(ua_sess->channels, (void *)uchan->name, &uiter);
                ua_chan_node = lttng_ht_iter_get_node_str(&uiter);
@@ -4520,6 +4769,12 @@ int ust_app_enable_event_pid(struct ltt_ust_session *usess,
        }
 
        pthread_mutex_lock(&ua_sess->lock);
+
+       if (ua_sess->deleted) {
+               ret = 0;
+               goto end_unlock;
+       }
+
        /* Lookup channel in the ust app session */
        lttng_ht_lookup(ua_sess->channels, (void *)uchan->name, &iter);
        ua_chan_node = lttng_ht_iter_get_node_str(&iter);
@@ -5083,7 +5338,8 @@ void ust_app_destroy(struct ust_app *app)
  * Return 0 on success or else a negative value.
  */
 int ust_app_snapshot_record(struct ltt_ust_session *usess,
-               struct snapshot_output *output, int wait, uint64_t max_stream_size)
+               struct snapshot_output *output, int wait,
+               uint64_t nb_packets_per_stream)
 {
        int ret = 0;
        unsigned int snapshot_done = 0;
@@ -5127,14 +5383,14 @@ int ust_app_snapshot_record(struct ltt_ust_session *usess,
                                        reg_chan, node.node) {
                                ret = consumer_snapshot_channel(socket, reg_chan->consumer_key,
                                                output, 0, usess->uid, usess->gid, pathname, wait,
-                                               max_stream_size);
+                                               nb_packets_per_stream);
                                if (ret < 0) {
                                        goto error;
                                }
                        }
                        ret = consumer_snapshot_channel(socket,
                                        reg->registry->reg.ust->metadata_key, output, 1,
-                                       usess->uid, usess->gid, pathname, wait, max_stream_size);
+                                       usess->uid, usess->gid, pathname, wait, 0);
                        if (ret < 0) {
                                goto error;
                        }
@@ -5178,7 +5434,7 @@ int ust_app_snapshot_record(struct ltt_ust_session *usess,
                                        ua_chan, node.node) {
                                ret = consumer_snapshot_channel(socket, ua_chan->key, output,
                                                0, ua_sess->euid, ua_sess->egid, pathname, wait,
-                                               max_stream_size);
+                                               nb_packets_per_stream);
                                if (ret < 0) {
                                        goto error;
                                }
@@ -5187,8 +5443,7 @@ int ust_app_snapshot_record(struct ltt_ust_session *usess,
                        registry = get_session_registry(ua_sess);
                        assert(registry);
                        ret = consumer_snapshot_channel(socket, registry->metadata_key, output,
-                                       1, ua_sess->euid, ua_sess->egid, pathname, wait,
-                                       max_stream_size);
+                                       1, ua_sess->euid, ua_sess->egid, pathname, wait, 0);
                        if (ret < 0) {
                                goto error;
                        }
@@ -5216,11 +5471,12 @@ error:
 }
 
 /*
- * Return the number of streams for a UST session.
+ * Return the size taken by one more packet per stream.
  */
-unsigned int ust_app_get_nb_stream(struct ltt_ust_session *usess)
+uint64_t ust_app_get_size_one_more_packet_per_stream(struct ltt_ust_session *usess,
+               uint64_t cur_nr_packets)
 {
-       unsigned int ret = 0;
+       uint64_t tot_size = 0;
        struct ust_app *app;
        struct lttng_ht_iter iter;
 
@@ -5237,7 +5493,14 @@ unsigned int ust_app_get_nb_stream(struct ltt_ust_session *usess)
                        rcu_read_lock();
                        cds_lfht_for_each_entry(reg->registry->channels->ht, &iter.iter,
                                        reg_chan, node.node) {
-                               ret += reg_chan->stream_count;
+                               if (cur_nr_packets >= reg_chan->num_subbuf) {
+                                       /*
+                                        * Don't take channel into account if we
+                                        * already grabbed all its packets.
+                                        */
+                                       continue;
+                               }
+                               tot_size += reg_chan->subbuf_size * reg_chan->stream_count;
                        }
                        rcu_read_unlock();
                }
@@ -5259,7 +5522,14 @@ unsigned int ust_app_get_nb_stream(struct ltt_ust_session *usess)
 
                        cds_lfht_for_each_entry(ua_sess->channels->ht, &chan_iter.iter,
                                        ua_chan, node.node) {
-                               ret += ua_chan->streams.count;
+                               if (cur_nr_packets >= ua_chan->attr.num_subbuf) {
+                                       /*
+                                        * Don't take channel into account if we
+                                        * already grabbed all its packets.
+                                        */
+                                       continue;
+                               }
+                               tot_size += ua_chan->attr.subbuf_size * ua_chan->streams.count;
                        }
                }
                rcu_read_unlock();
@@ -5270,5 +5540,5 @@ unsigned int ust_app_get_nb_stream(struct ltt_ust_session *usess)
                break;
        }
 
-       return ret;
+       return tot_size;
 }
This page took 0.053728 seconds and 4 git commands to generate.