+ consumer_stream_free(stream);
+}
+
+/*
+ * Action done with the metadata stream when adding it to the consumer internal
+ * data structures to handle it.
+ */
+int consumer_add_metadata_stream(struct lttng_consumer_stream *stream)
+{
+ struct lttng_ht *ht = metadata_ht;
+ int ret = 0;
+ struct lttng_ht_iter iter;
+ struct lttng_ht_node_u64 *node;
+
+ assert(stream);
+ assert(ht);
+
+ DBG3("Adding metadata stream %" PRIu64 " to hash table", stream->key);
+
+ pthread_mutex_lock(&consumer_data.lock);
+ pthread_mutex_lock(&stream->chan->lock);
+ pthread_mutex_lock(&stream->chan->timer_lock);
+ pthread_mutex_lock(&stream->lock);
+
+ /*
+ * From here, refcounts are updated so be _careful_ when returning an error
+ * after this point.
+ */
+
+ rcu_read_lock();
+
+ /*
+ * Lookup the stream just to make sure it does not exist in our internal
+ * state. This should NEVER happen.
+ */
+ lttng_ht_lookup(ht, &stream->key, &iter);
+ node = lttng_ht_iter_get_node_u64(&iter);
+ assert(!node);
+
+ /*
+ * When nb_init_stream_left reaches 0, we don't need to trigger any action
+ * in terms of destroying the associated channel, because the action that
+ * causes the count to become 0 also causes a stream to be added. The
+ * channel deletion will thus be triggered by the following removal of this
+ * stream.
+ */
+ if (uatomic_read(&stream->chan->nb_init_stream_left) > 0) {
+ /* Increment refcount before decrementing nb_init_stream_left */
+ cmm_smp_wmb();
+ uatomic_dec(&stream->chan->nb_init_stream_left);
+ }
+
+ lttng_ht_add_unique_u64(ht, &stream->node);
+
+ lttng_ht_add_unique_u64(consumer_data.stream_per_chan_id_ht,
+ &stream->node_channel_id);
+
+ /*
+ * Add stream to the stream_list_ht of the consumer data. No need to steal
+ * the key since the HT does not use it and we allow to add redundant keys
+ * into this table.
+ */
+ lttng_ht_add_u64(consumer_data.stream_list_ht, &stream->node_session_id);
+
+ rcu_read_unlock();
+
+ pthread_mutex_unlock(&stream->lock);
+ pthread_mutex_unlock(&stream->chan->lock);
+ pthread_mutex_unlock(&stream->chan->timer_lock);
+ pthread_mutex_unlock(&consumer_data.lock);
+ return ret;
+}
+
+/*
+ * Delete data stream that are flagged for deletion (endpoint_status).
+ */
+static void validate_endpoint_status_data_stream(void)
+{
+ struct lttng_ht_iter iter;
+ struct lttng_consumer_stream *stream;
+
+ DBG("Consumer delete flagged data stream");
+
+ rcu_read_lock();
+ cds_lfht_for_each_entry(data_ht->ht, &iter.iter, stream, node.node) {
+ /* Validate delete flag of the stream */
+ if (stream->endpoint_status == CONSUMER_ENDPOINT_ACTIVE) {
+ continue;
+ }
+ /* Delete it right now */
+ consumer_del_stream(stream, data_ht);
+ }
+ rcu_read_unlock();
+}
+
+/*
+ * Delete metadata stream that are flagged for deletion (endpoint_status).
+ */
+static void validate_endpoint_status_metadata_stream(
+ struct lttng_poll_event *pollset)
+{
+ struct lttng_ht_iter iter;
+ struct lttng_consumer_stream *stream;
+
+ DBG("Consumer delete flagged metadata stream");
+
+ assert(pollset);
+
+ rcu_read_lock();
+ cds_lfht_for_each_entry(metadata_ht->ht, &iter.iter, stream, node.node) {
+ /* Validate delete flag of the stream */
+ if (stream->endpoint_status == CONSUMER_ENDPOINT_ACTIVE) {
+ continue;
+ }
+ /*
+ * Remove from pollset so the metadata thread can continue without
+ * blocking on a deleted stream.
+ */
+ lttng_poll_del(pollset, stream->wait_fd);
+
+ /* Delete it right now */
+ consumer_del_metadata_stream(stream, metadata_ht);
+ }
+ rcu_read_unlock();
+}
+
+/*
+ * Thread polls on metadata file descriptor and write them on disk or on the
+ * network.
+ */
+void *consumer_thread_metadata_poll(void *data)
+{
+ int ret, i, pollfd, err = -1;
+ uint32_t revents, nb_fd;
+ struct lttng_consumer_stream *stream = NULL;
+ struct lttng_ht_iter iter;
+ struct lttng_ht_node_u64 *node;
+ struct lttng_poll_event events;
+ struct lttng_consumer_local_data *ctx = data;
+ ssize_t len;
+
+ rcu_register_thread();
+
+ health_register(health_consumerd, HEALTH_CONSUMERD_TYPE_METADATA);
+
+ if (testpoint(consumerd_thread_metadata)) {
+ goto error_testpoint;
+ }
+
+ health_code_update();
+
+ DBG("Thread metadata poll started");
+
+ /* Size is set to 1 for the consumer_metadata pipe */
+ ret = lttng_poll_create(&events, 2, LTTNG_CLOEXEC);
+ if (ret < 0) {
+ ERR("Poll set creation failed");
+ goto end_poll;
+ }
+
+ ret = lttng_poll_add(&events,
+ lttng_pipe_get_readfd(ctx->consumer_metadata_pipe), LPOLLIN);
+ if (ret < 0) {
+ goto end;
+ }
+
+ /* Main loop */
+ DBG("Metadata main loop started");
+
+ while (1) {
+restart:
+ health_code_update();
+ health_poll_entry();
+ DBG("Metadata poll wait");
+ ret = lttng_poll_wait(&events, -1);
+ DBG("Metadata poll return from wait with %d fd(s)",
+ LTTNG_POLL_GETNB(&events));
+ health_poll_exit();
+ DBG("Metadata event catched in thread");
+ if (ret < 0) {
+ if (errno == EINTR) {
+ ERR("Poll EINTR catched");
+ goto restart;
+ }
+ if (LTTNG_POLL_GETNB(&events) == 0) {
+ err = 0; /* All is OK */
+ }
+ goto end;
+ }
+
+ nb_fd = ret;
+
+ /* From here, the event is a metadata wait fd */
+ for (i = 0; i < nb_fd; i++) {
+ health_code_update();
+
+ revents = LTTNG_POLL_GETEV(&events, i);
+ pollfd = LTTNG_POLL_GETFD(&events, i);
+
+ if (!revents) {
+ /* No activity for this FD (poll implementation). */
+ continue;
+ }
+
+ if (pollfd == lttng_pipe_get_readfd(ctx->consumer_metadata_pipe)) {
+ if (revents & (LPOLLERR | LPOLLHUP )) {
+ DBG("Metadata thread pipe hung up");
+ /*
+ * Remove the pipe from the poll set and continue the loop
+ * since their might be data to consume.
+ */
+ lttng_poll_del(&events,
+ lttng_pipe_get_readfd(ctx->consumer_metadata_pipe));
+ lttng_pipe_read_close(ctx->consumer_metadata_pipe);
+ continue;
+ } else if (revents & LPOLLIN) {
+ ssize_t pipe_len;
+
+ pipe_len = lttng_pipe_read(ctx->consumer_metadata_pipe,
+ &stream, sizeof(stream));
+ if (pipe_len < sizeof(stream)) {
+ PERROR("read metadata stream");
+ /*
+ * Continue here to handle the rest of the streams.
+ */
+ continue;
+ }
+
+ /* A NULL stream means that the state has changed. */
+ if (stream == NULL) {
+ /* Check for deleted streams. */
+ validate_endpoint_status_metadata_stream(&events);
+ goto restart;
+ }
+
+ DBG("Adding metadata stream %d to poll set",
+ stream->wait_fd);
+
+ /* Add metadata stream to the global poll events list */
+ lttng_poll_add(&events, stream->wait_fd,
+ LPOLLIN | LPOLLPRI | LPOLLHUP);
+ }
+
+ /* Handle other stream */
+ continue;
+ }
+
+ rcu_read_lock();
+ {
+ uint64_t tmp_id = (uint64_t) pollfd;
+
+ lttng_ht_lookup(metadata_ht, &tmp_id, &iter);
+ }
+ node = lttng_ht_iter_get_node_u64(&iter);
+ assert(node);
+
+ stream = caa_container_of(node, struct lttng_consumer_stream,
+ node);
+
+ /* Check for error event */
+ if (revents & (LPOLLERR | LPOLLHUP)) {
+ DBG("Metadata fd %d is hup|err.", pollfd);
+ if (!stream->hangup_flush_done
+ && (consumer_data.type == LTTNG_CONSUMER32_UST
+ || consumer_data.type == LTTNG_CONSUMER64_UST)) {
+ DBG("Attempting to flush and consume the UST buffers");
+ lttng_ustconsumer_on_stream_hangup(stream);
+
+ /* We just flushed the stream now read it. */
+ do {
+ health_code_update();
+
+ len = ctx->on_buffer_ready(stream, ctx);
+ /*
+ * We don't check the return value here since if we get
+ * a negative len, it means an error occured thus we
+ * simply remove it from the poll set and free the
+ * stream.
+ */
+ } while (len > 0);
+ }
+
+ lttng_poll_del(&events, stream->wait_fd);
+ /*
+ * This call update the channel states, closes file descriptors
+ * and securely free the stream.
+ */
+ consumer_del_metadata_stream(stream, metadata_ht);
+ } else if (revents & (LPOLLIN | LPOLLPRI)) {
+ /* Get the data out of the metadata file descriptor */
+ DBG("Metadata available on fd %d", pollfd);
+ assert(stream->wait_fd == pollfd);
+
+ do {
+ health_code_update();
+
+ len = ctx->on_buffer_ready(stream, ctx);
+ /*
+ * We don't check the return value here since if we get
+ * a negative len, it means an error occured thus we
+ * simply remove it from the poll set and free the
+ * stream.
+ */
+ } while (len > 0);
+
+ /* It's ok to have an unavailable sub-buffer */
+ if (len < 0 && len != -EAGAIN && len != -ENODATA) {
+ /* Clean up stream from consumer and free it. */
+ lttng_poll_del(&events, stream->wait_fd);
+ consumer_del_metadata_stream(stream, metadata_ht);
+ }
+ }
+
+ /* Release RCU lock for the stream looked up */
+ rcu_read_unlock();
+ }
+ }
+
+ /* All is OK */
+ err = 0;
+end:
+ DBG("Metadata poll thread exiting");
+
+ lttng_poll_clean(&events);
+end_poll:
+error_testpoint:
+ if (err) {
+ health_error();
+ ERR("Health error occurred in %s", __func__);
+ }
+ health_unregister(health_consumerd);
+ rcu_unregister_thread();
+ return NULL;
+}
+
+/*
+ * This thread polls the fds in the set to consume the data and write
+ * it to tracefile if necessary.
+ */
+void *consumer_thread_data_poll(void *data)
+{
+ int num_rdy, num_hup, high_prio, ret, i, err = -1;
+ struct pollfd *pollfd = NULL;
+ /* local view of the streams */
+ struct lttng_consumer_stream **local_stream = NULL, *new_stream = NULL;
+ /* local view of consumer_data.fds_count */
+ int nb_fd = 0;
+ struct lttng_consumer_local_data *ctx = data;
+ ssize_t len;
+
+ rcu_register_thread();
+
+ health_register(health_consumerd, HEALTH_CONSUMERD_TYPE_DATA);
+
+ if (testpoint(consumerd_thread_data)) {
+ goto error_testpoint;
+ }
+
+ health_code_update();
+
+ local_stream = zmalloc(sizeof(struct lttng_consumer_stream *));
+ if (local_stream == NULL) {
+ PERROR("local_stream malloc");
+ goto end;
+ }
+
+ while (1) {
+ health_code_update();
+
+ high_prio = 0;
+ num_hup = 0;
+
+ /*
+ * the fds set has been updated, we need to update our
+ * local array as well
+ */
+ pthread_mutex_lock(&consumer_data.lock);
+ if (consumer_data.need_update) {
+ free(pollfd);
+ pollfd = NULL;
+
+ free(local_stream);
+ local_stream = NULL;
+
+ /*
+ * Allocate for all fds +1 for the consumer_data_pipe and +1 for
+ * wake up pipe.
+ */
+ pollfd = zmalloc((consumer_data.stream_count + 2) * sizeof(struct pollfd));
+ if (pollfd == NULL) {
+ PERROR("pollfd malloc");
+ pthread_mutex_unlock(&consumer_data.lock);
+ goto end;
+ }
+
+ local_stream = zmalloc((consumer_data.stream_count + 2) *
+ sizeof(struct lttng_consumer_stream *));
+ if (local_stream == NULL) {
+ PERROR("local_stream malloc");
+ pthread_mutex_unlock(&consumer_data.lock);
+ goto end;
+ }
+ ret = update_poll_array(ctx, &pollfd, local_stream,
+ data_ht);
+ if (ret < 0) {
+ ERR("Error in allocating pollfd or local_outfds");
+ lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_POLL_ERROR);
+ pthread_mutex_unlock(&consumer_data.lock);
+ goto end;
+ }
+ nb_fd = ret;
+ consumer_data.need_update = 0;
+ }
+ pthread_mutex_unlock(&consumer_data.lock);
+
+ /* No FDs and consumer_quit, consumer_cleanup the thread */
+ if (nb_fd == 0 && consumer_quit == 1) {
+ err = 0; /* All is OK */
+ goto end;
+ }
+ /* poll on the array of fds */
+ restart:
+ DBG("polling on %d fd", nb_fd + 2);
+ health_poll_entry();
+ num_rdy = poll(pollfd, nb_fd + 2, -1);
+ health_poll_exit();
+ DBG("poll num_rdy : %d", num_rdy);
+ if (num_rdy == -1) {
+ /*
+ * Restart interrupted system call.
+ */
+ if (errno == EINTR) {
+ goto restart;
+ }
+ PERROR("Poll error");
+ lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_POLL_ERROR);
+ goto end;
+ } else if (num_rdy == 0) {
+ DBG("Polling thread timed out");
+ goto end;
+ }
+
+ /*
+ * If the consumer_data_pipe triggered poll go directly to the
+ * beginning of the loop to update the array. We want to prioritize
+ * array update over low-priority reads.
+ */
+ if (pollfd[nb_fd].revents & (POLLIN | POLLPRI)) {
+ ssize_t pipe_readlen;
+
+ DBG("consumer_data_pipe wake up");
+ pipe_readlen = lttng_pipe_read(ctx->consumer_data_pipe,
+ &new_stream, sizeof(new_stream));
+ if (pipe_readlen < sizeof(new_stream)) {
+ PERROR("Consumer data pipe");
+ /* Continue so we can at least handle the current stream(s). */
+ continue;
+ }
+
+ /*
+ * If the stream is NULL, just ignore it. It's also possible that
+ * the sessiond poll thread changed the consumer_quit state and is
+ * waking us up to test it.
+ */
+ if (new_stream == NULL) {
+ validate_endpoint_status_data_stream();
+ continue;
+ }
+
+ /* Continue to update the local streams and handle prio ones */
+ continue;
+ }
+
+ /* Handle wakeup pipe. */
+ if (pollfd[nb_fd + 1].revents & (POLLIN | POLLPRI)) {
+ char dummy;
+ ssize_t pipe_readlen;
+
+ pipe_readlen = lttng_pipe_read(ctx->consumer_wakeup_pipe, &dummy,
+ sizeof(dummy));
+ if (pipe_readlen < 0) {
+ PERROR("Consumer data wakeup pipe");
+ }
+ /* We've been awakened to handle stream(s). */
+ ctx->has_wakeup = 0;
+ }
+
+ /* Take care of high priority channels first. */
+ for (i = 0; i < nb_fd; i++) {
+ health_code_update();
+
+ if (local_stream[i] == NULL) {
+ continue;
+ }
+ if (pollfd[i].revents & POLLPRI) {
+ DBG("Urgent read on fd %d", pollfd[i].fd);
+ high_prio = 1;