Fix: ust-consumer: metadata thread not woken-up after version change
[lttng-tools.git] / src / common / ust-consumer / ust-consumer.c
index 1af9840cd5184b0cfebad0bf680257fbbadac9a5..5f1f93b0e1024c6c1eee422d80930b71ccaaf1a8 100644 (file)
@@ -37,6 +37,7 @@
 #include <common/utils.h>
 #include <common/index/index.h>
 #include <common/consumer/consumer.h>
+#include <common/optional.h>
 
 #include "ust-consumer.h"
 
@@ -77,6 +78,11 @@ static void destroy_channel(struct lttng_consumer_channel *channel)
                lttng_ustconsumer_del_channel(channel);
                lttng_ustconsumer_free_channel(channel);
        }
+
+       if (channel->trace_chunk) {
+               lttng_trace_chunk_put(channel->trace_chunk);
+       }
+
        free(channel);
 }
 
@@ -403,7 +409,8 @@ static int open_ust_stream_fd(struct lttng_consumer_channel *channel, int cpu,
        }
        return run_as_open(shm_path,
                O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR,
-               session_credentials->uid, session_credentials->gid);
+               lttng_credentials_get_uid(session_credentials),
+               lttng_credentials_get_gid(session_credentials));
 
 error_shm_path:
        return -1;
@@ -481,8 +488,10 @@ error_open:
                                ERR("Cannot get stream shm path");
                        }
                        closeret = run_as_unlink(shm_path,
-                                       channel->buffer_credentials.value.uid,
-                                       channel->buffer_credentials.value.gid);
+                                       lttng_credentials_get_uid(LTTNG_OPTIONAL_GET_PTR(
+                                                       channel->buffer_credentials)),
+                                       lttng_credentials_get_gid(LTTNG_OPTIONAL_GET_PTR(
+                                                       channel->buffer_credentials)));
                        if (closeret) {
                                PERROR("unlink %s", shm_path);
                        }
@@ -491,8 +500,10 @@ error_open:
        /* Try to rmdir all directories under shm_path root. */
        if (channel->root_shm_path[0]) {
                (void) run_as_rmdir_recursive(channel->root_shm_path,
-                               channel->buffer_credentials.value.uid,
-                               channel->buffer_credentials.value.gid,
+                               lttng_credentials_get_uid(LTTNG_OPTIONAL_GET_PTR(
+                                               channel->buffer_credentials)),
+                               lttng_credentials_get_gid(LTTNG_OPTIONAL_GET_PTR(
+                                               channel->buffer_credentials)),
                                LTTNG_DIRECTORY_HANDLE_SKIP_NON_EMPTY_FLAG);
        }
        free(stream_fds);
@@ -1230,7 +1241,7 @@ static int snapshot_channel(struct lttng_consumer_channel *channel,
 
                        subbuf_view = lttng_buffer_view_init(
                                        subbuf_addr, 0, padded_len);
-                       read_len = lttng_consumer_on_read_subbuffer_mmap(ctx,
+                       read_len = lttng_consumer_on_read_subbuffer_mmap(
                                        stream, &subbuf_view, padded_len - len);
                        if (use_relayd) {
                                if (read_len != len) {
@@ -1272,6 +1283,17 @@ error_unlock:
        return ret;
 }
 
+static
+void metadata_stream_reset_cache_consumed_position(
+               struct lttng_consumer_stream *stream)
+{
+       ASSERT_LOCKED(stream->lock);
+
+       DBG("Reset metadata cache of session %" PRIu64,
+                       stream->chan->session_id);
+       stream->ust_metadata_pushed = 0;
+}
+
 /*
  * Receive the metadata updates from the sessiond. Supports receiving
  * overlapping metadata, but is needs to always belong to a contiguous
@@ -1286,6 +1308,7 @@ int lttng_ustconsumer_recv_metadata(int sock, uint64_t key, uint64_t offset,
 {
        int ret, ret_code = LTTCOMM_CONSUMERD_SUCCESS;
        char *metadata_str;
+       enum consumer_metadata_cache_write_status cache_write_status;
 
        DBG("UST consumer push metadata key %" PRIu64 " of len %" PRIu64, key, len);
 
@@ -1309,9 +1332,40 @@ int lttng_ustconsumer_recv_metadata(int sock, uint64_t key, uint64_t offset,
        health_code_update();
 
        pthread_mutex_lock(&channel->metadata_cache->lock);
-       ret = consumer_metadata_cache_write(channel, offset, len, version,
-                       metadata_str);
-       if (ret < 0) {
+       cache_write_status = consumer_metadata_cache_write(
+                       channel, offset, len, version, metadata_str);
+       pthread_mutex_unlock(&channel->metadata_cache->lock);
+       switch (cache_write_status) {
+       case CONSUMER_METADATA_CACHE_WRITE_STATUS_NO_CHANGE:
+               /*
+                * The write entirely overlapped with existing contents of the
+                * same metadata version (same content); there is nothing to do.
+                */
+               break;
+       case CONSUMER_METADATA_CACHE_WRITE_STATUS_INVALIDATED:
+               /*
+                * The metadata cache was invalidated (previously pushed
+                * content has been overwritten). Reset the stream's consumed
+                * metadata position to ensure the metadata poll thread consumes
+                * the whole cache.
+                */
+               pthread_mutex_lock(&channel->metadata_stream->lock);
+               metadata_stream_reset_cache_consumed_position(
+                               channel->metadata_stream);
+               pthread_mutex_unlock(&channel->metadata_stream->lock);
+               /* Fall-through. */
+       case CONSUMER_METADATA_CACHE_WRITE_STATUS_APPENDED_CONTENT:
+               /*
+                * In both cases, the metadata poll thread has new data to
+                * consume.
+                */
+               ret = consumer_metadata_wakeup_pipe(channel);
+               if (ret) {
+                       ret_code = LTTCOMM_CONSUMERD_ERROR_METADATA;
+                       goto end_free;
+               }
+               break;
+       case CONSUMER_METADATA_CACHE_WRITE_STATUS_ERROR:
                /* Unable to handle metadata. Notify session daemon. */
                ret_code = LTTCOMM_CONSUMERD_ERROR_METADATA;
                /*
@@ -1319,10 +1373,10 @@ int lttng_ustconsumer_recv_metadata(int sock, uint64_t key, uint64_t offset,
                 * not have been updated which could create an infinite loop below when
                 * waiting for the metadata cache to be flushed.
                 */
-               pthread_mutex_unlock(&channel->metadata_cache->lock);
                goto end_free;
+       default:
+               abort();
        }
-       pthread_mutex_unlock(&channel->metadata_cache->lock);
 
        if (!wait) {
                goto end_free;
@@ -1455,8 +1509,8 @@ int lttng_ustconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
                struct ustctl_consumer_channel_attr attr;
                const uint64_t chunk_id = msg.u.ask_channel.chunk_id.value;
                const struct lttng_credentials buffer_credentials = {
-                       .uid = msg.u.ask_channel.buffer_credentials.uid,
-                       .gid = msg.u.ask_channel.buffer_credentials.gid,
+                       .uid = LTTNG_OPTIONAL_INIT_VALUE(msg.u.ask_channel.buffer_credentials.uid),
+                       .gid = LTTNG_OPTIONAL_INIT_VALUE(msg.u.ask_channel.buffer_credentials.gid),
                };
 
                /* Create a plain object and reserve a channel key. */
@@ -2066,8 +2120,8 @@ end_rotate_channel_nosignal:
        case LTTNG_CONSUMER_CREATE_TRACE_CHUNK:
        {
                const struct lttng_credentials credentials = {
-                       .uid = msg.u.create_trace_chunk.credentials.value.uid,
-                       .gid = msg.u.create_trace_chunk.credentials.value.gid,
+                       .uid = LTTNG_OPTIONAL_INIT_VALUE(msg.u.create_trace_chunk.credentials.value.uid),
+                       .gid = LTTNG_OPTIONAL_INIT_VALUE(msg.u.create_trace_chunk.credentials.value.gid),
                };
                const bool is_local_trace =
                                !msg.u.create_trace_chunk.relayd_id.is_set;
@@ -2175,6 +2229,28 @@ end_rotate_channel_nosignal:
                                msg.u.trace_chunk_exists.chunk_id);
                goto end_msg_sessiond;
        }
+       case LTTNG_CONSUMER_OPEN_CHANNEL_PACKETS:
+       {
+               const uint64_t key = msg.u.open_channel_packets.key;
+               struct lttng_consumer_channel *channel =
+                               consumer_find_channel(key);
+
+               if (channel) {
+                       pthread_mutex_lock(&channel->lock);
+                       ret_code = lttng_consumer_open_channel_packets(channel);
+                       pthread_mutex_unlock(&channel->lock);
+               } else {
+                       /*
+                        * The channel could have disappeared in per-pid
+                        * buffering mode.
+                        */
+                       DBG("Channel %" PRIu64 " not found", key);
+                       ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
+               }
+
+               health_code_update();
+               goto end_msg_sessiond;
+       }
        default:
                break;
        }
@@ -2374,8 +2450,10 @@ void lttng_ustconsumer_del_channel(struct lttng_consumer_channel *chan)
                                ERR("Cannot get stream shm path");
                        }
                        ret = run_as_unlink(shm_path,
-                                       chan->buffer_credentials.value.uid,
-                                       chan->buffer_credentials.value.gid);
+                                       lttng_credentials_get_uid(LTTNG_OPTIONAL_GET_PTR(
+                                                       chan->buffer_credentials)),
+                                       lttng_credentials_get_gid(LTTNG_OPTIONAL_GET_PTR(
+                                                       chan->buffer_credentials)));
                        if (ret) {
                                PERROR("unlink %s", shm_path);
                        }
@@ -2394,8 +2472,10 @@ void lttng_ustconsumer_free_channel(struct lttng_consumer_channel *chan)
        /* Try to rmdir all directories under shm_path root. */
        if (chan->root_shm_path[0]) {
                (void) run_as_rmdir_recursive(chan->root_shm_path,
-                               chan->buffer_credentials.value.uid,
-                               chan->buffer_credentials.value.gid,
+                               lttng_credentials_get_uid(LTTNG_OPTIONAL_GET_PTR(
+                                               chan->buffer_credentials)),
+                               lttng_credentials_get_gid(LTTNG_OPTIONAL_GET_PTR(
+                                               chan->buffer_credentials)),
                                LTTNG_DIRECTORY_HANDLE_SKIP_NON_EMPTY_FLAG);
        }
        free(chan->stream_fds);
@@ -2428,21 +2508,11 @@ int lttng_ustconsumer_close_wakeup_fd(struct lttng_consumer_stream *stream)
        return ustctl_stream_close_wakeup_fd(stream->ustream);
 }
 
-static
-void metadata_stream_reset_cache(struct lttng_consumer_stream *stream)
-{
-       DBG("Reset metadata cache of session %" PRIu64,
-                       stream->chan->session_id);
-       stream->ust_metadata_pushed = 0;
-       stream->metadata_version = stream->chan->metadata_cache->version;
-       stream->reset_metadata_flag = 1;
-}
-
 /*
  * Write up to one packet from the metadata cache to the channel.
  *
- * Returns the number of bytes pushed in the cache, or a negative value
- * on error.
+ * Returns the number of bytes pushed from the cache into the ring buffer, or a
+ * negative value on error.
  */
 static
 int commit_one_metadata_packet(struct lttng_consumer_stream *stream)
@@ -2451,10 +2521,41 @@ int commit_one_metadata_packet(struct lttng_consumer_stream *stream)
        int ret;
 
        pthread_mutex_lock(&stream->chan->metadata_cache->lock);
-       if (stream->chan->metadata_cache->max_offset
-                       == stream->ust_metadata_pushed) {
-               ret = 0;
-               goto end;
+       if (stream->chan->metadata_cache->max_offset ==
+           stream->ust_metadata_pushed) {
+               /*
+                * In the context of a user space metadata channel, a
+                * change in version can be detected in two ways:
+                *   1) During the pre-consume of the `read_subbuffer` loop,
+                *   2) When populating the metadata ring buffer (i.e. here).
+                *
+                * This function is invoked when there is no metadata
+                * available in the ring-buffer. If all data was consumed
+                * up to the size of the metadata cache, there is no metadata
+                * to insert in the ring-buffer.
+                *
+                * However, the metadata version could still have changed (a
+                * regeneration without any new data will yield the same cache
+                * size).
+                *
+                * The cache's version is checked for a version change and the
+                * consumed position is reset if one occurred.
+                *
+                * This check is only necessary for the user space domain as
+                * it has to manage the cache explicitly. If this reset was not
+                * performed, no metadata would be consumed (and no reset would
+                * occur as part of the pre-consume) until the metadata size
+                * exceeded the cache size.
+                */
+               if (stream->metadata_version !=
+                               stream->chan->metadata_cache->version) {
+                       metadata_stream_reset_cache_consumed_position(stream);
+                       consumer_stream_metadata_set_version(stream,
+                                       stream->chan->metadata_cache->version);
+               } else {
+                       ret = 0;
+                       goto end;
+               }
        }
 
        write_len = ustctl_write_one_packet_to_channel(stream->chan->uchan,
@@ -2464,7 +2565,7 @@ int commit_one_metadata_packet(struct lttng_consumer_stream *stream)
        assert(write_len != 0);
        if (write_len < 0) {
                ERR("Writing one metadata packet");
-               ret = -1;
+               ret = write_len;
                goto end;
        }
        stream->ust_metadata_pushed += write_len;
@@ -2495,15 +2596,13 @@ end:
  * awaiting on metadata to be pushed out.
  *
  * The RCU read side lock must be held by the caller.
- *
- * Return 0 if new metadatda is available, EAGAIN if the metadata stream
- * is empty or a negative value on error.
  */
-int lttng_ustconsumer_sync_metadata(struct lttng_consumer_local_data *ctx,
+enum sync_metadata_status lttng_ustconsumer_sync_metadata(
+               struct lttng_consumer_local_data *ctx,
                struct lttng_consumer_stream *metadata_stream)
 {
        int ret;
-       int retry = 0;
+       enum sync_metadata_status status;
        struct lttng_consumer_channel *metadata_channel;
 
        assert(ctx);
@@ -2518,6 +2617,7 @@ int lttng_ustconsumer_sync_metadata(struct lttng_consumer_local_data *ctx,
        ret = lttng_ustconsumer_request_metadata(ctx, metadata_channel, 0, 0);
        pthread_mutex_lock(&metadata_stream->lock);
        if (ret < 0) {
+               status = SYNC_METADATA_STATUS_ERROR;
                goto end;
        }
 
@@ -2535,38 +2635,30 @@ int lttng_ustconsumer_sync_metadata(struct lttng_consumer_local_data *ctx,
        if (consumer_stream_is_deleted(metadata_stream)) {
                DBG("Metadata stream %" PRIu64 " was deleted during the metadata synchronization",
                                metadata_stream->key);
-               ret = 0;
+               status = SYNC_METADATA_STATUS_NO_DATA;
                goto end;
        }
 
        ret = commit_one_metadata_packet(metadata_stream);
-       if (ret <= 0) {
+       if (ret < 0) {
+               status = SYNC_METADATA_STATUS_ERROR;
                goto end;
        } else if (ret > 0) {
-               retry = 1;
+               status = SYNC_METADATA_STATUS_NEW_DATA;
+       } else /* ret == 0 */ {
+               status = SYNC_METADATA_STATUS_NO_DATA;
+               goto end;
        }
 
        ret = ustctl_snapshot(metadata_stream->ustream);
        if (ret < 0) {
-               if (errno != EAGAIN) {
-                       ERR("Sync metadata, taking UST snapshot");
-                       goto end;
-               }
-               DBG("No new metadata when syncing them.");
-               /* No new metadata, exit. */
-               ret = ENODATA;
+               ERR("Failed to take a snapshot of the metadata ring-buffer positions, ret = %d", ret);
+               status = SYNC_METADATA_STATUS_ERROR;
                goto end;
        }
 
-       /*
-        * After this flush, we still need to extract metadata.
-        */
-       if (retry) {
-               ret = EAGAIN;
-       }
-
 end:
-       return ret;
+       return status;
 }
 
 /*
@@ -2679,7 +2771,7 @@ static int extract_metadata_subbuffer_info(struct lttng_consumer_stream *stream,
                goto end;
        }
 
-       subbuf->info.metadata.version = stream->chan->metadata_cache->version;
+       subbuf->info.metadata.version = stream->metadata_version;
 
 end:
        return ret;
@@ -2810,28 +2902,88 @@ static int get_next_subbuffer_metadata(struct lttng_consumer_stream *stream,
                struct stream_subbuffer *subbuffer)
 {
        int ret;
+       bool cache_empty;
+       bool got_subbuffer;
+       bool coherent;
+       bool buffer_empty;
+       unsigned long consumed_pos, produced_pos;
 
-       ret = ustctl_get_next_subbuf(stream->ustream);
-       if (ret) {
-               ret = commit_one_metadata_packet(stream);
-               if (ret < 0) {
-                       goto end;
-               } else if (ret == 0) {
-                       /* Not an error, the cache is empty. */
-                       ret = -ENODATA;
-                       goto end;
+       do {
+               ret = ustctl_get_next_subbuf(stream->ustream);
+               if (ret == 0) {
+                       got_subbuffer = true;
+               } else {
+                       got_subbuffer = false;
+                       if (ret != -EAGAIN) {
+                               /* Fatal error. */
+                               goto end;
+                       }
                }
 
-               ret = ustctl_get_next_subbuf(stream->ustream);
-               if (ret) {
-                       goto end;
+               /*
+                * Determine if the cache is empty and ensure that a sub-buffer
+                * is made available if the cache is not empty.
+                */
+               if (!got_subbuffer) {
+                       ret = commit_one_metadata_packet(stream);
+                       if (ret < 0 && ret != -ENOBUFS) {
+                               goto end;
+                       } else if (ret == 0) {
+                               /* Not an error, the cache is empty. */
+                               cache_empty = true;
+                               ret = -ENODATA;
+                               goto end;
+                       } else {
+                               cache_empty = false;
+                       }
+               } else {
+                       pthread_mutex_lock(&stream->chan->metadata_cache->lock);
+                       cache_empty = stream->chan->metadata_cache->max_offset ==
+                                     stream->ust_metadata_pushed;
+                       pthread_mutex_unlock(&stream->chan->metadata_cache->lock);
                }
-       }
+       } while (!got_subbuffer);
 
+       /* Populate the sub-buffer's info and view. */
        ret = get_next_subbuffer_common(stream, subbuffer);
        if (ret) {
                goto end;
        }
+
+       ret = lttng_ustconsumer_sample_snapshot_positions(stream);
+       if (ret < 0) {
+               /*
+                * -EAGAIN is not expected since we got a sub-buffer and haven't
+                * pushed the consumption position yet (on put_next).
+                */
+               PERROR("Failed to take a snapshot of metadata buffer positions");
+               goto end;
+       }
+
+       ret = lttng_ustconsumer_get_consumed_snapshot(stream, &consumed_pos);
+       if (ret) {
+               PERROR("Failed to get metadata consumed position");
+               goto end;
+       }
+
+       ret = lttng_ustconsumer_get_produced_snapshot(stream, &produced_pos);
+       if (ret) {
+               PERROR("Failed to get metadata produced position");
+               goto end;
+       }
+
+       /* Is this the last sub-buffer of the ring buffer? */
+       buffer_empty = (consumed_pos + stream->max_sb_size) == produced_pos;
+
+       /*
+        * The sessiond registry lock ensures that coherent units of metadata
+        * are pushed to the consumer daemon at once. Hence, if a sub-buffer is
+        * acquired, the cache is empty, and it is the only sub-buffer
+        * available, it is safe to assume that it is "coherent".
+        */
+       coherent = got_subbuffer && cache_empty && buffer_empty;
+
+       LTTNG_OPTIONAL_SET(&subbuffer->info.metadata.coherent, coherent);
 end:
        return ret;
 }
@@ -2848,12 +3000,15 @@ static int put_next_subbuffer(struct lttng_consumer_stream *stream,
 static int signal_metadata(struct lttng_consumer_stream *stream,
                struct lttng_consumer_local_data *ctx)
 {
+       ASSERT_LOCKED(stream->metadata_rdv_lock);
        return pthread_cond_broadcast(&stream->metadata_rdv) ? -errno : 0;
 }
 
-static void lttng_ustconsumer_set_stream_ops(
+static int lttng_ustconsumer_set_stream_ops(
                struct lttng_consumer_stream *stream)
 {
+       int ret = 0;
+
        stream->read_subbuffer_ops.on_wake_up = consumer_stream_ust_on_wake_up;
        if (stream->metadata_flag) {
                stream->read_subbuffer_ops.get_next_subbuffer =
@@ -2861,8 +3016,15 @@ static void lttng_ustconsumer_set_stream_ops(
                stream->read_subbuffer_ops.extract_subbuffer_info =
                                extract_metadata_subbuffer_info;
                stream->read_subbuffer_ops.reset_metadata =
-                               metadata_stream_reset_cache;
-               stream->read_subbuffer_ops.on_sleep = signal_metadata;
+                               metadata_stream_reset_cache_consumed_position;
+               if (stream->chan->is_live) {
+                       stream->read_subbuffer_ops.on_sleep = signal_metadata;
+                       ret = consumer_stream_enable_metadata_bucketization(
+                                       stream);
+                       if (ret) {
+                               goto end;
+                       }
+               }
        } else {
                stream->read_subbuffer_ops.get_next_subbuffer =
                                get_next_subbuffer;
@@ -2876,6 +3038,8 @@ static void lttng_ustconsumer_set_stream_ops(
        }
 
        stream->read_subbuffer_ops.put_next_subbuffer = put_next_subbuffer;
+end:
+       return ret;
 }
 
 /*
@@ -2922,6 +3086,7 @@ int lttng_ustconsumer_data_pending(struct lttng_consumer_stream *stream)
 
        assert(stream);
        assert(stream->ustream);
+       ASSERT_LOCKED(stream->lock);
 
        DBG("UST consumer checking data pending");
 
@@ -2934,7 +3099,9 @@ int lttng_ustconsumer_data_pending(struct lttng_consumer_stream *stream)
                uint64_t contiguous, pushed;
 
                /* Ease our life a bit. */
+               pthread_mutex_lock(&stream->chan->metadata_cache->lock);
                contiguous = stream->chan->metadata_cache->max_offset;
+               pthread_mutex_unlock(&stream->chan->metadata_cache->lock);
                pushed = stream->ust_metadata_pushed;
 
                /*
This page took 0.029898 seconds and 4 git commands to generate.