From 07b86b528dc279d59cdf16e6cb946c144fe773f2 Mon Sep 17 00:00:00 2001 From: Julien Desfossez Date: Mon, 6 May 2013 15:57:24 -0400 Subject: [PATCH] Add kernel snapshot support Signed-off-by: Julien Desfossez Signed-off-by: David Goulet --- configure.ac | 1 + src/bin/lttng-sessiond/cmd.c | 4 +- src/bin/lttng-sessiond/consumer.c | 1 + src/bin/lttng-sessiond/kernel-consumer.c | 66 ++- src/bin/lttng-sessiond/kernel-consumer.h | 6 + src/bin/lttng-sessiond/kernel.c | 41 +- src/common/consumer-stream.c | 12 +- src/common/consumer.c | 54 ++- src/common/consumer.h | 10 + src/common/kernel-consumer/kernel-consumer.c | 418 +++++++++++++++++-- src/common/uri.c | 2 +- src/common/utils.c | 2 +- src/common/utils.h | 2 +- tests/regression/tools/Makefile.am | 2 +- tests/regression/tools/snapshots/Makefile.am | 2 + tests/regression/tools/snapshots/test_kernel | 173 ++++++++ tests/root_regression | 1 + tests/utils/utils.sh | 66 ++- 18 files changed, 808 insertions(+), 55 deletions(-) create mode 100644 tests/regression/tools/snapshots/Makefile.am create mode 100755 tests/regression/tools/snapshots/test_kernel diff --git a/configure.ac b/configure.ac index 40e6fd6bd..792679a35 100644 --- a/configure.ac +++ b/configure.ac @@ -333,6 +333,7 @@ AC_CONFIG_FILES([ tests/regression/tools/filtering/Makefile tests/regression/tools/health/Makefile tests/regression/tools/tracefile-limits/Makefile + tests/regression/tools/snapshots/Makefile tests/regression/ust/Makefile tests/regression/ust/nprocesses/Makefile tests/regression/ust/high-throughput/Makefile diff --git a/src/bin/lttng-sessiond/cmd.c b/src/bin/lttng-sessiond/cmd.c index 211fc5a34..c783e82f0 100644 --- a/src/bin/lttng-sessiond/cmd.c +++ b/src/bin/lttng-sessiond/cmd.c @@ -721,7 +721,7 @@ static int start_kernel_session(struct ltt_kernel_session *ksess, int wpipe) struct ltt_kernel_channel *kchan; /* Open kernel metadata */ - if (ksess->metadata == NULL) { + if (ksess->metadata == NULL && ksess->output_traces) { ret = kernel_open_metadata(ksess); if (ret < 0) { ret = LTTNG_ERR_KERN_META_FAIL; @@ -730,7 +730,7 @@ static int start_kernel_session(struct ltt_kernel_session *ksess, int wpipe) } /* Open kernel metadata stream */ - if (ksess->metadata_stream_fd < 0) { + if (ksess->metadata && ksess->metadata_stream_fd < 0) { ret = kernel_open_metadata_stream(ksess); if (ret < 0) { ERR("Kernel create metadata stream failed"); diff --git a/src/bin/lttng-sessiond/consumer.c b/src/bin/lttng-sessiond/consumer.c index d3f6c071c..1b7a39733 100644 --- a/src/bin/lttng-sessiond/consumer.c +++ b/src/bin/lttng-sessiond/consumer.c @@ -1229,6 +1229,7 @@ int consumer_snapshot_channel(struct consumer_socket *socket, uint64_t key, ret = -LTTNG_ERR_NOMEM; goto error; } + msg.u.snapshot_channel.relayd_id = (uint64_t) -1ULL; /* Create directory. Ignore if exist. */ ret = run_as_mkdir_recursive(msg.u.snapshot_channel.pathname, diff --git a/src/bin/lttng-sessiond/kernel-consumer.c b/src/bin/lttng-sessiond/kernel-consumer.c index ef6dccb53..bf3559a21 100644 --- a/src/bin/lttng-sessiond/kernel-consumer.c +++ b/src/bin/lttng-sessiond/kernel-consumer.c @@ -202,14 +202,6 @@ int kernel_consumer_add_metadata(struct consumer_socket *sock, session->metadata_stream_fd, 0); /* CPU: 0 for metadata. */ - /* - * Set the no monitor flag. If set to 1, it indicates the consumer to NOT - * monitor the stream but rather add it to a special list in the associated - * channel. This is used to handle ephemeral stream used by the snapshot - * command or store streams for the flight recorder mode. - */ - lkm.u.stream.no_monitor = no_monitor; - health_code_update(); /* Send stream and file descriptor */ @@ -370,3 +362,61 @@ int kernel_consumer_send_session(struct consumer_socket *sock, error: return ret; } + +int kernel_consumer_destroy_channel(struct consumer_socket *socket, + struct ltt_kernel_channel *channel) +{ + int ret; + struct lttcomm_consumer_msg msg; + + assert(channel); + assert(socket); + assert(socket->fd >= 0); + + DBG("Sending kernel consumer destroy channel key %d", channel->fd); + + msg.cmd_type = LTTNG_CONSUMER_DESTROY_CHANNEL; + msg.u.destroy_channel.key = channel->fd; + + pthread_mutex_lock(socket->lock); + health_code_update(); + + ret = consumer_send_msg(socket, &msg); + if (ret < 0) { + goto error; + } + +error: + health_code_update(); + pthread_mutex_unlock(socket->lock); + return ret; +} + +int kernel_consumer_destroy_metadata(struct consumer_socket *socket, + struct ltt_kernel_metadata *metadata) +{ + int ret; + struct lttcomm_consumer_msg msg; + + assert(metadata); + assert(socket); + assert(socket->fd >= 0); + + DBG("Sending kernel consumer destroy channel key %d", metadata->fd); + + msg.cmd_type = LTTNG_CONSUMER_DESTROY_CHANNEL; + msg.u.destroy_channel.key = metadata->fd; + + pthread_mutex_lock(socket->lock); + health_code_update(); + + ret = consumer_send_msg(socket, &msg); + if (ret < 0) { + goto error; + } + +error: + health_code_update(); + pthread_mutex_unlock(socket->lock); + return ret; +} diff --git a/src/bin/lttng-sessiond/kernel-consumer.h b/src/bin/lttng-sessiond/kernel-consumer.h index 64795111a..5390edafe 100644 --- a/src/bin/lttng-sessiond/kernel-consumer.h +++ b/src/bin/lttng-sessiond/kernel-consumer.h @@ -38,3 +38,9 @@ int kernel_consumer_add_metadata(struct consumer_socket *sock, int kernel_consumer_add_channel(struct consumer_socket *sock, struct ltt_kernel_channel *channel, struct ltt_kernel_session *session, unsigned int monitor); + +int kernel_consumer_destroy_channel(struct consumer_socket *socket, + struct ltt_kernel_channel *channel); + +int kernel_consumer_destroy_metadata(struct consumer_socket *socket, + struct ltt_kernel_metadata *metadata); diff --git a/src/bin/lttng-sessiond/kernel.c b/src/bin/lttng-sessiond/kernel.c index 154b2d623..1ba6b16e6 100644 --- a/src/bin/lttng-sessiond/kernel.c +++ b/src/bin/lttng-sessiond/kernel.c @@ -754,6 +754,32 @@ void kernel_destroy_session(struct ltt_kernel_session *ksess) DBG("Tearing down kernel session"); + /* + * Destroy channels on the consumer if in no output mode because the + * streams are in *no* monitor mode so we have to send a command to clean + * them up or else they leaked. + */ + if (!ksess->output_traces) { + int ret; + struct consumer_socket *socket; + struct lttng_ht_iter iter; + + /* For each consumer socket. */ + cds_lfht_for_each_entry(ksess->consumer->socks->ht, &iter.iter, + socket, node.node) { + struct ltt_kernel_channel *chan; + + /* For each channel, ask the consumer to destroy it. */ + cds_list_for_each_entry(chan, &ksess->channel_list.head, list) { + ret = kernel_consumer_destroy_channel(socket, chan); + if (ret < 0) { + /* Consumer is probably dead. Use next socket. */ + continue; + } + } + } + } + /* Close any relayd session */ consumer_output_send_destroy_relayd(ksess->consumer); @@ -844,10 +870,9 @@ int kernel_snapshot_record(struct ltt_kernel_session *ksess, pthread_mutex_lock(socket->lock); /* This stream must not be monitored by the consumer. */ - ret = kernel_consumer_add_metadata(socket, ksess, 1); - ret = 0; + ret = kernel_consumer_add_metadata(socket, ksess, 0); pthread_mutex_unlock(socket->lock); - /* Put back the savec consumer output into the session. */ + /* Put back the saved consumer output into the session. */ ksess->consumer = saved_output; if (ret < 0) { ret = LTTNG_ERR_KERN_CONSUMER_FAIL; @@ -856,8 +881,10 @@ int kernel_snapshot_record(struct ltt_kernel_session *ksess, /* For each channel, ask the consumer to snapshot it. */ cds_list_for_each_entry(chan, &ksess->channel_list.head, list) { + pthread_mutex_lock(socket->lock); ret = consumer_snapshot_channel(socket, chan->fd, output, 0, ksess->uid, ksess->gid, wait); + pthread_mutex_unlock(socket->lock); if (ret < 0) { ret = LTTNG_ERR_KERN_CONSUMER_FAIL; goto error_consumer; @@ -865,12 +892,20 @@ int kernel_snapshot_record(struct ltt_kernel_session *ksess, } /* Snapshot metadata, */ + pthread_mutex_lock(socket->lock); ret = consumer_snapshot_channel(socket, ksess->metadata->fd, output, 1, ksess->uid, ksess->gid, wait); + pthread_mutex_unlock(socket->lock); if (ret < 0) { ret = LTTNG_ERR_KERN_CONSUMER_FAIL; goto error_consumer; } + + /* + * The metadata snapshot is done, ask the consumer to destroy it since + * it's not monitored on the consumer side. + */ + (void) kernel_consumer_destroy_metadata(socket, ksess->metadata); } error_consumer: diff --git a/src/common/consumer-stream.c b/src/common/consumer-stream.c index 24f1b8a42..b5ab6c6be 100644 --- a/src/common/consumer-stream.c +++ b/src/common/consumer-stream.c @@ -177,9 +177,15 @@ void consumer_stream_delete(struct lttng_consumer_stream *stream, rcu_read_unlock(); - /* Decrement the stream count of the global consumer data. */ - assert(consumer_data.stream_count > 0); - consumer_data.stream_count--; + /* + * For a *non* monitored stream, we MUST NOT decrement or else the data + * thread will use the wrong value or stream for its local stream set. + */ + if (stream->chan->monitor) { + /* Decrement the stream count of the global consumer data. */ + assert(consumer_data.stream_count > 0); + consumer_data.stream_count--; + } } /* diff --git a/src/common/consumer.c b/src/common/consumer.c index e3b6cd5bf..0565f54df 100644 --- a/src/common/consumer.c +++ b/src/common/consumer.c @@ -312,6 +312,25 @@ void consumer_del_channel(struct lttng_consumer_channel *channel) goto end; } + /* Empty no monitor streams list. */ + if (!channel->monitor) { + struct lttng_consumer_stream *stream, *stmp; + + /* + * So, these streams are not visible to any data thread. This is why we + * close and free them because they were never added to any data + * structure apart from this one. + */ + cds_list_for_each_entry_safe(stream, stmp, + &channel->stream_no_monitor_list.head, no_monitor_node) { + cds_list_del(&stream->no_monitor_node); + /* Close everything in that stream. */ + consumer_stream_close(stream); + /* Free the ressource. */ + consumer_stream_free(stream); + } + } + rcu_read_lock(); iter.iter.node = &channel->node.node; ret = lttng_ht_del(consumer_data.channel_ht, &iter); @@ -509,8 +528,10 @@ struct lttng_consumer_stream *consumer_allocate_stream(uint64_t channel_key, /* Init session id node with the stream session id */ lttng_ht_node_init_u64(&stream->node_session_id, stream->session_id); - DBG3("Allocated stream %s (key %" PRIu64 ", chan_key %" PRIu64 " relayd_id %" PRIu64 ", session_id %" PRIu64, - stream->name, stream->key, channel_key, stream->net_seq_idx, stream->session_id); + DBG3("Allocated stream %s (key %" PRIu64 ", chan_key %" PRIu64 + " relayd_id %" PRIu64 ", session_id %" PRIu64, + stream->name, stream->key, channel_key, + stream->net_seq_idx, stream->session_id); rcu_read_unlock(); return stream; @@ -769,6 +790,33 @@ struct lttng_consumer_channel *consumer_allocate_channel(uint64_t key, channel->tracefile_count = tracefile_count; channel->monitor = monitor; + /* + * In monitor mode, the streams associated with the channel will be put in + * a special list ONLY owned by this channel. So, the refcount is set to 1 + * here meaning that the channel itself has streams that are referenced. + * + * On a channel deletion, once the channel is no longer visible, the + * refcount is decremented and checked for a zero value to delete it. With + * streams in no monitor mode, it will now be safe to destroy the channel. + */ + if (!channel->monitor) { + channel->refcount = 1; + } + + switch (output) { + case LTTNG_EVENT_SPLICE: + channel->output = CONSUMER_CHANNEL_SPLICE; + break; + case LTTNG_EVENT_MMAP: + channel->output = CONSUMER_CHANNEL_MMAP; + break; + default: + ERR("Allocate channel output unknown %d", output); + free(channel); + channel = NULL; + goto end; + } + strncpy(channel->pathname, pathname, sizeof(channel->pathname)); channel->pathname[sizeof(channel->pathname) - 1] = '\0'; @@ -780,6 +828,7 @@ struct lttng_consumer_channel *consumer_allocate_channel(uint64_t key, channel->wait_fd = -1; CDS_INIT_LIST_HEAD(&channel->streams.head); + CDS_INIT_LIST_HEAD(&channel->stream_no_monitor_list.head); DBG("Allocated channel (key %" PRIu64 ")", channel->key) @@ -1785,7 +1834,6 @@ void consumer_del_metadata_stream(struct lttng_consumer_stream *stream, PERROR("munmap metadata stream"); } } - if (stream->wait_fd >= 0) { ret = close(stream->wait_fd); if (ret < 0) { diff --git a/src/common/consumer.h b/src/common/consumer.h index 9fadc5a06..f804b7260 100644 --- a/src/common/consumer.h +++ b/src/common/consumer.h @@ -134,6 +134,13 @@ struct lttng_consumer_channel { * LTTNG_CONSUMER_GET_CHANNEL. */ struct stream_list streams; + + /* + * List of streams in no monitor mode for this channel. Used ONLY for + * snapshots recording. + */ + struct stream_list stream_no_monitor_list; + /* * Set if the channel is metadata. We keep a reference to the stream * because we have to flush data once pushed by the session daemon. For a @@ -253,6 +260,9 @@ struct lttng_consumer_stream { /* On-disk circular buffer */ uint64_t tracefile_size_current; uint64_t tracefile_count_current; + + /* Node for the no monitor stream list in a channel. */ + struct cds_list_head no_monitor_node; }; /* diff --git a/src/common/kernel-consumer/kernel-consumer.c b/src/common/kernel-consumer/kernel-consumer.c index 86428f066..29ef3a4ce 100644 --- a/src/common/kernel-consumer/kernel-consumer.c +++ b/src/common/kernel-consumer/kernel-consumer.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "kernel-consumer.h" @@ -83,6 +84,321 @@ int lttng_kconsumer_get_produced_snapshot(struct lttng_consumer_stream *stream, return ret; } +/* + * Get the consumerd position + * + * Returns 0 on success, < 0 on error + */ +int lttng_kconsumer_get_consumed_snapshot(struct lttng_consumer_stream *stream, + unsigned long *pos) +{ + int ret; + int infd = stream->wait_fd; + + ret = kernctl_snapshot_get_consumed(infd, pos); + if (ret != 0) { + errno = -ret; + perror("kernctl_snapshot_get_consumed"); + } + + return ret; +} + +/* + * Find a relayd and send the stream + * + * Returns 0 on success, < 0 on error + */ +static +int send_relayd_stream(struct lttng_consumer_stream *stream, char *path) +{ + struct consumer_relayd_sock_pair *relayd; + int ret = 0; + char *stream_path; + + if (path != NULL) { + stream_path = path; + } else { + stream_path = stream->chan->pathname; + } + /* The stream is not metadata. Get relayd reference if exists. */ + rcu_read_lock(); + relayd = consumer_find_relayd(stream->net_seq_idx); + if (relayd != NULL) { + /* Add stream on the relayd */ + pthread_mutex_lock(&relayd->ctrl_sock_mutex); + ret = relayd_add_stream(&relayd->control_sock, + stream->name, stream_path, + &stream->relayd_stream_id, + stream->chan->tracefile_size, + stream->chan->tracefile_count); + pthread_mutex_unlock(&relayd->ctrl_sock_mutex); + if (ret < 0) { + goto end; + } + uatomic_inc(&relayd->refcount); + } else if (stream->net_seq_idx != (uint64_t) -1ULL) { + ERR("Network sequence index %" PRIu64 " unknown. Not adding stream.", + stream->net_seq_idx); + ret = -1; + goto end; + } + +end: + rcu_read_unlock(); + return ret; +} + +/* + * Find a relayd and close the stream + */ +static +void close_relayd_stream(struct lttng_consumer_stream *stream) +{ + struct consumer_relayd_sock_pair *relayd; + + /* The stream is not metadata. Get relayd reference if exists. */ + rcu_read_lock(); + relayd = consumer_find_relayd(stream->net_seq_idx); + if (relayd != NULL) { + consumer_stream_relayd_close(stream, relayd); + } + rcu_read_unlock(); +} + +/* + * Take a snapshot of all the stream of a channel + * + * Returns 0 on success, < 0 on error + */ +int lttng_kconsumer_snapshot_channel(uint64_t key, char *path, + uint64_t relayd_id, struct lttng_consumer_local_data *ctx) +{ + int ret; + unsigned long consumed_pos, produced_pos; + struct lttng_consumer_channel *channel; + struct lttng_consumer_stream *stream; + + DBG("Kernel consumer snapshot channel %lu", key); + + rcu_read_lock(); + + channel = consumer_find_channel(key); + if (!channel) { + ERR("No channel found for key %lu", key); + ret = -1; + goto end; + } + + /* Splice is not supported yet for channel snapshot. */ + if (channel->output != CONSUMER_CHANNEL_MMAP) { + ERR("Unsupported output %d", channel->output); + ret = -1; + goto end; + } + + cds_list_for_each_entry(stream, &channel->stream_no_monitor_list.head, + no_monitor_node) { + /* + * Lock stream because we are about to change its state. + */ + pthread_mutex_lock(&stream->lock); + + stream->net_seq_idx = relayd_id; + channel->relayd_id = relayd_id; + if (relayd_id != (uint64_t) -1ULL) { + ret = send_relayd_stream(stream, path); + if (ret < 0) { + ERR("sending stream to relayd"); + goto end_unlock; + } + DBG("Stream %s sent to the relayd", stream->name); + } else { + ret = utils_create_stream_file(path, stream->name, + stream->chan->tracefile_size, stream->tracefile_count_current, + stream->uid, stream->gid); + if (ret < 0) { + ERR("utils_create_stream_file"); + goto end_unlock; + } + + stream->out_fd = ret; + stream->tracefile_size_current = 0; + + DBG("Kernel consumer snapshot stream %s/%s (%lu)", path, + stream->name, stream->key); + } + + ret = kernctl_buffer_flush(stream->wait_fd); + if (ret < 0) { + ERR("Failed to flush kernel metadata stream"); + goto end_unlock; + } + + ret = lttng_kconsumer_take_snapshot(stream); + if (ret < 0) { + ERR("Taking kernel snapshot"); + goto end_unlock; + } + + ret = lttng_kconsumer_get_produced_snapshot(stream, &produced_pos); + if (ret < 0) { + ERR("Produced kernel snapshot position"); + goto end_unlock; + } + + ret = lttng_kconsumer_get_consumed_snapshot(stream, &consumed_pos); + if (ret < 0) { + ERR("Consumerd kernel snapshot position"); + goto end_unlock; + } + + if (stream->max_sb_size == 0) { + ret = kernctl_get_max_subbuf_size(stream->wait_fd, + &stream->max_sb_size); + if (ret < 0) { + ERR("Getting kernel max_sb_size"); + goto end_unlock; + } + } + + while (consumed_pos < produced_pos) { + ssize_t read_len; + unsigned long len, padded_len; + + DBG("Kernel consumer taking snapshot at pos %lu", consumed_pos); + + ret = kernctl_get_subbuf(stream->wait_fd, &consumed_pos); + if (ret < 0) { + if (errno != EAGAIN) { + PERROR("kernctl_get_subbuf snapshot"); + goto end_unlock; + } + DBG("Kernel consumer get subbuf failed. Skipping it."); + consumed_pos += stream->max_sb_size; + continue; + } + + ret = kernctl_get_subbuf_size(stream->wait_fd, &len); + if (ret < 0) { + ERR("Snapshot kernctl_get_subbuf_size"); + goto end_unlock; + } + + ret = kernctl_get_padded_subbuf_size(stream->wait_fd, &padded_len); + if (ret < 0) { + ERR("Snapshot kernctl_get_padded_subbuf_size"); + goto end_unlock; + } + + read_len = lttng_consumer_on_read_subbuffer_mmap(ctx, stream, len, + padded_len - len); + /* + * We write the padded len in local tracefiles but the + * data len when using a relay. + * Display the error but continue processing to try to + * release the subbuffer. + */ + if (relayd_id != (uint64_t) -1ULL) { + if (read_len != len) { + ERR("Error sending to the relay (ret: %zd != len: %lu)", + read_len, len); + } + } else { + if (read_len != padded_len) { + ERR("Error writing to tracefile (ret: %zd != len: %lu)", + read_len, padded_len); + } + } + + ret = kernctl_put_subbuf(stream->wait_fd); + if (ret < 0) { + ERR("Snapshot kernctl_put_subbuf"); + goto end_unlock; + } + consumed_pos += stream->max_sb_size; + } + + if (relayd_id == (uint64_t) -1ULL) { + ret = close(stream->out_fd); + if (ret < 0) { + PERROR("Kernel consumer snapshot close out_fd"); + goto end_unlock; + } + stream->out_fd = -1; + } else { + close_relayd_stream(stream); + stream->net_seq_idx = (uint64_t) -1ULL; + } + pthread_mutex_unlock(&stream->lock); + } + + /* All good! */ + ret = 0; + goto end; + +end_unlock: + pthread_mutex_unlock(&stream->lock); +end: + rcu_read_unlock(); + return ret; +} + +/* + * Read the whole metadata available for a snapshot. + * + * Returns 0 on success, < 0 on error + */ +int lttng_kconsumer_snapshot_metadata(uint64_t key, char *path, + struct lttng_consumer_local_data *ctx) +{ + struct lttng_consumer_channel *metadata_channel; + struct lttng_consumer_stream *metadata_stream; + int ret; + + DBG("Kernel consumer snapshot metadata with key %" PRIu64 " at path %s", + key, path); + + rcu_read_lock(); + + metadata_channel = consumer_find_channel(key); + if (!metadata_channel) { + ERR("Snapshot kernel metadata channel not found for key %lu", key); + ret = -1; + goto end; + } + + metadata_stream = metadata_channel->metadata_stream; + assert(metadata_stream); + + ret = utils_create_stream_file(path, metadata_stream->name, + metadata_stream->chan->tracefile_size, + metadata_stream->tracefile_count_current, + metadata_stream->uid, metadata_stream->gid); + if (ret < 0) { + goto end; + } + metadata_stream->out_fd = ret; + + ret = 0; + while (ret >= 0) { + ret = lttng_kconsumer_read_subbuffer(metadata_stream, ctx); + if (ret < 0) { + if (ret != -EPERM) { + ERR("Kernel snapshot reading subbuffer"); + goto end; + } + /* "ret" is negative at this point so we will exit the loop. */ + continue; + } + } + + ret = 0; +end: + rcu_read_unlock(); + return ret; +} + /* * Receive command from session daemon and process it. * @@ -189,7 +505,6 @@ int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data *ctx, { int fd; struct lttng_pipe *stream_pipe; - struct consumer_relayd_sock_pair *relayd = NULL; struct lttng_consumer_stream *new_stream; struct lttng_consumer_channel *channel; int alloc_ret = 0; @@ -272,12 +587,28 @@ int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data *ctx, } new_stream->chan = channel; new_stream->wait_fd = fd; + switch (channel->output) { + case CONSUMER_CHANNEL_SPLICE: + new_stream->output = LTTNG_EVENT_SPLICE; + break; + case CONSUMER_CHANNEL_MMAP: + new_stream->output = LTTNG_EVENT_MMAP; + break; + default: + ERR("Stream output unknown %d", channel->output); + goto end_nosignal; + } /* * We've just assigned the channel to the stream so increment the - * refcount right now. + * refcount right now. We don't need to increment the refcount for + * streams in no monitor because we handle manually the cleanup of + * those. It is very important to make sure there is NO prior + * consumer_del_stream() calls or else the refcount will be unbalanced. */ - uatomic_inc(&new_stream->chan->refcount); + if (channel->monitor) { + uatomic_inc(&new_stream->chan->refcount); + } /* * The buffer flush is done on the session daemon side for the kernel @@ -288,24 +619,8 @@ int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data *ctx, */ new_stream->hangup_flush_done = 0; - /* The stream is not metadata. Get relayd reference if exists. */ - relayd = consumer_find_relayd(new_stream->net_seq_idx); - if (relayd != NULL) { - /* Add stream on the relayd */ - pthread_mutex_lock(&relayd->ctrl_sock_mutex); - ret = relayd_add_stream(&relayd->control_sock, - new_stream->name, new_stream->chan->pathname, - &new_stream->relayd_stream_id, - new_stream->chan->tracefile_size, - new_stream->chan->tracefile_count); - pthread_mutex_unlock(&relayd->ctrl_sock_mutex); - if (ret < 0) { - consumer_del_stream(new_stream, NULL); - goto end_nosignal; - } - } else if (new_stream->net_seq_idx != (uint64_t) -1ULL) { - ERR("Network sequence index %" PRIu64 " unknown. Not adding stream.", - new_stream->net_seq_idx); + ret = send_relayd_stream(new_stream, NULL); + if (ret < 0) { consumer_del_stream(new_stream, NULL); goto end_nosignal; } @@ -318,11 +633,17 @@ int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data *ctx, } } + if (new_stream->metadata_flag) { + channel->metadata_stream = new_stream; + } + /* Do not monitor this stream. */ if (!channel->monitor) { DBG("Kernel consumer add stream %s in no monitor mode with" "relayd id %" PRIu64, new_stream->name, new_stream->relayd_stream_id); + cds_list_add(&new_stream->no_monitor_node, + &channel->stream_no_monitor_list.head); break; } @@ -411,6 +732,23 @@ int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data *ctx, } case LTTNG_CONSUMER_SNAPSHOT_CHANNEL: { + if (msg.u.snapshot_channel.metadata == 1) { + ret = lttng_kconsumer_snapshot_metadata(msg.u.snapshot_channel.key, + msg.u.snapshot_channel.pathname, ctx); + if (ret < 0) { + ERR("Snapshot metadata failed"); + ret_code = LTTNG_ERR_KERN_META_FAIL; + } + } else { + ret = lttng_kconsumer_snapshot_channel(msg.u.snapshot_channel.key, + msg.u.snapshot_channel.pathname, + msg.u.snapshot_channel.relayd_id, ctx); + if (ret < 0) { + ERR("Snapshot channel failed"); + ret_code = LTTNG_ERR_KERN_CHAN_FAIL; + } + } + ret = consumer_send_status_msg(sock, ret_code); if (ret < 0) { /* Somehow, the session daemon is not responding anymore. */ @@ -418,6 +756,39 @@ int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data *ctx, } break; } + case LTTNG_CONSUMER_DESTROY_CHANNEL: + { + uint64_t key = msg.u.destroy_channel.key; + struct lttng_consumer_channel *channel; + + channel = consumer_find_channel(key); + if (!channel) { + ERR("Kernel consumer destroy channel %" PRIu64 " not found", key); + ret_code = LTTNG_ERR_KERN_CHAN_NOT_FOUND; + } + + ret = consumer_send_status_msg(sock, ret_code); + if (ret < 0) { + /* Somehow, the session daemon is not responding anymore. */ + goto end_nosignal; + } + + /* + * This command should ONLY be issued for channel with streams set in + * no monitor mode. + */ + assert(!channel->monitor); + + /* + * The refcount should ALWAYS be 0 in the case of a channel in no + * monitor mode. + */ + assert(!uatomic_sub_return(&channel->refcount, 1)); + + consumer_del_channel(channel); + + goto end_nosignal; + } default: goto end_nosignal; } @@ -474,8 +845,7 @@ ssize_t lttng_kconsumer_read_subbuffer(struct lttng_consumer_stream *stream, } switch (stream->chan->output) { - case LTTNG_EVENT_SPLICE: - + case CONSUMER_CHANNEL_SPLICE: /* * XXX: The lttng-modules splice "actor" does not handle copying * partial pages hence only using the subbuffer size without the @@ -500,7 +870,7 @@ ssize_t lttng_kconsumer_read_subbuffer(struct lttng_consumer_stream *stream, ret, subbuf_size); } break; - case LTTNG_EVENT_MMAP: + case CONSUMER_CHANNEL_MMAP: /* Get subbuffer size without padding */ err = kernctl_get_subbuf_size(infd, &subbuf_size); if (err != 0) { diff --git a/src/common/uri.c b/src/common/uri.c index e54eb387d..4af94072c 100644 --- a/src/common/uri.c +++ b/src/common/uri.c @@ -186,7 +186,7 @@ int uri_to_str_url(struct lttng_uri *uri, char *dst, size_t size) { int ipver, ret; const char *addr; - char proto[4], port[7]; + char proto[5], port[7]; assert(uri); assert(dst); diff --git a/src/common/utils.c b/src/common/utils.c index f253e797f..ecac5381f 100644 --- a/src/common/utils.c +++ b/src/common/utils.c @@ -308,7 +308,7 @@ error: * Return 0 on success or else a negative value. */ LTTNG_HIDDEN -int utils_create_stream_file(char *path_name, char *file_name, uint64_t size, +int utils_create_stream_file(const char *path_name, char *file_name, uint64_t size, uint64_t count, int uid, int gid) { int ret, out_fd, flags, mode; diff --git a/src/common/utils.h b/src/common/utils.h index 9e6fb37f0..70dc4b7aa 100644 --- a/src/common/utils.h +++ b/src/common/utils.h @@ -34,7 +34,7 @@ char *utils_strdupdelim(const char *begin, const char *end); int utils_set_fd_cloexec(int fd); int utils_create_pid_file(pid_t pid, const char *filepath); int utils_mkdir_recursive(const char *path, mode_t mode); -int utils_create_stream_file(char *path_name, char *file_name, uint64_t size, +int utils_create_stream_file(const char *path_name, char *file_name, uint64_t size, uint64_t count, int uid, int gid); int utils_rotate_stream_file(char *path_name, char *file_name, uint64_t size, uint64_t count, int uid, int gid, int out_fd, uint64_t *new_count); diff --git a/tests/regression/tools/Makefile.am b/tests/regression/tools/Makefile.am index 1b256d78e..83c93420a 100644 --- a/tests/regression/tools/Makefile.am +++ b/tests/regression/tools/Makefile.am @@ -1 +1 @@ -SUBDIRS = streaming filtering health tracefile-limits +SUBDIRS = streaming filtering health tracefile-limits snapshots diff --git a/tests/regression/tools/snapshots/Makefile.am b/tests/regression/tools/snapshots/Makefile.am new file mode 100644 index 000000000..dd97cf6d3 --- /dev/null +++ b/tests/regression/tools/snapshots/Makefile.am @@ -0,0 +1,2 @@ +noinst_SCRIPTS = test_kernel +EXTRA_DIST = test_kernel diff --git a/tests/regression/tools/snapshots/test_kernel b/tests/regression/tools/snapshots/test_kernel new file mode 100755 index 000000000..6910b0e62 --- /dev/null +++ b/tests/regression/tools/snapshots/test_kernel @@ -0,0 +1,173 @@ +#!/bin/bash +# +# Copyright (C) - 2013 Julien Desfossez +# +# This library is free software; you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the Free +# Software Foundation; version 2.1 of the License. +# +# This library is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +# details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this library; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +TEST_DESC="Snapshots - Kernel tracing" + +CURDIR=$(dirname $0)/ +TESTDIR=$CURDIR/../../.. +EVENT_NAME="sched_switch" +PID_RELAYD=0 +SESSION_NAME="" +CHANNEL_NAME="snapchan" + +TRACE_PATH=$(mktemp -d) + +NUM_TESTS=2040 + +source $TESTDIR/utils/utils.sh + +# LTTng kernel modules check +out=`ls /lib/modules/$(uname -r)/extra | grep lttng` +if [ -z "$out" ]; then + BAIL_OUT "LTTng modules not detected." +fi + +function test_kernel_local_snapshot () +{ + diag "Test local kernel snapshots" + create_lttng_session_no_output $SESSION_NAME + enable_lttng_mmap_overwrite_kernel_channel $SESSION_NAME $CHANNEL_NAME + lttng_enable_kernel_event $SESSION_NAME $EVENT_NAME $CHANNEL_NAME + start_lttng_tracing $SESSION_NAME + lttng_snapshot_add_output $SESSION_NAME $TRACE_PATH + lttng_snapshot_record $SESSION_NAME + stop_lttng_tracing $SESSION_NAME + destroy_lttng_session $SESSION_NAME + + # Validate test + validate_trace $EVENT_NAME $TRACE_PATH/snapshot/ + if [ $? -eq 0 ]; then + # Only delete if successful + rm -rf $TRACE_PATH + else + break + fi +} + +function test_kernel_local_snapshot_after_stop () +{ + diag "Test local kernel snapshots after stop" + create_lttng_session_no_output $SESSION_NAME + enable_lttng_mmap_overwrite_kernel_channel $SESSION_NAME $CHANNEL_NAME + lttng_enable_kernel_event $SESSION_NAME $EVENT_NAME $CHANNEL_NAME + start_lttng_tracing $SESSION_NAME + stop_lttng_tracing $SESSION_NAME + lttng_snapshot_add_output $SESSION_NAME $TRACE_PATH + lttng_snapshot_record $SESSION_NAME + destroy_lttng_session $SESSION_NAME + + # Validate test + validate_trace $EVENT_NAME $TRACE_PATH/snapshot/ + if [ $? -eq 0 ]; then + # Only delete if successful + rm -rf $TRACE_PATH + else + break + fi +} + +function test_kernel_local_snapshot_append_to_metadata () +{ + EVENT1=sched_switch + EVENT2=sched_process_exec + + diag "Test local kernel snapshots with one event $EVENT1" + create_lttng_session_no_output $SESSION_NAME + enable_lttng_mmap_overwrite_kernel_channel $SESSION_NAME $CHANNEL_NAME + lttng_enable_kernel_event $SESSION_NAME $EVENT1 $CHANNEL_NAME + start_lttng_tracing $SESSION_NAME + lttng_snapshot_add_output $SESSION_NAME $TRACE_PATH + + # first snapshot with only 1 event + lttng_snapshot_record $SESSION_NAME + validate_trace $EVENT_NAME $TRACE_PATH/snapshot/ + if [ $? -eq 0 ]; then + # Only delete if successful + rm -rf $TRACE_PATH + else + break + fi + + diag "Adding event $EVENT2" + # second snapshot with 2 events + lttng_enable_kernel_event $SESSION_NAME $EVENT2 $CHANNEL_NAME + rm -rf $TRACE_PATH/snapshot/* 2>/dev/null + lttng_snapshot_record $SESSION_NAME + validate_trace "${EVENT1},${EVENT2}" $TRACE_PATH/snapshot/ + if [ $? -eq 0 ]; then + # Only delete if successful + rm -rf $TRACE_PATH + else + break + fi + + stop_lttng_tracing $SESSION_NAME + destroy_lttng_session $SESSION_NAME +} + +function test_kernel_1000_local_snapshots () +{ + NB_SNAP=1000 + + diag "Test local kernel snapshots" + create_lttng_session_no_output $SESSION_NAME + enable_lttng_mmap_overwrite_kernel_channel $SESSION_NAME $CHANNEL_NAME + lttng_enable_kernel_event $SESSION_NAME $EVENT_NAME $CHANNEL_NAME + start_lttng_tracing $SESSION_NAME + lttng_snapshot_add_output $SESSION_NAME $TRACE_PATH + for i in $(seq 1 $NB_SNAP); do + diag "Snapshot $i/$NB_SNAP" + rm -rf $TRACE_PATH/snapshot/* 2>/dev/null + lttng_snapshot_record $SESSION_NAME + # Validate test + validate_trace $EVENT_NAME $TRACE_PATH/snapshot/ + if [ $? -eq 0 ]; then + # Only delete if successful + rm -rf $TRACE_PATH + else + break + fi + done + stop_lttng_tracing $SESSION_NAME + destroy_lttng_session $SESSION_NAME +} + +plan_tests $NUM_TESTS + +print_test_banner "$TEST_DESC" + +if [ "$(id -u)" == "0" ]; then + isroot=1 +else + isroot=0 +fi + +skip $isroot "Root access is needed. Skipping all kernel snapshot tests." $NUM_TESTS || +{ + start_lttng_sessiond + + #tests=( test_kernel_1000_local_snapshots ) + tests=( test_kernel_local_snapshot test_kernel_local_snapshot_after_stop test_kernel_local_snapshot_append_to_metadata test_kernel_1000_local_snapshots ) + + for fct_test in ${tests[@]}; + do + SESSION_NAME=$(randstring 16 0) + ${fct_test} + + done + + stop_lttng_sessiond +} diff --git a/tests/root_regression b/tests/root_regression index 2ae884d38..bb13b4ea5 100644 --- a/tests/root_regression +++ b/tests/root_regression @@ -2,3 +2,4 @@ regression/kernel/test_all_events regression/kernel/test_event_basic regression/tools/streaming/test_high_throughput_limits regression/tools/streaming/test_kernel +regression/tools/snapshots/test_kernel diff --git a/tests/utils/utils.sh b/tests/utils/utils.sh index 6d72a068f..f73b1764c 100644 --- a/tests/utils/utils.sh +++ b/tests/utils/utils.sh @@ -82,13 +82,21 @@ function lttng_enable_kernel_event { sess_name=$1 event_name=$2 + channel_name=$3 if [ -z $event_name ]; then # Enable all event if no event name specified event_name="-a" fi - $TESTDIR/../src/bin/lttng/$LTTNG_BIN enable-event "$event_name" -s $sess_name -k >/dev/null 2>&1 + if [ -z $channel_name ]; then + # default channel if none specified + chan="" + else + chan="-c $channel_name" + fi + + $TESTDIR/../src/bin/lttng/$LTTNG_BIN enable-event "$event_name" $chan -s $sess_name -k >/dev/null 2>&1 ok $? "Enable kernel event $event_name for session $sess_name" } @@ -181,6 +189,14 @@ function stop_lttng_sessiond () fi } +function create_lttng_session_no_output () +{ + sess_name=$1 + + $TESTDIR/../src/bin/lttng/$LTTNG_BIN create $sess_name --no-output >/dev/null 2>&1 + ok $? "Create session $sess_name in no-output mode" +} + function create_lttng_session () { sess_name=$1 @@ -208,6 +224,15 @@ function disable_ust_lttng_channel() ok $? "Disable channel $channel_name for session $sess_name" } +function enable_lttng_mmap_overwrite_kernel_channel() +{ + sess_name=$1 + channel_name=$2 + + $TESTDIR/../src/bin/lttng/$LTTNG_BIN enable-channel -s $sess_name $channel_name -k --output mmap --overwrite >/dev/null 2>&1 + ok $? "Enable channel $channel_name for session $sess_name" +} + function enable_ust_lttng_event () { sess_name=$1 @@ -281,6 +306,24 @@ function destroy_lttng_session () ok $? "Destroy lttng session $sess_name" } +function lttng_snapshot_add_output () +{ + sess_name=$1 + trace_path=$2 + + $TESTDIR/../src/bin/lttng/$LTTNG_BIN snapshot add-output -s $sess_name file://$trace_path >/dev/null 2>&1 + ok $? "Added snapshot output file://$trace_path" +} + +function lttng_snapshot_record () +{ + sess_name=$1 + trace_path=$2 + + $TESTDIR/../src/bin/lttng/$LTTNG_BIN snapshot record -s $sess_name >/dev/null 2>&1 + ok $? "Snapshot recorded" +} + function trace_matches () { event_name=$1 @@ -310,11 +353,18 @@ function validate_trace skip 0 "Babeltrace binary not found. Skipping trace validation" fi - traced=$($BABELTRACE_BIN $trace_path 2>/dev/null | grep $event_name | wc -l) - if [ "$traced" -ne 0 ]; then - pass "Validate trace for event $event_name" - else - fail "Validate trace for event $event_name" - diag "Found $traced occurences of $event_name" - fi + OLDIFS=$IFS + IFS="," + for i in $event_name; do + traced=$($BABELTRACE_BIN $trace_path 2>/dev/null | grep $i | wc -l) + if [ "$traced" -ne 0 ]; then + pass "Validate trace for event $i" + else + fail "Validate trace for event $i" + diag "Found $traced occurences of $i" + fi + done + ret=$? + IFS=$OLDIFS + return $ret } -- 2.34.1