X-Git-Url: https://git.lttng.org/?p=lttng-tools.git;a=blobdiff_plain;f=src%2Fcommon%2Fust-consumer%2Fust-consumer.c;h=780a601f67e74b70b3632a6d489067d82e1359ef;hp=431b94626736d042d92db89db0774e7e941cc3e7;hb=821fffb2f13c9d5178df306f3c87edbeaf881a22;hpb=331744e34f56a5aec69b05d356d6901e67926acc

diff --git a/src/common/ust-consumer/ust-consumer.c b/src/common/ust-consumer/ust-consumer.c
index 431b94626..780a601f6 100644
--- a/src/common/ust-consumer/ust-consumer.c
+++ b/src/common/ust-consumer/ust-consumer.c
@@ -38,6 +38,7 @@
 #include
 #include
 #include
+#include
 
 #include "ust-consumer.h"
 
@@ -112,13 +113,14 @@ error:
  */
 static struct lttng_consumer_channel *allocate_channel(uint64_t session_id,
 		const char *pathname, const char *name, uid_t uid, gid_t gid,
-		int relayd_id, uint64_t key, enum lttng_event_output output)
+		int relayd_id, uint64_t key, enum lttng_event_output output,
+		uint64_t tracefile_size, uint64_t tracefile_count)
 {
 	assert(pathname);
 	assert(name);
 
 	return consumer_allocate_channel(key, session_id, pathname, name, uid, gid,
-			relayd_id, output);
+			relayd_id, output, tracefile_size, tracefile_count);
 }
 
 /*
@@ -183,21 +185,21 @@ error:
 static int send_stream_to_thread(struct lttng_consumer_stream *stream,
 		struct lttng_consumer_local_data *ctx)
 {
-	int ret, stream_pipe;
+	int ret;
+	struct lttng_pipe *stream_pipe;
 
 	/* Get the right pipe where the stream will be sent. */
 	if (stream->metadata_flag) {
-		stream_pipe = ctx->consumer_metadata_pipe[1];
+		stream_pipe = ctx->consumer_metadata_pipe;
 	} else {
-		stream_pipe = ctx->consumer_data_pipe[1];
+		stream_pipe = ctx->consumer_data_pipe;
 	}
 
-	do {
-		ret = write(stream_pipe, &stream, sizeof(stream));
-	} while (ret < 0 && errno == EINTR);
+	ret = lttng_pipe_write(stream_pipe, &stream, sizeof(stream));
 	if (ret < 0) {
-		PERROR("Consumer write %s stream to pipe %d",
-				stream->metadata_flag ? "metadata" : "data", stream_pipe);
+		ERR("Consumer write %s stream to pipe %d",
+				stream->metadata_flag ? "metadata" : "data",
+				lttng_pipe_get_writefd(stream_pipe));
 	}
 
 	return ret;
@@ -221,7 +223,9 @@ static int send_stream_to_relayd(struct lttng_consumer_stream *stream)
 	pthread_mutex_lock(&relayd->ctrl_sock_mutex);
 	/* Add stream on the relayd */
 	ret = relayd_add_stream(&relayd->control_sock, stream->name,
-			stream->chan->pathname, &stream->relayd_stream_id);
+			stream->chan->pathname, &stream->relayd_stream_id,
+			stream->chan->tracefile_size,
+			stream->chan->tracefile_count);
 	pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
 	if (ret < 0) {
 		goto error;
@@ -274,6 +278,12 @@ static int create_ust_streams(struct lttng_consumer_channel *channel,
 		 */
 		stream->wait_fd = wait_fd;
 
+		/*
+		 * Increment channel refcount since the channel reference has now been
+		 * assigned in the allocation process above.
+		 */
+		uatomic_inc(&stream->chan->refcount);
+
 		/*
 		 * Order is important this is why a list is used. On error, the caller
 		 * should clean this list.
@@ -481,10 +491,6 @@ static int ask_channel(struct lttng_consumer_local_data *ctx, int sock,
 
 	channel->wait_fd = ustctl_channel_get_wait_fd(channel->uchan);
 
-	if (ret < 0) {
-		goto error;
-	}
-
 	/* Open all streams for this channel. */
 	ret = create_ust_streams(channel, ctx);
 	if (ret < 0) {
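The send_stream_to_thread() hunk above swaps a hand-rolled write(2) retry loop for the new lttng_pipe wrapper (hence the added include at the top of the file), and reports the write end through lttng_pipe_get_writefd() when the write fails. The sketch below only restates the retry-on-EINTR behaviour that the removed loop implemented and that such a wrapper centralizes; pipe_write_all() is a hypothetical name, not part of the lttng-tools API.

#include <errno.h>
#include <unistd.h>

/*
 * Hypothetical helper mirroring the removed do/while loop: keep retrying
 * the write as long as it is interrupted by a signal (EINTR), and report
 * any other failure to the caller.
 */
static ssize_t pipe_write_all(int fd, const void *buf, size_t len)
{
	ssize_t ret;

	do {
		ret = write(fd, buf, len);
	} while (ret < 0 && errno == EINTR);

	return ret;
}
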
@@ -548,11 +554,16 @@ int lttng_ustconsumer_push_metadata(struct lttng_consumer_channel *metadata,
 
 	DBG("UST consumer writing metadata to channel %s", metadata->name);
 
+	if (!metadata->metadata_stream) {
+		ret = 0;
+		goto error;
+	}
+
 	assert(target_offset <= metadata->metadata_cache->max_offset);
 	ret = ustctl_write_metadata_to_channel(metadata->uchan,
 			metadata_str + target_offset, len);
 	if (ret < 0) {
-		ERR("ustctl write metadata fail with ret %d, len %ld", ret, len);
+		ERR("ustctl write metadata fail with ret %d, len %" PRIu64, ret, len);
 		goto error;
 	}
 
@@ -575,11 +586,12 @@ static int flush_channel(uint64_t chan_key)
 	struct lttng_ht *ht;
 	struct lttng_ht_iter iter;
 
-	DBG("UST consumer flush channel key %lu", chan_key);
+	DBG("UST consumer flush channel key %" PRIu64, chan_key);
 
+	rcu_read_lock();
 	channel = consumer_find_channel(chan_key);
 	if (!channel) {
-		ERR("UST consumer flush channel %lu not found", chan_key);
+		ERR("UST consumer flush channel %" PRIu64 " not found", chan_key);
 		ret = LTTNG_ERR_UST_CHAN_NOT_FOUND;
 		goto error;
 	}
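In the flush_channel() hunks above and below, rcu_read_lock() is now taken before consumer_find_channel() rather than only around the stream iteration, so the looked-up channel cannot disappear between the lookup and its use, and the unlock is funneled through the error label. A minimal, self-contained sketch of that widened read-side critical section, with find_channel() standing in for the real RCU hash-table lookup, might look like this:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <urcu.h>

struct channel {
	uint64_t key;
};

/* Stand-in for the RCU hash-table lookup; illustrative only. */
static struct channel *find_channel(uint64_t key)
{
	static struct channel chan = { .key = 42 };

	return key == chan.key ? &chan : NULL;
}

static void flush_streams_of(uint64_t key)
{
	struct channel *chan;

	/*
	 * The lookup and the per-stream work share one read-side critical
	 * section, so the channel stays valid until rcu_read_unlock().
	 */
	rcu_read_lock();
	chan = find_channel(key);
	if (chan) {
		printf("flushing streams of channel %" PRIu64 "\n", chan->key);
	}
	rcu_read_unlock();
}

int main(void)
{
	rcu_register_thread();
	flush_streams_of(42);
	rcu_unregister_thread();
	return 0;
}
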
+ */ + DBG("UST consumer close metadata %" PRIu64 " not found", chan_key); ret = LTTNG_ERR_UST_CHAN_NOT_FOUND; goto error; } - ret = ustctl_stream_close_wakeup_fd(channel->metadata_stream->ustream); - if (ret < 0) { - ERR("UST consumer unable to close fd of metadata (ret: %d)", ret); - ret = LTTCOMM_CONSUMERD_ERROR_METADATA; - goto error; + pthread_mutex_lock(&consumer_data.lock); + + if (cds_lfht_is_node_deleted(&channel->node.node)) { + goto error_unlock; } + if (channel->switch_timer_enabled == 1) { DBG("Deleting timer on metadata channel"); consumer_timer_switch_stop(channel); } - consumer_metadata_cache_destroy(channel); + if (channel->metadata_stream) { + ret = ustctl_stream_close_wakeup_fd(channel->metadata_stream->ustream); + if (ret < 0) { + ERR("UST consumer unable to close fd of metadata (ret: %d)", ret); + ret = LTTCOMM_CONSUMERD_ERROR_METADATA; + goto error_unlock; + } + } + +error_unlock: + pthread_mutex_unlock(&consumer_data.lock); error: return ret; } @@ -644,7 +671,7 @@ static int setup_metadata(struct lttng_consumer_local_data *ctx, uint64_t key) int ret; struct lttng_consumer_channel *metadata; - DBG("UST consumer setup metadata key %lu", key); + DBG("UST consumer setup metadata key %" PRIu64, key); metadata = consumer_find_channel(key); if (!metadata) { @@ -697,7 +724,7 @@ int lttng_ustconsumer_recv_metadata(int sock, uint64_t key, uint64_t offset, int ret, ret_code = LTTNG_OK; char *metadata_str; - DBG("UST consumer push metadata key %lu of len %lu", key, len); + DBG("UST consumer push metadata key %" PRIu64 " of len %" PRIu64, key, len); metadata_str = zmalloc(len * sizeof(char)); if (!metadata_str) { @@ -714,13 +741,33 @@ int lttng_ustconsumer_recv_metadata(int sock, uint64_t key, uint64_t offset, goto end_free; } + /* + * XXX: The consumer data lock is acquired before calling metadata cache + * write which calls push metadata that MUST be protected by the consumer + * lock in order to be able to check the validity of the metadata stream of + * the channel. + * + * Note that this will be subject to change to better fine grained locking + * and ultimately try to get rid of this global consumer data lock. + */ + pthread_mutex_lock(&consumer_data.lock); + pthread_mutex_lock(&channel->metadata_cache->lock); ret = consumer_metadata_cache_write(channel, offset, len, metadata_str); if (ret < 0) { /* Unable to handle metadata. Notify session daemon. */ ret_code = LTTCOMM_CONSUMERD_ERROR_METADATA; + /* + * Skip metadata flush on write error since the offset and len might + * not have been updated which could create an infinite loop below when + * waiting for the metadata cache to be flushed. + */ + pthread_mutex_unlock(&channel->metadata_cache->lock); + pthread_mutex_unlock(&consumer_data.lock); + goto end_free; } pthread_mutex_unlock(&channel->metadata_cache->lock); + pthread_mutex_unlock(&consumer_data.lock); while (consumer_metadata_cache_flushed(channel, offset + len)) { DBG("Waiting for metadata to be flushed"); @@ -755,6 +802,9 @@ int lttng_ustconsumer_recv_cmd(struct lttng_consumer_local_data *ctx, * The ret value might 0 meaning an orderly shutdown but this is ok * since the caller handles this. 
@@ -830,6 +880,7 @@ int lttng_ustconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
 				sizeof(is_data_pending));
 		if (ret < 0) {
 			DBG("Error when sending the data pending ret code: %d", ret);
+			goto error_fatal;
 		}
 
 		/*
@@ -848,7 +899,9 @@ int lttng_ustconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
 				msg.u.ask_channel.pathname, msg.u.ask_channel.name,
 				msg.u.ask_channel.uid, msg.u.ask_channel.gid,
 				msg.u.ask_channel.relayd_id, msg.u.ask_channel.key,
-				(enum lttng_event_output) msg.u.ask_channel.output);
+				(enum lttng_event_output) msg.u.ask_channel.output,
+				msg.u.ask_channel.tracefile_size,
+				msg.u.ask_channel.tracefile_count);
 		if (!channel) {
 			goto end_channel_error;
 		}
@@ -877,6 +930,12 @@ int lttng_ustconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
 		case LTTNG_UST_CHAN_PER_CPU:
 			channel->type = CONSUMER_CHANNEL_TYPE_DATA;
 			attr.type = LTTNG_UST_CHAN_PER_CPU;
+			/*
+			 * Set refcount to 1 for owner. Below, we will
+			 * pass ownership to the
+			 * consumer_thread_channel_poll() thread.
+			 */
+			channel->refcount = 1;
 			break;
 		case LTTNG_UST_CHAN_METADATA:
 			channel->type = CONSUMER_CHANNEL_TYPE_METADATA;
@@ -892,17 +951,34 @@ int lttng_ustconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
 			goto end_channel_error;
 		}
 
+		if (msg.u.ask_channel.type == LTTNG_UST_CHAN_METADATA) {
+			ret = consumer_metadata_cache_allocate(channel);
+			if (ret < 0) {
+				ERR("Allocating metadata cache");
+				goto end_channel_error;
+			}
+			consumer_timer_switch_start(channel, attr.switch_timer_interval);
+			attr.switch_timer_interval = 0;
+		}
+
 		/*
 		 * Add the channel to the internal state AFTER all streams were created
 		 * and successfully sent to session daemon. This way, all streams must
 		 * be ready before this channel is visible to the threads.
+		 * If add_channel succeeds, ownership of the channel is
+		 * passed to consumer_thread_channel_poll().
 		 */
 		ret = add_channel(channel, ctx);
 		if (ret < 0) {
+			if (msg.u.ask_channel.type == LTTNG_UST_CHAN_METADATA) {
+				if (channel->switch_timer_enabled == 1) {
+					consumer_timer_switch_stop(channel);
+				}
+				consumer_metadata_cache_destroy(channel);
+			}
 			goto end_channel_error;
 		}
-
 		/*
 		 * Channel and streams are now created. Inform the session daemon that
 		 * everything went well and should wait to receive the channel and
@@ -911,20 +987,9 @@ int lttng_ustconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
 		ret = consumer_send_status_channel(sock, channel);
 		if (ret < 0) {
 			/*
-			 * There is probably a problem on the socket so the poll will get
-			 * it and clean everything up.
+			 * There is probably a problem on the socket.
 			 */
-			goto end_nosignal;
-		}
-
-		if (msg.u.ask_channel.type == LTTNG_UST_CHAN_METADATA) {
-			ret = consumer_metadata_cache_allocate(channel);
-			if (ret < 0) {
-				ERR("Allocating metadata cache");
-				goto end_channel_error;
-			}
-			consumer_timer_switch_start(channel, attr.switch_timer_interval);
-			attr.switch_timer_interval = 0;
+			goto error_fatal;
 		}
 
 		break;
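For a metadata channel, the ask_channel hunks above now allocate the cache and start the switch timer before add_channel(), and undo both when add_channel() fails, because ownership only passes to the channel-poll thread once registration succeeds. The following sketch shows that allocate/register/rollback shape with hypothetical helpers (cache_allocate(), timer_start(), register_channel() and friends); it is not the lttng-tools API.

#include <stdlib.h>

struct chan {
	void *cache;
	int timer_started;
};

/* Hypothetical helpers standing in for cache allocation and the switch timer. */
static int cache_allocate(struct chan *c) { c->cache = malloc(64); return c->cache ? 0 : -1; }
static void cache_destroy(struct chan *c) { free(c->cache); c->cache = NULL; }
static void timer_start(struct chan *c) { c->timer_started = 1; }
static void timer_stop(struct chan *c) { c->timer_started = 0; }
static int register_channel(struct chan *c) { (void) c; return 0; /* may fail in real code */ }

static int setup_metadata_channel(struct chan *c)
{
	int ret;

	ret = cache_allocate(c);
	if (ret < 0) {
		goto error;
	}
	timer_start(c);

	/* Ownership passes to the registering thread only on success. */
	ret = register_channel(c);
	if (ret < 0) {
		/* Roll back everything done locally before reporting the error. */
		timer_stop(c);
		cache_destroy(c);
		goto error;
	}

	return 0;

error:
	return ret;
}
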
@@ -937,7 +1002,7 @@ int lttng_ustconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
 
 		channel = consumer_find_channel(key);
 		if (!channel) {
-			ERR("UST consumer get channel key %lu not found", key);
+			ERR("UST consumer get channel key %" PRIu64 " not found", key);
 			ret_code = LTTNG_ERR_UST_CHAN_NOT_FOUND;
 			goto end_msg_sessiond;
 		}
@@ -946,7 +1011,7 @@ int lttng_ustconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
 		ret = consumer_send_status_msg(sock, LTTNG_OK);
 		if (ret < 0) {
 			/* Somehow, the session daemon is not responding anymore. */
-			goto end_nosignal;
+			goto error_fatal;
 		}
 
 		/* Send everything to sessiond. */
@@ -984,17 +1049,13 @@ int lttng_ustconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
 	case LTTNG_CONSUMER_DESTROY_CHANNEL:
 	{
 		uint64_t key = msg.u.destroy_channel.key;
-		struct lttng_consumer_channel *channel;
-
-		channel = consumer_find_channel(key);
-		if (!channel) {
-			ERR("UST consumer get channel key %lu not found", key);
-			ret_code = LTTNG_ERR_UST_CHAN_NOT_FOUND;
-			goto end_msg_sessiond;
-		}
-
-		destroy_channel(channel);
+		/*
+		 * Only called if streams have not been sent to stream
+		 * manager thread. However, channel has been sent to
+		 * channel manager thread.
+		 */
+		notify_thread_del_channel(ctx, key);
 		goto end_msg_sessiond;
 	}
 	case LTTNG_CONSUMER_CLOSE_METADATA:
@@ -1027,12 +1088,14 @@ int lttng_ustconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
 		uint64_t offset = msg.u.push_metadata.target_offset;
 		struct lttng_consumer_channel *channel;
 
-		DBG("UST consumer push metadata key %lu of len %lu", key, len);
+		DBG("UST consumer push metadata key %" PRIu64 " of len %" PRIu64, key,
+				len);
 
 		channel = consumer_find_channel(key);
 		if (!channel) {
-			ERR("UST consumer push metadata %lu not found", key);
+			ERR("UST consumer push metadata %" PRIu64 " not found", key);
 			ret_code = LTTNG_ERR_UST_CHAN_NOT_FOUND;
+			goto end_msg_sessiond;
 		}
 
 		/* Tell session daemon we are ready to receive the metadata. */
@@ -1044,14 +1107,14 @@ int lttng_ustconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
 		/* Wait for more data. */
 		if (lttng_consumer_poll_socket(consumer_sockpoll) < 0) {
-			goto end_nosignal;
+			goto error_fatal;
 		}
 
 		ret = lttng_ustconsumer_recv_metadata(sock, key, offset,
 				len, channel);
 		if (ret < 0) {
 			/* error receiving from sessiond */
-			goto end_nosignal;
+			goto error_fatal;
 		} else {
 			ret_code = ret;
 			goto end_msg_sessiond;
 		}
@@ -1086,7 +1149,10 @@ end_msg_sessiond:
 	 * the caller because the session daemon socket management is done
 	 * elsewhere. Returning a negative code or 0 will shutdown the consumer.
 	 */
-	(void) consumer_send_status_msg(sock, ret_code);
+	ret = consumer_send_status_msg(sock, ret_code);
+	if (ret < 0) {
+		goto error_fatal;
+	}
 	rcu_read_unlock();
 	return 1;
 end_channel_error:
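Several hunks in this command handler replace %lu with the PRIu64 macro when printing uint64_t keys and lengths. unsigned long is 32 bits on 32-bit builds while uint64_t is always 64 bits, so %lu is only coincidentally correct on LP64 systems; PRIu64 from <inttypes.h> always expands to the matching conversion specifier. A stand-alone example:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t key = 42;
	uint64_t len = 4096;

	/* PRIu64 expands to the right conversion specifier on every platform. */
	printf("push metadata key %" PRIu64 " of len %" PRIu64 "\n", key, len);
	return 0;
}
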
@@ -1181,6 +1247,10 @@ void lttng_ustconsumer_del_channel(struct lttng_consumer_channel *chan)
 	assert(chan);
 	assert(chan->uchan);
 
+	if (chan->switch_timer_enabled == 1) {
+		consumer_timer_switch_stop(chan);
+	}
+	consumer_metadata_cache_destroy(chan);
 	ustctl_destroy_channel(chan->uchan);
 }
 
@@ -1189,6 +1259,9 @@ void lttng_ustconsumer_del_stream(struct lttng_consumer_stream *stream)
 	assert(stream);
 	assert(stream->ustream);
 
+	if (stream->chan->switch_timer_enabled == 1) {
+		consumer_timer_switch_stop(stream->chan);
+	}
 	ustctl_destroy_stream(stream->ustream);
 }
 
@@ -1268,7 +1341,7 @@ int lttng_ustconsumer_read_subbuffer(struct lttng_consumer_stream *stream,
 		 * happen and it is OK with the code flow.
 		 */
 		DBG("Error writing to tracefile "
-				"(ret: %zd != len: %lu != subbuf_size: %lu)",
+				"(ret: %ld != len: %lu != subbuf_size: %lu)",
 				ret, len, subbuf_size);
 	}
 	err = ustctl_put_next_subbuf(ustream);
@@ -1280,35 +1353,25 @@ end:
 
 /*
  * Called when a stream is created.
+ *
+ * Return 0 on success or else a negative value.
  */
 int lttng_ustconsumer_on_recv_stream(struct lttng_consumer_stream *stream)
 {
 	int ret;
-	char full_path[PATH_MAX];
-
-	/* Opening the tracefile in write mode */
-	if (stream->net_seq_idx != (uint64_t) -1ULL) {
-		goto end;
-	}
-	ret = snprintf(full_path, sizeof(full_path), "%s/%s",
-			stream->chan->pathname, stream->name);
-	if (ret < 0) {
-		PERROR("snprintf on_recv_stream");
-		goto error;
-	}
-
-	ret = run_as_open(full_path, O_WRONLY | O_CREAT | O_TRUNC,
-			S_IRWXU | S_IRWXG | S_IRWXO, stream->uid, stream->gid);
-	if (ret < 0) {
-		PERROR("open stream path %s", full_path);
-		goto error;
+	/* Don't create anything if this is set for streaming. */
+	if (stream->net_seq_idx == (uint64_t) -1ULL) {
+		ret = utils_create_stream_file(stream->chan->pathname, stream->name,
+				stream->chan->tracefile_size, stream->tracefile_count_current,
+				stream->uid, stream->gid);
+		if (ret < 0) {
+			goto error;
+		}
+		stream->out_fd = ret;
+		stream->tracefile_size_current = 0;
 	}
-	stream->out_fd = ret;
-
-end:
-	/* we return 0 to let the library handle the FD internally */
-	return 0;
+	ret = 0;
 
 error:
 	return ret;
@@ -1437,7 +1500,7 @@ int lttng_ustconsumer_request_metadata(struct lttng_consumer_local_data *ctx,
 	ret = lttcomm_recv_unix_sock(ctx->consumer_metadata_socket, &msg,
 			sizeof(msg));
 	if (ret != sizeof(msg)) {
-		DBG("Consumer received unexpected message size %d (expects %lu)",
+		DBG("Consumer received unexpected message size %d (expects %zu)",
 				ret, sizeof(msg));
 		lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_ERROR_RECV_CMD);
 		/*