X-Git-Url: https://git.lttng.org/?p=lttng-tools.git;a=blobdiff_plain;f=src%2Fcommon%2Fconsumer%2Fconsumer.c;h=20f4deb29bf9f61cfbea4b64b3373605948ff357;hp=99514280a7fd58932bc73b425a3fefdd76e2bced;hb=b1085ea8f4d95b74bbdcfa746c546f8ca8c9af84;hpb=12bddd1de496241f07cbdf729aee6c9a34cb1fa3 diff --git a/src/common/consumer/consumer.c b/src/common/consumer/consumer.c index 99514280a..20f4deb29 100644 --- a/src/common/consumer/consumer.c +++ b/src/common/consumer/consumer.c @@ -877,6 +877,43 @@ error: return outfd; } +/* + * Write a character on the metadata poll pipe to wake the metadata thread. + * Returns 0 on success, -1 on error. + */ +int consumer_metadata_wakeup_pipe(const struct lttng_consumer_channel *channel) +{ + int ret = 0; + + DBG("Waking up metadata poll thread (writing to pipe): channel name = '%s'", + channel->name); + if (channel->monitor && channel->metadata_stream) { + const char dummy = 'c'; + const ssize_t write_ret = lttng_write( + channel->metadata_stream->ust_metadata_poll_pipe[1], + &dummy, 1); + + if (write_ret < 1) { + if (errno == EWOULDBLOCK) { + /* + * This is fine, the metadata poll thread + * is having a hard time keeping-up, but + * it will eventually wake-up and consume + * the available data. + */ + ret = 0; + } else { + PERROR("Failed to write to UST metadata pipe while attempting to wake-up the metadata poll thread"); + ret = -1; + goto end; + } + } + } + +end: + return ret; +} + /* * Trigger a dump of the metadata content. Following/during the succesful * completion of this call, the metadata poll thread will start receiving @@ -3253,7 +3290,7 @@ void *consumer_thread_sessiond_poll(void *data) err = 0; /* All is OK */ goto end; } - DBG("received command on sock"); + DBG("Received command on sock"); } /* All is OK */ err = 0; @@ -3310,6 +3347,29 @@ error_testpoint: return NULL; } +static int post_consume(struct lttng_consumer_stream *stream, + const struct stream_subbuffer *subbuffer, + struct lttng_consumer_local_data *ctx) +{ + size_t i; + int ret = 0; + const size_t count = lttng_dynamic_array_get_count( + &stream->read_subbuffer_ops.post_consume_cbs); + + for (i = 0; i < count; i++) { + const post_consume_cb op = *(post_consume_cb *) lttng_dynamic_array_get_element( + &stream->read_subbuffer_ops.post_consume_cbs, + i); + + ret = op(stream, subbuffer, ctx); + if (ret) { + goto end; + } + } +end: + return ret; +} + ssize_t lttng_consumer_read_subbuffer(struct lttng_consumer_stream *stream, struct lttng_consumer_local_data *ctx, bool locked_by_caller) @@ -3360,24 +3420,10 @@ ssize_t lttng_consumer_read_subbuffer(struct lttng_consumer_stream *stream, written_bytes = stream->read_subbuffer_ops.consume_subbuffer( ctx, stream, &subbuffer); - /* - * Should write subbuf_size amount of data when network streaming or - * the full padded size when we are not streaming. - */ - if ((written_bytes != subbuffer.info.data.subbuf_size && - stream->net_seq_idx != (uint64_t) -1ULL) || - (written_bytes != subbuffer.info.data.padded_subbuf_size && - stream->net_seq_idx == - (uint64_t) -1ULL)) { - /* - * Display the error but continue processing to try to - * release the subbuffer. This is a DBG statement - * since this can happen without being a critical - * error. - */ - DBG("Failed to write to tracefile (written_bytes: %zd != padded subbuffer size: %lu, subbuffer size: %lu)", - written_bytes, subbuffer.info.data.subbuf_size, - subbuffer.info.data.padded_subbuf_size); + if (written_bytes <= 0) { + ERR("Error consuming subbuffer: (%zd)", written_bytes); + ret = (int) written_bytes; + goto error_put_subbuf; } ret = stream->read_subbuffer_ops.put_next_subbuffer(stream, &subbuffer); @@ -3385,11 +3431,9 @@ ssize_t lttng_consumer_read_subbuffer(struct lttng_consumer_stream *stream, goto end; } - if (stream->read_subbuffer_ops.post_consume) { - ret = stream->read_subbuffer_ops.post_consume(stream, &subbuffer, ctx); - if (ret) { - goto end; - } + ret = post_consume(stream, &subbuffer, ctx); + if (ret) { + goto end; } /* @@ -3407,6 +3451,7 @@ ssize_t lttng_consumer_read_subbuffer(struct lttng_consumer_stream *stream, ERR("Stream rotation error after consuming data"); goto end; } + } else if (rotation_ret < 0) { ret = rotation_ret; ERR("Failed to check if stream was ready to rotate after consuming data"); @@ -3901,44 +3946,30 @@ unsigned long consumer_get_consume_start_pos(unsigned long consumed_pos, return start_pos; } -static -int consumer_flush_buffer(struct lttng_consumer_stream *stream, int producer_active) +/* Stream lock must be held by the caller. */ +static int sample_stream_positions(struct lttng_consumer_stream *stream, + unsigned long *produced, unsigned long *consumed) { - int ret = 0; + int ret; - switch (consumer_data.type) { - case LTTNG_CONSUMER_KERNEL: - if (producer_active) { - ret = kernctl_buffer_flush(stream->wait_fd); - if (ret < 0) { - ERR("Failed to flush kernel stream"); - goto end; - } - } else { - ret = kernctl_buffer_flush_empty(stream->wait_fd); - if (ret < 0) { - /* - * Doing a buffer flush which does not take into - * account empty packets. This is not perfect, - * but required as a fall-back when - * "flush_empty" is not implemented by - * lttng-modules. - */ - ret = kernctl_buffer_flush(stream->wait_fd); - if (ret < 0) { - ERR("Failed to flush kernel stream"); - goto end; - } - } - } - break; - case LTTNG_CONSUMER32_UST: - case LTTNG_CONSUMER64_UST: - lttng_ustconsumer_flush_buffer(stream, producer_active); - break; - default: - ERR("Unknown consumer_data type"); - abort(); + ASSERT_LOCKED(stream->lock); + + ret = lttng_consumer_sample_snapshot_positions(stream); + if (ret < 0) { + ERR("Failed to sample snapshot positions"); + goto end; + } + + ret = lttng_consumer_get_produced_snapshot(stream, produced); + if (ret < 0) { + ERR("Failed to sample produced position"); + goto end; + } + + ret = lttng_consumer_get_consumed_snapshot(stream, consumed); + if (ret < 0) { + ERR("Failed to sample consumed position"); + goto end; } end: @@ -3968,11 +3999,15 @@ int lttng_consumer_rotate_channel(struct lttng_consumer_channel *channel, const bool is_local_trace = relayd_id == -1ULL; struct consumer_relayd_sock_pair *relayd = NULL; bool rotating_to_new_chunk = true; + /* Array of `struct lttng_consumer_stream *` */ + struct lttng_dynamic_pointer_array streams_packet_to_open; + size_t stream_idx; DBG("Consumer sample rotate position for channel %" PRIu64, key); lttng_dynamic_array_init(&stream_rotation_positions, sizeof(struct relayd_stream_rotation_position), NULL); + lttng_dynamic_pointer_array_init(&streams_packet_to_open, NULL); rcu_read_lock(); @@ -4003,18 +4038,96 @@ int lttng_consumer_rotate_channel(struct lttng_consumer_channel *channel, } /* - * Do not flush an empty packet when rotating from a NULL trace + * Do not flush a packet when rotating from a NULL trace * chunk. The stream has no means to output data, and the prior - * rotation which rotated to NULL performed that side-effect already. + * rotation which rotated to NULL performed that side-effect + * already. No new data can be produced when a stream has no + * associated trace chunk (e.g. a stop followed by a rotate). */ if (stream->trace_chunk) { + bool flush_active; + + if (stream->metadata_flag) { + /* + * Don't produce an empty metadata packet, + * simply close the current one. + * + * Metadata is regenerated on every trace chunk + * switch; there is no concern that no data was + * produced. + */ + flush_active = true; + } else { + /* + * Only flush an empty packet if the "packet + * open" could not be performed on transition + * to a new trace chunk and no packets were + * consumed within the chunk's lifetime. + */ + if (stream->opened_packet_in_current_trace_chunk) { + flush_active = true; + } else { + /* + * Stream could have been full at the + * time of rotation, but then have had + * no activity at all. + * + * It is important to flush a packet + * to prevent 0-length files from being + * produced as most viewers choke on + * them. + * + * Unfortunately viewers will not be + * able to know that tracing was active + * for this stream during this trace + * chunk's lifetime. + */ + ret = sample_stream_positions(stream, &produced_pos, &consumed_pos); + if (ret) { + goto end_unlock_stream; + } + + /* + * Don't flush an empty packet if data + * was produced; it will be consumed + * before the rotation completes. + */ + flush_active = produced_pos != consumed_pos; + if (!flush_active) { + const char *trace_chunk_name; + uint64_t trace_chunk_id; + + chunk_status = lttng_trace_chunk_get_name( + stream->trace_chunk, + &trace_chunk_name, + NULL); + if (chunk_status == LTTNG_TRACE_CHUNK_STATUS_NONE) { + trace_chunk_name = "none"; + } + + /* + * Consumer trace chunks are + * never anonymous. + */ + chunk_status = lttng_trace_chunk_get_id( + stream->trace_chunk, + &trace_chunk_id); + assert(chunk_status == + LTTNG_TRACE_CHUNK_STATUS_OK); + + DBG("Unable to open packet for stream during trace chunk's lifetime. " + "Flushing an empty packet to prevent an empty file from being created: " + "stream id = %" PRIu64 ", trace chunk name = `%s`, trace chunk id = %" PRIu64, + stream->key, trace_chunk_name, trace_chunk_id); + } + } + } + /* - * For metadata stream, do an active flush, which does not - * produce empty packets. For data streams, empty-flush; - * ensures we have at least one packet in each stream per trace - * chunk, even if no data was produced. + * Close the current packet before sampling the + * ring buffer positions. */ - ret = consumer_flush_buffer(stream, stream->metadata_flag ? 1 : 0); + ret = consumer_stream_flush_buffer(stream, flush_active); if (ret < 0) { ERR("Failed to flush stream %" PRIu64 " during channel rotation", stream->key); @@ -4102,36 +4215,135 @@ int lttng_consumer_rotate_channel(struct lttng_consumer_channel *channel, } stream_count++; } + + stream->opened_packet_in_current_trace_chunk = false; + + if (rotating_to_new_chunk && !stream->metadata_flag) { + /* + * Attempt to flush an empty packet as close to the + * rotation point as possible. In the event where a + * stream remains inactive after the rotation point, + * this ensures that the new trace chunk has a + * beginning timestamp set at the begining of the + * trace chunk instead of only creating an empty + * packet when the trace chunk is stopped. + * + * This indicates to the viewers that the stream + * was being recorded, but more importantly it + * allows viewers to determine a useable trace + * intersection. + * + * This presents a problem in the case where the + * ring-buffer is completely full. + * + * Consider the following scenario: + * - The consumption of data is slow (slow network, + * for instance), + * - The ring buffer is full, + * - A rotation is initiated, + * - The flush below does nothing (no space left to + * open a new packet), + * - The other streams rotate very soon, and new + * data is produced in the new chunk, + * - This stream completes its rotation long after the + * rotation was initiated + * - The session is stopped before any event can be + * produced in this stream's buffers. + * + * The resulting trace chunk will have a single packet + * temporaly at the end of the trace chunk for this + * stream making the stream intersection more narrow + * than it should be. + * + * To work-around this, an empty flush is performed + * after the first consumption of a packet during a + * rotation if open_packet fails. The idea is that + * consuming a packet frees enough space to switch + * packets in this scenario and allows the tracer to + * "stamp" the beginning of the new trace chunk at the + * earliest possible point. + * + * The packet open is performed after the channel + * rotation to ensure that no attempt to open a packet + * is performed in a stream that has no active trace + * chunk. + */ + ret = lttng_dynamic_pointer_array_add_pointer( + &streams_packet_to_open, stream); + if (ret) { + PERROR("Failed to add a stream pointer to array of streams in which to open a packet"); + ret = -1; + goto end_unlock_stream; + } + } + pthread_mutex_unlock(&stream->lock); } stream = NULL; - pthread_mutex_unlock(&channel->lock); - if (is_local_trace) { - ret = 0; - goto end; - } + if (!is_local_trace) { + relayd = consumer_find_relayd(relayd_id); + if (!relayd) { + ERR("Failed to find relayd %" PRIu64, relayd_id); + ret = -1; + goto end_unlock_channel; + } - relayd = consumer_find_relayd(relayd_id); - if (!relayd) { - ERR("Failed to find relayd %" PRIu64, relayd_id); - ret = -1; - goto end; + pthread_mutex_lock(&relayd->ctrl_sock_mutex); + ret = relayd_rotate_streams(&relayd->control_sock, stream_count, + rotating_to_new_chunk ? &next_chunk_id : NULL, + (const struct relayd_stream_rotation_position *) + stream_rotation_positions.buffer + .data); + pthread_mutex_unlock(&relayd->ctrl_sock_mutex); + if (ret < 0) { + ERR("Relayd rotate stream failed. Cleaning up relayd %" PRIu64, + relayd->net_seq_idx); + lttng_consumer_cleanup_relayd(relayd); + goto end_unlock_channel; + } } - pthread_mutex_lock(&relayd->ctrl_sock_mutex); - ret = relayd_rotate_streams(&relayd->control_sock, stream_count, - rotating_to_new_chunk ? &next_chunk_id : NULL, - (const struct relayd_stream_rotation_position *) - stream_rotation_positions.buffer.data); - pthread_mutex_unlock(&relayd->ctrl_sock_mutex); - if (ret < 0) { - ERR("Relayd rotate stream failed. Cleaning up relayd %" PRIu64, - relayd->net_seq_idx); - lttng_consumer_cleanup_relayd(relayd); - goto end; + for (stream_idx = 0; + stream_idx < lttng_dynamic_pointer_array_get_count( + &streams_packet_to_open); + stream_idx++) { + enum consumer_stream_open_packet_status status; + + stream = lttng_dynamic_pointer_array_get_pointer( + &streams_packet_to_open, stream_idx); + + pthread_mutex_lock(&stream->lock); + status = consumer_stream_open_packet(stream); + pthread_mutex_unlock(&stream->lock); + switch (status) { + case CONSUMER_STREAM_OPEN_PACKET_STATUS_OPENED: + DBG("Opened a packet after a rotation: stream id = %" PRIu64 + ", channel name = %s, session id = %" PRIu64, + stream->key, stream->chan->name, + stream->chan->session_id); + break; + case CONSUMER_STREAM_OPEN_PACKET_STATUS_NO_SPACE: + /* + * Can't open a packet as there is no space left + * in the buffer. A new packet will be opened + * once one has been consumed. + */ + DBG("No space left to open a packet after a rotation: stream id = %" PRIu64 + ", channel name = %s, session id = %" PRIu64, + stream->key, stream->chan->name, + stream->chan->session_id); + break; + case CONSUMER_STREAM_OPEN_PACKET_STATUS_ERROR: + /* Logged by callee. */ + ret = -1; + goto end_unlock_channel; + default: + abort(); + } } + pthread_mutex_unlock(&channel->lock); ret = 0; goto end; @@ -4142,6 +4354,7 @@ end_unlock_channel: end: rcu_read_unlock(); lttng_dynamic_array_reset(&stream_rotation_positions); + lttng_dynamic_pointer_array_reset(&streams_packet_to_open); return ret; } @@ -4200,7 +4413,7 @@ int consumer_clear_stream(struct lttng_consumer_stream *stream) { int ret; - ret = consumer_flush_buffer(stream, 1); + ret = consumer_stream_flush_buffer(stream, 1); if (ret < 0) { ERR("Failed to flush stream %" PRIu64 " during channel clear", stream->key); @@ -4529,9 +4742,9 @@ enum lttcomm_return_code lttng_consumer_create_trace_chunk( } else { relayd_id_str = "(formatting error)"; } - } + } - /* Local protocol error. */ + /* Local protocol error. */ assert(chunk_creation_timestamp); ret = time_to_iso8601_str(chunk_creation_timestamp, creation_timestamp_buffer, @@ -4840,7 +5053,7 @@ enum lttcomm_return_code lttng_consumer_trace_chunk_exists( } else { relayd_id_str = "(formatting error)"; } - } + } DBG("Consumer trace chunk exists command: relayd_id = %s" ", chunk_id = %" PRIu64, relayd_id_str, @@ -4957,3 +5170,70 @@ int lttng_consumer_clear_channel(struct lttng_consumer_channel *channel) end: return ret; } + +enum lttcomm_return_code lttng_consumer_open_channel_packets( + struct lttng_consumer_channel *channel) +{ + struct lttng_consumer_stream *stream; + enum lttcomm_return_code ret = LTTCOMM_CONSUMERD_SUCCESS; + + if (channel->metadata_stream) { + ERR("Open channel packets command attempted on a metadata channel"); + ret = LTTCOMM_CONSUMERD_INVALID_PARAMETERS; + goto end; + } + + rcu_read_lock(); + cds_list_for_each_entry(stream, &channel->streams.head, send_node) { + enum consumer_stream_open_packet_status status; + + pthread_mutex_lock(&stream->lock); + if (cds_lfht_is_node_deleted(&stream->node.node)) { + goto next; + } + + status = consumer_stream_open_packet(stream); + switch (status) { + case CONSUMER_STREAM_OPEN_PACKET_STATUS_OPENED: + DBG("Opened a packet in \"open channel packets\" command: stream id = %" PRIu64 + ", channel name = %s, session id = %" PRIu64, + stream->key, stream->chan->name, + stream->chan->session_id); + stream->opened_packet_in_current_trace_chunk = true; + break; + case CONSUMER_STREAM_OPEN_PACKET_STATUS_NO_SPACE: + DBG("No space left to open a packet in \"open channel packets\" command: stream id = %" PRIu64 + ", channel name = %s, session id = %" PRIu64, + stream->key, stream->chan->name, + stream->chan->session_id); + break; + case CONSUMER_STREAM_OPEN_PACKET_STATUS_ERROR: + /* + * Only unexpected internal errors can lead to this + * failing. Report an unknown error. + */ + ERR("Failed to flush empty buffer in \"open channel packets\" command: stream id = %" PRIu64 + ", channel id = %" PRIu64 + ", channel name = %s" + ", session id = %" PRIu64, + stream->key, channel->key, + channel->name, channel->session_id); + ret = LTTCOMM_CONSUMERD_UNKNOWN_ERROR; + goto error_unlock; + default: + abort(); + } + + next: + pthread_mutex_unlock(&stream->lock); + } + +end_rcu_unlock: + rcu_read_unlock(); +end: + return ret; + +error_unlock: + pthread_mutex_unlock(&stream->lock); + goto end_rcu_unlock; +}