From 633d01821ea83388fd0c30bbc6ed7dc777cec6f1 Mon Sep 17 00:00:00 2001 From: =?utf8?q?J=C3=A9r=C3=A9mie=20Galarneau?= Date: Fri, 9 Aug 2019 11:11:28 -0400 Subject: [PATCH 1/1] Fix: rotation of a stopped session hangs indifinitely MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The relay daemon control protocol expects a rotation position as "the sequence number of the first packet _after_ the current trace chunk. At the moment when the positions of the buffers are sampled, the production position does not necessarily sit at a packet boundary. The 'active' flush operation above will push the production position to the next packet boundary _if_ it is not already sitting at such a boundary. Assuming a current production position that is not on the bound of a packet, the 'target' sequence number is (consumed_pos / subbuffer_size) + 1 Note the '+ 1' to ensure the current packet is part of the current trace chunk. However, if the production position is already at a packet boundary, the '+ 1' is not necessary as the last packet of the current chunk is already 'complete'. In the case of a stopped session, performing the '+ 1' indiscriminately results in a position that will not be reached until another (not 'active') flush occurs. Signed-off-by: Jérémie Galarneau --- src/common/consumer/consumer.c | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/src/common/consumer/consumer.c b/src/common/consumer/consumer.c index b2f4c2686..914eda8c5 100644 --- a/src/common/consumer/consumer.c +++ b/src/common/consumer/consumer.c @@ -4073,6 +4073,10 @@ int lttng_consumer_rotate_channel(struct lttng_consumer_channel *channel, stream->rotate_ready = true; } + /* + * Active flush; has no effect if the production position + * is at a packet boundary. + */ ret = consumer_flush_buffer(stream, 1); if (ret < 0) { ERR("Failed to flush stream %" PRIu64 " during channel rotation", @@ -4081,10 +4085,34 @@ int lttng_consumer_rotate_channel(struct lttng_consumer_channel *channel, } if (!is_local_trace) { + /* + * The relay daemon control protocol expects a rotation + * position as "the sequence number of the first packet + * _after_ the current trace chunk. + * + * At the moment when the positions of the buffers are + * sampled, the production position does not necessarily + * sit at a packet boundary. The 'active' flush + * operation above will push the production position to + * the next packet boundary _if_ it is not already + * sitting at such a boundary. + * + * Assuming a current production position that is not + * on the bound of a packet, the 'target' sequence + * number is + * (consumed_pos / subbuffer_size) + 1 + * Note the '+ 1' to ensure the current packet is + * part of the current trace chunk. + * + * However, if the production position is already at + * a packet boundary, the '+ 1' is not necessary as the + * last packet of the current chunk is already + * 'complete'. + */ const struct relayd_stream_rotation_position position = { .stream_id = stream->relayd_stream_id, - .rotate_at_seq_num = (stream->rotate_position / - stream->max_sb_size) + 1, + .rotate_at_seq_num = (stream->rotate_position / stream->max_sb_size) + + !!(stream->rotate_position % stream->max_sb_size), }; ret = lttng_dynamic_array_add_element( -- 2.34.1