2 * Copyright (C) 2011 - Julien Desfossez <julien.desfossez@polymtl.ca>
3 * Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
4 * Copyright (C) 2017 - Jérémie Galarneau <jeremie.galarneau@efficios.com>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 #include <sys/socket.h>
28 #include <sys/types.h>
34 #include <bin/lttng-consumerd/health-consumerd.h>
35 #include <common/common.h>
36 #include <common/kernel-ctl/kernel-ctl.h>
37 #include <common/sessiond-comm/sessiond-comm.h>
38 #include <common/sessiond-comm/relayd.h>
39 #include <common/compat/fcntl.h>
40 #include <common/compat/endian.h>
41 #include <common/pipe.h>
42 #include <common/relayd/relayd.h>
43 #include <common/utils.h>
44 #include <common/consumer/consumer-stream.h>
45 #include <common/index/index.h>
46 #include <common/consumer/consumer-timer.h>
47 #include <common/optional.h>
48 #include <common/buffer-view.h>
49 #include <common/consumer/consumer.h>
50 #include <common/consumer/metadata-bucket.h>
52 #include "kernel-consumer.h"
54 extern struct lttng_consumer_global_data consumer_data
;
55 extern int consumer_poll_timeout
;
58 * Take a snapshot for a specific fd
60 * Returns 0 on success, < 0 on error
62 int lttng_kconsumer_take_snapshot(struct lttng_consumer_stream
*stream
)
65 int infd
= stream
->wait_fd
;
67 ret
= kernctl_snapshot(infd
);
69 * -EAGAIN is not an error, it just means that there is no data to
72 if (ret
!= 0 && ret
!= -EAGAIN
) {
73 PERROR("Getting sub-buffer snapshot.");
80 * Sample consumed and produced positions for a specific fd.
82 * Returns 0 on success, < 0 on error.
84 int lttng_kconsumer_sample_snapshot_positions(
85 struct lttng_consumer_stream
*stream
)
89 return kernctl_snapshot_sample_positions(stream
->wait_fd
);
93 * Get the produced position
95 * Returns 0 on success, < 0 on error
97 int lttng_kconsumer_get_produced_snapshot(struct lttng_consumer_stream
*stream
,
101 int infd
= stream
->wait_fd
;
103 ret
= kernctl_snapshot_get_produced(infd
, pos
);
105 PERROR("kernctl_snapshot_get_produced");
112 * Get the consumerd position
114 * Returns 0 on success, < 0 on error
116 int lttng_kconsumer_get_consumed_snapshot(struct lttng_consumer_stream
*stream
,
120 int infd
= stream
->wait_fd
;
122 ret
= kernctl_snapshot_get_consumed(infd
, pos
);
124 PERROR("kernctl_snapshot_get_consumed");
131 int get_current_subbuf_addr(struct lttng_consumer_stream
*stream
,
135 unsigned long mmap_offset
;
136 const char *mmap_base
= stream
->mmap_base
;
138 ret
= kernctl_get_mmap_read_offset(stream
->wait_fd
, &mmap_offset
);
140 PERROR("Failed to get mmap read offset");
144 *addr
= mmap_base
+ mmap_offset
;
150 * Take a snapshot of all the streams of a channel
151 * RCU read-side lock must be held across this function to ensure existence of
152 * channel. The channel lock must be held by the caller.
154 * Returns 0 on success, < 0 on error
156 static int lttng_kconsumer_snapshot_channel(
157 struct lttng_consumer_channel
*channel
,
158 uint64_t key
, char *path
, uint64_t relayd_id
,
159 uint64_t nb_packets_per_stream
,
160 struct lttng_consumer_local_data
*ctx
)
163 struct lttng_consumer_stream
*stream
;
165 DBG("Kernel consumer snapshot channel %" PRIu64
, key
);
169 /* Splice is not supported yet for channel snapshot. */
170 if (channel
->output
!= CONSUMER_CHANNEL_MMAP
) {
171 ERR("Unsupported output type for channel \"%s\": mmap output is required to record a snapshot",
177 cds_list_for_each_entry(stream
, &channel
->streams
.head
, send_node
) {
178 unsigned long consumed_pos
, produced_pos
;
180 health_code_update();
183 * Lock stream because we are about to change its state.
185 pthread_mutex_lock(&stream
->lock
);
187 assert(channel
->trace_chunk
);
188 if (!lttng_trace_chunk_get(channel
->trace_chunk
)) {
190 * Can't happen barring an internal error as the channel
191 * holds a reference to the trace chunk.
193 ERR("Failed to acquire reference to channel's trace chunk");
197 assert(!stream
->trace_chunk
);
198 stream
->trace_chunk
= channel
->trace_chunk
;
201 * Assign the received relayd ID so we can use it for streaming. The streams
202 * are not visible to anyone so this is OK to change it.
204 stream
->net_seq_idx
= relayd_id
;
205 channel
->relayd_id
= relayd_id
;
206 if (relayd_id
!= (uint64_t) -1ULL) {
207 ret
= consumer_send_relayd_stream(stream
, path
);
209 ERR("sending stream to relayd");
213 ret
= consumer_stream_create_output_files(stream
,
218 DBG("Kernel consumer snapshot stream (%" PRIu64
")",
222 ret
= kernctl_buffer_flush_empty(stream
->wait_fd
);
225 * Doing a buffer flush which does not take into
226 * account empty packets. This is not perfect
227 * for stream intersection, but required as a
228 * fall-back when "flush_empty" is not
229 * implemented by lttng-modules.
231 ret
= kernctl_buffer_flush(stream
->wait_fd
);
233 ERR("Failed to flush kernel stream");
239 ret
= lttng_kconsumer_take_snapshot(stream
);
241 ERR("Taking kernel snapshot");
245 ret
= lttng_kconsumer_get_produced_snapshot(stream
, &produced_pos
);
247 ERR("Produced kernel snapshot position");
251 ret
= lttng_kconsumer_get_consumed_snapshot(stream
, &consumed_pos
);
253 ERR("Consumerd kernel snapshot position");
257 consumed_pos
= consumer_get_consume_start_pos(consumed_pos
,
258 produced_pos
, nb_packets_per_stream
,
259 stream
->max_sb_size
);
261 while ((long) (consumed_pos
- produced_pos
) < 0) {
263 unsigned long len
, padded_len
;
264 const char *subbuf_addr
;
265 struct lttng_buffer_view subbuf_view
;
267 health_code_update();
268 DBG("Kernel consumer taking snapshot at pos %lu", consumed_pos
);
270 ret
= kernctl_get_subbuf(stream
->wait_fd
, &consumed_pos
);
272 if (ret
!= -EAGAIN
) {
273 PERROR("kernctl_get_subbuf snapshot");
276 DBG("Kernel consumer get subbuf failed. Skipping it.");
277 consumed_pos
+= stream
->max_sb_size
;
278 stream
->chan
->lost_packets
++;
282 ret
= kernctl_get_subbuf_size(stream
->wait_fd
, &len
);
284 ERR("Snapshot kernctl_get_subbuf_size");
285 goto error_put_subbuf
;
288 ret
= kernctl_get_padded_subbuf_size(stream
->wait_fd
, &padded_len
);
290 ERR("Snapshot kernctl_get_padded_subbuf_size");
291 goto error_put_subbuf
;
294 ret
= get_current_subbuf_addr(stream
, &subbuf_addr
);
296 goto error_put_subbuf
;
299 subbuf_view
= lttng_buffer_view_init(
300 subbuf_addr
, 0, padded_len
);
301 read_len
= lttng_consumer_on_read_subbuffer_mmap(
302 stream
, &subbuf_view
,
305 * We write the padded len in local tracefiles but the data len
306 * when using a relay. Display the error but continue processing
307 * to try to release the subbuffer.
309 if (relayd_id
!= (uint64_t) -1ULL) {
310 if (read_len
!= len
) {
311 ERR("Error sending to the relay (ret: %zd != len: %lu)",
315 if (read_len
!= padded_len
) {
316 ERR("Error writing to tracefile (ret: %zd != len: %lu)",
317 read_len
, padded_len
);
321 ret
= kernctl_put_subbuf(stream
->wait_fd
);
323 ERR("Snapshot kernctl_put_subbuf");
326 consumed_pos
+= stream
->max_sb_size
;
329 if (relayd_id
== (uint64_t) -1ULL) {
330 if (stream
->out_fd
>= 0) {
331 ret
= close(stream
->out_fd
);
333 PERROR("Kernel consumer snapshot close out_fd");
339 close_relayd_stream(stream
);
340 stream
->net_seq_idx
= (uint64_t) -1ULL;
342 lttng_trace_chunk_put(stream
->trace_chunk
);
343 stream
->trace_chunk
= NULL
;
344 pthread_mutex_unlock(&stream
->lock
);
352 ret
= kernctl_put_subbuf(stream
->wait_fd
);
354 ERR("Snapshot kernctl_put_subbuf error path");
357 pthread_mutex_unlock(&stream
->lock
);
364 * Read the whole metadata available for a snapshot.
365 * RCU read-side lock must be held across this function to ensure existence of
366 * metadata_channel. The channel lock must be held by the caller.
368 * Returns 0 on success, < 0 on error
370 static int lttng_kconsumer_snapshot_metadata(
371 struct lttng_consumer_channel
*metadata_channel
,
372 uint64_t key
, char *path
, uint64_t relayd_id
,
373 struct lttng_consumer_local_data
*ctx
)
375 int ret
, use_relayd
= 0;
377 struct lttng_consumer_stream
*metadata_stream
;
381 DBG("Kernel consumer snapshot metadata with key %" PRIu64
" at path %s",
386 metadata_stream
= metadata_channel
->metadata_stream
;
387 assert(metadata_stream
);
389 pthread_mutex_lock(&metadata_stream
->lock
);
390 assert(metadata_channel
->trace_chunk
);
391 assert(metadata_stream
->trace_chunk
);
393 /* Flag once that we have a valid relayd for the stream. */
394 if (relayd_id
!= (uint64_t) -1ULL) {
399 ret
= consumer_send_relayd_stream(metadata_stream
, path
);
404 ret
= consumer_stream_create_output_files(metadata_stream
,
412 health_code_update();
414 ret_read
= lttng_consumer_read_subbuffer(metadata_stream
, ctx
, true);
416 if (ret_read
!= -EAGAIN
) {
417 ERR("Kernel snapshot reading metadata subbuffer (ret: %zd)",
422 /* ret_read is negative at this point so we will exit the loop. */
425 } while (ret_read
>= 0);
428 close_relayd_stream(metadata_stream
);
429 metadata_stream
->net_seq_idx
= (uint64_t) -1ULL;
431 if (metadata_stream
->out_fd
>= 0) {
432 ret
= close(metadata_stream
->out_fd
);
434 PERROR("Kernel consumer snapshot metadata close out_fd");
436 * Don't go on error here since the snapshot was successful at this
437 * point but somehow the close failed.
440 metadata_stream
->out_fd
= -1;
441 lttng_trace_chunk_put(metadata_stream
->trace_chunk
);
442 metadata_stream
->trace_chunk
= NULL
;
448 pthread_mutex_unlock(&metadata_stream
->lock
);
449 cds_list_del(&metadata_stream
->send_node
);
450 consumer_stream_destroy(metadata_stream
, NULL
);
451 metadata_channel
->metadata_stream
= NULL
;
457 * Receive command from session daemon and process it.
459 * Return 1 on success else a negative value or 0.
461 int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data
*ctx
,
462 int sock
, struct pollfd
*consumer_sockpoll
)
465 enum lttcomm_return_code ret_code
= LTTCOMM_CONSUMERD_SUCCESS
;
466 struct lttcomm_consumer_msg msg
;
468 health_code_update();
470 ret
= lttcomm_recv_unix_sock(sock
, &msg
, sizeof(msg
));
471 if (ret
!= sizeof(msg
)) {
473 lttng_consumer_send_error(ctx
, LTTCOMM_CONSUMERD_ERROR_RECV_CMD
);
479 health_code_update();
481 /* Deprecated command */
482 assert(msg
.cmd_type
!= LTTNG_CONSUMER_STOP
);
484 health_code_update();
486 /* relayd needs RCU read-side protection */
489 switch (msg
.cmd_type
) {
490 case LTTNG_CONSUMER_ADD_RELAYD_SOCKET
:
492 /* Session daemon status messages are handled in the following call. */
493 consumer_add_relayd_socket(msg
.u
.relayd_sock
.net_index
,
494 msg
.u
.relayd_sock
.type
, ctx
, sock
, consumer_sockpoll
,
495 &msg
.u
.relayd_sock
.sock
, msg
.u
.relayd_sock
.session_id
,
496 msg
.u
.relayd_sock
.relayd_session_id
);
499 case LTTNG_CONSUMER_ADD_CHANNEL
:
501 struct lttng_consumer_channel
*new_channel
;
503 const uint64_t chunk_id
= msg
.u
.channel
.chunk_id
.value
;
505 health_code_update();
507 /* First send a status message before receiving the fds. */
508 ret
= consumer_send_status_msg(sock
, ret_code
);
510 /* Somehow, the session daemon is not responding anymore. */
514 health_code_update();
516 DBG("consumer_add_channel %" PRIu64
, msg
.u
.channel
.channel_key
);
517 new_channel
= consumer_allocate_channel(msg
.u
.channel
.channel_key
,
518 msg
.u
.channel
.session_id
,
519 msg
.u
.channel
.chunk_id
.is_set
?
521 msg
.u
.channel
.pathname
,
523 msg
.u
.channel
.relayd_id
, msg
.u
.channel
.output
,
524 msg
.u
.channel
.tracefile_size
,
525 msg
.u
.channel
.tracefile_count
, 0,
526 msg
.u
.channel
.monitor
,
527 msg
.u
.channel
.live_timer_interval
,
528 msg
.u
.channel
.is_live
,
530 if (new_channel
== NULL
) {
531 lttng_consumer_send_error(ctx
, LTTCOMM_CONSUMERD_OUTFD_ERROR
);
534 new_channel
->nb_init_stream_left
= msg
.u
.channel
.nb_init_streams
;
535 switch (msg
.u
.channel
.output
) {
536 case LTTNG_EVENT_SPLICE
:
537 new_channel
->output
= CONSUMER_CHANNEL_SPLICE
;
539 case LTTNG_EVENT_MMAP
:
540 new_channel
->output
= CONSUMER_CHANNEL_MMAP
;
543 ERR("Channel output unknown %d", msg
.u
.channel
.output
);
547 /* Translate and save channel type. */
548 switch (msg
.u
.channel
.type
) {
549 case CONSUMER_CHANNEL_TYPE_DATA
:
550 case CONSUMER_CHANNEL_TYPE_METADATA
:
551 new_channel
->type
= msg
.u
.channel
.type
;
558 health_code_update();
560 if (ctx
->on_recv_channel
!= NULL
) {
561 ret_recv
= ctx
->on_recv_channel(new_channel
);
563 ret
= consumer_add_channel(new_channel
, ctx
);
564 } else if (ret_recv
< 0) {
568 ret
= consumer_add_channel(new_channel
, ctx
);
570 if (msg
.u
.channel
.type
== CONSUMER_CHANNEL_TYPE_DATA
&& !ret
) {
571 int monitor_start_ret
;
573 DBG("Consumer starting monitor timer");
574 consumer_timer_live_start(new_channel
,
575 msg
.u
.channel
.live_timer_interval
);
576 monitor_start_ret
= consumer_timer_monitor_start(
578 msg
.u
.channel
.monitor_timer_interval
);
579 if (monitor_start_ret
< 0) {
580 ERR("Starting channel monitoring timer failed");
586 health_code_update();
588 /* If we received an error in add_channel, we need to report it. */
590 ret
= consumer_send_status_msg(sock
, ret
);
599 case LTTNG_CONSUMER_ADD_STREAM
:
602 struct lttng_pipe
*stream_pipe
;
603 struct lttng_consumer_stream
*new_stream
;
604 struct lttng_consumer_channel
*channel
;
608 * Get stream's channel reference. Needed when adding the stream to the
611 channel
= consumer_find_channel(msg
.u
.stream
.channel_key
);
614 * We could not find the channel. Can happen if cpu hotplug
615 * happens while tearing down.
617 ERR("Unable to find channel key %" PRIu64
, msg
.u
.stream
.channel_key
);
618 ret_code
= LTTCOMM_CONSUMERD_CHAN_NOT_FOUND
;
621 health_code_update();
623 /* First send a status message before receiving the fds. */
624 ret
= consumer_send_status_msg(sock
, ret_code
);
626 /* Somehow, the session daemon is not responding anymore. */
627 goto error_add_stream_fatal
;
630 health_code_update();
632 if (ret_code
!= LTTCOMM_CONSUMERD_SUCCESS
) {
633 /* Channel was not found. */
634 goto error_add_stream_nosignal
;
639 ret
= lttng_consumer_poll_socket(consumer_sockpoll
);
642 goto error_add_stream_fatal
;
645 health_code_update();
647 /* Get stream file descriptor from socket */
648 ret
= lttcomm_recv_fds_unix_sock(sock
, &fd
, 1);
649 if (ret
!= sizeof(fd
)) {
650 lttng_consumer_send_error(ctx
, LTTCOMM_CONSUMERD_ERROR_RECV_FD
);
654 health_code_update();
657 * Send status code to session daemon only if the recv works. If the
658 * above recv() failed, the session daemon is notified through the
659 * error socket and the teardown is eventually done.
661 ret
= consumer_send_status_msg(sock
, ret_code
);
663 /* Somehow, the session daemon is not responding anymore. */
664 goto error_add_stream_nosignal
;
667 health_code_update();
669 pthread_mutex_lock(&channel
->lock
);
670 new_stream
= consumer_stream_create(
677 channel
->trace_chunk
,
682 if (new_stream
== NULL
) {
687 lttng_consumer_send_error(ctx
, LTTCOMM_CONSUMERD_OUTFD_ERROR
);
690 pthread_mutex_unlock(&channel
->lock
);
691 goto error_add_stream_nosignal
;
694 new_stream
->wait_fd
= fd
;
695 ret
= kernctl_get_max_subbuf_size(new_stream
->wait_fd
,
696 &new_stream
->max_sb_size
);
698 pthread_mutex_unlock(&channel
->lock
);
699 ERR("Failed to get kernel maximal subbuffer size");
700 goto error_add_stream_nosignal
;
703 consumer_stream_update_channel_attributes(new_stream
,
707 * We've just assigned the channel to the stream so increment the
708 * refcount right now. We don't need to increment the refcount for
709 * streams in no monitor because we handle manually the cleanup of
710 * those. It is very important to make sure there is NO prior
711 * consumer_del_stream() calls or else the refcount will be unbalanced.
713 if (channel
->monitor
) {
714 uatomic_inc(&new_stream
->chan
->refcount
);
718 * The buffer flush is done on the session daemon side for the kernel
719 * so no need for the stream "hangup_flush_done" variable to be
720 * tracked. This is important for a kernel stream since we don't rely
721 * on the flush state of the stream to read data. It's not the case for
722 * user space tracing.
724 new_stream
->hangup_flush_done
= 0;
726 health_code_update();
728 pthread_mutex_lock(&new_stream
->lock
);
729 if (ctx
->on_recv_stream
) {
730 ret
= ctx
->on_recv_stream(new_stream
);
732 pthread_mutex_unlock(&new_stream
->lock
);
733 pthread_mutex_unlock(&channel
->lock
);
734 consumer_stream_free(new_stream
);
735 goto error_add_stream_nosignal
;
738 health_code_update();
740 if (new_stream
->metadata_flag
) {
741 channel
->metadata_stream
= new_stream
;
744 /* Do not monitor this stream. */
745 if (!channel
->monitor
) {
746 DBG("Kernel consumer add stream %s in no monitor mode with "
747 "relayd id %" PRIu64
, new_stream
->name
,
748 new_stream
->net_seq_idx
);
749 cds_list_add(&new_stream
->send_node
, &channel
->streams
.head
);
750 pthread_mutex_unlock(&new_stream
->lock
);
751 pthread_mutex_unlock(&channel
->lock
);
755 /* Send stream to relayd if the stream has an ID. */
756 if (new_stream
->net_seq_idx
!= (uint64_t) -1ULL) {
757 ret
= consumer_send_relayd_stream(new_stream
,
758 new_stream
->chan
->pathname
);
760 pthread_mutex_unlock(&new_stream
->lock
);
761 pthread_mutex_unlock(&channel
->lock
);
762 consumer_stream_free(new_stream
);
763 goto error_add_stream_nosignal
;
767 * If adding an extra stream to an already
768 * existing channel (e.g. cpu hotplug), we need
769 * to send the "streams_sent" command to relayd.
771 if (channel
->streams_sent_to_relayd
) {
772 ret
= consumer_send_relayd_streams_sent(
773 new_stream
->net_seq_idx
);
775 pthread_mutex_unlock(&new_stream
->lock
);
776 pthread_mutex_unlock(&channel
->lock
);
777 goto error_add_stream_nosignal
;
781 pthread_mutex_unlock(&new_stream
->lock
);
782 pthread_mutex_unlock(&channel
->lock
);
784 /* Get the right pipe where the stream will be sent. */
785 if (new_stream
->metadata_flag
) {
786 consumer_add_metadata_stream(new_stream
);
787 stream_pipe
= ctx
->consumer_metadata_pipe
;
789 consumer_add_data_stream(new_stream
);
790 stream_pipe
= ctx
->consumer_data_pipe
;
793 /* Visible to other threads */
794 new_stream
->globally_visible
= 1;
796 health_code_update();
798 ret
= lttng_pipe_write(stream_pipe
, &new_stream
, sizeof(new_stream
));
800 ERR("Consumer write %s stream to pipe %d",
801 new_stream
->metadata_flag
? "metadata" : "data",
802 lttng_pipe_get_writefd(stream_pipe
));
803 if (new_stream
->metadata_flag
) {
804 consumer_del_stream_for_metadata(new_stream
);
806 consumer_del_stream_for_data(new_stream
);
808 goto error_add_stream_nosignal
;
811 DBG("Kernel consumer ADD_STREAM %s (fd: %d) %s with relayd id %" PRIu64
,
812 new_stream
->name
, fd
, new_stream
->chan
->pathname
, new_stream
->relayd_stream_id
);
815 error_add_stream_nosignal
:
817 error_add_stream_fatal
:
820 case LTTNG_CONSUMER_STREAMS_SENT
:
822 struct lttng_consumer_channel
*channel
;
825 * Get stream's channel reference. Needed when adding the stream to the
828 channel
= consumer_find_channel(msg
.u
.sent_streams
.channel_key
);
831 * We could not find the channel. Can happen if cpu hotplug
832 * happens while tearing down.
834 ERR("Unable to find channel key %" PRIu64
,
835 msg
.u
.sent_streams
.channel_key
);
836 ret_code
= LTTCOMM_CONSUMERD_CHAN_NOT_FOUND
;
839 health_code_update();
842 * Send status code to session daemon.
844 ret
= consumer_send_status_msg(sock
, ret_code
);
845 if (ret
< 0 || ret_code
!= LTTCOMM_CONSUMERD_SUCCESS
) {
846 /* Somehow, the session daemon is not responding anymore. */
847 goto error_streams_sent_nosignal
;
850 health_code_update();
853 * We should not send this message if we don't monitor the
854 * streams in this channel.
856 if (!channel
->monitor
) {
857 goto end_error_streams_sent
;
860 health_code_update();
861 /* Send stream to relayd if the stream has an ID. */
862 if (msg
.u
.sent_streams
.net_seq_idx
!= (uint64_t) -1ULL) {
863 ret
= consumer_send_relayd_streams_sent(
864 msg
.u
.sent_streams
.net_seq_idx
);
866 goto error_streams_sent_nosignal
;
868 channel
->streams_sent_to_relayd
= true;
870 end_error_streams_sent
:
872 error_streams_sent_nosignal
:
875 case LTTNG_CONSUMER_UPDATE_STREAM
:
880 case LTTNG_CONSUMER_DESTROY_RELAYD
:
882 uint64_t index
= msg
.u
.destroy_relayd
.net_seq_idx
;
883 struct consumer_relayd_sock_pair
*relayd
;
885 DBG("Kernel consumer destroying relayd %" PRIu64
, index
);
887 /* Get relayd reference if exists. */
888 relayd
= consumer_find_relayd(index
);
889 if (relayd
== NULL
) {
890 DBG("Unable to find relayd %" PRIu64
, index
);
891 ret_code
= LTTCOMM_CONSUMERD_RELAYD_FAIL
;
895 * Each relayd socket pair has a refcount of stream attached to it
896 * which tells if the relayd is still active or not depending on the
899 * This will set the destroy flag of the relayd object and destroy it
900 * if the refcount reaches zero when called.
902 * The destroy can happen either here or when a stream fd hangs up.
905 consumer_flag_relayd_for_destroy(relayd
);
908 health_code_update();
910 ret
= consumer_send_status_msg(sock
, ret_code
);
912 /* Somehow, the session daemon is not responding anymore. */
918 case LTTNG_CONSUMER_DATA_PENDING
:
921 uint64_t id
= msg
.u
.data_pending
.session_id
;
923 DBG("Kernel consumer data pending command for id %" PRIu64
, id
);
925 ret
= consumer_data_pending(id
);
927 health_code_update();
929 /* Send back returned value to session daemon */
930 ret
= lttcomm_send_unix_sock(sock
, &ret
, sizeof(ret
));
932 PERROR("send data pending ret code");
937 * No need to send back a status message since the data pending
938 * returned value is the response.
942 case LTTNG_CONSUMER_SNAPSHOT_CHANNEL
:
944 struct lttng_consumer_channel
*channel
;
945 uint64_t key
= msg
.u
.snapshot_channel
.key
;
947 channel
= consumer_find_channel(key
);
949 ERR("Channel %" PRIu64
" not found", key
);
950 ret_code
= LTTCOMM_CONSUMERD_CHAN_NOT_FOUND
;
952 pthread_mutex_lock(&channel
->lock
);
953 if (msg
.u
.snapshot_channel
.metadata
== 1) {
954 ret
= lttng_kconsumer_snapshot_metadata(channel
, key
,
955 msg
.u
.snapshot_channel
.pathname
,
956 msg
.u
.snapshot_channel
.relayd_id
, ctx
);
958 ERR("Snapshot metadata failed");
959 ret_code
= LTTCOMM_CONSUMERD_SNAPSHOT_FAILED
;
962 ret
= lttng_kconsumer_snapshot_channel(channel
, key
,
963 msg
.u
.snapshot_channel
.pathname
,
964 msg
.u
.snapshot_channel
.relayd_id
,
965 msg
.u
.snapshot_channel
.nb_packets_per_stream
,
968 ERR("Snapshot channel failed");
969 ret_code
= LTTCOMM_CONSUMERD_SNAPSHOT_FAILED
;
972 pthread_mutex_unlock(&channel
->lock
);
974 health_code_update();
976 ret
= consumer_send_status_msg(sock
, ret_code
);
978 /* Somehow, the session daemon is not responding anymore. */
983 case LTTNG_CONSUMER_DESTROY_CHANNEL
:
985 uint64_t key
= msg
.u
.destroy_channel
.key
;
986 struct lttng_consumer_channel
*channel
;
988 channel
= consumer_find_channel(key
);
990 ERR("Kernel consumer destroy channel %" PRIu64
" not found", key
);
991 ret_code
= LTTCOMM_CONSUMERD_CHAN_NOT_FOUND
;
994 health_code_update();
996 ret
= consumer_send_status_msg(sock
, ret_code
);
998 /* Somehow, the session daemon is not responding anymore. */
999 goto end_destroy_channel
;
1002 health_code_update();
1004 /* Stop right now if no channel was found. */
1006 goto end_destroy_channel
;
1010 * This command should ONLY be issued for channel with streams set in
1013 assert(!channel
->monitor
);
1016 * The refcount should ALWAYS be 0 in the case of a channel in no
1019 assert(!uatomic_sub_return(&channel
->refcount
, 1));
1021 consumer_del_channel(channel
);
1022 end_destroy_channel
:
1025 case LTTNG_CONSUMER_DISCARDED_EVENTS
:
1029 struct lttng_consumer_channel
*channel
;
1030 uint64_t id
= msg
.u
.discarded_events
.session_id
;
1031 uint64_t key
= msg
.u
.discarded_events
.channel_key
;
1033 DBG("Kernel consumer discarded events command for session id %"
1034 PRIu64
", channel key %" PRIu64
, id
, key
);
1036 channel
= consumer_find_channel(key
);
1038 ERR("Kernel consumer discarded events channel %"
1039 PRIu64
" not found", key
);
1042 count
= channel
->discarded_events
;
1045 health_code_update();
1047 /* Send back returned value to session daemon */
1048 ret
= lttcomm_send_unix_sock(sock
, &count
, sizeof(count
));
1050 PERROR("send discarded events");
1056 case LTTNG_CONSUMER_LOST_PACKETS
:
1060 struct lttng_consumer_channel
*channel
;
1061 uint64_t id
= msg
.u
.lost_packets
.session_id
;
1062 uint64_t key
= msg
.u
.lost_packets
.channel_key
;
1064 DBG("Kernel consumer lost packets command for session id %"
1065 PRIu64
", channel key %" PRIu64
, id
, key
);
1067 channel
= consumer_find_channel(key
);
1069 ERR("Kernel consumer lost packets channel %"
1070 PRIu64
" not found", key
);
1073 count
= channel
->lost_packets
;
1076 health_code_update();
1078 /* Send back returned value to session daemon */
1079 ret
= lttcomm_send_unix_sock(sock
, &count
, sizeof(count
));
1081 PERROR("send lost packets");
1087 case LTTNG_CONSUMER_SET_CHANNEL_MONITOR_PIPE
:
1089 int channel_monitor_pipe
;
1091 ret_code
= LTTCOMM_CONSUMERD_SUCCESS
;
1092 /* Successfully received the command's type. */
1093 ret
= consumer_send_status_msg(sock
, ret_code
);
1098 ret
= lttcomm_recv_fds_unix_sock(sock
, &channel_monitor_pipe
,
1100 if (ret
!= sizeof(channel_monitor_pipe
)) {
1101 ERR("Failed to receive channel monitor pipe");
1105 DBG("Received channel monitor pipe (%d)", channel_monitor_pipe
);
1106 ret
= consumer_timer_thread_set_channel_monitor_pipe(
1107 channel_monitor_pipe
);
1111 ret_code
= LTTCOMM_CONSUMERD_SUCCESS
;
1112 /* Set the pipe as non-blocking. */
1113 ret
= fcntl(channel_monitor_pipe
, F_GETFL
, 0);
1115 PERROR("fcntl get flags of the channel monitoring pipe");
1120 ret
= fcntl(channel_monitor_pipe
, F_SETFL
,
1121 flags
| O_NONBLOCK
);
1123 PERROR("fcntl set O_NONBLOCK flag of the channel monitoring pipe");
1126 DBG("Channel monitor pipe set as non-blocking");
1128 ret_code
= LTTCOMM_CONSUMERD_ALREADY_SET
;
1130 ret
= consumer_send_status_msg(sock
, ret_code
);
1136 case LTTNG_CONSUMER_ROTATE_CHANNEL
:
1138 struct lttng_consumer_channel
*channel
;
1139 uint64_t key
= msg
.u
.rotate_channel
.key
;
1141 DBG("Consumer rotate channel %" PRIu64
, key
);
1143 channel
= consumer_find_channel(key
);
1145 ERR("Channel %" PRIu64
" not found", key
);
1146 ret_code
= LTTCOMM_CONSUMERD_CHAN_NOT_FOUND
;
1149 * Sample the rotate position of all the streams in this channel.
1151 ret
= lttng_consumer_rotate_channel(channel
, key
,
1152 msg
.u
.rotate_channel
.relayd_id
,
1153 msg
.u
.rotate_channel
.metadata
,
1156 ERR("Rotate channel failed");
1157 ret_code
= LTTCOMM_CONSUMERD_ROTATION_FAIL
;
1160 health_code_update();
1162 ret
= consumer_send_status_msg(sock
, ret_code
);
1164 /* Somehow, the session daemon is not responding anymore. */
1165 goto error_rotate_channel
;
1168 /* Rotate the streams that are ready right now. */
1169 ret
= lttng_consumer_rotate_ready_streams(
1172 ERR("Rotate ready streams failed");
1176 error_rotate_channel
:
1179 case LTTNG_CONSUMER_INIT
:
1181 ret_code
= lttng_consumer_init_command(ctx
,
1182 msg
.u
.init
.sessiond_uuid
);
1183 health_code_update();
1184 ret
= consumer_send_status_msg(sock
, ret_code
);
1186 /* Somehow, the session daemon is not responding anymore. */
1191 case LTTNG_CONSUMER_CREATE_TRACE_CHUNK
:
1193 const struct lttng_credentials credentials
= {
1194 .uid
= msg
.u
.create_trace_chunk
.credentials
.value
.uid
,
1195 .gid
= msg
.u
.create_trace_chunk
.credentials
.value
.gid
,
1197 const bool is_local_trace
=
1198 !msg
.u
.create_trace_chunk
.relayd_id
.is_set
;
1199 const uint64_t relayd_id
=
1200 msg
.u
.create_trace_chunk
.relayd_id
.value
;
1201 const char *chunk_override_name
=
1202 *msg
.u
.create_trace_chunk
.override_name
?
1203 msg
.u
.create_trace_chunk
.override_name
:
1205 LTTNG_OPTIONAL(struct lttng_directory_handle
) chunk_directory_handle
=
1206 LTTNG_OPTIONAL_INIT
;
1209 * The session daemon will only provide a chunk directory file
1210 * descriptor for local traces.
1212 if (is_local_trace
) {
1215 /* Acknowledge the reception of the command. */
1216 ret
= consumer_send_status_msg(sock
,
1217 LTTCOMM_CONSUMERD_SUCCESS
);
1219 /* Somehow, the session daemon is not responding anymore. */
1223 ret
= lttcomm_recv_fds_unix_sock(sock
, &chunk_dirfd
, 1);
1224 if (ret
!= sizeof(chunk_dirfd
)) {
1225 ERR("Failed to receive trace chunk directory file descriptor");
1229 DBG("Received trace chunk directory fd (%d)",
1231 ret
= lttng_directory_handle_init_from_dirfd(
1232 &chunk_directory_handle
.value
,
1235 ERR("Failed to initialize chunk directory handle from directory file descriptor");
1236 if (close(chunk_dirfd
)) {
1237 PERROR("Failed to close chunk directory file descriptor");
1241 chunk_directory_handle
.is_set
= true;
1244 ret_code
= lttng_consumer_create_trace_chunk(
1245 !is_local_trace
? &relayd_id
: NULL
,
1246 msg
.u
.create_trace_chunk
.session_id
,
1247 msg
.u
.create_trace_chunk
.chunk_id
,
1248 (time_t) msg
.u
.create_trace_chunk
1249 .creation_timestamp
,
1250 chunk_override_name
,
1251 msg
.u
.create_trace_chunk
.credentials
.is_set
?
1254 chunk_directory_handle
.is_set
?
1255 &chunk_directory_handle
.value
:
1258 if (chunk_directory_handle
.is_set
) {
1259 lttng_directory_handle_fini(
1260 &chunk_directory_handle
.value
);
1262 goto end_msg_sessiond
;
1264 case LTTNG_CONSUMER_CLOSE_TRACE_CHUNK
:
1266 enum lttng_trace_chunk_command_type close_command
=
1267 msg
.u
.close_trace_chunk
.close_command
.value
;
1268 const uint64_t relayd_id
=
1269 msg
.u
.close_trace_chunk
.relayd_id
.value
;
1270 struct lttcomm_consumer_close_trace_chunk_reply reply
;
1271 char path
[LTTNG_PATH_MAX
];
1273 ret_code
= lttng_consumer_close_trace_chunk(
1274 msg
.u
.close_trace_chunk
.relayd_id
.is_set
?
1277 msg
.u
.close_trace_chunk
.session_id
,
1278 msg
.u
.close_trace_chunk
.chunk_id
,
1279 (time_t) msg
.u
.close_trace_chunk
.close_timestamp
,
1280 msg
.u
.close_trace_chunk
.close_command
.is_set
?
1283 reply
.ret_code
= ret_code
;
1284 reply
.path_length
= strlen(path
) + 1;
1285 ret
= lttcomm_send_unix_sock(sock
, &reply
, sizeof(reply
));
1286 if (ret
!= sizeof(reply
)) {
1289 ret
= lttcomm_send_unix_sock(sock
, path
, reply
.path_length
);
1290 if (ret
!= reply
.path_length
) {
1295 case LTTNG_CONSUMER_TRACE_CHUNK_EXISTS
:
1297 const uint64_t relayd_id
=
1298 msg
.u
.trace_chunk_exists
.relayd_id
.value
;
1300 ret_code
= lttng_consumer_trace_chunk_exists(
1301 msg
.u
.trace_chunk_exists
.relayd_id
.is_set
?
1303 msg
.u
.trace_chunk_exists
.session_id
,
1304 msg
.u
.trace_chunk_exists
.chunk_id
);
1305 goto end_msg_sessiond
;
1313 * Return 1 to indicate success since the 0 value can be a socket
1314 * shutdown during the recv() or send() call.
1319 /* This will issue a consumer stop. */
1324 * The returned value here is not useful since either way we'll return 1 to
1325 * the caller because the session daemon socket management is done
1326 * elsewhere. Returning a negative code or 0 will shutdown the consumer.
1328 ret
= consumer_send_status_msg(sock
, ret_code
);
1334 health_code_update();
1340 * Sync metadata, meaning request it from the session daemon and snapshot it so
1341 * the metadata thread can consume it.
1343 * Metadata stream lock MUST be acquired.
1345 * Return 0 if new metadata is available, EAGAIN if the metadata stream
1346 * is empty or a negative value on error.
/*
 * lttng_kconsumer_sync_metadata: flush the kernel metadata stream buffer and
 * then take a snapshot of it. Both operations go through metadata->wait_fd.
 *
 * NOTE(review): the error checks, jumps and the return statement are not
 * visible in this listing, so the exact values returned cannot be confirmed
 * from here.
 */
1348 int lttng_kconsumer_sync_metadata(struct lttng_consumer_stream
*metadata
)
/* Step 1: flush the metadata buffer; a failure is logged with ERR. */
1354 ret
= kernctl_buffer_flush(metadata
->wait_fd
);
1356 ERR("Failed to flush kernel stream");
/* Step 2: snapshot the metadata buffer that was just flushed. */
1360 ret
= kernctl_snapshot(metadata
->wait_fd
);
/* Any snapshot result other than -EAGAIN is reported as a real failure. */
1362 if (ret
!= -EAGAIN
) {
1363 ERR("Sync metadata, taking kernel snapshot failed.");
/* Presumably reached for -EAGAIN only: nothing new to read, debug log only. */
1366 DBG("Sync metadata, no new kernel metadata");
1367 /* No new metadata, exit. */
/*
 * extract_common_subbuffer_info: query the tracer, through stream->wait_fd,
 * for the two size properties of the current sub-buffer and store them in
 * subbuf->info.data:
 *   - subbuf_size
 *   - padded_subbuf_size
 *
 * It is called first by both the metadata and the data extraction helpers.
 * The return value is the result of the kernctl calls (int); the checks
 * between the calls are not visible in this listing.
 */
1377 int extract_common_subbuffer_info(struct lttng_consumer_stream
*stream
,
1378 struct stream_subbuffer
*subbuf
)
/* Sub-buffer size, written to info.data.subbuf_size. */
1382 ret
= kernctl_get_subbuf_size(
1383 stream
->wait_fd
, &subbuf
->info
.data
.subbuf_size
);
/* Padded sub-buffer size, written to info.data.padded_subbuf_size. */
1388 ret
= kernctl_get_padded_subbuf_size(
1389 stream
->wait_fd
, &subbuf
->info
.data
.padded_subbuf_size
);
/*
 * extract_metadata_subbuffer_info: gather the properties of a metadata
 * sub-buffer. It first collects the common size fields, then asks the tracer
 * for the metadata version and stores it in subbuf->info.metadata.version.
 *
 * Installed as the extract_subbuffer_info operation of metadata streams by
 * lttng_kconsumer_set_stream_ops.
 */
1399 int extract_metadata_subbuffer_info(struct lttng_consumer_stream
*stream
,
1400 struct stream_subbuffer
*subbuf
)
/* Sizes shared with data sub-buffers. */
1404 ret
= extract_common_subbuffer_info(stream
, subbuf
);
/* Metadata-specific property: the metadata version. */
1409 ret
= kernctl_get_metadata_version(
1410 stream
->wait_fd
, &subbuf
->info
.metadata
.version
);
/*
 * extract_data_subbuffer_info: gather the properties of a data sub-buffer
 * from the tracer through stream->wait_fd and store them in
 * subbuf->info.data.
 *
 * In order: the common size fields, packet size, content size, begin and end
 * timestamps, discarded events count, sequence number, stream id and stream
 * instance id. Each failed query is reported with PERROR.
 *
 * The sequence number and the stream instance id are stored as a
 * value + is_set pair; for these two, a result of -ENOTTY is not logged as an
 * error (see the comments about older LTTng-modules below).
 *
 * Installed as the extract_subbuffer_info operation of non-metadata streams
 * by lttng_kconsumer_set_stream_ops.
 */
1420 int extract_data_subbuffer_info(struct lttng_consumer_stream
*stream
,
1421 struct stream_subbuffer
*subbuf
)
/* Sizes shared with metadata sub-buffers. */
1425 ret
= extract_common_subbuffer_info(stream
, subbuf
);
/* Mandatory per-packet properties follow. */
1430 ret
= kernctl_get_packet_size(
1431 stream
->wait_fd
, &subbuf
->info
.data
.packet_size
);
1433 PERROR("Failed to get sub-buffer packet size");
1437 ret
= kernctl_get_content_size(
1438 stream
->wait_fd
, &subbuf
->info
.data
.content_size
);
1440 PERROR("Failed to get sub-buffer content size");
1444 ret
= kernctl_get_timestamp_begin(
1445 stream
->wait_fd
, &subbuf
->info
.data
.timestamp_begin
);
1447 PERROR("Failed to get sub-buffer begin timestamp");
1451 ret
= kernctl_get_timestamp_end(
1452 stream
->wait_fd
, &subbuf
->info
.data
.timestamp_end
);
1454 PERROR("Failed to get sub-buffer end timestamp");
1458 ret
= kernctl_get_events_discarded(
1459 stream
->wait_fd
, &subbuf
->info
.data
.events_discarded
);
1461 PERROR("Failed to get sub-buffer events discarded count");
/*
 * Optional property: the query writes directly into the value member and
 * is_set is raised separately further down.
 */
1465 ret
= kernctl_get_sequence_number(stream
->wait_fd
,
1466 &subbuf
->info
.data
.sequence_number
.value
);
1468 /* May not be supported by older LTTng-modules. */
/* Only results other than -ENOTTY are logged as failures. */
1469 if (ret
!= -ENOTTY
) {
1470 PERROR("Failed to get sub-buffer sequence number");
/* Presumably reached only when the query succeeded - TODO confirm. */
1474 subbuf
->info
.data
.sequence_number
.is_set
= true;
/* Mandatory property: the stream id. */
1477 ret
= kernctl_get_stream_id(
1478 stream
->wait_fd
, &subbuf
->info
.data
.stream_id
);
1480 PERROR("Failed to get stream id");
/* Optional property, handled like the sequence number above. */
1484 ret
= kernctl_get_instance_id(stream
->wait_fd
,
1485 &subbuf
->info
.data
.stream_instance_id
.value
);
1487 /* May not be supported by older LTTng-modules. */
1488 if (ret
!= -ENOTTY
) {
1489 PERROR("Failed to get stream instance id");
/* Presumably reached only when the query succeeded - TODO confirm. */
1493 subbuf
->info
.data
.stream_instance_id
.is_set
= true;
/*
 * get_subbuffer_common: steps shared by the splice and mmap variants of
 * get_next_subbuffer. It asks the tracer for the next sub-buffer of
 * stream->wait_fd, then runs the extract_subbuffer_info operation installed
 * on the stream.
 *
 * NOTE(review): the arguments of the extract_subbuffer_info call are not
 * visible in this listing.
 */
1500 int get_subbuffer_common(struct lttng_consumer_stream
*stream
,
1501 struct stream_subbuffer
*subbuffer
)
/* Acquire the next sub-buffer from the tracer. */
1505 ret
= kernctl_get_next_subbuf(stream
->wait_fd
);
/* Dispatch to the metadata or data extractor chosen for this stream. */
1510 ret
= stream
->read_subbuffer_ops
.extract_subbuffer_info(
/*
 * get_next_subbuffer_splice: get_next_subbuffer operation for streams whose
 * channel output is not mmap. After the common acquisition step, the
 * sub-buffer is described by a file descriptor: subbuffer->buffer.fd is set
 * to stream->wait_fd.
 */
1517 int get_next_subbuffer_splice(struct lttng_consumer_stream
*stream
,
1518 struct stream_subbuffer
*subbuffer
)
/* Acquire the sub-buffer and fill in its info. */
1522 ret
= get_subbuffer_common(stream
, subbuffer
);
/* The content is exposed through the stream fd rather than a memory view. */
1527 subbuffer
->buffer
.fd
= stream
->wait_fd
;
/*
 * get_next_subbuffer_mmap: get_next_subbuffer operation for streams whose
 * channel output is CONSUMER_CHANNEL_MMAP. After the common acquisition
 * step, it looks up the address of the current sub-buffer and exposes it as
 * a buffer view in subbuffer->buffer.buffer.
 */
1533 int get_next_subbuffer_mmap(struct lttng_consumer_stream
*stream
,
1534 struct stream_subbuffer
*subbuffer
)
/* Acquire the sub-buffer and fill in its info. */
1539 ret
= get_subbuffer_common(stream
, subbuffer
);
/* Resolve where the current sub-buffer starts. */
1544 ret
= get_current_subbuf_addr(stream
, &addr
);
/* View starting at offset 0 and spanning the padded sub-buffer size. */
1549 subbuffer
->buffer
.buffer
= lttng_buffer_view_init(
1550 addr
, 0, subbuffer
->info
.data
.padded_subbuf_size
);
/*
 * get_next_subbuffer_metadata_check: get_next_subbuffer operation used for
 * live metadata streams when the tracer supports the metadata check variant
 * of get_next_subbuf. In addition to acquiring the sub-buffer, it records
 * the coherent flag in subbuffer->info.metadata.coherent, then exposes the
 * sub-buffer content as a buffer view.
 *
 * NOTE(review): the second argument of the kernctl call is not visible in
 * this listing; presumably it is the address of the local coherent flag.
 */
1556 int get_next_subbuffer_metadata_check(struct lttng_consumer_stream
*stream
,
1557 struct stream_subbuffer
*subbuffer
)
/* Acquire the next metadata sub-buffer. */
1563 ret
= kernctl_get_next_subbuf_metadata_check(stream
->wait_fd
,
/* Fill in the sub-buffer info through the stream extractor operation. */
1569 ret
= stream
->read_subbuffer_ops
.extract_subbuffer_info(
/* Publish the coherent flag as an optional value (marks it as set). */
1575 LTTNG_OPTIONAL_SET(&subbuffer
->info
.metadata
.coherent
, coherent
);
/* Resolve where the current sub-buffer starts. */
1577 ret
= get_current_subbuf_addr(stream
, &addr
);
/*
 * NOTE(review): the size is read through info.data here and through
 * info.metadata in the DBG below; presumably both alias the same field -
 * confirm against the stream_subbuffer definition.
 */
1582 subbuffer
->buffer
.buffer
= lttng_buffer_view_init(
1583 addr
, 0, subbuffer
->info
.data
.padded_subbuf_size
);
1584 DBG("Got metadata packet with padded_subbuf_size = %lu, coherent = %s",
1585 subbuffer
->info
.metadata
.padded_subbuf_size
,
1586 coherent
? "true" : "false");
/*
 * put_next_subbuffer: release the sub-buffer previously acquired on
 * stream->wait_fd. Installed as the put_next_subbuffer operation of every
 * kernel stream by lttng_kconsumer_set_stream_ops.
 *
 * Two results of the kernctl call are logged with PERROR: -EFAULT and -EIO.
 * The subbuffer parameter is not used by any visible line.
 */
1592 int put_next_subbuffer(struct lttng_consumer_stream
*stream
,
1593 struct stream_subbuffer
*subbuffer
)
/* Hand the sub-buffer back to the tracer. */
1595 const int ret
= kernctl_put_next_subbuf(stream
->wait_fd
);
1598 if (ret
== -EFAULT
) {
1599 PERROR("Error in unreserving sub buffer");
1600 } else if (ret
== -EIO
) {
1601 /* Should never happen with newer LTTng versions */
1602 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted");
/*
 * is_get_next_check_metadata_available: probe whether the tracer behind
 * tracer_fd supports the metadata check variant of get_next_subbuf. The
 * probe issues the call with a NULL second argument and compares its result.
 *
 * NOTE(review): the value the result is compared against is not visible in
 * this listing.
 */
1610 bool is_get_next_check_metadata_available(int tracer_fd
)
1612 return kernctl_get_next_subbuf_metadata_check(tracer_fd
, NULL
) !=
/*
 * lttng_kconsumer_set_stream_ops: install the read_subbuffer_ops callbacks
 * of a kernel stream.
 *
 *   - get_next_subbuffer: the metadata check variant for live metadata
 *     streams when the tracer supports it, otherwise the mmap or splice
 *     variant depending on the channel output type.
 *   - extract_subbuffer_info: the metadata or the data extractor depending
 *     on stream->metadata_flag.
 *   - send_live_beacon: consumer_flush_kernel_index, for live data streams.
 *   - put_next_subbuffer: always put_next_subbuffer.
 */
1617 int lttng_kconsumer_set_stream_ops(
1618 struct lttng_consumer_stream
*stream
)
/* Live metadata streams try to use metadata bucketization. */
1622 if (stream
->metadata_flag
&& stream
->chan
->is_live
) {
1623 DBG("Attempting to enable metadata bucketization for live consumers");
/* Probe the tracer for the metadata check operation. */
1624 if (is_get_next_check_metadata_available(stream
->wait_fd
)) {
1625 DBG("Kernel tracer supports get_next_subbuffer_metadata_check, metadata will be accumulated until a coherent state is reached");
1626 stream
->read_subbuffer_ops
.get_next_subbuffer
=
1627 get_next_subbuffer_metadata_check
;
1628 ret
= consumer_stream_enable_metadata_bucketization(
1635 * The kernel tracer version is too old to indicate
1636 * when the metadata stream has reached a "coherent"
1637 * (parseable) point.
1639 * This means that a live viewer may see an incoherent
1640 * sequence of metadata and fail to parse it.
1642 WARN("Kernel tracer does not support get_next_subbuffer_metadata_check which may cause live clients to fail to parse the metadata stream");
/* No bucketization in this case: drop the bucket and clear the pointer. */
1643 metadata_bucket_destroy(stream
->metadata_bucket
);
1644 stream
->metadata_bucket
= NULL
;
/* Nothing installed above: choose by channel output type. */
1648 if (!stream
->read_subbuffer_ops
.get_next_subbuffer
) {
1649 if (stream
->chan
->output
== CONSUMER_CHANNEL_MMAP
) {
1650 stream
->read_subbuffer_ops
.get_next_subbuffer
=
1651 get_next_subbuffer_mmap
;
1653 stream
->read_subbuffer_ops
.get_next_subbuffer
=
1654 get_next_subbuffer_splice
;
/* The info extractor depends on whether this is a metadata stream. */
1658 if (stream
->metadata_flag
) {
1659 stream
->read_subbuffer_ops
.extract_subbuffer_info
=
1660 extract_metadata_subbuffer_info
;
1662 stream
->read_subbuffer_ops
.extract_subbuffer_info
=
1663 extract_data_subbuffer_info
;
/* Live data streams also get a live beacon operation. */
1664 if (stream
->chan
->is_live
) {
1665 stream
->read_subbuffer_ops
.send_live_beacon
=
1666 consumer_flush_kernel_index
;
/* The release operation is the same for every kernel stream. */
1670 stream
->read_subbuffer_ops
.put_next_subbuffer
= put_next_subbuffer
;
/*
 * lttng_kconsumer_on_recv_stream: prepare a newly received kernel stream.
 *
 * Visible steps, in order:
 *   1. create the output files when the stream is not set for streaming
 *      (net_seq_idx is -1), its channel is monitored and the channel has a
 *      trace chunk;
 *   2. for LTTNG_EVENT_MMAP output, query the mmap length and map the stream
 *      fd read-only;
 *   3. install the stream read operations.
 *
 * Failures of steps 2 and 3 jump to error_close_fd, where out_fd is closed
 * if it is open (>= 0) and reset to -1.
 */
1675 int lttng_kconsumer_on_recv_stream(struct lttng_consumer_stream
*stream
)
1682 * Don't create anything if this is set for streaming or if there is
1683 * no current trace chunk on the parent channel.
1685 if (stream
->net_seq_idx
== (uint64_t) -1ULL && stream
->chan
->monitor
&&
1686 stream
->chan
->trace_chunk
) {
1687 ret
= consumer_stream_create_output_files(stream
, true);
1693 if (stream
->output
== LTTNG_EVENT_MMAP
) {
1694 /* get the len of the mmap region */
1695 unsigned long mmap_len
;
1697 ret
= kernctl_get_mmap_len(stream
->wait_fd
, &mmap_len
);
1699 PERROR("kernctl_get_mmap_len");
1700 goto error_close_fd
;
/* The tracer reports an unsigned long; the stream keeps it as a size_t. */
1702 stream
->mmap_len
= (size_t) mmap_len
;
/* Read-only private mapping of the whole region, from offset 0. */
1704 stream
->mmap_base
= mmap(NULL
, stream
->mmap_len
, PROT_READ
,
1705 MAP_PRIVATE
, stream
->wait_fd
, 0);
1706 if (stream
->mmap_base
== MAP_FAILED
) {
1707 PERROR("Error mmaping");
1709 goto error_close_fd
;
/* Install the read_subbuffer_ops callbacks for this stream. */
1713 ret
= lttng_kconsumer_set_stream_ops(stream
);
1715 goto error_close_fd
;
1718 /* we return 0 to let the library handle the FD internally */
/* Error path: close the output fd if one is open and mark it closed. */
1722 if (stream
->out_fd
>= 0) {
1725 err
= close(stream
->out_fd
);
1727 stream
->out_fd
= -1;
1734 * Check if data is still being extracted from the buffers for a specific
1735 * stream. The consumer data lock and the stream lock MUST be acquired before
1736 * calling this function.
1738 * Return 1 if the traced data is still being read, else 0, meaning that the
1739 * data is available for trace viewer reading.
/*
 * lttng_kconsumer_data_pending: report whether a stream still has unread
 * data by trying to acquire its next sub-buffer. When a sub-buffer is
 * obtained, it is put back right away and 1 (data pending) is the result.
 *
 * NOTE(review): the handling of a non-active endpoint and the final
 * not-pending result are not visible in this listing.
 */
1741 int lttng_kconsumer_data_pending(struct lttng_consumer_stream
*stream
)
/* Streams whose endpoint is not active are handled separately. */
1747 if (stream
->endpoint_status
!= CONSUMER_ENDPOINT_ACTIVE
) {
/* Probe: try to acquire the next sub-buffer. */
1752 ret
= kernctl_get_next_subbuf(stream
->wait_fd
);
1754 /* There is still data so let's put back this subbuffer. */
1755 ret
= kernctl_put_subbuf(stream
->wait_fd
);
1757 ret
= 1; /* Data is pending */
1761 /* Data is NOT pending and ready to be read. */