2 * Copyright (C) 2011 - Julien Desfossez <julien.desfossez@polymtl.ca>
3 * Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
4 * Copyright (C) 2017 - Jérémie Galarneau <jeremie.galarneau@efficios.com>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 #include <sys/socket.h>
28 #include <sys/types.h>
33 #include <bin/lttng-consumerd/health-consumerd.h>
34 #include <common/common.h>
35 #include <common/kernel-ctl/kernel-ctl.h>
36 #include <common/sessiond-comm/sessiond-comm.h>
37 #include <common/sessiond-comm/relayd.h>
38 #include <common/compat/fcntl.h>
39 #include <common/compat/endian.h>
40 #include <common/pipe.h>
41 #include <common/relayd/relayd.h>
42 #include <common/utils.h>
43 #include <common/consumer/consumer-stream.h>
44 #include <common/index/index.h>
45 #include <common/consumer/consumer-timer.h>
46 #include <common/optional.h>
47 #include <common/buffer-view.h>
48 #include <common/consumer/consumer.h>
51 #include "kernel-consumer.h"
/* Consumer-wide state and poll timeout; both are defined in another translation unit. */
extern struct lttng_consumer_global_data consumer_data;
extern int consumer_poll_timeout;
57 * Take a snapshot for a specific fd
59 * Returns 0 on success, < 0 on error
61 int lttng_kconsumer_take_snapshot(struct lttng_consumer_stream
*stream
)
64 int infd
= stream
->wait_fd
;
66 ret
= kernctl_snapshot(infd
);
68 * -EAGAIN is not an error, it just means that there is no data to
71 if (ret
!= 0 && ret
!= -EAGAIN
) {
72 PERROR("Getting sub-buffer snapshot.");
79 * Sample consumed and produced positions for a specific fd.
81 * Returns 0 on success, < 0 on error.
83 int lttng_kconsumer_sample_snapshot_positions(
84 struct lttng_consumer_stream
*stream
)
88 return kernctl_snapshot_sample_positions(stream
->wait_fd
);
92 * Get the produced position
94 * Returns 0 on success, < 0 on error
96 int lttng_kconsumer_get_produced_snapshot(struct lttng_consumer_stream
*stream
,
100 int infd
= stream
->wait_fd
;
102 ret
= kernctl_snapshot_get_produced(infd
, pos
);
104 PERROR("kernctl_snapshot_get_produced");
111 * Get the consumerd position
113 * Returns 0 on success, < 0 on error
115 int lttng_kconsumer_get_consumed_snapshot(struct lttng_consumer_stream
*stream
,
119 int infd
= stream
->wait_fd
;
121 ret
= kernctl_snapshot_get_consumed(infd
, pos
);
123 PERROR("kernctl_snapshot_get_consumed");
130 int get_current_subbuf_addr(struct lttng_consumer_stream
*stream
,
134 unsigned long mmap_offset
;
135 const char *mmap_base
= stream
->mmap_base
;
137 ret
= kernctl_get_mmap_read_offset(stream
->wait_fd
, &mmap_offset
);
139 PERROR("Failed to get mmap read offset");
143 *addr
= mmap_base
+ mmap_offset
;
/*
 * Take a snapshot of all the streams of a channel.
 * RCU read-side lock must be held across this function to ensure existence of
 * channel. The channel lock must be held by the caller.
 *
 * Returns 0 on success, < 0 on error
 */
155 static int lttng_kconsumer_snapshot_channel(
156 struct lttng_consumer_channel
*channel
,
157 uint64_t key
, char *path
, uint64_t relayd_id
,
158 uint64_t nb_packets_per_stream
,
159 struct lttng_consumer_local_data
*ctx
)
162 struct lttng_consumer_stream
*stream
;
164 DBG("Kernel consumer snapshot channel %" PRIu64
, key
);
168 /* Splice is not supported yet for channel snapshot. */
169 if (channel
->output
!= CONSUMER_CHANNEL_MMAP
) {
170 ERR("Unsupported output type for channel \"%s\": mmap output is required to record a snapshot",
176 cds_list_for_each_entry(stream
, &channel
->streams
.head
, send_node
) {
177 unsigned long consumed_pos
, produced_pos
;
179 health_code_update();
182 * Lock stream because we are about to change its state.
184 pthread_mutex_lock(&stream
->lock
);
186 assert(channel
->trace_chunk
);
187 if (!lttng_trace_chunk_get(channel
->trace_chunk
)) {
189 * Can't happen barring an internal error as the channel
190 * holds a reference to the trace chunk.
192 ERR("Failed to acquire reference to channel's trace chunk");
196 assert(!stream
->trace_chunk
);
197 stream
->trace_chunk
= channel
->trace_chunk
;
200 * Assign the received relayd ID so we can use it for streaming. The streams
201 * are not visible to anyone so this is OK to change it.
203 stream
->net_seq_idx
= relayd_id
;
204 channel
->relayd_id
= relayd_id
;
205 if (relayd_id
!= (uint64_t) -1ULL) {
206 ret
= consumer_send_relayd_stream(stream
, path
);
208 ERR("sending stream to relayd");
212 ret
= consumer_stream_create_output_files(stream
,
217 DBG("Kernel consumer snapshot stream (%" PRIu64
")",
221 ret
= kernctl_buffer_flush_empty(stream
->wait_fd
);
224 * Doing a buffer flush which does not take into
225 * account empty packets. This is not perfect
226 * for stream intersection, but required as a
227 * fall-back when "flush_empty" is not
228 * implemented by lttng-modules.
230 ret
= kernctl_buffer_flush(stream
->wait_fd
);
232 ERR("Failed to flush kernel stream");
238 ret
= lttng_kconsumer_take_snapshot(stream
);
240 ERR("Taking kernel snapshot");
244 ret
= lttng_kconsumer_get_produced_snapshot(stream
, &produced_pos
);
246 ERR("Produced kernel snapshot position");
250 ret
= lttng_kconsumer_get_consumed_snapshot(stream
, &consumed_pos
);
252 ERR("Consumerd kernel snapshot position");
256 consumed_pos
= consumer_get_consume_start_pos(consumed_pos
,
257 produced_pos
, nb_packets_per_stream
,
258 stream
->max_sb_size
);
260 while ((long) (consumed_pos
- produced_pos
) < 0) {
262 unsigned long len
, padded_len
;
263 const char *subbuf_addr
;
264 struct lttng_buffer_view subbuf_view
;
266 health_code_update();
267 DBG("Kernel consumer taking snapshot at pos %lu", consumed_pos
);
269 ret
= kernctl_get_subbuf(stream
->wait_fd
, &consumed_pos
);
271 if (ret
!= -EAGAIN
) {
272 PERROR("kernctl_get_subbuf snapshot");
275 DBG("Kernel consumer get subbuf failed. Skipping it.");
276 consumed_pos
+= stream
->max_sb_size
;
277 stream
->chan
->lost_packets
++;
281 ret
= kernctl_get_subbuf_size(stream
->wait_fd
, &len
);
283 ERR("Snapshot kernctl_get_subbuf_size");
284 goto error_put_subbuf
;
287 ret
= kernctl_get_padded_subbuf_size(stream
->wait_fd
, &padded_len
);
289 ERR("Snapshot kernctl_get_padded_subbuf_size");
290 goto error_put_subbuf
;
293 ret
= get_current_subbuf_addr(stream
, &subbuf_addr
);
295 goto error_put_subbuf
;
298 subbuf_view
= lttng_buffer_view_init(
299 subbuf_addr
, 0, padded_len
);
300 read_len
= lttng_consumer_on_read_subbuffer_mmap(ctx
,
301 stream
, &subbuf_view
,
304 * We write the padded len in local tracefiles but the data len
305 * when using a relay. Display the error but continue processing
306 * to try to release the subbuffer.
308 if (relayd_id
!= (uint64_t) -1ULL) {
309 if (read_len
!= len
) {
310 ERR("Error sending to the relay (ret: %zd != len: %lu)",
314 if (read_len
!= padded_len
) {
315 ERR("Error writing to tracefile (ret: %zd != len: %lu)",
316 read_len
, padded_len
);
320 ret
= kernctl_put_subbuf(stream
->wait_fd
);
322 ERR("Snapshot kernctl_put_subbuf");
325 consumed_pos
+= stream
->max_sb_size
;
328 if (relayd_id
== (uint64_t) -1ULL) {
329 if (stream
->out_fd
>= 0) {
330 ret
= close(stream
->out_fd
);
332 PERROR("Kernel consumer snapshot close out_fd");
338 close_relayd_stream(stream
);
339 stream
->net_seq_idx
= (uint64_t) -1ULL;
341 lttng_trace_chunk_put(stream
->trace_chunk
);
342 stream
->trace_chunk
= NULL
;
343 pthread_mutex_unlock(&stream
->lock
);
351 ret
= kernctl_put_subbuf(stream
->wait_fd
);
353 ERR("Snapshot kernctl_put_subbuf error path");
356 pthread_mutex_unlock(&stream
->lock
);
/*
 * Read the whole metadata available for a snapshot.
 * RCU read-side lock must be held across this function to ensure existence of
 * metadata_channel. The channel lock must be held by the caller.
 *
 * Returns 0 on success, < 0 on error
 */
369 static int lttng_kconsumer_snapshot_metadata(
370 struct lttng_consumer_channel
*metadata_channel
,
371 uint64_t key
, char *path
, uint64_t relayd_id
,
372 struct lttng_consumer_local_data
*ctx
)
374 int ret
, use_relayd
= 0;
376 struct lttng_consumer_stream
*metadata_stream
;
380 DBG("Kernel consumer snapshot metadata with key %" PRIu64
" at path %s",
385 metadata_stream
= metadata_channel
->metadata_stream
;
386 assert(metadata_stream
);
388 pthread_mutex_lock(&metadata_stream
->lock
);
389 assert(metadata_channel
->trace_chunk
);
390 assert(metadata_stream
->trace_chunk
);
392 /* Flag once that we have a valid relayd for the stream. */
393 if (relayd_id
!= (uint64_t) -1ULL) {
398 ret
= consumer_send_relayd_stream(metadata_stream
, path
);
403 ret
= consumer_stream_create_output_files(metadata_stream
,
411 health_code_update();
413 ret_read
= lttng_consumer_read_subbuffer(metadata_stream
, ctx
, true);
415 if (ret_read
!= -EAGAIN
) {
416 ERR("Kernel snapshot reading metadata subbuffer (ret: %zd)",
421 /* ret_read is negative at this point so we will exit the loop. */
424 } while (ret_read
>= 0);
427 close_relayd_stream(metadata_stream
);
428 metadata_stream
->net_seq_idx
= (uint64_t) -1ULL;
430 if (metadata_stream
->out_fd
>= 0) {
431 ret
= close(metadata_stream
->out_fd
);
433 PERROR("Kernel consumer snapshot metadata close out_fd");
435 * Don't go on error here since the snapshot was successful at this
436 * point but somehow the close failed.
439 metadata_stream
->out_fd
= -1;
440 lttng_trace_chunk_put(metadata_stream
->trace_chunk
);
441 metadata_stream
->trace_chunk
= NULL
;
447 pthread_mutex_unlock(&metadata_stream
->lock
);
448 cds_list_del(&metadata_stream
->send_node
);
449 consumer_stream_destroy(metadata_stream
, NULL
);
450 metadata_channel
->metadata_stream
= NULL
;
/*
 * Receive command from session daemon and process it.
 *
 * Return 1 on success else a negative value or 0.
 */
460 int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data
*ctx
,
461 int sock
, struct pollfd
*consumer_sockpoll
)
464 enum lttcomm_return_code ret_code
= LTTCOMM_CONSUMERD_SUCCESS
;
465 struct lttcomm_consumer_msg msg
;
467 health_code_update();
469 ret
= lttcomm_recv_unix_sock(sock
, &msg
, sizeof(msg
));
470 if (ret
!= sizeof(msg
)) {
472 lttng_consumer_send_error(ctx
, LTTCOMM_CONSUMERD_ERROR_RECV_CMD
);
478 health_code_update();
480 /* Deprecated command */
481 assert(msg
.cmd_type
!= LTTNG_CONSUMER_STOP
);
483 health_code_update();
485 /* relayd needs RCU read-side protection */
488 switch (msg
.cmd_type
) {
489 case LTTNG_CONSUMER_ADD_RELAYD_SOCKET
:
491 /* Session daemon status message are handled in the following call. */
492 consumer_add_relayd_socket(msg
.u
.relayd_sock
.net_index
,
493 msg
.u
.relayd_sock
.type
, ctx
, sock
, consumer_sockpoll
,
494 &msg
.u
.relayd_sock
.sock
, msg
.u
.relayd_sock
.session_id
,
495 msg
.u
.relayd_sock
.relayd_session_id
);
498 case LTTNG_CONSUMER_ADD_CHANNEL
:
500 struct lttng_consumer_channel
*new_channel
;
502 const uint64_t chunk_id
= msg
.u
.channel
.chunk_id
.value
;
504 health_code_update();
506 /* First send a status message before receiving the fds. */
507 ret
= consumer_send_status_msg(sock
, ret_code
);
509 /* Somehow, the session daemon is not responding anymore. */
513 health_code_update();
515 DBG("consumer_add_channel %" PRIu64
, msg
.u
.channel
.channel_key
);
516 new_channel
= consumer_allocate_channel(msg
.u
.channel
.channel_key
,
517 msg
.u
.channel
.session_id
,
518 msg
.u
.channel
.chunk_id
.is_set
?
520 msg
.u
.channel
.pathname
,
522 msg
.u
.channel
.relayd_id
, msg
.u
.channel
.output
,
523 msg
.u
.channel
.tracefile_size
,
524 msg
.u
.channel
.tracefile_count
, 0,
525 msg
.u
.channel
.monitor
,
526 msg
.u
.channel
.live_timer_interval
,
527 msg
.u
.channel
.is_live
,
529 if (new_channel
== NULL
) {
530 lttng_consumer_send_error(ctx
, LTTCOMM_CONSUMERD_OUTFD_ERROR
);
533 new_channel
->nb_init_stream_left
= msg
.u
.channel
.nb_init_streams
;
534 switch (msg
.u
.channel
.output
) {
535 case LTTNG_EVENT_SPLICE
:
536 new_channel
->output
= CONSUMER_CHANNEL_SPLICE
;
538 case LTTNG_EVENT_MMAP
:
539 new_channel
->output
= CONSUMER_CHANNEL_MMAP
;
542 ERR("Channel output unknown %d", msg
.u
.channel
.output
);
546 /* Translate and save channel type. */
547 switch (msg
.u
.channel
.type
) {
548 case CONSUMER_CHANNEL_TYPE_DATA
:
549 case CONSUMER_CHANNEL_TYPE_METADATA
:
550 new_channel
->type
= msg
.u
.channel
.type
;
557 health_code_update();
559 if (ctx
->on_recv_channel
!= NULL
) {
560 ret_recv
= ctx
->on_recv_channel(new_channel
);
562 ret
= consumer_add_channel(new_channel
, ctx
);
563 } else if (ret_recv
< 0) {
567 ret
= consumer_add_channel(new_channel
, ctx
);
569 if (msg
.u
.channel
.type
== CONSUMER_CHANNEL_TYPE_DATA
&& !ret
) {
570 int monitor_start_ret
;
572 DBG("Consumer starting monitor timer");
573 consumer_timer_live_start(new_channel
,
574 msg
.u
.channel
.live_timer_interval
);
575 monitor_start_ret
= consumer_timer_monitor_start(
577 msg
.u
.channel
.monitor_timer_interval
);
578 if (monitor_start_ret
< 0) {
579 ERR("Starting channel monitoring timer failed");
585 health_code_update();
587 /* If we received an error in add_channel, we need to report it. */
589 ret
= consumer_send_status_msg(sock
, ret
);
598 case LTTNG_CONSUMER_ADD_STREAM
:
601 struct lttng_pipe
*stream_pipe
;
602 struct lttng_consumer_stream
*new_stream
;
603 struct lttng_consumer_channel
*channel
;
607 * Get stream's channel reference. Needed when adding the stream to the
610 channel
= consumer_find_channel(msg
.u
.stream
.channel_key
);
613 * We could not find the channel. Can happen if cpu hotplug
614 * happens while tearing down.
616 ERR("Unable to find channel key %" PRIu64
, msg
.u
.stream
.channel_key
);
617 ret_code
= LTTCOMM_CONSUMERD_CHAN_NOT_FOUND
;
620 health_code_update();
622 /* First send a status message before receiving the fds. */
623 ret
= consumer_send_status_msg(sock
, ret_code
);
625 /* Somehow, the session daemon is not responding anymore. */
626 goto error_add_stream_fatal
;
629 health_code_update();
631 if (ret_code
!= LTTCOMM_CONSUMERD_SUCCESS
) {
632 /* Channel was not found. */
633 goto error_add_stream_nosignal
;
638 ret
= lttng_consumer_poll_socket(consumer_sockpoll
);
641 goto error_add_stream_fatal
;
644 health_code_update();
646 /* Get stream file descriptor from socket */
647 ret
= lttcomm_recv_fds_unix_sock(sock
, &fd
, 1);
648 if (ret
!= sizeof(fd
)) {
649 lttng_consumer_send_error(ctx
, LTTCOMM_CONSUMERD_ERROR_RECV_FD
);
653 health_code_update();
656 * Send status code to session daemon only if the recv works. If the
657 * above recv() failed, the session daemon is notified through the
658 * error socket and the teardown is eventually done.
660 ret
= consumer_send_status_msg(sock
, ret_code
);
662 /* Somehow, the session daemon is not responding anymore. */
663 goto error_add_stream_nosignal
;
666 health_code_update();
668 pthread_mutex_lock(&channel
->lock
);
669 new_stream
= consumer_stream_create(
676 channel
->trace_chunk
,
681 if (new_stream
== NULL
) {
686 lttng_consumer_send_error(ctx
, LTTCOMM_CONSUMERD_OUTFD_ERROR
);
689 pthread_mutex_unlock(&channel
->lock
);
690 goto error_add_stream_nosignal
;
693 new_stream
->wait_fd
= fd
;
694 ret
= kernctl_get_max_subbuf_size(new_stream
->wait_fd
,
695 &new_stream
->max_sb_size
);
697 pthread_mutex_unlock(&channel
->lock
);
698 ERR("Failed to get kernel maximal subbuffer size");
699 goto error_add_stream_nosignal
;
702 consumer_stream_update_channel_attributes(new_stream
,
706 * We've just assigned the channel to the stream so increment the
707 * refcount right now. We don't need to increment the refcount for
708 * streams in no monitor because we handle manually the cleanup of
709 * those. It is very important to make sure there is NO prior
710 * consumer_del_stream() calls or else the refcount will be unbalanced.
712 if (channel
->monitor
) {
713 uatomic_inc(&new_stream
->chan
->refcount
);
717 * The buffer flush is done on the session daemon side for the kernel
718 * so no need for the stream "hangup_flush_done" variable to be
719 * tracked. This is important for a kernel stream since we don't rely
720 * on the flush state of the stream to read data. It's not the case for
721 * user space tracing.
723 new_stream
->hangup_flush_done
= 0;
725 health_code_update();
727 pthread_mutex_lock(&new_stream
->lock
);
728 if (ctx
->on_recv_stream
) {
729 ret
= ctx
->on_recv_stream(new_stream
);
731 pthread_mutex_unlock(&new_stream
->lock
);
732 pthread_mutex_unlock(&channel
->lock
);
733 consumer_stream_free(new_stream
);
734 goto error_add_stream_nosignal
;
737 health_code_update();
739 if (new_stream
->metadata_flag
) {
740 channel
->metadata_stream
= new_stream
;
743 /* Do not monitor this stream. */
744 if (!channel
->monitor
) {
745 DBG("Kernel consumer add stream %s in no monitor mode with "
746 "relayd id %" PRIu64
, new_stream
->name
,
747 new_stream
->net_seq_idx
);
748 cds_list_add(&new_stream
->send_node
, &channel
->streams
.head
);
749 pthread_mutex_unlock(&new_stream
->lock
);
750 pthread_mutex_unlock(&channel
->lock
);
754 /* Send stream to relayd if the stream has an ID. */
755 if (new_stream
->net_seq_idx
!= (uint64_t) -1ULL) {
756 ret
= consumer_send_relayd_stream(new_stream
,
757 new_stream
->chan
->pathname
);
759 pthread_mutex_unlock(&new_stream
->lock
);
760 pthread_mutex_unlock(&channel
->lock
);
761 consumer_stream_free(new_stream
);
762 goto error_add_stream_nosignal
;
766 * If adding an extra stream to an already
767 * existing channel (e.g. cpu hotplug), we need
768 * to send the "streams_sent" command to relayd.
770 if (channel
->streams_sent_to_relayd
) {
771 ret
= consumer_send_relayd_streams_sent(
772 new_stream
->net_seq_idx
);
774 pthread_mutex_unlock(&new_stream
->lock
);
775 pthread_mutex_unlock(&channel
->lock
);
776 goto error_add_stream_nosignal
;
780 pthread_mutex_unlock(&new_stream
->lock
);
781 pthread_mutex_unlock(&channel
->lock
);
783 /* Get the right pipe where the stream will be sent. */
784 if (new_stream
->metadata_flag
) {
785 consumer_add_metadata_stream(new_stream
);
786 stream_pipe
= ctx
->consumer_metadata_pipe
;
788 consumer_add_data_stream(new_stream
);
789 stream_pipe
= ctx
->consumer_data_pipe
;
792 /* Visible to other threads */
793 new_stream
->globally_visible
= 1;
795 health_code_update();
797 ret
= lttng_pipe_write(stream_pipe
, &new_stream
, sizeof(new_stream
));
799 ERR("Consumer write %s stream to pipe %d",
800 new_stream
->metadata_flag
? "metadata" : "data",
801 lttng_pipe_get_writefd(stream_pipe
));
802 if (new_stream
->metadata_flag
) {
803 consumer_del_stream_for_metadata(new_stream
);
805 consumer_del_stream_for_data(new_stream
);
807 goto error_add_stream_nosignal
;
810 DBG("Kernel consumer ADD_STREAM %s (fd: %d) %s with relayd id %" PRIu64
,
811 new_stream
->name
, fd
, new_stream
->chan
->pathname
, new_stream
->relayd_stream_id
);
814 error_add_stream_nosignal
:
816 error_add_stream_fatal
:
819 case LTTNG_CONSUMER_STREAMS_SENT
:
821 struct lttng_consumer_channel
*channel
;
824 * Get stream's channel reference. Needed when adding the stream to the
827 channel
= consumer_find_channel(msg
.u
.sent_streams
.channel_key
);
830 * We could not find the channel. Can happen if cpu hotplug
831 * happens while tearing down.
833 ERR("Unable to find channel key %" PRIu64
,
834 msg
.u
.sent_streams
.channel_key
);
835 ret_code
= LTTCOMM_CONSUMERD_CHAN_NOT_FOUND
;
838 health_code_update();
841 * Send status code to session daemon.
843 ret
= consumer_send_status_msg(sock
, ret_code
);
844 if (ret
< 0 || ret_code
!= LTTCOMM_CONSUMERD_SUCCESS
) {
845 /* Somehow, the session daemon is not responding anymore. */
846 goto error_streams_sent_nosignal
;
849 health_code_update();
852 * We should not send this message if we don't monitor the
853 * streams in this channel.
855 if (!channel
->monitor
) {
856 goto end_error_streams_sent
;
859 health_code_update();
860 /* Send stream to relayd if the stream has an ID. */
861 if (msg
.u
.sent_streams
.net_seq_idx
!= (uint64_t) -1ULL) {
862 ret
= consumer_send_relayd_streams_sent(
863 msg
.u
.sent_streams
.net_seq_idx
);
865 goto error_streams_sent_nosignal
;
867 channel
->streams_sent_to_relayd
= true;
869 end_error_streams_sent
:
871 error_streams_sent_nosignal
:
874 case LTTNG_CONSUMER_UPDATE_STREAM
:
879 case LTTNG_CONSUMER_DESTROY_RELAYD
:
881 uint64_t index
= msg
.u
.destroy_relayd
.net_seq_idx
;
882 struct consumer_relayd_sock_pair
*relayd
;
884 DBG("Kernel consumer destroying relayd %" PRIu64
, index
);
886 /* Get relayd reference if exists. */
887 relayd
= consumer_find_relayd(index
);
888 if (relayd
== NULL
) {
889 DBG("Unable to find relayd %" PRIu64
, index
);
890 ret_code
= LTTCOMM_CONSUMERD_RELAYD_FAIL
;
894 * Each relayd socket pair has a refcount of stream attached to it
895 * which tells if the relayd is still active or not depending on the
898 * This will set the destroy flag of the relayd object and destroy it
899 * if the refcount reaches zero when called.
901 * The destroy can happen either here or when a stream fd hangs up.
904 consumer_flag_relayd_for_destroy(relayd
);
907 health_code_update();
909 ret
= consumer_send_status_msg(sock
, ret_code
);
911 /* Somehow, the session daemon is not responding anymore. */
917 case LTTNG_CONSUMER_DATA_PENDING
:
920 uint64_t id
= msg
.u
.data_pending
.session_id
;
922 DBG("Kernel consumer data pending command for id %" PRIu64
, id
);
924 ret
= consumer_data_pending(id
);
926 health_code_update();
928 /* Send back returned value to session daemon */
929 ret
= lttcomm_send_unix_sock(sock
, &ret
, sizeof(ret
));
931 PERROR("send data pending ret code");
936 * No need to send back a status message since the data pending
937 * returned value is the response.
941 case LTTNG_CONSUMER_SNAPSHOT_CHANNEL
:
943 struct lttng_consumer_channel
*channel
;
944 uint64_t key
= msg
.u
.snapshot_channel
.key
;
946 channel
= consumer_find_channel(key
);
948 ERR("Channel %" PRIu64
" not found", key
);
949 ret_code
= LTTCOMM_CONSUMERD_CHAN_NOT_FOUND
;
951 pthread_mutex_lock(&channel
->lock
);
952 if (msg
.u
.snapshot_channel
.metadata
== 1) {
953 ret
= lttng_kconsumer_snapshot_metadata(channel
, key
,
954 msg
.u
.snapshot_channel
.pathname
,
955 msg
.u
.snapshot_channel
.relayd_id
, ctx
);
957 ERR("Snapshot metadata failed");
958 ret_code
= LTTCOMM_CONSUMERD_SNAPSHOT_FAILED
;
961 ret
= lttng_kconsumer_snapshot_channel(channel
, key
,
962 msg
.u
.snapshot_channel
.pathname
,
963 msg
.u
.snapshot_channel
.relayd_id
,
964 msg
.u
.snapshot_channel
.nb_packets_per_stream
,
967 ERR("Snapshot channel failed");
968 ret_code
= LTTCOMM_CONSUMERD_SNAPSHOT_FAILED
;
971 pthread_mutex_unlock(&channel
->lock
);
973 health_code_update();
975 ret
= consumer_send_status_msg(sock
, ret_code
);
977 /* Somehow, the session daemon is not responding anymore. */
982 case LTTNG_CONSUMER_DESTROY_CHANNEL
:
984 uint64_t key
= msg
.u
.destroy_channel
.key
;
985 struct lttng_consumer_channel
*channel
;
987 channel
= consumer_find_channel(key
);
989 ERR("Kernel consumer destroy channel %" PRIu64
" not found", key
);
990 ret_code
= LTTCOMM_CONSUMERD_CHAN_NOT_FOUND
;
993 health_code_update();
995 ret
= consumer_send_status_msg(sock
, ret_code
);
997 /* Somehow, the session daemon is not responding anymore. */
998 goto end_destroy_channel
;
1001 health_code_update();
1003 /* Stop right now if no channel was found. */
1005 goto end_destroy_channel
;
1009 * This command should ONLY be issued for channel with streams set in
1012 assert(!channel
->monitor
);
1015 * The refcount should ALWAYS be 0 in the case of a channel in no
1018 assert(!uatomic_sub_return(&channel
->refcount
, 1));
1020 consumer_del_channel(channel
);
1021 end_destroy_channel
:
1024 case LTTNG_CONSUMER_DISCARDED_EVENTS
:
1028 struct lttng_consumer_channel
*channel
;
1029 uint64_t id
= msg
.u
.discarded_events
.session_id
;
1030 uint64_t key
= msg
.u
.discarded_events
.channel_key
;
1032 DBG("Kernel consumer discarded events command for session id %"
1033 PRIu64
", channel key %" PRIu64
, id
, key
);
1035 channel
= consumer_find_channel(key
);
1037 ERR("Kernel consumer discarded events channel %"
1038 PRIu64
" not found", key
);
1041 count
= channel
->discarded_events
;
1044 health_code_update();
1046 /* Send back returned value to session daemon */
1047 ret
= lttcomm_send_unix_sock(sock
, &count
, sizeof(count
));
1049 PERROR("send discarded events");
1055 case LTTNG_CONSUMER_LOST_PACKETS
:
1059 struct lttng_consumer_channel
*channel
;
1060 uint64_t id
= msg
.u
.lost_packets
.session_id
;
1061 uint64_t key
= msg
.u
.lost_packets
.channel_key
;
1063 DBG("Kernel consumer lost packets command for session id %"
1064 PRIu64
", channel key %" PRIu64
, id
, key
);
1066 channel
= consumer_find_channel(key
);
1068 ERR("Kernel consumer lost packets channel %"
1069 PRIu64
" not found", key
);
1072 count
= channel
->lost_packets
;
1075 health_code_update();
1077 /* Send back returned value to session daemon */
1078 ret
= lttcomm_send_unix_sock(sock
, &count
, sizeof(count
));
1080 PERROR("send lost packets");
1086 case LTTNG_CONSUMER_SET_CHANNEL_MONITOR_PIPE
:
1088 int channel_monitor_pipe
;
1090 ret_code
= LTTCOMM_CONSUMERD_SUCCESS
;
1091 /* Successfully received the command's type. */
1092 ret
= consumer_send_status_msg(sock
, ret_code
);
1097 ret
= lttcomm_recv_fds_unix_sock(sock
, &channel_monitor_pipe
,
1099 if (ret
!= sizeof(channel_monitor_pipe
)) {
1100 ERR("Failed to receive channel monitor pipe");
1104 DBG("Received channel monitor pipe (%d)", channel_monitor_pipe
);
1105 ret
= consumer_timer_thread_set_channel_monitor_pipe(
1106 channel_monitor_pipe
);
1110 ret_code
= LTTCOMM_CONSUMERD_SUCCESS
;
1111 /* Set the pipe as non-blocking. */
1112 ret
= fcntl(channel_monitor_pipe
, F_GETFL
, 0);
1114 PERROR("fcntl get flags of the channel monitoring pipe");
1119 ret
= fcntl(channel_monitor_pipe
, F_SETFL
,
1120 flags
| O_NONBLOCK
);
1122 PERROR("fcntl set O_NONBLOCK flag of the channel monitoring pipe");
1125 DBG("Channel monitor pipe set as non-blocking");
1127 ret_code
= LTTCOMM_CONSUMERD_ALREADY_SET
;
1129 ret
= consumer_send_status_msg(sock
, ret_code
);
1135 case LTTNG_CONSUMER_ROTATE_CHANNEL
:
1137 struct lttng_consumer_channel
*channel
;
1138 uint64_t key
= msg
.u
.rotate_channel
.key
;
1140 DBG("Consumer rotate channel %" PRIu64
, key
);
1142 channel
= consumer_find_channel(key
);
1144 ERR("Channel %" PRIu64
" not found", key
);
1145 ret_code
= LTTCOMM_CONSUMERD_CHAN_NOT_FOUND
;
1148 * Sample the rotate position of all the streams in this channel.
1150 ret
= lttng_consumer_rotate_channel(channel
, key
,
1151 msg
.u
.rotate_channel
.relayd_id
,
1152 msg
.u
.rotate_channel
.metadata
,
1155 ERR("Rotate channel failed");
1156 ret_code
= LTTCOMM_CONSUMERD_ROTATION_FAIL
;
1159 health_code_update();
1161 ret
= consumer_send_status_msg(sock
, ret_code
);
1163 /* Somehow, the session daemon is not responding anymore. */
1164 goto error_rotate_channel
;
1167 /* Rotate the streams that are ready right now. */
1168 ret
= lttng_consumer_rotate_ready_streams(
1171 ERR("Rotate ready streams failed");
1175 error_rotate_channel
:
1178 case LTTNG_CONSUMER_INIT
:
1180 ret_code
= lttng_consumer_init_command(ctx
,
1181 msg
.u
.init
.sessiond_uuid
);
1182 health_code_update();
1183 ret
= consumer_send_status_msg(sock
, ret_code
);
1185 /* Somehow, the session daemon is not responding anymore. */
1190 case LTTNG_CONSUMER_CREATE_TRACE_CHUNK
:
1192 const struct lttng_credentials credentials
= {
1193 .uid
= msg
.u
.create_trace_chunk
.credentials
.value
.uid
,
1194 .gid
= msg
.u
.create_trace_chunk
.credentials
.value
.gid
,
1196 const bool is_local_trace
=
1197 !msg
.u
.create_trace_chunk
.relayd_id
.is_set
;
1198 const uint64_t relayd_id
=
1199 msg
.u
.create_trace_chunk
.relayd_id
.value
;
1200 const char *chunk_override_name
=
1201 *msg
.u
.create_trace_chunk
.override_name
?
1202 msg
.u
.create_trace_chunk
.override_name
:
1204 LTTNG_OPTIONAL(struct lttng_directory_handle
) chunk_directory_handle
=
1205 LTTNG_OPTIONAL_INIT
;
1208 * The session daemon will only provide a chunk directory file
1209 * descriptor for local traces.
1211 if (is_local_trace
) {
/* Acknowledge the reception of the command. */
1215 ret
= consumer_send_status_msg(sock
,
1216 LTTCOMM_CONSUMERD_SUCCESS
);
1218 /* Somehow, the session daemon is not responding anymore. */
1222 ret
= lttcomm_recv_fds_unix_sock(sock
, &chunk_dirfd
, 1);
1223 if (ret
!= sizeof(chunk_dirfd
)) {
1224 ERR("Failed to receive trace chunk directory file descriptor");
1228 DBG("Received trace chunk directory fd (%d)",
1230 ret
= lttng_directory_handle_init_from_dirfd(
1231 &chunk_directory_handle
.value
,
1234 ERR("Failed to initialize chunk directory handle from directory file descriptor");
1235 if (close(chunk_dirfd
)) {
1236 PERROR("Failed to close chunk directory file descriptor");
1240 chunk_directory_handle
.is_set
= true;
1243 ret_code
= lttng_consumer_create_trace_chunk(
1244 !is_local_trace
? &relayd_id
: NULL
,
1245 msg
.u
.create_trace_chunk
.session_id
,
1246 msg
.u
.create_trace_chunk
.chunk_id
,
1247 (time_t) msg
.u
.create_trace_chunk
1248 .creation_timestamp
,
1249 chunk_override_name
,
1250 msg
.u
.create_trace_chunk
.credentials
.is_set
?
1253 chunk_directory_handle
.is_set
?
1254 &chunk_directory_handle
.value
:
1257 if (chunk_directory_handle
.is_set
) {
1258 lttng_directory_handle_fini(
1259 &chunk_directory_handle
.value
);
1261 goto end_msg_sessiond
;
1263 case LTTNG_CONSUMER_CLOSE_TRACE_CHUNK
:
1265 enum lttng_trace_chunk_command_type close_command
=
1266 msg
.u
.close_trace_chunk
.close_command
.value
;
1267 const uint64_t relayd_id
=
1268 msg
.u
.close_trace_chunk
.relayd_id
.value
;
1269 struct lttcomm_consumer_close_trace_chunk_reply reply
;
1270 char path
[LTTNG_PATH_MAX
];
1272 ret_code
= lttng_consumer_close_trace_chunk(
1273 msg
.u
.close_trace_chunk
.relayd_id
.is_set
?
1276 msg
.u
.close_trace_chunk
.session_id
,
1277 msg
.u
.close_trace_chunk
.chunk_id
,
1278 (time_t) msg
.u
.close_trace_chunk
.close_timestamp
,
1279 msg
.u
.close_trace_chunk
.close_command
.is_set
?
1282 reply
.ret_code
= ret_code
;
1283 reply
.path_length
= strlen(path
) + 1;
1284 ret
= lttcomm_send_unix_sock(sock
, &reply
, sizeof(reply
));
1285 if (ret
!= sizeof(reply
)) {
1288 ret
= lttcomm_send_unix_sock(sock
, path
, reply
.path_length
);
1289 if (ret
!= reply
.path_length
) {
1294 case LTTNG_CONSUMER_TRACE_CHUNK_EXISTS
:
1296 const uint64_t relayd_id
=
1297 msg
.u
.trace_chunk_exists
.relayd_id
.value
;
1299 ret_code
= lttng_consumer_trace_chunk_exists(
1300 msg
.u
.trace_chunk_exists
.relayd_id
.is_set
?
1302 msg
.u
.trace_chunk_exists
.session_id
,
1303 msg
.u
.trace_chunk_exists
.chunk_id
);
1304 goto end_msg_sessiond
;
1312 * Return 1 to indicate success since the 0 value can be a socket
1313 * shutdown during the recv() or send() call.
1318 /* This will issue a consumer stop. */
1323 * The returned value here is not useful since either way we'll return 1 to
1324 * the caller because the session daemon socket management is done
1325 * elsewhere. Returning a negative code or 0 will shutdown the consumer.
1327 ret
= consumer_send_status_msg(sock
, ret_code
);
1333 health_code_update();
/*
 * Sync metadata, meaning request them from the session daemon and take a
 * snapshot so that the metadata thread can consume them.
 *
 * Metadata stream lock MUST be acquired.
 *
 * Return 0 if new metadata is available, EAGAIN if the metadata stream
 * is empty or a negative value on error.
 */
/*
 * Visible steps: flush the metadata stream buffer with
 * kernctl_buffer_flush(), then take a snapshot with kernctl_snapshot(),
 * both on metadata->wait_fd.
 */
1347 int lttng_kconsumer_sync_metadata(struct lttng_consumer_stream
*metadata
)
1353 ret
= kernctl_buffer_flush(metadata
->wait_fd
);
1355 ERR("Failed to flush kernel stream");
1359 ret
= kernctl_snapshot(metadata
->wait_fd
);
/*
 * Only a snapshot result other than -EAGAIN is logged as a failure;
 * -EAGAIN is presumably the case reported by the DBG message below
 * (no new metadata) -- TODO confirm against the full function body.
 */
1361 if (ret
!= -EAGAIN
) {
1362 ERR("Sync metadata, taking kernel snapshot failed.");
1365 DBG("Sync metadata, no new kernel metadata");
1366 /* No new metadata, exit. */
/*
 * Query the two size fields that both the metadata and the data extractors
 * rely on: the sub-buffer size and the padded sub-buffer size. Both are read
 * from stream->wait_fd and stored in subbuf->info.data.
 */
1376 int extract_common_subbuffer_info(struct lttng_consumer_stream
*stream
,
1377 struct stream_subbuffer
*subbuf
)
/* Stored in subbuf->info.data.subbuf_size. */
1381 ret
= kernctl_get_subbuf_size(
1382 stream
->wait_fd
, &subbuf
->info
.data
.subbuf_size
);
/* Stored in subbuf->info.data.padded_subbuf_size. */
1387 ret
= kernctl_get_padded_subbuf_size(
1388 stream
->wait_fd
, &subbuf
->info
.data
.padded_subbuf_size
);
/*
 * Sub-buffer info extraction for a metadata stream: the common size fields
 * via extract_common_subbuffer_info(), then the metadata version, which is
 * stored in subbuf->info.metadata.version.
 */
1398 int extract_metadata_subbuffer_info(struct lttng_consumer_stream
*stream
,
1399 struct stream_subbuffer
*subbuf
)
1403 ret
= extract_common_subbuffer_info(stream
, subbuf
);
/* Metadata-specific field, read from stream->wait_fd. */
1408 ret
= kernctl_get_metadata_version(
1409 stream
->wait_fd
, &subbuf
->info
.metadata
.version
);
/*
 * Sub-buffer info extraction for a data stream. After the common size
 * fields, each of the following is queried from stream->wait_fd and stored
 * in subbuf->info.data: packet size, content size, begin and end timestamps,
 * discarded event count, sequence number, stream id and stream instance id.
 *
 * The sequence number and the stream instance id are stored as a
 * value/is_set pair; for those two queries a -ENOTTY result is not logged
 * with PERROR.
 */
1419 int extract_data_subbuffer_info(struct lttng_consumer_stream
*stream
,
1420 struct stream_subbuffer
*subbuf
)
1424 ret
= extract_common_subbuffer_info(stream
, subbuf
);
1429 ret
= kernctl_get_packet_size(
1430 stream
->wait_fd
, &subbuf
->info
.data
.packet_size
);
1432 PERROR("Failed to get sub-buffer packet size");
1436 ret
= kernctl_get_content_size(
1437 stream
->wait_fd
, &subbuf
->info
.data
.content_size
);
1439 PERROR("Failed to get sub-buffer content size");
1443 ret
= kernctl_get_timestamp_begin(
1444 stream
->wait_fd
, &subbuf
->info
.data
.timestamp_begin
);
1446 PERROR("Failed to get sub-buffer begin timestamp");
1450 ret
= kernctl_get_timestamp_end(
1451 stream
->wait_fd
, &subbuf
->info
.data
.timestamp_end
);
1453 PERROR("Failed to get sub-buffer end timestamp");
1457 ret
= kernctl_get_events_discarded(
1458 stream
->wait_fd
, &subbuf
->info
.data
.events_discarded
);
1460 PERROR("Failed to get sub-buffer events discarded count");
/* Optional field: the result lands in sequence_number.value. */
1464 ret
= kernctl_get_sequence_number(stream
->wait_fd
,
1465 &subbuf
->info
.data
.sequence_number
.value
);
1467 /* May not be supported by older LTTng-modules. */
1468 if (ret
!= -ENOTTY
) {
1469 PERROR("Failed to get sub-buffer sequence number");
/*
 * NOTE(review): presumably reached only when the query succeeded; the
 * surrounding branch lines are not visible here -- confirm.
 */
1473 subbuf
->info
.data
.sequence_number
.is_set
= true;
1476 ret
= kernctl_get_stream_id(
1477 stream
->wait_fd
, &subbuf
->info
.data
.stream_id
);
1479 PERROR("Failed to get stream id");
/* Optional field: the result lands in stream_instance_id.value. */
1483 ret
= kernctl_get_instance_id(stream
->wait_fd
,
1484 &subbuf
->info
.data
.stream_instance_id
.value
);
1486 /* May not be supported by older LTTng-modules. */
1487 if (ret
!= -ENOTTY
) {
1488 PERROR("Failed to get stream instance id");
/*
 * NOTE(review): presumably reached only when the query succeeded; the
 * surrounding branch lines are not visible here -- confirm.
 */
1492 subbuf
->info
.data
.stream_instance_id
.is_set
= true;
/*
 * Shared first half of the get_next_subbuffer implementations: acquire the
 * next sub-buffer on stream->wait_fd, then run the extract_subbuffer_info
 * operation installed in stream->read_subbuffer_ops.
 */
1499 int get_subbuffer_common(struct lttng_consumer_stream
*stream
,
1500 struct stream_subbuffer
*subbuffer
)
1504 ret
= kernctl_get_next_subbuf(stream
->wait_fd
);
/*
 * Dispatch through the per-stream operation table; the callback is either
 * extract_metadata_subbuffer_info or extract_data_subbuffer_info, as
 * selected in lttng_kconsumer_set_stream_ops().
 */
1509 ret
= stream
->read_subbuffer_ops
.extract_subbuffer_info(
/*
 * get_next_subbuffer implementation that hands the sub-buffer over as a file
 * descriptor: after get_subbuffer_common(), subbuffer->buffer.fd is set to
 * the stream wait_fd.
 */
1516 int get_next_subbuffer_splice(struct lttng_consumer_stream
*stream
,
1517 struct stream_subbuffer
*subbuffer
)
1521 ret
= get_subbuffer_common(stream
, subbuffer
);
/* The fd variant of the buffer is used here, not the buffer view. */
1526 subbuffer
->buffer
.fd
= stream
->wait_fd
;
/*
 * get_next_subbuffer implementation that hands the sub-buffer over as a
 * buffer view: after get_subbuffer_common(), the current sub-buffer address
 * is looked up and wrapped with lttng_buffer_view_init().
 */
1532 int get_next_subbuffer_mmap(struct lttng_consumer_stream
*stream
,
1533 struct stream_subbuffer
*subbuffer
)
1538 ret
= get_subbuffer_common(stream
, subbuffer
);
1543 ret
= get_current_subbuf_addr(stream
, &addr
);
/*
 * The view is built from addr with arguments 0 and padded_subbuf_size,
 * presumably (base, offset, length) -- TODO confirm against
 * common/buffer-view.h. padded_subbuf_size was filled in by the
 * extract_subbuffer_info step of get_subbuffer_common().
 */
1548 subbuffer
->buffer
.buffer
= lttng_buffer_view_init(
1549 addr
, 0, subbuffer
->info
.data
.padded_subbuf_size
);
/*
 * put_next_subbuffer operation: release the sub-buffer on stream->wait_fd
 * with kernctl_put_next_subbuf(). -EFAULT and -EIO results are logged with
 * distinct messages. The subbuffer argument is not referenced in the visible
 * lines.
 */
1555 int put_next_subbuffer(struct lttng_consumer_stream
*stream
,
1556 struct stream_subbuffer
*subbuffer
)
1558 const int ret
= kernctl_put_next_subbuf(stream
->wait_fd
);
1561 if (ret
== -EFAULT
) {
1562 PERROR("Error in unreserving sub buffer");
1563 } else if (ret
== -EIO
) {
1564 /* Should never happen with newer LTTng versions */
1565 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted");
/*
 * Install the read_subbuffer_ops callbacks of a stream:
 *   - get_next_subbuffer: the mmap variant when the channel output is
 *     CONSUMER_CHANNEL_MMAP, the splice variant otherwise (presumably the
 *     else branch; that line is not visible here).
 *   - extract_subbuffer_info: the metadata variant when metadata_flag is set,
 *     the data variant otherwise (same caveat).
 *   - send_live_beacon: consumer_flush_kernel_index when the channel is live.
 *   - put_next_subbuffer: put_next_subbuffer.
 */
1572 static void lttng_kconsumer_set_stream_ops(
1573 struct lttng_consumer_stream
*stream
)
/* Select the acquisition callback from the channel output type. */
1575 if (stream
->chan
->output
== CONSUMER_CHANNEL_MMAP
) {
1576 stream
->read_subbuffer_ops
.get_next_subbuffer
=
1577 get_next_subbuffer_mmap
;
1579 stream
->read_subbuffer_ops
.get_next_subbuffer
=
1580 get_next_subbuffer_splice
;
/* Select the info extraction callback from the stream kind. */
1583 if (stream
->metadata_flag
) {
1584 stream
->read_subbuffer_ops
.extract_subbuffer_info
=
1585 extract_metadata_subbuffer_info
;
1587 stream
->read_subbuffer_ops
.extract_subbuffer_info
=
1588 extract_data_subbuffer_info
;
/*
 * NOTE(review): the live beacon appears to be installed on the data
 * stream path only -- confirm the nesting in the full source.
 */
1589 if (stream
->chan
->is_live
) {
1590 stream
->read_subbuffer_ops
.send_live_beacon
=
1591 consumer_flush_kernel_index
;
1595 stream
->read_subbuffer_ops
.put_next_subbuffer
= put_next_subbuffer
;
/*
 * Prepare a newly received stream. Visible steps:
 *   1. create the output files when the stream is local (net_seq_idx is
 *      (uint64_t) -1ULL), its channel is monitored and has a trace chunk;
 *   2. for mmap output, query the mapping length and map the stream fd;
 *   3. install the sub-buffer operations with
 *      lttng_kconsumer_set_stream_ops().
 * The tail of the block closes stream->out_fd and resets it to -1,
 * presumably on the error_close_fd path (the label is not visible here).
 */
1598 int lttng_kconsumer_on_recv_stream(struct lttng_consumer_stream
*stream
)
1605 * Don't create anything if this is set for streaming or if there is
1606 * no current trace chunk on the parent channel.
1608 if (stream
->net_seq_idx
== (uint64_t) -1ULL && stream
->chan
->monitor
&&
1609 stream
->chan
->trace_chunk
) {
1610 ret
= consumer_stream_create_output_files(stream
, true);
/*
 * NOTE(review): this tests stream->output against LTTNG_EVENT_MMAP while
 * lttng_kconsumer_set_stream_ops() tests stream->chan->output against
 * CONSUMER_CHANNEL_MMAP -- confirm that both conditions always agree.
 */
1616 if (stream
->output
== LTTNG_EVENT_MMAP
) {
1617 /* get the len of the mmap region */
1618 unsigned long mmap_len
;
1620 ret
= kernctl_get_mmap_len(stream
->wait_fd
, &mmap_len
);
1622 PERROR("kernctl_get_mmap_len");
1623 goto error_close_fd
;
1625 stream
->mmap_len
= (size_t) mmap_len
;
/*
 * Read-only, private mapping of mmap_len bytes of the stream fd,
 * starting at offset 0.
 */
1627 stream
->mmap_base
= mmap(NULL
, stream
->mmap_len
, PROT_READ
,
1628 MAP_PRIVATE
, stream
->wait_fd
, 0);
1629 if (stream
->mmap_base
== MAP_FAILED
) {
1630 PERROR("Error mmaping");
1632 goto error_close_fd
;
1636 lttng_kconsumer_set_stream_ops(stream
);
1638 /* we return 0 to let the library handle the FD internally */
/* Close the output file only if one is open, then reset out_fd to -1. */
1642 if (stream
->out_fd
>= 0) {
1645 err
= close(stream
->out_fd
);
1647 stream
->out_fd
= -1;
1654 * Check if data is still being extracted from the buffers for a specific
1655 * stream. The consumer data lock and the stream lock MUST be acquired
1656 * before calling this function.
1658 * Return 1 if the traced data is still being read, else 0, meaning that the
1659 * data is available for trace viewer reading.
1661 int lttng_kconsumer_data_pending(struct lttng_consumer_stream
*stream
)
1667 if (stream
->endpoint_status
!= CONSUMER_ENDPOINT_ACTIVE
) {
1672 ret
= kernctl_get_next_subbuf(stream
->wait_fd
);
1674 /* There is still data so let's put back this subbuffer. */
1675 ret
= kernctl_put_subbuf(stream
->wait_fd
);
1677 ret
= 1; /* Data is pending */
1681 /* Data is NOT pending and ready to be read. */