2 * Copyright (C) 2011 - Julien Desfossez <julien.desfossez@polymtl.ca>
3 * Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License, version 2 only,
7 * as published by the Free Software Foundation.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 #include <sys/socket.h>
27 #include <sys/types.h>
31 #include <common/common.h>
32 #include <common/kernel-ctl/kernel-ctl.h>
33 #include <common/sessiond-comm/sessiond-comm.h>
34 #include <common/sessiond-comm/relayd.h>
35 #include <common/compat/fcntl.h>
36 #include <common/relayd/relayd.h>
38 #include "kernel-consumer.h"
/*
 * Declarations (extern) of three globals whose definitions are outside this
 * listing. None of them is referenced by the statements visible below.
 */
40 extern struct lttng_consumer_global_data consumer_data
;
41 extern int consumer_poll_timeout
;
42 extern volatile int consumer_quit
;
45 * Mmap the ring buffer, read it and write the data to the tracefile.
47 * Returns the number of bytes written
/*
 * Copy one sub-buffer of len bytes from the mmap-ed ring buffer of a stream
 * to its output descriptor with write().
 *
 * ctx:    consumer context; not used by the statements visible here.
 * stream: stream being consumed. wait_fd is queried for the read offset,
 *         mmap_base is the source mapping, out_fd the default destination.
 * len:    number of bytes to write.
 *
 * For a metadata stream the relayd stream id, converted with htobe64(), is
 * written before the payload while relayd->ctrl_sock_mutex is held.
 *
 * NOTE(review): this listing skips source lines (see the gaps in the embedded
 * numbering); braces, some declarations and the return statement are not
 * shown. The comments added below describe the visible statements only.
 */
49 ssize_t
lttng_kconsumer_on_read_subbuffer_mmap(
50 struct lttng_consumer_local_data
*ctx
,
51 struct lttng_consumer_stream
*stream
, unsigned long len
)
53 unsigned long mmap_offset
;
54 ssize_t ret
= 0, written
= 0;
/* Remember where the output file stood on entry; used for the final sync. */
55 off_t orig_offset
= stream
->out_fd_offset
;
56 int fd
= stream
->wait_fd
;
57 /* Default is on the disk */
58 int outfd
= stream
->out_fd
;
60 struct consumer_relayd_sock_pair
*relayd
= NULL
;
62 /* Flag that the current stream is set for network streaming. */
63 if (stream
->net_seq_idx
!= -1) {
64 relayd
= consumer_find_relayd(stream
->net_seq_idx
);
70 /* get the offset inside the fd to mmap */
71 ret
= kernctl_get_mmap_read_offset(fd
, &mmap_offset
);
74 perror("kernctl_get_mmap_read_offset");
79 /* RCU lock for the relayd pointer */
82 /* Handle stream on the relayd if the output is on the network */
85 * Lock the control socket for the complete duration of the function
86 * since from this point on we will use the socket.
88 if (stream
->metadata_flag
) {
89 /* Metadata requires the control socket. */
90 pthread_mutex_lock(&relayd
->ctrl_sock_mutex
);
93 ret
= consumer_handle_stream_before_relayd(stream
, len
);
95 /* Use the returned socket. */
98 /* Write metadata stream id before payload */
99 if (stream
->metadata_flag
) {
/* The id is sent in big-endian byte order. */
100 metadata_id
= htobe64(stream
->relayd_stream_id
);
102 ret
= write(outfd
, (void *) &metadata_id
,
103 sizeof(stream
->relayd_stream_id
));
105 PERROR("write metadata stream id");
/*
 * NOTE(review): the retry condition tests errno only. After a successful
 * write() errno keeps whatever an earlier call left there, so a stale EINTR
 * would send the id a second time. Testing ret < 0 as well would avoid that.
 */
109 } while (errno
== EINTR
);
/*
 * NOTE(review): %zu expects a size_t argument. relayd_stream_id goes through
 * htobe64() above, which suggests a 64-bit field; confirm its type and use
 * the PRIu64 macro if so.
 */
110 DBG("Metadata stream id %zu written before data",
111 stream
->relayd_stream_id
);
113 * We do this so the return value can match the len passed as
114 * argument to this function.
116 written
-= sizeof(stream
->relayd_stream_id
);
119 /* Else, use the default set before which is the filesystem. */
/* Payload: write straight from the mapping at the current read offset. */
123 ret
= write(outfd
, stream
->mmap_base
+ mmap_offset
, len
);
125 if (errno
== EINTR
) {
126 /* restart the interrupted system call */
129 perror("Error in file write");
/*
 * NOTE(review): ret is ssize_t and len is unsigned long, so this comparison
 * is carried out in unsigned arithmetic. If the branch is reached after a
 * successful write(), perror() below reports an unrelated errno value.
 */
135 } else if (ret
> len
) {
136 perror("Error in file write");
144 /* This call is useless on a socket so better save a syscall. */
146 /* This won't block, but will start writeout asynchronously */
147 lttng_sync_file_range(outfd
, stream
->out_fd_offset
, ret
,
148 SYNC_FILE_RANGE_WRITE
);
/* Advance the recorded output offset by the bytes just written. */
149 stream
->out_fd_offset
+= ret
;
/* orig_offset is the out_fd_offset captured on entry. */
153 lttng_consumer_sync_trace_file(stream
, orig_offset
);
156 /* Unlock only if ctrl socket used */
157 if (relayd
&& stream
->metadata_flag
) {
158 pthread_mutex_unlock(&relayd
->ctrl_sock_mutex
);
167 * Splice the data from the ring buffer to the tracefile.
169 * Returns the number of bytes spliced.
/*
 * Move one sub-buffer of len bytes from the ring buffer to the output
 * descriptor with two splice() calls: stream->wait_fd into
 * ctx->consumer_thread_pipe[1], then ctx->consumer_thread_pipe[0] into outfd.
 *
 * ctx:    consumer context; supplies the intermediate pipe and is passed to
 *         lttng_consumer_send_error() on the error path.
 * stream: stream being consumed.
 * len:    number of bytes to splice.
 *
 * For a metadata stream with a relayd, the big-endian relayd stream id is
 * written into the pipe first, under relayd->ctrl_sock_mutex. ret_splice and
 * len are then enlarged by the size of the id before the second splice, which
 * uses ret_splice as its length.
 *
 * NOTE(review): this listing skips source lines (see the gaps in the embedded
 * numbering); braces, the declaration of offset and the return statement are
 * not shown. The comments added below describe the visible statements only.
 */
171 ssize_t
lttng_kconsumer_on_read_subbuffer_splice(
172 struct lttng_consumer_local_data
*ctx
,
173 struct lttng_consumer_stream
*stream
, unsigned long len
)
175 ssize_t ret
= 0, written
= 0, ret_splice
= 0;
/* Remember where the output file stood on entry; used for the final sync. */
177 off_t orig_offset
= stream
->out_fd_offset
;
178 int fd
= stream
->wait_fd
;
179 /* Default is on the disk */
180 int outfd
= stream
->out_fd
;
181 uint64_t metadata_id
;
182 struct consumer_relayd_sock_pair
*relayd
= NULL
;
184 /* Flag that the current stream is set for network streaming. */
185 if (stream
->net_seq_idx
!= -1) {
186 relayd
= consumer_find_relayd(stream
->net_seq_idx
);
187 if (relayd
== NULL
) {
192 /* RCU lock for the relayd pointer */
195 /* Write metadata stream id before payload */
196 if (stream
->metadata_flag
&& relayd
) {
198 * Lock the control socket for the complete duration of the function
199 * since from this point on we will use the socket.
201 pthread_mutex_lock(&relayd
->ctrl_sock_mutex
);
/* The id is queued in the pipe, in big-endian byte order, ahead of the data. */
204 metadata_id
= htobe64(stream
->relayd_stream_id
);
205 ret
= write(ctx
->consumer_thread_pipe
[1],
206 (void *) &metadata_id
,
207 sizeof(stream
->relayd_stream_id
));
209 PERROR("write metadata stream id");
/*
 * NOTE(review): the retry condition tests errno only. After a successful
 * write() errno keeps whatever an earlier call left there, so a stale EINTR
 * would queue the id a second time. Testing ret < 0 as well would avoid that.
 */
213 } while (errno
== EINTR
);
/*
 * NOTE(review): %zu expects a size_t argument. metadata_id is uint64_t and is
 * written with sizeof(stream->relayd_stream_id), which suggests a 64-bit
 * field; confirm its type and use the PRIu64 macro if so.
 */
214 DBG("Metadata stream id %zu written before data",
215 stream
->relayd_stream_id
);
/* First hop: ring buffer descriptor into the write end of the pipe. */
219 DBG("splice chan to pipe offset %lu of len %lu (fd : %d)",
220 (unsigned long)offset
, len
, fd
);
221 ret_splice
= splice(fd
, &offset
, ctx
->consumer_thread_pipe
[1], NULL
, len
,
222 SPLICE_F_MOVE
| SPLICE_F_MORE
);
223 DBG("splice chan to pipe, ret %zd", ret_splice
);
224 if (ret_splice
< 0) {
225 perror("Error in relay splice");
227 written
= ret_splice
;
233 /* Handle stream on the relayd if the output is on the network */
235 if (stream
->metadata_flag
) {
236 /* Update counter to fit the spliced data */
237 ret_splice
+= sizeof(stream
->relayd_stream_id
);
238 len
+= sizeof(stream
->relayd_stream_id
);
240 * We do this so the return value can match the len passed as
241 * argument to this function.
243 written
-= sizeof(stream
->relayd_stream_id
);
246 ret
= consumer_handle_stream_before_relayd(stream
, ret_splice
);
248 /* Use the returned socket. */
252 ERR("Remote relayd disconnected. Stopping");
/* Second hop: read end of the pipe into the output descriptor. */
258 DBG3("Kernel consumer splice data in %d to out %d",
259 ctx
->consumer_thread_pipe
[0], outfd
);
260 ret_splice
= splice(ctx
->consumer_thread_pipe
[0], NULL
, outfd
, NULL
,
261 ret_splice
, SPLICE_F_MOVE
| SPLICE_F_MORE
);
262 DBG("splice pipe to file, ret %zd", ret_splice
);
263 if (ret_splice
< 0) {
264 perror("Error in file splice");
266 written
= ret_splice
;
/*
 * NOTE(review): ret_splice is ssize_t and len is unsigned long, so this
 * comparison is carried out in unsigned arithmetic.
 */
271 if (ret_splice
> len
) {
273 PERROR("Wrote more data than requested %zd (len: %lu)",
275 written
+= ret_splice
;
281 /* This call is useless on a socket so better save a syscall. */
283 /* This won't block, but will start writeout asynchronously */
284 lttng_sync_file_range(outfd
, stream
->out_fd_offset
, ret_splice
,
285 SYNC_FILE_RANGE_WRITE
);
/* Advance the recorded output offset by the bytes just spliced. */
286 stream
->out_fd_offset
+= ret_splice
;
288 written
+= ret_splice
;
/* orig_offset is the out_fd_offset captured on entry. */
290 lttng_consumer_sync_trace_file(stream
, orig_offset
);
297 /* send the appropriate error description to sessiond */
/*
 * One error code per failure case; the conditions selecting each call are
 * among the lines this listing omits.
 */
300 lttng_consumer_send_error(ctx
, CONSUMERD_SPLICE_EBADF
);
303 lttng_consumer_send_error(ctx
, CONSUMERD_SPLICE_EINVAL
);
306 lttng_consumer_send_error(ctx
, CONSUMERD_SPLICE_ENOMEM
);
309 lttng_consumer_send_error(ctx
, CONSUMERD_SPLICE_ESPIPE
);
/* Release the control socket lock under the same condition it was taken. */
314 if (relayd
&& stream
->metadata_flag
) {
315 pthread_mutex_unlock(&relayd
->ctrl_sock_mutex
);
324 * Take a snapshot for a specific fd
326 * Returns 0 on success, < 0 on error
/*
 * Take a snapshot for a stream by calling kernctl_snapshot() on
 * stream->wait_fd.
 *
 * ctx:    consumer context; not used by the statements visible here.
 * stream: stream whose wait_fd is passed to kernctl_snapshot().
 *
 * NOTE(review): the declaration of ret, the test guarding perror() and the
 * return statement are among the lines this listing omits.
 */
328 int lttng_kconsumer_take_snapshot(struct lttng_consumer_local_data
*ctx
,
329 struct lttng_consumer_stream
*stream
)
332 int infd
= stream
->wait_fd
;
334 ret
= kernctl_snapshot(infd
);
337 perror("Getting sub-buffer snapshot.");
344 * Get the produced position
346 * Returns 0 on success, < 0 on error
/*
 * Fetch the produced position of a stream by calling
 * kernctl_snapshot_get_produced() on stream->wait_fd; pos is handed to that
 * call unchanged.
 *
 * ctx:    consumer context; not used by the statements visible here.
 * stream: stream whose wait_fd is queried.
 *
 * NOTE(review): the parameter line declaring pos, the declaration of ret,
 * the test guarding perror() and the return statement are among the lines
 * this listing omits.
 */
348 int lttng_kconsumer_get_produced_snapshot(
349 struct lttng_consumer_local_data
*ctx
,
350 struct lttng_consumer_stream
*stream
,
354 int infd
= stream
->wait_fd
;
356 ret
= kernctl_snapshot_get_produced(infd
, pos
);
359 perror("kernctl_snapshot_get_produced");
/*
 * Receive one command message on sock and act on it.
 *
 * ctx:               consumer context; provides the optional on_recv_channel,
 *                    on_recv_stream and on_update_stream callbacks and the
 *                    poll pipe, and is passed to lttng_consumer_send_error().
 * sock:              unix socket the message and the file descriptors are
 *                    read from.
 * consumer_sockpoll: handed to lttng_consumer_poll_socket() before each
 *                    file descriptor is received.
 *
 * LTTNG_CONSUMER_STOP is tested before the switch. The visible switch handles
 * ADD_RELAYD_SOCKET, ADD_CHANNEL, ADD_STREAM and UPDATE_STREAM.
 *
 * NOTE(review): this listing skips source lines (see the gaps in the embedded
 * numbering); braces, break/goto/return statements, the declarations of ret
 * and fd and some call arguments are not shown. The comments added below
 * describe the visible statements only.
 */
365 int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data
*ctx
,
366 int sock
, struct pollfd
*consumer_sockpoll
)
369 struct lttcomm_consumer_msg msg
;
/* A short or failed read of the fixed-size message is reported as an error. */
371 ret
= lttcomm_recv_unix_sock(sock
, &msg
, sizeof(msg
));
372 if (ret
!= sizeof(msg
)) {
373 lttng_consumer_send_error(ctx
, CONSUMERD_ERROR_RECV_CMD
);
376 if (msg
.cmd_type
== LTTNG_CONSUMER_STOP
) {
380 switch (msg
.cmd_type
) {
381 case LTTNG_CONSUMER_ADD_RELAYD_SOCKET
:
384 struct consumer_relayd_sock_pair
*relayd
;
386 DBG("Consumer adding relayd socket");
388 /* Get relayd reference if exists. */
389 relayd
= consumer_find_relayd(msg
.u
.relayd_sock
.net_index
);
390 if (relayd
== NULL
) {
391 /* Not found. Allocate one. */
392 relayd
= consumer_allocate_relayd_sock_pair(
393 msg
.u
.relayd_sock
.net_index
);
394 if (relayd
== NULL
) {
395 lttng_consumer_send_error(ctx
, CONSUMERD_OUTFD_ERROR
);
400 /* Poll on consumer socket. */
401 if (lttng_consumer_poll_socket(consumer_sockpoll
) < 0) {
405 /* Get relayd socket from session daemon */
406 ret
= lttcomm_recv_fds_unix_sock(sock
, &fd
, 1);
407 if (ret
!= sizeof(fd
)) {
408 lttng_consumer_send_error(ctx
, CONSUMERD_ERROR_RECV_FD
);
412 /* Copy socket information and received FD */
413 switch (msg
.u
.relayd_sock
.type
) {
414 case LTTNG_STREAM_CONTROL
:
415 /* Copy received lttcomm socket */
416 lttcomm_copy_sock(&relayd
->control_sock
, &msg
.u
.relayd_sock
.sock
);
418 ret
= lttcomm_create_sock(&relayd
->control_sock
);
423 /* Close the created socket fd which is useless */
/* NOTE(review): the return value of close() is ignored here. */
424 close(relayd
->control_sock
.fd
);
426 /* Assign new file descriptor */
427 relayd
->control_sock
.fd
= fd
;
429 case LTTNG_STREAM_DATA
:
430 /* Copy received lttcomm socket */
431 lttcomm_copy_sock(&relayd
->data_sock
, &msg
.u
.relayd_sock
.sock
);
432 ret
= lttcomm_create_sock(&relayd
->data_sock
);
437 /* Close the created socket fd which is useless */
/* NOTE(review): the return value of close() is ignored here. */
438 close(relayd
->data_sock
.fd
);
440 /* Assign new file descriptor */
441 relayd
->data_sock
.fd
= fd
;
444 ERR("Unknown relayd socket type");
448 DBG("Consumer %s socket created successfully with net idx %d (fd: %d)",
449 msg
.u
.relayd_sock
.type
== LTTNG_STREAM_CONTROL
? "control" : "data",
450 relayd
->net_seq_idx
, fd
);
453 * Add relayd socket pair to consumer data hashtable. If object already
454 * exists or on error, the function gracefully returns.
456 consumer_add_relayd(relayd
);
460 case LTTNG_CONSUMER_ADD_CHANNEL
:
462 struct lttng_consumer_channel
*new_channel
;
464 DBG("consumer_add_channel %d", msg
.u
.channel
.channel_key
);
465 new_channel
= consumer_allocate_channel(msg
.u
.channel
.channel_key
,
467 msg
.u
.channel
.mmap_len
,
468 msg
.u
.channel
.max_sb_size
);
469 if (new_channel
== NULL
) {
470 lttng_consumer_send_error(ctx
, CONSUMERD_OUTFD_ERROR
);
/*
 * With a callback installed its result is consulted before the channel is
 * added; without one the channel is added directly.
 */
473 if (ctx
->on_recv_channel
!= NULL
) {
474 ret
= ctx
->on_recv_channel(new_channel
);
476 consumer_add_channel(new_channel
);
477 } else if (ret
< 0) {
481 consumer_add_channel(new_channel
);
485 case LTTNG_CONSUMER_ADD_STREAM
:
488 struct consumer_relayd_sock_pair
*relayd
= NULL
;
489 struct lttng_consumer_stream
*new_stream
;
/* Wait on the consumer socket before receiving the stream descriptor. */
492 if (lttng_consumer_poll_socket(consumer_sockpoll
) < 0) {
496 /* Get stream file descriptor from socket */
497 ret
= lttcomm_recv_fds_unix_sock(sock
, &fd
, 1);
498 if (ret
!= sizeof(fd
)) {
499 lttng_consumer_send_error(ctx
, CONSUMERD_ERROR_RECV_FD
);
/* NOTE(review): several arguments of this call are not shown in the listing. */
503 new_stream
= consumer_allocate_stream(msg
.u
.stream
.channel_key
,
504 msg
.u
.stream
.stream_key
,
507 msg
.u
.stream
.mmap_len
,
509 msg
.u
.stream
.path_name
,
512 msg
.u
.stream
.net_index
,
513 msg
.u
.stream
.metadata_flag
);
514 if (new_stream
== NULL
) {
515 lttng_consumer_send_error(ctx
, CONSUMERD_OUTFD_ERROR
);
519 /* The stream is not metadata. Get relayd reference if exists. */
520 relayd
= consumer_find_relayd(msg
.u
.stream
.net_index
);
521 if (relayd
!= NULL
) {
522 /* Add stream on the relayd */
/*
 * The control socket is used under ctrl_sock_mutex; the call receives the
 * address of new_stream->relayd_stream_id.
 */
523 pthread_mutex_lock(&relayd
->ctrl_sock_mutex
);
524 ret
= relayd_add_stream(&relayd
->control_sock
,
525 msg
.u
.stream
.name
, msg
.u
.stream
.path_name
,
526 &new_stream
->relayd_stream_id
);
527 pthread_mutex_unlock(&relayd
->ctrl_sock_mutex
);
/* A network index other than -1 with no matching relayd is an error. */
531 } else if (msg
.u
.stream
.net_index
!= -1) {
532 ERR("Network sequence index %d unknown. Not adding stream.",
533 msg
.u
.stream
.net_index
);
/*
 * With a callback installed its result is consulted before the stream is
 * added; without one the stream is added directly.
 */
538 if (ctx
->on_recv_stream
!= NULL
) {
539 ret
= ctx
->on_recv_stream(new_stream
);
541 consumer_add_stream(new_stream
);
542 } else if (ret
< 0) {
546 consumer_add_stream(new_stream
);
549 DBG("Kernel consumer_add_stream (%d)", fd
);
552 case LTTNG_CONSUMER_UPDATE_STREAM
:
/*
 * With a callback installed its result is consulted before the state change;
 * without one the state is changed directly.
 */
554 if (ctx
->on_update_stream
!= NULL
) {
555 ret
= ctx
->on_update_stream(msg
.u
.stream
.stream_key
, msg
.u
.stream
.state
);
557 consumer_change_stream_state(msg
.u
.stream
.stream_key
, msg
.u
.stream
.state
);
558 } else if (ret
< 0) {
562 consumer_change_stream_state(msg
.u
.stream
.stream_key
,
572 * Wake-up the other end by writing a null byte in the pipe
573 * (non-blocking). Important note: Because writing into the
574 * pipe is non-blocking (and therefore we allow dropping wakeup
575 * data, as long as there is wakeup data present in the pipe
576 * buffer to wake up the other end), the other end should
577 * perform the following sequence for waiting:
578 * 1) empty the pipe (reads).
579 * 2) perform update operation.
580 * 3) wait on the pipe (poll).
583 ret
= write(ctx
->consumer_poll_pipe
[1], "", 1);
/*
 * NOTE(review): -1UL is an unsigned long constant, so ret is converted to
 * unsigned for this test. Writing ret < 0 would state the intent directly;
 * confirm the declared type of ret, which is not visible in this listing.
 */
584 } while (ret
== -1UL && errno
== EINTR
);
590 * Consume data on a file descriptor and write it on a trace file.
/*
 * Consume the next sub-buffer of a stream: reserve it with
 * kernctl_get_next_subbuf(), hand it to the splice or mmap writer depending
 * on stream->output, then release it with kernctl_put_next_subbuf().
 *
 * stream: stream to read; wait_fd is the descriptor passed to the kernctl
 *         calls.
 * ctx:    consumer context, forwarded to the writer functions.
 *
 * The writers are reached through the lttng_consumer_on_read_subbuffer_*
 * names, not through the lttng_kconsumer_* names.
 *
 * NOTE(review): this listing skips source lines (see the gaps in the embedded
 * numbering); braces, the declarations of err, ret and len, the error tests
 * and the return statement are not shown. The comments added below describe
 * the visible statements only.
 */
592 ssize_t
lttng_kconsumer_read_subbuffer(struct lttng_consumer_stream
*stream
,
593 struct lttng_consumer_local_data
*ctx
)
598 int infd
= stream
->wait_fd
;
600 DBG("In read_subbuffer (infd : %d)", infd
);
601 /* Get the next subbuffer */
602 err
= kernctl_get_next_subbuf(infd
);
605 * This is a debug message even for single-threaded consumer,
606 * because poll() have more relaxed criterions than get subbuf,
607 * so get_subbuf may fail for short race windows where poll()
608 * would issue wakeups.
610 DBG("Reserving sub buffer failed (everything is normal, "
611 "it is due to concurrency)");
615 switch (stream
->output
) {
616 case LTTNG_EVENT_SPLICE
:
617 /* read the whole subbuffer */
618 err
= kernctl_get_padded_subbuf_size(infd
, &len
);
621 perror("Getting sub-buffer len failed.");
625 /* splice the subbuffer to the tracefile */
626 ret
= lttng_consumer_on_read_subbuffer_splice(ctx
, stream
, len
);
629 * display the error but continue processing to try
630 * to release the subbuffer
/*
 * NOTE(review): the arguments of this ERR() call are not shown. If they are
 * the ssize_t result and the unsigned long length, %zd and %lu would be the
 * matching conversions; confirm against the full source.
 */
632 ERR("Error splicing to tracefile (ret: %ld != len: %ld)",
637 case LTTNG_EVENT_MMAP
:
/*
 * NOTE(review): the comment below says used size, but the call fetches the
 * padded size, exactly as in the splice case; confirm which one is intended.
 */
638 /* read the used subbuffer size */
639 err
= kernctl_get_padded_subbuf_size(infd
, &len
);
642 perror("Getting sub-buffer len failed.");
645 /* write the subbuffer to the tracefile */
646 ret
= lttng_consumer_on_read_subbuffer_mmap(ctx
, stream
, len
);
649 * display the error but continue processing to try
650 * to release the subbuffer
652 ERR("Error writing to tracefile");
656 ERR("Unknown output method");
/* Release the sub-buffer reserved above. */
660 err
= kernctl_put_next_subbuf(infd
);
663 if (errno
== EFAULT
) {
/*
 * NOTE(review): the message ends with a newline, so the text that perror()
 * appends after it lands on the following output line.
 */
664 perror("Error in unreserving sub buffer\n");
665 } else if (errno
== EIO
) {
666 /* Should never happen with newer LTTng versions */
667 perror("Reader has been pushed by the writer, last sub-buffer corrupted.");
/*
 * Prepare a newly received stream for output.
 *
 * When the stream has a non-empty path_name and is not set for network
 * streaming (net_seq_idx == -1), the trace file is opened through
 * run_as_open() with stream->uid and stream->gid, and the result is stored in
 * stream->out_fd. For LTTNG_EVENT_MMAP output the length reported by
 * kernctl_get_mmap_len() is stored in stream->mmap_len and the buffer is
 * mapped read-only (PROT_READ, MAP_PRIVATE) from stream->wait_fd into
 * stream->mmap_base.
 *
 * NOTE(review): this listing skips source lines (see the gaps in the embedded
 * numbering) and stops at the close() call below; braces, the error tests,
 * the labels and the return statement are not shown.
 */
676 int lttng_kconsumer_on_recv_stream(struct lttng_consumer_stream
*stream
)
680 /* Opening the tracefile in write mode */
681 if (strlen(stream
->path_name
) > 0 && stream
->net_seq_idx
== -1) {
682 ret
= run_as_open(stream
->path_name
,
683 O_WRONLY
|O_CREAT
|O_TRUNC
,
/*
 * NOTE(review): this mode requests read, write and execute for user, group
 * and others. Execute bits and write access for others look broader than a
 * trace file needs; confirm whether a tighter mode is intended.
 */
684 S_IRWXU
|S_IRWXG
|S_IRWXO
,
685 stream
->uid
, stream
->gid
);
687 ERR("Opening %s", stream
->path_name
);
691 stream
->out_fd
= ret
;
694 if (stream
->output
== LTTNG_EVENT_MMAP
) {
695 /* get the len of the mmap region */
696 unsigned long mmap_len
;
698 ret
= kernctl_get_mmap_len(stream
->wait_fd
, &mmap_len
);
701 perror("kernctl_get_mmap_len");
704 stream
->mmap_len
= (size_t) mmap_len
;
/* Map the whole region read-only, starting at offset 0 of wait_fd. */
706 stream
->mmap_base
= mmap(NULL
, stream
->mmap_len
,
707 PROT_READ
, MAP_PRIVATE
, stream
->wait_fd
, 0);
708 if (stream
->mmap_base
== MAP_FAILED
) {
709 perror("Error mmaping");
715 /* we return 0 to let the library handle the FD internally */
/* Closes the output descriptor; the condition leading here is not shown. */
722 err
= close(stream
->out_fd
);