2 * Copyright (C) 2011 - Julien Desfossez <julien.desfossez@polymtl.ca>
3 * Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License, version 2 only,
7 * as published by the Free Software Foundation.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 #include <sys/socket.h>
27 #include <sys/types.h>
31 #include <common/common.h>
32 #include <common/kernel-ctl/kernel-ctl.h>
33 #include <common/sessiond-comm/sessiond-comm.h>
34 #include <common/sessiond-comm/relayd.h>
35 #include <common/compat/fcntl.h>
36 #include <common/relayd/relayd.h>
38 #include "kernel-consumer.h"
40 extern struct lttng_consumer_global_data consumer_data
;
41 extern int consumer_poll_timeout
;
42 extern volatile int consumer_quit
;
45 * Mmap the ring buffer, read it and write the data to the tracefile.
47 * Returns the number of bytes written
49 ssize_t
lttng_kconsumer_on_read_subbuffer_mmap(
50 struct lttng_consumer_local_data
*ctx
,
51 struct lttng_consumer_stream
*stream
, unsigned long len
)
53 unsigned long mmap_offset
;
54 ssize_t ret
= 0, written
= 0;
55 off_t orig_offset
= stream
->out_fd_offset
;
56 int fd
= stream
->wait_fd
;
57 /* Default is on the disk */
58 int outfd
= stream
->out_fd
;
60 struct consumer_relayd_sock_pair
*relayd
= NULL
;
62 /* Flag that the current stream is set for network streaming. */
63 if (stream
->net_seq_idx
!= -1) {
64 relayd
= consumer_find_relayd(stream
->net_seq_idx
);
70 /* get the offset inside the fd to mmap */
71 ret
= kernctl_get_mmap_read_offset(fd
, &mmap_offset
);
74 perror("kernctl_get_mmap_read_offset");
79 /* RCU lock for the relayd pointer */
82 /* Handle stream on the relayd if the output is on the network */
85 * Lock the control socket for the complete duration of the function
86 * since from this point on we will use the socket.
88 if (stream
->metadata_flag
) {
89 /* Metadata requires the control socket. */
90 pthread_mutex_lock(&relayd
->ctrl_sock_mutex
);
93 ret
= consumer_handle_stream_before_relayd(stream
, len
);
95 /* Use the returned socket. */
98 /* Write metadata stream id before payload */
99 if (stream
->metadata_flag
) {
100 metadata_id
= htobe64(stream
->relayd_stream_id
);
102 ret
= write(outfd
, (void *) &metadata_id
,
103 sizeof(stream
->relayd_stream_id
));
104 } while (ret
< 0 && errno
== EINTR
);
106 PERROR("write metadata stream id");
110 DBG("Metadata stream id %zu written before data",
111 stream
->relayd_stream_id
);
113 * We do this so the return value can match the len passed as
114 * argument to this function.
116 written
-= sizeof(stream
->relayd_stream_id
);
119 /* Else, use the default set before which is the filesystem. */
124 ret
= write(outfd
, stream
->mmap_base
+ mmap_offset
, len
);
125 } while (ret
< 0 && errno
== EINTR
);
127 perror("Error in file write");
132 } else if (ret
> len
) {
133 perror("Error in file write");
141 /* This call is useless on a socket so better save a syscall. */
143 /* This won't block, but will start writeout asynchronously */
144 lttng_sync_file_range(outfd
, stream
->out_fd_offset
, ret
,
145 SYNC_FILE_RANGE_WRITE
);
146 stream
->out_fd_offset
+= ret
;
150 lttng_consumer_sync_trace_file(stream
, orig_offset
);
153 /* Unlock only if ctrl socket used */
154 if (relayd
&& stream
->metadata_flag
) {
155 pthread_mutex_unlock(&relayd
->ctrl_sock_mutex
);
164 * Splice the data from the ring buffer to the tracefile.
166 * Returns the number of bytes spliced.
168 ssize_t
lttng_kconsumer_on_read_subbuffer_splice(
169 struct lttng_consumer_local_data
*ctx
,
170 struct lttng_consumer_stream
*stream
, unsigned long len
)
172 ssize_t ret
= 0, written
= 0, ret_splice
= 0;
174 off_t orig_offset
= stream
->out_fd_offset
;
175 int fd
= stream
->wait_fd
;
176 /* Default is on the disk */
177 int outfd
= stream
->out_fd
;
178 uint64_t metadata_id
;
179 struct consumer_relayd_sock_pair
*relayd
= NULL
;
181 /* Flag that the current stream is set for network streaming. */
182 if (stream
->net_seq_idx
!= -1) {
183 relayd
= consumer_find_relayd(stream
->net_seq_idx
);
184 if (relayd
== NULL
) {
189 /* RCU lock for the relayd pointer */
192 /* Write metadata stream id before payload */
193 if (stream
->metadata_flag
&& relayd
) {
195 * Lock the control socket for the complete duration of the function
196 * since from this point on we will use the socket.
198 pthread_mutex_lock(&relayd
->ctrl_sock_mutex
);
200 metadata_id
= htobe64(stream
->relayd_stream_id
);
202 ret
= write(ctx
->consumer_thread_pipe
[1],
203 (void *) &metadata_id
,
204 sizeof(stream
->relayd_stream_id
));
205 } while (ret
< 0 && errno
== EINTR
);
207 PERROR("write metadata stream id");
211 DBG("Metadata stream id %zu written before data",
212 stream
->relayd_stream_id
);
216 DBG("splice chan to pipe offset %lu of len %lu (fd : %d)",
217 (unsigned long)offset
, len
, fd
);
218 ret_splice
= splice(fd
, &offset
, ctx
->consumer_thread_pipe
[1], NULL
, len
,
219 SPLICE_F_MOVE
| SPLICE_F_MORE
);
220 DBG("splice chan to pipe, ret %zd", ret_splice
);
221 if (ret_splice
< 0) {
222 perror("Error in relay splice");
224 written
= ret_splice
;
230 /* Handle stream on the relayd if the output is on the network */
232 if (stream
->metadata_flag
) {
233 /* Update counter to fit the spliced data */
234 ret_splice
+= sizeof(stream
->relayd_stream_id
);
235 len
+= sizeof(stream
->relayd_stream_id
);
237 * We do this so the return value can match the len passed as
238 * argument to this function.
240 written
-= sizeof(stream
->relayd_stream_id
);
243 ret
= consumer_handle_stream_before_relayd(stream
, ret_splice
);
245 /* Use the returned socket. */
249 ERR("Remote relayd disconnected. Stopping");
255 DBG3("Kernel consumer splice data in %d to out %d",
256 ctx
->consumer_thread_pipe
[0], outfd
);
257 ret_splice
= splice(ctx
->consumer_thread_pipe
[0], NULL
, outfd
, NULL
,
258 ret_splice
, SPLICE_F_MOVE
| SPLICE_F_MORE
);
259 DBG("splice pipe to file, ret %zd", ret_splice
);
260 if (ret_splice
< 0) {
261 perror("Error in file splice");
263 written
= ret_splice
;
268 if (ret_splice
> len
) {
270 PERROR("Wrote more data than requested %zd (len: %lu)",
272 written
+= ret_splice
;
278 /* This call is useless on a socket so better save a syscall. */
280 /* This won't block, but will start writeout asynchronously */
281 lttng_sync_file_range(outfd
, stream
->out_fd_offset
, ret_splice
,
282 SYNC_FILE_RANGE_WRITE
);
283 stream
->out_fd_offset
+= ret_splice
;
285 written
+= ret_splice
;
287 lttng_consumer_sync_trace_file(stream
, orig_offset
);
294 /* send the appropriate error description to sessiond */
297 lttng_consumer_send_error(ctx
, CONSUMERD_SPLICE_EBADF
);
300 lttng_consumer_send_error(ctx
, CONSUMERD_SPLICE_EINVAL
);
303 lttng_consumer_send_error(ctx
, CONSUMERD_SPLICE_ENOMEM
);
306 lttng_consumer_send_error(ctx
, CONSUMERD_SPLICE_ESPIPE
);
311 if (relayd
&& stream
->metadata_flag
) {
312 pthread_mutex_unlock(&relayd
->ctrl_sock_mutex
);
321 * Take a snapshot for a specific fd
323 * Returns 0 on success, < 0 on error
325 int lttng_kconsumer_take_snapshot(struct lttng_consumer_local_data
*ctx
,
326 struct lttng_consumer_stream
*stream
)
329 int infd
= stream
->wait_fd
;
331 ret
= kernctl_snapshot(infd
);
334 perror("Getting sub-buffer snapshot.");
341 * Get the produced position
343 * Returns 0 on success, < 0 on error
345 int lttng_kconsumer_get_produced_snapshot(
346 struct lttng_consumer_local_data
*ctx
,
347 struct lttng_consumer_stream
*stream
,
351 int infd
= stream
->wait_fd
;
353 ret
= kernctl_snapshot_get_produced(infd
, pos
);
356 perror("kernctl_snapshot_get_produced");
362 int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data
*ctx
,
363 int sock
, struct pollfd
*consumer_sockpoll
)
366 struct lttcomm_consumer_msg msg
;
368 ret
= lttcomm_recv_unix_sock(sock
, &msg
, sizeof(msg
));
369 if (ret
!= sizeof(msg
)) {
370 lttng_consumer_send_error(ctx
, CONSUMERD_ERROR_RECV_CMD
);
373 if (msg
.cmd_type
== LTTNG_CONSUMER_STOP
) {
377 switch (msg
.cmd_type
) {
378 case LTTNG_CONSUMER_ADD_RELAYD_SOCKET
:
381 struct consumer_relayd_sock_pair
*relayd
;
383 DBG("Consumer adding relayd socket");
385 /* Get relayd reference if exists. */
386 relayd
= consumer_find_relayd(msg
.u
.relayd_sock
.net_index
);
387 if (relayd
== NULL
) {
388 /* Not found. Allocate one. */
389 relayd
= consumer_allocate_relayd_sock_pair(
390 msg
.u
.relayd_sock
.net_index
);
391 if (relayd
== NULL
) {
392 lttng_consumer_send_error(ctx
, CONSUMERD_OUTFD_ERROR
);
397 /* Poll on consumer socket. */
398 if (lttng_consumer_poll_socket(consumer_sockpoll
) < 0) {
402 /* Get relayd socket from session daemon */
403 ret
= lttcomm_recv_fds_unix_sock(sock
, &fd
, 1);
404 if (ret
!= sizeof(fd
)) {
405 lttng_consumer_send_error(ctx
, CONSUMERD_ERROR_RECV_FD
);
409 /* Copy socket information and received FD */
410 switch (msg
.u
.relayd_sock
.type
) {
411 case LTTNG_STREAM_CONTROL
:
412 /* Copy received lttcomm socket */
413 lttcomm_copy_sock(&relayd
->control_sock
, &msg
.u
.relayd_sock
.sock
);
415 ret
= lttcomm_create_sock(&relayd
->control_sock
);
420 /* Close the created socket fd which is useless */
421 close(relayd
->control_sock
.fd
);
423 /* Assign new file descriptor */
424 relayd
->control_sock
.fd
= fd
;
426 case LTTNG_STREAM_DATA
:
427 /* Copy received lttcomm socket */
428 lttcomm_copy_sock(&relayd
->data_sock
, &msg
.u
.relayd_sock
.sock
);
429 ret
= lttcomm_create_sock(&relayd
->data_sock
);
434 /* Close the created socket fd which is useless */
435 close(relayd
->data_sock
.fd
);
437 /* Assign new file descriptor */
438 relayd
->data_sock
.fd
= fd
;
441 ERR("Unknown relayd socket type");
445 DBG("Consumer %s socket created successfully with net idx %d (fd: %d)",
446 msg
.u
.relayd_sock
.type
== LTTNG_STREAM_CONTROL
? "control" : "data",
447 relayd
->net_seq_idx
, fd
);
450 * Add relayd socket pair to consumer data hashtable. If object already
451 * exists or on error, the function gracefully returns.
453 consumer_add_relayd(relayd
);
457 case LTTNG_CONSUMER_ADD_CHANNEL
:
459 struct lttng_consumer_channel
*new_channel
;
461 DBG("consumer_add_channel %d", msg
.u
.channel
.channel_key
);
462 new_channel
= consumer_allocate_channel(msg
.u
.channel
.channel_key
,
464 msg
.u
.channel
.mmap_len
,
465 msg
.u
.channel
.max_sb_size
);
466 if (new_channel
== NULL
) {
467 lttng_consumer_send_error(ctx
, CONSUMERD_OUTFD_ERROR
);
470 if (ctx
->on_recv_channel
!= NULL
) {
471 ret
= ctx
->on_recv_channel(new_channel
);
473 consumer_add_channel(new_channel
);
474 } else if (ret
< 0) {
478 consumer_add_channel(new_channel
);
482 case LTTNG_CONSUMER_ADD_STREAM
:
485 struct consumer_relayd_sock_pair
*relayd
= NULL
;
486 struct lttng_consumer_stream
*new_stream
;
489 if (lttng_consumer_poll_socket(consumer_sockpoll
) < 0) {
493 /* Get stream file descriptor from socket */
494 ret
= lttcomm_recv_fds_unix_sock(sock
, &fd
, 1);
495 if (ret
!= sizeof(fd
)) {
496 lttng_consumer_send_error(ctx
, CONSUMERD_ERROR_RECV_FD
);
500 new_stream
= consumer_allocate_stream(msg
.u
.stream
.channel_key
,
501 msg
.u
.stream
.stream_key
,
504 msg
.u
.stream
.mmap_len
,
506 msg
.u
.stream
.path_name
,
509 msg
.u
.stream
.net_index
,
510 msg
.u
.stream
.metadata_flag
);
511 if (new_stream
== NULL
) {
512 lttng_consumer_send_error(ctx
, CONSUMERD_OUTFD_ERROR
);
516 /* The stream is not metadata. Get relayd reference if exists. */
517 relayd
= consumer_find_relayd(msg
.u
.stream
.net_index
);
518 if (relayd
!= NULL
) {
519 /* Add stream on the relayd */
520 pthread_mutex_lock(&relayd
->ctrl_sock_mutex
);
521 ret
= relayd_add_stream(&relayd
->control_sock
,
522 msg
.u
.stream
.name
, msg
.u
.stream
.path_name
,
523 &new_stream
->relayd_stream_id
);
524 pthread_mutex_unlock(&relayd
->ctrl_sock_mutex
);
528 } else if (msg
.u
.stream
.net_index
!= -1) {
529 ERR("Network sequence index %d unknown. Not adding stream.",
530 msg
.u
.stream
.net_index
);
535 if (ctx
->on_recv_stream
!= NULL
) {
536 ret
= ctx
->on_recv_stream(new_stream
);
538 consumer_add_stream(new_stream
);
539 } else if (ret
< 0) {
543 consumer_add_stream(new_stream
);
546 DBG("Kernel consumer_add_stream (%d)", fd
);
549 case LTTNG_CONSUMER_UPDATE_STREAM
:
551 if (ctx
->on_update_stream
!= NULL
) {
552 ret
= ctx
->on_update_stream(msg
.u
.stream
.stream_key
, msg
.u
.stream
.state
);
554 consumer_change_stream_state(msg
.u
.stream
.stream_key
, msg
.u
.stream
.state
);
555 } else if (ret
< 0) {
559 consumer_change_stream_state(msg
.u
.stream
.stream_key
,
569 * Wake-up the other end by writing a null byte in the pipe
570 * (non-blocking). Important note: Because writing into the
571 * pipe is non-blocking (and therefore we allow dropping wakeup
572 * data, as long as there is wakeup data present in the pipe
573 * buffer to wake up the other end), the other end should
574 * perform the following sequence for waiting:
575 * 1) empty the pipe (reads).
576 * 2) perform update operation.
577 * 3) wait on the pipe (poll).
580 ret
= write(ctx
->consumer_poll_pipe
[1], "", 1);
581 } while (ret
< 0 && errno
== EINTR
);
587 * Consume data on a file descriptor and write it on a trace file.
589 ssize_t
lttng_kconsumer_read_subbuffer(struct lttng_consumer_stream
*stream
,
590 struct lttng_consumer_local_data
*ctx
)
595 int infd
= stream
->wait_fd
;
597 DBG("In read_subbuffer (infd : %d)", infd
);
598 /* Get the next subbuffer */
599 err
= kernctl_get_next_subbuf(infd
);
602 * This is a debug message even for single-threaded consumer,
603 * because poll() has more relaxed criteria than get subbuf,
604 * so get_subbuf may fail for short race windows where poll()
605 * would issue wakeups.
607 DBG("Reserving sub buffer failed (everything is normal, "
608 "it is due to concurrency)");
612 switch (stream
->output
) {
613 case LTTNG_EVENT_SPLICE
:
614 /* read the whole subbuffer */
615 err
= kernctl_get_padded_subbuf_size(infd
, &len
);
618 perror("Getting sub-buffer len failed.");
622 /* splice the subbuffer to the tracefile */
623 ret
= lttng_consumer_on_read_subbuffer_splice(ctx
, stream
, len
);
626 * display the error but continue processing to try
627 * to release the subbuffer
629 ERR("Error splicing to tracefile (ret: %ld != len: %ld)",
634 case LTTNG_EVENT_MMAP
:
635 /* read the used subbuffer size */
636 err
= kernctl_get_padded_subbuf_size(infd
, &len
);
639 perror("Getting sub-buffer len failed.");
642 /* write the subbuffer to the tracefile */
643 ret
= lttng_consumer_on_read_subbuffer_mmap(ctx
, stream
, len
);
646 * display the error but continue processing to try
647 * to release the subbuffer
649 ERR("Error writing to tracefile");
653 ERR("Unknown output method");
657 err
= kernctl_put_next_subbuf(infd
);
660 if (errno
== EFAULT
) {
661 perror("Error in unreserving sub buffer\n");
662 } else if (errno
== EIO
) {
663 /* Should never happen with newer LTTng versions */
664 perror("Reader has been pushed by the writer, last sub-buffer corrupted.");
673 int lttng_kconsumer_on_recv_stream(struct lttng_consumer_stream
*stream
)
677 /* Opening the tracefile in write mode */
678 if (strlen(stream
->path_name
) > 0 && stream
->net_seq_idx
== -1) {
679 ret
= run_as_open(stream
->path_name
,
680 O_WRONLY
|O_CREAT
|O_TRUNC
,
681 S_IRWXU
|S_IRWXG
|S_IRWXO
,
682 stream
->uid
, stream
->gid
);
684 ERR("Opening %s", stream
->path_name
);
688 stream
->out_fd
= ret
;
691 if (stream
->output
== LTTNG_EVENT_MMAP
) {
692 /* get the len of the mmap region */
693 unsigned long mmap_len
;
695 ret
= kernctl_get_mmap_len(stream
->wait_fd
, &mmap_len
);
698 perror("kernctl_get_mmap_len");
701 stream
->mmap_len
= (size_t) mmap_len
;
703 stream
->mmap_base
= mmap(NULL
, stream
->mmap_len
,
704 PROT_READ
, MAP_PRIVATE
, stream
->wait_fd
, 0);
705 if (stream
->mmap_base
== MAP_FAILED
) {
706 perror("Error mmaping");
712 /* we return 0 to let the library handle the FD internally */
719 err
= close(stream
->out_fd
);