2 * Copyright (C) 2011 - Julien Desfossez <julien.desfossez@polymtl.ca>
3 * Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License, version 2 only,
7 * as published by the Free Software Foundation.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 #include <sys/socket.h>
27 #include <sys/types.h>
31 #include <common/common.h>
32 #include <common/kernel-ctl/kernel-ctl.h>
33 #include <common/sessiond-comm/sessiond-comm.h>
34 #include <common/sessiond-comm/relayd.h>
35 #include <common/compat/fcntl.h>
36 #include <common/relayd/relayd.h>
38 #include "kernel-consumer.h"
/*
 * Consumer-wide state used by this file. All three are only declared here
 * (extern); presumably they are defined in another translation unit - confirm.
 */
40 extern struct lttng_consumer_global_data consumer_data
;
41 extern int consumer_poll_timeout
;
42 extern volatile int consumer_quit
;
45 * Mmap the ring buffer, read it and write the data to the tracefile.
47 * Returns the number of bytes written
/*
 * Copy `len` bytes of one sub-buffer from the stream mmap area
 * (stream->mmap_base plus the offset reported by
 * kernctl_get_mmap_read_offset()) to the stream output using write().
 *
 * The output fd defaults to stream->out_fd. When stream->net_seq_idx is not
 * -1, a relayd socket pair is looked up with consumer_find_relayd(); for a
 * metadata stream the relayd control socket mutex is taken and the big-endian
 * relayd_stream_id is written ahead of the payload.
 *
 * NOTE(review): this copy of the function has gaps (the `do {` matching each
 * `} while`, closing braces, some declarations and the error paths are not
 * visible), so the comments below describe only the statements that can be
 * seen here.
 */
49 ssize_t
lttng_kconsumer_on_read_subbuffer_mmap(
50 struct lttng_consumer_local_data
*ctx
,
51 struct lttng_consumer_stream
*stream
, unsigned long len
)
53 unsigned long mmap_offset
;
54 ssize_t ret
= 0, written
= 0;
/* Output file offset on entry; handed to lttng_consumer_sync_trace_file() at the end. */
55 off_t orig_offset
= stream
->out_fd_offset
;
56 int fd
= stream
->wait_fd
;
57 /* Default is on the disk */
58 int outfd
= stream
->out_fd
;
60 struct consumer_relayd_sock_pair
*relayd
= NULL
;
62 /* RCU lock for the relayd pointer */
65 /* Flag that the current stream is set for network streaming. */
66 if (stream
->net_seq_idx
!= -1) {
67 relayd
= consumer_find_relayd(stream
->net_seq_idx
);
73 /* get the offset inside the fd to mmap */
74 ret
= kernctl_get_mmap_read_offset(fd
, &mmap_offset
);
77 perror("kernctl_get_mmap_read_offset");
82 /* Handle stream on the relayd if the output is on the network */
/* netlen is the size announced to the relayd: payload plus, for metadata, the stream id. */
84 unsigned long netlen
= len
;
87 * Lock the control socket for the complete duration of the function
88 * since from this point on we will use the socket.
90 if (stream
->metadata_flag
) {
91 /* Metadata requires the control socket. */
92 pthread_mutex_lock(&relayd
->ctrl_sock_mutex
);
93 netlen
+= sizeof(stream
->relayd_stream_id
);
96 ret
= consumer_handle_stream_before_relayd(stream
, netlen
);
98 /* Use the returned socket. */
101 /* Write metadata stream id before payload */
102 if (stream
->metadata_flag
) {
/* The id is converted to big-endian before being put on the wire. */
103 metadata_id
= htobe64(stream
->relayd_stream_id
);
105 ret
= write(outfd
, (void *) &metadata_id
,
106 sizeof(stream
->relayd_stream_id
));
/* The write is retried for as long as it fails with EINTR. */
107 } while (ret
< 0 && errno
== EINTR
);
109 PERROR("write metadata stream id");
/*
 * NOTE(review): relayd_stream_id goes through htobe64() above, so it is
 * presumably a 64-bit integer, while %zu expects a size_t - confirm the
 * field type and consider PRIu64 from inttypes.h.
 */
113 DBG("Metadata stream id %zu written before data",
114 stream
->relayd_stream_id
);
116 * We do this so the return value can match the len passed as
117 * argument to this function.
119 written
-= sizeof(stream
->relayd_stream_id
);
122 /* Else, use the default set before which is the filesystem. */
/* Payload write, also retried on EINTR. */
127 ret
= write(outfd
, stream
->mmap_base
+ mmap_offset
, len
);
128 } while (ret
< 0 && errno
== EINTR
);
130 perror("Error in file write");
/*
 * NOTE(review): ret is ssize_t and len is unsigned long, so this comparison
 * converts ret to unsigned. perror() below also prints errno, which write()
 * only sets when it fails - confirm the message is meaningful on this branch.
 */
135 } else if (ret
> len
) {
136 perror("Error in file write");
144 /* This call is useless on a socket so better save a syscall. */
146 /* This won't block, but will start writeout asynchronously */
147 lttng_sync_file_range(outfd
, stream
->out_fd_offset
, ret
,
148 SYNC_FILE_RANGE_WRITE
);
/* Advance the tracked output offset by the number of bytes just written. */
149 stream
->out_fd_offset
+= ret
;
153 lttng_consumer_sync_trace_file(stream
, orig_offset
);
156 /* Unlock only if ctrl socket used */
157 if (relayd
&& stream
->metadata_flag
) {
158 pthread_mutex_unlock(&relayd
->ctrl_sock_mutex
);
167 * Splice the data from the ring buffer to the tracefile.
169 * Returns the number of bytes spliced.
/*
 * Move `len` bytes of one sub-buffer to the stream output with two splice()
 * calls: first from the stream fd (stream->wait_fd) into
 * ctx->consumer_thread_pipe[1], then from ctx->consumer_thread_pipe[0] into
 * the output fd.
 *
 * The output fd defaults to stream->out_fd. When stream->net_seq_idx is not
 * -1, a relayd socket pair is looked up with consumer_find_relayd(); for a
 * metadata stream with a relayd, the control socket mutex is taken and the
 * big-endian relayd_stream_id is written into the pipe ahead of the payload.
 *
 * NOTE(review): this copy of the function has gaps (loop heads, closing
 * braces, the declaration of `offset`, the errno switch and the return are
 * not visible), so the comments below describe only the visible statements.
 */
171 ssize_t
lttng_kconsumer_on_read_subbuffer_splice(
172 struct lttng_consumer_local_data
*ctx
,
173 struct lttng_consumer_stream
*stream
, unsigned long len
)
175 ssize_t ret
= 0, written
= 0, ret_splice
= 0;
/* Output file offset on entry; handed to lttng_consumer_sync_trace_file() later. */
177 off_t orig_offset
= stream
->out_fd_offset
;
178 int fd
= stream
->wait_fd
;
179 /* Default is on the disk */
180 int outfd
= stream
->out_fd
;
181 uint64_t metadata_id
;
182 struct consumer_relayd_sock_pair
*relayd
= NULL
;
184 /* RCU lock for the relayd pointer */
187 /* Flag that the current stream is set for network streaming. */
188 if (stream
->net_seq_idx
!= -1) {
189 relayd
= consumer_find_relayd(stream
->net_seq_idx
);
190 if (relayd
== NULL
) {
195 /* Write metadata stream id before payload */
196 if (stream
->metadata_flag
&& relayd
) {
198 * Lock the control socket for the complete duration of the function
199 * since from this point on we will use the socket.
201 pthread_mutex_lock(&relayd
->ctrl_sock_mutex
);
/* The id is converted to big-endian before being queued in the pipe. */
203 metadata_id
= htobe64(stream
->relayd_stream_id
);
205 ret
= write(ctx
->consumer_thread_pipe
[1],
206 (void *) &metadata_id
,
207 sizeof(stream
->relayd_stream_id
));
/* The write is retried for as long as it fails with EINTR. */
208 } while (ret
< 0 && errno
== EINTR
);
210 PERROR("write metadata stream id");
/*
 * NOTE(review): metadata_id is declared uint64_t and is assigned from
 * relayd_stream_id, so the field is presumably 64-bit, while %zu expects a
 * size_t - confirm the field type and consider PRIu64 from inttypes.h.
 */
214 DBG("Metadata stream id %zu written before data",
215 stream
->relayd_stream_id
);
219 DBG("splice chan to pipe offset %lu of len %lu (fd : %d)",
220 (unsigned long)offset
, len
, fd
);
/* Stage 1: ring buffer fd to the write end of the consumer thread pipe. */
221 ret_splice
= splice(fd
, &offset
, ctx
->consumer_thread_pipe
[1], NULL
, len
,
222 SPLICE_F_MOVE
| SPLICE_F_MORE
);
223 DBG("splice chan to pipe, ret %zd", ret_splice
);
224 if (ret_splice
< 0) {
225 perror("Error in relay splice");
227 written
= ret_splice
;
233 /* Handle stream on the relayd if the output is on the network */
235 if (stream
->metadata_flag
) {
236 /* Update counter to fit the spliced data */
237 ret_splice
+= sizeof(stream
->relayd_stream_id
);
238 len
+= sizeof(stream
->relayd_stream_id
);
240 * We do this so the return value can match the len passed as
241 * argument to this function.
243 written
-= sizeof(stream
->relayd_stream_id
);
246 ret
= consumer_handle_stream_before_relayd(stream
, ret_splice
);
248 /* Use the returned socket. */
252 ERR("Remote relayd disconnected. Stopping");
258 DBG3("Kernel consumer splice data in %d to out %d",
259 ctx
->consumer_thread_pipe
[0], outfd
);
/* Stage 2: read end of the pipe to the output fd, for ret_splice bytes. */
260 ret_splice
= splice(ctx
->consumer_thread_pipe
[0], NULL
, outfd
, NULL
,
261 ret_splice
, SPLICE_F_MOVE
| SPLICE_F_MORE
);
262 DBG("splice pipe to file, ret %zd", ret_splice
);
263 if (ret_splice
< 0) {
264 perror("Error in file splice");
266 written
= ret_splice
;
/*
 * NOTE(review): ret_splice is ssize_t and len is unsigned long, so this
 * comparison converts ret_splice to unsigned. PERROR below presumably
 * reports errno, which splice() only sets when it fails - confirm.
 */
271 if (ret_splice
> len
) {
273 PERROR("Wrote more data than requested %zd (len: %lu)",
275 written
+= ret_splice
;
281 /* This call is useless on a socket so better save a syscall. */
283 /* This won't block, but will start writeout asynchronously */
284 lttng_sync_file_range(outfd
, stream
->out_fd_offset
, ret_splice
,
285 SYNC_FILE_RANGE_WRITE
);
/* Advance the tracked output offset by the number of bytes just spliced out. */
286 stream
->out_fd_offset
+= ret_splice
;
288 written
+= ret_splice
;
290 lttng_consumer_sync_trace_file(stream
, orig_offset
);
297 /* send the appropriate error description to sessiond */
/* One error code per splice failure; the selecting conditions are not visible here. */
300 lttng_consumer_send_error(ctx
, CONSUMERD_SPLICE_EBADF
);
303 lttng_consumer_send_error(ctx
, CONSUMERD_SPLICE_EINVAL
);
306 lttng_consumer_send_error(ctx
, CONSUMERD_SPLICE_ENOMEM
);
309 lttng_consumer_send_error(ctx
, CONSUMERD_SPLICE_ESPIPE
);
/* Release the control socket mutex taken for a metadata stream sent to a relayd. */
314 if (relayd
&& stream
->metadata_flag
) {
315 pthread_mutex_unlock(&relayd
->ctrl_sock_mutex
);
324 * Take a snapshot for a specific fd
326 * Returns 0 on success, < 0 on error
/*
 * Ask the kernel tracer for a snapshot of the stream by calling
 * kernctl_snapshot() on stream->wait_fd. A perror() message is visible for
 * the failure path. `ctx` is not referenced in the visible statements.
 * NOTE(review): the declaration of `ret`, the error test and the return
 * statement are not visible in this copy.
 */
328 int lttng_kconsumer_take_snapshot(struct lttng_consumer_local_data
*ctx
,
329 struct lttng_consumer_stream
*stream
)
332 int infd
= stream
->wait_fd
;
334 ret
= kernctl_snapshot(infd
);
337 perror("Getting sub-buffer snapshot.");
344 * Get the produced position
346 * Returns 0 on success, < 0 on error
/*
 * Fetch the produced position of the stream by calling
 * kernctl_snapshot_get_produced() on stream->wait_fd, passing `pos` through.
 * A perror() message is visible for the failure path. `ctx` is not
 * referenced in the visible statements.
 * NOTE(review): the declaration of the `pos` parameter, the declaration of
 * `ret`, the error test and the return statement are not visible in this
 * copy; `pos` is presumably an output pointer - confirm.
 */
348 int lttng_kconsumer_get_produced_snapshot(
349 struct lttng_consumer_local_data
*ctx
,
350 struct lttng_consumer_stream
*stream
,
354 int infd
= stream
->wait_fd
;
356 ret
= kernctl_snapshot_get_produced(infd
, pos
);
359 perror("kernctl_snapshot_get_produced");
/*
 * Receive one struct lttcomm_consumer_msg from `sock` and act on
 * msg.cmd_type. The visible cases are LTTNG_CONSUMER_ADD_RELAYD_SOCKET,
 * LTTNG_CONSUMER_ADD_CHANNEL, LTTNG_CONSUMER_ADD_STREAM and
 * LTTNG_CONSUMER_UPDATE_STREAM; LTTNG_CONSUMER_STOP is tested before the
 * switch. Near the end, one byte is written to ctx->consumer_poll_pipe[1].
 *
 * Failures seen in the visible code are reported to the session daemon with
 * lttng_consumer_send_error().
 *
 * NOTE(review): this copy of the function has gaps (declarations of `ret`
 * and `fd`, closing braces, break/goto/return statements and some call
 * arguments are not visible), so the comments below describe only the
 * statements that can be seen here.
 */
365 int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data
*ctx
,
366 int sock
, struct pollfd
*consumer_sockpoll
)
369 struct lttcomm_consumer_msg msg
;
/* Read the fixed-size command message; a short read is reported as an error. */
371 ret
= lttcomm_recv_unix_sock(sock
, &msg
, sizeof(msg
));
372 if (ret
!= sizeof(msg
)) {
373 lttng_consumer_send_error(ctx
, CONSUMERD_ERROR_RECV_CMD
);
376 if (msg
.cmd_type
== LTTNG_CONSUMER_STOP
) {
380 /* relayd needs RCU read-side protection */
383 switch (msg
.cmd_type
) {
384 case LTTNG_CONSUMER_ADD_RELAYD_SOCKET
:
387 struct consumer_relayd_sock_pair
*relayd
;
389 DBG("Consumer adding relayd socket");
391 /* Get relayd reference if exists. */
392 relayd
= consumer_find_relayd(msg
.u
.relayd_sock
.net_index
);
393 if (relayd
== NULL
) {
394 /* Not found. Allocate one. */
395 relayd
= consumer_allocate_relayd_sock_pair(
396 msg
.u
.relayd_sock
.net_index
);
397 if (relayd
== NULL
) {
398 lttng_consumer_send_error(ctx
, CONSUMERD_OUTFD_ERROR
);
403 /* Poll on consumer socket. */
404 if (lttng_consumer_poll_socket(consumer_sockpoll
) < 0) {
408 /* Get relayd socket from session daemon */
409 ret
= lttcomm_recv_fds_unix_sock(sock
, &fd
, 1);
/* Exactly one descriptor is expected; anything else is reported as a receive error. */
410 if (ret
!= sizeof(fd
)) {
411 lttng_consumer_send_error(ctx
, CONSUMERD_ERROR_RECV_FD
);
415 /* Copy socket information and received FD */
416 switch (msg
.u
.relayd_sock
.type
) {
417 case LTTNG_STREAM_CONTROL
:
418 /* Copy received lttcomm socket */
419 lttcomm_copy_sock(&relayd
->control_sock
, &msg
.u
.relayd_sock
.sock
);
421 ret
= lttcomm_create_sock(&relayd
->control_sock
);
426 /* Close the created socket fd which is useless */
427 close(relayd
->control_sock
.fd
);
429 /* Assign new file descriptor */
430 relayd
->control_sock
.fd
= fd
;
432 case LTTNG_STREAM_DATA
:
433 /* Copy received lttcomm socket */
434 lttcomm_copy_sock(&relayd
->data_sock
, &msg
.u
.relayd_sock
.sock
);
435 ret
= lttcomm_create_sock(&relayd
->data_sock
);
440 /* Close the created socket fd which is useless */
441 close(relayd
->data_sock
.fd
);
443 /* Assign new file descriptor */
444 relayd
->data_sock
.fd
= fd
;
447 ERR("Unknown relayd socket type");
451 DBG("Consumer %s socket created successfully with net idx %d (fd: %d)",
452 msg
.u
.relayd_sock
.type
== LTTNG_STREAM_CONTROL
? "control" : "data",
453 relayd
->net_seq_idx
, fd
);
456 * Add relayd socket pair to consumer data hashtable. If object already
457 * exists or on error, the function gracefully returns.
459 consumer_add_relayd(relayd
);
463 case LTTNG_CONSUMER_ADD_CHANNEL
:
465 struct lttng_consumer_channel
*new_channel
;
467 DBG("consumer_add_channel %d", msg
.u
.channel
.channel_key
);
468 new_channel
= consumer_allocate_channel(msg
.u
.channel
.channel_key
,
470 msg
.u
.channel
.mmap_len
,
471 msg
.u
.channel
.max_sb_size
);
472 if (new_channel
== NULL
) {
473 lttng_consumer_send_error(ctx
, CONSUMERD_OUTFD_ERROR
);
/*
 * The optional on_recv_channel callback is consulted first. Its return value
 * presumably decides whether the channel is registered here - the test
 * between the callback call and consumer_add_channel() is not visible.
 */
476 if (ctx
->on_recv_channel
!= NULL
) {
477 ret
= ctx
->on_recv_channel(new_channel
);
479 consumer_add_channel(new_channel
);
480 } else if (ret
< 0) {
484 consumer_add_channel(new_channel
);
488 case LTTNG_CONSUMER_ADD_STREAM
:
491 struct consumer_relayd_sock_pair
*relayd
= NULL
;
492 struct lttng_consumer_stream
*new_stream
;
495 if (lttng_consumer_poll_socket(consumer_sockpoll
) < 0) {
499 /* Get stream file descriptor from socket */
500 ret
= lttcomm_recv_fds_unix_sock(sock
, &fd
, 1);
501 if (ret
!= sizeof(fd
)) {
502 lttng_consumer_send_error(ctx
, CONSUMERD_ERROR_RECV_FD
);
/* Build the stream object from the message fields; some arguments are not visible here. */
506 new_stream
= consumer_allocate_stream(msg
.u
.stream
.channel_key
,
507 msg
.u
.stream
.stream_key
,
510 msg
.u
.stream
.mmap_len
,
512 msg
.u
.stream
.path_name
,
515 msg
.u
.stream
.net_index
,
516 msg
.u
.stream
.metadata_flag
);
517 if (new_stream
== NULL
) {
518 lttng_consumer_send_error(ctx
, CONSUMERD_OUTFD_ERROR
);
522 /* The stream is not metadata. Get relayd reference if exists. */
523 relayd
= consumer_find_relayd(msg
.u
.stream
.net_index
);
524 if (relayd
!= NULL
) {
525 /* Add stream on the relayd */
/* The control socket is used under its mutex; the resulting id is stored in new_stream. */
526 pthread_mutex_lock(&relayd
->ctrl_sock_mutex
);
527 ret
= relayd_add_stream(&relayd
->control_sock
,
528 msg
.u
.stream
.name
, msg
.u
.stream
.path_name
,
529 &new_stream
->relayd_stream_id
);
530 pthread_mutex_unlock(&relayd
->ctrl_sock_mutex
);
/* No relayd found although the message names a network index: report it. */
534 } else if (msg
.u
.stream
.net_index
!= -1) {
535 ERR("Network sequence index %d unknown. Not adding stream.",
536 msg
.u
.stream
.net_index
);
/*
 * Same shape as the channel case: the optional on_recv_stream callback is
 * consulted first; the test on its return value is not visible here.
 */
541 if (ctx
->on_recv_stream
!= NULL
) {
542 ret
= ctx
->on_recv_stream(new_stream
);
544 consumer_add_stream(new_stream
);
545 } else if (ret
< 0) {
549 consumer_add_stream(new_stream
);
552 DBG("Kernel consumer_add_stream (%d)", fd
);
555 case LTTNG_CONSUMER_UPDATE_STREAM
:
/* Optional on_update_stream callback, then the stream state change itself. */
557 if (ctx
->on_update_stream
!= NULL
) {
558 ret
= ctx
->on_update_stream(msg
.u
.stream
.stream_key
, msg
.u
.stream
.state
);
560 consumer_change_stream_state(msg
.u
.stream
.stream_key
, msg
.u
.stream
.state
);
561 } else if (ret
< 0) {
565 consumer_change_stream_state(msg
.u
.stream
.stream_key
,
575 * Wake-up the other end by writing a null byte in the pipe
576 * (non-blocking). Important note: Because writing into the
577 * pipe is non-blocking (and therefore we allow dropping wakeup
578 * data, as long as there is wakeup data present in the pipe
579 * buffer to wake up the other end), the other end should
580 * perform the following sequence for waiting:
581 * 1) empty the pipe (reads).
582 * 2) perform update operation.
583 * 3) wait on the pipe (poll).
/* Single-byte wakeup write, retried for as long as it fails with EINTR. */
586 ret
= write(ctx
->consumer_poll_pipe
[1], "", 1);
587 } while (ret
< 0 && errno
== EINTR
);
594 * Consume data on a file descriptor and write it on a trace file.
/*
 * Consume one sub-buffer of `stream`: reserve it with
 * kernctl_get_next_subbuf(), query its padded size with
 * kernctl_get_padded_subbuf_size(), hand it to the generic splice or mmap
 * writer depending on stream->output, then release it with
 * kernctl_put_next_subbuf().
 *
 * NOTE(review): this copy of the function has gaps (declarations of `err`,
 * `ret` and `len`, the error tests, closing braces, break statements, some
 * call arguments and the return are not visible), so the comments below
 * describe only the statements that can be seen here.
 */
596 ssize_t
lttng_kconsumer_read_subbuffer(struct lttng_consumer_stream
*stream
,
597 struct lttng_consumer_local_data
*ctx
)
602 int infd
= stream
->wait_fd
;
604 DBG("In read_subbuffer (infd : %d)", infd
);
605 /* Get the next subbuffer */
606 err
= kernctl_get_next_subbuf(infd
);
609 * This is a debug message even for single-threaded consumer,
610 * because poll() have more relaxed criterions than get subbuf,
611 * so get_subbuf may fail for short race windows where poll()
612 * would issue wakeups.
614 DBG("Reserving sub buffer failed (everything is normal, "
615 "it is due to concurrency)");
/* Dispatch on the output mode configured for this stream. */
619 switch (stream
->output
) {
620 case LTTNG_EVENT_SPLICE
:
621 /* read the whole subbuffer */
622 err
= kernctl_get_padded_subbuf_size(infd
, &len
);
625 perror("Getting sub-buffer len failed.");
629 /* splice the subbuffer to the tracefile */
/* Calls the lttng_consumer_ wrapper, not the lttng_kconsumer_ function directly. */
630 ret
= lttng_consumer_on_read_subbuffer_splice(ctx
, stream
, len
);
633 * display the error but continue processing to try
634 * to release the subbuffer
636 ERR("Error splicing to tracefile (ret: %ld != len: %ld)",
641 case LTTNG_EVENT_MMAP
:
642 /* read the used subbuffer size */
643 err
= kernctl_get_padded_subbuf_size(infd
, &len
);
646 perror("Getting sub-buffer len failed.");
649 /* write the subbuffer to the tracefile */
/* Calls the lttng_consumer_ wrapper, not the lttng_kconsumer_ function directly. */
650 ret
= lttng_consumer_on_read_subbuffer_mmap(ctx
, stream
, len
);
653 * display the error but continue processing to try
654 * to release the subbuffer
656 ERR("Error writing to tracefile");
660 ERR("Unknown output method");
/* Release the sub-buffer reserved at the top of the function. */
664 err
= kernctl_put_next_subbuf(infd
);
667 if (errno
== EFAULT
) {
/*
 * NOTE(review): the message ends with a newline escape, and perror() appends
 * a colon plus the errno text after its argument, so the errno text would
 * land on the following line - confirm whether the escape is intended.
 */
668 perror("Error in unreserving sub buffer\n");
669 } else if (errno
== EIO
) {
670 /* Should never happen with newer LTTng versions */
671 perror("Reader has been pushed by the writer, last sub-buffer corrupted.");
680 int lttng_kconsumer_on_recv_stream(struct lttng_consumer_stream
*stream
)
684 /* Opening the tracefile in write mode */
685 if (strlen(stream
->path_name
) > 0 && stream
->net_seq_idx
== -1) {
686 ret
= run_as_open(stream
->path_name
,
687 O_WRONLY
|O_CREAT
|O_TRUNC
,
688 S_IRWXU
|S_IRWXG
|S_IRWXO
,
689 stream
->uid
, stream
->gid
);
691 ERR("Opening %s", stream
->path_name
);
695 stream
->out_fd
= ret
;
698 if (stream
->output
== LTTNG_EVENT_MMAP
) {
699 /* get the len of the mmap region */
700 unsigned long mmap_len
;
702 ret
= kernctl_get_mmap_len(stream
->wait_fd
, &mmap_len
);
705 perror("kernctl_get_mmap_len");
708 stream
->mmap_len
= (size_t) mmap_len
;
710 stream
->mmap_base
= mmap(NULL
, stream
->mmap_len
,
711 PROT_READ
, MAP_PRIVATE
, stream
->wait_fd
, 0);
712 if (stream
->mmap_base
== MAP_FAILED
) {
713 perror("Error mmaping");
719 /* we return 0 to let the library handle the FD internally */
726 err
= close(stream
->out_fd
);