consumerd: cleanup: use buffer view interface for mmap read subbuf
[lttng-tools.git] / src / common / kernel-consumer / kernel-consumer.c
1 /*
2 * Copyright (C) 2011 Julien Desfossez <julien.desfossez@polymtl.ca>
3 * Copyright (C) 2011 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
4 * Copyright (C) 2017 Jérémie Galarneau <jeremie.galarneau@efficios.com>
5 *
6 * SPDX-License-Identifier: GPL-2.0-only
7 *
8 */
9
#define _LGPL_SOURCE
#include <assert.h>
#include <inttypes.h>
#include <poll.h>
#include <pthread.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include <bin/lttng-consumerd/health-consumerd.h>
#include <common/common.h>
#include <common/compat/endian.h>
#include <common/compat/fcntl.h>
#include <common/consumer/consumer-stream.h>
#include <common/consumer/consumer-timer.h>
#include <common/index/index.h>
#include <common/kernel-ctl/kernel-ctl.h>
#include <common/optional.h>
#include <common/pipe.h>
#include <common/relayd/relayd.h>
#include <common/sessiond-comm/relayd.h>
#include <common/sessiond-comm/sessiond-comm.h>
#include <common/utils.h>

#include "common/buffer-view.h"
#include "kernel-consumer.h"
41
42 extern struct lttng_consumer_global_data consumer_data;
43 extern int consumer_poll_timeout;
44
45 /*
46 * Take a snapshot for a specific fd
47 *
48 * Returns 0 on success, < 0 on error
49 */
50 int lttng_kconsumer_take_snapshot(struct lttng_consumer_stream *stream)
51 {
52 int ret = 0;
53 int infd = stream->wait_fd;
54
55 ret = kernctl_snapshot(infd);
56 /*
57 * -EAGAIN is not an error, it just means that there is no data to
58 * be read.
59 */
60 if (ret != 0 && ret != -EAGAIN) {
61 PERROR("Getting sub-buffer snapshot.");
62 }
63
64 return ret;
65 }
66
67 /*
68 * Sample consumed and produced positions for a specific fd.
69 *
70 * Returns 0 on success, < 0 on error.
71 */
72 int lttng_kconsumer_sample_snapshot_positions(
73 struct lttng_consumer_stream *stream)
74 {
75 assert(stream);
76
77 return kernctl_snapshot_sample_positions(stream->wait_fd);
78 }
79
80 /*
81 * Get the produced position
82 *
83 * Returns 0 on success, < 0 on error
84 */
85 int lttng_kconsumer_get_produced_snapshot(struct lttng_consumer_stream *stream,
86 unsigned long *pos)
87 {
88 int ret;
89 int infd = stream->wait_fd;
90
91 ret = kernctl_snapshot_get_produced(infd, pos);
92 if (ret != 0) {
93 PERROR("kernctl_snapshot_get_produced");
94 }
95
96 return ret;
97 }
98
99 /*
100 * Get the consumerd position
101 *
102 * Returns 0 on success, < 0 on error
103 */
104 int lttng_kconsumer_get_consumed_snapshot(struct lttng_consumer_stream *stream,
105 unsigned long *pos)
106 {
107 int ret;
108 int infd = stream->wait_fd;
109
110 ret = kernctl_snapshot_get_consumed(infd, pos);
111 if (ret != 0) {
112 PERROR("kernctl_snapshot_get_consumed");
113 }
114
115 return ret;
116 }
117
118 static
119 int get_current_subbuf_addr(struct lttng_consumer_stream *stream,
120 const char **addr)
121 {
122 int ret;
123 unsigned long mmap_offset;
124 const char *mmap_base = stream->mmap_base;
125
126 ret = kernctl_get_mmap_read_offset(stream->wait_fd, &mmap_offset);
127 if (ret < 0) {
128 PERROR("Failed to get mmap read offset");
129 goto error;
130 }
131
132 *addr = mmap_base + mmap_offset;
133 error:
134 return ret;
135 }
136
/*
 * Take a snapshot of all the stream of a channel
 * RCU read-side lock must be held across this function to ensure existence of
 * channel. The channel lock must be held by the caller.
 *
 * For each stream of the channel: flush, sample the consumed/produced
 * positions, then read every sub-buffer between those positions and
 * write it to the snapshot output (local file or relayd).
 *
 * Returns 0 on success, < 0 on error
 */
static int lttng_kconsumer_snapshot_channel(
		struct lttng_consumer_channel *channel,
		uint64_t key, char *path, uint64_t relayd_id,
		uint64_t nb_packets_per_stream,
		struct lttng_consumer_local_data *ctx)
{
	int ret;
	struct lttng_consumer_stream *stream;

	DBG("Kernel consumer snapshot channel %" PRIu64, key);

	rcu_read_lock();

	/* Splice is not supported yet for channel snapshot. */
	if (channel->output != CONSUMER_CHANNEL_MMAP) {
		ERR("Unsupported output type for channel \"%s\": mmap output is required to record a snapshot",
				channel->name);
		ret = -1;
		goto end;
	}

	cds_list_for_each_entry(stream, &channel->streams.head, send_node) {
		unsigned long consumed_pos, produced_pos;

		health_code_update();

		/*
		 * Lock stream because we are about to change its state.
		 */
		pthread_mutex_lock(&stream->lock);

		assert(channel->trace_chunk);
		if (!lttng_trace_chunk_get(channel->trace_chunk)) {
			/*
			 * Can't happen barring an internal error as the channel
			 * holds a reference to the trace chunk.
			 */
			ERR("Failed to acquire reference to channel's trace chunk");
			ret = -1;
			goto end_unlock;
		}
		assert(!stream->trace_chunk);
		/* The stream borrows the channel's chunk for the snapshot. */
		stream->trace_chunk = channel->trace_chunk;

		/*
		 * Assign the received relayd ID so we can use it for streaming. The streams
		 * are not visible to anyone so this is OK to change it.
		 */
		stream->net_seq_idx = relayd_id;
		channel->relayd_id = relayd_id;
		if (relayd_id != (uint64_t) -1ULL) {
			/* Network output: announce the stream to the relayd. */
			ret = consumer_send_relayd_stream(stream, path);
			if (ret < 0) {
				ERR("sending stream to relayd");
				goto end_unlock;
			}
		} else {
			/* Local output: create the on-disk trace files. */
			ret = consumer_stream_create_output_files(stream,
					false);
			if (ret < 0) {
				goto end_unlock;
			}
			DBG("Kernel consumer snapshot stream (%" PRIu64 ")",
					stream->key);
		}

		ret = kernctl_buffer_flush_empty(stream->wait_fd);
		if (ret < 0) {
			/*
			 * Doing a buffer flush which does not take into
			 * account empty packets. This is not perfect
			 * for stream intersection, but required as a
			 * fall-back when "flush_empty" is not
			 * implemented by lttng-modules.
			 */
			ret = kernctl_buffer_flush(stream->wait_fd);
			if (ret < 0) {
				ERR("Failed to flush kernel stream");
				goto end_unlock;
			}
			goto end_unlock;
		}

		ret = lttng_kconsumer_take_snapshot(stream);
		if (ret < 0) {
			ERR("Taking kernel snapshot");
			goto end_unlock;
		}

		/* Sample the window of data to copy out. */
		ret = lttng_kconsumer_get_produced_snapshot(stream, &produced_pos);
		if (ret < 0) {
			ERR("Produced kernel snapshot position");
			goto end_unlock;
		}

		ret = lttng_kconsumer_get_consumed_snapshot(stream, &consumed_pos);
		if (ret < 0) {
			ERR("Consumerd kernel snapshot position");
			goto end_unlock;
		}

		/*
		 * Move the start position forward so that at most
		 * nb_packets_per_stream sub-buffers are copied.
		 */
		consumed_pos = consumer_get_consume_start_pos(consumed_pos,
				produced_pos, nb_packets_per_stream,
				stream->max_sb_size);

		/*
		 * Signed comparison of the position delta so the loop is
		 * robust to the unsigned counters wrapping around.
		 */
		while ((long) (consumed_pos - produced_pos) < 0) {
			ssize_t read_len;
			unsigned long len, padded_len;
			const char *subbuf_addr;
			struct lttng_buffer_view subbuf_view;

			health_code_update();
			DBG("Kernel consumer taking snapshot at pos %lu", consumed_pos);

			ret = kernctl_get_subbuf(stream->wait_fd, &consumed_pos);
			if (ret < 0) {
				if (ret != -EAGAIN) {
					PERROR("kernctl_get_subbuf snapshot");
					goto end_unlock;
				}
				/*
				 * Sub-buffer unavailable (-EAGAIN): account it
				 * as lost and move on to the next one.
				 */
				DBG("Kernel consumer get subbuf failed. Skipping it.");
				consumed_pos += stream->max_sb_size;
				stream->chan->lost_packets++;
				continue;
			}

			/* From here on, the sub-buffer must be put on error. */
			ret = kernctl_get_subbuf_size(stream->wait_fd, &len);
			if (ret < 0) {
				ERR("Snapshot kernctl_get_subbuf_size");
				goto error_put_subbuf;
			}

			ret = kernctl_get_padded_subbuf_size(stream->wait_fd, &padded_len);
			if (ret < 0) {
				ERR("Snapshot kernctl_get_padded_subbuf_size");
				goto error_put_subbuf;
			}

			ret = get_current_subbuf_addr(stream, &subbuf_addr);
			if (ret) {
				goto error_put_subbuf;
			}

			/* Wrap the raw sub-buffer in a bounded view for the writer. */
			subbuf_view = lttng_buffer_view_init(
					subbuf_addr, 0, padded_len);
			read_len = lttng_consumer_on_read_subbuffer_mmap(ctx,
					stream, &subbuf_view,
					padded_len - len, NULL);
			/*
			 * We write the padded len in local tracefiles but the data len
			 * when using a relay. Display the error but continue processing
			 * to try to release the subbuffer.
			 */
			if (relayd_id != (uint64_t) -1ULL) {
				if (read_len != len) {
					ERR("Error sending to the relay (ret: %zd != len: %lu)",
							read_len, len);
				}
			} else {
				if (read_len != padded_len) {
					ERR("Error writing to tracefile (ret: %zd != len: %lu)",
							read_len, padded_len);
				}
			}

			ret = kernctl_put_subbuf(stream->wait_fd);
			if (ret < 0) {
				ERR("Snapshot kernctl_put_subbuf");
				goto end_unlock;
			}
			consumed_pos += stream->max_sb_size;
		}

		/* Tear down this stream's snapshot output. */
		if (relayd_id == (uint64_t) -1ULL) {
			if (stream->out_fd >= 0) {
				ret = close(stream->out_fd);
				if (ret < 0) {
					PERROR("Kernel consumer snapshot close out_fd");
					goto end_unlock;
				}
				stream->out_fd = -1;
			}
		} else {
			close_relayd_stream(stream);
			stream->net_seq_idx = (uint64_t) -1ULL;
		}
		/* Release the trace chunk reference taken above. */
		lttng_trace_chunk_put(stream->trace_chunk);
		stream->trace_chunk = NULL;
		pthread_mutex_unlock(&stream->lock);
	}

	/* All good! */
	ret = 0;
	goto end;

error_put_subbuf:
	/* Best-effort release of the sub-buffer acquired above. */
	ret = kernctl_put_subbuf(stream->wait_fd);
	if (ret < 0) {
		ERR("Snapshot kernctl_put_subbuf error path");
	}
end_unlock:
	pthread_mutex_unlock(&stream->lock);
end:
	rcu_read_unlock();
	return ret;
}
350
/*
 * Read the whole metadata available for a snapshot.
 * RCU read-side lock must be held across this function to ensure existence of
 * metadata_channel. The channel lock must be held by the caller.
 *
 * Note: the metadata stream is unconditionally removed from its channel
 * and destroyed before returning, on both success and error paths.
 *
 * Returns 0 on success, < 0 on error
 */
static int lttng_kconsumer_snapshot_metadata(
		struct lttng_consumer_channel *metadata_channel,
		uint64_t key, char *path, uint64_t relayd_id,
		struct lttng_consumer_local_data *ctx)
{
	int ret, use_relayd = 0;
	ssize_t ret_read;
	struct lttng_consumer_stream *metadata_stream;

	assert(ctx);

	DBG("Kernel consumer snapshot metadata with key %" PRIu64 " at path %s",
			key, path);

	rcu_read_lock();

	metadata_stream = metadata_channel->metadata_stream;
	assert(metadata_stream);

	pthread_mutex_lock(&metadata_stream->lock);
	assert(metadata_channel->trace_chunk);
	assert(metadata_stream->trace_chunk);

	/* Flag once that we have a valid relayd for the stream. */
	if (relayd_id != (uint64_t) -1ULL) {
		use_relayd = 1;
	}

	if (use_relayd) {
		/* Network output: announce the stream to the relayd. */
		ret = consumer_send_relayd_stream(metadata_stream, path);
		if (ret < 0) {
			goto error_snapshot;
		}
	} else {
		/* Local output: create the on-disk metadata file. */
		ret = consumer_stream_create_output_files(metadata_stream,
				false);
		if (ret < 0) {
			goto error_snapshot;
		}
	}

	/* Drain all available metadata sub-buffers. */
	do {
		health_code_update();

		ret_read = lttng_kconsumer_read_subbuffer(metadata_stream, ctx);
		if (ret_read < 0) {
			if (ret_read != -EAGAIN) {
				ERR("Kernel snapshot reading metadata subbuffer (ret: %zd)",
						ret_read);
				ret = ret_read;
				goto error_snapshot;
			}
			/* ret_read is negative at this point so we will exit the loop. */
			continue;
		}
	} while (ret_read >= 0);

	if (use_relayd) {
		close_relayd_stream(metadata_stream);
		metadata_stream->net_seq_idx = (uint64_t) -1ULL;
	} else {
		if (metadata_stream->out_fd >= 0) {
			ret = close(metadata_stream->out_fd);
			if (ret < 0) {
				PERROR("Kernel consumer snapshot metadata close out_fd");
				/*
				 * Don't go on error here since the snapshot was successful at this
				 * point but somehow the close failed.
				 */
			}
			metadata_stream->out_fd = -1;
			lttng_trace_chunk_put(metadata_stream->trace_chunk);
			metadata_stream->trace_chunk = NULL;
		}
	}

	ret = 0;
error_snapshot:
	/* Common teardown: the snapshot metadata stream is single-use. */
	pthread_mutex_unlock(&metadata_stream->lock);
	cds_list_del(&metadata_stream->send_node);
	consumer_stream_destroy(metadata_stream, NULL);
	metadata_channel->metadata_stream = NULL;
	rcu_read_unlock();
	return ret;
}
443
444 /*
445 * Receive command from session daemon and process it.
446 *
447 * Return 1 on success else a negative value or 0.
448 */
449 int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
450 int sock, struct pollfd *consumer_sockpoll)
451 {
452 ssize_t ret;
453 enum lttcomm_return_code ret_code = LTTCOMM_CONSUMERD_SUCCESS;
454 struct lttcomm_consumer_msg msg;
455
456 health_code_update();
457
458 ret = lttcomm_recv_unix_sock(sock, &msg, sizeof(msg));
459 if (ret != sizeof(msg)) {
460 if (ret > 0) {
461 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_ERROR_RECV_CMD);
462 ret = -1;
463 }
464 return ret;
465 }
466
467 health_code_update();
468
469 /* Deprecated command */
470 assert(msg.cmd_type != LTTNG_CONSUMER_STOP);
471
472 health_code_update();
473
474 /* relayd needs RCU read-side protection */
475 rcu_read_lock();
476
477 switch (msg.cmd_type) {
478 case LTTNG_CONSUMER_ADD_RELAYD_SOCKET:
479 {
480 /* Session daemon status message are handled in the following call. */
481 consumer_add_relayd_socket(msg.u.relayd_sock.net_index,
482 msg.u.relayd_sock.type, ctx, sock, consumer_sockpoll,
483 &msg.u.relayd_sock.sock, msg.u.relayd_sock.session_id,
484 msg.u.relayd_sock.relayd_session_id);
485 goto end_nosignal;
486 }
487 case LTTNG_CONSUMER_ADD_CHANNEL:
488 {
489 struct lttng_consumer_channel *new_channel;
490 int ret_recv;
491 const uint64_t chunk_id = msg.u.channel.chunk_id.value;
492
493 health_code_update();
494
495 /* First send a status message before receiving the fds. */
496 ret = consumer_send_status_msg(sock, ret_code);
497 if (ret < 0) {
498 /* Somehow, the session daemon is not responding anymore. */
499 goto error_fatal;
500 }
501
502 health_code_update();
503
504 DBG("consumer_add_channel %" PRIu64, msg.u.channel.channel_key);
505 new_channel = consumer_allocate_channel(msg.u.channel.channel_key,
506 msg.u.channel.session_id,
507 msg.u.channel.chunk_id.is_set ?
508 &chunk_id : NULL,
509 msg.u.channel.pathname,
510 msg.u.channel.name,
511 msg.u.channel.relayd_id, msg.u.channel.output,
512 msg.u.channel.tracefile_size,
513 msg.u.channel.tracefile_count, 0,
514 msg.u.channel.monitor,
515 msg.u.channel.live_timer_interval,
516 NULL, NULL);
517 if (new_channel == NULL) {
518 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_OUTFD_ERROR);
519 goto end_nosignal;
520 }
521 new_channel->nb_init_stream_left = msg.u.channel.nb_init_streams;
522 switch (msg.u.channel.output) {
523 case LTTNG_EVENT_SPLICE:
524 new_channel->output = CONSUMER_CHANNEL_SPLICE;
525 break;
526 case LTTNG_EVENT_MMAP:
527 new_channel->output = CONSUMER_CHANNEL_MMAP;
528 break;
529 default:
530 ERR("Channel output unknown %d", msg.u.channel.output);
531 goto end_nosignal;
532 }
533
534 /* Translate and save channel type. */
535 switch (msg.u.channel.type) {
536 case CONSUMER_CHANNEL_TYPE_DATA:
537 case CONSUMER_CHANNEL_TYPE_METADATA:
538 new_channel->type = msg.u.channel.type;
539 break;
540 default:
541 assert(0);
542 goto end_nosignal;
543 };
544
545 health_code_update();
546
547 if (ctx->on_recv_channel != NULL) {
548 ret_recv = ctx->on_recv_channel(new_channel);
549 if (ret_recv == 0) {
550 ret = consumer_add_channel(new_channel, ctx);
551 } else if (ret_recv < 0) {
552 goto end_nosignal;
553 }
554 } else {
555 ret = consumer_add_channel(new_channel, ctx);
556 }
557 if (msg.u.channel.type == CONSUMER_CHANNEL_TYPE_DATA && !ret) {
558 int monitor_start_ret;
559
560 DBG("Consumer starting monitor timer");
561 consumer_timer_live_start(new_channel,
562 msg.u.channel.live_timer_interval);
563 monitor_start_ret = consumer_timer_monitor_start(
564 new_channel,
565 msg.u.channel.monitor_timer_interval);
566 if (monitor_start_ret < 0) {
567 ERR("Starting channel monitoring timer failed");
568 goto end_nosignal;
569 }
570
571 }
572
573 health_code_update();
574
575 /* If we received an error in add_channel, we need to report it. */
576 if (ret < 0) {
577 ret = consumer_send_status_msg(sock, ret);
578 if (ret < 0) {
579 goto error_fatal;
580 }
581 goto end_nosignal;
582 }
583
584 goto end_nosignal;
585 }
586 case LTTNG_CONSUMER_ADD_STREAM:
587 {
588 int fd;
589 struct lttng_pipe *stream_pipe;
590 struct lttng_consumer_stream *new_stream;
591 struct lttng_consumer_channel *channel;
592 int alloc_ret = 0;
593
594 /*
595 * Get stream's channel reference. Needed when adding the stream to the
596 * global hash table.
597 */
598 channel = consumer_find_channel(msg.u.stream.channel_key);
599 if (!channel) {
600 /*
601 * We could not find the channel. Can happen if cpu hotplug
602 * happens while tearing down.
603 */
604 ERR("Unable to find channel key %" PRIu64, msg.u.stream.channel_key);
605 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
606 }
607
608 health_code_update();
609
610 /* First send a status message before receiving the fds. */
611 ret = consumer_send_status_msg(sock, ret_code);
612 if (ret < 0) {
613 /* Somehow, the session daemon is not responding anymore. */
614 goto error_add_stream_fatal;
615 }
616
617 health_code_update();
618
619 if (ret_code != LTTCOMM_CONSUMERD_SUCCESS) {
620 /* Channel was not found. */
621 goto error_add_stream_nosignal;
622 }
623
624 /* Blocking call */
625 health_poll_entry();
626 ret = lttng_consumer_poll_socket(consumer_sockpoll);
627 health_poll_exit();
628 if (ret) {
629 goto error_add_stream_fatal;
630 }
631
632 health_code_update();
633
634 /* Get stream file descriptor from socket */
635 ret = lttcomm_recv_fds_unix_sock(sock, &fd, 1);
636 if (ret != sizeof(fd)) {
637 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_ERROR_RECV_FD);
638 goto end;
639 }
640
641 health_code_update();
642
643 /*
644 * Send status code to session daemon only if the recv works. If the
645 * above recv() failed, the session daemon is notified through the
646 * error socket and the teardown is eventually done.
647 */
648 ret = consumer_send_status_msg(sock, ret_code);
649 if (ret < 0) {
650 /* Somehow, the session daemon is not responding anymore. */
651 goto error_add_stream_nosignal;
652 }
653
654 health_code_update();
655
656 pthread_mutex_lock(&channel->lock);
657 new_stream = consumer_allocate_stream(channel->key,
658 fd,
659 channel->name,
660 channel->relayd_id,
661 channel->session_id,
662 channel->trace_chunk,
663 msg.u.stream.cpu,
664 &alloc_ret,
665 channel->type,
666 channel->monitor);
667 if (new_stream == NULL) {
668 switch (alloc_ret) {
669 case -ENOMEM:
670 case -EINVAL:
671 default:
672 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_OUTFD_ERROR);
673 break;
674 }
675 pthread_mutex_unlock(&channel->lock);
676 goto error_add_stream_nosignal;
677 }
678
679 new_stream->chan = channel;
680 new_stream->wait_fd = fd;
681 ret = kernctl_get_max_subbuf_size(new_stream->wait_fd,
682 &new_stream->max_sb_size);
683 if (ret < 0) {
684 pthread_mutex_unlock(&channel->lock);
685 ERR("Failed to get kernel maximal subbuffer size");
686 goto error_add_stream_nosignal;
687 }
688
689 consumer_stream_update_channel_attributes(new_stream,
690 channel);
691 switch (channel->output) {
692 case CONSUMER_CHANNEL_SPLICE:
693 new_stream->output = LTTNG_EVENT_SPLICE;
694 ret = utils_create_pipe(new_stream->splice_pipe);
695 if (ret < 0) {
696 pthread_mutex_unlock(&channel->lock);
697 goto error_add_stream_nosignal;
698 }
699 break;
700 case CONSUMER_CHANNEL_MMAP:
701 new_stream->output = LTTNG_EVENT_MMAP;
702 break;
703 default:
704 ERR("Stream output unknown %d", channel->output);
705 pthread_mutex_unlock(&channel->lock);
706 goto error_add_stream_nosignal;
707 }
708
709 /*
710 * We've just assigned the channel to the stream so increment the
711 * refcount right now. We don't need to increment the refcount for
712 * streams in no monitor because we handle manually the cleanup of
713 * those. It is very important to make sure there is NO prior
714 * consumer_del_stream() calls or else the refcount will be unbalanced.
715 */
716 if (channel->monitor) {
717 uatomic_inc(&new_stream->chan->refcount);
718 }
719
720 /*
721 * The buffer flush is done on the session daemon side for the kernel
722 * so no need for the stream "hangup_flush_done" variable to be
723 * tracked. This is important for a kernel stream since we don't rely
724 * on the flush state of the stream to read data. It's not the case for
725 * user space tracing.
726 */
727 new_stream->hangup_flush_done = 0;
728
729 health_code_update();
730
731 pthread_mutex_lock(&new_stream->lock);
732 if (ctx->on_recv_stream) {
733 ret = ctx->on_recv_stream(new_stream);
734 if (ret < 0) {
735 pthread_mutex_unlock(&new_stream->lock);
736 pthread_mutex_unlock(&channel->lock);
737 consumer_stream_free(new_stream);
738 goto error_add_stream_nosignal;
739 }
740 }
741 health_code_update();
742
743 if (new_stream->metadata_flag) {
744 channel->metadata_stream = new_stream;
745 }
746
747 /* Do not monitor this stream. */
748 if (!channel->monitor) {
749 DBG("Kernel consumer add stream %s in no monitor mode with "
750 "relayd id %" PRIu64, new_stream->name,
751 new_stream->net_seq_idx);
752 cds_list_add(&new_stream->send_node, &channel->streams.head);
753 pthread_mutex_unlock(&new_stream->lock);
754 pthread_mutex_unlock(&channel->lock);
755 goto end_add_stream;
756 }
757
758 /* Send stream to relayd if the stream has an ID. */
759 if (new_stream->net_seq_idx != (uint64_t) -1ULL) {
760 ret = consumer_send_relayd_stream(new_stream,
761 new_stream->chan->pathname);
762 if (ret < 0) {
763 pthread_mutex_unlock(&new_stream->lock);
764 pthread_mutex_unlock(&channel->lock);
765 consumer_stream_free(new_stream);
766 goto error_add_stream_nosignal;
767 }
768
769 /*
770 * If adding an extra stream to an already
771 * existing channel (e.g. cpu hotplug), we need
772 * to send the "streams_sent" command to relayd.
773 */
774 if (channel->streams_sent_to_relayd) {
775 ret = consumer_send_relayd_streams_sent(
776 new_stream->net_seq_idx);
777 if (ret < 0) {
778 pthread_mutex_unlock(&new_stream->lock);
779 pthread_mutex_unlock(&channel->lock);
780 goto error_add_stream_nosignal;
781 }
782 }
783 }
784 pthread_mutex_unlock(&new_stream->lock);
785 pthread_mutex_unlock(&channel->lock);
786
787 /* Get the right pipe where the stream will be sent. */
788 if (new_stream->metadata_flag) {
789 consumer_add_metadata_stream(new_stream);
790 stream_pipe = ctx->consumer_metadata_pipe;
791 } else {
792 consumer_add_data_stream(new_stream);
793 stream_pipe = ctx->consumer_data_pipe;
794 }
795
796 /* Visible to other threads */
797 new_stream->globally_visible = 1;
798
799 health_code_update();
800
801 ret = lttng_pipe_write(stream_pipe, &new_stream, sizeof(new_stream));
802 if (ret < 0) {
803 ERR("Consumer write %s stream to pipe %d",
804 new_stream->metadata_flag ? "metadata" : "data",
805 lttng_pipe_get_writefd(stream_pipe));
806 if (new_stream->metadata_flag) {
807 consumer_del_stream_for_metadata(new_stream);
808 } else {
809 consumer_del_stream_for_data(new_stream);
810 }
811 goto error_add_stream_nosignal;
812 }
813
814 DBG("Kernel consumer ADD_STREAM %s (fd: %d) %s with relayd id %" PRIu64,
815 new_stream->name, fd, new_stream->chan->pathname, new_stream->relayd_stream_id);
816 end_add_stream:
817 break;
818 error_add_stream_nosignal:
819 goto end_nosignal;
820 error_add_stream_fatal:
821 goto error_fatal;
822 }
823 case LTTNG_CONSUMER_STREAMS_SENT:
824 {
825 struct lttng_consumer_channel *channel;
826
827 /*
828 * Get stream's channel reference. Needed when adding the stream to the
829 * global hash table.
830 */
831 channel = consumer_find_channel(msg.u.sent_streams.channel_key);
832 if (!channel) {
833 /*
834 * We could not find the channel. Can happen if cpu hotplug
835 * happens while tearing down.
836 */
837 ERR("Unable to find channel key %" PRIu64,
838 msg.u.sent_streams.channel_key);
839 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
840 }
841
842 health_code_update();
843
844 /*
845 * Send status code to session daemon.
846 */
847 ret = consumer_send_status_msg(sock, ret_code);
848 if (ret < 0 || ret_code != LTTCOMM_CONSUMERD_SUCCESS) {
849 /* Somehow, the session daemon is not responding anymore. */
850 goto error_streams_sent_nosignal;
851 }
852
853 health_code_update();
854
855 /*
856 * We should not send this message if we don't monitor the
857 * streams in this channel.
858 */
859 if (!channel->monitor) {
860 goto end_error_streams_sent;
861 }
862
863 health_code_update();
864 /* Send stream to relayd if the stream has an ID. */
865 if (msg.u.sent_streams.net_seq_idx != (uint64_t) -1ULL) {
866 ret = consumer_send_relayd_streams_sent(
867 msg.u.sent_streams.net_seq_idx);
868 if (ret < 0) {
869 goto error_streams_sent_nosignal;
870 }
871 channel->streams_sent_to_relayd = true;
872 }
873 end_error_streams_sent:
874 break;
875 error_streams_sent_nosignal:
876 goto end_nosignal;
877 }
878 case LTTNG_CONSUMER_UPDATE_STREAM:
879 {
880 rcu_read_unlock();
881 return -ENOSYS;
882 }
883 case LTTNG_CONSUMER_DESTROY_RELAYD:
884 {
885 uint64_t index = msg.u.destroy_relayd.net_seq_idx;
886 struct consumer_relayd_sock_pair *relayd;
887
888 DBG("Kernel consumer destroying relayd %" PRIu64, index);
889
890 /* Get relayd reference if exists. */
891 relayd = consumer_find_relayd(index);
892 if (relayd == NULL) {
893 DBG("Unable to find relayd %" PRIu64, index);
894 ret_code = LTTCOMM_CONSUMERD_RELAYD_FAIL;
895 }
896
897 /*
898 * Each relayd socket pair has a refcount of stream attached to it
899 * which tells if the relayd is still active or not depending on the
900 * refcount value.
901 *
902 * This will set the destroy flag of the relayd object and destroy it
903 * if the refcount reaches zero when called.
904 *
905 * The destroy can happen either here or when a stream fd hangs up.
906 */
907 if (relayd) {
908 consumer_flag_relayd_for_destroy(relayd);
909 }
910
911 health_code_update();
912
913 ret = consumer_send_status_msg(sock, ret_code);
914 if (ret < 0) {
915 /* Somehow, the session daemon is not responding anymore. */
916 goto error_fatal;
917 }
918
919 goto end_nosignal;
920 }
921 case LTTNG_CONSUMER_DATA_PENDING:
922 {
923 int32_t ret;
924 uint64_t id = msg.u.data_pending.session_id;
925
926 DBG("Kernel consumer data pending command for id %" PRIu64, id);
927
928 ret = consumer_data_pending(id);
929
930 health_code_update();
931
932 /* Send back returned value to session daemon */
933 ret = lttcomm_send_unix_sock(sock, &ret, sizeof(ret));
934 if (ret < 0) {
935 PERROR("send data pending ret code");
936 goto error_fatal;
937 }
938
939 /*
940 * No need to send back a status message since the data pending
941 * returned value is the response.
942 */
943 break;
944 }
945 case LTTNG_CONSUMER_SNAPSHOT_CHANNEL:
946 {
947 struct lttng_consumer_channel *channel;
948 uint64_t key = msg.u.snapshot_channel.key;
949
950 channel = consumer_find_channel(key);
951 if (!channel) {
952 ERR("Channel %" PRIu64 " not found", key);
953 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
954 } else {
955 pthread_mutex_lock(&channel->lock);
956 if (msg.u.snapshot_channel.metadata == 1) {
957 ret = lttng_kconsumer_snapshot_metadata(channel, key,
958 msg.u.snapshot_channel.pathname,
959 msg.u.snapshot_channel.relayd_id, ctx);
960 if (ret < 0) {
961 ERR("Snapshot metadata failed");
962 ret_code = LTTCOMM_CONSUMERD_SNAPSHOT_FAILED;
963 }
964 } else {
965 ret = lttng_kconsumer_snapshot_channel(channel, key,
966 msg.u.snapshot_channel.pathname,
967 msg.u.snapshot_channel.relayd_id,
968 msg.u.snapshot_channel.nb_packets_per_stream,
969 ctx);
970 if (ret < 0) {
971 ERR("Snapshot channel failed");
972 ret_code = LTTCOMM_CONSUMERD_SNAPSHOT_FAILED;
973 }
974 }
975 pthread_mutex_unlock(&channel->lock);
976 }
977 health_code_update();
978
979 ret = consumer_send_status_msg(sock, ret_code);
980 if (ret < 0) {
981 /* Somehow, the session daemon is not responding anymore. */
982 goto end_nosignal;
983 }
984 break;
985 }
986 case LTTNG_CONSUMER_DESTROY_CHANNEL:
987 {
988 uint64_t key = msg.u.destroy_channel.key;
989 struct lttng_consumer_channel *channel;
990
991 channel = consumer_find_channel(key);
992 if (!channel) {
993 ERR("Kernel consumer destroy channel %" PRIu64 " not found", key);
994 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
995 }
996
997 health_code_update();
998
999 ret = consumer_send_status_msg(sock, ret_code);
1000 if (ret < 0) {
1001 /* Somehow, the session daemon is not responding anymore. */
1002 goto end_destroy_channel;
1003 }
1004
1005 health_code_update();
1006
1007 /* Stop right now if no channel was found. */
1008 if (!channel) {
1009 goto end_destroy_channel;
1010 }
1011
1012 /*
1013 * This command should ONLY be issued for channel with streams set in
1014 * no monitor mode.
1015 */
1016 assert(!channel->monitor);
1017
1018 /*
1019 * The refcount should ALWAYS be 0 in the case of a channel in no
1020 * monitor mode.
1021 */
1022 assert(!uatomic_sub_return(&channel->refcount, 1));
1023
1024 consumer_del_channel(channel);
1025 end_destroy_channel:
1026 goto end_nosignal;
1027 }
1028 case LTTNG_CONSUMER_DISCARDED_EVENTS:
1029 {
1030 ssize_t ret;
1031 uint64_t count;
1032 struct lttng_consumer_channel *channel;
1033 uint64_t id = msg.u.discarded_events.session_id;
1034 uint64_t key = msg.u.discarded_events.channel_key;
1035
1036 DBG("Kernel consumer discarded events command for session id %"
1037 PRIu64 ", channel key %" PRIu64, id, key);
1038
1039 channel = consumer_find_channel(key);
1040 if (!channel) {
1041 ERR("Kernel consumer discarded events channel %"
1042 PRIu64 " not found", key);
1043 count = 0;
1044 } else {
1045 count = channel->discarded_events;
1046 }
1047
1048 health_code_update();
1049
1050 /* Send back returned value to session daemon */
1051 ret = lttcomm_send_unix_sock(sock, &count, sizeof(count));
1052 if (ret < 0) {
1053 PERROR("send discarded events");
1054 goto error_fatal;
1055 }
1056
1057 break;
1058 }
1059 case LTTNG_CONSUMER_LOST_PACKETS:
1060 {
1061 ssize_t ret;
1062 uint64_t count;
1063 struct lttng_consumer_channel *channel;
1064 uint64_t id = msg.u.lost_packets.session_id;
1065 uint64_t key = msg.u.lost_packets.channel_key;
1066
1067 DBG("Kernel consumer lost packets command for session id %"
1068 PRIu64 ", channel key %" PRIu64, id, key);
1069
1070 channel = consumer_find_channel(key);
1071 if (!channel) {
1072 ERR("Kernel consumer lost packets channel %"
1073 PRIu64 " not found", key);
1074 count = 0;
1075 } else {
1076 count = channel->lost_packets;
1077 }
1078
1079 health_code_update();
1080
1081 /* Send back returned value to session daemon */
1082 ret = lttcomm_send_unix_sock(sock, &count, sizeof(count));
1083 if (ret < 0) {
1084 PERROR("send lost packets");
1085 goto error_fatal;
1086 }
1087
1088 break;
1089 }
1090 case LTTNG_CONSUMER_SET_CHANNEL_MONITOR_PIPE:
1091 {
1092 int channel_monitor_pipe;
1093
1094 ret_code = LTTCOMM_CONSUMERD_SUCCESS;
1095 /* Successfully received the command's type. */
1096 ret = consumer_send_status_msg(sock, ret_code);
1097 if (ret < 0) {
1098 goto error_fatal;
1099 }
1100
1101 ret = lttcomm_recv_fds_unix_sock(sock, &channel_monitor_pipe,
1102 1);
1103 if (ret != sizeof(channel_monitor_pipe)) {
1104 ERR("Failed to receive channel monitor pipe");
1105 goto error_fatal;
1106 }
1107
1108 DBG("Received channel monitor pipe (%d)", channel_monitor_pipe);
1109 ret = consumer_timer_thread_set_channel_monitor_pipe(
1110 channel_monitor_pipe);
1111 if (!ret) {
1112 int flags;
1113
1114 ret_code = LTTCOMM_CONSUMERD_SUCCESS;
1115 /* Set the pipe as non-blocking. */
1116 ret = fcntl(channel_monitor_pipe, F_GETFL, 0);
1117 if (ret == -1) {
1118 PERROR("fcntl get flags of the channel monitoring pipe");
1119 goto error_fatal;
1120 }
1121 flags = ret;
1122
1123 ret = fcntl(channel_monitor_pipe, F_SETFL,
1124 flags | O_NONBLOCK);
1125 if (ret == -1) {
1126 PERROR("fcntl set O_NONBLOCK flag of the channel monitoring pipe");
1127 goto error_fatal;
1128 }
1129 DBG("Channel monitor pipe set as non-blocking");
1130 } else {
1131 ret_code = LTTCOMM_CONSUMERD_ALREADY_SET;
1132 }
1133 ret = consumer_send_status_msg(sock, ret_code);
1134 if (ret < 0) {
1135 goto error_fatal;
1136 }
1137 break;
1138 }
1139 case LTTNG_CONSUMER_ROTATE_CHANNEL:
1140 {
1141 struct lttng_consumer_channel *channel;
1142 uint64_t key = msg.u.rotate_channel.key;
1143
1144 DBG("Consumer rotate channel %" PRIu64, key);
1145
1146 channel = consumer_find_channel(key);
1147 if (!channel) {
1148 ERR("Channel %" PRIu64 " not found", key);
1149 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
1150 } else {
1151 /*
1152 * Sample the rotate position of all the streams in this channel.
1153 */
1154 ret = lttng_consumer_rotate_channel(channel, key,
1155 msg.u.rotate_channel.relayd_id,
1156 msg.u.rotate_channel.metadata,
1157 ctx);
1158 if (ret < 0) {
1159 ERR("Rotate channel failed");
1160 ret_code = LTTCOMM_CONSUMERD_ROTATION_FAIL;
1161 }
1162
1163 health_code_update();
1164 }
1165 ret = consumer_send_status_msg(sock, ret_code);
1166 if (ret < 0) {
1167 /* Somehow, the session daemon is not responding anymore. */
1168 goto error_rotate_channel;
1169 }
1170 if (channel) {
1171 /* Rotate the streams that are ready right now. */
1172 ret = lttng_consumer_rotate_ready_streams(
1173 channel, key, ctx);
1174 if (ret < 0) {
1175 ERR("Rotate ready streams failed");
1176 }
1177 }
1178 break;
1179 error_rotate_channel:
1180 goto end_nosignal;
1181 }
1182 case LTTNG_CONSUMER_CLEAR_CHANNEL:
1183 {
1184 struct lttng_consumer_channel *channel;
1185 uint64_t key = msg.u.clear_channel.key;
1186
1187 channel = consumer_find_channel(key);
1188 if (!channel) {
1189 DBG("Channel %" PRIu64 " not found", key);
1190 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
1191 } else {
1192 ret = lttng_consumer_clear_channel(channel);
1193 if (ret) {
1194 ERR("Clear channel failed");
1195 ret_code = ret;
1196 }
1197
1198 health_code_update();
1199 }
1200 ret = consumer_send_status_msg(sock, ret_code);
1201 if (ret < 0) {
1202 /* Somehow, the session daemon is not responding anymore. */
1203 goto end_nosignal;
1204 }
1205
1206 break;
1207 }
1208 case LTTNG_CONSUMER_INIT:
1209 {
1210 ret_code = lttng_consumer_init_command(ctx,
1211 msg.u.init.sessiond_uuid);
1212 health_code_update();
1213 ret = consumer_send_status_msg(sock, ret_code);
1214 if (ret < 0) {
1215 /* Somehow, the session daemon is not responding anymore. */
1216 goto end_nosignal;
1217 }
1218 break;
1219 }
1220 case LTTNG_CONSUMER_CREATE_TRACE_CHUNK:
1221 {
1222 const struct lttng_credentials credentials = {
1223 .uid = msg.u.create_trace_chunk.credentials.value.uid,
1224 .gid = msg.u.create_trace_chunk.credentials.value.gid,
1225 };
1226 const bool is_local_trace =
1227 !msg.u.create_trace_chunk.relayd_id.is_set;
1228 const uint64_t relayd_id =
1229 msg.u.create_trace_chunk.relayd_id.value;
1230 const char *chunk_override_name =
1231 *msg.u.create_trace_chunk.override_name ?
1232 msg.u.create_trace_chunk.override_name :
1233 NULL;
1234 struct lttng_directory_handle *chunk_directory_handle = NULL;
1235
1236 /*
1237 * The session daemon will only provide a chunk directory file
1238 * descriptor for local traces.
1239 */
1240 if (is_local_trace) {
1241 int chunk_dirfd;
1242
1243 /* Acnowledge the reception of the command. */
1244 ret = consumer_send_status_msg(sock,
1245 LTTCOMM_CONSUMERD_SUCCESS);
1246 if (ret < 0) {
1247 /* Somehow, the session daemon is not responding anymore. */
1248 goto end_nosignal;
1249 }
1250
1251 ret = lttcomm_recv_fds_unix_sock(sock, &chunk_dirfd, 1);
1252 if (ret != sizeof(chunk_dirfd)) {
1253 ERR("Failed to receive trace chunk directory file descriptor");
1254 goto error_fatal;
1255 }
1256
1257 DBG("Received trace chunk directory fd (%d)",
1258 chunk_dirfd);
1259 chunk_directory_handle = lttng_directory_handle_create_from_dirfd(
1260 chunk_dirfd);
1261 if (!chunk_directory_handle) {
1262 ERR("Failed to initialize chunk directory handle from directory file descriptor");
1263 if (close(chunk_dirfd)) {
1264 PERROR("Failed to close chunk directory file descriptor");
1265 }
1266 goto error_fatal;
1267 }
1268 }
1269
1270 ret_code = lttng_consumer_create_trace_chunk(
1271 !is_local_trace ? &relayd_id : NULL,
1272 msg.u.create_trace_chunk.session_id,
1273 msg.u.create_trace_chunk.chunk_id,
1274 (time_t) msg.u.create_trace_chunk
1275 .creation_timestamp,
1276 chunk_override_name,
1277 msg.u.create_trace_chunk.credentials.is_set ?
1278 &credentials :
1279 NULL,
1280 chunk_directory_handle);
1281 lttng_directory_handle_put(chunk_directory_handle);
1282 goto end_msg_sessiond;
1283 }
1284 case LTTNG_CONSUMER_CLOSE_TRACE_CHUNK:
1285 {
1286 enum lttng_trace_chunk_command_type close_command =
1287 msg.u.close_trace_chunk.close_command.value;
1288 const uint64_t relayd_id =
1289 msg.u.close_trace_chunk.relayd_id.value;
1290 struct lttcomm_consumer_close_trace_chunk_reply reply;
1291 char path[LTTNG_PATH_MAX];
1292
1293 ret_code = lttng_consumer_close_trace_chunk(
1294 msg.u.close_trace_chunk.relayd_id.is_set ?
1295 &relayd_id :
1296 NULL,
1297 msg.u.close_trace_chunk.session_id,
1298 msg.u.close_trace_chunk.chunk_id,
1299 (time_t) msg.u.close_trace_chunk.close_timestamp,
1300 msg.u.close_trace_chunk.close_command.is_set ?
1301 &close_command :
1302 NULL, path);
1303 reply.ret_code = ret_code;
1304 reply.path_length = strlen(path) + 1;
1305 ret = lttcomm_send_unix_sock(sock, &reply, sizeof(reply));
1306 if (ret != sizeof(reply)) {
1307 goto error_fatal;
1308 }
1309 ret = lttcomm_send_unix_sock(sock, path, reply.path_length);
1310 if (ret != reply.path_length) {
1311 goto error_fatal;
1312 }
1313 goto end_nosignal;
1314 }
1315 case LTTNG_CONSUMER_TRACE_CHUNK_EXISTS:
1316 {
1317 const uint64_t relayd_id =
1318 msg.u.trace_chunk_exists.relayd_id.value;
1319
1320 ret_code = lttng_consumer_trace_chunk_exists(
1321 msg.u.trace_chunk_exists.relayd_id.is_set ?
1322 &relayd_id : NULL,
1323 msg.u.trace_chunk_exists.session_id,
1324 msg.u.trace_chunk_exists.chunk_id);
1325 goto end_msg_sessiond;
1326 }
1327 default:
1328 goto end_nosignal;
1329 }
1330
1331 end_nosignal:
1332 /*
1333 * Return 1 to indicate success since the 0 value can be a socket
1334 * shutdown during the recv() or send() call.
1335 */
1336 ret = 1;
1337 goto end;
1338 error_fatal:
1339 /* This will issue a consumer stop. */
1340 ret = -1;
1341 goto end;
1342 end_msg_sessiond:
1343 /*
1344 * The returned value here is not useful since either way we'll return 1 to
1345 * the caller because the session daemon socket management is done
1346 * elsewhere. Returning a negative code or 0 will shutdown the consumer.
1347 */
1348 ret = consumer_send_status_msg(sock, ret_code);
1349 if (ret < 0) {
1350 goto error_fatal;
1351 }
1352 ret = 1;
1353 end:
1354 health_code_update();
1355 rcu_read_unlock();
1356 return ret;
1357 }
1358
1359 /*
1360 * Populate index values of a kernel stream. Values are set in big endian order.
1361 *
1362 * Return 0 on success or else a negative value.
1363 */
1364 static int get_index_values(struct ctf_packet_index *index, int infd)
1365 {
1366 int ret;
1367 uint64_t packet_size, content_size, timestamp_begin, timestamp_end,
1368 events_discarded, stream_id, stream_instance_id,
1369 packet_seq_num;
1370
1371 ret = kernctl_get_timestamp_begin(infd, &timestamp_begin);
1372 if (ret < 0) {
1373 PERROR("kernctl_get_timestamp_begin");
1374 goto error;
1375 }
1376
1377 ret = kernctl_get_timestamp_end(infd, &timestamp_end);
1378 if (ret < 0) {
1379 PERROR("kernctl_get_timestamp_end");
1380 goto error;
1381 }
1382
1383 ret = kernctl_get_events_discarded(infd, &events_discarded);
1384 if (ret < 0) {
1385 PERROR("kernctl_get_events_discarded");
1386 goto error;
1387 }
1388
1389 ret = kernctl_get_content_size(infd, &content_size);
1390 if (ret < 0) {
1391 PERROR("kernctl_get_content_size");
1392 goto error;
1393 }
1394
1395 ret = kernctl_get_packet_size(infd, &packet_size);
1396 if (ret < 0) {
1397 PERROR("kernctl_get_packet_size");
1398 goto error;
1399 }
1400
1401 ret = kernctl_get_stream_id(infd, &stream_id);
1402 if (ret < 0) {
1403 PERROR("kernctl_get_stream_id");
1404 goto error;
1405 }
1406
1407 ret = kernctl_get_instance_id(infd, &stream_instance_id);
1408 if (ret < 0) {
1409 if (ret == -ENOTTY) {
1410 /* Command not implemented by lttng-modules. */
1411 stream_instance_id = -1ULL;
1412 } else {
1413 PERROR("kernctl_get_instance_id");
1414 goto error;
1415 }
1416 }
1417
1418 ret = kernctl_get_sequence_number(infd, &packet_seq_num);
1419 if (ret < 0) {
1420 if (ret == -ENOTTY) {
1421 /* Command not implemented by lttng-modules. */
1422 packet_seq_num = -1ULL;
1423 ret = 0;
1424 } else {
1425 PERROR("kernctl_get_sequence_number");
1426 goto error;
1427 }
1428 }
1429 index->packet_seq_num = htobe64(index->packet_seq_num);
1430
1431 *index = (typeof(*index)) {
1432 .offset = index->offset,
1433 .packet_size = htobe64(packet_size),
1434 .content_size = htobe64(content_size),
1435 .timestamp_begin = htobe64(timestamp_begin),
1436 .timestamp_end = htobe64(timestamp_end),
1437 .events_discarded = htobe64(events_discarded),
1438 .stream_id = htobe64(stream_id),
1439 .stream_instance_id = htobe64(stream_instance_id),
1440 .packet_seq_num = htobe64(packet_seq_num),
1441 };
1442
1443 error:
1444 return ret;
1445 }
1446 /*
1447 * Sync metadata meaning request them to the session daemon and snapshot to the
1448 * metadata thread can consumer them.
1449 *
1450 * Metadata stream lock MUST be acquired.
1451 *
1452 * Return 0 if new metadatda is available, EAGAIN if the metadata stream
1453 * is empty or a negative value on error.
1454 */
1455 int lttng_kconsumer_sync_metadata(struct lttng_consumer_stream *metadata)
1456 {
1457 int ret;
1458
1459 assert(metadata);
1460
1461 ret = kernctl_buffer_flush(metadata->wait_fd);
1462 if (ret < 0) {
1463 ERR("Failed to flush kernel stream");
1464 goto end;
1465 }
1466
1467 ret = kernctl_snapshot(metadata->wait_fd);
1468 if (ret < 0) {
1469 if (ret != -EAGAIN) {
1470 ERR("Sync metadata, taking kernel snapshot failed.");
1471 goto end;
1472 }
1473 DBG("Sync metadata, no new kernel metadata");
1474 /* No new metadata, exit. */
1475 ret = ENODATA;
1476 goto end;
1477 }
1478
1479 end:
1480 return ret;
1481 }
1482
1483 static
1484 int update_stream_stats(struct lttng_consumer_stream *stream)
1485 {
1486 int ret;
1487 uint64_t seq, discarded;
1488
1489 ret = kernctl_get_sequence_number(stream->wait_fd, &seq);
1490 if (ret < 0) {
1491 if (ret == -ENOTTY) {
1492 /* Command not implemented by lttng-modules. */
1493 seq = -1ULL;
1494 stream->sequence_number_unavailable = true;
1495 } else {
1496 PERROR("kernctl_get_sequence_number");
1497 goto end;
1498 }
1499 }
1500
1501 /*
1502 * Start the sequence when we extract the first packet in case we don't
1503 * start at 0 (for example if a consumer is not connected to the
1504 * session immediately after the beginning).
1505 */
1506 if (stream->last_sequence_number == -1ULL) {
1507 stream->last_sequence_number = seq;
1508 } else if (seq > stream->last_sequence_number) {
1509 stream->chan->lost_packets += seq -
1510 stream->last_sequence_number - 1;
1511 } else {
1512 /* seq <= last_sequence_number */
1513 ERR("Sequence number inconsistent : prev = %" PRIu64
1514 ", current = %" PRIu64,
1515 stream->last_sequence_number, seq);
1516 ret = -1;
1517 goto end;
1518 }
1519 stream->last_sequence_number = seq;
1520
1521 ret = kernctl_get_events_discarded(stream->wait_fd, &discarded);
1522 if (ret < 0) {
1523 PERROR("kernctl_get_events_discarded");
1524 goto end;
1525 }
1526 if (discarded < stream->last_discarded_events) {
1527 /*
1528 * Overflow has occurred. We assume only one wrap-around
1529 * has occurred.
1530 */
1531 stream->chan->discarded_events += (1ULL << (CAA_BITS_PER_LONG - 1)) -
1532 stream->last_discarded_events + discarded;
1533 } else {
1534 stream->chan->discarded_events += discarded -
1535 stream->last_discarded_events;
1536 }
1537 stream->last_discarded_events = discarded;
1538 ret = 0;
1539
1540 end:
1541 return ret;
1542 }
1543
1544 /*
1545 * Check if the local version of the metadata stream matches with the version
1546 * of the metadata stream in the kernel. If it was updated, set the reset flag
1547 * on the stream.
1548 */
1549 static
1550 int metadata_stream_check_version(int infd, struct lttng_consumer_stream *stream)
1551 {
1552 int ret;
1553 uint64_t cur_version;
1554
1555 ret = kernctl_get_metadata_version(infd, &cur_version);
1556 if (ret < 0) {
1557 if (ret == -ENOTTY) {
1558 /*
1559 * LTTng-modules does not implement this
1560 * command.
1561 */
1562 ret = 0;
1563 goto end;
1564 }
1565 ERR("Failed to get the metadata version");
1566 goto end;
1567 }
1568
1569 if (stream->metadata_version == cur_version) {
1570 ret = 0;
1571 goto end;
1572 }
1573
1574 DBG("New metadata version detected");
1575 stream->metadata_version = cur_version;
1576 stream->reset_metadata_flag = 1;
1577 ret = 0;
1578
1579 end:
1580 return ret;
1581 }
1582
1583 /*
1584 * Consume data on a file descriptor and write it on a trace file.
1585 * The stream and channel locks must be held by the caller.
1586 */
1587 ssize_t lttng_kconsumer_read_subbuffer(struct lttng_consumer_stream *stream,
1588 struct lttng_consumer_local_data *ctx)
1589 {
1590 unsigned long len, subbuf_size, padding;
1591 int err, write_index = 1, rotation_ret;
1592 ssize_t ret = 0;
1593 int infd = stream->wait_fd;
1594 struct ctf_packet_index index = {};
1595
1596 DBG("In read_subbuffer (infd : %d)", infd);
1597
1598 /*
1599 * If the stream was flagged to be ready for rotation before we extract the
1600 * next packet, rotate it now.
1601 */
1602 if (stream->rotate_ready) {
1603 DBG("Rotate stream before extracting data");
1604 rotation_ret = lttng_consumer_rotate_stream(ctx, stream);
1605 if (rotation_ret < 0) {
1606 ERR("Stream rotation error");
1607 ret = -1;
1608 goto error;
1609 }
1610 }
1611
1612 /* Get the next subbuffer */
1613 err = kernctl_get_next_subbuf(infd);
1614 if (err != 0) {
1615 /*
1616 * This is a debug message even for single-threaded consumer,
1617 * because poll() have more relaxed criterions than get subbuf,
1618 * so get_subbuf may fail for short race windows where poll()
1619 * would issue wakeups.
1620 */
1621 DBG("Reserving sub buffer failed (everything is normal, "
1622 "it is due to concurrency)");
1623 ret = err;
1624 goto error;
1625 }
1626
1627 /* Get the full subbuffer size including padding */
1628 err = kernctl_get_padded_subbuf_size(infd, &len);
1629 if (err != 0) {
1630 PERROR("Getting sub-buffer len failed.");
1631 err = kernctl_put_subbuf(infd);
1632 if (err != 0) {
1633 if (err == -EFAULT) {
1634 PERROR("Error in unreserving sub buffer\n");
1635 } else if (err == -EIO) {
1636 /* Should never happen with newer LTTng versions */
1637 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1638 }
1639 ret = err;
1640 goto error;
1641 }
1642 ret = err;
1643 goto error;
1644 }
1645
1646 if (!stream->metadata_flag) {
1647 ret = get_index_values(&index, infd);
1648 if (ret < 0) {
1649 err = kernctl_put_subbuf(infd);
1650 if (err != 0) {
1651 if (err == -EFAULT) {
1652 PERROR("Error in unreserving sub buffer\n");
1653 } else if (err == -EIO) {
1654 /* Should never happen with newer LTTng versions */
1655 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1656 }
1657 ret = err;
1658 goto error;
1659 }
1660 goto error;
1661 }
1662 ret = update_stream_stats(stream);
1663 if (ret < 0) {
1664 err = kernctl_put_subbuf(infd);
1665 if (err != 0) {
1666 if (err == -EFAULT) {
1667 PERROR("Error in unreserving sub buffer\n");
1668 } else if (err == -EIO) {
1669 /* Should never happen with newer LTTng versions */
1670 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1671 }
1672 ret = err;
1673 goto error;
1674 }
1675 goto error;
1676 }
1677 } else {
1678 write_index = 0;
1679 ret = metadata_stream_check_version(infd, stream);
1680 if (ret < 0) {
1681 err = kernctl_put_subbuf(infd);
1682 if (err != 0) {
1683 if (err == -EFAULT) {
1684 PERROR("Error in unreserving sub buffer\n");
1685 } else if (err == -EIO) {
1686 /* Should never happen with newer LTTng versions */
1687 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1688 }
1689 ret = err;
1690 goto error;
1691 }
1692 goto error;
1693 }
1694 }
1695
1696 switch (stream->chan->output) {
1697 case CONSUMER_CHANNEL_SPLICE:
1698 /*
1699 * XXX: The lttng-modules splice "actor" does not handle copying
1700 * partial pages hence only using the subbuffer size without the
1701 * padding makes the splice fail.
1702 */
1703 subbuf_size = len;
1704 padding = 0;
1705
1706 /* splice the subbuffer to the tracefile */
1707 ret = lttng_consumer_on_read_subbuffer_splice(ctx, stream, subbuf_size,
1708 padding, &index);
1709 /*
1710 * XXX: Splice does not support network streaming so the return value
1711 * is simply checked against subbuf_size and not like the mmap() op.
1712 */
1713 if (ret != subbuf_size) {
1714 /*
1715 * display the error but continue processing to try
1716 * to release the subbuffer
1717 */
1718 ERR("Error splicing to tracefile (ret: %zd != len: %lu)",
1719 ret, subbuf_size);
1720 write_index = 0;
1721 }
1722 break;
1723 case CONSUMER_CHANNEL_MMAP:
1724 {
1725 const char *subbuf_addr;
1726 struct lttng_buffer_view subbuf_view;
1727
1728 /* Get subbuffer size without padding */
1729 err = kernctl_get_subbuf_size(infd, &subbuf_size);
1730 if (err != 0) {
1731 PERROR("Getting sub-buffer len failed.");
1732 err = kernctl_put_subbuf(infd);
1733 if (err != 0) {
1734 if (err == -EFAULT) {
1735 PERROR("Error in unreserving sub buffer\n");
1736 } else if (err == -EIO) {
1737 /* Should never happen with newer LTTng versions */
1738 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1739 }
1740 ret = err;
1741 goto error;
1742 }
1743 ret = err;
1744 goto error;
1745 }
1746
1747 ret = get_current_subbuf_addr(stream, &subbuf_addr);
1748 if (ret) {
1749 goto error_put_subbuf;
1750 }
1751
1752 /* Make sure the tracer is not gone mad on us! */
1753 assert(len >= subbuf_size);
1754
1755 padding = len - subbuf_size;
1756
1757 subbuf_view = lttng_buffer_view_init(subbuf_addr, 0, len);
1758
1759 /* write the subbuffer to the tracefile */
1760 ret = lttng_consumer_on_read_subbuffer_mmap(
1761 ctx, stream, &subbuf_view, padding, &index);
1762 /*
1763 * The mmap operation should write subbuf_size amount of data
1764 * when network streaming or the full padding (len) size when we
1765 * are _not_ streaming.
1766 */
1767 if ((ret != subbuf_size && stream->net_seq_idx != (uint64_t) -1ULL) ||
1768 (ret != len && stream->net_seq_idx == (uint64_t) -1ULL)) {
1769 /*
1770 * Display the error but continue processing to try to release the
1771 * subbuffer. This is a DBG statement since this is possible to
1772 * happen without being a critical error.
1773 */
1774 DBG("Error writing to tracefile "
1775 "(ret: %zd != len: %lu != subbuf_size: %lu)",
1776 ret, len, subbuf_size);
1777 write_index = 0;
1778 }
1779 break;
1780 }
1781 default:
1782 ERR("Unknown output method");
1783 ret = -EPERM;
1784 }
1785 error_put_subbuf:
1786 err = kernctl_put_next_subbuf(infd);
1787 if (err != 0) {
1788 if (err == -EFAULT) {
1789 PERROR("Error in unreserving sub buffer\n");
1790 } else if (err == -EIO) {
1791 /* Should never happen with newer LTTng versions */
1792 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1793 }
1794 ret = err;
1795 goto error;
1796 }
1797
1798 /* Write index if needed. */
1799 if (!write_index) {
1800 goto rotate;
1801 }
1802
1803 if (stream->chan->live_timer_interval && !stream->metadata_flag) {
1804 /*
1805 * In live, block until all the metadata is sent.
1806 */
1807 pthread_mutex_lock(&stream->metadata_timer_lock);
1808 assert(!stream->missed_metadata_flush);
1809 stream->waiting_on_metadata = true;
1810 pthread_mutex_unlock(&stream->metadata_timer_lock);
1811
1812 err = consumer_stream_sync_metadata(ctx, stream->session_id);
1813
1814 pthread_mutex_lock(&stream->metadata_timer_lock);
1815 stream->waiting_on_metadata = false;
1816 if (stream->missed_metadata_flush) {
1817 stream->missed_metadata_flush = false;
1818 pthread_mutex_unlock(&stream->metadata_timer_lock);
1819 (void) consumer_flush_kernel_index(stream);
1820 } else {
1821 pthread_mutex_unlock(&stream->metadata_timer_lock);
1822 }
1823 if (err < 0) {
1824 goto error;
1825 }
1826 }
1827
1828 err = consumer_stream_write_index(stream, &index);
1829 if (err < 0) {
1830 goto error;
1831 }
1832
1833 rotate:
1834 /*
1835 * After extracting the packet, we check if the stream is now ready to be
1836 * rotated and perform the action immediately.
1837 */
1838 rotation_ret = lttng_consumer_stream_is_rotate_ready(stream);
1839 if (rotation_ret == 1) {
1840 rotation_ret = lttng_consumer_rotate_stream(ctx, stream);
1841 if (rotation_ret < 0) {
1842 ERR("Stream rotation error");
1843 ret = -1;
1844 goto error;
1845 }
1846 } else if (rotation_ret < 0) {
1847 ERR("Checking if stream is ready to rotate");
1848 ret = -1;
1849 goto error;
1850 }
1851
1852 error:
1853 return ret;
1854 }
1855
1856 int lttng_kconsumer_on_recv_stream(struct lttng_consumer_stream *stream)
1857 {
1858 int ret;
1859
1860 assert(stream);
1861
1862 /*
1863 * Don't create anything if this is set for streaming or if there is
1864 * no current trace chunk on the parent channel.
1865 */
1866 if (stream->net_seq_idx == (uint64_t) -1ULL && stream->chan->monitor &&
1867 stream->chan->trace_chunk) {
1868 ret = consumer_stream_create_output_files(stream, true);
1869 if (ret) {
1870 goto error;
1871 }
1872 }
1873
1874 if (stream->output == LTTNG_EVENT_MMAP) {
1875 /* get the len of the mmap region */
1876 unsigned long mmap_len;
1877
1878 ret = kernctl_get_mmap_len(stream->wait_fd, &mmap_len);
1879 if (ret != 0) {
1880 PERROR("kernctl_get_mmap_len");
1881 goto error_close_fd;
1882 }
1883 stream->mmap_len = (size_t) mmap_len;
1884
1885 stream->mmap_base = mmap(NULL, stream->mmap_len, PROT_READ,
1886 MAP_PRIVATE, stream->wait_fd, 0);
1887 if (stream->mmap_base == MAP_FAILED) {
1888 PERROR("Error mmaping");
1889 ret = -1;
1890 goto error_close_fd;
1891 }
1892 }
1893
1894 /* we return 0 to let the library handle the FD internally */
1895 return 0;
1896
1897 error_close_fd:
1898 if (stream->out_fd >= 0) {
1899 int err;
1900
1901 err = close(stream->out_fd);
1902 assert(!err);
1903 stream->out_fd = -1;
1904 }
1905 error:
1906 return ret;
1907 }
1908
1909 /*
1910 * Check if data is still being extracted from the buffers for a specific
1911 * stream. Consumer data lock MUST be acquired before calling this function
1912 * and the stream lock.
1913 *
1914 * Return 1 if the traced data are still getting read else 0 meaning that the
1915 * data is available for trace viewer reading.
1916 */
1917 int lttng_kconsumer_data_pending(struct lttng_consumer_stream *stream)
1918 {
1919 int ret;
1920
1921 assert(stream);
1922
1923 if (stream->endpoint_status != CONSUMER_ENDPOINT_ACTIVE) {
1924 ret = 0;
1925 goto end;
1926 }
1927
1928 ret = kernctl_get_next_subbuf(stream->wait_fd);
1929 if (ret == 0) {
1930 /* There is still data so let's put back this subbuffer. */
1931 ret = kernctl_put_subbuf(stream->wait_fd);
1932 assert(ret == 0);
1933 ret = 1; /* Data is pending */
1934 goto end;
1935 }
1936
1937 /* Data is NOT pending and ready to be read. */
1938 ret = 0;
1939
1940 end:
1941 return ret;
1942 }