/*
 * Fix: consumer: snapshot: assertion on subsequent snapshot
 * src/common/kernel-consumer/kernel-consumer.c (lttng-tools)
 */
1 /*
2 * Copyright (C) 2011 EfficiOS Inc.
3 * Copyright (C) 2011 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
4 * Copyright (C) 2017 Jérémie Galarneau <jeremie.galarneau@efficios.com>
5 *
6 * SPDX-License-Identifier: GPL-2.0-only
7 *
8 */
9
10 #define _LGPL_SOURCE
11 #include <assert.h>
12 #include <poll.h>
13 #include <pthread.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include <sys/mman.h>
17 #include <sys/socket.h>
18 #include <sys/types.h>
19 #include <inttypes.h>
20 #include <unistd.h>
21 #include <sys/stat.h>
22 #include <stdint.h>
23
24 #include <bin/lttng-consumerd/health-consumerd.h>
25 #include <common/common.h>
26 #include <common/kernel-ctl/kernel-ctl.h>
27 #include <common/sessiond-comm/sessiond-comm.h>
28 #include <common/sessiond-comm/relayd.h>
29 #include <common/compat/fcntl.h>
30 #include <common/compat/endian.h>
31 #include <common/pipe.h>
32 #include <common/relayd/relayd.h>
33 #include <common/utils.h>
34 #include <common/consumer/consumer-stream.h>
35 #include <common/index/index.h>
36 #include <common/consumer/consumer-timer.h>
37 #include <common/optional.h>
38 #include <common/buffer-view.h>
39 #include <common/consumer/consumer.h>
40 #include <common/consumer/metadata-bucket.h>
41
42 #include "kernel-consumer.h"
43
44 extern struct lttng_consumer_global_data consumer_data;
45 extern int consumer_poll_timeout;
46
47 /*
48 * Take a snapshot for a specific fd
49 *
50 * Returns 0 on success, < 0 on error
51 */
52 int lttng_kconsumer_take_snapshot(struct lttng_consumer_stream *stream)
53 {
54 int ret = 0;
55 int infd = stream->wait_fd;
56
57 ret = kernctl_snapshot(infd);
58 /*
59 * -EAGAIN is not an error, it just means that there is no data to
60 * be read.
61 */
62 if (ret != 0 && ret != -EAGAIN) {
63 PERROR("Getting sub-buffer snapshot.");
64 }
65
66 return ret;
67 }
68
69 /*
70 * Sample consumed and produced positions for a specific fd.
71 *
72 * Returns 0 on success, < 0 on error.
73 */
74 int lttng_kconsumer_sample_snapshot_positions(
75 struct lttng_consumer_stream *stream)
76 {
77 assert(stream);
78
79 return kernctl_snapshot_sample_positions(stream->wait_fd);
80 }
81
82 /*
83 * Get the produced position
84 *
85 * Returns 0 on success, < 0 on error
86 */
87 int lttng_kconsumer_get_produced_snapshot(struct lttng_consumer_stream *stream,
88 unsigned long *pos)
89 {
90 int ret;
91 int infd = stream->wait_fd;
92
93 ret = kernctl_snapshot_get_produced(infd, pos);
94 if (ret != 0) {
95 PERROR("kernctl_snapshot_get_produced");
96 }
97
98 return ret;
99 }
100
101 /*
102 * Get the consumerd position
103 *
104 * Returns 0 on success, < 0 on error
105 */
106 int lttng_kconsumer_get_consumed_snapshot(struct lttng_consumer_stream *stream,
107 unsigned long *pos)
108 {
109 int ret;
110 int infd = stream->wait_fd;
111
112 ret = kernctl_snapshot_get_consumed(infd, pos);
113 if (ret != 0) {
114 PERROR("kernctl_snapshot_get_consumed");
115 }
116
117 return ret;
118 }
119
120 static
121 int get_current_subbuf_addr(struct lttng_consumer_stream *stream,
122 const char **addr)
123 {
124 int ret;
125 unsigned long mmap_offset;
126 const char *mmap_base = stream->mmap_base;
127
128 ret = kernctl_get_mmap_read_offset(stream->wait_fd, &mmap_offset);
129 if (ret < 0) {
130 PERROR("Failed to get mmap read offset");
131 goto error;
132 }
133
134 *addr = mmap_base + mmap_offset;
135 error:
136 return ret;
137 }
138
139 static void finalize_snapshot_stream(
140 struct lttng_consumer_stream *stream, uint64_t relayd_id)
141 {
142 ASSERT_LOCKED(stream->lock);
143
144 if (relayd_id == (uint64_t) -1ULL) {
145 if (stream->out_fd >= 0) {
146 const int ret = close(stream->out_fd);
147
148 if (ret < 0) {
149 PERROR("Failed to close stream snapshot output file descriptor");
150 }
151
152 stream->out_fd = -1;
153 }
154 } else {
155 close_relayd_stream(stream);
156 stream->net_seq_idx = (uint64_t) -1ULL;
157 }
158
159 lttng_trace_chunk_put(stream->trace_chunk);
160 stream->trace_chunk = NULL;
161 }
162
/*
 * Take a snapshot of all the streams of a channel.
 *
 * The RCU read-side lock must be held across this function to ensure the
 * existence of the channel. The channel lock is taken for the whole
 * duration of the snapshot, and each stream lock is taken while that
 * stream is processed.
 *
 * Returns 0 on success, < 0 on error.
 */
static int lttng_kconsumer_snapshot_channel(
		struct lttng_consumer_channel *channel,
		uint64_t key, char *path, uint64_t relayd_id,
		uint64_t nb_packets_per_stream,
		struct lttng_consumer_local_data *ctx)
{
	int ret;
	struct lttng_consumer_stream *stream;

	DBG("Kernel consumer snapshot channel %" PRIu64, key);

	/* Prevent channel modifications while we perform the snapshot.*/
	pthread_mutex_lock(&channel->lock);

	rcu_read_lock();

	/* Splice is not supported yet for channel snapshot. */
	if (channel->output != CONSUMER_CHANNEL_MMAP) {
		ERR("Unsupported output type for channel \"%s\": mmap output is required to record a snapshot",
				channel->name);
		ret = -1;
		goto end;
	}

	cds_list_for_each_entry(stream, &channel->streams.head, send_node) {
		unsigned long consumed_pos, produced_pos;

		health_code_update();

		/*
		 * Lock stream because we are about to change its state.
		 */
		pthread_mutex_lock(&stream->lock);

		/*
		 * The channel always owns a trace chunk here; grab an extra
		 * reference for this stream for the duration of the snapshot.
		 * It is released by finalize_snapshot_stream().
		 */
		assert(channel->trace_chunk);
		if (!lttng_trace_chunk_get(channel->trace_chunk)) {
			/*
			 * Can't happen barring an internal error as the channel
			 * holds a reference to the trace chunk.
			 */
			ERR("Failed to acquire reference to channel's trace chunk");
			ret = -1;
			goto end_unlock;
		}
		/*
		 * A previous snapshot must have released its chunk reference;
		 * a leftover reference here would indicate an unbalanced
		 * get/put (see finalize_snapshot_stream()).
		 */
		assert(!stream->trace_chunk);
		stream->trace_chunk = channel->trace_chunk;

		/*
		 * Assign the received relayd ID so we can use it for streaming. The streams
		 * are not visible to anyone so this is OK to change it.
		 */
		stream->net_seq_idx = relayd_id;
		channel->relayd_id = relayd_id;
		if (relayd_id != (uint64_t) -1ULL) {
			/* Remote snapshot: announce the stream to the relayd. */
			ret = consumer_send_relayd_stream(stream, path);
			if (ret < 0) {
				ERR("sending stream to relayd");
				goto error_finalize_stream;
			}
		} else {
			/* Local snapshot: create the on-disk output files. */
			ret = consumer_stream_create_output_files(stream,
					false);
			if (ret < 0) {
				goto error_finalize_stream;
			}
			DBG("Kernel consumer snapshot stream (%" PRIu64 ")",
					stream->key);
		}

		ret = kernctl_buffer_flush_empty(stream->wait_fd);
		if (ret < 0) {
			/*
			 * Doing a buffer flush which does not take into
			 * account empty packets. This is not perfect
			 * for stream intersection, but required as a
			 * fall-back when "flush_empty" is not
			 * implemented by lttng-modules.
			 */
			ret = kernctl_buffer_flush(stream->wait_fd);
			if (ret < 0) {
				ERR("Failed to flush kernel stream");
				goto error_finalize_stream;
			}
			/*
			 * NOTE(review): a successful fall-back flush jumps to
			 * end_unlock, which ends the snapshot after this
			 * stream instead of continuing the loop — presumably
			 * intentional legacy behavior; verify upstream.
			 */
			goto end_unlock;
		}

		ret = lttng_kconsumer_take_snapshot(stream);
		if (ret < 0) {
			ERR("Taking kernel snapshot");
			goto error_finalize_stream;
		}

		/* Sample the snapshot's produced and consumed boundaries. */
		ret = lttng_kconsumer_get_produced_snapshot(stream, &produced_pos);
		if (ret < 0) {
			ERR("Produced kernel snapshot position");
			goto error_finalize_stream;
		}

		ret = lttng_kconsumer_get_consumed_snapshot(stream, &consumed_pos);
		if (ret < 0) {
			ERR("Consumerd kernel snapshot position");
			goto error_finalize_stream;
		}

		/*
		 * Move the start position forward so that at most
		 * nb_packets_per_stream packets are captured (0 means
		 * unlimited, per consumer_get_consume_start_pos()).
		 */
		consumed_pos = consumer_get_consume_start_pos(consumed_pos,
				produced_pos, nb_packets_per_stream,
				stream->max_sb_size);

		/*
		 * Consume sub-buffers up to the produced position. The signed
		 * difference handles position counter wrap-around.
		 */
		while ((long) (consumed_pos - produced_pos) < 0) {
			ssize_t read_len;
			unsigned long len, padded_len;
			const char *subbuf_addr;
			struct lttng_buffer_view subbuf_view;

			health_code_update();
			DBG("Kernel consumer taking snapshot at pos %lu", consumed_pos);

			ret = kernctl_get_subbuf(stream->wait_fd, &consumed_pos);
			if (ret < 0) {
				if (ret != -EAGAIN) {
					PERROR("kernctl_get_subbuf snapshot");
					goto error_finalize_stream;
				}
				/*
				 * -EAGAIN: sub-buffer unavailable (e.g. being
				 * overwritten); account it as lost and move on.
				 */
				DBG("Kernel consumer get subbuf failed. Skipping it.");
				consumed_pos += stream->max_sb_size;
				stream->chan->lost_packets++;
				continue;
			}

			/* Data length, excluding padding. */
			ret = kernctl_get_subbuf_size(stream->wait_fd, &len);
			if (ret < 0) {
				ERR("Snapshot kernctl_get_subbuf_size");
				goto error_put_subbuf;
			}

			/* Full (padded) sub-buffer length. */
			ret = kernctl_get_padded_subbuf_size(stream->wait_fd, &padded_len);
			if (ret < 0) {
				ERR("Snapshot kernctl_get_padded_subbuf_size");
				goto error_put_subbuf;
			}

			ret = get_current_subbuf_addr(stream, &subbuf_addr);
			if (ret) {
				goto error_put_subbuf;
			}

			subbuf_view = lttng_buffer_view_init(
					subbuf_addr, 0, padded_len);
			read_len = lttng_consumer_on_read_subbuffer_mmap(
					stream, &subbuf_view,
					padded_len - len);
			/*
			 * We write the padded len in local tracefiles but the data len
			 * when using a relay. Display the error but continue processing
			 * to try to release the subbuffer.
			 */
			if (relayd_id != (uint64_t) -1ULL) {
				if (read_len != len) {
					ERR("Error sending to the relay (ret: %zd != len: %lu)",
							read_len, len);
				}
			} else {
				if (read_len != padded_len) {
					ERR("Error writing to tracefile (ret: %zd != len: %lu)",
							read_len, padded_len);
				}
			}

			/* Release the sub-buffer back to the tracer. */
			ret = kernctl_put_subbuf(stream->wait_fd);
			if (ret < 0) {
				ERR("Snapshot kernctl_put_subbuf");
				goto error_finalize_stream;
			}
			consumed_pos += stream->max_sb_size;
		}

		finalize_snapshot_stream(stream, relayd_id);
		pthread_mutex_unlock(&stream->lock);
	}

	/* All good! */
	ret = 0;
	goto end;

error_put_subbuf:
	/* Release the held sub-buffer before finalizing the stream. */
	ret = kernctl_put_subbuf(stream->wait_fd);
	if (ret < 0) {
		ERR("Snapshot kernctl_put_subbuf error path");
	}
error_finalize_stream:
	finalize_snapshot_stream(stream, relayd_id);
end_unlock:
	pthread_mutex_unlock(&stream->lock);
end:
	rcu_read_unlock();
	pthread_mutex_unlock(&channel->lock);
	return ret;
}
368
/*
 * Read the whole metadata available for a snapshot.
 *
 * The RCU read-side lock must be held across this function to ensure the
 * existence of metadata_channel. On return (success or error), the
 * metadata stream has been destroyed and detached from the channel.
 *
 * Returns 0 on success, < 0 on error.
 */
static int lttng_kconsumer_snapshot_metadata(
		struct lttng_consumer_channel *metadata_channel,
		uint64_t key, char *path, uint64_t relayd_id,
		struct lttng_consumer_local_data *ctx)
{
	int ret, use_relayd = 0;
	ssize_t ret_read;
	struct lttng_consumer_stream *metadata_stream;

	assert(ctx);

	DBG("Kernel consumer snapshot metadata with key %" PRIu64 " at path %s",
			key, path);

	rcu_read_lock();

	metadata_stream = metadata_channel->metadata_stream;
	assert(metadata_stream);

	/* Lock the metadata stream for the duration of the snapshot. */
	metadata_stream->read_subbuffer_ops.lock(metadata_stream);
	assert(metadata_channel->trace_chunk);
	assert(metadata_stream->trace_chunk);

	/* Flag once that we have a valid relayd for the stream. */
	if (relayd_id != (uint64_t) -1ULL) {
		use_relayd = 1;
	}

	if (use_relayd) {
		/* Remote snapshot: announce the stream to the relayd. */
		ret = consumer_send_relayd_stream(metadata_stream, path);
		if (ret < 0) {
			goto error_snapshot;
		}
	} else {
		/* Local snapshot: create the on-disk output files. */
		ret = consumer_stream_create_output_files(metadata_stream,
				false);
		if (ret < 0) {
			goto error_snapshot;
		}
	}

	/* Drain the metadata stream until no data remains (read returns 0). */
	do {
		health_code_update();

		ret_read = lttng_consumer_read_subbuffer(metadata_stream, ctx, true);
		if (ret_read < 0) {
			ERR("Kernel snapshot reading metadata subbuffer (ret: %zd)",
					ret_read);
			ret = ret_read;
			goto error_snapshot;
		}
	} while (ret_read > 0);

	if (use_relayd) {
		close_relayd_stream(metadata_stream);
		metadata_stream->net_seq_idx = (uint64_t) -1ULL;
	} else {
		if (metadata_stream->out_fd >= 0) {
			ret = close(metadata_stream->out_fd);
			if (ret < 0) {
				PERROR("Kernel consumer snapshot metadata close out_fd");
				/*
				 * Don't go on error here since the snapshot was successful at this
				 * point but somehow the close failed.
				 */
			}
			metadata_stream->out_fd = -1;
			/* Release the stream's reference on the trace chunk. */
			lttng_trace_chunk_put(metadata_stream->trace_chunk);
			metadata_stream->trace_chunk = NULL;
		}
	}

	ret = 0;
error_snapshot:
	/*
	 * Unlock, then destroy the snapshot metadata stream and detach it
	 * from the channel; it is re-created on the next snapshot.
	 */
	metadata_stream->read_subbuffer_ops.unlock(metadata_stream);
	consumer_stream_destroy(metadata_stream, NULL);
	metadata_channel->metadata_stream = NULL;
	rcu_read_unlock();
	return ret;
}
457
458 /*
459 * Receive command from session daemon and process it.
460 *
461 * Return 1 on success else a negative value or 0.
462 */
463 int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
464 int sock, struct pollfd *consumer_sockpoll)
465 {
466 ssize_t ret;
467 enum lttcomm_return_code ret_code = LTTCOMM_CONSUMERD_SUCCESS;
468 struct lttcomm_consumer_msg msg;
469
470 health_code_update();
471
472 ret = lttcomm_recv_unix_sock(sock, &msg, sizeof(msg));
473 if (ret != sizeof(msg)) {
474 if (ret > 0) {
475 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_ERROR_RECV_CMD);
476 ret = -1;
477 }
478 return ret;
479 }
480
481 health_code_update();
482
483 /* Deprecated command */
484 assert(msg.cmd_type != LTTNG_CONSUMER_STOP);
485
486 health_code_update();
487
488 /* relayd needs RCU read-side protection */
489 rcu_read_lock();
490
491 switch (msg.cmd_type) {
492 case LTTNG_CONSUMER_ADD_RELAYD_SOCKET:
493 {
494 uint32_t major = msg.u.relayd_sock.major;
495 uint32_t minor = msg.u.relayd_sock.minor;
496 enum lttcomm_sock_proto protocol =
497 msg.u.relayd_sock.relayd_socket_protocol;
498
499 /* Session daemon status message are handled in the following call. */
500 consumer_add_relayd_socket(msg.u.relayd_sock.net_index,
501 msg.u.relayd_sock.type, ctx, sock,
502 consumer_sockpoll, msg.u.relayd_sock.session_id,
503 msg.u.relayd_sock.relayd_session_id, major,
504 minor, protocol);
505 goto end_nosignal;
506 }
507 case LTTNG_CONSUMER_ADD_CHANNEL:
508 {
509 struct lttng_consumer_channel *new_channel;
510 int ret_recv;
511 const uint64_t chunk_id = msg.u.channel.chunk_id.value;
512
513 health_code_update();
514
515 /* First send a status message before receiving the fds. */
516 ret = consumer_send_status_msg(sock, ret_code);
517 if (ret < 0) {
518 /* Somehow, the session daemon is not responding anymore. */
519 goto error_fatal;
520 }
521
522 health_code_update();
523
524 DBG("consumer_add_channel %" PRIu64, msg.u.channel.channel_key);
525 new_channel = consumer_allocate_channel(msg.u.channel.channel_key,
526 msg.u.channel.session_id,
527 msg.u.channel.chunk_id.is_set ?
528 &chunk_id : NULL,
529 msg.u.channel.pathname,
530 msg.u.channel.name,
531 msg.u.channel.relayd_id, msg.u.channel.output,
532 msg.u.channel.tracefile_size,
533 msg.u.channel.tracefile_count, 0,
534 msg.u.channel.monitor,
535 msg.u.channel.live_timer_interval,
536 msg.u.channel.is_live,
537 NULL, NULL);
538 if (new_channel == NULL) {
539 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_OUTFD_ERROR);
540 goto end_nosignal;
541 }
542 new_channel->nb_init_stream_left = msg.u.channel.nb_init_streams;
543 switch (msg.u.channel.output) {
544 case LTTNG_EVENT_SPLICE:
545 new_channel->output = CONSUMER_CHANNEL_SPLICE;
546 break;
547 case LTTNG_EVENT_MMAP:
548 new_channel->output = CONSUMER_CHANNEL_MMAP;
549 break;
550 default:
551 ERR("Channel output unknown %d", msg.u.channel.output);
552 goto end_nosignal;
553 }
554
555 /* Translate and save channel type. */
556 switch (msg.u.channel.type) {
557 case CONSUMER_CHANNEL_TYPE_DATA:
558 case CONSUMER_CHANNEL_TYPE_METADATA:
559 new_channel->type = msg.u.channel.type;
560 break;
561 default:
562 assert(0);
563 goto end_nosignal;
564 };
565
566 health_code_update();
567
568 if (ctx->on_recv_channel != NULL) {
569 ret_recv = ctx->on_recv_channel(new_channel);
570 if (ret_recv == 0) {
571 ret = consumer_add_channel(new_channel, ctx);
572 } else if (ret_recv < 0) {
573 goto end_nosignal;
574 }
575 } else {
576 ret = consumer_add_channel(new_channel, ctx);
577 }
578 if (msg.u.channel.type == CONSUMER_CHANNEL_TYPE_DATA && !ret) {
579 int monitor_start_ret;
580
581 DBG("Consumer starting monitor timer");
582 consumer_timer_live_start(new_channel,
583 msg.u.channel.live_timer_interval);
584 monitor_start_ret = consumer_timer_monitor_start(
585 new_channel,
586 msg.u.channel.monitor_timer_interval);
587 if (monitor_start_ret < 0) {
588 ERR("Starting channel monitoring timer failed");
589 goto end_nosignal;
590 }
591
592 }
593
594 health_code_update();
595
596 /* If we received an error in add_channel, we need to report it. */
597 if (ret < 0) {
598 ret = consumer_send_status_msg(sock, ret);
599 if (ret < 0) {
600 goto error_fatal;
601 }
602 goto end_nosignal;
603 }
604
605 goto end_nosignal;
606 }
607 case LTTNG_CONSUMER_ADD_STREAM:
608 {
609 int fd;
610 struct lttng_pipe *stream_pipe;
611 struct lttng_consumer_stream *new_stream;
612 struct lttng_consumer_channel *channel;
613 int alloc_ret = 0;
614
615 /*
616 * Get stream's channel reference. Needed when adding the stream to the
617 * global hash table.
618 */
619 channel = consumer_find_channel(msg.u.stream.channel_key);
620 if (!channel) {
621 /*
622 * We could not find the channel. Can happen if cpu hotplug
623 * happens while tearing down.
624 */
625 ERR("Unable to find channel key %" PRIu64, msg.u.stream.channel_key);
626 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
627 }
628
629 health_code_update();
630
631 /* First send a status message before receiving the fds. */
632 ret = consumer_send_status_msg(sock, ret_code);
633 if (ret < 0) {
634 /* Somehow, the session daemon is not responding anymore. */
635 goto error_add_stream_fatal;
636 }
637
638 health_code_update();
639
640 if (ret_code != LTTCOMM_CONSUMERD_SUCCESS) {
641 /* Channel was not found. */
642 goto error_add_stream_nosignal;
643 }
644
645 /* Blocking call */
646 health_poll_entry();
647 ret = lttng_consumer_poll_socket(consumer_sockpoll);
648 health_poll_exit();
649 if (ret) {
650 goto error_add_stream_fatal;
651 }
652
653 health_code_update();
654
655 /* Get stream file descriptor from socket */
656 ret = lttcomm_recv_fds_unix_sock(sock, &fd, 1);
657 if (ret != sizeof(fd)) {
658 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_ERROR_RECV_FD);
659 goto end;
660 }
661
662 health_code_update();
663
664 /*
665 * Send status code to session daemon only if the recv works. If the
666 * above recv() failed, the session daemon is notified through the
667 * error socket and the teardown is eventually done.
668 */
669 ret = consumer_send_status_msg(sock, ret_code);
670 if (ret < 0) {
671 /* Somehow, the session daemon is not responding anymore. */
672 goto error_add_stream_nosignal;
673 }
674
675 health_code_update();
676
677 pthread_mutex_lock(&channel->lock);
678 new_stream = consumer_stream_create(
679 channel,
680 channel->key,
681 fd,
682 channel->name,
683 channel->relayd_id,
684 channel->session_id,
685 channel->trace_chunk,
686 msg.u.stream.cpu,
687 &alloc_ret,
688 channel->type,
689 channel->monitor);
690 if (new_stream == NULL) {
691 switch (alloc_ret) {
692 case -ENOMEM:
693 case -EINVAL:
694 default:
695 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_OUTFD_ERROR);
696 break;
697 }
698 pthread_mutex_unlock(&channel->lock);
699 goto error_add_stream_nosignal;
700 }
701
702 new_stream->wait_fd = fd;
703 ret = kernctl_get_max_subbuf_size(new_stream->wait_fd,
704 &new_stream->max_sb_size);
705 if (ret < 0) {
706 pthread_mutex_unlock(&channel->lock);
707 ERR("Failed to get kernel maximal subbuffer size");
708 goto error_add_stream_nosignal;
709 }
710
711 consumer_stream_update_channel_attributes(new_stream,
712 channel);
713
714 /*
715 * We've just assigned the channel to the stream so increment the
716 * refcount right now. We don't need to increment the refcount for
717 * streams in no monitor because we handle manually the cleanup of
718 * those. It is very important to make sure there is NO prior
719 * consumer_del_stream() calls or else the refcount will be unbalanced.
720 */
721 if (channel->monitor) {
722 uatomic_inc(&new_stream->chan->refcount);
723 }
724
725 /*
726 * The buffer flush is done on the session daemon side for the kernel
727 * so no need for the stream "hangup_flush_done" variable to be
728 * tracked. This is important for a kernel stream since we don't rely
729 * on the flush state of the stream to read data. It's not the case for
730 * user space tracing.
731 */
732 new_stream->hangup_flush_done = 0;
733
734 health_code_update();
735
736 pthread_mutex_lock(&new_stream->lock);
737 if (ctx->on_recv_stream) {
738 ret = ctx->on_recv_stream(new_stream);
739 if (ret < 0) {
740 pthread_mutex_unlock(&new_stream->lock);
741 pthread_mutex_unlock(&channel->lock);
742 consumer_stream_free(new_stream);
743 goto error_add_stream_nosignal;
744 }
745 }
746 health_code_update();
747
748 if (new_stream->metadata_flag) {
749 channel->metadata_stream = new_stream;
750 }
751
752 /* Do not monitor this stream. */
753 if (!channel->monitor) {
754 DBG("Kernel consumer add stream %s in no monitor mode with "
755 "relayd id %" PRIu64, new_stream->name,
756 new_stream->net_seq_idx);
757 cds_list_add(&new_stream->send_node, &channel->streams.head);
758 pthread_mutex_unlock(&new_stream->lock);
759 pthread_mutex_unlock(&channel->lock);
760 goto end_add_stream;
761 }
762
763 /* Send stream to relayd if the stream has an ID. */
764 if (new_stream->net_seq_idx != (uint64_t) -1ULL) {
765 ret = consumer_send_relayd_stream(new_stream,
766 new_stream->chan->pathname);
767 if (ret < 0) {
768 pthread_mutex_unlock(&new_stream->lock);
769 pthread_mutex_unlock(&channel->lock);
770 consumer_stream_free(new_stream);
771 goto error_add_stream_nosignal;
772 }
773
774 /*
775 * If adding an extra stream to an already
776 * existing channel (e.g. cpu hotplug), we need
777 * to send the "streams_sent" command to relayd.
778 */
779 if (channel->streams_sent_to_relayd) {
780 ret = consumer_send_relayd_streams_sent(
781 new_stream->net_seq_idx);
782 if (ret < 0) {
783 pthread_mutex_unlock(&new_stream->lock);
784 pthread_mutex_unlock(&channel->lock);
785 goto error_add_stream_nosignal;
786 }
787 }
788 }
789 pthread_mutex_unlock(&new_stream->lock);
790 pthread_mutex_unlock(&channel->lock);
791
792 /* Get the right pipe where the stream will be sent. */
793 if (new_stream->metadata_flag) {
794 consumer_add_metadata_stream(new_stream);
795 stream_pipe = ctx->consumer_metadata_pipe;
796 } else {
797 consumer_add_data_stream(new_stream);
798 stream_pipe = ctx->consumer_data_pipe;
799 }
800
801 /* Visible to other threads */
802 new_stream->globally_visible = 1;
803
804 health_code_update();
805
806 ret = lttng_pipe_write(stream_pipe, &new_stream, sizeof(new_stream));
807 if (ret < 0) {
808 ERR("Consumer write %s stream to pipe %d",
809 new_stream->metadata_flag ? "metadata" : "data",
810 lttng_pipe_get_writefd(stream_pipe));
811 if (new_stream->metadata_flag) {
812 consumer_del_stream_for_metadata(new_stream);
813 } else {
814 consumer_del_stream_for_data(new_stream);
815 }
816 goto error_add_stream_nosignal;
817 }
818
819 DBG("Kernel consumer ADD_STREAM %s (fd: %d) %s with relayd id %" PRIu64,
820 new_stream->name, fd, new_stream->chan->pathname, new_stream->relayd_stream_id);
821 end_add_stream:
822 break;
823 error_add_stream_nosignal:
824 goto end_nosignal;
825 error_add_stream_fatal:
826 goto error_fatal;
827 }
828 case LTTNG_CONSUMER_STREAMS_SENT:
829 {
830 struct lttng_consumer_channel *channel;
831
832 /*
833 * Get stream's channel reference. Needed when adding the stream to the
834 * global hash table.
835 */
836 channel = consumer_find_channel(msg.u.sent_streams.channel_key);
837 if (!channel) {
838 /*
839 * We could not find the channel. Can happen if cpu hotplug
840 * happens while tearing down.
841 */
842 ERR("Unable to find channel key %" PRIu64,
843 msg.u.sent_streams.channel_key);
844 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
845 }
846
847 health_code_update();
848
849 /*
850 * Send status code to session daemon.
851 */
852 ret = consumer_send_status_msg(sock, ret_code);
853 if (ret < 0 || ret_code != LTTCOMM_CONSUMERD_SUCCESS) {
854 /* Somehow, the session daemon is not responding anymore. */
855 goto error_streams_sent_nosignal;
856 }
857
858 health_code_update();
859
860 /*
861 * We should not send this message if we don't monitor the
862 * streams in this channel.
863 */
864 if (!channel->monitor) {
865 goto end_error_streams_sent;
866 }
867
868 health_code_update();
869 /* Send stream to relayd if the stream has an ID. */
870 if (msg.u.sent_streams.net_seq_idx != (uint64_t) -1ULL) {
871 ret = consumer_send_relayd_streams_sent(
872 msg.u.sent_streams.net_seq_idx);
873 if (ret < 0) {
874 goto error_streams_sent_nosignal;
875 }
876 channel->streams_sent_to_relayd = true;
877 }
878 end_error_streams_sent:
879 break;
880 error_streams_sent_nosignal:
881 goto end_nosignal;
882 }
883 case LTTNG_CONSUMER_UPDATE_STREAM:
884 {
885 rcu_read_unlock();
886 return -ENOSYS;
887 }
888 case LTTNG_CONSUMER_DESTROY_RELAYD:
889 {
890 uint64_t index = msg.u.destroy_relayd.net_seq_idx;
891 struct consumer_relayd_sock_pair *relayd;
892
893 DBG("Kernel consumer destroying relayd %" PRIu64, index);
894
895 /* Get relayd reference if exists. */
896 relayd = consumer_find_relayd(index);
897 if (relayd == NULL) {
898 DBG("Unable to find relayd %" PRIu64, index);
899 ret_code = LTTCOMM_CONSUMERD_RELAYD_FAIL;
900 }
901
902 /*
903 * Each relayd socket pair has a refcount of stream attached to it
904 * which tells if the relayd is still active or not depending on the
905 * refcount value.
906 *
907 * This will set the destroy flag of the relayd object and destroy it
908 * if the refcount reaches zero when called.
909 *
910 * The destroy can happen either here or when a stream fd hangs up.
911 */
912 if (relayd) {
913 consumer_flag_relayd_for_destroy(relayd);
914 }
915
916 health_code_update();
917
918 ret = consumer_send_status_msg(sock, ret_code);
919 if (ret < 0) {
920 /* Somehow, the session daemon is not responding anymore. */
921 goto error_fatal;
922 }
923
924 goto end_nosignal;
925 }
926 case LTTNG_CONSUMER_DATA_PENDING:
927 {
928 int32_t ret;
929 uint64_t id = msg.u.data_pending.session_id;
930
931 DBG("Kernel consumer data pending command for id %" PRIu64, id);
932
933 ret = consumer_data_pending(id);
934
935 health_code_update();
936
937 /* Send back returned value to session daemon */
938 ret = lttcomm_send_unix_sock(sock, &ret, sizeof(ret));
939 if (ret < 0) {
940 PERROR("send data pending ret code");
941 goto error_fatal;
942 }
943
944 /*
945 * No need to send back a status message since the data pending
946 * returned value is the response.
947 */
948 break;
949 }
950 case LTTNG_CONSUMER_SNAPSHOT_CHANNEL:
951 {
952 struct lttng_consumer_channel *channel;
953 uint64_t key = msg.u.snapshot_channel.key;
954
955 channel = consumer_find_channel(key);
956 if (!channel) {
957 ERR("Channel %" PRIu64 " not found", key);
958 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
959 } else {
960 if (msg.u.snapshot_channel.metadata == 1) {
961 ret = lttng_kconsumer_snapshot_metadata(channel, key,
962 msg.u.snapshot_channel.pathname,
963 msg.u.snapshot_channel.relayd_id, ctx);
964 if (ret < 0) {
965 ERR("Snapshot metadata failed");
966 ret_code = LTTCOMM_CONSUMERD_SNAPSHOT_FAILED;
967 }
968 } else {
969 ret = lttng_kconsumer_snapshot_channel(channel, key,
970 msg.u.snapshot_channel.pathname,
971 msg.u.snapshot_channel.relayd_id,
972 msg.u.snapshot_channel.nb_packets_per_stream,
973 ctx);
974 if (ret < 0) {
975 ERR("Snapshot channel failed");
976 ret_code = LTTCOMM_CONSUMERD_SNAPSHOT_FAILED;
977 }
978 }
979 }
980 health_code_update();
981
982 ret = consumer_send_status_msg(sock, ret_code);
983 if (ret < 0) {
984 /* Somehow, the session daemon is not responding anymore. */
985 goto end_nosignal;
986 }
987 break;
988 }
989 case LTTNG_CONSUMER_DESTROY_CHANNEL:
990 {
991 uint64_t key = msg.u.destroy_channel.key;
992 struct lttng_consumer_channel *channel;
993
994 channel = consumer_find_channel(key);
995 if (!channel) {
996 ERR("Kernel consumer destroy channel %" PRIu64 " not found", key);
997 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
998 }
999
1000 health_code_update();
1001
1002 ret = consumer_send_status_msg(sock, ret_code);
1003 if (ret < 0) {
1004 /* Somehow, the session daemon is not responding anymore. */
1005 goto end_destroy_channel;
1006 }
1007
1008 health_code_update();
1009
1010 /* Stop right now if no channel was found. */
1011 if (!channel) {
1012 goto end_destroy_channel;
1013 }
1014
1015 /*
1016 * This command should ONLY be issued for channel with streams set in
1017 * no monitor mode.
1018 */
1019 assert(!channel->monitor);
1020
1021 /*
1022 * The refcount should ALWAYS be 0 in the case of a channel in no
1023 * monitor mode.
1024 */
1025 assert(!uatomic_sub_return(&channel->refcount, 1));
1026
1027 consumer_del_channel(channel);
1028 end_destroy_channel:
1029 goto end_nosignal;
1030 }
1031 case LTTNG_CONSUMER_DISCARDED_EVENTS:
1032 {
1033 ssize_t ret;
1034 uint64_t count;
1035 struct lttng_consumer_channel *channel;
1036 uint64_t id = msg.u.discarded_events.session_id;
1037 uint64_t key = msg.u.discarded_events.channel_key;
1038
1039 DBG("Kernel consumer discarded events command for session id %"
1040 PRIu64 ", channel key %" PRIu64, id, key);
1041
1042 channel = consumer_find_channel(key);
1043 if (!channel) {
1044 ERR("Kernel consumer discarded events channel %"
1045 PRIu64 " not found", key);
1046 count = 0;
1047 } else {
1048 count = channel->discarded_events;
1049 }
1050
1051 health_code_update();
1052
1053 /* Send back returned value to session daemon */
1054 ret = lttcomm_send_unix_sock(sock, &count, sizeof(count));
1055 if (ret < 0) {
1056 PERROR("send discarded events");
1057 goto error_fatal;
1058 }
1059
1060 break;
1061 }
1062 case LTTNG_CONSUMER_LOST_PACKETS:
1063 {
1064 ssize_t ret;
1065 uint64_t count;
1066 struct lttng_consumer_channel *channel;
1067 uint64_t id = msg.u.lost_packets.session_id;
1068 uint64_t key = msg.u.lost_packets.channel_key;
1069
1070 DBG("Kernel consumer lost packets command for session id %"
1071 PRIu64 ", channel key %" PRIu64, id, key);
1072
1073 channel = consumer_find_channel(key);
1074 if (!channel) {
1075 ERR("Kernel consumer lost packets channel %"
1076 PRIu64 " not found", key);
1077 count = 0;
1078 } else {
1079 count = channel->lost_packets;
1080 }
1081
1082 health_code_update();
1083
1084 /* Send back returned value to session daemon */
1085 ret = lttcomm_send_unix_sock(sock, &count, sizeof(count));
1086 if (ret < 0) {
1087 PERROR("send lost packets");
1088 goto error_fatal;
1089 }
1090
1091 break;
1092 }
1093 case LTTNG_CONSUMER_SET_CHANNEL_MONITOR_PIPE:
1094 {
1095 int channel_monitor_pipe;
1096
1097 ret_code = LTTCOMM_CONSUMERD_SUCCESS;
1098 /* Successfully received the command's type. */
1099 ret = consumer_send_status_msg(sock, ret_code);
1100 if (ret < 0) {
1101 goto error_fatal;
1102 }
1103
1104 ret = lttcomm_recv_fds_unix_sock(sock, &channel_monitor_pipe,
1105 1);
1106 if (ret != sizeof(channel_monitor_pipe)) {
1107 ERR("Failed to receive channel monitor pipe");
1108 goto error_fatal;
1109 }
1110
1111 DBG("Received channel monitor pipe (%d)", channel_monitor_pipe);
1112 ret = consumer_timer_thread_set_channel_monitor_pipe(
1113 channel_monitor_pipe);
1114 if (!ret) {
1115 int flags;
1116
1117 ret_code = LTTCOMM_CONSUMERD_SUCCESS;
1118 /* Set the pipe as non-blocking. */
1119 ret = fcntl(channel_monitor_pipe, F_GETFL, 0);
1120 if (ret == -1) {
1121 PERROR("fcntl get flags of the channel monitoring pipe");
1122 goto error_fatal;
1123 }
1124 flags = ret;
1125
1126 ret = fcntl(channel_monitor_pipe, F_SETFL,
1127 flags | O_NONBLOCK);
1128 if (ret == -1) {
1129 PERROR("fcntl set O_NONBLOCK flag of the channel monitoring pipe");
1130 goto error_fatal;
1131 }
1132 DBG("Channel monitor pipe set as non-blocking");
1133 } else {
1134 ret_code = LTTCOMM_CONSUMERD_ALREADY_SET;
1135 }
1136 ret = consumer_send_status_msg(sock, ret_code);
1137 if (ret < 0) {
1138 goto error_fatal;
1139 }
1140 break;
1141 }
1142 case LTTNG_CONSUMER_ROTATE_CHANNEL:
1143 {
1144 struct lttng_consumer_channel *channel;
1145 uint64_t key = msg.u.rotate_channel.key;
1146
1147 DBG("Consumer rotate channel %" PRIu64, key);
1148
1149 channel = consumer_find_channel(key);
1150 if (!channel) {
1151 ERR("Channel %" PRIu64 " not found", key);
1152 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
1153 } else {
1154 /*
1155 * Sample the rotate position of all the streams in this channel.
1156 */
1157 ret = lttng_consumer_rotate_channel(channel, key,
1158 msg.u.rotate_channel.relayd_id,
1159 msg.u.rotate_channel.metadata,
1160 ctx);
1161 if (ret < 0) {
1162 ERR("Rotate channel failed");
1163 ret_code = LTTCOMM_CONSUMERD_ROTATION_FAIL;
1164 }
1165
1166 health_code_update();
1167 }
1168 ret = consumer_send_status_msg(sock, ret_code);
1169 if (ret < 0) {
1170 /* Somehow, the session daemon is not responding anymore. */
1171 goto error_rotate_channel;
1172 }
1173 if (channel) {
1174 /* Rotate the streams that are ready right now. */
1175 ret = lttng_consumer_rotate_ready_streams(
1176 channel, key, ctx);
1177 if (ret < 0) {
1178 ERR("Rotate ready streams failed");
1179 }
1180 }
1181 break;
1182 error_rotate_channel:
1183 goto end_nosignal;
1184 }
1185 case LTTNG_CONSUMER_CLEAR_CHANNEL:
1186 {
1187 struct lttng_consumer_channel *channel;
1188 uint64_t key = msg.u.clear_channel.key;
1189
1190 channel = consumer_find_channel(key);
1191 if (!channel) {
1192 DBG("Channel %" PRIu64 " not found", key);
1193 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
1194 } else {
1195 ret = lttng_consumer_clear_channel(channel);
1196 if (ret) {
1197 ERR("Clear channel failed");
1198 ret_code = ret;
1199 }
1200
1201 health_code_update();
1202 }
1203 ret = consumer_send_status_msg(sock, ret_code);
1204 if (ret < 0) {
1205 /* Somehow, the session daemon is not responding anymore. */
1206 goto end_nosignal;
1207 }
1208
1209 break;
1210 }
1211 case LTTNG_CONSUMER_INIT:
1212 {
1213 ret_code = lttng_consumer_init_command(ctx,
1214 msg.u.init.sessiond_uuid);
1215 health_code_update();
1216 ret = consumer_send_status_msg(sock, ret_code);
1217 if (ret < 0) {
1218 /* Somehow, the session daemon is not responding anymore. */
1219 goto end_nosignal;
1220 }
1221 break;
1222 }
1223 case LTTNG_CONSUMER_CREATE_TRACE_CHUNK:
1224 {
1225 const struct lttng_credentials credentials = {
1226 .uid = msg.u.create_trace_chunk.credentials.value.uid,
1227 .gid = msg.u.create_trace_chunk.credentials.value.gid,
1228 };
1229 const bool is_local_trace =
1230 !msg.u.create_trace_chunk.relayd_id.is_set;
1231 const uint64_t relayd_id =
1232 msg.u.create_trace_chunk.relayd_id.value;
1233 const char *chunk_override_name =
1234 *msg.u.create_trace_chunk.override_name ?
1235 msg.u.create_trace_chunk.override_name :
1236 NULL;
1237 struct lttng_directory_handle *chunk_directory_handle = NULL;
1238
1239 /*
1240 * The session daemon will only provide a chunk directory file
1241 * descriptor for local traces.
1242 */
1243 if (is_local_trace) {
1244 int chunk_dirfd;
1245
1246 /* Acnowledge the reception of the command. */
1247 ret = consumer_send_status_msg(sock,
1248 LTTCOMM_CONSUMERD_SUCCESS);
1249 if (ret < 0) {
1250 /* Somehow, the session daemon is not responding anymore. */
1251 goto end_nosignal;
1252 }
1253
1254 ret = lttcomm_recv_fds_unix_sock(sock, &chunk_dirfd, 1);
1255 if (ret != sizeof(chunk_dirfd)) {
1256 ERR("Failed to receive trace chunk directory file descriptor");
1257 goto error_fatal;
1258 }
1259
1260 DBG("Received trace chunk directory fd (%d)",
1261 chunk_dirfd);
1262 chunk_directory_handle = lttng_directory_handle_create_from_dirfd(
1263 chunk_dirfd);
1264 if (!chunk_directory_handle) {
1265 ERR("Failed to initialize chunk directory handle from directory file descriptor");
1266 if (close(chunk_dirfd)) {
1267 PERROR("Failed to close chunk directory file descriptor");
1268 }
1269 goto error_fatal;
1270 }
1271 }
1272
1273 ret_code = lttng_consumer_create_trace_chunk(
1274 !is_local_trace ? &relayd_id : NULL,
1275 msg.u.create_trace_chunk.session_id,
1276 msg.u.create_trace_chunk.chunk_id,
1277 (time_t) msg.u.create_trace_chunk
1278 .creation_timestamp,
1279 chunk_override_name,
1280 msg.u.create_trace_chunk.credentials.is_set ?
1281 &credentials :
1282 NULL,
1283 chunk_directory_handle);
1284 lttng_directory_handle_put(chunk_directory_handle);
1285 goto end_msg_sessiond;
1286 }
1287 case LTTNG_CONSUMER_CLOSE_TRACE_CHUNK:
1288 {
1289 enum lttng_trace_chunk_command_type close_command =
1290 msg.u.close_trace_chunk.close_command.value;
1291 const uint64_t relayd_id =
1292 msg.u.close_trace_chunk.relayd_id.value;
1293 struct lttcomm_consumer_close_trace_chunk_reply reply;
1294 char path[LTTNG_PATH_MAX];
1295
1296 ret_code = lttng_consumer_close_trace_chunk(
1297 msg.u.close_trace_chunk.relayd_id.is_set ?
1298 &relayd_id :
1299 NULL,
1300 msg.u.close_trace_chunk.session_id,
1301 msg.u.close_trace_chunk.chunk_id,
1302 (time_t) msg.u.close_trace_chunk.close_timestamp,
1303 msg.u.close_trace_chunk.close_command.is_set ?
1304 &close_command :
1305 NULL, path);
1306 reply.ret_code = ret_code;
1307 reply.path_length = strlen(path) + 1;
1308 ret = lttcomm_send_unix_sock(sock, &reply, sizeof(reply));
1309 if (ret != sizeof(reply)) {
1310 goto error_fatal;
1311 }
1312 ret = lttcomm_send_unix_sock(sock, path, reply.path_length);
1313 if (ret != reply.path_length) {
1314 goto error_fatal;
1315 }
1316 goto end_nosignal;
1317 }
1318 case LTTNG_CONSUMER_TRACE_CHUNK_EXISTS:
1319 {
1320 const uint64_t relayd_id =
1321 msg.u.trace_chunk_exists.relayd_id.value;
1322
1323 ret_code = lttng_consumer_trace_chunk_exists(
1324 msg.u.trace_chunk_exists.relayd_id.is_set ?
1325 &relayd_id : NULL,
1326 msg.u.trace_chunk_exists.session_id,
1327 msg.u.trace_chunk_exists.chunk_id);
1328 goto end_msg_sessiond;
1329 }
1330 case LTTNG_CONSUMER_OPEN_CHANNEL_PACKETS:
1331 {
1332 const uint64_t key = msg.u.open_channel_packets.key;
1333 struct lttng_consumer_channel *channel =
1334 consumer_find_channel(key);
1335
1336 if (channel) {
1337 pthread_mutex_lock(&channel->lock);
1338 ret_code = lttng_consumer_open_channel_packets(channel);
1339 pthread_mutex_unlock(&channel->lock);
1340 } else {
1341 WARN("Channel %" PRIu64 " not found", key);
1342 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
1343 }
1344
1345 health_code_update();
1346 goto end_msg_sessiond;
1347 }
1348 default:
1349 goto end_nosignal;
1350 }
1351
1352 end_nosignal:
1353 /*
1354 * Return 1 to indicate success since the 0 value can be a socket
1355 * shutdown during the recv() or send() call.
1356 */
1357 ret = 1;
1358 goto end;
1359 error_fatal:
1360 /* This will issue a consumer stop. */
1361 ret = -1;
1362 goto end;
1363 end_msg_sessiond:
1364 /*
1365 * The returned value here is not useful since either way we'll return 1 to
1366 * the caller because the session daemon socket management is done
1367 * elsewhere. Returning a negative code or 0 will shutdown the consumer.
1368 */
1369 ret = consumer_send_status_msg(sock, ret_code);
1370 if (ret < 0) {
1371 goto error_fatal;
1372 }
1373 ret = 1;
1374 end:
1375 health_code_update();
1376 rcu_read_unlock();
1377 return ret;
1378 }
1379
1380 /*
1381 * Sync metadata meaning request them to the session daemon and snapshot to the
1382 * metadata thread can consumer them.
1383 *
1384 * Metadata stream lock MUST be acquired.
1385 */
1386 enum sync_metadata_status lttng_kconsumer_sync_metadata(
1387 struct lttng_consumer_stream *metadata)
1388 {
1389 int ret;
1390 enum sync_metadata_status status;
1391
1392 assert(metadata);
1393
1394 ret = kernctl_buffer_flush(metadata->wait_fd);
1395 if (ret < 0) {
1396 ERR("Failed to flush kernel stream");
1397 status = SYNC_METADATA_STATUS_ERROR;
1398 goto end;
1399 }
1400
1401 ret = kernctl_snapshot(metadata->wait_fd);
1402 if (ret < 0) {
1403 if (errno == EAGAIN) {
1404 /* No new metadata, exit. */
1405 DBG("Sync metadata, no new kernel metadata");
1406 status = SYNC_METADATA_STATUS_NO_DATA;
1407 } else {
1408 ERR("Sync metadata, taking kernel snapshot failed.");
1409 status = SYNC_METADATA_STATUS_ERROR;
1410 }
1411 } else {
1412 status = SYNC_METADATA_STATUS_NEW_DATA;
1413 }
1414
1415 end:
1416 return status;
1417 }
1418
1419 static
1420 int extract_common_subbuffer_info(struct lttng_consumer_stream *stream,
1421 struct stream_subbuffer *subbuf)
1422 {
1423 int ret;
1424
1425 ret = kernctl_get_subbuf_size(
1426 stream->wait_fd, &subbuf->info.data.subbuf_size);
1427 if (ret) {
1428 goto end;
1429 }
1430
1431 ret = kernctl_get_padded_subbuf_size(
1432 stream->wait_fd, &subbuf->info.data.padded_subbuf_size);
1433 if (ret) {
1434 goto end;
1435 }
1436
1437 end:
1438 return ret;
1439 }
1440
1441 static
1442 int extract_metadata_subbuffer_info(struct lttng_consumer_stream *stream,
1443 struct stream_subbuffer *subbuf)
1444 {
1445 int ret;
1446
1447 ret = extract_common_subbuffer_info(stream, subbuf);
1448 if (ret) {
1449 goto end;
1450 }
1451
1452 ret = kernctl_get_metadata_version(
1453 stream->wait_fd, &subbuf->info.metadata.version);
1454 if (ret) {
1455 goto end;
1456 }
1457
1458 end:
1459 return ret;
1460 }
1461
/*
 * Sample all sub-buffer information for a data stream: common sizes, packet
 * and content sizes, begin/end timestamps, discarded event count, and the
 * optional sequence number and stream instance id (absent on older
 * LTTng-modules). Returns 0 on success, a kernctl error code otherwise.
 */
static
int extract_data_subbuffer_info(struct lttng_consumer_stream *stream,
		struct stream_subbuffer *subbuf)
{
	int ret;

	/* Sub-buffer payload and padded sizes. */
	ret = extract_common_subbuffer_info(stream, subbuf);
	if (ret) {
		goto end;
	}

	ret = kernctl_get_packet_size(
			stream->wait_fd, &subbuf->info.data.packet_size);
	if (ret < 0) {
		PERROR("Failed to get sub-buffer packet size");
		goto end;
	}

	ret = kernctl_get_content_size(
			stream->wait_fd, &subbuf->info.data.content_size);
	if (ret < 0) {
		PERROR("Failed to get sub-buffer content size");
		goto end;
	}

	ret = kernctl_get_timestamp_begin(
			stream->wait_fd, &subbuf->info.data.timestamp_begin);
	if (ret < 0) {
		PERROR("Failed to get sub-buffer begin timestamp");
		goto end;
	}

	ret = kernctl_get_timestamp_end(
			stream->wait_fd, &subbuf->info.data.timestamp_end);
	if (ret < 0) {
		PERROR("Failed to get sub-buffer end timestamp");
		goto end;
	}

	ret = kernctl_get_events_discarded(
			stream->wait_fd, &subbuf->info.data.events_discarded);
	if (ret) {
		PERROR("Failed to get sub-buffer events discarded count");
		goto end;
	}

	/* Optional field: tolerate -ENOTTY from older tracer modules. */
	ret = kernctl_get_sequence_number(stream->wait_fd,
			&subbuf->info.data.sequence_number.value);
	if (ret) {
		/* May not be supported by older LTTng-modules. */
		if (ret != -ENOTTY) {
			PERROR("Failed to get sub-buffer sequence number");
			goto end;
		}
	} else {
		subbuf->info.data.sequence_number.is_set = true;
	}

	ret = kernctl_get_stream_id(
			stream->wait_fd, &subbuf->info.data.stream_id);
	if (ret < 0) {
		PERROR("Failed to get stream id");
		goto end;
	}

	/* Optional field: tolerate -ENOTTY from older tracer modules. */
	ret = kernctl_get_instance_id(stream->wait_fd,
			&subbuf->info.data.stream_instance_id.value);
	if (ret) {
		/* May not be supported by older LTTng-modules. */
		if (ret != -ENOTTY) {
			PERROR("Failed to get stream instance id");
			goto end;
		}
	} else {
		subbuf->info.data.stream_instance_id.is_set = true;
	}
end:
	return ret;
}
1541
1542 static
1543 int get_subbuffer_common(struct lttng_consumer_stream *stream,
1544 struct stream_subbuffer *subbuffer)
1545 {
1546 int ret;
1547
1548 ret = kernctl_get_next_subbuf(stream->wait_fd);
1549 if (ret) {
1550 /*
1551 * The caller only expects -ENODATA when there is no data to
1552 * read, but the kernel tracer returns -EAGAIN when there is
1553 * currently no data for a non-finalized stream, and -ENODATA
1554 * when there is no data for a finalized stream. Those can be
1555 * combined into a -ENODATA return value.
1556 */
1557 if (ret == -EAGAIN) {
1558 ret = -ENODATA;
1559 }
1560
1561 goto end;
1562 }
1563
1564 ret = stream->read_subbuffer_ops.extract_subbuffer_info(
1565 stream, subbuffer);
1566 end:
1567 return ret;
1568 }
1569
1570 static
1571 int get_next_subbuffer_splice(struct lttng_consumer_stream *stream,
1572 struct stream_subbuffer *subbuffer)
1573 {
1574 int ret;
1575
1576 ret = get_subbuffer_common(stream, subbuffer);
1577 if (ret) {
1578 goto end;
1579 }
1580
1581 subbuffer->buffer.fd = stream->wait_fd;
1582 end:
1583 return ret;
1584 }
1585
1586 static
1587 int get_next_subbuffer_mmap(struct lttng_consumer_stream *stream,
1588 struct stream_subbuffer *subbuffer)
1589 {
1590 int ret;
1591 const char *addr;
1592
1593 ret = get_subbuffer_common(stream, subbuffer);
1594 if (ret) {
1595 goto end;
1596 }
1597
1598 ret = get_current_subbuf_addr(stream, &addr);
1599 if (ret) {
1600 goto end;
1601 }
1602
1603 subbuffer->buffer.buffer = lttng_buffer_view_init(
1604 addr, 0, subbuffer->info.data.padded_subbuf_size);
1605 end:
1606 return ret;
1607 }
1608
/*
 * Reserve the next metadata sub-buffer using the "metadata check" tracer
 * operation, which also reports whether the metadata stream is in a coherent
 * (parseable) state. Used for live sessions when the tracer supports it.
 * Returns 0 on success, -ENODATA when no data is available, or another
 * negative error code.
 */
static
int get_next_subbuffer_metadata_check(struct lttng_consumer_stream *stream,
		struct stream_subbuffer *subbuffer)
{
	int ret;
	const char *addr;
	bool coherent;

	ret = kernctl_get_next_subbuf_metadata_check(stream->wait_fd,
			&coherent);
	if (ret) {
		goto end;
	}

	ret = stream->read_subbuffer_ops.extract_subbuffer_info(
			stream, subbuffer);
	if (ret) {
		goto end;
	}

	/* Record the coherency state sampled along with the sub-buffer. */
	LTTNG_OPTIONAL_SET(&subbuffer->info.metadata.coherent, coherent);

	ret = get_current_subbuf_addr(stream, &addr);
	if (ret) {
		goto end;
	}

	subbuffer->buffer.buffer = lttng_buffer_view_init(
			addr, 0, subbuffer->info.data.padded_subbuf_size);
	DBG("Got metadata packet with padded_subbuf_size = %lu, coherent = %s",
			subbuffer->info.metadata.padded_subbuf_size,
			coherent ? "true" : "false");
end:
	/*
	 * The caller only expects -ENODATA when there is no data to read, but
	 * the kernel tracer returns -EAGAIN when there is currently no data
	 * for a non-finalized stream, and -ENODATA when there is no data for a
	 * finalized stream. Those can be combined into a -ENODATA return value.
	 */
	if (ret == -EAGAIN) {
		ret = -ENODATA;
	}

	return ret;
}
1654
1655 static
1656 int put_next_subbuffer(struct lttng_consumer_stream *stream,
1657 struct stream_subbuffer *subbuffer)
1658 {
1659 const int ret = kernctl_put_next_subbuf(stream->wait_fd);
1660
1661 if (ret) {
1662 if (ret == -EFAULT) {
1663 PERROR("Error in unreserving sub buffer");
1664 } else if (ret == -EIO) {
1665 /* Should never happen with newer LTTng versions */
1666 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted");
1667 }
1668 }
1669
1670 return ret;
1671 }
1672
1673 static
1674 bool is_get_next_check_metadata_available(int tracer_fd)
1675 {
1676 const int ret = kernctl_get_next_subbuf_metadata_check(tracer_fd, NULL);
1677 const bool available = ret != -ENOTTY;
1678
1679 if (ret == 0) {
1680 /* get succeeded, make sure to put the subbuffer. */
1681 kernctl_put_subbuf(tracer_fd);
1682 }
1683
1684 return available;
1685 }
1686
1687 static
1688 int signal_metadata(struct lttng_consumer_stream *stream,
1689 struct lttng_consumer_local_data *ctx)
1690 {
1691 ASSERT_LOCKED(stream->metadata_rdv_lock);
1692 return pthread_cond_broadcast(&stream->metadata_rdv) ? -errno : 0;
1693 }
1694
/*
 * Wire up the per-stream read_subbuffer operations according to the stream's
 * type (metadata vs data), channel output mode (mmap vs splice) and whether
 * the session is live. Returns 0 on success, a negative error code if
 * metadata bucketization could not be enabled.
 */
static
int lttng_kconsumer_set_stream_ops(
		struct lttng_consumer_stream *stream)
{
	int ret = 0;

	/*
	 * Live metadata streams try to accumulate metadata until a coherent
	 * state is reached, which requires tracer support.
	 */
	if (stream->metadata_flag && stream->chan->is_live) {
		DBG("Attempting to enable metadata bucketization for live consumers");
		if (is_get_next_check_metadata_available(stream->wait_fd)) {
			DBG("Kernel tracer supports get_next_subbuffer_metadata_check, metadata will be accumulated until a coherent state is reached");
			stream->read_subbuffer_ops.get_next_subbuffer =
					get_next_subbuffer_metadata_check;
			ret = consumer_stream_enable_metadata_bucketization(
					stream);
			if (ret) {
				goto end;
			}
		} else {
			/*
			 * The kernel tracer version is too old to indicate
			 * when the metadata stream has reached a "coherent"
			 * (parseable) point.
			 *
			 * This means that a live viewer may see an incoherent
			 * sequence of metadata and fail to parse it.
			 */
			WARN("Kernel tracer does not support get_next_subbuffer_metadata_check which may cause live clients to fail to parse the metadata stream");
			metadata_bucket_destroy(stream->metadata_bucket);
			stream->metadata_bucket = NULL;
		}

		/* Wake metadata waiters whenever the reader goes to sleep. */
		stream->read_subbuffer_ops.on_sleep = signal_metadata;
	}

	/* Default getter when bucketization did not install one above. */
	if (!stream->read_subbuffer_ops.get_next_subbuffer) {
		if (stream->chan->output == CONSUMER_CHANNEL_MMAP) {
			stream->read_subbuffer_ops.get_next_subbuffer =
					get_next_subbuffer_mmap;
		} else {
			stream->read_subbuffer_ops.get_next_subbuffer =
					get_next_subbuffer_splice;
		}
	}

	/* Info extraction differs between metadata and data streams. */
	if (stream->metadata_flag) {
		stream->read_subbuffer_ops.extract_subbuffer_info =
				extract_metadata_subbuffer_info;
	} else {
		stream->read_subbuffer_ops.extract_subbuffer_info =
				extract_data_subbuffer_info;
		if (stream->chan->is_live) {
			/* Live data streams periodically flush their index. */
			stream->read_subbuffer_ops.send_live_beacon =
					consumer_flush_kernel_index;
		}
	}

	stream->read_subbuffer_ops.put_next_subbuffer = put_next_subbuffer;
end:
	return ret;
}
1755
/*
 * Prepare a newly-received kernel stream for consumption: create its output
 * files for locally-written monitored streams, map the ring buffer for
 * mmap-mode output, and install the read_subbuffer operations.
 *
 * Returns 0 on success (the library then manages the FD internally), a
 * negative error code on failure.
 */
int lttng_kconsumer_on_recv_stream(struct lttng_consumer_stream *stream)
{
	int ret;

	assert(stream);

	/*
	 * Don't create anything if this is set for streaming or if there is
	 * no current trace chunk on the parent channel.
	 */
	if (stream->net_seq_idx == (uint64_t) -1ULL && stream->chan->monitor &&
			stream->chan->trace_chunk) {
		ret = consumer_stream_create_output_files(stream, true);
		if (ret) {
			goto error;
		}
	}

	if (stream->output == LTTNG_EVENT_MMAP) {
		/* get the len of the mmap region */
		unsigned long mmap_len;

		ret = kernctl_get_mmap_len(stream->wait_fd, &mmap_len);
		if (ret != 0) {
			PERROR("kernctl_get_mmap_len");
			goto error_close_fd;
		}
		stream->mmap_len = (size_t) mmap_len;

		/* Read-only, private mapping of the tracer's ring buffer. */
		stream->mmap_base = mmap(NULL, stream->mmap_len, PROT_READ,
				MAP_PRIVATE, stream->wait_fd, 0);
		if (stream->mmap_base == MAP_FAILED) {
			PERROR("Error mmaping");
			ret = -1;
			goto error_close_fd;
		}
	}

	ret = lttng_kconsumer_set_stream_ops(stream);
	if (ret) {
		goto error_close_fd;
	}

	/* we return 0 to let the library handle the FD internally */
	return 0;

error_close_fd:
	/* Undo the output file creation performed above, if any. */
	if (stream->out_fd >= 0) {
		int err;

		err = close(stream->out_fd);
		assert(!err);
		stream->out_fd = -1;
	}
error:
	return ret;
}
1813
1814 /*
1815 * Check if data is still being extracted from the buffers for a specific
1816 * stream. Consumer data lock MUST be acquired before calling this function
1817 * and the stream lock.
1818 *
1819 * Return 1 if the traced data are still getting read else 0 meaning that the
1820 * data is available for trace viewer reading.
1821 */
1822 int lttng_kconsumer_data_pending(struct lttng_consumer_stream *stream)
1823 {
1824 int ret;
1825
1826 assert(stream);
1827
1828 if (stream->endpoint_status != CONSUMER_ENDPOINT_ACTIVE) {
1829 ret = 0;
1830 goto end;
1831 }
1832
1833 ret = kernctl_get_next_subbuf(stream->wait_fd);
1834 if (ret == 0) {
1835 /* There is still data so let's put back this subbuffer. */
1836 ret = kernctl_put_subbuf(stream->wait_fd);
1837 assert(ret == 0);
1838 ret = 1; /* Data is pending */
1839 goto end;
1840 }
1841
1842 /* Data is NOT pending and ready to be read. */
1843 ret = 0;
1844
1845 end:
1846 return ret;
1847 }
This page took 0.07263 seconds and 4 git commands to generate.