7032a7f7ffcc9bea224db0c6059f1928de72aa47
[lttng-tools.git] / src / common / kernel-consumer / kernel-consumer.c
1 /*
2 * Copyright (C) 2011 Julien Desfossez <julien.desfossez@polymtl.ca>
3 * Copyright (C) 2011 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
4 * Copyright (C) 2017 Jérémie Galarneau <jeremie.galarneau@efficios.com>
5 *
6 * SPDX-License-Identifier: GPL-2.0-only
7 *
8 */
9
10 #include "common/buffer-view.h"
11 #include <stdint.h>
12 #define _LGPL_SOURCE
13 #include <assert.h>
14 #include <poll.h>
15 #include <pthread.h>
16 #include <stdlib.h>
17 #include <string.h>
18 #include <sys/mman.h>
19 #include <sys/socket.h>
20 #include <sys/types.h>
21 #include <inttypes.h>
22 #include <unistd.h>
23 #include <sys/stat.h>
24
25 #include <bin/lttng-consumerd/health-consumerd.h>
26 #include <common/common.h>
27 #include <common/kernel-ctl/kernel-ctl.h>
28 #include <common/sessiond-comm/sessiond-comm.h>
29 #include <common/sessiond-comm/relayd.h>
30 #include <common/compat/fcntl.h>
31 #include <common/compat/endian.h>
32 #include <common/pipe.h>
33 #include <common/relayd/relayd.h>
34 #include <common/utils.h>
35 #include <common/consumer/consumer-stream.h>
36 #include <common/index/index.h>
37 #include <common/consumer/consumer-timer.h>
38 #include <common/optional.h>
39
40 #include "kernel-consumer.h"
41
42 extern struct lttng_consumer_global_data consumer_data;
43 extern int consumer_poll_timeout;
44
45 /*
46 * Take a snapshot for a specific fd
47 *
48 * Returns 0 on success, < 0 on error
49 */
50 int lttng_kconsumer_take_snapshot(struct lttng_consumer_stream *stream)
51 {
52 int ret = 0;
53 int infd = stream->wait_fd;
54
55 ret = kernctl_snapshot(infd);
56 /*
57 * -EAGAIN is not an error, it just means that there is no data to
58 * be read.
59 */
60 if (ret != 0 && ret != -EAGAIN) {
61 PERROR("Getting sub-buffer snapshot.");
62 }
63
64 return ret;
65 }
66
67 /*
68 * Sample consumed and produced positions for a specific fd.
69 *
70 * Returns 0 on success, < 0 on error.
71 */
72 int lttng_kconsumer_sample_snapshot_positions(
73 struct lttng_consumer_stream *stream)
74 {
75 assert(stream);
76
77 return kernctl_snapshot_sample_positions(stream->wait_fd);
78 }
79
80 /*
81 * Get the produced position
82 *
83 * Returns 0 on success, < 0 on error
84 */
85 int lttng_kconsumer_get_produced_snapshot(struct lttng_consumer_stream *stream,
86 unsigned long *pos)
87 {
88 int ret;
89 int infd = stream->wait_fd;
90
91 ret = kernctl_snapshot_get_produced(infd, pos);
92 if (ret != 0) {
93 PERROR("kernctl_snapshot_get_produced");
94 }
95
96 return ret;
97 }
98
99 /*
100 * Get the consumerd position
101 *
102 * Returns 0 on success, < 0 on error
103 */
104 int lttng_kconsumer_get_consumed_snapshot(struct lttng_consumer_stream *stream,
105 unsigned long *pos)
106 {
107 int ret;
108 int infd = stream->wait_fd;
109
110 ret = kernctl_snapshot_get_consumed(infd, pos);
111 if (ret != 0) {
112 PERROR("kernctl_snapshot_get_consumed");
113 }
114
115 return ret;
116 }
117
118 static
119 int get_current_subbuf_addr(struct lttng_consumer_stream *stream,
120 const char **addr)
121 {
122 int ret;
123 unsigned long mmap_offset;
124 const char *mmap_base = stream->mmap_base;
125
126 ret = kernctl_get_mmap_read_offset(stream->wait_fd, &mmap_offset);
127 if (ret < 0) {
128 PERROR("Failed to get mmap read offset");
129 goto error;
130 }
131
132 *addr = mmap_base + mmap_offset;
133 error:
134 return ret;
135 }
136
137 /*
138 * Take a snapshot of all the stream of a channel
139 * RCU read-side lock must be held across this function to ensure existence of
140 * channel. The channel lock must be held by the caller.
141 *
142 * Returns 0 on success, < 0 on error
143 */
144 static int lttng_kconsumer_snapshot_channel(
145 struct lttng_consumer_channel *channel,
146 uint64_t key, char *path, uint64_t relayd_id,
147 uint64_t nb_packets_per_stream,
148 struct lttng_consumer_local_data *ctx)
149 {
150 int ret;
151 struct lttng_consumer_stream *stream;
152
153 DBG("Kernel consumer snapshot channel %" PRIu64, key);
154
155 rcu_read_lock();
156
157 /* Splice is not supported yet for channel snapshot. */
158 if (channel->output != CONSUMER_CHANNEL_MMAP) {
159 ERR("Unsupported output type for channel \"%s\": mmap output is required to record a snapshot",
160 channel->name);
161 ret = -1;
162 goto end;
163 }
164
165 cds_list_for_each_entry(stream, &channel->streams.head, send_node) {
166 unsigned long consumed_pos, produced_pos;
167
168 health_code_update();
169
170 /*
171 * Lock stream because we are about to change its state.
172 */
173 pthread_mutex_lock(&stream->lock);
174
175 assert(channel->trace_chunk);
176 if (!lttng_trace_chunk_get(channel->trace_chunk)) {
177 /*
178 * Can't happen barring an internal error as the channel
179 * holds a reference to the trace chunk.
180 */
181 ERR("Failed to acquire reference to channel's trace chunk");
182 ret = -1;
183 goto end_unlock;
184 }
185 assert(!stream->trace_chunk);
186 stream->trace_chunk = channel->trace_chunk;
187
188 /*
189 * Assign the received relayd ID so we can use it for streaming. The streams
190 * are not visible to anyone so this is OK to change it.
191 */
192 stream->net_seq_idx = relayd_id;
193 channel->relayd_id = relayd_id;
194 if (relayd_id != (uint64_t) -1ULL) {
195 ret = consumer_send_relayd_stream(stream, path);
196 if (ret < 0) {
197 ERR("sending stream to relayd");
198 goto end_unlock;
199 }
200 } else {
201 ret = consumer_stream_create_output_files(stream,
202 false);
203 if (ret < 0) {
204 goto end_unlock;
205 }
206 DBG("Kernel consumer snapshot stream (%" PRIu64 ")",
207 stream->key);
208 }
209
210 ret = kernctl_buffer_flush_empty(stream->wait_fd);
211 if (ret < 0) {
212 /*
213 * Doing a buffer flush which does not take into
214 * account empty packets. This is not perfect
215 * for stream intersection, but required as a
216 * fall-back when "flush_empty" is not
217 * implemented by lttng-modules.
218 */
219 ret = kernctl_buffer_flush(stream->wait_fd);
220 if (ret < 0) {
221 ERR("Failed to flush kernel stream");
222 goto end_unlock;
223 }
224 goto end_unlock;
225 }
226
227 ret = lttng_kconsumer_take_snapshot(stream);
228 if (ret < 0) {
229 ERR("Taking kernel snapshot");
230 goto end_unlock;
231 }
232
233 ret = lttng_kconsumer_get_produced_snapshot(stream, &produced_pos);
234 if (ret < 0) {
235 ERR("Produced kernel snapshot position");
236 goto end_unlock;
237 }
238
239 ret = lttng_kconsumer_get_consumed_snapshot(stream, &consumed_pos);
240 if (ret < 0) {
241 ERR("Consumerd kernel snapshot position");
242 goto end_unlock;
243 }
244
245 consumed_pos = consumer_get_consume_start_pos(consumed_pos,
246 produced_pos, nb_packets_per_stream,
247 stream->max_sb_size);
248
249 while ((long) (consumed_pos - produced_pos) < 0) {
250 ssize_t read_len;
251 unsigned long len, padded_len;
252 const char *subbuf_addr;
253 struct lttng_buffer_view subbuf_view;
254
255 health_code_update();
256 DBG("Kernel consumer taking snapshot at pos %lu", consumed_pos);
257
258 ret = kernctl_get_subbuf(stream->wait_fd, &consumed_pos);
259 if (ret < 0) {
260 if (ret != -EAGAIN) {
261 PERROR("kernctl_get_subbuf snapshot");
262 goto end_unlock;
263 }
264 DBG("Kernel consumer get subbuf failed. Skipping it.");
265 consumed_pos += stream->max_sb_size;
266 stream->chan->lost_packets++;
267 continue;
268 }
269
270 ret = kernctl_get_subbuf_size(stream->wait_fd, &len);
271 if (ret < 0) {
272 ERR("Snapshot kernctl_get_subbuf_size");
273 goto error_put_subbuf;
274 }
275
276 ret = kernctl_get_padded_subbuf_size(stream->wait_fd, &padded_len);
277 if (ret < 0) {
278 ERR("Snapshot kernctl_get_padded_subbuf_size");
279 goto error_put_subbuf;
280 }
281
282 ret = get_current_subbuf_addr(stream, &subbuf_addr);
283 if (ret) {
284 goto error_put_subbuf;
285 }
286
287 subbuf_view = lttng_buffer_view_init(
288 subbuf_addr, 0, padded_len);
289 read_len = lttng_consumer_on_read_subbuffer_mmap(ctx,
290 stream, &subbuf_view,
291 padded_len - len, NULL);
292 /*
293 * We write the padded len in local tracefiles but the data len
294 * when using a relay. Display the error but continue processing
295 * to try to release the subbuffer.
296 */
297 if (relayd_id != (uint64_t) -1ULL) {
298 if (read_len != len) {
299 ERR("Error sending to the relay (ret: %zd != len: %lu)",
300 read_len, len);
301 }
302 } else {
303 if (read_len != padded_len) {
304 ERR("Error writing to tracefile (ret: %zd != len: %lu)",
305 read_len, padded_len);
306 }
307 }
308
309 ret = kernctl_put_subbuf(stream->wait_fd);
310 if (ret < 0) {
311 ERR("Snapshot kernctl_put_subbuf");
312 goto end_unlock;
313 }
314 consumed_pos += stream->max_sb_size;
315 }
316
317 if (relayd_id == (uint64_t) -1ULL) {
318 if (stream->out_fd >= 0) {
319 ret = close(stream->out_fd);
320 if (ret < 0) {
321 PERROR("Kernel consumer snapshot close out_fd");
322 goto end_unlock;
323 }
324 stream->out_fd = -1;
325 }
326 } else {
327 close_relayd_stream(stream);
328 stream->net_seq_idx = (uint64_t) -1ULL;
329 }
330 lttng_trace_chunk_put(stream->trace_chunk);
331 stream->trace_chunk = NULL;
332 pthread_mutex_unlock(&stream->lock);
333 }
334
335 /* All good! */
336 ret = 0;
337 goto end;
338
339 error_put_subbuf:
340 ret = kernctl_put_subbuf(stream->wait_fd);
341 if (ret < 0) {
342 ERR("Snapshot kernctl_put_subbuf error path");
343 }
344 end_unlock:
345 pthread_mutex_unlock(&stream->lock);
346 end:
347 rcu_read_unlock();
348 return ret;
349 }
350
/*
 * Read the whole metadata available for a snapshot.
 * RCU read-side lock must be held across this function to ensure existence of
 * metadata_channel. The channel lock must be held by the caller.
 *
 * metadata_channel: metadata channel whose single metadata stream is drained.
 * key: channel key (used for logging).
 * path: relayd path of the stream (used only when streaming to a relayd).
 * relayd_id: relayd network sequence index, or (uint64_t) -1ULL for a local
 *            snapshot written to output files.
 * ctx: consumer context used to read/write the metadata sub-buffers.
 *
 * Note: the metadata stream is unconditionally removed from the channel's
 * stream list and destroyed before returning, on both success and error
 * paths, and metadata_channel->metadata_stream is reset to NULL.
 *
 * Returns 0 on success, < 0 on error
 */
static int lttng_kconsumer_snapshot_metadata(
		struct lttng_consumer_channel *metadata_channel,
		uint64_t key, char *path, uint64_t relayd_id,
		struct lttng_consumer_local_data *ctx)
{
	int ret, use_relayd = 0;
	ssize_t ret_read;
	struct lttng_consumer_stream *metadata_stream;

	assert(ctx);

	DBG("Kernel consumer snapshot metadata with key %" PRIu64 " at path %s",
			key, path);

	rcu_read_lock();

	metadata_stream = metadata_channel->metadata_stream;
	assert(metadata_stream);

	/* Held until the final teardown below; protects the stream state. */
	pthread_mutex_lock(&metadata_stream->lock);
	assert(metadata_channel->trace_chunk);
	assert(metadata_stream->trace_chunk);

	/* Flag once that we have a valid relayd for the stream. */
	if (relayd_id != (uint64_t) -1ULL) {
		use_relayd = 1;
	}

	/* Set up the output: relayd stream or local output files. */
	if (use_relayd) {
		ret = consumer_send_relayd_stream(metadata_stream, path);
		if (ret < 0) {
			goto error_snapshot;
		}
	} else {
		ret = consumer_stream_create_output_files(metadata_stream,
				false);
		if (ret < 0) {
			goto error_snapshot;
		}
	}

	/* Drain all available metadata; -EAGAIN means nothing left to read. */
	do {
		health_code_update();

		ret_read = lttng_kconsumer_read_subbuffer(metadata_stream, ctx);
		if (ret_read < 0) {
			if (ret_read != -EAGAIN) {
				ERR("Kernel snapshot reading metadata subbuffer (ret: %zd)",
						ret_read);
				ret = ret_read;
				goto error_snapshot;
			}
			/* ret_read is negative at this point so we will exit the loop. */
			continue;
		}
	} while (ret_read >= 0);

	/* Tear down the output set up above. */
	if (use_relayd) {
		close_relayd_stream(metadata_stream);
		metadata_stream->net_seq_idx = (uint64_t) -1ULL;
	} else {
		if (metadata_stream->out_fd >= 0) {
			ret = close(metadata_stream->out_fd);
			if (ret < 0) {
				PERROR("Kernel consumer snapshot metadata close out_fd");
				/*
				 * Don't go on error here since the snapshot was successful at this
				 * point but somehow the close failed.
				 */
			}
			metadata_stream->out_fd = -1;
			lttng_trace_chunk_put(metadata_stream->trace_chunk);
			metadata_stream->trace_chunk = NULL;
		}
	}

	ret = 0;
error_snapshot:
	/* Common teardown: runs on success and on every error after the lock. */
	pthread_mutex_unlock(&metadata_stream->lock);
	cds_list_del(&metadata_stream->send_node);
	consumer_stream_destroy(metadata_stream, NULL);
	metadata_channel->metadata_stream = NULL;
	rcu_read_unlock();
	return ret;
}
443
444 /*
445 * Receive command from session daemon and process it.
446 *
447 * Return 1 on success else a negative value or 0.
448 */
449 int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
450 int sock, struct pollfd *consumer_sockpoll)
451 {
452 ssize_t ret;
453 enum lttcomm_return_code ret_code = LTTCOMM_CONSUMERD_SUCCESS;
454 struct lttcomm_consumer_msg msg;
455
456 health_code_update();
457
458 ret = lttcomm_recv_unix_sock(sock, &msg, sizeof(msg));
459 if (ret != sizeof(msg)) {
460 if (ret > 0) {
461 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_ERROR_RECV_CMD);
462 ret = -1;
463 }
464 return ret;
465 }
466
467 health_code_update();
468
469 /* Deprecated command */
470 assert(msg.cmd_type != LTTNG_CONSUMER_STOP);
471
472 health_code_update();
473
474 /* relayd needs RCU read-side protection */
475 rcu_read_lock();
476
477 switch (msg.cmd_type) {
478 case LTTNG_CONSUMER_ADD_RELAYD_SOCKET:
479 {
480 /* Session daemon status message are handled in the following call. */
481 consumer_add_relayd_socket(msg.u.relayd_sock.net_index,
482 msg.u.relayd_sock.type, ctx, sock, consumer_sockpoll,
483 &msg.u.relayd_sock.sock, msg.u.relayd_sock.session_id,
484 msg.u.relayd_sock.relayd_session_id);
485 goto end_nosignal;
486 }
487 case LTTNG_CONSUMER_ADD_CHANNEL:
488 {
489 struct lttng_consumer_channel *new_channel;
490 int ret_recv;
491 const uint64_t chunk_id = msg.u.channel.chunk_id.value;
492
493 health_code_update();
494
495 /* First send a status message before receiving the fds. */
496 ret = consumer_send_status_msg(sock, ret_code);
497 if (ret < 0) {
498 /* Somehow, the session daemon is not responding anymore. */
499 goto error_fatal;
500 }
501
502 health_code_update();
503
504 DBG("consumer_add_channel %" PRIu64, msg.u.channel.channel_key);
505 new_channel = consumer_allocate_channel(msg.u.channel.channel_key,
506 msg.u.channel.session_id,
507 msg.u.channel.chunk_id.is_set ?
508 &chunk_id : NULL,
509 msg.u.channel.pathname,
510 msg.u.channel.name,
511 msg.u.channel.relayd_id, msg.u.channel.output,
512 msg.u.channel.tracefile_size,
513 msg.u.channel.tracefile_count, 0,
514 msg.u.channel.monitor,
515 msg.u.channel.live_timer_interval,
516 msg.u.channel.is_live,
517 NULL, NULL);
518 if (new_channel == NULL) {
519 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_OUTFD_ERROR);
520 goto end_nosignal;
521 }
522 new_channel->nb_init_stream_left = msg.u.channel.nb_init_streams;
523 switch (msg.u.channel.output) {
524 case LTTNG_EVENT_SPLICE:
525 new_channel->output = CONSUMER_CHANNEL_SPLICE;
526 break;
527 case LTTNG_EVENT_MMAP:
528 new_channel->output = CONSUMER_CHANNEL_MMAP;
529 break;
530 default:
531 ERR("Channel output unknown %d", msg.u.channel.output);
532 goto end_nosignal;
533 }
534
535 /* Translate and save channel type. */
536 switch (msg.u.channel.type) {
537 case CONSUMER_CHANNEL_TYPE_DATA:
538 case CONSUMER_CHANNEL_TYPE_METADATA:
539 new_channel->type = msg.u.channel.type;
540 break;
541 default:
542 assert(0);
543 goto end_nosignal;
544 };
545
546 health_code_update();
547
548 if (ctx->on_recv_channel != NULL) {
549 ret_recv = ctx->on_recv_channel(new_channel);
550 if (ret_recv == 0) {
551 ret = consumer_add_channel(new_channel, ctx);
552 } else if (ret_recv < 0) {
553 goto end_nosignal;
554 }
555 } else {
556 ret = consumer_add_channel(new_channel, ctx);
557 }
558 if (msg.u.channel.type == CONSUMER_CHANNEL_TYPE_DATA && !ret) {
559 int monitor_start_ret;
560
561 DBG("Consumer starting monitor timer");
562 consumer_timer_live_start(new_channel,
563 msg.u.channel.live_timer_interval);
564 monitor_start_ret = consumer_timer_monitor_start(
565 new_channel,
566 msg.u.channel.monitor_timer_interval);
567 if (monitor_start_ret < 0) {
568 ERR("Starting channel monitoring timer failed");
569 goto end_nosignal;
570 }
571
572 }
573
574 health_code_update();
575
576 /* If we received an error in add_channel, we need to report it. */
577 if (ret < 0) {
578 ret = consumer_send_status_msg(sock, ret);
579 if (ret < 0) {
580 goto error_fatal;
581 }
582 goto end_nosignal;
583 }
584
585 goto end_nosignal;
586 }
587 case LTTNG_CONSUMER_ADD_STREAM:
588 {
589 int fd;
590 struct lttng_pipe *stream_pipe;
591 struct lttng_consumer_stream *new_stream;
592 struct lttng_consumer_channel *channel;
593 int alloc_ret = 0;
594
595 /*
596 * Get stream's channel reference. Needed when adding the stream to the
597 * global hash table.
598 */
599 channel = consumer_find_channel(msg.u.stream.channel_key);
600 if (!channel) {
601 /*
602 * We could not find the channel. Can happen if cpu hotplug
603 * happens while tearing down.
604 */
605 ERR("Unable to find channel key %" PRIu64, msg.u.stream.channel_key);
606 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
607 }
608
609 health_code_update();
610
611 /* First send a status message before receiving the fds. */
612 ret = consumer_send_status_msg(sock, ret_code);
613 if (ret < 0) {
614 /* Somehow, the session daemon is not responding anymore. */
615 goto error_add_stream_fatal;
616 }
617
618 health_code_update();
619
620 if (ret_code != LTTCOMM_CONSUMERD_SUCCESS) {
621 /* Channel was not found. */
622 goto error_add_stream_nosignal;
623 }
624
625 /* Blocking call */
626 health_poll_entry();
627 ret = lttng_consumer_poll_socket(consumer_sockpoll);
628 health_poll_exit();
629 if (ret) {
630 goto error_add_stream_fatal;
631 }
632
633 health_code_update();
634
635 /* Get stream file descriptor from socket */
636 ret = lttcomm_recv_fds_unix_sock(sock, &fd, 1);
637 if (ret != sizeof(fd)) {
638 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_ERROR_RECV_FD);
639 goto end;
640 }
641
642 health_code_update();
643
644 /*
645 * Send status code to session daemon only if the recv works. If the
646 * above recv() failed, the session daemon is notified through the
647 * error socket and the teardown is eventually done.
648 */
649 ret = consumer_send_status_msg(sock, ret_code);
650 if (ret < 0) {
651 /* Somehow, the session daemon is not responding anymore. */
652 goto error_add_stream_nosignal;
653 }
654
655 health_code_update();
656
657 pthread_mutex_lock(&channel->lock);
658 new_stream = consumer_allocate_stream(
659 channel,
660 channel->key,
661 fd,
662 channel->name,
663 channel->relayd_id,
664 channel->session_id,
665 channel->trace_chunk,
666 msg.u.stream.cpu,
667 &alloc_ret,
668 channel->type,
669 channel->monitor);
670 if (new_stream == NULL) {
671 switch (alloc_ret) {
672 case -ENOMEM:
673 case -EINVAL:
674 default:
675 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_OUTFD_ERROR);
676 break;
677 }
678 pthread_mutex_unlock(&channel->lock);
679 goto error_add_stream_nosignal;
680 }
681
682 new_stream->wait_fd = fd;
683 ret = kernctl_get_max_subbuf_size(new_stream->wait_fd,
684 &new_stream->max_sb_size);
685 if (ret < 0) {
686 pthread_mutex_unlock(&channel->lock);
687 ERR("Failed to get kernel maximal subbuffer size");
688 goto error_add_stream_nosignal;
689 }
690
691 consumer_stream_update_channel_attributes(new_stream,
692 channel);
693 switch (channel->output) {
694 case CONSUMER_CHANNEL_SPLICE:
695 new_stream->output = LTTNG_EVENT_SPLICE;
696 ret = utils_create_pipe(new_stream->splice_pipe);
697 if (ret < 0) {
698 pthread_mutex_unlock(&channel->lock);
699 goto error_add_stream_nosignal;
700 }
701 break;
702 case CONSUMER_CHANNEL_MMAP:
703 new_stream->output = LTTNG_EVENT_MMAP;
704 break;
705 default:
706 ERR("Stream output unknown %d", channel->output);
707 pthread_mutex_unlock(&channel->lock);
708 goto error_add_stream_nosignal;
709 }
710
711 /*
712 * We've just assigned the channel to the stream so increment the
713 * refcount right now. We don't need to increment the refcount for
714 * streams in no monitor because we handle manually the cleanup of
715 * those. It is very important to make sure there is NO prior
716 * consumer_del_stream() calls or else the refcount will be unbalanced.
717 */
718 if (channel->monitor) {
719 uatomic_inc(&new_stream->chan->refcount);
720 }
721
722 /*
723 * The buffer flush is done on the session daemon side for the kernel
724 * so no need for the stream "hangup_flush_done" variable to be
725 * tracked. This is important for a kernel stream since we don't rely
726 * on the flush state of the stream to read data. It's not the case for
727 * user space tracing.
728 */
729 new_stream->hangup_flush_done = 0;
730
731 health_code_update();
732
733 pthread_mutex_lock(&new_stream->lock);
734 if (ctx->on_recv_stream) {
735 ret = ctx->on_recv_stream(new_stream);
736 if (ret < 0) {
737 pthread_mutex_unlock(&new_stream->lock);
738 pthread_mutex_unlock(&channel->lock);
739 consumer_stream_free(new_stream);
740 goto error_add_stream_nosignal;
741 }
742 }
743 health_code_update();
744
745 if (new_stream->metadata_flag) {
746 channel->metadata_stream = new_stream;
747 }
748
749 /* Do not monitor this stream. */
750 if (!channel->monitor) {
751 DBG("Kernel consumer add stream %s in no monitor mode with "
752 "relayd id %" PRIu64, new_stream->name,
753 new_stream->net_seq_idx);
754 cds_list_add(&new_stream->send_node, &channel->streams.head);
755 pthread_mutex_unlock(&new_stream->lock);
756 pthread_mutex_unlock(&channel->lock);
757 goto end_add_stream;
758 }
759
760 /* Send stream to relayd if the stream has an ID. */
761 if (new_stream->net_seq_idx != (uint64_t) -1ULL) {
762 ret = consumer_send_relayd_stream(new_stream,
763 new_stream->chan->pathname);
764 if (ret < 0) {
765 pthread_mutex_unlock(&new_stream->lock);
766 pthread_mutex_unlock(&channel->lock);
767 consumer_stream_free(new_stream);
768 goto error_add_stream_nosignal;
769 }
770
771 /*
772 * If adding an extra stream to an already
773 * existing channel (e.g. cpu hotplug), we need
774 * to send the "streams_sent" command to relayd.
775 */
776 if (channel->streams_sent_to_relayd) {
777 ret = consumer_send_relayd_streams_sent(
778 new_stream->net_seq_idx);
779 if (ret < 0) {
780 pthread_mutex_unlock(&new_stream->lock);
781 pthread_mutex_unlock(&channel->lock);
782 goto error_add_stream_nosignal;
783 }
784 }
785 }
786 pthread_mutex_unlock(&new_stream->lock);
787 pthread_mutex_unlock(&channel->lock);
788
789 /* Get the right pipe where the stream will be sent. */
790 if (new_stream->metadata_flag) {
791 consumer_add_metadata_stream(new_stream);
792 stream_pipe = ctx->consumer_metadata_pipe;
793 } else {
794 consumer_add_data_stream(new_stream);
795 stream_pipe = ctx->consumer_data_pipe;
796 }
797
798 /* Visible to other threads */
799 new_stream->globally_visible = 1;
800
801 health_code_update();
802
803 ret = lttng_pipe_write(stream_pipe, &new_stream, sizeof(new_stream));
804 if (ret < 0) {
805 ERR("Consumer write %s stream to pipe %d",
806 new_stream->metadata_flag ? "metadata" : "data",
807 lttng_pipe_get_writefd(stream_pipe));
808 if (new_stream->metadata_flag) {
809 consumer_del_stream_for_metadata(new_stream);
810 } else {
811 consumer_del_stream_for_data(new_stream);
812 }
813 goto error_add_stream_nosignal;
814 }
815
816 DBG("Kernel consumer ADD_STREAM %s (fd: %d) %s with relayd id %" PRIu64,
817 new_stream->name, fd, new_stream->chan->pathname, new_stream->relayd_stream_id);
818 end_add_stream:
819 break;
820 error_add_stream_nosignal:
821 goto end_nosignal;
822 error_add_stream_fatal:
823 goto error_fatal;
824 }
825 case LTTNG_CONSUMER_STREAMS_SENT:
826 {
827 struct lttng_consumer_channel *channel;
828
829 /*
830 * Get stream's channel reference. Needed when adding the stream to the
831 * global hash table.
832 */
833 channel = consumer_find_channel(msg.u.sent_streams.channel_key);
834 if (!channel) {
835 /*
836 * We could not find the channel. Can happen if cpu hotplug
837 * happens while tearing down.
838 */
839 ERR("Unable to find channel key %" PRIu64,
840 msg.u.sent_streams.channel_key);
841 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
842 }
843
844 health_code_update();
845
846 /*
847 * Send status code to session daemon.
848 */
849 ret = consumer_send_status_msg(sock, ret_code);
850 if (ret < 0 || ret_code != LTTCOMM_CONSUMERD_SUCCESS) {
851 /* Somehow, the session daemon is not responding anymore. */
852 goto error_streams_sent_nosignal;
853 }
854
855 health_code_update();
856
857 /*
858 * We should not send this message if we don't monitor the
859 * streams in this channel.
860 */
861 if (!channel->monitor) {
862 goto end_error_streams_sent;
863 }
864
865 health_code_update();
866 /* Send stream to relayd if the stream has an ID. */
867 if (msg.u.sent_streams.net_seq_idx != (uint64_t) -1ULL) {
868 ret = consumer_send_relayd_streams_sent(
869 msg.u.sent_streams.net_seq_idx);
870 if (ret < 0) {
871 goto error_streams_sent_nosignal;
872 }
873 channel->streams_sent_to_relayd = true;
874 }
875 end_error_streams_sent:
876 break;
877 error_streams_sent_nosignal:
878 goto end_nosignal;
879 }
880 case LTTNG_CONSUMER_UPDATE_STREAM:
881 {
882 rcu_read_unlock();
883 return -ENOSYS;
884 }
885 case LTTNG_CONSUMER_DESTROY_RELAYD:
886 {
887 uint64_t index = msg.u.destroy_relayd.net_seq_idx;
888 struct consumer_relayd_sock_pair *relayd;
889
890 DBG("Kernel consumer destroying relayd %" PRIu64, index);
891
892 /* Get relayd reference if exists. */
893 relayd = consumer_find_relayd(index);
894 if (relayd == NULL) {
895 DBG("Unable to find relayd %" PRIu64, index);
896 ret_code = LTTCOMM_CONSUMERD_RELAYD_FAIL;
897 }
898
899 /*
900 * Each relayd socket pair has a refcount of stream attached to it
901 * which tells if the relayd is still active or not depending on the
902 * refcount value.
903 *
904 * This will set the destroy flag of the relayd object and destroy it
905 * if the refcount reaches zero when called.
906 *
907 * The destroy can happen either here or when a stream fd hangs up.
908 */
909 if (relayd) {
910 consumer_flag_relayd_for_destroy(relayd);
911 }
912
913 health_code_update();
914
915 ret = consumer_send_status_msg(sock, ret_code);
916 if (ret < 0) {
917 /* Somehow, the session daemon is not responding anymore. */
918 goto error_fatal;
919 }
920
921 goto end_nosignal;
922 }
923 case LTTNG_CONSUMER_DATA_PENDING:
924 {
925 int32_t ret;
926 uint64_t id = msg.u.data_pending.session_id;
927
928 DBG("Kernel consumer data pending command for id %" PRIu64, id);
929
930 ret = consumer_data_pending(id);
931
932 health_code_update();
933
934 /* Send back returned value to session daemon */
935 ret = lttcomm_send_unix_sock(sock, &ret, sizeof(ret));
936 if (ret < 0) {
937 PERROR("send data pending ret code");
938 goto error_fatal;
939 }
940
941 /*
942 * No need to send back a status message since the data pending
943 * returned value is the response.
944 */
945 break;
946 }
947 case LTTNG_CONSUMER_SNAPSHOT_CHANNEL:
948 {
949 struct lttng_consumer_channel *channel;
950 uint64_t key = msg.u.snapshot_channel.key;
951
952 channel = consumer_find_channel(key);
953 if (!channel) {
954 ERR("Channel %" PRIu64 " not found", key);
955 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
956 } else {
957 pthread_mutex_lock(&channel->lock);
958 if (msg.u.snapshot_channel.metadata == 1) {
959 ret = lttng_kconsumer_snapshot_metadata(channel, key,
960 msg.u.snapshot_channel.pathname,
961 msg.u.snapshot_channel.relayd_id, ctx);
962 if (ret < 0) {
963 ERR("Snapshot metadata failed");
964 ret_code = LTTCOMM_CONSUMERD_SNAPSHOT_FAILED;
965 }
966 } else {
967 ret = lttng_kconsumer_snapshot_channel(channel, key,
968 msg.u.snapshot_channel.pathname,
969 msg.u.snapshot_channel.relayd_id,
970 msg.u.snapshot_channel.nb_packets_per_stream,
971 ctx);
972 if (ret < 0) {
973 ERR("Snapshot channel failed");
974 ret_code = LTTCOMM_CONSUMERD_SNAPSHOT_FAILED;
975 }
976 }
977 pthread_mutex_unlock(&channel->lock);
978 }
979 health_code_update();
980
981 ret = consumer_send_status_msg(sock, ret_code);
982 if (ret < 0) {
983 /* Somehow, the session daemon is not responding anymore. */
984 goto end_nosignal;
985 }
986 break;
987 }
988 case LTTNG_CONSUMER_DESTROY_CHANNEL:
989 {
990 uint64_t key = msg.u.destroy_channel.key;
991 struct lttng_consumer_channel *channel;
992
993 channel = consumer_find_channel(key);
994 if (!channel) {
995 ERR("Kernel consumer destroy channel %" PRIu64 " not found", key);
996 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
997 }
998
999 health_code_update();
1000
1001 ret = consumer_send_status_msg(sock, ret_code);
1002 if (ret < 0) {
1003 /* Somehow, the session daemon is not responding anymore. */
1004 goto end_destroy_channel;
1005 }
1006
1007 health_code_update();
1008
1009 /* Stop right now if no channel was found. */
1010 if (!channel) {
1011 goto end_destroy_channel;
1012 }
1013
1014 /*
1015 * This command should ONLY be issued for channel with streams set in
1016 * no monitor mode.
1017 */
1018 assert(!channel->monitor);
1019
1020 /*
1021 * The refcount should ALWAYS be 0 in the case of a channel in no
1022 * monitor mode.
1023 */
1024 assert(!uatomic_sub_return(&channel->refcount, 1));
1025
1026 consumer_del_channel(channel);
1027 end_destroy_channel:
1028 goto end_nosignal;
1029 }
1030 case LTTNG_CONSUMER_DISCARDED_EVENTS:
1031 {
1032 ssize_t ret;
1033 uint64_t count;
1034 struct lttng_consumer_channel *channel;
1035 uint64_t id = msg.u.discarded_events.session_id;
1036 uint64_t key = msg.u.discarded_events.channel_key;
1037
1038 DBG("Kernel consumer discarded events command for session id %"
1039 PRIu64 ", channel key %" PRIu64, id, key);
1040
1041 channel = consumer_find_channel(key);
1042 if (!channel) {
1043 ERR("Kernel consumer discarded events channel %"
1044 PRIu64 " not found", key);
1045 count = 0;
1046 } else {
1047 count = channel->discarded_events;
1048 }
1049
1050 health_code_update();
1051
1052 /* Send back returned value to session daemon */
1053 ret = lttcomm_send_unix_sock(sock, &count, sizeof(count));
1054 if (ret < 0) {
1055 PERROR("send discarded events");
1056 goto error_fatal;
1057 }
1058
1059 break;
1060 }
1061 case LTTNG_CONSUMER_LOST_PACKETS:
1062 {
1063 ssize_t ret;
1064 uint64_t count;
1065 struct lttng_consumer_channel *channel;
1066 uint64_t id = msg.u.lost_packets.session_id;
1067 uint64_t key = msg.u.lost_packets.channel_key;
1068
1069 DBG("Kernel consumer lost packets command for session id %"
1070 PRIu64 ", channel key %" PRIu64, id, key);
1071
1072 channel = consumer_find_channel(key);
1073 if (!channel) {
1074 ERR("Kernel consumer lost packets channel %"
1075 PRIu64 " not found", key);
1076 count = 0;
1077 } else {
1078 count = channel->lost_packets;
1079 }
1080
1081 health_code_update();
1082
1083 /* Send back returned value to session daemon */
1084 ret = lttcomm_send_unix_sock(sock, &count, sizeof(count));
1085 if (ret < 0) {
1086 PERROR("send lost packets");
1087 goto error_fatal;
1088 }
1089
1090 break;
1091 }
1092 case LTTNG_CONSUMER_SET_CHANNEL_MONITOR_PIPE:
1093 {
1094 int channel_monitor_pipe;
1095
1096 ret_code = LTTCOMM_CONSUMERD_SUCCESS;
1097 /* Successfully received the command's type. */
1098 ret = consumer_send_status_msg(sock, ret_code);
1099 if (ret < 0) {
1100 goto error_fatal;
1101 }
1102
1103 ret = lttcomm_recv_fds_unix_sock(sock, &channel_monitor_pipe,
1104 1);
1105 if (ret != sizeof(channel_monitor_pipe)) {
1106 ERR("Failed to receive channel monitor pipe");
1107 goto error_fatal;
1108 }
1109
1110 DBG("Received channel monitor pipe (%d)", channel_monitor_pipe);
1111 ret = consumer_timer_thread_set_channel_monitor_pipe(
1112 channel_monitor_pipe);
1113 if (!ret) {
1114 int flags;
1115
1116 ret_code = LTTCOMM_CONSUMERD_SUCCESS;
1117 /* Set the pipe as non-blocking. */
1118 ret = fcntl(channel_monitor_pipe, F_GETFL, 0);
1119 if (ret == -1) {
1120 PERROR("fcntl get flags of the channel monitoring pipe");
1121 goto error_fatal;
1122 }
1123 flags = ret;
1124
1125 ret = fcntl(channel_monitor_pipe, F_SETFL,
1126 flags | O_NONBLOCK);
1127 if (ret == -1) {
1128 PERROR("fcntl set O_NONBLOCK flag of the channel monitoring pipe");
1129 goto error_fatal;
1130 }
1131 DBG("Channel monitor pipe set as non-blocking");
1132 } else {
1133 ret_code = LTTCOMM_CONSUMERD_ALREADY_SET;
1134 }
1135 ret = consumer_send_status_msg(sock, ret_code);
1136 if (ret < 0) {
1137 goto error_fatal;
1138 }
1139 break;
1140 }
1141 case LTTNG_CONSUMER_ROTATE_CHANNEL:
1142 {
1143 struct lttng_consumer_channel *channel;
1144 uint64_t key = msg.u.rotate_channel.key;
1145
1146 DBG("Consumer rotate channel %" PRIu64, key);
1147
1148 channel = consumer_find_channel(key);
1149 if (!channel) {
1150 ERR("Channel %" PRIu64 " not found", key);
1151 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
1152 } else {
1153 /*
1154 * Sample the rotate position of all the streams in this channel.
1155 */
1156 ret = lttng_consumer_rotate_channel(channel, key,
1157 msg.u.rotate_channel.relayd_id,
1158 msg.u.rotate_channel.metadata,
1159 ctx);
1160 if (ret < 0) {
1161 ERR("Rotate channel failed");
1162 ret_code = LTTCOMM_CONSUMERD_ROTATION_FAIL;
1163 }
1164
1165 health_code_update();
1166 }
1167 ret = consumer_send_status_msg(sock, ret_code);
1168 if (ret < 0) {
1169 /* Somehow, the session daemon is not responding anymore. */
1170 goto error_rotate_channel;
1171 }
1172 if (channel) {
1173 /* Rotate the streams that are ready right now. */
1174 ret = lttng_consumer_rotate_ready_streams(
1175 channel, key, ctx);
1176 if (ret < 0) {
1177 ERR("Rotate ready streams failed");
1178 }
1179 }
1180 break;
1181 error_rotate_channel:
1182 goto end_nosignal;
1183 }
1184 case LTTNG_CONSUMER_CLEAR_CHANNEL:
1185 {
1186 struct lttng_consumer_channel *channel;
1187 uint64_t key = msg.u.clear_channel.key;
1188
1189 channel = consumer_find_channel(key);
1190 if (!channel) {
1191 DBG("Channel %" PRIu64 " not found", key);
1192 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
1193 } else {
1194 ret = lttng_consumer_clear_channel(channel);
1195 if (ret) {
1196 ERR("Clear channel failed");
1197 ret_code = ret;
1198 }
1199
1200 health_code_update();
1201 }
1202 ret = consumer_send_status_msg(sock, ret_code);
1203 if (ret < 0) {
1204 /* Somehow, the session daemon is not responding anymore. */
1205 goto end_nosignal;
1206 }
1207
1208 break;
1209 }
1210 case LTTNG_CONSUMER_INIT:
1211 {
1212 ret_code = lttng_consumer_init_command(ctx,
1213 msg.u.init.sessiond_uuid);
1214 health_code_update();
1215 ret = consumer_send_status_msg(sock, ret_code);
1216 if (ret < 0) {
1217 /* Somehow, the session daemon is not responding anymore. */
1218 goto end_nosignal;
1219 }
1220 break;
1221 }
1222 case LTTNG_CONSUMER_CREATE_TRACE_CHUNK:
1223 {
1224 const struct lttng_credentials credentials = {
1225 .uid = msg.u.create_trace_chunk.credentials.value.uid,
1226 .gid = msg.u.create_trace_chunk.credentials.value.gid,
1227 };
1228 const bool is_local_trace =
1229 !msg.u.create_trace_chunk.relayd_id.is_set;
1230 const uint64_t relayd_id =
1231 msg.u.create_trace_chunk.relayd_id.value;
1232 const char *chunk_override_name =
1233 *msg.u.create_trace_chunk.override_name ?
1234 msg.u.create_trace_chunk.override_name :
1235 NULL;
1236 struct lttng_directory_handle *chunk_directory_handle = NULL;
1237
1238 /*
1239 * The session daemon will only provide a chunk directory file
1240 * descriptor for local traces.
1241 */
1242 if (is_local_trace) {
1243 int chunk_dirfd;
1244
1245 /* Acnowledge the reception of the command. */
1246 ret = consumer_send_status_msg(sock,
1247 LTTCOMM_CONSUMERD_SUCCESS);
1248 if (ret < 0) {
1249 /* Somehow, the session daemon is not responding anymore. */
1250 goto end_nosignal;
1251 }
1252
1253 ret = lttcomm_recv_fds_unix_sock(sock, &chunk_dirfd, 1);
1254 if (ret != sizeof(chunk_dirfd)) {
1255 ERR("Failed to receive trace chunk directory file descriptor");
1256 goto error_fatal;
1257 }
1258
1259 DBG("Received trace chunk directory fd (%d)",
1260 chunk_dirfd);
1261 chunk_directory_handle = lttng_directory_handle_create_from_dirfd(
1262 chunk_dirfd);
1263 if (!chunk_directory_handle) {
1264 ERR("Failed to initialize chunk directory handle from directory file descriptor");
1265 if (close(chunk_dirfd)) {
1266 PERROR("Failed to close chunk directory file descriptor");
1267 }
1268 goto error_fatal;
1269 }
1270 }
1271
1272 ret_code = lttng_consumer_create_trace_chunk(
1273 !is_local_trace ? &relayd_id : NULL,
1274 msg.u.create_trace_chunk.session_id,
1275 msg.u.create_trace_chunk.chunk_id,
1276 (time_t) msg.u.create_trace_chunk
1277 .creation_timestamp,
1278 chunk_override_name,
1279 msg.u.create_trace_chunk.credentials.is_set ?
1280 &credentials :
1281 NULL,
1282 chunk_directory_handle);
1283 lttng_directory_handle_put(chunk_directory_handle);
1284 goto end_msg_sessiond;
1285 }
1286 case LTTNG_CONSUMER_CLOSE_TRACE_CHUNK:
1287 {
1288 enum lttng_trace_chunk_command_type close_command =
1289 msg.u.close_trace_chunk.close_command.value;
1290 const uint64_t relayd_id =
1291 msg.u.close_trace_chunk.relayd_id.value;
1292 struct lttcomm_consumer_close_trace_chunk_reply reply;
1293 char path[LTTNG_PATH_MAX];
1294
1295 ret_code = lttng_consumer_close_trace_chunk(
1296 msg.u.close_trace_chunk.relayd_id.is_set ?
1297 &relayd_id :
1298 NULL,
1299 msg.u.close_trace_chunk.session_id,
1300 msg.u.close_trace_chunk.chunk_id,
1301 (time_t) msg.u.close_trace_chunk.close_timestamp,
1302 msg.u.close_trace_chunk.close_command.is_set ?
1303 &close_command :
1304 NULL, path);
1305 reply.ret_code = ret_code;
1306 reply.path_length = strlen(path) + 1;
1307 ret = lttcomm_send_unix_sock(sock, &reply, sizeof(reply));
1308 if (ret != sizeof(reply)) {
1309 goto error_fatal;
1310 }
1311 ret = lttcomm_send_unix_sock(sock, path, reply.path_length);
1312 if (ret != reply.path_length) {
1313 goto error_fatal;
1314 }
1315 goto end_nosignal;
1316 }
1317 case LTTNG_CONSUMER_TRACE_CHUNK_EXISTS:
1318 {
1319 const uint64_t relayd_id =
1320 msg.u.trace_chunk_exists.relayd_id.value;
1321
1322 ret_code = lttng_consumer_trace_chunk_exists(
1323 msg.u.trace_chunk_exists.relayd_id.is_set ?
1324 &relayd_id : NULL,
1325 msg.u.trace_chunk_exists.session_id,
1326 msg.u.trace_chunk_exists.chunk_id);
1327 goto end_msg_sessiond;
1328 }
1329 default:
1330 goto end_nosignal;
1331 }
1332
1333 end_nosignal:
1334 /*
1335 * Return 1 to indicate success since the 0 value can be a socket
1336 * shutdown during the recv() or send() call.
1337 */
1338 ret = 1;
1339 goto end;
1340 error_fatal:
1341 /* This will issue a consumer stop. */
1342 ret = -1;
1343 goto end;
1344 end_msg_sessiond:
1345 /*
1346 * The returned value here is not useful since either way we'll return 1 to
1347 * the caller because the session daemon socket management is done
1348 * elsewhere. Returning a negative code or 0 will shutdown the consumer.
1349 */
1350 ret = consumer_send_status_msg(sock, ret_code);
1351 if (ret < 0) {
1352 goto error_fatal;
1353 }
1354 ret = 1;
1355 end:
1356 health_code_update();
1357 rcu_read_unlock();
1358 return ret;
1359 }
1360
1361 /*
1362 * Populate index values of a kernel stream. Values are set in big endian order.
1363 *
1364 * Return 0 on success or else a negative value.
1365 */
1366 static int get_index_values(struct ctf_packet_index *index, int infd)
1367 {
1368 int ret;
1369 uint64_t packet_size, content_size, timestamp_begin, timestamp_end,
1370 events_discarded, stream_id, stream_instance_id,
1371 packet_seq_num;
1372
1373 ret = kernctl_get_timestamp_begin(infd, &timestamp_begin);
1374 if (ret < 0) {
1375 PERROR("kernctl_get_timestamp_begin");
1376 goto error;
1377 }
1378
1379 ret = kernctl_get_timestamp_end(infd, &timestamp_end);
1380 if (ret < 0) {
1381 PERROR("kernctl_get_timestamp_end");
1382 goto error;
1383 }
1384
1385 ret = kernctl_get_events_discarded(infd, &events_discarded);
1386 if (ret < 0) {
1387 PERROR("kernctl_get_events_discarded");
1388 goto error;
1389 }
1390
1391 ret = kernctl_get_content_size(infd, &content_size);
1392 if (ret < 0) {
1393 PERROR("kernctl_get_content_size");
1394 goto error;
1395 }
1396
1397 ret = kernctl_get_packet_size(infd, &packet_size);
1398 if (ret < 0) {
1399 PERROR("kernctl_get_packet_size");
1400 goto error;
1401 }
1402
1403 ret = kernctl_get_stream_id(infd, &stream_id);
1404 if (ret < 0) {
1405 PERROR("kernctl_get_stream_id");
1406 goto error;
1407 }
1408
1409 ret = kernctl_get_instance_id(infd, &stream_instance_id);
1410 if (ret < 0) {
1411 if (ret == -ENOTTY) {
1412 /* Command not implemented by lttng-modules. */
1413 stream_instance_id = -1ULL;
1414 } else {
1415 PERROR("kernctl_get_instance_id");
1416 goto error;
1417 }
1418 }
1419
1420 ret = kernctl_get_sequence_number(infd, &packet_seq_num);
1421 if (ret < 0) {
1422 if (ret == -ENOTTY) {
1423 /* Command not implemented by lttng-modules. */
1424 packet_seq_num = -1ULL;
1425 ret = 0;
1426 } else {
1427 PERROR("kernctl_get_sequence_number");
1428 goto error;
1429 }
1430 }
1431 index->packet_seq_num = htobe64(index->packet_seq_num);
1432
1433 *index = (typeof(*index)) {
1434 .offset = index->offset,
1435 .packet_size = htobe64(packet_size),
1436 .content_size = htobe64(content_size),
1437 .timestamp_begin = htobe64(timestamp_begin),
1438 .timestamp_end = htobe64(timestamp_end),
1439 .events_discarded = htobe64(events_discarded),
1440 .stream_id = htobe64(stream_id),
1441 .stream_instance_id = htobe64(stream_instance_id),
1442 .packet_seq_num = htobe64(packet_seq_num),
1443 };
1444
1445 error:
1446 return ret;
1447 }
/*
 * Sync metadata, meaning request it from the session daemon and take a
 * buffer snapshot so that the metadata thread can consume it.
 *
 * Metadata stream lock MUST be acquired by the caller.
 *
 * Return 0 if new metadata is available, the positive value ENODATA if the
 * metadata stream is empty, or a negative value on error.
 */
int lttng_kconsumer_sync_metadata(struct lttng_consumer_stream *metadata)
{
	int ret;

	assert(metadata);

	/* Flush any partially-filled metadata sub-buffer so it becomes readable. */
	ret = kernctl_buffer_flush(metadata->wait_fd);
	if (ret < 0) {
		ERR("Failed to flush kernel stream");
		goto end;
	}

	/* -EAGAIN from the snapshot means there is nothing new to consume. */
	ret = kernctl_snapshot(metadata->wait_fd);
	if (ret < 0) {
		if (ret != -EAGAIN) {
			ERR("Sync metadata, taking kernel snapshot failed.");
			goto end;
		}
		DBG("Sync metadata, no new kernel metadata");
		/* No new metadata, exit. */
		ret = ENODATA;
		goto end;
	}

end:
	return ret;
}
1484
/*
 * Update a stream's lost-packet and discarded-event statistics from the
 * counters sampled on its current sub-buffer, accumulating the deltas on
 * the parent channel.
 *
 * The caller must hold the stream's current sub-buffer (between the "get"
 * and "put" sub-buffer operations).
 *
 * Return 0 on success or else a negative value.
 */
static
int update_stream_stats(struct lttng_consumer_stream *stream)
{
	int ret;
	uint64_t seq, discarded;

	ret = kernctl_get_sequence_number(stream->wait_fd, &seq);
	if (ret < 0) {
		if (ret == -ENOTTY) {
			/*
			 * Command not implemented by lttng-modules; fall back
			 * to the sentinel value. 'ret' is left negative here
			 * but is reassigned before the function returns (by
			 * the kernctl_get_events_discarded() call below or
			 * the inconsistency branch).
			 */
			seq = -1ULL;
			stream->sequence_number_unavailable = true;
		} else {
			PERROR("kernctl_get_sequence_number");
			goto end;
		}
	}

	/*
	 * Start the sequence when we extract the first packet in case we don't
	 * start at 0 (for example if a consumer is not connected to the
	 * session immediately after the beginning).
	 */
	if (stream->last_sequence_number == -1ULL) {
		stream->last_sequence_number = seq;
	} else if (seq > stream->last_sequence_number) {
		/* A gap in consecutive sequence numbers means lost packets. */
		stream->chan->lost_packets += seq -
				stream->last_sequence_number - 1;
	} else {
		/* seq <= last_sequence_number */
		ERR("Sequence number inconsistent : prev = %" PRIu64
				", current = %" PRIu64,
				stream->last_sequence_number, seq);
		ret = -1;
		goto end;
	}
	stream->last_sequence_number = seq;

	ret = kernctl_get_events_discarded(stream->wait_fd, &discarded);
	if (ret < 0) {
		PERROR("kernctl_get_events_discarded");
		goto end;
	}
	if (discarded < stream->last_discarded_events) {
		/*
		 * Overflow has occurred. We assume only one wrap-around
		 * has occurred.
		 *
		 * NOTE(review): the wrap point is assumed to be
		 * 2^(CAA_BITS_PER_LONG - 1), i.e. the tracer counter is
		 * presumed to wrap at half the native word range — confirm
		 * against the lttng-modules counter width.
		 */
		stream->chan->discarded_events += (1ULL << (CAA_BITS_PER_LONG - 1)) -
				stream->last_discarded_events + discarded;
	} else {
		stream->chan->discarded_events += discarded -
				stream->last_discarded_events;
	}
	stream->last_discarded_events = discarded;
	ret = 0;

end:
	return ret;
}
1545
1546 /*
1547 * Check if the local version of the metadata stream matches with the version
1548 * of the metadata stream in the kernel. If it was updated, set the reset flag
1549 * on the stream.
1550 */
1551 static
1552 int metadata_stream_check_version(int infd, struct lttng_consumer_stream *stream)
1553 {
1554 int ret;
1555 uint64_t cur_version;
1556
1557 ret = kernctl_get_metadata_version(infd, &cur_version);
1558 if (ret < 0) {
1559 if (ret == -ENOTTY) {
1560 /*
1561 * LTTng-modules does not implement this
1562 * command.
1563 */
1564 ret = 0;
1565 goto end;
1566 }
1567 ERR("Failed to get the metadata version");
1568 goto end;
1569 }
1570
1571 if (stream->metadata_version == cur_version) {
1572 ret = 0;
1573 goto end;
1574 }
1575
1576 DBG("New metadata version detected");
1577 stream->metadata_version = cur_version;
1578 stream->reset_metadata_flag = 1;
1579 ret = 0;
1580
1581 end:
1582 return ret;
1583 }
1584
1585 /*
1586 * Consume data on a file descriptor and write it on a trace file.
1587 * The stream and channel locks must be held by the caller.
1588 */
1589 ssize_t lttng_kconsumer_read_subbuffer(struct lttng_consumer_stream *stream,
1590 struct lttng_consumer_local_data *ctx)
1591 {
1592 unsigned long len, subbuf_size, padding;
1593 int err, write_index = 1, rotation_ret;
1594 ssize_t ret = 0;
1595 int infd = stream->wait_fd;
1596 struct ctf_packet_index index = {};
1597
1598 DBG("In read_subbuffer (infd : %d)", infd);
1599
1600 /*
1601 * If the stream was flagged to be ready for rotation before we extract the
1602 * next packet, rotate it now.
1603 */
1604 if (stream->rotate_ready) {
1605 DBG("Rotate stream before extracting data");
1606 rotation_ret = lttng_consumer_rotate_stream(ctx, stream);
1607 if (rotation_ret < 0) {
1608 ERR("Stream rotation error");
1609 ret = -1;
1610 goto error;
1611 }
1612 }
1613
1614 /* Get the next subbuffer */
1615 err = kernctl_get_next_subbuf(infd);
1616 if (err != 0) {
1617 /*
1618 * This is a debug message even for single-threaded consumer,
1619 * because poll() have more relaxed criterions than get subbuf,
1620 * so get_subbuf may fail for short race windows where poll()
1621 * would issue wakeups.
1622 */
1623 DBG("Reserving sub buffer failed (everything is normal, "
1624 "it is due to concurrency)");
1625 ret = err;
1626 goto error;
1627 }
1628
1629 /* Get the full subbuffer size including padding */
1630 err = kernctl_get_padded_subbuf_size(infd, &len);
1631 if (err != 0) {
1632 PERROR("Getting sub-buffer len failed.");
1633 err = kernctl_put_subbuf(infd);
1634 if (err != 0) {
1635 if (err == -EFAULT) {
1636 PERROR("Error in unreserving sub buffer\n");
1637 } else if (err == -EIO) {
1638 /* Should never happen with newer LTTng versions */
1639 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1640 }
1641 ret = err;
1642 goto error;
1643 }
1644 ret = err;
1645 goto error;
1646 }
1647
1648 if (!stream->metadata_flag) {
1649 ret = get_index_values(&index, infd);
1650 if (ret < 0) {
1651 err = kernctl_put_subbuf(infd);
1652 if (err != 0) {
1653 if (err == -EFAULT) {
1654 PERROR("Error in unreserving sub buffer\n");
1655 } else if (err == -EIO) {
1656 /* Should never happen with newer LTTng versions */
1657 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1658 }
1659 ret = err;
1660 goto error;
1661 }
1662 goto error;
1663 }
1664 ret = update_stream_stats(stream);
1665 if (ret < 0) {
1666 err = kernctl_put_subbuf(infd);
1667 if (err != 0) {
1668 if (err == -EFAULT) {
1669 PERROR("Error in unreserving sub buffer\n");
1670 } else if (err == -EIO) {
1671 /* Should never happen with newer LTTng versions */
1672 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1673 }
1674 ret = err;
1675 goto error;
1676 }
1677 goto error;
1678 }
1679 } else {
1680 write_index = 0;
1681 ret = metadata_stream_check_version(infd, stream);
1682 if (ret < 0) {
1683 err = kernctl_put_subbuf(infd);
1684 if (err != 0) {
1685 if (err == -EFAULT) {
1686 PERROR("Error in unreserving sub buffer\n");
1687 } else if (err == -EIO) {
1688 /* Should never happen with newer LTTng versions */
1689 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1690 }
1691 ret = err;
1692 goto error;
1693 }
1694 goto error;
1695 }
1696 }
1697
1698 switch (stream->chan->output) {
1699 case CONSUMER_CHANNEL_SPLICE:
1700 /*
1701 * XXX: The lttng-modules splice "actor" does not handle copying
1702 * partial pages hence only using the subbuffer size without the
1703 * padding makes the splice fail.
1704 */
1705 subbuf_size = len;
1706 padding = 0;
1707
1708 /* splice the subbuffer to the tracefile */
1709 ret = lttng_consumer_on_read_subbuffer_splice(ctx, stream, subbuf_size,
1710 padding, &index);
1711 /*
1712 * XXX: Splice does not support network streaming so the return value
1713 * is simply checked against subbuf_size and not like the mmap() op.
1714 */
1715 if (ret != subbuf_size) {
1716 /*
1717 * display the error but continue processing to try
1718 * to release the subbuffer
1719 */
1720 ERR("Error splicing to tracefile (ret: %zd != len: %lu)",
1721 ret, subbuf_size);
1722 write_index = 0;
1723 }
1724 break;
1725 case CONSUMER_CHANNEL_MMAP:
1726 {
1727 const char *subbuf_addr;
1728 struct lttng_buffer_view subbuf_view;
1729
1730 /* Get subbuffer size without padding */
1731 err = kernctl_get_subbuf_size(infd, &subbuf_size);
1732 if (err != 0) {
1733 PERROR("Getting sub-buffer len failed.");
1734 err = kernctl_put_subbuf(infd);
1735 if (err != 0) {
1736 if (err == -EFAULT) {
1737 PERROR("Error in unreserving sub buffer\n");
1738 } else if (err == -EIO) {
1739 /* Should never happen with newer LTTng versions */
1740 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1741 }
1742 ret = err;
1743 goto error;
1744 }
1745 ret = err;
1746 goto error;
1747 }
1748
1749 ret = get_current_subbuf_addr(stream, &subbuf_addr);
1750 if (ret) {
1751 goto error_put_subbuf;
1752 }
1753
1754 /* Make sure the tracer is not gone mad on us! */
1755 assert(len >= subbuf_size);
1756
1757 padding = len - subbuf_size;
1758
1759 subbuf_view = lttng_buffer_view_init(subbuf_addr, 0, len);
1760
1761 /* write the subbuffer to the tracefile */
1762 ret = lttng_consumer_on_read_subbuffer_mmap(
1763 ctx, stream, &subbuf_view, padding, &index);
1764 /*
1765 * The mmap operation should write subbuf_size amount of data
1766 * when network streaming or the full padding (len) size when we
1767 * are _not_ streaming.
1768 */
1769 if ((ret != subbuf_size && stream->net_seq_idx != (uint64_t) -1ULL) ||
1770 (ret != len && stream->net_seq_idx == (uint64_t) -1ULL)) {
1771 /*
1772 * Display the error but continue processing to try to release the
1773 * subbuffer. This is a DBG statement since this is possible to
1774 * happen without being a critical error.
1775 */
1776 DBG("Error writing to tracefile "
1777 "(ret: %zd != len: %lu != subbuf_size: %lu)",
1778 ret, len, subbuf_size);
1779 write_index = 0;
1780 }
1781 break;
1782 }
1783 default:
1784 ERR("Unknown output method");
1785 ret = -EPERM;
1786 }
1787 error_put_subbuf:
1788 err = kernctl_put_next_subbuf(infd);
1789 if (err != 0) {
1790 if (err == -EFAULT) {
1791 PERROR("Error in unreserving sub buffer\n");
1792 } else if (err == -EIO) {
1793 /* Should never happen with newer LTTng versions */
1794 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1795 }
1796 ret = err;
1797 goto error;
1798 }
1799
1800 /* Write index if needed. */
1801 if (!write_index) {
1802 goto rotate;
1803 }
1804
1805 if (stream->chan->live_timer_interval && !stream->metadata_flag) {
1806 /*
1807 * In live, block until all the metadata is sent.
1808 */
1809 pthread_mutex_lock(&stream->metadata_timer_lock);
1810 assert(!stream->missed_metadata_flush);
1811 stream->waiting_on_metadata = true;
1812 pthread_mutex_unlock(&stream->metadata_timer_lock);
1813
1814 err = consumer_stream_sync_metadata(ctx, stream->session_id);
1815
1816 pthread_mutex_lock(&stream->metadata_timer_lock);
1817 stream->waiting_on_metadata = false;
1818 if (stream->missed_metadata_flush) {
1819 stream->missed_metadata_flush = false;
1820 pthread_mutex_unlock(&stream->metadata_timer_lock);
1821 (void) consumer_flush_kernel_index(stream);
1822 } else {
1823 pthread_mutex_unlock(&stream->metadata_timer_lock);
1824 }
1825 if (err < 0) {
1826 goto error;
1827 }
1828 }
1829
1830 err = consumer_stream_write_index(stream, &index);
1831 if (err < 0) {
1832 goto error;
1833 }
1834
1835 rotate:
1836 /*
1837 * After extracting the packet, we check if the stream is now ready to be
1838 * rotated and perform the action immediately.
1839 */
1840 rotation_ret = lttng_consumer_stream_is_rotate_ready(stream);
1841 if (rotation_ret == 1) {
1842 rotation_ret = lttng_consumer_rotate_stream(ctx, stream);
1843 if (rotation_ret < 0) {
1844 ERR("Stream rotation error");
1845 ret = -1;
1846 goto error;
1847 }
1848 } else if (rotation_ret < 0) {
1849 ERR("Checking if stream is ready to rotate");
1850 ret = -1;
1851 goto error;
1852 }
1853
1854 error:
1855 return ret;
1856 }
1857
1858 int lttng_kconsumer_on_recv_stream(struct lttng_consumer_stream *stream)
1859 {
1860 int ret;
1861
1862 assert(stream);
1863
1864 /*
1865 * Don't create anything if this is set for streaming or if there is
1866 * no current trace chunk on the parent channel.
1867 */
1868 if (stream->net_seq_idx == (uint64_t) -1ULL && stream->chan->monitor &&
1869 stream->chan->trace_chunk) {
1870 ret = consumer_stream_create_output_files(stream, true);
1871 if (ret) {
1872 goto error;
1873 }
1874 }
1875
1876 if (stream->output == LTTNG_EVENT_MMAP) {
1877 /* get the len of the mmap region */
1878 unsigned long mmap_len;
1879
1880 ret = kernctl_get_mmap_len(stream->wait_fd, &mmap_len);
1881 if (ret != 0) {
1882 PERROR("kernctl_get_mmap_len");
1883 goto error_close_fd;
1884 }
1885 stream->mmap_len = (size_t) mmap_len;
1886
1887 stream->mmap_base = mmap(NULL, stream->mmap_len, PROT_READ,
1888 MAP_PRIVATE, stream->wait_fd, 0);
1889 if (stream->mmap_base == MAP_FAILED) {
1890 PERROR("Error mmaping");
1891 ret = -1;
1892 goto error_close_fd;
1893 }
1894 }
1895
1896 /* we return 0 to let the library handle the FD internally */
1897 return 0;
1898
1899 error_close_fd:
1900 if (stream->out_fd >= 0) {
1901 int err;
1902
1903 err = close(stream->out_fd);
1904 assert(!err);
1905 stream->out_fd = -1;
1906 }
1907 error:
1908 return ret;
1909 }
1910
1911 /*
1912 * Check if data is still being extracted from the buffers for a specific
1913 * stream. Consumer data lock MUST be acquired before calling this function
1914 * and the stream lock.
1915 *
1916 * Return 1 if the traced data are still getting read else 0 meaning that the
1917 * data is available for trace viewer reading.
1918 */
1919 int lttng_kconsumer_data_pending(struct lttng_consumer_stream *stream)
1920 {
1921 int ret;
1922
1923 assert(stream);
1924
1925 if (stream->endpoint_status != CONSUMER_ENDPOINT_ACTIVE) {
1926 ret = 0;
1927 goto end;
1928 }
1929
1930 ret = kernctl_get_next_subbuf(stream->wait_fd);
1931 if (ret == 0) {
1932 /* There is still data so let's put back this subbuffer. */
1933 ret = kernctl_put_subbuf(stream->wait_fd);
1934 assert(ret == 0);
1935 ret = 1; /* Data is pending */
1936 goto end;
1937 }
1938
1939 /* Data is NOT pending and ready to be read. */
1940 ret = 0;
1941
1942 end:
1943 return ret;
1944 }
This page took 0.112421 seconds and 4 git commands to generate.