consumerd: pass channel instance to stream creation function
[lttng-tools.git] / src / common / kernel-consumer / kernel-consumer.c
1 /*
2 * Copyright (C) 2011 Julien Desfossez <julien.desfossez@polymtl.ca>
3 * Copyright (C) 2011 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
4 * Copyright (C) 2017 Jérémie Galarneau <jeremie.galarneau@efficios.com>
5 *
6 * SPDX-License-Identifier: GPL-2.0-only
7 *
8 */
9
10 #include "common/buffer-view.h"
11 #include <stdint.h>
12 #define _LGPL_SOURCE
13 #include <assert.h>
14 #include <poll.h>
15 #include <pthread.h>
16 #include <stdlib.h>
17 #include <string.h>
18 #include <sys/mman.h>
19 #include <sys/socket.h>
20 #include <sys/types.h>
21 #include <inttypes.h>
22 #include <unistd.h>
23 #include <sys/stat.h>
24
25 #include <bin/lttng-consumerd/health-consumerd.h>
26 #include <common/common.h>
27 #include <common/kernel-ctl/kernel-ctl.h>
28 #include <common/sessiond-comm/sessiond-comm.h>
29 #include <common/sessiond-comm/relayd.h>
30 #include <common/compat/fcntl.h>
31 #include <common/compat/endian.h>
32 #include <common/pipe.h>
33 #include <common/relayd/relayd.h>
34 #include <common/utils.h>
35 #include <common/consumer/consumer-stream.h>
36 #include <common/index/index.h>
37 #include <common/consumer/consumer-timer.h>
38 #include <common/optional.h>
39
40 #include "kernel-consumer.h"
41
42 extern struct lttng_consumer_global_data consumer_data;
43 extern int consumer_poll_timeout;
44
45 /*
46 * Take a snapshot for a specific fd
47 *
48 * Returns 0 on success, < 0 on error
49 */
50 int lttng_kconsumer_take_snapshot(struct lttng_consumer_stream *stream)
51 {
52 int ret = 0;
53 int infd = stream->wait_fd;
54
55 ret = kernctl_snapshot(infd);
56 /*
57 * -EAGAIN is not an error, it just means that there is no data to
58 * be read.
59 */
60 if (ret != 0 && ret != -EAGAIN) {
61 PERROR("Getting sub-buffer snapshot.");
62 }
63
64 return ret;
65 }
66
67 /*
68 * Sample consumed and produced positions for a specific fd.
69 *
70 * Returns 0 on success, < 0 on error.
71 */
72 int lttng_kconsumer_sample_snapshot_positions(
73 struct lttng_consumer_stream *stream)
74 {
75 assert(stream);
76
77 return kernctl_snapshot_sample_positions(stream->wait_fd);
78 }
79
80 /*
81 * Get the produced position
82 *
83 * Returns 0 on success, < 0 on error
84 */
85 int lttng_kconsumer_get_produced_snapshot(struct lttng_consumer_stream *stream,
86 unsigned long *pos)
87 {
88 int ret;
89 int infd = stream->wait_fd;
90
91 ret = kernctl_snapshot_get_produced(infd, pos);
92 if (ret != 0) {
93 PERROR("kernctl_snapshot_get_produced");
94 }
95
96 return ret;
97 }
98
99 /*
100 * Get the consumerd position
101 *
102 * Returns 0 on success, < 0 on error
103 */
104 int lttng_kconsumer_get_consumed_snapshot(struct lttng_consumer_stream *stream,
105 unsigned long *pos)
106 {
107 int ret;
108 int infd = stream->wait_fd;
109
110 ret = kernctl_snapshot_get_consumed(infd, pos);
111 if (ret != 0) {
112 PERROR("kernctl_snapshot_get_consumed");
113 }
114
115 return ret;
116 }
117
118 static
119 int get_current_subbuf_addr(struct lttng_consumer_stream *stream,
120 const char **addr)
121 {
122 int ret;
123 unsigned long mmap_offset;
124 const char *mmap_base = stream->mmap_base;
125
126 ret = kernctl_get_mmap_read_offset(stream->wait_fd, &mmap_offset);
127 if (ret < 0) {
128 PERROR("Failed to get mmap read offset");
129 goto error;
130 }
131
132 *addr = mmap_base + mmap_offset;
133 error:
134 return ret;
135 }
136
/*
 * Take a snapshot of all the streams of a channel.
 * RCU read-side lock must be held across this function to ensure existence of
 * channel. The channel lock must be held by the caller.
 *
 * channel: channel whose streams are captured,
 * key: channel key (used for logging only),
 * path: destination used to create local output files or to name the
 *   streams sent to the relay daemon,
 * relayd_id: relay daemon id, or (uint64_t) -1ULL for local output,
 * nb_packets_per_stream: cap on the number of packets captured per stream
 *   as interpreted by consumer_get_consume_start_pos(),
 * ctx: consumer local data used to write/send the sub-buffers.
 *
 * Returns 0 on success, < 0 on error
 */
static int lttng_kconsumer_snapshot_channel(
		struct lttng_consumer_channel *channel,
		uint64_t key, char *path, uint64_t relayd_id,
		uint64_t nb_packets_per_stream,
		struct lttng_consumer_local_data *ctx)
{
	int ret;
	struct lttng_consumer_stream *stream;

	DBG("Kernel consumer snapshot channel %" PRIu64, key);

	rcu_read_lock();

	/* Splice is not supported yet for channel snapshot. */
	if (channel->output != CONSUMER_CHANNEL_MMAP) {
		ERR("Unsupported output type for channel \"%s\": mmap output is required to record a snapshot",
				channel->name);
		ret = -1;
		goto end;
	}

	/*
	 * Each stream is handled under its own lock; on any error the
	 * end_unlock label releases the lock of the stream currently held
	 * and the whole snapshot is aborted.
	 */
	cds_list_for_each_entry(stream, &channel->streams.head, send_node) {
		unsigned long consumed_pos, produced_pos;

		health_code_update();

		/*
		 * Lock stream because we are about to change its state.
		 */
		pthread_mutex_lock(&stream->lock);

		assert(channel->trace_chunk);
		if (!lttng_trace_chunk_get(channel->trace_chunk)) {
			/*
			 * Can't happen barring an internal error as the channel
			 * holds a reference to the trace chunk.
			 */
			ERR("Failed to acquire reference to channel's trace chunk");
			ret = -1;
			goto end_unlock;
		}
		assert(!stream->trace_chunk);
		/* Reference acquired above; released at the end of the iteration. */
		stream->trace_chunk = channel->trace_chunk;

		/*
		 * Assign the received relayd ID so we can use it for streaming. The streams
		 * are not visible to anyone so this is OK to change it.
		 */
		stream->net_seq_idx = relayd_id;
		channel->relayd_id = relayd_id;
		if (relayd_id != (uint64_t) -1ULL) {
			ret = consumer_send_relayd_stream(stream, path);
			if (ret < 0) {
				ERR("sending stream to relayd");
				goto end_unlock;
			}
		} else {
			/* Local output: create the on-disk files for this stream. */
			ret = consumer_stream_create_output_files(stream,
					false);
			if (ret < 0) {
				goto end_unlock;
			}
			DBG("Kernel consumer snapshot stream (%" PRIu64 ")",
					stream->key);
		}

		ret = kernctl_buffer_flush_empty(stream->wait_fd);
		if (ret < 0) {
			/*
			 * Doing a buffer flush which does not take into
			 * account empty packets. This is not perfect
			 * for stream intersection, but required as a
			 * fall-back when "flush_empty" is not
			 * implemented by lttng-modules.
			 */
			ret = kernctl_buffer_flush(stream->wait_fd);
			if (ret < 0) {
				ERR("Failed to flush kernel stream");
				goto end_unlock;
			}
			/*
			 * NOTE(review): this unconditional jump aborts the
			 * snapshot of this stream AND of all remaining streams
			 * even though the fall-back flush succeeded — confirm
			 * this is intended and not a missing fallthrough.
			 */
			goto end_unlock;
		}

		ret = lttng_kconsumer_take_snapshot(stream);
		if (ret < 0) {
			ERR("Taking kernel snapshot");
			goto end_unlock;
		}

		/* Sample the snapshot's produced/consumed boundaries. */
		ret = lttng_kconsumer_get_produced_snapshot(stream, &produced_pos);
		if (ret < 0) {
			ERR("Produced kernel snapshot position");
			goto end_unlock;
		}

		ret = lttng_kconsumer_get_consumed_snapshot(stream, &consumed_pos);
		if (ret < 0) {
			ERR("Consumerd kernel snapshot position");
			goto end_unlock;
		}

		/*
		 * Move the start position forward so that at most
		 * nb_packets_per_stream packets are captured.
		 */
		consumed_pos = consumer_get_consume_start_pos(consumed_pos,
				produced_pos, nb_packets_per_stream,
				stream->max_sb_size);

		/* Signed comparison handles position counter wrap-around. */
		while ((long) (consumed_pos - produced_pos) < 0) {
			ssize_t read_len;
			unsigned long len, padded_len;
			const char *subbuf_addr;
			struct lttng_buffer_view subbuf_view;

			health_code_update();
			DBG("Kernel consumer taking snapshot at pos %lu", consumed_pos);

			ret = kernctl_get_subbuf(stream->wait_fd, &consumed_pos);
			if (ret < 0) {
				if (ret != -EAGAIN) {
					PERROR("kernctl_get_subbuf snapshot");
					goto end_unlock;
				}
				/* Sub-buffer unavailable: account it as lost and move on. */
				DBG("Kernel consumer get subbuf failed. Skipping it.");
				consumed_pos += stream->max_sb_size;
				stream->chan->lost_packets++;
				continue;
			}

			ret = kernctl_get_subbuf_size(stream->wait_fd, &len);
			if (ret < 0) {
				ERR("Snapshot kernctl_get_subbuf_size");
				goto error_put_subbuf;
			}

			ret = kernctl_get_padded_subbuf_size(stream->wait_fd, &padded_len);
			if (ret < 0) {
				ERR("Snapshot kernctl_get_padded_subbuf_size");
				goto error_put_subbuf;
			}

			ret = get_current_subbuf_addr(stream, &subbuf_addr);
			if (ret) {
				goto error_put_subbuf;
			}

			subbuf_view = lttng_buffer_view_init(
					subbuf_addr, 0, padded_len);
			read_len = lttng_consumer_on_read_subbuffer_mmap(ctx,
					stream, &subbuf_view,
					padded_len - len, NULL);
			/*
			 * We write the padded len in local tracefiles but the data len
			 * when using a relay. Display the error but continue processing
			 * to try to release the subbuffer.
			 */
			if (relayd_id != (uint64_t) -1ULL) {
				if (read_len != len) {
					ERR("Error sending to the relay (ret: %zd != len: %lu)",
							read_len, len);
				}
			} else {
				if (read_len != padded_len) {
					ERR("Error writing to tracefile (ret: %zd != len: %lu)",
							read_len, padded_len);
				}
			}

			ret = kernctl_put_subbuf(stream->wait_fd);
			if (ret < 0) {
				ERR("Snapshot kernctl_put_subbuf");
				goto end_unlock;
			}
			consumed_pos += stream->max_sb_size;
		}

		/* Tear down this stream's output now that its data is captured. */
		if (relayd_id == (uint64_t) -1ULL) {
			if (stream->out_fd >= 0) {
				ret = close(stream->out_fd);
				if (ret < 0) {
					PERROR("Kernel consumer snapshot close out_fd");
					goto end_unlock;
				}
				stream->out_fd = -1;
			}
		} else {
			close_relayd_stream(stream);
			stream->net_seq_idx = (uint64_t) -1ULL;
		}
		/* Release the trace-chunk reference taken at the top of the loop. */
		lttng_trace_chunk_put(stream->trace_chunk);
		stream->trace_chunk = NULL;
		pthread_mutex_unlock(&stream->lock);
	}

	/* All good! */
	ret = 0;
	goto end;

error_put_subbuf:
	/* Release the sub-buffer before unlocking; preserve the ERR path's ret. */
	ret = kernctl_put_subbuf(stream->wait_fd);
	if (ret < 0) {
		ERR("Snapshot kernctl_put_subbuf error path");
	}
end_unlock:
	pthread_mutex_unlock(&stream->lock);
end:
	rcu_read_unlock();
	return ret;
}
350
/*
 * Read the whole metadata available for a snapshot.
 * RCU read-side lock must be held across this function to ensure existence of
 * metadata_channel. The channel lock must be held by the caller.
 *
 * metadata_channel: channel whose (single) metadata stream is drained,
 * key: channel key (used for logging only),
 * path: destination used for local output files or for the relayd stream,
 * relayd_id: relay daemon id, or (uint64_t) -1ULL for local output,
 * ctx: consumer local data used to read the sub-buffers.
 *
 * On return — success or error — the metadata stream has been removed from
 * the channel's stream list, destroyed, and metadata_channel->metadata_stream
 * set to NULL.
 *
 * Returns 0 on success, < 0 on error
 */
static int lttng_kconsumer_snapshot_metadata(
		struct lttng_consumer_channel *metadata_channel,
		uint64_t key, char *path, uint64_t relayd_id,
		struct lttng_consumer_local_data *ctx)
{
	int ret, use_relayd = 0;
	ssize_t ret_read;
	struct lttng_consumer_stream *metadata_stream;

	assert(ctx);

	DBG("Kernel consumer snapshot metadata with key %" PRIu64 " at path %s",
			key, path);

	rcu_read_lock();

	metadata_stream = metadata_channel->metadata_stream;
	assert(metadata_stream);

	pthread_mutex_lock(&metadata_stream->lock);
	assert(metadata_channel->trace_chunk);
	assert(metadata_stream->trace_chunk);

	/* Flag once that we have a valid relayd for the stream. */
	if (relayd_id != (uint64_t) -1ULL) {
		use_relayd = 1;
	}

	if (use_relayd) {
		ret = consumer_send_relayd_stream(metadata_stream, path);
		if (ret < 0) {
			goto error_snapshot;
		}
	} else {
		/* Local output: create the on-disk files for the metadata stream. */
		ret = consumer_stream_create_output_files(metadata_stream,
				false);
		if (ret < 0) {
			goto error_snapshot;
		}
	}

	/* Drain the metadata stream until no more data is available. */
	do {
		health_code_update();

		ret_read = lttng_kconsumer_read_subbuffer(metadata_stream, ctx);
		if (ret_read < 0) {
			if (ret_read != -EAGAIN) {
				ERR("Kernel snapshot reading metadata subbuffer (ret: %zd)",
						ret_read);
				ret = ret_read;
				goto error_snapshot;
			}
			/* ret_read is negative at this point so we will exit the loop. */
			continue;
		}
	} while (ret_read >= 0);

	/* Tear down the stream's output now that the metadata is captured. */
	if (use_relayd) {
		close_relayd_stream(metadata_stream);
		metadata_stream->net_seq_idx = (uint64_t) -1ULL;
	} else {
		if (metadata_stream->out_fd >= 0) {
			ret = close(metadata_stream->out_fd);
			if (ret < 0) {
				PERROR("Kernel consumer snapshot metadata close out_fd");
				/*
				 * Don't go on error here since the snapshot was successful at this
				 * point but somehow the close failed.
				 */
			}
			metadata_stream->out_fd = -1;
			lttng_trace_chunk_put(metadata_stream->trace_chunk);
			metadata_stream->trace_chunk = NULL;
		}
	}

	ret = 0;
error_snapshot:
	/* Executed on success too: the snapshot's metadata stream is single-use. */
	pthread_mutex_unlock(&metadata_stream->lock);
	cds_list_del(&metadata_stream->send_node);
	consumer_stream_destroy(metadata_stream, NULL);
	metadata_channel->metadata_stream = NULL;
	rcu_read_unlock();
	return ret;
}
443
444 /*
445 * Receive command from session daemon and process it.
446 *
447 * Return 1 on success else a negative value or 0.
448 */
449 int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
450 int sock, struct pollfd *consumer_sockpoll)
451 {
452 ssize_t ret;
453 enum lttcomm_return_code ret_code = LTTCOMM_CONSUMERD_SUCCESS;
454 struct lttcomm_consumer_msg msg;
455
456 health_code_update();
457
458 ret = lttcomm_recv_unix_sock(sock, &msg, sizeof(msg));
459 if (ret != sizeof(msg)) {
460 if (ret > 0) {
461 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_ERROR_RECV_CMD);
462 ret = -1;
463 }
464 return ret;
465 }
466
467 health_code_update();
468
469 /* Deprecated command */
470 assert(msg.cmd_type != LTTNG_CONSUMER_STOP);
471
472 health_code_update();
473
474 /* relayd needs RCU read-side protection */
475 rcu_read_lock();
476
477 switch (msg.cmd_type) {
478 case LTTNG_CONSUMER_ADD_RELAYD_SOCKET:
479 {
480 /* Session daemon status message are handled in the following call. */
481 consumer_add_relayd_socket(msg.u.relayd_sock.net_index,
482 msg.u.relayd_sock.type, ctx, sock, consumer_sockpoll,
483 &msg.u.relayd_sock.sock, msg.u.relayd_sock.session_id,
484 msg.u.relayd_sock.relayd_session_id);
485 goto end_nosignal;
486 }
487 case LTTNG_CONSUMER_ADD_CHANNEL:
488 {
489 struct lttng_consumer_channel *new_channel;
490 int ret_recv;
491 const uint64_t chunk_id = msg.u.channel.chunk_id.value;
492
493 health_code_update();
494
495 /* First send a status message before receiving the fds. */
496 ret = consumer_send_status_msg(sock, ret_code);
497 if (ret < 0) {
498 /* Somehow, the session daemon is not responding anymore. */
499 goto error_fatal;
500 }
501
502 health_code_update();
503
504 DBG("consumer_add_channel %" PRIu64, msg.u.channel.channel_key);
505 new_channel = consumer_allocate_channel(msg.u.channel.channel_key,
506 msg.u.channel.session_id,
507 msg.u.channel.chunk_id.is_set ?
508 &chunk_id : NULL,
509 msg.u.channel.pathname,
510 msg.u.channel.name,
511 msg.u.channel.relayd_id, msg.u.channel.output,
512 msg.u.channel.tracefile_size,
513 msg.u.channel.tracefile_count, 0,
514 msg.u.channel.monitor,
515 msg.u.channel.live_timer_interval,
516 NULL, NULL);
517 if (new_channel == NULL) {
518 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_OUTFD_ERROR);
519 goto end_nosignal;
520 }
521 new_channel->nb_init_stream_left = msg.u.channel.nb_init_streams;
522 switch (msg.u.channel.output) {
523 case LTTNG_EVENT_SPLICE:
524 new_channel->output = CONSUMER_CHANNEL_SPLICE;
525 break;
526 case LTTNG_EVENT_MMAP:
527 new_channel->output = CONSUMER_CHANNEL_MMAP;
528 break;
529 default:
530 ERR("Channel output unknown %d", msg.u.channel.output);
531 goto end_nosignal;
532 }
533
534 /* Translate and save channel type. */
535 switch (msg.u.channel.type) {
536 case CONSUMER_CHANNEL_TYPE_DATA:
537 case CONSUMER_CHANNEL_TYPE_METADATA:
538 new_channel->type = msg.u.channel.type;
539 break;
540 default:
541 assert(0);
542 goto end_nosignal;
543 };
544
545 health_code_update();
546
547 if (ctx->on_recv_channel != NULL) {
548 ret_recv = ctx->on_recv_channel(new_channel);
549 if (ret_recv == 0) {
550 ret = consumer_add_channel(new_channel, ctx);
551 } else if (ret_recv < 0) {
552 goto end_nosignal;
553 }
554 } else {
555 ret = consumer_add_channel(new_channel, ctx);
556 }
557 if (msg.u.channel.type == CONSUMER_CHANNEL_TYPE_DATA && !ret) {
558 int monitor_start_ret;
559
560 DBG("Consumer starting monitor timer");
561 consumer_timer_live_start(new_channel,
562 msg.u.channel.live_timer_interval);
563 monitor_start_ret = consumer_timer_monitor_start(
564 new_channel,
565 msg.u.channel.monitor_timer_interval);
566 if (monitor_start_ret < 0) {
567 ERR("Starting channel monitoring timer failed");
568 goto end_nosignal;
569 }
570
571 }
572
573 health_code_update();
574
575 /* If we received an error in add_channel, we need to report it. */
576 if (ret < 0) {
577 ret = consumer_send_status_msg(sock, ret);
578 if (ret < 0) {
579 goto error_fatal;
580 }
581 goto end_nosignal;
582 }
583
584 goto end_nosignal;
585 }
586 case LTTNG_CONSUMER_ADD_STREAM:
587 {
588 int fd;
589 struct lttng_pipe *stream_pipe;
590 struct lttng_consumer_stream *new_stream;
591 struct lttng_consumer_channel *channel;
592 int alloc_ret = 0;
593
594 /*
595 * Get stream's channel reference. Needed when adding the stream to the
596 * global hash table.
597 */
598 channel = consumer_find_channel(msg.u.stream.channel_key);
599 if (!channel) {
600 /*
601 * We could not find the channel. Can happen if cpu hotplug
602 * happens while tearing down.
603 */
604 ERR("Unable to find channel key %" PRIu64, msg.u.stream.channel_key);
605 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
606 }
607
608 health_code_update();
609
610 /* First send a status message before receiving the fds. */
611 ret = consumer_send_status_msg(sock, ret_code);
612 if (ret < 0) {
613 /* Somehow, the session daemon is not responding anymore. */
614 goto error_add_stream_fatal;
615 }
616
617 health_code_update();
618
619 if (ret_code != LTTCOMM_CONSUMERD_SUCCESS) {
620 /* Channel was not found. */
621 goto error_add_stream_nosignal;
622 }
623
624 /* Blocking call */
625 health_poll_entry();
626 ret = lttng_consumer_poll_socket(consumer_sockpoll);
627 health_poll_exit();
628 if (ret) {
629 goto error_add_stream_fatal;
630 }
631
632 health_code_update();
633
634 /* Get stream file descriptor from socket */
635 ret = lttcomm_recv_fds_unix_sock(sock, &fd, 1);
636 if (ret != sizeof(fd)) {
637 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_ERROR_RECV_FD);
638 goto end;
639 }
640
641 health_code_update();
642
643 /*
644 * Send status code to session daemon only if the recv works. If the
645 * above recv() failed, the session daemon is notified through the
646 * error socket and the teardown is eventually done.
647 */
648 ret = consumer_send_status_msg(sock, ret_code);
649 if (ret < 0) {
650 /* Somehow, the session daemon is not responding anymore. */
651 goto error_add_stream_nosignal;
652 }
653
654 health_code_update();
655
656 pthread_mutex_lock(&channel->lock);
657 new_stream = consumer_allocate_stream(
658 channel,
659 channel->key,
660 fd,
661 channel->name,
662 channel->relayd_id,
663 channel->session_id,
664 channel->trace_chunk,
665 msg.u.stream.cpu,
666 &alloc_ret,
667 channel->type,
668 channel->monitor);
669 if (new_stream == NULL) {
670 switch (alloc_ret) {
671 case -ENOMEM:
672 case -EINVAL:
673 default:
674 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_OUTFD_ERROR);
675 break;
676 }
677 pthread_mutex_unlock(&channel->lock);
678 goto error_add_stream_nosignal;
679 }
680
681 new_stream->wait_fd = fd;
682 ret = kernctl_get_max_subbuf_size(new_stream->wait_fd,
683 &new_stream->max_sb_size);
684 if (ret < 0) {
685 pthread_mutex_unlock(&channel->lock);
686 ERR("Failed to get kernel maximal subbuffer size");
687 goto error_add_stream_nosignal;
688 }
689
690 consumer_stream_update_channel_attributes(new_stream,
691 channel);
692 switch (channel->output) {
693 case CONSUMER_CHANNEL_SPLICE:
694 new_stream->output = LTTNG_EVENT_SPLICE;
695 ret = utils_create_pipe(new_stream->splice_pipe);
696 if (ret < 0) {
697 pthread_mutex_unlock(&channel->lock);
698 goto error_add_stream_nosignal;
699 }
700 break;
701 case CONSUMER_CHANNEL_MMAP:
702 new_stream->output = LTTNG_EVENT_MMAP;
703 break;
704 default:
705 ERR("Stream output unknown %d", channel->output);
706 pthread_mutex_unlock(&channel->lock);
707 goto error_add_stream_nosignal;
708 }
709
710 /*
711 * We've just assigned the channel to the stream so increment the
712 * refcount right now. We don't need to increment the refcount for
713 * streams in no monitor because we handle manually the cleanup of
714 * those. It is very important to make sure there is NO prior
715 * consumer_del_stream() calls or else the refcount will be unbalanced.
716 */
717 if (channel->monitor) {
718 uatomic_inc(&new_stream->chan->refcount);
719 }
720
721 /*
722 * The buffer flush is done on the session daemon side for the kernel
723 * so no need for the stream "hangup_flush_done" variable to be
724 * tracked. This is important for a kernel stream since we don't rely
725 * on the flush state of the stream to read data. It's not the case for
726 * user space tracing.
727 */
728 new_stream->hangup_flush_done = 0;
729
730 health_code_update();
731
732 pthread_mutex_lock(&new_stream->lock);
733 if (ctx->on_recv_stream) {
734 ret = ctx->on_recv_stream(new_stream);
735 if (ret < 0) {
736 pthread_mutex_unlock(&new_stream->lock);
737 pthread_mutex_unlock(&channel->lock);
738 consumer_stream_free(new_stream);
739 goto error_add_stream_nosignal;
740 }
741 }
742 health_code_update();
743
744 if (new_stream->metadata_flag) {
745 channel->metadata_stream = new_stream;
746 }
747
748 /* Do not monitor this stream. */
749 if (!channel->monitor) {
750 DBG("Kernel consumer add stream %s in no monitor mode with "
751 "relayd id %" PRIu64, new_stream->name,
752 new_stream->net_seq_idx);
753 cds_list_add(&new_stream->send_node, &channel->streams.head);
754 pthread_mutex_unlock(&new_stream->lock);
755 pthread_mutex_unlock(&channel->lock);
756 goto end_add_stream;
757 }
758
759 /* Send stream to relayd if the stream has an ID. */
760 if (new_stream->net_seq_idx != (uint64_t) -1ULL) {
761 ret = consumer_send_relayd_stream(new_stream,
762 new_stream->chan->pathname);
763 if (ret < 0) {
764 pthread_mutex_unlock(&new_stream->lock);
765 pthread_mutex_unlock(&channel->lock);
766 consumer_stream_free(new_stream);
767 goto error_add_stream_nosignal;
768 }
769
770 /*
771 * If adding an extra stream to an already
772 * existing channel (e.g. cpu hotplug), we need
773 * to send the "streams_sent" command to relayd.
774 */
775 if (channel->streams_sent_to_relayd) {
776 ret = consumer_send_relayd_streams_sent(
777 new_stream->net_seq_idx);
778 if (ret < 0) {
779 pthread_mutex_unlock(&new_stream->lock);
780 pthread_mutex_unlock(&channel->lock);
781 goto error_add_stream_nosignal;
782 }
783 }
784 }
785 pthread_mutex_unlock(&new_stream->lock);
786 pthread_mutex_unlock(&channel->lock);
787
788 /* Get the right pipe where the stream will be sent. */
789 if (new_stream->metadata_flag) {
790 consumer_add_metadata_stream(new_stream);
791 stream_pipe = ctx->consumer_metadata_pipe;
792 } else {
793 consumer_add_data_stream(new_stream);
794 stream_pipe = ctx->consumer_data_pipe;
795 }
796
797 /* Visible to other threads */
798 new_stream->globally_visible = 1;
799
800 health_code_update();
801
802 ret = lttng_pipe_write(stream_pipe, &new_stream, sizeof(new_stream));
803 if (ret < 0) {
804 ERR("Consumer write %s stream to pipe %d",
805 new_stream->metadata_flag ? "metadata" : "data",
806 lttng_pipe_get_writefd(stream_pipe));
807 if (new_stream->metadata_flag) {
808 consumer_del_stream_for_metadata(new_stream);
809 } else {
810 consumer_del_stream_for_data(new_stream);
811 }
812 goto error_add_stream_nosignal;
813 }
814
815 DBG("Kernel consumer ADD_STREAM %s (fd: %d) %s with relayd id %" PRIu64,
816 new_stream->name, fd, new_stream->chan->pathname, new_stream->relayd_stream_id);
817 end_add_stream:
818 break;
819 error_add_stream_nosignal:
820 goto end_nosignal;
821 error_add_stream_fatal:
822 goto error_fatal;
823 }
824 case LTTNG_CONSUMER_STREAMS_SENT:
825 {
826 struct lttng_consumer_channel *channel;
827
828 /*
829 * Get stream's channel reference. Needed when adding the stream to the
830 * global hash table.
831 */
832 channel = consumer_find_channel(msg.u.sent_streams.channel_key);
833 if (!channel) {
834 /*
835 * We could not find the channel. Can happen if cpu hotplug
836 * happens while tearing down.
837 */
838 ERR("Unable to find channel key %" PRIu64,
839 msg.u.sent_streams.channel_key);
840 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
841 }
842
843 health_code_update();
844
845 /*
846 * Send status code to session daemon.
847 */
848 ret = consumer_send_status_msg(sock, ret_code);
849 if (ret < 0 || ret_code != LTTCOMM_CONSUMERD_SUCCESS) {
850 /* Somehow, the session daemon is not responding anymore. */
851 goto error_streams_sent_nosignal;
852 }
853
854 health_code_update();
855
856 /*
857 * We should not send this message if we don't monitor the
858 * streams in this channel.
859 */
860 if (!channel->monitor) {
861 goto end_error_streams_sent;
862 }
863
864 health_code_update();
865 /* Send stream to relayd if the stream has an ID. */
866 if (msg.u.sent_streams.net_seq_idx != (uint64_t) -1ULL) {
867 ret = consumer_send_relayd_streams_sent(
868 msg.u.sent_streams.net_seq_idx);
869 if (ret < 0) {
870 goto error_streams_sent_nosignal;
871 }
872 channel->streams_sent_to_relayd = true;
873 }
874 end_error_streams_sent:
875 break;
876 error_streams_sent_nosignal:
877 goto end_nosignal;
878 }
879 case LTTNG_CONSUMER_UPDATE_STREAM:
880 {
881 rcu_read_unlock();
882 return -ENOSYS;
883 }
884 case LTTNG_CONSUMER_DESTROY_RELAYD:
885 {
886 uint64_t index = msg.u.destroy_relayd.net_seq_idx;
887 struct consumer_relayd_sock_pair *relayd;
888
889 DBG("Kernel consumer destroying relayd %" PRIu64, index);
890
891 /* Get relayd reference if exists. */
892 relayd = consumer_find_relayd(index);
893 if (relayd == NULL) {
894 DBG("Unable to find relayd %" PRIu64, index);
895 ret_code = LTTCOMM_CONSUMERD_RELAYD_FAIL;
896 }
897
898 /*
899 * Each relayd socket pair has a refcount of stream attached to it
900 * which tells if the relayd is still active or not depending on the
901 * refcount value.
902 *
903 * This will set the destroy flag of the relayd object and destroy it
904 * if the refcount reaches zero when called.
905 *
906 * The destroy can happen either here or when a stream fd hangs up.
907 */
908 if (relayd) {
909 consumer_flag_relayd_for_destroy(relayd);
910 }
911
912 health_code_update();
913
914 ret = consumer_send_status_msg(sock, ret_code);
915 if (ret < 0) {
916 /* Somehow, the session daemon is not responding anymore. */
917 goto error_fatal;
918 }
919
920 goto end_nosignal;
921 }
922 case LTTNG_CONSUMER_DATA_PENDING:
923 {
924 int32_t ret;
925 uint64_t id = msg.u.data_pending.session_id;
926
927 DBG("Kernel consumer data pending command for id %" PRIu64, id);
928
929 ret = consumer_data_pending(id);
930
931 health_code_update();
932
933 /* Send back returned value to session daemon */
934 ret = lttcomm_send_unix_sock(sock, &ret, sizeof(ret));
935 if (ret < 0) {
936 PERROR("send data pending ret code");
937 goto error_fatal;
938 }
939
940 /*
941 * No need to send back a status message since the data pending
942 * returned value is the response.
943 */
944 break;
945 }
946 case LTTNG_CONSUMER_SNAPSHOT_CHANNEL:
947 {
948 struct lttng_consumer_channel *channel;
949 uint64_t key = msg.u.snapshot_channel.key;
950
951 channel = consumer_find_channel(key);
952 if (!channel) {
953 ERR("Channel %" PRIu64 " not found", key);
954 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
955 } else {
956 pthread_mutex_lock(&channel->lock);
957 if (msg.u.snapshot_channel.metadata == 1) {
958 ret = lttng_kconsumer_snapshot_metadata(channel, key,
959 msg.u.snapshot_channel.pathname,
960 msg.u.snapshot_channel.relayd_id, ctx);
961 if (ret < 0) {
962 ERR("Snapshot metadata failed");
963 ret_code = LTTCOMM_CONSUMERD_SNAPSHOT_FAILED;
964 }
965 } else {
966 ret = lttng_kconsumer_snapshot_channel(channel, key,
967 msg.u.snapshot_channel.pathname,
968 msg.u.snapshot_channel.relayd_id,
969 msg.u.snapshot_channel.nb_packets_per_stream,
970 ctx);
971 if (ret < 0) {
972 ERR("Snapshot channel failed");
973 ret_code = LTTCOMM_CONSUMERD_SNAPSHOT_FAILED;
974 }
975 }
976 pthread_mutex_unlock(&channel->lock);
977 }
978 health_code_update();
979
980 ret = consumer_send_status_msg(sock, ret_code);
981 if (ret < 0) {
982 /* Somehow, the session daemon is not responding anymore. */
983 goto end_nosignal;
984 }
985 break;
986 }
987 case LTTNG_CONSUMER_DESTROY_CHANNEL:
988 {
989 uint64_t key = msg.u.destroy_channel.key;
990 struct lttng_consumer_channel *channel;
991
992 channel = consumer_find_channel(key);
993 if (!channel) {
994 ERR("Kernel consumer destroy channel %" PRIu64 " not found", key);
995 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
996 }
997
998 health_code_update();
999
1000 ret = consumer_send_status_msg(sock, ret_code);
1001 if (ret < 0) {
1002 /* Somehow, the session daemon is not responding anymore. */
1003 goto end_destroy_channel;
1004 }
1005
1006 health_code_update();
1007
1008 /* Stop right now if no channel was found. */
1009 if (!channel) {
1010 goto end_destroy_channel;
1011 }
1012
1013 /*
1014 * This command should ONLY be issued for channel with streams set in
1015 * no monitor mode.
1016 */
1017 assert(!channel->monitor);
1018
1019 /*
1020 * The refcount should ALWAYS be 0 in the case of a channel in no
1021 * monitor mode.
1022 */
1023 assert(!uatomic_sub_return(&channel->refcount, 1));
1024
1025 consumer_del_channel(channel);
1026 end_destroy_channel:
1027 goto end_nosignal;
1028 }
1029 case LTTNG_CONSUMER_DISCARDED_EVENTS:
1030 {
1031 ssize_t ret;
1032 uint64_t count;
1033 struct lttng_consumer_channel *channel;
1034 uint64_t id = msg.u.discarded_events.session_id;
1035 uint64_t key = msg.u.discarded_events.channel_key;
1036
1037 DBG("Kernel consumer discarded events command for session id %"
1038 PRIu64 ", channel key %" PRIu64, id, key);
1039
1040 channel = consumer_find_channel(key);
1041 if (!channel) {
1042 ERR("Kernel consumer discarded events channel %"
1043 PRIu64 " not found", key);
1044 count = 0;
1045 } else {
1046 count = channel->discarded_events;
1047 }
1048
1049 health_code_update();
1050
1051 /* Send back returned value to session daemon */
1052 ret = lttcomm_send_unix_sock(sock, &count, sizeof(count));
1053 if (ret < 0) {
1054 PERROR("send discarded events");
1055 goto error_fatal;
1056 }
1057
1058 break;
1059 }
1060 case LTTNG_CONSUMER_LOST_PACKETS:
1061 {
1062 ssize_t ret;
1063 uint64_t count;
1064 struct lttng_consumer_channel *channel;
1065 uint64_t id = msg.u.lost_packets.session_id;
1066 uint64_t key = msg.u.lost_packets.channel_key;
1067
1068 DBG("Kernel consumer lost packets command for session id %"
1069 PRIu64 ", channel key %" PRIu64, id, key);
1070
1071 channel = consumer_find_channel(key);
1072 if (!channel) {
1073 ERR("Kernel consumer lost packets channel %"
1074 PRIu64 " not found", key);
1075 count = 0;
1076 } else {
1077 count = channel->lost_packets;
1078 }
1079
1080 health_code_update();
1081
1082 /* Send back returned value to session daemon */
1083 ret = lttcomm_send_unix_sock(sock, &count, sizeof(count));
1084 if (ret < 0) {
1085 PERROR("send lost packets");
1086 goto error_fatal;
1087 }
1088
1089 break;
1090 }
1091 case LTTNG_CONSUMER_SET_CHANNEL_MONITOR_PIPE:
1092 {
1093 int channel_monitor_pipe;
1094
1095 ret_code = LTTCOMM_CONSUMERD_SUCCESS;
1096 /* Successfully received the command's type. */
1097 ret = consumer_send_status_msg(sock, ret_code);
1098 if (ret < 0) {
1099 goto error_fatal;
1100 }
1101
1102 ret = lttcomm_recv_fds_unix_sock(sock, &channel_monitor_pipe,
1103 1);
1104 if (ret != sizeof(channel_monitor_pipe)) {
1105 ERR("Failed to receive channel monitor pipe");
1106 goto error_fatal;
1107 }
1108
1109 DBG("Received channel monitor pipe (%d)", channel_monitor_pipe);
1110 ret = consumer_timer_thread_set_channel_monitor_pipe(
1111 channel_monitor_pipe);
1112 if (!ret) {
1113 int flags;
1114
1115 ret_code = LTTCOMM_CONSUMERD_SUCCESS;
1116 /* Set the pipe as non-blocking. */
1117 ret = fcntl(channel_monitor_pipe, F_GETFL, 0);
1118 if (ret == -1) {
1119 PERROR("fcntl get flags of the channel monitoring pipe");
1120 goto error_fatal;
1121 }
1122 flags = ret;
1123
1124 ret = fcntl(channel_monitor_pipe, F_SETFL,
1125 flags | O_NONBLOCK);
1126 if (ret == -1) {
1127 PERROR("fcntl set O_NONBLOCK flag of the channel monitoring pipe");
1128 goto error_fatal;
1129 }
1130 DBG("Channel monitor pipe set as non-blocking");
1131 } else {
1132 ret_code = LTTCOMM_CONSUMERD_ALREADY_SET;
1133 }
1134 ret = consumer_send_status_msg(sock, ret_code);
1135 if (ret < 0) {
1136 goto error_fatal;
1137 }
1138 break;
1139 }
1140 case LTTNG_CONSUMER_ROTATE_CHANNEL:
1141 {
1142 struct lttng_consumer_channel *channel;
1143 uint64_t key = msg.u.rotate_channel.key;
1144
1145 DBG("Consumer rotate channel %" PRIu64, key);
1146
1147 channel = consumer_find_channel(key);
1148 if (!channel) {
1149 ERR("Channel %" PRIu64 " not found", key);
1150 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
1151 } else {
1152 /*
1153 * Sample the rotate position of all the streams in this channel.
1154 */
1155 ret = lttng_consumer_rotate_channel(channel, key,
1156 msg.u.rotate_channel.relayd_id,
1157 msg.u.rotate_channel.metadata,
1158 ctx);
1159 if (ret < 0) {
1160 ERR("Rotate channel failed");
1161 ret_code = LTTCOMM_CONSUMERD_ROTATION_FAIL;
1162 }
1163
1164 health_code_update();
1165 }
1166 ret = consumer_send_status_msg(sock, ret_code);
1167 if (ret < 0) {
1168 /* Somehow, the session daemon is not responding anymore. */
1169 goto error_rotate_channel;
1170 }
1171 if (channel) {
1172 /* Rotate the streams that are ready right now. */
1173 ret = lttng_consumer_rotate_ready_streams(
1174 channel, key, ctx);
1175 if (ret < 0) {
1176 ERR("Rotate ready streams failed");
1177 }
1178 }
1179 break;
1180 error_rotate_channel:
1181 goto end_nosignal;
1182 }
1183 case LTTNG_CONSUMER_CLEAR_CHANNEL:
1184 {
1185 struct lttng_consumer_channel *channel;
1186 uint64_t key = msg.u.clear_channel.key;
1187
1188 channel = consumer_find_channel(key);
1189 if (!channel) {
1190 DBG("Channel %" PRIu64 " not found", key);
1191 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
1192 } else {
1193 ret = lttng_consumer_clear_channel(channel);
1194 if (ret) {
1195 ERR("Clear channel failed");
1196 ret_code = ret;
1197 }
1198
1199 health_code_update();
1200 }
1201 ret = consumer_send_status_msg(sock, ret_code);
1202 if (ret < 0) {
1203 /* Somehow, the session daemon is not responding anymore. */
1204 goto end_nosignal;
1205 }
1206
1207 break;
1208 }
1209 case LTTNG_CONSUMER_INIT:
1210 {
1211 ret_code = lttng_consumer_init_command(ctx,
1212 msg.u.init.sessiond_uuid);
1213 health_code_update();
1214 ret = consumer_send_status_msg(sock, ret_code);
1215 if (ret < 0) {
1216 /* Somehow, the session daemon is not responding anymore. */
1217 goto end_nosignal;
1218 }
1219 break;
1220 }
1221 case LTTNG_CONSUMER_CREATE_TRACE_CHUNK:
1222 {
1223 const struct lttng_credentials credentials = {
1224 .uid = msg.u.create_trace_chunk.credentials.value.uid,
1225 .gid = msg.u.create_trace_chunk.credentials.value.gid,
1226 };
1227 const bool is_local_trace =
1228 !msg.u.create_trace_chunk.relayd_id.is_set;
1229 const uint64_t relayd_id =
1230 msg.u.create_trace_chunk.relayd_id.value;
1231 const char *chunk_override_name =
1232 *msg.u.create_trace_chunk.override_name ?
1233 msg.u.create_trace_chunk.override_name :
1234 NULL;
1235 struct lttng_directory_handle *chunk_directory_handle = NULL;
1236
1237 /*
1238 * The session daemon will only provide a chunk directory file
1239 * descriptor for local traces.
1240 */
1241 if (is_local_trace) {
1242 int chunk_dirfd;
1243
1244 /* Acnowledge the reception of the command. */
1245 ret = consumer_send_status_msg(sock,
1246 LTTCOMM_CONSUMERD_SUCCESS);
1247 if (ret < 0) {
1248 /* Somehow, the session daemon is not responding anymore. */
1249 goto end_nosignal;
1250 }
1251
1252 ret = lttcomm_recv_fds_unix_sock(sock, &chunk_dirfd, 1);
1253 if (ret != sizeof(chunk_dirfd)) {
1254 ERR("Failed to receive trace chunk directory file descriptor");
1255 goto error_fatal;
1256 }
1257
1258 DBG("Received trace chunk directory fd (%d)",
1259 chunk_dirfd);
1260 chunk_directory_handle = lttng_directory_handle_create_from_dirfd(
1261 chunk_dirfd);
1262 if (!chunk_directory_handle) {
1263 ERR("Failed to initialize chunk directory handle from directory file descriptor");
1264 if (close(chunk_dirfd)) {
1265 PERROR("Failed to close chunk directory file descriptor");
1266 }
1267 goto error_fatal;
1268 }
1269 }
1270
1271 ret_code = lttng_consumer_create_trace_chunk(
1272 !is_local_trace ? &relayd_id : NULL,
1273 msg.u.create_trace_chunk.session_id,
1274 msg.u.create_trace_chunk.chunk_id,
1275 (time_t) msg.u.create_trace_chunk
1276 .creation_timestamp,
1277 chunk_override_name,
1278 msg.u.create_trace_chunk.credentials.is_set ?
1279 &credentials :
1280 NULL,
1281 chunk_directory_handle);
1282 lttng_directory_handle_put(chunk_directory_handle);
1283 goto end_msg_sessiond;
1284 }
1285 case LTTNG_CONSUMER_CLOSE_TRACE_CHUNK:
1286 {
1287 enum lttng_trace_chunk_command_type close_command =
1288 msg.u.close_trace_chunk.close_command.value;
1289 const uint64_t relayd_id =
1290 msg.u.close_trace_chunk.relayd_id.value;
1291 struct lttcomm_consumer_close_trace_chunk_reply reply;
1292 char path[LTTNG_PATH_MAX];
1293
1294 ret_code = lttng_consumer_close_trace_chunk(
1295 msg.u.close_trace_chunk.relayd_id.is_set ?
1296 &relayd_id :
1297 NULL,
1298 msg.u.close_trace_chunk.session_id,
1299 msg.u.close_trace_chunk.chunk_id,
1300 (time_t) msg.u.close_trace_chunk.close_timestamp,
1301 msg.u.close_trace_chunk.close_command.is_set ?
1302 &close_command :
1303 NULL, path);
1304 reply.ret_code = ret_code;
1305 reply.path_length = strlen(path) + 1;
1306 ret = lttcomm_send_unix_sock(sock, &reply, sizeof(reply));
1307 if (ret != sizeof(reply)) {
1308 goto error_fatal;
1309 }
1310 ret = lttcomm_send_unix_sock(sock, path, reply.path_length);
1311 if (ret != reply.path_length) {
1312 goto error_fatal;
1313 }
1314 goto end_nosignal;
1315 }
1316 case LTTNG_CONSUMER_TRACE_CHUNK_EXISTS:
1317 {
1318 const uint64_t relayd_id =
1319 msg.u.trace_chunk_exists.relayd_id.value;
1320
1321 ret_code = lttng_consumer_trace_chunk_exists(
1322 msg.u.trace_chunk_exists.relayd_id.is_set ?
1323 &relayd_id : NULL,
1324 msg.u.trace_chunk_exists.session_id,
1325 msg.u.trace_chunk_exists.chunk_id);
1326 goto end_msg_sessiond;
1327 }
1328 default:
1329 goto end_nosignal;
1330 }
1331
1332 end_nosignal:
1333 /*
1334 * Return 1 to indicate success since the 0 value can be a socket
1335 * shutdown during the recv() or send() call.
1336 */
1337 ret = 1;
1338 goto end;
1339 error_fatal:
1340 /* This will issue a consumer stop. */
1341 ret = -1;
1342 goto end;
1343 end_msg_sessiond:
1344 /*
1345 * The returned value here is not useful since either way we'll return 1 to
1346 * the caller because the session daemon socket management is done
1347 * elsewhere. Returning a negative code or 0 will shutdown the consumer.
1348 */
1349 ret = consumer_send_status_msg(sock, ret_code);
1350 if (ret < 0) {
1351 goto error_fatal;
1352 }
1353 ret = 1;
1354 end:
1355 health_code_update();
1356 rcu_read_unlock();
1357 return ret;
1358 }
1359
1360 /*
1361 * Populate index values of a kernel stream. Values are set in big endian order.
1362 *
1363 * Return 0 on success or else a negative value.
1364 */
1365 static int get_index_values(struct ctf_packet_index *index, int infd)
1366 {
1367 int ret;
1368 uint64_t packet_size, content_size, timestamp_begin, timestamp_end,
1369 events_discarded, stream_id, stream_instance_id,
1370 packet_seq_num;
1371
1372 ret = kernctl_get_timestamp_begin(infd, &timestamp_begin);
1373 if (ret < 0) {
1374 PERROR("kernctl_get_timestamp_begin");
1375 goto error;
1376 }
1377
1378 ret = kernctl_get_timestamp_end(infd, &timestamp_end);
1379 if (ret < 0) {
1380 PERROR("kernctl_get_timestamp_end");
1381 goto error;
1382 }
1383
1384 ret = kernctl_get_events_discarded(infd, &events_discarded);
1385 if (ret < 0) {
1386 PERROR("kernctl_get_events_discarded");
1387 goto error;
1388 }
1389
1390 ret = kernctl_get_content_size(infd, &content_size);
1391 if (ret < 0) {
1392 PERROR("kernctl_get_content_size");
1393 goto error;
1394 }
1395
1396 ret = kernctl_get_packet_size(infd, &packet_size);
1397 if (ret < 0) {
1398 PERROR("kernctl_get_packet_size");
1399 goto error;
1400 }
1401
1402 ret = kernctl_get_stream_id(infd, &stream_id);
1403 if (ret < 0) {
1404 PERROR("kernctl_get_stream_id");
1405 goto error;
1406 }
1407
1408 ret = kernctl_get_instance_id(infd, &stream_instance_id);
1409 if (ret < 0) {
1410 if (ret == -ENOTTY) {
1411 /* Command not implemented by lttng-modules. */
1412 stream_instance_id = -1ULL;
1413 } else {
1414 PERROR("kernctl_get_instance_id");
1415 goto error;
1416 }
1417 }
1418
1419 ret = kernctl_get_sequence_number(infd, &packet_seq_num);
1420 if (ret < 0) {
1421 if (ret == -ENOTTY) {
1422 /* Command not implemented by lttng-modules. */
1423 packet_seq_num = -1ULL;
1424 ret = 0;
1425 } else {
1426 PERROR("kernctl_get_sequence_number");
1427 goto error;
1428 }
1429 }
1430 index->packet_seq_num = htobe64(index->packet_seq_num);
1431
1432 *index = (typeof(*index)) {
1433 .offset = index->offset,
1434 .packet_size = htobe64(packet_size),
1435 .content_size = htobe64(content_size),
1436 .timestamp_begin = htobe64(timestamp_begin),
1437 .timestamp_end = htobe64(timestamp_end),
1438 .events_discarded = htobe64(events_discarded),
1439 .stream_id = htobe64(stream_id),
1440 .stream_instance_id = htobe64(stream_instance_id),
1441 .packet_seq_num = htobe64(packet_seq_num),
1442 };
1443
1444 error:
1445 return ret;
1446 }
1447 /*
1448 * Sync metadata meaning request them to the session daemon and snapshot to the
1449 * metadata thread can consumer them.
1450 *
1451 * Metadata stream lock MUST be acquired.
1452 *
1453 * Return 0 if new metadatda is available, EAGAIN if the metadata stream
1454 * is empty or a negative value on error.
1455 */
1456 int lttng_kconsumer_sync_metadata(struct lttng_consumer_stream *metadata)
1457 {
1458 int ret;
1459
1460 assert(metadata);
1461
1462 ret = kernctl_buffer_flush(metadata->wait_fd);
1463 if (ret < 0) {
1464 ERR("Failed to flush kernel stream");
1465 goto end;
1466 }
1467
1468 ret = kernctl_snapshot(metadata->wait_fd);
1469 if (ret < 0) {
1470 if (ret != -EAGAIN) {
1471 ERR("Sync metadata, taking kernel snapshot failed.");
1472 goto end;
1473 }
1474 DBG("Sync metadata, no new kernel metadata");
1475 /* No new metadata, exit. */
1476 ret = ENODATA;
1477 goto end;
1478 }
1479
1480 end:
1481 return ret;
1482 }
1483
/*
 * Update the parent channel's lost-packet and discarded-event counters from
 * the tracer-side statistics of the given stream.
 *
 * Samples the current packet sequence number and discarded event count from
 * the kernel tracer and accumulates the deltas since the previous call into
 * stream->chan->lost_packets and stream->chan->discarded_events.
 *
 * Return 0 on success or else a negative value.
 */
static
int update_stream_stats(struct lttng_consumer_stream *stream)
{
	int ret;
	uint64_t seq, discarded;

	ret = kernctl_get_sequence_number(stream->wait_fd, &seq);
	if (ret < 0) {
		if (ret == -ENOTTY) {
			/* Command not implemented by lttng-modules. */
			seq = -1ULL;
			stream->sequence_number_unavailable = true;
			/*
			 * NOTE(review): ret is left at -ENOTTY here; it is
			 * overwritten by kernctl_get_events_discarded() below,
			 * so the error is not propagated. With seq == -1ULL,
			 * the first branch of the comparison below is always
			 * taken on subsequent calls.
			 */
		} else {
			PERROR("kernctl_get_sequence_number");
			goto end;
		}
	}

	/*
	 * Start the sequence when we extract the first packet in case we don't
	 * start at 0 (for example if a consumer is not connected to the
	 * session immediately after the beginning).
	 */
	if (stream->last_sequence_number == -1ULL) {
		stream->last_sequence_number = seq;
	} else if (seq > stream->last_sequence_number) {
		/* Any gap in consecutive sequence numbers is a lost packet. */
		stream->chan->lost_packets += seq -
				stream->last_sequence_number - 1;
	} else {
		/* seq <= last_sequence_number */
		ERR("Sequence number inconsistent : prev = %" PRIu64
				", current = %" PRIu64,
				stream->last_sequence_number, seq);
		ret = -1;
		goto end;
	}
	stream->last_sequence_number = seq;

	ret = kernctl_get_events_discarded(stream->wait_fd, &discarded);
	if (ret < 0) {
		PERROR("kernctl_get_events_discarded");
		goto end;
	}
	if (discarded < stream->last_discarded_events) {
		/*
		 * Overflow has occurred. We assume only one wrap-around
		 * has occurred.
		 */
		stream->chan->discarded_events += (1ULL << (CAA_BITS_PER_LONG - 1)) -
				stream->last_discarded_events + discarded;
	} else {
		/* Normal case: accumulate the delta since the last sample. */
		stream->chan->discarded_events += discarded -
				stream->last_discarded_events;
	}
	stream->last_discarded_events = discarded;
	ret = 0;

end:
	return ret;
}
1544
1545 /*
1546 * Check if the local version of the metadata stream matches with the version
1547 * of the metadata stream in the kernel. If it was updated, set the reset flag
1548 * on the stream.
1549 */
1550 static
1551 int metadata_stream_check_version(int infd, struct lttng_consumer_stream *stream)
1552 {
1553 int ret;
1554 uint64_t cur_version;
1555
1556 ret = kernctl_get_metadata_version(infd, &cur_version);
1557 if (ret < 0) {
1558 if (ret == -ENOTTY) {
1559 /*
1560 * LTTng-modules does not implement this
1561 * command.
1562 */
1563 ret = 0;
1564 goto end;
1565 }
1566 ERR("Failed to get the metadata version");
1567 goto end;
1568 }
1569
1570 if (stream->metadata_version == cur_version) {
1571 ret = 0;
1572 goto end;
1573 }
1574
1575 DBG("New metadata version detected");
1576 stream->metadata_version = cur_version;
1577 stream->reset_metadata_flag = 1;
1578 ret = 0;
1579
1580 end:
1581 return ret;
1582 }
1583
1584 /*
1585 * Consume data on a file descriptor and write it on a trace file.
1586 * The stream and channel locks must be held by the caller.
1587 */
ssize_t lttng_kconsumer_read_subbuffer(struct lttng_consumer_stream *stream,
		struct lttng_consumer_local_data *ctx)
{
	unsigned long len, subbuf_size, padding;
	int err, write_index = 1, rotation_ret;
	ssize_t ret = 0;
	int infd = stream->wait_fd;
	/* CTF packet index built for this sub-buffer (data streams only). */
	struct ctf_packet_index index = {};

	DBG("In read_subbuffer (infd : %d)", infd);

	/*
	 * If the stream was flagged to be ready for rotation before we extract the
	 * next packet, rotate it now.
	 */
	if (stream->rotate_ready) {
		DBG("Rotate stream before extracting data");
		rotation_ret = lttng_consumer_rotate_stream(ctx, stream);
		if (rotation_ret < 0) {
			ERR("Stream rotation error");
			ret = -1;
			goto error;
		}
	}

	/* Get the next subbuffer */
	err = kernctl_get_next_subbuf(infd);
	if (err != 0) {
		/*
		 * This is a debug message even for single-threaded consumer,
		 * because poll() have more relaxed criterions than get subbuf,
		 * so get_subbuf may fail for short race windows where poll()
		 * would issue wakeups.
		 */
		DBG("Reserving sub buffer failed (everything is normal, "
				"it is due to concurrency)");
		ret = err;
		goto error;
	}

	/* Get the full subbuffer size including padding */
	err = kernctl_get_padded_subbuf_size(infd, &len);
	if (err != 0) {
		PERROR("Getting sub-buffer len failed.");
		err = kernctl_put_subbuf(infd);
		if (err != 0) {
			if (err == -EFAULT) {
				PERROR("Error in unreserving sub buffer\n");
			} else if (err == -EIO) {
				/* Should never happen with newer LTTng versions */
				PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
			}
			ret = err;
			goto error;
		}
		/*
		 * NOTE(review): if kernctl_put_subbuf() succeeded, err is 0
		 * here, so the original kernctl_get_padded_subbuf_size() error
		 * is not propagated and 0 is returned — confirm this is
		 * intentional (other error paths below preserve ret).
		 */
		ret = err;
		goto error;
	}

	if (!stream->metadata_flag) {
		/* Data stream: sample index values before consuming the packet. */
		ret = get_index_values(&index, infd);
		if (ret < 0) {
			err = kernctl_put_subbuf(infd);
			if (err != 0) {
				if (err == -EFAULT) {
					PERROR("Error in unreserving sub buffer\n");
				} else if (err == -EIO) {
					/* Should never happen with newer LTTng versions */
					PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
				}
				ret = err;
				goto error;
			}
			goto error;
		}
		/* Accumulate lost packets / discarded events on the channel. */
		ret = update_stream_stats(stream);
		if (ret < 0) {
			err = kernctl_put_subbuf(infd);
			if (err != 0) {
				if (err == -EFAULT) {
					PERROR("Error in unreserving sub buffer\n");
				} else if (err == -EIO) {
					/* Should never happen with newer LTTng versions */
					PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
				}
				ret = err;
				goto error;
			}
			goto error;
		}
	} else {
		/* Metadata stream: no index is written for metadata packets. */
		write_index = 0;
		ret = metadata_stream_check_version(infd, stream);
		if (ret < 0) {
			err = kernctl_put_subbuf(infd);
			if (err != 0) {
				if (err == -EFAULT) {
					PERROR("Error in unreserving sub buffer\n");
				} else if (err == -EIO) {
					/* Should never happen with newer LTTng versions */
					PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
				}
				ret = err;
				goto error;
			}
			goto error;
		}
	}

	switch (stream->chan->output) {
	case CONSUMER_CHANNEL_SPLICE:
		/*
		 * XXX: The lttng-modules splice "actor" does not handle copying
		 * partial pages hence only using the subbuffer size without the
		 * padding makes the splice fail.
		 */
		subbuf_size = len;
		padding = 0;

		/* splice the subbuffer to the tracefile */
		ret = lttng_consumer_on_read_subbuffer_splice(ctx, stream, subbuf_size,
				padding, &index);
		/*
		 * XXX: Splice does not support network streaming so the return value
		 * is simply checked against subbuf_size and not like the mmap() op.
		 */
		if (ret != subbuf_size) {
			/*
			 * display the error but continue processing to try
			 * to release the subbuffer
			 */
			ERR("Error splicing to tracefile (ret: %zd != len: %lu)",
					ret, subbuf_size);
			write_index = 0;
		}
		break;
	case CONSUMER_CHANNEL_MMAP:
	{
		const char *subbuf_addr;
		struct lttng_buffer_view subbuf_view;

		/* Get subbuffer size without padding */
		err = kernctl_get_subbuf_size(infd, &subbuf_size);
		if (err != 0) {
			PERROR("Getting sub-buffer len failed.");
			err = kernctl_put_subbuf(infd);
			if (err != 0) {
				if (err == -EFAULT) {
					PERROR("Error in unreserving sub buffer\n");
				} else if (err == -EIO) {
					/* Should never happen with newer LTTng versions */
					PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
				}
				ret = err;
				goto error;
			}
			ret = err;
			goto error;
		}

		ret = get_current_subbuf_addr(stream, &subbuf_addr);
		if (ret) {
			goto error_put_subbuf;
		}

		/* Make sure the tracer is not gone mad on us! */
		assert(len >= subbuf_size);

		padding = len - subbuf_size;

		subbuf_view = lttng_buffer_view_init(subbuf_addr, 0, len);

		/* write the subbuffer to the tracefile */
		ret = lttng_consumer_on_read_subbuffer_mmap(
				ctx, stream, &subbuf_view, padding, &index);
		/*
		 * The mmap operation should write subbuf_size amount of data
		 * when network streaming or the full padding (len) size when we
		 * are _not_ streaming.
		 */
		if ((ret != subbuf_size && stream->net_seq_idx != (uint64_t) -1ULL) ||
				(ret != len && stream->net_seq_idx == (uint64_t) -1ULL)) {
			/*
			 * Display the error but continue processing to try to release the
			 * subbuffer. This is a DBG statement since this is possible to
			 * happen without being a critical error.
			 */
			DBG("Error writing to tracefile "
					"(ret: %zd != len: %lu != subbuf_size: %lu)",
					ret, len, subbuf_size);
			write_index = 0;
		}
		break;
	}
	default:
		ERR("Unknown output method");
		ret = -EPERM;
	}
error_put_subbuf:
	/* Release the sub-buffer whether or not the copy above succeeded. */
	err = kernctl_put_next_subbuf(infd);
	if (err != 0) {
		if (err == -EFAULT) {
			PERROR("Error in unreserving sub buffer\n");
		} else if (err == -EIO) {
			/* Should never happen with newer LTTng versions */
			PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
		}
		ret = err;
		goto error;
	}

	/* Write index if needed. */
	if (!write_index) {
		goto rotate;
	}

	if (stream->chan->live_timer_interval && !stream->metadata_flag) {
		/*
		 * In live, block until all the metadata is sent.
		 */
		pthread_mutex_lock(&stream->metadata_timer_lock);
		assert(!stream->missed_metadata_flush);
		stream->waiting_on_metadata = true;
		pthread_mutex_unlock(&stream->metadata_timer_lock);

		err = consumer_stream_sync_metadata(ctx, stream->session_id);

		pthread_mutex_lock(&stream->metadata_timer_lock);
		stream->waiting_on_metadata = false;
		if (stream->missed_metadata_flush) {
			/* A metadata flush was missed while waiting; flush now. */
			stream->missed_metadata_flush = false;
			pthread_mutex_unlock(&stream->metadata_timer_lock);
			(void) consumer_flush_kernel_index(stream);
		} else {
			pthread_mutex_unlock(&stream->metadata_timer_lock);
		}
		if (err < 0) {
			goto error;
		}
	}

	err = consumer_stream_write_index(stream, &index);
	if (err < 0) {
		goto error;
	}

rotate:
	/*
	 * After extracting the packet, we check if the stream is now ready to be
	 * rotated and perform the action immediately.
	 */
	rotation_ret = lttng_consumer_stream_is_rotate_ready(stream);
	if (rotation_ret == 1) {
		rotation_ret = lttng_consumer_rotate_stream(ctx, stream);
		if (rotation_ret < 0) {
			ERR("Stream rotation error");
			ret = -1;
			goto error;
		}
	} else if (rotation_ret < 0) {
		ERR("Checking if stream is ready to rotate");
		ret = -1;
		goto error;
	}

error:
	return ret;
}
1856
1857 int lttng_kconsumer_on_recv_stream(struct lttng_consumer_stream *stream)
1858 {
1859 int ret;
1860
1861 assert(stream);
1862
1863 /*
1864 * Don't create anything if this is set for streaming or if there is
1865 * no current trace chunk on the parent channel.
1866 */
1867 if (stream->net_seq_idx == (uint64_t) -1ULL && stream->chan->monitor &&
1868 stream->chan->trace_chunk) {
1869 ret = consumer_stream_create_output_files(stream, true);
1870 if (ret) {
1871 goto error;
1872 }
1873 }
1874
1875 if (stream->output == LTTNG_EVENT_MMAP) {
1876 /* get the len of the mmap region */
1877 unsigned long mmap_len;
1878
1879 ret = kernctl_get_mmap_len(stream->wait_fd, &mmap_len);
1880 if (ret != 0) {
1881 PERROR("kernctl_get_mmap_len");
1882 goto error_close_fd;
1883 }
1884 stream->mmap_len = (size_t) mmap_len;
1885
1886 stream->mmap_base = mmap(NULL, stream->mmap_len, PROT_READ,
1887 MAP_PRIVATE, stream->wait_fd, 0);
1888 if (stream->mmap_base == MAP_FAILED) {
1889 PERROR("Error mmaping");
1890 ret = -1;
1891 goto error_close_fd;
1892 }
1893 }
1894
1895 /* we return 0 to let the library handle the FD internally */
1896 return 0;
1897
1898 error_close_fd:
1899 if (stream->out_fd >= 0) {
1900 int err;
1901
1902 err = close(stream->out_fd);
1903 assert(!err);
1904 stream->out_fd = -1;
1905 }
1906 error:
1907 return ret;
1908 }
1909
1910 /*
1911 * Check if data is still being extracted from the buffers for a specific
1912 * stream. Consumer data lock MUST be acquired before calling this function
1913 * and the stream lock.
1914 *
1915 * Return 1 if the traced data are still getting read else 0 meaning that the
1916 * data is available for trace viewer reading.
1917 */
1918 int lttng_kconsumer_data_pending(struct lttng_consumer_stream *stream)
1919 {
1920 int ret;
1921
1922 assert(stream);
1923
1924 if (stream->endpoint_status != CONSUMER_ENDPOINT_ACTIVE) {
1925 ret = 0;
1926 goto end;
1927 }
1928
1929 ret = kernctl_get_next_subbuf(stream->wait_fd);
1930 if (ret == 0) {
1931 /* There is still data so let's put back this subbuffer. */
1932 ret = kernctl_put_subbuf(stream->wait_fd);
1933 assert(ret == 0);
1934 ret = 1; /* Data is pending */
1935 goto end;
1936 }
1937
1938 /* Data is NOT pending and ready to be read. */
1939 ret = 0;
1940
1941 end:
1942 return ret;
1943 }
This page took 0.107767 seconds and 5 git commands to generate.