Fix: consumerd: use-after-free of metadata bucket
[lttng-tools.git] / src / common / kernel-consumer / kernel-consumer.cpp
1 /*
2 * Copyright (C) 2011 EfficiOS Inc.
3 * Copyright (C) 2011 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
4 * Copyright (C) 2017 Jérémie Galarneau <jeremie.galarneau@efficios.com>
5 *
6 * SPDX-License-Identifier: GPL-2.0-only
7 *
8 */
9
10 #define _LGPL_SOURCE
11 #include <poll.h>
12 #include <pthread.h>
13 #include <stdlib.h>
14 #include <string.h>
15 #include <sys/mman.h>
16 #include <sys/socket.h>
17 #include <sys/types.h>
18 #include <inttypes.h>
19 #include <unistd.h>
20 #include <sys/stat.h>
21 #include <stdint.h>
22
23 #include <bin/lttng-consumerd/health-consumerd.h>
24 #include <common/common.h>
25 #include <common/kernel-ctl/kernel-ctl.h>
26 #include <common/sessiond-comm/sessiond-comm.h>
27 #include <common/sessiond-comm/relayd.h>
28 #include <common/compat/fcntl.h>
29 #include <common/compat/endian.h>
30 #include <common/pipe.h>
31 #include <common/relayd/relayd.h>
32 #include <common/utils.h>
33 #include <common/consumer/consumer-stream.h>
34 #include <common/index/index.h>
35 #include <common/consumer/consumer-timer.h>
36 #include <common/optional.h>
37 #include <common/buffer-view.h>
38 #include <common/consumer/consumer.h>
39 #include <common/consumer/metadata-bucket.h>
40
41 #include "kernel-consumer.h"
42
43 extern struct lttng_consumer_global_data the_consumer_data;
44 extern int consumer_poll_timeout;
45
46 /*
47 * Take a snapshot for a specific fd
48 *
49 * Returns 0 on success, < 0 on error
50 */
51 int lttng_kconsumer_take_snapshot(struct lttng_consumer_stream *stream)
52 {
53 int ret = 0;
54 int infd = stream->wait_fd;
55
56 ret = kernctl_snapshot(infd);
57 /*
58 * -EAGAIN is not an error, it just means that there is no data to
59 * be read.
60 */
61 if (ret != 0 && ret != -EAGAIN) {
62 PERROR("Getting sub-buffer snapshot.");
63 }
64
65 return ret;
66 }
67
68 /*
69 * Sample consumed and produced positions for a specific fd.
70 *
71 * Returns 0 on success, < 0 on error.
72 */
73 int lttng_kconsumer_sample_snapshot_positions(
74 struct lttng_consumer_stream *stream)
75 {
76 LTTNG_ASSERT(stream);
77
78 return kernctl_snapshot_sample_positions(stream->wait_fd);
79 }
80
81 /*
82 * Get the produced position
83 *
84 * Returns 0 on success, < 0 on error
85 */
86 int lttng_kconsumer_get_produced_snapshot(struct lttng_consumer_stream *stream,
87 unsigned long *pos)
88 {
89 int ret;
90 int infd = stream->wait_fd;
91
92 ret = kernctl_snapshot_get_produced(infd, pos);
93 if (ret != 0) {
94 PERROR("kernctl_snapshot_get_produced");
95 }
96
97 return ret;
98 }
99
100 /*
101 * Get the consumerd position
102 *
103 * Returns 0 on success, < 0 on error
104 */
105 int lttng_kconsumer_get_consumed_snapshot(struct lttng_consumer_stream *stream,
106 unsigned long *pos)
107 {
108 int ret;
109 int infd = stream->wait_fd;
110
111 ret = kernctl_snapshot_get_consumed(infd, pos);
112 if (ret != 0) {
113 PERROR("kernctl_snapshot_get_consumed");
114 }
115
116 return ret;
117 }
118
119 static
120 int get_current_subbuf_addr(struct lttng_consumer_stream *stream,
121 const char **addr)
122 {
123 int ret;
124 unsigned long mmap_offset;
125 const char *mmap_base = (const char *) stream->mmap_base;
126
127 ret = kernctl_get_mmap_read_offset(stream->wait_fd, &mmap_offset);
128 if (ret < 0) {
129 PERROR("Failed to get mmap read offset");
130 goto error;
131 }
132
133 *addr = mmap_base + mmap_offset;
134 error:
135 return ret;
136 }
137
/*
 * Take a snapshot of all the streams of a channel.
 * RCU read-side lock must be held across this function to ensure existence of
 * channel. The channel lock is taken internally to freeze the stream list and
 * channel state for the duration of the snapshot.
 *
 * Note: ctx is currently unused by this function; it is kept for signature
 * symmetry with the other snapshot entry points.
 *
 * Returns 0 on success, < 0 on error
 */
static int lttng_kconsumer_snapshot_channel(
		struct lttng_consumer_channel *channel,
		uint64_t key, char *path, uint64_t relayd_id,
		uint64_t nb_packets_per_stream,
		struct lttng_consumer_local_data *ctx)
{
	int ret;
	struct lttng_consumer_stream *stream;

	DBG("Kernel consumer snapshot channel %" PRIu64, key);

	/* Prevent channel modifications while we perform the snapshot.*/
	pthread_mutex_lock(&channel->lock);

	rcu_read_lock();

	/* Splice is not supported yet for channel snapshot. */
	if (channel->output != CONSUMER_CHANNEL_MMAP) {
		ERR("Unsupported output type for channel \"%s\": mmap output is required to record a snapshot",
				channel->name);
		ret = -1;
		goto end;
	}

	cds_list_for_each_entry(stream, &channel->streams.head, send_node) {
		unsigned long consumed_pos, produced_pos;

		health_code_update();

		/*
		 * Lock stream because we are about to change its state.
		 */
		pthread_mutex_lock(&stream->lock);

		LTTNG_ASSERT(channel->trace_chunk);
		if (!lttng_trace_chunk_get(channel->trace_chunk)) {
			/*
			 * Can't happen barring an internal error as the channel
			 * holds a reference to the trace chunk.
			 */
			ERR("Failed to acquire reference to channel's trace chunk");
			ret = -1;
			goto end_unlock;
		}
		/* The reference acquired above is released at the end of the loop body. */
		LTTNG_ASSERT(!stream->trace_chunk);
		stream->trace_chunk = channel->trace_chunk;

		/*
		 * Assign the received relayd ID so we can use it for streaming. The streams
		 * are not visible to anyone so this is OK to change it.
		 */
		stream->net_seq_idx = relayd_id;
		channel->relayd_id = relayd_id;
		if (relayd_id != (uint64_t) -1ULL) {
			/* Streaming to a relay daemon. */
			ret = consumer_send_relayd_stream(stream, path);
			if (ret < 0) {
				ERR("sending stream to relayd");
				goto end_unlock;
			}
		} else {
			/* Local output: create the on-disk trace files. */
			ret = consumer_stream_create_output_files(stream,
					false);
			if (ret < 0) {
				goto end_unlock;
			}
			DBG("Kernel consumer snapshot stream (%" PRIu64 ")",
					stream->key);
		}

		ret = kernctl_buffer_flush_empty(stream->wait_fd);
		if (ret < 0) {
			/*
			 * Doing a buffer flush which does not take into
			 * account empty packets. This is not perfect
			 * for stream intersection, but required as a
			 * fall-back when "flush_empty" is not
			 * implemented by lttng-modules.
			 */
			ret = kernctl_buffer_flush(stream->wait_fd);
			if (ret < 0) {
				ERR("Failed to flush kernel stream");
				goto end_unlock;
			}
			/*
			 * NOTE(review): on a *successful* fall-back flush this
			 * jumps to end_unlock and returns, skipping both the
			 * data copy for this stream and all remaining streams.
			 * TODO confirm this early-exit is intended and not a
			 * missing "continue"-style path.
			 */
			goto end_unlock;
		}

		ret = lttng_kconsumer_take_snapshot(stream);
		if (ret < 0) {
			ERR("Taking kernel snapshot");
			goto end_unlock;
		}

		/* Sample the snapshot's produced/consumed boundaries. */
		ret = lttng_kconsumer_get_produced_snapshot(stream, &produced_pos);
		if (ret < 0) {
			ERR("Produced kernel snapshot position");
			goto end_unlock;
		}

		ret = lttng_kconsumer_get_consumed_snapshot(stream, &consumed_pos);
		if (ret < 0) {
			ERR("Consumerd kernel snapshot position");
			goto end_unlock;
		}

		/*
		 * Move the start position forward if only the last
		 * nb_packets_per_stream packets were requested.
		 */
		consumed_pos = consumer_get_consume_start_pos(consumed_pos,
				produced_pos, nb_packets_per_stream,
				stream->max_sb_size);

		/*
		 * Copy every sub-buffer between the consumed and produced
		 * positions. The signed difference handles position counter
		 * wrap-around.
		 */
		while ((long) (consumed_pos - produced_pos) < 0) {
			ssize_t read_len;
			unsigned long len, padded_len;
			const char *subbuf_addr;
			struct lttng_buffer_view subbuf_view;

			health_code_update();
			DBG("Kernel consumer taking snapshot at pos %lu", consumed_pos);

			ret = kernctl_get_subbuf(stream->wait_fd, &consumed_pos);
			if (ret < 0) {
				if (ret != -EAGAIN) {
					PERROR("kernctl_get_subbuf snapshot");
					goto end_unlock;
				}
				/* Sub-buffer unavailable: account it as lost and move on. */
				DBG("Kernel consumer get subbuf failed. Skipping it.");
				consumed_pos += stream->max_sb_size;
				stream->chan->lost_packets++;
				continue;
			}

			ret = kernctl_get_subbuf_size(stream->wait_fd, &len);
			if (ret < 0) {
				ERR("Snapshot kernctl_get_subbuf_size");
				goto error_put_subbuf;
			}

			ret = kernctl_get_padded_subbuf_size(stream->wait_fd, &padded_len);
			if (ret < 0) {
				ERR("Snapshot kernctl_get_padded_subbuf_size");
				goto error_put_subbuf;
			}

			ret = get_current_subbuf_addr(stream, &subbuf_addr);
			if (ret) {
				goto error_put_subbuf;
			}

			subbuf_view = lttng_buffer_view_init(
					subbuf_addr, 0, padded_len);
			read_len = lttng_consumer_on_read_subbuffer_mmap(
					stream, &subbuf_view,
					padded_len - len);
			/*
			 * We write the padded len in local tracefiles but the data len
			 * when using a relay. Display the error but continue processing
			 * to try to release the subbuffer.
			 */
			if (relayd_id != (uint64_t) -1ULL) {
				if (read_len != len) {
					ERR("Error sending to the relay (ret: %zd != len: %lu)",
							read_len, len);
				}
			} else {
				if (read_len != padded_len) {
					ERR("Error writing to tracefile (ret: %zd != len: %lu)",
							read_len, padded_len);
				}
			}

			ret = kernctl_put_subbuf(stream->wait_fd);
			if (ret < 0) {
				ERR("Snapshot kernctl_put_subbuf");
				goto end_unlock;
			}
			consumed_pos += stream->max_sb_size;
		}

		/* Release the per-stream output (local file or relayd stream). */
		if (relayd_id == (uint64_t) -1ULL) {
			if (stream->out_fd >= 0) {
				ret = close(stream->out_fd);
				if (ret < 0) {
					PERROR("Kernel consumer snapshot close out_fd");
					goto end_unlock;
				}
				stream->out_fd = -1;
			}
		} else {
			close_relayd_stream(stream);
			stream->net_seq_idx = (uint64_t) -1ULL;
		}
		/* Drop the trace chunk reference taken at the top of the loop. */
		lttng_trace_chunk_put(stream->trace_chunk);
		stream->trace_chunk = NULL;
		pthread_mutex_unlock(&stream->lock);
	}

	/* All good! */
	ret = 0;
	goto end;

error_put_subbuf:
	/* Always release an acquired sub-buffer, even on the error path. */
	ret = kernctl_put_subbuf(stream->wait_fd);
	if (ret < 0) {
		ERR("Snapshot kernctl_put_subbuf error path");
	}
end_unlock:
	pthread_mutex_unlock(&stream->lock);
end:
	rcu_read_unlock();
	pthread_mutex_unlock(&channel->lock);
	return ret;
}
355
/*
 * Read the whole metadata available for a snapshot.
 * RCU read-side lock must be held across this function to ensure existence of
 * metadata_channel.
 *
 * The metadata stream is consumed entirely (until lttng_consumer_read_subbuffer
 * reports no more data), then destroyed on every exit path — success or error —
 * and the channel's metadata_stream pointer is cleared. Destruction happens
 * only after the stream's read_subbuffer lock is released.
 *
 * Returns 0 on success, < 0 on error
 */
static int lttng_kconsumer_snapshot_metadata(
		struct lttng_consumer_channel *metadata_channel,
		uint64_t key, char *path, uint64_t relayd_id,
		struct lttng_consumer_local_data *ctx)
{
	int ret, use_relayd = 0;
	ssize_t ret_read;
	struct lttng_consumer_stream *metadata_stream;

	LTTNG_ASSERT(ctx);

	DBG("Kernel consumer snapshot metadata with key %" PRIu64 " at path %s",
			key, path);

	rcu_read_lock();

	metadata_stream = metadata_channel->metadata_stream;
	LTTNG_ASSERT(metadata_stream);

	/* Serialize against the metadata thread's consumption of this stream. */
	metadata_stream->read_subbuffer_ops.lock(metadata_stream);
	LTTNG_ASSERT(metadata_channel->trace_chunk);
	LTTNG_ASSERT(metadata_stream->trace_chunk);

	/* Flag once that we have a valid relayd for the stream. */
	if (relayd_id != (uint64_t) -1ULL) {
		use_relayd = 1;
	}

	if (use_relayd) {
		ret = consumer_send_relayd_stream(metadata_stream, path);
		if (ret < 0) {
			goto error_snapshot;
		}
	} else {
		/* Local output: create the on-disk metadata file. */
		ret = consumer_stream_create_output_files(metadata_stream,
				false);
		if (ret < 0) {
			goto error_snapshot;
		}
	}

	/* Drain the metadata stream until no data remains. */
	do {
		health_code_update();

		ret_read = lttng_consumer_read_subbuffer(metadata_stream, ctx, true);
		if (ret_read < 0) {
			ERR("Kernel snapshot reading metadata subbuffer (ret: %zd)",
					ret_read);
			/* NOTE(review): ssize_t narrowed to int here — TODO confirm harmless. */
			ret = ret_read;
			goto error_snapshot;
		}
	} while (ret_read > 0);

	if (use_relayd) {
		close_relayd_stream(metadata_stream);
		metadata_stream->net_seq_idx = (uint64_t) -1ULL;
	} else {
		if (metadata_stream->out_fd >= 0) {
			ret = close(metadata_stream->out_fd);
			if (ret < 0) {
				PERROR("Kernel consumer snapshot metadata close out_fd");
				/*
				 * Don't go on error here since the snapshot was successful at this
				 * point but somehow the close failed.
				 */
			}
			metadata_stream->out_fd = -1;
			/* Drop the stream's chunk reference now that its file is closed. */
			lttng_trace_chunk_put(metadata_stream->trace_chunk);
			metadata_stream->trace_chunk = NULL;
		}
	}

	ret = 0;
error_snapshot:
	/*
	 * Unlock before destroying the stream: the destroy tears the stream
	 * down (shared with all exit paths above).
	 */
	metadata_stream->read_subbuffer_ops.unlock(metadata_stream);
	consumer_stream_destroy(metadata_stream, NULL);
	metadata_channel->metadata_stream = NULL;
	rcu_read_unlock();
	return ret;
}
443
444 /*
445 * Receive command from session daemon and process it.
446 *
447 * Return 1 on success else a negative value or 0.
448 */
449 int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
450 int sock, struct pollfd *consumer_sockpoll)
451 {
452 int ret_func;
453 enum lttcomm_return_code ret_code = LTTCOMM_CONSUMERD_SUCCESS;
454 struct lttcomm_consumer_msg msg;
455
456 health_code_update();
457
458 {
459 ssize_t ret_recv;
460
461 ret_recv = lttcomm_recv_unix_sock(sock, &msg, sizeof(msg));
462 if (ret_recv != sizeof(msg)) {
463 if (ret_recv > 0) {
464 lttng_consumer_send_error(ctx,
465 LTTCOMM_CONSUMERD_ERROR_RECV_CMD);
466 ret_recv = -1;
467 }
468 return ret_recv;
469 }
470 }
471
472 health_code_update();
473
474 /* Deprecated command */
475 LTTNG_ASSERT(msg.cmd_type != LTTNG_CONSUMER_STOP);
476
477 health_code_update();
478
479 /* relayd needs RCU read-side protection */
480 rcu_read_lock();
481
482 switch (msg.cmd_type) {
483 case LTTNG_CONSUMER_ADD_RELAYD_SOCKET:
484 {
485 uint32_t major = msg.u.relayd_sock.major;
486 uint32_t minor = msg.u.relayd_sock.minor;
487 enum lttcomm_sock_proto protocol = (enum lttcomm_sock_proto)
488 msg.u.relayd_sock.relayd_socket_protocol;
489
490 /* Session daemon status message are handled in the following call. */
491 consumer_add_relayd_socket(msg.u.relayd_sock.net_index,
492 msg.u.relayd_sock.type, ctx, sock,
493 consumer_sockpoll, msg.u.relayd_sock.session_id,
494 msg.u.relayd_sock.relayd_session_id, major,
495 minor, protocol);
496 goto end_nosignal;
497 }
498 case LTTNG_CONSUMER_ADD_CHANNEL:
499 {
500 struct lttng_consumer_channel *new_channel;
501 int ret_send_status, ret_add_channel = 0;
502 const uint64_t chunk_id = msg.u.channel.chunk_id.value;
503
504 health_code_update();
505
506 /* First send a status message before receiving the fds. */
507 ret_send_status = consumer_send_status_msg(sock, ret_code);
508 if (ret_send_status < 0) {
509 /* Somehow, the session daemon is not responding anymore. */
510 goto error_fatal;
511 }
512
513 health_code_update();
514
515 DBG("consumer_add_channel %" PRIu64, msg.u.channel.channel_key);
516 new_channel = consumer_allocate_channel(msg.u.channel.channel_key,
517 msg.u.channel.session_id,
518 msg.u.channel.chunk_id.is_set ?
519 &chunk_id : NULL,
520 msg.u.channel.pathname,
521 msg.u.channel.name,
522 msg.u.channel.relayd_id, msg.u.channel.output,
523 msg.u.channel.tracefile_size,
524 msg.u.channel.tracefile_count, 0,
525 msg.u.channel.monitor,
526 msg.u.channel.live_timer_interval,
527 msg.u.channel.is_live,
528 NULL, NULL);
529 if (new_channel == NULL) {
530 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_OUTFD_ERROR);
531 goto end_nosignal;
532 }
533 new_channel->nb_init_stream_left = msg.u.channel.nb_init_streams;
534 switch (msg.u.channel.output) {
535 case LTTNG_EVENT_SPLICE:
536 new_channel->output = CONSUMER_CHANNEL_SPLICE;
537 break;
538 case LTTNG_EVENT_MMAP:
539 new_channel->output = CONSUMER_CHANNEL_MMAP;
540 break;
541 default:
542 ERR("Channel output unknown %d", msg.u.channel.output);
543 goto end_nosignal;
544 }
545
546 /* Translate and save channel type. */
547 switch (msg.u.channel.type) {
548 case CONSUMER_CHANNEL_TYPE_DATA:
549 case CONSUMER_CHANNEL_TYPE_METADATA:
550 new_channel->type = (consumer_channel_type) msg.u.channel.type;
551 break;
552 default:
553 abort();
554 goto end_nosignal;
555 };
556
557 health_code_update();
558
559 if (ctx->on_recv_channel != NULL) {
560 int ret_recv_channel =
561 ctx->on_recv_channel(new_channel);
562 if (ret_recv_channel == 0) {
563 ret_add_channel = consumer_add_channel(
564 new_channel, ctx);
565 } else if (ret_recv_channel < 0) {
566 goto end_nosignal;
567 }
568 } else {
569 ret_add_channel =
570 consumer_add_channel(new_channel, ctx);
571 }
572 if (msg.u.channel.type == CONSUMER_CHANNEL_TYPE_DATA &&
573 !ret_add_channel) {
574 int monitor_start_ret;
575
576 DBG("Consumer starting monitor timer");
577 consumer_timer_live_start(new_channel,
578 msg.u.channel.live_timer_interval);
579 monitor_start_ret = consumer_timer_monitor_start(
580 new_channel,
581 msg.u.channel.monitor_timer_interval);
582 if (monitor_start_ret < 0) {
583 ERR("Starting channel monitoring timer failed");
584 goto end_nosignal;
585 }
586 }
587
588 health_code_update();
589
590 /* If we received an error in add_channel, we need to report it. */
591 if (ret_add_channel < 0) {
592 ret_send_status = consumer_send_status_msg(
593 sock, ret_add_channel);
594 if (ret_send_status < 0) {
595 goto error_fatal;
596 }
597 goto end_nosignal;
598 }
599
600 goto end_nosignal;
601 }
602 case LTTNG_CONSUMER_ADD_STREAM:
603 {
604 int fd;
605 struct lttng_pipe *stream_pipe;
606 struct lttng_consumer_stream *new_stream;
607 struct lttng_consumer_channel *channel;
608 int alloc_ret = 0;
609 int ret_send_status, ret_poll, ret_get_max_subbuf_size;
610 ssize_t ret_pipe_write, ret_recv;
611
612 /*
613 * Get stream's channel reference. Needed when adding the stream to the
614 * global hash table.
615 */
616 channel = consumer_find_channel(msg.u.stream.channel_key);
617 if (!channel) {
618 /*
619 * We could not find the channel. Can happen if cpu hotplug
620 * happens while tearing down.
621 */
622 ERR("Unable to find channel key %" PRIu64, msg.u.stream.channel_key);
623 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
624 }
625
626 health_code_update();
627
628 /* First send a status message before receiving the fds. */
629 ret_send_status = consumer_send_status_msg(sock, ret_code);
630 if (ret_send_status < 0) {
631 /* Somehow, the session daemon is not responding anymore. */
632 goto error_add_stream_fatal;
633 }
634
635 health_code_update();
636
637 if (ret_code != LTTCOMM_CONSUMERD_SUCCESS) {
638 /* Channel was not found. */
639 goto error_add_stream_nosignal;
640 }
641
642 /* Blocking call */
643 health_poll_entry();
644 ret_poll = lttng_consumer_poll_socket(consumer_sockpoll);
645 health_poll_exit();
646 if (ret_poll) {
647 goto error_add_stream_fatal;
648 }
649
650 health_code_update();
651
652 /* Get stream file descriptor from socket */
653 ret_recv = lttcomm_recv_fds_unix_sock(sock, &fd, 1);
654 if (ret_recv != sizeof(fd)) {
655 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_ERROR_RECV_FD);
656 ret_func = ret_recv;
657 goto end;
658 }
659
660 health_code_update();
661
662 /*
663 * Send status code to session daemon only if the recv works. If the
664 * above recv() failed, the session daemon is notified through the
665 * error socket and the teardown is eventually done.
666 */
667 ret_send_status = consumer_send_status_msg(sock, ret_code);
668 if (ret_send_status < 0) {
669 /* Somehow, the session daemon is not responding anymore. */
670 goto error_add_stream_nosignal;
671 }
672
673 health_code_update();
674
675 pthread_mutex_lock(&channel->lock);
676 new_stream = consumer_stream_create(
677 channel,
678 channel->key,
679 fd,
680 channel->name,
681 channel->relayd_id,
682 channel->session_id,
683 channel->trace_chunk,
684 msg.u.stream.cpu,
685 &alloc_ret,
686 channel->type,
687 channel->monitor);
688 if (new_stream == NULL) {
689 switch (alloc_ret) {
690 case -ENOMEM:
691 case -EINVAL:
692 default:
693 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_OUTFD_ERROR);
694 break;
695 }
696 pthread_mutex_unlock(&channel->lock);
697 goto error_add_stream_nosignal;
698 }
699
700 new_stream->wait_fd = fd;
701 ret_get_max_subbuf_size = kernctl_get_max_subbuf_size(
702 new_stream->wait_fd, &new_stream->max_sb_size);
703 if (ret_get_max_subbuf_size < 0) {
704 pthread_mutex_unlock(&channel->lock);
705 ERR("Failed to get kernel maximal subbuffer size");
706 goto error_add_stream_nosignal;
707 }
708
709 consumer_stream_update_channel_attributes(new_stream,
710 channel);
711
712 /*
713 * We've just assigned the channel to the stream so increment the
714 * refcount right now. We don't need to increment the refcount for
715 * streams in no monitor because we handle manually the cleanup of
716 * those. It is very important to make sure there is NO prior
717 * consumer_del_stream() calls or else the refcount will be unbalanced.
718 */
719 if (channel->monitor) {
720 uatomic_inc(&new_stream->chan->refcount);
721 }
722
723 /*
724 * The buffer flush is done on the session daemon side for the kernel
725 * so no need for the stream "hangup_flush_done" variable to be
726 * tracked. This is important for a kernel stream since we don't rely
727 * on the flush state of the stream to read data. It's not the case for
728 * user space tracing.
729 */
730 new_stream->hangup_flush_done = 0;
731
732 health_code_update();
733
734 pthread_mutex_lock(&new_stream->lock);
735 if (ctx->on_recv_stream) {
736 int ret_recv_stream = ctx->on_recv_stream(new_stream);
737 if (ret_recv_stream < 0) {
738 pthread_mutex_unlock(&new_stream->lock);
739 pthread_mutex_unlock(&channel->lock);
740 consumer_stream_free(new_stream);
741 goto error_add_stream_nosignal;
742 }
743 }
744 health_code_update();
745
746 if (new_stream->metadata_flag) {
747 channel->metadata_stream = new_stream;
748 }
749
750 /* Do not monitor this stream. */
751 if (!channel->monitor) {
752 DBG("Kernel consumer add stream %s in no monitor mode with "
753 "relayd id %" PRIu64, new_stream->name,
754 new_stream->net_seq_idx);
755 cds_list_add(&new_stream->send_node, &channel->streams.head);
756 pthread_mutex_unlock(&new_stream->lock);
757 pthread_mutex_unlock(&channel->lock);
758 goto end_add_stream;
759 }
760
761 /* Send stream to relayd if the stream has an ID. */
762 if (new_stream->net_seq_idx != (uint64_t) -1ULL) {
763 int ret_send_relayd_stream;
764
765 ret_send_relayd_stream = consumer_send_relayd_stream(
766 new_stream, new_stream->chan->pathname);
767 if (ret_send_relayd_stream < 0) {
768 pthread_mutex_unlock(&new_stream->lock);
769 pthread_mutex_unlock(&channel->lock);
770 consumer_stream_free(new_stream);
771 goto error_add_stream_nosignal;
772 }
773
774 /*
775 * If adding an extra stream to an already
776 * existing channel (e.g. cpu hotplug), we need
777 * to send the "streams_sent" command to relayd.
778 */
779 if (channel->streams_sent_to_relayd) {
780 int ret_send_relayd_streams_sent;
781
782 ret_send_relayd_streams_sent =
783 consumer_send_relayd_streams_sent(
784 new_stream->net_seq_idx);
785 if (ret_send_relayd_streams_sent < 0) {
786 pthread_mutex_unlock(&new_stream->lock);
787 pthread_mutex_unlock(&channel->lock);
788 goto error_add_stream_nosignal;
789 }
790 }
791 }
792 pthread_mutex_unlock(&new_stream->lock);
793 pthread_mutex_unlock(&channel->lock);
794
795 /* Get the right pipe where the stream will be sent. */
796 if (new_stream->metadata_flag) {
797 consumer_add_metadata_stream(new_stream);
798 stream_pipe = ctx->consumer_metadata_pipe;
799 } else {
800 consumer_add_data_stream(new_stream);
801 stream_pipe = ctx->consumer_data_pipe;
802 }
803
804 /* Visible to other threads */
805 new_stream->globally_visible = 1;
806
807 health_code_update();
808
809 ret_pipe_write = lttng_pipe_write(
810 stream_pipe, &new_stream, sizeof(new_stream));
811 if (ret_pipe_write < 0) {
812 ERR("Consumer write %s stream to pipe %d",
813 new_stream->metadata_flag ? "metadata" : "data",
814 lttng_pipe_get_writefd(stream_pipe));
815 if (new_stream->metadata_flag) {
816 consumer_del_stream_for_metadata(new_stream);
817 } else {
818 consumer_del_stream_for_data(new_stream);
819 }
820 goto error_add_stream_nosignal;
821 }
822
823 DBG("Kernel consumer ADD_STREAM %s (fd: %d) %s with relayd id %" PRIu64,
824 new_stream->name, fd, new_stream->chan->pathname, new_stream->relayd_stream_id);
825 end_add_stream:
826 break;
827 error_add_stream_nosignal:
828 goto end_nosignal;
829 error_add_stream_fatal:
830 goto error_fatal;
831 }
832 case LTTNG_CONSUMER_STREAMS_SENT:
833 {
834 struct lttng_consumer_channel *channel;
835 int ret_send_status;
836
837 /*
838 * Get stream's channel reference. Needed when adding the stream to the
839 * global hash table.
840 */
841 channel = consumer_find_channel(msg.u.sent_streams.channel_key);
842 if (!channel) {
843 /*
844 * We could not find the channel. Can happen if cpu hotplug
845 * happens while tearing down.
846 */
847 ERR("Unable to find channel key %" PRIu64,
848 msg.u.sent_streams.channel_key);
849 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
850 }
851
852 health_code_update();
853
854 /*
855 * Send status code to session daemon.
856 */
857 ret_send_status = consumer_send_status_msg(sock, ret_code);
858 if (ret_send_status < 0 ||
859 ret_code != LTTCOMM_CONSUMERD_SUCCESS) {
860 /* Somehow, the session daemon is not responding anymore. */
861 goto error_streams_sent_nosignal;
862 }
863
864 health_code_update();
865
866 /*
867 * We should not send this message if we don't monitor the
868 * streams in this channel.
869 */
870 if (!channel->monitor) {
871 goto end_error_streams_sent;
872 }
873
874 health_code_update();
875 /* Send stream to relayd if the stream has an ID. */
876 if (msg.u.sent_streams.net_seq_idx != (uint64_t) -1ULL) {
877 int ret_send_relay_streams;
878
879 ret_send_relay_streams = consumer_send_relayd_streams_sent(
880 msg.u.sent_streams.net_seq_idx);
881 if (ret_send_relay_streams < 0) {
882 goto error_streams_sent_nosignal;
883 }
884 channel->streams_sent_to_relayd = true;
885 }
886 end_error_streams_sent:
887 break;
888 error_streams_sent_nosignal:
889 goto end_nosignal;
890 }
891 case LTTNG_CONSUMER_UPDATE_STREAM:
892 {
893 rcu_read_unlock();
894 return -ENOSYS;
895 }
896 case LTTNG_CONSUMER_DESTROY_RELAYD:
897 {
898 uint64_t index = msg.u.destroy_relayd.net_seq_idx;
899 struct consumer_relayd_sock_pair *relayd;
900 int ret_send_status;
901
902 DBG("Kernel consumer destroying relayd %" PRIu64, index);
903
904 /* Get relayd reference if exists. */
905 relayd = consumer_find_relayd(index);
906 if (relayd == NULL) {
907 DBG("Unable to find relayd %" PRIu64, index);
908 ret_code = LTTCOMM_CONSUMERD_RELAYD_FAIL;
909 }
910
911 /*
912 * Each relayd socket pair has a refcount of stream attached to it
913 * which tells if the relayd is still active or not depending on the
914 * refcount value.
915 *
916 * This will set the destroy flag of the relayd object and destroy it
917 * if the refcount reaches zero when called.
918 *
919 * The destroy can happen either here or when a stream fd hangs up.
920 */
921 if (relayd) {
922 consumer_flag_relayd_for_destroy(relayd);
923 }
924
925 health_code_update();
926
927 ret_send_status = consumer_send_status_msg(sock, ret_code);
928 if (ret_send_status < 0) {
929 /* Somehow, the session daemon is not responding anymore. */
930 goto error_fatal;
931 }
932
933 goto end_nosignal;
934 }
935 case LTTNG_CONSUMER_DATA_PENDING:
936 {
937 int32_t ret_data_pending;
938 uint64_t id = msg.u.data_pending.session_id;
939 ssize_t ret_send;
940
941 DBG("Kernel consumer data pending command for id %" PRIu64, id);
942
943 ret_data_pending = consumer_data_pending(id);
944
945 health_code_update();
946
947 /* Send back returned value to session daemon */
948 ret_send = lttcomm_send_unix_sock(sock, &ret_data_pending,
949 sizeof(ret_data_pending));
950 if (ret_send < 0) {
951 PERROR("send data pending ret code");
952 goto error_fatal;
953 }
954
955 /*
956 * No need to send back a status message since the data pending
957 * returned value is the response.
958 */
959 break;
960 }
961 case LTTNG_CONSUMER_SNAPSHOT_CHANNEL:
962 {
963 struct lttng_consumer_channel *channel;
964 uint64_t key = msg.u.snapshot_channel.key;
965 int ret_send_status;
966
967 channel = consumer_find_channel(key);
968 if (!channel) {
969 ERR("Channel %" PRIu64 " not found", key);
970 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
971 } else {
972 if (msg.u.snapshot_channel.metadata == 1) {
973 int ret_snapshot;
974
975 ret_snapshot = lttng_kconsumer_snapshot_metadata(
976 channel, key,
977 msg.u.snapshot_channel.pathname,
978 msg.u.snapshot_channel.relayd_id,
979 ctx);
980 if (ret_snapshot < 0) {
981 ERR("Snapshot metadata failed");
982 ret_code = LTTCOMM_CONSUMERD_SNAPSHOT_FAILED;
983 }
984 } else {
985 int ret_snapshot;
986
987 ret_snapshot = lttng_kconsumer_snapshot_channel(
988 channel, key,
989 msg.u.snapshot_channel.pathname,
990 msg.u.snapshot_channel.relayd_id,
991 msg.u.snapshot_channel
992 .nb_packets_per_stream,
993 ctx);
994 if (ret_snapshot < 0) {
995 ERR("Snapshot channel failed");
996 ret_code = LTTCOMM_CONSUMERD_SNAPSHOT_FAILED;
997 }
998 }
999 }
1000 health_code_update();
1001
1002 ret_send_status = consumer_send_status_msg(sock, ret_code);
1003 if (ret_send_status < 0) {
1004 /* Somehow, the session daemon is not responding anymore. */
1005 goto end_nosignal;
1006 }
1007 break;
1008 }
1009 case LTTNG_CONSUMER_DESTROY_CHANNEL:
1010 {
1011 uint64_t key = msg.u.destroy_channel.key;
1012 struct lttng_consumer_channel *channel;
1013 int ret_send_status;
1014
1015 channel = consumer_find_channel(key);
1016 if (!channel) {
1017 ERR("Kernel consumer destroy channel %" PRIu64 " not found", key);
1018 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
1019 }
1020
1021 health_code_update();
1022
1023 ret_send_status = consumer_send_status_msg(sock, ret_code);
1024 if (ret_send_status < 0) {
1025 /* Somehow, the session daemon is not responding anymore. */
1026 goto end_destroy_channel;
1027 }
1028
1029 health_code_update();
1030
1031 /* Stop right now if no channel was found. */
1032 if (!channel) {
1033 goto end_destroy_channel;
1034 }
1035
1036 /*
1037 * This command should ONLY be issued for channel with streams set in
1038 * no monitor mode.
1039 */
1040 LTTNG_ASSERT(!channel->monitor);
1041
1042 /*
1043 * The refcount should ALWAYS be 0 in the case of a channel in no
1044 * monitor mode.
1045 */
1046 LTTNG_ASSERT(!uatomic_sub_return(&channel->refcount, 1));
1047
1048 consumer_del_channel(channel);
1049 end_destroy_channel:
1050 goto end_nosignal;
1051 }
1052 case LTTNG_CONSUMER_DISCARDED_EVENTS:
1053 {
1054 ssize_t ret;
1055 uint64_t count;
1056 struct lttng_consumer_channel *channel;
1057 uint64_t id = msg.u.discarded_events.session_id;
1058 uint64_t key = msg.u.discarded_events.channel_key;
1059
1060 DBG("Kernel consumer discarded events command for session id %"
1061 PRIu64 ", channel key %" PRIu64, id, key);
1062
1063 channel = consumer_find_channel(key);
1064 if (!channel) {
1065 ERR("Kernel consumer discarded events channel %"
1066 PRIu64 " not found", key);
1067 count = 0;
1068 } else {
1069 count = channel->discarded_events;
1070 }
1071
1072 health_code_update();
1073
1074 /* Send back returned value to session daemon */
1075 ret = lttcomm_send_unix_sock(sock, &count, sizeof(count));
1076 if (ret < 0) {
1077 PERROR("send discarded events");
1078 goto error_fatal;
1079 }
1080
1081 break;
1082 }
1083 case LTTNG_CONSUMER_LOST_PACKETS:
1084 {
1085 ssize_t ret;
1086 uint64_t count;
1087 struct lttng_consumer_channel *channel;
1088 uint64_t id = msg.u.lost_packets.session_id;
1089 uint64_t key = msg.u.lost_packets.channel_key;
1090
1091 DBG("Kernel consumer lost packets command for session id %"
1092 PRIu64 ", channel key %" PRIu64, id, key);
1093
1094 channel = consumer_find_channel(key);
1095 if (!channel) {
1096 ERR("Kernel consumer lost packets channel %"
1097 PRIu64 " not found", key);
1098 count = 0;
1099 } else {
1100 count = channel->lost_packets;
1101 }
1102
1103 health_code_update();
1104
1105 /* Send back returned value to session daemon */
1106 ret = lttcomm_send_unix_sock(sock, &count, sizeof(count));
1107 if (ret < 0) {
1108 PERROR("send lost packets");
1109 goto error_fatal;
1110 }
1111
1112 break;
1113 }
1114 case LTTNG_CONSUMER_SET_CHANNEL_MONITOR_PIPE:
1115 {
1116 int channel_monitor_pipe;
1117 int ret_send_status, ret_set_channel_monitor_pipe;
1118 ssize_t ret_recv;
1119
1120 ret_code = LTTCOMM_CONSUMERD_SUCCESS;
1121 /* Successfully received the command's type. */
1122 ret_send_status = consumer_send_status_msg(sock, ret_code);
1123 if (ret_send_status < 0) {
1124 goto error_fatal;
1125 }
1126
1127 ret_recv = lttcomm_recv_fds_unix_sock(
1128 sock, &channel_monitor_pipe, 1);
1129 if (ret_recv != sizeof(channel_monitor_pipe)) {
1130 ERR("Failed to receive channel monitor pipe");
1131 goto error_fatal;
1132 }
1133
1134 DBG("Received channel monitor pipe (%d)", channel_monitor_pipe);
1135 ret_set_channel_monitor_pipe =
1136 consumer_timer_thread_set_channel_monitor_pipe(
1137 channel_monitor_pipe);
1138 if (!ret_set_channel_monitor_pipe) {
1139 int flags;
1140 int ret_fcntl;
1141
1142 ret_code = LTTCOMM_CONSUMERD_SUCCESS;
1143 /* Set the pipe as non-blocking. */
1144 ret_fcntl = fcntl(channel_monitor_pipe, F_GETFL, 0);
1145 if (ret_fcntl == -1) {
1146 PERROR("fcntl get flags of the channel monitoring pipe");
1147 goto error_fatal;
1148 }
1149 flags = ret_fcntl;
1150
1151 ret_fcntl = fcntl(channel_monitor_pipe, F_SETFL,
1152 flags | O_NONBLOCK);
1153 if (ret_fcntl == -1) {
1154 PERROR("fcntl set O_NONBLOCK flag of the channel monitoring pipe");
1155 goto error_fatal;
1156 }
1157 DBG("Channel monitor pipe set as non-blocking");
1158 } else {
1159 ret_code = LTTCOMM_CONSUMERD_ALREADY_SET;
1160 }
1161 ret_send_status = consumer_send_status_msg(sock, ret_code);
1162 if (ret_send_status < 0) {
1163 goto error_fatal;
1164 }
1165 break;
1166 }
1167 case LTTNG_CONSUMER_ROTATE_CHANNEL:
1168 {
1169 struct lttng_consumer_channel *channel;
1170 uint64_t key = msg.u.rotate_channel.key;
1171 int ret_send_status;
1172
1173 DBG("Consumer rotate channel %" PRIu64, key);
1174
1175 channel = consumer_find_channel(key);
1176 if (!channel) {
1177 ERR("Channel %" PRIu64 " not found", key);
1178 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
1179 } else {
1180 /*
1181 * Sample the rotate position of all the streams in this channel.
1182 */
1183 int ret_rotate_channel;
1184
1185 ret_rotate_channel = lttng_consumer_rotate_channel(
1186 channel, key,
1187 msg.u.rotate_channel.relayd_id,
1188 msg.u.rotate_channel.metadata, ctx);
1189 if (ret_rotate_channel < 0) {
1190 ERR("Rotate channel failed");
1191 ret_code = LTTCOMM_CONSUMERD_ROTATION_FAIL;
1192 }
1193
1194 health_code_update();
1195 }
1196
1197 ret_send_status = consumer_send_status_msg(sock, ret_code);
1198 if (ret_send_status < 0) {
1199 /* Somehow, the session daemon is not responding anymore. */
1200 goto error_rotate_channel;
1201 }
1202 if (channel) {
1203 /* Rotate the streams that are ready right now. */
1204 int ret_rotate;
1205
1206 ret_rotate = lttng_consumer_rotate_ready_streams(
1207 channel, key, ctx);
1208 if (ret_rotate < 0) {
1209 ERR("Rotate ready streams failed");
1210 }
1211 }
1212 break;
1213 error_rotate_channel:
1214 goto end_nosignal;
1215 }
1216 case LTTNG_CONSUMER_CLEAR_CHANNEL:
1217 {
1218 struct lttng_consumer_channel *channel;
1219 uint64_t key = msg.u.clear_channel.key;
1220 int ret_send_status;
1221
1222 channel = consumer_find_channel(key);
1223 if (!channel) {
1224 DBG("Channel %" PRIu64 " not found", key);
1225 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
1226 } else {
1227 int ret_clear_channel;
1228
1229 ret_clear_channel =
1230 lttng_consumer_clear_channel(channel);
1231 if (ret_clear_channel) {
1232 ERR("Clear channel failed");
1233 ret_code = (lttcomm_return_code) ret_clear_channel;
1234 }
1235
1236 health_code_update();
1237 }
1238
1239 ret_send_status = consumer_send_status_msg(sock, ret_code);
1240 if (ret_send_status < 0) {
1241 /* Somehow, the session daemon is not responding anymore. */
1242 goto end_nosignal;
1243 }
1244
1245 break;
1246 }
1247 case LTTNG_CONSUMER_INIT:
1248 {
1249 int ret_send_status;
1250
1251 ret_code = lttng_consumer_init_command(ctx,
1252 msg.u.init.sessiond_uuid);
1253 health_code_update();
1254 ret_send_status = consumer_send_status_msg(sock, ret_code);
1255 if (ret_send_status < 0) {
1256 /* Somehow, the session daemon is not responding anymore. */
1257 goto end_nosignal;
1258 }
1259 break;
1260 }
1261 case LTTNG_CONSUMER_CREATE_TRACE_CHUNK:
1262 {
1263 const struct lttng_credentials credentials = {
1264 .uid = LTTNG_OPTIONAL_INIT_VALUE(msg.u.create_trace_chunk.credentials.value.uid),
1265 .gid = LTTNG_OPTIONAL_INIT_VALUE(msg.u.create_trace_chunk.credentials.value.gid),
1266 };
1267 const bool is_local_trace =
1268 !msg.u.create_trace_chunk.relayd_id.is_set;
1269 const uint64_t relayd_id =
1270 msg.u.create_trace_chunk.relayd_id.value;
1271 const char *chunk_override_name =
1272 *msg.u.create_trace_chunk.override_name ?
1273 msg.u.create_trace_chunk.override_name :
1274 NULL;
1275 struct lttng_directory_handle *chunk_directory_handle = NULL;
1276
1277 /*
1278 * The session daemon will only provide a chunk directory file
1279 * descriptor for local traces.
1280 */
1281 if (is_local_trace) {
1282 int chunk_dirfd;
1283 int ret_send_status;
1284 ssize_t ret_recv;
1285
1286 /* Acnowledge the reception of the command. */
1287 ret_send_status = consumer_send_status_msg(
1288 sock, LTTCOMM_CONSUMERD_SUCCESS);
1289 if (ret_send_status < 0) {
1290 /* Somehow, the session daemon is not responding anymore. */
1291 goto end_nosignal;
1292 }
1293
1294 ret_recv = lttcomm_recv_fds_unix_sock(
1295 sock, &chunk_dirfd, 1);
1296 if (ret_recv != sizeof(chunk_dirfd)) {
1297 ERR("Failed to receive trace chunk directory file descriptor");
1298 goto error_fatal;
1299 }
1300
1301 DBG("Received trace chunk directory fd (%d)",
1302 chunk_dirfd);
1303 chunk_directory_handle = lttng_directory_handle_create_from_dirfd(
1304 chunk_dirfd);
1305 if (!chunk_directory_handle) {
1306 ERR("Failed to initialize chunk directory handle from directory file descriptor");
1307 if (close(chunk_dirfd)) {
1308 PERROR("Failed to close chunk directory file descriptor");
1309 }
1310 goto error_fatal;
1311 }
1312 }
1313
1314 ret_code = lttng_consumer_create_trace_chunk(
1315 !is_local_trace ? &relayd_id : NULL,
1316 msg.u.create_trace_chunk.session_id,
1317 msg.u.create_trace_chunk.chunk_id,
1318 (time_t) msg.u.create_trace_chunk
1319 .creation_timestamp,
1320 chunk_override_name,
1321 msg.u.create_trace_chunk.credentials.is_set ?
1322 &credentials :
1323 NULL,
1324 chunk_directory_handle);
1325 lttng_directory_handle_put(chunk_directory_handle);
1326 goto end_msg_sessiond;
1327 }
1328 case LTTNG_CONSUMER_CLOSE_TRACE_CHUNK:
1329 {
1330 enum lttng_trace_chunk_command_type close_command =
1331 (lttng_trace_chunk_command_type) msg.u.close_trace_chunk.close_command.value;
1332 const uint64_t relayd_id =
1333 msg.u.close_trace_chunk.relayd_id.value;
1334 struct lttcomm_consumer_close_trace_chunk_reply reply;
1335 char path[LTTNG_PATH_MAX];
1336 ssize_t ret_send;
1337
1338 ret_code = lttng_consumer_close_trace_chunk(
1339 msg.u.close_trace_chunk.relayd_id.is_set ?
1340 &relayd_id :
1341 NULL,
1342 msg.u.close_trace_chunk.session_id,
1343 msg.u.close_trace_chunk.chunk_id,
1344 (time_t) msg.u.close_trace_chunk.close_timestamp,
1345 msg.u.close_trace_chunk.close_command.is_set ?
1346 &close_command :
1347 NULL, path);
1348 reply.ret_code = ret_code;
1349 reply.path_length = strlen(path) + 1;
1350 ret_send = lttcomm_send_unix_sock(sock, &reply, sizeof(reply));
1351 if (ret_send != sizeof(reply)) {
1352 goto error_fatal;
1353 }
1354 ret_send = lttcomm_send_unix_sock(
1355 sock, path, reply.path_length);
1356 if (ret_send != reply.path_length) {
1357 goto error_fatal;
1358 }
1359 goto end_nosignal;
1360 }
1361 case LTTNG_CONSUMER_TRACE_CHUNK_EXISTS:
1362 {
1363 const uint64_t relayd_id =
1364 msg.u.trace_chunk_exists.relayd_id.value;
1365
1366 ret_code = lttng_consumer_trace_chunk_exists(
1367 msg.u.trace_chunk_exists.relayd_id.is_set ?
1368 &relayd_id : NULL,
1369 msg.u.trace_chunk_exists.session_id,
1370 msg.u.trace_chunk_exists.chunk_id);
1371 goto end_msg_sessiond;
1372 }
1373 case LTTNG_CONSUMER_OPEN_CHANNEL_PACKETS:
1374 {
1375 const uint64_t key = msg.u.open_channel_packets.key;
1376 struct lttng_consumer_channel *channel =
1377 consumer_find_channel(key);
1378
1379 if (channel) {
1380 pthread_mutex_lock(&channel->lock);
1381 ret_code = lttng_consumer_open_channel_packets(channel);
1382 pthread_mutex_unlock(&channel->lock);
1383 } else {
1384 WARN("Channel %" PRIu64 " not found", key);
1385 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
1386 }
1387
1388 health_code_update();
1389 goto end_msg_sessiond;
1390 }
1391 default:
1392 goto end_nosignal;
1393 }
1394
1395 end_nosignal:
1396 /*
1397 * Return 1 to indicate success since the 0 value can be a socket
1398 * shutdown during the recv() or send() call.
1399 */
1400 ret_func = 1;
1401 goto end;
1402 error_fatal:
1403 /* This will issue a consumer stop. */
1404 ret_func = -1;
1405 goto end;
1406 end_msg_sessiond:
1407 /*
1408 * The returned value here is not useful since either way we'll return 1 to
1409 * the caller because the session daemon socket management is done
1410 * elsewhere. Returning a negative code or 0 will shutdown the consumer.
1411 */
1412 {
1413 int ret_send_status;
1414
1415 ret_send_status = consumer_send_status_msg(sock, ret_code);
1416 if (ret_send_status < 0) {
1417 goto error_fatal;
1418 }
1419 }
1420
1421 ret_func = 1;
1422
1423 end:
1424 health_code_update();
1425 rcu_read_unlock();
1426 return ret_func;
1427 }
1428
1429 /*
1430 * Sync metadata meaning request them to the session daemon and snapshot to the
1431 * metadata thread can consumer them.
1432 *
1433 * Metadata stream lock MUST be acquired.
1434 */
1435 enum sync_metadata_status lttng_kconsumer_sync_metadata(
1436 struct lttng_consumer_stream *metadata)
1437 {
1438 int ret;
1439 enum sync_metadata_status status;
1440
1441 LTTNG_ASSERT(metadata);
1442
1443 ret = kernctl_buffer_flush(metadata->wait_fd);
1444 if (ret < 0) {
1445 ERR("Failed to flush kernel stream");
1446 status = SYNC_METADATA_STATUS_ERROR;
1447 goto end;
1448 }
1449
1450 ret = kernctl_snapshot(metadata->wait_fd);
1451 if (ret < 0) {
1452 if (errno == EAGAIN) {
1453 /* No new metadata, exit. */
1454 DBG("Sync metadata, no new kernel metadata");
1455 status = SYNC_METADATA_STATUS_NO_DATA;
1456 } else {
1457 ERR("Sync metadata, taking kernel snapshot failed.");
1458 status = SYNC_METADATA_STATUS_ERROR;
1459 }
1460 } else {
1461 status = SYNC_METADATA_STATUS_NEW_DATA;
1462 }
1463
1464 end:
1465 return status;
1466 }
1467
1468 static
1469 int extract_common_subbuffer_info(struct lttng_consumer_stream *stream,
1470 struct stream_subbuffer *subbuf)
1471 {
1472 int ret;
1473
1474 ret = kernctl_get_subbuf_size(
1475 stream->wait_fd, &subbuf->info.data.subbuf_size);
1476 if (ret) {
1477 goto end;
1478 }
1479
1480 ret = kernctl_get_padded_subbuf_size(
1481 stream->wait_fd, &subbuf->info.data.padded_subbuf_size);
1482 if (ret) {
1483 goto end;
1484 }
1485
1486 end:
1487 return ret;
1488 }
1489
1490 static
1491 int extract_metadata_subbuffer_info(struct lttng_consumer_stream *stream,
1492 struct stream_subbuffer *subbuf)
1493 {
1494 int ret;
1495
1496 ret = extract_common_subbuffer_info(stream, subbuf);
1497 if (ret) {
1498 goto end;
1499 }
1500
1501 ret = kernctl_get_metadata_version(
1502 stream->wait_fd, &subbuf->info.metadata.version);
1503 if (ret) {
1504 goto end;
1505 }
1506
1507 end:
1508 return ret;
1509 }
1510
1511 static
1512 int extract_data_subbuffer_info(struct lttng_consumer_stream *stream,
1513 struct stream_subbuffer *subbuf)
1514 {
1515 int ret;
1516
1517 ret = extract_common_subbuffer_info(stream, subbuf);
1518 if (ret) {
1519 goto end;
1520 }
1521
1522 ret = kernctl_get_packet_size(
1523 stream->wait_fd, &subbuf->info.data.packet_size);
1524 if (ret < 0) {
1525 PERROR("Failed to get sub-buffer packet size");
1526 goto end;
1527 }
1528
1529 ret = kernctl_get_content_size(
1530 stream->wait_fd, &subbuf->info.data.content_size);
1531 if (ret < 0) {
1532 PERROR("Failed to get sub-buffer content size");
1533 goto end;
1534 }
1535
1536 ret = kernctl_get_timestamp_begin(
1537 stream->wait_fd, &subbuf->info.data.timestamp_begin);
1538 if (ret < 0) {
1539 PERROR("Failed to get sub-buffer begin timestamp");
1540 goto end;
1541 }
1542
1543 ret = kernctl_get_timestamp_end(
1544 stream->wait_fd, &subbuf->info.data.timestamp_end);
1545 if (ret < 0) {
1546 PERROR("Failed to get sub-buffer end timestamp");
1547 goto end;
1548 }
1549
1550 ret = kernctl_get_events_discarded(
1551 stream->wait_fd, &subbuf->info.data.events_discarded);
1552 if (ret) {
1553 PERROR("Failed to get sub-buffer events discarded count");
1554 goto end;
1555 }
1556
1557 ret = kernctl_get_sequence_number(stream->wait_fd,
1558 &subbuf->info.data.sequence_number.value);
1559 if (ret) {
1560 /* May not be supported by older LTTng-modules. */
1561 if (ret != -ENOTTY) {
1562 PERROR("Failed to get sub-buffer sequence number");
1563 goto end;
1564 }
1565 } else {
1566 subbuf->info.data.sequence_number.is_set = true;
1567 }
1568
1569 ret = kernctl_get_stream_id(
1570 stream->wait_fd, &subbuf->info.data.stream_id);
1571 if (ret < 0) {
1572 PERROR("Failed to get stream id");
1573 goto end;
1574 }
1575
1576 ret = kernctl_get_instance_id(stream->wait_fd,
1577 &subbuf->info.data.stream_instance_id.value);
1578 if (ret) {
1579 /* May not be supported by older LTTng-modules. */
1580 if (ret != -ENOTTY) {
1581 PERROR("Failed to get stream instance id");
1582 goto end;
1583 }
1584 } else {
1585 subbuf->info.data.stream_instance_id.is_set = true;
1586 }
1587 end:
1588 return ret;
1589 }
1590
1591 static
1592 enum get_next_subbuffer_status get_subbuffer_common(
1593 struct lttng_consumer_stream *stream,
1594 struct stream_subbuffer *subbuffer)
1595 {
1596 int ret;
1597 enum get_next_subbuffer_status status;
1598
1599 ret = kernctl_get_next_subbuf(stream->wait_fd);
1600 switch (ret) {
1601 case 0:
1602 status = GET_NEXT_SUBBUFFER_STATUS_OK;
1603 break;
1604 case -ENODATA:
1605 case -EAGAIN:
1606 /*
1607 * The caller only expects -ENODATA when there is no data to
1608 * read, but the kernel tracer returns -EAGAIN when there is
1609 * currently no data for a non-finalized stream, and -ENODATA
1610 * when there is no data for a finalized stream. Those can be
1611 * combined into a -ENODATA return value.
1612 */
1613 status = GET_NEXT_SUBBUFFER_STATUS_NO_DATA;
1614 goto end;
1615 default:
1616 status = GET_NEXT_SUBBUFFER_STATUS_ERROR;
1617 goto end;
1618 }
1619
1620 ret = stream->read_subbuffer_ops.extract_subbuffer_info(
1621 stream, subbuffer);
1622 if (ret) {
1623 status = GET_NEXT_SUBBUFFER_STATUS_ERROR;
1624 }
1625 end:
1626 return status;
1627 }
1628
1629 static
1630 enum get_next_subbuffer_status get_next_subbuffer_splice(
1631 struct lttng_consumer_stream *stream,
1632 struct stream_subbuffer *subbuffer)
1633 {
1634 const enum get_next_subbuffer_status status =
1635 get_subbuffer_common(stream, subbuffer);
1636
1637 if (status != GET_NEXT_SUBBUFFER_STATUS_OK) {
1638 goto end;
1639 }
1640
1641 subbuffer->buffer.fd = stream->wait_fd;
1642 end:
1643 return status;
1644 }
1645
1646 static
1647 enum get_next_subbuffer_status get_next_subbuffer_mmap(
1648 struct lttng_consumer_stream *stream,
1649 struct stream_subbuffer *subbuffer)
1650 {
1651 int ret;
1652 enum get_next_subbuffer_status status;
1653 const char *addr;
1654
1655 status = get_subbuffer_common(stream, subbuffer);
1656 if (status != GET_NEXT_SUBBUFFER_STATUS_OK) {
1657 goto end;
1658 }
1659
1660 ret = get_current_subbuf_addr(stream, &addr);
1661 if (ret) {
1662 status = GET_NEXT_SUBBUFFER_STATUS_ERROR;
1663 goto end;
1664 }
1665
1666 subbuffer->buffer.buffer = lttng_buffer_view_init(
1667 addr, 0, subbuffer->info.data.padded_subbuf_size);
1668 end:
1669 return status;
1670 }
1671
/*
 * Get the next metadata sub-buffer using the "metadata check" variant of the
 * get_next_subbuf command, which also reports whether the metadata is in a
 * coherent (parseable) state. Used for live sessions when the tracer
 * supports it (see lttng_kconsumer_set_stream_ops).
 */
static
enum get_next_subbuffer_status get_next_subbuffer_metadata_check(struct lttng_consumer_stream *stream,
		struct stream_subbuffer *subbuffer)
{
	int ret;
	const char *addr;
	bool coherent;
	enum get_next_subbuffer_status status;

	ret = kernctl_get_next_subbuf_metadata_check(stream->wait_fd,
			&coherent);
	if (ret) {
		goto end;
	}

	ret = stream->read_subbuffer_ops.extract_subbuffer_info(
			stream, subbuffer);
	if (ret) {
		/*
		 * NOTE(review): the reserved sub-buffer is not released on
		 * this path (nor on the one below) — presumably handled by
		 * the caller's error path; confirm against the read pipeline.
		 */
		goto end;
	}

	/* Propagate the tracer-reported coherency state to the consumer. */
	LTTNG_OPTIONAL_SET(&subbuffer->info.metadata.coherent, coherent);

	ret = get_current_subbuf_addr(stream, &addr);
	if (ret) {
		goto end;
	}

	subbuffer->buffer.buffer = lttng_buffer_view_init(
			addr, 0, subbuffer->info.data.padded_subbuf_size);
	DBG("Got metadata packet with padded_subbuf_size = %lu, coherent = %s",
			subbuffer->info.metadata.padded_subbuf_size,
			coherent ? "true" : "false");
end:
	/* Map the raw kernctl return value to a status code. */
	switch (ret) {
	case 0:
		status = GET_NEXT_SUBBUFFER_STATUS_OK;
		break;
	case -ENODATA:
	case -EAGAIN:
		/*
		 * The caller only expects -ENODATA when there is no data to
		 * read, but the kernel tracer returns -EAGAIN when there is
		 * currently no data for a non-finalized stream, and -ENODATA
		 * when there is no data for a finalized stream. Those can be
		 * combined into a -ENODATA return value.
		 */
		status = GET_NEXT_SUBBUFFER_STATUS_NO_DATA;
		break;
	default:
		status = GET_NEXT_SUBBUFFER_STATUS_ERROR;
		break;
	}

	return status;
}
1734
1735 static
1736 int put_next_subbuffer(struct lttng_consumer_stream *stream,
1737 struct stream_subbuffer *subbuffer)
1738 {
1739 const int ret = kernctl_put_next_subbuf(stream->wait_fd);
1740
1741 if (ret) {
1742 if (ret == -EFAULT) {
1743 PERROR("Error in unreserving sub buffer");
1744 } else if (ret == -EIO) {
1745 /* Should never happen with newer LTTng versions */
1746 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted");
1747 }
1748 }
1749
1750 return ret;
1751 }
1752
1753 static
1754 bool is_get_next_check_metadata_available(int tracer_fd)
1755 {
1756 const int ret = kernctl_get_next_subbuf_metadata_check(tracer_fd, NULL);
1757 const bool available = ret != -ENOTTY;
1758
1759 if (ret == 0) {
1760 /* get succeeded, make sure to put the subbuffer. */
1761 kernctl_put_subbuf(tracer_fd);
1762 }
1763
1764 return available;
1765 }
1766
/*
 * Wake up all threads waiting on the stream's metadata rendez-vous point.
 * Installed as the stream's on_sleep operation for live metadata streams.
 *
 * NOTE(review): pthread_cond_broadcast() reports failure through its return
 * value, not errno, so "-errno" may not reflect the actual error — confirm
 * the intended error-reporting convention.
 */
static
int signal_metadata(struct lttng_consumer_stream *stream,
		struct lttng_consumer_local_data *ctx)
{
	ASSERT_LOCKED(stream->metadata_rdv_lock);
	return pthread_cond_broadcast(&stream->metadata_rdv) ? -errno : 0;
}
1774
/*
 * Wire up the per-stream read_subbuffer operations (get/put sub-buffer,
 * info extraction, sleep/beacon hooks) according to the stream type
 * (metadata vs. data), output mode (mmap vs. splice) and session liveness.
 *
 * Returns 0 on success, an error code from
 * consumer_stream_enable_metadata_bucketization() otherwise.
 */
static
int lttng_kconsumer_set_stream_ops(
		struct lttng_consumer_stream *stream)
{
	int ret = 0;

	if (stream->metadata_flag && stream->chan->is_live) {
		DBG("Attempting to enable metadata bucketization for live consumers");
		if (is_get_next_check_metadata_available(stream->wait_fd)) {
			DBG("Kernel tracer supports get_next_subbuffer_metadata_check, metadata will be accumulated until a coherent state is reached");
			stream->read_subbuffer_ops.get_next_subbuffer =
					get_next_subbuffer_metadata_check;
			ret = consumer_stream_enable_metadata_bucketization(
					stream);
			if (ret) {
				goto end;
			}
		} else {
			/*
			 * The kernel tracer version is too old to indicate
			 * when the metadata stream has reached a "coherent"
			 * (parseable) point.
			 *
			 * This means that a live viewer may see an incoherent
			 * sequence of metadata and fail to parse it.
			 */
			WARN("Kernel tracer does not support get_next_subbuffer_metadata_check which may cause live clients to fail to parse the metadata stream");
			/*
			 * Destroy any existing bucket AND clear the pointer:
			 * leaving a stale pointer behind would let later
			 * accesses touch freed memory (use-after-free).
			 */
			metadata_bucket_destroy(stream->metadata_bucket);
			stream->metadata_bucket = NULL;
		}

		/* Live metadata: wake waiters whenever the reader sleeps. */
		stream->read_subbuffer_ops.on_sleep = signal_metadata;
	}

	/*
	 * Only set the default getter if the metadata-check variant was not
	 * installed above.
	 */
	if (!stream->read_subbuffer_ops.get_next_subbuffer) {
		if (stream->chan->output == CONSUMER_CHANNEL_MMAP) {
			stream->read_subbuffer_ops.get_next_subbuffer =
					get_next_subbuffer_mmap;
		} else {
			stream->read_subbuffer_ops.get_next_subbuffer =
					get_next_subbuffer_splice;
		}
	}

	if (stream->metadata_flag) {
		stream->read_subbuffer_ops.extract_subbuffer_info =
				extract_metadata_subbuffer_info;
	} else {
		stream->read_subbuffer_ops.extract_subbuffer_info =
				extract_data_subbuffer_info;
		if (stream->chan->is_live) {
			/* Live data streams periodically flush an index beacon. */
			stream->read_subbuffer_ops.send_live_beacon =
					consumer_flush_kernel_index;
		}
	}

	stream->read_subbuffer_ops.put_next_subbuffer = put_next_subbuffer;
end:
	return ret;
}
1835
1836 int lttng_kconsumer_on_recv_stream(struct lttng_consumer_stream *stream)
1837 {
1838 int ret;
1839
1840 LTTNG_ASSERT(stream);
1841
1842 /*
1843 * Don't create anything if this is set for streaming or if there is
1844 * no current trace chunk on the parent channel.
1845 */
1846 if (stream->net_seq_idx == (uint64_t) -1ULL && stream->chan->monitor &&
1847 stream->chan->trace_chunk) {
1848 ret = consumer_stream_create_output_files(stream, true);
1849 if (ret) {
1850 goto error;
1851 }
1852 }
1853
1854 if (stream->output == LTTNG_EVENT_MMAP) {
1855 /* get the len of the mmap region */
1856 unsigned long mmap_len;
1857
1858 ret = kernctl_get_mmap_len(stream->wait_fd, &mmap_len);
1859 if (ret != 0) {
1860 PERROR("kernctl_get_mmap_len");
1861 goto error_close_fd;
1862 }
1863 stream->mmap_len = (size_t) mmap_len;
1864
1865 stream->mmap_base = mmap(NULL, stream->mmap_len, PROT_READ,
1866 MAP_PRIVATE, stream->wait_fd, 0);
1867 if (stream->mmap_base == MAP_FAILED) {
1868 PERROR("Error mmaping");
1869 ret = -1;
1870 goto error_close_fd;
1871 }
1872 }
1873
1874 ret = lttng_kconsumer_set_stream_ops(stream);
1875 if (ret) {
1876 goto error_close_fd;
1877 }
1878
1879 /* we return 0 to let the library handle the FD internally */
1880 return 0;
1881
1882 error_close_fd:
1883 if (stream->out_fd >= 0) {
1884 int err;
1885
1886 err = close(stream->out_fd);
1887 LTTNG_ASSERT(!err);
1888 stream->out_fd = -1;
1889 }
1890 error:
1891 return ret;
1892 }
1893
1894 /*
1895 * Check if data is still being extracted from the buffers for a specific
1896 * stream. Consumer data lock MUST be acquired before calling this function
1897 * and the stream lock.
1898 *
1899 * Return 1 if the traced data are still getting read else 0 meaning that the
1900 * data is available for trace viewer reading.
1901 */
1902 int lttng_kconsumer_data_pending(struct lttng_consumer_stream *stream)
1903 {
1904 int ret;
1905
1906 LTTNG_ASSERT(stream);
1907
1908 if (stream->endpoint_status != CONSUMER_ENDPOINT_ACTIVE) {
1909 ret = 0;
1910 goto end;
1911 }
1912
1913 ret = kernctl_get_next_subbuf(stream->wait_fd);
1914 if (ret == 0) {
1915 /* There is still data so let's put back this subbuffer. */
1916 ret = kernctl_put_subbuf(stream->wait_fd);
1917 LTTNG_ASSERT(ret == 0);
1918 ret = 1; /* Data is pending */
1919 goto end;
1920 }
1921
1922 /* Data is NOT pending and ready to be read. */
1923 ret = 0;
1924
1925 end:
1926 return ret;
1927 }
This page took 0.108643 seconds and 4 git commands to generate.