consumer: implement clear channel
[lttng-tools.git] / src / common / kernel-consumer / kernel-consumer.c
1 /*
2 * Copyright (C) 2011 - Julien Desfossez <julien.desfossez@polymtl.ca>
3 * Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
4 * Copyright (C) 2017 - Jérémie Galarneau <jeremie.galarneau@efficios.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2 only,
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #define _LGPL_SOURCE
21 #include <assert.h>
22 #include <poll.h>
23 #include <pthread.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <sys/mman.h>
27 #include <sys/socket.h>
28 #include <sys/types.h>
29 #include <inttypes.h>
30 #include <unistd.h>
31 #include <sys/stat.h>
32
33 #include <bin/lttng-consumerd/health-consumerd.h>
34 #include <common/common.h>
35 #include <common/kernel-ctl/kernel-ctl.h>
36 #include <common/sessiond-comm/sessiond-comm.h>
37 #include <common/sessiond-comm/relayd.h>
38 #include <common/compat/fcntl.h>
39 #include <common/compat/endian.h>
40 #include <common/pipe.h>
41 #include <common/relayd/relayd.h>
42 #include <common/utils.h>
43 #include <common/consumer/consumer-stream.h>
44 #include <common/index/index.h>
45 #include <common/consumer/consumer-timer.h>
46 #include <common/optional.h>
47
48 #include "kernel-consumer.h"
49
50 extern struct lttng_consumer_global_data consumer_data;
51 extern int consumer_poll_timeout;
52
53 /*
54 * Take a snapshot for a specific fd
55 *
56 * Returns 0 on success, < 0 on error
57 */
58 int lttng_kconsumer_take_snapshot(struct lttng_consumer_stream *stream)
59 {
60 int ret = 0;
61 int infd = stream->wait_fd;
62
63 ret = kernctl_snapshot(infd);
64 /*
65 * -EAGAIN is not an error, it just means that there is no data to
66 * be read.
67 */
68 if (ret != 0 && ret != -EAGAIN) {
69 PERROR("Getting sub-buffer snapshot.");
70 }
71
72 return ret;
73 }
74
75 /*
76 * Sample consumed and produced positions for a specific fd.
77 *
78 * Returns 0 on success, < 0 on error.
79 */
80 int lttng_kconsumer_sample_snapshot_positions(
81 struct lttng_consumer_stream *stream)
82 {
83 assert(stream);
84
85 return kernctl_snapshot_sample_positions(stream->wait_fd);
86 }
87
88 /*
89 * Get the produced position
90 *
91 * Returns 0 on success, < 0 on error
92 */
93 int lttng_kconsumer_get_produced_snapshot(struct lttng_consumer_stream *stream,
94 unsigned long *pos)
95 {
96 int ret;
97 int infd = stream->wait_fd;
98
99 ret = kernctl_snapshot_get_produced(infd, pos);
100 if (ret != 0) {
101 PERROR("kernctl_snapshot_get_produced");
102 }
103
104 return ret;
105 }
106
107 /*
108 * Get the consumerd position
109 *
110 * Returns 0 on success, < 0 on error
111 */
112 int lttng_kconsumer_get_consumed_snapshot(struct lttng_consumer_stream *stream,
113 unsigned long *pos)
114 {
115 int ret;
116 int infd = stream->wait_fd;
117
118 ret = kernctl_snapshot_get_consumed(infd, pos);
119 if (ret != 0) {
120 PERROR("kernctl_snapshot_get_consumed");
121 }
122
123 return ret;
124 }
125
126 /*
127 * Take a snapshot of all the stream of a channel
128 * RCU read-side lock must be held across this function to ensure existence of
129 * channel. The channel lock must be held by the caller.
130 *
131 * Returns 0 on success, < 0 on error
132 */
133 static int lttng_kconsumer_snapshot_channel(
134 struct lttng_consumer_channel *channel,
135 uint64_t key, char *path, uint64_t relayd_id,
136 uint64_t nb_packets_per_stream,
137 struct lttng_consumer_local_data *ctx)
138 {
139 int ret;
140 struct lttng_consumer_stream *stream;
141
142 DBG("Kernel consumer snapshot channel %" PRIu64, key);
143
144 rcu_read_lock();
145
146 /* Splice is not supported yet for channel snapshot. */
147 if (channel->output != CONSUMER_CHANNEL_MMAP) {
148 ERR("Unsupported output type for channel \"%s\": mmap output is required to record a snapshot",
149 channel->name);
150 ret = -1;
151 goto end;
152 }
153
154 cds_list_for_each_entry(stream, &channel->streams.head, send_node) {
155 unsigned long consumed_pos, produced_pos;
156
157 health_code_update();
158
159 /*
160 * Lock stream because we are about to change its state.
161 */
162 pthread_mutex_lock(&stream->lock);
163
164 assert(channel->trace_chunk);
165 if (!lttng_trace_chunk_get(channel->trace_chunk)) {
166 /*
167 * Can't happen barring an internal error as the channel
168 * holds a reference to the trace chunk.
169 */
170 ERR("Failed to acquire reference to channel's trace chunk");
171 ret = -1;
172 goto end_unlock;
173 }
174 assert(!stream->trace_chunk);
175 stream->trace_chunk = channel->trace_chunk;
176
177 /*
178 * Assign the received relayd ID so we can use it for streaming. The streams
179 * are not visible to anyone so this is OK to change it.
180 */
181 stream->net_seq_idx = relayd_id;
182 channel->relayd_id = relayd_id;
183 if (relayd_id != (uint64_t) -1ULL) {
184 ret = consumer_send_relayd_stream(stream, path);
185 if (ret < 0) {
186 ERR("sending stream to relayd");
187 goto end_unlock;
188 }
189 } else {
190 ret = consumer_stream_create_output_files(stream,
191 false);
192 if (ret < 0) {
193 goto end_unlock;
194 }
195 DBG("Kernel consumer snapshot stream (%" PRIu64 ")",
196 stream->key);
197 }
198
199 ret = kernctl_buffer_flush_empty(stream->wait_fd);
200 if (ret < 0) {
201 /*
202 * Doing a buffer flush which does not take into
203 * account empty packets. This is not perfect
204 * for stream intersection, but required as a
205 * fall-back when "flush_empty" is not
206 * implemented by lttng-modules.
207 */
208 ret = kernctl_buffer_flush(stream->wait_fd);
209 if (ret < 0) {
210 ERR("Failed to flush kernel stream");
211 goto end_unlock;
212 }
213 goto end_unlock;
214 }
215
216 ret = lttng_kconsumer_take_snapshot(stream);
217 if (ret < 0) {
218 ERR("Taking kernel snapshot");
219 goto end_unlock;
220 }
221
222 ret = lttng_kconsumer_get_produced_snapshot(stream, &produced_pos);
223 if (ret < 0) {
224 ERR("Produced kernel snapshot position");
225 goto end_unlock;
226 }
227
228 ret = lttng_kconsumer_get_consumed_snapshot(stream, &consumed_pos);
229 if (ret < 0) {
230 ERR("Consumerd kernel snapshot position");
231 goto end_unlock;
232 }
233
234 consumed_pos = consumer_get_consume_start_pos(consumed_pos,
235 produced_pos, nb_packets_per_stream,
236 stream->max_sb_size);
237
238 while ((long) (consumed_pos - produced_pos) < 0) {
239 ssize_t read_len;
240 unsigned long len, padded_len;
241
242 health_code_update();
243
244 DBG("Kernel consumer taking snapshot at pos %lu", consumed_pos);
245
246 ret = kernctl_get_subbuf(stream->wait_fd, &consumed_pos);
247 if (ret < 0) {
248 if (ret != -EAGAIN) {
249 PERROR("kernctl_get_subbuf snapshot");
250 goto end_unlock;
251 }
252 DBG("Kernel consumer get subbuf failed. Skipping it.");
253 consumed_pos += stream->max_sb_size;
254 stream->chan->lost_packets++;
255 continue;
256 }
257
258 ret = kernctl_get_subbuf_size(stream->wait_fd, &len);
259 if (ret < 0) {
260 ERR("Snapshot kernctl_get_subbuf_size");
261 goto error_put_subbuf;
262 }
263
264 ret = kernctl_get_padded_subbuf_size(stream->wait_fd, &padded_len);
265 if (ret < 0) {
266 ERR("Snapshot kernctl_get_padded_subbuf_size");
267 goto error_put_subbuf;
268 }
269
270 read_len = lttng_consumer_on_read_subbuffer_mmap(ctx, stream, len,
271 padded_len - len, NULL);
272 /*
273 * We write the padded len in local tracefiles but the data len
274 * when using a relay. Display the error but continue processing
275 * to try to release the subbuffer.
276 */
277 if (relayd_id != (uint64_t) -1ULL) {
278 if (read_len != len) {
279 ERR("Error sending to the relay (ret: %zd != len: %lu)",
280 read_len, len);
281 }
282 } else {
283 if (read_len != padded_len) {
284 ERR("Error writing to tracefile (ret: %zd != len: %lu)",
285 read_len, padded_len);
286 }
287 }
288
289 ret = kernctl_put_subbuf(stream->wait_fd);
290 if (ret < 0) {
291 ERR("Snapshot kernctl_put_subbuf");
292 goto end_unlock;
293 }
294 consumed_pos += stream->max_sb_size;
295 }
296
297 if (relayd_id == (uint64_t) -1ULL) {
298 if (stream->out_fd >= 0) {
299 ret = close(stream->out_fd);
300 if (ret < 0) {
301 PERROR("Kernel consumer snapshot close out_fd");
302 goto end_unlock;
303 }
304 stream->out_fd = -1;
305 }
306 } else {
307 close_relayd_stream(stream);
308 stream->net_seq_idx = (uint64_t) -1ULL;
309 }
310 lttng_trace_chunk_put(stream->trace_chunk);
311 stream->trace_chunk = NULL;
312 pthread_mutex_unlock(&stream->lock);
313 }
314
315 /* All good! */
316 ret = 0;
317 goto end;
318
319 error_put_subbuf:
320 ret = kernctl_put_subbuf(stream->wait_fd);
321 if (ret < 0) {
322 ERR("Snapshot kernctl_put_subbuf error path");
323 }
324 end_unlock:
325 pthread_mutex_unlock(&stream->lock);
326 end:
327 rcu_read_unlock();
328 return ret;
329 }
330
331 /*
332 * Read the whole metadata available for a snapshot.
333 * RCU read-side lock must be held across this function to ensure existence of
334 * metadata_channel. The channel lock must be held by the caller.
335 *
336 * Returns 0 on success, < 0 on error
337 */
338 static int lttng_kconsumer_snapshot_metadata(
339 struct lttng_consumer_channel *metadata_channel,
340 uint64_t key, char *path, uint64_t relayd_id,
341 struct lttng_consumer_local_data *ctx)
342 {
343 int ret, use_relayd = 0;
344 ssize_t ret_read;
345 struct lttng_consumer_stream *metadata_stream;
346
347 assert(ctx);
348
349 DBG("Kernel consumer snapshot metadata with key %" PRIu64 " at path %s",
350 key, path);
351
352 rcu_read_lock();
353
354 metadata_stream = metadata_channel->metadata_stream;
355 assert(metadata_stream);
356
357 pthread_mutex_lock(&metadata_stream->lock);
358 assert(metadata_channel->trace_chunk);
359 assert(metadata_stream->trace_chunk);
360
361 /* Flag once that we have a valid relayd for the stream. */
362 if (relayd_id != (uint64_t) -1ULL) {
363 use_relayd = 1;
364 }
365
366 if (use_relayd) {
367 ret = consumer_send_relayd_stream(metadata_stream, path);
368 if (ret < 0) {
369 goto error_snapshot;
370 }
371 } else {
372 ret = consumer_stream_create_output_files(metadata_stream,
373 false);
374 if (ret < 0) {
375 goto error_snapshot;
376 }
377 }
378
379 do {
380 health_code_update();
381
382 ret_read = lttng_kconsumer_read_subbuffer(metadata_stream, ctx);
383 if (ret_read < 0) {
384 if (ret_read != -EAGAIN) {
385 ERR("Kernel snapshot reading metadata subbuffer (ret: %zd)",
386 ret_read);
387 ret = ret_read;
388 goto error_snapshot;
389 }
390 /* ret_read is negative at this point so we will exit the loop. */
391 continue;
392 }
393 } while (ret_read >= 0);
394
395 if (use_relayd) {
396 close_relayd_stream(metadata_stream);
397 metadata_stream->net_seq_idx = (uint64_t) -1ULL;
398 } else {
399 if (metadata_stream->out_fd >= 0) {
400 ret = close(metadata_stream->out_fd);
401 if (ret < 0) {
402 PERROR("Kernel consumer snapshot metadata close out_fd");
403 /*
404 * Don't go on error here since the snapshot was successful at this
405 * point but somehow the close failed.
406 */
407 }
408 metadata_stream->out_fd = -1;
409 lttng_trace_chunk_put(metadata_stream->trace_chunk);
410 metadata_stream->trace_chunk = NULL;
411 }
412 }
413
414 ret = 0;
415 error_snapshot:
416 pthread_mutex_unlock(&metadata_stream->lock);
417 cds_list_del(&metadata_stream->send_node);
418 consumer_stream_destroy(metadata_stream, NULL);
419 metadata_channel->metadata_stream = NULL;
420 rcu_read_unlock();
421 return ret;
422 }
423
424 /*
425 * Receive command from session daemon and process it.
426 *
427 * Return 1 on success else a negative value or 0.
428 */
429 int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
430 int sock, struct pollfd *consumer_sockpoll)
431 {
432 ssize_t ret;
433 enum lttcomm_return_code ret_code = LTTCOMM_CONSUMERD_SUCCESS;
434 struct lttcomm_consumer_msg msg;
435
436 health_code_update();
437
438 ret = lttcomm_recv_unix_sock(sock, &msg, sizeof(msg));
439 if (ret != sizeof(msg)) {
440 if (ret > 0) {
441 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_ERROR_RECV_CMD);
442 ret = -1;
443 }
444 return ret;
445 }
446
447 health_code_update();
448
449 /* Deprecated command */
450 assert(msg.cmd_type != LTTNG_CONSUMER_STOP);
451
452 health_code_update();
453
454 /* relayd needs RCU read-side protection */
455 rcu_read_lock();
456
457 switch (msg.cmd_type) {
458 case LTTNG_CONSUMER_ADD_RELAYD_SOCKET:
459 {
460 /* Session daemon status message are handled in the following call. */
461 consumer_add_relayd_socket(msg.u.relayd_sock.net_index,
462 msg.u.relayd_sock.type, ctx, sock, consumer_sockpoll,
463 &msg.u.relayd_sock.sock, msg.u.relayd_sock.session_id,
464 msg.u.relayd_sock.relayd_session_id);
465 goto end_nosignal;
466 }
467 case LTTNG_CONSUMER_ADD_CHANNEL:
468 {
469 struct lttng_consumer_channel *new_channel;
470 int ret_recv;
471 const uint64_t chunk_id = msg.u.channel.chunk_id.value;
472
473 health_code_update();
474
475 /* First send a status message before receiving the fds. */
476 ret = consumer_send_status_msg(sock, ret_code);
477 if (ret < 0) {
478 /* Somehow, the session daemon is not responding anymore. */
479 goto error_fatal;
480 }
481
482 health_code_update();
483
484 DBG("consumer_add_channel %" PRIu64, msg.u.channel.channel_key);
485 new_channel = consumer_allocate_channel(msg.u.channel.channel_key,
486 msg.u.channel.session_id,
487 msg.u.channel.chunk_id.is_set ?
488 &chunk_id : NULL,
489 msg.u.channel.pathname,
490 msg.u.channel.name,
491 msg.u.channel.relayd_id, msg.u.channel.output,
492 msg.u.channel.tracefile_size,
493 msg.u.channel.tracefile_count, 0,
494 msg.u.channel.monitor,
495 msg.u.channel.live_timer_interval,
496 NULL, NULL);
497 if (new_channel == NULL) {
498 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_OUTFD_ERROR);
499 goto end_nosignal;
500 }
501 new_channel->nb_init_stream_left = msg.u.channel.nb_init_streams;
502 switch (msg.u.channel.output) {
503 case LTTNG_EVENT_SPLICE:
504 new_channel->output = CONSUMER_CHANNEL_SPLICE;
505 break;
506 case LTTNG_EVENT_MMAP:
507 new_channel->output = CONSUMER_CHANNEL_MMAP;
508 break;
509 default:
510 ERR("Channel output unknown %d", msg.u.channel.output);
511 goto end_nosignal;
512 }
513
514 /* Translate and save channel type. */
515 switch (msg.u.channel.type) {
516 case CONSUMER_CHANNEL_TYPE_DATA:
517 case CONSUMER_CHANNEL_TYPE_METADATA:
518 new_channel->type = msg.u.channel.type;
519 break;
520 default:
521 assert(0);
522 goto end_nosignal;
523 };
524
525 health_code_update();
526
527 if (ctx->on_recv_channel != NULL) {
528 ret_recv = ctx->on_recv_channel(new_channel);
529 if (ret_recv == 0) {
530 ret = consumer_add_channel(new_channel, ctx);
531 } else if (ret_recv < 0) {
532 goto end_nosignal;
533 }
534 } else {
535 ret = consumer_add_channel(new_channel, ctx);
536 }
537 if (msg.u.channel.type == CONSUMER_CHANNEL_TYPE_DATA && !ret) {
538 int monitor_start_ret;
539
540 DBG("Consumer starting monitor timer");
541 consumer_timer_live_start(new_channel,
542 msg.u.channel.live_timer_interval);
543 monitor_start_ret = consumer_timer_monitor_start(
544 new_channel,
545 msg.u.channel.monitor_timer_interval);
546 if (monitor_start_ret < 0) {
547 ERR("Starting channel monitoring timer failed");
548 goto end_nosignal;
549 }
550
551 }
552
553 health_code_update();
554
555 /* If we received an error in add_channel, we need to report it. */
556 if (ret < 0) {
557 ret = consumer_send_status_msg(sock, ret);
558 if (ret < 0) {
559 goto error_fatal;
560 }
561 goto end_nosignal;
562 }
563
564 goto end_nosignal;
565 }
566 case LTTNG_CONSUMER_ADD_STREAM:
567 {
568 int fd;
569 struct lttng_pipe *stream_pipe;
570 struct lttng_consumer_stream *new_stream;
571 struct lttng_consumer_channel *channel;
572 int alloc_ret = 0;
573
574 /*
575 * Get stream's channel reference. Needed when adding the stream to the
576 * global hash table.
577 */
578 channel = consumer_find_channel(msg.u.stream.channel_key);
579 if (!channel) {
580 /*
581 * We could not find the channel. Can happen if cpu hotplug
582 * happens while tearing down.
583 */
584 ERR("Unable to find channel key %" PRIu64, msg.u.stream.channel_key);
585 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
586 }
587
588 health_code_update();
589
590 /* First send a status message before receiving the fds. */
591 ret = consumer_send_status_msg(sock, ret_code);
592 if (ret < 0) {
593 /* Somehow, the session daemon is not responding anymore. */
594 goto error_add_stream_fatal;
595 }
596
597 health_code_update();
598
599 if (ret_code != LTTCOMM_CONSUMERD_SUCCESS) {
600 /* Channel was not found. */
601 goto error_add_stream_nosignal;
602 }
603
604 /* Blocking call */
605 health_poll_entry();
606 ret = lttng_consumer_poll_socket(consumer_sockpoll);
607 health_poll_exit();
608 if (ret) {
609 goto error_add_stream_fatal;
610 }
611
612 health_code_update();
613
614 /* Get stream file descriptor from socket */
615 ret = lttcomm_recv_fds_unix_sock(sock, &fd, 1);
616 if (ret != sizeof(fd)) {
617 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_ERROR_RECV_FD);
618 goto end;
619 }
620
621 health_code_update();
622
623 /*
624 * Send status code to session daemon only if the recv works. If the
625 * above recv() failed, the session daemon is notified through the
626 * error socket and the teardown is eventually done.
627 */
628 ret = consumer_send_status_msg(sock, ret_code);
629 if (ret < 0) {
630 /* Somehow, the session daemon is not responding anymore. */
631 goto error_add_stream_nosignal;
632 }
633
634 health_code_update();
635
636 pthread_mutex_lock(&channel->lock);
637 new_stream = consumer_allocate_stream(channel->key,
638 fd,
639 channel->name,
640 channel->relayd_id,
641 channel->session_id,
642 channel->trace_chunk,
643 msg.u.stream.cpu,
644 &alloc_ret,
645 channel->type,
646 channel->monitor);
647 if (new_stream == NULL) {
648 switch (alloc_ret) {
649 case -ENOMEM:
650 case -EINVAL:
651 default:
652 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_OUTFD_ERROR);
653 break;
654 }
655 pthread_mutex_unlock(&channel->lock);
656 goto error_add_stream_nosignal;
657 }
658
659 new_stream->chan = channel;
660 new_stream->wait_fd = fd;
661 ret = kernctl_get_max_subbuf_size(new_stream->wait_fd,
662 &new_stream->max_sb_size);
663 if (ret < 0) {
664 pthread_mutex_unlock(&channel->lock);
665 ERR("Failed to get kernel maximal subbuffer size");
666 goto error_add_stream_nosignal;
667 }
668
669 consumer_stream_update_channel_attributes(new_stream,
670 channel);
671 switch (channel->output) {
672 case CONSUMER_CHANNEL_SPLICE:
673 new_stream->output = LTTNG_EVENT_SPLICE;
674 ret = utils_create_pipe(new_stream->splice_pipe);
675 if (ret < 0) {
676 pthread_mutex_unlock(&channel->lock);
677 goto error_add_stream_nosignal;
678 }
679 break;
680 case CONSUMER_CHANNEL_MMAP:
681 new_stream->output = LTTNG_EVENT_MMAP;
682 break;
683 default:
684 ERR("Stream output unknown %d", channel->output);
685 pthread_mutex_unlock(&channel->lock);
686 goto error_add_stream_nosignal;
687 }
688
689 /*
690 * We've just assigned the channel to the stream so increment the
691 * refcount right now. We don't need to increment the refcount for
692 * streams in no monitor because we handle manually the cleanup of
693 * those. It is very important to make sure there is NO prior
694 * consumer_del_stream() calls or else the refcount will be unbalanced.
695 */
696 if (channel->monitor) {
697 uatomic_inc(&new_stream->chan->refcount);
698 }
699
700 /*
701 * The buffer flush is done on the session daemon side for the kernel
702 * so no need for the stream "hangup_flush_done" variable to be
703 * tracked. This is important for a kernel stream since we don't rely
704 * on the flush state of the stream to read data. It's not the case for
705 * user space tracing.
706 */
707 new_stream->hangup_flush_done = 0;
708
709 health_code_update();
710
711 pthread_mutex_lock(&new_stream->lock);
712 if (ctx->on_recv_stream) {
713 ret = ctx->on_recv_stream(new_stream);
714 if (ret < 0) {
715 pthread_mutex_unlock(&new_stream->lock);
716 pthread_mutex_unlock(&channel->lock);
717 consumer_stream_free(new_stream);
718 goto error_add_stream_nosignal;
719 }
720 }
721 health_code_update();
722
723 if (new_stream->metadata_flag) {
724 channel->metadata_stream = new_stream;
725 }
726
727 /* Do not monitor this stream. */
728 if (!channel->monitor) {
729 DBG("Kernel consumer add stream %s in no monitor mode with "
730 "relayd id %" PRIu64, new_stream->name,
731 new_stream->net_seq_idx);
732 cds_list_add(&new_stream->send_node, &channel->streams.head);
733 pthread_mutex_unlock(&new_stream->lock);
734 pthread_mutex_unlock(&channel->lock);
735 goto end_add_stream;
736 }
737
738 /* Send stream to relayd if the stream has an ID. */
739 if (new_stream->net_seq_idx != (uint64_t) -1ULL) {
740 ret = consumer_send_relayd_stream(new_stream,
741 new_stream->chan->pathname);
742 if (ret < 0) {
743 pthread_mutex_unlock(&new_stream->lock);
744 pthread_mutex_unlock(&channel->lock);
745 consumer_stream_free(new_stream);
746 goto error_add_stream_nosignal;
747 }
748
749 /*
750 * If adding an extra stream to an already
751 * existing channel (e.g. cpu hotplug), we need
752 * to send the "streams_sent" command to relayd.
753 */
754 if (channel->streams_sent_to_relayd) {
755 ret = consumer_send_relayd_streams_sent(
756 new_stream->net_seq_idx);
757 if (ret < 0) {
758 pthread_mutex_unlock(&new_stream->lock);
759 pthread_mutex_unlock(&channel->lock);
760 goto error_add_stream_nosignal;
761 }
762 }
763 }
764 pthread_mutex_unlock(&new_stream->lock);
765 pthread_mutex_unlock(&channel->lock);
766
767 /* Get the right pipe where the stream will be sent. */
768 if (new_stream->metadata_flag) {
769 consumer_add_metadata_stream(new_stream);
770 stream_pipe = ctx->consumer_metadata_pipe;
771 } else {
772 consumer_add_data_stream(new_stream);
773 stream_pipe = ctx->consumer_data_pipe;
774 }
775
776 /* Visible to other threads */
777 new_stream->globally_visible = 1;
778
779 health_code_update();
780
781 ret = lttng_pipe_write(stream_pipe, &new_stream, sizeof(new_stream));
782 if (ret < 0) {
783 ERR("Consumer write %s stream to pipe %d",
784 new_stream->metadata_flag ? "metadata" : "data",
785 lttng_pipe_get_writefd(stream_pipe));
786 if (new_stream->metadata_flag) {
787 consumer_del_stream_for_metadata(new_stream);
788 } else {
789 consumer_del_stream_for_data(new_stream);
790 }
791 goto error_add_stream_nosignal;
792 }
793
794 DBG("Kernel consumer ADD_STREAM %s (fd: %d) %s with relayd id %" PRIu64,
795 new_stream->name, fd, new_stream->chan->pathname, new_stream->relayd_stream_id);
796 end_add_stream:
797 break;
798 error_add_stream_nosignal:
799 goto end_nosignal;
800 error_add_stream_fatal:
801 goto error_fatal;
802 }
803 case LTTNG_CONSUMER_STREAMS_SENT:
804 {
805 struct lttng_consumer_channel *channel;
806
807 /*
808 * Get stream's channel reference. Needed when adding the stream to the
809 * global hash table.
810 */
811 channel = consumer_find_channel(msg.u.sent_streams.channel_key);
812 if (!channel) {
813 /*
814 * We could not find the channel. Can happen if cpu hotplug
815 * happens while tearing down.
816 */
817 ERR("Unable to find channel key %" PRIu64,
818 msg.u.sent_streams.channel_key);
819 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
820 }
821
822 health_code_update();
823
824 /*
825 * Send status code to session daemon.
826 */
827 ret = consumer_send_status_msg(sock, ret_code);
828 if (ret < 0 || ret_code != LTTCOMM_CONSUMERD_SUCCESS) {
829 /* Somehow, the session daemon is not responding anymore. */
830 goto error_streams_sent_nosignal;
831 }
832
833 health_code_update();
834
835 /*
836 * We should not send this message if we don't monitor the
837 * streams in this channel.
838 */
839 if (!channel->monitor) {
840 goto end_error_streams_sent;
841 }
842
843 health_code_update();
844 /* Send stream to relayd if the stream has an ID. */
845 if (msg.u.sent_streams.net_seq_idx != (uint64_t) -1ULL) {
846 ret = consumer_send_relayd_streams_sent(
847 msg.u.sent_streams.net_seq_idx);
848 if (ret < 0) {
849 goto error_streams_sent_nosignal;
850 }
851 channel->streams_sent_to_relayd = true;
852 }
853 end_error_streams_sent:
854 break;
855 error_streams_sent_nosignal:
856 goto end_nosignal;
857 }
858 case LTTNG_CONSUMER_UPDATE_STREAM:
859 {
860 rcu_read_unlock();
861 return -ENOSYS;
862 }
863 case LTTNG_CONSUMER_DESTROY_RELAYD:
864 {
865 uint64_t index = msg.u.destroy_relayd.net_seq_idx;
866 struct consumer_relayd_sock_pair *relayd;
867
868 DBG("Kernel consumer destroying relayd %" PRIu64, index);
869
870 /* Get relayd reference if exists. */
871 relayd = consumer_find_relayd(index);
872 if (relayd == NULL) {
873 DBG("Unable to find relayd %" PRIu64, index);
874 ret_code = LTTCOMM_CONSUMERD_RELAYD_FAIL;
875 }
876
877 /*
878 * Each relayd socket pair has a refcount of stream attached to it
879 * which tells if the relayd is still active or not depending on the
880 * refcount value.
881 *
882 * This will set the destroy flag of the relayd object and destroy it
883 * if the refcount reaches zero when called.
884 *
885 * The destroy can happen either here or when a stream fd hangs up.
886 */
887 if (relayd) {
888 consumer_flag_relayd_for_destroy(relayd);
889 }
890
891 health_code_update();
892
893 ret = consumer_send_status_msg(sock, ret_code);
894 if (ret < 0) {
895 /* Somehow, the session daemon is not responding anymore. */
896 goto error_fatal;
897 }
898
899 goto end_nosignal;
900 }
901 case LTTNG_CONSUMER_DATA_PENDING:
902 {
903 int32_t ret;
904 uint64_t id = msg.u.data_pending.session_id;
905
906 DBG("Kernel consumer data pending command for id %" PRIu64, id);
907
908 ret = consumer_data_pending(id);
909
910 health_code_update();
911
912 /* Send back returned value to session daemon */
913 ret = lttcomm_send_unix_sock(sock, &ret, sizeof(ret));
914 if (ret < 0) {
915 PERROR("send data pending ret code");
916 goto error_fatal;
917 }
918
919 /*
920 * No need to send back a status message since the data pending
921 * returned value is the response.
922 */
923 break;
924 }
925 case LTTNG_CONSUMER_SNAPSHOT_CHANNEL:
926 {
927 struct lttng_consumer_channel *channel;
928 uint64_t key = msg.u.snapshot_channel.key;
929
930 channel = consumer_find_channel(key);
931 if (!channel) {
932 ERR("Channel %" PRIu64 " not found", key);
933 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
934 } else {
935 pthread_mutex_lock(&channel->lock);
936 if (msg.u.snapshot_channel.metadata == 1) {
937 ret = lttng_kconsumer_snapshot_metadata(channel, key,
938 msg.u.snapshot_channel.pathname,
939 msg.u.snapshot_channel.relayd_id, ctx);
940 if (ret < 0) {
941 ERR("Snapshot metadata failed");
942 ret_code = LTTCOMM_CONSUMERD_SNAPSHOT_FAILED;
943 }
944 } else {
945 ret = lttng_kconsumer_snapshot_channel(channel, key,
946 msg.u.snapshot_channel.pathname,
947 msg.u.snapshot_channel.relayd_id,
948 msg.u.snapshot_channel.nb_packets_per_stream,
949 ctx);
950 if (ret < 0) {
951 ERR("Snapshot channel failed");
952 ret_code = LTTCOMM_CONSUMERD_SNAPSHOT_FAILED;
953 }
954 }
955 pthread_mutex_unlock(&channel->lock);
956 }
957 health_code_update();
958
959 ret = consumer_send_status_msg(sock, ret_code);
960 if (ret < 0) {
961 /* Somehow, the session daemon is not responding anymore. */
962 goto end_nosignal;
963 }
964 break;
965 }
966 case LTTNG_CONSUMER_DESTROY_CHANNEL:
967 {
968 uint64_t key = msg.u.destroy_channel.key;
969 struct lttng_consumer_channel *channel;
970
971 channel = consumer_find_channel(key);
972 if (!channel) {
973 ERR("Kernel consumer destroy channel %" PRIu64 " not found", key);
974 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
975 }
976
977 health_code_update();
978
979 ret = consumer_send_status_msg(sock, ret_code);
980 if (ret < 0) {
981 /* Somehow, the session daemon is not responding anymore. */
982 goto end_destroy_channel;
983 }
984
985 health_code_update();
986
987 /* Stop right now if no channel was found. */
988 if (!channel) {
989 goto end_destroy_channel;
990 }
991
992 /*
993 * This command should ONLY be issued for channel with streams set in
994 * no monitor mode.
995 */
996 assert(!channel->monitor);
997
998 /*
999 * The refcount should ALWAYS be 0 in the case of a channel in no
1000 * monitor mode.
1001 */
1002 assert(!uatomic_sub_return(&channel->refcount, 1));
1003
1004 consumer_del_channel(channel);
1005 end_destroy_channel:
1006 goto end_nosignal;
1007 }
1008 case LTTNG_CONSUMER_DISCARDED_EVENTS:
1009 {
1010 ssize_t ret;
1011 uint64_t count;
1012 struct lttng_consumer_channel *channel;
1013 uint64_t id = msg.u.discarded_events.session_id;
1014 uint64_t key = msg.u.discarded_events.channel_key;
1015
1016 DBG("Kernel consumer discarded events command for session id %"
1017 PRIu64 ", channel key %" PRIu64, id, key);
1018
1019 channel = consumer_find_channel(key);
1020 if (!channel) {
1021 ERR("Kernel consumer discarded events channel %"
1022 PRIu64 " not found", key);
1023 count = 0;
1024 } else {
1025 count = channel->discarded_events;
1026 }
1027
1028 health_code_update();
1029
1030 /* Send back returned value to session daemon */
1031 ret = lttcomm_send_unix_sock(sock, &count, sizeof(count));
1032 if (ret < 0) {
1033 PERROR("send discarded events");
1034 goto error_fatal;
1035 }
1036
1037 break;
1038 }
1039 case LTTNG_CONSUMER_LOST_PACKETS:
1040 {
1041 ssize_t ret;
1042 uint64_t count;
1043 struct lttng_consumer_channel *channel;
1044 uint64_t id = msg.u.lost_packets.session_id;
1045 uint64_t key = msg.u.lost_packets.channel_key;
1046
1047 DBG("Kernel consumer lost packets command for session id %"
1048 PRIu64 ", channel key %" PRIu64, id, key);
1049
1050 channel = consumer_find_channel(key);
1051 if (!channel) {
1052 ERR("Kernel consumer lost packets channel %"
1053 PRIu64 " not found", key);
1054 count = 0;
1055 } else {
1056 count = channel->lost_packets;
1057 }
1058
1059 health_code_update();
1060
1061 /* Send back returned value to session daemon */
1062 ret = lttcomm_send_unix_sock(sock, &count, sizeof(count));
1063 if (ret < 0) {
1064 PERROR("send lost packets");
1065 goto error_fatal;
1066 }
1067
1068 break;
1069 }
1070 case LTTNG_CONSUMER_SET_CHANNEL_MONITOR_PIPE:
1071 {
1072 int channel_monitor_pipe;
1073
1074 ret_code = LTTCOMM_CONSUMERD_SUCCESS;
1075 /* Successfully received the command's type. */
1076 ret = consumer_send_status_msg(sock, ret_code);
1077 if (ret < 0) {
1078 goto error_fatal;
1079 }
1080
1081 ret = lttcomm_recv_fds_unix_sock(sock, &channel_monitor_pipe,
1082 1);
1083 if (ret != sizeof(channel_monitor_pipe)) {
1084 ERR("Failed to receive channel monitor pipe");
1085 goto error_fatal;
1086 }
1087
1088 DBG("Received channel monitor pipe (%d)", channel_monitor_pipe);
1089 ret = consumer_timer_thread_set_channel_monitor_pipe(
1090 channel_monitor_pipe);
1091 if (!ret) {
1092 int flags;
1093
1094 ret_code = LTTCOMM_CONSUMERD_SUCCESS;
1095 /* Set the pipe as non-blocking. */
1096 ret = fcntl(channel_monitor_pipe, F_GETFL, 0);
1097 if (ret == -1) {
1098 PERROR("fcntl get flags of the channel monitoring pipe");
1099 goto error_fatal;
1100 }
1101 flags = ret;
1102
1103 ret = fcntl(channel_monitor_pipe, F_SETFL,
1104 flags | O_NONBLOCK);
1105 if (ret == -1) {
1106 PERROR("fcntl set O_NONBLOCK flag of the channel monitoring pipe");
1107 goto error_fatal;
1108 }
1109 DBG("Channel monitor pipe set as non-blocking");
1110 } else {
1111 ret_code = LTTCOMM_CONSUMERD_ALREADY_SET;
1112 }
1113 ret = consumer_send_status_msg(sock, ret_code);
1114 if (ret < 0) {
1115 goto error_fatal;
1116 }
1117 break;
1118 }
1119 case LTTNG_CONSUMER_ROTATE_CHANNEL:
1120 {
1121 struct lttng_consumer_channel *channel;
1122 uint64_t key = msg.u.rotate_channel.key;
1123
1124 DBG("Consumer rotate channel %" PRIu64, key);
1125
1126 channel = consumer_find_channel(key);
1127 if (!channel) {
1128 ERR("Channel %" PRIu64 " not found", key);
1129 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
1130 } else {
1131 /*
1132 * Sample the rotate position of all the streams in this channel.
1133 */
1134 ret = lttng_consumer_rotate_channel(channel, key,
1135 msg.u.rotate_channel.relayd_id,
1136 msg.u.rotate_channel.metadata,
1137 ctx);
1138 if (ret < 0) {
1139 ERR("Rotate channel failed");
1140 ret_code = LTTCOMM_CONSUMERD_ROTATION_FAIL;
1141 }
1142
1143 health_code_update();
1144 }
1145 ret = consumer_send_status_msg(sock, ret_code);
1146 if (ret < 0) {
1147 /* Somehow, the session daemon is not responding anymore. */
1148 goto error_rotate_channel;
1149 }
1150 if (channel) {
1151 /* Rotate the streams that are ready right now. */
1152 ret = lttng_consumer_rotate_ready_streams(
1153 channel, key, ctx);
1154 if (ret < 0) {
1155 ERR("Rotate ready streams failed");
1156 }
1157 }
1158 break;
1159 error_rotate_channel:
1160 goto end_nosignal;
1161 }
1162 case LTTNG_CONSUMER_CLEAR_CHANNEL:
1163 {
1164 struct lttng_consumer_channel *channel;
1165 uint64_t key = msg.u.clear_channel.key;
1166
1167 channel = consumer_find_channel(key);
1168 if (!channel) {
1169 DBG("Channel %" PRIu64 " not found", key);
1170 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
1171 } else {
1172 ret = lttng_consumer_clear_channel(channel);
1173 if (ret) {
1174 ERR("Clear channel failed");
1175 ret_code = ret;
1176 }
1177
1178 health_code_update();
1179 }
1180 ret = consumer_send_status_msg(sock, ret_code);
1181 if (ret < 0) {
1182 /* Somehow, the session daemon is not responding anymore. */
1183 goto end_nosignal;
1184 }
1185
1186 break;
1187 }
1188 case LTTNG_CONSUMER_INIT:
1189 {
1190 ret_code = lttng_consumer_init_command(ctx,
1191 msg.u.init.sessiond_uuid);
1192 health_code_update();
1193 ret = consumer_send_status_msg(sock, ret_code);
1194 if (ret < 0) {
1195 /* Somehow, the session daemon is not responding anymore. */
1196 goto end_nosignal;
1197 }
1198 break;
1199 }
1200 case LTTNG_CONSUMER_CREATE_TRACE_CHUNK:
1201 {
1202 const struct lttng_credentials credentials = {
1203 .uid = msg.u.create_trace_chunk.credentials.value.uid,
1204 .gid = msg.u.create_trace_chunk.credentials.value.gid,
1205 };
1206 const bool is_local_trace =
1207 !msg.u.create_trace_chunk.relayd_id.is_set;
1208 const uint64_t relayd_id =
1209 msg.u.create_trace_chunk.relayd_id.value;
1210 const char *chunk_override_name =
1211 *msg.u.create_trace_chunk.override_name ?
1212 msg.u.create_trace_chunk.override_name :
1213 NULL;
1214 struct lttng_directory_handle *chunk_directory_handle = NULL;
1215
1216 /*
1217 * The session daemon will only provide a chunk directory file
1218 * descriptor for local traces.
1219 */
1220 if (is_local_trace) {
1221 int chunk_dirfd;
1222
1223 /* Acknowledge the reception of the command. */
1224 ret = consumer_send_status_msg(sock,
1225 LTTCOMM_CONSUMERD_SUCCESS);
1226 if (ret < 0) {
1227 /* Somehow, the session daemon is not responding anymore. */
1228 goto end_nosignal;
1229 }
1230
1231 ret = lttcomm_recv_fds_unix_sock(sock, &chunk_dirfd, 1);
1232 if (ret != sizeof(chunk_dirfd)) {
1233 ERR("Failed to receive trace chunk directory file descriptor");
1234 goto error_fatal;
1235 }
1236
1237 DBG("Received trace chunk directory fd (%d)",
1238 chunk_dirfd);
1239 chunk_directory_handle = lttng_directory_handle_create_from_dirfd(
1240 chunk_dirfd);
1241 if (!chunk_directory_handle) {
1242 ERR("Failed to initialize chunk directory handle from directory file descriptor");
1243 if (close(chunk_dirfd)) {
1244 PERROR("Failed to close chunk directory file descriptor");
1245 }
1246 goto error_fatal;
1247 }
1248 }
1249
1250 ret_code = lttng_consumer_create_trace_chunk(
1251 !is_local_trace ? &relayd_id : NULL,
1252 msg.u.create_trace_chunk.session_id,
1253 msg.u.create_trace_chunk.chunk_id,
1254 (time_t) msg.u.create_trace_chunk
1255 .creation_timestamp,
1256 chunk_override_name,
1257 msg.u.create_trace_chunk.credentials.is_set ?
1258 &credentials :
1259 NULL,
1260 chunk_directory_handle);
1261 lttng_directory_handle_put(chunk_directory_handle);
1262 goto end_msg_sessiond;
1263 }
1264 case LTTNG_CONSUMER_CLOSE_TRACE_CHUNK:
1265 {
1266 enum lttng_trace_chunk_command_type close_command =
1267 msg.u.close_trace_chunk.close_command.value;
1268 const uint64_t relayd_id =
1269 msg.u.close_trace_chunk.relayd_id.value;
1270 struct lttcomm_consumer_close_trace_chunk_reply reply;
1271 char path[LTTNG_PATH_MAX];
1272
1273 ret_code = lttng_consumer_close_trace_chunk(
1274 msg.u.close_trace_chunk.relayd_id.is_set ?
1275 &relayd_id :
1276 NULL,
1277 msg.u.close_trace_chunk.session_id,
1278 msg.u.close_trace_chunk.chunk_id,
1279 (time_t) msg.u.close_trace_chunk.close_timestamp,
1280 msg.u.close_trace_chunk.close_command.is_set ?
1281 &close_command :
1282 NULL, path);
1283 reply.ret_code = ret_code;
1284 reply.path_length = strlen(path) + 1;
1285 ret = lttcomm_send_unix_sock(sock, &reply, sizeof(reply));
1286 if (ret != sizeof(reply)) {
1287 goto error_fatal;
1288 }
1289 ret = lttcomm_send_unix_sock(sock, path, reply.path_length);
1290 if (ret != reply.path_length) {
1291 goto error_fatal;
1292 }
1293 goto end_nosignal;
1294 }
1295 case LTTNG_CONSUMER_TRACE_CHUNK_EXISTS:
1296 {
1297 const uint64_t relayd_id =
1298 msg.u.trace_chunk_exists.relayd_id.value;
1299
1300 ret_code = lttng_consumer_trace_chunk_exists(
1301 msg.u.trace_chunk_exists.relayd_id.is_set ?
1302 &relayd_id : NULL,
1303 msg.u.trace_chunk_exists.session_id,
1304 msg.u.trace_chunk_exists.chunk_id);
1305 goto end_msg_sessiond;
1306 }
1307 default:
1308 goto end_nosignal;
1309 }
1310
1311 end_nosignal:
1312 /*
1313 * Return 1 to indicate success since the 0 value can be a socket
1314 * shutdown during the recv() or send() call.
1315 */
1316 ret = 1;
1317 goto end;
1318 error_fatal:
1319 /* This will issue a consumer stop. */
1320 ret = -1;
1321 goto end;
1322 end_msg_sessiond:
1323 /*
1324 * The returned value here is not useful since either way we'll return 1 to
1325 * the caller because the session daemon socket management is done
1326 * elsewhere. Returning a negative code or 0 will shutdown the consumer.
1327 */
1328 ret = consumer_send_status_msg(sock, ret_code);
1329 if (ret < 0) {
1330 goto error_fatal;
1331 }
1332 ret = 1;
1333 end:
1334 health_code_update();
1335 rcu_read_unlock();
1336 return ret;
1337 }
1338
1339 /*
1340 * Populate index values of a kernel stream. Values are set in big endian order.
1341 *
1342 * Return 0 on success or else a negative value.
1343 */
1344 static int get_index_values(struct ctf_packet_index *index, int infd)
1345 {
1346 int ret;
1347 uint64_t packet_size, content_size, timestamp_begin, timestamp_end,
1348 events_discarded, stream_id, stream_instance_id,
1349 packet_seq_num;
1350
1351 ret = kernctl_get_timestamp_begin(infd, &timestamp_begin);
1352 if (ret < 0) {
1353 PERROR("kernctl_get_timestamp_begin");
1354 goto error;
1355 }
1356
1357 ret = kernctl_get_timestamp_end(infd, &timestamp_end);
1358 if (ret < 0) {
1359 PERROR("kernctl_get_timestamp_end");
1360 goto error;
1361 }
1362
1363 ret = kernctl_get_events_discarded(infd, &events_discarded);
1364 if (ret < 0) {
1365 PERROR("kernctl_get_events_discarded");
1366 goto error;
1367 }
1368
1369 ret = kernctl_get_content_size(infd, &content_size);
1370 if (ret < 0) {
1371 PERROR("kernctl_get_content_size");
1372 goto error;
1373 }
1374
1375 ret = kernctl_get_packet_size(infd, &packet_size);
1376 if (ret < 0) {
1377 PERROR("kernctl_get_packet_size");
1378 goto error;
1379 }
1380
1381 ret = kernctl_get_stream_id(infd, &stream_id);
1382 if (ret < 0) {
1383 PERROR("kernctl_get_stream_id");
1384 goto error;
1385 }
1386
1387 ret = kernctl_get_instance_id(infd, &stream_instance_id);
1388 if (ret < 0) {
1389 if (ret == -ENOTTY) {
1390 /* Command not implemented by lttng-modules. */
1391 stream_instance_id = -1ULL;
1392 } else {
1393 PERROR("kernctl_get_instance_id");
1394 goto error;
1395 }
1396 }
1397
1398 ret = kernctl_get_sequence_number(infd, &packet_seq_num);
1399 if (ret < 0) {
1400 if (ret == -ENOTTY) {
1401 /* Command not implemented by lttng-modules. */
1402 packet_seq_num = -1ULL;
1403 ret = 0;
1404 } else {
1405 PERROR("kernctl_get_sequence_number");
1406 goto error;
1407 }
1408 }
1409 index->packet_seq_num = htobe64(index->packet_seq_num);
1410
1411 *index = (typeof(*index)) {
1412 .offset = index->offset,
1413 .packet_size = htobe64(packet_size),
1414 .content_size = htobe64(content_size),
1415 .timestamp_begin = htobe64(timestamp_begin),
1416 .timestamp_end = htobe64(timestamp_end),
1417 .events_discarded = htobe64(events_discarded),
1418 .stream_id = htobe64(stream_id),
1419 .stream_instance_id = htobe64(stream_instance_id),
1420 .packet_seq_num = htobe64(packet_seq_num),
1421 };
1422
1423 error:
1424 return ret;
1425 }
1426 /*
1427 * Sync metadata meaning request them to the session daemon and snapshot to the
1428 * metadata thread can consumer them.
1429 *
1430 * Metadata stream lock MUST be acquired.
1431 *
1432 * Return 0 if new metadatda is available, EAGAIN if the metadata stream
1433 * is empty or a negative value on error.
1434 */
1435 int lttng_kconsumer_sync_metadata(struct lttng_consumer_stream *metadata)
1436 {
1437 int ret;
1438
1439 assert(metadata);
1440
1441 ret = kernctl_buffer_flush(metadata->wait_fd);
1442 if (ret < 0) {
1443 ERR("Failed to flush kernel stream");
1444 goto end;
1445 }
1446
1447 ret = kernctl_snapshot(metadata->wait_fd);
1448 if (ret < 0) {
1449 if (ret != -EAGAIN) {
1450 ERR("Sync metadata, taking kernel snapshot failed.");
1451 goto end;
1452 }
1453 DBG("Sync metadata, no new kernel metadata");
1454 /* No new metadata, exit. */
1455 ret = ENODATA;
1456 goto end;
1457 }
1458
1459 end:
1460 return ret;
1461 }
1462
1463 static
1464 int update_stream_stats(struct lttng_consumer_stream *stream)
1465 {
1466 int ret;
1467 uint64_t seq, discarded;
1468
1469 ret = kernctl_get_sequence_number(stream->wait_fd, &seq);
1470 if (ret < 0) {
1471 if (ret == -ENOTTY) {
1472 /* Command not implemented by lttng-modules. */
1473 seq = -1ULL;
1474 stream->sequence_number_unavailable = true;
1475 } else {
1476 PERROR("kernctl_get_sequence_number");
1477 goto end;
1478 }
1479 }
1480
1481 /*
1482 * Start the sequence when we extract the first packet in case we don't
1483 * start at 0 (for example if a consumer is not connected to the
1484 * session immediately after the beginning).
1485 */
1486 if (stream->last_sequence_number == -1ULL) {
1487 stream->last_sequence_number = seq;
1488 } else if (seq > stream->last_sequence_number) {
1489 stream->chan->lost_packets += seq -
1490 stream->last_sequence_number - 1;
1491 } else {
1492 /* seq <= last_sequence_number */
1493 ERR("Sequence number inconsistent : prev = %" PRIu64
1494 ", current = %" PRIu64,
1495 stream->last_sequence_number, seq);
1496 ret = -1;
1497 goto end;
1498 }
1499 stream->last_sequence_number = seq;
1500
1501 ret = kernctl_get_events_discarded(stream->wait_fd, &discarded);
1502 if (ret < 0) {
1503 PERROR("kernctl_get_events_discarded");
1504 goto end;
1505 }
1506 if (discarded < stream->last_discarded_events) {
1507 /*
1508 * Overflow has occurred. We assume only one wrap-around
1509 * has occurred.
1510 */
1511 stream->chan->discarded_events += (1ULL << (CAA_BITS_PER_LONG - 1)) -
1512 stream->last_discarded_events + discarded;
1513 } else {
1514 stream->chan->discarded_events += discarded -
1515 stream->last_discarded_events;
1516 }
1517 stream->last_discarded_events = discarded;
1518 ret = 0;
1519
1520 end:
1521 return ret;
1522 }
1523
1524 /*
1525 * Check if the local version of the metadata stream matches with the version
1526 * of the metadata stream in the kernel. If it was updated, set the reset flag
1527 * on the stream.
1528 */
1529 static
1530 int metadata_stream_check_version(int infd, struct lttng_consumer_stream *stream)
1531 {
1532 int ret;
1533 uint64_t cur_version;
1534
1535 ret = kernctl_get_metadata_version(infd, &cur_version);
1536 if (ret < 0) {
1537 if (ret == -ENOTTY) {
1538 /*
1539 * LTTng-modules does not implement this
1540 * command.
1541 */
1542 ret = 0;
1543 goto end;
1544 }
1545 ERR("Failed to get the metadata version");
1546 goto end;
1547 }
1548
1549 if (stream->metadata_version == cur_version) {
1550 ret = 0;
1551 goto end;
1552 }
1553
1554 DBG("New metadata version detected");
1555 stream->metadata_version = cur_version;
1556 stream->reset_metadata_flag = 1;
1557 ret = 0;
1558
1559 end:
1560 return ret;
1561 }
1562
1563 /*
1564 * Consume data on a file descriptor and write it on a trace file.
1565 * The stream and channel locks must be held by the caller.
1566 */
1567 ssize_t lttng_kconsumer_read_subbuffer(struct lttng_consumer_stream *stream,
1568 struct lttng_consumer_local_data *ctx)
1569 {
1570 unsigned long len, subbuf_size, padding;
1571 int err, write_index = 1, rotation_ret;
1572 ssize_t ret = 0;
1573 int infd = stream->wait_fd;
1574 struct ctf_packet_index index = {};
1575
1576 DBG("In read_subbuffer (infd : %d)", infd);
1577
1578 /*
1579 * If the stream was flagged to be ready for rotation before we extract the
1580 * next packet, rotate it now.
1581 */
1582 if (stream->rotate_ready) {
1583 DBG("Rotate stream before extracting data");
1584 rotation_ret = lttng_consumer_rotate_stream(ctx, stream);
1585 if (rotation_ret < 0) {
1586 ERR("Stream rotation error");
1587 ret = -1;
1588 goto error;
1589 }
1590 }
1591
1592 /* Get the next subbuffer */
1593 err = kernctl_get_next_subbuf(infd);
1594 if (err != 0) {
1595 /*
1596 * This is a debug message even for single-threaded consumer,
1597 * because poll() have more relaxed criterions than get subbuf,
1598 * so get_subbuf may fail for short race windows where poll()
1599 * would issue wakeups.
1600 */
1601 DBG("Reserving sub buffer failed (everything is normal, "
1602 "it is due to concurrency)");
1603 ret = err;
1604 goto error;
1605 }
1606
1607 /* Get the full subbuffer size including padding */
1608 err = kernctl_get_padded_subbuf_size(infd, &len);
1609 if (err != 0) {
1610 PERROR("Getting sub-buffer len failed.");
1611 err = kernctl_put_subbuf(infd);
1612 if (err != 0) {
1613 if (err == -EFAULT) {
1614 PERROR("Error in unreserving sub buffer\n");
1615 } else if (err == -EIO) {
1616 /* Should never happen with newer LTTng versions */
1617 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1618 }
1619 ret = err;
1620 goto error;
1621 }
1622 ret = err;
1623 goto error;
1624 }
1625
1626 if (!stream->metadata_flag) {
1627 ret = get_index_values(&index, infd);
1628 if (ret < 0) {
1629 err = kernctl_put_subbuf(infd);
1630 if (err != 0) {
1631 if (err == -EFAULT) {
1632 PERROR("Error in unreserving sub buffer\n");
1633 } else if (err == -EIO) {
1634 /* Should never happen with newer LTTng versions */
1635 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1636 }
1637 ret = err;
1638 goto error;
1639 }
1640 goto error;
1641 }
1642 ret = update_stream_stats(stream);
1643 if (ret < 0) {
1644 err = kernctl_put_subbuf(infd);
1645 if (err != 0) {
1646 if (err == -EFAULT) {
1647 PERROR("Error in unreserving sub buffer\n");
1648 } else if (err == -EIO) {
1649 /* Should never happen with newer LTTng versions */
1650 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1651 }
1652 ret = err;
1653 goto error;
1654 }
1655 goto error;
1656 }
1657 } else {
1658 write_index = 0;
1659 ret = metadata_stream_check_version(infd, stream);
1660 if (ret < 0) {
1661 err = kernctl_put_subbuf(infd);
1662 if (err != 0) {
1663 if (err == -EFAULT) {
1664 PERROR("Error in unreserving sub buffer\n");
1665 } else if (err == -EIO) {
1666 /* Should never happen with newer LTTng versions */
1667 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1668 }
1669 ret = err;
1670 goto error;
1671 }
1672 goto error;
1673 }
1674 }
1675
1676 switch (stream->chan->output) {
1677 case CONSUMER_CHANNEL_SPLICE:
1678 /*
1679 * XXX: The lttng-modules splice "actor" does not handle copying
1680 * partial pages hence only using the subbuffer size without the
1681 * padding makes the splice fail.
1682 */
1683 subbuf_size = len;
1684 padding = 0;
1685
1686 /* splice the subbuffer to the tracefile */
1687 ret = lttng_consumer_on_read_subbuffer_splice(ctx, stream, subbuf_size,
1688 padding, &index);
1689 /*
1690 * XXX: Splice does not support network streaming so the return value
1691 * is simply checked against subbuf_size and not like the mmap() op.
1692 */
1693 if (ret != subbuf_size) {
1694 /*
1695 * display the error but continue processing to try
1696 * to release the subbuffer
1697 */
1698 ERR("Error splicing to tracefile (ret: %zd != len: %lu)",
1699 ret, subbuf_size);
1700 write_index = 0;
1701 }
1702 break;
1703 case CONSUMER_CHANNEL_MMAP:
1704 /* Get subbuffer size without padding */
1705 err = kernctl_get_subbuf_size(infd, &subbuf_size);
1706 if (err != 0) {
1707 PERROR("Getting sub-buffer len failed.");
1708 err = kernctl_put_subbuf(infd);
1709 if (err != 0) {
1710 if (err == -EFAULT) {
1711 PERROR("Error in unreserving sub buffer\n");
1712 } else if (err == -EIO) {
1713 /* Should never happen with newer LTTng versions */
1714 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1715 }
1716 ret = err;
1717 goto error;
1718 }
1719 ret = err;
1720 goto error;
1721 }
1722
1723 /* Make sure the tracer is not gone mad on us! */
1724 assert(len >= subbuf_size);
1725
1726 padding = len - subbuf_size;
1727
1728 /* write the subbuffer to the tracefile */
1729 ret = lttng_consumer_on_read_subbuffer_mmap(ctx, stream, subbuf_size,
1730 padding, &index);
1731 /*
1732 * The mmap operation should write subbuf_size amount of data when
1733 * network streaming or the full padding (len) size when we are _not_
1734 * streaming.
1735 */
1736 if ((ret != subbuf_size && stream->net_seq_idx != (uint64_t) -1ULL) ||
1737 (ret != len && stream->net_seq_idx == (uint64_t) -1ULL)) {
1738 /*
1739 * Display the error but continue processing to try to release the
1740 * subbuffer. This is a DBG statement since this is possible to
1741 * happen without being a critical error.
1742 */
1743 DBG("Error writing to tracefile "
1744 "(ret: %zd != len: %lu != subbuf_size: %lu)",
1745 ret, len, subbuf_size);
1746 write_index = 0;
1747 }
1748 break;
1749 default:
1750 ERR("Unknown output method");
1751 ret = -EPERM;
1752 }
1753
1754 err = kernctl_put_next_subbuf(infd);
1755 if (err != 0) {
1756 if (err == -EFAULT) {
1757 PERROR("Error in unreserving sub buffer\n");
1758 } else if (err == -EIO) {
1759 /* Should never happen with newer LTTng versions */
1760 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1761 }
1762 ret = err;
1763 goto error;
1764 }
1765
1766 /* Write index if needed. */
1767 if (!write_index) {
1768 goto rotate;
1769 }
1770
1771 if (stream->chan->live_timer_interval && !stream->metadata_flag) {
1772 /*
1773 * In live, block until all the metadata is sent.
1774 */
1775 pthread_mutex_lock(&stream->metadata_timer_lock);
1776 assert(!stream->missed_metadata_flush);
1777 stream->waiting_on_metadata = true;
1778 pthread_mutex_unlock(&stream->metadata_timer_lock);
1779
1780 err = consumer_stream_sync_metadata(ctx, stream->session_id);
1781
1782 pthread_mutex_lock(&stream->metadata_timer_lock);
1783 stream->waiting_on_metadata = false;
1784 if (stream->missed_metadata_flush) {
1785 stream->missed_metadata_flush = false;
1786 pthread_mutex_unlock(&stream->metadata_timer_lock);
1787 (void) consumer_flush_kernel_index(stream);
1788 } else {
1789 pthread_mutex_unlock(&stream->metadata_timer_lock);
1790 }
1791 if (err < 0) {
1792 goto error;
1793 }
1794 }
1795
1796 err = consumer_stream_write_index(stream, &index);
1797 if (err < 0) {
1798 goto error;
1799 }
1800
1801 rotate:
1802 /*
1803 * After extracting the packet, we check if the stream is now ready to be
1804 * rotated and perform the action immediately.
1805 */
1806 rotation_ret = lttng_consumer_stream_is_rotate_ready(stream);
1807 if (rotation_ret == 1) {
1808 rotation_ret = lttng_consumer_rotate_stream(ctx, stream);
1809 if (rotation_ret < 0) {
1810 ERR("Stream rotation error");
1811 ret = -1;
1812 goto error;
1813 }
1814 } else if (rotation_ret < 0) {
1815 ERR("Checking if stream is ready to rotate");
1816 ret = -1;
1817 goto error;
1818 }
1819
1820 error:
1821 return ret;
1822 }
1823
1824 int lttng_kconsumer_on_recv_stream(struct lttng_consumer_stream *stream)
1825 {
1826 int ret;
1827
1828 assert(stream);
1829
1830 /*
1831 * Don't create anything if this is set for streaming or if there is
1832 * no current trace chunk on the parent channel.
1833 */
1834 if (stream->net_seq_idx == (uint64_t) -1ULL && stream->chan->monitor &&
1835 stream->chan->trace_chunk) {
1836 ret = consumer_stream_create_output_files(stream, true);
1837 if (ret) {
1838 goto error;
1839 }
1840 }
1841
1842 if (stream->output == LTTNG_EVENT_MMAP) {
1843 /* get the len of the mmap region */
1844 unsigned long mmap_len;
1845
1846 ret = kernctl_get_mmap_len(stream->wait_fd, &mmap_len);
1847 if (ret != 0) {
1848 PERROR("kernctl_get_mmap_len");
1849 goto error_close_fd;
1850 }
1851 stream->mmap_len = (size_t) mmap_len;
1852
1853 stream->mmap_base = mmap(NULL, stream->mmap_len, PROT_READ,
1854 MAP_PRIVATE, stream->wait_fd, 0);
1855 if (stream->mmap_base == MAP_FAILED) {
1856 PERROR("Error mmaping");
1857 ret = -1;
1858 goto error_close_fd;
1859 }
1860 }
1861
1862 /* we return 0 to let the library handle the FD internally */
1863 return 0;
1864
1865 error_close_fd:
1866 if (stream->out_fd >= 0) {
1867 int err;
1868
1869 err = close(stream->out_fd);
1870 assert(!err);
1871 stream->out_fd = -1;
1872 }
1873 error:
1874 return ret;
1875 }
1876
1877 /*
1878 * Check if data is still being extracted from the buffers for a specific
1879 * stream. Consumer data lock MUST be acquired before calling this function
1880 * and the stream lock.
1881 *
1882 * Return 1 if the traced data are still getting read else 0 meaning that the
1883 * data is available for trace viewer reading.
1884 */
1885 int lttng_kconsumer_data_pending(struct lttng_consumer_stream *stream)
1886 {
1887 int ret;
1888
1889 assert(stream);
1890
1891 if (stream->endpoint_status != CONSUMER_ENDPOINT_ACTIVE) {
1892 ret = 0;
1893 goto end;
1894 }
1895
1896 ret = kernctl_get_next_subbuf(stream->wait_fd);
1897 if (ret == 0) {
1898 /* There is still data so let's put back this subbuffer. */
1899 ret = kernctl_put_subbuf(stream->wait_fd);
1900 assert(ret == 0);
1901 ret = 1; /* Data is pending */
1902 goto end;
1903 }
1904
1905 /* Data is NOT pending and ready to be read. */
1906 ret = 0;
1907
1908 end:
1909 return ret;
1910 }
This page took 0.108654 seconds and 5 git commands to generate.