Use case-scope error labels when handling LTTNG_CONSUMER_ADD_STREAM
[lttng-tools.git] / src / common / kernel-consumer / kernel-consumer.c
1 /*
2 * Copyright (C) 2011 - Julien Desfossez <julien.desfossez@polymtl.ca>
3 * Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
4 * Copyright (C) 2017 - Jérémie Galarneau <jeremie.galarneau@efficios.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2 only,
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #define _LGPL_SOURCE
21 #include <assert.h>
22 #include <poll.h>
23 #include <pthread.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <sys/mman.h>
27 #include <sys/socket.h>
28 #include <sys/types.h>
29 #include <inttypes.h>
30 #include <unistd.h>
31 #include <sys/stat.h>
32
33 #include <bin/lttng-consumerd/health-consumerd.h>
34 #include <common/common.h>
35 #include <common/kernel-ctl/kernel-ctl.h>
36 #include <common/sessiond-comm/sessiond-comm.h>
37 #include <common/sessiond-comm/relayd.h>
38 #include <common/compat/fcntl.h>
39 #include <common/compat/endian.h>
40 #include <common/pipe.h>
41 #include <common/relayd/relayd.h>
42 #include <common/utils.h>
43 #include <common/consumer/consumer-stream.h>
44 #include <common/index/index.h>
45 #include <common/consumer/consumer-timer.h>
46 #include <common/optional.h>
47
48 #include "kernel-consumer.h"
49
50 extern struct lttng_consumer_global_data consumer_data;
51 extern int consumer_poll_timeout;
52
53 /*
54 * Take a snapshot for a specific fd
55 *
56 * Returns 0 on success, < 0 on error
57 */
58 int lttng_kconsumer_take_snapshot(struct lttng_consumer_stream *stream)
59 {
60 int ret = 0;
61 int infd = stream->wait_fd;
62
63 ret = kernctl_snapshot(infd);
64 /*
65 * -EAGAIN is not an error, it just means that there is no data to
66 * be read.
67 */
68 if (ret != 0 && ret != -EAGAIN) {
69 PERROR("Getting sub-buffer snapshot.");
70 }
71
72 return ret;
73 }
74
75 /*
76 * Sample consumed and produced positions for a specific fd.
77 *
78 * Returns 0 on success, < 0 on error.
79 */
80 int lttng_kconsumer_sample_snapshot_positions(
81 struct lttng_consumer_stream *stream)
82 {
83 assert(stream);
84
85 return kernctl_snapshot_sample_positions(stream->wait_fd);
86 }
87
88 /*
89 * Get the produced position
90 *
91 * Returns 0 on success, < 0 on error
92 */
93 int lttng_kconsumer_get_produced_snapshot(struct lttng_consumer_stream *stream,
94 unsigned long *pos)
95 {
96 int ret;
97 int infd = stream->wait_fd;
98
99 ret = kernctl_snapshot_get_produced(infd, pos);
100 if (ret != 0) {
101 PERROR("kernctl_snapshot_get_produced");
102 }
103
104 return ret;
105 }
106
107 /*
108 * Get the consumerd position
109 *
110 * Returns 0 on success, < 0 on error
111 */
112 int lttng_kconsumer_get_consumed_snapshot(struct lttng_consumer_stream *stream,
113 unsigned long *pos)
114 {
115 int ret;
116 int infd = stream->wait_fd;
117
118 ret = kernctl_snapshot_get_consumed(infd, pos);
119 if (ret != 0) {
120 PERROR("kernctl_snapshot_get_consumed");
121 }
122
123 return ret;
124 }
125
126 /*
127 * Take a snapshot of all the stream of a channel
128 * RCU read-side lock must be held across this function to ensure existence of
129 * channel. The channel lock must be held by the caller.
130 *
131 * Returns 0 on success, < 0 on error
132 */
133 static int lttng_kconsumer_snapshot_channel(
134 struct lttng_consumer_channel *channel,
135 uint64_t key, char *path, uint64_t relayd_id,
136 uint64_t nb_packets_per_stream,
137 struct lttng_consumer_local_data *ctx)
138 {
139 int ret;
140 struct lttng_consumer_stream *stream;
141
142 DBG("Kernel consumer snapshot channel %" PRIu64, key);
143
144 rcu_read_lock();
145
146 /* Splice is not supported yet for channel snapshot. */
147 if (channel->output != CONSUMER_CHANNEL_MMAP) {
148 ERR("Unsupported output type for channel \"%s\": mmap output is required to record a snapshot",
149 channel->name);
150 ret = -1;
151 goto end;
152 }
153
154 cds_list_for_each_entry(stream, &channel->streams.head, send_node) {
155 unsigned long consumed_pos, produced_pos;
156
157 health_code_update();
158
159 /*
160 * Lock stream because we are about to change its state.
161 */
162 pthread_mutex_lock(&stream->lock);
163
164 assert(channel->trace_chunk);
165 if (!lttng_trace_chunk_get(channel->trace_chunk)) {
166 /*
167 * Can't happen barring an internal error as the channel
168 * holds a reference to the trace chunk.
169 */
170 ERR("Failed to acquire reference to channel's trace chunk");
171 ret = -1;
172 goto end_unlock;
173 }
174 assert(!stream->trace_chunk);
175 stream->trace_chunk = channel->trace_chunk;
176
177 /*
178 * Assign the received relayd ID so we can use it for streaming. The streams
179 * are not visible to anyone so this is OK to change it.
180 */
181 stream->net_seq_idx = relayd_id;
182 channel->relayd_id = relayd_id;
183 if (relayd_id != (uint64_t) -1ULL) {
184 ret = consumer_send_relayd_stream(stream, path);
185 if (ret < 0) {
186 ERR("sending stream to relayd");
187 goto end_unlock;
188 }
189 } else {
190 ret = consumer_stream_create_output_files(stream,
191 false);
192 if (ret < 0) {
193 goto end_unlock;
194 }
195 DBG("Kernel consumer snapshot stream (%" PRIu64 ")",
196 stream->key);
197 }
198
199 ret = kernctl_buffer_flush_empty(stream->wait_fd);
200 if (ret < 0) {
201 /*
202 * Doing a buffer flush which does not take into
203 * account empty packets. This is not perfect
204 * for stream intersection, but required as a
205 * fall-back when "flush_empty" is not
206 * implemented by lttng-modules.
207 */
208 ret = kernctl_buffer_flush(stream->wait_fd);
209 if (ret < 0) {
210 ERR("Failed to flush kernel stream");
211 goto end_unlock;
212 }
213 goto end_unlock;
214 }
215
216 ret = lttng_kconsumer_take_snapshot(stream);
217 if (ret < 0) {
218 ERR("Taking kernel snapshot");
219 goto end_unlock;
220 }
221
222 ret = lttng_kconsumer_get_produced_snapshot(stream, &produced_pos);
223 if (ret < 0) {
224 ERR("Produced kernel snapshot position");
225 goto end_unlock;
226 }
227
228 ret = lttng_kconsumer_get_consumed_snapshot(stream, &consumed_pos);
229 if (ret < 0) {
230 ERR("Consumerd kernel snapshot position");
231 goto end_unlock;
232 }
233
234 consumed_pos = consumer_get_consume_start_pos(consumed_pos,
235 produced_pos, nb_packets_per_stream,
236 stream->max_sb_size);
237
238 while ((long) (consumed_pos - produced_pos) < 0) {
239 ssize_t read_len;
240 unsigned long len, padded_len;
241
242 health_code_update();
243
244 DBG("Kernel consumer taking snapshot at pos %lu", consumed_pos);
245
246 ret = kernctl_get_subbuf(stream->wait_fd, &consumed_pos);
247 if (ret < 0) {
248 if (ret != -EAGAIN) {
249 PERROR("kernctl_get_subbuf snapshot");
250 goto end_unlock;
251 }
252 DBG("Kernel consumer get subbuf failed. Skipping it.");
253 consumed_pos += stream->max_sb_size;
254 stream->chan->lost_packets++;
255 continue;
256 }
257
258 ret = kernctl_get_subbuf_size(stream->wait_fd, &len);
259 if (ret < 0) {
260 ERR("Snapshot kernctl_get_subbuf_size");
261 goto error_put_subbuf;
262 }
263
264 ret = kernctl_get_padded_subbuf_size(stream->wait_fd, &padded_len);
265 if (ret < 0) {
266 ERR("Snapshot kernctl_get_padded_subbuf_size");
267 goto error_put_subbuf;
268 }
269
270 read_len = lttng_consumer_on_read_subbuffer_mmap(ctx, stream, len,
271 padded_len - len, NULL);
272 /*
273 * We write the padded len in local tracefiles but the data len
274 * when using a relay. Display the error but continue processing
275 * to try to release the subbuffer.
276 */
277 if (relayd_id != (uint64_t) -1ULL) {
278 if (read_len != len) {
279 ERR("Error sending to the relay (ret: %zd != len: %lu)",
280 read_len, len);
281 }
282 } else {
283 if (read_len != padded_len) {
284 ERR("Error writing to tracefile (ret: %zd != len: %lu)",
285 read_len, padded_len);
286 }
287 }
288
289 ret = kernctl_put_subbuf(stream->wait_fd);
290 if (ret < 0) {
291 ERR("Snapshot kernctl_put_subbuf");
292 goto end_unlock;
293 }
294 consumed_pos += stream->max_sb_size;
295 }
296
297 if (relayd_id == (uint64_t) -1ULL) {
298 if (stream->out_fd >= 0) {
299 ret = close(stream->out_fd);
300 if (ret < 0) {
301 PERROR("Kernel consumer snapshot close out_fd");
302 goto end_unlock;
303 }
304 stream->out_fd = -1;
305 }
306 } else {
307 close_relayd_stream(stream);
308 stream->net_seq_idx = (uint64_t) -1ULL;
309 }
310 lttng_trace_chunk_put(stream->trace_chunk);
311 stream->trace_chunk = NULL;
312 pthread_mutex_unlock(&stream->lock);
313 }
314
315 /* All good! */
316 ret = 0;
317 goto end;
318
319 error_put_subbuf:
320 ret = kernctl_put_subbuf(stream->wait_fd);
321 if (ret < 0) {
322 ERR("Snapshot kernctl_put_subbuf error path");
323 }
324 end_unlock:
325 pthread_mutex_unlock(&stream->lock);
326 end:
327 rcu_read_unlock();
328 return ret;
329 }
330
331 /*
332 * Read the whole metadata available for a snapshot.
333 * RCU read-side lock must be held across this function to ensure existence of
334 * metadata_channel. The channel lock must be held by the caller.
335 *
336 * Returns 0 on success, < 0 on error
337 */
338 static int lttng_kconsumer_snapshot_metadata(
339 struct lttng_consumer_channel *metadata_channel,
340 uint64_t key, char *path, uint64_t relayd_id,
341 struct lttng_consumer_local_data *ctx)
342 {
343 int ret, use_relayd = 0;
344 ssize_t ret_read;
345 struct lttng_consumer_stream *metadata_stream;
346
347 assert(ctx);
348
349 DBG("Kernel consumer snapshot metadata with key %" PRIu64 " at path %s",
350 key, path);
351
352 rcu_read_lock();
353
354 metadata_stream = metadata_channel->metadata_stream;
355 assert(metadata_stream);
356
357 pthread_mutex_lock(&metadata_stream->lock);
358 assert(metadata_channel->trace_chunk);
359 assert(metadata_stream->trace_chunk);
360
361 /* Flag once that we have a valid relayd for the stream. */
362 if (relayd_id != (uint64_t) -1ULL) {
363 use_relayd = 1;
364 }
365
366 if (use_relayd) {
367 ret = consumer_send_relayd_stream(metadata_stream, path);
368 if (ret < 0) {
369 goto error_snapshot;
370 }
371 } else {
372 ret = consumer_stream_create_output_files(metadata_stream,
373 false);
374 if (ret < 0) {
375 goto error_snapshot;
376 }
377 }
378
379 do {
380 health_code_update();
381
382 ret_read = lttng_kconsumer_read_subbuffer(metadata_stream, ctx);
383 if (ret_read < 0) {
384 if (ret_read != -EAGAIN) {
385 ERR("Kernel snapshot reading metadata subbuffer (ret: %zd)",
386 ret_read);
387 ret = ret_read;
388 goto error_snapshot;
389 }
390 /* ret_read is negative at this point so we will exit the loop. */
391 continue;
392 }
393 } while (ret_read >= 0);
394
395 if (use_relayd) {
396 close_relayd_stream(metadata_stream);
397 metadata_stream->net_seq_idx = (uint64_t) -1ULL;
398 } else {
399 if (metadata_stream->out_fd >= 0) {
400 ret = close(metadata_stream->out_fd);
401 if (ret < 0) {
402 PERROR("Kernel consumer snapshot metadata close out_fd");
403 /*
404 * Don't go on error here since the snapshot was successful at this
405 * point but somehow the close failed.
406 */
407 }
408 metadata_stream->out_fd = -1;
409 lttng_trace_chunk_put(metadata_stream->trace_chunk);
410 metadata_stream->trace_chunk = NULL;
411 }
412 }
413
414 ret = 0;
415 error_snapshot:
416 pthread_mutex_unlock(&metadata_stream->lock);
417 cds_list_del(&metadata_stream->send_node);
418 consumer_stream_destroy(metadata_stream, NULL);
419 metadata_channel->metadata_stream = NULL;
420 rcu_read_unlock();
421 return ret;
422 }
423
424 /*
425 * Receive command from session daemon and process it.
426 *
427 * Return 1 on success else a negative value or 0.
428 */
429 int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
430 int sock, struct pollfd *consumer_sockpoll)
431 {
432 ssize_t ret;
433 enum lttcomm_return_code ret_code = LTTCOMM_CONSUMERD_SUCCESS;
434 struct lttcomm_consumer_msg msg;
435
436 health_code_update();
437
438 ret = lttcomm_recv_unix_sock(sock, &msg, sizeof(msg));
439 if (ret != sizeof(msg)) {
440 if (ret > 0) {
441 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_ERROR_RECV_CMD);
442 ret = -1;
443 }
444 return ret;
445 }
446
447 health_code_update();
448
449 /* Deprecated command */
450 assert(msg.cmd_type != LTTNG_CONSUMER_STOP);
451
452 health_code_update();
453
454 /* relayd needs RCU read-side protection */
455 rcu_read_lock();
456
457 switch (msg.cmd_type) {
458 case LTTNG_CONSUMER_ADD_RELAYD_SOCKET:
459 {
460 /* Session daemon status message are handled in the following call. */
461 consumer_add_relayd_socket(msg.u.relayd_sock.net_index,
462 msg.u.relayd_sock.type, ctx, sock, consumer_sockpoll,
463 &msg.u.relayd_sock.sock, msg.u.relayd_sock.session_id,
464 msg.u.relayd_sock.relayd_session_id);
465 goto end_nosignal;
466 }
467 case LTTNG_CONSUMER_ADD_CHANNEL:
468 {
469 struct lttng_consumer_channel *new_channel;
470 int ret_recv;
471 const uint64_t chunk_id = msg.u.channel.chunk_id.value;
472
473 health_code_update();
474
475 /* First send a status message before receiving the fds. */
476 ret = consumer_send_status_msg(sock, ret_code);
477 if (ret < 0) {
478 /* Somehow, the session daemon is not responding anymore. */
479 goto error_fatal;
480 }
481
482 health_code_update();
483
484 DBG("consumer_add_channel %" PRIu64, msg.u.channel.channel_key);
485 new_channel = consumer_allocate_channel(msg.u.channel.channel_key,
486 msg.u.channel.session_id,
487 msg.u.channel.chunk_id.is_set ?
488 &chunk_id : NULL,
489 msg.u.channel.pathname,
490 msg.u.channel.name,
491 msg.u.channel.relayd_id, msg.u.channel.output,
492 msg.u.channel.tracefile_size,
493 msg.u.channel.tracefile_count, 0,
494 msg.u.channel.monitor,
495 msg.u.channel.live_timer_interval,
496 NULL, NULL);
497 if (new_channel == NULL) {
498 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_OUTFD_ERROR);
499 goto end_nosignal;
500 }
501 new_channel->nb_init_stream_left = msg.u.channel.nb_init_streams;
502 switch (msg.u.channel.output) {
503 case LTTNG_EVENT_SPLICE:
504 new_channel->output = CONSUMER_CHANNEL_SPLICE;
505 break;
506 case LTTNG_EVENT_MMAP:
507 new_channel->output = CONSUMER_CHANNEL_MMAP;
508 break;
509 default:
510 ERR("Channel output unknown %d", msg.u.channel.output);
511 goto end_nosignal;
512 }
513
514 /* Translate and save channel type. */
515 switch (msg.u.channel.type) {
516 case CONSUMER_CHANNEL_TYPE_DATA:
517 case CONSUMER_CHANNEL_TYPE_METADATA:
518 new_channel->type = msg.u.channel.type;
519 break;
520 default:
521 assert(0);
522 goto end_nosignal;
523 };
524
525 health_code_update();
526
527 if (ctx->on_recv_channel != NULL) {
528 ret_recv = ctx->on_recv_channel(new_channel);
529 if (ret_recv == 0) {
530 ret = consumer_add_channel(new_channel, ctx);
531 } else if (ret_recv < 0) {
532 goto end_nosignal;
533 }
534 } else {
535 ret = consumer_add_channel(new_channel, ctx);
536 }
537 if (msg.u.channel.type == CONSUMER_CHANNEL_TYPE_DATA && !ret) {
538 int monitor_start_ret;
539
540 DBG("Consumer starting monitor timer");
541 consumer_timer_live_start(new_channel,
542 msg.u.channel.live_timer_interval);
543 monitor_start_ret = consumer_timer_monitor_start(
544 new_channel,
545 msg.u.channel.monitor_timer_interval);
546 if (monitor_start_ret < 0) {
547 ERR("Starting channel monitoring timer failed");
548 goto end_nosignal;
549 }
550
551 }
552
553 health_code_update();
554
555 /* If we received an error in add_channel, we need to report it. */
556 if (ret < 0) {
557 ret = consumer_send_status_msg(sock, ret);
558 if (ret < 0) {
559 goto error_fatal;
560 }
561 goto end_nosignal;
562 }
563
564 goto end_nosignal;
565 }
566 case LTTNG_CONSUMER_ADD_STREAM:
567 {
568 int fd;
569 struct lttng_pipe *stream_pipe;
570 struct lttng_consumer_stream *new_stream;
571 struct lttng_consumer_channel *channel;
572 int alloc_ret = 0;
573
574 /*
575 * Get stream's channel reference. Needed when adding the stream to the
576 * global hash table.
577 */
578 channel = consumer_find_channel(msg.u.stream.channel_key);
579 if (!channel) {
580 /*
581 * We could not find the channel. Can happen if cpu hotplug
582 * happens while tearing down.
583 */
584 ERR("Unable to find channel key %" PRIu64, msg.u.stream.channel_key);
585 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
586 }
587
588 health_code_update();
589
590 /* First send a status message before receiving the fds. */
591 ret = consumer_send_status_msg(sock, ret_code);
592 if (ret < 0) {
593 /* Somehow, the session daemon is not responding anymore. */
594 goto error_add_stream_fatal;
595 }
596
597 health_code_update();
598
599 if (ret_code != LTTCOMM_CONSUMERD_SUCCESS) {
600 /* Channel was not found. */
601 goto error_add_stream_nosignal;
602 }
603
604 /* Blocking call */
605 health_poll_entry();
606 ret = lttng_consumer_poll_socket(consumer_sockpoll);
607 health_poll_exit();
608 if (ret) {
609 goto error_add_stream_fatal;
610 }
611
612 health_code_update();
613
614 /* Get stream file descriptor from socket */
615 ret = lttcomm_recv_fds_unix_sock(sock, &fd, 1);
616 if (ret != sizeof(fd)) {
617 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_ERROR_RECV_FD);
618 goto end;
619 }
620
621 health_code_update();
622
623 /*
624 * Send status code to session daemon only if the recv works. If the
625 * above recv() failed, the session daemon is notified through the
626 * error socket and the teardown is eventually done.
627 */
628 ret = consumer_send_status_msg(sock, ret_code);
629 if (ret < 0) {
630 /* Somehow, the session daemon is not responding anymore. */
631 goto error_add_stream_nosignal;
632 }
633
634 health_code_update();
635
636 pthread_mutex_lock(&channel->lock);
637 new_stream = consumer_allocate_stream(channel->key,
638 fd,
639 channel->name,
640 channel->relayd_id,
641 channel->session_id,
642 channel->trace_chunk,
643 msg.u.stream.cpu,
644 &alloc_ret,
645 channel->type,
646 channel->monitor);
647 if (new_stream == NULL) {
648 switch (alloc_ret) {
649 case -ENOMEM:
650 case -EINVAL:
651 default:
652 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_OUTFD_ERROR);
653 break;
654 }
655 pthread_mutex_unlock(&channel->lock);
656 goto error_add_stream_nosignal;
657 }
658
659 new_stream->chan = channel;
660 new_stream->wait_fd = fd;
661 ret = kernctl_get_max_subbuf_size(new_stream->wait_fd,
662 &new_stream->max_sb_size);
663 if (ret < 0) {
664 pthread_mutex_unlock(&channel->lock);
665 ERR("Failed to get kernel maximal subbuffer size");
666 goto error_add_stream_nosignal;
667 }
668
669 consumer_stream_update_channel_attributes(new_stream,
670 channel);
671 switch (channel->output) {
672 case CONSUMER_CHANNEL_SPLICE:
673 new_stream->output = LTTNG_EVENT_SPLICE;
674 ret = utils_create_pipe(new_stream->splice_pipe);
675 if (ret < 0) {
676 pthread_mutex_unlock(&channel->lock);
677 goto error_add_stream_nosignal;
678 }
679 break;
680 case CONSUMER_CHANNEL_MMAP:
681 new_stream->output = LTTNG_EVENT_MMAP;
682 break;
683 default:
684 ERR("Stream output unknown %d", channel->output);
685 pthread_mutex_unlock(&channel->lock);
686 goto error_add_stream_nosignal;
687 }
688
689 /*
690 * We've just assigned the channel to the stream so increment the
691 * refcount right now. We don't need to increment the refcount for
692 * streams in no monitor because we handle manually the cleanup of
693 * those. It is very important to make sure there is NO prior
694 * consumer_del_stream() calls or else the refcount will be unbalanced.
695 */
696 if (channel->monitor) {
697 uatomic_inc(&new_stream->chan->refcount);
698 }
699
700 /*
701 * The buffer flush is done on the session daemon side for the kernel
702 * so no need for the stream "hangup_flush_done" variable to be
703 * tracked. This is important for a kernel stream since we don't rely
704 * on the flush state of the stream to read data. It's not the case for
705 * user space tracing.
706 */
707 new_stream->hangup_flush_done = 0;
708
709 health_code_update();
710
711 pthread_mutex_lock(&new_stream->lock);
712 if (ctx->on_recv_stream) {
713 ret = ctx->on_recv_stream(new_stream);
714 if (ret < 0) {
715 pthread_mutex_unlock(&new_stream->lock);
716 pthread_mutex_unlock(&channel->lock);
717 consumer_stream_free(new_stream);
718 goto error_add_stream_nosignal;
719 }
720 }
721 health_code_update();
722
723 if (new_stream->metadata_flag) {
724 channel->metadata_stream = new_stream;
725 }
726
727 /* Do not monitor this stream. */
728 if (!channel->monitor) {
729 DBG("Kernel consumer add stream %s in no monitor mode with "
730 "relayd id %" PRIu64, new_stream->name,
731 new_stream->net_seq_idx);
732 cds_list_add(&new_stream->send_node, &channel->streams.head);
733 pthread_mutex_unlock(&new_stream->lock);
734 pthread_mutex_unlock(&channel->lock);
735 goto end_add_stream;
736 }
737
738 /* Send stream to relayd if the stream has an ID. */
739 if (new_stream->net_seq_idx != (uint64_t) -1ULL) {
740 ret = consumer_send_relayd_stream(new_stream,
741 new_stream->chan->pathname);
742 if (ret < 0) {
743 pthread_mutex_unlock(&new_stream->lock);
744 pthread_mutex_unlock(&channel->lock);
745 consumer_stream_free(new_stream);
746 goto error_add_stream_nosignal;
747 }
748
749 /*
750 * If adding an extra stream to an already
751 * existing channel (e.g. cpu hotplug), we need
752 * to send the "streams_sent" command to relayd.
753 */
754 if (channel->streams_sent_to_relayd) {
755 ret = consumer_send_relayd_streams_sent(
756 new_stream->net_seq_idx);
757 if (ret < 0) {
758 pthread_mutex_unlock(&new_stream->lock);
759 pthread_mutex_unlock(&channel->lock);
760 goto error_add_stream_nosignal;
761 }
762 }
763 }
764 pthread_mutex_unlock(&new_stream->lock);
765 pthread_mutex_unlock(&channel->lock);
766
767 /* Get the right pipe where the stream will be sent. */
768 if (new_stream->metadata_flag) {
769 consumer_add_metadata_stream(new_stream);
770 stream_pipe = ctx->consumer_metadata_pipe;
771 } else {
772 consumer_add_data_stream(new_stream);
773 stream_pipe = ctx->consumer_data_pipe;
774 }
775
776 /* Visible to other threads */
777 new_stream->globally_visible = 1;
778
779 health_code_update();
780
781 ret = lttng_pipe_write(stream_pipe, &new_stream, sizeof(new_stream));
782 if (ret < 0) {
783 ERR("Consumer write %s stream to pipe %d",
784 new_stream->metadata_flag ? "metadata" : "data",
785 lttng_pipe_get_writefd(stream_pipe));
786 if (new_stream->metadata_flag) {
787 consumer_del_stream_for_metadata(new_stream);
788 } else {
789 consumer_del_stream_for_data(new_stream);
790 }
791 goto error_add_stream_nosignal;
792 }
793
794 DBG("Kernel consumer ADD_STREAM %s (fd: %d) %s with relayd id %" PRIu64,
795 new_stream->name, fd, new_stream->chan->pathname, new_stream->relayd_stream_id);
796 end_add_stream:
797 break;
798 error_add_stream_nosignal:
799 goto end_nosignal;
800 error_add_stream_fatal:
801 goto error_fatal;
802 }
803 case LTTNG_CONSUMER_STREAMS_SENT:
804 {
805 struct lttng_consumer_channel *channel;
806
807 /*
808 * Get stream's channel reference. Needed when adding the stream to the
809 * global hash table.
810 */
811 channel = consumer_find_channel(msg.u.sent_streams.channel_key);
812 if (!channel) {
813 /*
814 * We could not find the channel. Can happen if cpu hotplug
815 * happens while tearing down.
816 */
817 ERR("Unable to find channel key %" PRIu64,
818 msg.u.sent_streams.channel_key);
819 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
820 }
821
822 health_code_update();
823
824 /*
825 * Send status code to session daemon.
826 */
827 ret = consumer_send_status_msg(sock, ret_code);
828 if (ret < 0 || ret_code != LTTCOMM_CONSUMERD_SUCCESS) {
829 /* Somehow, the session daemon is not responding anymore. */
830 goto end_nosignal;
831 }
832
833 health_code_update();
834
835 /*
836 * We should not send this message if we don't monitor the
837 * streams in this channel.
838 */
839 if (!channel->monitor) {
840 break;
841 }
842
843 health_code_update();
844 /* Send stream to relayd if the stream has an ID. */
845 if (msg.u.sent_streams.net_seq_idx != (uint64_t) -1ULL) {
846 ret = consumer_send_relayd_streams_sent(
847 msg.u.sent_streams.net_seq_idx);
848 if (ret < 0) {
849 goto end_nosignal;
850 }
851 channel->streams_sent_to_relayd = true;
852 }
853 break;
854 }
855 case LTTNG_CONSUMER_UPDATE_STREAM:
856 {
857 rcu_read_unlock();
858 return -ENOSYS;
859 }
860 case LTTNG_CONSUMER_DESTROY_RELAYD:
861 {
862 uint64_t index = msg.u.destroy_relayd.net_seq_idx;
863 struct consumer_relayd_sock_pair *relayd;
864
865 DBG("Kernel consumer destroying relayd %" PRIu64, index);
866
867 /* Get relayd reference if exists. */
868 relayd = consumer_find_relayd(index);
869 if (relayd == NULL) {
870 DBG("Unable to find relayd %" PRIu64, index);
871 ret_code = LTTCOMM_CONSUMERD_RELAYD_FAIL;
872 }
873
874 /*
875 * Each relayd socket pair has a refcount of stream attached to it
876 * which tells if the relayd is still active or not depending on the
877 * refcount value.
878 *
879 * This will set the destroy flag of the relayd object and destroy it
880 * if the refcount reaches zero when called.
881 *
882 * The destroy can happen either here or when a stream fd hangs up.
883 */
884 if (relayd) {
885 consumer_flag_relayd_for_destroy(relayd);
886 }
887
888 health_code_update();
889
890 ret = consumer_send_status_msg(sock, ret_code);
891 if (ret < 0) {
892 /* Somehow, the session daemon is not responding anymore. */
893 goto error_fatal;
894 }
895
896 goto end_nosignal;
897 }
898 case LTTNG_CONSUMER_DATA_PENDING:
899 {
900 int32_t ret;
901 uint64_t id = msg.u.data_pending.session_id;
902
903 DBG("Kernel consumer data pending command for id %" PRIu64, id);
904
905 ret = consumer_data_pending(id);
906
907 health_code_update();
908
909 /* Send back returned value to session daemon */
910 ret = lttcomm_send_unix_sock(sock, &ret, sizeof(ret));
911 if (ret < 0) {
912 PERROR("send data pending ret code");
913 goto error_fatal;
914 }
915
916 /*
917 * No need to send back a status message since the data pending
918 * returned value is the response.
919 */
920 break;
921 }
922 case LTTNG_CONSUMER_SNAPSHOT_CHANNEL:
923 {
924 struct lttng_consumer_channel *channel;
925 uint64_t key = msg.u.snapshot_channel.key;
926
927 channel = consumer_find_channel(key);
928 if (!channel) {
929 ERR("Channel %" PRIu64 " not found", key);
930 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
931 } else {
932 pthread_mutex_lock(&channel->lock);
933 if (msg.u.snapshot_channel.metadata == 1) {
934 ret = lttng_kconsumer_snapshot_metadata(channel, key,
935 msg.u.snapshot_channel.pathname,
936 msg.u.snapshot_channel.relayd_id, ctx);
937 if (ret < 0) {
938 ERR("Snapshot metadata failed");
939 ret_code = LTTCOMM_CONSUMERD_SNAPSHOT_FAILED;
940 }
941 } else {
942 ret = lttng_kconsumer_snapshot_channel(channel, key,
943 msg.u.snapshot_channel.pathname,
944 msg.u.snapshot_channel.relayd_id,
945 msg.u.snapshot_channel.nb_packets_per_stream,
946 ctx);
947 if (ret < 0) {
948 ERR("Snapshot channel failed");
949 ret_code = LTTCOMM_CONSUMERD_SNAPSHOT_FAILED;
950 }
951 }
952 pthread_mutex_unlock(&channel->lock);
953 }
954 health_code_update();
955
956 ret = consumer_send_status_msg(sock, ret_code);
957 if (ret < 0) {
958 /* Somehow, the session daemon is not responding anymore. */
959 goto end_nosignal;
960 }
961 break;
962 }
963 case LTTNG_CONSUMER_DESTROY_CHANNEL:
964 {
965 uint64_t key = msg.u.destroy_channel.key;
966 struct lttng_consumer_channel *channel;
967
968 channel = consumer_find_channel(key);
969 if (!channel) {
970 ERR("Kernel consumer destroy channel %" PRIu64 " not found", key);
971 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
972 }
973
974 health_code_update();
975
976 ret = consumer_send_status_msg(sock, ret_code);
977 if (ret < 0) {
978 /* Somehow, the session daemon is not responding anymore. */
979 goto end_nosignal;
980 }
981
982 health_code_update();
983
984 /* Stop right now if no channel was found. */
985 if (!channel) {
986 goto end_nosignal;
987 }
988
989 /*
990 * This command should ONLY be issued for channel with streams set in
991 * no monitor mode.
992 */
993 assert(!channel->monitor);
994
995 /*
996 * The refcount should ALWAYS be 0 in the case of a channel in no
997 * monitor mode.
998 */
999 assert(!uatomic_sub_return(&channel->refcount, 1));
1000
1001 consumer_del_channel(channel);
1002
1003 goto end_nosignal;
1004 }
1005 case LTTNG_CONSUMER_DISCARDED_EVENTS:
1006 {
1007 ssize_t ret;
1008 uint64_t count;
1009 struct lttng_consumer_channel *channel;
1010 uint64_t id = msg.u.discarded_events.session_id;
1011 uint64_t key = msg.u.discarded_events.channel_key;
1012
1013 DBG("Kernel consumer discarded events command for session id %"
1014 PRIu64 ", channel key %" PRIu64, id, key);
1015
1016 channel = consumer_find_channel(key);
1017 if (!channel) {
1018 ERR("Kernel consumer discarded events channel %"
1019 PRIu64 " not found", key);
1020 count = 0;
1021 } else {
1022 count = channel->discarded_events;
1023 }
1024
1025 health_code_update();
1026
1027 /* Send back returned value to session daemon */
1028 ret = lttcomm_send_unix_sock(sock, &count, sizeof(count));
1029 if (ret < 0) {
1030 PERROR("send discarded events");
1031 goto error_fatal;
1032 }
1033
1034 break;
1035 }
1036 case LTTNG_CONSUMER_LOST_PACKETS:
1037 {
1038 ssize_t ret;
1039 uint64_t count;
1040 struct lttng_consumer_channel *channel;
1041 uint64_t id = msg.u.lost_packets.session_id;
1042 uint64_t key = msg.u.lost_packets.channel_key;
1043
1044 DBG("Kernel consumer lost packets command for session id %"
1045 PRIu64 ", channel key %" PRIu64, id, key);
1046
1047 channel = consumer_find_channel(key);
1048 if (!channel) {
1049 ERR("Kernel consumer lost packets channel %"
1050 PRIu64 " not found", key);
1051 count = 0;
1052 } else {
1053 count = channel->lost_packets;
1054 }
1055
1056 health_code_update();
1057
1058 /* Send back returned value to session daemon */
1059 ret = lttcomm_send_unix_sock(sock, &count, sizeof(count));
1060 if (ret < 0) {
1061 PERROR("send lost packets");
1062 goto error_fatal;
1063 }
1064
1065 break;
1066 }
1067 case LTTNG_CONSUMER_SET_CHANNEL_MONITOR_PIPE:
1068 {
1069 int channel_monitor_pipe;
1070
1071 ret_code = LTTCOMM_CONSUMERD_SUCCESS;
1072 /* Successfully received the command's type. */
1073 ret = consumer_send_status_msg(sock, ret_code);
1074 if (ret < 0) {
1075 goto error_fatal;
1076 }
1077
1078 ret = lttcomm_recv_fds_unix_sock(sock, &channel_monitor_pipe,
1079 1);
1080 if (ret != sizeof(channel_monitor_pipe)) {
1081 ERR("Failed to receive channel monitor pipe");
1082 goto error_fatal;
1083 }
1084
1085 DBG("Received channel monitor pipe (%d)", channel_monitor_pipe);
1086 ret = consumer_timer_thread_set_channel_monitor_pipe(
1087 channel_monitor_pipe);
1088 if (!ret) {
1089 int flags;
1090
1091 ret_code = LTTCOMM_CONSUMERD_SUCCESS;
1092 /* Set the pipe as non-blocking. */
1093 ret = fcntl(channel_monitor_pipe, F_GETFL, 0);
1094 if (ret == -1) {
1095 PERROR("fcntl get flags of the channel monitoring pipe");
1096 goto error_fatal;
1097 }
1098 flags = ret;
1099
1100 ret = fcntl(channel_monitor_pipe, F_SETFL,
1101 flags | O_NONBLOCK);
1102 if (ret == -1) {
1103 PERROR("fcntl set O_NONBLOCK flag of the channel monitoring pipe");
1104 goto error_fatal;
1105 }
1106 DBG("Channel monitor pipe set as non-blocking");
1107 } else {
1108 ret_code = LTTCOMM_CONSUMERD_ALREADY_SET;
1109 }
1110 ret = consumer_send_status_msg(sock, ret_code);
1111 if (ret < 0) {
1112 goto error_fatal;
1113 }
1114 break;
1115 }
1116 case LTTNG_CONSUMER_ROTATE_CHANNEL:
1117 {
1118 struct lttng_consumer_channel *channel;
1119 uint64_t key = msg.u.rotate_channel.key;
1120
1121 DBG("Consumer rotate channel %" PRIu64, key);
1122
1123 channel = consumer_find_channel(key);
1124 if (!channel) {
1125 ERR("Channel %" PRIu64 " not found", key);
1126 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
1127 } else {
1128 /*
1129 * Sample the rotate position of all the streams in this channel.
1130 */
1131 ret = lttng_consumer_rotate_channel(channel, key,
1132 msg.u.rotate_channel.relayd_id,
1133 msg.u.rotate_channel.metadata,
1134 ctx);
1135 if (ret < 0) {
1136 ERR("Rotate channel failed");
1137 ret_code = LTTCOMM_CONSUMERD_ROTATION_FAIL;
1138 }
1139
1140 health_code_update();
1141 }
1142 ret = consumer_send_status_msg(sock, ret_code);
1143 if (ret < 0) {
1144 /* Somehow, the session daemon is not responding anymore. */
1145 goto end_nosignal;
1146 }
1147 if (channel) {
1148 /* Rotate the streams that are ready right now. */
1149 ret = lttng_consumer_rotate_ready_streams(
1150 channel, key, ctx);
1151 if (ret < 0) {
1152 ERR("Rotate ready streams failed");
1153 }
1154 }
1155
1156 break;
1157 }
1158 case LTTNG_CONSUMER_INIT:
1159 {
1160 ret_code = lttng_consumer_init_command(ctx,
1161 msg.u.init.sessiond_uuid);
1162 health_code_update();
1163 ret = consumer_send_status_msg(sock, ret_code);
1164 if (ret < 0) {
1165 /* Somehow, the session daemon is not responding anymore. */
1166 goto end_nosignal;
1167 }
1168 break;
1169 }
1170 case LTTNG_CONSUMER_CREATE_TRACE_CHUNK:
1171 {
1172 const struct lttng_credentials credentials = {
1173 .uid = msg.u.create_trace_chunk.credentials.value.uid,
1174 .gid = msg.u.create_trace_chunk.credentials.value.gid,
1175 };
1176 const bool is_local_trace =
1177 !msg.u.create_trace_chunk.relayd_id.is_set;
1178 const uint64_t relayd_id =
1179 msg.u.create_trace_chunk.relayd_id.value;
1180 const char *chunk_override_name =
1181 *msg.u.create_trace_chunk.override_name ?
1182 msg.u.create_trace_chunk.override_name :
1183 NULL;
1184 LTTNG_OPTIONAL(struct lttng_directory_handle) chunk_directory_handle =
1185 LTTNG_OPTIONAL_INIT;
1186
1187 /*
1188 * The session daemon will only provide a chunk directory file
1189 * descriptor for local traces.
1190 */
1191 if (is_local_trace) {
1192 int chunk_dirfd;
1193
1194 /* Acknowledge the reception of the command. */
1195 ret = consumer_send_status_msg(sock,
1196 LTTCOMM_CONSUMERD_SUCCESS);
1197 if (ret < 0) {
1198 /* Somehow, the session daemon is not responding anymore. */
1199 goto end_nosignal;
1200 }
1201
1202 ret = lttcomm_recv_fds_unix_sock(sock, &chunk_dirfd, 1);
1203 if (ret != sizeof(chunk_dirfd)) {
1204 ERR("Failed to receive trace chunk directory file descriptor");
1205 goto error_fatal;
1206 }
1207
1208 DBG("Received trace chunk directory fd (%d)",
1209 chunk_dirfd);
1210 ret = lttng_directory_handle_init_from_dirfd(
1211 &chunk_directory_handle.value,
1212 chunk_dirfd);
1213 if (ret) {
1214 ERR("Failed to initialize chunk directory handle from directory file descriptor");
1215 if (close(chunk_dirfd)) {
1216 PERROR("Failed to close chunk directory file descriptor");
1217 }
1218 goto error_fatal;
1219 }
1220 chunk_directory_handle.is_set = true;
1221 }
1222
1223 ret_code = lttng_consumer_create_trace_chunk(
1224 !is_local_trace ? &relayd_id : NULL,
1225 msg.u.create_trace_chunk.session_id,
1226 msg.u.create_trace_chunk.chunk_id,
1227 (time_t) msg.u.create_trace_chunk
1228 .creation_timestamp,
1229 chunk_override_name,
1230 msg.u.create_trace_chunk.credentials.is_set ?
1231 &credentials :
1232 NULL,
1233 chunk_directory_handle.is_set ?
1234 &chunk_directory_handle.value :
1235 NULL);
1236
1237 if (chunk_directory_handle.is_set) {
1238 lttng_directory_handle_fini(
1239 &chunk_directory_handle.value);
1240 }
1241 goto end_msg_sessiond;
1242 }
1243 case LTTNG_CONSUMER_CLOSE_TRACE_CHUNK:
1244 {
1245 enum lttng_trace_chunk_command_type close_command =
1246 msg.u.close_trace_chunk.close_command.value;
1247 const uint64_t relayd_id =
1248 msg.u.close_trace_chunk.relayd_id.value;
1249
1250 ret_code = lttng_consumer_close_trace_chunk(
1251 msg.u.close_trace_chunk.relayd_id.is_set ?
1252 &relayd_id :
1253 NULL,
1254 msg.u.close_trace_chunk.session_id,
1255 msg.u.close_trace_chunk.chunk_id,
1256 (time_t) msg.u.close_trace_chunk.close_timestamp,
1257 msg.u.close_trace_chunk.close_command.is_set ?
1258 &close_command :
1259 NULL);
1260 goto end_msg_sessiond;
1261 }
1262 case LTTNG_CONSUMER_TRACE_CHUNK_EXISTS:
1263 {
1264 const uint64_t relayd_id =
1265 msg.u.trace_chunk_exists.relayd_id.value;
1266
1267 ret_code = lttng_consumer_trace_chunk_exists(
1268 msg.u.trace_chunk_exists.relayd_id.is_set ?
1269 &relayd_id : NULL,
1270 msg.u.trace_chunk_exists.session_id,
1271 msg.u.trace_chunk_exists.chunk_id);
1272 goto end_msg_sessiond;
1273 }
1274 default:
1275 goto end_nosignal;
1276 }
1277
1278 end_nosignal:
1279 /*
1280 * Return 1 to indicate success since the 0 value can be a socket
1281 * shutdown during the recv() or send() call.
1282 */
1283 ret = 1;
1284 goto end;
1285 error_fatal:
1286 /* This will issue a consumer stop. */
1287 ret = -1;
1288 goto end;
1289 end_msg_sessiond:
1290 /*
1291 * The returned value here is not useful since either way we'll return 1 to
1292 * the caller because the session daemon socket management is done
1293 * elsewhere. Returning a negative code or 0 will shutdown the consumer.
1294 */
1295 ret = consumer_send_status_msg(sock, ret_code);
1296 if (ret < 0) {
1297 goto error_fatal;
1298 }
1299 ret = 1;
1300 end:
1301 health_code_update();
1302 rcu_read_unlock();
1303 return ret;
1304 }
1305
1306 /*
1307 * Populate index values of a kernel stream. Values are set in big endian order.
1308 *
1309 * Return 0 on success or else a negative value.
1310 */
1311 static int get_index_values(struct ctf_packet_index *index, int infd)
1312 {
1313 int ret;
1314
1315 ret = kernctl_get_timestamp_begin(infd, &index->timestamp_begin);
1316 if (ret < 0) {
1317 PERROR("kernctl_get_timestamp_begin");
1318 goto error;
1319 }
1320 index->timestamp_begin = htobe64(index->timestamp_begin);
1321
1322 ret = kernctl_get_timestamp_end(infd, &index->timestamp_end);
1323 if (ret < 0) {
1324 PERROR("kernctl_get_timestamp_end");
1325 goto error;
1326 }
1327 index->timestamp_end = htobe64(index->timestamp_end);
1328
1329 ret = kernctl_get_events_discarded(infd, &index->events_discarded);
1330 if (ret < 0) {
1331 PERROR("kernctl_get_events_discarded");
1332 goto error;
1333 }
1334 index->events_discarded = htobe64(index->events_discarded);
1335
1336 ret = kernctl_get_content_size(infd, &index->content_size);
1337 if (ret < 0) {
1338 PERROR("kernctl_get_content_size");
1339 goto error;
1340 }
1341 index->content_size = htobe64(index->content_size);
1342
1343 ret = kernctl_get_packet_size(infd, &index->packet_size);
1344 if (ret < 0) {
1345 PERROR("kernctl_get_packet_size");
1346 goto error;
1347 }
1348 index->packet_size = htobe64(index->packet_size);
1349
1350 ret = kernctl_get_stream_id(infd, &index->stream_id);
1351 if (ret < 0) {
1352 PERROR("kernctl_get_stream_id");
1353 goto error;
1354 }
1355 index->stream_id = htobe64(index->stream_id);
1356
1357 ret = kernctl_get_instance_id(infd, &index->stream_instance_id);
1358 if (ret < 0) {
1359 if (ret == -ENOTTY) {
1360 /* Command not implemented by lttng-modules. */
1361 index->stream_instance_id = -1ULL;
1362 } else {
1363 PERROR("kernctl_get_instance_id");
1364 goto error;
1365 }
1366 }
1367 index->stream_instance_id = htobe64(index->stream_instance_id);
1368
1369 ret = kernctl_get_sequence_number(infd, &index->packet_seq_num);
1370 if (ret < 0) {
1371 if (ret == -ENOTTY) {
1372 /* Command not implemented by lttng-modules. */
1373 index->packet_seq_num = -1ULL;
1374 ret = 0;
1375 } else {
1376 PERROR("kernctl_get_sequence_number");
1377 goto error;
1378 }
1379 }
1380 index->packet_seq_num = htobe64(index->packet_seq_num);
1381
1382 error:
1383 return ret;
1384 }
1385 /*
1386 * Sync metadata meaning request them to the session daemon and snapshot to the
1387 * metadata thread can consumer them.
1388 *
1389 * Metadata stream lock MUST be acquired.
1390 *
1391 * Return 0 if new metadatda is available, EAGAIN if the metadata stream
1392 * is empty or a negative value on error.
1393 */
1394 int lttng_kconsumer_sync_metadata(struct lttng_consumer_stream *metadata)
1395 {
1396 int ret;
1397
1398 assert(metadata);
1399
1400 ret = kernctl_buffer_flush(metadata->wait_fd);
1401 if (ret < 0) {
1402 ERR("Failed to flush kernel stream");
1403 goto end;
1404 }
1405
1406 ret = kernctl_snapshot(metadata->wait_fd);
1407 if (ret < 0) {
1408 if (ret != -EAGAIN) {
1409 ERR("Sync metadata, taking kernel snapshot failed.");
1410 goto end;
1411 }
1412 DBG("Sync metadata, no new kernel metadata");
1413 /* No new metadata, exit. */
1414 ret = ENODATA;
1415 goto end;
1416 }
1417
1418 end:
1419 return ret;
1420 }
1421
1422 static
1423 int update_stream_stats(struct lttng_consumer_stream *stream)
1424 {
1425 int ret;
1426 uint64_t seq, discarded;
1427
1428 ret = kernctl_get_sequence_number(stream->wait_fd, &seq);
1429 if (ret < 0) {
1430 if (ret == -ENOTTY) {
1431 /* Command not implemented by lttng-modules. */
1432 seq = -1ULL;
1433 } else {
1434 PERROR("kernctl_get_sequence_number");
1435 goto end;
1436 }
1437 }
1438
1439 /*
1440 * Start the sequence when we extract the first packet in case we don't
1441 * start at 0 (for example if a consumer is not connected to the
1442 * session immediately after the beginning).
1443 */
1444 if (stream->last_sequence_number == -1ULL) {
1445 stream->last_sequence_number = seq;
1446 } else if (seq > stream->last_sequence_number) {
1447 stream->chan->lost_packets += seq -
1448 stream->last_sequence_number - 1;
1449 } else {
1450 /* seq <= last_sequence_number */
1451 ERR("Sequence number inconsistent : prev = %" PRIu64
1452 ", current = %" PRIu64,
1453 stream->last_sequence_number, seq);
1454 ret = -1;
1455 goto end;
1456 }
1457 stream->last_sequence_number = seq;
1458
1459 ret = kernctl_get_events_discarded(stream->wait_fd, &discarded);
1460 if (ret < 0) {
1461 PERROR("kernctl_get_events_discarded");
1462 goto end;
1463 }
1464 if (discarded < stream->last_discarded_events) {
1465 /*
1466 * Overflow has occurred. We assume only one wrap-around
1467 * has occurred.
1468 */
1469 stream->chan->discarded_events += (1ULL << (CAA_BITS_PER_LONG - 1)) -
1470 stream->last_discarded_events + discarded;
1471 } else {
1472 stream->chan->discarded_events += discarded -
1473 stream->last_discarded_events;
1474 }
1475 stream->last_discarded_events = discarded;
1476 ret = 0;
1477
1478 end:
1479 return ret;
1480 }
1481
1482 /*
1483 * Check if the local version of the metadata stream matches with the version
1484 * of the metadata stream in the kernel. If it was updated, set the reset flag
1485 * on the stream.
1486 */
1487 static
1488 int metadata_stream_check_version(int infd, struct lttng_consumer_stream *stream)
1489 {
1490 int ret;
1491 uint64_t cur_version;
1492
1493 ret = kernctl_get_metadata_version(infd, &cur_version);
1494 if (ret < 0) {
1495 if (ret == -ENOTTY) {
1496 /*
1497 * LTTng-modules does not implement this
1498 * command.
1499 */
1500 ret = 0;
1501 goto end;
1502 }
1503 ERR("Failed to get the metadata version");
1504 goto end;
1505 }
1506
1507 if (stream->metadata_version == cur_version) {
1508 ret = 0;
1509 goto end;
1510 }
1511
1512 DBG("New metadata version detected");
1513 stream->metadata_version = cur_version;
1514 stream->reset_metadata_flag = 1;
1515 ret = 0;
1516
1517 end:
1518 return ret;
1519 }
1520
1521 /*
1522 * Consume data on a file descriptor and write it on a trace file.
1523 * The stream and channel locks must be held by the caller.
1524 */
1525 ssize_t lttng_kconsumer_read_subbuffer(struct lttng_consumer_stream *stream,
1526 struct lttng_consumer_local_data *ctx)
1527 {
1528 unsigned long len, subbuf_size, padding;
1529 int err, write_index = 1, rotation_ret;
1530 ssize_t ret = 0;
1531 int infd = stream->wait_fd;
1532 struct ctf_packet_index index;
1533
1534 DBG("In read_subbuffer (infd : %d)", infd);
1535
1536 /*
1537 * If the stream was flagged to be ready for rotation before we extract the
1538 * next packet, rotate it now.
1539 */
1540 if (stream->rotate_ready) {
1541 DBG("Rotate stream before extracting data");
1542 rotation_ret = lttng_consumer_rotate_stream(ctx, stream);
1543 if (rotation_ret < 0) {
1544 ERR("Stream rotation error");
1545 ret = -1;
1546 goto error;
1547 }
1548 }
1549
1550 /* Get the next subbuffer */
1551 err = kernctl_get_next_subbuf(infd);
1552 if (err != 0) {
1553 /*
1554 * This is a debug message even for single-threaded consumer,
1555 * because poll() have more relaxed criterions than get subbuf,
1556 * so get_subbuf may fail for short race windows where poll()
1557 * would issue wakeups.
1558 */
1559 DBG("Reserving sub buffer failed (everything is normal, "
1560 "it is due to concurrency)");
1561 ret = err;
1562 goto error;
1563 }
1564
1565 /* Get the full subbuffer size including padding */
1566 err = kernctl_get_padded_subbuf_size(infd, &len);
1567 if (err != 0) {
1568 PERROR("Getting sub-buffer len failed.");
1569 err = kernctl_put_subbuf(infd);
1570 if (err != 0) {
1571 if (err == -EFAULT) {
1572 PERROR("Error in unreserving sub buffer\n");
1573 } else if (err == -EIO) {
1574 /* Should never happen with newer LTTng versions */
1575 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1576 }
1577 ret = err;
1578 goto error;
1579 }
1580 ret = err;
1581 goto error;
1582 }
1583
1584 if (!stream->metadata_flag) {
1585 ret = get_index_values(&index, infd);
1586 if (ret < 0) {
1587 err = kernctl_put_subbuf(infd);
1588 if (err != 0) {
1589 if (err == -EFAULT) {
1590 PERROR("Error in unreserving sub buffer\n");
1591 } else if (err == -EIO) {
1592 /* Should never happen with newer LTTng versions */
1593 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1594 }
1595 ret = err;
1596 goto error;
1597 }
1598 goto error;
1599 }
1600 ret = update_stream_stats(stream);
1601 if (ret < 0) {
1602 err = kernctl_put_subbuf(infd);
1603 if (err != 0) {
1604 if (err == -EFAULT) {
1605 PERROR("Error in unreserving sub buffer\n");
1606 } else if (err == -EIO) {
1607 /* Should never happen with newer LTTng versions */
1608 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1609 }
1610 ret = err;
1611 goto error;
1612 }
1613 goto error;
1614 }
1615 } else {
1616 write_index = 0;
1617 ret = metadata_stream_check_version(infd, stream);
1618 if (ret < 0) {
1619 err = kernctl_put_subbuf(infd);
1620 if (err != 0) {
1621 if (err == -EFAULT) {
1622 PERROR("Error in unreserving sub buffer\n");
1623 } else if (err == -EIO) {
1624 /* Should never happen with newer LTTng versions */
1625 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1626 }
1627 ret = err;
1628 goto error;
1629 }
1630 goto error;
1631 }
1632 }
1633
1634 switch (stream->chan->output) {
1635 case CONSUMER_CHANNEL_SPLICE:
1636 /*
1637 * XXX: The lttng-modules splice "actor" does not handle copying
1638 * partial pages hence only using the subbuffer size without the
1639 * padding makes the splice fail.
1640 */
1641 subbuf_size = len;
1642 padding = 0;
1643
1644 /* splice the subbuffer to the tracefile */
1645 ret = lttng_consumer_on_read_subbuffer_splice(ctx, stream, subbuf_size,
1646 padding, &index);
1647 /*
1648 * XXX: Splice does not support network streaming so the return value
1649 * is simply checked against subbuf_size and not like the mmap() op.
1650 */
1651 if (ret != subbuf_size) {
1652 /*
1653 * display the error but continue processing to try
1654 * to release the subbuffer
1655 */
1656 ERR("Error splicing to tracefile (ret: %zd != len: %lu)",
1657 ret, subbuf_size);
1658 write_index = 0;
1659 }
1660 break;
1661 case CONSUMER_CHANNEL_MMAP:
1662 /* Get subbuffer size without padding */
1663 err = kernctl_get_subbuf_size(infd, &subbuf_size);
1664 if (err != 0) {
1665 PERROR("Getting sub-buffer len failed.");
1666 err = kernctl_put_subbuf(infd);
1667 if (err != 0) {
1668 if (err == -EFAULT) {
1669 PERROR("Error in unreserving sub buffer\n");
1670 } else if (err == -EIO) {
1671 /* Should never happen with newer LTTng versions */
1672 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1673 }
1674 ret = err;
1675 goto error;
1676 }
1677 ret = err;
1678 goto error;
1679 }
1680
1681 /* Make sure the tracer is not gone mad on us! */
1682 assert(len >= subbuf_size);
1683
1684 padding = len - subbuf_size;
1685
1686 /* write the subbuffer to the tracefile */
1687 ret = lttng_consumer_on_read_subbuffer_mmap(ctx, stream, subbuf_size,
1688 padding, &index);
1689 /*
1690 * The mmap operation should write subbuf_size amount of data when
1691 * network streaming or the full padding (len) size when we are _not_
1692 * streaming.
1693 */
1694 if ((ret != subbuf_size && stream->net_seq_idx != (uint64_t) -1ULL) ||
1695 (ret != len && stream->net_seq_idx == (uint64_t) -1ULL)) {
1696 /*
1697 * Display the error but continue processing to try to release the
1698 * subbuffer. This is a DBG statement since this is possible to
1699 * happen without being a critical error.
1700 */
1701 DBG("Error writing to tracefile "
1702 "(ret: %zd != len: %lu != subbuf_size: %lu)",
1703 ret, len, subbuf_size);
1704 write_index = 0;
1705 }
1706 break;
1707 default:
1708 ERR("Unknown output method");
1709 ret = -EPERM;
1710 }
1711
1712 err = kernctl_put_next_subbuf(infd);
1713 if (err != 0) {
1714 if (err == -EFAULT) {
1715 PERROR("Error in unreserving sub buffer\n");
1716 } else if (err == -EIO) {
1717 /* Should never happen with newer LTTng versions */
1718 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1719 }
1720 ret = err;
1721 goto error;
1722 }
1723
1724 /* Write index if needed. */
1725 if (!write_index) {
1726 goto rotate;
1727 }
1728
1729 if (stream->chan->live_timer_interval && !stream->metadata_flag) {
1730 /*
1731 * In live, block until all the metadata is sent.
1732 */
1733 pthread_mutex_lock(&stream->metadata_timer_lock);
1734 assert(!stream->missed_metadata_flush);
1735 stream->waiting_on_metadata = true;
1736 pthread_mutex_unlock(&stream->metadata_timer_lock);
1737
1738 err = consumer_stream_sync_metadata(ctx, stream->session_id);
1739
1740 pthread_mutex_lock(&stream->metadata_timer_lock);
1741 stream->waiting_on_metadata = false;
1742 if (stream->missed_metadata_flush) {
1743 stream->missed_metadata_flush = false;
1744 pthread_mutex_unlock(&stream->metadata_timer_lock);
1745 (void) consumer_flush_kernel_index(stream);
1746 } else {
1747 pthread_mutex_unlock(&stream->metadata_timer_lock);
1748 }
1749 if (err < 0) {
1750 goto error;
1751 }
1752 }
1753
1754 err = consumer_stream_write_index(stream, &index);
1755 if (err < 0) {
1756 goto error;
1757 }
1758
1759 rotate:
1760 /*
1761 * After extracting the packet, we check if the stream is now ready to be
1762 * rotated and perform the action immediately.
1763 */
1764 rotation_ret = lttng_consumer_stream_is_rotate_ready(stream);
1765 if (rotation_ret == 1) {
1766 rotation_ret = lttng_consumer_rotate_stream(ctx, stream);
1767 if (rotation_ret < 0) {
1768 ERR("Stream rotation error");
1769 ret = -1;
1770 goto error;
1771 }
1772 } else if (rotation_ret < 0) {
1773 ERR("Checking if stream is ready to rotate");
1774 ret = -1;
1775 goto error;
1776 }
1777
1778 error:
1779 return ret;
1780 }
1781
1782 int lttng_kconsumer_on_recv_stream(struct lttng_consumer_stream *stream)
1783 {
1784 int ret;
1785
1786 assert(stream);
1787
1788 /*
1789 * Don't create anything if this is set for streaming or if there is
1790 * no current trace chunk on the parent channel.
1791 */
1792 if (stream->net_seq_idx == (uint64_t) -1ULL && stream->chan->monitor &&
1793 stream->chan->trace_chunk) {
1794 ret = consumer_stream_create_output_files(stream, true);
1795 if (ret) {
1796 goto error;
1797 }
1798 }
1799
1800 if (stream->output == LTTNG_EVENT_MMAP) {
1801 /* get the len of the mmap region */
1802 unsigned long mmap_len;
1803
1804 ret = kernctl_get_mmap_len(stream->wait_fd, &mmap_len);
1805 if (ret != 0) {
1806 PERROR("kernctl_get_mmap_len");
1807 goto error_close_fd;
1808 }
1809 stream->mmap_len = (size_t) mmap_len;
1810
1811 stream->mmap_base = mmap(NULL, stream->mmap_len, PROT_READ,
1812 MAP_PRIVATE, stream->wait_fd, 0);
1813 if (stream->mmap_base == MAP_FAILED) {
1814 PERROR("Error mmaping");
1815 ret = -1;
1816 goto error_close_fd;
1817 }
1818 }
1819
1820 /* we return 0 to let the library handle the FD internally */
1821 return 0;
1822
1823 error_close_fd:
1824 if (stream->out_fd >= 0) {
1825 int err;
1826
1827 err = close(stream->out_fd);
1828 assert(!err);
1829 stream->out_fd = -1;
1830 }
1831 error:
1832 return ret;
1833 }
1834
1835 /*
1836 * Check if data is still being extracted from the buffers for a specific
1837 * stream. Consumer data lock MUST be acquired before calling this function
1838 * and the stream lock.
1839 *
1840 * Return 1 if the traced data are still getting read else 0 meaning that the
1841 * data is available for trace viewer reading.
1842 */
1843 int lttng_kconsumer_data_pending(struct lttng_consumer_stream *stream)
1844 {
1845 int ret;
1846
1847 assert(stream);
1848
1849 if (stream->endpoint_status != CONSUMER_ENDPOINT_ACTIVE) {
1850 ret = 0;
1851 goto end;
1852 }
1853
1854 ret = kernctl_get_next_subbuf(stream->wait_fd);
1855 if (ret == 0) {
1856 /* There is still data so let's put back this subbuffer. */
1857 ret = kernctl_put_subbuf(stream->wait_fd);
1858 assert(ret == 0);
1859 ret = 1; /* Data is pending */
1860 goto end;
1861 }
1862
1863 /* Data is NOT pending and ready to be read. */
1864 ret = 0;
1865
1866 end:
1867 return ret;
1868 }
This page took 0.110968 seconds and 4 git commands to generate.