95c92ba9b961d99821a80d21c339dc466af2924d
[lttng-tools.git] / src / common / kernel-consumer / kernel-consumer.c
1 /*
2 * Copyright (C) 2011 - Julien Desfossez <julien.desfossez@polymtl.ca>
3 * Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
4 * Copyright (C) 2017 - Jérémie Galarneau <jeremie.galarneau@efficios.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2 only,
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #define _LGPL_SOURCE
21 #include <assert.h>
22 #include <poll.h>
23 #include <pthread.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <sys/mman.h>
27 #include <sys/socket.h>
28 #include <sys/types.h>
29 #include <inttypes.h>
30 #include <unistd.h>
31 #include <sys/stat.h>
32
33 #include <bin/lttng-consumerd/health-consumerd.h>
34 #include <common/common.h>
35 #include <common/kernel-ctl/kernel-ctl.h>
36 #include <common/sessiond-comm/sessiond-comm.h>
37 #include <common/sessiond-comm/relayd.h>
38 #include <common/compat/fcntl.h>
39 #include <common/compat/endian.h>
40 #include <common/pipe.h>
41 #include <common/relayd/relayd.h>
42 #include <common/utils.h>
43 #include <common/consumer/consumer-stream.h>
44 #include <common/index/index.h>
45 #include <common/consumer/consumer-timer.h>
46 #include <common/optional.h>
47
48 #include "kernel-consumer.h"
49
50 extern struct lttng_consumer_global_data consumer_data;
51 extern int consumer_poll_timeout;
52
53 /*
54 * Take a snapshot for a specific fd
55 *
56 * Returns 0 on success, < 0 on error
57 */
58 int lttng_kconsumer_take_snapshot(struct lttng_consumer_stream *stream)
59 {
60 int ret = 0;
61 int infd = stream->wait_fd;
62
63 ret = kernctl_snapshot(infd);
64 /*
65 * -EAGAIN is not an error, it just means that there is no data to
66 * be read.
67 */
68 if (ret != 0 && ret != -EAGAIN) {
69 PERROR("Getting sub-buffer snapshot.");
70 }
71
72 return ret;
73 }
74
75 /*
76 * Sample consumed and produced positions for a specific fd.
77 *
78 * Returns 0 on success, < 0 on error.
79 */
80 int lttng_kconsumer_sample_snapshot_positions(
81 struct lttng_consumer_stream *stream)
82 {
83 assert(stream);
84
85 return kernctl_snapshot_sample_positions(stream->wait_fd);
86 }
87
88 /*
89 * Get the produced position
90 *
91 * Returns 0 on success, < 0 on error
92 */
93 int lttng_kconsumer_get_produced_snapshot(struct lttng_consumer_stream *stream,
94 unsigned long *pos)
95 {
96 int ret;
97 int infd = stream->wait_fd;
98
99 ret = kernctl_snapshot_get_produced(infd, pos);
100 if (ret != 0) {
101 PERROR("kernctl_snapshot_get_produced");
102 }
103
104 return ret;
105 }
106
107 /*
108 * Get the consumerd position
109 *
110 * Returns 0 on success, < 0 on error
111 */
112 int lttng_kconsumer_get_consumed_snapshot(struct lttng_consumer_stream *stream,
113 unsigned long *pos)
114 {
115 int ret;
116 int infd = stream->wait_fd;
117
118 ret = kernctl_snapshot_get_consumed(infd, pos);
119 if (ret != 0) {
120 PERROR("kernctl_snapshot_get_consumed");
121 }
122
123 return ret;
124 }
125
126 /*
127 * Take a snapshot of all the stream of a channel
128 * RCU read-side lock must be held across this function to ensure existence of
129 * channel. The channel lock must be held by the caller.
130 *
131 * Returns 0 on success, < 0 on error
132 */
133 static int lttng_kconsumer_snapshot_channel(
134 struct lttng_consumer_channel *channel,
135 uint64_t key, char *path, uint64_t relayd_id,
136 uint64_t nb_packets_per_stream,
137 struct lttng_consumer_local_data *ctx)
138 {
139 int ret;
140 struct lttng_consumer_stream *stream;
141
142 DBG("Kernel consumer snapshot channel %" PRIu64, key);
143
144 rcu_read_lock();
145
146 /* Splice is not supported yet for channel snapshot. */
147 if (channel->output != CONSUMER_CHANNEL_MMAP) {
148 ERR("Unsupported output type for channel \"%s\": mmap output is required to record a snapshot",
149 channel->name);
150 ret = -1;
151 goto end;
152 }
153
154 cds_list_for_each_entry(stream, &channel->streams.head, send_node) {
155 unsigned long consumed_pos, produced_pos;
156
157 health_code_update();
158
159 /*
160 * Lock stream because we are about to change its state.
161 */
162 pthread_mutex_lock(&stream->lock);
163
164 assert(channel->trace_chunk);
165 if (!lttng_trace_chunk_get(channel->trace_chunk)) {
166 /*
167 * Can't happen barring an internal error as the channel
168 * holds a reference to the trace chunk.
169 */
170 ERR("Failed to acquire reference to channel's trace chunk");
171 ret = -1;
172 goto end_unlock;
173 }
174 assert(!stream->trace_chunk);
175 stream->trace_chunk = channel->trace_chunk;
176
177 /*
178 * Assign the received relayd ID so we can use it for streaming. The streams
179 * are not visible to anyone so this is OK to change it.
180 */
181 stream->net_seq_idx = relayd_id;
182 channel->relayd_id = relayd_id;
183 if (relayd_id != (uint64_t) -1ULL) {
184 ret = consumer_send_relayd_stream(stream, path);
185 if (ret < 0) {
186 ERR("sending stream to relayd");
187 goto end_unlock;
188 }
189 } else {
190 ret = consumer_stream_create_output_files(stream,
191 false);
192 if (ret < 0) {
193 goto end_unlock;
194 }
195 DBG("Kernel consumer snapshot stream (%" PRIu64 ")",
196 stream->key);
197 }
198
199 ret = kernctl_buffer_flush_empty(stream->wait_fd);
200 if (ret < 0) {
201 /*
202 * Doing a buffer flush which does not take into
203 * account empty packets. This is not perfect
204 * for stream intersection, but required as a
205 * fall-back when "flush_empty" is not
206 * implemented by lttng-modules.
207 */
208 ret = kernctl_buffer_flush(stream->wait_fd);
209 if (ret < 0) {
210 ERR("Failed to flush kernel stream");
211 goto end_unlock;
212 }
213 goto end_unlock;
214 }
215
216 ret = lttng_kconsumer_take_snapshot(stream);
217 if (ret < 0) {
218 ERR("Taking kernel snapshot");
219 goto end_unlock;
220 }
221
222 ret = lttng_kconsumer_get_produced_snapshot(stream, &produced_pos);
223 if (ret < 0) {
224 ERR("Produced kernel snapshot position");
225 goto end_unlock;
226 }
227
228 ret = lttng_kconsumer_get_consumed_snapshot(stream, &consumed_pos);
229 if (ret < 0) {
230 ERR("Consumerd kernel snapshot position");
231 goto end_unlock;
232 }
233
234 consumed_pos = consumer_get_consume_start_pos(consumed_pos,
235 produced_pos, nb_packets_per_stream,
236 stream->max_sb_size);
237
238 while ((long) (consumed_pos - produced_pos) < 0) {
239 ssize_t read_len;
240 unsigned long len, padded_len;
241
242 health_code_update();
243
244 DBG("Kernel consumer taking snapshot at pos %lu", consumed_pos);
245
246 ret = kernctl_get_subbuf(stream->wait_fd, &consumed_pos);
247 if (ret < 0) {
248 if (ret != -EAGAIN) {
249 PERROR("kernctl_get_subbuf snapshot");
250 goto end_unlock;
251 }
252 DBG("Kernel consumer get subbuf failed. Skipping it.");
253 consumed_pos += stream->max_sb_size;
254 stream->chan->lost_packets++;
255 continue;
256 }
257
258 ret = kernctl_get_subbuf_size(stream->wait_fd, &len);
259 if (ret < 0) {
260 ERR("Snapshot kernctl_get_subbuf_size");
261 goto error_put_subbuf;
262 }
263
264 ret = kernctl_get_padded_subbuf_size(stream->wait_fd, &padded_len);
265 if (ret < 0) {
266 ERR("Snapshot kernctl_get_padded_subbuf_size");
267 goto error_put_subbuf;
268 }
269
270 read_len = lttng_consumer_on_read_subbuffer_mmap(ctx, stream, len,
271 padded_len - len, NULL);
272 /*
273 * We write the padded len in local tracefiles but the data len
274 * when using a relay. Display the error but continue processing
275 * to try to release the subbuffer.
276 */
277 if (relayd_id != (uint64_t) -1ULL) {
278 if (read_len != len) {
279 ERR("Error sending to the relay (ret: %zd != len: %lu)",
280 read_len, len);
281 }
282 } else {
283 if (read_len != padded_len) {
284 ERR("Error writing to tracefile (ret: %zd != len: %lu)",
285 read_len, padded_len);
286 }
287 }
288
289 ret = kernctl_put_subbuf(stream->wait_fd);
290 if (ret < 0) {
291 ERR("Snapshot kernctl_put_subbuf");
292 goto end_unlock;
293 }
294 consumed_pos += stream->max_sb_size;
295 }
296
297 if (relayd_id == (uint64_t) -1ULL) {
298 if (stream->out_fd >= 0) {
299 ret = close(stream->out_fd);
300 if (ret < 0) {
301 PERROR("Kernel consumer snapshot close out_fd");
302 goto end_unlock;
303 }
304 stream->out_fd = -1;
305 }
306 } else {
307 close_relayd_stream(stream);
308 stream->net_seq_idx = (uint64_t) -1ULL;
309 }
310 lttng_trace_chunk_put(stream->trace_chunk);
311 stream->trace_chunk = NULL;
312 pthread_mutex_unlock(&stream->lock);
313 }
314
315 /* All good! */
316 ret = 0;
317 goto end;
318
319 error_put_subbuf:
320 ret = kernctl_put_subbuf(stream->wait_fd);
321 if (ret < 0) {
322 ERR("Snapshot kernctl_put_subbuf error path");
323 }
324 end_unlock:
325 pthread_mutex_unlock(&stream->lock);
326 end:
327 rcu_read_unlock();
328 return ret;
329 }
330
331 /*
332 * Read the whole metadata available for a snapshot.
333 * RCU read-side lock must be held across this function to ensure existence of
334 * metadata_channel. The channel lock must be held by the caller.
335 *
336 * Returns 0 on success, < 0 on error
337 */
338 static int lttng_kconsumer_snapshot_metadata(
339 struct lttng_consumer_channel *metadata_channel,
340 uint64_t key, char *path, uint64_t relayd_id,
341 struct lttng_consumer_local_data *ctx)
342 {
343 int ret, use_relayd = 0;
344 ssize_t ret_read;
345 struct lttng_consumer_stream *metadata_stream;
346
347 assert(ctx);
348
349 DBG("Kernel consumer snapshot metadata with key %" PRIu64 " at path %s",
350 key, path);
351
352 rcu_read_lock();
353
354 metadata_stream = metadata_channel->metadata_stream;
355 assert(metadata_stream);
356
357 pthread_mutex_lock(&metadata_stream->lock);
358 assert(metadata_channel->trace_chunk);
359 assert(metadata_stream->trace_chunk);
360
361 /* Flag once that we have a valid relayd for the stream. */
362 if (relayd_id != (uint64_t) -1ULL) {
363 use_relayd = 1;
364 }
365
366 if (use_relayd) {
367 ret = consumer_send_relayd_stream(metadata_stream, path);
368 if (ret < 0) {
369 goto error_snapshot;
370 }
371 } else {
372 ret = consumer_stream_create_output_files(metadata_stream,
373 false);
374 if (ret < 0) {
375 goto error_snapshot;
376 }
377 }
378
379 do {
380 health_code_update();
381
382 ret_read = lttng_kconsumer_read_subbuffer(metadata_stream, ctx);
383 if (ret_read < 0) {
384 if (ret_read != -EAGAIN) {
385 ERR("Kernel snapshot reading metadata subbuffer (ret: %zd)",
386 ret_read);
387 ret = ret_read;
388 goto error_snapshot;
389 }
390 /* ret_read is negative at this point so we will exit the loop. */
391 continue;
392 }
393 } while (ret_read >= 0);
394
395 if (use_relayd) {
396 close_relayd_stream(metadata_stream);
397 metadata_stream->net_seq_idx = (uint64_t) -1ULL;
398 } else {
399 if (metadata_stream->out_fd >= 0) {
400 ret = close(metadata_stream->out_fd);
401 if (ret < 0) {
402 PERROR("Kernel consumer snapshot metadata close out_fd");
403 /*
404 * Don't go on error here since the snapshot was successful at this
405 * point but somehow the close failed.
406 */
407 }
408 metadata_stream->out_fd = -1;
409 lttng_trace_chunk_put(metadata_stream->trace_chunk);
410 metadata_stream->trace_chunk = NULL;
411 }
412 }
413
414 ret = 0;
415 error_snapshot:
416 pthread_mutex_unlock(&metadata_stream->lock);
417 cds_list_del(&metadata_stream->send_node);
418 consumer_stream_destroy(metadata_stream, NULL);
419 metadata_channel->metadata_stream = NULL;
420 rcu_read_unlock();
421 return ret;
422 }
423
424 /*
425 * Receive command from session daemon and process it.
426 *
427 * Return 1 on success else a negative value or 0.
428 */
429 int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
430 int sock, struct pollfd *consumer_sockpoll)
431 {
432 ssize_t ret;
433 enum lttcomm_return_code ret_code = LTTCOMM_CONSUMERD_SUCCESS;
434 struct lttcomm_consumer_msg msg;
435
436 health_code_update();
437
438 ret = lttcomm_recv_unix_sock(sock, &msg, sizeof(msg));
439 if (ret != sizeof(msg)) {
440 if (ret > 0) {
441 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_ERROR_RECV_CMD);
442 ret = -1;
443 }
444 return ret;
445 }
446
447 health_code_update();
448
449 /* Deprecated command */
450 assert(msg.cmd_type != LTTNG_CONSUMER_STOP);
451
452 health_code_update();
453
454 /* relayd needs RCU read-side protection */
455 rcu_read_lock();
456
457 switch (msg.cmd_type) {
458 case LTTNG_CONSUMER_ADD_RELAYD_SOCKET:
459 {
460 /* Session daemon status message are handled in the following call. */
461 consumer_add_relayd_socket(msg.u.relayd_sock.net_index,
462 msg.u.relayd_sock.type, ctx, sock, consumer_sockpoll,
463 &msg.u.relayd_sock.sock, msg.u.relayd_sock.session_id,
464 msg.u.relayd_sock.relayd_session_id);
465 goto end_nosignal;
466 }
467 case LTTNG_CONSUMER_ADD_CHANNEL:
468 {
469 struct lttng_consumer_channel *new_channel;
470 int ret_recv;
471 const uint64_t chunk_id = msg.u.channel.chunk_id.value;
472
473 health_code_update();
474
475 /* First send a status message before receiving the fds. */
476 ret = consumer_send_status_msg(sock, ret_code);
477 if (ret < 0) {
478 /* Somehow, the session daemon is not responding anymore. */
479 goto error_fatal;
480 }
481
482 health_code_update();
483
484 DBG("consumer_add_channel %" PRIu64, msg.u.channel.channel_key);
485 new_channel = consumer_allocate_channel(msg.u.channel.channel_key,
486 msg.u.channel.session_id,
487 msg.u.channel.chunk_id.is_set ?
488 &chunk_id : NULL,
489 msg.u.channel.pathname,
490 msg.u.channel.name,
491 msg.u.channel.relayd_id, msg.u.channel.output,
492 msg.u.channel.tracefile_size,
493 msg.u.channel.tracefile_count, 0,
494 msg.u.channel.monitor,
495 msg.u.channel.live_timer_interval,
496 NULL, NULL);
497 if (new_channel == NULL) {
498 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_OUTFD_ERROR);
499 goto end_nosignal;
500 }
501 new_channel->nb_init_stream_left = msg.u.channel.nb_init_streams;
502 switch (msg.u.channel.output) {
503 case LTTNG_EVENT_SPLICE:
504 new_channel->output = CONSUMER_CHANNEL_SPLICE;
505 break;
506 case LTTNG_EVENT_MMAP:
507 new_channel->output = CONSUMER_CHANNEL_MMAP;
508 break;
509 default:
510 ERR("Channel output unknown %d", msg.u.channel.output);
511 goto end_nosignal;
512 }
513
514 /* Translate and save channel type. */
515 switch (msg.u.channel.type) {
516 case CONSUMER_CHANNEL_TYPE_DATA:
517 case CONSUMER_CHANNEL_TYPE_METADATA:
518 new_channel->type = msg.u.channel.type;
519 break;
520 default:
521 assert(0);
522 goto end_nosignal;
523 };
524
525 health_code_update();
526
527 if (ctx->on_recv_channel != NULL) {
528 ret_recv = ctx->on_recv_channel(new_channel);
529 if (ret_recv == 0) {
530 ret = consumer_add_channel(new_channel, ctx);
531 } else if (ret_recv < 0) {
532 goto end_nosignal;
533 }
534 } else {
535 ret = consumer_add_channel(new_channel, ctx);
536 }
537 if (msg.u.channel.type == CONSUMER_CHANNEL_TYPE_DATA && !ret) {
538 int monitor_start_ret;
539
540 DBG("Consumer starting monitor timer");
541 consumer_timer_live_start(new_channel,
542 msg.u.channel.live_timer_interval);
543 monitor_start_ret = consumer_timer_monitor_start(
544 new_channel,
545 msg.u.channel.monitor_timer_interval);
546 if (monitor_start_ret < 0) {
547 ERR("Starting channel monitoring timer failed");
548 goto end_nosignal;
549 }
550
551 }
552
553 health_code_update();
554
555 /* If we received an error in add_channel, we need to report it. */
556 if (ret < 0) {
557 ret = consumer_send_status_msg(sock, ret);
558 if (ret < 0) {
559 goto error_fatal;
560 }
561 goto end_nosignal;
562 }
563
564 goto end_nosignal;
565 }
566 case LTTNG_CONSUMER_ADD_STREAM:
567 {
568 int fd;
569 struct lttng_pipe *stream_pipe;
570 struct lttng_consumer_stream *new_stream;
571 struct lttng_consumer_channel *channel;
572 int alloc_ret = 0;
573
574 /*
575 * Get stream's channel reference. Needed when adding the stream to the
576 * global hash table.
577 */
578 channel = consumer_find_channel(msg.u.stream.channel_key);
579 if (!channel) {
580 /*
581 * We could not find the channel. Can happen if cpu hotplug
582 * happens while tearing down.
583 */
584 ERR("Unable to find channel key %" PRIu64, msg.u.stream.channel_key);
585 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
586 }
587
588 health_code_update();
589
590 /* First send a status message before receiving the fds. */
591 ret = consumer_send_status_msg(sock, ret_code);
592 if (ret < 0) {
593 /* Somehow, the session daemon is not responding anymore. */
594 goto error_add_stream_fatal;
595 }
596
597 health_code_update();
598
599 if (ret_code != LTTCOMM_CONSUMERD_SUCCESS) {
600 /* Channel was not found. */
601 goto error_add_stream_nosignal;
602 }
603
604 /* Blocking call */
605 health_poll_entry();
606 ret = lttng_consumer_poll_socket(consumer_sockpoll);
607 health_poll_exit();
608 if (ret) {
609 goto error_add_stream_fatal;
610 }
611
612 health_code_update();
613
614 /* Get stream file descriptor from socket */
615 ret = lttcomm_recv_fds_unix_sock(sock, &fd, 1);
616 if (ret != sizeof(fd)) {
617 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_ERROR_RECV_FD);
618 goto end;
619 }
620
621 health_code_update();
622
623 /*
624 * Send status code to session daemon only if the recv works. If the
625 * above recv() failed, the session daemon is notified through the
626 * error socket and the teardown is eventually done.
627 */
628 ret = consumer_send_status_msg(sock, ret_code);
629 if (ret < 0) {
630 /* Somehow, the session daemon is not responding anymore. */
631 goto error_add_stream_nosignal;
632 }
633
634 health_code_update();
635
636 pthread_mutex_lock(&channel->lock);
637 new_stream = consumer_allocate_stream(channel->key,
638 fd,
639 channel->name,
640 channel->relayd_id,
641 channel->session_id,
642 channel->trace_chunk,
643 msg.u.stream.cpu,
644 &alloc_ret,
645 channel->type,
646 channel->monitor);
647 if (new_stream == NULL) {
648 switch (alloc_ret) {
649 case -ENOMEM:
650 case -EINVAL:
651 default:
652 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_OUTFD_ERROR);
653 break;
654 }
655 pthread_mutex_unlock(&channel->lock);
656 goto error_add_stream_nosignal;
657 }
658
659 new_stream->chan = channel;
660 new_stream->wait_fd = fd;
661 ret = kernctl_get_max_subbuf_size(new_stream->wait_fd,
662 &new_stream->max_sb_size);
663 if (ret < 0) {
664 pthread_mutex_unlock(&channel->lock);
665 ERR("Failed to get kernel maximal subbuffer size");
666 goto error_add_stream_nosignal;
667 }
668
669 consumer_stream_update_channel_attributes(new_stream,
670 channel);
671 switch (channel->output) {
672 case CONSUMER_CHANNEL_SPLICE:
673 new_stream->output = LTTNG_EVENT_SPLICE;
674 ret = utils_create_pipe(new_stream->splice_pipe);
675 if (ret < 0) {
676 pthread_mutex_unlock(&channel->lock);
677 goto error_add_stream_nosignal;
678 }
679 break;
680 case CONSUMER_CHANNEL_MMAP:
681 new_stream->output = LTTNG_EVENT_MMAP;
682 break;
683 default:
684 ERR("Stream output unknown %d", channel->output);
685 pthread_mutex_unlock(&channel->lock);
686 goto error_add_stream_nosignal;
687 }
688
689 /*
690 * We've just assigned the channel to the stream so increment the
691 * refcount right now. We don't need to increment the refcount for
692 * streams in no monitor because we handle manually the cleanup of
693 * those. It is very important to make sure there is NO prior
694 * consumer_del_stream() calls or else the refcount will be unbalanced.
695 */
696 if (channel->monitor) {
697 uatomic_inc(&new_stream->chan->refcount);
698 }
699
700 /*
701 * The buffer flush is done on the session daemon side for the kernel
702 * so no need for the stream "hangup_flush_done" variable to be
703 * tracked. This is important for a kernel stream since we don't rely
704 * on the flush state of the stream to read data. It's not the case for
705 * user space tracing.
706 */
707 new_stream->hangup_flush_done = 0;
708
709 health_code_update();
710
711 pthread_mutex_lock(&new_stream->lock);
712 if (ctx->on_recv_stream) {
713 ret = ctx->on_recv_stream(new_stream);
714 if (ret < 0) {
715 pthread_mutex_unlock(&new_stream->lock);
716 pthread_mutex_unlock(&channel->lock);
717 consumer_stream_free(new_stream);
718 goto error_add_stream_nosignal;
719 }
720 }
721 health_code_update();
722
723 if (new_stream->metadata_flag) {
724 channel->metadata_stream = new_stream;
725 }
726
727 /* Do not monitor this stream. */
728 if (!channel->monitor) {
729 DBG("Kernel consumer add stream %s in no monitor mode with "
730 "relayd id %" PRIu64, new_stream->name,
731 new_stream->net_seq_idx);
732 cds_list_add(&new_stream->send_node, &channel->streams.head);
733 pthread_mutex_unlock(&new_stream->lock);
734 pthread_mutex_unlock(&channel->lock);
735 goto end_add_stream;
736 }
737
738 /* Send stream to relayd if the stream has an ID. */
739 if (new_stream->net_seq_idx != (uint64_t) -1ULL) {
740 ret = consumer_send_relayd_stream(new_stream,
741 new_stream->chan->pathname);
742 if (ret < 0) {
743 pthread_mutex_unlock(&new_stream->lock);
744 pthread_mutex_unlock(&channel->lock);
745 consumer_stream_free(new_stream);
746 goto error_add_stream_nosignal;
747 }
748
749 /*
750 * If adding an extra stream to an already
751 * existing channel (e.g. cpu hotplug), we need
752 * to send the "streams_sent" command to relayd.
753 */
754 if (channel->streams_sent_to_relayd) {
755 ret = consumer_send_relayd_streams_sent(
756 new_stream->net_seq_idx);
757 if (ret < 0) {
758 pthread_mutex_unlock(&new_stream->lock);
759 pthread_mutex_unlock(&channel->lock);
760 goto error_add_stream_nosignal;
761 }
762 }
763 }
764 pthread_mutex_unlock(&new_stream->lock);
765 pthread_mutex_unlock(&channel->lock);
766
767 /* Get the right pipe where the stream will be sent. */
768 if (new_stream->metadata_flag) {
769 consumer_add_metadata_stream(new_stream);
770 stream_pipe = ctx->consumer_metadata_pipe;
771 } else {
772 consumer_add_data_stream(new_stream);
773 stream_pipe = ctx->consumer_data_pipe;
774 }
775
776 /* Visible to other threads */
777 new_stream->globally_visible = 1;
778
779 health_code_update();
780
781 ret = lttng_pipe_write(stream_pipe, &new_stream, sizeof(new_stream));
782 if (ret < 0) {
783 ERR("Consumer write %s stream to pipe %d",
784 new_stream->metadata_flag ? "metadata" : "data",
785 lttng_pipe_get_writefd(stream_pipe));
786 if (new_stream->metadata_flag) {
787 consumer_del_stream_for_metadata(new_stream);
788 } else {
789 consumer_del_stream_for_data(new_stream);
790 }
791 goto error_add_stream_nosignal;
792 }
793
794 DBG("Kernel consumer ADD_STREAM %s (fd: %d) %s with relayd id %" PRIu64,
795 new_stream->name, fd, new_stream->chan->pathname, new_stream->relayd_stream_id);
796 end_add_stream:
797 break;
798 error_add_stream_nosignal:
799 goto end_nosignal;
800 error_add_stream_fatal:
801 goto error_fatal;
802 }
803 case LTTNG_CONSUMER_STREAMS_SENT:
804 {
805 struct lttng_consumer_channel *channel;
806
807 /*
808 * Get stream's channel reference. Needed when adding the stream to the
809 * global hash table.
810 */
811 channel = consumer_find_channel(msg.u.sent_streams.channel_key);
812 if (!channel) {
813 /*
814 * We could not find the channel. Can happen if cpu hotplug
815 * happens while tearing down.
816 */
817 ERR("Unable to find channel key %" PRIu64,
818 msg.u.sent_streams.channel_key);
819 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
820 }
821
822 health_code_update();
823
824 /*
825 * Send status code to session daemon.
826 */
827 ret = consumer_send_status_msg(sock, ret_code);
828 if (ret < 0 || ret_code != LTTCOMM_CONSUMERD_SUCCESS) {
829 /* Somehow, the session daemon is not responding anymore. */
830 goto error_streams_sent_nosignal;
831 }
832
833 health_code_update();
834
835 /*
836 * We should not send this message if we don't monitor the
837 * streams in this channel.
838 */
839 if (!channel->monitor) {
840 goto end_error_streams_sent;
841 }
842
843 health_code_update();
844 /* Send stream to relayd if the stream has an ID. */
845 if (msg.u.sent_streams.net_seq_idx != (uint64_t) -1ULL) {
846 ret = consumer_send_relayd_streams_sent(
847 msg.u.sent_streams.net_seq_idx);
848 if (ret < 0) {
849 goto error_streams_sent_nosignal;
850 }
851 channel->streams_sent_to_relayd = true;
852 }
853 end_error_streams_sent:
854 break;
855 error_streams_sent_nosignal:
856 goto end_nosignal;
857 }
858 case LTTNG_CONSUMER_UPDATE_STREAM:
859 {
860 rcu_read_unlock();
861 return -ENOSYS;
862 }
863 case LTTNG_CONSUMER_DESTROY_RELAYD:
864 {
865 uint64_t index = msg.u.destroy_relayd.net_seq_idx;
866 struct consumer_relayd_sock_pair *relayd;
867
868 DBG("Kernel consumer destroying relayd %" PRIu64, index);
869
870 /* Get relayd reference if exists. */
871 relayd = consumer_find_relayd(index);
872 if (relayd == NULL) {
873 DBG("Unable to find relayd %" PRIu64, index);
874 ret_code = LTTCOMM_CONSUMERD_RELAYD_FAIL;
875 }
876
877 /*
878 * Each relayd socket pair has a refcount of stream attached to it
879 * which tells if the relayd is still active or not depending on the
880 * refcount value.
881 *
882 * This will set the destroy flag of the relayd object and destroy it
883 * if the refcount reaches zero when called.
884 *
885 * The destroy can happen either here or when a stream fd hangs up.
886 */
887 if (relayd) {
888 consumer_flag_relayd_for_destroy(relayd);
889 }
890
891 health_code_update();
892
893 ret = consumer_send_status_msg(sock, ret_code);
894 if (ret < 0) {
895 /* Somehow, the session daemon is not responding anymore. */
896 goto error_fatal;
897 }
898
899 goto end_nosignal;
900 }
901 case LTTNG_CONSUMER_DATA_PENDING:
902 {
903 int32_t ret;
904 uint64_t id = msg.u.data_pending.session_id;
905
906 DBG("Kernel consumer data pending command for id %" PRIu64, id);
907
908 ret = consumer_data_pending(id);
909
910 health_code_update();
911
912 /* Send back returned value to session daemon */
913 ret = lttcomm_send_unix_sock(sock, &ret, sizeof(ret));
914 if (ret < 0) {
915 PERROR("send data pending ret code");
916 goto error_fatal;
917 }
918
919 /*
920 * No need to send back a status message since the data pending
921 * returned value is the response.
922 */
923 break;
924 }
925 case LTTNG_CONSUMER_SNAPSHOT_CHANNEL:
926 {
927 struct lttng_consumer_channel *channel;
928 uint64_t key = msg.u.snapshot_channel.key;
929
930 channel = consumer_find_channel(key);
931 if (!channel) {
932 ERR("Channel %" PRIu64 " not found", key);
933 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
934 } else {
935 pthread_mutex_lock(&channel->lock);
936 if (msg.u.snapshot_channel.metadata == 1) {
937 ret = lttng_kconsumer_snapshot_metadata(channel, key,
938 msg.u.snapshot_channel.pathname,
939 msg.u.snapshot_channel.relayd_id, ctx);
940 if (ret < 0) {
941 ERR("Snapshot metadata failed");
942 ret_code = LTTCOMM_CONSUMERD_SNAPSHOT_FAILED;
943 }
944 } else {
945 ret = lttng_kconsumer_snapshot_channel(channel, key,
946 msg.u.snapshot_channel.pathname,
947 msg.u.snapshot_channel.relayd_id,
948 msg.u.snapshot_channel.nb_packets_per_stream,
949 ctx);
950 if (ret < 0) {
951 ERR("Snapshot channel failed");
952 ret_code = LTTCOMM_CONSUMERD_SNAPSHOT_FAILED;
953 }
954 }
955 pthread_mutex_unlock(&channel->lock);
956 }
957 health_code_update();
958
959 ret = consumer_send_status_msg(sock, ret_code);
960 if (ret < 0) {
961 /* Somehow, the session daemon is not responding anymore. */
962 goto end_nosignal;
963 }
964 break;
965 }
966 case LTTNG_CONSUMER_DESTROY_CHANNEL:
967 {
968 uint64_t key = msg.u.destroy_channel.key;
969 struct lttng_consumer_channel *channel;
970
971 channel = consumer_find_channel(key);
972 if (!channel) {
973 ERR("Kernel consumer destroy channel %" PRIu64 " not found", key);
974 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
975 }
976
977 health_code_update();
978
979 ret = consumer_send_status_msg(sock, ret_code);
980 if (ret < 0) {
981 /* Somehow, the session daemon is not responding anymore. */
982 goto end_nosignal;
983 }
984
985 health_code_update();
986
987 /* Stop right now if no channel was found. */
988 if (!channel) {
989 goto end_nosignal;
990 }
991
992 /*
993 * This command should ONLY be issued for channel with streams set in
994 * no monitor mode.
995 */
996 assert(!channel->monitor);
997
998 /*
999 * The refcount should ALWAYS be 0 in the case of a channel in no
1000 * monitor mode.
1001 */
1002 assert(!uatomic_sub_return(&channel->refcount, 1));
1003
1004 consumer_del_channel(channel);
1005
1006 goto end_nosignal;
1007 }
1008 case LTTNG_CONSUMER_DISCARDED_EVENTS:
1009 {
1010 ssize_t ret;
1011 uint64_t count;
1012 struct lttng_consumer_channel *channel;
1013 uint64_t id = msg.u.discarded_events.session_id;
1014 uint64_t key = msg.u.discarded_events.channel_key;
1015
1016 DBG("Kernel consumer discarded events command for session id %"
1017 PRIu64 ", channel key %" PRIu64, id, key);
1018
1019 channel = consumer_find_channel(key);
1020 if (!channel) {
1021 ERR("Kernel consumer discarded events channel %"
1022 PRIu64 " not found", key);
1023 count = 0;
1024 } else {
1025 count = channel->discarded_events;
1026 }
1027
1028 health_code_update();
1029
1030 /* Send back returned value to session daemon */
1031 ret = lttcomm_send_unix_sock(sock, &count, sizeof(count));
1032 if (ret < 0) {
1033 PERROR("send discarded events");
1034 goto error_fatal;
1035 }
1036
1037 break;
1038 }
1039 case LTTNG_CONSUMER_LOST_PACKETS:
1040 {
1041 ssize_t ret;
1042 uint64_t count;
1043 struct lttng_consumer_channel *channel;
1044 uint64_t id = msg.u.lost_packets.session_id;
1045 uint64_t key = msg.u.lost_packets.channel_key;
1046
1047 DBG("Kernel consumer lost packets command for session id %"
1048 PRIu64 ", channel key %" PRIu64, id, key);
1049
1050 channel = consumer_find_channel(key);
1051 if (!channel) {
1052 ERR("Kernel consumer lost packets channel %"
1053 PRIu64 " not found", key);
1054 count = 0;
1055 } else {
1056 count = channel->lost_packets;
1057 }
1058
1059 health_code_update();
1060
1061 /* Send back returned value to session daemon */
1062 ret = lttcomm_send_unix_sock(sock, &count, sizeof(count));
1063 if (ret < 0) {
1064 PERROR("send lost packets");
1065 goto error_fatal;
1066 }
1067
1068 break;
1069 }
1070 case LTTNG_CONSUMER_SET_CHANNEL_MONITOR_PIPE:
1071 {
1072 int channel_monitor_pipe;
1073
1074 ret_code = LTTCOMM_CONSUMERD_SUCCESS;
1075 /* Successfully received the command's type. */
1076 ret = consumer_send_status_msg(sock, ret_code);
1077 if (ret < 0) {
1078 goto error_fatal;
1079 }
1080
1081 ret = lttcomm_recv_fds_unix_sock(sock, &channel_monitor_pipe,
1082 1);
1083 if (ret != sizeof(channel_monitor_pipe)) {
1084 ERR("Failed to receive channel monitor pipe");
1085 goto error_fatal;
1086 }
1087
1088 DBG("Received channel monitor pipe (%d)", channel_monitor_pipe);
1089 ret = consumer_timer_thread_set_channel_monitor_pipe(
1090 channel_monitor_pipe);
1091 if (!ret) {
1092 int flags;
1093
1094 ret_code = LTTCOMM_CONSUMERD_SUCCESS;
1095 /* Set the pipe as non-blocking. */
1096 ret = fcntl(channel_monitor_pipe, F_GETFL, 0);
1097 if (ret == -1) {
1098 PERROR("fcntl get flags of the channel monitoring pipe");
1099 goto error_fatal;
1100 }
1101 flags = ret;
1102
1103 ret = fcntl(channel_monitor_pipe, F_SETFL,
1104 flags | O_NONBLOCK);
1105 if (ret == -1) {
1106 PERROR("fcntl set O_NONBLOCK flag of the channel monitoring pipe");
1107 goto error_fatal;
1108 }
1109 DBG("Channel monitor pipe set as non-blocking");
1110 } else {
1111 ret_code = LTTCOMM_CONSUMERD_ALREADY_SET;
1112 }
1113 ret = consumer_send_status_msg(sock, ret_code);
1114 if (ret < 0) {
1115 goto error_fatal;
1116 }
1117 break;
1118 }
1119 case LTTNG_CONSUMER_ROTATE_CHANNEL:
1120 {
1121 struct lttng_consumer_channel *channel;
1122 uint64_t key = msg.u.rotate_channel.key;
1123
1124 DBG("Consumer rotate channel %" PRIu64, key);
1125
1126 channel = consumer_find_channel(key);
1127 if (!channel) {
1128 ERR("Channel %" PRIu64 " not found", key);
1129 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
1130 } else {
1131 /*
1132 * Sample the rotate position of all the streams in this channel.
1133 */
1134 ret = lttng_consumer_rotate_channel(channel, key,
1135 msg.u.rotate_channel.relayd_id,
1136 msg.u.rotate_channel.metadata,
1137 ctx);
1138 if (ret < 0) {
1139 ERR("Rotate channel failed");
1140 ret_code = LTTCOMM_CONSUMERD_ROTATION_FAIL;
1141 }
1142
1143 health_code_update();
1144 }
1145 ret = consumer_send_status_msg(sock, ret_code);
1146 if (ret < 0) {
1147 /* Somehow, the session daemon is not responding anymore. */
1148 goto end_nosignal;
1149 }
1150 if (channel) {
1151 /* Rotate the streams that are ready right now. */
1152 ret = lttng_consumer_rotate_ready_streams(
1153 channel, key, ctx);
1154 if (ret < 0) {
1155 ERR("Rotate ready streams failed");
1156 }
1157 }
1158
1159 break;
1160 }
1161 case LTTNG_CONSUMER_INIT:
1162 {
1163 ret_code = lttng_consumer_init_command(ctx,
1164 msg.u.init.sessiond_uuid);
1165 health_code_update();
1166 ret = consumer_send_status_msg(sock, ret_code);
1167 if (ret < 0) {
1168 /* Somehow, the session daemon is not responding anymore. */
1169 goto end_nosignal;
1170 }
1171 break;
1172 }
1173 case LTTNG_CONSUMER_CREATE_TRACE_CHUNK:
1174 {
1175 const struct lttng_credentials credentials = {
1176 .uid = msg.u.create_trace_chunk.credentials.value.uid,
1177 .gid = msg.u.create_trace_chunk.credentials.value.gid,
1178 };
1179 const bool is_local_trace =
1180 !msg.u.create_trace_chunk.relayd_id.is_set;
1181 const uint64_t relayd_id =
1182 msg.u.create_trace_chunk.relayd_id.value;
1183 const char *chunk_override_name =
1184 *msg.u.create_trace_chunk.override_name ?
1185 msg.u.create_trace_chunk.override_name :
1186 NULL;
1187 LTTNG_OPTIONAL(struct lttng_directory_handle) chunk_directory_handle =
1188 LTTNG_OPTIONAL_INIT;
1189
1190 /*
1191 * The session daemon will only provide a chunk directory file
1192 * descriptor for local traces.
1193 */
1194 if (is_local_trace) {
1195 int chunk_dirfd;
1196
1197 /* Acnowledge the reception of the command. */
1198 ret = consumer_send_status_msg(sock,
1199 LTTCOMM_CONSUMERD_SUCCESS);
1200 if (ret < 0) {
1201 /* Somehow, the session daemon is not responding anymore. */
1202 goto end_nosignal;
1203 }
1204
1205 ret = lttcomm_recv_fds_unix_sock(sock, &chunk_dirfd, 1);
1206 if (ret != sizeof(chunk_dirfd)) {
1207 ERR("Failed to receive trace chunk directory file descriptor");
1208 goto error_fatal;
1209 }
1210
1211 DBG("Received trace chunk directory fd (%d)",
1212 chunk_dirfd);
1213 ret = lttng_directory_handle_init_from_dirfd(
1214 &chunk_directory_handle.value,
1215 chunk_dirfd);
1216 if (ret) {
1217 ERR("Failed to initialize chunk directory handle from directory file descriptor");
1218 if (close(chunk_dirfd)) {
1219 PERROR("Failed to close chunk directory file descriptor");
1220 }
1221 goto error_fatal;
1222 }
1223 chunk_directory_handle.is_set = true;
1224 }
1225
1226 ret_code = lttng_consumer_create_trace_chunk(
1227 !is_local_trace ? &relayd_id : NULL,
1228 msg.u.create_trace_chunk.session_id,
1229 msg.u.create_trace_chunk.chunk_id,
1230 (time_t) msg.u.create_trace_chunk
1231 .creation_timestamp,
1232 chunk_override_name,
1233 msg.u.create_trace_chunk.credentials.is_set ?
1234 &credentials :
1235 NULL,
1236 chunk_directory_handle.is_set ?
1237 &chunk_directory_handle.value :
1238 NULL);
1239
1240 if (chunk_directory_handle.is_set) {
1241 lttng_directory_handle_fini(
1242 &chunk_directory_handle.value);
1243 }
1244 goto end_msg_sessiond;
1245 }
1246 case LTTNG_CONSUMER_CLOSE_TRACE_CHUNK:
1247 {
1248 enum lttng_trace_chunk_command_type close_command =
1249 msg.u.close_trace_chunk.close_command.value;
1250 const uint64_t relayd_id =
1251 msg.u.close_trace_chunk.relayd_id.value;
1252
1253 ret_code = lttng_consumer_close_trace_chunk(
1254 msg.u.close_trace_chunk.relayd_id.is_set ?
1255 &relayd_id :
1256 NULL,
1257 msg.u.close_trace_chunk.session_id,
1258 msg.u.close_trace_chunk.chunk_id,
1259 (time_t) msg.u.close_trace_chunk.close_timestamp,
1260 msg.u.close_trace_chunk.close_command.is_set ?
1261 &close_command :
1262 NULL);
1263 goto end_msg_sessiond;
1264 }
1265 case LTTNG_CONSUMER_TRACE_CHUNK_EXISTS:
1266 {
1267 const uint64_t relayd_id =
1268 msg.u.trace_chunk_exists.relayd_id.value;
1269
1270 ret_code = lttng_consumer_trace_chunk_exists(
1271 msg.u.trace_chunk_exists.relayd_id.is_set ?
1272 &relayd_id : NULL,
1273 msg.u.trace_chunk_exists.session_id,
1274 msg.u.trace_chunk_exists.chunk_id);
1275 goto end_msg_sessiond;
1276 }
1277 default:
1278 goto end_nosignal;
1279 }
1280
1281 end_nosignal:
1282 /*
1283 * Return 1 to indicate success since the 0 value can be a socket
1284 * shutdown during the recv() or send() call.
1285 */
1286 ret = 1;
1287 goto end;
1288 error_fatal:
1289 /* This will issue a consumer stop. */
1290 ret = -1;
1291 goto end;
1292 end_msg_sessiond:
1293 /*
1294 * The returned value here is not useful since either way we'll return 1 to
1295 * the caller because the session daemon socket management is done
1296 * elsewhere. Returning a negative code or 0 will shutdown the consumer.
1297 */
1298 ret = consumer_send_status_msg(sock, ret_code);
1299 if (ret < 0) {
1300 goto error_fatal;
1301 }
1302 ret = 1;
1303 end:
1304 health_code_update();
1305 rcu_read_unlock();
1306 return ret;
1307 }
1308
1309 /*
1310 * Populate index values of a kernel stream. Values are set in big endian order.
1311 *
1312 * Return 0 on success or else a negative value.
1313 */
1314 static int get_index_values(struct ctf_packet_index *index, int infd)
1315 {
1316 int ret;
1317
1318 ret = kernctl_get_timestamp_begin(infd, &index->timestamp_begin);
1319 if (ret < 0) {
1320 PERROR("kernctl_get_timestamp_begin");
1321 goto error;
1322 }
1323 index->timestamp_begin = htobe64(index->timestamp_begin);
1324
1325 ret = kernctl_get_timestamp_end(infd, &index->timestamp_end);
1326 if (ret < 0) {
1327 PERROR("kernctl_get_timestamp_end");
1328 goto error;
1329 }
1330 index->timestamp_end = htobe64(index->timestamp_end);
1331
1332 ret = kernctl_get_events_discarded(infd, &index->events_discarded);
1333 if (ret < 0) {
1334 PERROR("kernctl_get_events_discarded");
1335 goto error;
1336 }
1337 index->events_discarded = htobe64(index->events_discarded);
1338
1339 ret = kernctl_get_content_size(infd, &index->content_size);
1340 if (ret < 0) {
1341 PERROR("kernctl_get_content_size");
1342 goto error;
1343 }
1344 index->content_size = htobe64(index->content_size);
1345
1346 ret = kernctl_get_packet_size(infd, &index->packet_size);
1347 if (ret < 0) {
1348 PERROR("kernctl_get_packet_size");
1349 goto error;
1350 }
1351 index->packet_size = htobe64(index->packet_size);
1352
1353 ret = kernctl_get_stream_id(infd, &index->stream_id);
1354 if (ret < 0) {
1355 PERROR("kernctl_get_stream_id");
1356 goto error;
1357 }
1358 index->stream_id = htobe64(index->stream_id);
1359
1360 ret = kernctl_get_instance_id(infd, &index->stream_instance_id);
1361 if (ret < 0) {
1362 if (ret == -ENOTTY) {
1363 /* Command not implemented by lttng-modules. */
1364 index->stream_instance_id = -1ULL;
1365 } else {
1366 PERROR("kernctl_get_instance_id");
1367 goto error;
1368 }
1369 }
1370 index->stream_instance_id = htobe64(index->stream_instance_id);
1371
1372 ret = kernctl_get_sequence_number(infd, &index->packet_seq_num);
1373 if (ret < 0) {
1374 if (ret == -ENOTTY) {
1375 /* Command not implemented by lttng-modules. */
1376 index->packet_seq_num = -1ULL;
1377 ret = 0;
1378 } else {
1379 PERROR("kernctl_get_sequence_number");
1380 goto error;
1381 }
1382 }
1383 index->packet_seq_num = htobe64(index->packet_seq_num);
1384
1385 error:
1386 return ret;
1387 }
1388 /*
1389 * Sync metadata meaning request them to the session daemon and snapshot to the
1390 * metadata thread can consumer them.
1391 *
1392 * Metadata stream lock MUST be acquired.
1393 *
1394 * Return 0 if new metadatda is available, EAGAIN if the metadata stream
1395 * is empty or a negative value on error.
1396 */
1397 int lttng_kconsumer_sync_metadata(struct lttng_consumer_stream *metadata)
1398 {
1399 int ret;
1400
1401 assert(metadata);
1402
1403 ret = kernctl_buffer_flush(metadata->wait_fd);
1404 if (ret < 0) {
1405 ERR("Failed to flush kernel stream");
1406 goto end;
1407 }
1408
1409 ret = kernctl_snapshot(metadata->wait_fd);
1410 if (ret < 0) {
1411 if (ret != -EAGAIN) {
1412 ERR("Sync metadata, taking kernel snapshot failed.");
1413 goto end;
1414 }
1415 DBG("Sync metadata, no new kernel metadata");
1416 /* No new metadata, exit. */
1417 ret = ENODATA;
1418 goto end;
1419 }
1420
1421 end:
1422 return ret;
1423 }
1424
1425 static
1426 int update_stream_stats(struct lttng_consumer_stream *stream)
1427 {
1428 int ret;
1429 uint64_t seq, discarded;
1430
1431 ret = kernctl_get_sequence_number(stream->wait_fd, &seq);
1432 if (ret < 0) {
1433 if (ret == -ENOTTY) {
1434 /* Command not implemented by lttng-modules. */
1435 seq = -1ULL;
1436 } else {
1437 PERROR("kernctl_get_sequence_number");
1438 goto end;
1439 }
1440 }
1441
1442 /*
1443 * Start the sequence when we extract the first packet in case we don't
1444 * start at 0 (for example if a consumer is not connected to the
1445 * session immediately after the beginning).
1446 */
1447 if (stream->last_sequence_number == -1ULL) {
1448 stream->last_sequence_number = seq;
1449 } else if (seq > stream->last_sequence_number) {
1450 stream->chan->lost_packets += seq -
1451 stream->last_sequence_number - 1;
1452 } else {
1453 /* seq <= last_sequence_number */
1454 ERR("Sequence number inconsistent : prev = %" PRIu64
1455 ", current = %" PRIu64,
1456 stream->last_sequence_number, seq);
1457 ret = -1;
1458 goto end;
1459 }
1460 stream->last_sequence_number = seq;
1461
1462 ret = kernctl_get_events_discarded(stream->wait_fd, &discarded);
1463 if (ret < 0) {
1464 PERROR("kernctl_get_events_discarded");
1465 goto end;
1466 }
1467 if (discarded < stream->last_discarded_events) {
1468 /*
1469 * Overflow has occurred. We assume only one wrap-around
1470 * has occurred.
1471 */
1472 stream->chan->discarded_events += (1ULL << (CAA_BITS_PER_LONG - 1)) -
1473 stream->last_discarded_events + discarded;
1474 } else {
1475 stream->chan->discarded_events += discarded -
1476 stream->last_discarded_events;
1477 }
1478 stream->last_discarded_events = discarded;
1479 ret = 0;
1480
1481 end:
1482 return ret;
1483 }
1484
1485 /*
1486 * Check if the local version of the metadata stream matches with the version
1487 * of the metadata stream in the kernel. If it was updated, set the reset flag
1488 * on the stream.
1489 */
1490 static
1491 int metadata_stream_check_version(int infd, struct lttng_consumer_stream *stream)
1492 {
1493 int ret;
1494 uint64_t cur_version;
1495
1496 ret = kernctl_get_metadata_version(infd, &cur_version);
1497 if (ret < 0) {
1498 if (ret == -ENOTTY) {
1499 /*
1500 * LTTng-modules does not implement this
1501 * command.
1502 */
1503 ret = 0;
1504 goto end;
1505 }
1506 ERR("Failed to get the metadata version");
1507 goto end;
1508 }
1509
1510 if (stream->metadata_version == cur_version) {
1511 ret = 0;
1512 goto end;
1513 }
1514
1515 DBG("New metadata version detected");
1516 stream->metadata_version = cur_version;
1517 stream->reset_metadata_flag = 1;
1518 ret = 0;
1519
1520 end:
1521 return ret;
1522 }
1523
1524 /*
1525 * Consume data on a file descriptor and write it on a trace file.
1526 * The stream and channel locks must be held by the caller.
1527 */
1528 ssize_t lttng_kconsumer_read_subbuffer(struct lttng_consumer_stream *stream,
1529 struct lttng_consumer_local_data *ctx)
1530 {
1531 unsigned long len, subbuf_size, padding;
1532 int err, write_index = 1, rotation_ret;
1533 ssize_t ret = 0;
1534 int infd = stream->wait_fd;
1535 struct ctf_packet_index index;
1536
1537 DBG("In read_subbuffer (infd : %d)", infd);
1538
1539 /*
1540 * If the stream was flagged to be ready for rotation before we extract the
1541 * next packet, rotate it now.
1542 */
1543 if (stream->rotate_ready) {
1544 DBG("Rotate stream before extracting data");
1545 rotation_ret = lttng_consumer_rotate_stream(ctx, stream);
1546 if (rotation_ret < 0) {
1547 ERR("Stream rotation error");
1548 ret = -1;
1549 goto error;
1550 }
1551 }
1552
1553 /* Get the next subbuffer */
1554 err = kernctl_get_next_subbuf(infd);
1555 if (err != 0) {
1556 /*
1557 * This is a debug message even for single-threaded consumer,
1558 * because poll() have more relaxed criterions than get subbuf,
1559 * so get_subbuf may fail for short race windows where poll()
1560 * would issue wakeups.
1561 */
1562 DBG("Reserving sub buffer failed (everything is normal, "
1563 "it is due to concurrency)");
1564 ret = err;
1565 goto error;
1566 }
1567
1568 /* Get the full subbuffer size including padding */
1569 err = kernctl_get_padded_subbuf_size(infd, &len);
1570 if (err != 0) {
1571 PERROR("Getting sub-buffer len failed.");
1572 err = kernctl_put_subbuf(infd);
1573 if (err != 0) {
1574 if (err == -EFAULT) {
1575 PERROR("Error in unreserving sub buffer\n");
1576 } else if (err == -EIO) {
1577 /* Should never happen with newer LTTng versions */
1578 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1579 }
1580 ret = err;
1581 goto error;
1582 }
1583 ret = err;
1584 goto error;
1585 }
1586
1587 if (!stream->metadata_flag) {
1588 ret = get_index_values(&index, infd);
1589 if (ret < 0) {
1590 err = kernctl_put_subbuf(infd);
1591 if (err != 0) {
1592 if (err == -EFAULT) {
1593 PERROR("Error in unreserving sub buffer\n");
1594 } else if (err == -EIO) {
1595 /* Should never happen with newer LTTng versions */
1596 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1597 }
1598 ret = err;
1599 goto error;
1600 }
1601 goto error;
1602 }
1603 ret = update_stream_stats(stream);
1604 if (ret < 0) {
1605 err = kernctl_put_subbuf(infd);
1606 if (err != 0) {
1607 if (err == -EFAULT) {
1608 PERROR("Error in unreserving sub buffer\n");
1609 } else if (err == -EIO) {
1610 /* Should never happen with newer LTTng versions */
1611 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1612 }
1613 ret = err;
1614 goto error;
1615 }
1616 goto error;
1617 }
1618 } else {
1619 write_index = 0;
1620 ret = metadata_stream_check_version(infd, stream);
1621 if (ret < 0) {
1622 err = kernctl_put_subbuf(infd);
1623 if (err != 0) {
1624 if (err == -EFAULT) {
1625 PERROR("Error in unreserving sub buffer\n");
1626 } else if (err == -EIO) {
1627 /* Should never happen with newer LTTng versions */
1628 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1629 }
1630 ret = err;
1631 goto error;
1632 }
1633 goto error;
1634 }
1635 }
1636
1637 switch (stream->chan->output) {
1638 case CONSUMER_CHANNEL_SPLICE:
1639 /*
1640 * XXX: The lttng-modules splice "actor" does not handle copying
1641 * partial pages hence only using the subbuffer size without the
1642 * padding makes the splice fail.
1643 */
1644 subbuf_size = len;
1645 padding = 0;
1646
1647 /* splice the subbuffer to the tracefile */
1648 ret = lttng_consumer_on_read_subbuffer_splice(ctx, stream, subbuf_size,
1649 padding, &index);
1650 /*
1651 * XXX: Splice does not support network streaming so the return value
1652 * is simply checked against subbuf_size and not like the mmap() op.
1653 */
1654 if (ret != subbuf_size) {
1655 /*
1656 * display the error but continue processing to try
1657 * to release the subbuffer
1658 */
1659 ERR("Error splicing to tracefile (ret: %zd != len: %lu)",
1660 ret, subbuf_size);
1661 write_index = 0;
1662 }
1663 break;
1664 case CONSUMER_CHANNEL_MMAP:
1665 /* Get subbuffer size without padding */
1666 err = kernctl_get_subbuf_size(infd, &subbuf_size);
1667 if (err != 0) {
1668 PERROR("Getting sub-buffer len failed.");
1669 err = kernctl_put_subbuf(infd);
1670 if (err != 0) {
1671 if (err == -EFAULT) {
1672 PERROR("Error in unreserving sub buffer\n");
1673 } else if (err == -EIO) {
1674 /* Should never happen with newer LTTng versions */
1675 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1676 }
1677 ret = err;
1678 goto error;
1679 }
1680 ret = err;
1681 goto error;
1682 }
1683
1684 /* Make sure the tracer is not gone mad on us! */
1685 assert(len >= subbuf_size);
1686
1687 padding = len - subbuf_size;
1688
1689 /* write the subbuffer to the tracefile */
1690 ret = lttng_consumer_on_read_subbuffer_mmap(ctx, stream, subbuf_size,
1691 padding, &index);
1692 /*
1693 * The mmap operation should write subbuf_size amount of data when
1694 * network streaming or the full padding (len) size when we are _not_
1695 * streaming.
1696 */
1697 if ((ret != subbuf_size && stream->net_seq_idx != (uint64_t) -1ULL) ||
1698 (ret != len && stream->net_seq_idx == (uint64_t) -1ULL)) {
1699 /*
1700 * Display the error but continue processing to try to release the
1701 * subbuffer. This is a DBG statement since this is possible to
1702 * happen without being a critical error.
1703 */
1704 DBG("Error writing to tracefile "
1705 "(ret: %zd != len: %lu != subbuf_size: %lu)",
1706 ret, len, subbuf_size);
1707 write_index = 0;
1708 }
1709 break;
1710 default:
1711 ERR("Unknown output method");
1712 ret = -EPERM;
1713 }
1714
1715 err = kernctl_put_next_subbuf(infd);
1716 if (err != 0) {
1717 if (err == -EFAULT) {
1718 PERROR("Error in unreserving sub buffer\n");
1719 } else if (err == -EIO) {
1720 /* Should never happen with newer LTTng versions */
1721 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1722 }
1723 ret = err;
1724 goto error;
1725 }
1726
1727 /* Write index if needed. */
1728 if (!write_index) {
1729 goto rotate;
1730 }
1731
1732 if (stream->chan->live_timer_interval && !stream->metadata_flag) {
1733 /*
1734 * In live, block until all the metadata is sent.
1735 */
1736 pthread_mutex_lock(&stream->metadata_timer_lock);
1737 assert(!stream->missed_metadata_flush);
1738 stream->waiting_on_metadata = true;
1739 pthread_mutex_unlock(&stream->metadata_timer_lock);
1740
1741 err = consumer_stream_sync_metadata(ctx, stream->session_id);
1742
1743 pthread_mutex_lock(&stream->metadata_timer_lock);
1744 stream->waiting_on_metadata = false;
1745 if (stream->missed_metadata_flush) {
1746 stream->missed_metadata_flush = false;
1747 pthread_mutex_unlock(&stream->metadata_timer_lock);
1748 (void) consumer_flush_kernel_index(stream);
1749 } else {
1750 pthread_mutex_unlock(&stream->metadata_timer_lock);
1751 }
1752 if (err < 0) {
1753 goto error;
1754 }
1755 }
1756
1757 err = consumer_stream_write_index(stream, &index);
1758 if (err < 0) {
1759 goto error;
1760 }
1761
1762 rotate:
1763 /*
1764 * After extracting the packet, we check if the stream is now ready to be
1765 * rotated and perform the action immediately.
1766 */
1767 rotation_ret = lttng_consumer_stream_is_rotate_ready(stream);
1768 if (rotation_ret == 1) {
1769 rotation_ret = lttng_consumer_rotate_stream(ctx, stream);
1770 if (rotation_ret < 0) {
1771 ERR("Stream rotation error");
1772 ret = -1;
1773 goto error;
1774 }
1775 } else if (rotation_ret < 0) {
1776 ERR("Checking if stream is ready to rotate");
1777 ret = -1;
1778 goto error;
1779 }
1780
1781 error:
1782 return ret;
1783 }
1784
1785 int lttng_kconsumer_on_recv_stream(struct lttng_consumer_stream *stream)
1786 {
1787 int ret;
1788
1789 assert(stream);
1790
1791 /*
1792 * Don't create anything if this is set for streaming or if there is
1793 * no current trace chunk on the parent channel.
1794 */
1795 if (stream->net_seq_idx == (uint64_t) -1ULL && stream->chan->monitor &&
1796 stream->chan->trace_chunk) {
1797 ret = consumer_stream_create_output_files(stream, true);
1798 if (ret) {
1799 goto error;
1800 }
1801 }
1802
1803 if (stream->output == LTTNG_EVENT_MMAP) {
1804 /* get the len of the mmap region */
1805 unsigned long mmap_len;
1806
1807 ret = kernctl_get_mmap_len(stream->wait_fd, &mmap_len);
1808 if (ret != 0) {
1809 PERROR("kernctl_get_mmap_len");
1810 goto error_close_fd;
1811 }
1812 stream->mmap_len = (size_t) mmap_len;
1813
1814 stream->mmap_base = mmap(NULL, stream->mmap_len, PROT_READ,
1815 MAP_PRIVATE, stream->wait_fd, 0);
1816 if (stream->mmap_base == MAP_FAILED) {
1817 PERROR("Error mmaping");
1818 ret = -1;
1819 goto error_close_fd;
1820 }
1821 }
1822
1823 /* we return 0 to let the library handle the FD internally */
1824 return 0;
1825
1826 error_close_fd:
1827 if (stream->out_fd >= 0) {
1828 int err;
1829
1830 err = close(stream->out_fd);
1831 assert(!err);
1832 stream->out_fd = -1;
1833 }
1834 error:
1835 return ret;
1836 }
1837
1838 /*
1839 * Check if data is still being extracted from the buffers for a specific
1840 * stream. Consumer data lock MUST be acquired before calling this function
1841 * and the stream lock.
1842 *
1843 * Return 1 if the traced data are still getting read else 0 meaning that the
1844 * data is available for trace viewer reading.
1845 */
1846 int lttng_kconsumer_data_pending(struct lttng_consumer_stream *stream)
1847 {
1848 int ret;
1849
1850 assert(stream);
1851
1852 if (stream->endpoint_status != CONSUMER_ENDPOINT_ACTIVE) {
1853 ret = 0;
1854 goto end;
1855 }
1856
1857 ret = kernctl_get_next_subbuf(stream->wait_fd);
1858 if (ret == 0) {
1859 /* There is still data so let's put back this subbuffer. */
1860 ret = kernctl_put_subbuf(stream->wait_fd);
1861 assert(ret == 0);
1862 ret = 1; /* Data is pending */
1863 goto end;
1864 }
1865
1866 /* Data is NOT pending and ready to be read. */
1867 ret = 0;
1868
1869 end:
1870 return ret;
1871 }
This page took 0.068124 seconds and 3 git commands to generate.