f9278f75d580d366122ee0e6b2a3216ca5c46895
[lttng-tools.git] / src / common / kernel-consumer / kernel-consumer.c
1 /*
2 * Copyright (C) 2011 - Julien Desfossez <julien.desfossez@polymtl.ca>
3 * Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
4 * Copyright (C) 2017 - Jérémie Galarneau <jeremie.galarneau@efficios.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2 only,
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #define _LGPL_SOURCE
21 #include <assert.h>
22 #include <poll.h>
23 #include <pthread.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <sys/mman.h>
27 #include <sys/socket.h>
28 #include <sys/types.h>
29 #include <inttypes.h>
30 #include <unistd.h>
31 #include <sys/stat.h>
32
33 #include <bin/lttng-consumerd/health-consumerd.h>
34 #include <common/common.h>
35 #include <common/kernel-ctl/kernel-ctl.h>
36 #include <common/sessiond-comm/sessiond-comm.h>
37 #include <common/sessiond-comm/relayd.h>
38 #include <common/compat/fcntl.h>
39 #include <common/compat/endian.h>
40 #include <common/pipe.h>
41 #include <common/relayd/relayd.h>
42 #include <common/utils.h>
43 #include <common/consumer/consumer-stream.h>
44 #include <common/index/index.h>
45 #include <common/consumer/consumer-timer.h>
46 #include <common/optional.h>
47
48 #include "kernel-consumer.h"
49
50 extern struct lttng_consumer_global_data consumer_data;
51 extern int consumer_poll_timeout;
52
53 /*
54 * Take a snapshot for a specific fd
55 *
56 * Returns 0 on success, < 0 on error
57 */
58 int lttng_kconsumer_take_snapshot(struct lttng_consumer_stream *stream)
59 {
60 int ret = 0;
61 int infd = stream->wait_fd;
62
63 ret = kernctl_snapshot(infd);
64 /*
65 * -EAGAIN is not an error, it just means that there is no data to
66 * be read.
67 */
68 if (ret != 0 && ret != -EAGAIN) {
69 PERROR("Getting sub-buffer snapshot.");
70 }
71
72 return ret;
73 }
74
75 /*
76 * Sample consumed and produced positions for a specific fd.
77 *
78 * Returns 0 on success, < 0 on error.
79 */
80 int lttng_kconsumer_sample_snapshot_positions(
81 struct lttng_consumer_stream *stream)
82 {
83 assert(stream);
84
85 return kernctl_snapshot_sample_positions(stream->wait_fd);
86 }
87
88 /*
89 * Get the produced position
90 *
91 * Returns 0 on success, < 0 on error
92 */
93 int lttng_kconsumer_get_produced_snapshot(struct lttng_consumer_stream *stream,
94 unsigned long *pos)
95 {
96 int ret;
97 int infd = stream->wait_fd;
98
99 ret = kernctl_snapshot_get_produced(infd, pos);
100 if (ret != 0) {
101 PERROR("kernctl_snapshot_get_produced");
102 }
103
104 return ret;
105 }
106
107 /*
108 * Get the consumerd position
109 *
110 * Returns 0 on success, < 0 on error
111 */
112 int lttng_kconsumer_get_consumed_snapshot(struct lttng_consumer_stream *stream,
113 unsigned long *pos)
114 {
115 int ret;
116 int infd = stream->wait_fd;
117
118 ret = kernctl_snapshot_get_consumed(infd, pos);
119 if (ret != 0) {
120 PERROR("kernctl_snapshot_get_consumed");
121 }
122
123 return ret;
124 }
125
126 /*
127 * Take a snapshot of all the stream of a channel
128 * RCU read-side lock must be held across this function to ensure existence of
129 * channel. The channel lock must be held by the caller.
130 *
131 * Returns 0 on success, < 0 on error
132 */
133 static int lttng_kconsumer_snapshot_channel(
134 struct lttng_consumer_channel *channel,
135 uint64_t key, char *path, uint64_t relayd_id,
136 uint64_t nb_packets_per_stream,
137 struct lttng_consumer_local_data *ctx)
138 {
139 int ret;
140 struct lttng_consumer_stream *stream;
141
142 DBG("Kernel consumer snapshot channel %" PRIu64, key);
143
144 rcu_read_lock();
145
146 /* Splice is not supported yet for channel snapshot. */
147 if (channel->output != CONSUMER_CHANNEL_MMAP) {
148 ERR("Unsupported output type for channel \"%s\": mmap output is required to record a snapshot",
149 channel->name);
150 ret = -1;
151 goto end;
152 }
153
154 cds_list_for_each_entry(stream, &channel->streams.head, send_node) {
155 unsigned long consumed_pos, produced_pos;
156
157 health_code_update();
158
159 /*
160 * Lock stream because we are about to change its state.
161 */
162 pthread_mutex_lock(&stream->lock);
163
164 assert(channel->trace_chunk);
165 if (!lttng_trace_chunk_get(channel->trace_chunk)) {
166 /*
167 * Can't happen barring an internal error as the channel
168 * holds a reference to the trace chunk.
169 */
170 ERR("Failed to acquire reference to channel's trace chunk");
171 ret = -1;
172 goto end_unlock;
173 }
174 assert(!stream->trace_chunk);
175 stream->trace_chunk = channel->trace_chunk;
176
177 /*
178 * Assign the received relayd ID so we can use it for streaming. The streams
179 * are not visible to anyone so this is OK to change it.
180 */
181 stream->net_seq_idx = relayd_id;
182 channel->relayd_id = relayd_id;
183 if (relayd_id != (uint64_t) -1ULL) {
184 ret = consumer_send_relayd_stream(stream, path);
185 if (ret < 0) {
186 ERR("sending stream to relayd");
187 goto end_unlock;
188 }
189 } else {
190 ret = consumer_stream_create_output_files(stream,
191 false);
192 if (ret < 0) {
193 goto end_unlock;
194 }
195 DBG("Kernel consumer snapshot stream (%" PRIu64 ")",
196 stream->key);
197 }
198
199 ret = kernctl_buffer_flush_empty(stream->wait_fd);
200 if (ret < 0) {
201 /*
202 * Doing a buffer flush which does not take into
203 * account empty packets. This is not perfect
204 * for stream intersection, but required as a
205 * fall-back when "flush_empty" is not
206 * implemented by lttng-modules.
207 */
208 ret = kernctl_buffer_flush(stream->wait_fd);
209 if (ret < 0) {
210 ERR("Failed to flush kernel stream");
211 goto end_unlock;
212 }
213 goto end_unlock;
214 }
215
216 ret = lttng_kconsumer_take_snapshot(stream);
217 if (ret < 0) {
218 ERR("Taking kernel snapshot");
219 goto end_unlock;
220 }
221
222 ret = lttng_kconsumer_get_produced_snapshot(stream, &produced_pos);
223 if (ret < 0) {
224 ERR("Produced kernel snapshot position");
225 goto end_unlock;
226 }
227
228 ret = lttng_kconsumer_get_consumed_snapshot(stream, &consumed_pos);
229 if (ret < 0) {
230 ERR("Consumerd kernel snapshot position");
231 goto end_unlock;
232 }
233
234 consumed_pos = consumer_get_consume_start_pos(consumed_pos,
235 produced_pos, nb_packets_per_stream,
236 stream->max_sb_size);
237
238 while ((long) (consumed_pos - produced_pos) < 0) {
239 ssize_t read_len;
240 unsigned long len, padded_len;
241
242 health_code_update();
243
244 DBG("Kernel consumer taking snapshot at pos %lu", consumed_pos);
245
246 ret = kernctl_get_subbuf(stream->wait_fd, &consumed_pos);
247 if (ret < 0) {
248 if (ret != -EAGAIN) {
249 PERROR("kernctl_get_subbuf snapshot");
250 goto end_unlock;
251 }
252 DBG("Kernel consumer get subbuf failed. Skipping it.");
253 consumed_pos += stream->max_sb_size;
254 stream->chan->lost_packets++;
255 continue;
256 }
257
258 ret = kernctl_get_subbuf_size(stream->wait_fd, &len);
259 if (ret < 0) {
260 ERR("Snapshot kernctl_get_subbuf_size");
261 goto error_put_subbuf;
262 }
263
264 ret = kernctl_get_padded_subbuf_size(stream->wait_fd, &padded_len);
265 if (ret < 0) {
266 ERR("Snapshot kernctl_get_padded_subbuf_size");
267 goto error_put_subbuf;
268 }
269
270 read_len = lttng_consumer_on_read_subbuffer_mmap(ctx, stream, len,
271 padded_len - len, NULL);
272 /*
273 * We write the padded len in local tracefiles but the data len
274 * when using a relay. Display the error but continue processing
275 * to try to release the subbuffer.
276 */
277 if (relayd_id != (uint64_t) -1ULL) {
278 if (read_len != len) {
279 ERR("Error sending to the relay (ret: %zd != len: %lu)",
280 read_len, len);
281 }
282 } else {
283 if (read_len != padded_len) {
284 ERR("Error writing to tracefile (ret: %zd != len: %lu)",
285 read_len, padded_len);
286 }
287 }
288
289 ret = kernctl_put_subbuf(stream->wait_fd);
290 if (ret < 0) {
291 ERR("Snapshot kernctl_put_subbuf");
292 goto end_unlock;
293 }
294 consumed_pos += stream->max_sb_size;
295 }
296
297 if (relayd_id == (uint64_t) -1ULL) {
298 if (stream->out_fd >= 0) {
299 ret = close(stream->out_fd);
300 if (ret < 0) {
301 PERROR("Kernel consumer snapshot close out_fd");
302 goto end_unlock;
303 }
304 stream->out_fd = -1;
305 }
306 } else {
307 close_relayd_stream(stream);
308 stream->net_seq_idx = (uint64_t) -1ULL;
309 }
310 lttng_trace_chunk_put(stream->trace_chunk);
311 stream->trace_chunk = NULL;
312 pthread_mutex_unlock(&stream->lock);
313 }
314
315 /* All good! */
316 ret = 0;
317 goto end;
318
319 error_put_subbuf:
320 ret = kernctl_put_subbuf(stream->wait_fd);
321 if (ret < 0) {
322 ERR("Snapshot kernctl_put_subbuf error path");
323 }
324 end_unlock:
325 pthread_mutex_unlock(&stream->lock);
326 end:
327 rcu_read_unlock();
328 return ret;
329 }
330
331 /*
332 * Read the whole metadata available for a snapshot.
333 * RCU read-side lock must be held across this function to ensure existence of
334 * metadata_channel. The channel lock must be held by the caller.
335 *
336 * Returns 0 on success, < 0 on error
337 */
338 static int lttng_kconsumer_snapshot_metadata(
339 struct lttng_consumer_channel *metadata_channel,
340 uint64_t key, char *path, uint64_t relayd_id,
341 struct lttng_consumer_local_data *ctx)
342 {
343 int ret, use_relayd = 0;
344 ssize_t ret_read;
345 struct lttng_consumer_stream *metadata_stream;
346
347 assert(ctx);
348
349 DBG("Kernel consumer snapshot metadata with key %" PRIu64 " at path %s",
350 key, path);
351
352 rcu_read_lock();
353
354 metadata_stream = metadata_channel->metadata_stream;
355 assert(metadata_stream);
356
357 pthread_mutex_lock(&metadata_stream->lock);
358 assert(metadata_channel->trace_chunk);
359 assert(metadata_stream->trace_chunk);
360
361 /* Flag once that we have a valid relayd for the stream. */
362 if (relayd_id != (uint64_t) -1ULL) {
363 use_relayd = 1;
364 }
365
366 if (use_relayd) {
367 ret = consumer_send_relayd_stream(metadata_stream, path);
368 if (ret < 0) {
369 goto error_snapshot;
370 }
371 } else {
372 ret = consumer_stream_create_output_files(metadata_stream,
373 false);
374 if (ret < 0) {
375 goto error_snapshot;
376 }
377 }
378
379 do {
380 health_code_update();
381
382 ret_read = lttng_kconsumer_read_subbuffer(metadata_stream, ctx);
383 if (ret_read < 0) {
384 if (ret_read != -EAGAIN) {
385 ERR("Kernel snapshot reading metadata subbuffer (ret: %zd)",
386 ret_read);
387 ret = ret_read;
388 goto error_snapshot;
389 }
390 /* ret_read is negative at this point so we will exit the loop. */
391 continue;
392 }
393 } while (ret_read >= 0);
394
395 if (use_relayd) {
396 close_relayd_stream(metadata_stream);
397 metadata_stream->net_seq_idx = (uint64_t) -1ULL;
398 } else {
399 if (metadata_stream->out_fd >= 0) {
400 ret = close(metadata_stream->out_fd);
401 if (ret < 0) {
402 PERROR("Kernel consumer snapshot metadata close out_fd");
403 /*
404 * Don't go on error here since the snapshot was successful at this
405 * point but somehow the close failed.
406 */
407 }
408 metadata_stream->out_fd = -1;
409 lttng_trace_chunk_put(metadata_stream->trace_chunk);
410 metadata_stream->trace_chunk = NULL;
411 }
412 }
413
414 ret = 0;
415 error_snapshot:
416 pthread_mutex_unlock(&metadata_stream->lock);
417 cds_list_del(&metadata_stream->send_node);
418 consumer_stream_destroy(metadata_stream, NULL);
419 metadata_channel->metadata_stream = NULL;
420 rcu_read_unlock();
421 return ret;
422 }
423
424 /*
425 * Receive command from session daemon and process it.
426 *
427 * Return 1 on success else a negative value or 0.
428 */
429 int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
430 int sock, struct pollfd *consumer_sockpoll)
431 {
432 ssize_t ret;
433 enum lttcomm_return_code ret_code = LTTCOMM_CONSUMERD_SUCCESS;
434 struct lttcomm_consumer_msg msg;
435
436 health_code_update();
437
438 ret = lttcomm_recv_unix_sock(sock, &msg, sizeof(msg));
439 if (ret != sizeof(msg)) {
440 if (ret > 0) {
441 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_ERROR_RECV_CMD);
442 ret = -1;
443 }
444 return ret;
445 }
446
447 health_code_update();
448
449 /* Deprecated command */
450 assert(msg.cmd_type != LTTNG_CONSUMER_STOP);
451
452 health_code_update();
453
454 /* relayd needs RCU read-side protection */
455 rcu_read_lock();
456
457 switch (msg.cmd_type) {
458 case LTTNG_CONSUMER_ADD_RELAYD_SOCKET:
459 {
460 /* Session daemon status message are handled in the following call. */
461 consumer_add_relayd_socket(msg.u.relayd_sock.net_index,
462 msg.u.relayd_sock.type, ctx, sock, consumer_sockpoll,
463 &msg.u.relayd_sock.sock, msg.u.relayd_sock.session_id,
464 msg.u.relayd_sock.relayd_session_id);
465 goto end_nosignal;
466 }
467 case LTTNG_CONSUMER_ADD_CHANNEL:
468 {
469 struct lttng_consumer_channel *new_channel;
470 int ret_recv;
471 const uint64_t chunk_id = msg.u.channel.chunk_id.value;
472
473 health_code_update();
474
475 /* First send a status message before receiving the fds. */
476 ret = consumer_send_status_msg(sock, ret_code);
477 if (ret < 0) {
478 /* Somehow, the session daemon is not responding anymore. */
479 goto error_fatal;
480 }
481
482 health_code_update();
483
484 DBG("consumer_add_channel %" PRIu64, msg.u.channel.channel_key);
485 new_channel = consumer_allocate_channel(msg.u.channel.channel_key,
486 msg.u.channel.session_id,
487 msg.u.channel.chunk_id.is_set ?
488 &chunk_id : NULL,
489 msg.u.channel.pathname,
490 msg.u.channel.name,
491 msg.u.channel.relayd_id, msg.u.channel.output,
492 msg.u.channel.tracefile_size,
493 msg.u.channel.tracefile_count, 0,
494 msg.u.channel.monitor,
495 msg.u.channel.live_timer_interval,
496 NULL, NULL);
497 if (new_channel == NULL) {
498 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_OUTFD_ERROR);
499 goto end_nosignal;
500 }
501 new_channel->nb_init_stream_left = msg.u.channel.nb_init_streams;
502 switch (msg.u.channel.output) {
503 case LTTNG_EVENT_SPLICE:
504 new_channel->output = CONSUMER_CHANNEL_SPLICE;
505 break;
506 case LTTNG_EVENT_MMAP:
507 new_channel->output = CONSUMER_CHANNEL_MMAP;
508 break;
509 default:
510 ERR("Channel output unknown %d", msg.u.channel.output);
511 goto end_nosignal;
512 }
513
514 /* Translate and save channel type. */
515 switch (msg.u.channel.type) {
516 case CONSUMER_CHANNEL_TYPE_DATA:
517 case CONSUMER_CHANNEL_TYPE_METADATA:
518 new_channel->type = msg.u.channel.type;
519 break;
520 default:
521 assert(0);
522 goto end_nosignal;
523 };
524
525 health_code_update();
526
527 if (ctx->on_recv_channel != NULL) {
528 ret_recv = ctx->on_recv_channel(new_channel);
529 if (ret_recv == 0) {
530 ret = consumer_add_channel(new_channel, ctx);
531 } else if (ret_recv < 0) {
532 goto end_nosignal;
533 }
534 } else {
535 ret = consumer_add_channel(new_channel, ctx);
536 }
537 if (msg.u.channel.type == CONSUMER_CHANNEL_TYPE_DATA && !ret) {
538 int monitor_start_ret;
539
540 DBG("Consumer starting monitor timer");
541 consumer_timer_live_start(new_channel,
542 msg.u.channel.live_timer_interval);
543 monitor_start_ret = consumer_timer_monitor_start(
544 new_channel,
545 msg.u.channel.monitor_timer_interval);
546 if (monitor_start_ret < 0) {
547 ERR("Starting channel monitoring timer failed");
548 goto end_nosignal;
549 }
550
551 }
552
553 health_code_update();
554
555 /* If we received an error in add_channel, we need to report it. */
556 if (ret < 0) {
557 ret = consumer_send_status_msg(sock, ret);
558 if (ret < 0) {
559 goto error_fatal;
560 }
561 goto end_nosignal;
562 }
563
564 goto end_nosignal;
565 }
566 case LTTNG_CONSUMER_ADD_STREAM:
567 {
568 int fd;
569 struct lttng_pipe *stream_pipe;
570 struct lttng_consumer_stream *new_stream;
571 struct lttng_consumer_channel *channel;
572 int alloc_ret = 0;
573
574 /*
575 * Get stream's channel reference. Needed when adding the stream to the
576 * global hash table.
577 */
578 channel = consumer_find_channel(msg.u.stream.channel_key);
579 if (!channel) {
580 /*
581 * We could not find the channel. Can happen if cpu hotplug
582 * happens while tearing down.
583 */
584 ERR("Unable to find channel key %" PRIu64, msg.u.stream.channel_key);
585 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
586 }
587
588 health_code_update();
589
590 /* First send a status message before receiving the fds. */
591 ret = consumer_send_status_msg(sock, ret_code);
592 if (ret < 0) {
593 /* Somehow, the session daemon is not responding anymore. */
594 goto error_add_stream_fatal;
595 }
596
597 health_code_update();
598
599 if (ret_code != LTTCOMM_CONSUMERD_SUCCESS) {
600 /* Channel was not found. */
601 goto error_add_stream_nosignal;
602 }
603
604 /* Blocking call */
605 health_poll_entry();
606 ret = lttng_consumer_poll_socket(consumer_sockpoll);
607 health_poll_exit();
608 if (ret) {
609 goto error_add_stream_fatal;
610 }
611
612 health_code_update();
613
614 /* Get stream file descriptor from socket */
615 ret = lttcomm_recv_fds_unix_sock(sock, &fd, 1);
616 if (ret != sizeof(fd)) {
617 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_ERROR_RECV_FD);
618 goto end;
619 }
620
621 health_code_update();
622
623 /*
624 * Send status code to session daemon only if the recv works. If the
625 * above recv() failed, the session daemon is notified through the
626 * error socket and the teardown is eventually done.
627 */
628 ret = consumer_send_status_msg(sock, ret_code);
629 if (ret < 0) {
630 /* Somehow, the session daemon is not responding anymore. */
631 goto error_add_stream_nosignal;
632 }
633
634 health_code_update();
635
636 pthread_mutex_lock(&channel->lock);
637 new_stream = consumer_allocate_stream(channel->key,
638 fd,
639 channel->name,
640 channel->relayd_id,
641 channel->session_id,
642 channel->trace_chunk,
643 msg.u.stream.cpu,
644 &alloc_ret,
645 channel->type,
646 channel->monitor);
647 if (new_stream == NULL) {
648 switch (alloc_ret) {
649 case -ENOMEM:
650 case -EINVAL:
651 default:
652 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_OUTFD_ERROR);
653 break;
654 }
655 pthread_mutex_unlock(&channel->lock);
656 goto error_add_stream_nosignal;
657 }
658
659 new_stream->chan = channel;
660 new_stream->wait_fd = fd;
661 ret = kernctl_get_max_subbuf_size(new_stream->wait_fd,
662 &new_stream->max_sb_size);
663 if (ret < 0) {
664 pthread_mutex_unlock(&channel->lock);
665 ERR("Failed to get kernel maximal subbuffer size");
666 goto error_add_stream_nosignal;
667 }
668
669 consumer_stream_update_channel_attributes(new_stream,
670 channel);
671 switch (channel->output) {
672 case CONSUMER_CHANNEL_SPLICE:
673 new_stream->output = LTTNG_EVENT_SPLICE;
674 ret = utils_create_pipe(new_stream->splice_pipe);
675 if (ret < 0) {
676 pthread_mutex_unlock(&channel->lock);
677 goto error_add_stream_nosignal;
678 }
679 break;
680 case CONSUMER_CHANNEL_MMAP:
681 new_stream->output = LTTNG_EVENT_MMAP;
682 break;
683 default:
684 ERR("Stream output unknown %d", channel->output);
685 pthread_mutex_unlock(&channel->lock);
686 goto error_add_stream_nosignal;
687 }
688
689 /*
690 * We've just assigned the channel to the stream so increment the
691 * refcount right now. We don't need to increment the refcount for
692 * streams in no monitor because we handle manually the cleanup of
693 * those. It is very important to make sure there is NO prior
694 * consumer_del_stream() calls or else the refcount will be unbalanced.
695 */
696 if (channel->monitor) {
697 uatomic_inc(&new_stream->chan->refcount);
698 }
699
700 /*
701 * The buffer flush is done on the session daemon side for the kernel
702 * so no need for the stream "hangup_flush_done" variable to be
703 * tracked. This is important for a kernel stream since we don't rely
704 * on the flush state of the stream to read data. It's not the case for
705 * user space tracing.
706 */
707 new_stream->hangup_flush_done = 0;
708
709 health_code_update();
710
711 pthread_mutex_lock(&new_stream->lock);
712 if (ctx->on_recv_stream) {
713 ret = ctx->on_recv_stream(new_stream);
714 if (ret < 0) {
715 pthread_mutex_unlock(&new_stream->lock);
716 pthread_mutex_unlock(&channel->lock);
717 consumer_stream_free(new_stream);
718 goto error_add_stream_nosignal;
719 }
720 }
721 health_code_update();
722
723 if (new_stream->metadata_flag) {
724 channel->metadata_stream = new_stream;
725 }
726
727 /* Do not monitor this stream. */
728 if (!channel->monitor) {
729 DBG("Kernel consumer add stream %s in no monitor mode with "
730 "relayd id %" PRIu64, new_stream->name,
731 new_stream->net_seq_idx);
732 cds_list_add(&new_stream->send_node, &channel->streams.head);
733 pthread_mutex_unlock(&new_stream->lock);
734 pthread_mutex_unlock(&channel->lock);
735 goto end_add_stream;
736 }
737
738 /* Send stream to relayd if the stream has an ID. */
739 if (new_stream->net_seq_idx != (uint64_t) -1ULL) {
740 ret = consumer_send_relayd_stream(new_stream,
741 new_stream->chan->pathname);
742 if (ret < 0) {
743 pthread_mutex_unlock(&new_stream->lock);
744 pthread_mutex_unlock(&channel->lock);
745 consumer_stream_free(new_stream);
746 goto error_add_stream_nosignal;
747 }
748
749 /*
750 * If adding an extra stream to an already
751 * existing channel (e.g. cpu hotplug), we need
752 * to send the "streams_sent" command to relayd.
753 */
754 if (channel->streams_sent_to_relayd) {
755 ret = consumer_send_relayd_streams_sent(
756 new_stream->net_seq_idx);
757 if (ret < 0) {
758 pthread_mutex_unlock(&new_stream->lock);
759 pthread_mutex_unlock(&channel->lock);
760 goto error_add_stream_nosignal;
761 }
762 }
763 }
764 pthread_mutex_unlock(&new_stream->lock);
765 pthread_mutex_unlock(&channel->lock);
766
767 /* Get the right pipe where the stream will be sent. */
768 if (new_stream->metadata_flag) {
769 consumer_add_metadata_stream(new_stream);
770 stream_pipe = ctx->consumer_metadata_pipe;
771 } else {
772 consumer_add_data_stream(new_stream);
773 stream_pipe = ctx->consumer_data_pipe;
774 }
775
776 /* Visible to other threads */
777 new_stream->globally_visible = 1;
778
779 health_code_update();
780
781 ret = lttng_pipe_write(stream_pipe, &new_stream, sizeof(new_stream));
782 if (ret < 0) {
783 ERR("Consumer write %s stream to pipe %d",
784 new_stream->metadata_flag ? "metadata" : "data",
785 lttng_pipe_get_writefd(stream_pipe));
786 if (new_stream->metadata_flag) {
787 consumer_del_stream_for_metadata(new_stream);
788 } else {
789 consumer_del_stream_for_data(new_stream);
790 }
791 goto error_add_stream_nosignal;
792 }
793
794 DBG("Kernel consumer ADD_STREAM %s (fd: %d) %s with relayd id %" PRIu64,
795 new_stream->name, fd, new_stream->chan->pathname, new_stream->relayd_stream_id);
796 end_add_stream:
797 break;
798 error_add_stream_nosignal:
799 goto end_nosignal;
800 error_add_stream_fatal:
801 goto error_fatal;
802 }
803 case LTTNG_CONSUMER_STREAMS_SENT:
804 {
805 struct lttng_consumer_channel *channel;
806
807 /*
808 * Get stream's channel reference. Needed when adding the stream to the
809 * global hash table.
810 */
811 channel = consumer_find_channel(msg.u.sent_streams.channel_key);
812 if (!channel) {
813 /*
814 * We could not find the channel. Can happen if cpu hotplug
815 * happens while tearing down.
816 */
817 ERR("Unable to find channel key %" PRIu64,
818 msg.u.sent_streams.channel_key);
819 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
820 }
821
822 health_code_update();
823
824 /*
825 * Send status code to session daemon.
826 */
827 ret = consumer_send_status_msg(sock, ret_code);
828 if (ret < 0 || ret_code != LTTCOMM_CONSUMERD_SUCCESS) {
829 /* Somehow, the session daemon is not responding anymore. */
830 goto error_streams_sent_nosignal;
831 }
832
833 health_code_update();
834
835 /*
836 * We should not send this message if we don't monitor the
837 * streams in this channel.
838 */
839 if (!channel->monitor) {
840 goto end_error_streams_sent;
841 }
842
843 health_code_update();
844 /* Send stream to relayd if the stream has an ID. */
845 if (msg.u.sent_streams.net_seq_idx != (uint64_t) -1ULL) {
846 ret = consumer_send_relayd_streams_sent(
847 msg.u.sent_streams.net_seq_idx);
848 if (ret < 0) {
849 goto error_streams_sent_nosignal;
850 }
851 channel->streams_sent_to_relayd = true;
852 }
853 end_error_streams_sent:
854 break;
855 error_streams_sent_nosignal:
856 goto end_nosignal;
857 }
858 case LTTNG_CONSUMER_UPDATE_STREAM:
859 {
860 rcu_read_unlock();
861 return -ENOSYS;
862 }
863 case LTTNG_CONSUMER_DESTROY_RELAYD:
864 {
865 uint64_t index = msg.u.destroy_relayd.net_seq_idx;
866 struct consumer_relayd_sock_pair *relayd;
867
868 DBG("Kernel consumer destroying relayd %" PRIu64, index);
869
870 /* Get relayd reference if exists. */
871 relayd = consumer_find_relayd(index);
872 if (relayd == NULL) {
873 DBG("Unable to find relayd %" PRIu64, index);
874 ret_code = LTTCOMM_CONSUMERD_RELAYD_FAIL;
875 }
876
877 /*
878 * Each relayd socket pair has a refcount of stream attached to it
879 * which tells if the relayd is still active or not depending on the
880 * refcount value.
881 *
882 * This will set the destroy flag of the relayd object and destroy it
883 * if the refcount reaches zero when called.
884 *
885 * The destroy can happen either here or when a stream fd hangs up.
886 */
887 if (relayd) {
888 consumer_flag_relayd_for_destroy(relayd);
889 }
890
891 health_code_update();
892
893 ret = consumer_send_status_msg(sock, ret_code);
894 if (ret < 0) {
895 /* Somehow, the session daemon is not responding anymore. */
896 goto error_fatal;
897 }
898
899 goto end_nosignal;
900 }
901 case LTTNG_CONSUMER_DATA_PENDING:
902 {
903 int32_t ret;
904 uint64_t id = msg.u.data_pending.session_id;
905
906 DBG("Kernel consumer data pending command for id %" PRIu64, id);
907
908 ret = consumer_data_pending(id);
909
910 health_code_update();
911
912 /* Send back returned value to session daemon */
913 ret = lttcomm_send_unix_sock(sock, &ret, sizeof(ret));
914 if (ret < 0) {
915 PERROR("send data pending ret code");
916 goto error_fatal;
917 }
918
919 /*
920 * No need to send back a status message since the data pending
921 * returned value is the response.
922 */
923 break;
924 }
925 case LTTNG_CONSUMER_SNAPSHOT_CHANNEL:
926 {
927 struct lttng_consumer_channel *channel;
928 uint64_t key = msg.u.snapshot_channel.key;
929
930 channel = consumer_find_channel(key);
931 if (!channel) {
932 ERR("Channel %" PRIu64 " not found", key);
933 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
934 } else {
935 pthread_mutex_lock(&channel->lock);
936 if (msg.u.snapshot_channel.metadata == 1) {
937 ret = lttng_kconsumer_snapshot_metadata(channel, key,
938 msg.u.snapshot_channel.pathname,
939 msg.u.snapshot_channel.relayd_id, ctx);
940 if (ret < 0) {
941 ERR("Snapshot metadata failed");
942 ret_code = LTTCOMM_CONSUMERD_SNAPSHOT_FAILED;
943 }
944 } else {
945 ret = lttng_kconsumer_snapshot_channel(channel, key,
946 msg.u.snapshot_channel.pathname,
947 msg.u.snapshot_channel.relayd_id,
948 msg.u.snapshot_channel.nb_packets_per_stream,
949 ctx);
950 if (ret < 0) {
951 ERR("Snapshot channel failed");
952 ret_code = LTTCOMM_CONSUMERD_SNAPSHOT_FAILED;
953 }
954 }
955 pthread_mutex_unlock(&channel->lock);
956 }
957 health_code_update();
958
959 ret = consumer_send_status_msg(sock, ret_code);
960 if (ret < 0) {
961 /* Somehow, the session daemon is not responding anymore. */
962 goto end_nosignal;
963 }
964 break;
965 }
966 case LTTNG_CONSUMER_DESTROY_CHANNEL:
967 {
968 uint64_t key = msg.u.destroy_channel.key;
969 struct lttng_consumer_channel *channel;
970
971 channel = consumer_find_channel(key);
972 if (!channel) {
973 ERR("Kernel consumer destroy channel %" PRIu64 " not found", key);
974 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
975 }
976
977 health_code_update();
978
979 ret = consumer_send_status_msg(sock, ret_code);
980 if (ret < 0) {
981 /* Somehow, the session daemon is not responding anymore. */
982 goto end_destroy_channel;
983 }
984
985 health_code_update();
986
987 /* Stop right now if no channel was found. */
988 if (!channel) {
989 goto end_destroy_channel;
990 }
991
992 /*
993 * This command should ONLY be issued for channel with streams set in
994 * no monitor mode.
995 */
996 assert(!channel->monitor);
997
998 /*
999 * The refcount should ALWAYS be 0 in the case of a channel in no
1000 * monitor mode.
1001 */
1002 assert(!uatomic_sub_return(&channel->refcount, 1));
1003
1004 consumer_del_channel(channel);
1005 end_destroy_channel:
1006 goto end_nosignal;
1007 }
1008 case LTTNG_CONSUMER_DISCARDED_EVENTS:
1009 {
1010 ssize_t ret;
1011 uint64_t count;
1012 struct lttng_consumer_channel *channel;
1013 uint64_t id = msg.u.discarded_events.session_id;
1014 uint64_t key = msg.u.discarded_events.channel_key;
1015
1016 DBG("Kernel consumer discarded events command for session id %"
1017 PRIu64 ", channel key %" PRIu64, id, key);
1018
1019 channel = consumer_find_channel(key);
1020 if (!channel) {
1021 ERR("Kernel consumer discarded events channel %"
1022 PRIu64 " not found", key);
1023 count = 0;
1024 } else {
1025 count = channel->discarded_events;
1026 }
1027
1028 health_code_update();
1029
1030 /* Send back returned value to session daemon */
1031 ret = lttcomm_send_unix_sock(sock, &count, sizeof(count));
1032 if (ret < 0) {
1033 PERROR("send discarded events");
1034 goto error_fatal;
1035 }
1036
1037 break;
1038 }
1039 case LTTNG_CONSUMER_LOST_PACKETS:
1040 {
1041 ssize_t ret;
1042 uint64_t count;
1043 struct lttng_consumer_channel *channel;
1044 uint64_t id = msg.u.lost_packets.session_id;
1045 uint64_t key = msg.u.lost_packets.channel_key;
1046
1047 DBG("Kernel consumer lost packets command for session id %"
1048 PRIu64 ", channel key %" PRIu64, id, key);
1049
1050 channel = consumer_find_channel(key);
1051 if (!channel) {
1052 ERR("Kernel consumer lost packets channel %"
1053 PRIu64 " not found", key);
1054 count = 0;
1055 } else {
1056 count = channel->lost_packets;
1057 }
1058
1059 health_code_update();
1060
1061 /* Send back returned value to session daemon */
1062 ret = lttcomm_send_unix_sock(sock, &count, sizeof(count));
1063 if (ret < 0) {
1064 PERROR("send lost packets");
1065 goto error_fatal;
1066 }
1067
1068 break;
1069 }
1070 case LTTNG_CONSUMER_SET_CHANNEL_MONITOR_PIPE:
1071 {
1072 int channel_monitor_pipe;
1073
1074 ret_code = LTTCOMM_CONSUMERD_SUCCESS;
1075 /* Successfully received the command's type. */
1076 ret = consumer_send_status_msg(sock, ret_code);
1077 if (ret < 0) {
1078 goto error_fatal;
1079 }
1080
1081 ret = lttcomm_recv_fds_unix_sock(sock, &channel_monitor_pipe,
1082 1);
1083 if (ret != sizeof(channel_monitor_pipe)) {
1084 ERR("Failed to receive channel monitor pipe");
1085 goto error_fatal;
1086 }
1087
1088 DBG("Received channel monitor pipe (%d)", channel_monitor_pipe);
1089 ret = consumer_timer_thread_set_channel_monitor_pipe(
1090 channel_monitor_pipe);
1091 if (!ret) {
1092 int flags;
1093
1094 ret_code = LTTCOMM_CONSUMERD_SUCCESS;
1095 /* Set the pipe as non-blocking. */
1096 ret = fcntl(channel_monitor_pipe, F_GETFL, 0);
1097 if (ret == -1) {
1098 PERROR("fcntl get flags of the channel monitoring pipe");
1099 goto error_fatal;
1100 }
1101 flags = ret;
1102
1103 ret = fcntl(channel_monitor_pipe, F_SETFL,
1104 flags | O_NONBLOCK);
1105 if (ret == -1) {
1106 PERROR("fcntl set O_NONBLOCK flag of the channel monitoring pipe");
1107 goto error_fatal;
1108 }
1109 DBG("Channel monitor pipe set as non-blocking");
1110 } else {
1111 ret_code = LTTCOMM_CONSUMERD_ALREADY_SET;
1112 }
1113 ret = consumer_send_status_msg(sock, ret_code);
1114 if (ret < 0) {
1115 goto error_fatal;
1116 }
1117 break;
1118 }
1119 case LTTNG_CONSUMER_ROTATE_CHANNEL:
1120 {
1121 struct lttng_consumer_channel *channel;
1122 uint64_t key = msg.u.rotate_channel.key;
1123
1124 DBG("Consumer rotate channel %" PRIu64, key);
1125
1126 channel = consumer_find_channel(key);
1127 if (!channel) {
1128 ERR("Channel %" PRIu64 " not found", key);
1129 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
1130 } else {
1131 /*
1132 * Sample the rotate position of all the streams in this channel.
1133 */
1134 ret = lttng_consumer_rotate_channel(channel, key,
1135 msg.u.rotate_channel.relayd_id,
1136 msg.u.rotate_channel.metadata,
1137 ctx);
1138 if (ret < 0) {
1139 ERR("Rotate channel failed");
1140 ret_code = LTTCOMM_CONSUMERD_ROTATION_FAIL;
1141 }
1142
1143 health_code_update();
1144 }
1145 ret = consumer_send_status_msg(sock, ret_code);
1146 if (ret < 0) {
1147 /* Somehow, the session daemon is not responding anymore. */
1148 goto error_rotate_channel;
1149 }
1150 if (channel) {
1151 /* Rotate the streams that are ready right now. */
1152 ret = lttng_consumer_rotate_ready_streams(
1153 channel, key, ctx);
1154 if (ret < 0) {
1155 ERR("Rotate ready streams failed");
1156 }
1157 }
1158 break;
1159 error_rotate_channel:
1160 goto end_nosignal;
1161 }
1162 case LTTNG_CONSUMER_INIT:
1163 {
1164 ret_code = lttng_consumer_init_command(ctx,
1165 msg.u.init.sessiond_uuid);
1166 health_code_update();
1167 ret = consumer_send_status_msg(sock, ret_code);
1168 if (ret < 0) {
1169 /* Somehow, the session daemon is not responding anymore. */
1170 goto end_nosignal;
1171 }
1172 break;
1173 }
1174 case LTTNG_CONSUMER_CREATE_TRACE_CHUNK:
1175 {
1176 const struct lttng_credentials credentials = {
1177 .uid = msg.u.create_trace_chunk.credentials.value.uid,
1178 .gid = msg.u.create_trace_chunk.credentials.value.gid,
1179 };
1180 const bool is_local_trace =
1181 !msg.u.create_trace_chunk.relayd_id.is_set;
1182 const uint64_t relayd_id =
1183 msg.u.create_trace_chunk.relayd_id.value;
1184 const char *chunk_override_name =
1185 *msg.u.create_trace_chunk.override_name ?
1186 msg.u.create_trace_chunk.override_name :
1187 NULL;
1188 LTTNG_OPTIONAL(struct lttng_directory_handle) chunk_directory_handle =
1189 LTTNG_OPTIONAL_INIT;
1190
1191 /*
1192 * The session daemon will only provide a chunk directory file
1193 * descriptor for local traces.
1194 */
1195 if (is_local_trace) {
1196 int chunk_dirfd;
1197
1198 /* Acknowledge the reception of the command. */
1199 ret = consumer_send_status_msg(sock,
1200 LTTCOMM_CONSUMERD_SUCCESS);
1201 if (ret < 0) {
1202 /* Somehow, the session daemon is not responding anymore. */
1203 goto end_nosignal;
1204 }
1205
1206 ret = lttcomm_recv_fds_unix_sock(sock, &chunk_dirfd, 1);
1207 if (ret != sizeof(chunk_dirfd)) {
1208 ERR("Failed to receive trace chunk directory file descriptor");
1209 goto error_fatal;
1210 }
1211
1212 DBG("Received trace chunk directory fd (%d)",
1213 chunk_dirfd);
1214 ret = lttng_directory_handle_init_from_dirfd(
1215 &chunk_directory_handle.value,
1216 chunk_dirfd);
1217 if (ret) {
1218 ERR("Failed to initialize chunk directory handle from directory file descriptor");
1219 if (close(chunk_dirfd)) {
1220 PERROR("Failed to close chunk directory file descriptor");
1221 }
1222 goto error_fatal;
1223 }
1224 chunk_directory_handle.is_set = true;
1225 }
1226
1227 ret_code = lttng_consumer_create_trace_chunk(
1228 !is_local_trace ? &relayd_id : NULL,
1229 msg.u.create_trace_chunk.session_id,
1230 msg.u.create_trace_chunk.chunk_id,
1231 (time_t) msg.u.create_trace_chunk
1232 .creation_timestamp,
1233 chunk_override_name,
1234 msg.u.create_trace_chunk.credentials.is_set ?
1235 &credentials :
1236 NULL,
1237 chunk_directory_handle.is_set ?
1238 &chunk_directory_handle.value :
1239 NULL);
1240
1241 if (chunk_directory_handle.is_set) {
1242 lttng_directory_handle_fini(
1243 &chunk_directory_handle.value);
1244 }
1245 goto end_msg_sessiond;
1246 }
1247 case LTTNG_CONSUMER_CLOSE_TRACE_CHUNK:
1248 {
1249 enum lttng_trace_chunk_command_type close_command =
1250 msg.u.close_trace_chunk.close_command.value;
1251 const uint64_t relayd_id =
1252 msg.u.close_trace_chunk.relayd_id.value;
1253
1254 ret_code = lttng_consumer_close_trace_chunk(
1255 msg.u.close_trace_chunk.relayd_id.is_set ?
1256 &relayd_id :
1257 NULL,
1258 msg.u.close_trace_chunk.session_id,
1259 msg.u.close_trace_chunk.chunk_id,
1260 (time_t) msg.u.close_trace_chunk.close_timestamp,
1261 msg.u.close_trace_chunk.close_command.is_set ?
1262 &close_command :
1263 NULL);
1264 goto end_msg_sessiond;
1265 }
1266 case LTTNG_CONSUMER_TRACE_CHUNK_EXISTS:
1267 {
1268 const uint64_t relayd_id =
1269 msg.u.trace_chunk_exists.relayd_id.value;
1270
1271 ret_code = lttng_consumer_trace_chunk_exists(
1272 msg.u.trace_chunk_exists.relayd_id.is_set ?
1273 &relayd_id : NULL,
1274 msg.u.trace_chunk_exists.session_id,
1275 msg.u.trace_chunk_exists.chunk_id);
1276 goto end_msg_sessiond;
1277 }
1278 default:
1279 goto end_nosignal;
1280 }
1281
1282 end_nosignal:
1283 /*
1284 * Return 1 to indicate success since the 0 value can be a socket
1285 * shutdown during the recv() or send() call.
1286 */
1287 ret = 1;
1288 goto end;
1289 error_fatal:
1290 /* This will issue a consumer stop. */
1291 ret = -1;
1292 goto end;
1293 end_msg_sessiond:
1294 /*
1295 * The returned value here is not useful since either way we'll return 1 to
1296 * the caller because the session daemon socket management is done
1297 * elsewhere. Returning a negative code or 0 will shutdown the consumer.
1298 */
1299 ret = consumer_send_status_msg(sock, ret_code);
1300 if (ret < 0) {
1301 goto error_fatal;
1302 }
1303 ret = 1;
1304 end:
1305 health_code_update();
1306 rcu_read_unlock();
1307 return ret;
1308 }
1309
1310 /*
1311 * Populate index values of a kernel stream. Values are set in big endian order.
1312 *
1313 * Return 0 on success or else a negative value.
1314 */
1315 static int get_index_values(struct ctf_packet_index *index, int infd)
1316 {
1317 int ret;
1318
1319 ret = kernctl_get_timestamp_begin(infd, &index->timestamp_begin);
1320 if (ret < 0) {
1321 PERROR("kernctl_get_timestamp_begin");
1322 goto error;
1323 }
1324 index->timestamp_begin = htobe64(index->timestamp_begin);
1325
1326 ret = kernctl_get_timestamp_end(infd, &index->timestamp_end);
1327 if (ret < 0) {
1328 PERROR("kernctl_get_timestamp_end");
1329 goto error;
1330 }
1331 index->timestamp_end = htobe64(index->timestamp_end);
1332
1333 ret = kernctl_get_events_discarded(infd, &index->events_discarded);
1334 if (ret < 0) {
1335 PERROR("kernctl_get_events_discarded");
1336 goto error;
1337 }
1338 index->events_discarded = htobe64(index->events_discarded);
1339
1340 ret = kernctl_get_content_size(infd, &index->content_size);
1341 if (ret < 0) {
1342 PERROR("kernctl_get_content_size");
1343 goto error;
1344 }
1345 index->content_size = htobe64(index->content_size);
1346
1347 ret = kernctl_get_packet_size(infd, &index->packet_size);
1348 if (ret < 0) {
1349 PERROR("kernctl_get_packet_size");
1350 goto error;
1351 }
1352 index->packet_size = htobe64(index->packet_size);
1353
1354 ret = kernctl_get_stream_id(infd, &index->stream_id);
1355 if (ret < 0) {
1356 PERROR("kernctl_get_stream_id");
1357 goto error;
1358 }
1359 index->stream_id = htobe64(index->stream_id);
1360
1361 ret = kernctl_get_instance_id(infd, &index->stream_instance_id);
1362 if (ret < 0) {
1363 if (ret == -ENOTTY) {
1364 /* Command not implemented by lttng-modules. */
1365 index->stream_instance_id = -1ULL;
1366 } else {
1367 PERROR("kernctl_get_instance_id");
1368 goto error;
1369 }
1370 }
1371 index->stream_instance_id = htobe64(index->stream_instance_id);
1372
1373 ret = kernctl_get_sequence_number(infd, &index->packet_seq_num);
1374 if (ret < 0) {
1375 if (ret == -ENOTTY) {
1376 /* Command not implemented by lttng-modules. */
1377 index->packet_seq_num = -1ULL;
1378 ret = 0;
1379 } else {
1380 PERROR("kernctl_get_sequence_number");
1381 goto error;
1382 }
1383 }
1384 index->packet_seq_num = htobe64(index->packet_seq_num);
1385
1386 error:
1387 return ret;
1388 }
1389 /*
1390 * Sync metadata meaning request them to the session daemon and snapshot to the
1391 * metadata thread can consumer them.
1392 *
1393 * Metadata stream lock MUST be acquired.
1394 *
1395 * Return 0 if new metadatda is available, EAGAIN if the metadata stream
1396 * is empty or a negative value on error.
1397 */
1398 int lttng_kconsumer_sync_metadata(struct lttng_consumer_stream *metadata)
1399 {
1400 int ret;
1401
1402 assert(metadata);
1403
1404 ret = kernctl_buffer_flush(metadata->wait_fd);
1405 if (ret < 0) {
1406 ERR("Failed to flush kernel stream");
1407 goto end;
1408 }
1409
1410 ret = kernctl_snapshot(metadata->wait_fd);
1411 if (ret < 0) {
1412 if (ret != -EAGAIN) {
1413 ERR("Sync metadata, taking kernel snapshot failed.");
1414 goto end;
1415 }
1416 DBG("Sync metadata, no new kernel metadata");
1417 /* No new metadata, exit. */
1418 ret = ENODATA;
1419 goto end;
1420 }
1421
1422 end:
1423 return ret;
1424 }
1425
1426 static
1427 int update_stream_stats(struct lttng_consumer_stream *stream)
1428 {
1429 int ret;
1430 uint64_t seq, discarded;
1431
1432 ret = kernctl_get_sequence_number(stream->wait_fd, &seq);
1433 if (ret < 0) {
1434 if (ret == -ENOTTY) {
1435 /* Command not implemented by lttng-modules. */
1436 seq = -1ULL;
1437 } else {
1438 PERROR("kernctl_get_sequence_number");
1439 goto end;
1440 }
1441 }
1442
1443 /*
1444 * Start the sequence when we extract the first packet in case we don't
1445 * start at 0 (for example if a consumer is not connected to the
1446 * session immediately after the beginning).
1447 */
1448 if (stream->last_sequence_number == -1ULL) {
1449 stream->last_sequence_number = seq;
1450 } else if (seq > stream->last_sequence_number) {
1451 stream->chan->lost_packets += seq -
1452 stream->last_sequence_number - 1;
1453 } else {
1454 /* seq <= last_sequence_number */
1455 ERR("Sequence number inconsistent : prev = %" PRIu64
1456 ", current = %" PRIu64,
1457 stream->last_sequence_number, seq);
1458 ret = -1;
1459 goto end;
1460 }
1461 stream->last_sequence_number = seq;
1462
1463 ret = kernctl_get_events_discarded(stream->wait_fd, &discarded);
1464 if (ret < 0) {
1465 PERROR("kernctl_get_events_discarded");
1466 goto end;
1467 }
1468 if (discarded < stream->last_discarded_events) {
1469 /*
1470 * Overflow has occurred. We assume only one wrap-around
1471 * has occurred.
1472 */
1473 stream->chan->discarded_events += (1ULL << (CAA_BITS_PER_LONG - 1)) -
1474 stream->last_discarded_events + discarded;
1475 } else {
1476 stream->chan->discarded_events += discarded -
1477 stream->last_discarded_events;
1478 }
1479 stream->last_discarded_events = discarded;
1480 ret = 0;
1481
1482 end:
1483 return ret;
1484 }
1485
1486 /*
1487 * Check if the local version of the metadata stream matches with the version
1488 * of the metadata stream in the kernel. If it was updated, set the reset flag
1489 * on the stream.
1490 */
1491 static
1492 int metadata_stream_check_version(int infd, struct lttng_consumer_stream *stream)
1493 {
1494 int ret;
1495 uint64_t cur_version;
1496
1497 ret = kernctl_get_metadata_version(infd, &cur_version);
1498 if (ret < 0) {
1499 if (ret == -ENOTTY) {
1500 /*
1501 * LTTng-modules does not implement this
1502 * command.
1503 */
1504 ret = 0;
1505 goto end;
1506 }
1507 ERR("Failed to get the metadata version");
1508 goto end;
1509 }
1510
1511 if (stream->metadata_version == cur_version) {
1512 ret = 0;
1513 goto end;
1514 }
1515
1516 DBG("New metadata version detected");
1517 stream->metadata_version = cur_version;
1518 stream->reset_metadata_flag = 1;
1519 ret = 0;
1520
1521 end:
1522 return ret;
1523 }
1524
1525 /*
1526 * Consume data on a file descriptor and write it on a trace file.
1527 * The stream and channel locks must be held by the caller.
1528 */
1529 ssize_t lttng_kconsumer_read_subbuffer(struct lttng_consumer_stream *stream,
1530 struct lttng_consumer_local_data *ctx)
1531 {
1532 unsigned long len, subbuf_size, padding;
1533 int err, write_index = 1, rotation_ret;
1534 ssize_t ret = 0;
1535 int infd = stream->wait_fd;
1536 struct ctf_packet_index index;
1537
1538 DBG("In read_subbuffer (infd : %d)", infd);
1539
1540 /*
1541 * If the stream was flagged to be ready for rotation before we extract the
1542 * next packet, rotate it now.
1543 */
1544 if (stream->rotate_ready) {
1545 DBG("Rotate stream before extracting data");
1546 rotation_ret = lttng_consumer_rotate_stream(ctx, stream);
1547 if (rotation_ret < 0) {
1548 ERR("Stream rotation error");
1549 ret = -1;
1550 goto error;
1551 }
1552 }
1553
1554 /* Get the next subbuffer */
1555 err = kernctl_get_next_subbuf(infd);
1556 if (err != 0) {
1557 /*
1558 * This is a debug message even for single-threaded consumer,
1559 * because poll() have more relaxed criterions than get subbuf,
1560 * so get_subbuf may fail for short race windows where poll()
1561 * would issue wakeups.
1562 */
1563 DBG("Reserving sub buffer failed (everything is normal, "
1564 "it is due to concurrency)");
1565 ret = err;
1566 goto error;
1567 }
1568
1569 /* Get the full subbuffer size including padding */
1570 err = kernctl_get_padded_subbuf_size(infd, &len);
1571 if (err != 0) {
1572 PERROR("Getting sub-buffer len failed.");
1573 err = kernctl_put_subbuf(infd);
1574 if (err != 0) {
1575 if (err == -EFAULT) {
1576 PERROR("Error in unreserving sub buffer\n");
1577 } else if (err == -EIO) {
1578 /* Should never happen with newer LTTng versions */
1579 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1580 }
1581 ret = err;
1582 goto error;
1583 }
1584 ret = err;
1585 goto error;
1586 }
1587
1588 if (!stream->metadata_flag) {
1589 ret = get_index_values(&index, infd);
1590 if (ret < 0) {
1591 err = kernctl_put_subbuf(infd);
1592 if (err != 0) {
1593 if (err == -EFAULT) {
1594 PERROR("Error in unreserving sub buffer\n");
1595 } else if (err == -EIO) {
1596 /* Should never happen with newer LTTng versions */
1597 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1598 }
1599 ret = err;
1600 goto error;
1601 }
1602 goto error;
1603 }
1604 ret = update_stream_stats(stream);
1605 if (ret < 0) {
1606 err = kernctl_put_subbuf(infd);
1607 if (err != 0) {
1608 if (err == -EFAULT) {
1609 PERROR("Error in unreserving sub buffer\n");
1610 } else if (err == -EIO) {
1611 /* Should never happen with newer LTTng versions */
1612 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1613 }
1614 ret = err;
1615 goto error;
1616 }
1617 goto error;
1618 }
1619 } else {
1620 write_index = 0;
1621 ret = metadata_stream_check_version(infd, stream);
1622 if (ret < 0) {
1623 err = kernctl_put_subbuf(infd);
1624 if (err != 0) {
1625 if (err == -EFAULT) {
1626 PERROR("Error in unreserving sub buffer\n");
1627 } else if (err == -EIO) {
1628 /* Should never happen with newer LTTng versions */
1629 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1630 }
1631 ret = err;
1632 goto error;
1633 }
1634 goto error;
1635 }
1636 }
1637
1638 switch (stream->chan->output) {
1639 case CONSUMER_CHANNEL_SPLICE:
1640 /*
1641 * XXX: The lttng-modules splice "actor" does not handle copying
1642 * partial pages hence only using the subbuffer size without the
1643 * padding makes the splice fail.
1644 */
1645 subbuf_size = len;
1646 padding = 0;
1647
1648 /* splice the subbuffer to the tracefile */
1649 ret = lttng_consumer_on_read_subbuffer_splice(ctx, stream, subbuf_size,
1650 padding, &index);
1651 /*
1652 * XXX: Splice does not support network streaming so the return value
1653 * is simply checked against subbuf_size and not like the mmap() op.
1654 */
1655 if (ret != subbuf_size) {
1656 /*
1657 * display the error but continue processing to try
1658 * to release the subbuffer
1659 */
1660 ERR("Error splicing to tracefile (ret: %zd != len: %lu)",
1661 ret, subbuf_size);
1662 write_index = 0;
1663 }
1664 break;
1665 case CONSUMER_CHANNEL_MMAP:
1666 /* Get subbuffer size without padding */
1667 err = kernctl_get_subbuf_size(infd, &subbuf_size);
1668 if (err != 0) {
1669 PERROR("Getting sub-buffer len failed.");
1670 err = kernctl_put_subbuf(infd);
1671 if (err != 0) {
1672 if (err == -EFAULT) {
1673 PERROR("Error in unreserving sub buffer\n");
1674 } else if (err == -EIO) {
1675 /* Should never happen with newer LTTng versions */
1676 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1677 }
1678 ret = err;
1679 goto error;
1680 }
1681 ret = err;
1682 goto error;
1683 }
1684
1685 /* Make sure the tracer is not gone mad on us! */
1686 assert(len >= subbuf_size);
1687
1688 padding = len - subbuf_size;
1689
1690 /* write the subbuffer to the tracefile */
1691 ret = lttng_consumer_on_read_subbuffer_mmap(ctx, stream, subbuf_size,
1692 padding, &index);
1693 /*
1694 * The mmap operation should write subbuf_size amount of data when
1695 * network streaming or the full padding (len) size when we are _not_
1696 * streaming.
1697 */
1698 if ((ret != subbuf_size && stream->net_seq_idx != (uint64_t) -1ULL) ||
1699 (ret != len && stream->net_seq_idx == (uint64_t) -1ULL)) {
1700 /*
1701 * Display the error but continue processing to try to release the
1702 * subbuffer. This is a DBG statement since this is possible to
1703 * happen without being a critical error.
1704 */
1705 DBG("Error writing to tracefile "
1706 "(ret: %zd != len: %lu != subbuf_size: %lu)",
1707 ret, len, subbuf_size);
1708 write_index = 0;
1709 }
1710 break;
1711 default:
1712 ERR("Unknown output method");
1713 ret = -EPERM;
1714 }
1715
1716 err = kernctl_put_next_subbuf(infd);
1717 if (err != 0) {
1718 if (err == -EFAULT) {
1719 PERROR("Error in unreserving sub buffer\n");
1720 } else if (err == -EIO) {
1721 /* Should never happen with newer LTTng versions */
1722 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1723 }
1724 ret = err;
1725 goto error;
1726 }
1727
1728 /* Write index if needed. */
1729 if (!write_index) {
1730 goto rotate;
1731 }
1732
1733 if (stream->chan->live_timer_interval && !stream->metadata_flag) {
1734 /*
1735 * In live, block until all the metadata is sent.
1736 */
1737 pthread_mutex_lock(&stream->metadata_timer_lock);
1738 assert(!stream->missed_metadata_flush);
1739 stream->waiting_on_metadata = true;
1740 pthread_mutex_unlock(&stream->metadata_timer_lock);
1741
1742 err = consumer_stream_sync_metadata(ctx, stream->session_id);
1743
1744 pthread_mutex_lock(&stream->metadata_timer_lock);
1745 stream->waiting_on_metadata = false;
1746 if (stream->missed_metadata_flush) {
1747 stream->missed_metadata_flush = false;
1748 pthread_mutex_unlock(&stream->metadata_timer_lock);
1749 (void) consumer_flush_kernel_index(stream);
1750 } else {
1751 pthread_mutex_unlock(&stream->metadata_timer_lock);
1752 }
1753 if (err < 0) {
1754 goto error;
1755 }
1756 }
1757
1758 err = consumer_stream_write_index(stream, &index);
1759 if (err < 0) {
1760 goto error;
1761 }
1762
1763 rotate:
1764 /*
1765 * After extracting the packet, we check if the stream is now ready to be
1766 * rotated and perform the action immediately.
1767 */
1768 rotation_ret = lttng_consumer_stream_is_rotate_ready(stream);
1769 if (rotation_ret == 1) {
1770 rotation_ret = lttng_consumer_rotate_stream(ctx, stream);
1771 if (rotation_ret < 0) {
1772 ERR("Stream rotation error");
1773 ret = -1;
1774 goto error;
1775 }
1776 } else if (rotation_ret < 0) {
1777 ERR("Checking if stream is ready to rotate");
1778 ret = -1;
1779 goto error;
1780 }
1781
1782 error:
1783 return ret;
1784 }
1785
1786 int lttng_kconsumer_on_recv_stream(struct lttng_consumer_stream *stream)
1787 {
1788 int ret;
1789
1790 assert(stream);
1791
1792 /*
1793 * Don't create anything if this is set for streaming or if there is
1794 * no current trace chunk on the parent channel.
1795 */
1796 if (stream->net_seq_idx == (uint64_t) -1ULL && stream->chan->monitor &&
1797 stream->chan->trace_chunk) {
1798 ret = consumer_stream_create_output_files(stream, true);
1799 if (ret) {
1800 goto error;
1801 }
1802 }
1803
1804 if (stream->output == LTTNG_EVENT_MMAP) {
1805 /* get the len of the mmap region */
1806 unsigned long mmap_len;
1807
1808 ret = kernctl_get_mmap_len(stream->wait_fd, &mmap_len);
1809 if (ret != 0) {
1810 PERROR("kernctl_get_mmap_len");
1811 goto error_close_fd;
1812 }
1813 stream->mmap_len = (size_t) mmap_len;
1814
1815 stream->mmap_base = mmap(NULL, stream->mmap_len, PROT_READ,
1816 MAP_PRIVATE, stream->wait_fd, 0);
1817 if (stream->mmap_base == MAP_FAILED) {
1818 PERROR("Error mmaping");
1819 ret = -1;
1820 goto error_close_fd;
1821 }
1822 }
1823
1824 /* we return 0 to let the library handle the FD internally */
1825 return 0;
1826
1827 error_close_fd:
1828 if (stream->out_fd >= 0) {
1829 int err;
1830
1831 err = close(stream->out_fd);
1832 assert(!err);
1833 stream->out_fd = -1;
1834 }
1835 error:
1836 return ret;
1837 }
1838
1839 /*
1840 * Check if data is still being extracted from the buffers for a specific
1841 * stream. Consumer data lock MUST be acquired before calling this function
1842 * and the stream lock.
1843 *
1844 * Return 1 if the traced data are still getting read else 0 meaning that the
1845 * data is available for trace viewer reading.
1846 */
1847 int lttng_kconsumer_data_pending(struct lttng_consumer_stream *stream)
1848 {
1849 int ret;
1850
1851 assert(stream);
1852
1853 if (stream->endpoint_status != CONSUMER_ENDPOINT_ACTIVE) {
1854 ret = 0;
1855 goto end;
1856 }
1857
1858 ret = kernctl_get_next_subbuf(stream->wait_fd);
1859 if (ret == 0) {
1860 /* There is still data so let's put back this subbuffer. */
1861 ret = kernctl_put_subbuf(stream->wait_fd);
1862 assert(ret == 0);
1863 ret = 1; /* Data is pending */
1864 goto end;
1865 }
1866
1867 /* Data is NOT pending and ready to be read. */
1868 ret = 0;
1869
1870 end:
1871 return ret;
1872 }
This page took 0.123677 seconds and 3 git commands to generate.