Fix: initialize kernel stream max subbuffer size on creation
[lttng-tools.git] / src/common/kernel-consumer/kernel-consumer.c
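Summary of the change: the LTTNG_CONSUMER_ADD_STREAM handler now queries the tracer for a stream's maximal sub-buffer size as soon as the stream is allocated, so that stream->max_sb_size is valid from creation onward (the snapshot code, for instance, walks buffer positions in max_sb_size increments). A minimal sketch of the relevant hunk, as it appears in the handler below (surrounding error handling unchanged):

    new_stream->chan = channel;
    new_stream->wait_fd = fd;
    ret = kernctl_get_max_subbuf_size(new_stream->wait_fd,
            &new_stream->max_sb_size);
    if (ret < 0) {
        pthread_mutex_unlock(&channel->lock);
        ERR("Failed to get kernel maximal subbuffer size");
        goto end_nosignal;
    }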
1 /*
2 * Copyright (C) 2011 - Julien Desfossez <julien.desfossez@polymtl.ca>
3 * Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
4 * Copyright (C) 2017 - Jérémie Galarneau <jeremie.galarneau@efficios.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2 only,
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #define _LGPL_SOURCE
21 #include <assert.h>
22 #include <poll.h>
23 #include <pthread.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <sys/mman.h>
27 #include <sys/socket.h>
28 #include <sys/types.h>
29 #include <inttypes.h>
30 #include <unistd.h>
31 #include <sys/stat.h>
32
33 #include <bin/lttng-consumerd/health-consumerd.h>
34 #include <common/common.h>
35 #include <common/kernel-ctl/kernel-ctl.h>
36 #include <common/sessiond-comm/sessiond-comm.h>
37 #include <common/sessiond-comm/relayd.h>
38 #include <common/compat/fcntl.h>
39 #include <common/compat/endian.h>
40 #include <common/pipe.h>
41 #include <common/relayd/relayd.h>
42 #include <common/utils.h>
43 #include <common/consumer/consumer-stream.h>
44 #include <common/index/index.h>
45 #include <common/consumer/consumer-timer.h>
46 #include <common/optional.h>
47
48 #include "kernel-consumer.h"
49
50 extern struct lttng_consumer_global_data consumer_data;
51 extern int consumer_poll_timeout;
52
53 /*
54 * Take a snapshot for a specific fd
55 *
56 * Returns 0 on success, < 0 on error
57 */
58 int lttng_kconsumer_take_snapshot(struct lttng_consumer_stream *stream)
59 {
60 int ret = 0;
61 int infd = stream->wait_fd;
62
63 ret = kernctl_snapshot(infd);
64 /*
65 * -EAGAIN is not an error, it just means that there is no data to
66 * be read.
67 */
68 if (ret != 0 && ret != -EAGAIN) {
69 PERROR("Getting sub-buffer snapshot.");
70 }
71
72 return ret;
73 }
74
75 /*
76 * Sample consumed and produced positions for a specific fd.
77 *
78 * Returns 0 on success, < 0 on error.
79 */
80 int lttng_kconsumer_sample_snapshot_positions(
81 struct lttng_consumer_stream *stream)
82 {
83 assert(stream);
84
85 return kernctl_snapshot_sample_positions(stream->wait_fd);
86 }
87
88 /*
89 * Get the produced position
90 *
91 * Returns 0 on success, < 0 on error
92 */
93 int lttng_kconsumer_get_produced_snapshot(struct lttng_consumer_stream *stream,
94 unsigned long *pos)
95 {
96 int ret;
97 int infd = stream->wait_fd;
98
99 ret = kernctl_snapshot_get_produced(infd, pos);
100 if (ret != 0) {
101 PERROR("kernctl_snapshot_get_produced");
102 }
103
104 return ret;
105 }
106
107 /*
108 * Get the consumed position
109 *
110 * Returns 0 on success, < 0 on error
111 */
112 int lttng_kconsumer_get_consumed_snapshot(struct lttng_consumer_stream *stream,
113 unsigned long *pos)
114 {
115 int ret;
116 int infd = stream->wait_fd;
117
118 ret = kernctl_snapshot_get_consumed(infd, pos);
119 if (ret != 0) {
120 PERROR("kernctl_snapshot_get_consumed");
121 }
122
123 return ret;
124 }
125
126 /*
127 * Take a snapshot of all the streams of a channel.
128 * RCU read-side lock must be held across this function to ensure existence of
129 * channel. The channel lock must be held by the caller.
130 *
131 * Returns 0 on success, < 0 on error
132 */
133 static int lttng_kconsumer_snapshot_channel(
134 struct lttng_consumer_channel *channel,
135 uint64_t key, char *path, uint64_t relayd_id,
136 uint64_t nb_packets_per_stream,
137 struct lttng_consumer_local_data *ctx)
138 {
139 int ret;
140 struct lttng_consumer_stream *stream;
141
142 DBG("Kernel consumer snapshot channel %" PRIu64, key);
143
144 rcu_read_lock();
145
146 /* Splice is not supported yet for channel snapshot. */
147 if (channel->output != CONSUMER_CHANNEL_MMAP) {
148 ERR("Unsupported output type for channel \"%s\": mmap output is required to record a snapshot",
149 channel->name);
150 ret = -1;
151 goto end;
152 }
153
154 cds_list_for_each_entry(stream, &channel->streams.head, send_node) {
155 unsigned long consumed_pos, produced_pos;
156
157 health_code_update();
158
159 /*
160 * Lock stream because we are about to change its state.
161 */
162 pthread_mutex_lock(&stream->lock);
163
164 assert(channel->trace_chunk);
165 if (!lttng_trace_chunk_get(channel->trace_chunk)) {
166 /*
167 * Can't happen barring an internal error as the channel
168 * holds a reference to the trace chunk.
169 */
170 ERR("Failed to acquire reference to channel's trace chunk");
171 ret = -1;
172 goto end_unlock;
173 }
174 assert(!stream->trace_chunk);
175 stream->trace_chunk = channel->trace_chunk;
176
177 /*
178 * Assign the received relayd ID so we can use it for streaming. The streams
179 * are not visible to anyone yet, so it is safe to change it.
180 */
181 stream->net_seq_idx = relayd_id;
182 channel->relayd_id = relayd_id;
183 if (relayd_id != (uint64_t) -1ULL) {
184 ret = consumer_send_relayd_stream(stream, path);
185 if (ret < 0) {
186 ERR("sending stream to relayd");
187 goto end_unlock;
188 }
189 } else {
190 ret = consumer_stream_create_output_files(stream,
191 false);
192 if (ret < 0) {
193 goto end_unlock;
194 }
195 DBG("Kernel consumer snapshot stream (%" PRIu64 ")",
196 stream->key);
197 }
198
199 ret = kernctl_buffer_flush_empty(stream->wait_fd);
200 if (ret < 0) {
201 /*
202 * Doing a buffer flush which does not take into
203 * account empty packets. This is not perfect
204 * for stream intersection, but required as a
205 * fall-back when "flush_empty" is not
206 * implemented by lttng-modules.
207 */
208 ret = kernctl_buffer_flush(stream->wait_fd);
209 if (ret < 0) {
210 ERR("Failed to flush kernel stream");
211 goto end_unlock;
212 }
213 goto end_unlock;
214 }
215
216 ret = lttng_kconsumer_take_snapshot(stream);
217 if (ret < 0) {
218 ERR("Taking kernel snapshot");
219 goto end_unlock;
220 }
221
222 ret = lttng_kconsumer_get_produced_snapshot(stream, &produced_pos);
223 if (ret < 0) {
224 ERR("Produced kernel snapshot position");
225 goto end_unlock;
226 }
227
228 ret = lttng_kconsumer_get_consumed_snapshot(stream, &consumed_pos);
229 if (ret < 0) {
230 ERR("Consumed kernel snapshot position");
231 goto end_unlock;
232 }
233
234 consumed_pos = consumer_get_consume_start_pos(consumed_pos,
235 produced_pos, nb_packets_per_stream,
236 stream->max_sb_size);
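/*
 * Illustrative example (hypothetical values), assuming
 * consumer_get_consume_start_pos() clamps the start position to at most
 * nb_packets_per_stream packets behind the produced position: with
 * produced_pos = 64 KiB, max_sb_size = 4 KiB and nb_packets_per_stream = 4,
 * the loop below reads back at most 16 KiB, i.e. starts at 48 KiB, even if
 * the consumed position is further back.
 */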
237
238 while ((long) (consumed_pos - produced_pos) < 0) {
239 ssize_t read_len;
240 unsigned long len, padded_len;
241
242 health_code_update();
243
244 DBG("Kernel consumer taking snapshot at pos %lu", consumed_pos);
245
246 ret = kernctl_get_subbuf(stream->wait_fd, &consumed_pos);
247 if (ret < 0) {
248 if (ret != -EAGAIN) {
249 PERROR("kernctl_get_subbuf snapshot");
250 goto end_unlock;
251 }
252 DBG("Kernel consumer get subbuf failed. Skipping it.");
253 consumed_pos += stream->max_sb_size;
254 stream->chan->lost_packets++;
255 continue;
256 }
257
258 ret = kernctl_get_subbuf_size(stream->wait_fd, &len);
259 if (ret < 0) {
260 ERR("Snapshot kernctl_get_subbuf_size");
261 goto error_put_subbuf;
262 }
263
264 ret = kernctl_get_padded_subbuf_size(stream->wait_fd, &padded_len);
265 if (ret < 0) {
266 ERR("Snapshot kernctl_get_padded_subbuf_size");
267 goto error_put_subbuf;
268 }
269
270 read_len = lttng_consumer_on_read_subbuffer_mmap(ctx, stream, len,
271 padded_len - len, NULL);
272 /*
273 * We write the padded len in local tracefiles but the data len
274 * when using a relay. Display the error but continue processing
275 * to try to release the subbuffer.
276 */
277 if (relayd_id != (uint64_t) -1ULL) {
278 if (read_len != len) {
279 ERR("Error sending to the relay (ret: %zd != len: %lu)",
280 read_len, len);
281 }
282 } else {
283 if (read_len != padded_len) {
284 ERR("Error writing to tracefile (ret: %zd != len: %lu)",
285 read_len, padded_len);
286 }
287 }
288
289 ret = kernctl_put_subbuf(stream->wait_fd);
290 if (ret < 0) {
291 ERR("Snapshot kernctl_put_subbuf");
292 goto end_unlock;
293 }
294 consumed_pos += stream->max_sb_size;
295 }
296
297 if (relayd_id == (uint64_t) -1ULL) {
298 if (stream->out_fd >= 0) {
299 ret = close(stream->out_fd);
300 if (ret < 0) {
301 PERROR("Kernel consumer snapshot close out_fd");
302 goto end_unlock;
303 }
304 stream->out_fd = -1;
305 }
306 } else {
307 close_relayd_stream(stream);
308 stream->net_seq_idx = (uint64_t) -1ULL;
309 }
310 lttng_trace_chunk_put(stream->trace_chunk);
311 stream->trace_chunk = NULL;
312 pthread_mutex_unlock(&stream->lock);
313 }
314
315 /* All good! */
316 ret = 0;
317 goto end;
318
319 error_put_subbuf:
320 ret = kernctl_put_subbuf(stream->wait_fd);
321 if (ret < 0) {
322 ERR("Snapshot kernctl_put_subbuf error path");
323 }
324 end_unlock:
325 pthread_mutex_unlock(&stream->lock);
326 end:
327 rcu_read_unlock();
328 return ret;
329 }
330
331 /*
332 * Read all of the metadata available for a snapshot.
333 * RCU read-side lock must be held across this function to ensure existence of
334 * metadata_channel. The channel lock must be held by the caller.
335 *
336 * Returns 0 on success, < 0 on error
337 */
338 static int lttng_kconsumer_snapshot_metadata(
339 struct lttng_consumer_channel *metadata_channel,
340 uint64_t key, char *path, uint64_t relayd_id,
341 struct lttng_consumer_local_data *ctx)
342 {
343 int ret, use_relayd = 0;
344 ssize_t ret_read;
345 struct lttng_consumer_stream *metadata_stream;
346
347 assert(ctx);
348
349 DBG("Kernel consumer snapshot metadata with key %" PRIu64 " at path %s",
350 key, path);
351
352 rcu_read_lock();
353
354 metadata_stream = metadata_channel->metadata_stream;
355 assert(metadata_stream);
356
357 pthread_mutex_lock(&metadata_stream->lock);
358 assert(metadata_channel->trace_chunk);
359 assert(metadata_stream->trace_chunk);
360
361 /* Flag once that we have a valid relayd for the stream. */
362 if (relayd_id != (uint64_t) -1ULL) {
363 use_relayd = 1;
364 }
365
366 if (use_relayd) {
367 ret = consumer_send_relayd_stream(metadata_stream, path);
368 if (ret < 0) {
369 goto error_snapshot;
370 }
371 } else {
372 ret = consumer_stream_create_output_files(metadata_stream,
373 false);
374 if (ret < 0) {
375 goto error_snapshot;
376 }
377 }
378
379 do {
380 health_code_update();
381
382 ret_read = lttng_kconsumer_read_subbuffer(metadata_stream, ctx);
383 if (ret_read < 0) {
384 if (ret_read != -EAGAIN) {
385 ERR("Kernel snapshot reading metadata subbuffer (ret: %zd)",
386 ret_read);
387 ret = ret_read;
388 goto error_snapshot;
389 }
390 /* ret_read is negative at this point so we will exit the loop. */
391 continue;
392 }
393 } while (ret_read >= 0);
394
395 if (use_relayd) {
396 close_relayd_stream(metadata_stream);
397 metadata_stream->net_seq_idx = (uint64_t) -1ULL;
398 } else {
399 if (metadata_stream->out_fd >= 0) {
400 ret = close(metadata_stream->out_fd);
401 if (ret < 0) {
402 PERROR("Kernel consumer snapshot metadata close out_fd");
403 /*
404 * Don't treat this as an error since the snapshot was successful at
405 * this point; only the close failed.
406 */
407 }
408 metadata_stream->out_fd = -1;
409 lttng_trace_chunk_put(metadata_stream->trace_chunk);
410 metadata_stream->trace_chunk = NULL;
411 }
412 }
413
414 ret = 0;
415 error_snapshot:
416 pthread_mutex_unlock(&metadata_stream->lock);
417 cds_list_del(&metadata_stream->send_node);
418 consumer_stream_destroy(metadata_stream, NULL);
419 metadata_channel->metadata_stream = NULL;
420 rcu_read_unlock();
421 return ret;
422 }
423
424 /*
425 * Receive command from session daemon and process it.
426 *
427 * Return 1 on success; otherwise, 0 or a negative value.
428 */
429 int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
430 int sock, struct pollfd *consumer_sockpoll)
431 {
432 ssize_t ret;
433 enum lttcomm_return_code ret_code = LTTCOMM_CONSUMERD_SUCCESS;
434 struct lttcomm_consumer_msg msg;
435
436 health_code_update();
437
438 ret = lttcomm_recv_unix_sock(sock, &msg, sizeof(msg));
439 if (ret != sizeof(msg)) {
440 if (ret > 0) {
441 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_ERROR_RECV_CMD);
442 ret = -1;
443 }
444 return ret;
445 }
446
447 health_code_update();
448
449 /* Deprecated command */
450 assert(msg.cmd_type != LTTNG_CONSUMER_STOP);
451
452 health_code_update();
453
454 /* relayd needs RCU read-side protection */
455 rcu_read_lock();
456
457 switch (msg.cmd_type) {
458 case LTTNG_CONSUMER_ADD_RELAYD_SOCKET:
459 {
460 /* Session daemon status messages are handled in the following call. */
461 consumer_add_relayd_socket(msg.u.relayd_sock.net_index,
462 msg.u.relayd_sock.type, ctx, sock, consumer_sockpoll,
463 &msg.u.relayd_sock.sock, msg.u.relayd_sock.session_id,
464 msg.u.relayd_sock.relayd_session_id);
465 goto end_nosignal;
466 }
467 case LTTNG_CONSUMER_ADD_CHANNEL:
468 {
469 struct lttng_consumer_channel *new_channel;
470 int ret_recv;
471 const uint64_t chunk_id = msg.u.channel.chunk_id.value;
472
473 health_code_update();
474
475 /* First send a status message before receiving the fds. */
476 ret = consumer_send_status_msg(sock, ret_code);
477 if (ret < 0) {
478 /* Somehow, the session daemon is not responding anymore. */
479 goto error_fatal;
480 }
481
482 health_code_update();
483
484 DBG("consumer_add_channel %" PRIu64, msg.u.channel.channel_key);
485 new_channel = consumer_allocate_channel(msg.u.channel.channel_key,
486 msg.u.channel.session_id,
487 msg.u.channel.chunk_id.is_set ?
488 &chunk_id : NULL,
489 msg.u.channel.pathname,
490 msg.u.channel.name,
491 msg.u.channel.relayd_id, msg.u.channel.output,
492 msg.u.channel.tracefile_size,
493 msg.u.channel.tracefile_count, 0,
494 msg.u.channel.monitor,
495 msg.u.channel.live_timer_interval,
496 NULL, NULL);
497 if (new_channel == NULL) {
498 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_OUTFD_ERROR);
499 goto end_nosignal;
500 }
501 new_channel->nb_init_stream_left = msg.u.channel.nb_init_streams;
502 switch (msg.u.channel.output) {
503 case LTTNG_EVENT_SPLICE:
504 new_channel->output = CONSUMER_CHANNEL_SPLICE;
505 break;
506 case LTTNG_EVENT_MMAP:
507 new_channel->output = CONSUMER_CHANNEL_MMAP;
508 break;
509 default:
510 ERR("Channel output unknown %d", msg.u.channel.output);
511 goto end_nosignal;
512 }
513
514 /* Translate and save channel type. */
515 switch (msg.u.channel.type) {
516 case CONSUMER_CHANNEL_TYPE_DATA:
517 case CONSUMER_CHANNEL_TYPE_METADATA:
518 new_channel->type = msg.u.channel.type;
519 break;
520 default:
521 assert(0);
522 goto end_nosignal;
523 };
524
525 health_code_update();
526
527 if (ctx->on_recv_channel != NULL) {
528 ret_recv = ctx->on_recv_channel(new_channel);
529 if (ret_recv == 0) {
530 ret = consumer_add_channel(new_channel, ctx);
531 } else if (ret_recv < 0) {
532 goto end_nosignal;
533 }
534 } else {
535 ret = consumer_add_channel(new_channel, ctx);
536 }
537 if (msg.u.channel.type == CONSUMER_CHANNEL_TYPE_DATA && !ret) {
538 int monitor_start_ret;
539
540 DBG("Consumer starting monitor timer");
541 consumer_timer_live_start(new_channel,
542 msg.u.channel.live_timer_interval);
543 monitor_start_ret = consumer_timer_monitor_start(
544 new_channel,
545 msg.u.channel.monitor_timer_interval);
546 if (monitor_start_ret < 0) {
547 ERR("Starting channel monitoring timer failed");
548 goto end_nosignal;
549 }
550
551 }
552
553 health_code_update();
554
555 /* If we received an error in add_channel, we need to report it. */
556 if (ret < 0) {
557 ret = consumer_send_status_msg(sock, ret);
558 if (ret < 0) {
559 goto error_fatal;
560 }
561 goto end_nosignal;
562 }
563
564 goto end_nosignal;
565 }
566 case LTTNG_CONSUMER_ADD_STREAM:
567 {
568 int fd;
569 struct lttng_pipe *stream_pipe;
570 struct lttng_consumer_stream *new_stream;
571 struct lttng_consumer_channel *channel;
572 int alloc_ret = 0;
573
574 /*
575 * Get stream's channel reference. Needed when adding the stream to the
576 * global hash table.
577 */
578 channel = consumer_find_channel(msg.u.stream.channel_key);
579 if (!channel) {
580 /*
581 * We could not find the channel. This can happen if a CPU hotplug
582 * event occurs while the session is being torn down.
583 */
584 ERR("Unable to find channel key %" PRIu64, msg.u.stream.channel_key);
585 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
586 }
587
588 health_code_update();
589
590 /* First send a status message before receiving the fds. */
591 ret = consumer_send_status_msg(sock, ret_code);
592 if (ret < 0) {
593 /* Somehow, the session daemon is not responding anymore. */
594 goto error_fatal;
595 }
596
597 health_code_update();
598
599 if (ret_code != LTTCOMM_CONSUMERD_SUCCESS) {
600 /* Channel was not found. */
601 goto end_nosignal;
602 }
603
604 /* Blocking call */
605 health_poll_entry();
606 ret = lttng_consumer_poll_socket(consumer_sockpoll);
607 health_poll_exit();
608 if (ret) {
609 goto error_fatal;
610 }
611
612 health_code_update();
613
614 /* Get stream file descriptor from socket */
615 ret = lttcomm_recv_fds_unix_sock(sock, &fd, 1);
616 if (ret != sizeof(fd)) {
617 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_ERROR_RECV_FD);
618 rcu_read_unlock();
619 return ret;
620 }
621
622 health_code_update();
623
624 /*
625 * Send status code to session daemon only if the recv works. If the
626 * above recv() failed, the session daemon is notified through the
627 * error socket and the teardown is eventually done.
628 */
629 ret = consumer_send_status_msg(sock, ret_code);
630 if (ret < 0) {
631 /* Somehow, the session daemon is not responding anymore. */
632 goto end_nosignal;
633 }
634
635 health_code_update();
636
637 pthread_mutex_lock(&channel->lock);
638 new_stream = consumer_allocate_stream(channel->key,
639 fd,
640 channel->name,
641 channel->relayd_id,
642 channel->session_id,
643 channel->trace_chunk,
644 msg.u.stream.cpu,
645 &alloc_ret,
646 channel->type,
647 channel->monitor);
648 if (new_stream == NULL) {
649 switch (alloc_ret) {
650 case -ENOMEM:
651 case -EINVAL:
652 default:
653 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_OUTFD_ERROR);
654 break;
655 }
656 pthread_mutex_unlock(&channel->lock);
657 goto end_nosignal;
658 }
659
660 new_stream->chan = channel;
661 new_stream->wait_fd = fd;
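/*
 * Query the tracer for this stream's maximal sub-buffer size right away
 * so that max_sb_size is valid for the stream's whole lifetime; the
 * snapshot code above, for instance, walks buffer positions in
 * max_sb_size increments.
 */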
662 ret = kernctl_get_max_subbuf_size(new_stream->wait_fd,
663 &new_stream->max_sb_size);
664 if (ret < 0) {
665 pthread_mutex_unlock(&channel->lock);
666 ERR("Failed to get kernel maximal subbuffer size");
667 goto end_nosignal;
668 }
669
670 consumer_stream_update_channel_attributes(new_stream,
671 channel);
672 switch (channel->output) {
673 case CONSUMER_CHANNEL_SPLICE:
674 new_stream->output = LTTNG_EVENT_SPLICE;
675 ret = utils_create_pipe(new_stream->splice_pipe);
676 if (ret < 0) {
677 pthread_mutex_unlock(&channel->lock);
678 goto end_nosignal;
679 }
680 break;
681 case CONSUMER_CHANNEL_MMAP:
682 new_stream->output = LTTNG_EVENT_MMAP;
683 break;
684 default:
685 ERR("Stream output unknown %d", channel->output);
686 pthread_mutex_unlock(&channel->lock);
687 goto end_nosignal;
688 }
689
690 /*
691 * We've just assigned the channel to the stream, so increment the
692 * refcount right now. We don't need to increment the refcount for
693 * streams in no-monitor mode because their cleanup is handled
694 * manually. It is very important to make sure there are NO prior
695 * consumer_del_stream() calls, or else the refcount will be unbalanced.
696 */
697 if (channel->monitor) {
698 uatomic_inc(&new_stream->chan->refcount);
699 }
700
701 /*
702 * The buffer flush is done on the session daemon side for the kernel,
703 * so the stream's "hangup_flush_done" variable does not need to be
704 * tracked. This is important for a kernel stream since we don't rely
705 * on the stream's flush state to read data. That is not the case for
706 * user space tracing.
707 */
708 new_stream->hangup_flush_done = 0;
709
710 health_code_update();
711
712 pthread_mutex_lock(&new_stream->lock);
713 if (ctx->on_recv_stream) {
714 ret = ctx->on_recv_stream(new_stream);
715 if (ret < 0) {
716 pthread_mutex_unlock(&new_stream->lock);
717 pthread_mutex_unlock(&channel->lock);
718 consumer_stream_free(new_stream);
719 goto end_nosignal;
720 }
721 }
722 health_code_update();
723
724 if (new_stream->metadata_flag) {
725 channel->metadata_stream = new_stream;
726 }
727
728 /* Do not monitor this stream. */
729 if (!channel->monitor) {
730 DBG("Kernel consumer add stream %s in no monitor mode with "
731 "relayd id %" PRIu64, new_stream->name,
732 new_stream->net_seq_idx);
733 cds_list_add(&new_stream->send_node, &channel->streams.head);
734 pthread_mutex_unlock(&new_stream->lock);
735 pthread_mutex_unlock(&channel->lock);
736 break;
737 }
738
739 /* Send stream to relayd if the stream has an ID. */
740 if (new_stream->net_seq_idx != (uint64_t) -1ULL) {
741 ret = consumer_send_relayd_stream(new_stream,
742 new_stream->chan->pathname);
743 if (ret < 0) {
744 pthread_mutex_unlock(&new_stream->lock);
745 pthread_mutex_unlock(&channel->lock);
746 consumer_stream_free(new_stream);
747 goto end_nosignal;
748 }
749
750 /*
751 * If adding an extra stream to an already
752 * existing channel (e.g. cpu hotplug), we need
753 * to send the "streams_sent" command to relayd.
754 */
755 if (channel->streams_sent_to_relayd) {
756 ret = consumer_send_relayd_streams_sent(
757 new_stream->net_seq_idx);
758 if (ret < 0) {
759 pthread_mutex_unlock(&new_stream->lock);
760 pthread_mutex_unlock(&channel->lock);
761 goto end_nosignal;
762 }
763 }
764 }
765 pthread_mutex_unlock(&new_stream->lock);
766 pthread_mutex_unlock(&channel->lock);
767
768 /* Get the right pipe where the stream will be sent. */
769 if (new_stream->metadata_flag) {
770 consumer_add_metadata_stream(new_stream);
771 stream_pipe = ctx->consumer_metadata_pipe;
772 } else {
773 consumer_add_data_stream(new_stream);
774 stream_pipe = ctx->consumer_data_pipe;
775 }
776
777 /* Visible to other threads */
778 new_stream->globally_visible = 1;
779
780 health_code_update();
781
782 ret = lttng_pipe_write(stream_pipe, &new_stream, sizeof(new_stream));
783 if (ret < 0) {
784 ERR("Consumer write %s stream to pipe %d",
785 new_stream->metadata_flag ? "metadata" : "data",
786 lttng_pipe_get_writefd(stream_pipe));
787 if (new_stream->metadata_flag) {
788 consumer_del_stream_for_metadata(new_stream);
789 } else {
790 consumer_del_stream_for_data(new_stream);
791 }
792 goto end_nosignal;
793 }
794
795 DBG("Kernel consumer ADD_STREAM %s (fd: %d) %s with relayd id %" PRIu64,
796 new_stream->name, fd, new_stream->chan->pathname, new_stream->relayd_stream_id);
797 break;
798 }
799 case LTTNG_CONSUMER_STREAMS_SENT:
800 {
801 struct lttng_consumer_channel *channel;
802
803 /*
804 * Get stream's channel reference. Needed when adding the stream to the
805 * global hash table.
806 */
807 channel = consumer_find_channel(msg.u.sent_streams.channel_key);
808 if (!channel) {
809 /*
810 * We could not find the channel. This can happen if a CPU hotplug
811 * event occurs while the session is being torn down.
812 */
813 ERR("Unable to find channel key %" PRIu64,
814 msg.u.sent_streams.channel_key);
815 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
816 }
817
818 health_code_update();
819
820 /*
821 * Send status code to session daemon.
822 */
823 ret = consumer_send_status_msg(sock, ret_code);
824 if (ret < 0 || ret_code != LTTCOMM_CONSUMERD_SUCCESS) {
825 /* Somehow, the session daemon is not responding anymore. */
826 goto end_nosignal;
827 }
828
829 health_code_update();
830
831 /*
832 * We should not send this message if we don't monitor the
833 * streams in this channel.
834 */
835 if (!channel->monitor) {
836 break;
837 }
838
839 health_code_update();
840 /* Send stream to relayd if the stream has an ID. */
841 if (msg.u.sent_streams.net_seq_idx != (uint64_t) -1ULL) {
842 ret = consumer_send_relayd_streams_sent(
843 msg.u.sent_streams.net_seq_idx);
844 if (ret < 0) {
845 goto end_nosignal;
846 }
847 channel->streams_sent_to_relayd = true;
848 }
849 break;
850 }
851 case LTTNG_CONSUMER_UPDATE_STREAM:
852 {
853 rcu_read_unlock();
854 return -ENOSYS;
855 }
856 case LTTNG_CONSUMER_DESTROY_RELAYD:
857 {
858 uint64_t index = msg.u.destroy_relayd.net_seq_idx;
859 struct consumer_relayd_sock_pair *relayd;
860
861 DBG("Kernel consumer destroying relayd %" PRIu64, index);
862
863 /* Get the relayd reference if it exists. */
864 relayd = consumer_find_relayd(index);
865 if (relayd == NULL) {
866 DBG("Unable to find relayd %" PRIu64, index);
867 ret_code = LTTCOMM_CONSUMERD_RELAYD_FAIL;
868 }
869
870 /*
871 * Each relayd socket pair has a refcount of the streams attached to it,
872 * which tells whether the relayd is still active, depending on the
873 * refcount value.
874 *
875 * This will set the destroy flag of the relayd object and destroy it
876 * if the refcount reaches zero when called.
877 *
878 * The destroy can happen either here or when a stream fd hangs up.
879 */
880 if (relayd) {
881 consumer_flag_relayd_for_destroy(relayd);
882 }
883
884 health_code_update();
885
886 ret = consumer_send_status_msg(sock, ret_code);
887 if (ret < 0) {
888 /* Somehow, the session daemon is not responding anymore. */
889 goto error_fatal;
890 }
891
892 goto end_nosignal;
893 }
894 case LTTNG_CONSUMER_DATA_PENDING:
895 {
896 int32_t ret;
897 uint64_t id = msg.u.data_pending.session_id;
898
899 DBG("Kernel consumer data pending command for id %" PRIu64, id);
900
901 ret = consumer_data_pending(id);
902
903 health_code_update();
904
905 /* Send back returned value to session daemon */
906 ret = lttcomm_send_unix_sock(sock, &ret, sizeof(ret));
907 if (ret < 0) {
908 PERROR("send data pending ret code");
909 goto error_fatal;
910 }
911
912 /*
913 * No need to send back a status message since the data pending
914 * returned value is the response.
915 */
916 break;
917 }
918 case LTTNG_CONSUMER_SNAPSHOT_CHANNEL:
919 {
920 struct lttng_consumer_channel *channel;
921 uint64_t key = msg.u.snapshot_channel.key;
922
923 channel = consumer_find_channel(key);
924 if (!channel) {
925 ERR("Channel %" PRIu64 " not found", key);
926 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
927 } else {
928 pthread_mutex_lock(&channel->lock);
929 if (msg.u.snapshot_channel.metadata == 1) {
930 ret = lttng_kconsumer_snapshot_metadata(channel, key,
931 msg.u.snapshot_channel.pathname,
932 msg.u.snapshot_channel.relayd_id, ctx);
933 if (ret < 0) {
934 ERR("Snapshot metadata failed");
935 ret_code = LTTCOMM_CONSUMERD_SNAPSHOT_FAILED;
936 }
937 } else {
938 ret = lttng_kconsumer_snapshot_channel(channel, key,
939 msg.u.snapshot_channel.pathname,
940 msg.u.snapshot_channel.relayd_id,
941 msg.u.snapshot_channel.nb_packets_per_stream,
942 ctx);
943 if (ret < 0) {
944 ERR("Snapshot channel failed");
945 ret_code = LTTCOMM_CONSUMERD_SNAPSHOT_FAILED;
946 }
947 }
948 pthread_mutex_unlock(&channel->lock);
949 }
950 health_code_update();
951
952 ret = consumer_send_status_msg(sock, ret_code);
953 if (ret < 0) {
954 /* Somehow, the session daemon is not responding anymore. */
955 goto end_nosignal;
956 }
957 break;
958 }
959 case LTTNG_CONSUMER_DESTROY_CHANNEL:
960 {
961 uint64_t key = msg.u.destroy_channel.key;
962 struct lttng_consumer_channel *channel;
963
964 channel = consumer_find_channel(key);
965 if (!channel) {
966 ERR("Kernel consumer destroy channel %" PRIu64 " not found", key);
967 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
968 }
969
970 health_code_update();
971
972 ret = consumer_send_status_msg(sock, ret_code);
973 if (ret < 0) {
974 /* Somehow, the session daemon is not responding anymore. */
975 goto end_nosignal;
976 }
977
978 health_code_update();
979
980 /* Stop right now if no channel was found. */
981 if (!channel) {
982 goto end_nosignal;
983 }
984
985 /*
986 * This command should ONLY be issued for channels with streams set in
987 * no-monitor mode.
988 */
989 assert(!channel->monitor);
990
991 /*
992 * The refcount should ALWAYS be 0 in the case of a channel in no
993 * monitor mode.
994 */
995 assert(!uatomic_sub_return(&channel->refcount, 1));
996
997 consumer_del_channel(channel);
998
999 goto end_nosignal;
1000 }
1001 case LTTNG_CONSUMER_DISCARDED_EVENTS:
1002 {
1003 ssize_t ret;
1004 uint64_t count;
1005 struct lttng_consumer_channel *channel;
1006 uint64_t id = msg.u.discarded_events.session_id;
1007 uint64_t key = msg.u.discarded_events.channel_key;
1008
1009 DBG("Kernel consumer discarded events command for session id %"
1010 PRIu64 ", channel key %" PRIu64, id, key);
1011
1012 channel = consumer_find_channel(key);
1013 if (!channel) {
1014 ERR("Kernel consumer discarded events channel %"
1015 PRIu64 " not found", key);
1016 count = 0;
1017 } else {
1018 count = channel->discarded_events;
1019 }
1020
1021 health_code_update();
1022
1023 /* Send back returned value to session daemon */
1024 ret = lttcomm_send_unix_sock(sock, &count, sizeof(count));
1025 if (ret < 0) {
1026 PERROR("send discarded events");
1027 goto error_fatal;
1028 }
1029
1030 break;
1031 }
1032 case LTTNG_CONSUMER_LOST_PACKETS:
1033 {
1034 ssize_t ret;
1035 uint64_t count;
1036 struct lttng_consumer_channel *channel;
1037 uint64_t id = msg.u.lost_packets.session_id;
1038 uint64_t key = msg.u.lost_packets.channel_key;
1039
1040 DBG("Kernel consumer lost packets command for session id %"
1041 PRIu64 ", channel key %" PRIu64, id, key);
1042
1043 channel = consumer_find_channel(key);
1044 if (!channel) {
1045 ERR("Kernel consumer lost packets channel %"
1046 PRIu64 " not found", key);
1047 count = 0;
1048 } else {
1049 count = channel->lost_packets;
1050 }
1051
1052 health_code_update();
1053
1054 /* Send back returned value to session daemon */
1055 ret = lttcomm_send_unix_sock(sock, &count, sizeof(count));
1056 if (ret < 0) {
1057 PERROR("send lost packets");
1058 goto error_fatal;
1059 }
1060
1061 break;
1062 }
1063 case LTTNG_CONSUMER_SET_CHANNEL_MONITOR_PIPE:
1064 {
1065 int channel_monitor_pipe;
1066
1067 ret_code = LTTCOMM_CONSUMERD_SUCCESS;
1068 /* Successfully received the command's type. */
1069 ret = consumer_send_status_msg(sock, ret_code);
1070 if (ret < 0) {
1071 goto error_fatal;
1072 }
1073
1074 ret = lttcomm_recv_fds_unix_sock(sock, &channel_monitor_pipe,
1075 1);
1076 if (ret != sizeof(channel_monitor_pipe)) {
1077 ERR("Failed to receive channel monitor pipe");
1078 goto error_fatal;
1079 }
1080
1081 DBG("Received channel monitor pipe (%d)", channel_monitor_pipe);
1082 ret = consumer_timer_thread_set_channel_monitor_pipe(
1083 channel_monitor_pipe);
1084 if (!ret) {
1085 int flags;
1086
1087 ret_code = LTTCOMM_CONSUMERD_SUCCESS;
1088 /* Set the pipe as non-blocking. */
1089 ret = fcntl(channel_monitor_pipe, F_GETFL, 0);
1090 if (ret == -1) {
1091 PERROR("fcntl get flags of the channel monitoring pipe");
1092 goto error_fatal;
1093 }
1094 flags = ret;
1095
1096 ret = fcntl(channel_monitor_pipe, F_SETFL,
1097 flags | O_NONBLOCK);
1098 if (ret == -1) {
1099 PERROR("fcntl set O_NONBLOCK flag of the channel monitoring pipe");
1100 goto error_fatal;
1101 }
1102 DBG("Channel monitor pipe set as non-blocking");
1103 } else {
1104 ret_code = LTTCOMM_CONSUMERD_ALREADY_SET;
1105 }
1106 ret = consumer_send_status_msg(sock, ret_code);
1107 if (ret < 0) {
1108 goto error_fatal;
1109 }
1110 break;
1111 }
1112 case LTTNG_CONSUMER_ROTATE_CHANNEL:
1113 {
1114 struct lttng_consumer_channel *channel;
1115 uint64_t key = msg.u.rotate_channel.key;
1116
1117 DBG("Consumer rotate channel %" PRIu64, key);
1118
1119 channel = consumer_find_channel(key);
1120 if (!channel) {
1121 ERR("Channel %" PRIu64 " not found", key);
1122 ret_code = LTTCOMM_CONSUMERD_CHAN_NOT_FOUND;
1123 } else {
1124 /*
1125 * Sample the rotate position of all the streams in this channel.
1126 */
1127 ret = lttng_consumer_rotate_channel(channel, key,
1128 msg.u.rotate_channel.relayd_id,
1129 msg.u.rotate_channel.metadata,
1130 ctx);
1131 if (ret < 0) {
1132 ERR("Rotate channel failed");
1133 ret_code = LTTCOMM_CONSUMERD_ROTATION_FAIL;
1134 }
1135
1136 health_code_update();
1137 }
1138 ret = consumer_send_status_msg(sock, ret_code);
1139 if (ret < 0) {
1140 /* Somehow, the session daemon is not responding anymore. */
1141 goto end_nosignal;
1142 }
1143 if (channel) {
1144 /* Rotate the streams that are ready right now. */
1145 ret = lttng_consumer_rotate_ready_streams(
1146 channel, key, ctx);
1147 if (ret < 0) {
1148 ERR("Rotate ready streams failed");
1149 }
1150 }
1151
1152 break;
1153 }
1154 case LTTNG_CONSUMER_INIT:
1155 {
1156 ret_code = lttng_consumer_init_command(ctx,
1157 msg.u.init.sessiond_uuid);
1158 health_code_update();
1159 ret = consumer_send_status_msg(sock, ret_code);
1160 if (ret < 0) {
1161 /* Somehow, the session daemon is not responding anymore. */
1162 goto end_nosignal;
1163 }
1164 break;
1165 }
1166 case LTTNG_CONSUMER_CREATE_TRACE_CHUNK:
1167 {
1168 const struct lttng_credentials credentials = {
1169 .uid = msg.u.create_trace_chunk.credentials.value.uid,
1170 .gid = msg.u.create_trace_chunk.credentials.value.gid,
1171 };
1172 const bool is_local_trace =
1173 !msg.u.create_trace_chunk.relayd_id.is_set;
1174 const uint64_t relayd_id =
1175 msg.u.create_trace_chunk.relayd_id.value;
1176 const char *chunk_override_name =
1177 *msg.u.create_trace_chunk.override_name ?
1178 msg.u.create_trace_chunk.override_name :
1179 NULL;
1180 LTTNG_OPTIONAL(struct lttng_directory_handle) chunk_directory_handle =
1181 LTTNG_OPTIONAL_INIT;
1182
1183 /*
1184 * The session daemon will only provide a chunk directory file
1185 * descriptor for local traces.
1186 */
1187 if (is_local_trace) {
1188 int chunk_dirfd;
1189
1190 /* Acknowledge the reception of the command. */
1191 ret = consumer_send_status_msg(sock,
1192 LTTCOMM_CONSUMERD_SUCCESS);
1193 if (ret < 0) {
1194 /* Somehow, the session daemon is not responding anymore. */
1195 goto end_nosignal;
1196 }
1197
1198 ret = lttcomm_recv_fds_unix_sock(sock, &chunk_dirfd, 1);
1199 if (ret != sizeof(chunk_dirfd)) {
1200 ERR("Failed to receive trace chunk directory file descriptor");
1201 goto error_fatal;
1202 }
1203
1204 DBG("Received trace chunk directory fd (%d)",
1205 chunk_dirfd);
1206 ret = lttng_directory_handle_init_from_dirfd(
1207 &chunk_directory_handle.value,
1208 chunk_dirfd);
1209 if (ret) {
1210 ERR("Failed to initialize chunk directory handle from directory file descriptor");
1211 if (close(chunk_dirfd)) {
1212 PERROR("Failed to close chunk directory file descriptor");
1213 }
1214 goto error_fatal;
1215 }
1216 chunk_directory_handle.is_set = true;
1217 }
1218
1219 ret_code = lttng_consumer_create_trace_chunk(
1220 !is_local_trace ? &relayd_id : NULL,
1221 msg.u.create_trace_chunk.session_id,
1222 msg.u.create_trace_chunk.chunk_id,
1223 (time_t) msg.u.create_trace_chunk
1224 .creation_timestamp,
1225 chunk_override_name,
1226 msg.u.create_trace_chunk.credentials.is_set ?
1227 &credentials :
1228 NULL,
1229 chunk_directory_handle.is_set ?
1230 &chunk_directory_handle.value :
1231 NULL);
1232
1233 if (chunk_directory_handle.is_set) {
1234 lttng_directory_handle_fini(
1235 &chunk_directory_handle.value);
1236 }
1237 goto end_msg_sessiond;
1238 }
1239 case LTTNG_CONSUMER_CLOSE_TRACE_CHUNK:
1240 {
1241 enum lttng_trace_chunk_command_type close_command =
1242 msg.u.close_trace_chunk.close_command.value;
1243 const uint64_t relayd_id =
1244 msg.u.close_trace_chunk.relayd_id.value;
1245
1246 ret_code = lttng_consumer_close_trace_chunk(
1247 msg.u.close_trace_chunk.relayd_id.is_set ?
1248 &relayd_id :
1249 NULL,
1250 msg.u.close_trace_chunk.session_id,
1251 msg.u.close_trace_chunk.chunk_id,
1252 (time_t) msg.u.close_trace_chunk.close_timestamp,
1253 msg.u.close_trace_chunk.close_command.is_set ?
1254 &close_command :
1255 NULL);
1256 goto end_msg_sessiond;
1257 }
1258 case LTTNG_CONSUMER_TRACE_CHUNK_EXISTS:
1259 {
1260 const uint64_t relayd_id =
1261 msg.u.trace_chunk_exists.relayd_id.value;
1262
1263 ret_code = lttng_consumer_trace_chunk_exists(
1264 msg.u.trace_chunk_exists.relayd_id.is_set ?
1265 &relayd_id : NULL,
1266 msg.u.trace_chunk_exists.session_id,
1267 msg.u.trace_chunk_exists.chunk_id);
1268 goto end_msg_sessiond;
1269 }
1270 default:
1271 goto end_nosignal;
1272 }
1273
1274 end_nosignal:
1275 rcu_read_unlock();
1276
1277 /*
1278 * Return 1 to indicate success since the 0 value can be a socket
1279 * shutdown during the recv() or send() call.
1280 */
1281 health_code_update();
1282 return 1;
1283
1284 end_msg_sessiond:
1285 /*
1286 * The returned value here is not useful since either way we'll return 1 to
1287 * the caller because the session daemon socket management is done
1288 * elsewhere. Returning a negative code or 0 will shut down the consumer.
1289 */
1290 ret = consumer_send_status_msg(sock, ret_code);
1291 if (ret < 0) {
1292 goto error_fatal;
1293 }
1294 rcu_read_unlock();
1295
1296 health_code_update();
1297
1298 return 1;
1299
1300 error_fatal:
1301 rcu_read_unlock();
1302 /* This will issue a consumer stop. */
1303 return -1;
1304 }
1305
1306 /*
1307 * Populate index values of a kernel stream. Values are set in big endian order.
1308 *
1309 * Return 0 on success or else a negative value.
1310 */
1311 static int get_index_values(struct ctf_packet_index *index, int infd)
1312 {
1313 int ret;
1314
1315 ret = kernctl_get_timestamp_begin(infd, &index->timestamp_begin);
1316 if (ret < 0) {
1317 PERROR("kernctl_get_timestamp_begin");
1318 goto error;
1319 }
1320 index->timestamp_begin = htobe64(index->timestamp_begin);
1321
1322 ret = kernctl_get_timestamp_end(infd, &index->timestamp_end);
1323 if (ret < 0) {
1324 PERROR("kernctl_get_timestamp_end");
1325 goto error;
1326 }
1327 index->timestamp_end = htobe64(index->timestamp_end);
1328
1329 ret = kernctl_get_events_discarded(infd, &index->events_discarded);
1330 if (ret < 0) {
1331 PERROR("kernctl_get_events_discarded");
1332 goto error;
1333 }
1334 index->events_discarded = htobe64(index->events_discarded);
1335
1336 ret = kernctl_get_content_size(infd, &index->content_size);
1337 if (ret < 0) {
1338 PERROR("kernctl_get_content_size");
1339 goto error;
1340 }
1341 index->content_size = htobe64(index->content_size);
1342
1343 ret = kernctl_get_packet_size(infd, &index->packet_size);
1344 if (ret < 0) {
1345 PERROR("kernctl_get_packet_size");
1346 goto error;
1347 }
1348 index->packet_size = htobe64(index->packet_size);
1349
1350 ret = kernctl_get_stream_id(infd, &index->stream_id);
1351 if (ret < 0) {
1352 PERROR("kernctl_get_stream_id");
1353 goto error;
1354 }
1355 index->stream_id = htobe64(index->stream_id);
1356
1357 ret = kernctl_get_instance_id(infd, &index->stream_instance_id);
1358 if (ret < 0) {
1359 if (ret == -ENOTTY) {
1360 /* Command not implemented by lttng-modules. */
1361 index->stream_instance_id = -1ULL;
1362 } else {
1363 PERROR("kernctl_get_instance_id");
1364 goto error;
1365 }
1366 }
1367 index->stream_instance_id = htobe64(index->stream_instance_id);
1368
1369 ret = kernctl_get_sequence_number(infd, &index->packet_seq_num);
1370 if (ret < 0) {
1371 if (ret == -ENOTTY) {
1372 /* Command not implemented by lttng-modules. */
1373 index->packet_seq_num = -1ULL;
1374 ret = 0;
1375 } else {
1376 PERROR("kernctl_get_sequence_number");
1377 goto error;
1378 }
1379 }
1380 index->packet_seq_num = htobe64(index->packet_seq_num);
1381
1382 error:
1383 return ret;
1384 }
1385 /*
1386 * Sync metadata: request it from the session daemon and take a snapshot so
1387 * that the metadata thread can consume it.
1388 *
1389 * Metadata stream lock MUST be acquired.
1390 *
1391 * Return 0 if new metadata is available, ENODATA if the metadata stream
1392 * is empty, or a negative value on error.
1393 */
1394 int lttng_kconsumer_sync_metadata(struct lttng_consumer_stream *metadata)
1395 {
1396 int ret;
1397
1398 assert(metadata);
1399
1400 ret = kernctl_buffer_flush(metadata->wait_fd);
1401 if (ret < 0) {
1402 ERR("Failed to flush kernel stream");
1403 goto end;
1404 }
1405
1406 ret = kernctl_snapshot(metadata->wait_fd);
1407 if (ret < 0) {
1408 if (ret != -EAGAIN) {
1409 ERR("Sync metadata, taking kernel snapshot failed.");
1410 goto end;
1411 }
1412 DBG("Sync metadata, no new kernel metadata");
1413 /* No new metadata, exit. */
1414 ret = ENODATA;
1415 goto end;
1416 }
1417
1418 end:
1419 return ret;
1420 }
1421
1422 static
1423 int update_stream_stats(struct lttng_consumer_stream *stream)
1424 {
1425 int ret;
1426 uint64_t seq, discarded;
1427
1428 ret = kernctl_get_sequence_number(stream->wait_fd, &seq);
1429 if (ret < 0) {
1430 if (ret == -ENOTTY) {
1431 /* Command not implemented by lttng-modules. */
1432 seq = -1ULL;
1433 } else {
1434 PERROR("kernctl_get_sequence_number");
1435 goto end;
1436 }
1437 }
1438
1439 /*
1440 * Start the sequence number at the first extracted packet, in case it
1441 * does not start at 0 (for example, if a consumer is not connected to
1442 * the session right from its beginning).
1443 */
1444 if (stream->last_sequence_number == -1ULL) {
1445 stream->last_sequence_number = seq;
1446 } else if (seq > stream->last_sequence_number) {
1447 stream->chan->lost_packets += seq -
1448 stream->last_sequence_number - 1;
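/*
 * Example: previous sequence number 10, current 14; packets 11, 12 and 13
 * were never seen, so 14 - 10 - 1 = 3 packets are counted as lost.
 */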
1449 } else {
1450 /* seq <= last_sequence_number */
1451 ERR("Sequence number inconsistent: prev = %" PRIu64
1452 ", current = %" PRIu64,
1453 stream->last_sequence_number, seq);
1454 ret = -1;
1455 goto end;
1456 }
1457 stream->last_sequence_number = seq;
1458
1459 ret = kernctl_get_events_discarded(stream->wait_fd, &discarded);
1460 if (ret < 0) {
1461 PERROR("kernctl_get_events_discarded");
1462 goto end;
1463 }
1464 if (discarded < stream->last_discarded_events) {
1465 /*
1466 * Overflow has occurred. We assume only one wrap-around
1467 * has occurred.
1468 */
1469 stream->chan->discarded_events += (1ULL << (CAA_BITS_PER_LONG - 1)) -
1470 stream->last_discarded_events + discarded;
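/*
 * Example, assuming the counter wraps at 2^(CAA_BITS_PER_LONG - 1) as in
 * the formula above: a previous reading of 2^(CAA_BITS_PER_LONG - 1) - 5
 * followed by a new reading of 10 yields 15 newly discarded events.
 */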
1471 } else {
1472 stream->chan->discarded_events += discarded -
1473 stream->last_discarded_events;
1474 }
1475 stream->last_discarded_events = discarded;
1476 ret = 0;
1477
1478 end:
1479 return ret;
1480 }
1481
1482 /*
1483 * Check if the local version of the metadata stream matches the version
1484 * of the metadata stream in the kernel. If it was updated, set the reset flag
1485 * on the stream.
1486 */
1487 static
1488 int metadata_stream_check_version(int infd, struct lttng_consumer_stream *stream)
1489 {
1490 int ret;
1491 uint64_t cur_version;
1492
1493 ret = kernctl_get_metadata_version(infd, &cur_version);
1494 if (ret < 0) {
1495 if (ret == -ENOTTY) {
1496 /*
1497 * LTTng-modules does not implement this
1498 * command.
1499 */
1500 ret = 0;
1501 goto end;
1502 }
1503 ERR("Failed to get the metadata version");
1504 goto end;
1505 }
1506
1507 if (stream->metadata_version == cur_version) {
1508 ret = 0;
1509 goto end;
1510 }
1511
1512 DBG("New metadata version detected");
1513 stream->metadata_version = cur_version;
1514 stream->reset_metadata_flag = 1;
1515 ret = 0;
1516
1517 end:
1518 return ret;
1519 }
1520
1521 /*
1522 * Consume data on a file descriptor and write it to a trace file.
1523 * The stream and channel locks must be held by the caller.
1524 */
1525 ssize_t lttng_kconsumer_read_subbuffer(struct lttng_consumer_stream *stream,
1526 struct lttng_consumer_local_data *ctx)
1527 {
1528 unsigned long len, subbuf_size, padding;
1529 int err, write_index = 1, rotation_ret;
1530 ssize_t ret = 0;
1531 int infd = stream->wait_fd;
1532 struct ctf_packet_index index;
1533
1534 DBG("In read_subbuffer (infd : %d)", infd);
1535
1536 /*
1537 * If the stream was flagged to be ready for rotation before we extract the
1538 * next packet, rotate it now.
1539 */
1540 if (stream->rotate_ready) {
1541 DBG("Rotate stream before extracting data");
1542 rotation_ret = lttng_consumer_rotate_stream(ctx, stream);
1543 if (rotation_ret < 0) {
1544 ERR("Stream rotation error");
1545 ret = -1;
1546 goto error;
1547 }
1548 }
1549
1550 /* Get the next subbuffer */
1551 err = kernctl_get_next_subbuf(infd);
1552 if (err != 0) {
1553 /*
1554 * This is a debug message even for a single-threaded consumer,
1555 * because poll() has more relaxed criteria than get_subbuf, so
1556 * get_subbuf may fail during short race windows in which poll()
1557 * would still issue wakeups.
1558 */
1559 DBG("Reserving sub buffer failed (everything is normal, "
1560 "it is due to concurrency)");
1561 ret = err;
1562 goto error;
1563 }
1564
1565 /* Get the full subbuffer size including padding */
1566 err = kernctl_get_padded_subbuf_size(infd, &len);
1567 if (err != 0) {
1568 PERROR("Getting sub-buffer len failed.");
1569 err = kernctl_put_subbuf(infd);
1570 if (err != 0) {
1571 if (err == -EFAULT) {
1572 PERROR("Error in unreserving sub buffer\n");
1573 } else if (err == -EIO) {
1574 /* Should never happen with newer LTTng versions */
1575 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1576 }
1577 ret = err;
1578 goto error;
1579 }
1580 ret = err;
1581 goto error;
1582 }
1583
1584 if (!stream->metadata_flag) {
1585 ret = get_index_values(&index, infd);
1586 if (ret < 0) {
1587 err = kernctl_put_subbuf(infd);
1588 if (err != 0) {
1589 if (err == -EFAULT) {
1590 PERROR("Error in unreserving sub buffer\n");
1591 } else if (err == -EIO) {
1592 /* Should never happen with newer LTTng versions */
1593 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1594 }
1595 ret = err;
1596 goto error;
1597 }
1598 goto error;
1599 }
1600 ret = update_stream_stats(stream);
1601 if (ret < 0) {
1602 err = kernctl_put_subbuf(infd);
1603 if (err != 0) {
1604 if (err == -EFAULT) {
1605 PERROR("Error in unreserving sub buffer\n");
1606 } else if (err == -EIO) {
1607 /* Should never happen with newer LTTng versions */
1608 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1609 }
1610 ret = err;
1611 goto error;
1612 }
1613 goto error;
1614 }
1615 } else {
1616 write_index = 0;
1617 ret = metadata_stream_check_version(infd, stream);
1618 if (ret < 0) {
1619 err = kernctl_put_subbuf(infd);
1620 if (err != 0) {
1621 if (err == -EFAULT) {
1622 PERROR("Error in unreserving sub buffer\n");
1623 } else if (err == -EIO) {
1624 /* Should never happen with newer LTTng versions */
1625 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1626 }
1627 ret = err;
1628 goto error;
1629 }
1630 goto error;
1631 }
1632 }
1633
1634 switch (stream->chan->output) {
1635 case CONSUMER_CHANNEL_SPLICE:
1636 /*
1637 * XXX: The lttng-modules splice "actor" does not handle copying
1638 * partial pages, hence using only the sub-buffer size without the
1639 * padding makes the splice fail.
1640 */
1641 subbuf_size = len;
1642 padding = 0;
1643
1644 /* splice the subbuffer to the tracefile */
1645 ret = lttng_consumer_on_read_subbuffer_splice(ctx, stream, subbuf_size,
1646 padding, &index);
1647 /*
1648 * XXX: Splice does not support network streaming so the return value
1649 * is simply checked against subbuf_size, unlike the mmap() operation.
1650 */
1651 if (ret != subbuf_size) {
1652 /*
1653 * display the error but continue processing to try
1654 * to release the subbuffer
1655 */
1656 ERR("Error splicing to tracefile (ret: %zd != len: %lu)",
1657 ret, subbuf_size);
1658 write_index = 0;
1659 }
1660 break;
1661 case CONSUMER_CHANNEL_MMAP:
1662 /* Get subbuffer size without padding */
1663 err = kernctl_get_subbuf_size(infd, &subbuf_size);
1664 if (err != 0) {
1665 PERROR("Getting sub-buffer len failed.");
1666 err = kernctl_put_subbuf(infd);
1667 if (err != 0) {
1668 if (err == -EFAULT) {
1669 PERROR("Error in unreserving sub buffer\n");
1670 } else if (err == -EIO) {
1671 /* Should never happen with newer LTTng versions */
1672 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1673 }
1674 ret = err;
1675 goto error;
1676 }
1677 ret = err;
1678 goto error;
1679 }
1680
1681 /* Make sure the tracer has not gone mad on us! */
1682 assert(len >= subbuf_size);
1683
1684 padding = len - subbuf_size;
1685
1686 /* write the subbuffer to the tracefile */
1687 ret = lttng_consumer_on_read_subbuffer_mmap(ctx, stream, subbuf_size,
1688 padding, &index);
1689 /*
1690 * The mmap operation should write subbuf_size bytes when network
1691 * streaming, or the full padded size (len) when we are _not_
1692 * streaming.
1693 */
1694 if ((ret != subbuf_size && stream->net_seq_idx != (uint64_t) -1ULL) ||
1695 (ret != len && stream->net_seq_idx == (uint64_t) -1ULL)) {
1696 /*
1697 * Display the error but continue processing to try to release the
1698 * subbuffer. This is a DBG statement since this can happen without
1699 * being a critical error.
1700 */
1701 DBG("Error writing to tracefile "
1702 "(ret: %zd != len: %lu != subbuf_size: %lu)",
1703 ret, len, subbuf_size);
1704 write_index = 0;
1705 }
1706 break;
1707 default:
1708 ERR("Unknown output method");
1709 ret = -EPERM;
1710 }
1711
1712 err = kernctl_put_next_subbuf(infd);
1713 if (err != 0) {
1714 if (err == -EFAULT) {
1715 PERROR("Error in unreserving sub buffer\n");
1716 } else if (err == -EIO) {
1717 /* Should never happen with newer LTTng versions */
1718 PERROR("Reader has been pushed by the writer, last sub-buffer corrupted.");
1719 }
1720 ret = err;
1721 goto error;
1722 }
1723
1724 /* Write index if needed. */
1725 if (!write_index) {
1726 goto rotate;
1727 }
1728
1729 if (stream->chan->live_timer_interval && !stream->metadata_flag) {
1730 /*
1731 * In live, block until all the metadata is sent.
1732 */
1733 pthread_mutex_lock(&stream->metadata_timer_lock);
1734 assert(!stream->missed_metadata_flush);
1735 stream->waiting_on_metadata = true;
1736 pthread_mutex_unlock(&stream->metadata_timer_lock);
1737
1738 err = consumer_stream_sync_metadata(ctx, stream->session_id);
1739
1740 pthread_mutex_lock(&stream->metadata_timer_lock);
1741 stream->waiting_on_metadata = false;
1742 if (stream->missed_metadata_flush) {
1743 stream->missed_metadata_flush = false;
1744 pthread_mutex_unlock(&stream->metadata_timer_lock);
1745 (void) consumer_flush_kernel_index(stream);
1746 } else {
1747 pthread_mutex_unlock(&stream->metadata_timer_lock);
1748 }
1749 if (err < 0) {
1750 goto error;
1751 }
1752 }
1753
1754 err = consumer_stream_write_index(stream, &index);
1755 if (err < 0) {
1756 goto error;
1757 }
1758
1759 rotate:
1760 /*
1761 * After extracting the packet, we check if the stream is now ready to be
1762 * rotated and perform the action immediately.
1763 */
1764 rotation_ret = lttng_consumer_stream_is_rotate_ready(stream);
1765 if (rotation_ret == 1) {
1766 rotation_ret = lttng_consumer_rotate_stream(ctx, stream);
1767 if (rotation_ret < 0) {
1768 ERR("Stream rotation error");
1769 ret = -1;
1770 goto error;
1771 }
1772 } else if (rotation_ret < 0) {
1773 ERR("Checking if stream is ready to rotate");
1774 ret = -1;
1775 goto error;
1776 }
1777
1778 error:
1779 return ret;
1780 }
1781
1782 int lttng_kconsumer_on_recv_stream(struct lttng_consumer_stream *stream)
1783 {
1784 int ret;
1785
1786 assert(stream);
1787
1788 /*
1789 * Don't create anything if the stream is set up for streaming or if
1790 * there is no current trace chunk on the parent channel.
1791 */
1792 if (stream->net_seq_idx == (uint64_t) -1ULL && stream->chan->monitor &&
1793 stream->chan->trace_chunk) {
1794 ret = consumer_stream_create_output_files(stream, true);
1795 if (ret) {
1796 goto error;
1797 }
1798 }
1799
1800 if (stream->output == LTTNG_EVENT_MMAP) {
1801 /* get the len of the mmap region */
1802 unsigned long mmap_len;
1803
1804 ret = kernctl_get_mmap_len(stream->wait_fd, &mmap_len);
1805 if (ret != 0) {
1806 PERROR("kernctl_get_mmap_len");
1807 goto error_close_fd;
1808 }
1809 stream->mmap_len = (size_t) mmap_len;
1810
1811 stream->mmap_base = mmap(NULL, stream->mmap_len, PROT_READ,
1812 MAP_PRIVATE, stream->wait_fd, 0);
1813 if (stream->mmap_base == MAP_FAILED) {
1814 PERROR("Error mmapping");
1815 ret = -1;
1816 goto error_close_fd;
1817 }
1818 }
1819
1820 /* we return 0 to let the library handle the FD internally */
1821 return 0;
1822
1823 error_close_fd:
1824 if (stream->out_fd >= 0) {
1825 int err;
1826
1827 err = close(stream->out_fd);
1828 assert(!err);
1829 stream->out_fd = -1;
1830 }
1831 error:
1832 return ret;
1833 }
1834
1835 /*
1836 * Check if data is still being extracted from the buffers for a specific
1837 * stream. The consumer data lock and the stream lock MUST be acquired
1838 * before calling this function.
1839 *
1840 * Return 1 if the traced data is still being read, else 0, meaning that
1841 * the data is available for trace viewer reading.
1842 */
1843 int lttng_kconsumer_data_pending(struct lttng_consumer_stream *stream)
1844 {
1845 int ret;
1846
1847 assert(stream);
1848
1849 if (stream->endpoint_status != CONSUMER_ENDPOINT_ACTIVE) {
1850 ret = 0;
1851 goto end;
1852 }
1853
1854 ret = kernctl_get_next_subbuf(stream->wait_fd);
1855 if (ret == 0) {
1856 /* There is still data so let's put back this subbuffer. */
1857 ret = kernctl_put_subbuf(stream->wait_fd);
1858 assert(ret == 0);
1859 ret = 1; /* Data is pending */
1860 goto end;
1861 }
1862
1863 /* Data is NOT pending and ready to be read. */
1864 ret = 0;
1865
1866 end:
1867 return ret;
1868 }