Fix: reintroduce lazy kernel modules load, fix empty syscall list
[lttng-tools.git] / src / bin / lttng-sessiond / kernel.c
1 /*
2 * Copyright (C) 2011 - David Goulet <david.goulet@polymtl.ca>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License, version 2 only,
6 * as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License along
14 * with this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
16 */
17
18 #define _LGPL_SOURCE
19 #include <fcntl.h>
20 #include <stdlib.h>
21 #include <stdio.h>
22 #include <string.h>
23 #include <unistd.h>
24 #include <inttypes.h>
25 #include <sys/types.h>
26
27 #include <common/common.h>
28 #include <common/trace-chunk.h>
29 #include <common/kernel-ctl/kernel-ctl.h>
30 #include <common/kernel-ctl/kernel-ioctl.h>
31 #include <common/sessiond-comm/sessiond-comm.h>
32
33 #include "lttng-sessiond.h"
34 #include "lttng-syscall.h"
35 #include "consumer.h"
36 #include "kernel.h"
37 #include "kernel-consumer.h"
38 #include "kern-modules.h"
39 #include "utils.h"
40 #include "rotate.h"
41 #include "modprobe.h"
42
/*
 * File descriptor passed to the kernctl_*() calls of this file that address
 * the tracer itself rather than a session, channel or event. Initialized to
 * -1.
 */
static int kernel_tracer_fd = -1;

/* Path of the lttng entry under /proc. */
static const char *module_proc_lttng = "/proc/lttng";

/*
 * Last key handed out to reference a channel between the sessiond and the
 * consumer; it is pre-incremented each time a channel or metadata key is
 * assigned. This is only read and updated with the session_list lock held.
 */
static uint64_t next_kernel_channel_key;
52
53 #include <lttng/userspace-probe.h>
54 #include <lttng/userspace-probe-internal.h>
55 /*
56 * Add context on a kernel channel.
57 *
58 * Assumes the ownership of ctx.
59 */
60 int kernel_add_channel_context(struct ltt_kernel_channel *chan,
61 struct ltt_kernel_context *ctx)
62 {
63 int ret;
64
65 assert(chan);
66 assert(ctx);
67
68 DBG("Adding context to channel %s", chan->channel->name);
69 ret = kernctl_add_context(chan->fd, &ctx->ctx);
70 if (ret < 0) {
71 switch (-ret) {
72 case ENOSYS:
73 /* Exists but not available for this kernel */
74 ret = LTTNG_ERR_KERN_CONTEXT_UNAVAILABLE;
75 goto error;
76 case EEXIST:
77 /* If EEXIST, we just ignore the error */
78 ret = 0;
79 goto end;
80 default:
81 PERROR("add context ioctl");
82 ret = LTTNG_ERR_KERN_CONTEXT_FAIL;
83 goto error;
84 }
85 }
86 ret = 0;
87
88 end:
89 cds_list_add_tail(&ctx->list, &chan->ctx_list);
90 ctx->in_list = true;
91 ctx = NULL;
92 error:
93 if (ctx) {
94 trace_kernel_destroy_context(ctx);
95 }
96 return ret;
97 }
98
99 /*
100 * Create a new kernel session, register it to the kernel tracer and add it to
101 * the session daemon session.
102 */
103 int kernel_create_session(struct ltt_session *session)
104 {
105 int ret;
106 struct ltt_kernel_session *lks;
107
108 assert(session);
109
110 /* Allocate data structure */
111 lks = trace_kernel_create_session();
112 if (lks == NULL) {
113 ret = -1;
114 goto error;
115 }
116
117 /* Kernel tracer session creation */
118 ret = kernctl_create_session(kernel_tracer_fd);
119 if (ret < 0) {
120 PERROR("ioctl kernel create session");
121 goto error;
122 }
123
124 lks->fd = ret;
125 /* Prevent fd duplication after execlp() */
126 ret = fcntl(lks->fd, F_SETFD, FD_CLOEXEC);
127 if (ret < 0) {
128 PERROR("fcntl session fd");
129 }
130
131 lks->id = session->id;
132 lks->consumer_fds_sent = 0;
133 session->kernel_session = lks;
134
135 DBG("Kernel session created (fd: %d)", lks->fd);
136
137 return 0;
138
139 error:
140 if (lks) {
141 trace_kernel_destroy_session(lks);
142 trace_kernel_free_session(lks);
143 }
144 return ret;
145 }
146
147 /*
148 * Create a kernel channel, register it to the kernel tracer and add it to the
149 * kernel session.
150 */
151 int kernel_create_channel(struct ltt_kernel_session *session,
152 struct lttng_channel *chan)
153 {
154 int ret;
155 struct ltt_kernel_channel *lkc;
156
157 assert(session);
158 assert(chan);
159
160 /* Allocate kernel channel */
161 lkc = trace_kernel_create_channel(chan);
162 if (lkc == NULL) {
163 goto error;
164 }
165
166 DBG3("Kernel create channel %s with attr: %d, %" PRIu64 ", %" PRIu64 ", %u, %u, %d, %d",
167 chan->name, lkc->channel->attr.overwrite,
168 lkc->channel->attr.subbuf_size, lkc->channel->attr.num_subbuf,
169 lkc->channel->attr.switch_timer_interval, lkc->channel->attr.read_timer_interval,
170 lkc->channel->attr.live_timer_interval, lkc->channel->attr.output);
171
172 /* Kernel tracer channel creation */
173 ret = kernctl_create_channel(session->fd, &lkc->channel->attr);
174 if (ret < 0) {
175 PERROR("ioctl kernel create channel");
176 goto error;
177 }
178
179 /* Setup the channel fd */
180 lkc->fd = ret;
181 /* Prevent fd duplication after execlp() */
182 ret = fcntl(lkc->fd, F_SETFD, FD_CLOEXEC);
183 if (ret < 0) {
184 PERROR("fcntl session fd");
185 }
186
187 /* Add channel to session */
188 cds_list_add(&lkc->list, &session->channel_list.head);
189 session->channel_count++;
190 lkc->session = session;
191 lkc->key = ++next_kernel_channel_key;
192
193 DBG("Kernel channel %s created (fd: %d, key: %" PRIu64 ")",
194 lkc->channel->name, lkc->fd, lkc->key);
195
196 return 0;
197
198 error:
199 if (lkc) {
200 free(lkc->channel);
201 free(lkc);
202 }
203 return -1;
204 }
205
206 /*
207 * Compute the offset of the instrumentation byte in the binary based on the
208 * function probe location using the ELF lookup method.
209 *
210 * Returns 0 on success and set the offset out parameter to the offset of the
211 * elf symbol
212 * Returns -1 on error
213 */
214 static
215 int extract_userspace_probe_offset_function_elf(
216 const struct lttng_userspace_probe_location *probe_location,
217 struct ltt_kernel_session *session, uint64_t *offset)
218 {
219 int fd;
220 int ret = 0;
221 const char *symbol = NULL;
222 const struct lttng_userspace_probe_location_lookup_method *lookup = NULL;
223 enum lttng_userspace_probe_location_lookup_method_type lookup_method_type;
224
225 assert(lttng_userspace_probe_location_get_type(probe_location) ==
226 LTTNG_USERSPACE_PROBE_LOCATION_TYPE_FUNCTION);
227
228 lookup = lttng_userspace_probe_location_get_lookup_method(
229 probe_location);
230 if (!lookup) {
231 ret = -1;
232 goto end;
233 }
234
235 lookup_method_type =
236 lttng_userspace_probe_location_lookup_method_get_type(lookup);
237
238 assert(lookup_method_type ==
239 LTTNG_USERSPACE_PROBE_LOCATION_LOOKUP_METHOD_TYPE_FUNCTION_ELF);
240
241 symbol = lttng_userspace_probe_location_function_get_function_name(
242 probe_location);
243 if (!symbol) {
244 ret = -1;
245 goto end;
246 }
247
248 fd = lttng_userspace_probe_location_function_get_binary_fd(probe_location);
249 if (fd < 0) {
250 ret = -1;
251 goto end;
252 }
253
254 ret = run_as_extract_elf_symbol_offset(fd, symbol, session->uid,
255 session->gid, offset);
256 if (ret < 0) {
257 DBG("userspace probe offset calculation failed for "
258 "function %s", symbol);
259 goto end;
260 }
261
262 DBG("userspace probe elf offset for %s is 0x%jd", symbol, (intmax_t)(*offset));
263 end:
264 return ret;
265 }
266
267 /*
268 * Compute the offsets of the instrumentation bytes in the binary based on the
269 * tracepoint probe location using the SDT lookup method. This function
270 * allocates the offsets buffer, the caller must free it.
271 *
272 * Returns 0 on success and set the offset out parameter to the offsets of the
273 * SDT tracepoint.
274 * Returns -1 on error.
275 */
276 static
277 int extract_userspace_probe_offset_tracepoint_sdt(
278 const struct lttng_userspace_probe_location *probe_location,
279 struct ltt_kernel_session *session, uint64_t **offsets,
280 uint32_t *offsets_count)
281 {
282 enum lttng_userspace_probe_location_lookup_method_type lookup_method_type;
283 const struct lttng_userspace_probe_location_lookup_method *lookup = NULL;
284 const char *probe_name = NULL, *provider_name = NULL;
285 int ret = 0;
286 int fd, i;
287
288 assert(lttng_userspace_probe_location_get_type(probe_location) ==
289 LTTNG_USERSPACE_PROBE_LOCATION_TYPE_TRACEPOINT);
290
291 lookup = lttng_userspace_probe_location_get_lookup_method(probe_location);
292 if (!lookup) {
293 ret = -1;
294 goto end;
295 }
296
297 lookup_method_type =
298 lttng_userspace_probe_location_lookup_method_get_type(lookup);
299
300 assert(lookup_method_type ==
301 LTTNG_USERSPACE_PROBE_LOCATION_LOOKUP_METHOD_TYPE_TRACEPOINT_SDT);
302
303
304 probe_name = lttng_userspace_probe_location_tracepoint_get_probe_name(
305 probe_location);
306 if (!probe_name) {
307 ret = -1;
308 goto end;
309 }
310
311 provider_name = lttng_userspace_probe_location_tracepoint_get_provider_name(
312 probe_location);
313 if (!provider_name) {
314 ret = -1;
315 goto end;
316 }
317
318 fd = lttng_userspace_probe_location_tracepoint_get_binary_fd(probe_location);
319 if (fd < 0) {
320 ret = -1;
321 goto end;
322 }
323
324 ret = run_as_extract_sdt_probe_offsets(fd, provider_name, probe_name,
325 session->uid, session->gid, offsets, offsets_count);
326 if (ret < 0) {
327 DBG("userspace probe offset calculation failed for sdt "
328 "probe %s:%s", provider_name, probe_name);
329 goto end;
330 }
331
332 if (*offsets_count == 0) {
333 DBG("no userspace probe offset found");
334 goto end;
335 }
336
337 DBG("%u userspace probe SDT offsets found for %s:%s at:",
338 *offsets_count, provider_name, probe_name);
339 for (i = 0; i < *offsets_count; i++) {
340 DBG("\t0x%jd", (intmax_t)((*offsets)[i]));
341 }
342 end:
343 return ret;
344 }
345
346 /*
347 * Extract the offsets of the instrumentation point for the different lookup
348 * methods.
349 */
350 static
351 int userspace_probe_add_callsites(struct lttng_event *ev,
352 struct ltt_kernel_session *session, int fd)
353 {
354 const struct lttng_userspace_probe_location_lookup_method *lookup_method = NULL;
355 enum lttng_userspace_probe_location_lookup_method_type type;
356 const struct lttng_userspace_probe_location *location = NULL;
357 int ret;
358
359 assert(ev);
360 assert(ev->type == LTTNG_EVENT_USERSPACE_PROBE);
361
362 location = lttng_event_get_userspace_probe_location(ev);
363 if (!location) {
364 ret = -1;
365 goto end;
366 }
367 lookup_method =
368 lttng_userspace_probe_location_get_lookup_method(location);
369 if (!lookup_method) {
370 ret = -1;
371 goto end;
372 }
373
374 type = lttng_userspace_probe_location_lookup_method_get_type(lookup_method);
375 switch (type) {
376 case LTTNG_USERSPACE_PROBE_LOCATION_LOOKUP_METHOD_TYPE_FUNCTION_ELF:
377 {
378 struct lttng_kernel_event_callsite callsite;
379 uint64_t offset;
380
381 ret = extract_userspace_probe_offset_function_elf(location, session, &offset);
382 if (ret) {
383 ret = LTTNG_ERR_PROBE_LOCATION_INVAL;
384 goto end;
385 }
386
387 callsite.u.uprobe.offset = offset;
388 ret = kernctl_add_callsite(fd, &callsite);
389 if (ret) {
390 WARN("Adding callsite to userspace probe "
391 "event %s failed.", ev->name);
392 ret = LTTNG_ERR_KERN_ENABLE_FAIL;
393 goto end;
394 }
395 break;
396 }
397 case LTTNG_USERSPACE_PROBE_LOCATION_LOOKUP_METHOD_TYPE_TRACEPOINT_SDT:
398 {
399 int i;
400 uint64_t *offsets = NULL;
401 uint32_t offsets_count;
402 struct lttng_kernel_event_callsite callsite;
403
404 /*
405 * This call allocates the offsets buffer. This buffer must be freed
406 * by the caller
407 */
408 ret = extract_userspace_probe_offset_tracepoint_sdt(location, session,
409 &offsets, &offsets_count);
410 if (ret) {
411 ret = LTTNG_ERR_PROBE_LOCATION_INVAL;
412 goto end;
413 }
414 for (i = 0; i < offsets_count; i++) {
415 callsite.u.uprobe.offset = offsets[i];
416 ret = kernctl_add_callsite(fd, &callsite);
417 if (ret) {
418 WARN("Adding callsite to userspace probe "
419 "event %s failed.", ev->name);
420 ret = LTTNG_ERR_KERN_ENABLE_FAIL;
421 free(offsets);
422 goto end;
423 }
424 }
425 free(offsets);
426 break;
427 }
428 default:
429 ret = LTTNG_ERR_PROBE_LOCATION_INVAL;
430 goto end;
431 }
432 end:
433 return ret;
434 }
435
436 /*
437 * Create a kernel event, enable it to the kernel tracer and add it to the
438 * channel event list of the kernel session.
439 * We own filter_expression and filter.
440 */
441 int kernel_create_event(struct lttng_event *ev,
442 struct ltt_kernel_channel *channel,
443 char *filter_expression,
444 struct lttng_filter_bytecode *filter)
445 {
446 int err, fd;
447 enum lttng_error_code ret;
448 struct ltt_kernel_event *event;
449
450 assert(ev);
451 assert(channel);
452
453 /* We pass ownership of filter_expression and filter */
454 ret = trace_kernel_create_event(ev, filter_expression,
455 filter, &event);
456 if (ret != LTTNG_OK) {
457 goto error;
458 }
459
460 fd = kernctl_create_event(channel->fd, event->event);
461 if (fd < 0) {
462 switch (-fd) {
463 case EEXIST:
464 ret = LTTNG_ERR_KERN_EVENT_EXIST;
465 break;
466 case ENOSYS:
467 WARN("Event type not implemented");
468 ret = LTTNG_ERR_KERN_EVENT_ENOSYS;
469 break;
470 case ENOENT:
471 WARN("Event %s not found!", ev->name);
472 ret = LTTNG_ERR_KERN_ENABLE_FAIL;
473 break;
474 default:
475 ret = LTTNG_ERR_KERN_ENABLE_FAIL;
476 PERROR("create event ioctl");
477 }
478 goto free_event;
479 }
480
481 event->type = ev->type;
482 event->fd = fd;
483 /* Prevent fd duplication after execlp() */
484 err = fcntl(event->fd, F_SETFD, FD_CLOEXEC);
485 if (err < 0) {
486 PERROR("fcntl session fd");
487 }
488
489 if (filter) {
490 err = kernctl_filter(event->fd, filter);
491 if (err < 0) {
492 switch (-err) {
493 case ENOMEM:
494 ret = LTTNG_ERR_FILTER_NOMEM;
495 break;
496 default:
497 ret = LTTNG_ERR_FILTER_INVAL;
498 break;
499 }
500 goto filter_error;
501 }
502 }
503
504 if (ev->type == LTTNG_EVENT_USERSPACE_PROBE) {
505 ret = userspace_probe_add_callsites(ev, channel->session, event->fd);
506 if (ret) {
507 goto add_callsite_error;
508 }
509 }
510
511 err = kernctl_enable(event->fd);
512 if (err < 0) {
513 switch (-err) {
514 case EEXIST:
515 ret = LTTNG_ERR_KERN_EVENT_EXIST;
516 break;
517 default:
518 PERROR("enable kernel event");
519 ret = LTTNG_ERR_KERN_ENABLE_FAIL;
520 break;
521 }
522 goto enable_error;
523 }
524
525 /* Add event to event list */
526 cds_list_add(&event->list, &channel->events_list.head);
527 channel->event_count++;
528
529 DBG("Event %s created (fd: %d)", ev->name, event->fd);
530
531 return 0;
532
533 add_callsite_error:
534 enable_error:
535 filter_error:
536 {
537 int closeret;
538
539 closeret = close(event->fd);
540 if (closeret) {
541 PERROR("close event fd");
542 }
543 }
544 free_event:
545 free(event);
546 error:
547 return ret;
548 }
549
550 /*
551 * Disable a kernel channel.
552 */
553 int kernel_disable_channel(struct ltt_kernel_channel *chan)
554 {
555 int ret;
556
557 assert(chan);
558
559 ret = kernctl_disable(chan->fd);
560 if (ret < 0) {
561 PERROR("disable chan ioctl");
562 goto error;
563 }
564
565 chan->enabled = 0;
566 DBG("Kernel channel %s disabled (fd: %d, key: %" PRIu64 ")",
567 chan->channel->name, chan->fd, chan->key);
568
569 return 0;
570
571 error:
572 return ret;
573 }
574
575 /*
576 * Enable a kernel channel.
577 */
578 int kernel_enable_channel(struct ltt_kernel_channel *chan)
579 {
580 int ret;
581
582 assert(chan);
583
584 ret = kernctl_enable(chan->fd);
585 if (ret < 0 && ret != -EEXIST) {
586 PERROR("Enable kernel chan");
587 goto error;
588 }
589
590 chan->enabled = 1;
591 DBG("Kernel channel %s enabled (fd: %d, key: %" PRIu64 ")",
592 chan->channel->name, chan->fd, chan->key);
593
594 return 0;
595
596 error:
597 return ret;
598 }
599
600 /*
601 * Enable a kernel event.
602 */
603 int kernel_enable_event(struct ltt_kernel_event *event)
604 {
605 int ret;
606
607 assert(event);
608
609 ret = kernctl_enable(event->fd);
610 if (ret < 0) {
611 switch (-ret) {
612 case EEXIST:
613 ret = LTTNG_ERR_KERN_EVENT_EXIST;
614 break;
615 default:
616 PERROR("enable kernel event");
617 break;
618 }
619 goto error;
620 }
621
622 event->enabled = 1;
623 DBG("Kernel event %s enabled (fd: %d)", event->event->name, event->fd);
624
625 return 0;
626
627 error:
628 return ret;
629 }
630
631 /*
632 * Disable a kernel event.
633 */
634 int kernel_disable_event(struct ltt_kernel_event *event)
635 {
636 int ret;
637
638 assert(event);
639
640 ret = kernctl_disable(event->fd);
641 if (ret < 0) {
642 switch (-ret) {
643 case EEXIST:
644 ret = LTTNG_ERR_KERN_EVENT_EXIST;
645 break;
646 default:
647 PERROR("disable kernel event");
648 break;
649 }
650 goto error;
651 }
652
653 event->enabled = 0;
654 DBG("Kernel event %s disabled (fd: %d)", event->event->name, event->fd);
655
656 return 0;
657
658 error:
659 return ret;
660 }
661
662
663 int kernel_track_pid(struct ltt_kernel_session *session, int pid)
664 {
665 int ret;
666
667 DBG("Kernel track PID %d for session id %" PRIu64 ".",
668 pid, session->id);
669 ret = kernctl_track_pid(session->fd, pid);
670 if (!ret) {
671 return LTTNG_OK;
672 }
673 switch (-ret) {
674 case EINVAL:
675 return LTTNG_ERR_INVALID;
676 case ENOMEM:
677 return LTTNG_ERR_NOMEM;
678 case EEXIST:
679 return LTTNG_ERR_PID_TRACKED;
680 default:
681 return LTTNG_ERR_UNK;
682 }
683 }
684
685 int kernel_untrack_pid(struct ltt_kernel_session *session, int pid)
686 {
687 int ret;
688
689 DBG("Kernel untrack PID %d for session id %" PRIu64 ".",
690 pid, session->id);
691 ret = kernctl_untrack_pid(session->fd, pid);
692 if (!ret) {
693 return LTTNG_OK;
694 }
695 switch (-ret) {
696 case EINVAL:
697 return LTTNG_ERR_INVALID;
698 case ENOMEM:
699 return LTTNG_ERR_NOMEM;
700 case ENOENT:
701 return LTTNG_ERR_PID_NOT_TRACKED;
702 default:
703 return LTTNG_ERR_UNK;
704 }
705 }
706
707 ssize_t kernel_list_tracker_pids(struct ltt_kernel_session *session,
708 int **_pids)
709 {
710 int fd, ret;
711 int pid;
712 ssize_t nbmem, count = 0;
713 FILE *fp;
714 int *pids;
715
716 fd = kernctl_list_tracker_pids(session->fd);
717 if (fd < 0) {
718 PERROR("kernel tracker pids list");
719 goto error;
720 }
721
722 fp = fdopen(fd, "r");
723 if (fp == NULL) {
724 PERROR("kernel tracker pids list fdopen");
725 goto error_fp;
726 }
727
728 nbmem = KERNEL_TRACKER_PIDS_INIT_LIST_SIZE;
729 pids = zmalloc(sizeof(*pids) * nbmem);
730 if (pids == NULL) {
731 PERROR("alloc list pids");
732 count = -ENOMEM;
733 goto end;
734 }
735
736 while (fscanf(fp, "process { pid = %u; };\n", &pid) == 1) {
737 if (count >= nbmem) {
738 int *new_pids;
739 size_t new_nbmem;
740
741 new_nbmem = nbmem << 1;
742 DBG("Reallocating pids list from %zu to %zu entries",
743 nbmem, new_nbmem);
744 new_pids = realloc(pids, new_nbmem * sizeof(*new_pids));
745 if (new_pids == NULL) {
746 PERROR("realloc list events");
747 free(pids);
748 count = -ENOMEM;
749 goto end;
750 }
751 /* Zero the new memory */
752 memset(new_pids + nbmem, 0,
753 (new_nbmem - nbmem) * sizeof(*new_pids));
754 nbmem = new_nbmem;
755 pids = new_pids;
756 }
757 pids[count++] = pid;
758 }
759
760 *_pids = pids;
761 DBG("Kernel list tracker pids done (%zd pids)", count);
762 end:
763 ret = fclose(fp); /* closes both fp and fd */
764 if (ret) {
765 PERROR("fclose");
766 }
767 return count;
768
769 error_fp:
770 ret = close(fd);
771 if (ret) {
772 PERROR("close");
773 }
774 error:
775 return -1;
776 }
777
778 /*
779 * Create kernel metadata, open from the kernel tracer and add it to the
780 * kernel session.
781 */
782 int kernel_open_metadata(struct ltt_kernel_session *session)
783 {
784 int ret;
785 struct ltt_kernel_metadata *lkm = NULL;
786
787 assert(session);
788
789 /* Allocate kernel metadata */
790 lkm = trace_kernel_create_metadata();
791 if (lkm == NULL) {
792 goto error;
793 }
794
795 /* Kernel tracer metadata creation */
796 ret = kernctl_open_metadata(session->fd, &lkm->conf->attr);
797 if (ret < 0) {
798 goto error_open;
799 }
800
801 lkm->fd = ret;
802 lkm->key = ++next_kernel_channel_key;
803 /* Prevent fd duplication after execlp() */
804 ret = fcntl(lkm->fd, F_SETFD, FD_CLOEXEC);
805 if (ret < 0) {
806 PERROR("fcntl session fd");
807 }
808
809 session->metadata = lkm;
810
811 DBG("Kernel metadata opened (fd: %d)", lkm->fd);
812
813 return 0;
814
815 error_open:
816 trace_kernel_destroy_metadata(lkm);
817 error:
818 return -1;
819 }
820
821 /*
822 * Start tracing session.
823 */
824 int kernel_start_session(struct ltt_kernel_session *session)
825 {
826 int ret;
827
828 assert(session);
829
830 ret = kernctl_start_session(session->fd);
831 if (ret < 0) {
832 PERROR("ioctl start session");
833 goto error;
834 }
835
836 DBG("Kernel session started");
837
838 return 0;
839
840 error:
841 return ret;
842 }
843
844 /*
845 * Make a kernel wait to make sure in-flight probe have completed.
846 */
847 void kernel_wait_quiescent(void)
848 {
849 int ret;
850 int fd = kernel_tracer_fd;
851
852 DBG("Kernel quiescent wait on %d", fd);
853
854 ret = kernctl_wait_quiescent(fd);
855 if (ret < 0) {
856 PERROR("wait quiescent ioctl");
857 ERR("Kernel quiescent wait failed");
858 }
859 }
860
/*
 * Force a flush of the metadata buffer behind fd.
 *
 * A flush failure is logged but not reported: this function always returns 0.
 */
int kernel_metadata_flush_buffer(int fd)
{
	int flush_ret;

	DBG("Kernel flushing metadata buffer on fd %d", fd);

	flush_ret = kernctl_buffer_flush(fd);
	if (flush_ret < 0) {
		ERR("Fail to flush metadata buffers %d (ret: %d)", fd, flush_ret);
	}

	return 0;
}
877
878 /*
879 * Force flush buffer for channel.
880 */
881 int kernel_flush_buffer(struct ltt_kernel_channel *channel)
882 {
883 int ret;
884 struct ltt_kernel_stream *stream;
885
886 assert(channel);
887
888 DBG("Flush buffer for channel %s", channel->channel->name);
889
890 cds_list_for_each_entry(stream, &channel->stream_list.head, list) {
891 DBG("Flushing channel stream %d", stream->fd);
892 ret = kernctl_buffer_flush(stream->fd);
893 if (ret < 0) {
894 PERROR("ioctl");
895 ERR("Fail to flush buffer for stream %d (ret: %d)",
896 stream->fd, ret);
897 }
898 }
899
900 return 0;
901 }
902
903 /*
904 * Stop tracing session.
905 */
906 int kernel_stop_session(struct ltt_kernel_session *session)
907 {
908 int ret;
909
910 assert(session);
911
912 ret = kernctl_stop_session(session->fd);
913 if (ret < 0) {
914 goto error;
915 }
916
917 DBG("Kernel session stopped");
918
919 return 0;
920
921 error:
922 return ret;
923 }
924
925 /*
926 * Open stream of channel, register it to the kernel tracer and add it
927 * to the stream list of the channel.
928 *
929 * Note: given that the streams may appear in random order wrt CPU
930 * number (e.g. cpu hotplug), the index value of the stream number in
931 * the stream name is not necessarily linked to the CPU number.
932 *
933 * Return the number of created stream. Else, a negative value.
934 */
935 int kernel_open_channel_stream(struct ltt_kernel_channel *channel)
936 {
937 int ret;
938 struct ltt_kernel_stream *lks;
939
940 assert(channel);
941
942 while ((ret = kernctl_create_stream(channel->fd)) >= 0) {
943 lks = trace_kernel_create_stream(channel->channel->name,
944 channel->stream_count);
945 if (lks == NULL) {
946 ret = close(ret);
947 if (ret) {
948 PERROR("close");
949 }
950 goto error;
951 }
952
953 lks->fd = ret;
954 /* Prevent fd duplication after execlp() */
955 ret = fcntl(lks->fd, F_SETFD, FD_CLOEXEC);
956 if (ret < 0) {
957 PERROR("fcntl session fd");
958 }
959
960 lks->tracefile_size = channel->channel->attr.tracefile_size;
961 lks->tracefile_count = channel->channel->attr.tracefile_count;
962
963 /* Add stream to channel stream list */
964 cds_list_add(&lks->list, &channel->stream_list.head);
965 channel->stream_count++;
966
967 DBG("Kernel stream %s created (fd: %d, state: %d)", lks->name, lks->fd,
968 lks->state);
969 }
970
971 return channel->stream_count;
972
973 error:
974 return -1;
975 }
976
977 /*
978 * Open the metadata stream and set it to the kernel session.
979 */
980 int kernel_open_metadata_stream(struct ltt_kernel_session *session)
981 {
982 int ret;
983
984 assert(session);
985
986 ret = kernctl_create_stream(session->metadata->fd);
987 if (ret < 0) {
988 PERROR("kernel create metadata stream");
989 goto error;
990 }
991
992 DBG("Kernel metadata stream created (fd: %d)", ret);
993 session->metadata_stream_fd = ret;
994 /* Prevent fd duplication after execlp() */
995 ret = fcntl(session->metadata_stream_fd, F_SETFD, FD_CLOEXEC);
996 if (ret < 0) {
997 PERROR("fcntl session fd");
998 }
999
1000 return 0;
1001
1002 error:
1003 return -1;
1004 }
1005
1006 /*
1007 * Get the event list from the kernel tracer and return the number of elements.
1008 */
1009 ssize_t kernel_list_events(struct lttng_event **events)
1010 {
1011 int fd, ret;
1012 char *event;
1013 size_t nbmem, count = 0;
1014 FILE *fp;
1015 struct lttng_event *elist;
1016
1017 assert(events);
1018
1019 fd = kernctl_tracepoint_list(kernel_tracer_fd);
1020 if (fd < 0) {
1021 PERROR("kernel tracepoint list");
1022 goto error;
1023 }
1024
1025 fp = fdopen(fd, "r");
1026 if (fp == NULL) {
1027 PERROR("kernel tracepoint list fdopen");
1028 goto error_fp;
1029 }
1030
1031 /*
1032 * Init memory size counter
1033 * See kernel-ctl.h for explanation of this value
1034 */
1035 nbmem = KERNEL_EVENT_INIT_LIST_SIZE;
1036 elist = zmalloc(sizeof(struct lttng_event) * nbmem);
1037 if (elist == NULL) {
1038 PERROR("alloc list events");
1039 count = -ENOMEM;
1040 goto end;
1041 }
1042
1043 while (fscanf(fp, "event { name = %m[^;]; };\n", &event) == 1) {
1044 if (count >= nbmem) {
1045 struct lttng_event *new_elist;
1046 size_t new_nbmem;
1047
1048 new_nbmem = nbmem << 1;
1049 DBG("Reallocating event list from %zu to %zu bytes",
1050 nbmem, new_nbmem);
1051 new_elist = realloc(elist, new_nbmem * sizeof(struct lttng_event));
1052 if (new_elist == NULL) {
1053 PERROR("realloc list events");
1054 free(event);
1055 free(elist);
1056 count = -ENOMEM;
1057 goto end;
1058 }
1059 /* Zero the new memory */
1060 memset(new_elist + nbmem, 0,
1061 (new_nbmem - nbmem) * sizeof(struct lttng_event));
1062 nbmem = new_nbmem;
1063 elist = new_elist;
1064 }
1065 strncpy(elist[count].name, event, LTTNG_SYMBOL_NAME_LEN);
1066 elist[count].name[LTTNG_SYMBOL_NAME_LEN - 1] = '\0';
1067 elist[count].enabled = -1;
1068 count++;
1069 free(event);
1070 }
1071
1072 *events = elist;
1073 DBG("Kernel list events done (%zu events)", count);
1074 end:
1075 ret = fclose(fp); /* closes both fp and fd */
1076 if (ret) {
1077 PERROR("fclose");
1078 }
1079 return count;
1080
1081 error_fp:
1082 ret = close(fd);
1083 if (ret) {
1084 PERROR("close");
1085 }
1086 error:
1087 return -1;
1088 }
1089
1090 /*
1091 * Get kernel version and validate it.
1092 */
1093 int kernel_validate_version(struct lttng_kernel_tracer_version *version,
1094 struct lttng_kernel_tracer_abi_version *abi_version)
1095 {
1096 int ret;
1097
1098 ret = kernctl_tracer_version(kernel_tracer_fd, version);
1099 if (ret < 0) {
1100 ERR("Failed to retrieve the lttng-modules version");
1101 goto error;
1102 }
1103
1104 /* Validate version */
1105 if (version->major != VERSION_MAJOR) {
1106 ERR("Kernel tracer major version (%d) is not compatible with lttng-tools major version (%d)",
1107 version->major, VERSION_MAJOR);
1108 goto error_version;
1109 }
1110 ret = kernctl_tracer_abi_version(kernel_tracer_fd, abi_version);
1111 if (ret < 0) {
1112 ERR("Failed to retrieve lttng-modules ABI version");
1113 goto error;
1114 }
1115 if (abi_version->major != LTTNG_MODULES_ABI_MAJOR_VERSION) {
1116 ERR("Kernel tracer ABI version (%d.%d) does not match the expected ABI major version (%d.*)",
1117 abi_version->major, abi_version->minor,
1118 LTTNG_MODULES_ABI_MAJOR_VERSION);
1119 goto error;
1120 }
1121 DBG2("Kernel tracer version validated (%d.%d, ABI %d.%d)",
1122 version->major, version->minor,
1123 abi_version->major, abi_version->minor);
1124 return 0;
1125
1126 error_version:
1127 ret = -1;
1128
1129 error:
1130 ERR("Kernel tracer version check failed; kernel tracing will not be available");
1131 return ret;
1132 }
1133
/*
 * Kernel work-arounds called at the start of sessiond main().
 *
 * Best effort: failure to open or read the file is ignored and this
 * function always returns 0.
 */
int init_kernel_workarounds(void)
{
	FILE *fp;
	char buf[37];

	/*
	 * boot_id needs to be read once before being used concurrently
	 * to deal with a Linux kernel race. A fix is proposed for
	 * upstream, but the work-around is needed for older kernels.
	 */
	fp = fopen("/proc/sys/kernel/random/boot_id", "r");
	if (!fp) {
		return 0;
	}

	/*
	 * The content is discarded; only the act of reading matters. Looping
	 * on the fread() result ends on end-of-file AND on a read error,
	 * whereas looping on !feof() would never end after a read error.
	 */
	while (fread(buf, 1, sizeof(buf), fp) > 0) {
		continue;
	}

	if (fclose(fp)) {
		PERROR("fclose");
	}
	return 0;
}
1166
1167 /*
1168 * Teardown of a kernel session, keeping data required by destroy notifiers.
1169 */
1170 void kernel_destroy_session(struct ltt_kernel_session *ksess)
1171 {
1172 struct lttng_trace_chunk *trace_chunk;
1173
1174 if (ksess == NULL) {
1175 DBG3("No kernel session when tearing down session");
1176 return;
1177 }
1178
1179 DBG("Tearing down kernel session");
1180 trace_chunk = ksess->current_trace_chunk;
1181
1182 /*
1183 * Destroy channels on the consumer if at least one FD has been sent and we
1184 * are in no output mode because the streams are in *no* monitor mode so we
1185 * have to send a command to clean them up or else they leaked.
1186 */
1187 if (!ksess->output_traces && ksess->consumer_fds_sent) {
1188 int ret;
1189 struct consumer_socket *socket;
1190 struct lttng_ht_iter iter;
1191
1192 /* For each consumer socket. */
1193 rcu_read_lock();
1194 cds_lfht_for_each_entry(ksess->consumer->socks->ht, &iter.iter,
1195 socket, node.node) {
1196 struct ltt_kernel_channel *chan;
1197
1198 /* For each channel, ask the consumer to destroy it. */
1199 cds_list_for_each_entry(chan, &ksess->channel_list.head, list) {
1200 ret = kernel_consumer_destroy_channel(socket, chan);
1201 if (ret < 0) {
1202 /* Consumer is probably dead. Use next socket. */
1203 continue;
1204 }
1205 }
1206 }
1207 rcu_read_unlock();
1208 }
1209
1210 /* Close any relayd session */
1211 consumer_output_send_destroy_relayd(ksess->consumer);
1212
1213 trace_kernel_destroy_session(ksess);
1214 lttng_trace_chunk_put(trace_chunk);
1215 }
1216
/*
 * Teardown of data required by destroy notifiers. A NULL session is
 * accepted and ignored.
 */
void kernel_free_session(struct ltt_kernel_session *ksess)
{
	if (ksess != NULL) {
		trace_kernel_free_session(ksess);
	}
}
1225
1226 /*
1227 * Destroy a kernel channel object. It does not do anything on the tracer side.
1228 */
1229 void kernel_destroy_channel(struct ltt_kernel_channel *kchan)
1230 {
1231 struct ltt_kernel_session *ksess = NULL;
1232
1233 assert(kchan);
1234 assert(kchan->channel);
1235
1236 DBG3("Kernel destroy channel %s", kchan->channel->name);
1237
1238 /* Update channel count of associated session. */
1239 if (kchan->session) {
1240 /* Keep pointer reference so we can update it after the destroy. */
1241 ksess = kchan->session;
1242 }
1243
1244 trace_kernel_destroy_channel(kchan);
1245
1246 /*
1247 * At this point the kernel channel is not visible anymore. This is safe
1248 * since in order to work on a visible kernel session, the tracing session
1249 * lock (ltt_session.lock) MUST be acquired.
1250 */
1251 if (ksess) {
1252 ksess->channel_count--;
1253 }
1254 }
1255
1256 /*
1257 * Take a snapshot for a given kernel session.
1258 *
1259 * Return LTTNG_OK on success or else return a LTTNG_ERR code.
1260 */
1261 enum lttng_error_code kernel_snapshot_record(
1262 struct ltt_kernel_session *ksess,
1263 const struct consumer_output *output, int wait,
1264 uint64_t nb_packets_per_stream)
1265 {
1266 int err, ret, saved_metadata_fd;
1267 enum lttng_error_code status = LTTNG_OK;
1268 struct consumer_socket *socket;
1269 struct lttng_ht_iter iter;
1270 struct ltt_kernel_metadata *saved_metadata;
1271 char *trace_path = NULL;
1272
1273 assert(ksess);
1274 assert(ksess->consumer);
1275 assert(output);
1276
1277 DBG("Kernel snapshot record started");
1278
1279 /* Save current metadata since the following calls will change it. */
1280 saved_metadata = ksess->metadata;
1281 saved_metadata_fd = ksess->metadata_stream_fd;
1282
1283 rcu_read_lock();
1284
1285 ret = kernel_open_metadata(ksess);
1286 if (ret < 0) {
1287 status = LTTNG_ERR_KERN_META_FAIL;
1288 goto error;
1289 }
1290
1291 ret = kernel_open_metadata_stream(ksess);
1292 if (ret < 0) {
1293 status = LTTNG_ERR_KERN_META_FAIL;
1294 goto error_open_stream;
1295 }
1296
1297 trace_path = setup_channel_trace_path(ksess->consumer,
1298 DEFAULT_KERNEL_TRACE_DIR);
1299 if (!trace_path) {
1300 status = LTTNG_ERR_INVALID;
1301 goto error;
1302 }
1303 /* Send metadata to consumer and snapshot everything. */
1304 cds_lfht_for_each_entry(output->socks->ht, &iter.iter,
1305 socket, node.node) {
1306 struct ltt_kernel_channel *chan;
1307
1308 pthread_mutex_lock(socket->lock);
1309 /* This stream must not be monitored by the consumer. */
1310 ret = kernel_consumer_add_metadata(socket, ksess, 0);
1311 pthread_mutex_unlock(socket->lock);
1312 if (ret < 0) {
1313 status = LTTNG_ERR_KERN_META_FAIL;
1314 goto error_consumer;
1315 }
1316
1317 /* For each channel, ask the consumer to snapshot it. */
1318 cds_list_for_each_entry(chan, &ksess->channel_list.head, list) {
1319 status = consumer_snapshot_channel(socket, chan->key, output, 0,
1320 ksess->uid, ksess->gid,
1321 trace_path, wait,
1322 nb_packets_per_stream);
1323 if (status != LTTNG_OK) {
1324 (void) kernel_consumer_destroy_metadata(socket,
1325 ksess->metadata);
1326 goto error_consumer;
1327 }
1328 }
1329
1330 /* Snapshot metadata, */
1331 status = consumer_snapshot_channel(socket, ksess->metadata->key, output,
1332 1, ksess->uid, ksess->gid, trace_path, wait, 0);
1333 if (status != LTTNG_OK) {
1334 goto error_consumer;
1335 }
1336
1337 /*
1338 * The metadata snapshot is done, ask the consumer to destroy it since
1339 * it's not monitored on the consumer side.
1340 */
1341 (void) kernel_consumer_destroy_metadata(socket, ksess->metadata);
1342 }
1343
1344 error_consumer:
1345 /* Close newly opened metadata stream. It's now on the consumer side. */
1346 err = close(ksess->metadata_stream_fd);
1347 if (err < 0) {
1348 PERROR("close snapshot kernel");
1349 }
1350
1351 error_open_stream:
1352 trace_kernel_destroy_metadata(ksess->metadata);
1353 error:
1354 /* Restore metadata state.*/
1355 ksess->metadata = saved_metadata;
1356 ksess->metadata_stream_fd = saved_metadata_fd;
1357 rcu_read_unlock();
1358 free(trace_path);
1359 return status;
1360 }
1361
/*
 * Retrieve the syscall mask array of a channel from the kernel tracer.
 *
 * Both output parameters must be non-NULL. The request is forwarded to
 * kernctl_syscall_mask() and its return value is passed back unchanged.
 *
 * Return 0 on success else a negative value. In both cases, syscall_mask
 * should be freed by the caller.
 */
int kernel_syscall_mask(int chan_fd, char **syscall_mask, uint32_t *nr_bits)
{
	int ret;

	assert(syscall_mask);
	assert(nr_bits);

	ret = kernctl_syscall_mask(chan_fd, syscall_mask, nr_bits);
	return ret;
}
1375
1376 /*
1377 * Check for the support of the RING_BUFFER_SNAPSHOT_SAMPLE_POSITIONS via abi
1378 * version number.
1379 *
1380 * Return 1 on success, 0 when feature is not supported, negative value in case
1381 * of errors.
1382 */
1383 int kernel_supports_ring_buffer_snapshot_sample_positions(void)
1384 {
1385 int ret = 0; // Not supported by default
1386 struct lttng_kernel_tracer_abi_version abi;
1387
1388 ret = kernctl_tracer_abi_version(kernel_tracer_fd, &abi);
1389 if (ret < 0) {
1390 ERR("Failed to retrieve lttng-modules ABI version");
1391 goto error;
1392 }
1393
1394 /*
1395 * RING_BUFFER_SNAPSHOT_SAMPLE_POSITIONS was introduced in 2.3
1396 */
1397 if (abi.major >= 2 && abi.minor >= 3) {
1398 /* Supported */
1399 ret = 1;
1400 } else {
1401 /* Not supported */
1402 ret = 0;
1403 }
1404 error:
1405 return ret;
1406 }
1407
1408 /*
1409 * Rotate a kernel session.
1410 *
1411 * Return LTTNG_OK on success or else an LTTng error code.
1412 */
1413 enum lttng_error_code kernel_rotate_session(struct ltt_session *session)
1414 {
1415 int ret;
1416 enum lttng_error_code status = LTTNG_OK;
1417 struct consumer_socket *socket;
1418 struct lttng_ht_iter iter;
1419 struct ltt_kernel_session *ksess = session->kernel_session;
1420
1421 assert(ksess);
1422 assert(ksess->consumer);
1423
1424 DBG("Rotate kernel session %s started (session %" PRIu64 ")",
1425 session->name, session->id);
1426
1427 rcu_read_lock();
1428
1429 /*
1430 * Note that this loop will end after one iteration given that there is
1431 * only one kernel consumer.
1432 */
1433 cds_lfht_for_each_entry(ksess->consumer->socks->ht, &iter.iter,
1434 socket, node.node) {
1435 struct ltt_kernel_channel *chan;
1436
1437 /* For each channel, ask the consumer to rotate it. */
1438 cds_list_for_each_entry(chan, &ksess->channel_list.head, list) {
1439 DBG("Rotate kernel channel %" PRIu64 ", session %s",
1440 chan->key, session->name);
1441 ret = consumer_rotate_channel(socket, chan->key,
1442 ksess->uid, ksess->gid, ksess->consumer,
1443 /* is_metadata_channel */ false);
1444 if (ret < 0) {
1445 status = LTTNG_ERR_KERN_CONSUMER_FAIL;
1446 goto error;
1447 }
1448 }
1449
1450 /*
1451 * Rotate the metadata channel.
1452 */
1453 ret = consumer_rotate_channel(socket, ksess->metadata->key,
1454 ksess->uid, ksess->gid, ksess->consumer,
1455 /* is_metadata_channel */ true);
1456 if (ret < 0) {
1457 status = LTTNG_ERR_KERN_CONSUMER_FAIL;
1458 goto error;
1459 }
1460 }
1461
1462 error:
1463 rcu_read_unlock();
1464 return status;
1465 }
1466
1467 enum lttng_error_code kernel_create_channel_subdirectories(
1468 const struct ltt_kernel_session *ksess)
1469 {
1470 enum lttng_error_code ret = LTTNG_OK;
1471 enum lttng_trace_chunk_status chunk_status;
1472
1473 rcu_read_lock();
1474 assert(ksess->current_trace_chunk);
1475
1476 /*
1477 * Create the index subdirectory which will take care
1478 * of implicitly creating the channel's path.
1479 */
1480 chunk_status = lttng_trace_chunk_create_subdirectory(
1481 ksess->current_trace_chunk,
1482 DEFAULT_KERNEL_TRACE_DIR "/" DEFAULT_INDEX_DIR);
1483 if (chunk_status != LTTNG_TRACE_CHUNK_STATUS_OK) {
1484 ret = LTTNG_ERR_CREATE_DIR_FAIL;
1485 goto error;
1486 }
1487 error:
1488 rcu_read_unlock();
1489 return ret;
1490 }
1491
1492 /*
1493 * Setup necessary data for kernel tracer action.
1494 */
1495 LTTNG_HIDDEN
1496 int init_kernel_tracer(void)
1497 {
1498 int ret;
1499 bool is_root = !getuid();
1500
1501 /* Modprobe lttng kernel modules */
1502 ret = modprobe_lttng_control();
1503 if (ret < 0) {
1504 goto error;
1505 }
1506
1507 /* Open debugfs lttng */
1508 kernel_tracer_fd = open(module_proc_lttng, O_RDWR);
1509 if (kernel_tracer_fd < 0) {
1510 DBG("Failed to open %s", module_proc_lttng);
1511 goto error_open;
1512 }
1513
1514 /* Validate kernel version */
1515 ret = kernel_validate_version(&kernel_tracer_version,
1516 &kernel_tracer_abi_version);
1517 if (ret < 0) {
1518 goto error_version;
1519 }
1520
1521 ret = modprobe_lttng_data();
1522 if (ret < 0) {
1523 goto error_modules;
1524 }
1525
1526 ret = kernel_supports_ring_buffer_snapshot_sample_positions();
1527 if (ret < 0) {
1528 goto error_modules;
1529 }
1530
1531 if (ret < 1) {
1532 WARN("Kernel tracer does not support buffer monitoring. "
1533 "The monitoring timer of channels in the kernel domain "
1534 "will be set to 0 (disabled).");
1535 }
1536
1537 DBG("Kernel tracer fd %d", kernel_tracer_fd);
1538
1539 ret = syscall_init_table(kernel_tracer_fd);
1540 if (ret < 0) {
1541 ERR("Unable to populate syscall table. Syscall tracing won't "
1542 "work for this session daemon.");
1543 }
1544 return 0;
1545
1546 error_version:
1547 modprobe_remove_lttng_control();
1548 ret = close(kernel_tracer_fd);
1549 if (ret) {
1550 PERROR("close");
1551 }
1552 kernel_tracer_fd = -1;
1553 return LTTNG_ERR_KERN_VERSION;
1554
1555 error_modules:
1556 ret = close(kernel_tracer_fd);
1557 if (ret) {
1558 PERROR("close");
1559 }
1560
1561 error_open:
1562 modprobe_remove_lttng_control();
1563
1564 error:
1565 WARN("No kernel tracer available");
1566 kernel_tracer_fd = -1;
1567 if (!is_root) {
1568 return LTTNG_ERR_NEED_ROOT_SESSIOND;
1569 } else {
1570 return LTTNG_ERR_KERN_NA;
1571 }
1572 }
1573
1574 LTTNG_HIDDEN
1575 void cleanup_kernel_tracer(void)
1576 {
1577 int ret;
1578
1579 DBG2("Closing kernel fd");
1580 if (kernel_tracer_fd >= 0) {
1581 ret = close(kernel_tracer_fd);
1582 if (ret) {
1583 PERROR("close");
1584 }
1585 kernel_tracer_fd = -1;
1586 }
1587 DBG("Unloading kernel modules");
1588 modprobe_remove_lttng_all();
1589 free(syscall_table);
1590 }
1591
1592 LTTNG_HIDDEN
1593 bool kernel_tracer_is_initialized(void)
1594 {
1595 return kernel_tracer_fd >= 0;
1596 }
This page took 0.098543 seconds and 5 git commands to generate.