/*
 * lttng-context-perf-counters.c
 *
 * LTTng UST performance monitoring counters (perf-counters) integration.
 *
 * Copyright (C) 2009-2014 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; only
 * version 2.1 of the License.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#define _LGPL_SOURCE
#include <sys/types.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <lttng/ust-events.h>
#include <lttng/ust-tracer.h>
#include <lttng/ringbuffer-config.h>
#include <lttng/ust-cancelstate.h>
#include <urcu/system.h>
#include <urcu/arch.h>
#include <urcu/rculist.h>
#include <helper.h>
#include <urcu/ref.h>
#include <usterr-signal-safe.h>
#include <signal.h>
#include <urcu/tls-compat.h>
#include "perf_event.h"
#include "lttng-tracer-core.h"

/*
 * We use a global perf counter key and iterate on per-thread RCU lists
 * of fields in the fast path, even though this is not strictly speaking
 * what would provide the best fast-path complexity, to ensure teardown
 * of sessions vs thread exit is handled racelessly.
 *
 * Updates and traversals of thread_list are protected by UST lock.
 * Updates to rcu_field_list are protected by UST lock.
 */

struct lttng_perf_counter_thread_field {
	struct lttng_perf_counter_field *field;	/* Back reference */
	struct perf_event_mmap_page *pc;
	struct cds_list_head thread_field_node;	/* Per-field list of thread fields (node) */
	struct cds_list_head rcu_field_node;	/* RCU per-thread list of fields (node) */
	int fd;					/* Perf FD */
};

struct lttng_perf_counter_thread {
	struct cds_list_head rcu_field_list;	/* RCU per-thread list of fields */
};

struct lttng_perf_counter_field {
	struct perf_event_attr attr;
	struct cds_list_head thread_field_list;	/* Per-field list of thread fields */
};
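
/*
 * Each lttng_perf_counter_thread_field is linked into two lists at
 * once: its rcu_field_node sits on the owning thread's rcu_field_list
 * (traversed with RCU on the tracing fast path), while its
 * thread_field_node sits on the context field's thread_field_list
 * (walked under the perf lock at context teardown). This double
 * linking is what lets both "thread exits" and "context destroyed"
 * reclaim the same per-thread state without racing.
 */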

static pthread_key_t perf_counter_key;

/*
 * lttng_perf_lock - Protect lttng-ust perf counter data structures
 *
 * Nests within the ust_lock, and therefore within the libc dl lock.
 * Therefore, we need to fixup the TLS before nesting into this lock.
 * Nests inside RCU bp read-side lock. Protects against concurrent
 * fork.
 */
static pthread_mutex_t ust_perf_mutex = PTHREAD_MUTEX_INITIALIZER;

/*
 * Track whether we are tracing from a signal handler nested on an
 * application thread.
 */
static DEFINE_URCU_TLS(int, ust_perf_mutex_nest);
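
/*
 * The nest count makes lttng_perf_lock()/lttng_perf_unlock() reentrant:
 * when a signal handler nested over a thread that already holds the
 * lock hits the tracing path, only the outermost acquisition takes
 * ust_perf_mutex, which avoids self-deadlock. Signals are blocked while
 * the count is updated so the handler always sees a consistent value.
 */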

/*
 * Force a read of the TLS variables (which implies their fixup for dlopen).
 */
void lttng_ust_fixup_perf_counter_tls(void)
{
	asm volatile ("" : : "m" (URCU_TLS(ust_perf_mutex_nest)));
}

void lttng_perf_lock(void)
{
	sigset_t sig_all_blocked, orig_mask;
	int ret;

	if (lttng_ust_cancelstate_disable_push()) {
		ERR("lttng_ust_cancelstate_disable_push");
	}
	sigfillset(&sig_all_blocked);
	ret = pthread_sigmask(SIG_SETMASK, &sig_all_blocked, &orig_mask);
	if (ret) {
		ERR("pthread_sigmask: %s", strerror(ret));
	}
	if (!URCU_TLS(ust_perf_mutex_nest)++) {
		/*
		 * Ensure the compiler does not move the nest count update
		 * after the pthread_mutex_lock() call, in case it would be
		 * marked as leaf.
		 */
		cmm_barrier();
		pthread_mutex_lock(&ust_perf_mutex);
	}
	ret = pthread_sigmask(SIG_SETMASK, &orig_mask, NULL);
	if (ret) {
		ERR("pthread_sigmask: %s", strerror(ret));
	}
}

void lttng_perf_unlock(void)
{
	sigset_t sig_all_blocked, orig_mask;
	int ret;

	sigfillset(&sig_all_blocked);
	ret = pthread_sigmask(SIG_SETMASK, &sig_all_blocked, &orig_mask);
	if (ret) {
		ERR("pthread_sigmask: %s", strerror(ret));
	}
	/*
	 * Ensure the compiler does not move the nest count update
	 * before this point, in case the pthread calls would be
	 * marked as leaf.
	 */
	cmm_barrier();
	if (!--URCU_TLS(ust_perf_mutex_nest)) {
		pthread_mutex_unlock(&ust_perf_mutex);
	}
	ret = pthread_sigmask(SIG_SETMASK, &orig_mask, NULL);
	if (ret) {
		ERR("pthread_sigmask: %s", strerror(ret));
	}
	if (lttng_ust_cancelstate_disable_pop()) {
		ERR("lttng_ust_cancelstate_disable_pop");
	}
}

static
size_t perf_counter_get_size(struct lttng_ctx_field *field, size_t offset)
{
	size_t size = 0;

	size += lib_ring_buffer_align(offset, lttng_alignof(uint64_t));
	size += sizeof(uint64_t);
	return size;
}
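
/*
 * The context payload is a single uint64_t: lib_ring_buffer_align()
 * returns the padding needed to bring the current offset to the
 * natural alignment of uint64_t (or no padding when the ring buffer is
 * built for packed layout), and the value itself adds sizeof(uint64_t)
 * bytes.
 */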

static
uint64_t read_perf_counter_syscall(
		struct lttng_perf_counter_thread_field *thread_field)
{
	uint64_t count;

	if (caa_unlikely(thread_field->fd < 0))
		return 0;

	if (caa_unlikely(read(thread_field->fd, &count, sizeof(count))
			< sizeof(count)))
		return 0;

	return count;
}

#if defined(__x86_64__) || defined(__i386__)

static
uint64_t rdpmc(unsigned int counter)
{
	unsigned int low, high;

	asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));

	return low | ((uint64_t) high) << 32;
}

static
bool has_rdpmc(struct perf_event_mmap_page *pc)
{
	if (caa_unlikely(!pc->cap_bit0_is_deprecated))
		return false;
	/* Since Linux kernel 3.12. */
	return pc->cap_user_rdpmc;
}
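
/*
 * cap_bit0_is_deprecated and cap_user_rdpmc are read from the
 * perf_event_mmap_page exported by the kernel. Kernels >= 3.12 set
 * cap_bit0_is_deprecated and advertise user-space rdpmc support in
 * cap_user_rdpmc; on older kernels bit 0 was overloaded and the
 * capability bits are ambiguous, so rdpmc is not trusted and the
 * read() fallback is used instead.
 */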

static
uint64_t arch_read_perf_counter(
		struct lttng_perf_counter_thread_field *thread_field)
{
	uint32_t seq, idx;
	uint64_t count;
	struct perf_event_mmap_page *pc = thread_field->pc;

	if (caa_unlikely(!pc))
		return 0;

	do {
		seq = CMM_LOAD_SHARED(pc->lock);
		cmm_barrier();

		idx = pc->index;
		if (caa_likely(has_rdpmc(pc) && idx)) {
			int64_t pmcval;

			pmcval = rdpmc(idx - 1);
			/* Sign-extend the pmc register result. */
			pmcval <<= 64 - pc->pmc_width;
			pmcval >>= 64 - pc->pmc_width;
			count = pc->offset + pmcval;
		} else {
			/* Fall-back on system call if rdpmc cannot be used. */
			return read_perf_counter_syscall(thread_field);
		}
		cmm_barrier();
	} while (CMM_LOAD_SHARED(pc->lock) != seq);

	return count;
}
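
/*
 * The loop above follows the seqcount protocol documented for
 * perf_event_mmap_page: pc->lock is bumped by the kernel around updates
 * to index/offset/pmc_width, so the values are re-read until a stable
 * snapshot is observed. pc->index is the hardware counter number plus
 * one (zero means no counter is currently assigned), hence the
 * rdpmc(idx - 1). With, e.g., a 48-bit wide PMC, shifting left then
 * arithmetic-right by 16 sign-extends the raw value so that
 * pc->offset + pmcval yields the full 64-bit count.
 */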

static
int arch_perf_keep_fd(struct lttng_perf_counter_thread_field *thread_field)
{
	struct perf_event_mmap_page *pc = thread_field->pc;

	if (!pc)
		return 0;
	return !has_rdpmc(pc);
}

#else

/* Generic (slow) implementation using a read system call. */
static
uint64_t arch_read_perf_counter(
		struct lttng_perf_counter_thread_field *thread_field)
{
	return read_perf_counter_syscall(thread_field);
}

static
int arch_perf_keep_fd(struct lttng_perf_counter_thread_field *thread_field)
{
	return 1;
}

#endif

static
int sys_perf_event_open(struct perf_event_attr *attr,
		pid_t pid, int cpu, int group_fd,
		unsigned long flags)
{
	return syscall(SYS_perf_event_open, attr, pid, cpu,
			group_fd, flags);
}

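/*
 * The counter is opened with pid = 0 and cpu = -1, i.e. it follows the
 * calling thread on whichever CPU it runs, with no event group
 * (group_fd = -1) and no flags. Each application thread therefore ends
 * up with its own per-thread counter file descriptor.
 */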
static
int open_perf_fd(struct perf_event_attr *attr)
{
	int fd;

	fd = sys_perf_event_open(attr, 0, -1, -1, 0);
	if (fd < 0)
		return -1;

	return fd;
}

static
void close_perf_fd(int fd)
{
	int ret;

	if (fd < 0)
		return;

	ret = close(fd);
	if (ret) {
		perror("Error closing LTTng-UST perf memory mapping FD");
	}
}

static void setup_perf(struct lttng_perf_counter_thread_field *thread_field)
{
	void *perf_addr;

	perf_addr = mmap(NULL, sizeof(struct perf_event_mmap_page),
			PROT_READ, MAP_SHARED, thread_field->fd, 0);
	if (perf_addr == MAP_FAILED)
		perf_addr = NULL;
	thread_field->pc = perf_addr;

	if (!arch_perf_keep_fd(thread_field)) {
		close_perf_fd(thread_field->fd);
		thread_field->fd = -1;
	}
}
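
/*
 * When rdpmc can be used, the file descriptor is closed and only the
 * mmap'd perf_event_mmap_page is kept: the mapping keeps the perf event
 * alive after the fd is closed, and the kernel releases the event once
 * the page is unmapped at thread or context teardown. This limits the
 * number of file descriptors held per traced thread.
 */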

static
void unmap_perf_page(struct perf_event_mmap_page *pc)
{
	int ret;

	if (!pc)
		return;
	ret = munmap(pc, sizeof(struct perf_event_mmap_page));
	if (ret < 0) {
		PERROR("Error in munmap");
		abort();
	}
}

static
struct lttng_perf_counter_thread *alloc_perf_counter_thread(void)
{
	struct lttng_perf_counter_thread *perf_thread;
	sigset_t newmask, oldmask;
	int ret;

	ret = sigfillset(&newmask);
	if (ret)
		abort();
	ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
	if (ret)
		abort();
	/* Check again with signals disabled */
	perf_thread = pthread_getspecific(perf_counter_key);
	if (perf_thread)
		goto skip;
	perf_thread = zmalloc(sizeof(*perf_thread));
	if (!perf_thread)
		abort();
	CDS_INIT_LIST_HEAD(&perf_thread->rcu_field_list);
	ret = pthread_setspecific(perf_counter_key, perf_thread);
	if (ret)
		abort();
skip:
	ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
	if (ret)
		abort();
	return perf_thread;
}
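
/*
 * The pthread_getspecific() re-check under blocked signals guards
 * against a signal handler nested on this same thread: the handler may
 * itself hit the tracing path and allocate the per-thread structure
 * between the caller's first lookup and the allocation here, so the
 * lookup is redone once signals can no longer interrupt it.
 */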

static
struct lttng_perf_counter_thread_field *
	add_thread_field(struct lttng_perf_counter_field *perf_field,
		struct lttng_perf_counter_thread *perf_thread)
{
	struct lttng_perf_counter_thread_field *thread_field;
	sigset_t newmask, oldmask;
	int ret;

	ret = sigfillset(&newmask);
	if (ret)
		abort();
	ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
	if (ret)
		abort();
	/* Check again with signals disabled */
	cds_list_for_each_entry_rcu(thread_field, &perf_thread->rcu_field_list,
			rcu_field_node) {
		if (thread_field->field == perf_field)
			goto skip;
	}
	thread_field = zmalloc(sizeof(*thread_field));
	if (!thread_field)
		abort();
	thread_field->field = perf_field;
	thread_field->fd = open_perf_fd(&perf_field->attr);
	if (thread_field->fd >= 0)
		setup_perf(thread_field);
	/*
	 * Note: thread_field->pc can be NULL if setup_perf() fails.
	 * Also, thread_field->fd can be -1 if open_perf_fd() fails.
	 */
	lttng_perf_lock();
	cds_list_add_rcu(&thread_field->rcu_field_node,
			&perf_thread->rcu_field_list);
	cds_list_add(&thread_field->thread_field_node,
			&perf_field->thread_field_list);
	lttng_perf_unlock();
skip:
	ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
	if (ret)
		abort();
	return thread_field;
}

static
struct lttng_perf_counter_thread_field *
	get_thread_field(struct lttng_perf_counter_field *field)
{
	struct lttng_perf_counter_thread *perf_thread;
	struct lttng_perf_counter_thread_field *thread_field;

	perf_thread = pthread_getspecific(perf_counter_key);
	if (!perf_thread)
		perf_thread = alloc_perf_counter_thread();
	cds_list_for_each_entry_rcu(thread_field, &perf_thread->rcu_field_list,
			rcu_field_node) {
		if (thread_field->field == field)
			return thread_field;
	}
	/* perf_counter_thread_field not found, need to add one */
	return add_thread_field(field, perf_thread);
}
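
/*
 * get_thread_field() is the tracing fast path: the per-thread RCU list
 * is walked without taking any lock, and the perf lock is only needed
 * on the slow path, when a thread records a given perf context for the
 * first time and a thread field has to be created and linked in.
 */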

static
uint64_t wrapper_perf_counter_read(struct lttng_ctx_field *field)
{
	struct lttng_perf_counter_field *perf_field;
	struct lttng_perf_counter_thread_field *perf_thread_field;

	perf_field = field->u.perf_counter;
	perf_thread_field = get_thread_field(perf_field);
	return arch_read_perf_counter(perf_thread_field);
}

static
void perf_counter_record(struct lttng_ctx_field *field,
		struct lttng_ust_lib_ring_buffer_ctx *ctx,
		struct lttng_channel *chan)
{
	uint64_t value;

	value = wrapper_perf_counter_read(field);
	lib_ring_buffer_align_ctx(ctx, lttng_alignof(value));
	chan->ops->event_write(ctx, &value, sizeof(value));
}

static
void perf_counter_get_value(struct lttng_ctx_field *field,
		struct lttng_ctx_value *value)
{
	value->u.s64 = wrapper_perf_counter_read(field);
}

/* Called with perf lock held */
static
void lttng_destroy_perf_thread_field(
		struct lttng_perf_counter_thread_field *thread_field)
{
	close_perf_fd(thread_field->fd);
	unmap_perf_page(thread_field->pc);
	cds_list_del_rcu(&thread_field->rcu_field_node);
	cds_list_del(&thread_field->thread_field_node);
	free(thread_field);
}

static
void lttng_destroy_perf_thread_key(void *_key)
{
	struct lttng_perf_counter_thread *perf_thread = _key;
	struct lttng_perf_counter_thread_field *pos, *p;

	lttng_perf_lock();
	cds_list_for_each_entry_safe(pos, p, &perf_thread->rcu_field_list,
			rcu_field_node)
		lttng_destroy_perf_thread_field(pos);
	lttng_perf_unlock();
	free(perf_thread);
}

/* Called with UST lock held */
static
void lttng_destroy_perf_counter_field(struct lttng_ctx_field *field)
{
	struct lttng_perf_counter_field *perf_field;
	struct lttng_perf_counter_thread_field *pos, *p;

	free((char *) field->event_field.name);
	perf_field = field->u.perf_counter;
	/*
	 * This teardown is performed when no threads can concurrently
	 * perform a "get", thanks to the urcu-bp grace period. Holding
	 * the lttng perf lock protects against concurrent modification
	 * of the per-thread thread field list.
	 */
	lttng_perf_lock();
	cds_list_for_each_entry_safe(pos, p, &perf_field->thread_field_list,
			thread_field_node)
		lttng_destroy_perf_thread_field(pos);
	lttng_perf_unlock();
	free(perf_field);
}

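/*
 * exclude_kernel is not set on ARMv7: the perf PMU support there has
 * historically rejected events that exclude kernel mode, which would
 * make perf_event_open() fail outright. On other architectures the
 * counters are restricted to user space.
 */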
#ifdef __ARM_ARCH_7A__

static
int perf_get_exclude_kernel(void)
{
	return 0;
}

#else /* __ARM_ARCH_7A__ */

static
int perf_get_exclude_kernel(void)
{
	return 1;
}

#endif /* __ARM_ARCH_7A__ */

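/*
 * Hypothetical usage sketch (the caller and context pointer below are
 * illustrative, not part of this file): enabling the CPU-cycles counter
 * as a context field would look along the lines of
 *
 *	ret = lttng_add_perf_counter_to_ctx(PERF_TYPE_HARDWARE,
 *			PERF_COUNT_HW_CPU_CYCLES,
 *			"perf_thread_cpu_cycles", &chan->ctx);
 *
 * where the name and the ctx pointer are whatever the caller chooses;
 * the type/config pair uses the standard perf_event constants.
 */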
/* Called with UST lock held */
int lttng_add_perf_counter_to_ctx(uint32_t type,
		uint64_t config,
		const char *name,
		struct lttng_ctx **ctx)
{
	struct lttng_ctx_field *field;
	struct lttng_perf_counter_field *perf_field;
	char *name_alloc;
	int ret;

	name_alloc = strdup(name);
	if (!name_alloc) {
		ret = -ENOMEM;
		goto name_alloc_error;
	}
	perf_field = zmalloc(sizeof(*perf_field));
	if (!perf_field) {
		ret = -ENOMEM;
		goto perf_field_alloc_error;
	}
	field = lttng_append_context(ctx);
	if (!field) {
		ret = -ENOMEM;
		goto append_context_error;
	}
	if (lttng_find_context(*ctx, name_alloc)) {
		ret = -EEXIST;
		goto find_error;
	}

	field->destroy = lttng_destroy_perf_counter_field;

	field->event_field.name = name_alloc;
	field->event_field.type.atype = atype_integer;
	field->event_field.type.u.basic.integer.size =
			sizeof(uint64_t) * CHAR_BIT;
	field->event_field.type.u.basic.integer.alignment =
			lttng_alignof(uint64_t) * CHAR_BIT;
	field->event_field.type.u.basic.integer.signedness =
			lttng_is_signed_type(uint64_t);
	field->event_field.type.u.basic.integer.reverse_byte_order = 0;
	field->event_field.type.u.basic.integer.base = 10;
	field->event_field.type.u.basic.integer.encoding = lttng_encode_none;
	field->get_size = perf_counter_get_size;
	field->record = perf_counter_record;
	field->get_value = perf_counter_get_value;

	perf_field->attr.type = type;
	perf_field->attr.config = config;
	perf_field->attr.exclude_kernel = perf_get_exclude_kernel();
	CDS_INIT_LIST_HEAD(&perf_field->thread_field_list);
	field->u.perf_counter = perf_field;

	/* Ensure that this perf counter can be used in this process. */
	ret = open_perf_fd(&perf_field->attr);
	if (ret < 0) {
		ret = -ENODEV;
		goto setup_error;
	}
	close_perf_fd(ret);

	/*
	 * Contexts can only be added before tracing is started, so we
	 * don't have to synchronize against concurrent threads using
	 * the field here.
	 */

	lttng_context_update(*ctx);
	return 0;

setup_error:
find_error:
	lttng_remove_context_field(ctx, field);
append_context_error:
	free(perf_field);
perf_field_alloc_error:
	free(name_alloc);
name_alloc_error:
	return ret;
}

int lttng_perf_counter_init(void)
{
	int ret;

	ret = pthread_key_create(&perf_counter_key,
			lttng_destroy_perf_thread_key);
	if (ret)
		ret = -ret;
	return ret;
}

void lttng_perf_counter_exit(void)
{
	int ret;

	ret = pthread_key_delete(perf_counter_key);
	if (ret) {
		errno = ret;
		PERROR("Error in pthread_key_delete");
	}
}