Fix: lttng perf counter deadlock
[lttng-ust.git] / liblttng-ust / lttng-context-perf-counters.c
/*
 * lttng-context-perf-counters.c
 *
 * LTTng UST performance monitoring counters (perf-counters) integration.
 *
 * Copyright (C) 2009-2014 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; only
 * version 2.1 of the License.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#define _LGPL_SOURCE
#include <sys/types.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <lttng/ust-events.h>
#include <lttng/ust-tracer.h>
#include <lttng/ringbuffer-config.h>
#include <urcu/system.h>
#include <urcu/arch.h>
#include <urcu/rculist.h>
#include <helper.h>
#include <urcu/ref.h>
#include <usterr-signal-safe.h>
#include <signal.h>
#include <urcu/tls-compat.h>
#include "perf_event.h"
#include "lttng-tracer-core.h"

/*
 * We use a global perf counter key and iterate on per-thread RCU lists
 * of fields in the fast path, even though this is not, strictly
 * speaking, the best fast-path complexity, to ensure that teardown of
 * sessions vs. thread exit is handled racelessly.
 *
 * Updates and traversals of thread_field_list, as well as updates to
 * rcu_field_list, are protected by the lttng perf lock.
 */
55
56 struct lttng_perf_counter_thread_field {
57 struct lttng_perf_counter_field *field; /* Back reference */
58 struct perf_event_mmap_page *pc;
59 struct cds_list_head thread_field_node; /* Per-field list of thread fields (node) */
60 struct cds_list_head rcu_field_node; /* RCU per-thread list of fields (node) */
61 int fd; /* Perf FD */
62 };
63
64 struct lttng_perf_counter_thread {
65 struct cds_list_head rcu_field_list; /* RCU per-thread list of fields */
66 };
67
68 struct lttng_perf_counter_field {
69 struct perf_event_attr attr;
70 struct cds_list_head thread_field_list; /* Per-field list of thread fields */
71 };
72
73 static pthread_key_t perf_counter_key;
74
75 /*
76 * lttng_perf_lock - Protect lttng-ust perf counter data structures
77 *
78 * Nests within the ust_lock, and therefore within the libc dl lock.
79 * Therefore, we need to fixup the TLS before nesting into this lock.
80 * Nests inside RCU bp read-side lock. Protects against concurrent
81 * fork.
82 */
83 static pthread_mutex_t ust_perf_mutex = PTHREAD_MUTEX_INITIALIZER;
84
85 /*
86 * Cancel state when grabbing the ust_perf_mutex. Saved when locking,
87 * restored on unlock. Protected by ust_perf_mutex.
88 */
89 static int ust_perf_saved_cancelstate;
90
/*
 * Per-thread nesting count for ust_perf_mutex. It allows taking the
 * lock from a signal handler nested over an application thread that
 * already holds it (e.g. when tracing from that handler) without
 * self-deadlock.
 */
static DEFINE_URCU_TLS(int, ust_perf_mutex_nest);

/*
 * Force a read (imply TLS fixup for dlopen) of TLS variables.
 */
void lttng_ust_fixup_perf_counter_tls(void)
{
	asm volatile ("" : : "m" (URCU_TLS(ust_perf_mutex_nest)));
}
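
/*
 * The empty asm above only forces a read of the TLS variable, so that
 * its storage is allocated (and any lazy TLS fixup triggered by dlopen
 * is performed) at a controlled point, before the variable is first
 * used with the perf lock held or from a signal handler.
 */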

void lttng_perf_lock(void)
{
	sigset_t sig_all_blocked, orig_mask;
	int ret, oldstate;

	ret = pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &oldstate);
	if (ret) {
		ERR("pthread_setcancelstate: %s", strerror(ret));
	}
	sigfillset(&sig_all_blocked);
	ret = pthread_sigmask(SIG_SETMASK, &sig_all_blocked, &orig_mask);
	if (ret) {
		ERR("pthread_sigmask: %s", strerror(ret));
	}
	if (!URCU_TLS(ust_perf_mutex_nest)++) {
		/*
		 * Ensure the compiler does not move the nesting count
		 * store past the pthread_mutex_lock() call (e.g. if the
		 * call were treated as a leaf function).
		 */
		cmm_barrier();
		pthread_mutex_lock(&ust_perf_mutex);
		ust_perf_saved_cancelstate = oldstate;
	}
	ret = pthread_sigmask(SIG_SETMASK, &orig_mask, NULL);
	if (ret) {
		ERR("pthread_sigmask: %s", strerror(ret));
	}
}

void lttng_perf_unlock(void)
{
	sigset_t sig_all_blocked, orig_mask;
	int ret, newstate, oldstate;
	bool restore_cancel = false;

	sigfillset(&sig_all_blocked);
	ret = pthread_sigmask(SIG_SETMASK, &sig_all_blocked, &orig_mask);
	if (ret) {
		ERR("pthread_sigmask: %s", strerror(ret));
	}
	/*
	 * Ensure the compiler does not move the nesting count store
	 * before the end of the critical section it protects.
	 */
	cmm_barrier();
	if (!--URCU_TLS(ust_perf_mutex_nest)) {
		newstate = ust_perf_saved_cancelstate;
		restore_cancel = true;
		pthread_mutex_unlock(&ust_perf_mutex);
	}
	ret = pthread_sigmask(SIG_SETMASK, &orig_mask, NULL);
	if (ret) {
		ERR("pthread_sigmask: %s", strerror(ret));
	}
	if (restore_cancel) {
		ret = pthread_setcancelstate(newstate, &oldstate);
		if (ret) {
			ERR("pthread_setcancelstate: %s", strerror(ret));
		}
	}
}
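
/*
 * Summary of the locking scheme above: all signals are blocked around
 * the mutex manipulation, and the per-thread ust_perf_mutex_nest count
 * lets a signal handler nested over a thread that already holds the
 * lock proceed without deadlocking on itself. Thread cancellation is
 * disabled before the mutex is taken and only restored once the
 * outermost unlock has released it.
 */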

static
size_t perf_counter_get_size(struct lttng_ctx_field *field, size_t offset)
{
	size_t size = 0;

	size += lib_ring_buffer_align(offset, lttng_alignof(uint64_t));
	size += sizeof(uint64_t);
	return size;
}

static
uint64_t read_perf_counter_syscall(
		struct lttng_perf_counter_thread_field *thread_field)
{
	uint64_t count;

	if (caa_unlikely(thread_field->fd < 0))
		return 0;

	/*
	 * Compare against a signed size so that a read(2) error return
	 * (-1) is not converted to an unsigned value and missed.
	 */
	if (caa_unlikely(read(thread_field->fd, &count, sizeof(count))
				< (ssize_t) sizeof(count)))
		return 0;

	return count;
}
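
/*
 * A read(2) on a perf event file descriptor yields the current counter
 * value. The event attributes are zero-initialized apart from type,
 * config and exclude_kernel (see below), so no read_format flags are
 * set and the kernel returns a single u64, which is what is read here.
 */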

#if defined(__x86_64__) || defined(__i386__)

static
uint64_t rdpmc(unsigned int counter)
{
	unsigned int low, high;

	asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));

	return low | ((uint64_t) high) << 32;
}

static
bool has_rdpmc(struct perf_event_mmap_page *pc)
{
	if (caa_unlikely(!pc->cap_bit0_is_deprecated))
		return false;
	/* Since Linux kernel 3.12. */
	return pc->cap_user_rdpmc;
}

static
uint64_t arch_read_perf_counter(
		struct lttng_perf_counter_thread_field *thread_field)
{
	uint32_t seq, idx;
	uint64_t count;
	struct perf_event_mmap_page *pc = thread_field->pc;

	if (caa_unlikely(!pc))
		return 0;

	do {
		seq = CMM_LOAD_SHARED(pc->lock);
		cmm_barrier();

		idx = pc->index;
		if (caa_likely(has_rdpmc(pc) && idx)) {
			int64_t pmcval;

			pmcval = rdpmc(idx - 1);
			/* Sign-extend the pmc register result. */
			pmcval <<= 64 - pc->pmc_width;
			pmcval >>= 64 - pc->pmc_width;
			count = pc->offset + pmcval;
		} else {
			/* Fall back on the system call if rdpmc cannot be used. */
			return read_perf_counter_syscall(thread_field);
		}
		cmm_barrier();
	} while (CMM_LOAD_SHARED(pc->lock) != seq);

	return count;
}
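
/*
 * The loop above is the user-space side of the seqcount published by
 * the kernel in the perf mmap page: pc->lock is sampled before and
 * after the counter is read, and the read is retried if the kernel
 * updated the page in the meantime. pc->index is the hardware counter
 * index plus one (zero meaning the event is not currently scheduled on
 * the PMU), and the raw rdpmc value is sign-extended to pc->pmc_width
 * bits before being added to pc->offset to reconstruct the full 64-bit
 * count.
 */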

static
int arch_perf_keep_fd(struct lttng_perf_counter_thread_field *thread_field)
{
	struct perf_event_mmap_page *pc = thread_field->pc;

	if (!pc)
		return 0;
	return !has_rdpmc(pc);
}

#else

/* Generic (slow) implementation using a read system call. */
static
uint64_t arch_read_perf_counter(
		struct lttng_perf_counter_thread_field *thread_field)
{
	return read_perf_counter_syscall(thread_field);
}

static
int arch_perf_keep_fd(struct lttng_perf_counter_thread_field *thread_field)
{
	return 1;
}

#endif

static
int sys_perf_event_open(struct perf_event_attr *attr,
		pid_t pid, int cpu, int group_fd,
		unsigned long flags)
{
	return syscall(SYS_perf_event_open, attr, pid, cpu,
			group_fd, flags);
}
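
/*
 * glibc provides no wrapper for perf_event_open(2), hence the raw
 * syscall(2) invocation above.
 */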

static
int open_perf_fd(struct perf_event_attr *attr)
{
	int fd;

	fd = sys_perf_event_open(attr, 0, -1, -1, 0);
	if (fd < 0)
		return -1;

	return fd;
}
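
/*
 * pid = 0 with cpu = -1 asks the kernel to count events for the calling
 * thread on any CPU, which is why each application thread needs its own
 * perf file descriptor (and mmap page) per counter field.
 */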

static
void close_perf_fd(int fd)
{
	int ret;

	if (fd < 0)
		return;

	ret = close(fd);
	if (ret) {
		perror("Error closing LTTng-UST perf memory mapping FD");
	}
}

static void setup_perf(struct lttng_perf_counter_thread_field *thread_field)
{
	void *perf_addr;

	perf_addr = mmap(NULL, sizeof(struct perf_event_mmap_page),
			PROT_READ, MAP_SHARED, thread_field->fd, 0);
	if (perf_addr == MAP_FAILED)
		perf_addr = NULL;
	thread_field->pc = perf_addr;

	if (!arch_perf_keep_fd(thread_field)) {
		close_perf_fd(thread_field->fd);
		thread_field->fd = -1;
	}
}
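
/*
 * Mapping a single page of the perf file descriptor exposes the
 * read-only struct perf_event_mmap_page, which carries the state needed
 * for user-space rdpmc reads (index, offset, pmc_width, capability
 * bits). When rdpmc can be used, the file descriptor is no longer
 * needed and is closed, so that only the mapped page is kept per thread
 * and per counter; otherwise the fd is kept for the read(2) fallback.
 */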

static
void unmap_perf_page(struct perf_event_mmap_page *pc)
{
	int ret;

	if (!pc)
		return;
	ret = munmap(pc, sizeof(struct perf_event_mmap_page));
	if (ret < 0) {
		PERROR("Error in munmap");
		abort();
	}
}

static
struct lttng_perf_counter_thread *alloc_perf_counter_thread(void)
{
	struct lttng_perf_counter_thread *perf_thread;
	sigset_t newmask, oldmask;
	int ret;

	ret = sigfillset(&newmask);
	if (ret)
		abort();
	ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
	if (ret)
		abort();
	/* Check again with signals disabled */
	perf_thread = pthread_getspecific(perf_counter_key);
	if (perf_thread)
		goto skip;
	perf_thread = zmalloc(sizeof(*perf_thread));
	if (!perf_thread)
		abort();
	CDS_INIT_LIST_HEAD(&perf_thread->rcu_field_list);
	ret = pthread_setspecific(perf_counter_key, perf_thread);
	if (ret)
		abort();
skip:
	ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
	if (ret)
		abort();
	return perf_thread;
}
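
/*
 * Signals are blocked during creation so that a tracing signal handler
 * nested over this thread cannot race with the pthread_getspecific()
 * check above and allocate a second per-thread structure.
 */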

static
struct lttng_perf_counter_thread_field *
	add_thread_field(struct lttng_perf_counter_field *perf_field,
		struct lttng_perf_counter_thread *perf_thread)
{
	struct lttng_perf_counter_thread_field *thread_field;
	sigset_t newmask, oldmask;
	int ret;

	ret = sigfillset(&newmask);
	if (ret)
		abort();
	ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
	if (ret)
		abort();
	/* Check again with signals disabled */
	cds_list_for_each_entry_rcu(thread_field, &perf_thread->rcu_field_list,
			rcu_field_node) {
		if (thread_field->field == perf_field)
			goto skip;
	}
	thread_field = zmalloc(sizeof(*thread_field));
	if (!thread_field)
		abort();
	thread_field->field = perf_field;
	thread_field->fd = open_perf_fd(&perf_field->attr);
	if (thread_field->fd >= 0)
		setup_perf(thread_field);
	/*
	 * Note: thread_field->pc can be NULL if setup_perf() fails.
	 * Also, thread_field->fd can be -1 if open_perf_fd() fails.
	 */
	lttng_perf_lock();
	cds_list_add_rcu(&thread_field->rcu_field_node,
			&perf_thread->rcu_field_list);
	cds_list_add(&thread_field->thread_field_node,
			&perf_field->thread_field_list);
	lttng_perf_unlock();
skip:
	ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
	if (ret)
		abort();
	return thread_field;
}

static
struct lttng_perf_counter_thread_field *
	get_thread_field(struct lttng_perf_counter_field *field)
{
	struct lttng_perf_counter_thread *perf_thread;
	struct lttng_perf_counter_thread_field *thread_field;

	perf_thread = pthread_getspecific(perf_counter_key);
	if (!perf_thread)
		perf_thread = alloc_perf_counter_thread();
	cds_list_for_each_entry_rcu(thread_field, &perf_thread->rcu_field_list,
			rcu_field_node) {
		if (thread_field->field == field)
			return thread_field;
	}
	/* perf_counter_thread_field not found, need to add one */
	return add_thread_field(field, perf_thread);
}

static
uint64_t wrapper_perf_counter_read(struct lttng_ctx_field *field)
{
	struct lttng_perf_counter_field *perf_field;
	struct lttng_perf_counter_thread_field *perf_thread_field;

	perf_field = field->u.perf_counter;
	perf_thread_field = get_thread_field(perf_field);
	return arch_read_perf_counter(perf_thread_field);
}

static
void perf_counter_record(struct lttng_ctx_field *field,
		struct lttng_ust_lib_ring_buffer_ctx *ctx,
		struct lttng_channel *chan)
{
	uint64_t value;

	value = wrapper_perf_counter_read(field);
	lib_ring_buffer_align_ctx(ctx, lttng_alignof(value));
	chan->ops->event_write(ctx, &value, sizeof(value));
}

static
void perf_counter_get_value(struct lttng_ctx_field *field,
		struct lttng_ctx_value *value)
{
	uint64_t v;

	v = wrapper_perf_counter_read(field);
	value->u.s64 = v;
}

/* Called with perf lock held */
static
void lttng_destroy_perf_thread_field(
		struct lttng_perf_counter_thread_field *thread_field)
{
	close_perf_fd(thread_field->fd);
	unmap_perf_page(thread_field->pc);
	cds_list_del_rcu(&thread_field->rcu_field_node);
	cds_list_del(&thread_field->thread_field_node);
	free(thread_field);
}

static
void lttng_destroy_perf_thread_key(void *_key)
{
	struct lttng_perf_counter_thread *perf_thread = _key;
	struct lttng_perf_counter_thread_field *pos, *p;

	lttng_perf_lock();
	cds_list_for_each_entry_safe(pos, p, &perf_thread->rcu_field_list,
			rcu_field_node)
		lttng_destroy_perf_thread_field(pos);
	lttng_perf_unlock();
	free(perf_thread);
}

/* Called with UST lock held */
static
void lttng_destroy_perf_counter_field(struct lttng_ctx_field *field)
{
	struct lttng_perf_counter_field *perf_field;
	struct lttng_perf_counter_thread_field *pos, *p;

	free((char *) field->event_field.name);
	perf_field = field->u.perf_counter;
	/*
	 * This teardown is performed when no thread can concurrently
	 * perform a "get" on this field, thanks to the urcu-bp grace
	 * period. Holding the lttng perf lock protects against
	 * concurrent modification of the per-thread thread field
	 * list.
	 */
	lttng_perf_lock();
	cds_list_for_each_entry_safe(pos, p, &perf_field->thread_field_list,
			thread_field_node)
		lttng_destroy_perf_thread_field(pos);
	lttng_perf_unlock();
	free(perf_field);
}

#ifdef __ARM_ARCH_7A__

static
int perf_get_exclude_kernel(void)
{
	return 0;
}

#else /* __ARM_ARCH_7A__ */

static
int perf_get_exclude_kernel(void)
{
	return 1;
}

#endif /* __ARM_ARCH_7A__ */

/* Called with UST lock held */
int lttng_add_perf_counter_to_ctx(uint32_t type,
		uint64_t config,
		const char *name,
		struct lttng_ctx **ctx)
{
	struct lttng_ctx_field *field;
	struct lttng_perf_counter_field *perf_field;
	char *name_alloc;
	int ret;

	name_alloc = strdup(name);
	if (!name_alloc) {
		ret = -ENOMEM;
		goto name_alloc_error;
	}
	perf_field = zmalloc(sizeof(*perf_field));
	if (!perf_field) {
		ret = -ENOMEM;
		goto perf_field_alloc_error;
	}
	field = lttng_append_context(ctx);
	if (!field) {
		ret = -ENOMEM;
		goto append_context_error;
	}
	if (lttng_find_context(*ctx, name_alloc)) {
		ret = -EEXIST;
		goto find_error;
	}

	field->destroy = lttng_destroy_perf_counter_field;

	field->event_field.name = name_alloc;
	field->event_field.type.atype = atype_integer;
	field->event_field.type.u.basic.integer.size =
			sizeof(uint64_t) * CHAR_BIT;
	field->event_field.type.u.basic.integer.alignment =
			lttng_alignof(uint64_t) * CHAR_BIT;
	field->event_field.type.u.basic.integer.signedness =
			lttng_is_signed_type(uint64_t);
	field->event_field.type.u.basic.integer.reverse_byte_order = 0;
	field->event_field.type.u.basic.integer.base = 10;
	field->event_field.type.u.basic.integer.encoding = lttng_encode_none;
	field->get_size = perf_counter_get_size;
	field->record = perf_counter_record;
	field->get_value = perf_counter_get_value;

	perf_field->attr.type = type;
	perf_field->attr.config = config;
	perf_field->attr.exclude_kernel = perf_get_exclude_kernel();
	CDS_INIT_LIST_HEAD(&perf_field->thread_field_list);
	field->u.perf_counter = perf_field;

	/* Ensure that this perf counter can be used in this process. */
	ret = open_perf_fd(&perf_field->attr);
	if (ret < 0) {
		ret = -ENODEV;
		goto setup_error;
	}
	close_perf_fd(ret);

	/*
	 * Contexts can only be added before tracing is started, so we
	 * don't have to synchronize against concurrent threads using
	 * the field here.
	 */

	lttng_context_update(*ctx);
	return 0;

setup_error:
find_error:
	lttng_remove_context_field(ctx, field);
append_context_error:
	free(perf_field);
perf_field_alloc_error:
	free(name_alloc);
name_alloc_error:
	return ret;
}
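
/*
 * Illustrative call sequence (a sketch, not part of this file):
 * PERF_TYPE_HARDWARE and PERF_COUNT_HW_CPU_CYCLES come from
 * <linux/perf_event.h>; the context name and the "my_ctx" variable are
 * arbitrary names chosen here for the sake of the example.
 *
 *	struct lttng_ctx *my_ctx = NULL;
 *	int ret;
 *
 *	ret = lttng_add_perf_counter_to_ctx(PERF_TYPE_HARDWARE,
 *			PERF_COUNT_HW_CPU_CYCLES,
 *			"perf_cpu_cycles", &my_ctx);
 *	if (ret)
 *		abort();
 */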

int lttng_perf_counter_init(void)
{
	int ret;

	ret = pthread_key_create(&perf_counter_key,
			lttng_destroy_perf_thread_key);
	if (ret)
		ret = -ret;
	return ret;
}

void lttng_perf_counter_exit(void)
{
	int ret;

	ret = pthread_key_delete(perf_counter_key);
	if (ret) {
		errno = ret;
		PERROR("Error in pthread_key_delete");
	}
}