ust: add files from kernel
[ust.git] / libmarkers / marker.c
CommitLineData
68c1021b
PMF
1/*
2 * Copyright (C) 2007 Mathieu Desnoyers
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 */
18#include <linux/module.h>
19#include <linux/mutex.h>
20#include <linux/types.h>
21#include <linux/jhash.h>
22#include <linux/list.h>
23#include <linux/rcupdate.h>
24#include <linux/marker.h>
25#include <linux/err.h>
26#include <linux/slab.h>
27#include <linux/immediate.h>
28#include <linux/sched.h>
29#include <linux/uaccess.h>
30#include <linux/user_marker.h>
31#include <linux/ltt-tracer.h>
32
33extern struct marker __start___markers[];
34extern struct marker __stop___markers[];
35
36/* Set to 1 to enable marker debug output */
37static const int marker_debug;
38
39/*
40 * markers_mutex nests inside module_mutex. Markers mutex protects the builtin
41 * and module markers and the hash table.
42 */
43static DEFINE_MUTEX(markers_mutex);
44
45void lock_markers(void)
46{
47 mutex_lock(&markers_mutex);
48}
49
50void unlock_markers(void)
51{
52 mutex_unlock(&markers_mutex);
53}
54
55/*
56 * Marker hash table, containing the active markers.
57 * Protected by module_mutex.
58 */
59#define MARKER_HASH_BITS 6
60#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS)
61static struct hlist_head marker_table[MARKER_TABLE_SIZE];
62
63/*
64 * Note about RCU :
65 * It is used to make sure every handler has finished using its private data
66 * between two consecutive operation (add or remove) on a given marker. It is
67 * also used to delay the free of multiple probes array until a quiescent state
68 * is reached.
69 * marker entries modifications are protected by the markers_mutex.
70 */
71struct marker_entry {
72 struct hlist_node hlist;
73 char *format;
74 char *name;
75 /* Probe wrapper */
76 void (*call)(const struct marker *mdata, void *call_private, ...);
77 struct marker_probe_closure single;
78 struct marker_probe_closure *multi;
79 int refcount; /* Number of times armed. 0 if disarmed. */
80 struct rcu_head rcu;
81 void *oldptr;
82 int rcu_pending;
83 u16 channel_id;
84 u16 event_id;
85 unsigned char ptype:1;
86 unsigned char format_allocated:1;
87 char channel[0]; /* Contains channel'\0'name'\0'format'\0' */
88};
89
/*
 * marker_update_processes() is only declared here when
 * CONFIG_MARKERS_USERSPACE is set (its definition is not in this part of the
 * file); otherwise it is an empty stub so marker_update_probes() can call it
 * unconditionally.
 */
#ifndef CONFIG_MARKERS_USERSPACE
static void marker_update_processes(void)
{
}
#else
static void marker_update_processes(void);
#endif
97
98/**
99 * __mark_empty_function - Empty probe callback
100 * @mdata: marker data
101 * @probe_private: probe private data
102 * @call_private: call site private data
103 * @fmt: format string
104 * @...: variable argument list
105 *
106 * Empty callback provided as a probe to the markers. By providing this to a
107 * disabled marker, we make sure the execution flow is always valid even
108 * though the function pointer change and the marker enabling are two distinct
109 * operations that modifies the execution flow of preemptible code.
110 */
111notrace void __mark_empty_function(const struct marker *mdata,
112 void *probe_private, void *call_private, const char *fmt, va_list *args)
113{
114}
115EXPORT_SYMBOL_GPL(__mark_empty_function);
116
117/*
118 * marker_probe_cb Callback that prepares the variable argument list for probes.
119 * @mdata: pointer of type struct marker
120 * @call_private: caller site private data
121 * @...: Variable argument list.
122 *
123 * Since we do not use "typical" pointer based RCU in the 1 argument case, we
124 * need to put a full smp_rmb() in this branch. This is why we do not use
125 * rcu_dereference() for the pointer read.
126 */
127notrace void marker_probe_cb(const struct marker *mdata,
128 void *call_private, ...)
129{
130 va_list args;
131 char ptype;
132
133 /*
134 * rcu_read_lock_sched does two things : disabling preemption to make
135 * sure the teardown of the callbacks can be done correctly when they
136 * are in modules and they insure RCU read coherency.
137 */
138 rcu_read_lock_sched_notrace();
139 ptype = mdata->ptype;
140 if (likely(!ptype)) {
141 marker_probe_func *func;
142 /* Must read the ptype before ptr. They are not data dependant,
143 * so we put an explicit smp_rmb() here. */
144 smp_rmb();
145 func = mdata->single.func;
146 /* Must read the ptr before private data. They are not data
147 * dependant, so we put an explicit smp_rmb() here. */
148 smp_rmb();
149 va_start(args, call_private);
150 func(mdata, mdata->single.probe_private, call_private,
151 mdata->format, &args);
152 va_end(args);
153 } else {
154 struct marker_probe_closure *multi;
155 int i;
156 /*
157 * Read mdata->ptype before mdata->multi.
158 */
159 smp_rmb();
160 multi = mdata->multi;
161 /*
162 * multi points to an array, therefore accessing the array
163 * depends on reading multi. However, even in this case,
164 * we must insure that the pointer is read _before_ the array
165 * data. Same as rcu_dereference, but we need a full smp_rmb()
166 * in the fast path, so put the explicit barrier here.
167 */
168 smp_read_barrier_depends();
169 for (i = 0; multi[i].func; i++) {
170 va_start(args, call_private);
171 multi[i].func(mdata, multi[i].probe_private,
172 call_private, mdata->format, &args);
173 va_end(args);
174 }
175 }
176 rcu_read_unlock_sched_notrace();
177}
178EXPORT_SYMBOL_GPL(marker_probe_cb);
179
180/*
181 * marker_probe_cb Callback that does not prepare the variable argument list.
182 * @mdata: pointer of type struct marker
183 * @call_private: caller site private data
184 * @...: Variable argument list.
185 *
186 * Should be connected to markers "MARK_NOARGS".
187 */
188static notrace void marker_probe_cb_noarg(const struct marker *mdata,
189 void *call_private, ...)
190{
191 va_list args; /* not initialized */
192 char ptype;
193
194 rcu_read_lock_sched_notrace();
195 ptype = mdata->ptype;
196 if (likely(!ptype)) {
197 marker_probe_func *func;
198 /* Must read the ptype before ptr. They are not data dependant,
199 * so we put an explicit smp_rmb() here. */
200 smp_rmb();
201 func = mdata->single.func;
202 /* Must read the ptr before private data. They are not data
203 * dependant, so we put an explicit smp_rmb() here. */
204 smp_rmb();
205 func(mdata, mdata->single.probe_private, call_private,
206 mdata->format, &args);
207 } else {
208 struct marker_probe_closure *multi;
209 int i;
210 /*
211 * Read mdata->ptype before mdata->multi.
212 */
213 smp_rmb();
214 multi = mdata->multi;
215 /*
216 * multi points to an array, therefore accessing the array
217 * depends on reading multi. However, even in this case,
218 * we must insure that the pointer is read _before_ the array
219 * data. Same as rcu_dereference, but we need a full smp_rmb()
220 * in the fast path, so put the explicit barrier here.
221 */
222 smp_read_barrier_depends();
223 for (i = 0; multi[i].func; i++)
224 multi[i].func(mdata, multi[i].probe_private,
225 call_private, mdata->format, &args);
226 }
227 rcu_read_unlock_sched_notrace();
228}
229
230static void free_old_closure(struct rcu_head *head)
231{
232 struct marker_entry *entry = container_of(head,
233 struct marker_entry, rcu);
234 kfree(entry->oldptr);
235 /* Make sure we free the data before setting the pending flag to 0 */
236 smp_wmb();
237 entry->rcu_pending = 0;
238}
239
240static void debug_print_probes(struct marker_entry *entry)
241{
242 int i;
243
244 if (!marker_debug)
245 return;
246
247 if (!entry->ptype) {
248 printk(KERN_DEBUG "Single probe : %p %p\n",
249 entry->single.func,
250 entry->single.probe_private);
251 } else {
252 for (i = 0; entry->multi[i].func; i++)
253 printk(KERN_DEBUG "Multi probe %d : %p %p\n", i,
254 entry->multi[i].func,
255 entry->multi[i].probe_private);
256 }
257}
258
259static struct marker_probe_closure *
260marker_entry_add_probe(struct marker_entry *entry,
261 marker_probe_func *probe, void *probe_private)
262{
263 int nr_probes = 0;
264 struct marker_probe_closure *old, *new;
265
266 WARN_ON(!probe);
267
268 debug_print_probes(entry);
269 old = entry->multi;
270 if (!entry->ptype) {
271 if (entry->single.func == probe &&
272 entry->single.probe_private == probe_private)
273 return ERR_PTR(-EBUSY);
274 if (entry->single.func == __mark_empty_function) {
275 /* 0 -> 1 probes */
276 entry->single.func = probe;
277 entry->single.probe_private = probe_private;
278 entry->refcount = 1;
279 entry->ptype = 0;
280 debug_print_probes(entry);
281 return NULL;
282 } else {
283 /* 1 -> 2 probes */
284 nr_probes = 1;
285 old = NULL;
286 }
287 } else {
288 /* (N -> N+1), (N != 0, 1) probes */
289 for (nr_probes = 0; old[nr_probes].func; nr_probes++)
290 if (old[nr_probes].func == probe
291 && old[nr_probes].probe_private
292 == probe_private)
293 return ERR_PTR(-EBUSY);
294 }
295 /* + 2 : one for new probe, one for NULL func */
296 new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure),
297 GFP_KERNEL);
298 if (new == NULL)
299 return ERR_PTR(-ENOMEM);
300 if (!old)
301 new[0] = entry->single;
302 else
303 memcpy(new, old,
304 nr_probes * sizeof(struct marker_probe_closure));
305 new[nr_probes].func = probe;
306 new[nr_probes].probe_private = probe_private;
307 entry->refcount = nr_probes + 1;
308 entry->multi = new;
309 entry->ptype = 1;
310 debug_print_probes(entry);
311 return old;
312}
313
314static struct marker_probe_closure *
315marker_entry_remove_probe(struct marker_entry *entry,
316 marker_probe_func *probe, void *probe_private)
317{
318 int nr_probes = 0, nr_del = 0, i;
319 struct marker_probe_closure *old, *new;
320
321 old = entry->multi;
322
323 debug_print_probes(entry);
324 if (!entry->ptype) {
325 /* 0 -> N is an error */
326 WARN_ON(entry->single.func == __mark_empty_function);
327 /* 1 -> 0 probes */
328 WARN_ON(probe && entry->single.func != probe);
329 WARN_ON(entry->single.probe_private != probe_private);
330 entry->single.func = __mark_empty_function;
331 entry->refcount = 0;
332 entry->ptype = 0;
333 debug_print_probes(entry);
334 return NULL;
335 } else {
336 /* (N -> M), (N > 1, M >= 0) probes */
337 for (nr_probes = 0; old[nr_probes].func; nr_probes++) {
338 if ((!probe || old[nr_probes].func == probe)
339 && old[nr_probes].probe_private
340 == probe_private)
341 nr_del++;
342 }
343 }
344
345 if (nr_probes - nr_del == 0) {
346 /* N -> 0, (N > 1) */
347 entry->single.func = __mark_empty_function;
348 entry->refcount = 0;
349 entry->ptype = 0;
350 } else if (nr_probes - nr_del == 1) {
351 /* N -> 1, (N > 1) */
352 for (i = 0; old[i].func; i++)
353 if ((probe && old[i].func != probe) ||
354 old[i].probe_private != probe_private)
355 entry->single = old[i];
356 entry->refcount = 1;
357 entry->ptype = 0;
358 } else {
359 int j = 0;
360 /* N -> M, (N > 1, M > 1) */
361 /* + 1 for NULL */
362 new = kzalloc((nr_probes - nr_del + 1)
363 * sizeof(struct marker_probe_closure), GFP_KERNEL);
364 if (new == NULL)
365 return ERR_PTR(-ENOMEM);
366 for (i = 0; old[i].func; i++)
367 if ((probe && old[i].func != probe) ||
368 old[i].probe_private != probe_private)
369 new[j++] = old[i];
370 entry->refcount = nr_probes - nr_del;
371 entry->ptype = 1;
372 entry->multi = new;
373 }
374 debug_print_probes(entry);
375 return old;
376}
377
378/*
379 * Get marker if the marker is present in the marker hash table.
380 * Must be called with markers_mutex held.
381 * Returns NULL if not present.
382 */
383static struct marker_entry *get_marker(const char *channel, const char *name)
384{
385 struct hlist_head *head;
386 struct hlist_node *node;
387 struct marker_entry *e;
388 size_t channel_len = strlen(channel) + 1;
389 size_t name_len = strlen(name) + 1;
390 u32 hash;
391
392 hash = jhash(channel, channel_len-1, 0) ^ jhash(name, name_len-1, 0);
393 head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
394 hlist_for_each_entry(e, node, head, hlist) {
395 if (!strcmp(channel, e->channel) && !strcmp(name, e->name))
396 return e;
397 }
398 return NULL;
399}
400
401/*
402 * Add the marker to the marker hash table. Must be called with markers_mutex
403 * held.
404 */
405static struct marker_entry *add_marker(const char *channel, const char *name,
406 const char *format)
407{
408 struct hlist_head *head;
409 struct hlist_node *node;
410 struct marker_entry *e;
411 size_t channel_len = strlen(channel) + 1;
412 size_t name_len = strlen(name) + 1;
413 size_t format_len = 0;
414 u32 hash;
415
416 hash = jhash(channel, channel_len-1, 0) ^ jhash(name, name_len-1, 0);
417 if (format)
418 format_len = strlen(format) + 1;
419 head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
420 hlist_for_each_entry(e, node, head, hlist) {
421 if (!strcmp(channel, e->channel) && !strcmp(name, e->name)) {
422 printk(KERN_NOTICE
423 "Marker %s.%s busy\n", channel, name);
424 return ERR_PTR(-EBUSY); /* Already there */
425 }
426 }
427 /*
428 * Using kmalloc here to allocate a variable length element. Could
429 * cause some memory fragmentation if overused.
430 */
431 e = kmalloc(sizeof(struct marker_entry)
432 + channel_len + name_len + format_len,
433 GFP_KERNEL);
434 if (!e)
435 return ERR_PTR(-ENOMEM);
436 memcpy(e->channel, channel, channel_len);
437 e->name = &e->channel[channel_len];
438 memcpy(e->name, name, name_len);
439 if (format) {
440 e->format = &e->name[channel_len + name_len];
441 memcpy(e->format, format, format_len);
442 if (strcmp(e->format, MARK_NOARGS) == 0)
443 e->call = marker_probe_cb_noarg;
444 else
445 e->call = marker_probe_cb;
446 trace_mark(metadata, core_marker_format,
447 "channel %s name %s format %s",
448 e->channel, e->name, e->format);
449 } else {
450 e->format = NULL;
451 e->call = marker_probe_cb;
452 }
453 e->single.func = __mark_empty_function;
454 e->single.probe_private = NULL;
455 e->multi = NULL;
456 e->ptype = 0;
457 e->format_allocated = 0;
458 e->refcount = 0;
459 e->rcu_pending = 0;
460 hlist_add_head(&e->hlist, head);
461 return e;
462}
463
464/*
465 * Remove the marker from the marker hash table. Must be called with mutex_lock
466 * held.
467 */
468static int remove_marker(const char *channel, const char *name)
469{
470 struct hlist_head *head;
471 struct hlist_node *node;
472 struct marker_entry *e;
473 int found = 0;
474 size_t channel_len = strlen(channel) + 1;
475 size_t name_len = strlen(name) + 1;
476 u32 hash;
477 int ret;
478
479 hash = jhash(channel, channel_len-1, 0) ^ jhash(name, name_len-1, 0);
480 head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
481 hlist_for_each_entry(e, node, head, hlist) {
482 if (!strcmp(channel, e->channel) && !strcmp(name, e->name)) {
483 found = 1;
484 break;
485 }
486 }
487 if (!found)
488 return -ENOENT;
489 if (e->single.func != __mark_empty_function)
490 return -EBUSY;
491 hlist_del(&e->hlist);
492 if (e->format_allocated)
493 kfree(e->format);
494 ret = ltt_channels_unregister(e->channel);
495 WARN_ON(ret);
496 /* Make sure the call_rcu has been executed */
497 if (e->rcu_pending)
498 rcu_barrier_sched();
499 kfree(e);
500 return 0;
501}
502
503/*
504 * Set the mark_entry format to the format found in the element.
505 */
506static int marker_set_format(struct marker_entry *entry, const char *format)
507{
508 entry->format = kstrdup(format, GFP_KERNEL);
509 if (!entry->format)
510 return -ENOMEM;
511 entry->format_allocated = 1;
512
513 trace_mark(metadata, core_marker_format,
514 "channel %s name %s format %s",
515 entry->channel, entry->name, entry->format);
516 return 0;
517}
518
519/*
520 * Sets the probe callback corresponding to one marker.
521 */
522static int set_marker(struct marker_entry *entry, struct marker *elem,
523 int active)
524{
525 int ret = 0;
526 WARN_ON(strcmp(entry->name, elem->name) != 0);
527
528 if (entry->format) {
529 if (strcmp(entry->format, elem->format) != 0) {
530 printk(KERN_NOTICE
531 "Format mismatch for probe %s "
532 "(%s), marker (%s)\n",
533 entry->name,
534 entry->format,
535 elem->format);
536 return -EPERM;
537 }
538 } else {
539 ret = marker_set_format(entry, elem->format);
540 if (ret)
541 return ret;
542 }
543
544 /*
545 * probe_cb setup (statically known) is done here. It is
546 * asynchronous with the rest of execution, therefore we only
547 * pass from a "safe" callback (with argument) to an "unsafe"
548 * callback (does not set arguments).
549 */
550 elem->call = entry->call;
551 elem->channel_id = entry->channel_id;
552 elem->event_id = entry->event_id;
553 /*
554 * Sanity check :
555 * We only update the single probe private data when the ptr is
556 * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1)
557 */
558 WARN_ON(elem->single.func != __mark_empty_function
559 && elem->single.probe_private != entry->single.probe_private
560 && !elem->ptype);
561 elem->single.probe_private = entry->single.probe_private;
562 /*
563 * Make sure the private data is valid when we update the
564 * single probe ptr.
565 */
566 smp_wmb();
567 elem->single.func = entry->single.func;
568 /*
569 * We also make sure that the new probe callbacks array is consistent
570 * before setting a pointer to it.
571 */
572 rcu_assign_pointer(elem->multi, entry->multi);
573 /*
574 * Update the function or multi probe array pointer before setting the
575 * ptype.
576 */
577 smp_wmb();
578 elem->ptype = entry->ptype;
579
580 if (elem->tp_name && (active ^ _imv_read(elem->state))) {
581 WARN_ON(!elem->tp_cb);
582 /*
583 * It is ok to directly call the probe registration because type
584 * checking has been done in the __trace_mark_tp() macro.
585 */
586
587 if (active) {
588 /*
589 * try_module_get should always succeed because we hold
590 * markers_mutex to get the tp_cb address.
591 */
592 ret = try_module_get(__module_text_address(
593 (unsigned long)elem->tp_cb));
594 BUG_ON(!ret);
595 ret = tracepoint_probe_register_noupdate(
596 elem->tp_name,
597 elem->tp_cb);
598 } else {
599 ret = tracepoint_probe_unregister_noupdate(
600 elem->tp_name,
601 elem->tp_cb);
602 /*
603 * tracepoint_probe_update_all() must be called
604 * before the module containing tp_cb is unloaded.
605 */
606 module_put(__module_text_address(
607 (unsigned long)elem->tp_cb));
608 }
609 }
610 elem->state__imv = active;
611
612 return ret;
613}
614
615/*
616 * Disable a marker and its probe callback.
617 * Note: only waiting an RCU period after setting elem->call to the empty
618 * function insures that the original callback is not used anymore. This insured
619 * by rcu_read_lock_sched around the call site.
620 */
621static void disable_marker(struct marker *elem)
622{
623 int ret;
624
625 /* leave "call" as is. It is known statically. */
626 if (elem->tp_name && _imv_read(elem->state)) {
627 WARN_ON(!elem->tp_cb);
628 /*
629 * It is ok to directly call the probe registration because type
630 * checking has been done in the __trace_mark_tp() macro.
631 */
632 ret = tracepoint_probe_unregister_noupdate(elem->tp_name,
633 elem->tp_cb);
634 WARN_ON(ret);
635 /*
636 * tracepoint_probe_update_all() must be called
637 * before the module containing tp_cb is unloaded.
638 */
639 module_put(__module_text_address((unsigned long)elem->tp_cb));
640 }
641 elem->state__imv = 0;
642 elem->single.func = __mark_empty_function;
643 /* Update the function before setting the ptype */
644 smp_wmb();
645 elem->ptype = 0; /* single probe */
646 /*
647 * Leave the private data and channel_id/event_id there, because removal
648 * is racy and should be done only after an RCU period. These are never
649 * used until the next initialization anyway.
650 */
651}
652
653/**
654 * marker_update_probe_range - Update a probe range
655 * @begin: beginning of the range
656 * @end: end of the range
657 *
658 * Updates the probe callback corresponding to a range of markers.
659 */
660void marker_update_probe_range(struct marker *begin,
661 struct marker *end)
662{
663 struct marker *iter;
664 struct marker_entry *mark_entry;
665
666 mutex_lock(&markers_mutex);
667 for (iter = begin; iter < end; iter++) {
668 mark_entry = get_marker(iter->channel, iter->name);
669 if (mark_entry) {
670 set_marker(mark_entry, iter, !!mark_entry->refcount);
671 /*
672 * ignore error, continue
673 */
674 } else {
675 disable_marker(iter);
676 }
677 }
678 mutex_unlock(&markers_mutex);
679}
680
681/*
682 * Update probes, removing the faulty probes.
683 *
684 * Internal callback only changed before the first probe is connected to it.
685 * Single probe private data can only be changed on 0 -> 1 and 2 -> 1
686 * transitions. All other transitions will leave the old private data valid.
687 * This makes the non-atomicity of the callback/private data updates valid.
688 *
689 * "special case" updates :
690 * 0 -> 1 callback
691 * 1 -> 0 callback
692 * 1 -> 2 callbacks
693 * 2 -> 1 callbacks
694 * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates.
695 * Site effect : marker_set_format may delete the marker entry (creating a
696 * replacement).
697 */
698static void marker_update_probes(void)
699{
700 /* Core kernel markers */
701 marker_update_probe_range(__start___markers, __stop___markers);
702 /* Markers in modules. */
703 module_update_markers();
704 tracepoint_probe_update_all();
705 /* Update immediate values */
706 core_imv_update();
707 module_imv_update();
708 marker_update_processes();
709}
710
711/**
712 * marker_probe_register - Connect a probe to a marker
713 * @channel: marker channel
714 * @name: marker name
715 * @format: format string
716 * @probe: probe handler
717 * @probe_private: probe private data
718 *
719 * private data must be a valid allocated memory address, or NULL.
720 * Returns 0 if ok, error value on error.
721 * The probe address must at least be aligned on the architecture pointer size.
722 */
723int marker_probe_register(const char *channel, const char *name,
724 const char *format, marker_probe_func *probe,
725 void *probe_private)
726{
727 struct marker_entry *entry;
728 int ret = 0, ret_err;
729 struct marker_probe_closure *old;
730 int first_probe = 0;
731
732 mutex_lock(&markers_mutex);
733 entry = get_marker(channel, name);
734 if (!entry) {
735 first_probe = 1;
736 entry = add_marker(channel, name, format);
737 if (IS_ERR(entry))
738 ret = PTR_ERR(entry);
739 if (ret)
740 goto end;
741 ret = ltt_channels_register(channel);
742 if (ret)
743 goto error_remove_marker;
744 ret = ltt_channels_get_index_from_name(channel);
745 if (ret < 0)
746 goto error_unregister_channel;
747 entry->channel_id = ret;
748 ret = ltt_channels_get_event_id(channel, name);
749 if (ret < 0)
750 goto error_unregister_channel;
751 entry->event_id = ret;
752 ret = 0;
753 trace_mark(metadata, core_marker_id,
754 "channel %s name %s event_id %hu "
755 "int #1u%zu long #1u%zu pointer #1u%zu "
756 "size_t #1u%zu alignment #1u%u",
757 channel, name, entry->event_id,
758 sizeof(int), sizeof(long), sizeof(void *),
759 sizeof(size_t), ltt_get_alignment());
760 } else if (format) {
761 if (!entry->format)
762 ret = marker_set_format(entry, format);
763 else if (strcmp(entry->format, format))
764 ret = -EPERM;
765 if (ret)
766 goto end;
767 }
768
769 /*
770 * If we detect that a call_rcu is pending for this marker,
771 * make sure it's executed now.
772 */
773 if (entry->rcu_pending)
774 rcu_barrier_sched();
775 old = marker_entry_add_probe(entry, probe, probe_private);
776 if (IS_ERR(old)) {
777 ret = PTR_ERR(old);
778 if (first_probe)
779 goto error_unregister_channel;
780 else
781 goto end;
782 }
783 mutex_unlock(&markers_mutex);
784
785 marker_update_probes();
786
787 mutex_lock(&markers_mutex);
788 entry = get_marker(channel, name);
789 if (!entry)
790 goto end;
791 if (entry->rcu_pending)
792 rcu_barrier_sched();
793 entry->oldptr = old;
794 entry->rcu_pending = 1;
795 /* write rcu_pending before calling the RCU callback */
796 smp_wmb();
797 call_rcu_sched(&entry->rcu, free_old_closure);
798 goto end;
799
800error_unregister_channel:
801 ret_err = ltt_channels_unregister(channel);
802 WARN_ON(ret_err);
803error_remove_marker:
804 ret_err = remove_marker(channel, name);
805 WARN_ON(ret_err);
806end:
807 mutex_unlock(&markers_mutex);
808 return ret;
809}
810EXPORT_SYMBOL_GPL(marker_probe_register);
811
812/**
813 * marker_probe_unregister - Disconnect a probe from a marker
814 * @channel: marker channel
815 * @name: marker name
816 * @probe: probe function pointer
817 * @probe_private: probe private data
818 *
819 * Returns the private data given to marker_probe_register, or an ERR_PTR().
820 * We do not need to call a synchronize_sched to make sure the probes have
821 * finished running before doing a module unload, because the module unload
822 * itself uses stop_machine(), which insures that every preempt disabled section
823 * have finished.
824 */
825int marker_probe_unregister(const char *channel, const char *name,
826 marker_probe_func *probe, void *probe_private)
827{
828 struct marker_entry *entry;
829 struct marker_probe_closure *old;
830 int ret = -ENOENT;
831
832 mutex_lock(&markers_mutex);
833 entry = get_marker(channel, name);
834 if (!entry)
835 goto end;
836 if (entry->rcu_pending)
837 rcu_barrier_sched();
838 old = marker_entry_remove_probe(entry, probe, probe_private);
839 mutex_unlock(&markers_mutex);
840
841 marker_update_probes();
842
843 mutex_lock(&markers_mutex);
844 entry = get_marker(channel, name);
845 if (!entry)
846 goto end;
847 if (entry->rcu_pending)
848 rcu_barrier_sched();
849 entry->oldptr = old;
850 entry->rcu_pending = 1;
851 /* write rcu_pending before calling the RCU callback */
852 smp_wmb();
853 call_rcu_sched(&entry->rcu, free_old_closure);
854 remove_marker(channel, name); /* Ignore busy error message */
855 ret = 0;
856end:
857 mutex_unlock(&markers_mutex);
858 return ret;
859}
860EXPORT_SYMBOL_GPL(marker_probe_unregister);
861
862static struct marker_entry *
863get_marker_from_private_data(marker_probe_func *probe, void *probe_private)
864{
865 struct marker_entry *entry;
866 unsigned int i;
867 struct hlist_head *head;
868 struct hlist_node *node;
869
870 for (i = 0; i < MARKER_TABLE_SIZE; i++) {
871 head = &marker_table[i];
872 hlist_for_each_entry(entry, node, head, hlist) {
873 if (!entry->ptype) {
874 if (entry->single.func == probe
875 && entry->single.probe_private
876 == probe_private)
877 return entry;
878 } else {
879 struct marker_probe_closure *closure;
880 closure = entry->multi;
881 for (i = 0; closure[i].func; i++) {
882 if (closure[i].func == probe &&
883 closure[i].probe_private
884 == probe_private)
885 return entry;
886 }
887 }
888 }
889 }
890 return NULL;
891}
892
893/**
894 * marker_probe_unregister_private_data - Disconnect a probe from a marker
895 * @probe: probe function
896 * @probe_private: probe private data
897 *
898 * Unregister a probe by providing the registered private data.
899 * Only removes the first marker found in hash table.
900 * Return 0 on success or error value.
901 * We do not need to call a synchronize_sched to make sure the probes have
902 * finished running before doing a module unload, because the module unload
903 * itself uses stop_machine(), which insures that every preempt disabled section
904 * have finished.
905 */
906int marker_probe_unregister_private_data(marker_probe_func *probe,
907 void *probe_private)
908{
909 struct marker_entry *entry;
910 int ret = 0;
911 struct marker_probe_closure *old;
912 const char *channel = NULL, *name = NULL;
913
914 mutex_lock(&markers_mutex);
915 entry = get_marker_from_private_data(probe, probe_private);
916 if (!entry) {
917 ret = -ENOENT;
918 goto end;
919 }
920 if (entry->rcu_pending)
921 rcu_barrier_sched();
922 old = marker_entry_remove_probe(entry, NULL, probe_private);
923 channel = kstrdup(entry->channel, GFP_KERNEL);
924 name = kstrdup(entry->name, GFP_KERNEL);
925 mutex_unlock(&markers_mutex);
926
927 marker_update_probes();
928
929 mutex_lock(&markers_mutex);
930 entry = get_marker(channel, name);
931 if (!entry)
932 goto end;
933 if (entry->rcu_pending)
934 rcu_barrier_sched();
935 entry->oldptr = old;
936 entry->rcu_pending = 1;
937 /* write rcu_pending before calling the RCU callback */
938 smp_wmb();
939 call_rcu_sched(&entry->rcu, free_old_closure);
940 /* Ignore busy error message */
941 remove_marker(channel, name);
942end:
943 mutex_unlock(&markers_mutex);
944 kfree(channel);
945 kfree(name);
946 return ret;
947}
948EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data);
949
950/**
951 * marker_get_private_data - Get a marker's probe private data
952 * @channel: marker channel
953 * @name: marker name
954 * @probe: probe to match
955 * @num: get the nth matching probe's private data
956 *
957 * Returns the nth private data pointer (starting from 0) matching, or an
958 * ERR_PTR.
959 * Returns the private data pointer, or an ERR_PTR.
960 * The private data pointer should _only_ be dereferenced if the caller is the
961 * owner of the data, or its content could vanish. This is mostly used to
962 * confirm that a caller is the owner of a registered probe.
963 */
964void *marker_get_private_data(const char *channel, const char *name,
965 marker_probe_func *probe, int num)
966{
967 struct hlist_head *head;
968 struct hlist_node *node;
969 struct marker_entry *e;
970 size_t channel_len = strlen(channel) + 1;
971 size_t name_len = strlen(name) + 1;
972 int i;
973 u32 hash;
974
975 hash = jhash(channel, channel_len-1, 0) ^ jhash(name, name_len-1, 0);
976 head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
977 hlist_for_each_entry(e, node, head, hlist) {
978 if (!strcmp(channel, e->channel) && !strcmp(name, e->name)) {
979 if (!e->ptype) {
980 if (num == 0 && e->single.func == probe)
981 return e->single.probe_private;
982 } else {
983 struct marker_probe_closure *closure;
984 int match = 0;
985 closure = e->multi;
986 for (i = 0; closure[i].func; i++) {
987 if (closure[i].func != probe)
988 continue;
989 if (match++ == num)
990 return closure[i].probe_private;
991 }
992 }
993 break;
994 }
995 }
996 return ERR_PTR(-ENOENT);
997}
998EXPORT_SYMBOL_GPL(marker_get_private_data);
999
1000/**
1001 * markers_compact_event_ids - Compact markers event IDs and reassign channels
1002 *
1003 * Called when no channel users are active by the channel infrastructure.
1004 * Called with lock_markers() and channel mutex held.
1005 */
1006void markers_compact_event_ids(void)
1007{
1008 struct marker_entry *entry;
1009 unsigned int i;
1010 struct hlist_head *head;
1011 struct hlist_node *node;
1012 int ret;
1013
1014 for (i = 0; i < MARKER_TABLE_SIZE; i++) {
1015 head = &marker_table[i];
1016 hlist_for_each_entry(entry, node, head, hlist) {
1017 ret = ltt_channels_get_index_from_name(entry->channel);
1018 WARN_ON(ret < 0);
1019 entry->channel_id = ret;
1020 ret = _ltt_channels_get_event_id(entry->channel,
1021 entry->name);
1022 WARN_ON(ret < 0);
1023 entry->event_id = ret;
1024 }
1025 }
1026}
1027
1028#ifdef CONFIG_MODULES
1029
/**
 * marker_get_iter_range - Get a next marker iterator given a range.
 * @marker: current markers (in), next marker (out)
 * @begin: beginning of the range
 * @end: end of the range
 *
 * Returns whether a next marker has been found (1) or not (0).
 * Will return the first marker in the range if the input marker is NULL.
 */
int marker_get_iter_range(struct marker **marker, struct marker *begin,
	struct marker *end)
{
	/* No current position and a non-empty range: start at its head */
	if (*marker == NULL && begin != end) {
		*marker = begin;
		return 1;
	}
	/* Otherwise the current position is valid only inside [begin, end) */
	return (*marker >= begin && *marker < end) ? 1 : 0;
}
1050EXPORT_SYMBOL_GPL(marker_get_iter_range);
1051
1052static void marker_get_iter(struct marker_iter *iter)
1053{
1054 int found = 0;
1055
1056 /* Core kernel markers */
1057 if (!iter->module) {
1058 found = marker_get_iter_range(&iter->marker,
1059 __start___markers, __stop___markers);
1060 if (found)
1061 goto end;
1062 }
1063 /* Markers in modules. */
1064 found = module_get_iter_markers(iter);
1065end:
1066 if (!found)
1067 marker_iter_reset(iter);
1068}
1069
/*
 * marker_iter_start - position @iter on a valid marker.
 *
 * Delegates to marker_get_iter(), which leaves the iterator reset
 * (module and marker both NULL) when no marker is found.
 */
void marker_iter_start(struct marker_iter *iter)
{
	marker_get_iter(iter);
}
EXPORT_SYMBOL_GPL(marker_iter_start);
1075
1076void marker_iter_next(struct marker_iter *iter)
1077{
1078 iter->marker++;
1079 /*
1080 * iter->marker may be invalid because we blindly incremented it.
1081 * Make sure it is valid by marshalling on the markers, getting the
1082 * markers from following modules if necessary.
1083 */
1084 marker_get_iter(iter);
1085}
1086EXPORT_SYMBOL_GPL(marker_iter_next);
1087
/*
 * marker_iter_stop - end an iteration over markers.
 *
 * Intentionally empty: nothing is released here.
 */
void marker_iter_stop(struct marker_iter *iter)
{
}
EXPORT_SYMBOL_GPL(marker_iter_stop);
1092
1093void marker_iter_reset(struct marker_iter *iter)
1094{
1095 iter->module = NULL;
1096 iter->marker = NULL;
1097}
1098EXPORT_SYMBOL_GPL(marker_iter_reset);
1099
1100#ifdef CONFIG_MARKERS_USERSPACE
1101/*
1102 * must be called with current->user_markers_mutex held
1103 */
1104static void free_user_marker(char __user *state, struct hlist_head *head)
1105{
1106 struct user_marker *umark;
1107 struct hlist_node *pos, *n;
1108
1109 hlist_for_each_entry_safe(umark, pos, n, head, hlist) {
1110 if (umark->state == state) {
1111 hlist_del(&umark->hlist);
1112 kfree(umark);
1113 }
1114 }
1115}
1116
1117asmlinkage long sys_marker(char __user *name, char __user *format,
1118 char __user *state, int reg)
1119{
1120 struct user_marker *umark;
1121 long len;
1122 struct marker_entry *entry;
1123 int ret = 0;
1124
1125 printk(KERN_DEBUG "Program %s %s marker [%p, %p]\n",
1126 current->comm, reg ? "registers" : "unregisters",
1127 name, state);
1128 if (reg) {
1129 umark = kmalloc(sizeof(struct user_marker), GFP_KERNEL);
1130 umark->name[MAX_USER_MARKER_NAME_LEN - 1] = '\0';
1131 umark->format[MAX_USER_MARKER_FORMAT_LEN - 1] = '\0';
1132 umark->state = state;
1133 len = strncpy_from_user(umark->name, name,
1134 MAX_USER_MARKER_NAME_LEN - 1);
1135 if (len < 0) {
1136 ret = -EFAULT;
1137 goto error;
1138 }
1139 len = strncpy_from_user(umark->format, format,
1140 MAX_USER_MARKER_FORMAT_LEN - 1);
1141 if (len < 0) {
1142 ret = -EFAULT;
1143 goto error;
1144 }
1145 printk(KERN_DEBUG "Marker name : %s, format : %s", umark->name,
1146 umark->format);
1147 mutex_lock(&markers_mutex);
1148 entry = get_marker("userspace", umark->name);
1149 if (entry) {
1150 if (entry->format &&
1151 strcmp(entry->format, umark->format) != 0) {
1152 printk(" error, wrong format in process %s",
1153 current->comm);
1154 ret = -EPERM;
1155 goto error_unlock;
1156 }
1157 printk(" %s", !!entry->refcount
1158 ? "enabled" : "disabled");
1159 if (put_user(!!entry->refcount, state)) {
1160 ret = -EFAULT;
1161 goto error_unlock;
1162 }
1163 printk("\n");
1164 } else {
1165 printk(" disabled\n");
1166 if (put_user(0, umark->state)) {
1167 printk(KERN_WARNING
1168 "Marker in %s caused a fault\n",
1169 current->comm);
1170 goto error_unlock;
1171 }
1172 }
1173 mutex_lock(&current->group_leader->user_markers_mutex);
1174 hlist_add_head(&umark->hlist,
1175 &current->group_leader->user_markers);
1176 current->group_leader->user_markers_sequence++;
1177 mutex_unlock(&current->group_leader->user_markers_mutex);
1178 mutex_unlock(&markers_mutex);
1179 } else {
1180 mutex_lock(&current->group_leader->user_markers_mutex);
1181 free_user_marker(state,
1182 &current->group_leader->user_markers);
1183 current->group_leader->user_markers_sequence++;
1184 mutex_unlock(&current->group_leader->user_markers_mutex);
1185 }
1186 goto end;
1187error_unlock:
1188 mutex_unlock(&markers_mutex);
1189error:
1190 kfree(umark);
1191end:
1192 return ret;
1193}
1194
1195/*
1196 * Types :
1197 * string : 0
1198 */
1199asmlinkage long sys_trace(int type, uint16_t id,
1200 char __user *ubuf)
1201{
1202 long ret = -EPERM;
1203 char *page;
1204 int len;
1205
1206 switch (type) {
1207 case 0: /* String */
1208 ret = -ENOMEM;
1209 page = (char *)__get_free_page(GFP_TEMPORARY);
1210 if (!page)
1211 goto string_out;
1212 len = strncpy_from_user(page, ubuf, PAGE_SIZE);
1213 if (len < 0) {
1214 ret = -EFAULT;
1215 goto string_err;
1216 }
1217 trace_mark(userspace, string, "string %s", page);
1218string_err:
1219 free_page((unsigned long) page);
1220string_out:
1221 break;
1222 default:
1223 break;
1224 }
1225 return ret;
1226}
1227
1228static void marker_update_processes(void)
1229{
1230 struct task_struct *g, *t;
1231
1232 /*
1233 * markers_mutex is taken to protect the p->user_markers read.
1234 */
1235 mutex_lock(&markers_mutex);
1236 read_lock(&tasklist_lock);
1237 for_each_process(g) {
1238 WARN_ON(!thread_group_leader(g));
1239 if (hlist_empty(&g->user_markers))
1240 continue;
1241 if (strcmp(g->comm, "testprog") == 0)
1242 printk(KERN_DEBUG "set update pending for testprog\n");
1243 t = g;
1244 do {
1245 /* TODO : implement this thread flag in each arch. */
1246 set_tsk_thread_flag(t, TIF_MARKER_PENDING);
1247 } while ((t = next_thread(t)) != g);
1248 }
1249 read_unlock(&tasklist_lock);
1250 mutex_unlock(&markers_mutex);
1251}
1252
1253/*
1254 * Update current process.
1255 * Note that we have to wait a whole scheduler period before we are sure that
1256 * every running userspace threads have their markers updated.
1257 * (synchronize_sched() can be used to insure this).
1258 */
1259void marker_update_process(void)
1260{
1261 struct user_marker *umark;
1262 struct hlist_node *pos;
1263 struct marker_entry *entry;
1264
1265 mutex_lock(&markers_mutex);
1266 mutex_lock(&current->group_leader->user_markers_mutex);
1267 if (strcmp(current->comm, "testprog") == 0)
1268 printk(KERN_DEBUG "do update pending for testprog\n");
1269 hlist_for_each_entry(umark, pos,
1270 &current->group_leader->user_markers, hlist) {
1271 printk(KERN_DEBUG "Updating marker %s in %s\n",
1272 umark->name, current->comm);
1273 entry = get_marker("userspace", umark->name);
1274 if (entry) {
1275 if (entry->format &&
1276 strcmp(entry->format, umark->format) != 0) {
1277 printk(KERN_WARNING
1278 " error, wrong format in process %s\n",
1279 current->comm);
1280 break;
1281 }
1282 if (put_user(!!entry->refcount, umark->state)) {
1283 printk(KERN_WARNING
1284 "Marker in %s caused a fault\n",
1285 current->comm);
1286 break;
1287 }
1288 } else {
1289 if (put_user(0, umark->state)) {
1290 printk(KERN_WARNING
1291 "Marker in %s caused a fault\n",
1292 current->comm);
1293 break;
1294 }
1295 }
1296 }
1297 clear_thread_flag(TIF_MARKER_PENDING);
1298 mutex_unlock(&current->group_leader->user_markers_mutex);
1299 mutex_unlock(&markers_mutex);
1300}
1301
1302/*
1303 * Called at process exit and upon do_execve().
1304 * We assume that when the leader exits, no more references can be done to the
1305 * leader structure by the other threads.
1306 */
1307void exit_user_markers(struct task_struct *p)
1308{
1309 struct user_marker *umark;
1310 struct hlist_node *pos, *n;
1311
1312 if (thread_group_leader(p)) {
1313 mutex_lock(&markers_mutex);
1314 mutex_lock(&p->user_markers_mutex);
1315 hlist_for_each_entry_safe(umark, pos, n, &p->user_markers,
1316 hlist)
1317 kfree(umark);
1318 INIT_HLIST_HEAD(&p->user_markers);
1319 p->user_markers_sequence++;
1320 mutex_unlock(&p->user_markers_mutex);
1321 mutex_unlock(&markers_mutex);
1322 }
1323}
1324
1325int is_marker_enabled(const char *channel, const char *name)
1326{
1327 struct marker_entry *entry;
1328
1329 mutex_lock(&markers_mutex);
1330 entry = get_marker(channel, name);
1331 mutex_unlock(&markers_mutex);
1332
1333 return entry && !!entry->refcount;
1334}
1335#endif
1336
1337int marker_module_notify(struct notifier_block *self,
1338 unsigned long val, void *data)
1339{
1340 struct module *mod = data;
1341
1342 switch (val) {
1343 case MODULE_STATE_COMING:
1344 marker_update_probe_range(mod->markers,
1345 mod->markers + mod->num_markers);
1346 break;
1347 case MODULE_STATE_GOING:
1348 marker_update_probe_range(mod->markers,
1349 mod->markers + mod->num_markers);
1350 break;
1351 }
1352 return 0;
1353}
1354
1355struct notifier_block marker_module_nb = {
1356 .notifier_call = marker_module_notify,
1357 .priority = 0,
1358};
1359
1360static int init_markers(void)
1361{
1362 return register_module_notifier(&marker_module_nb);
1363}
1364__initcall(init_markers);
1365
1366#endif /* CONFIG_MODULES */
1367
1368void ltt_dump_marker_state(struct ltt_trace_struct *trace)
1369{
1370 struct marker_iter iter;
1371 struct ltt_probe_private_data call_data;
1372 const char *channel;
1373
1374 call_data.trace = trace;
1375 call_data.serializer = NULL;
1376
1377 marker_iter_reset(&iter);
1378 marker_iter_start(&iter);
1379 for (; iter.marker != NULL; marker_iter_next(&iter)) {
1380 if (!_imv_read(iter.marker->state))
1381 continue;
1382 channel = ltt_channels_get_name_from_index(
1383 iter.marker->channel_id);
1384 __trace_mark(0, metadata, core_marker_id,
1385 &call_data,
1386 "channel %s name %s event_id %hu "
1387 "int #1u%zu long #1u%zu pointer #1u%zu "
1388 "size_t #1u%zu alignment #1u%u",
1389 channel,
1390 iter.marker->name,
1391 iter.marker->event_id,
1392 sizeof(int), sizeof(long),
1393 sizeof(void *), sizeof(size_t),
1394 ltt_get_alignment());
1395 if (iter.marker->format)
1396 __trace_mark(0, metadata,
1397 core_marker_format,
1398 &call_data,
1399 "channel %s name %s format %s",
1400 channel,
1401 iter.marker->name,
1402 iter.marker->format);
1403 }
1404 marker_iter_stop(&iter);
1405}
1406EXPORT_SYMBOL_GPL(ltt_dump_marker_state);
This page took 0.06975 seconds and 4 git commands to generate.